From c203826dda2e71206eca1cbcd18ebee8689a7794 Mon Sep 17 00:00:00 2001 From: Werner Kramer Date: Mon, 2 Jan 2023 08:59:29 +0100 Subject: [PATCH 01/15] Copy cta source to own folder --- costa/native/cta/cta_util_timing.f90 | 89 + costa/native/cta/include/cta.h | 57 + costa/native/cta/include/cta_array.h | 317 ++ costa/native/cta/include/cta_bb_modbuild.h | 153 + .../cta/include/cta_bb_modbuild_utils.h | 171 + costa/native/cta/include/cta_datatypes.h | 208 + costa/native/cta/include/cta_datetime.h | 122 + costa/native/cta/include/cta_defaults.h | 111 + costa/native/cta/include/cta_errors.h | 108 + costa/native/cta/include/cta_f77blas.h | 167 + costa/native/cta/include/cta_f77lapack.h | 101 + costa/native/cta/include/cta_file.h | 133 + costa/native/cta/include/cta_flush.h | 49 + costa/native/cta/include/cta_functions.h | 152 + costa/native/cta/include/cta_handles.h | 178 + costa/native/cta/include/cta_initialise.h | 46 + costa/native/cta/include/cta_interface.h | 143 + costa/native/cta/include/cta_matrix.h | 297 ++ costa/native/cta/include/cta_matrix_blas.h | 294 ++ costa/native/cta/include/cta_mem.h | 64 + costa/native/cta/include/cta_message.h | 100 + costa/native/cta/include/cta_metainfo.h | 191 + costa/native/cta/include/cta_method.h | 89 + costa/native/cta/include/cta_modbuild_b3b.h | 140 + .../cta/include/cta_modbuild_b3b_utils.h | 170 + costa/native/cta/include/cta_modbuild_par.h | 56 + costa/native/cta/include/cta_modbuild_sp.h | 78 + costa/native/cta/include/cta_model.h | 587 +++ costa/native/cta/include/cta_model_factory.h | 84 + .../native/cta/include/cta_model_utilities.h | 76 + costa/native/cta/include/cta_modelcombiner.h | 45 + costa/native/cta/include/cta_obsdescr.h | 179 + .../native/cta/include/cta_obsdescr_sqlite3.h | 158 + costa/native/cta/include/cta_obsdescr_table.h | 132 + costa/native/cta/include/cta_pack.h | 149 + costa/native/cta/include/cta_par.h | 138 + costa/native/cta/include/cta_reltable.h | 172 + costa/native/cta/include/cta_resultwriter.h | 187 + costa/native/cta/include/cta_sobs.h | 228 + costa/native/cta/include/cta_sobs_combine.h | 48 + costa/native/cta/include/cta_sobs_netcdf.h | 200 + costa/native/cta/include/cta_sobs_sqlite3.h | 188 + costa/native/cta/include/cta_string.h | 184 + costa/native/cta/include/cta_system.h | 71 + costa/native/cta/include/cta_time.h | 189 + costa/native/cta/include/cta_tree.h | 187 + costa/native/cta/include/cta_treevector.h | 512 ++ costa/native/cta/include/cta_usr_matrix.h | 456 ++ costa/native/cta/include/cta_usr_method.h | 151 + costa/native/cta/include/cta_usr_model.h | 437 ++ costa/native/cta/include/cta_usr_obs_desc.h | 220 + .../cta/include/cta_usr_stoch_observer.h | 294 ++ costa/native/cta/include/cta_usr_vector.h | 440 ++ costa/native/cta/include/cta_util_methods.h | 90 + costa/native/cta/include/cta_util_sort.h | 53 + costa/native/cta/include/cta_util_sqlite3.h | 91 + .../native/cta/include/cta_util_statistics.h | 62 + costa/native/cta/include/cta_vector.h | 363 ++ costa/native/cta/include/cta_vector_blas.h | 322 ++ costa/native/cta/include/cta_xml.h | 57 + costa/native/cta/include/ctai.h | 37 + costa/native/cta/include/ctai_datatypes.h | 53 + costa/native/cta/include/ctai_handles.h | 69 + costa/native/cta/include/ctai_sobs.h | 41 + costa/native/cta/include/ctai_string.h | 48 + costa/native/cta/include/ctai_vector.h | 103 + costa/native/cta/include/ctai_xml.h | 74 + costa/native/cta/include/f_cta_utils.h | 61 + .../cta/include/modbuild_sp_model_template.h | 123 + costa/native/cta/src/CMakeLists.txt | 23 + costa/native/cta/src/cta_array.c | 1293 +++++ costa/native/cta/src/cta_datatypes.c | 224 + costa/native/cta/src/cta_datetime.c | 130 + costa/native/cta/src/cta_defaults.c | 91 + costa/native/cta/src/cta_file.c | 399 ++ costa/native/cta/src/cta_flush.c | 32 + costa/native/cta/src/cta_functions.c | 606 +++ costa/native/cta/src/cta_handles.c | 975 ++++ costa/native/cta/src/cta_initialise.c | 144 + costa/native/cta/src/cta_interface.c | 228 + costa/native/cta/src/cta_matrix.c | 960 ++++ costa/native/cta/src/cta_matrix_blas.c | 805 +++ costa/native/cta/src/cta_mem.c | 59 + costa/native/cta/src/cta_message.c | 111 + costa/native/cta/src/cta_metainfo.c | 1147 +++++ costa/native/cta/src/cta_method.c | 208 + costa/native/cta/src/cta_modbuild_par.c | 3296 +++++++++++++ costa/native/cta/src/cta_modbuild_sp.c | 1351 ++++++ costa/native/cta/src/cta_model.c | 2120 ++++++++ costa/native/cta/src/cta_model_factory.c | 983 ++++ costa/native/cta/src/cta_model_utilities.c | 78 + costa/native/cta/src/cta_obsdescr.c | 576 +++ costa/native/cta/src/cta_obsdescr_combine.c | 392 ++ costa/native/cta/src/cta_obsdescr_maori.c | 59 + costa/native/cta/src/cta_obsdescr_netcdf.c | 550 +++ costa/native/cta/src/cta_obsdescr_sqlite3.c | 564 +++ costa/native/cta/src/cta_obsdescr_table.c | 611 +++ costa/native/cta/src/cta_obsdescr_user.c | 59 + costa/native/cta/src/cta_pack.c | 381 ++ costa/native/cta/src/cta_par.c | 905 ++++ costa/native/cta/src/cta_reltable.c | 642 +++ costa/native/cta/src/cta_resultwriter.c | 251 + costa/native/cta/src/cta_sobs.c | 814 ++++ costa/native/cta/src/cta_sobs_combine.c | 704 +++ costa/native/cta/src/cta_sobs_factory.c | 310 ++ costa/native/cta/src/cta_sobs_maori.c | 74 + costa/native/cta/src/cta_sobs_netcdf.c | 1021 ++++ costa/native/cta/src/cta_sobs_sqlite3.c | 743 +++ costa/native/cta/src/cta_sobs_user.c | 75 + costa/native/cta/src/cta_string.c | 454 ++ costa/native/cta/src/cta_time.c | 634 +++ costa/native/cta/src/cta_tree.c | 771 +++ costa/native/cta/src/cta_treevector.c | 4301 +++++++++++++++++ costa/native/cta/src/cta_util_methods.c | 453 ++ costa/native/cta/src/cta_util_sort.c | 78 + costa/native/cta/src/cta_util_sqlite3.c | 372 ++ costa/native/cta/src/cta_util_statistics.c | 151 + costa/native/cta/src/cta_vector.c | 1747 +++++++ costa/native/cta/src/cta_vector_blas.c | 1357 ++++++ costa/native/cta/src/cta_xml.c | 860 ++++ costa/native/cta/src/f_cta_defaults.c | 47 + costa/native/cta/src/f_cta_utils.c | 62 + costa/native/cta_f90/cta_f90_parameters.f90 | 25 + costa/native/cta_f90/generated/cta_f90.f90 | 87 + .../cta_f90/generated/cta_f90_array.f90 | 487 ++ .../cta_f90/generated/cta_f90_datatypes.f90 | 23 + .../cta_f90/generated/cta_f90_datetime.f90 | 150 + .../native/cta_f90/generated/cta_f90_file.f90 | 149 + .../cta_f90/generated/cta_f90_flush_mod.f90 | 22 + .../cta_f90/generated/cta_f90_functions.f90 | 145 + .../cta_f90/generated/cta_f90_handles.f90 | 223 + .../cta_f90/generated/cta_f90_initialise.f90 | 27 + .../cta_f90/generated/cta_f90_interface.f90 | 109 + .../cta_f90/generated/cta_f90_matrix.f90 | 373 ++ .../native/cta_f90/generated/cta_f90_mem.f90 | 19 + .../cta_f90/generated/cta_f90_message.f90 | 60 + .../cta_f90/generated/cta_f90_metainfo.f90 | 60 + .../cta_f90/generated/cta_f90_method.f90 | 71 + .../generated/cta_f90_modbuild_par.f90 | 34 + .../cta_f90/generated/cta_f90_model.f90 | 618 +++ .../generated/cta_f90_model_factory.f90 | 43 + .../generated/cta_f90_model_utilities.f90 | 42 + .../cta_f90/generated/cta_f90_obsdescr.f90 | 191 + .../native/cta_f90/generated/cta_f90_pack.f90 | 604 +++ .../native/cta_f90/generated/cta_f90_par.f90 | 62 + .../cta_f90/generated/cta_f90_parameters.f90 | 25 + .../cta_f90/generated/cta_f90_reltable.f90 | 157 + .../generated/cta_f90_resultwriter.f90 | 91 + .../native/cta_f90/generated/cta_f90_sobs.f90 | 271 ++ .../cta_f90/generated/cta_f90_string.f90 | 229 + .../native/cta_f90/generated/cta_f90_time.f90 | 249 + .../native/cta_f90/generated/cta_f90_tree.f90 | 242 + .../cta_f90/generated/cta_f90_treevector.f90 | 1146 +++++ .../generated/cta_f90_util_methods.f90 | 85 + .../cta_f90/generated/cta_f90_util_sort.f90 | 31 + .../generated/cta_f90_util_statistics.f90 | 44 + .../cta_f90/generated/cta_f90_vector.f90 | 1013 ++++ .../native/cta_f90/generated/cta_f90_xml.f90 | 39 + costa/native/cta_f90/include/cta_f77.inc | 247 + costa/native/cta_f90/include/cta_f90.inc | 19 + .../cta_f90/include/cta_f90_contains.inc | 42 + costa/native/external/blas/caxpy.f | 34 + costa/native/external/blas/ccopy.f | 33 + costa/native/external/blas/cdotc.f | 38 + costa/native/external/blas/cdotu.f | 37 + costa/native/external/blas/cgbmv.f | 322 ++ costa/native/external/blas/cgemm.f | 414 ++ costa/native/external/blas/cgemv.f | 281 ++ costa/native/external/blas/cgerc.f | 157 + costa/native/external/blas/cgeru.f | 157 + costa/native/external/blas/chbmv.f | 309 ++ costa/native/external/blas/chemm.f | 304 ++ costa/native/external/blas/chemv.f | 266 + costa/native/external/blas/cher.f | 212 + costa/native/external/blas/cher2.f | 249 + costa/native/external/blas/cher2k.f | 371 ++ costa/native/external/blas/cherk.f | 328 ++ costa/native/external/blas/chpmv.f | 270 ++ costa/native/external/blas/chpr.f | 217 + costa/native/external/blas/chpr2.f | 251 + costa/native/external/blas/crotg.f | 20 + costa/native/external/blas/cscal.f | 28 + costa/native/external/blas/csscal.f | 29 + costa/native/external/blas/cswap.f | 36 + costa/native/external/blas/csymm.f | 296 ++ costa/native/external/blas/csyr2k.f | 324 ++ costa/native/external/blas/csyrk.f | 293 ++ costa/native/external/blas/ctbmv.f | 377 ++ costa/native/external/blas/ctbsv.f | 381 ++ costa/native/external/blas/ctpmv.f | 338 ++ costa/native/external/blas/ctpsv.f | 341 ++ costa/native/external/blas/ctrmm.f | 392 ++ costa/native/external/blas/ctrmv.f | 321 ++ costa/native/external/blas/ctrsm.f | 414 ++ costa/native/external/blas/ctrsv.f | 324 ++ costa/native/external/blas/dasum.f | 43 + costa/native/external/blas/daxpy.f | 48 + costa/native/external/blas/dcabs1.f | 8 + costa/native/external/blas/dcopy.f | 50 + costa/native/external/blas/ddot.f | 49 + costa/native/external/blas/dgbmv.f | 300 ++ costa/native/external/blas/dgemm.f | 313 ++ costa/native/external/blas/dgemv.f | 261 + costa/native/external/blas/dger.f | 157 + costa/native/external/blas/dnrm2.f | 60 + costa/native/external/blas/drot.f | 37 + costa/native/external/blas/drotg.f | 27 + costa/native/external/blas/dsbmv.f | 303 ++ costa/native/external/blas/dscal.f | 43 + costa/native/external/blas/dspmv.f | 262 + costa/native/external/blas/dspr.f | 198 + costa/native/external/blas/dspr2.f | 229 + costa/native/external/blas/dswap.f | 56 + costa/native/external/blas/dsymm.f | 294 ++ costa/native/external/blas/dsymv.f | 262 + costa/native/external/blas/dsyr.f | 197 + costa/native/external/blas/dsyr2.f | 230 + costa/native/external/blas/dsyr2k.f | 327 ++ costa/native/external/blas/dsyrk.f | 294 ++ costa/native/external/blas/dtbmv.f | 342 ++ costa/native/external/blas/dtbsv.f | 346 ++ costa/native/external/blas/dtpmv.f | 299 ++ costa/native/external/blas/dtpsv.f | 302 ++ costa/native/external/blas/dtrmm.f | 355 ++ costa/native/external/blas/dtrmv.f | 286 ++ costa/native/external/blas/dtrsm.f | 378 ++ costa/native/external/blas/dtrsv.f | 289 ++ costa/native/external/blas/dzasum.f | 34 + costa/native/external/blas/dznrm2.f | 67 + costa/native/external/blas/icamax.f | 43 + costa/native/external/blas/idamax.f | 39 + costa/native/external/blas/isamax.f | 39 + costa/native/external/blas/izamax.f | 41 + costa/native/external/blas/lsame.f | 87 + costa/native/external/blas/sasum.f | 44 + costa/native/external/blas/saxpy.f | 48 + costa/native/external/blas/scasum.f | 34 + costa/native/external/blas/scnrm2.f | 67 + costa/native/external/blas/scopy.f | 50 + costa/native/external/blas/sdot.f | 49 + costa/native/external/blas/sgbmv.f | 300 ++ costa/native/external/blas/sgemm.f | 313 ++ costa/native/external/blas/sgemv.f | 261 + costa/native/external/blas/sger.f | 157 + costa/native/external/blas/snrm2.f | 60 + costa/native/external/blas/srot.f | 37 + costa/native/external/blas/srotg.f | 27 + costa/native/external/blas/ssbmv.f | 303 ++ costa/native/external/blas/sscal.f | 43 + costa/native/external/blas/sspmv.f | 262 + costa/native/external/blas/sspr.f | 198 + costa/native/external/blas/sspr2.f | 229 + costa/native/external/blas/sswap.f | 56 + costa/native/external/blas/ssymm.f | 294 ++ costa/native/external/blas/ssymv.f | 262 + costa/native/external/blas/ssyr.f | 197 + costa/native/external/blas/ssyr2.f | 230 + costa/native/external/blas/ssyr2k.f | 327 ++ costa/native/external/blas/ssyrk.f | 294 ++ costa/native/external/blas/stbmv.f | 342 ++ costa/native/external/blas/stbsv.f | 346 ++ costa/native/external/blas/stpmv.f | 299 ++ costa/native/external/blas/stpsv.f | 302 ++ costa/native/external/blas/strmm.f | 355 ++ costa/native/external/blas/strmv.f | 286 ++ costa/native/external/blas/strsm.f | 378 ++ costa/native/external/blas/strsv.f | 289 ++ costa/native/external/blas/xerbla.f | 43 + costa/native/external/blas/zaxpy.f | 34 + costa/native/external/blas/zcopy.f | 33 + costa/native/external/blas/zdotc.f | 36 + costa/native/external/blas/zdotu.f | 36 + costa/native/external/blas/zdscal.f | 30 + costa/native/external/blas/zgbmv.f | 322 ++ costa/native/external/blas/zgemm.f | 415 ++ costa/native/external/blas/zgemv.f | 281 ++ costa/native/external/blas/zgerc.f | 157 + costa/native/external/blas/zgeru.f | 157 + costa/native/external/blas/zhbmv.f | 309 ++ costa/native/external/blas/zhemm.f | 304 ++ costa/native/external/blas/zhemv.f | 266 + costa/native/external/blas/zher.f | 212 + costa/native/external/blas/zher2.f | 249 + costa/native/external/blas/zher2k.f | 372 ++ costa/native/external/blas/zherk.f | 330 ++ costa/native/external/blas/zhpmv.f | 270 ++ costa/native/external/blas/zhpr.f | 217 + costa/native/external/blas/zhpr2.f | 251 + costa/native/external/blas/zrotg.f | 21 + costa/native/external/blas/zscal.f | 29 + costa/native/external/blas/zswap.f | 36 + costa/native/external/blas/zsymm.f | 296 ++ costa/native/external/blas/zsyr2k.f | 324 ++ costa/native/external/blas/zsyrk.f | 293 ++ costa/native/external/blas/ztbmv.f | 377 ++ costa/native/external/blas/ztbsv.f | 381 ++ costa/native/external/blas/ztpmv.f | 338 ++ costa/native/external/blas/ztpsv.f | 341 ++ costa/native/external/blas/ztrmm.f | 392 ++ costa/native/external/blas/ztrmv.f | 321 ++ costa/native/external/blas/ztrsm.f | 414 ++ costa/native/external/blas/ztrsv.f | 324 ++ costa/native/external/lapack/cbdsqr.f | 733 +++ costa/native/external/lapack/cgbbrd.f | 466 ++ costa/native/external/lapack/cgbcon.f | 230 + costa/native/external/lapack/cgbequ.f | 248 + costa/native/external/lapack/cgbrfs.f | 361 ++ costa/native/external/lapack/cgbsv.f | 143 + costa/native/external/lapack/cgbsvx.f | 518 ++ costa/native/external/lapack/cgbtf2.f | 203 + costa/native/external/lapack/cgbtrf.f | 443 ++ costa/native/external/lapack/cgbtrs.f | 215 + costa/native/external/lapack/cgebak.f | 190 + costa/native/external/lapack/cgebal.f | 331 ++ costa/native/external/lapack/cgebd2.f | 249 + costa/native/external/lapack/cgebrd.f | 270 ++ costa/native/external/lapack/cgecon.f | 189 + costa/native/external/lapack/cgeequ.f | 234 + costa/native/external/lapack/cgees.f | 322 ++ costa/native/external/lapack/cgeesx.f | 371 ++ costa/native/external/lapack/cgeev.f | 391 ++ costa/native/external/lapack/cgeevx.f | 520 ++ costa/native/external/lapack/cgegs.f | 442 ++ costa/native/external/lapack/cgegv.f | 591 +++ costa/native/external/lapack/cgehd2.f | 149 + costa/native/external/lapack/cgehrd.f | 254 + costa/native/external/lapack/cgelq2.f | 124 + costa/native/external/lapack/cgelqf.f | 196 + costa/native/external/lapack/cgels.f | 405 ++ costa/native/external/lapack/cgelsd.f | 547 +++ costa/native/external/lapack/cgelss.f | 639 +++ costa/native/external/lapack/cgelsx.f | 358 ++ costa/native/external/lapack/cgelsy.f | 386 ++ costa/native/external/lapack/cgeql2.f | 122 + costa/native/external/lapack/cgeqlf.f | 205 + costa/native/external/lapack/cgeqp3.f | 285 ++ costa/native/external/lapack/cgeqpf.f | 225 + costa/native/external/lapack/cgeqr2.f | 122 + costa/native/external/lapack/cgeqrf.f | 197 + costa/native/external/lapack/cgerfs.f | 341 ++ costa/native/external/lapack/cgerq2.f | 125 + costa/native/external/lapack/cgerqf.f | 205 + costa/native/external/lapack/cgesc2.f | 134 + costa/native/external/lapack/cgesdd.f | 1950 ++++++++ costa/native/external/lapack/cgesv.f | 108 + costa/native/external/lapack/cgesvd.f | 3618 ++++++++++++++ costa/native/external/lapack/cgesvx.f | 484 ++ costa/native/external/lapack/cgetc2.f | 146 + costa/native/external/lapack/cgetf2.f | 136 + costa/native/external/lapack/cgetrf.f | 160 + costa/native/external/lapack/cgetri.f | 194 + costa/native/external/lapack/cgetrs.f | 150 + costa/native/external/lapack/cggbak.f | 216 + costa/native/external/lapack/cggbal.f | 474 ++ costa/native/external/lapack/cgges.f | 476 ++ costa/native/external/lapack/cggesx.f | 545 +++ costa/native/external/lapack/cggev.f | 449 ++ costa/native/external/lapack/cggevx.f | 640 +++ costa/native/external/lapack/cggglm.f | 213 + costa/native/external/lapack/cgghrd.f | 256 + costa/native/external/lapack/cgglse.f | 218 + costa/native/external/lapack/cggqrf.f | 212 + costa/native/external/lapack/cggrqf.f | 212 + costa/native/external/lapack/cggsvd.f | 334 ++ costa/native/external/lapack/cggsvp.f | 403 ++ costa/native/external/lapack/cgtcon.f | 167 + costa/native/external/lapack/cgtrfs.f | 369 ++ costa/native/external/lapack/cgtsv.f | 174 + costa/native/external/lapack/cgtsvx.f | 294 ++ costa/native/external/lapack/cgttrf.f | 175 + costa/native/external/lapack/cgttrs.f | 143 + costa/native/external/lapack/cgtts2.f | 272 ++ costa/native/external/lapack/chbev.f | 209 + costa/native/external/lapack/chbevd.f | 298 ++ costa/native/external/lapack/chbevx.f | 417 ++ costa/native/external/lapack/chbgst.f | 1377 ++++++ costa/native/external/lapack/chbgv.f | 192 + costa/native/external/lapack/chbgvd.f | 295 ++ costa/native/external/lapack/chbgvx.f | 374 ++ costa/native/external/lapack/chbtrd.f | 589 +++ costa/native/external/lapack/checon.f | 159 + costa/native/external/lapack/cheev.f | 220 + costa/native/external/lapack/cheevd.f | 296 ++ costa/native/external/lapack/cheevr.f | 522 ++ costa/native/external/lapack/cheevx.f | 426 ++ costa/native/external/lapack/chegs2.f | 225 + costa/native/external/lapack/chegst.f | 260 + costa/native/external/lapack/chegv.f | 229 + costa/native/external/lapack/chegvd.f | 297 ++ costa/native/external/lapack/chegvx.f | 329 ++ costa/native/external/lapack/cherfs.f | 339 ++ costa/native/external/lapack/chesv.f | 171 + costa/native/external/lapack/chesvx.f | 299 ++ costa/native/external/lapack/chetd2.f | 259 + costa/native/external/lapack/chetf2.f | 544 +++ costa/native/external/lapack/chetrd.f | 297 ++ costa/native/external/lapack/chetrf.f | 282 ++ costa/native/external/lapack/chetri.f | 328 ++ costa/native/external/lapack/chetrs.f | 394 ++ costa/native/external/lapack/chgeqz.f | 734 +++ costa/native/external/lapack/chpcon.f | 155 + costa/native/external/lapack/chpev.f | 197 + costa/native/external/lapack/chpevd.f | 280 ++ costa/native/external/lapack/chpevx.f | 384 ++ costa/native/external/lapack/chpgst.f | 216 + costa/native/external/lapack/chpgv.f | 197 + costa/native/external/lapack/chpgvd.f | 291 ++ costa/native/external/lapack/chpgvx.f | 284 ++ costa/native/external/lapack/chprfs.f | 337 ++ costa/native/external/lapack/chpsv.f | 149 + costa/native/external/lapack/chpsvx.f | 279 ++ costa/native/external/lapack/chptrd.f | 238 + costa/native/external/lapack/chptrf.f | 581 +++ costa/native/external/lapack/chptri.f | 344 ++ costa/native/external/lapack/chptrs.f | 402 ++ costa/native/external/lapack/chsein.f | 351 ++ costa/native/external/lapack/chseqr.f | 474 ++ costa/native/external/lapack/clabrd.f | 329 ++ costa/native/external/lapack/clacgv.f | 61 + costa/native/external/lapack/clacon.f | 211 + costa/native/external/lapack/clacp2.f | 92 + costa/native/external/lapack/clacpy.f | 91 + costa/native/external/lapack/clacrm.f | 111 + costa/native/external/lapack/clacrt.f | 91 + costa/native/external/lapack/cladiv.f | 47 + costa/native/external/lapack/claed0.f | 289 ++ costa/native/external/lapack/claed7.f | 267 + costa/native/external/lapack/claed8.f | 364 ++ costa/native/external/lapack/claein.f | 264 + costa/native/external/lapack/claesy.f | 153 + costa/native/external/lapack/claev2.f | 96 + costa/native/external/lapack/clags2.f | 305 ++ costa/native/external/lapack/clagtm.f | 234 + costa/native/external/lapack/clahef.f | 648 +++ costa/native/external/lapack/clahqr.f | 383 ++ costa/native/external/lapack/clahrd.f | 212 + costa/native/external/lapack/claic1.f | 296 ++ costa/native/external/lapack/clals0.f | 434 ++ costa/native/external/lapack/clalsa.f | 504 ++ costa/native/external/lapack/clalsd.f | 597 +++ costa/native/external/lapack/clangb.f | 155 + costa/native/external/lapack/clange.f | 146 + costa/native/external/lapack/clangt.f | 142 + costa/native/external/lapack/clanhb.f | 202 + costa/native/external/lapack/clanhe.f | 188 + costa/native/external/lapack/clanhp.f | 202 + costa/native/external/lapack/clanhs.f | 143 + costa/native/external/lapack/clanht.f | 126 + costa/native/external/lapack/clansb.f | 188 + costa/native/external/lapack/clansp.f | 207 + costa/native/external/lapack/clansy.f | 175 + costa/native/external/lapack/clantb.f | 286 ++ costa/native/external/lapack/clantp.f | 287 ++ costa/native/external/lapack/clantr.f | 278 ++ costa/native/external/lapack/clapll.f | 104 + costa/native/external/lapack/clapmt.f | 135 + costa/native/external/lapack/claqgb.f | 170 + costa/native/external/lapack/claqge.f | 156 + costa/native/external/lapack/claqhb.f | 152 + costa/native/external/lapack/claqhe.f | 148 + costa/native/external/lapack/claqhp.f | 147 + costa/native/external/lapack/claqp2.f | 170 + costa/native/external/lapack/claqps.f | 260 + costa/native/external/lapack/claqsb.f | 150 + costa/native/external/lapack/claqsp.f | 142 + costa/native/external/lapack/claqsy.f | 143 + costa/native/external/lapack/clar1v.f | 328 ++ costa/native/external/lapack/clar2v.f | 98 + costa/native/external/lapack/clarcm.f | 111 + costa/native/external/lapack/clarf.f | 121 + costa/native/external/lapack/clarfb.f | 609 +++ costa/native/external/lapack/clarfg.f | 146 + costa/native/external/lapack/clarft.f | 225 + costa/native/external/lapack/clarfx.f | 641 +++ costa/native/external/lapack/clargv.f | 225 + costa/native/external/lapack/clarnv.f | 131 + costa/native/external/lapack/clarrv.f | 433 ++ costa/native/external/lapack/clartg.f | 193 + costa/native/external/lapack/clartv.f | 79 + costa/native/external/lapack/clarz.f | 158 + costa/native/external/lapack/clarzb.f | 235 + costa/native/external/lapack/clarzt.f | 187 + costa/native/external/lapack/clascl.f | 268 + costa/native/external/lapack/claset.f | 115 + costa/native/external/lapack/clasr.f | 325 ++ costa/native/external/lapack/classq.f | 102 + costa/native/external/lapack/claswp.f | 120 + costa/native/external/lapack/clasyf.f | 598 +++ costa/native/external/lapack/clatbs.f | 909 ++++ costa/native/external/lapack/clatdf.f | 242 + costa/native/external/lapack/clatps.f | 895 ++++ costa/native/external/lapack/clatrd.f | 280 ++ costa/native/external/lapack/clatrs.f | 880 ++++ costa/native/external/lapack/clatrz.f | 134 + costa/native/external/lapack/clatzm.f | 147 + costa/native/external/lapack/clauu2.f | 144 + costa/native/external/lapack/clauum.f | 161 + costa/native/external/lapack/cpbcon.f | 194 + costa/native/external/lapack/cpbequ.f | 168 + costa/native/external/lapack/cpbrfs.f | 342 ++ costa/native/external/lapack/cpbstf.f | 264 + costa/native/external/lapack/cpbsv.f | 152 + costa/native/external/lapack/cpbsvx.f | 423 ++ costa/native/external/lapack/cpbtf2.f | 201 + costa/native/external/lapack/cpbtrf.f | 372 ++ costa/native/external/lapack/cpbtrs.f | 146 + costa/native/external/lapack/cpocon.f | 180 + costa/native/external/lapack/cpoequ.f | 138 + costa/native/external/lapack/cporfs.f | 333 ++ costa/native/external/lapack/cposv.f | 122 + costa/native/external/lapack/cposvx.f | 378 ++ costa/native/external/lapack/cpotf2.f | 175 + costa/native/external/lapack/cpotrf.f | 187 + costa/native/external/lapack/cpotri.f | 97 + costa/native/external/lapack/cpotrs.f | 133 + costa/native/external/lapack/cppcon.f | 179 + costa/native/external/lapack/cppequ.f | 170 + costa/native/external/lapack/cpprfs.f | 331 ++ costa/native/external/lapack/cppsv.f | 134 + costa/native/external/lapack/cppsvx.f | 383 ++ costa/native/external/lapack/cpptrf.f | 179 + costa/native/external/lapack/cpptri.f | 131 + costa/native/external/lapack/cpptrs.f | 135 + costa/native/external/lapack/cptcon.f | 151 + costa/native/external/lapack/cpteqr.f | 191 + costa/native/external/lapack/cptrfs.f | 367 ++ costa/native/external/lapack/cptsv.f | 101 + costa/native/external/lapack/cptsvx.f | 238 + costa/native/external/lapack/cpttrf.f | 169 + costa/native/external/lapack/cpttrs.f | 136 + costa/native/external/lapack/cptts2.f | 177 + costa/native/external/lapack/crot.f | 92 + costa/native/external/lapack/cspcon.f | 155 + costa/native/external/lapack/cspmv.f | 265 + costa/native/external/lapack/cspr.f | 214 + costa/native/external/lapack/csprfs.f | 336 ++ costa/native/external/lapack/cspsv.f | 149 + costa/native/external/lapack/cspsvx.f | 279 ++ costa/native/external/lapack/csptrf.f | 556 +++ costa/native/external/lapack/csptri.f | 338 ++ costa/native/external/lapack/csptrs.f | 378 ++ costa/native/external/lapack/csrot.f | 53 + costa/native/external/lapack/csrscl.f | 115 + costa/native/external/lapack/cstedc.f | 390 ++ costa/native/external/lapack/cstegr.f | 405 ++ costa/native/external/lapack/cstein.f | 377 ++ costa/native/external/lapack/csteqr.f | 504 ++ costa/native/external/lapack/csycon.f | 159 + costa/native/external/lapack/csymv.f | 265 + costa/native/external/lapack/csyr.f | 199 + costa/native/external/lapack/csyrfs.f | 339 ++ costa/native/external/lapack/csysv.f | 171 + costa/native/external/lapack/csysvx.f | 299 ++ costa/native/external/lapack/csytf2.f | 515 ++ costa/native/external/lapack/csytrf.f | 287 ++ costa/native/external/lapack/csytri.f | 314 ++ costa/native/external/lapack/csytrs.f | 370 ++ costa/native/external/lapack/ctbcon.f | 205 + costa/native/external/lapack/ctbrfs.f | 393 ++ costa/native/external/lapack/ctbtrs.f | 163 + costa/native/external/lapack/ctgevc.f | 632 +++ costa/native/external/lapack/ctgex2.f | 264 + costa/native/external/lapack/ctgexc.f | 207 + costa/native/external/lapack/ctgsen.f | 642 +++ costa/native/external/lapack/ctgsja.f | 526 ++ costa/native/external/lapack/ctgsna.f | 402 ++ costa/native/external/lapack/ctgsy2.f | 357 ++ costa/native/external/lapack/ctgsyl.f | 548 +++ costa/native/external/lapack/ctpcon.f | 194 + costa/native/external/lapack/ctprfs.f | 387 ++ costa/native/external/lapack/ctptri.f | 177 + costa/native/external/lapack/ctptrs.f | 154 + costa/native/external/lapack/ctrcon.f | 200 + costa/native/external/lapack/ctrevc.f | 390 ++ costa/native/external/lapack/ctrexc.f | 162 + costa/native/external/lapack/ctrrfs.f | 378 ++ costa/native/external/lapack/ctrsen.f | 358 ++ costa/native/external/lapack/ctrsna.f | 354 ++ costa/native/external/lapack/ctrsyl.f | 368 ++ costa/native/external/lapack/ctrti2.f | 147 + costa/native/external/lapack/ctrtri.f | 178 + costa/native/external/lapack/ctrtrs.f | 149 + costa/native/external/lapack/ctzrqf.f | 174 + costa/native/external/lapack/ctzrzf.f | 241 + costa/native/external/lapack/cung2l.f | 129 + costa/native/external/lapack/cung2r.f | 131 + costa/native/external/lapack/cungbr.f | 246 + costa/native/external/lapack/cunghr.f | 166 + costa/native/external/lapack/cungl2.f | 137 + costa/native/external/lapack/cunglq.f | 216 + costa/native/external/lapack/cungql.f | 214 + costa/native/external/lapack/cungqr.f | 217 + costa/native/external/lapack/cungr2.f | 135 + costa/native/external/lapack/cungrq.f | 215 + costa/native/external/lapack/cungtr.f | 185 + costa/native/external/lapack/cunm2l.f | 197 + costa/native/external/lapack/cunm2r.f | 202 + costa/native/external/lapack/cunmbr.f | 282 ++ costa/native/external/lapack/cunmhr.f | 203 + costa/native/external/lapack/cunml2.f | 206 + costa/native/external/lapack/cunmlq.f | 269 ++ costa/native/external/lapack/cunmql.f | 258 + costa/native/external/lapack/cunmqr.f | 262 + costa/native/external/lapack/cunmr2.f | 199 + costa/native/external/lapack/cunmr3.f | 213 + costa/native/external/lapack/cunmrq.f | 265 + costa/native/external/lapack/cunmrz.f | 294 ++ costa/native/external/lapack/cunmtr.f | 224 + costa/native/external/lapack/cupgtr.f | 162 + costa/native/external/lapack/cupmtr.f | 268 + costa/native/external/lapack/dbdsdc.f | 427 ++ costa/native/external/lapack/dbdsqr.f | 733 +++ costa/native/external/lapack/ddisna.f | 180 + costa/native/external/lapack/dgbbrd.f | 444 ++ costa/native/external/lapack/dgbcon.f | 222 + costa/native/external/lapack/dgbequ.f | 240 + costa/native/external/lapack/dgbrfs.f | 351 ++ costa/native/external/lapack/dgbsv.f | 143 + costa/native/external/lapack/dgbsvx.f | 517 ++ costa/native/external/lapack/dgbtf2.f | 203 + costa/native/external/lapack/dgbtrf.f | 442 ++ costa/native/external/lapack/dgbtrs.f | 187 + costa/native/external/lapack/dgebak.f | 189 + costa/native/external/lapack/dgebal.f | 323 ++ costa/native/external/lapack/dgebd2.f | 238 + costa/native/external/lapack/dgebrd.f | 269 ++ costa/native/external/lapack/dgecon.f | 181 + costa/native/external/lapack/dgeequ.f | 226 + costa/native/external/lapack/dgees.f | 431 ++ costa/native/external/lapack/dgeesx.f | 502 ++ costa/native/external/lapack/dgeev.f | 409 ++ costa/native/external/lapack/dgeevx.f | 544 +++ costa/native/external/lapack/dgegs.f | 470 ++ costa/native/external/lapack/dgegv.f | 641 +++ costa/native/external/lapack/dgehd2.f | 150 + costa/native/external/lapack/dgehrd.f | 255 + costa/native/external/lapack/dgelq2.f | 122 + costa/native/external/lapack/dgelqf.f | 196 + costa/native/external/lapack/dgels.f | 403 ++ costa/native/external/lapack/dgelsd.f | 529 ++ costa/native/external/lapack/dgelss.f | 613 +++ costa/native/external/lapack/dgelsx.f | 350 ++ costa/native/external/lapack/dgelsy.f | 379 ++ costa/native/external/lapack/dgeql2.f | 123 + costa/native/external/lapack/dgeqlf.f | 205 + costa/native/external/lapack/dgeqp3.f | 279 ++ costa/native/external/lapack/dgeqpf.f | 222 + costa/native/external/lapack/dgeqr2.f | 122 + costa/native/external/lapack/dgeqrf.f | 197 + costa/native/external/lapack/dgerfs.f | 332 ++ costa/native/external/lapack/dgerq2.f | 123 + costa/native/external/lapack/dgerqf.f | 205 + costa/native/external/lapack/dgesc2.f | 133 + costa/native/external/lapack/dgesdd.f | 1335 +++++ costa/native/external/lapack/dgesv.f | 108 + costa/native/external/lapack/dgesvd.f | 3417 +++++++++++++ costa/native/external/lapack/dgesvx.f | 482 ++ costa/native/external/lapack/dgetc2.f | 147 + costa/native/external/lapack/dgetf2.f | 135 + costa/native/external/lapack/dgetrf.f | 160 + costa/native/external/lapack/dgetri.f | 193 + costa/native/external/lapack/dgetrs.f | 150 + costa/native/external/lapack/dggbak.f | 216 + costa/native/external/lapack/dggbal.f | 461 ++ costa/native/external/lapack/dgges.f | 550 +++ costa/native/external/lapack/dggesx.f | 640 +++ costa/native/external/lapack/dggev.f | 481 ++ costa/native/external/lapack/dggevx.f | 698 +++ costa/native/external/lapack/dggglm.f | 212 + costa/native/external/lapack/dgghrd.f | 253 + costa/native/external/lapack/dgglse.f | 217 + costa/native/external/lapack/dggqrf.f | 212 + costa/native/external/lapack/dggrqf.f | 212 + costa/native/external/lapack/dggsvd.f | 336 ++ costa/native/external/lapack/dggsvp.f | 394 ++ costa/native/external/lapack/dgtcon.f | 166 + costa/native/external/lapack/dgtrfs.f | 357 ++ costa/native/external/lapack/dgtsv.f | 263 + costa/native/external/lapack/dgtsvx.f | 293 ++ costa/native/external/lapack/dgttrf.f | 169 + costa/native/external/lapack/dgttrs.f | 141 + costa/native/external/lapack/dgtts2.f | 197 + costa/native/external/lapack/dhgeqz.f | 1243 +++++ costa/native/external/lapack/dhsein.f | 412 ++ costa/native/external/lapack/dhseqr.f | 467 ++ costa/native/external/lapack/dlabad.f | 56 + costa/native/external/lapack/dlabrd.f | 291 ++ costa/native/external/lapack/dlacon.f | 204 + costa/native/external/lapack/dlacpy.f | 88 + costa/native/external/lapack/dladiv.f | 63 + costa/native/external/lapack/dlae2.f | 124 + costa/native/external/lapack/dlaebz.f | 552 +++ costa/native/external/lapack/dlaed0.f | 350 ++ costa/native/external/lapack/dlaed1.f | 196 + costa/native/external/lapack/dlaed2.f | 435 ++ costa/native/external/lapack/dlaed3.f | 265 + costa/native/external/lapack/dlaed4.f | 846 ++++ costa/native/external/lapack/dlaed5.f | 125 + costa/native/external/lapack/dlaed6.f | 299 ++ costa/native/external/lapack/dlaed7.f | 288 ++ costa/native/external/lapack/dlaed8.f | 400 ++ costa/native/external/lapack/dlaed9.f | 206 + costa/native/external/lapack/dlaeda.f | 218 + costa/native/external/lapack/dlaein.f | 532 ++ costa/native/external/lapack/dlaev2.f | 170 + costa/native/external/lapack/dlaexc.f | 355 ++ costa/native/external/lapack/dlag2.f | 301 ++ costa/native/external/lapack/dlags2.f | 270 ++ costa/native/external/lapack/dlagtf.f | 191 + costa/native/external/lapack/dlagtm.f | 191 + costa/native/external/lapack/dlagts.f | 305 ++ costa/native/external/lapack/dlagv2.f | 290 ++ costa/native/external/lapack/dlahqr.f | 435 ++ costa/native/external/lapack/dlahrd.f | 206 + costa/native/external/lapack/dlaic1.f | 293 ++ costa/native/external/lapack/dlaln2.f | 508 ++ costa/native/external/lapack/dlals0.f | 375 ++ costa/native/external/lapack/dlalsa.f | 363 ++ costa/native/external/lapack/dlalsd.f | 433 ++ costa/native/external/lapack/dlamch.f | 857 ++++ costa/native/external/lapack/dlamrg.f | 104 + costa/native/external/lapack/dlangb.f | 155 + costa/native/external/lapack/dlange.f | 145 + costa/native/external/lapack/dlangt.f | 142 + costa/native/external/lapack/dlanhs.f | 142 + costa/native/external/lapack/dlansb.f | 187 + costa/native/external/lapack/dlansp.f | 197 + costa/native/external/lapack/dlanst.f | 125 + costa/native/external/lapack/dlansy.f | 174 + costa/native/external/lapack/dlantb.f | 285 ++ costa/native/external/lapack/dlantp.f | 286 ++ costa/native/external/lapack/dlantr.f | 277 ++ costa/native/external/lapack/dlanv2.f | 206 + costa/native/external/lapack/dlapll.f | 100 + costa/native/external/lapack/dlapmt.f | 135 + costa/native/external/lapack/dlapy2.f | 54 + costa/native/external/lapack/dlapy3.f | 54 + costa/native/external/lapack/dlaqgb.f | 169 + costa/native/external/lapack/dlaqge.f | 155 + costa/native/external/lapack/dlaqp2.f | 166 + costa/native/external/lapack/dlaqps.f | 245 + costa/native/external/lapack/dlaqsb.f | 149 + costa/native/external/lapack/dlaqsp.f | 141 + costa/native/external/lapack/dlaqsy.f | 142 + costa/native/external/lapack/dlaqtr.f | 666 +++ costa/native/external/lapack/dlar1v.f | 323 ++ costa/native/external/lapack/dlar2v.f | 87 + costa/native/external/lapack/dlarf.f | 116 + costa/native/external/lapack/dlarfb.f | 588 +++ costa/native/external/lapack/dlarfg.f | 138 + costa/native/external/lapack/dlarft.f | 218 + costa/native/external/lapack/dlarfx.f | 639 +++ costa/native/external/lapack/dlargv.f | 100 + costa/native/external/lapack/dlarnv.f | 116 + costa/native/external/lapack/dlarrb.f | 282 ++ costa/native/external/lapack/dlarre.f | 318 ++ costa/native/external/lapack/dlarrf.f | 150 + costa/native/external/lapack/dlarrv.f | 419 ++ costa/native/external/lapack/dlartg.f | 143 + costa/native/external/lapack/dlartv.f | 77 + costa/native/external/lapack/dlaruv.f | 368 ++ costa/native/external/lapack/dlarz.f | 153 + costa/native/external/lapack/dlarzb.f | 221 + costa/native/external/lapack/dlarzt.f | 185 + costa/native/external/lapack/dlas2.f | 122 + costa/native/external/lapack/dlascl.f | 268 + costa/native/external/lapack/dlasd0.f | 231 + costa/native/external/lapack/dlasd1.f | 233 + costa/native/external/lapack/dlasd2.f | 513 ++ costa/native/external/lapack/dlasd3.f | 359 ++ costa/native/external/lapack/dlasd4.f | 891 ++++ costa/native/external/lapack/dlasd5.f | 164 + costa/native/external/lapack/dlasd6.f | 306 ++ costa/native/external/lapack/dlasd7.f | 445 ++ costa/native/external/lapack/dlasd8.f | 254 + costa/native/external/lapack/dlasd9.f | 256 + costa/native/external/lapack/dlasda.f | 391 ++ costa/native/external/lapack/dlasdq.f | 317 ++ costa/native/external/lapack/dlasdt.f | 106 + costa/native/external/lapack/dlaset.f | 115 + costa/native/external/lapack/dlasq1.f | 149 + costa/native/external/lapack/dlasq2.f | 436 ++ costa/native/external/lapack/dlasq3.f | 298 ++ costa/native/external/lapack/dlasq4.f | 330 ++ costa/native/external/lapack/dlasq5.f | 196 + costa/native/external/lapack/dlasq6.f | 176 + costa/native/external/lapack/dlasr.f | 325 ++ costa/native/external/lapack/dlasrt.f | 244 + costa/native/external/lapack/dlassq.f | 89 + costa/native/external/lapack/dlasv2.f | 250 + costa/native/external/lapack/dlaswp.f | 120 + costa/native/external/lapack/dlasy2.f | 382 ++ costa/native/external/lapack/dlasyf.f | 588 +++ costa/native/external/lapack/dlatbs.f | 724 +++ costa/native/external/lapack/dlatdf.f | 238 + costa/native/external/lapack/dlatps.f | 713 +++ costa/native/external/lapack/dlatrd.f | 259 + costa/native/external/lapack/dlatrs.f | 702 +++ costa/native/external/lapack/dlatrz.f | 128 + costa/native/external/lapack/dlatzm.f | 143 + costa/native/external/lapack/dlauu2.f | 136 + costa/native/external/lapack/dlauum.f | 156 + costa/native/external/lapack/dopgtr.f | 161 + costa/native/external/lapack/dopmtr.f | 258 + costa/native/external/lapack/dorg2l.f | 128 + costa/native/external/lapack/dorg2r.f | 130 + costa/native/external/lapack/dorgbr.f | 245 + costa/native/external/lapack/dorghr.f | 165 + costa/native/external/lapack/dorgl2.f | 134 + costa/native/external/lapack/dorglq.f | 216 + costa/native/external/lapack/dorgql.f | 214 + costa/native/external/lapack/dorgqr.f | 217 + costa/native/external/lapack/dorgr2.f | 132 + costa/native/external/lapack/dorgrq.f | 214 + costa/native/external/lapack/dorgtr.f | 184 + costa/native/external/lapack/dorm2l.f | 194 + costa/native/external/lapack/dorm2r.f | 198 + costa/native/external/lapack/dormbr.f | 282 ++ costa/native/external/lapack/dormhr.f | 202 + costa/native/external/lapack/dorml2.f | 198 + costa/native/external/lapack/dormlq.f | 268 + costa/native/external/lapack/dormql.f | 257 + costa/native/external/lapack/dormqr.f | 261 + costa/native/external/lapack/dormr2.f | 194 + costa/native/external/lapack/dormr3.f | 207 + costa/native/external/lapack/dormrq.f | 264 + costa/native/external/lapack/dormrz.f | 288 ++ costa/native/external/lapack/dormtr.f | 223 + costa/native/external/lapack/dpbcon.f | 188 + costa/native/external/lapack/dpbequ.f | 167 + costa/native/external/lapack/dpbrfs.f | 337 ++ costa/native/external/lapack/dpbstf.f | 251 + costa/native/external/lapack/dpbsv.f | 152 + costa/native/external/lapack/dpbsvx.f | 424 ++ costa/native/external/lapack/dpbtf2.f | 195 + costa/native/external/lapack/dpbtrf.f | 365 ++ costa/native/external/lapack/dpbtrs.f | 146 + costa/native/external/lapack/dpocon.f | 173 + costa/native/external/lapack/dpoequ.f | 137 + costa/native/external/lapack/dporfs.f | 327 ++ costa/native/external/lapack/dposv.f | 122 + costa/native/external/lapack/dposvx.f | 379 ++ costa/native/external/lapack/dpotf2.f | 168 + costa/native/external/lapack/dpotrf.f | 184 + costa/native/external/lapack/dpotri.f | 97 + costa/native/external/lapack/dpotrs.f | 133 + costa/native/external/lapack/dppcon.f | 172 + costa/native/external/lapack/dppequ.f | 169 + costa/native/external/lapack/dpprfs.f | 324 ++ costa/native/external/lapack/dppsv.f | 134 + costa/native/external/lapack/dppsvx.f | 383 ++ costa/native/external/lapack/dpptrf.f | 178 + costa/native/external/lapack/dpptri.f | 129 + costa/native/external/lapack/dpptrs.f | 135 + costa/native/external/lapack/dptcon.f | 150 + costa/native/external/lapack/dpteqr.f | 190 + costa/native/external/lapack/dptrfs.f | 302 ++ costa/native/external/lapack/dptsv.f | 100 + costa/native/external/lapack/dptsvx.f | 235 + costa/native/external/lapack/dpttrf.f | 153 + costa/native/external/lapack/dpttrs.f | 115 + costa/native/external/lapack/dptts2.f | 94 + costa/native/external/lapack/drscl.f | 115 + costa/native/external/lapack/dsbev.f | 206 + costa/native/external/lapack/dsbevd.f | 265 + costa/native/external/lapack/dsbevx.f | 411 ++ costa/native/external/lapack/dsbgst.f | 1346 ++++++ costa/native/external/lapack/dsbgv.f | 189 + costa/native/external/lapack/dsbgvd.f | 270 ++ costa/native/external/lapack/dsbgvx.f | 371 ++ costa/native/external/lapack/dsbtrd.f | 553 +++ costa/native/external/lapack/dsecnd.f | 34 + costa/native/external/lapack/dspcon.f | 158 + costa/native/external/lapack/dspev.f | 188 + costa/native/external/lapack/dspevd.f | 249 + costa/native/external/lapack/dspevx.f | 377 ++ costa/native/external/lapack/dspgst.f | 209 + costa/native/external/lapack/dspgv.f | 196 + costa/native/external/lapack/dspgvd.f | 281 ++ costa/native/external/lapack/dspgvx.f | 286 ++ costa/native/external/lapack/dsprfs.f | 331 ++ costa/native/external/lapack/dspsv.f | 149 + costa/native/external/lapack/dspsvx.f | 279 ++ costa/native/external/lapack/dsptrd.f | 229 + costa/native/external/lapack/dsptrf.f | 548 +++ costa/native/external/lapack/dsptri.f | 335 ++ costa/native/external/lapack/dsptrs.f | 378 ++ costa/native/external/lapack/dstebz.f | 652 +++ costa/native/external/lapack/dstedc.f | 397 ++ costa/native/external/lapack/dstegr.f | 400 ++ costa/native/external/lapack/dstein.f | 362 ++ costa/native/external/lapack/dsteqr.f | 501 ++ costa/native/external/lapack/dsterf.f | 365 ++ costa/native/external/lapack/dstev.f | 165 + costa/native/external/lapack/dstevd.f | 217 + costa/native/external/lapack/dstevr.f | 434 ++ costa/native/external/lapack/dstevx.f | 346 ++ costa/native/external/lapack/dsycon.f | 161 + costa/native/external/lapack/dsyev.f | 213 + costa/native/external/lapack/dsyevd.f | 265 + costa/native/external/lapack/dsyevr.f | 497 ++ costa/native/external/lapack/dsyevx.f | 421 ++ costa/native/external/lapack/dsygs2.f | 212 + costa/native/external/lapack/dsygst.f | 250 + costa/native/external/lapack/dsygv.f | 227 + costa/native/external/lapack/dsygvd.f | 277 ++ costa/native/external/lapack/dsygvx.f | 326 ++ costa/native/external/lapack/dsyrfs.f | 335 ++ costa/native/external/lapack/dsysv.f | 171 + costa/native/external/lapack/dsysvx.f | 297 ++ costa/native/external/lapack/dsytd2.f | 249 + costa/native/external/lapack/dsytf2.f | 511 ++ costa/native/external/lapack/dsytrd.f | 295 ++ costa/native/external/lapack/dsytrf.f | 288 ++ costa/native/external/lapack/dsytri.f | 313 ++ costa/native/external/lapack/dsytrs.f | 370 ++ costa/native/external/lapack/dtbcon.f | 198 + costa/native/external/lapack/dtbrfs.f | 381 ++ costa/native/external/lapack/dtbtrs.f | 163 + costa/native/external/lapack/dtgevc.f | 1146 +++++ costa/native/external/lapack/dtgex2.f | 582 +++ costa/native/external/lapack/dtgexc.f | 434 ++ costa/native/external/lapack/dtgsen.f | 718 +++ costa/native/external/lapack/dtgsja.f | 516 ++ costa/native/external/lapack/dtgsna.f | 585 +++ costa/native/external/lapack/dtgsy2.f | 950 ++++ costa/native/external/lapack/dtgsyl.f | 534 ++ costa/native/external/lapack/dtpcon.f | 187 + costa/native/external/lapack/dtprfs.f | 375 ++ costa/native/external/lapack/dtptri.f | 176 + costa/native/external/lapack/dtptrs.f | 154 + costa/native/external/lapack/dtrcon.f | 193 + costa/native/external/lapack/dtrevc.f | 1005 ++++ costa/native/external/lapack/dtrexc.f | 346 ++ costa/native/external/lapack/dtrrfs.f | 371 ++ costa/native/external/lapack/dtrsen.f | 457 ++ costa/native/external/lapack/dtrsna.f | 493 ++ costa/native/external/lapack/dtrsyl.f | 914 ++++ costa/native/external/lapack/dtrti2.f | 147 + costa/native/external/lapack/dtrtri.f | 177 + costa/native/external/lapack/dtrtrs.f | 148 + costa/native/external/lapack/dtzrqf.f | 165 + costa/native/external/lapack/dtzrzf.f | 241 + costa/native/external/lapack/dzsum1.f | 82 + costa/native/external/lapack/icmax1.f | 96 + costa/native/external/lapack/ieeeck.f | 148 + costa/native/external/lapack/ilaenv.f | 547 +++ costa/native/external/lapack/izmax1.f | 96 + costa/native/external/lapack/lsame.f | 87 + costa/native/external/lapack/lsamen.f | 68 + costa/native/external/lapack/sbdsdc.f | 427 ++ costa/native/external/lapack/sbdsqr.f | 733 +++ costa/native/external/lapack/scsum1.f | 82 + costa/native/external/lapack/sdisna.f | 180 + costa/native/external/lapack/second.f | 34 + costa/native/external/lapack/sgbbrd.f | 444 ++ costa/native/external/lapack/sgbcon.f | 222 + costa/native/external/lapack/sgbequ.f | 240 + costa/native/external/lapack/sgbrfs.f | 351 ++ costa/native/external/lapack/sgbsv.f | 143 + costa/native/external/lapack/sgbsvx.f | 517 ++ costa/native/external/lapack/sgbtf2.f | 203 + costa/native/external/lapack/sgbtrf.f | 442 ++ costa/native/external/lapack/sgbtrs.f | 187 + costa/native/external/lapack/sgebak.f | 189 + costa/native/external/lapack/sgebal.f | 323 ++ costa/native/external/lapack/sgebd2.f | 238 + costa/native/external/lapack/sgebrd.f | 269 ++ costa/native/external/lapack/sgecon.f | 181 + costa/native/external/lapack/sgeequ.f | 226 + costa/native/external/lapack/sgees.f | 431 ++ costa/native/external/lapack/sgeesx.f | 502 ++ costa/native/external/lapack/sgeev.f | 410 ++ costa/native/external/lapack/sgeevx.f | 543 +++ costa/native/external/lapack/sgegs.f | 470 ++ costa/native/external/lapack/sgegv.f | 641 +++ costa/native/external/lapack/sgehd2.f | 150 + costa/native/external/lapack/sgehrd.f | 255 + costa/native/external/lapack/sgelq2.f | 122 + costa/native/external/lapack/sgelqf.f | 196 + costa/native/external/lapack/sgels.f | 403 ++ costa/native/external/lapack/sgelsd.f | 530 ++ costa/native/external/lapack/sgelss.f | 613 +++ costa/native/external/lapack/sgelsx.f | 350 ++ costa/native/external/lapack/sgelsy.f | 379 ++ costa/native/external/lapack/sgeql2.f | 123 + costa/native/external/lapack/sgeqlf.f | 205 + costa/native/external/lapack/sgeqp3.f | 279 ++ costa/native/external/lapack/sgeqpf.f | 221 + costa/native/external/lapack/sgeqr2.f | 122 + costa/native/external/lapack/sgeqrf.f | 197 + costa/native/external/lapack/sgerfs.f | 332 ++ costa/native/external/lapack/sgerq2.f | 123 + costa/native/external/lapack/sgerqf.f | 205 + costa/native/external/lapack/sgesc2.f | 133 + costa/native/external/lapack/sgesdd.f | 1335 +++++ costa/native/external/lapack/sgesv.f | 108 + costa/native/external/lapack/sgesvd.f | 3417 +++++++++++++ costa/native/external/lapack/sgesvx.f | 482 ++ costa/native/external/lapack/sgetc2.f | 147 + costa/native/external/lapack/sgetf2.f | 135 + costa/native/external/lapack/sgetrf.f | 160 + costa/native/external/lapack/sgetri.f | 193 + costa/native/external/lapack/sgetrs.f | 150 + costa/native/external/lapack/sggbak.f | 216 + costa/native/external/lapack/sggbal.f | 461 ++ costa/native/external/lapack/sgges.f | 548 +++ costa/native/external/lapack/sggesx.f | 640 +++ costa/native/external/lapack/sggev.f | 482 ++ costa/native/external/lapack/sggevx.f | 698 +++ costa/native/external/lapack/sggglm.f | 212 + costa/native/external/lapack/sgghrd.f | 253 + costa/native/external/lapack/sgglse.f | 217 + costa/native/external/lapack/sggqrf.f | 212 + costa/native/external/lapack/sggrqf.f | 212 + costa/native/external/lapack/sggsvd.f | 336 ++ costa/native/external/lapack/sggsvp.f | 394 ++ costa/native/external/lapack/sgtcon.f | 166 + costa/native/external/lapack/sgtrfs.f | 357 ++ costa/native/external/lapack/sgtsv.f | 263 + costa/native/external/lapack/sgtsvx.f | 293 ++ costa/native/external/lapack/sgttrf.f | 169 + costa/native/external/lapack/sgttrs.f | 141 + costa/native/external/lapack/sgtts2.f | 197 + costa/native/external/lapack/shgeqz.f | 1243 +++++ costa/native/external/lapack/shsein.f | 412 ++ costa/native/external/lapack/shseqr.f | 467 ++ costa/native/external/lapack/slabad.f | 56 + costa/native/external/lapack/slabrd.f | 291 ++ costa/native/external/lapack/slacon.f | 204 + costa/native/external/lapack/slacpy.f | 88 + costa/native/external/lapack/sladiv.f | 63 + costa/native/external/lapack/slae2.f | 124 + costa/native/external/lapack/slaebz.f | 552 +++ costa/native/external/lapack/slaed0.f | 350 ++ costa/native/external/lapack/slaed1.f | 196 + costa/native/external/lapack/slaed2.f | 435 ++ costa/native/external/lapack/slaed3.f | 265 + costa/native/external/lapack/slaed4.f | 846 ++++ costa/native/external/lapack/slaed5.f | 125 + costa/native/external/lapack/slaed6.f | 299 ++ costa/native/external/lapack/slaed7.f | 288 ++ costa/native/external/lapack/slaed8.f | 400 ++ costa/native/external/lapack/slaed9.f | 206 + costa/native/external/lapack/slaeda.f | 218 + costa/native/external/lapack/slaein.f | 532 ++ costa/native/external/lapack/slaev2.f | 170 + costa/native/external/lapack/slaexc.f | 354 ++ costa/native/external/lapack/slag2.f | 301 ++ costa/native/external/lapack/slags2.f | 270 ++ costa/native/external/lapack/slagtf.f | 191 + costa/native/external/lapack/slagtm.f | 191 + costa/native/external/lapack/slagts.f | 305 ++ costa/native/external/lapack/slagv2.f | 290 ++ costa/native/external/lapack/slahqr.f | 435 ++ costa/native/external/lapack/slahrd.f | 206 + costa/native/external/lapack/slaic1.f | 293 ++ costa/native/external/lapack/slaln2.f | 508 ++ costa/native/external/lapack/slals0.f | 375 ++ costa/native/external/lapack/slalsa.f | 363 ++ costa/native/external/lapack/slalsd.f | 433 ++ costa/native/external/lapack/slamch.f | 857 ++++ costa/native/external/lapack/slamrg.f | 104 + costa/native/external/lapack/slangb.f | 155 + costa/native/external/lapack/slange.f | 145 + costa/native/external/lapack/slangt.f | 142 + costa/native/external/lapack/slanhs.f | 142 + costa/native/external/lapack/slansb.f | 187 + costa/native/external/lapack/slansp.f | 197 + costa/native/external/lapack/slanst.f | 125 + costa/native/external/lapack/slansy.f | 174 + costa/native/external/lapack/slantb.f | 285 ++ costa/native/external/lapack/slantp.f | 286 ++ costa/native/external/lapack/slantr.f | 277 ++ costa/native/external/lapack/slanv2.f | 206 + costa/native/external/lapack/slapll.f | 100 + costa/native/external/lapack/slapmt.f | 135 + costa/native/external/lapack/slapy2.f | 54 + costa/native/external/lapack/slapy3.f | 54 + costa/native/external/lapack/slaqgb.f | 169 + costa/native/external/lapack/slaqge.f | 155 + costa/native/external/lapack/slaqp2.f | 166 + costa/native/external/lapack/slaqps.f | 245 + costa/native/external/lapack/slaqsb.f | 149 + costa/native/external/lapack/slaqsp.f | 141 + costa/native/external/lapack/slaqsy.f | 142 + costa/native/external/lapack/slaqtr.f | 666 +++ costa/native/external/lapack/slar1v.f | 323 ++ costa/native/external/lapack/slar2v.f | 87 + costa/native/external/lapack/slarf.f | 116 + costa/native/external/lapack/slarfb.f | 588 +++ costa/native/external/lapack/slarfg.f | 138 + costa/native/external/lapack/slarft.f | 218 + costa/native/external/lapack/slarfx.f | 638 +++ costa/native/external/lapack/slargv.f | 100 + costa/native/external/lapack/slarnv.f | 116 + costa/native/external/lapack/slarrb.f | 282 ++ costa/native/external/lapack/slarre.f | 318 ++ costa/native/external/lapack/slarrf.f | 150 + costa/native/external/lapack/slarrv.f | 417 ++ costa/native/external/lapack/slartg.f | 143 + costa/native/external/lapack/slartv.f | 77 + costa/native/external/lapack/slaruv.f | 368 ++ costa/native/external/lapack/slarz.f | 153 + costa/native/external/lapack/slarzb.f | 221 + costa/native/external/lapack/slarzt.f | 185 + costa/native/external/lapack/slas2.f | 122 + costa/native/external/lapack/slascl.f | 268 + costa/native/external/lapack/slasd0.f | 231 + costa/native/external/lapack/slasd1.f | 233 + costa/native/external/lapack/slasd2.f | 513 ++ costa/native/external/lapack/slasd3.f | 359 ++ costa/native/external/lapack/slasd4.f | 891 ++++ costa/native/external/lapack/slasd5.f | 164 + costa/native/external/lapack/slasd6.f | 306 ++ costa/native/external/lapack/slasd7.f | 445 ++ costa/native/external/lapack/slasd8.f | 254 + costa/native/external/lapack/slasd9.f | 256 + costa/native/external/lapack/slasda.f | 391 ++ costa/native/external/lapack/slasdq.f | 317 ++ costa/native/external/lapack/slasdt.f | 106 + costa/native/external/lapack/slaset.f | 115 + costa/native/external/lapack/slasq1.f | 149 + costa/native/external/lapack/slasq2.f | 436 ++ costa/native/external/lapack/slasq3.f | 298 ++ costa/native/external/lapack/slasq4.f | 330 ++ costa/native/external/lapack/slasq5.f | 196 + costa/native/external/lapack/slasq6.f | 176 + costa/native/external/lapack/slasr.f | 325 ++ costa/native/external/lapack/slasrt.f | 244 + costa/native/external/lapack/slassq.f | 89 + costa/native/external/lapack/slasv2.f | 250 + costa/native/external/lapack/slaswp.f | 120 + costa/native/external/lapack/slasy2.f | 382 ++ costa/native/external/lapack/slasyf.f | 588 +++ costa/native/external/lapack/slatbs.f | 724 +++ costa/native/external/lapack/slatdf.f | 238 + costa/native/external/lapack/slatps.f | 713 +++ costa/native/external/lapack/slatrd.f | 259 + costa/native/external/lapack/slatrs.f | 702 +++ costa/native/external/lapack/slatrz.f | 128 + costa/native/external/lapack/slatzm.f | 143 + costa/native/external/lapack/slauu2.f | 136 + costa/native/external/lapack/slauum.f | 156 + costa/native/external/lapack/sopgtr.f | 161 + costa/native/external/lapack/sopmtr.f | 258 + costa/native/external/lapack/sorg2l.f | 128 + costa/native/external/lapack/sorg2r.f | 130 + costa/native/external/lapack/sorgbr.f | 245 + costa/native/external/lapack/sorghr.f | 165 + costa/native/external/lapack/sorgl2.f | 134 + costa/native/external/lapack/sorglq.f | 216 + costa/native/external/lapack/sorgql.f | 214 + costa/native/external/lapack/sorgqr.f | 217 + costa/native/external/lapack/sorgr2.f | 132 + costa/native/external/lapack/sorgrq.f | 214 + costa/native/external/lapack/sorgtr.f | 184 + costa/native/external/lapack/sorm2l.f | 194 + costa/native/external/lapack/sorm2r.f | 198 + costa/native/external/lapack/sormbr.f | 283 ++ costa/native/external/lapack/sormhr.f | 203 + costa/native/external/lapack/sorml2.f | 198 + costa/native/external/lapack/sormlq.f | 269 ++ costa/native/external/lapack/sormql.f | 258 + costa/native/external/lapack/sormqr.f | 262 + costa/native/external/lapack/sormr2.f | 194 + costa/native/external/lapack/sormr3.f | 207 + costa/native/external/lapack/sormrq.f | 265 + costa/native/external/lapack/sormrz.f | 289 ++ costa/native/external/lapack/sormtr.f | 224 + costa/native/external/lapack/spbcon.f | 188 + costa/native/external/lapack/spbequ.f | 167 + costa/native/external/lapack/spbrfs.f | 337 ++ costa/native/external/lapack/spbstf.f | 251 + costa/native/external/lapack/spbsv.f | 152 + costa/native/external/lapack/spbsvx.f | 424 ++ costa/native/external/lapack/spbtf2.f | 195 + costa/native/external/lapack/spbtrf.f | 365 ++ costa/native/external/lapack/spbtrs.f | 146 + costa/native/external/lapack/spocon.f | 173 + costa/native/external/lapack/spoequ.f | 137 + costa/native/external/lapack/sporfs.f | 327 ++ costa/native/external/lapack/sposv.f | 122 + costa/native/external/lapack/sposvx.f | 379 ++ costa/native/external/lapack/spotf2.f | 168 + costa/native/external/lapack/spotrf.f | 184 + costa/native/external/lapack/spotri.f | 97 + costa/native/external/lapack/spotrs.f | 133 + costa/native/external/lapack/sppcon.f | 172 + costa/native/external/lapack/sppequ.f | 169 + costa/native/external/lapack/spprfs.f | 324 ++ costa/native/external/lapack/sppsv.f | 134 + costa/native/external/lapack/sppsvx.f | 383 ++ costa/native/external/lapack/spptrf.f | 178 + costa/native/external/lapack/spptri.f | 129 + costa/native/external/lapack/spptrs.f | 135 + costa/native/external/lapack/sptcon.f | 150 + costa/native/external/lapack/spteqr.f | 190 + costa/native/external/lapack/sptrfs.f | 302 ++ costa/native/external/lapack/sptsv.f | 100 + costa/native/external/lapack/sptsvx.f | 235 + costa/native/external/lapack/spttrf.f | 153 + costa/native/external/lapack/spttrs.f | 115 + costa/native/external/lapack/sptts2.f | 94 + costa/native/external/lapack/srscl.f | 115 + costa/native/external/lapack/ssbev.f | 206 + costa/native/external/lapack/ssbevd.f | 265 + costa/native/external/lapack/ssbevx.f | 411 ++ costa/native/external/lapack/ssbgst.f | 1346 ++++++ costa/native/external/lapack/ssbgv.f | 189 + costa/native/external/lapack/ssbgvd.f | 270 ++ costa/native/external/lapack/ssbgvx.f | 371 ++ costa/native/external/lapack/ssbtrd.f | 553 +++ costa/native/external/lapack/sspcon.f | 158 + costa/native/external/lapack/sspev.f | 188 + costa/native/external/lapack/sspevd.f | 249 + costa/native/external/lapack/sspevx.f | 377 ++ costa/native/external/lapack/sspgst.f | 209 + costa/native/external/lapack/sspgv.f | 196 + costa/native/external/lapack/sspgvd.f | 281 ++ costa/native/external/lapack/sspgvx.f | 286 ++ costa/native/external/lapack/ssprfs.f | 331 ++ costa/native/external/lapack/sspsv.f | 149 + costa/native/external/lapack/sspsvx.f | 279 ++ costa/native/external/lapack/ssptrd.f | 228 + costa/native/external/lapack/ssptrf.f | 548 +++ costa/native/external/lapack/ssptri.f | 335 ++ costa/native/external/lapack/ssptrs.f | 378 ++ costa/native/external/lapack/sstebz.f | 651 +++ costa/native/external/lapack/sstedc.f | 397 ++ costa/native/external/lapack/sstegr.f | 400 ++ costa/native/external/lapack/sstein.f | 362 ++ costa/native/external/lapack/ssteqr.f | 501 ++ costa/native/external/lapack/ssterf.f | 365 ++ costa/native/external/lapack/sstev.f | 165 + costa/native/external/lapack/sstevd.f | 217 + costa/native/external/lapack/sstevr.f | 434 ++ costa/native/external/lapack/sstevx.f | 346 ++ costa/native/external/lapack/ssycon.f | 161 + costa/native/external/lapack/ssyev.f | 213 + costa/native/external/lapack/ssyevd.f | 265 + costa/native/external/lapack/ssyevr.f | 497 ++ costa/native/external/lapack/ssyevx.f | 421 ++ costa/native/external/lapack/ssygs2.f | 212 + costa/native/external/lapack/ssygst.f | 250 + costa/native/external/lapack/ssygv.f | 227 + costa/native/external/lapack/ssygvd.f | 277 ++ costa/native/external/lapack/ssygvx.f | 325 ++ costa/native/external/lapack/ssyrfs.f | 335 ++ costa/native/external/lapack/ssysv.f | 171 + costa/native/external/lapack/ssysvx.f | 297 ++ costa/native/external/lapack/ssytd2.f | 248 + costa/native/external/lapack/ssytf2.f | 511 ++ costa/native/external/lapack/ssytrd.f | 295 ++ costa/native/external/lapack/ssytrf.f | 288 ++ costa/native/external/lapack/ssytri.f | 313 ++ costa/native/external/lapack/ssytrs.f | 370 ++ costa/native/external/lapack/stbcon.f | 198 + costa/native/external/lapack/stbrfs.f | 381 ++ costa/native/external/lapack/stbtrs.f | 163 + costa/native/external/lapack/stgevc.f | 1146 +++++ costa/native/external/lapack/stgex2.f | 582 +++ costa/native/external/lapack/stgexc.f | 434 ++ costa/native/external/lapack/stgsen.f | 718 +++ costa/native/external/lapack/stgsja.f | 516 ++ costa/native/external/lapack/stgsna.f | 585 +++ costa/native/external/lapack/stgsy2.f | 950 ++++ costa/native/external/lapack/stgsyl.f | 534 ++ costa/native/external/lapack/stpcon.f | 187 + costa/native/external/lapack/stprfs.f | 375 ++ costa/native/external/lapack/stptri.f | 176 + costa/native/external/lapack/stptrs.f | 154 + costa/native/external/lapack/strcon.f | 193 + costa/native/external/lapack/strevc.f | 1006 ++++ costa/native/external/lapack/strexc.f | 346 ++ costa/native/external/lapack/strrfs.f | 371 ++ costa/native/external/lapack/strsen.f | 457 ++ costa/native/external/lapack/strsna.f | 493 ++ costa/native/external/lapack/strsyl.f | 914 ++++ costa/native/external/lapack/strti2.f | 147 + costa/native/external/lapack/strtri.f | 177 + costa/native/external/lapack/strtrs.f | 148 + costa/native/external/lapack/stzrqf.f | 165 + costa/native/external/lapack/stzrzf.f | 241 + costa/native/external/lapack/xerbla.f | 46 + costa/native/external/lapack/zbdsqr.f | 733 +++ costa/native/external/lapack/zdrot.f | 54 + costa/native/external/lapack/zdrscl.f | 115 + costa/native/external/lapack/zgbbrd.f | 466 ++ costa/native/external/lapack/zgbcon.f | 230 + costa/native/external/lapack/zgbequ.f | 248 + costa/native/external/lapack/zgbrfs.f | 361 ++ costa/native/external/lapack/zgbsv.f | 143 + costa/native/external/lapack/zgbsvx.f | 518 ++ costa/native/external/lapack/zgbtf2.f | 203 + costa/native/external/lapack/zgbtrf.f | 443 ++ costa/native/external/lapack/zgbtrs.f | 215 + costa/native/external/lapack/zgebak.f | 190 + costa/native/external/lapack/zgebal.f | 331 ++ costa/native/external/lapack/zgebd2.f | 249 + costa/native/external/lapack/zgebrd.f | 269 ++ costa/native/external/lapack/zgecon.f | 189 + costa/native/external/lapack/zgeequ.f | 234 + costa/native/external/lapack/zgees.f | 322 ++ costa/native/external/lapack/zgeesx.f | 371 ++ costa/native/external/lapack/zgeev.f | 391 ++ costa/native/external/lapack/zgeevx.f | 521 ++ costa/native/external/lapack/zgegs.f | 442 ++ costa/native/external/lapack/zgegv.f | 591 +++ costa/native/external/lapack/zgehd2.f | 149 + costa/native/external/lapack/zgehrd.f | 254 + costa/native/external/lapack/zgelq2.f | 124 + costa/native/external/lapack/zgelqf.f | 196 + costa/native/external/lapack/zgels.f | 405 ++ costa/native/external/lapack/zgelsd.f | 545 +++ costa/native/external/lapack/zgelss.f | 639 +++ costa/native/external/lapack/zgelsx.f | 358 ++ costa/native/external/lapack/zgelsy.f | 386 ++ costa/native/external/lapack/zgeql2.f | 122 + costa/native/external/lapack/zgeqlf.f | 205 + costa/native/external/lapack/zgeqp3.f | 285 ++ costa/native/external/lapack/zgeqpf.f | 225 + costa/native/external/lapack/zgeqr2.f | 122 + costa/native/external/lapack/zgeqrf.f | 197 + costa/native/external/lapack/zgerfs.f | 341 ++ costa/native/external/lapack/zgerq2.f | 124 + costa/native/external/lapack/zgerqf.f | 205 + costa/native/external/lapack/zgesc2.f | 134 + costa/native/external/lapack/zgesdd.f | 1950 ++++++++ costa/native/external/lapack/zgesv.f | 108 + costa/native/external/lapack/zgesvd.f | 3618 ++++++++++++++ costa/native/external/lapack/zgesvx.f | 484 ++ costa/native/external/lapack/zgetc2.f | 146 + costa/native/external/lapack/zgetf2.f | 136 + costa/native/external/lapack/zgetrf.f | 160 + costa/native/external/lapack/zgetri.f | 194 + costa/native/external/lapack/zgetrs.f | 150 + costa/native/external/lapack/zggbak.f | 216 + costa/native/external/lapack/zggbal.f | 474 ++ costa/native/external/lapack/zgges.f | 475 ++ costa/native/external/lapack/zggesx.f | 544 +++ costa/native/external/lapack/zggev.f | 448 ++ costa/native/external/lapack/zggevx.f | 640 +++ costa/native/external/lapack/zggglm.f | 213 + costa/native/external/lapack/zgghrd.f | 256 + costa/native/external/lapack/zgglse.f | 218 + costa/native/external/lapack/zggqrf.f | 212 + costa/native/external/lapack/zggrqf.f | 212 + costa/native/external/lapack/zggsvd.f | 334 ++ costa/native/external/lapack/zggsvp.f | 403 ++ costa/native/external/lapack/zgtcon.f | 167 + costa/native/external/lapack/zgtrfs.f | 369 ++ costa/native/external/lapack/zgtsv.f | 174 + costa/native/external/lapack/zgtsvx.f | 294 ++ costa/native/external/lapack/zgttrf.f | 175 + costa/native/external/lapack/zgttrs.f | 143 + costa/native/external/lapack/zgtts2.f | 272 ++ costa/native/external/lapack/zhbev.f | 209 + costa/native/external/lapack/zhbevd.f | 298 ++ costa/native/external/lapack/zhbevx.f | 417 ++ costa/native/external/lapack/zhbgst.f | 1378 ++++++ costa/native/external/lapack/zhbgv.f | 192 + costa/native/external/lapack/zhbgvd.f | 295 ++ costa/native/external/lapack/zhbgvx.f | 374 ++ costa/native/external/lapack/zhbtrd.f | 589 +++ costa/native/external/lapack/zhecon.f | 159 + costa/native/external/lapack/zheev.f | 220 + costa/native/external/lapack/zheevd.f | 296 ++ costa/native/external/lapack/zheevr.f | 522 ++ costa/native/external/lapack/zheevx.f | 426 ++ costa/native/external/lapack/zhegs2.f | 225 + costa/native/external/lapack/zhegst.f | 260 + costa/native/external/lapack/zhegv.f | 229 + costa/native/external/lapack/zhegvd.f | 297 ++ costa/native/external/lapack/zhegvx.f | 330 ++ costa/native/external/lapack/zherfs.f | 339 ++ costa/native/external/lapack/zhesv.f | 171 + costa/native/external/lapack/zhesvx.f | 299 ++ costa/native/external/lapack/zhetd2.f | 259 + costa/native/external/lapack/zhetf2.f | 546 +++ costa/native/external/lapack/zhetrd.f | 297 ++ costa/native/external/lapack/zhetrf.f | 282 ++ costa/native/external/lapack/zhetri.f | 328 ++ costa/native/external/lapack/zhetrs.f | 394 ++ costa/native/external/lapack/zhgeqz.f | 735 +++ costa/native/external/lapack/zhpcon.f | 155 + costa/native/external/lapack/zhpev.f | 197 + costa/native/external/lapack/zhpevd.f | 280 ++ costa/native/external/lapack/zhpevx.f | 384 ++ costa/native/external/lapack/zhpgst.f | 216 + costa/native/external/lapack/zhpgv.f | 197 + costa/native/external/lapack/zhpgvd.f | 291 ++ costa/native/external/lapack/zhpgvx.f | 284 ++ costa/native/external/lapack/zhprfs.f | 337 ++ costa/native/external/lapack/zhpsv.f | 149 + costa/native/external/lapack/zhpsvx.f | 279 ++ costa/native/external/lapack/zhptrd.f | 238 + costa/native/external/lapack/zhptrf.f | 582 +++ costa/native/external/lapack/zhptri.f | 344 ++ costa/native/external/lapack/zhptrs.f | 402 ++ costa/native/external/lapack/zhsein.f | 351 ++ costa/native/external/lapack/zhseqr.f | 474 ++ costa/native/external/lapack/zlabrd.f | 329 ++ costa/native/external/lapack/zlacgv.f | 61 + costa/native/external/lapack/zlacon.f | 211 + costa/native/external/lapack/zlacp2.f | 92 + costa/native/external/lapack/zlacpy.f | 91 + costa/native/external/lapack/zlacrm.f | 111 + costa/native/external/lapack/zlacrt.f | 91 + costa/native/external/lapack/zladiv.f | 47 + costa/native/external/lapack/zlaed0.f | 289 ++ costa/native/external/lapack/zlaed7.f | 267 + costa/native/external/lapack/zlaed8.f | 364 ++ costa/native/external/lapack/zlaein.f | 264 + costa/native/external/lapack/zlaesy.f | 153 + costa/native/external/lapack/zlaev2.f | 96 + costa/native/external/lapack/zlags2.f | 309 ++ costa/native/external/lapack/zlagtm.f | 234 + costa/native/external/lapack/zlahef.f | 648 +++ costa/native/external/lapack/zlahqr.f | 384 ++ costa/native/external/lapack/zlahrd.f | 212 + costa/native/external/lapack/zlaic1.f | 296 ++ costa/native/external/lapack/zlals0.f | 434 ++ costa/native/external/lapack/zlalsa.f | 504 ++ costa/native/external/lapack/zlalsd.f | 599 +++ costa/native/external/lapack/zlangb.f | 155 + costa/native/external/lapack/zlange.f | 146 + costa/native/external/lapack/zlangt.f | 142 + costa/native/external/lapack/zlanhb.f | 202 + costa/native/external/lapack/zlanhe.f | 188 + costa/native/external/lapack/zlanhp.f | 202 + costa/native/external/lapack/zlanhs.f | 143 + costa/native/external/lapack/zlanht.f | 126 + costa/native/external/lapack/zlansb.f | 188 + costa/native/external/lapack/zlansp.f | 207 + costa/native/external/lapack/zlansy.f | 175 + costa/native/external/lapack/zlantb.f | 286 ++ costa/native/external/lapack/zlantp.f | 287 ++ costa/native/external/lapack/zlantr.f | 278 ++ costa/native/external/lapack/zlapll.f | 104 + costa/native/external/lapack/zlapmt.f | 135 + costa/native/external/lapack/zlaqgb.f | 170 + costa/native/external/lapack/zlaqge.f | 156 + costa/native/external/lapack/zlaqhb.f | 152 + costa/native/external/lapack/zlaqhe.f | 148 + costa/native/external/lapack/zlaqhp.f | 147 + costa/native/external/lapack/zlaqp2.f | 170 + costa/native/external/lapack/zlaqps.f | 260 + costa/native/external/lapack/zlaqsb.f | 150 + costa/native/external/lapack/zlaqsp.f | 142 + costa/native/external/lapack/zlaqsy.f | 143 + costa/native/external/lapack/zlar1v.f | 328 ++ costa/native/external/lapack/zlar2v.f | 98 + costa/native/external/lapack/zlarcm.f | 111 + costa/native/external/lapack/zlarf.f | 121 + costa/native/external/lapack/zlarfb.f | 609 +++ costa/native/external/lapack/zlarfg.f | 146 + costa/native/external/lapack/zlarft.f | 225 + costa/native/external/lapack/zlarfx.f | 642 +++ costa/native/external/lapack/zlargv.f | 225 + costa/native/external/lapack/zlarnv.f | 131 + costa/native/external/lapack/zlarrv.f | 437 ++ costa/native/external/lapack/zlartg.f | 193 + costa/native/external/lapack/zlartv.f | 79 + costa/native/external/lapack/zlarz.f | 158 + costa/native/external/lapack/zlarzb.f | 235 + costa/native/external/lapack/zlarzt.f | 187 + costa/native/external/lapack/zlascl.f | 268 + costa/native/external/lapack/zlaset.f | 115 + costa/native/external/lapack/zlasr.f | 325 ++ costa/native/external/lapack/zlassq.f | 102 + costa/native/external/lapack/zlaswp.f | 120 + costa/native/external/lapack/zlasyf.f | 598 +++ costa/native/external/lapack/zlatbs.f | 909 ++++ costa/native/external/lapack/zlatdf.f | 242 + costa/native/external/lapack/zlatps.f | 895 ++++ costa/native/external/lapack/zlatrd.f | 280 ++ costa/native/external/lapack/zlatrs.f | 880 ++++ costa/native/external/lapack/zlatrz.f | 134 + costa/native/external/lapack/zlatzm.f | 147 + costa/native/external/lapack/zlauu2.f | 144 + costa/native/external/lapack/zlauum.f | 161 + costa/native/external/lapack/zpbcon.f | 194 + costa/native/external/lapack/zpbequ.f | 168 + costa/native/external/lapack/zpbrfs.f | 342 ++ costa/native/external/lapack/zpbstf.f | 264 + costa/native/external/lapack/zpbsv.f | 152 + costa/native/external/lapack/zpbsvx.f | 423 ++ costa/native/external/lapack/zpbtf2.f | 201 + costa/native/external/lapack/zpbtrf.f | 372 ++ costa/native/external/lapack/zpbtrs.f | 146 + costa/native/external/lapack/zpocon.f | 180 + costa/native/external/lapack/zpoequ.f | 138 + costa/native/external/lapack/zporfs.f | 333 ++ costa/native/external/lapack/zposv.f | 122 + costa/native/external/lapack/zposvx.f | 378 ++ costa/native/external/lapack/zpotf2.f | 175 + costa/native/external/lapack/zpotrf.f | 187 + costa/native/external/lapack/zpotri.f | 97 + costa/native/external/lapack/zpotrs.f | 133 + costa/native/external/lapack/zppcon.f | 179 + costa/native/external/lapack/zppequ.f | 170 + costa/native/external/lapack/zpprfs.f | 331 ++ costa/native/external/lapack/zppsv.f | 134 + costa/native/external/lapack/zppsvx.f | 383 ++ costa/native/external/lapack/zpptrf.f | 179 + costa/native/external/lapack/zpptri.f | 131 + costa/native/external/lapack/zpptrs.f | 135 + costa/native/external/lapack/zptcon.f | 151 + costa/native/external/lapack/zpteqr.f | 191 + costa/native/external/lapack/zptrfs.f | 367 ++ costa/native/external/lapack/zptsv.f | 101 + costa/native/external/lapack/zptsvx.f | 238 + costa/native/external/lapack/zpttrf.f | 169 + costa/native/external/lapack/zpttrs.f | 136 + costa/native/external/lapack/zptts2.f | 177 + costa/native/external/lapack/zrot.f | 92 + costa/native/external/lapack/zspcon.f | 155 + costa/native/external/lapack/zspmv.f | 265 + costa/native/external/lapack/zspr.f | 214 + costa/native/external/lapack/zsprfs.f | 336 ++ costa/native/external/lapack/zspsv.f | 149 + costa/native/external/lapack/zspsvx.f | 279 ++ costa/native/external/lapack/zsptrf.f | 556 +++ costa/native/external/lapack/zsptri.f | 338 ++ costa/native/external/lapack/zsptrs.f | 378 ++ costa/native/external/lapack/zstedc.f | 390 ++ costa/native/external/lapack/zstegr.f | 405 ++ costa/native/external/lapack/zstein.f | 377 ++ costa/native/external/lapack/zsteqr.f | 504 ++ costa/native/external/lapack/zsycon.f | 159 + costa/native/external/lapack/zsymv.f | 265 + costa/native/external/lapack/zsyr.f | 199 + costa/native/external/lapack/zsyrfs.f | 339 ++ costa/native/external/lapack/zsysv.f | 171 + costa/native/external/lapack/zsysvx.f | 299 ++ costa/native/external/lapack/zsytf2.f | 515 ++ costa/native/external/lapack/zsytrf.f | 287 ++ costa/native/external/lapack/zsytri.f | 314 ++ costa/native/external/lapack/zsytrs.f | 370 ++ costa/native/external/lapack/ztbcon.f | 205 + costa/native/external/lapack/ztbrfs.f | 393 ++ costa/native/external/lapack/ztbtrs.f | 163 + costa/native/external/lapack/ztgevc.f | 632 +++ costa/native/external/lapack/ztgex2.f | 268 + costa/native/external/lapack/ztgexc.f | 207 + costa/native/external/lapack/ztgsen.f | 645 +++ costa/native/external/lapack/ztgsja.f | 526 ++ costa/native/external/lapack/ztgsna.f | 402 ++ costa/native/external/lapack/ztgsy2.f | 357 ++ costa/native/external/lapack/ztgsyl.f | 550 +++ costa/native/external/lapack/ztpcon.f | 194 + costa/native/external/lapack/ztprfs.f | 387 ++ costa/native/external/lapack/ztptri.f | 177 + costa/native/external/lapack/ztptrs.f | 154 + costa/native/external/lapack/ztrcon.f | 200 + costa/native/external/lapack/ztrevc.f | 390 ++ costa/native/external/lapack/ztrexc.f | 163 + costa/native/external/lapack/ztrrfs.f | 378 ++ costa/native/external/lapack/ztrsen.f | 358 ++ costa/native/external/lapack/ztrsna.f | 353 ++ costa/native/external/lapack/ztrsyl.f | 368 ++ costa/native/external/lapack/ztrti2.f | 147 + costa/native/external/lapack/ztrtri.f | 178 + costa/native/external/lapack/ztrtrs.f | 149 + costa/native/external/lapack/ztzrqf.f | 174 + costa/native/external/lapack/ztzrzf.f | 241 + costa/native/external/lapack/zung2l.f | 129 + costa/native/external/lapack/zung2r.f | 131 + costa/native/external/lapack/zungbr.f | 246 + costa/native/external/lapack/zunghr.f | 166 + costa/native/external/lapack/zungl2.f | 137 + costa/native/external/lapack/zunglq.f | 216 + costa/native/external/lapack/zungql.f | 214 + costa/native/external/lapack/zungqr.f | 217 + costa/native/external/lapack/zungr2.f | 135 + costa/native/external/lapack/zungrq.f | 215 + costa/native/external/lapack/zungtr.f | 185 + costa/native/external/lapack/zunm2l.f | 197 + costa/native/external/lapack/zunm2r.f | 202 + costa/native/external/lapack/zunmbr.f | 281 ++ costa/native/external/lapack/zunmhr.f | 202 + costa/native/external/lapack/zunml2.f | 206 + costa/native/external/lapack/zunmlq.f | 268 + costa/native/external/lapack/zunmql.f | 257 + costa/native/external/lapack/zunmqr.f | 261 + costa/native/external/lapack/zunmr2.f | 199 + costa/native/external/lapack/zunmr3.f | 213 + costa/native/external/lapack/zunmrq.f | 264 + costa/native/external/lapack/zunmrz.f | 293 ++ costa/native/external/lapack/zunmtr.f | 223 + costa/native/external/lapack/zupgtr.f | 162 + costa/native/external/lapack/zupmtr.f | 268 + 1593 files changed, 468683 insertions(+) create mode 100644 costa/native/cta/cta_util_timing.f90 create mode 100644 costa/native/cta/include/cta.h create mode 100644 costa/native/cta/include/cta_array.h create mode 100644 costa/native/cta/include/cta_bb_modbuild.h create mode 100644 costa/native/cta/include/cta_bb_modbuild_utils.h create mode 100644 costa/native/cta/include/cta_datatypes.h create mode 100644 costa/native/cta/include/cta_datetime.h create mode 100644 costa/native/cta/include/cta_defaults.h create mode 100644 costa/native/cta/include/cta_errors.h create mode 100644 costa/native/cta/include/cta_f77blas.h create mode 100644 costa/native/cta/include/cta_f77lapack.h create mode 100644 costa/native/cta/include/cta_file.h create mode 100644 costa/native/cta/include/cta_flush.h create mode 100644 costa/native/cta/include/cta_functions.h create mode 100644 costa/native/cta/include/cta_handles.h create mode 100644 costa/native/cta/include/cta_initialise.h create mode 100644 costa/native/cta/include/cta_interface.h create mode 100644 costa/native/cta/include/cta_matrix.h create mode 100644 costa/native/cta/include/cta_matrix_blas.h create mode 100644 costa/native/cta/include/cta_mem.h create mode 100644 costa/native/cta/include/cta_message.h create mode 100644 costa/native/cta/include/cta_metainfo.h create mode 100644 costa/native/cta/include/cta_method.h create mode 100644 costa/native/cta/include/cta_modbuild_b3b.h create mode 100644 costa/native/cta/include/cta_modbuild_b3b_utils.h create mode 100644 costa/native/cta/include/cta_modbuild_par.h create mode 100644 costa/native/cta/include/cta_modbuild_sp.h create mode 100644 costa/native/cta/include/cta_model.h create mode 100644 costa/native/cta/include/cta_model_factory.h create mode 100644 costa/native/cta/include/cta_model_utilities.h create mode 100644 costa/native/cta/include/cta_modelcombiner.h create mode 100644 costa/native/cta/include/cta_obsdescr.h create mode 100644 costa/native/cta/include/cta_obsdescr_sqlite3.h create mode 100644 costa/native/cta/include/cta_obsdescr_table.h create mode 100644 costa/native/cta/include/cta_pack.h create mode 100644 costa/native/cta/include/cta_par.h create mode 100644 costa/native/cta/include/cta_reltable.h create mode 100644 costa/native/cta/include/cta_resultwriter.h create mode 100644 costa/native/cta/include/cta_sobs.h create mode 100644 costa/native/cta/include/cta_sobs_combine.h create mode 100644 costa/native/cta/include/cta_sobs_netcdf.h create mode 100644 costa/native/cta/include/cta_sobs_sqlite3.h create mode 100644 costa/native/cta/include/cta_string.h create mode 100644 costa/native/cta/include/cta_system.h create mode 100644 costa/native/cta/include/cta_time.h create mode 100644 costa/native/cta/include/cta_tree.h create mode 100644 costa/native/cta/include/cta_treevector.h create mode 100644 costa/native/cta/include/cta_usr_matrix.h create mode 100644 costa/native/cta/include/cta_usr_method.h create mode 100644 costa/native/cta/include/cta_usr_model.h create mode 100644 costa/native/cta/include/cta_usr_obs_desc.h create mode 100644 costa/native/cta/include/cta_usr_stoch_observer.h create mode 100644 costa/native/cta/include/cta_usr_vector.h create mode 100644 costa/native/cta/include/cta_util_methods.h create mode 100644 costa/native/cta/include/cta_util_sort.h create mode 100644 costa/native/cta/include/cta_util_sqlite3.h create mode 100644 costa/native/cta/include/cta_util_statistics.h create mode 100644 costa/native/cta/include/cta_vector.h create mode 100644 costa/native/cta/include/cta_vector_blas.h create mode 100644 costa/native/cta/include/cta_xml.h create mode 100644 costa/native/cta/include/ctai.h create mode 100644 costa/native/cta/include/ctai_datatypes.h create mode 100644 costa/native/cta/include/ctai_handles.h create mode 100644 costa/native/cta/include/ctai_sobs.h create mode 100644 costa/native/cta/include/ctai_string.h create mode 100644 costa/native/cta/include/ctai_vector.h create mode 100644 costa/native/cta/include/ctai_xml.h create mode 100644 costa/native/cta/include/f_cta_utils.h create mode 100644 costa/native/cta/include/modbuild_sp_model_template.h create mode 100644 costa/native/cta/src/CMakeLists.txt create mode 100644 costa/native/cta/src/cta_array.c create mode 100644 costa/native/cta/src/cta_datatypes.c create mode 100644 costa/native/cta/src/cta_datetime.c create mode 100644 costa/native/cta/src/cta_defaults.c create mode 100644 costa/native/cta/src/cta_file.c create mode 100644 costa/native/cta/src/cta_flush.c create mode 100644 costa/native/cta/src/cta_functions.c create mode 100644 costa/native/cta/src/cta_handles.c create mode 100644 costa/native/cta/src/cta_initialise.c create mode 100644 costa/native/cta/src/cta_interface.c create mode 100644 costa/native/cta/src/cta_matrix.c create mode 100644 costa/native/cta/src/cta_matrix_blas.c create mode 100644 costa/native/cta/src/cta_mem.c create mode 100644 costa/native/cta/src/cta_message.c create mode 100644 costa/native/cta/src/cta_metainfo.c create mode 100644 costa/native/cta/src/cta_method.c create mode 100644 costa/native/cta/src/cta_modbuild_par.c create mode 100644 costa/native/cta/src/cta_modbuild_sp.c create mode 100644 costa/native/cta/src/cta_model.c create mode 100644 costa/native/cta/src/cta_model_factory.c create mode 100644 costa/native/cta/src/cta_model_utilities.c create mode 100644 costa/native/cta/src/cta_obsdescr.c create mode 100644 costa/native/cta/src/cta_obsdescr_combine.c create mode 100644 costa/native/cta/src/cta_obsdescr_maori.c create mode 100644 costa/native/cta/src/cta_obsdescr_netcdf.c create mode 100644 costa/native/cta/src/cta_obsdescr_sqlite3.c create mode 100644 costa/native/cta/src/cta_obsdescr_table.c create mode 100644 costa/native/cta/src/cta_obsdescr_user.c create mode 100644 costa/native/cta/src/cta_pack.c create mode 100644 costa/native/cta/src/cta_par.c create mode 100644 costa/native/cta/src/cta_reltable.c create mode 100644 costa/native/cta/src/cta_resultwriter.c create mode 100644 costa/native/cta/src/cta_sobs.c create mode 100644 costa/native/cta/src/cta_sobs_combine.c create mode 100644 costa/native/cta/src/cta_sobs_factory.c create mode 100644 costa/native/cta/src/cta_sobs_maori.c create mode 100644 costa/native/cta/src/cta_sobs_netcdf.c create mode 100644 costa/native/cta/src/cta_sobs_sqlite3.c create mode 100644 costa/native/cta/src/cta_sobs_user.c create mode 100644 costa/native/cta/src/cta_string.c create mode 100644 costa/native/cta/src/cta_time.c create mode 100644 costa/native/cta/src/cta_tree.c create mode 100644 costa/native/cta/src/cta_treevector.c create mode 100644 costa/native/cta/src/cta_util_methods.c create mode 100644 costa/native/cta/src/cta_util_sort.c create mode 100644 costa/native/cta/src/cta_util_sqlite3.c create mode 100644 costa/native/cta/src/cta_util_statistics.c create mode 100644 costa/native/cta/src/cta_vector.c create mode 100644 costa/native/cta/src/cta_vector_blas.c create mode 100644 costa/native/cta/src/cta_xml.c create mode 100644 costa/native/cta/src/f_cta_defaults.c create mode 100644 costa/native/cta/src/f_cta_utils.c create mode 100644 costa/native/cta_f90/cta_f90_parameters.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_array.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_datatypes.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_datetime.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_file.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_flush_mod.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_functions.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_handles.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_initialise.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_interface.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_matrix.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_mem.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_message.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_metainfo.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_method.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_modbuild_par.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_model.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_model_factory.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_model_utilities.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_obsdescr.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_pack.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_par.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_parameters.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_reltable.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_resultwriter.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_sobs.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_string.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_time.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_tree.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_treevector.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_util_methods.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_util_sort.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_util_statistics.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_vector.f90 create mode 100644 costa/native/cta_f90/generated/cta_f90_xml.f90 create mode 100644 costa/native/cta_f90/include/cta_f77.inc create mode 100644 costa/native/cta_f90/include/cta_f90.inc create mode 100644 costa/native/cta_f90/include/cta_f90_contains.inc create mode 100644 costa/native/external/blas/caxpy.f create mode 100644 costa/native/external/blas/ccopy.f create mode 100644 costa/native/external/blas/cdotc.f create mode 100644 costa/native/external/blas/cdotu.f create mode 100644 costa/native/external/blas/cgbmv.f create mode 100644 costa/native/external/blas/cgemm.f create mode 100644 costa/native/external/blas/cgemv.f create mode 100644 costa/native/external/blas/cgerc.f create mode 100644 costa/native/external/blas/cgeru.f create mode 100644 costa/native/external/blas/chbmv.f create mode 100644 costa/native/external/blas/chemm.f create mode 100644 costa/native/external/blas/chemv.f create mode 100644 costa/native/external/blas/cher.f create mode 100644 costa/native/external/blas/cher2.f create mode 100644 costa/native/external/blas/cher2k.f create mode 100644 costa/native/external/blas/cherk.f create mode 100644 costa/native/external/blas/chpmv.f create mode 100644 costa/native/external/blas/chpr.f create mode 100644 costa/native/external/blas/chpr2.f create mode 100644 costa/native/external/blas/crotg.f create mode 100644 costa/native/external/blas/cscal.f create mode 100644 costa/native/external/blas/csscal.f create mode 100644 costa/native/external/blas/cswap.f create mode 100644 costa/native/external/blas/csymm.f create mode 100644 costa/native/external/blas/csyr2k.f create mode 100644 costa/native/external/blas/csyrk.f create mode 100644 costa/native/external/blas/ctbmv.f create mode 100644 costa/native/external/blas/ctbsv.f create mode 100644 costa/native/external/blas/ctpmv.f create mode 100644 costa/native/external/blas/ctpsv.f create mode 100644 costa/native/external/blas/ctrmm.f create mode 100644 costa/native/external/blas/ctrmv.f create mode 100644 costa/native/external/blas/ctrsm.f create mode 100644 costa/native/external/blas/ctrsv.f create mode 100644 costa/native/external/blas/dasum.f create mode 100644 costa/native/external/blas/daxpy.f create mode 100644 costa/native/external/blas/dcabs1.f create mode 100644 costa/native/external/blas/dcopy.f create mode 100644 costa/native/external/blas/ddot.f create mode 100644 costa/native/external/blas/dgbmv.f create mode 100644 costa/native/external/blas/dgemm.f create mode 100644 costa/native/external/blas/dgemv.f create mode 100644 costa/native/external/blas/dger.f create mode 100644 costa/native/external/blas/dnrm2.f create mode 100644 costa/native/external/blas/drot.f create mode 100644 costa/native/external/blas/drotg.f create mode 100644 costa/native/external/blas/dsbmv.f create mode 100644 costa/native/external/blas/dscal.f create mode 100644 costa/native/external/blas/dspmv.f create mode 100644 costa/native/external/blas/dspr.f create mode 100644 costa/native/external/blas/dspr2.f create mode 100644 costa/native/external/blas/dswap.f create mode 100644 costa/native/external/blas/dsymm.f create mode 100644 costa/native/external/blas/dsymv.f create mode 100644 costa/native/external/blas/dsyr.f create mode 100644 costa/native/external/blas/dsyr2.f create mode 100644 costa/native/external/blas/dsyr2k.f create mode 100644 costa/native/external/blas/dsyrk.f create mode 100644 costa/native/external/blas/dtbmv.f create mode 100644 costa/native/external/blas/dtbsv.f create mode 100644 costa/native/external/blas/dtpmv.f create mode 100644 costa/native/external/blas/dtpsv.f create mode 100644 costa/native/external/blas/dtrmm.f create mode 100644 costa/native/external/blas/dtrmv.f create mode 100644 costa/native/external/blas/dtrsm.f create mode 100644 costa/native/external/blas/dtrsv.f create mode 100644 costa/native/external/blas/dzasum.f create mode 100644 costa/native/external/blas/dznrm2.f create mode 100644 costa/native/external/blas/icamax.f create mode 100644 costa/native/external/blas/idamax.f create mode 100644 costa/native/external/blas/isamax.f create mode 100644 costa/native/external/blas/izamax.f create mode 100644 costa/native/external/blas/lsame.f create mode 100644 costa/native/external/blas/sasum.f create mode 100644 costa/native/external/blas/saxpy.f create mode 100644 costa/native/external/blas/scasum.f create mode 100644 costa/native/external/blas/scnrm2.f create mode 100644 costa/native/external/blas/scopy.f create mode 100644 costa/native/external/blas/sdot.f create mode 100644 costa/native/external/blas/sgbmv.f create mode 100644 costa/native/external/blas/sgemm.f create mode 100644 costa/native/external/blas/sgemv.f create mode 100644 costa/native/external/blas/sger.f create mode 100644 costa/native/external/blas/snrm2.f create mode 100644 costa/native/external/blas/srot.f create mode 100644 costa/native/external/blas/srotg.f create mode 100644 costa/native/external/blas/ssbmv.f create mode 100644 costa/native/external/blas/sscal.f create mode 100644 costa/native/external/blas/sspmv.f create mode 100644 costa/native/external/blas/sspr.f create mode 100644 costa/native/external/blas/sspr2.f create mode 100644 costa/native/external/blas/sswap.f create mode 100644 costa/native/external/blas/ssymm.f create mode 100644 costa/native/external/blas/ssymv.f create mode 100644 costa/native/external/blas/ssyr.f create mode 100644 costa/native/external/blas/ssyr2.f create mode 100644 costa/native/external/blas/ssyr2k.f create mode 100644 costa/native/external/blas/ssyrk.f create mode 100644 costa/native/external/blas/stbmv.f create mode 100644 costa/native/external/blas/stbsv.f create mode 100644 costa/native/external/blas/stpmv.f create mode 100644 costa/native/external/blas/stpsv.f create mode 100644 costa/native/external/blas/strmm.f create mode 100644 costa/native/external/blas/strmv.f create mode 100644 costa/native/external/blas/strsm.f create mode 100644 costa/native/external/blas/strsv.f create mode 100644 costa/native/external/blas/xerbla.f create mode 100644 costa/native/external/blas/zaxpy.f create mode 100644 costa/native/external/blas/zcopy.f create mode 100644 costa/native/external/blas/zdotc.f create mode 100644 costa/native/external/blas/zdotu.f create mode 100644 costa/native/external/blas/zdscal.f create mode 100644 costa/native/external/blas/zgbmv.f create mode 100644 costa/native/external/blas/zgemm.f create mode 100644 costa/native/external/blas/zgemv.f create mode 100644 costa/native/external/blas/zgerc.f create mode 100644 costa/native/external/blas/zgeru.f create mode 100644 costa/native/external/blas/zhbmv.f create mode 100644 costa/native/external/blas/zhemm.f create mode 100644 costa/native/external/blas/zhemv.f create mode 100644 costa/native/external/blas/zher.f create mode 100644 costa/native/external/blas/zher2.f create mode 100644 costa/native/external/blas/zher2k.f create mode 100644 costa/native/external/blas/zherk.f create mode 100644 costa/native/external/blas/zhpmv.f create mode 100644 costa/native/external/blas/zhpr.f create mode 100644 costa/native/external/blas/zhpr2.f create mode 100644 costa/native/external/blas/zrotg.f create mode 100644 costa/native/external/blas/zscal.f create mode 100644 costa/native/external/blas/zswap.f create mode 100644 costa/native/external/blas/zsymm.f create mode 100644 costa/native/external/blas/zsyr2k.f create mode 100644 costa/native/external/blas/zsyrk.f create mode 100644 costa/native/external/blas/ztbmv.f create mode 100644 costa/native/external/blas/ztbsv.f create mode 100644 costa/native/external/blas/ztpmv.f create mode 100644 costa/native/external/blas/ztpsv.f create mode 100644 costa/native/external/blas/ztrmm.f create mode 100644 costa/native/external/blas/ztrmv.f create mode 100644 costa/native/external/blas/ztrsm.f create mode 100644 costa/native/external/blas/ztrsv.f create mode 100644 costa/native/external/lapack/cbdsqr.f create mode 100644 costa/native/external/lapack/cgbbrd.f create mode 100644 costa/native/external/lapack/cgbcon.f create mode 100644 costa/native/external/lapack/cgbequ.f create mode 100644 costa/native/external/lapack/cgbrfs.f create mode 100644 costa/native/external/lapack/cgbsv.f create mode 100644 costa/native/external/lapack/cgbsvx.f create mode 100644 costa/native/external/lapack/cgbtf2.f create mode 100644 costa/native/external/lapack/cgbtrf.f create mode 100644 costa/native/external/lapack/cgbtrs.f create mode 100644 costa/native/external/lapack/cgebak.f create mode 100644 costa/native/external/lapack/cgebal.f create mode 100644 costa/native/external/lapack/cgebd2.f create mode 100644 costa/native/external/lapack/cgebrd.f create mode 100644 costa/native/external/lapack/cgecon.f create mode 100644 costa/native/external/lapack/cgeequ.f create mode 100644 costa/native/external/lapack/cgees.f create mode 100644 costa/native/external/lapack/cgeesx.f create mode 100644 costa/native/external/lapack/cgeev.f create mode 100644 costa/native/external/lapack/cgeevx.f create mode 100644 costa/native/external/lapack/cgegs.f create mode 100644 costa/native/external/lapack/cgegv.f create mode 100644 costa/native/external/lapack/cgehd2.f create mode 100644 costa/native/external/lapack/cgehrd.f create mode 100644 costa/native/external/lapack/cgelq2.f create mode 100644 costa/native/external/lapack/cgelqf.f create mode 100644 costa/native/external/lapack/cgels.f create mode 100644 costa/native/external/lapack/cgelsd.f create mode 100644 costa/native/external/lapack/cgelss.f create mode 100644 costa/native/external/lapack/cgelsx.f create mode 100644 costa/native/external/lapack/cgelsy.f create mode 100644 costa/native/external/lapack/cgeql2.f create mode 100644 costa/native/external/lapack/cgeqlf.f create mode 100644 costa/native/external/lapack/cgeqp3.f create mode 100644 costa/native/external/lapack/cgeqpf.f create mode 100644 costa/native/external/lapack/cgeqr2.f create mode 100644 costa/native/external/lapack/cgeqrf.f create mode 100644 costa/native/external/lapack/cgerfs.f create mode 100644 costa/native/external/lapack/cgerq2.f create mode 100644 costa/native/external/lapack/cgerqf.f create mode 100644 costa/native/external/lapack/cgesc2.f create mode 100644 costa/native/external/lapack/cgesdd.f create mode 100644 costa/native/external/lapack/cgesv.f create mode 100644 costa/native/external/lapack/cgesvd.f create mode 100644 costa/native/external/lapack/cgesvx.f create mode 100644 costa/native/external/lapack/cgetc2.f create mode 100644 costa/native/external/lapack/cgetf2.f create mode 100644 costa/native/external/lapack/cgetrf.f create mode 100644 costa/native/external/lapack/cgetri.f create mode 100644 costa/native/external/lapack/cgetrs.f create mode 100644 costa/native/external/lapack/cggbak.f create mode 100644 costa/native/external/lapack/cggbal.f create mode 100644 costa/native/external/lapack/cgges.f create mode 100644 costa/native/external/lapack/cggesx.f create mode 100644 costa/native/external/lapack/cggev.f create mode 100644 costa/native/external/lapack/cggevx.f create mode 100644 costa/native/external/lapack/cggglm.f create mode 100644 costa/native/external/lapack/cgghrd.f create mode 100644 costa/native/external/lapack/cgglse.f create mode 100644 costa/native/external/lapack/cggqrf.f create mode 100644 costa/native/external/lapack/cggrqf.f create mode 100644 costa/native/external/lapack/cggsvd.f create mode 100644 costa/native/external/lapack/cggsvp.f create mode 100644 costa/native/external/lapack/cgtcon.f create mode 100644 costa/native/external/lapack/cgtrfs.f create mode 100644 costa/native/external/lapack/cgtsv.f create mode 100644 costa/native/external/lapack/cgtsvx.f create mode 100644 costa/native/external/lapack/cgttrf.f create mode 100644 costa/native/external/lapack/cgttrs.f create mode 100644 costa/native/external/lapack/cgtts2.f create mode 100644 costa/native/external/lapack/chbev.f create mode 100644 costa/native/external/lapack/chbevd.f create mode 100644 costa/native/external/lapack/chbevx.f create mode 100644 costa/native/external/lapack/chbgst.f create mode 100644 costa/native/external/lapack/chbgv.f create mode 100644 costa/native/external/lapack/chbgvd.f create mode 100644 costa/native/external/lapack/chbgvx.f create mode 100644 costa/native/external/lapack/chbtrd.f create mode 100644 costa/native/external/lapack/checon.f create mode 100644 costa/native/external/lapack/cheev.f create mode 100644 costa/native/external/lapack/cheevd.f create mode 100644 costa/native/external/lapack/cheevr.f create mode 100644 costa/native/external/lapack/cheevx.f create mode 100644 costa/native/external/lapack/chegs2.f create mode 100644 costa/native/external/lapack/chegst.f create mode 100644 costa/native/external/lapack/chegv.f create mode 100644 costa/native/external/lapack/chegvd.f create mode 100644 costa/native/external/lapack/chegvx.f create mode 100644 costa/native/external/lapack/cherfs.f create mode 100644 costa/native/external/lapack/chesv.f create mode 100644 costa/native/external/lapack/chesvx.f create mode 100644 costa/native/external/lapack/chetd2.f create mode 100644 costa/native/external/lapack/chetf2.f create mode 100644 costa/native/external/lapack/chetrd.f create mode 100644 costa/native/external/lapack/chetrf.f create mode 100644 costa/native/external/lapack/chetri.f create mode 100644 costa/native/external/lapack/chetrs.f create mode 100644 costa/native/external/lapack/chgeqz.f create mode 100644 costa/native/external/lapack/chpcon.f create mode 100644 costa/native/external/lapack/chpev.f create mode 100644 costa/native/external/lapack/chpevd.f create mode 100644 costa/native/external/lapack/chpevx.f create mode 100644 costa/native/external/lapack/chpgst.f create mode 100644 costa/native/external/lapack/chpgv.f create mode 100644 costa/native/external/lapack/chpgvd.f create mode 100644 costa/native/external/lapack/chpgvx.f create mode 100644 costa/native/external/lapack/chprfs.f create mode 100644 costa/native/external/lapack/chpsv.f create mode 100644 costa/native/external/lapack/chpsvx.f create mode 100644 costa/native/external/lapack/chptrd.f create mode 100644 costa/native/external/lapack/chptrf.f create mode 100644 costa/native/external/lapack/chptri.f create mode 100644 costa/native/external/lapack/chptrs.f create mode 100644 costa/native/external/lapack/chsein.f create mode 100644 costa/native/external/lapack/chseqr.f create mode 100644 costa/native/external/lapack/clabrd.f create mode 100644 costa/native/external/lapack/clacgv.f create mode 100644 costa/native/external/lapack/clacon.f create mode 100644 costa/native/external/lapack/clacp2.f create mode 100644 costa/native/external/lapack/clacpy.f create mode 100644 costa/native/external/lapack/clacrm.f create mode 100644 costa/native/external/lapack/clacrt.f create mode 100644 costa/native/external/lapack/cladiv.f create mode 100644 costa/native/external/lapack/claed0.f create mode 100644 costa/native/external/lapack/claed7.f create mode 100644 costa/native/external/lapack/claed8.f create mode 100644 costa/native/external/lapack/claein.f create mode 100644 costa/native/external/lapack/claesy.f create mode 100644 costa/native/external/lapack/claev2.f create mode 100644 costa/native/external/lapack/clags2.f create mode 100644 costa/native/external/lapack/clagtm.f create mode 100644 costa/native/external/lapack/clahef.f create mode 100644 costa/native/external/lapack/clahqr.f create mode 100644 costa/native/external/lapack/clahrd.f create mode 100644 costa/native/external/lapack/claic1.f create mode 100644 costa/native/external/lapack/clals0.f create mode 100644 costa/native/external/lapack/clalsa.f create mode 100644 costa/native/external/lapack/clalsd.f create mode 100644 costa/native/external/lapack/clangb.f create mode 100644 costa/native/external/lapack/clange.f create mode 100644 costa/native/external/lapack/clangt.f create mode 100644 costa/native/external/lapack/clanhb.f create mode 100644 costa/native/external/lapack/clanhe.f create mode 100644 costa/native/external/lapack/clanhp.f create mode 100644 costa/native/external/lapack/clanhs.f create mode 100644 costa/native/external/lapack/clanht.f create mode 100644 costa/native/external/lapack/clansb.f create mode 100644 costa/native/external/lapack/clansp.f create mode 100644 costa/native/external/lapack/clansy.f create mode 100644 costa/native/external/lapack/clantb.f create mode 100644 costa/native/external/lapack/clantp.f create mode 100644 costa/native/external/lapack/clantr.f create mode 100644 costa/native/external/lapack/clapll.f create mode 100644 costa/native/external/lapack/clapmt.f create mode 100644 costa/native/external/lapack/claqgb.f create mode 100644 costa/native/external/lapack/claqge.f create mode 100644 costa/native/external/lapack/claqhb.f create mode 100644 costa/native/external/lapack/claqhe.f create mode 100644 costa/native/external/lapack/claqhp.f create mode 100644 costa/native/external/lapack/claqp2.f create mode 100644 costa/native/external/lapack/claqps.f create mode 100644 costa/native/external/lapack/claqsb.f create mode 100644 costa/native/external/lapack/claqsp.f create mode 100644 costa/native/external/lapack/claqsy.f create mode 100644 costa/native/external/lapack/clar1v.f create mode 100644 costa/native/external/lapack/clar2v.f create mode 100644 costa/native/external/lapack/clarcm.f create mode 100644 costa/native/external/lapack/clarf.f create mode 100644 costa/native/external/lapack/clarfb.f create mode 100644 costa/native/external/lapack/clarfg.f create mode 100644 costa/native/external/lapack/clarft.f create mode 100644 costa/native/external/lapack/clarfx.f create mode 100644 costa/native/external/lapack/clargv.f create mode 100644 costa/native/external/lapack/clarnv.f create mode 100644 costa/native/external/lapack/clarrv.f create mode 100644 costa/native/external/lapack/clartg.f create mode 100644 costa/native/external/lapack/clartv.f create mode 100644 costa/native/external/lapack/clarz.f create mode 100644 costa/native/external/lapack/clarzb.f create mode 100644 costa/native/external/lapack/clarzt.f create mode 100644 costa/native/external/lapack/clascl.f create mode 100644 costa/native/external/lapack/claset.f create mode 100644 costa/native/external/lapack/clasr.f create mode 100644 costa/native/external/lapack/classq.f create mode 100644 costa/native/external/lapack/claswp.f create mode 100644 costa/native/external/lapack/clasyf.f create mode 100644 costa/native/external/lapack/clatbs.f create mode 100644 costa/native/external/lapack/clatdf.f create mode 100644 costa/native/external/lapack/clatps.f create mode 100644 costa/native/external/lapack/clatrd.f create mode 100644 costa/native/external/lapack/clatrs.f create mode 100644 costa/native/external/lapack/clatrz.f create mode 100644 costa/native/external/lapack/clatzm.f create mode 100644 costa/native/external/lapack/clauu2.f create mode 100644 costa/native/external/lapack/clauum.f create mode 100644 costa/native/external/lapack/cpbcon.f create mode 100644 costa/native/external/lapack/cpbequ.f create mode 100644 costa/native/external/lapack/cpbrfs.f create mode 100644 costa/native/external/lapack/cpbstf.f create mode 100644 costa/native/external/lapack/cpbsv.f create mode 100644 costa/native/external/lapack/cpbsvx.f create mode 100644 costa/native/external/lapack/cpbtf2.f create mode 100644 costa/native/external/lapack/cpbtrf.f create mode 100644 costa/native/external/lapack/cpbtrs.f create mode 100644 costa/native/external/lapack/cpocon.f create mode 100644 costa/native/external/lapack/cpoequ.f create mode 100644 costa/native/external/lapack/cporfs.f create mode 100644 costa/native/external/lapack/cposv.f create mode 100644 costa/native/external/lapack/cposvx.f create mode 100644 costa/native/external/lapack/cpotf2.f create mode 100644 costa/native/external/lapack/cpotrf.f create mode 100644 costa/native/external/lapack/cpotri.f create mode 100644 costa/native/external/lapack/cpotrs.f create mode 100644 costa/native/external/lapack/cppcon.f create mode 100644 costa/native/external/lapack/cppequ.f create mode 100644 costa/native/external/lapack/cpprfs.f create mode 100644 costa/native/external/lapack/cppsv.f create mode 100644 costa/native/external/lapack/cppsvx.f create mode 100644 costa/native/external/lapack/cpptrf.f create mode 100644 costa/native/external/lapack/cpptri.f create mode 100644 costa/native/external/lapack/cpptrs.f create mode 100644 costa/native/external/lapack/cptcon.f create mode 100644 costa/native/external/lapack/cpteqr.f create mode 100644 costa/native/external/lapack/cptrfs.f create mode 100644 costa/native/external/lapack/cptsv.f create mode 100644 costa/native/external/lapack/cptsvx.f create mode 100644 costa/native/external/lapack/cpttrf.f create mode 100644 costa/native/external/lapack/cpttrs.f create mode 100644 costa/native/external/lapack/cptts2.f create mode 100644 costa/native/external/lapack/crot.f create mode 100644 costa/native/external/lapack/cspcon.f create mode 100644 costa/native/external/lapack/cspmv.f create mode 100644 costa/native/external/lapack/cspr.f create mode 100644 costa/native/external/lapack/csprfs.f create mode 100644 costa/native/external/lapack/cspsv.f create mode 100644 costa/native/external/lapack/cspsvx.f create mode 100644 costa/native/external/lapack/csptrf.f create mode 100644 costa/native/external/lapack/csptri.f create mode 100644 costa/native/external/lapack/csptrs.f create mode 100644 costa/native/external/lapack/csrot.f create mode 100644 costa/native/external/lapack/csrscl.f create mode 100644 costa/native/external/lapack/cstedc.f create mode 100644 costa/native/external/lapack/cstegr.f create mode 100644 costa/native/external/lapack/cstein.f create mode 100644 costa/native/external/lapack/csteqr.f create mode 100644 costa/native/external/lapack/csycon.f create mode 100644 costa/native/external/lapack/csymv.f create mode 100644 costa/native/external/lapack/csyr.f create mode 100644 costa/native/external/lapack/csyrfs.f create mode 100644 costa/native/external/lapack/csysv.f create mode 100644 costa/native/external/lapack/csysvx.f create mode 100644 costa/native/external/lapack/csytf2.f create mode 100644 costa/native/external/lapack/csytrf.f create mode 100644 costa/native/external/lapack/csytri.f create mode 100644 costa/native/external/lapack/csytrs.f create mode 100644 costa/native/external/lapack/ctbcon.f create mode 100644 costa/native/external/lapack/ctbrfs.f create mode 100644 costa/native/external/lapack/ctbtrs.f create mode 100644 costa/native/external/lapack/ctgevc.f create mode 100644 costa/native/external/lapack/ctgex2.f create mode 100644 costa/native/external/lapack/ctgexc.f create mode 100644 costa/native/external/lapack/ctgsen.f create mode 100644 costa/native/external/lapack/ctgsja.f create mode 100644 costa/native/external/lapack/ctgsna.f create mode 100644 costa/native/external/lapack/ctgsy2.f create mode 100644 costa/native/external/lapack/ctgsyl.f create mode 100644 costa/native/external/lapack/ctpcon.f create mode 100644 costa/native/external/lapack/ctprfs.f create mode 100644 costa/native/external/lapack/ctptri.f create mode 100644 costa/native/external/lapack/ctptrs.f create mode 100644 costa/native/external/lapack/ctrcon.f create mode 100644 costa/native/external/lapack/ctrevc.f create mode 100644 costa/native/external/lapack/ctrexc.f create mode 100644 costa/native/external/lapack/ctrrfs.f create mode 100644 costa/native/external/lapack/ctrsen.f create mode 100644 costa/native/external/lapack/ctrsna.f create mode 100644 costa/native/external/lapack/ctrsyl.f create mode 100644 costa/native/external/lapack/ctrti2.f create mode 100644 costa/native/external/lapack/ctrtri.f create mode 100644 costa/native/external/lapack/ctrtrs.f create mode 100644 costa/native/external/lapack/ctzrqf.f create mode 100644 costa/native/external/lapack/ctzrzf.f create mode 100644 costa/native/external/lapack/cung2l.f create mode 100644 costa/native/external/lapack/cung2r.f create mode 100644 costa/native/external/lapack/cungbr.f create mode 100644 costa/native/external/lapack/cunghr.f create mode 100644 costa/native/external/lapack/cungl2.f create mode 100644 costa/native/external/lapack/cunglq.f create mode 100644 costa/native/external/lapack/cungql.f create mode 100644 costa/native/external/lapack/cungqr.f create mode 100644 costa/native/external/lapack/cungr2.f create mode 100644 costa/native/external/lapack/cungrq.f create mode 100644 costa/native/external/lapack/cungtr.f create mode 100644 costa/native/external/lapack/cunm2l.f create mode 100644 costa/native/external/lapack/cunm2r.f create mode 100644 costa/native/external/lapack/cunmbr.f create mode 100644 costa/native/external/lapack/cunmhr.f create mode 100644 costa/native/external/lapack/cunml2.f create mode 100644 costa/native/external/lapack/cunmlq.f create mode 100644 costa/native/external/lapack/cunmql.f create mode 100644 costa/native/external/lapack/cunmqr.f create mode 100644 costa/native/external/lapack/cunmr2.f create mode 100644 costa/native/external/lapack/cunmr3.f create mode 100644 costa/native/external/lapack/cunmrq.f create mode 100644 costa/native/external/lapack/cunmrz.f create mode 100644 costa/native/external/lapack/cunmtr.f create mode 100644 costa/native/external/lapack/cupgtr.f create mode 100644 costa/native/external/lapack/cupmtr.f create mode 100644 costa/native/external/lapack/dbdsdc.f create mode 100644 costa/native/external/lapack/dbdsqr.f create mode 100644 costa/native/external/lapack/ddisna.f create mode 100644 costa/native/external/lapack/dgbbrd.f create mode 100644 costa/native/external/lapack/dgbcon.f create mode 100644 costa/native/external/lapack/dgbequ.f create mode 100644 costa/native/external/lapack/dgbrfs.f create mode 100644 costa/native/external/lapack/dgbsv.f create mode 100644 costa/native/external/lapack/dgbsvx.f create mode 100644 costa/native/external/lapack/dgbtf2.f create mode 100644 costa/native/external/lapack/dgbtrf.f create mode 100644 costa/native/external/lapack/dgbtrs.f create mode 100644 costa/native/external/lapack/dgebak.f create mode 100644 costa/native/external/lapack/dgebal.f create mode 100644 costa/native/external/lapack/dgebd2.f create mode 100644 costa/native/external/lapack/dgebrd.f create mode 100644 costa/native/external/lapack/dgecon.f create mode 100644 costa/native/external/lapack/dgeequ.f create mode 100644 costa/native/external/lapack/dgees.f create mode 100644 costa/native/external/lapack/dgeesx.f create mode 100644 costa/native/external/lapack/dgeev.f create mode 100644 costa/native/external/lapack/dgeevx.f create mode 100644 costa/native/external/lapack/dgegs.f create mode 100644 costa/native/external/lapack/dgegv.f create mode 100644 costa/native/external/lapack/dgehd2.f create mode 100644 costa/native/external/lapack/dgehrd.f create mode 100644 costa/native/external/lapack/dgelq2.f create mode 100644 costa/native/external/lapack/dgelqf.f create mode 100644 costa/native/external/lapack/dgels.f create mode 100644 costa/native/external/lapack/dgelsd.f create mode 100644 costa/native/external/lapack/dgelss.f create mode 100644 costa/native/external/lapack/dgelsx.f create mode 100644 costa/native/external/lapack/dgelsy.f create mode 100644 costa/native/external/lapack/dgeql2.f create mode 100644 costa/native/external/lapack/dgeqlf.f create mode 100644 costa/native/external/lapack/dgeqp3.f create mode 100644 costa/native/external/lapack/dgeqpf.f create mode 100644 costa/native/external/lapack/dgeqr2.f create mode 100644 costa/native/external/lapack/dgeqrf.f create mode 100644 costa/native/external/lapack/dgerfs.f create mode 100644 costa/native/external/lapack/dgerq2.f create mode 100644 costa/native/external/lapack/dgerqf.f create mode 100644 costa/native/external/lapack/dgesc2.f create mode 100644 costa/native/external/lapack/dgesdd.f create mode 100644 costa/native/external/lapack/dgesv.f create mode 100644 costa/native/external/lapack/dgesvd.f create mode 100644 costa/native/external/lapack/dgesvx.f create mode 100644 costa/native/external/lapack/dgetc2.f create mode 100644 costa/native/external/lapack/dgetf2.f create mode 100644 costa/native/external/lapack/dgetrf.f create mode 100644 costa/native/external/lapack/dgetri.f create mode 100644 costa/native/external/lapack/dgetrs.f create mode 100644 costa/native/external/lapack/dggbak.f create mode 100644 costa/native/external/lapack/dggbal.f create mode 100644 costa/native/external/lapack/dgges.f create mode 100644 costa/native/external/lapack/dggesx.f create mode 100644 costa/native/external/lapack/dggev.f create mode 100644 costa/native/external/lapack/dggevx.f create mode 100644 costa/native/external/lapack/dggglm.f create mode 100644 costa/native/external/lapack/dgghrd.f create mode 100644 costa/native/external/lapack/dgglse.f create mode 100644 costa/native/external/lapack/dggqrf.f create mode 100644 costa/native/external/lapack/dggrqf.f create mode 100644 costa/native/external/lapack/dggsvd.f create mode 100644 costa/native/external/lapack/dggsvp.f create mode 100644 costa/native/external/lapack/dgtcon.f create mode 100644 costa/native/external/lapack/dgtrfs.f create mode 100644 costa/native/external/lapack/dgtsv.f create mode 100644 costa/native/external/lapack/dgtsvx.f create mode 100644 costa/native/external/lapack/dgttrf.f create mode 100644 costa/native/external/lapack/dgttrs.f create mode 100644 costa/native/external/lapack/dgtts2.f create mode 100644 costa/native/external/lapack/dhgeqz.f create mode 100644 costa/native/external/lapack/dhsein.f create mode 100644 costa/native/external/lapack/dhseqr.f create mode 100644 costa/native/external/lapack/dlabad.f create mode 100644 costa/native/external/lapack/dlabrd.f create mode 100644 costa/native/external/lapack/dlacon.f create mode 100644 costa/native/external/lapack/dlacpy.f create mode 100644 costa/native/external/lapack/dladiv.f create mode 100644 costa/native/external/lapack/dlae2.f create mode 100644 costa/native/external/lapack/dlaebz.f create mode 100644 costa/native/external/lapack/dlaed0.f create mode 100644 costa/native/external/lapack/dlaed1.f create mode 100644 costa/native/external/lapack/dlaed2.f create mode 100644 costa/native/external/lapack/dlaed3.f create mode 100644 costa/native/external/lapack/dlaed4.f create mode 100644 costa/native/external/lapack/dlaed5.f create mode 100644 costa/native/external/lapack/dlaed6.f create mode 100644 costa/native/external/lapack/dlaed7.f create mode 100644 costa/native/external/lapack/dlaed8.f create mode 100644 costa/native/external/lapack/dlaed9.f create mode 100644 costa/native/external/lapack/dlaeda.f create mode 100644 costa/native/external/lapack/dlaein.f create mode 100644 costa/native/external/lapack/dlaev2.f create mode 100644 costa/native/external/lapack/dlaexc.f create mode 100644 costa/native/external/lapack/dlag2.f create mode 100644 costa/native/external/lapack/dlags2.f create mode 100644 costa/native/external/lapack/dlagtf.f create mode 100644 costa/native/external/lapack/dlagtm.f create mode 100644 costa/native/external/lapack/dlagts.f create mode 100644 costa/native/external/lapack/dlagv2.f create mode 100644 costa/native/external/lapack/dlahqr.f create mode 100644 costa/native/external/lapack/dlahrd.f create mode 100644 costa/native/external/lapack/dlaic1.f create mode 100644 costa/native/external/lapack/dlaln2.f create mode 100644 costa/native/external/lapack/dlals0.f create mode 100644 costa/native/external/lapack/dlalsa.f create mode 100644 costa/native/external/lapack/dlalsd.f create mode 100644 costa/native/external/lapack/dlamch.f create mode 100644 costa/native/external/lapack/dlamrg.f create mode 100644 costa/native/external/lapack/dlangb.f create mode 100644 costa/native/external/lapack/dlange.f create mode 100644 costa/native/external/lapack/dlangt.f create mode 100644 costa/native/external/lapack/dlanhs.f create mode 100644 costa/native/external/lapack/dlansb.f create mode 100644 costa/native/external/lapack/dlansp.f create mode 100644 costa/native/external/lapack/dlanst.f create mode 100644 costa/native/external/lapack/dlansy.f create mode 100644 costa/native/external/lapack/dlantb.f create mode 100644 costa/native/external/lapack/dlantp.f create mode 100644 costa/native/external/lapack/dlantr.f create mode 100644 costa/native/external/lapack/dlanv2.f create mode 100644 costa/native/external/lapack/dlapll.f create mode 100644 costa/native/external/lapack/dlapmt.f create mode 100644 costa/native/external/lapack/dlapy2.f create mode 100644 costa/native/external/lapack/dlapy3.f create mode 100644 costa/native/external/lapack/dlaqgb.f create mode 100644 costa/native/external/lapack/dlaqge.f create mode 100644 costa/native/external/lapack/dlaqp2.f create mode 100644 costa/native/external/lapack/dlaqps.f create mode 100644 costa/native/external/lapack/dlaqsb.f create mode 100644 costa/native/external/lapack/dlaqsp.f create mode 100644 costa/native/external/lapack/dlaqsy.f create mode 100644 costa/native/external/lapack/dlaqtr.f create mode 100644 costa/native/external/lapack/dlar1v.f create mode 100644 costa/native/external/lapack/dlar2v.f create mode 100644 costa/native/external/lapack/dlarf.f create mode 100644 costa/native/external/lapack/dlarfb.f create mode 100644 costa/native/external/lapack/dlarfg.f create mode 100644 costa/native/external/lapack/dlarft.f create mode 100644 costa/native/external/lapack/dlarfx.f create mode 100644 costa/native/external/lapack/dlargv.f create mode 100644 costa/native/external/lapack/dlarnv.f create mode 100644 costa/native/external/lapack/dlarrb.f create mode 100644 costa/native/external/lapack/dlarre.f create mode 100644 costa/native/external/lapack/dlarrf.f create mode 100644 costa/native/external/lapack/dlarrv.f create mode 100644 costa/native/external/lapack/dlartg.f create mode 100644 costa/native/external/lapack/dlartv.f create mode 100644 costa/native/external/lapack/dlaruv.f create mode 100644 costa/native/external/lapack/dlarz.f create mode 100644 costa/native/external/lapack/dlarzb.f create mode 100644 costa/native/external/lapack/dlarzt.f create mode 100644 costa/native/external/lapack/dlas2.f create mode 100644 costa/native/external/lapack/dlascl.f create mode 100644 costa/native/external/lapack/dlasd0.f create mode 100644 costa/native/external/lapack/dlasd1.f create mode 100644 costa/native/external/lapack/dlasd2.f create mode 100644 costa/native/external/lapack/dlasd3.f create mode 100644 costa/native/external/lapack/dlasd4.f create mode 100644 costa/native/external/lapack/dlasd5.f create mode 100644 costa/native/external/lapack/dlasd6.f create mode 100644 costa/native/external/lapack/dlasd7.f create mode 100644 costa/native/external/lapack/dlasd8.f create mode 100644 costa/native/external/lapack/dlasd9.f create mode 100644 costa/native/external/lapack/dlasda.f create mode 100644 costa/native/external/lapack/dlasdq.f create mode 100644 costa/native/external/lapack/dlasdt.f create mode 100644 costa/native/external/lapack/dlaset.f create mode 100644 costa/native/external/lapack/dlasq1.f create mode 100644 costa/native/external/lapack/dlasq2.f create mode 100644 costa/native/external/lapack/dlasq3.f create mode 100644 costa/native/external/lapack/dlasq4.f create mode 100644 costa/native/external/lapack/dlasq5.f create mode 100644 costa/native/external/lapack/dlasq6.f create mode 100644 costa/native/external/lapack/dlasr.f create mode 100644 costa/native/external/lapack/dlasrt.f create mode 100644 costa/native/external/lapack/dlassq.f create mode 100644 costa/native/external/lapack/dlasv2.f create mode 100644 costa/native/external/lapack/dlaswp.f create mode 100644 costa/native/external/lapack/dlasy2.f create mode 100644 costa/native/external/lapack/dlasyf.f create mode 100644 costa/native/external/lapack/dlatbs.f create mode 100644 costa/native/external/lapack/dlatdf.f create mode 100644 costa/native/external/lapack/dlatps.f create mode 100644 costa/native/external/lapack/dlatrd.f create mode 100644 costa/native/external/lapack/dlatrs.f create mode 100644 costa/native/external/lapack/dlatrz.f create mode 100644 costa/native/external/lapack/dlatzm.f create mode 100644 costa/native/external/lapack/dlauu2.f create mode 100644 costa/native/external/lapack/dlauum.f create mode 100644 costa/native/external/lapack/dopgtr.f create mode 100644 costa/native/external/lapack/dopmtr.f create mode 100644 costa/native/external/lapack/dorg2l.f create mode 100644 costa/native/external/lapack/dorg2r.f create mode 100644 costa/native/external/lapack/dorgbr.f create mode 100644 costa/native/external/lapack/dorghr.f create mode 100644 costa/native/external/lapack/dorgl2.f create mode 100644 costa/native/external/lapack/dorglq.f create mode 100644 costa/native/external/lapack/dorgql.f create mode 100644 costa/native/external/lapack/dorgqr.f create mode 100644 costa/native/external/lapack/dorgr2.f create mode 100644 costa/native/external/lapack/dorgrq.f create mode 100644 costa/native/external/lapack/dorgtr.f create mode 100644 costa/native/external/lapack/dorm2l.f create mode 100644 costa/native/external/lapack/dorm2r.f create mode 100644 costa/native/external/lapack/dormbr.f create mode 100644 costa/native/external/lapack/dormhr.f create mode 100644 costa/native/external/lapack/dorml2.f create mode 100644 costa/native/external/lapack/dormlq.f create mode 100644 costa/native/external/lapack/dormql.f create mode 100644 costa/native/external/lapack/dormqr.f create mode 100644 costa/native/external/lapack/dormr2.f create mode 100644 costa/native/external/lapack/dormr3.f create mode 100644 costa/native/external/lapack/dormrq.f create mode 100644 costa/native/external/lapack/dormrz.f create mode 100644 costa/native/external/lapack/dormtr.f create mode 100644 costa/native/external/lapack/dpbcon.f create mode 100644 costa/native/external/lapack/dpbequ.f create mode 100644 costa/native/external/lapack/dpbrfs.f create mode 100644 costa/native/external/lapack/dpbstf.f create mode 100644 costa/native/external/lapack/dpbsv.f create mode 100644 costa/native/external/lapack/dpbsvx.f create mode 100644 costa/native/external/lapack/dpbtf2.f create mode 100644 costa/native/external/lapack/dpbtrf.f create mode 100644 costa/native/external/lapack/dpbtrs.f create mode 100644 costa/native/external/lapack/dpocon.f create mode 100644 costa/native/external/lapack/dpoequ.f create mode 100644 costa/native/external/lapack/dporfs.f create mode 100644 costa/native/external/lapack/dposv.f create mode 100644 costa/native/external/lapack/dposvx.f create mode 100644 costa/native/external/lapack/dpotf2.f create mode 100644 costa/native/external/lapack/dpotrf.f create mode 100644 costa/native/external/lapack/dpotri.f create mode 100644 costa/native/external/lapack/dpotrs.f create mode 100644 costa/native/external/lapack/dppcon.f create mode 100644 costa/native/external/lapack/dppequ.f create mode 100644 costa/native/external/lapack/dpprfs.f create mode 100644 costa/native/external/lapack/dppsv.f create mode 100644 costa/native/external/lapack/dppsvx.f create mode 100644 costa/native/external/lapack/dpptrf.f create mode 100644 costa/native/external/lapack/dpptri.f create mode 100644 costa/native/external/lapack/dpptrs.f create mode 100644 costa/native/external/lapack/dptcon.f create mode 100644 costa/native/external/lapack/dpteqr.f create mode 100644 costa/native/external/lapack/dptrfs.f create mode 100644 costa/native/external/lapack/dptsv.f create mode 100644 costa/native/external/lapack/dptsvx.f create mode 100644 costa/native/external/lapack/dpttrf.f create mode 100644 costa/native/external/lapack/dpttrs.f create mode 100644 costa/native/external/lapack/dptts2.f create mode 100644 costa/native/external/lapack/drscl.f create mode 100644 costa/native/external/lapack/dsbev.f create mode 100644 costa/native/external/lapack/dsbevd.f create mode 100644 costa/native/external/lapack/dsbevx.f create mode 100644 costa/native/external/lapack/dsbgst.f create mode 100644 costa/native/external/lapack/dsbgv.f create mode 100644 costa/native/external/lapack/dsbgvd.f create mode 100644 costa/native/external/lapack/dsbgvx.f create mode 100644 costa/native/external/lapack/dsbtrd.f create mode 100644 costa/native/external/lapack/dsecnd.f create mode 100644 costa/native/external/lapack/dspcon.f create mode 100644 costa/native/external/lapack/dspev.f create mode 100644 costa/native/external/lapack/dspevd.f create mode 100644 costa/native/external/lapack/dspevx.f create mode 100644 costa/native/external/lapack/dspgst.f create mode 100644 costa/native/external/lapack/dspgv.f create mode 100644 costa/native/external/lapack/dspgvd.f create mode 100644 costa/native/external/lapack/dspgvx.f create mode 100644 costa/native/external/lapack/dsprfs.f create mode 100644 costa/native/external/lapack/dspsv.f create mode 100644 costa/native/external/lapack/dspsvx.f create mode 100644 costa/native/external/lapack/dsptrd.f create mode 100644 costa/native/external/lapack/dsptrf.f create mode 100644 costa/native/external/lapack/dsptri.f create mode 100644 costa/native/external/lapack/dsptrs.f create mode 100644 costa/native/external/lapack/dstebz.f create mode 100644 costa/native/external/lapack/dstedc.f create mode 100644 costa/native/external/lapack/dstegr.f create mode 100644 costa/native/external/lapack/dstein.f create mode 100644 costa/native/external/lapack/dsteqr.f create mode 100644 costa/native/external/lapack/dsterf.f create mode 100644 costa/native/external/lapack/dstev.f create mode 100644 costa/native/external/lapack/dstevd.f create mode 100644 costa/native/external/lapack/dstevr.f create mode 100644 costa/native/external/lapack/dstevx.f create mode 100644 costa/native/external/lapack/dsycon.f create mode 100644 costa/native/external/lapack/dsyev.f create mode 100644 costa/native/external/lapack/dsyevd.f create mode 100644 costa/native/external/lapack/dsyevr.f create mode 100644 costa/native/external/lapack/dsyevx.f create mode 100644 costa/native/external/lapack/dsygs2.f create mode 100644 costa/native/external/lapack/dsygst.f create mode 100644 costa/native/external/lapack/dsygv.f create mode 100644 costa/native/external/lapack/dsygvd.f create mode 100644 costa/native/external/lapack/dsygvx.f create mode 100644 costa/native/external/lapack/dsyrfs.f create mode 100644 costa/native/external/lapack/dsysv.f create mode 100644 costa/native/external/lapack/dsysvx.f create mode 100644 costa/native/external/lapack/dsytd2.f create mode 100644 costa/native/external/lapack/dsytf2.f create mode 100644 costa/native/external/lapack/dsytrd.f create mode 100644 costa/native/external/lapack/dsytrf.f create mode 100644 costa/native/external/lapack/dsytri.f create mode 100644 costa/native/external/lapack/dsytrs.f create mode 100644 costa/native/external/lapack/dtbcon.f create mode 100644 costa/native/external/lapack/dtbrfs.f create mode 100644 costa/native/external/lapack/dtbtrs.f create mode 100644 costa/native/external/lapack/dtgevc.f create mode 100644 costa/native/external/lapack/dtgex2.f create mode 100644 costa/native/external/lapack/dtgexc.f create mode 100644 costa/native/external/lapack/dtgsen.f create mode 100644 costa/native/external/lapack/dtgsja.f create mode 100644 costa/native/external/lapack/dtgsna.f create mode 100644 costa/native/external/lapack/dtgsy2.f create mode 100644 costa/native/external/lapack/dtgsyl.f create mode 100644 costa/native/external/lapack/dtpcon.f create mode 100644 costa/native/external/lapack/dtprfs.f create mode 100644 costa/native/external/lapack/dtptri.f create mode 100644 costa/native/external/lapack/dtptrs.f create mode 100644 costa/native/external/lapack/dtrcon.f create mode 100644 costa/native/external/lapack/dtrevc.f create mode 100644 costa/native/external/lapack/dtrexc.f create mode 100644 costa/native/external/lapack/dtrrfs.f create mode 100644 costa/native/external/lapack/dtrsen.f create mode 100644 costa/native/external/lapack/dtrsna.f create mode 100644 costa/native/external/lapack/dtrsyl.f create mode 100644 costa/native/external/lapack/dtrti2.f create mode 100644 costa/native/external/lapack/dtrtri.f create mode 100644 costa/native/external/lapack/dtrtrs.f create mode 100644 costa/native/external/lapack/dtzrqf.f create mode 100644 costa/native/external/lapack/dtzrzf.f create mode 100644 costa/native/external/lapack/dzsum1.f create mode 100644 costa/native/external/lapack/icmax1.f create mode 100644 costa/native/external/lapack/ieeeck.f create mode 100644 costa/native/external/lapack/ilaenv.f create mode 100644 costa/native/external/lapack/izmax1.f create mode 100644 costa/native/external/lapack/lsame.f create mode 100644 costa/native/external/lapack/lsamen.f create mode 100644 costa/native/external/lapack/sbdsdc.f create mode 100644 costa/native/external/lapack/sbdsqr.f create mode 100644 costa/native/external/lapack/scsum1.f create mode 100644 costa/native/external/lapack/sdisna.f create mode 100644 costa/native/external/lapack/second.f create mode 100644 costa/native/external/lapack/sgbbrd.f create mode 100644 costa/native/external/lapack/sgbcon.f create mode 100644 costa/native/external/lapack/sgbequ.f create mode 100644 costa/native/external/lapack/sgbrfs.f create mode 100644 costa/native/external/lapack/sgbsv.f create mode 100644 costa/native/external/lapack/sgbsvx.f create mode 100644 costa/native/external/lapack/sgbtf2.f create mode 100644 costa/native/external/lapack/sgbtrf.f create mode 100644 costa/native/external/lapack/sgbtrs.f create mode 100644 costa/native/external/lapack/sgebak.f create mode 100644 costa/native/external/lapack/sgebal.f create mode 100644 costa/native/external/lapack/sgebd2.f create mode 100644 costa/native/external/lapack/sgebrd.f create mode 100644 costa/native/external/lapack/sgecon.f create mode 100644 costa/native/external/lapack/sgeequ.f create mode 100644 costa/native/external/lapack/sgees.f create mode 100644 costa/native/external/lapack/sgeesx.f create mode 100644 costa/native/external/lapack/sgeev.f create mode 100644 costa/native/external/lapack/sgeevx.f create mode 100644 costa/native/external/lapack/sgegs.f create mode 100644 costa/native/external/lapack/sgegv.f create mode 100644 costa/native/external/lapack/sgehd2.f create mode 100644 costa/native/external/lapack/sgehrd.f create mode 100644 costa/native/external/lapack/sgelq2.f create mode 100644 costa/native/external/lapack/sgelqf.f create mode 100644 costa/native/external/lapack/sgels.f create mode 100644 costa/native/external/lapack/sgelsd.f create mode 100644 costa/native/external/lapack/sgelss.f create mode 100644 costa/native/external/lapack/sgelsx.f create mode 100644 costa/native/external/lapack/sgelsy.f create mode 100644 costa/native/external/lapack/sgeql2.f create mode 100644 costa/native/external/lapack/sgeqlf.f create mode 100644 costa/native/external/lapack/sgeqp3.f create mode 100644 costa/native/external/lapack/sgeqpf.f create mode 100644 costa/native/external/lapack/sgeqr2.f create mode 100644 costa/native/external/lapack/sgeqrf.f create mode 100644 costa/native/external/lapack/sgerfs.f create mode 100644 costa/native/external/lapack/sgerq2.f create mode 100644 costa/native/external/lapack/sgerqf.f create mode 100644 costa/native/external/lapack/sgesc2.f create mode 100644 costa/native/external/lapack/sgesdd.f create mode 100644 costa/native/external/lapack/sgesv.f create mode 100644 costa/native/external/lapack/sgesvd.f create mode 100644 costa/native/external/lapack/sgesvx.f create mode 100644 costa/native/external/lapack/sgetc2.f create mode 100644 costa/native/external/lapack/sgetf2.f create mode 100644 costa/native/external/lapack/sgetrf.f create mode 100644 costa/native/external/lapack/sgetri.f create mode 100644 costa/native/external/lapack/sgetrs.f create mode 100644 costa/native/external/lapack/sggbak.f create mode 100644 costa/native/external/lapack/sggbal.f create mode 100644 costa/native/external/lapack/sgges.f create mode 100644 costa/native/external/lapack/sggesx.f create mode 100644 costa/native/external/lapack/sggev.f create mode 100644 costa/native/external/lapack/sggevx.f create mode 100644 costa/native/external/lapack/sggglm.f create mode 100644 costa/native/external/lapack/sgghrd.f create mode 100644 costa/native/external/lapack/sgglse.f create mode 100644 costa/native/external/lapack/sggqrf.f create mode 100644 costa/native/external/lapack/sggrqf.f create mode 100644 costa/native/external/lapack/sggsvd.f create mode 100644 costa/native/external/lapack/sggsvp.f create mode 100644 costa/native/external/lapack/sgtcon.f create mode 100644 costa/native/external/lapack/sgtrfs.f create mode 100644 costa/native/external/lapack/sgtsv.f create mode 100644 costa/native/external/lapack/sgtsvx.f create mode 100644 costa/native/external/lapack/sgttrf.f create mode 100644 costa/native/external/lapack/sgttrs.f create mode 100644 costa/native/external/lapack/sgtts2.f create mode 100644 costa/native/external/lapack/shgeqz.f create mode 100644 costa/native/external/lapack/shsein.f create mode 100644 costa/native/external/lapack/shseqr.f create mode 100644 costa/native/external/lapack/slabad.f create mode 100644 costa/native/external/lapack/slabrd.f create mode 100644 costa/native/external/lapack/slacon.f create mode 100644 costa/native/external/lapack/slacpy.f create mode 100644 costa/native/external/lapack/sladiv.f create mode 100644 costa/native/external/lapack/slae2.f create mode 100644 costa/native/external/lapack/slaebz.f create mode 100644 costa/native/external/lapack/slaed0.f create mode 100644 costa/native/external/lapack/slaed1.f create mode 100644 costa/native/external/lapack/slaed2.f create mode 100644 costa/native/external/lapack/slaed3.f create mode 100644 costa/native/external/lapack/slaed4.f create mode 100644 costa/native/external/lapack/slaed5.f create mode 100644 costa/native/external/lapack/slaed6.f create mode 100644 costa/native/external/lapack/slaed7.f create mode 100644 costa/native/external/lapack/slaed8.f create mode 100644 costa/native/external/lapack/slaed9.f create mode 100644 costa/native/external/lapack/slaeda.f create mode 100644 costa/native/external/lapack/slaein.f create mode 100644 costa/native/external/lapack/slaev2.f create mode 100644 costa/native/external/lapack/slaexc.f create mode 100644 costa/native/external/lapack/slag2.f create mode 100644 costa/native/external/lapack/slags2.f create mode 100644 costa/native/external/lapack/slagtf.f create mode 100644 costa/native/external/lapack/slagtm.f create mode 100644 costa/native/external/lapack/slagts.f create mode 100644 costa/native/external/lapack/slagv2.f create mode 100644 costa/native/external/lapack/slahqr.f create mode 100644 costa/native/external/lapack/slahrd.f create mode 100644 costa/native/external/lapack/slaic1.f create mode 100644 costa/native/external/lapack/slaln2.f create mode 100644 costa/native/external/lapack/slals0.f create mode 100644 costa/native/external/lapack/slalsa.f create mode 100644 costa/native/external/lapack/slalsd.f create mode 100644 costa/native/external/lapack/slamch.f create mode 100644 costa/native/external/lapack/slamrg.f create mode 100644 costa/native/external/lapack/slangb.f create mode 100644 costa/native/external/lapack/slange.f create mode 100644 costa/native/external/lapack/slangt.f create mode 100644 costa/native/external/lapack/slanhs.f create mode 100644 costa/native/external/lapack/slansb.f create mode 100644 costa/native/external/lapack/slansp.f create mode 100644 costa/native/external/lapack/slanst.f create mode 100644 costa/native/external/lapack/slansy.f create mode 100644 costa/native/external/lapack/slantb.f create mode 100644 costa/native/external/lapack/slantp.f create mode 100644 costa/native/external/lapack/slantr.f create mode 100644 costa/native/external/lapack/slanv2.f create mode 100644 costa/native/external/lapack/slapll.f create mode 100644 costa/native/external/lapack/slapmt.f create mode 100644 costa/native/external/lapack/slapy2.f create mode 100644 costa/native/external/lapack/slapy3.f create mode 100644 costa/native/external/lapack/slaqgb.f create mode 100644 costa/native/external/lapack/slaqge.f create mode 100644 costa/native/external/lapack/slaqp2.f create mode 100644 costa/native/external/lapack/slaqps.f create mode 100644 costa/native/external/lapack/slaqsb.f create mode 100644 costa/native/external/lapack/slaqsp.f create mode 100644 costa/native/external/lapack/slaqsy.f create mode 100644 costa/native/external/lapack/slaqtr.f create mode 100644 costa/native/external/lapack/slar1v.f create mode 100644 costa/native/external/lapack/slar2v.f create mode 100644 costa/native/external/lapack/slarf.f create mode 100644 costa/native/external/lapack/slarfb.f create mode 100644 costa/native/external/lapack/slarfg.f create mode 100644 costa/native/external/lapack/slarft.f create mode 100644 costa/native/external/lapack/slarfx.f create mode 100644 costa/native/external/lapack/slargv.f create mode 100644 costa/native/external/lapack/slarnv.f create mode 100644 costa/native/external/lapack/slarrb.f create mode 100644 costa/native/external/lapack/slarre.f create mode 100644 costa/native/external/lapack/slarrf.f create mode 100644 costa/native/external/lapack/slarrv.f create mode 100644 costa/native/external/lapack/slartg.f create mode 100644 costa/native/external/lapack/slartv.f create mode 100644 costa/native/external/lapack/slaruv.f create mode 100644 costa/native/external/lapack/slarz.f create mode 100644 costa/native/external/lapack/slarzb.f create mode 100644 costa/native/external/lapack/slarzt.f create mode 100644 costa/native/external/lapack/slas2.f create mode 100644 costa/native/external/lapack/slascl.f create mode 100644 costa/native/external/lapack/slasd0.f create mode 100644 costa/native/external/lapack/slasd1.f create mode 100644 costa/native/external/lapack/slasd2.f create mode 100644 costa/native/external/lapack/slasd3.f create mode 100644 costa/native/external/lapack/slasd4.f create mode 100644 costa/native/external/lapack/slasd5.f create mode 100644 costa/native/external/lapack/slasd6.f create mode 100644 costa/native/external/lapack/slasd7.f create mode 100644 costa/native/external/lapack/slasd8.f create mode 100644 costa/native/external/lapack/slasd9.f create mode 100644 costa/native/external/lapack/slasda.f create mode 100644 costa/native/external/lapack/slasdq.f create mode 100644 costa/native/external/lapack/slasdt.f create mode 100644 costa/native/external/lapack/slaset.f create mode 100644 costa/native/external/lapack/slasq1.f create mode 100644 costa/native/external/lapack/slasq2.f create mode 100644 costa/native/external/lapack/slasq3.f create mode 100644 costa/native/external/lapack/slasq4.f create mode 100644 costa/native/external/lapack/slasq5.f create mode 100644 costa/native/external/lapack/slasq6.f create mode 100644 costa/native/external/lapack/slasr.f create mode 100644 costa/native/external/lapack/slasrt.f create mode 100644 costa/native/external/lapack/slassq.f create mode 100644 costa/native/external/lapack/slasv2.f create mode 100644 costa/native/external/lapack/slaswp.f create mode 100644 costa/native/external/lapack/slasy2.f create mode 100644 costa/native/external/lapack/slasyf.f create mode 100644 costa/native/external/lapack/slatbs.f create mode 100644 costa/native/external/lapack/slatdf.f create mode 100644 costa/native/external/lapack/slatps.f create mode 100644 costa/native/external/lapack/slatrd.f create mode 100644 costa/native/external/lapack/slatrs.f create mode 100644 costa/native/external/lapack/slatrz.f create mode 100644 costa/native/external/lapack/slatzm.f create mode 100644 costa/native/external/lapack/slauu2.f create mode 100644 costa/native/external/lapack/slauum.f create mode 100644 costa/native/external/lapack/sopgtr.f create mode 100644 costa/native/external/lapack/sopmtr.f create mode 100644 costa/native/external/lapack/sorg2l.f create mode 100644 costa/native/external/lapack/sorg2r.f create mode 100644 costa/native/external/lapack/sorgbr.f create mode 100644 costa/native/external/lapack/sorghr.f create mode 100644 costa/native/external/lapack/sorgl2.f create mode 100644 costa/native/external/lapack/sorglq.f create mode 100644 costa/native/external/lapack/sorgql.f create mode 100644 costa/native/external/lapack/sorgqr.f create mode 100644 costa/native/external/lapack/sorgr2.f create mode 100644 costa/native/external/lapack/sorgrq.f create mode 100644 costa/native/external/lapack/sorgtr.f create mode 100644 costa/native/external/lapack/sorm2l.f create mode 100644 costa/native/external/lapack/sorm2r.f create mode 100644 costa/native/external/lapack/sormbr.f create mode 100644 costa/native/external/lapack/sormhr.f create mode 100644 costa/native/external/lapack/sorml2.f create mode 100644 costa/native/external/lapack/sormlq.f create mode 100644 costa/native/external/lapack/sormql.f create mode 100644 costa/native/external/lapack/sormqr.f create mode 100644 costa/native/external/lapack/sormr2.f create mode 100644 costa/native/external/lapack/sormr3.f create mode 100644 costa/native/external/lapack/sormrq.f create mode 100644 costa/native/external/lapack/sormrz.f create mode 100644 costa/native/external/lapack/sormtr.f create mode 100644 costa/native/external/lapack/spbcon.f create mode 100644 costa/native/external/lapack/spbequ.f create mode 100644 costa/native/external/lapack/spbrfs.f create mode 100644 costa/native/external/lapack/spbstf.f create mode 100644 costa/native/external/lapack/spbsv.f create mode 100644 costa/native/external/lapack/spbsvx.f create mode 100644 costa/native/external/lapack/spbtf2.f create mode 100644 costa/native/external/lapack/spbtrf.f create mode 100644 costa/native/external/lapack/spbtrs.f create mode 100644 costa/native/external/lapack/spocon.f create mode 100644 costa/native/external/lapack/spoequ.f create mode 100644 costa/native/external/lapack/sporfs.f create mode 100644 costa/native/external/lapack/sposv.f create mode 100644 costa/native/external/lapack/sposvx.f create mode 100644 costa/native/external/lapack/spotf2.f create mode 100644 costa/native/external/lapack/spotrf.f create mode 100644 costa/native/external/lapack/spotri.f create mode 100644 costa/native/external/lapack/spotrs.f create mode 100644 costa/native/external/lapack/sppcon.f create mode 100644 costa/native/external/lapack/sppequ.f create mode 100644 costa/native/external/lapack/spprfs.f create mode 100644 costa/native/external/lapack/sppsv.f create mode 100644 costa/native/external/lapack/sppsvx.f create mode 100644 costa/native/external/lapack/spptrf.f create mode 100644 costa/native/external/lapack/spptri.f create mode 100644 costa/native/external/lapack/spptrs.f create mode 100644 costa/native/external/lapack/sptcon.f create mode 100644 costa/native/external/lapack/spteqr.f create mode 100644 costa/native/external/lapack/sptrfs.f create mode 100644 costa/native/external/lapack/sptsv.f create mode 100644 costa/native/external/lapack/sptsvx.f create mode 100644 costa/native/external/lapack/spttrf.f create mode 100644 costa/native/external/lapack/spttrs.f create mode 100644 costa/native/external/lapack/sptts2.f create mode 100644 costa/native/external/lapack/srscl.f create mode 100644 costa/native/external/lapack/ssbev.f create mode 100644 costa/native/external/lapack/ssbevd.f create mode 100644 costa/native/external/lapack/ssbevx.f create mode 100644 costa/native/external/lapack/ssbgst.f create mode 100644 costa/native/external/lapack/ssbgv.f create mode 100644 costa/native/external/lapack/ssbgvd.f create mode 100644 costa/native/external/lapack/ssbgvx.f create mode 100644 costa/native/external/lapack/ssbtrd.f create mode 100644 costa/native/external/lapack/sspcon.f create mode 100644 costa/native/external/lapack/sspev.f create mode 100644 costa/native/external/lapack/sspevd.f create mode 100644 costa/native/external/lapack/sspevx.f create mode 100644 costa/native/external/lapack/sspgst.f create mode 100644 costa/native/external/lapack/sspgv.f create mode 100644 costa/native/external/lapack/sspgvd.f create mode 100644 costa/native/external/lapack/sspgvx.f create mode 100644 costa/native/external/lapack/ssprfs.f create mode 100644 costa/native/external/lapack/sspsv.f create mode 100644 costa/native/external/lapack/sspsvx.f create mode 100644 costa/native/external/lapack/ssptrd.f create mode 100644 costa/native/external/lapack/ssptrf.f create mode 100644 costa/native/external/lapack/ssptri.f create mode 100644 costa/native/external/lapack/ssptrs.f create mode 100644 costa/native/external/lapack/sstebz.f create mode 100644 costa/native/external/lapack/sstedc.f create mode 100644 costa/native/external/lapack/sstegr.f create mode 100644 costa/native/external/lapack/sstein.f create mode 100644 costa/native/external/lapack/ssteqr.f create mode 100644 costa/native/external/lapack/ssterf.f create mode 100644 costa/native/external/lapack/sstev.f create mode 100644 costa/native/external/lapack/sstevd.f create mode 100644 costa/native/external/lapack/sstevr.f create mode 100644 costa/native/external/lapack/sstevx.f create mode 100644 costa/native/external/lapack/ssycon.f create mode 100644 costa/native/external/lapack/ssyev.f create mode 100644 costa/native/external/lapack/ssyevd.f create mode 100644 costa/native/external/lapack/ssyevr.f create mode 100644 costa/native/external/lapack/ssyevx.f create mode 100644 costa/native/external/lapack/ssygs2.f create mode 100644 costa/native/external/lapack/ssygst.f create mode 100644 costa/native/external/lapack/ssygv.f create mode 100644 costa/native/external/lapack/ssygvd.f create mode 100644 costa/native/external/lapack/ssygvx.f create mode 100644 costa/native/external/lapack/ssyrfs.f create mode 100644 costa/native/external/lapack/ssysv.f create mode 100644 costa/native/external/lapack/ssysvx.f create mode 100644 costa/native/external/lapack/ssytd2.f create mode 100644 costa/native/external/lapack/ssytf2.f create mode 100644 costa/native/external/lapack/ssytrd.f create mode 100644 costa/native/external/lapack/ssytrf.f create mode 100644 costa/native/external/lapack/ssytri.f create mode 100644 costa/native/external/lapack/ssytrs.f create mode 100644 costa/native/external/lapack/stbcon.f create mode 100644 costa/native/external/lapack/stbrfs.f create mode 100644 costa/native/external/lapack/stbtrs.f create mode 100644 costa/native/external/lapack/stgevc.f create mode 100644 costa/native/external/lapack/stgex2.f create mode 100644 costa/native/external/lapack/stgexc.f create mode 100644 costa/native/external/lapack/stgsen.f create mode 100644 costa/native/external/lapack/stgsja.f create mode 100644 costa/native/external/lapack/stgsna.f create mode 100644 costa/native/external/lapack/stgsy2.f create mode 100644 costa/native/external/lapack/stgsyl.f create mode 100644 costa/native/external/lapack/stpcon.f create mode 100644 costa/native/external/lapack/stprfs.f create mode 100644 costa/native/external/lapack/stptri.f create mode 100644 costa/native/external/lapack/stptrs.f create mode 100644 costa/native/external/lapack/strcon.f create mode 100644 costa/native/external/lapack/strevc.f create mode 100644 costa/native/external/lapack/strexc.f create mode 100644 costa/native/external/lapack/strrfs.f create mode 100644 costa/native/external/lapack/strsen.f create mode 100644 costa/native/external/lapack/strsna.f create mode 100644 costa/native/external/lapack/strsyl.f create mode 100644 costa/native/external/lapack/strti2.f create mode 100644 costa/native/external/lapack/strtri.f create mode 100644 costa/native/external/lapack/strtrs.f create mode 100644 costa/native/external/lapack/stzrqf.f create mode 100644 costa/native/external/lapack/stzrzf.f create mode 100644 costa/native/external/lapack/xerbla.f create mode 100644 costa/native/external/lapack/zbdsqr.f create mode 100644 costa/native/external/lapack/zdrot.f create mode 100644 costa/native/external/lapack/zdrscl.f create mode 100644 costa/native/external/lapack/zgbbrd.f create mode 100644 costa/native/external/lapack/zgbcon.f create mode 100644 costa/native/external/lapack/zgbequ.f create mode 100644 costa/native/external/lapack/zgbrfs.f create mode 100644 costa/native/external/lapack/zgbsv.f create mode 100644 costa/native/external/lapack/zgbsvx.f create mode 100644 costa/native/external/lapack/zgbtf2.f create mode 100644 costa/native/external/lapack/zgbtrf.f create mode 100644 costa/native/external/lapack/zgbtrs.f create mode 100644 costa/native/external/lapack/zgebak.f create mode 100644 costa/native/external/lapack/zgebal.f create mode 100644 costa/native/external/lapack/zgebd2.f create mode 100644 costa/native/external/lapack/zgebrd.f create mode 100644 costa/native/external/lapack/zgecon.f create mode 100644 costa/native/external/lapack/zgeequ.f create mode 100644 costa/native/external/lapack/zgees.f create mode 100644 costa/native/external/lapack/zgeesx.f create mode 100644 costa/native/external/lapack/zgeev.f create mode 100644 costa/native/external/lapack/zgeevx.f create mode 100644 costa/native/external/lapack/zgegs.f create mode 100644 costa/native/external/lapack/zgegv.f create mode 100644 costa/native/external/lapack/zgehd2.f create mode 100644 costa/native/external/lapack/zgehrd.f create mode 100644 costa/native/external/lapack/zgelq2.f create mode 100644 costa/native/external/lapack/zgelqf.f create mode 100644 costa/native/external/lapack/zgels.f create mode 100644 costa/native/external/lapack/zgelsd.f create mode 100644 costa/native/external/lapack/zgelss.f create mode 100644 costa/native/external/lapack/zgelsx.f create mode 100644 costa/native/external/lapack/zgelsy.f create mode 100644 costa/native/external/lapack/zgeql2.f create mode 100644 costa/native/external/lapack/zgeqlf.f create mode 100644 costa/native/external/lapack/zgeqp3.f create mode 100644 costa/native/external/lapack/zgeqpf.f create mode 100644 costa/native/external/lapack/zgeqr2.f create mode 100644 costa/native/external/lapack/zgeqrf.f create mode 100644 costa/native/external/lapack/zgerfs.f create mode 100644 costa/native/external/lapack/zgerq2.f create mode 100644 costa/native/external/lapack/zgerqf.f create mode 100644 costa/native/external/lapack/zgesc2.f create mode 100644 costa/native/external/lapack/zgesdd.f create mode 100644 costa/native/external/lapack/zgesv.f create mode 100644 costa/native/external/lapack/zgesvd.f create mode 100644 costa/native/external/lapack/zgesvx.f create mode 100644 costa/native/external/lapack/zgetc2.f create mode 100644 costa/native/external/lapack/zgetf2.f create mode 100644 costa/native/external/lapack/zgetrf.f create mode 100644 costa/native/external/lapack/zgetri.f create mode 100644 costa/native/external/lapack/zgetrs.f create mode 100644 costa/native/external/lapack/zggbak.f create mode 100644 costa/native/external/lapack/zggbal.f create mode 100644 costa/native/external/lapack/zgges.f create mode 100644 costa/native/external/lapack/zggesx.f create mode 100644 costa/native/external/lapack/zggev.f create mode 100644 costa/native/external/lapack/zggevx.f create mode 100644 costa/native/external/lapack/zggglm.f create mode 100644 costa/native/external/lapack/zgghrd.f create mode 100644 costa/native/external/lapack/zgglse.f create mode 100644 costa/native/external/lapack/zggqrf.f create mode 100644 costa/native/external/lapack/zggrqf.f create mode 100644 costa/native/external/lapack/zggsvd.f create mode 100644 costa/native/external/lapack/zggsvp.f create mode 100644 costa/native/external/lapack/zgtcon.f create mode 100644 costa/native/external/lapack/zgtrfs.f create mode 100644 costa/native/external/lapack/zgtsv.f create mode 100644 costa/native/external/lapack/zgtsvx.f create mode 100644 costa/native/external/lapack/zgttrf.f create mode 100644 costa/native/external/lapack/zgttrs.f create mode 100644 costa/native/external/lapack/zgtts2.f create mode 100644 costa/native/external/lapack/zhbev.f create mode 100644 costa/native/external/lapack/zhbevd.f create mode 100644 costa/native/external/lapack/zhbevx.f create mode 100644 costa/native/external/lapack/zhbgst.f create mode 100644 costa/native/external/lapack/zhbgv.f create mode 100644 costa/native/external/lapack/zhbgvd.f create mode 100644 costa/native/external/lapack/zhbgvx.f create mode 100644 costa/native/external/lapack/zhbtrd.f create mode 100644 costa/native/external/lapack/zhecon.f create mode 100644 costa/native/external/lapack/zheev.f create mode 100644 costa/native/external/lapack/zheevd.f create mode 100644 costa/native/external/lapack/zheevr.f create mode 100644 costa/native/external/lapack/zheevx.f create mode 100644 costa/native/external/lapack/zhegs2.f create mode 100644 costa/native/external/lapack/zhegst.f create mode 100644 costa/native/external/lapack/zhegv.f create mode 100644 costa/native/external/lapack/zhegvd.f create mode 100644 costa/native/external/lapack/zhegvx.f create mode 100644 costa/native/external/lapack/zherfs.f create mode 100644 costa/native/external/lapack/zhesv.f create mode 100644 costa/native/external/lapack/zhesvx.f create mode 100644 costa/native/external/lapack/zhetd2.f create mode 100644 costa/native/external/lapack/zhetf2.f create mode 100644 costa/native/external/lapack/zhetrd.f create mode 100644 costa/native/external/lapack/zhetrf.f create mode 100644 costa/native/external/lapack/zhetri.f create mode 100644 costa/native/external/lapack/zhetrs.f create mode 100644 costa/native/external/lapack/zhgeqz.f create mode 100644 costa/native/external/lapack/zhpcon.f create mode 100644 costa/native/external/lapack/zhpev.f create mode 100644 costa/native/external/lapack/zhpevd.f create mode 100644 costa/native/external/lapack/zhpevx.f create mode 100644 costa/native/external/lapack/zhpgst.f create mode 100644 costa/native/external/lapack/zhpgv.f create mode 100644 costa/native/external/lapack/zhpgvd.f create mode 100644 costa/native/external/lapack/zhpgvx.f create mode 100644 costa/native/external/lapack/zhprfs.f create mode 100644 costa/native/external/lapack/zhpsv.f create mode 100644 costa/native/external/lapack/zhpsvx.f create mode 100644 costa/native/external/lapack/zhptrd.f create mode 100644 costa/native/external/lapack/zhptrf.f create mode 100644 costa/native/external/lapack/zhptri.f create mode 100644 costa/native/external/lapack/zhptrs.f create mode 100644 costa/native/external/lapack/zhsein.f create mode 100644 costa/native/external/lapack/zhseqr.f create mode 100644 costa/native/external/lapack/zlabrd.f create mode 100644 costa/native/external/lapack/zlacgv.f create mode 100644 costa/native/external/lapack/zlacon.f create mode 100644 costa/native/external/lapack/zlacp2.f create mode 100644 costa/native/external/lapack/zlacpy.f create mode 100644 costa/native/external/lapack/zlacrm.f create mode 100644 costa/native/external/lapack/zlacrt.f create mode 100644 costa/native/external/lapack/zladiv.f create mode 100644 costa/native/external/lapack/zlaed0.f create mode 100644 costa/native/external/lapack/zlaed7.f create mode 100644 costa/native/external/lapack/zlaed8.f create mode 100644 costa/native/external/lapack/zlaein.f create mode 100644 costa/native/external/lapack/zlaesy.f create mode 100644 costa/native/external/lapack/zlaev2.f create mode 100644 costa/native/external/lapack/zlags2.f create mode 100644 costa/native/external/lapack/zlagtm.f create mode 100644 costa/native/external/lapack/zlahef.f create mode 100644 costa/native/external/lapack/zlahqr.f create mode 100644 costa/native/external/lapack/zlahrd.f create mode 100644 costa/native/external/lapack/zlaic1.f create mode 100644 costa/native/external/lapack/zlals0.f create mode 100644 costa/native/external/lapack/zlalsa.f create mode 100644 costa/native/external/lapack/zlalsd.f create mode 100644 costa/native/external/lapack/zlangb.f create mode 100644 costa/native/external/lapack/zlange.f create mode 100644 costa/native/external/lapack/zlangt.f create mode 100644 costa/native/external/lapack/zlanhb.f create mode 100644 costa/native/external/lapack/zlanhe.f create mode 100644 costa/native/external/lapack/zlanhp.f create mode 100644 costa/native/external/lapack/zlanhs.f create mode 100644 costa/native/external/lapack/zlanht.f create mode 100644 costa/native/external/lapack/zlansb.f create mode 100644 costa/native/external/lapack/zlansp.f create mode 100644 costa/native/external/lapack/zlansy.f create mode 100644 costa/native/external/lapack/zlantb.f create mode 100644 costa/native/external/lapack/zlantp.f create mode 100644 costa/native/external/lapack/zlantr.f create mode 100644 costa/native/external/lapack/zlapll.f create mode 100644 costa/native/external/lapack/zlapmt.f create mode 100644 costa/native/external/lapack/zlaqgb.f create mode 100644 costa/native/external/lapack/zlaqge.f create mode 100644 costa/native/external/lapack/zlaqhb.f create mode 100644 costa/native/external/lapack/zlaqhe.f create mode 100644 costa/native/external/lapack/zlaqhp.f create mode 100644 costa/native/external/lapack/zlaqp2.f create mode 100644 costa/native/external/lapack/zlaqps.f create mode 100644 costa/native/external/lapack/zlaqsb.f create mode 100644 costa/native/external/lapack/zlaqsp.f create mode 100644 costa/native/external/lapack/zlaqsy.f create mode 100644 costa/native/external/lapack/zlar1v.f create mode 100644 costa/native/external/lapack/zlar2v.f create mode 100644 costa/native/external/lapack/zlarcm.f create mode 100644 costa/native/external/lapack/zlarf.f create mode 100644 costa/native/external/lapack/zlarfb.f create mode 100644 costa/native/external/lapack/zlarfg.f create mode 100644 costa/native/external/lapack/zlarft.f create mode 100644 costa/native/external/lapack/zlarfx.f create mode 100644 costa/native/external/lapack/zlargv.f create mode 100644 costa/native/external/lapack/zlarnv.f create mode 100644 costa/native/external/lapack/zlarrv.f create mode 100644 costa/native/external/lapack/zlartg.f create mode 100644 costa/native/external/lapack/zlartv.f create mode 100644 costa/native/external/lapack/zlarz.f create mode 100644 costa/native/external/lapack/zlarzb.f create mode 100644 costa/native/external/lapack/zlarzt.f create mode 100644 costa/native/external/lapack/zlascl.f create mode 100644 costa/native/external/lapack/zlaset.f create mode 100644 costa/native/external/lapack/zlasr.f create mode 100644 costa/native/external/lapack/zlassq.f create mode 100644 costa/native/external/lapack/zlaswp.f create mode 100644 costa/native/external/lapack/zlasyf.f create mode 100644 costa/native/external/lapack/zlatbs.f create mode 100644 costa/native/external/lapack/zlatdf.f create mode 100644 costa/native/external/lapack/zlatps.f create mode 100644 costa/native/external/lapack/zlatrd.f create mode 100644 costa/native/external/lapack/zlatrs.f create mode 100644 costa/native/external/lapack/zlatrz.f create mode 100644 costa/native/external/lapack/zlatzm.f create mode 100644 costa/native/external/lapack/zlauu2.f create mode 100644 costa/native/external/lapack/zlauum.f create mode 100644 costa/native/external/lapack/zpbcon.f create mode 100644 costa/native/external/lapack/zpbequ.f create mode 100644 costa/native/external/lapack/zpbrfs.f create mode 100644 costa/native/external/lapack/zpbstf.f create mode 100644 costa/native/external/lapack/zpbsv.f create mode 100644 costa/native/external/lapack/zpbsvx.f create mode 100644 costa/native/external/lapack/zpbtf2.f create mode 100644 costa/native/external/lapack/zpbtrf.f create mode 100644 costa/native/external/lapack/zpbtrs.f create mode 100644 costa/native/external/lapack/zpocon.f create mode 100644 costa/native/external/lapack/zpoequ.f create mode 100644 costa/native/external/lapack/zporfs.f create mode 100644 costa/native/external/lapack/zposv.f create mode 100644 costa/native/external/lapack/zposvx.f create mode 100644 costa/native/external/lapack/zpotf2.f create mode 100644 costa/native/external/lapack/zpotrf.f create mode 100644 costa/native/external/lapack/zpotri.f create mode 100644 costa/native/external/lapack/zpotrs.f create mode 100644 costa/native/external/lapack/zppcon.f create mode 100644 costa/native/external/lapack/zppequ.f create mode 100644 costa/native/external/lapack/zpprfs.f create mode 100644 costa/native/external/lapack/zppsv.f create mode 100644 costa/native/external/lapack/zppsvx.f create mode 100644 costa/native/external/lapack/zpptrf.f create mode 100644 costa/native/external/lapack/zpptri.f create mode 100644 costa/native/external/lapack/zpptrs.f create mode 100644 costa/native/external/lapack/zptcon.f create mode 100644 costa/native/external/lapack/zpteqr.f create mode 100644 costa/native/external/lapack/zptrfs.f create mode 100644 costa/native/external/lapack/zptsv.f create mode 100644 costa/native/external/lapack/zptsvx.f create mode 100644 costa/native/external/lapack/zpttrf.f create mode 100644 costa/native/external/lapack/zpttrs.f create mode 100644 costa/native/external/lapack/zptts2.f create mode 100644 costa/native/external/lapack/zrot.f create mode 100644 costa/native/external/lapack/zspcon.f create mode 100644 costa/native/external/lapack/zspmv.f create mode 100644 costa/native/external/lapack/zspr.f create mode 100644 costa/native/external/lapack/zsprfs.f create mode 100644 costa/native/external/lapack/zspsv.f create mode 100644 costa/native/external/lapack/zspsvx.f create mode 100644 costa/native/external/lapack/zsptrf.f create mode 100644 costa/native/external/lapack/zsptri.f create mode 100644 costa/native/external/lapack/zsptrs.f create mode 100644 costa/native/external/lapack/zstedc.f create mode 100644 costa/native/external/lapack/zstegr.f create mode 100644 costa/native/external/lapack/zstein.f create mode 100644 costa/native/external/lapack/zsteqr.f create mode 100644 costa/native/external/lapack/zsycon.f create mode 100644 costa/native/external/lapack/zsymv.f create mode 100644 costa/native/external/lapack/zsyr.f create mode 100644 costa/native/external/lapack/zsyrfs.f create mode 100644 costa/native/external/lapack/zsysv.f create mode 100644 costa/native/external/lapack/zsysvx.f create mode 100644 costa/native/external/lapack/zsytf2.f create mode 100644 costa/native/external/lapack/zsytrf.f create mode 100644 costa/native/external/lapack/zsytri.f create mode 100644 costa/native/external/lapack/zsytrs.f create mode 100644 costa/native/external/lapack/ztbcon.f create mode 100644 costa/native/external/lapack/ztbrfs.f create mode 100644 costa/native/external/lapack/ztbtrs.f create mode 100644 costa/native/external/lapack/ztgevc.f create mode 100644 costa/native/external/lapack/ztgex2.f create mode 100644 costa/native/external/lapack/ztgexc.f create mode 100644 costa/native/external/lapack/ztgsen.f create mode 100644 costa/native/external/lapack/ztgsja.f create mode 100644 costa/native/external/lapack/ztgsna.f create mode 100644 costa/native/external/lapack/ztgsy2.f create mode 100644 costa/native/external/lapack/ztgsyl.f create mode 100644 costa/native/external/lapack/ztpcon.f create mode 100644 costa/native/external/lapack/ztprfs.f create mode 100644 costa/native/external/lapack/ztptri.f create mode 100644 costa/native/external/lapack/ztptrs.f create mode 100644 costa/native/external/lapack/ztrcon.f create mode 100644 costa/native/external/lapack/ztrevc.f create mode 100644 costa/native/external/lapack/ztrexc.f create mode 100644 costa/native/external/lapack/ztrrfs.f create mode 100644 costa/native/external/lapack/ztrsen.f create mode 100644 costa/native/external/lapack/ztrsna.f create mode 100644 costa/native/external/lapack/ztrsyl.f create mode 100644 costa/native/external/lapack/ztrti2.f create mode 100644 costa/native/external/lapack/ztrtri.f create mode 100644 costa/native/external/lapack/ztrtrs.f create mode 100644 costa/native/external/lapack/ztzrqf.f create mode 100644 costa/native/external/lapack/ztzrzf.f create mode 100644 costa/native/external/lapack/zung2l.f create mode 100644 costa/native/external/lapack/zung2r.f create mode 100644 costa/native/external/lapack/zungbr.f create mode 100644 costa/native/external/lapack/zunghr.f create mode 100644 costa/native/external/lapack/zungl2.f create mode 100644 costa/native/external/lapack/zunglq.f create mode 100644 costa/native/external/lapack/zungql.f create mode 100644 costa/native/external/lapack/zungqr.f create mode 100644 costa/native/external/lapack/zungr2.f create mode 100644 costa/native/external/lapack/zungrq.f create mode 100644 costa/native/external/lapack/zungtr.f create mode 100644 costa/native/external/lapack/zunm2l.f create mode 100644 costa/native/external/lapack/zunm2r.f create mode 100644 costa/native/external/lapack/zunmbr.f create mode 100644 costa/native/external/lapack/zunmhr.f create mode 100644 costa/native/external/lapack/zunml2.f create mode 100644 costa/native/external/lapack/zunmlq.f create mode 100644 costa/native/external/lapack/zunmql.f create mode 100644 costa/native/external/lapack/zunmqr.f create mode 100644 costa/native/external/lapack/zunmr2.f create mode 100644 costa/native/external/lapack/zunmr3.f create mode 100644 costa/native/external/lapack/zunmrq.f create mode 100644 costa/native/external/lapack/zunmrz.f create mode 100644 costa/native/external/lapack/zunmtr.f create mode 100644 costa/native/external/lapack/zupgtr.f create mode 100644 costa/native/external/lapack/zupmtr.f diff --git a/costa/native/cta/cta_util_timing.f90 b/costa/native/cta/cta_util_timing.f90 new file mode 100644 index 000000000..8d8727cee --- /dev/null +++ b/costa/native/cta/cta_util_timing.f90 @@ -0,0 +1,89 @@ +! +! $URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_util_timing.f90 $ +! $Revision: 680 $, $Date: 2008-10-10 17:01:17 +0200 (Fri, 10 Oct 2008) $ +! +!COSTA: Problem solving environment for data assimilation +!Copyright (C) 2008 Nils van Velzen +! +!This library is free software; you can redistribute it and/or +!modify it under the terms of the GNU Lesser General Public +!License as published by the Free Software Foundation; either +!version 2.1 of the License, or (at your option) any later version. +! +!This library is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +!Lesser General Public License for more details. +! +!You should have received a copy of the GNU Lesser General Public +!License along with this library; if not, write to the Free Software +!Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +!DEC$ ATTRIBUTES DLLEXPORT::cta_util_timing +subroutine cta_util_timing(dtimes,itime,start) +implicit none +real(kind=8), dimension(2,3,*), intent(inout) ::dtimes +integer, intent(in) ::itime +integer, intent(in) ::start + +real(kind=8) ::wtime,ctime + +call cta_util_timing_walltime(wtime) +call cpu_time(ctime) +if (start>0) then + dtimes(1,1,itime)=wtime + dtimes(2,1,itime)=ctime +else + dtimes(1,2,itime)=wtime + dtimes(2,2,itime)=ctime + dtimes(1,3,itime)=dtimes(1,3,itime)+(dtimes(1,2,itime)-dtimes(1,1,itime)) + dtimes(2,3,itime)=dtimes(2,3,itime)+(dtimes(2,2,itime)-dtimes(2,1,itime)) +endif + + +end subroutine cta_util_timing + + +subroutine cta_util_timing_cputime(dtime) +implicit none +real(kind=8), intent(out) ::dtime + +call cpu_time(dtime) + +end subroutine cta_util_timing_cputime + + + + +subroutine cta_util_timing_walltime(dtime) +implicit none +real(kind=8), intent(out) ::dtime + +integer, dimension(8) ::times +integer ::year,month,day,hour,minutes,seconds,milisec,julnum +integer ::imon1 + + +call date_and_time(values=times) +year =times(1) +month =times(2) +day =times(3) +hour =times(5) +minutes=times(6) +seconds=times(7) +milisec=times(8) + +!calculate julian day number +imon1 = (month-14)/12; +julnum = day - 32075 + 1461 * ( year + 4800 + imon1 ) / 4 & + + 367 * ( month - 2 - imon1 * 12 ) / 12 & + - 3 * ( ( year + 4900 + imon1 ) / 100 ) / 4; + +dtime=(((dble(julnum)*24.0d0+dble(hour))*60.0d0+dble(minutes))*60.0d0)+dble(seconds)+dble(milisec)*1.0d-3 + + +end subroutine cta_util_timing_walltime + + + diff --git a/costa/native/cta/include/cta.h b/costa/native/cta/include/cta.h new file mode 100644 index 000000000..bde7d2814 --- /dev/null +++ b/costa/native/cta/include/cta.h @@ -0,0 +1,57 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** + \mainpage +TODO: fill the main page of the doxygen documentation + +**/ + +#ifndef CTA_H +#define CTA_H +#include "cta_initialise.h" +#include "cta_datatypes.h" +#include "cta_interface.h" +#include "cta_functions.h" +#include "cta_file.h" +#include "cta_string.h" +#include "cta_vector.h" +#include "cta_vector_blas.h" +#include "cta_tree.h" +#include "cta_matrix.h" +#include "cta_matrix_blas.h" +#include "cta_sobs.h" +//#include "cta_sobs_sqlite3.h" +#include "cta_treevector.h" +#include "cta_model.h" +#include "cta_model_factory.h" +#include "cta_errors.h" +#include "cta_xml.h" +#include "cta_defaults.h" +#include "cta_modbuild_sp.h" +#include "cta_modbuild_par.h" +#include "cta_pack.h" +#include "cta_metainfo.h" +#include "cta_modelcombiner.h" +#include "cta_util_sort.h" +#include "cta_util_statistics.h" +#include "cta_util_methods.h" +#include "cta_message.h" + +#endif diff --git a/costa/native/cta/include/cta_array.h b/costa/native/cta/include/cta_array.h new file mode 100644 index 000000000..5d6bac3b0 --- /dev/null +++ b/costa/native/cta/include/cta_array.h @@ -0,0 +1,317 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2012 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_array.h +\brief Interface description of array component. + + An array in OpenDA is an N-dimensional Matrix of values. + In a similar wat as in Matlab and Octave a vector is a special case of a Matrix/Array. + +*/ + +#ifndef CTA_ARRAY_H +#define CTA_ARRAY_H +#include "cta_system.h" +#include "cta_handles.h" +#include "cta_datatypes.h" +/* Function Handle */ +#ifdef __cplusplus +extern "C" { +#endif + +typedef CTA_Handle CTA_Array; + +/** \brief Free Array instance + * + * \param h IO handle of Array instance CTA_NULL on return + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_Free(CTA_Array *h); + +/** \brief Create a new Array instance and fill with given values + * + * \param values I values of new Array + * \param nDimensions I Number of dimensions of new array + * \param dimensions I Number of elements in each dimension + * \param h O Array handle of new instance + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_CreateAsDoubles(double *values, int nDimensions, int *dimensions, CTA_Array *h); + +/** \brief Get number of dimensions, eg 1 for a vector and 2 for a matrix + * \param h I Array handle + * \return rank of the array + */ +CTAEXPORT int CTA_Array_getNumberOfDimensions(CTA_Array h); + +/** \brief Get the sizes of the individual dimensions + * \param h I Array handle + * \param nDimensions O size of array, eg 2 for the array [[3,4,10],[3,4,10]] + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_getnDimensions(CTA_Array h, int *nDimensions); + +/** \brief Get the size of the array for each dimension. + * The length of the return value equals the number of dimensions. + * + * \param h I Array handle + * \param dimensions O size of array, eg [3,4,10] + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_getDimensions(CTA_Array h, int *dimensions); + +/** \brief Total number of elements in an array. + * This is equal to the product of the values returned by getDimension and is thus just for c + * + * \param h I Array handle + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_length(CTA_Array h); + +/** \brief Get all values of an array. + * The values are guaranteed be a copy if + * copyValues==true or may be a reference if copyValues==false. + * Note that the values are never guaranteed to be a reference and can not be used + * to change the array. + * + * \param h I Array handle + * \param values O Values from the array + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_getValuesAsDoubles(CTA_Array h, double *values); + +/** \brief Get a part of the values from the array. + * Eg let a=[[1,2,3][4,5,6]] then getValuesAsDoubles(1,2) + * returns [2,3] + * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value. + * \param h I Array handle + * \param firstIndex I firstIndex of selection + * \param lastIndex I lastIndex of selection + * \param values O Values from the array + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_getValuesAsDoubles_indexrange(CTA_Array h, int firstIndex, int lastIndex, double *values); + +/** \brief Get a value from an array for specific indices. + * Eg let a=[[1,2,3][4,5,6]] then getValueAsDouble(5) + * returns 6 + * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value. + * \param h I Array handle + * \param index I index specifier + * \param value O double value at the specific indices + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_getValueAsDoubles_index(CTA_Array h, int index, double *value); + +/** \brief Get a value from an array for specific indices. + * Eg let a=[[1,2,3][4,5,6]] then getValueAsDouble([1,2]) + * returns 6 + * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value. + * \param h I Array handle + * \param nIndices I length of array of indices indices + * \param indices I indices of value + * \param value O value from array + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_getValueAsDouble_indices(CTA_Array h, int nIndices, int* indices, double *value); + +/** \brief Set the values of this array. + * \param h I Array handle + * \param values I the values as an array of doubles + * \param length I length of array values + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_setValuesAsDoubles(CTA_Array h, double *values, int length); + +/** \brief Set a value from an array for specific indices. + * Eg let a=[[1,2,3][4,5,6]] then setValueAsDouble([1,2],60.) + * results in a=[[1,2,3][4,5,60.]] + * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value. + * \param h I Array handle + * \param nIndices I length of array indices + * \param indices I index specifier + * \param value I values at the specific indices + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_setValueAsDouble_indices(CTA_Array h, int nIndices, int *indices, double value); + +/** \brief Set part of the values of this array. + * Let a=[[1,2,3][4,5,6]] then setValuesAsDoubles(1,2,[20,30]) will result in a=[[1,20,30][4,5,6]] + * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value. + * \param h I Array handle + * \param firstIndex I specifies the start of the selection + * \param lastIndex I specifies the end of the selection + * \param values I that will replace the selected range of numbers + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_setValuesAsDoubles_indexrange(CTA_Array h, int firstIndex, int lastIndex, double *values); + +/** \brief Get a value from an array for specific indices. + * Eg let a=[[1,2,3][4,5,6]] then setValueAsDouble(5,60.) + * results in a=[[1,2,3][4,5,60.]] + * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value. + * \param h I Array handle + * \param index I index specifier + * \param value I value that will replace the selected number + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_setValueAsDouble_index(CTA_Array h, int index, double value); + +/** \brief Set whole vector equal to a constant value. + * Note: This method can only be used if all elements of the vector + * have the same data type. + * \param h I Array handle + * \param value I value to set + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_setConstant(CTA_Array h, double value); + +/** \brief Perform a values += alpha * axpyValues operation on each value in this array. + * \param h I Array handle + * \param alpha I The alpha in state variable += alpha * vector. + * \param axpyValues I the values for the axpy-operation on all values in this array. + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_axpyOnValues(CTA_Array h, double alpha, double *axpyValues); + +/** \brief Multiply each value in this array with the corresponding multiplication factor. + * \param h I Array handle + * \param multiplicationFactors I the multiplication factors for all array values. + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_multiplyValues(CTA_Array h, double *multiplicationFactors); + +/** \brief Change the dimensions of an array. The new array should have the same length + * \param h I Array handle + * \param nDimensions I length of array dimensions + * \param dimensions I new dimensions of array + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_reshape(CTA_Array h, int nDimensions, int *dimensions); + + +/** \brief Get part of the array by selection of a subset in one dimension. + * Eg. a=[[1,2,3],[4,5,6]] a.getSlice(0,0) returns [1,2,3] + * Note that the number of dimensions IS reduced by one. + * \param h I Array handle + * \param dimension I Dimension to make selection in + * \param index I Element in dimension that is selected + * \param h_out O new Array with selected selection + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_getSlice(CTA_Array h, int dimension, int index, CTA_Array *h_out); + +/** \brief Get part of the array by selection of a subset in one dimension. + * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] a.getSlice(0,0,1) returns [[1,2],[3,4]] + * Note that the number of dimensions is NOT reduced by one. + * \param h I Array handle + * \param dimension I Dimension to make selection in + * \param minIndex I start of range to select + * \param maxIndex I end of range to select + * \param h_out O new Array with selected selection + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_getSlice_range(CTA_Array h, int dimension, int minIndex, int maxIndex, CTA_Array *h_out); + +/** \brief Get part of the array by selection of a subset in one dimension. + * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] a.getSlice(0,0,1) returns [1,2,3,4] + * \param h I Array handle + * \param dimension I Dimension to make selection in + * \param minIndex I start of range to select + * \param maxIndex I end of range to select + * \param values O selected values + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_getSliceAsDoubles_range(CTA_Array h, int dimension, int minIndex, int maxIndex, double *values); + + +/** \brief Set the values of a part of an array. + * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([11,12,13],1,1) + * sets the second column a=[[1,11,3],[4,12,6],[7,13,9]] + * Note that the dimension of the slice is one smaller than for the array. + * \param h I Array handle + * \param slice I Values to set + * \param dimension I Dimension that is replaced + * \param index I Index of selected dimension to replace + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_setSliceAsDoubles(CTA_Array h, double *slice, int dimension, int index); + + +/** \brief Set the values of a part of an array. + * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([11,12,13],1,1) + * sets the second column a=[[1,11,3],[4,12,6],[7,13,9]] + * Note that the dimension of the slice is one smaller than for the array. + * \param h I Array handle + * \param slice_h I Values to set + * \param dimension I Dimension that is replaced + * \param index I Index of selected dimension to replace + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_setSliceAsArray(CTA_Array h, CTA_Array slice_h, int dimension, int index); + +/** \brief Set the values of a part of an array. + * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([[11,12,13],[14,15,16]],1,1,2) + * sets the second and third columns a=[[1,11,14],[4,12,15],[7,13,16]] + * Note that the dimension of the slice is the same as for the array. + * \param h I Array handle + * \param slice_h I Values to set + * \param dimension I Dimension that is replaced + * \param minIndex I Start of range in Dimension to be replaced + * \param maxIndex I End of range in Dimension to be replaced + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_setSliceAsArray_range(CTA_Array h, CTA_Array slice_h, int dimension, int minIndex, int maxIndex); + +/** \brief Set the values of a part of an array. + * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([11,12,13,14,15,16],1,1,2) + * sets the second and third columns a=[[1,11,14],[4,12,15],[7,13,16]] + * Note that the dimension of the slice is the same as for the array. + * \param h I Array handle + * \param slice I Values to set + * \param dimension I Dimension that is replaced + * \param minIndex I Start of range in Dimension to be replaced + * \param maxIndex I End of range in Dimension to be replaced + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_setSliceAsDoubles_range(CTA_Array h, double *slice, int dimension, int minIndex, int maxIndex); + +/** \brief Convert indices in multiple dimensions to position in the one-dimensional array as + * returned eg by getValuesAsDoubles + * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] then valueIndex([1,0]) returns 3 which points to the value 4 here. + * \param h I Array handle + * \param nIndices I length of indices + * \param indices I indices of element + * \param index O position of element + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Array_valueIndex(CTA_Array h, int nIndices, int *indices, int *index); + + +#ifdef __cplusplus +} +#endif + + + + + +#endif diff --git a/costa/native/cta/include/cta_bb_modbuild.h b/costa/native/cta/include/cta_bb_modbuild.h new file mode 100644 index 000000000..c0aa446ae --- /dev/null +++ b/costa/native/cta/include/cta_bb_modbuild.h @@ -0,0 +1,153 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_modbuild_b3.h +\brief Description of the COSTA blackbox component +*/ + +#ifndef CTA_MODBUILD_BB_H +#define CTA_MODBUILD_BB_H + +#include +#ifdef WIN32 +#include +#endif +#include "cta.h" +#include "cta_system.h" +#include "f_cta_utils.h" +#include "cta_datatypes.h" +#include "cta_model_utilities.h" +#include "cta_handles.h" +#include "cta_datetime.h" + +#define STRING_MAX 1024 /* maximum length of string */ + +#define BB_ENCODING ("utf-8") /* XML encodiing */ + + +typedef struct BB_Variable BB_Variable; +typedef BB_Variable * BB_VariablePntr; + +struct BB_Variable { + char *name; /* variable name */ + double value; /* variable value */ +}; + +typedef struct BB_Station BB_Station; +typedef BB_Station * BB_StationPntr; + +struct BB_Station { + char *name; /* station name */ + char *location; /* station location */ + char *filename; /* station filename */ + int nvariables; /* station number of variables */ + BB_VariablePntr *variables; /* station variables */ +}; + +typedef struct BB_Forcing BB_Forcing; +typedef BB_Forcing * BB_ForcingPntr; + +struct BB_Forcing { + char *name; /* forcing name */ + char *file; /* forcing file */ + char *element; /* forcing element */ + char *property; /* forcing property */ + char *item; /* forcing item */ + double value; /* forcing value */ +}; + +typedef struct BB_Parameter BB_Parameter; +typedef BB_Parameter * BB_ParameterPntr; + +struct BB_Parameter { + char *name; /* parameter name */ + char *file; /* parameter file */ + char *element; /* parameter element */ + char *property; /* parameter property */ + char *item; /* parameter item */ + double value; /* parameter value */ +}; + +typedef struct BB_StateExchange BB_StateExchange; +typedef BB_StateExchange * BB_StateExchangePntr; + +struct BB_StateExchange { + char *executable; /* name of blackbox executable */ + char *state2model; /* name of input file */ + char *model2state; /* name of output file */ + char *outputSteps; /* output steps 'last' or 'all' */ + char *state2model_file; /* filename to write state */ + char *model2state_file; /* filename to read state */ + char *initial_state_file; /* filename containing initial state */ + CTA_Func state2model_func; /* Handle to user function for writing state 2 model */ + CTA_Func model2state_func; /* Handle to user function for writing model 2 state */ + CTA_Func initial_state_func; /* Handle to user function for reading initial state */ + int nofquantities; /* number of quantities == number of substates */ + int quantsize; /* length of quantityvector */ + int nofdimensions; /* number of co-ordinate dimensions */ + CTA_String *hquantid; /* id of each quantity */ + CTA_String *hdimid; /* id of each dimension */ + int *dimlength; /* length of each dimension */ +}; + +typedef struct BB_Model BB_Model; +typedef BB_Model * BB_ModelPntr; + +struct BB_Model{ + char *type; /* model type */ + char *description; /* model description */ + double timestep; /* simulation timestep in seconds */ + int simulationNumber; /* simulation number */ + char *workingdir; /* working map of simulation */ + char *templatedir; /* map with simulation template */ + char *simulationMap; /* map with simulation results */ + int stateLength; /* length of the state vector */ + int nstations; /* number of stations */ + BB_StateExchangePntr stateexchange; /* pointer to the state exchange variables */ + BB_StationPntr *stations; /* pointer to the list of stations */ + int nparameters; /* number of parameters */ + BB_ParameterPntr *parameters; /* pointer to the list of parameters */ + int nforcings; /* number of forcings */ + BB_ForcingPntr *forcings; /* pointer to the list of forcings */ +}; + +static int BB_simulationNumber=0; // Number of the simulation + +#define BB_INDEX_THIS ( 0) /* Handle of instance */ +#define BB_INDEX_TIME ( 1) /* Time instance of model (state) */ +#define BB_INDEX_STATE ( 2) /* State vector of model */ +#define BB_INDEX_FORCINGS ( 3) /* Tree vector containing the forcings of the model */ +#define BB_INDEX_PARAMETERS ( 4) /* Tree vector of model parameters */ +#define BB_INDEX_USERDATA ( 5) /* Userdata */ + +#define BB_SIZE_DATABLK ( 6) + + +/** \brief Create the model class of the BB Black-box builder + * + * \note This is not a user function. It is called at initialization of the + * COSTA environment. + * + * \param modelcls O receives handle of the BB-modelbuilder class + */ +CTANOEXPORT void CTA_Modbuild_b3b_CreateClass(CTA_ModelClass *modelcls); + +#endif + diff --git a/costa/native/cta/include/cta_bb_modbuild_utils.h b/costa/native/cta/include/cta_bb_modbuild_utils.h new file mode 100644 index 000000000..ec7493325 --- /dev/null +++ b/costa/native/cta/include/cta_bb_modbuild_utils.h @@ -0,0 +1,171 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2006 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_modbuild_b3b_utils_h.h +\brief Description of the COSTA blackbox component utilisties +*/ + +#ifndef CTA_MODBUILD_BB_UTILS_H +#define CTA_MODBUILD_BB_UTILS_H + +#include +#include +#include +#include +#include + +#include "cta.h" +#include "cta_xml.h" +#include "cta_bb_modbuild.h" + +/** \brief Copy a string + * + * \param name I string to be copied + * + * \return copy of the string + */ +CTANOEXPORT char * BB_CopyText(const char *in); + +/** \brief Reallocates a block of memory + * + * \param memblock I Pointer to previously allocated memory block + * \param size I New size in bytes. + * + * \return pointer to the allocated space + */ +CTANOEXPORT void * BB_Realloc(void *memblock, size_t size); + +/** \brief Allocates a block of memory + * + * \param nelem I Number of elements + * \param elsize I Element size + * + * \return pointer to the allocated space + */ +CTANOEXPORT void * BB_Malloc(size_t nelem, size_t elsize); + +/** \brief Frees a block of memory that is currently allocated + * + * \param pntr I starting address of the memory block to be freed + * + * \return no return value + */ +CTANOEXPORT void BB_Free(void *pntr); + +/** \brief Frees a model that is currently allocated + * + * \param pntr I starting address of the model to be freed + * + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +CTANOEXPORT int BB_Free_Model(BB_ModelPntr model); + +/** \brief Frees a stateexchange that is currently allocated + * + * \param pntr I starting address of the stateexchange to be freed + * + * \return no return value + */ +CTANOEXPORT void BB_Free_StateExchange(BB_StateExchangePntr stateexchange); + +/** \brief Frees a parameter that is currently allocated + * + * \param pntr I starting address of the parameter to be freed + * \return no return value + */ +CTANOEXPORT void BB_Free_Parameter(BB_ParameterPntr parameter); + +/** \brief Frees a forcing that is currently allocated + * + * \param pntr I starting address of the forcing to be freed + * \return no return value + */ +CTANOEXPORT void BB_Free_Forcing(BB_ForcingPntr forcings); + +/** \brief Frees a station that is currently allocated + * + * \param pntr I starting address of the station to be freed + * \return no return value + */ +CTANOEXPORT void BB_Free_Station(BB_StationPntr stations); + +/** \brief Convert COSTA state vector to BB model + * + * \param model I Pointer naar de model data + * \param start I Simulatie starttijd in modified julian day + * \param stop I Simulatie stoptijd in modified julian day + * \param sstate I Model state to be written + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +CTANOEXPORT int BB_State2Model(BB_ModelPntr model, double start, double stop, CTA_TreeVector sstate); + +/** \brief Convert BB model to COSTA state vector + * + * \param model I Pointer naar de model data + * \param start I Simulatie starttijd in modified julian day + * \param stop I Simulatie stoptijd in modified julian day + * \param sstate O Pointer naar state vector + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +CTANOEXPORT int BB_Model2State(BB_ModelPntr model, double start, double stop, CTA_TreeVector *sstate); + +/** \brief Open model2state file + * + * \param model I Pointer naar de model data + * \param htree O Pointer naar costa tree + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +CTANOEXPORT int BB_OpenModel2State(BB_ModelPntr model, CTA_Tree *htree); + +/** \brief Get value of BB model + * + * \param model I Pointer naar de model data + * \param htree I Pointer naar costa tree + * \param station I Name of the station + * \param variable I Name of the variabele + * \param time I tijdstip in MJD + * \param value O waarde van de variabele op tijdstip van station + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +CTANOEXPORT int BB_Model2StateByStationVariableTime(BB_ModelPntr model, CTA_Tree htree, char *station, char *variable, double time, double *value); + +/** \brief + * + * \param model I Pointer naar de model data + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +CTANOEXPORT int BB_Run_Model(BB_ModelPntr model); + +/** \brief + * + * \param + * \return + */ +CTANOEXPORT BB_ModelPntr BB_Read_Model(int simulationNumber, CTA_Tree hmodelinput, CTA_Handle *sstate, CTA_Handle *sparam, CTA_Handle *sforc); + +/** \brief Check for a COSTA error + * + * \param ierr I COSTA error code + * \param msg I Error message + * \return no return value + */ +CTANOEXPORT void BB_CheckError(int ierr, const char *msg); + +#endif diff --git a/costa/native/cta/include/cta_datatypes.h b/costa/native/cta/include/cta_datatypes.h new file mode 100644 index 000000000..457d278c4 --- /dev/null +++ b/costa/native/cta/include/cta_datatypes.h @@ -0,0 +1,208 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_datatypes.h +\brief Definitions of constants and datatypes that are publically used in COSTA +*/ + + +#ifndef CTA_DATA_TYPS_H +#define CTA_DATA_TYPS_H +#include +#include +#include "cta_system.h" + +/** The datatpe of a COSTA Datatype */ +typedef int CTA_Datatype; + +/** A COSTA-function CTA_Function *function + is a void-pointer. If it points to an + existing function, it may be used to call + that function, simply by + retval = function(arguments); +*/ +typedef void CTA_Function(); + +/*! handle to a COSTA object */ +#define CTA_HANDLE (-1) +/*! A handle of a COSTA interface object */ +#define CTA_INTERFACE (-2) +/*! A handle of a COSTA function object */ +#define CTA_FUNCTION (-3) +/*! A handle of a COSTA vector object */ +#define CTA_VECTOR (-4) +/*! A handle of a COSTA Vector class */ +#define CTA_VECTORCLASS (-5) +/*! A handle of a COSTA (sub) treevector */ +#define CTA_TREEVECTOR (-6) +/*! A handle of a COSTA Matrix class */ +#define CTA_MATRIXCLASS (-7) +/*! A handle of a COSTA Matrix */ +#define CTA_MATRIX (-8) +/*! A handle of a COSTA Covariance matrix class */ +#define CTA_COVMATCLASS (-9) +/*! A handle of a COSTA Covariance matrix */ +#define CTA_COVMAT (-10) +/*! A handle of a COSTA Interpolation matrix */ +#define CTA_INTPOL (-11) +/*! A handle of a COSTA observation component */ +#define CTA_OBS (-12) +/*! A handle of a COSTA Model class */ +#define CTA_MODELCLASS (-13) +/*! A handle of a COSTA Model */ +#define CTA_MODEL (-14) +/*! A handle of a COSTA Time */ +#define CTA_TIME (-15) +/*! A handle of a COSTA StochObserver object */ +#define CTA_SOBS (-16) +/*! A handle of a COSTA StochObserver class */ +#define CTA_SOBSCLASS (-17) +/*! A handle of a COSTA ObserverDescription object */ +#define CTA_OBSDESCR (-18) +/*! A handle of a COSTA ObserverDescription class */ +#define CTA_OBSDESCRCLASS (CTA_SOBSCLASS) +/*! A handle of a COSTA Method object */ +#define CTA_METHODCLASS (-19) +/*! A handle of a COSTA Method object */ +#define CTA_METHOD (-20) +/*! A handle of a COSTA tree object */ +#define CTA_TREE (-21) +/*! A handle of a COSTA Pack-object */ +#define CTA_PACK (-22) +/*! A handle to an unknown data object */ +#define CTA_DATABLOCK (-23) +/*! A handle of a COSTA meta information object */ +#define CTA_METAINFO (-24) +/*! A handle of a COSTA meta information object */ +#define CTA_METAINFOCLASS (-25) +/*! A handle of a COSTA relation table object */ +#define CTA_RELTABLE (-26) +/*! An explicit subtreevector (for XML-use) */ +#define CTA_SUBTREEVECTOR (-27) +/*! CTA_Datatypes constant: COSTA-string */ +#define CTA_STRING (-28) +/*! CTA_Datatypes constant: COSTA-file */ +#define CTA_FILE (-29) +/*! CTA_Datatypes constant: COSTA-array */ +#define CTA_ARRAY (-30) + + + +// Codes to indicate CTA_Datatypes +/*! COSTA CTA_Datatypes: arbitrary void */ +#define CTA_VOID (-100) +/*! CTA_Datatypes constant: integer (scalar) */ +#define CTA_INTEGER (-101) +/*! CTA_Datatypes constant: single precision real (scalar) */ +#define CTA_REAL (-102) +/*! CTA_Datatypes constant: double precision real (scalar) */ +#define CTA_DOUBLE (-103) +/*! CTA_Datatypes constant: FORTRAN-string */ +#define CTA_FSTRING (-104) +/*! CTA_Datatypes constant: C-string */ +#define CTA_CSTRING (-105) + + +/*! CTA_Datatypes constant: integer array */ +#define CTA_1DINTEGER (-201) +/*! CTA_Datatypes constant: single precision real array */ +#define CTA_1DREAL (-202) +/*! CTA_Datatypes constant: double precision real array */ +#define CTA_1DDOUBLE (-203) +/*! CTA_Datatypes constant: array of FORTRAN-strings */ +#define CTA_1DFSTRING (-204) +/*! CTA_Datatypes constant: array of C-strings */ +#define CTA_1DCSTRING (-205) + +// Common Constants + +/*! COSTA flag/constant: Default */ +#define CTA_DEFAULT (0) +/*! COSTA flag/constant: NULL-Handle */ +#define CTA_NULL (0) +/*! COSTA flag/constant: FALSE */ +#define CTA_FALSE (0) +/*! COSTA flag/constant: TRUE */ +#define CTA_TRUE (1) + +#ifndef BOOL +/*! Datatype for boolians */ +#define BOOL int +/*! Boolean value of TRUE */ +#ifndef TRUE +#define TRUE (1) +#endif +/*! Boolean value of FALSE */ +#ifndef FALSE +#define FALSE (0) +#endif +#endif +#endif + +#ifndef M_PI +/*! constant pi */ +#define M_PI (3.14159265358979) +#endif + +/*! The machine precision. More precisely, `eps' is the largest + relative spacing between any two adjacent numbers in the machine's + floating point system. This number is obviously system-dependent. + On machines that support 64 bit IEEE floating point arithmetic, + `eps' is approximately 2.2204e-16. + This variable is not set automatically (yet) +*/ +#ifndef M_EPS +#define M_EPS (2.22044604925031e-16) + + +#define CTA_STRLEN_TAG (80) +#define CTA_STRLEN_NAME (80) + + +//#define CTA_ASSIMOBS (1) +//#define CTA_VALIDATEOBS (2) +//#define CTA_ALLOBS (3) + +/*! flush stdout and stderr in a number of the COSTA methods. useful for debugging */ +#define CTA_FLUSH_ALWAYS ( 1) + +/*! FORTRAN file unit for standard output */ +#define CTA_F77_STDOUT ( 6) + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \brief Get the result of the C-function sizeof for a COSTA datatype + * + * \param datatype I COSTA data type + * \param size O receives result sizeof-function + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_SizeOf(CTA_Datatype datatype, int *size); + +#ifdef __cplusplus +} +#endif + + +#endif + diff --git a/costa/native/cta/include/cta_datetime.h b/costa/native/cta/include/cta_datetime.h new file mode 100644 index 000000000..63d419ad5 --- /dev/null +++ b/costa/native/cta/include/cta_datetime.h @@ -0,0 +1,122 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2007 Johan Ansink + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_datetime.h +\brief Interface description of the default COSTA datetime component. + +A datetime object describes a date and time object +*/ + +#ifndef CTA_DATETIME_H +#define CTA_DATETIME_H + +#include +#include "cta_handles.h" +#include "cta_datatypes.h" +#include "cta_system.h" + +#define MJDREF 2400000.5 + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Julian day number from Gregorian date. + * + * \param year I Year + * \param month I Month + * \param day I Day + * \param hour I Hour + * \param minute I Minute + * \param second I Second + * \param jd O Julian day number + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_DateTime_GregorianToJulian(int year, int month, int day, int hour, int minute, int second, double *jd); + +/** \brief Modified Julian day number from Gregorian date. + * + * \param year I Year + * \param month I Month + * \param day I Day + * \param hour I Hour + * \param minute I Minute + * \param second I Second + * \param mjd O Modified Julian day number + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_DateTime_GregorianToModifiedJulian(int year, int month, int day, int hour, int minute, int second, double *mjd); + +/** \brief Convert days into hours, minutes, and seconds. + * + * \param days I Year + * \param hour O Hour + * \param minute O Minute + * \param second O Second + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_DateTime_DaysToHMS (double days, int *hour, int *minute, int *second); + +/** \brief Julian day number from Modified Julian day number + * + * \param mjd I Modified Julian day number + * \param jd O Julian day number + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_DateTime_ModifiedJulianToJulian(double mjd, double *jd); + +/** \brief Gregorian calendar date from Julian day number + * + * \param jd I Julian day number + * \param year O Year + * \param month O Month + * \param day O Day + * \param hour O Hour + * \param minute O Minute + * \param second O Second + * \param jd O Julian day number + * + * \return error status: CTA_OK if successful +*/ +CTAEXPORT int CTA_DateTime_JulianToGregorian(double jd, int *year, int *month, int *day, int *hour, int *minute, int *second); + +/** \brief Gregorian calendar date from Modified Julian day number + * + * \param mjd I Modified Julian day number + * \param year O Year + * \param month O Month + * \param day O Day + * \param hour O Hour + * \param minute O Minute + * \param second O Second + * + * \return error status: CTA_OK if successful +*/ +CTAEXPORT int CTA_DateTime_ModifiedJulianToGregorian(double mjd, int *year, int *month, int *day, int *hour, int *minute, int *second); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/costa/native/cta/include/cta_defaults.h b/costa/native/cta/include/cta_defaults.h new file mode 100644 index 000000000..ed4cce296 --- /dev/null +++ b/costa/native/cta/include/cta_defaults.h @@ -0,0 +1,111 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_defaults.h +\brief A list of all default class implementations. +*/ + +#ifndef CTA_DEFAULTS_H +#define CTA_DEFAULTS_H +#include "cta_system.h" +#include "cta_vector.h" +#include "cta_matrix.h" +#include "cta_sobs.h" +#include "cta_model.h" +#include "cta_file.h" + +#ifdef __cplusplus +extern "C" { +#endif +/** default dense vector class implementation */ +CTAEXPORT extern CTA_VecClass CTA_DEFAULT_VECTOR; + +/** default stochastic observer class implementation + * based on an sqlite3 database */ +CTAEXPORT extern CTA_SObsClass CTA_DEFAULT_SOBS; + +/** stochastic observer class implementation + * that concatenates stochastic observers of arbitrary kind */ +CTAEXPORT extern CTA_SObsClass CTA_COMBINE_SOBS; + +/** stochastic observer class implementation + * based on NetCDF input files */ +CTAEXPORT extern CTA_SObsClass CTA_NETCDF_SOBS; + +/** stochastic observer class implementation + * based on NetCDF input files */ +CTAEXPORT extern CTA_SObsClass CTA_MAORI_SOBS; + +/** stochastic observer class implementation + * based on user provided library */ +CTAEXPORT extern CTA_SObsClass CTA_USER_SOBS; + +/** observer description class implementation based on a simple table */ +CTAEXPORT extern CTA_ObsDescrClass CTA_OBSDESCR_TABLE; + +/** default dense matrix class implementation */ +CTAEXPORT extern CTA_MatClass CTA_DEFAULT_MATRIX; + +/** The SP (Single processor) model builder */ +CTAEXPORT extern CTA_ModelClass CTA_MODBUILD_SP; + +/** The PAR (multiple processor parallel) model builder */ +CTAEXPORT extern CTA_ModelClass CTA_MODBUILD_PAR; + + +/** File handle that actually prints to standard out */ +CTAEXPORT extern CTA_File CTA_FILE_STDOUT; + +/** The modelcombiner */ +CTAEXPORT extern CTA_ModelClass CTA_MODELCOMBINER; + +/** The B3B (Black box) model builder */ +CTAEXPORT extern CTA_ModelClass CTA_MODBUILD_B3B; + +/** The 'New' (Black box) model builder */ +CTAEXPORT extern CTA_ModelClass CTA_MODBUILD_BB; + +/** Operator for dispaying the scaled RMS of the roots of a treevector to use with method + * CTA_TreeVector_OpOnLeafs */ +CTAEXPORT CTA_Func extern CTA_OP_ROOT_RMS; + +/** Operator for finding the index of the maxabs of the roots of a + * treevector to use with method CTA_TreeVector_OpOnLeafs */ +CTAEXPORT CTA_Func extern CTA_OP_ROOT_AMAX; + +/** Operator for finding displaying the values at given indices of the + * roots of a treevector to use with method CTA_TreeVector_OpOnLeafs */ +CTAEXPORT CTA_Func extern CTA_OP_ROOT_PRINTI; + +/** Operator for dispaying the scaled Sum-of-Squares of the roots of a + treevector to use with method CTA_TreeVector_OpOnLeafs */ +CTAEXPORT CTA_Func extern CTA_OP_ROOT_SSQ; + +/** Initial random seed */ +CTAEXPORT extern long int CTA_INITIAL_RANDOM_SEED; + +/** Name of the user provided dynamic library with user functions */ +CTAEXPORT extern char userDefaultDynamicLibrary[256]; + + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/cta/include/cta_errors.h b/costa/native/cta/include/cta_errors.h new file mode 100644 index 000000000..5b0bbba86 --- /dev/null +++ b/costa/native/cta/include/cta_errors.h @@ -0,0 +1,108 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_errors.h +\brief Definitions of error return values used by COSTA. +*/ + + +#ifndef CTA_ERRORS_H +#define CTA_ERRORS_H + +// Common errors +/** No errors occured */ +#define CTA_OK ( 0) +/** The handle of a COSTA component is not associated to any + COSTA component nor is it the CTA_NULL handle. */ +#define CTA_ILLEGAL_HANDLE (-2) +/** A handle of a COSTA component instance corresponds to a + different component than expected */ +#define CTA_INCOMPATIBLE_HANDLE (-3) +/** A handle of a COSTA component cannot be found in administration */ +#define CTA_HANDLE_NOT_FOUND (-4) +/** Output array is too short to contain result */ +#define CTA_ARRAY_TOO_SHORT (10) +/** The datatype does not exist or is not supported */ +#define CTA_ILLEGAL_DATATYPE (11) +/** Dimensions of the component are different from input */ +#define CTA_DIMENSION_ERROR (12) +/** The operation is not (yet) supported for given combination of vectors-classes */ +#define CTA_INCOMPATIBLE_VECTORS (13) +/** The concatenation operation is not possible */ +#define CTA_CONCAT_NOT_POSSIBLE (14) +/** The setval operation is not possible */ +#define CTA_SETVAL_NOT_POSSIBLE (15) +/** Requested item not found */ +#define CTA_ITEM_NOT_FOUND (16) +/** Trying to access an non-initialised subtreevector */ +#define CTA_UNINITIALISED_SUBTREEVECTOR (17) +/** Operation between two different treevectors is not possible because build-up/dimension is different */ +#define CTA_TREEVECTORS_NOT_COMPATIBLE (18) +/** The operation is not (yet) supported for given combination of matrix-classes */ +#define CTA_INCOMPATIBLE_MATRICES (19) +/** There is no implementation available for this method */ +#define CTA_NOT_IMPLEMENTED (20) +/** Illegal specification of userdata argument */ +#define CTA_WRONG_USERDATA (21) +/** Opening of file failed */ +#define CTA_CANNOT_OPEN_FILE (22) +/** Command is not valid */ +#define CTA_INVALID_COMMAND (23) +/** Error in external library */ +#define CTA_EXTERNAL_ERROR (24) +/** The matrix is singular */ +#define CTA_SINGULAR_MATRIX (25) +/** The buffer length is too small */ +#define CTA_BUFFER_TOO_SMALL (26) +/** The import/export format is not supported */ +#define CTA_FORMAT_NOT_SUPPORTED (27) +/** The metainfo of two treevectors is incompatible */ +#define CTA_INCOMPATIBLE_METAINFO (28) +/** The combined model is invalid */ +#define CTA_COMBINED_MODEL_ERROR (29) +/** The announced observations do not fit into timespan of compute */ +#define CTA_ANNOUNCED_OBS_INTERVAL_ERROR (30) + +/** Operation is not (yet) implemented for given object */ +#define CTA_INPUT_OBJECT_NOT_SUPPORTED (31) +/** Input arguments are of incompatible data type */ +#define CTA_INPUT_OBJECTS_ARE_INCOMPATIBLE (32) +/** The relation tables cannot be combined */ +#define CTA_RELTABLES_CANNOT_BE_COMBINED (33) +/** The matrix is not square */ +#define CTA_MATRIX_IS_NOT_SQUARE (34) + +/** The input value is not correct */ +#define CTA_ILLEGAL_INPUT_ARGUMENT (34) + +/** Cannot find the process group */ +#define CTA_CANNOT_FIND_PROCESS_GROUP (35) + +/** Some JNI-interface error */ +#define CTA_JNI_INTERFACING_ERROR (36) +#define CTA_RESULTWRITER_ERROR (37) + +/** Hmm. you have found an internal error in the COSTA implementation */ +#define CTA_INTERNAL_ERROR (911) +/** You tried to do something that should work but has not been implemented yet in COSTA. + Please implement and help improving COSTA*/ +#define CTA_NOT_YET_SUPPORTED (999) + +#endif diff --git a/costa/native/cta/include/cta_f77blas.h b/costa/native/cta/include/cta_f77blas.h new file mode 100644 index 000000000..114a47b3e --- /dev/null +++ b/costa/native/cta/include/cta_f77blas.h @@ -0,0 +1,167 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_f77blas.h +\brief Package of internally used functions for C/FORTRAN compatibility. +*/ + +#ifndef CTA_F77BLAS_H +#define CTA_F77BLAS_H +#include "cta_system.h" + +/*------------------*/ +/* Single precision */ +/*------------------*/ + +#define SSCAL_F77 F77_FUNC(sscal,SSCAL) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT void SSCAL_F77(int *n, float *alpha, float *x, int *incx); + +#define SCOPY_F77 F77_FUNC(scopy,SCOPY) +#ifdef __cplusplus +extern "C" /*bla */ +#endif +CTAEXPORT void SCOPY_F77(int *n, float *x, int *incx, float *y, int *incy); + +#define SAXPY_F77 F77_FUNC(saxpy,SAXPY) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT void SAXPY_F77(int *n, float *alpha, float *x, int *incx, float *y, int *incy); + +#define SDOT_F77 F77_FUNC(sdot,SDOT) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT float SDOT_F77(int *n, float *x, int *incx, float *y, int *incy); + +#define SNRM2_F77 F77_FUNC(snrm2,SNRM2) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT float SNRM2_F77(int *n, float *x, int *incx); + +#define ISAMAX_F77 F77_FUNC(isamax,ISAMAX) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT int ISAMAX_F77(int *n, float *x, int *incx); + +#define SGER_F77 F77_FUNC(sger,sger) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT int SGER_F77(int *m, int *n, float *alpha, float *x, int *incx, + float *y, int *incy, float *a, int *lda); + +#define SGEMV_F77 F77_FUNC(sgemv,SGEMV) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT void SGEMV_F77(char *trans, int *m, int *n, float* alpha, float *A, int *lda, + float *x, int *incx, float *beta, float *y, int *incy, + int lentrans); + +#define SGEMM_F77 F77_FUNC(sgemm,SGEMM) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT void SGEMM_F77(char *transa, char *transb, int *m, int *n, int *k, + float* alpha, float *A, int *lda, + float *B, int *ldb, float *beta, float *C, int *ldc, + int lentrans1, int lenstran2); + +/*------------------*/ +/* Double precision */ +/*------------------*/ +#define DSCAL_F77 F77_FUNC(dscal,DSCAL) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT void DSCAL_F77(int *n, double *alpha, double *x, int *incx); + +#define DCOPY_F77 F77_FUNC(dcopy,DCOPY) +#ifdef __cplusplus +extern "C" /*bla */ +#endif +CTAEXPORT void DCOPY_F77(int *n, double *x, int *incx, double *y, int *incy); + +#define DAXPY_F77 F77_FUNC(daxpy,DAXPY) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT void DAXPY_F77(int *n, double *alpha, double *x, int *incx, float *y, int *incy); + +#define DDOT_F77 F77_FUNC(ddot,DDOT) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT double DDOT_F77(int *n, double *x, int *incx, double *y, int *incy); + +#define DNRM2_F77 F77_FUNC(dnrm2,DNRM2) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT double DNRM2_F77(int *n, double *x, int *incx); + +#define IDAMAX_F77 F77_FUNC(idamax,IDAMAX) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT int IDAMAX_F77(int *n, double *x, int *incx); + +#define DGER_F77 F77_FUNC(dger,dger) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT int DGER_F77(int *m, int *n, double *alpha, double *x, int *incx, + double *y, int *incy, double *a, int *lda); + +#define DGEMV_F77 F77_FUNC(dgemv,DGEMV) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT void DGEMV_F77(char *trans, int *m, int *n, double* alpha, double *A, int *lda, + double *x, int *incx, double *beta, double *y, int *incy, + int lentrans); + +#define DGEMM_F77 F77_FUNC(dgemm,DGEMM) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT void DGEMM_F77(char *transa, char *transb, int *m, int *n, int *k, + double* alpha, double *A, int *lda, + double *B, int *ldb, double *beta, double *C, int *ldc, + int lentrans1, int lenstran2); + + + + + +#endif + + + + + + + diff --git a/costa/native/cta/include/cta_f77lapack.h b/costa/native/cta/include/cta_f77lapack.h new file mode 100644 index 000000000..dd71f2990 --- /dev/null +++ b/costa/native/cta/include/cta_f77lapack.h @@ -0,0 +1,101 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_f77lapack.h +\brief Package of internally used functions for C/FORTRAN compatibility. +*/ + +#ifndef CTA_F77LAPACK_H +#define CTA_F77LAPACK_H +#include "cta_system.h" + +/*------------------*/ +/* Single precision */ +/*------------------*/ + +#define SGESV_F77 F77_FUNC(sgesv,SGESV) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT void SGESV_F77(int *n, int *nrhs, float *A, int *lda, int *ipiv, + float *B, int *ldb, int *info); + +#define SGETRS_F77 F77_FUNC(sgetrs,SGETRS) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT void SGETRS_F77(char *trans, int *n, int *nrhs, float *A, int *lda, int *ipiv, + float *B, int *ldb, int *info, int lentrans); + +#define SGEEV_F77 F77_FUNC(sgeev,SGEEV) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT void SGEEV_F77( char *JOBVL, char *JOBVR, int *N, float *A, + int *LDA, float *WR, float *WI, float *VL, + int *LDVL, float *VR, int *LDVR, + float *WORK, int *LWORK, int *INFO); + +/*------------------*/ +/* Double precision */ +/*------------------*/ +#define DGESV_F77 F77_FUNC(dgesv,DGESV) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT void DGESV_F77(int *n, int *nrhs, double *A, int *lda, int *ipiv, + double *B, int *ldb, int *info); + +#define DGETRS_F77 F77_FUNC(dgetrs,DGETRS) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT void DGETRS_F77(char *trans, int *n, int *nrhs, double *A, int *lda, int *ipiv, + double *B, int *ldb, int *info, int lentrans); + +#define DPOSV_F77 F77_FUNC(dposv,DPOSV) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT void DPOSV_F77(char *uplo, int *n, int *nrhs, double *A, int *lda, double *B, + int *ldb, int *info); + +#define DLASRT_F77 F77_FUNC(dlasrt,DLASRT) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT void DLASRT_F77(char *ID, int *n, double *D, int *info, int lenid); + +#define DGEEV_F77 F77_FUNC(dgeev,DGEEV) +#ifdef __cplusplus +extern "C" /* prevent C++ name mangling */ +#endif +CTAEXPORT void DGEEV_F77( char *JOBVL, char *JOBVR, int *N, double *A, + int *LDA, double *WR, double *WI, double *VL, + int *LDVL, double *VR, int *LDVR, + double *WORK, int *LWORK, int *INFO); + +#endif + + + + + + diff --git a/costa/native/cta/include/cta_file.h b/costa/native/cta/include/cta_file.h new file mode 100644 index 000000000..db0de178b --- /dev/null +++ b/costa/native/cta/include/cta_file.h @@ -0,0 +1,133 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_file.h +\brief Functions for the implementation of CTA_FILE objects. +*/ + +#ifndef CTA_FILE_H +#define CTA_FILE_H + +#include +#include "cta_system.h" +#include "cta_string.h" +#include "cta_handles.h" +#include "cta_datatypes.h" + +#ifdef __cplusplus +extern "C" { +#endif +/* Function Handle */ +typedef CTA_Handle CTA_File; + +/* functions */ + +/** \brief Create a new COSTA file + * for holding a C file descriptor of a FORTRAN file LUN + * + * \note This call does not open a file. + * No FORTRAN support in this version + * + * \param hfile O receives handle of created file + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_File_Create(CTA_File *hfile); + +/** \brief Free a new COSTA file-handle + * + * \note The File is not closed (in this version) + * + * \param hfile IO handle of COSTA file CTA_NULL on return + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_File_Free (CTA_File *hfile); + +/** \brief Get the C-file descriptor of the COSTA file + * + * \note + * + * \param hfile I handle of COSTA file-handle + * \param file O receives file descriptor + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_File_Get (CTA_File hfile, FILE **file); + +/** \brief Get the NETCDF file id of the COSTA file + * + * \note + * + * \param hfile I handle of COSTA file-handle + * \param ncid O receives NETCDF file id + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_File_GetNetcdf(CTA_File hfile,int *ncid); + + +/** \brief Set the C-file descriptor of the COSTA file + * + * \note + * + * \param hfile IO handle of COSTA file-handle + * \param file I file descriptor + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_File_Set (CTA_File hfile, FILE *file); + +/** \brief Open a C-file and set descriptor + * + * \note + * + * \param hfile I provide a valid handle of COSTA file + * \param sname I file path + * \param smode I open-mode (see C fopen documentation) + * if CTA_NULL is provided, the file will be + * opened with read/write access (file pointer at begin + * of file) + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_File_Open (CTA_File hfile, CTA_String sname, CTA_String smode); + +/** \brief check whether file is a NETCDF file + * + * \param hfile I provide a valid handle of COSTA file + * \param isnetcdf O CTA_TRUE if file is NETCDF file CTA_FALSE otherwise + * + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_File_IsNetcdf(CTA_File hfile, int *isnetcdf); + + +/** \brief Write a string to file + * + * \note + * + * \param hfile I handle of COSTA file + * \param str I string that must be written to file + * \param eol I Add end of line, CTA_TRUE for adding end of line or CTA_FALSE otherwise + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_File_WriteStr(CTA_File hfile, char *str, int eol); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/costa/native/cta/include/cta_flush.h b/costa/native/cta/include/cta_flush.h new file mode 100644 index 000000000..97a4b8fee --- /dev/null +++ b/costa/native/cta/include/cta_flush.h @@ -0,0 +1,49 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_flush.h +\brief Flush function for compatibility reasons with FORTRAN. +*/ + +#ifndef CTA_FLUSH_H +#define CTA_FLUSH_H + +#include "cta_system.h" + +/* Function Handle */ +#ifdef __cplusplus +extern "C" { +#endif + +/* functions */ +/** \brief Flushes STDOUT and STDERR both for C as FORTRAN + * + * \note Function is introduced because some FORTRAN compilers use + * different flushing than the C compiler. As a result the order of output + * generated by both languages is "random". This routine makes sure that + * all buffers have been flushed. + * + */ +CTAEXPORT void CTA_Flush(); +#ifdef __cplusplus +} +#endif +#endif + diff --git a/costa/native/cta/include/cta_functions.h b/costa/native/cta/include/cta_functions.h new file mode 100644 index 000000000..ba3fca5cc --- /dev/null +++ b/costa/native/cta/include/cta_functions.h @@ -0,0 +1,152 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_functions.h +\brief Functions for working with CTA_Function objects. CTA_Function objects can be used to address functions. +*/ + +#ifndef CTA_FUNCTIONS_H +#define CTA_FUNCTIONS_H +#include "cta_system.h" +#include "cta_datatypes.h" +#include "cta_handles.h" +#include "cta_interface.h" +#include "ctai_xml.h" + +/* Function Handle */ +typedef CTA_Handle CTA_Func; + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \brief Create a new COSTA function. + * + * \note Argument name is only used for debugging and output. + * + * \param name I name of the new function for debugging purposes + * \param function I pointer to function that has to be associated + * with new COSTA function + * \param hintf I handle of associated interface + * \param hfunc O receives handle of created COSTA function + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_Func_Create(const char *name, CTA_Function *function , + const CTA_Intf hintf, CTA_Func *hfunc); + +/** \brief Create a COSTA function loaded from dynamic library +* +* + +* \param libraryName I Name of the dynamic library excluding the extention (.dll, .so, .dylib) eg "libmyroutines" +* \param functionName I Name of the function to load. +* \param name I Name of the function used inside the OpenDA administration +* \param id I Name (id) of the handle that is created (needed when stored as part of a tree +* \return Handle to function or CTA_NULL in case of an error. +* +* \note When the load of the function fails, some decorated, typical Fortran variations are tried as well +* +*/ +CTANOF90 CTAEXPORT CTA_Func CTA_CreateFuncDynamicLib(char *libraryName, char *functionName, char *name, char *id); + +/** \brief Duplicates a user defined function + * + * \param hfunc I COSTA user function handle + * \param hdupl I duplication of hfunc + * \return error status: CTA_OK + */ +CTAEXPORT int CTA_Func_Duplicate(CTA_Func hfunc, CTA_Func *hdupl); + + + +/** \brief Free a new COSTA function. + * + * \note + * + * \param hfunc IO handle of COSTA function, replaced by CTA_NULL on return + * \return CTA_OK if sucessful + */ +CTAEXPORT int CTA_Func_Free(CTA_Func *hfunc); + +/** \brief Get interface of COSTA function. + * + * \note For performance reasons, the interface is not a copy but a handle + * to the actual interface, it should NOT be freed by the calling routine! + * + * \param hfunc I handle of COSTA function + * \param hintf O receives handle of interface of function + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_Func_GetIntf(const CTA_Func hfunc, CTA_Intf *hintf); + +/** \brief Get function pointer of function + * + * \note There is no FORTRAN verion of this function available + * + * \param hfunc I handle of COSTA function. + * \param function O receives pointer to function + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_Func_GetFunc(const CTA_Func hfunc, CTA_Function **function); + +/** \brief Get name of function + * + * \note Future versions will return a COSTA string handle. + * + * \param hfunc I handle of COSTA function. + * \param name O handle of string object that is to receive function name, must exist before calling + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_Func_GetName(const CTA_Func hfunc, CTA_String name); + +/** \brief Set userdata of function + * + * \note Frees existing user data and replaces it with userdata + * + * \param hfunc IO handle of COSTA function. + * \param userdata I new userdata handles + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_Func_SetUserdata(const CTA_Func hfunc, const CTA_Handle userdata ); + +/** \brief Get userdata of function + * + * \param hfunc I handle of COSTA function. + * \param userdata O userdata handle + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_Func_GetUserdata(const CTA_Func hfunc, CTA_Handle userdata ); + +/** \brief Create a COSTA function from XML +* (load from dynamic load library). +* +* \param cur_node I Current XML node +* \return Handle to create or CTA_NULL in case of an error. +*/ +CTANOF90 CTAEXPORT CTA_Func CTAI_XML_CreateFunc(xmlNode *cur_node); + + + +#ifdef __cplusplus +} +#endif +#endif + diff --git a/costa/native/cta/include/cta_handles.h b/costa/native/cta/include/cta_handles.h new file mode 100644 index 000000000..794cbf29a --- /dev/null +++ b/costa/native/cta/include/cta_handles.h @@ -0,0 +1,178 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_handles.h +\brief Description of functions for working with CTA_Handle objects that are being used to access COSTA objects. +*/ + +#ifndef CTA_HANDLES_H +#define CTA_HANDLES_H + +#include "cta_datatypes.h" + +// Constants + +// Derived types +CTAEXPORT typedef int CTA_Handle; + +/* String instance handle - needed for GetName() */ +CTAEXPORT typedef CTA_Handle CTA_String; + +#ifdef __cplusplus +extern "C" { +#endif + +// Interfaces +/** \brief Create a new COSTA handle + * + * \param name I name associated with handle + * \param datatype I data type of handle + * \param data I block of data associated to handle + * \param handle O receives COSTA handle + * \return error status: CTA_OK + */ +CTAEXPORT int CTA_Handle_Create(const char *name, const CTA_Datatype datatype, + void *data, CTA_Handle *handle); + +/** \brief Free a COSTA handle + * + * \note The data part of the handle is NOT freed. + * + * \param handle IO handle that is to be freed, replaced by CTA_NULL on return. + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +CTAEXPORT int CTA_Handle_Free(CTA_Handle *handle); + +/** \brief Free a COSTA handle + * + * \note Calls datatype-specific free methods, if available. + * + * \param handle IO handle that must be freed, replaced by CTA_NULL on return. + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +CTAEXPORT int CTA_Handle_Free_All(CTA_Handle *handle); + +/** \brief Check whether a handle is valid and checks type + * + * \note The handle CTA_NULL is not valid. + * + * \param handle I COSTA handle + * \param datatype I data type to compare handle with + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE + * + */ +CTAEXPORT int CTA_Handle_Check(const CTA_Handle handle,const CTA_Datatype datatype); + +/** \brief Get pointer to data element of handle + * + * \param handle I COSTA handle + * \param data O receives pointer to data element + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +CTAEXPORT int CTA_Handle_GetData(const CTA_Handle handle, void **data); + +/** \brief Get the value the handle points to + * + * \param handle I COSTA handle + * \param value O receives pointer to data element + * \param datatype I specify the data type of *value, must be the same as data type of handle + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE + */ +CTAEXPORT int CTA_Handle_GetValue(const CTA_Handle handle, void *value, CTA_Datatype datatype); + +/** \brief Get name associated with handle + * + * \param handle I COSTA handle + * \param hname O receives name of data type + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +CTAEXPORT int CTA_Handle_GetName(const CTA_Handle handle, CTA_String hname); + +/** \brief Get data type associated with handle + * + * \param handle I COSTA handle + * \param datatype O receives data type of handle + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +CTAEXPORT int CTA_Handle_GetDatatype(const CTA_Handle handle, CTA_Datatype *datatype); + +/** \brief Set name associated with handle (internal use only!) + * + * \param handle IO COSTA handle + * \param name I COSTA string + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +CTAEXPORT int CTAI_Handle_SetName(const CTA_Handle handle, const char *name); + +/** \brief Find a handle by name and data type in the COSTA handle administration + * + * \param sname I name of handle + * \param datatype I data type of handle + * \param handlenr O receives the handle (only if return value is CTA_OK) + * \return error status: CTA_OK, CTA_HANDLE_NOT_FOUND + */ +CTAEXPORT int CTA_Handle_Find(CTA_String sname, CTA_Datatype datatype, int *handlenr); + +/** \brief Print overview of all COSTA handles + * + * \return error status: CTA_OK + */ +CTAEXPORT int CTA_Handle_Printall(); + +/** \brief Counts all handles sorts them by type and prints overview. + * This function can be usefull for detecting memory leaks that are + * the result of not freeing instances of COSTA objects costa objects. + * + * \param location I String to indicate location of call + * \return error status: CTA_OK + */ +CTAEXPORT int CTA_Handle_PrintInfo(const char *location); + +/** \brief Get the reference count of the handle + * + * \param handle I COSTA handle + * \param refCount O reference count of handle + * \return error status: CTA_OK + * + */ +CTAEXPORT int CTA_Handle_GetRefCount(const CTA_Handle handle, int *refCount); + +/** \brief Increase the reference count of the handle + * + * \param handle I COSTA handle + * \return error status: CTA_OK + * + */ +CTAEXPORT int CTA_Handle_IncRefCount(const CTA_Handle handle); + +/** \brief Decrease the reference count of the handle + * + * \param handle I COSTA handle + * \return error status: CTA_OK + * + */ +CTAEXPORT int CTA_Handle_DecrRefCount(const CTA_Handle handle); + + +#ifdef __cplusplus +} +#endif +#endif + diff --git a/costa/native/cta/include/cta_initialise.h b/costa/native/cta/include/cta_initialise.h new file mode 100644 index 000000000..e85ff4408 --- /dev/null +++ b/costa/native/cta/include/cta_initialise.h @@ -0,0 +1,46 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_initialise.h +\brief CTA_Initialise for COSTA initialisation. +*/ + +#ifndef CTA_INITIALISE_H +#define CTA_INITIALISE_H +#include "cta_system.h" +#include "cta_vector.h" + +#ifdef __cplusplus +extern "C" { +#endif +/** \brief Initialize the COSTA environment + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Core_Initialise(); + +/** \brief Finalise the COSTA environment + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Core_Finalise(); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/cta/include/cta_interface.h b/costa/native/cta/include/cta_interface.h new file mode 100644 index 000000000..ffbee4fff --- /dev/null +++ b/costa/native/cta/include/cta_interface.h @@ -0,0 +1,143 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_interface.h +\brief Description of the COSTA interface componennt used for +communication with user implementation classes. + +To create an object of your user implementation go through the following steps: + +1: Create a new class (=implementation) of a COSTA component by calling the +appropriate DefineClass() function providing the name of the class, a matrix with +CTA_Function objects describing the user functions. + +example: + +void Usr_Object_initialise(CTA_MatClass *hmatcl){ + CTA_Intf hintf=0;\n + CTA_Func h_func[CTA_MATRIX_NUMFUNC];\n +// CTA_Func h_create_size;\n +// CTA_Func h_Create_Init;\n + int ierr;\n + + ierr=CTA_Func_Create(" ",&CTAI_Matrix_Create_Size, hintf,&h_func[CTA_MATRIX_CREATE_SIZE]);\n + ierr=CTA_Func_Create(" ",&CTAI_Matrix_Create_Init, hintf,&h_func[CTA_MATRIX_CREATE_INIT]);\n + ierr=CTA_Func_Create(" ",&CTAI_Matrix_getvals, hintf,&h_func[CTA_MATRIX_GETVALS] );\n + ierr=CTA_Func_Create(" ",&CTAI_Matrix_getval, hintf,&h_func[CTA_MATRIX_GETVAL] );\n + ierr=CTA_Func_Create(" ",&CTAI_Matrix_setcol, hintf,&h_func[CTA_MATRIX_SETCOL] );\n + ierr=CTA_Func_Create(" ",&CTAI_Matrix_setvals, hintf,&h_func[CTA_MATRIX_SETVALS] );\n + ierr=CTA_Func_Create(" ",&CTAI_Matrix_setval, hintf,&h_func[CTA_MATRIX_SETVAL] );\n + ierr=CTA_Func_Create(" ",&CTAI_Matrix_setconst, hintf,&h_func[CTA_MATRIX_SETCONST] );\n + ierr=CTA_Func_Create(" ",&CTAI_Matrix_Export, hintf,&h_func[CTA_MATRIX_EXPORT] );\n + ierr=CTA_Func_Create(" ",&CTAI_Matrix_Ger, hintf,&h_func[CTA_MATRIX_GER] );\n + ierr=CTA_Func_Create(" ",&CTAI_Matrix_Inv, hintf,&h_func[CTA_MATRIX_INV] );\n + ierr=CTA_Func_Create(" ",&CTAI_Matrix_Gemv, hintf,&h_func[CTA_MATRIX_GEMV] );\n + ierr=CTA_Func_Create(" ",&CTAI_Matrix_Gemm, hintf,&h_func[CTA_MATRIX_GEMM] );\n + ierr=CTA_Func_Create(" ",&CTAI_Matrix_Axpy, hintf,&h_func[CTA_MATRIX_AXPY] );\n + ierr=CTA_Func_Create(" ",&CTAI_Matrix_free, hintf,&h_func[CTA_MATRIX_FREE] );\n + + ierr=CTA_Object_DefineClass("cta_matrix_blas",h_func,hmatcl);\n +} + +2: use the resulting class object for creating the user object in following way: + +int CTA_Matrix_Create(classobject, ..., *userdata, Usr_Object *usrObject); + +*/ + +#ifndef CTA_INTERFACE_H +#define CTA_INTERFACE_H +#include "cta_system.h" +#include "cta_datatypes.h" +#include "cta_handles.h" + +CTAEXPORT typedef int CTA_Intf; + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Create a new interface + * + * \param name I name of the new interface + * \param argtyp I list with the data types of arguments + * \param narg I number of arguments of interface + * \param hintf O receives the new COSTA interface handle + * \return error status: CTA_OK + */ +CTAEXPORT int CTA_Intf_Create(const char *name, const CTA_Datatype *argtyp, + const int narg,CTA_Intf *hintf); + +/** \brief Free an interface + * + * \note Freeing CTA_NULL is allowed. + * + * \param hintf IO handle of interface, replaced by CTA_NULL on return + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE + */ +CTAEXPORT int CTA_Intf_Free(CTA_Intf *hintf); + +/** \brief Match two interfaces for compatibility argumentlist-argumentlist + * + * \note Two interfaces are compatible if all arguments have the same datatype, + * CTA_VOID is compatible with all other arguments except for CTA_FSTRING + * + * \param argtyp1 I list with the data types of arguments of first interface + * \param narg1 I number of arguments in first interface + * \param argtyp2 I list with the data types of arguments of second interface + * \param narg2 I number of arguments in second interface + * \param flag O receives TRUE if interfaces are compatible FALSE ortherwise + * \return error status: CTA_OK + */ +CTAEXPORT int CTA_Intf_Match_aa(const CTA_Datatype *argtyp1, const int narg1, + const CTA_Datatype *argtyp2, const int narg2, + BOOL *flag); + +/** \brief Match two interfaces for compatibility handle-argumentlist + * + * \note Two interfaces are compatible if all arguments have the same datatype, + * CTA_VOID is compatible with all other arguments except for CTA_FSTRING + * + * \param hintf1 I handle of first interface + * \param argtyp2 I list with the data types of arguments of second interface + * \param narg2 I number of arguments in second interface + * \param flag O receives TRUE if interfaces are compatible FALSE ortherwise + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE + */ +CTAEXPORT int CTA_Intf_Match_ha(const CTA_Intf hintf1, + const CTA_Datatype *argtyp2, const int narg2, BOOL *flag); + +/** \brief Match two interfaces for compatibility handle-handle + * + * \note two interfaces are compatible if all arguments have the same datatype, + * CTA_VOID is compatible with all other arguments except for CTA_FSTRING + * + * \param hintf1 I handle of first interface + * \param hintf2 I handle of second interface + * \param flag O receives TRUE if interfaces are compatible FALSE ortherwise + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE + */ +CTAEXPORT int CTA_Intf_Match_hh(const CTA_Intf hintf1, const CTA_Intf hintf2, BOOL *flag); + +#ifdef __cplusplus +} +#endif +#endif + diff --git a/costa/native/cta/include/cta_matrix.h b/costa/native/cta/include/cta_matrix.h new file mode 100644 index 000000000..f08426460 --- /dev/null +++ b/costa/native/cta/include/cta_matrix.h @@ -0,0 +1,297 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_matrix.h +\brief The interface description of the COSTA matrix component. For user implementation see cta_usr_matrix.h. + +CTA_Matrix is the default class implementation for matrices. +*/ + +#ifndef CTA_MATRIX_H +#define CTA_MATRIX_H +#include "cta_system.h" +#include "cta_handles.h" +#include "cta_datatypes.h" +#include "cta_functions.h" +#include "cta_vector.h" + +/* Function Handle */ +typedef CTA_Handle CTA_Matrix; +typedef CTA_Handle CTA_MatClass; + +/* parameters for different functions */ +#define CTA_MATRIX_CREATE_SIZE ( 1) +#define CTA_MATRIX_CREATE_INIT ( 2) +#define I_CTA_MATRIX_GETVALS ( 4) +#define I_CTA_MATRIX_GETVAL ( 5) +#define I_CTA_MATRIX_SETCOL ( 6) +#define I_CTA_MATRIX_SETVALS ( 7) +#define I_CTA_MATRIX_SETVAL ( 8) +#define I_CTA_MATRIX_SETCONST ( 9) +#define I_CTA_MATRIX_FREE (10) +#define I_CTA_MATRIX_EXPORT (11) +#define I_CTA_MATRIX_GER (12) +#define I_CTA_MATRIX_INV (13) +#define I_CTA_MATRIX_GEMV (14) +#define I_CTA_MATRIX_GEMM (15) +#define I_CTA_MATRIX_AXPY (16) +#define CTA_MATRIX_NUMFUNC (17) + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Create a new class (=implementation) of a COSTA matrix component. + * + * \param name I name of the new matrix class + * \param h_func I COSTA function handles for functions that implement class, + * missing functions must have value CTA_NULL + * \param hmatcl O receives handle of new matrix class + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Matrix_DefineClass(const char *name, + const CTA_Func h_func[CTA_MATRIX_NUMFUNC], + CTA_MatClass *hmatcl); + +/** \brief Duplicate a matrix instance. + * + * \note Only size, data type and class type are duplicated, the values are not + * copied. + * + * \param hmat1 I handle of matrix to be duplicated + * \param hmat2 O receives handle of duplicate matrix, empty handle before calling + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Matrix_Duplicate(CTA_Matrix hmat1, CTA_Matrix *hmat2); + +/** \brief Create a new matrix. + * + * \note + * + * \param hmatcl I matrix class of new matrix + * \param m I number of rows + * \param n I number of columns + * \param datatype I datatype of elements in matrix + * \param userdata IO userdata for creation (depends on class) + * \param hmat O receives handle of new matrix + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Matrix_Create(CTA_MatClass hmatcl, const int m, const int n, + CTA_Datatype datatype, CTA_Handle userdata, CTA_Matrix *hmat); + +/* *\brief Get size of matrix. + * + * \note + * + * \param hmat I handle of matrix + * \param m O receives number of rows + * \param n O receives number of columns + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Matrix_GetSize(CTA_Matrix hmat, int *m, int *n); + +/** \brief Get datatype of matrix + * + * \note + * + * \param hmat I handle of matrix + * \param datatype O receives data type of matrix + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Matrix_GetDatatype(CTA_Matrix hmat, CTA_Datatype *datatype); + +/** \brief Get copy of all values in matrix. + * + * \note The elements in the matrix are returned column-wise + * (FORTRAN matrix representation) + * + * \param hmat I handle of matrix + * \param vals O copy of values in matrix + * \param m I number of rows of vals (must be the same as for the matrix) + * \param n I number of columns of vals (must be the same as for the matrix) + * \param datatype I data type of *vals, must be the same as data type of matrix elements + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Matrix_GetVals(CTA_Matrix hmat, void *vals, int m, int n, + CTA_Datatype datatype); + +/** \brief Get copy of single value in the matrix. + * + * \note Counting of the indices starts from 1. + * \param hmat I handle of matrix + * \param val O receives copy of value in matrix + * \param m I row index of value to be copied + * \param n I column index of value to be copied + * \param datatype I data type of *val, must be the same as data type of matrix elements + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Matrix_GetVal (CTA_Matrix hmat, void *val , int m, int n, + CTA_Datatype datatype); + +/** \brief Set whole matrix to one single value. + * + * \note + * \param hmat IO handle of matrix + * \param val I value that must be set + * \param datatype I data type of *val, must be the same as data type of matrix elements + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Matrix_SetConstant(CTA_Matrix hmat, void *val, CTA_Datatype datatype); + +/** \brief Set values of a single column of the matrix. + * + * \note Counting of the indices starts from 1. + * \param hmat IO handle of matrix + * \param n I index of matrix column to set + * \param hvec I handle of COSTA vector the values have to be set to, length must be the same as number of rows of matrix + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Matrix_SetCol(CTA_Matrix hmat, int n, CTA_Vector hvec); + +/** \brief Set all values of the matrix. + * + * \note The elements in vals should be column-wise + * (FORTRAN matrix representation) + * \param hmat IO handle of matrix + * \param vals I copy of values in matrix + * \param m I number of rows of vals (must be the same as for the matrix) + * \param n I number of columns of vals (must be the same as for the matrix) + * \param datatype I data type of vals + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Matrix_SetVals(CTA_Matrix hmat, void *vals, int m, int n, + CTA_Datatype datatype); + +/** \brief Set a single value in the matrix. + * + * \param hmat IO handle of matrix + * \param val I value to be set at position (m,n) + * \param m I row index + * \param n I column index + * \param datatype I data type of *val, must be the same as data type of matrix elements + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Matrix_SetVal(CTA_Matrix hmat, void *val, int m, int n, + CTA_Datatype datatype); + +/** \brief Export a matrix. + * + * \note CTA_DEFAULT_MATRIX supports exporting to:\n + * file (usedoc is handle of COSTA file)\n + * + * \param hmat I handle of matrix + * \param usedoc I configuration of output + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Matrix_Export(CTA_Matrix hmat, CTA_Handle usedoc); + + +/** \brief Perform a rank 1 operation A:=alpha*x*y'+A + * + * \param hmat IO handle of matrix A + * \param alpha I scalar + * \param vx I vector x + * \param vy I vector y + * \return error status: CTA_OK if successful + * \note it is allowed that are the same object (*vx==*vy) + */ +CTAEXPORT int CTA_Matrix_Ger(CTA_Matrix hmat, double alpha, CTA_Vector vx, + CTA_Vector vy); + + +/** \brief Compute inverse of a square matrix A:=inv(A) + * + * \param hmat IO handle of matrix A + * \return error status: CTA_OK if successful + * + */ +CTAEXPORT int CTA_Matrix_Inv(CTA_Matrix hmat); + +/** \brief Perform the matrix multiplication y:=alpha*OP(A)*x+beta*y + * where op(X)=X, X^T + * + * \param hmat I handle of matrix (A from the equation above) + * \param trans I transpose flag CTA_TRUE/CTA_FALSE for matrix A + * \param alpha I scalar + * \param vx I vector x + * \param beta I scalar + * \param vy IO vector y + * \return error status: CTA_OK if successful + * \note it is allowed that vectors are the same object (*vx==*vy) + */ +CTAEXPORT int CTA_Matrix_Gemv(CTA_Matrix hmat, int trans, double alpha, + CTA_Vector vx, double beta, CTA_Vector vy); + +/** \brief Perform the matrix multication C:=alpha*op(A)*op(B)+beta*C + where op(X)=X, X^T + * + * \param mC IO handle of matrix C + * \param transa I transpose flag CTA_TRUE/CTA_FALSE for matrix A + * \param transb I transpose flag CTA_TRUE/CTA_FALSE for matrix A + * \param alpha I scalar + * \param mA I handle of matrix A + * \param mB I handle of matrix B + * \param beta I scalar + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Matrix_Gemm(CTA_Matrix mC, int transa, int transb, double alpha, + CTA_Matrix mA, CTA_Matrix mB, double beta); + +/** \brief Perform the matrix addition Y:=alpha*X+Y + * + * \param mY IO handle of matrix Y + * \param alpha I scalar + * \param mX I handle of matrix X + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Matrix_Axpy(CTA_Matrix mY, double alpha, CTA_Matrix mX); + + +/** \brief Computes the eigenvalues and optionally the eigenvectors + * of a general matrix A + * + * The computed eigenvectors are normalized to have Euclidean norm + * equal to 1 and largest component real. + * + * \param A I Matrix A + * \param eigvals O Vector with eigenvalues + * \param eigvecs O Matrix with eigenvalues. The eigenvectors are + * not computed when eigvecs in CTA_NULL on entry + * \return error status: CTA_OK if successful + * \note the eigenvalues can be complex. Since COSTA does not yet support + * complex vectors, only the real part of the eigenvalues is + * returned. + */ +CTAEXPORT int CTA_Matrix_EigVals(CTA_Matrix A, CTA_Vector eigvals, CTA_Matrix eigvecs); + + +/** \brief Free the matrix object + * + * \Note hmat=CTA_NULL is allowed + * + * \param hmat IO handle of matrix, replaced by CTA_NULL on return. + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Matrix_Free(CTA_Matrix *hmat); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/cta/include/cta_matrix_blas.h b/costa/native/cta/include/cta_matrix_blas.h new file mode 100644 index 000000000..6acf74c84 --- /dev/null +++ b/costa/native/cta/include/cta_matrix_blas.h @@ -0,0 +1,294 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + + + +#ifndef CTA_MATRIX_BLAS_H +#define CTA_MATRIX_BLAS_H + +#include "cta_f77blas.h" +#include "cta_f77lapack.h" +#include "cta_datatypes.h" +#include "cta_handles.h" +#include "cta_vector.h" +#include "cta_matrix.h" +#include "cta_functions.h" + + +/** +\file cta_matrix_blas.h +\brief Interface description of the COSTA BLAS-matrix component (a standard implementation class of CTA_Matrix) that can be used for standard BLAS operations. + +BLAS matrix object data: + +\code +typedef struct { +CTA_Datatype datatype; +void *values; +int m; +int n; +int *perm; +BOOL inverse; +} CTAI_Matrix_blas; +\endcode +\n +*/ + + + + +typedef struct { +CTA_Datatype datatype; +void *values; +int m; +int n; +int *perm; +BOOL inverse; +} CTAI_Matrix_blas; + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Define the BLAS matrix class. Use the resulting class object when calling CTA_Matrix_Create(). + * + * \param hmatcl IO receives class BLAS matrix class object, must be empty before calling + * \return no return value + */ +CTANOEXPORT void CTA_Matrix_blas_initialise(CTA_MatClass *hmatcl); + + +/** \brief Implementation that forms part of the create process. + * + * Gives the memory size of a new BLAS matrix object. + * + * \param m I dimension, m (rows) + * \param n I dimension, n (columns) + * \param datatype I data type of matrix elements + * \param userdata IO user data (not being used) + * \param retval O receives return value + * \param memsize O receives the number of bytes which are necessary to store one + BLAS matrix class, with a pointer to the contents (data), but without the + contents themselves + * \return no return value + */ +void CTAI_Matrix_Create_Size(int *m, int *n, CTA_Datatype *datatype, + CTA_Handle userdata, int *retval, int *memsize); + +/** \brief Implementation that forms part of the create process. + * + * Prepares a BLAS matrix object for use. + * + * \param x IO pointer to object data of BLAS matrix + * \param m I dimension, m (rows) + * \param n I dimension, n (columns) + * \param datatype I data type of the matrix elements + * \param userdata IO user data (not being used) + * \param retval O receives return value + * \return no return value + */ +void CTAI_Matrix_Create_Init( CTAI_Matrix_blas *x, int *m, int *n, + CTA_Datatype *datatype, CTA_Handle userdata, int *retval); + + +/** \brief Get all values from the BLAS matrix. + * + * \note At index CTA_MATRIX_GETVALS in the function list of the class descriptor. + * + * \param x I pointer to object data of BLAS matrix + * \param vals O receives values; must exist before calling + * \param m I number of rows of *vals, must be the same as number of rows in matrix + * \param n I number of columns of *vals, must be the same as number of columns in matrix + * \param datatype I data type of value; must be the same as data type of matrix elements + * \param userdata IO user data (not being used) + * \param retval O receives return value + * \return no return value + */ +void CTAI_Matrix_getvals( CTAI_Matrix_blas *x, void *vals, int *m, + int *n, CTA_Datatype *datatype, int *retval); + + +/** \brief Get a single value from the BLAS matrix. + * + * \param x I pointer to object data of BLAS matrix + * \param val O receives value; must exist before calling + * \param m I row index of matrix value to get + * \param n I column index of matrix value to get + * \param datatype I data type of val; must be the same as data type of matrix elements + * \param retval O receives return value + * \return no return value + */ +void CTAI_Matrix_getval ( CTAI_Matrix_blas *x, void *val, int *m, + int *n, CTA_Datatype *datatype, int *retval); + + +/** \brief Set column of BLAS matrix. + * + * \param x IO pointer to object data of BLAS matrix + * \param n I index of column to set + * \param hvec I handle of sending column; dimensions must be compatible + * \param retval O receives return value + * \return no return value + */ +void CTAI_Matrix_setcol( CTAI_Matrix_blas *x, int *n, CTA_Vector *hvec, int *retval); + + +/** \brief Set all values of BLAS matrix. + * + * \param x IO pointer to object data of BLAS matrix + * \param vals I values to set to + * \param m I number of rows of *vals, must be the same as number of rows in matrix + * \param n I number of columns of *vals, must be the same as number of columns in matrix + * \param datatype I data type of value; must be the same as data type of matrix elements + * \param retval O receives return value + * \return no return value + */ +void CTAI_Matrix_setvals( CTAI_Matrix_blas *x, void *vals, int *m, int *n, + CTA_Datatype *datatype, int *retval); + + +/** \brief Set a single value of BLAS matrix. + * + * \param x IO pointer to object data of BLAS matrix + * \param val I value to set to + * \param m I row index of matrix value to set + * \param n I column index of matrix value to set + * \param datatype I data type of val; must be the same as data type of matrix elements + * \param retval O receives return value + * \return no return value + */ +void CTAI_Matrix_setval ( CTAI_Matrix_blas *x, void *val , int *m, int *n, + CTA_Datatype *datatype, int *retval); + + +/** \brief Set all BLAS matrix elements to constant value. + * + * \note At index CTA_MATRIX_SETCONST in the function list of the class descriptor. + * + * \param x IO pointer to object data of BLAS matrix + * \param val I constant value to set to + * \param datatype I data type of val; must be the same as data type of matrix elements + * \param retval O receives return value + * \return no return value + */ +void CTAI_Matrix_setconst( CTAI_Matrix_blas *x, void *val, + CTA_Datatype *datatype, int *retval); + + +/** \brief Export BLAS matrix. + * + * \note Export a BLAS matrix to file (userdata is a handle of COSTA file) or + * + * \param x I pointer to object data of BLAS matrix + * \param userdata IO user data (see note) + * \param retval O receives return value + * \return no return value + */ +void CTAI_Matrix_Export( + CTAI_Matrix_blas *x, + CTA_Handle userdata, + int *retval); + + +/** \brief Apply the BLAS operation GER: A=A+(alpha)x(y(T)) + * + * i.e. for matrix A, vectors x and y and scalar alpha + * + * \param A IO pointer to object data of BLAS matrix A; must exist before calling + * \param alpha I scalar + * \param vx I handle of vector x + * \param vy I handle of vector y + * \param retval O receives return value + * \return no return value + */ +void CTAI_Matrix_Ger( CTAI_Matrix_blas *A, double *alpha, CTA_Vector *vx, + CTA_Vector *vy, int *retval); + + +/** \brief Invert BLAS matrix. + * + * \param A IO pointer to object data of BLAS matrix + * \param retval O receives return value + * \return no return value + */ +void CTAI_Matrix_Inv( CTAI_Matrix_blas *A, int *retval); + + +/** \brief Apply the BLAS operation GEMV: A=(alpha)Ax+(beta)y + * + * i.e. for matrix A, vectors x (optionally transposed) and y and scalars alpha and beta + * + * \param A IO pointer to object data of BLAS matrix + * \param transa I transpose flag for matrix A (CTA_TRUE for transposing or CTA_FALSE otherwise) + * \param alpha I scalar + * \param vx I handle of vector x + * \param beta I scalar + * \param vy I handle of vector y + * \param retval O receives return value + * \return no return value + */ +void CTAI_Matrix_Gemv( CTAI_Matrix_blas *A, int *trans, double *alpha, CTA_Vector *vx, + double *beta, CTA_Vector *vy, int *retval); + + +/** \brief Apply the BLAS operation GEMM: C=(alpha)AB+(beta)C + * + * i.e. for matrices A,B (both optionally transposed) and C, scalars alpha and beta + * + * \param C IO pointer to object data of BLAS matrix C + * \param transa I transpose flag for matrix A (CTA_TRUE for transposing or CTA_FALSE otherwise) + * \param transb I transpose flag for matrix B (CTA_TRUE for transposing or CTA_FALSE otherwise) + * \param alpha I scalar + * \param A I pointer to object data of BLAS matrix A + * \param B I pointer to object data of BLAS matrix B + * \param beta I scalar + * \param retval O receives return value + * \return no return value + */ +void CTAI_Matrix_Gemm( CTAI_Matrix_blas *C, int *transa, int *transb, double *alpha, + CTAI_Matrix_blas *A, CTAI_Matrix_blas *B, double *beta, int *retval); + + +/** \brief Apply the BLAS operation AXPY: Y=Y+(alpha)X + * + * i.e. for matrices X and Y, scalar alpha + * + * \param y IO pointer to object data of BLAS matrix y + * \param alpha I scalar + * \param x I pointer to object data of BLAS matrix x + * \param retval O receives return value + * \return no return value + */ +void CTAI_Matrix_Axpy( CTAI_Matrix_blas *y, double *alpha, CTAI_Matrix_blas *x, int *retval); + + +/** \brief Free the BLAS matrix object data and associated resources. + * + * \param x IO pointer to object data of BLAS matrix + * \param retval O receives return value + * \return no return value + */ +void CTAI_Matrix_free( CTAI_Matrix_blas *x, int *retval); + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/costa/native/cta/include/cta_mem.h b/costa/native/cta/include/cta_mem.h new file mode 100644 index 000000000..5c896297d --- /dev/null +++ b/costa/native/cta/include/cta_mem.h @@ -0,0 +1,64 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2011 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_mem.h +\brief Memory management routines. These routines are equivalent to standard C + routines but allows us to do additional checking and debugging +*/ + +#ifndef CTA_MEM_H +#define CTA_MEM_H + +#include +#include "cta_system.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Free memory that has been allocated with CTA_Malloc or CTA_Realloc + * + * \param ptr I pointer to memory block to deallocate + */ +CTAEXPORT void CTA_Free(void *ptr); + +/** \brief Allocates size bytes of memory and returns a pointer + * to the allocated memory. + * + * \param size I number of bytes to allocate + * + * \return pointer to allocated memory. NULL is returned when allocation fails + */ +CTANOF90 CTAEXPORT void* CTA_Malloc(size_t size); + +/** \brief Changes the size of the allocated memory block pointed by ptr + * Values in ptr are copied to returned memoy block + * \param size I pointer to memoryblock of which the size is changed + * \param size I number of bytes of returned memory block + * + * \return pointer to allocated memory. NULL is returned when allocation fails + */ +CTANOF90 CTAEXPORT void* CTA_Realloc(void *ptr, size_t size); + +#ifdef __cplusplus +} +#endif +#endif + diff --git a/costa/native/cta/include/cta_message.h b/costa/native/cta/include/cta_message.h new file mode 100644 index 000000000..d1244998c --- /dev/null +++ b/costa/native/cta/include/cta_message.h @@ -0,0 +1,100 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/openda_core/native/src/cta/cta_interface.c $ +$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (di, 07 okt 2008) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef CTA_MESSAGE_H +#define CTA_MESSAGE_H +#include "cta_functions.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Write a message + * + * \param className I name of class that writes the message + * \param method I name of the method that writes the message + * \param message I message + * \param type I type of message + * -'M':message + * -'I':Info + * -'W':Warning + * -'E':Error + * -'F':Fatal error (will terminate application) + */ +CTAEXPORT void CTA_Message_Write(const char *className, const char *method, const char *message, char type); + +/** \brief Set an external writer for handling messages + * + * An external writer must comply to the following C interface: + * + * void my_writer(char *className, char *method, char *message, char type); + * + * \param externalWriter I External writer + * + * + * \note Fortran writers are not yet supported + */ +CTAEXPORT void CTA_Message_SetExternalWriter(CTA_Func externalWriter); + + +/** \brief Toggle message handler between quiet and normal mode. + * in the quiet mode no messages are send (not even to external writers) + * + * \param setting I set message handler in quiet mode CTA_TRUE/CTA_FALSE + * + */ +CTAEXPORT void CTA_Message_Quiet(int setting); + + +/** \brief Macro for writing errors + * + * The name of the class and the method name must be set by the defines CLSNAM and METHOD + * + * \param message I Error message + * + */ +#define CTA_WRITE_ERROR(message) CTA_Message_Write(CLASSNAME,METHOD,message,'E') + +/** \brief Macro for writing info messages + * + * The name of the class and the method name must be set by the defines CLSNAM and METHOD + * + * \param message I Info message + * + */ +#define CTA_WRITE_INFO(message) CTA_Message_Write(CLASSNAME,METHOD,message,'I') + +/** \brief Macro for writing warnings + * + * The name of the class and the method name must be set by the defines CLSNAM and METHOD + * + * \param message I Warning message + * + */ +#define CTA_WRITE_WARNING(message) CTA_Message_Write(CLASSNAME,METHOD,message,'W') + +#ifdef __cplusplus +} +#endif +#endif + diff --git a/costa/native/cta/include/cta_metainfo.h b/costa/native/cta/include/cta_metainfo.h new file mode 100644 index 000000000..9a66a03f3 --- /dev/null +++ b/costa/native/cta/include/cta_metainfo.h @@ -0,0 +1,191 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_obsdescr.h +\brief Interface description of the COSTA default observation descriptor component. For user implementation see cta_usr_obs_descr.h. + +CTA_ObsDescr is used for describing observations. +*/ + +#ifndef CTA_METAINFO_H +#define CTA_METAINFO_H +#include "cta_system.h" +#include "cta_handles.h" +#include "cta_datatypes.h" +#include "cta_functions.h" +#include "cta_string.h" +#include "cta_vector.h" +#include "cta_matrix.h" + +/* Function Handle */ +typedef CTA_Handle CTA_Grid; +typedef CTA_Handle CTA_Metainfo; +typedef CTA_Handle CTA_MetainfoClass; + +#include "cta_sobs.h" + +/* parameters for different functions */ +#define CTA_METAINFO_CREATE_SIZE ( 1) +#define CTA_METAINFO_CREATE_INIT ( 2) +#define I_CTA_METAINFO_FREE ( 3) +#define I_CTA_METAINFO_GET_REST ( 4) +#define I_CTA_METAINFO_GET_KEYS ( 5) +#define I_CTA_METAINFO_COUNT_OBSERVATIONS ( 6) +#define I_CTA_METAINFO_COUNT_PROPERTIES ( 7) +#define I_CTA_METAINFO_EXPORT ( 8) +#define CTA_METAINFO_NUMFUNC ( 9) + + + + +typedef struct { +char name[CTA_STRLEN_TAG]; + int type; + int nx, ny, nz,nsize; +double x_origin, y_origin, z_origin; +double dx,dy,dz; + char refdimp[10][80]; // possible reference to tree node where coordinates are +} CTAI_Gridm; + +/* EXPLANATION TYPE OF GRID: +-99: undefined +-3: 3D regular grid, only by reference +-2: 2D regular grid, only by reference +-1: 1D regular grid, only by reference +1: 1D regular grid +2: 2D regular grid +3: 3D regular grid +10: curve (?) +*/ + +#ifdef __cplusplus +extern "C" { +#endif + + + +/** \brief Create a new class (=implementation) of a COSTA observation description component. + * + * \param name I name of the new observation description class + * \param h_func I COSTA function handles for functions that implement class, + * missing functions must have value CTA_NULL + * \param hobsdscrcl O receives handle of new observation description class + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Metainfo_DefineClass(const char *name, + const CTA_Func h_func[CTA_METAINFO_NUMFUNC], + CTA_MetainfoClass *hobsdscrcl); + +/** \brief Create a new observation description instance. + * + * \param hsobscl I class of new observation description + * \param usrdat IO data of the stochastic observer for which + a descriptor is to be created + * \param hobsdscr O receives handle of created observation description object + * \return error status: CTA_OK if successful + */ + CTAEXPORT int CTA_Metainfo_Create( CTA_Metainfo *hobsdscr); +// int CTA_Metainfo_Create( CTA_MetainfoClass hsobscl, +// CTA_Handle usrdat, CTA_Metainfo *hobsdscr); + +///** \brief Check whether given stochastic observer corresponds to this observation description +// * +// * \param hobsdscr I handle of observation description +// * \param hsobs I handle of stochastic observer +// * \return error status: CTA_OK if successful +// */ +//int CTA_Metainfo_Check_SObs( CTA_Metainfo hobsdscr, CTA_StochObs hsobs); + +/** \brief Get properties/values that correspond to a given key. + * + * \param hobsdscr I handle of observation description + * \param Key I key for which the value is asked + * \param Properties IO COSTA-vector that is to receive the values + * \param datatype I data type of elements in properties vector, must be the same as of queried properties + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Metainfo_SetUnit( CTA_Metainfo hobsdscr, char* Key); + + + +/** \brief set tag of metainfo + +*/ +CTANOEXPORT int CTA_Metainfo_GetDescription(CTA_Metainfo hobsdescr, char **description); + +CTANOEXPORT int CTA_Metainfo_IsEqual( CTA_Metainfo hmeta_x, CTA_Metainfo hmeta_y ); + +CTANOEXPORT int CTAI_Grid_IsEqual( CTAI_Gridm *hgrid1, CTAI_Gridm *hgrid2 ); + +CTANOEXPORT int CTA_Metainfo_Import(CTA_Metainfo minfo, CTA_Handle usrdata); + + +CTANOEXPORT int CTA_Metainfo_SetTag(CTA_Metainfo hobsdescr, char* tagname); + + +CTANOEXPORT int CTA_Metainfo_Free( CTA_Metainfo *hobsdscr); + + +CTANOEXPORT int CTA_Metainfo_SetDescription(CTA_Metainfo hobsdescr, char* description); + +CTANOEXPORT int CTA_Metainfo_SetRest(CTA_Metainfo hobsdescr, int *rest); + +CTANOEXPORT int CTA_Metainfo_SetGrid(CTA_Metainfo hobsdescr, CTAI_Gridm *hgrid); + +CTANOEXPORT int CTA_Metainfo_GetGrid(CTA_Metainfo hobsdescr, CTAI_Gridm *hgrid); + +CTANOEXPORT int CTA_Metainfo_Copy(CTA_Metainfo hmeta_x, CTA_Metainfo hmeta_y); + +CTANOEXPORT int CTA_Metainfo_GetRest(CTA_Metainfo hobsdescr, int *rest); + +CTANOEXPORT int CTA_Metainfo_GetTag(CTA_Metainfo hobsdescr, char *tagname); + +CTANOEXPORT int CTA_Metainfo_SetBelongsTo(CTA_Metainfo hobsdescr, char* tagname); + +CTANOEXPORT int CTA_Metainfo_GetBelongsTo(CTA_Metainfo hobsdescr, char *tagname); + +CTANOEXPORT int CTAI_Grid_Interpolate(CTAI_Gridm gridy, CTAI_Gridm gridx, CTA_Vector vecx, CTA_Vector vecx_to_y); + +CTANOEXPORT int CTA_Metainfo_Export(CTA_Metainfo minfo, CTA_Handle usrdata); + +CTANOEXPORT int CTA_Metainfo_GetUnit(CTA_Metainfo hobsdescr, char *nameofunit); + +CTANOEXPORT int CTAI_XML_CreateGrid(xmlNode *cur_node, CTAI_Gridm *thisgrid); + + +/* tijdelijk */ +CTANOF90 CTAEXPORT int CTA_Metainfo_setRegGrid(CTA_Metainfo hdescr, char *name, + int nx, int ny, int nz, + double x_origin, double y_origin, + double z_origin, + double dx, double dy, double dz); + +CTANOF90 CTAEXPORT int CTA_Metainfo_getRegGrid(CTA_Metainfo hdescr, char *name, + int *nx, int *ny, int *nz, + double *x_origin, double *y_origin, + double *z_origin, + double *dx, double *dy, double *dz); + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/costa/native/cta/include/cta_method.h b/costa/native/cta/include/cta_method.h new file mode 100644 index 000000000..612124114 --- /dev/null +++ b/costa/native/cta/include/cta_method.h @@ -0,0 +1,89 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_method.h +\brief The interface description of the COSTA method component (this component is still under construction). For user implementation see cta_usr_method.h. + +The COSTA method component is provided for calling data assimilation and calibration methods in a uniform way. +*/ + +#ifndef CTA_METHOD_H +#define CTA_METHOD_H +#include "cta_system.h" +#include "cta_errors.h" +#include "cta_handles.h" +#include "cta_datatypes.h" +#include "cta_functions.h" + +/* Method handle */ +typedef CTA_Handle CTA_Method; +typedef CTA_Handle CTA_MethClass; + +/* parameters for different functions */ +#define CTA_METH_CREATE_SIZE ( 1) +#define CTA_METH_CREATE_INIT ( 2) +#define CTA_METH_RUN ( 3) +#define CTA_METH_FREE ( 4) +#define CTA_METH_NUMFUNC ( 5) + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Create a new class (=implementation) of a COSTA method. + * + * \param name I name of the new method class + * \param h_func I COSTA function handles for functions that implement class, + * missing functions must have value CTA_NULL + * \param hmethcl O receives handle of new method class + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Meth_DefineClass(const char *name, const CTA_Func h_func[CTA_METH_NUMFUNC], + CTA_MethClass *hmethcl); + +/** \brief Create an instance of a method. + * + * \param hmethcl I method class of new object + * \param userdata IO user data for creation (depends on class) + * \param hmeth O receives handle of new method + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Meth_Create(CTA_Method hmethcl, CTA_Handle userdata, CTA_Method *hmeth); + +/** \brief Run method. + * \param hmeth I handle of method + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Meth_Run(CTA_Method hmeth); + +/** \brief Free the method object. + * + * \Note hmeth=CTA_NULL is allowed + * + * \param hmeth IO handle of method, replaced by CTA_NULL on return. + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Meth_Free(CTA_Method *hmeth); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/costa/native/cta/include/cta_modbuild_b3b.h b/costa/native/cta/include/cta_modbuild_b3b.h new file mode 100644 index 000000000..772a5bef5 --- /dev/null +++ b/costa/native/cta/include/cta_modbuild_b3b.h @@ -0,0 +1,140 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_modbuild_b3.h +\brief Description of the COSTA blackbox component +*/ + +#ifndef CTA_MODBUILD_B3B_H +#define CTA_MODBUILD_B3B_H + +#include +#ifdef WIN32 +#include +#endif +#include "cta.h" +#include "cta_system.h" +#include "f_cta_utils.h" +#include "cta_datatypes.h" +#include "cta_model_utilities.h" +#include "cta_handles.h" +#include "cta_datetime.h" + +#define STRING_MAX 1024 /* maximum length of string */ + +#define B3B_ENCODING ("utf-8") /* XML encodiing */ + + +typedef struct B3B_Variable B3B_Variable; +typedef B3B_Variable * B3B_VariablePntr; + +struct B3B_Variable { + char *name; /* variable name */ + double value; /* variable value */ +}; + +typedef struct B3B_Station B3B_Station; +typedef B3B_Station * B3B_StationPntr; + +struct B3B_Station { + char *name; /* station name */ + char *location; /* station location */ + char *filename; /* station filename */ + int nvariables; /* station number of variables */ + B3B_VariablePntr *variables; /* station variables */ +}; + +typedef struct B3B_Forcing B3B_Forcing; +typedef B3B_Forcing * B3B_ForcingPntr; + +struct B3B_Forcing { + char *name; /* forcing name */ + char *file; /* forcing file */ + char *element; /* forcing element */ + char *property; /* forcing property */ + char *item; /* forcing item */ + double value; /* forcing value */ +}; + +typedef struct B3B_Parameter B3B_Parameter; +typedef B3B_Parameter * B3B_ParameterPntr; + +struct B3B_Parameter { + char *name; /* parameter name */ + char *file; /* parameter file */ + char *element; /* parameter element */ + char *property; /* parameter property */ + char *item; /* parameter item */ + double value; /* parameter value */ +}; + +typedef struct B3B_StateExchange B3B_StateExchange; +typedef B3B_StateExchange * B3B_StateExchangePntr; + +struct B3B_StateExchange { + char *executable; /* name of blackbox executable */ + char *state2model; /* name of input file */ + char *model2state; /* name of output file */ + char *outputSteps; /* output steps 'last' or 'all' */ +}; + +typedef struct B3B_Model B3B_Model; +typedef B3B_Model * B3B_ModelPntr; + +struct B3B_Model{ + char *type; /* model type */ + char *description; /* model description */ + double timestep; /* simulation timestep in seconds */ + int simulationNumber; /* simulation number */ + char *workingdir; /* working map of simulation */ + char *templatedir; /* map with simulation template */ + char *simulationMap; /* map with simulation results */ + int stateLength; /* length of the state vector */ + int nstations; /* number of stations */ + B3B_StateExchangePntr stateexchange; /* pointer to the state exchange variables */ + B3B_StationPntr *stations; /* pointer to the list of stations */ + int nparameters; /* number of parameters */ + B3B_ParameterPntr *parameters; /* pointer to the list of parameters */ + int nforcings; /* number of forcings */ + B3B_ForcingPntr *forcings; /* pointer to the list of forcings */ +}; + +#define B3B_INDEX_THIS ( 0) /* Handle of instance */ +#define B3B_INDEX_TIME ( 1) /* Time instance of model (state) */ +#define B3B_INDEX_STATE ( 2) /* State vector of model */ +#define B3B_INDEX_FORCINGS ( 3) /* Tree vector containing the forcings of the model */ +#define B3B_INDEX_PARAMETERS ( 4) /* Tree vector of model parameters */ +#define B3B_INDEX_USERDATA ( 5) /* Userdata */ + +#define B3B_SIZE_DATABLK ( 6) + + +/** \brief Create the model class of the B3B Black-box builder + * + * \note This is not a user function. It is called at initialization of the + * COSTA environment. + * + * \param modelcls O receives handle of the B3B-modelbuilder class + */ + +CTANOEXPORT void CTA_Modbuild_b3b_CreateClass(CTA_ModelClass *modelcls); + +#endif + diff --git a/costa/native/cta/include/cta_modbuild_b3b_utils.h b/costa/native/cta/include/cta_modbuild_b3b_utils.h new file mode 100644 index 000000000..eeb016d3c --- /dev/null +++ b/costa/native/cta/include/cta_modbuild_b3b_utils.h @@ -0,0 +1,170 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2006 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_modbuild_b3b_utils_h.h +\brief Description of the COSTA blackbox component utilisties +*/ + +#ifndef CTA_MODBUILD_B3B_UTILS_H +#define CTA_MODBUILD_B3B_UTILS_H + +#include +#include +#include +#include +#include + +#include "cta.h" +#include "cta_xml.h" +#include "cta_modbuild_b3b.h" + +/** \brief Copy a string + * + * \param name I string to be copied + * + * \return copy of the string + */ +CTANOEXPORT char * B3B_CopyText(const char *in); + +/** \brief Reallocates a block of memory + * + * \param memblock I Pointer to previously allocated memory block + * \param size I New size in bytes. + * + * \return pointer to the allocated space + */ +CTANOEXPORT void * B3B_Realloc(void *memblock, size_t size); + +/** \brief Allocates a block of memory + * + * \param nelem I Number of elements + * \param elsize I Element size + * + * \return pointer to the allocated space + */ +CTANOEXPORT void * B3B_Malloc(size_t nelem, size_t elsize); + +/** \brief Frees a block of memory that is currently allocated + * + * \param pntr I starting address of the memory block to be freed + * + * \return no return value + */ +CTANOEXPORT void B3B_Free(void *pntr); + +/** \brief Frees a model that is currently allocated + * + * \param pntr I starting address of the model to be freed + * + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +CTANOEXPORT int B3B_Free_Model(B3B_ModelPntr model); + +/** \brief Frees a stateexchange that is currently allocated + * + * \param pntr I starting address of the stateexchange to be freed + * + * \return no return value + */ +CTANOEXPORT void B3B_Free_StateExchange(B3B_StateExchangePntr stateexchange); + +/** \brief Frees a parameter that is currently allocated + * + * \param pntr I starting address of the parameter to be freed + * \return no return value + */ +CTANOEXPORT void B3B_Free_Parameter(B3B_ParameterPntr parameter); + +/** \brief Frees a forcing that is currently allocated + * + * \param pntr I starting address of the forcing to be freed + * \return no return value + */ +CTANOEXPORT void B3B_Free_Forcing(B3B_ForcingPntr forcings); + +/** \brief Frees a station that is currently allocated + * + * \param pntr I starting address of the station to be freed + * \return no return value + */ +CTANOEXPORT void B3B_Free_Station(B3B_StationPntr stations); + +/** \brief Convert COSTA state vector to B3B model + * + * \param model I Pointer naar de model data + * \param start I Simulatie starttijd in modified julian day + * \param stop I Simulatie stoptijd in modified julian day + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +CTANOEXPORT int B3B_State2Model(B3B_ModelPntr model, double start, double stop); + +/** \brief Convert B3B model to COSTA state vector + * + * \param model I Pointer naar de model data + * \param start I Simulatie starttijd in modified julian day + * \param stop I Simulatie stoptijd in modified julian day + * \param sstate O Pointer naar state vector + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +CTANOEXPORT int B3B_Model2State(B3B_ModelPntr model, double start, double stop, CTA_TreeVector *sstate); + +/** \brief Open model2state file + * + * \param model I Pointer naar de model data + * \param htree O Pointer naar costa tree + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +CTANOEXPORT int B3B_OpenModel2State(B3B_ModelPntr model, CTA_Tree *htree); + +/** \brief Get value of B3B model + * + * \param model I Pointer naar de model data + * \param htree I Pointer naar costa tree + * \param station I Name of the station + * \param variable I Name of the variabele + * \param time I tijdstip in MJD + * \param value O waarde van de variabele op tijdstip van station + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +CTANOEXPORT int B3B_Model2StateByStationVariableTime(B3B_ModelPntr model, CTA_Tree htree, char *station, char *variable, double time, double *value); + +/** \brief + * + * \param model I Pointer naar de model data + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +CTANOEXPORT int B3B_Run_Model(B3B_ModelPntr model); + +/** \brief + * + * \param + * \return + */ +CTANOEXPORT B3B_ModelPntr B3B_Read_Model(int simulationNumber, CTA_Tree hmodelinput, CTA_Handle *sstate, CTA_Handle *sparam, CTA_Handle *sforc); + +/** \brief Check for a COSTA error + * + * \param ierr I COSTA error code + * \param msg I Error message + * \return no return value + */ +CTANOEXPORT void B3B_CheckError(int ierr, const char *msg); + +#endif diff --git a/costa/native/cta/include/cta_modbuild_par.h b/costa/native/cta/include/cta_modbuild_par.h new file mode 100644 index 000000000..57c0c6d4b --- /dev/null +++ b/costa/native/cta/include/cta_modbuild_par.h @@ -0,0 +1,56 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_modbuild_par.h + +\brief Par-Modelbuilder. +*/ + +#ifndef CTA_MODBUILD_PAR_H +#define CTA_MODBUILD_PAR_H +#include "cta_handles.h" +#include "cta_datatypes.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Create the model class of the Par-Modelbuilder and initilizes MPI + * + * \note This is not a user function. It is called at initialization of the + * COSTA environment. + * + * \param modelcls O receives handle of the SP-modelbuilder class + */ +CTAEXPORT void CTA_Modbuild_par_CreateClass(CTA_ModelClass *modelcls); + +/** \brief Stop the model class of the Par-Modelbuilder and finalize MPI + * + * \note This is not a user function. It is called at the finalization of the + * COSTA environment. + * + */ +CTAEXPORT void CTA_Modbuild_par_Finalize(); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/costa/native/cta/include/cta_modbuild_sp.h b/costa/native/cta/include/cta_modbuild_sp.h new file mode 100644 index 000000000..6fdb8edf4 --- /dev/null +++ b/costa/native/cta/include/cta_modbuild_sp.h @@ -0,0 +1,78 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_modbuild_sp.h +\brief SP-Modelbuilder. +*/ + +#ifndef CTA_MODBUILD_SP_H +#define CTA_MODBUILD_SP_H +#include "cta_handles.h" +#include "cta_datatypes.h" +#include "cta_metainfo.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Create the model class of the SP-Modelbuilder + * + * \note This is not a user function. It is called at initialization of the + * COSTA environment. + * + * \param modelcls O receives handle of the SP-modelbuilder class + */ +CTANOEXPORT void CTA_Modbuild_sp_CreateClass(CTA_ModelClass *modelcls); +CTANOEXPORT void modbuild_sp_create_size(CTA_Handle userdata, int *memsize, int *ierr); +CTANOEXPORT void modbuild_sp_create_init(CTA_Handle *this_obj, CTA_Handle *data, CTA_Handle *hinput, int *ierr); +CTANOEXPORT void modbuild_sp_free(CTA_Handle *data ,int *ierr); +CTANOEXPORT void modbuild_sp_compute(CTA_Handle *data, CTA_Time *timespan, int *ierr); +CTANOEXPORT void modbuild_sp_setstate(CTA_Handle *data, CTA_TreeVector *state, int *ierr); +CTANOEXPORT void modbuild_sp_getstate(CTA_Handle *data, CTA_TreeVector *state, int *ierr); +CTANOEXPORT void modbuild_sp_axpymodel(CTA_Handle *datay, double *alpha, CTA_Handle *datax, int *ierr); +CTANOEXPORT void modbuild_sp_axpystate(CTA_Handle *data, double *alpha, CTA_TreeVector *statex, int *ierr); +CTANOEXPORT void modbuild_sp_setforc(CTA_Handle *data, CTA_TreeVector *state, int *ierr); +CTANOEXPORT void modbuild_sp_getforc(CTA_Handle *data, CTA_TreeVector *state, int *ierr); +CTANOEXPORT void modbuild_sp_axpyforc(CTA_Handle *data, CTA_Time *tspan, double *alpha, CTA_TreeVector *statex, int *ierr); +CTANOEXPORT void modbuild_sp_setparam(CTA_Handle *data, CTA_TreeVector *state, int *ierr); +CTANOEXPORT void modbuild_sp_getparam(CTA_Handle *data, CTA_TreeVector *state, int *ierr); +CTANOEXPORT void modbuild_sp_axpyparam(CTA_Handle *data, double *alpha, CTA_TreeVector *statex, int *ierr); +CTANOEXPORT void modbuild_sp_getnoisecount(CTA_Handle *data, int* nnoise, int* ierr); +CTANOEXPORT void modbuild_sp_getnoisecovar(CTA_Handle *data, CTA_TreeVector *colsvar, int* ierr); +CTANOEXPORT void modbuild_sp_getobsvalues(CTA_Handle *data, CTA_Time *ttime, CTA_ObsDescr *hdescr, CTA_Vector *vval, int* ierr); +CTANOEXPORT void modbuild_sp_getobsselect(CTA_Handle *data, CTA_Time *ttime, CTA_ObsDescr *hdescr, CTA_String *sselect, int* ierr); +CTANOEXPORT void modbuild_sp_addnoise(CTA_Handle *data, CTA_Time *ttime, int* ierr); +CTANOEXPORT void modbuild_sp_ar1_create(CTA_Handle *hinput, CTA_TreeVector *state, CTA_TreeVector *sbound, + CTA_TreeVector *sparam, int *nnoise, CTA_Time *time0, + CTA_String *snamnoise, CTA_Handle *husrdata, int *ierr); +CTANOEXPORT void modbuild_sp_ar1_covar(CTA_TreeVector *colsvar,int* nnoise, CTA_Handle husrdata,int* ierr); +CTANOEXPORT void modbuild_sp_ar1_compute(CTA_Time *timesspan,CTA_TreeVector *state, CTA_TreeVector *saxpyforc, + BOOL *baddnoise, CTA_TreeVector *sparam, CTA_Handle husrdata, + int* ierr); +CTANOEXPORT void modbuild_sp_ar1_getobsval(CTA_TreeVector *state, CTA_ObsDescr *hdescr, CTA_Vector *vval, CTA_Handle *husrdata, int *ierr); +CTANOEXPORT int modbuild_sp_compute_covars(int nmodel, double *Qar1, double *p0, CTAI_Gridm hgrid); +CTANOEXPORT void CTA_Modbuild_sp_CreateClass(CTA_ModelClass *modelcls); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/costa/native/cta/include/cta_model.h b/costa/native/cta/include/cta_model.h new file mode 100644 index 000000000..0f9df3965 --- /dev/null +++ b/costa/native/cta/include/cta_model.h @@ -0,0 +1,587 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_model.h +\brief Interface description of the COSTA default model component. For user implementation see cta_usr_model.h. + +Functions for creating and working with models. CTA_Model is the default class implementation for models. +*/ + +#ifndef CTA_MODEL_H +#define CTA_MODEL_H +#include "cta_handles.h" +#include "cta_datatypes.h" +#include "cta_functions.h" +#include "cta_treevector.h" +#include "cta_time.h" +#include "cta_obsdescr.h" + +/* Function Handle */ +typedef CTA_Handle CTA_Model; +typedef CTA_Handle CTA_ModelClass; + +/* parameters for different user functions */ +#define I_CTA_MODEL_CREATE_SIZE ( 0) +#define I_CTA_MODEL_CREATE_INIT ( 1) +#define I_CTA_MODEL_FREE ( 2) +#define I_CTA_MODEL_COMPUTE ( 3) +#define I_CTA_MODEL_SET_STATE ( 4) +#define I_CTA_MODEL_GET_STATE ( 5) +#define CTA_MODEL_AXPY_STATE ( 6) +#define CTA_MODEL_AXPY_MODEL ( 7) +#define CTA_MODEL_SET_FORC ( 8) +#define CTA_MODEL_GET_FORC ( 9) +#define CTA_MODEL_AXPY_FORC (10) +#define CTA_MODEL_SET_PARAM (11) +#define CTA_MODEL_GET_PARAM (12) +#define CTA_MODEL_AXPY_PARAM (13) +#define CTA_MODEL_GET_STATESCALING (14) +#define CTA_MODEL_GET_TIMEHORIZON (15) +#define CTA_MODEL_GET_CURRENTTIME (16) + +/* stochastic functions */ +#define CTA_MODEL_GET_NOISE_COUNT (17) +#define CTA_MODEL_GET_NOISE_COVAR (18) + +/* handling of observations */ +#define CTA_MODEL_GET_OBSVALUES (19) +#define CTA_MODEL_GET_OBSSELECT (20) +#define CTA_MODEL_ANNOUNCE_OBSVALUES (21) + +#define CTA_MODEL_ADD_NOISE (22) +#define I_CTA_MODEL_EXPORT (23) +#define I_CTA_MODEL_IMPORT (24) + +/* Methods for adjoint models */ +#define CTA_MODEL_ADJ_SET_FORC (25) +#define CTA_MODEL_ADJ_COMPUTE (26) +#define CTA_MODEL_ADJ_PREPARE (27) + +/* Localization */ +#define I_CTA_MODEL_GETOBSLOCALIZATION (28) + +/* Loading and restoring of state */ +#define CTA_MODEL_LOAD_PERSISTENTSTATE (29) +#define CTA_MODEL_SAVE_INTERNALSTATE (30) +#define CTA_MODEL_RESTORE_INTERNALSTATE (31) +#define CTA_MODEL_RELEASE_INTERNALSTATE (32) +#define CTA_MODEL_SAVE_PERSISTENTSTATE (33) + +/* Domain and local analysis support */ +#define CTA_MODEL_GET_NUMDOMAINS (34) +#define CTA_MODEL_GET_OBSSELECTOR (35) +#define CTA_MODEL_GET_OBSLOCALIZATIONDOMAIN (36) +#define CTA_MODEL_GET_STATEDOMAIN (37) +#define CTA_MODEL_AXPY_STATEDOMAIN (38) + +/* Number of model function handles */ +#define CTA_MODEL_NUMFUNC (39) + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Create a model instance + * + * \param hmodcl I model class of new instance + * \param userdata IO user data needed for creation (depends on modelclass) + * \param hmodel O receives handle of new model instance + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_Create(CTA_ModelClass hmodcl, CTA_Handle userdata, CTA_Model *hmodel); + +/** \brief Compute model for given timespan + * + * \param hmodel IO handle of model instance + * \param htime I timespan for which to compute + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_Compute(CTA_Model hmodel, CTA_Time htime); + +/** \brief Add noise during during the given timespan at + * the Compute + * + * \note Noise is added in the compute-method + * \param hmodel IO handle of model instance + * \param htime I timespan for which to compute adding noise + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_AddNoise(CTA_Model hmodel, CTA_Time htime); + +/** \brief Set the internal state of the model. + * + * \note A copy of the state is set + * + * \param hmodel IO handle of model instance + * \param hstate I handle of new state + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_SetState(CTA_Model hmodel, CTA_TreeVector hstate); + +/** \brief Get a copy of the internal state. + * + * \note Optionally a tree-vector is created. In that case the caller of this + * method is responsible for freeing that tree-vector. The input state must be compatible + * (same size and or composition) as the models internal state. + * \note If *hstate == CTA_NULL a new object is created, user is responsible for freeing this object. + * + * \param hmodel I handle of model instance + * \param hstate IO receives state of the model, *hstate can be CTA_NULL on calling (see note) + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_GetState(CTA_Model hmodel, CTA_TreeVector *hstate); + +/** \brief Perform axpy operation on the internal state. + * + * \note AXPY: y=alpha*x+y. y corresponds to the models + * internal state and x can be a state vector or a model + + * \param hmodel IO handle of model instance (y) + * \param alpha I alpha + * \param hx I handle of x (state or model) + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_AxpyState(CTA_Model hmodel, double alpha, CTA_Handle hx); + +/** \brief Get element-wise scaling for model state + * + * The values in the state-vector are compared on "importance" in various + * algorithms like RRSQRT and COFFEE. The model state holds in general + * various quantities like concentration, velicity, location etc in + * arbitrary units. The scaling vector (that can be model state dependend) + * makes it possible to meaningfull compare elements in the state-vector + * for importance. Various methods are available like a transformation to + * enery. + * + * The scaling vector represents a diagonal scaling matrix but is + * respresented by a tree-vector. + * + * \note The elementwise scaling is returned in the form of a tree-vector + * with same build-up as the tree-vector of the model state. The scaling vector + * is created whenever hscale==CTA_NULL on input, the caller is + * responsible for freeing this object. + * + * \param hmodel I handle of model instance + * \param hscale IO receives state scaling vector for the model state, + *hstate can be CTA_NULL on calling (see note) + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_GetStateScaling(CTA_Model hmodel, CTA_TreeVector *hscale); + + +/** \brief Set the models forcings. + * + * \note Set the forcings (constant) for the given timespan. + * The model will fall back to its own forcings definition + * outside the given timespan. + * + * \param hmodel IO handle of model instance + * \param tspan I time span on which to set the forcing values + * \param hforc I handle of vector with new forcings + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_SetForc(CTA_Model hmodel, CTA_Time tspan, CTA_TreeVector hforc); + +/** \brief Get a copy of the values of the models forcings + * + * \note Optionally a tree-vector is created in that case the caller of this + * method is responsible for freeing that tree-vector. The input tree-vector + * must be compatible (same size and or composition) as the models + * internal tree-vector representing the forcings. + * If the forcings of the model are not constant for the given timespan + * the result is dependent on the model-implementation + * \note If *hforc == CTA_NULL a new object is created, user is responsible for freeing this object. + * + * \param hmodel I handle of model instance + * \param tspan I timespan for wich the given forcings are valid + * \param hforc IO receives models forcings, *hforc can be CTA_NULL on calling (see note) + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_GetForc(CTA_Model hmodel, CTA_Time tspan, CTA_TreeVector *hforc); + +/** \brief Perform axpy operation on the models forcings. + * + * \note AXPY: y=alpha*x+y. y corresponds to the models + * internal forcings. + * The adjustment to the forcings (alpha*x) is only valid for the given + * time span. Note that the model will use y(t)+x for the given time span + * where y(t) denotes the default forcings of the model. + * + * \param hmodel IO handle of model instance (y) + * \param tspan I time span for wich the given forcings are valid + * \param alpha I scalar + * \param hx I handle of forcings tree-vector x + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_AxpyForc( CTA_Model hmodel, CTA_Time tspan, double alpha, CTA_TreeVector hx); + +/** \brief Set parameters of the model. + * + * \param hmodel IO handle of model instance + * \param hparam I handle of parameters vector + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_SetParam(CTA_Model hmodel, CTA_TreeVector hparam); + +/** \brief Get a copy of the parameters of the model. + * + * \note Optionally a tree-vector is created in that case the caller of this + * method is responsible for freeing that tree-vector. The input tree-vector + * must be compatible (same size and or composition) as the models + * internal tree-vector representing the parameters. + * \note If *hforc == CTA_NULL a new object is created, user is responsible for freeing this object. + * + * \param hmodel I handle of model instance + * \param hparam IO receives model forcings, *hforc can equal CTA_NULL on calling (see note) + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_GetParam(CTA_Model hmodel, CTA_TreeVector *hparam); + +/** \brief Perform axpy operation on the models parameters. + * + * \note AXPY: y=alpha*x+y where y corresponds to the models + * internal parameters. + * + * \param hmodel IO handle of model instance (y) + * \param alpha I alpha + * \param hx I handle of treevector of parameters (x) + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_AxpyParam( CTA_Model hmodel, double alpha, CTA_TreeVector hx); + +/** \brief Return the timehorizon on the model. + * The time horizon is the initial overal simulation span for which the mode is configured + * + * \param hmodel I handle of model instance + * \param tHorizon I time horizon of model + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_GetTimeHorizon( CTA_Model hmodel, CTA_Time tHorizon); + +/** \brief Return the current time of the model. + * + * \param hmodel I handle of model instance + * \param tCurrent I time corresponding the the model state + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_GetCurrentTime( CTA_Model hmodel, CTA_Time tCurrent); + +/** \brief Get covariance matrix of noise parameters. + * + * \note ONLY for Stochastic models. + * The covariance matrix is represented by an array + * of tree-vectors (columns of the matrix) + * optionally a tree-vector is created in that case the caller of this + * method is responsible for freeing that tree-vector. The input tree-vector + * must be compatible (same size and or composition) as the models + * internal tree-vector. + * \note If hstmat[icol] == CTA_NULL a new object is created, user is responsible for freeing this object. + * + * \param hmodel I handle of model instance + * \param hstmat O receives array of tree-vectors, *hstmat can equal CTA_NULL on calling (see note) + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_GetNoiseCovar(CTA_Model hmodel, CTA_TreeVector *hstmat); + +/** \brief Get number of noise parameters: the number of columns of the noise covariance matrix. + * + * \note ONLY for Stochastic models. + * + * \param hmodel I handle of model instance + * \param nnoise O receives number of noise parameters + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_GetNoiseCount(CTA_Model hmodel,int *nnoise); + +/** \brief Free model instance. + * + * \note ONLY for Stochastic models. + * + * \param hmodel IO handle of model instance, replaced by CTA_NULL on return + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_Free(CTA_Model *hmodel); + +/** \brief Announce to the model what observations will be requested. + * + * Before the compute method this method is used to announce what + * obeservation will be requested after the CTA_Model_Compute using the + * CTA_Model_GetObsvalues method. + * + * For some simulation models it is more efficient to do a single simulation + * (a single CTA_Model_Compute call) for a particular simulation span then + * simulating the same simulation span in a number of steps (multiple + * CTA_Model_Compute calls). + * + * This method can be used to announce for what observations the model + * must provide a prediction in advance. This method must be called prior + * to the CTA_Compute method and makes it possible to perform simulations + * over a longer time interval without the need to interupt the computations + * in order to get the predictions at intermediate time instances. + * + * Notes on the behavior of the method: + * - The observation description used in the first CTA_Model_GetObsValues + * after the compute MUST be the same as the observation description + * used in the announce. + * - All observations that are announced MUST be in the timespan of the + * following CTA_Model_Compute. + * - The announced observations can only be retreved ONCE after the + * CTA_Model_Compute. + * - A CTA_Model_SetState or CTA_Model_AxpyState will reset the announced + * CTA_Model_AnnounceObsValues administration (since stored predictions + * might not be valid anymore) + * + * \param hmodel I handle of model instance + * \param hdescr I observation description component + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_AnnounceObsValues(CTA_Model hmodel, CTA_ObsDescr hdescr); + +/** \brief Get (interpolate) the models internal state to the + * observations described as specified in the observation + * description component. + * + * \note The interface supports a the time instance for time-interpolation. + * It depends on the model whether and how this is supported. + * + * \param hmodel I handle of model instance + * \param htime I time instance (for checking and time-interpolation if + * supported by model) + * \param hdescr I observation description component + * \param values O receives values of the models internal state corresponding to + * observations as described in hdescr + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_GetObsValues(CTA_Model hmodel, CTA_Time htime, + CTA_ObsDescr hdescr, CTA_Vector values); + + +/** \brief Get for each observation a localization scaling vector + * + * \param hmodel I handle of model instance + * \param hdescr I observation description for which we want localization scaling vectors + * \param distance I characteristic distance + * \param locVecs O costa vector of handles to treevectors (scaling vectors). The treevectors + * are created when the indices are CTA_NULL on entry + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_GetObsLocalization( CTA_Model hmodel, + CTA_ObsDescr hdescr, double distance, CTA_Vector locVecs); + + +/** \brief Get a query for the stochastic observer in order to + * filter out the observations that can actually be provided by the model. + * + * \param hmodel I handle of model instance + * \param htime I time instance + * \param hdescr I observation description component + * \param sselect O receives a query to filter out the observations, must exist before calling + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_GetObsSelect(CTA_Model hmodel, CTA_Time htime, + CTA_ObsDescr hdescr, CTA_String sselect); + +/* \brief Save the current state of the model in a treevector + * + * \param hmodel I handle of model instance + * \param instanceID O ID to retrieve the CTA_Handle to the treevector + * \return error status: CTA_OK if successful +*/ +CTAEXPORT int CTA_Model_SaveInternalState(CTA_Model hmodel, CTA_String *instanceID); + +/* \brief Restore the state of the model to a previously saved state + * + * \param hmodel I handle of model instance + * \param instanceID O ID to retrieve the CTA_Handle to the treevector with the previously saved state + * \return error status: CTA_OK if successful + */ + CTAEXPORT int CTA_Model_RestoreInternalState(CTA_Model hmodel, CTA_String instanceID); + +/* \brief Free memory of a previously saved state + * + * \param hmodel I handle of model instance + * \param instanceID I ID to retrieve the CTA_Handle to the treevector to release + * \return error status: CTA_OK if successful + */ + CTAEXPORT int CTA_Model_ReleaseInternalState(CTA_Model hmodel, CTA_String instanceID); + +/* \brief Save a previously saved internal state to file + * + * \param hmodel I handle of model instance + * \param filename I name of file to be written + * \param instanceID I ID to retrieve the CTA_Handle to the treevector to export to file + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_SavePersistentState(CTA_Model hmodel, CTA_String filename, CTA_String instanceID); + +/* \brief Load a model state from file and save it as an internal state + * + * \param hmodel I handle of model instance + * \param filename I name of file to read state from + * \param instanceID O ID to retrieve the CTA_Handle to the treevector that holds the state + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_LoadPersistentState(CTA_Model hmodel, CTA_String filename, CTA_String *instanceID); + +/** \brief Export the whole internal state of a model + * This export function will export the whole state of the model such that + * a so called "restart" start from this point yielding the same results. + * There are no ruled on the format that is used to store the data. + * Various extra otions are valid but a model will in most cases support an export + * to a file and to a COSTA pack object. + * + * + * \param hmodel I handle of model instance + * \param hexport I target for export e.g. CTA_File or CTA_Pack + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_Export( CTA_Model hmodel, CTA_Handle hexport); + +/** \brief Import the whole internal state of a model + * After the inport the models internal state is exactly the same as the point that + * the export was created using CTA_Model_Export. + * + * + * \param hmodel I handle of model instance + * \param himport I handle with data created by CTA_MODEL_Export e.g. CTA_File or CTA_Pack + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_Import( CTA_Model hmodel, CTA_Handle himport); + +/** \brief + * The adjoint model is mostly forced with scaled observation minus model residuals + * This routines is used to set the forcing. + * + * \param hmodel IO handle of model instance + * \param hdescr I handle to observation descriptions needed for interpretation of the forcing vector + * \param vforc I vector with the forcing values + * + * \return error status: CTA_OK if successful + */ +CTANOEXPORT int CTA_Model_AdjSetointForc(CTA_Model hmodel, CTA_ObsDescr hdescr, CTA_Vector vforc); + +/** \brief + * bla + * HOMEWORK FOR JULIUS! + * + * \param hmodel I handle of model instance + * + * \return error status: CTA_OK if successful + */ +CTANOEXPORT int CTA_Model_AdjPrepare(CTA_Model hmodel, CTA_Time time); + +/** \brief + * bla + * HOMEWORK FOR JULIUS! + * + * \param hmodel I handle of model instance + * + * \return error status: CTA_OK if successful + */ +CTANOEXPORT int CTA_Model_AdjCompute(CTA_Model hmodel, CTA_Time time); + +/*Initernal non-user routines */ +int CTAI_Model_PerformTimesteps( + CTA_Model hmodel, /* handle of model */ + CTA_Function *function, /* Function that must be called */ + CTA_Time htime, /* timespan to compute */ + int mindBarrier /* Flag to activate barrier check fixed timestepping */ +); + + + +/** \brief Get the number of domains for local analysis + * + * \param hmodel I handle of model instance + * \param distance I characteristic distance + * \param ndomains O number of domains + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_GetNumDomains(CTA_Model hmodel, double distance, int *ndomains); + + +/** \brief Get selection of observations that are relevnet for assimilation in the given domain + * + * \param hmodel I handle of model instance + * \param hdescr I observation description of all observations + * \param distance I characteristic distance + * \param idomain I domain number + * \param selection O costa vector with the indices of the relevant observations (0 based) + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_GetObsSelector( CTA_Model hmodel, + CTA_ObsDescr hdescr, double distance, int idomain, CTA_Vector *selection); + + + + + +/** \brief Get for each observation a localization scaling vector for single domain + * + * \param hmodel I handle of model instance + * \param hdescr I observation description for which we want localization scaling vectors + * \param distance I characteristic distance + * \param idomain I domain number + * \param locVecs O costa vector of handles to treevectors (scaling vectors). The treevectors + * are created when the indices are CTA_NULL on entry + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_GetObsLocalizationDomain( CTA_Model hmodel, + CTA_ObsDescr hdescr, double distance, int idomain, CTA_Vector locVecs); + + +/** \brief Get a copy of the internal state. + * + * \note Optionally a tree-vector is created. In that case the caller of this + * method is responsible for freeing that tree-vector. The input state must be compatible + * (same size and or composition) as the models internal state. + * \note If *hstate == CTA_NULL a new object is created, user is responsible for freeing this object. + * + * \param hmodel I handle of model instance + * \param idomain I domain number + * \param hstate IO receives state of the model, *hstate can be CTA_NULL on calling (see note) + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_GetStateDomain(CTA_Model hmodel, int idomain, CTA_TreeVector *hstate); + +/** \brief Perform axpy operation on the internal state for a single domain + * + * \note AXPY: y=alpha*x+y. y corresponds to the models + * internal state and x can be a state vector or a model + + * \param hmodel IO handle of model instance (y) + * \param alpha I alpha + * \param idomain I domain number + * \param hx I handle of x (state or model) + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_AxpyStateDomain(CTA_Model hmodel, double alpha, int idomain, CTA_Handle hx); + +#ifdef __cplusplus +} +#endif + + +#endif + diff --git a/costa/native/cta/include/cta_model_factory.h b/costa/native/cta/include/cta_model_factory.h new file mode 100644 index 000000000..c08005f37 --- /dev/null +++ b/costa/native/cta/include/cta_model_factory.h @@ -0,0 +1,84 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_model.h +\brief Interface description of the COSTA default model component. For user implementation see cta_usr_model.h. + +Functions for creating and working with models. CTA_Model is the default class implementation for models. +*/ + +#ifndef CTA_MODEL_FACTORY_H +#define CTA_MODEL_FACTORY_H + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \brief Create a COSTA modell class from XML input file +* (load from methods from dynamic load library) +* +* \param fName I XML-configuration file +* \param modelClass O Class of new model Factory +* \return Model class handle +*/ +CTAEXPORT int CTA_ModelFactory_New(const char *fName, CTA_ModelClass* modelClass ); + + +/** \brief Create a COSTA modell class from XML +* (load from methods from dynamic load library). +* +* \param cur_node I Current XML node +* \return Handle to create or CTA_NULL in case of an error. +*/ +CTAEXPORT CTA_ModelClass CTAI_XML_CreateModelClass(xmlNode *cur_node); + +/** \brief Define a new class (=implementation) of a COSTA model component + * + * \param name I name of the new model class + * \param h_func I COSTA function handles for functions that implement class, + * missing functions must have value CTA_NULL + * \param hmodcl O receives handle of new model class + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_DefineClass(const char *name, const CTA_Func h_func[CTA_MODEL_NUMFUNC], CTA_ModelClass *hmodcl); + +const char *CTAI_ModelFac_GetImplements(CTA_ModelClass hmodcl); + +int CTAI_ModelFac_GetParallelData(CTA_ModelClass hmodcl, char **implements, char **parallel_type, + char **spawn_workers, char **nproc, char **ntimes, char **dumProcs); + + +int CTAI_ModelFac_GetBlock(CTA_ModelClass hmodcl, CTA_Model hmodel); +int CTAI_ModelFac_GetBarrierData(CTA_ModelClass hmodcl, char **flag_barrier, double *t_step ); +int CTAI_ModelFac_AddModelInstance(CTA_ModelClass hmodcl, CTA_Model hmodel); +int CTAI_ModelFac_SetBlock(CTA_ModelClass hmodcl, CTA_Model hmodel); +int CTAI_ModelFac_TimeStepAllModels(CTA_ModelClass hmodcl, CTA_Function *function, double tstart, double tstop); + + + +#ifdef __cplusplus +} +#endif + + +#endif + diff --git a/costa/native/cta/include/cta_model_utilities.h b/costa/native/cta/include/cta_model_utilities.h new file mode 100644 index 000000000..b8ace658a --- /dev/null +++ b/costa/native/cta/include/cta_model_utilities.h @@ -0,0 +1,76 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_model_utilities.h +\brief A set of utilities that simplify the creation of COSTA model components. + +A set of utility routines. These routines are originally designed for +the rapid development of COSTA model components. But their use is not limited +to model components. + +\note This is just a set of utility routines it does not define any COSTA component + +*/ + + +#ifndef CTA_MODEL_UTILITIES_H +#define CTA_MODEL_UTILITIES_H +#include "cta_system.h" +#include "cta_handles.h" +#include "cta_datatypes.h" +#include "cta_tree.h" +#include "cta_string.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Handles model configuration tree or name of input xml-file + * + * When a new instance of the a model component is created it needs some + * input. The most convenient way is to provide the root to a COSTA-tree + * with configuration information the name of a XML-configuration file + * This routine will check whether the input handle is a COSTA-tree or + * the name of a XML-configuration file. A COSTA-tree containing the content + * of the XML-file is created when the input is the name of the XML-file. + * + * \note A COSTA-tree is created if the input is the name of an XML-file. + * the handle of the input tree is returned otherwise. This means that + * depending on the input, a tree is created. The routine will return whether + * the returned tree is created by this routine. The caller is responsible + * for freeing the tree when necessary. + * + * \param hinput I handle of tree (CTA_Tree) with model input or string + * (CTA_Sring) with name of xml-input file + * \param tinput O receives handle of tree (CTA_Tree) with model input + * \param cleanup O receives flag (CTA_TRUE/CTA_FALSE) indicating whether tinput is + * created and must be freed by the caller + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Model_Util_InputTree(CTA_Handle hinput, CTA_Tree *tinput, int *cleanup); + +#ifdef __cplusplus +} +#endif +#endif + + + diff --git a/costa/native/cta/include/cta_modelcombiner.h b/costa/native/cta/include/cta_modelcombiner.h new file mode 100644 index 000000000..6845bd730 --- /dev/null +++ b/costa/native/cta/include/cta_modelcombiner.h @@ -0,0 +1,45 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_modelcombiner.h +\brief ModelCombiner. +*/ + +#ifndef CTA_MODELCOMBINER_H +#define CTA_MODELCOMBINER_H +#include "cta_handles.h" +#include "cta_datatypes.h" +#include "cta_metainfo.h" + +#ifdef __cplusplus +extern "C" { +#endif +/** \brief Create the model class of the ModelCombiner + * + * \note This is not a user function. It is called at initialization of the + * COSTA environment. + * + * \param modelcls O receives handle of the modelcombiner class + */ +CTANOEXPORT void CTA_Modelcombiner_CreateClass(CTA_ModelClass *modelcls); +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/cta/include/cta_obsdescr.h b/costa/native/cta/include/cta_obsdescr.h new file mode 100644 index 000000000..29e832c30 --- /dev/null +++ b/costa/native/cta/include/cta_obsdescr.h @@ -0,0 +1,179 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_obsdescr.h +\brief Interface description of the COSTA default observation descriptor component. For user implementation see cta_usr_obs_descr.h. + +CTA_ObsDescr is used for describing observations. +*/ + +#ifndef CTA_OBSDESCR_H +#define CTA_OBSDESCR_H +#include "cta_system.h" +#include "cta_handles.h" +#include "cta_datatypes.h" +#include "cta_functions.h" +#include "cta_string.h" +#include "cta_vector.h" +#include "cta_matrix.h" +#include "cta_reltable.h" + +/* Function Handle */ +typedef CTA_Handle CTA_ObsDescr; +typedef CTA_Handle CTA_ObsDescrClass; + +#include "cta_sobs.h" + +/* parameters for different functions */ +#define I_CTA_OBSDESCR_CREATE_SIZE ( 1) +#define I_CTA_OBSDESCR_CREATE_INIT ( 2) +#define I_CTA_OBSDESCR_FREE ( 3) +#define I_CTA_OBSDESCR_GET_PROPERTIES ( 4) +#define I_CTA_OBSDESCR_GET_KEYS ( 5) +#define I_CTA_OBSDESCR_COUNT_OBSERVATIONS ( 6) +#define I_CTA_OBSDESCR_COUNT_PROPERTIES ( 7) +#define I_CTA_OBSDESCR_EXPORT ( 8) +#define I_CTA_OBSDESCR_SELECTION ( 9) +#define I_CTA_OBSDESCR_NUMFUNC (10) + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Create a new class (=implementation) of a COSTA observation description component. + * + * \param name I name of the new observation description class + * \param h_func I COSTA function handles for functions that implement class, + * missing functions must have value CTA_NULL + * \param hobsdscrcl O receives handle of new observation description class + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_ObsDescr_DefineClass(const char *name, + const CTA_Func h_func[I_CTA_OBSDESCR_NUMFUNC], + CTA_ObsDescrClass *hobsdscrcl); + + +/** \brief Create a new observation description instance. + * + * \param hsobscl I class of new observation description + * \param usrdat IO data of the stochastic observer for which + * a descriptor is to be created + * \param hobsdscr O receives handle of created observation description + * object + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_ObsDescr_Create( CTA_ObsDescrClass hsobscl, + CTA_Handle usrdat, CTA_ObsDescr *hobsdscr); + +/** \brief Create a new observation description that is subset of existing observation description. + * + * \param hobsdescr I the observation description to create a subset + * from + * \param selection I selection criterion (subset of SQL) + * \param reltab O Relation table specifying the relation between + * the original and new observation description + * component. Note no relation table is created when + * reltab==CTA_NULL on entry + * \param hobsdescrout O the new COSTA-stochastic observer, empty before + * calling, caller responsible for freeing after use + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_ObsDescr_CreateSel( CTA_StochObs hobsdescr, CTA_String selection, + CTA_RelTable reltab, CTA_StochObs *hobsdescrout); + +/** \brief Create a new observation description that is subset of existing observation description. + * All observations in the interval (t1,t2] (note t1 is not part + * of the interval!) of the time span are selected. + * + * \param hobsdescr I the observation description to create a subset + * from + * \param timespan I time span over which selection has to be made + * \param reltab O Relation table specifying the relation between + * the original and new observation description + * component. Note no relation table is created when + * reltab==CTA_NULL on enty + * \param hobsdescrout O the new COSTA-stochastic observer, empty before + * calling, caller responsible for freeing after use + * \return error status: CTA_OK if successful + */ + +CTAEXPORT int CTA_ObsDescr_CreateTimSel( CTA_ObsDescr hobsdescr, CTA_Time timespan, + CTA_RelTable reltab, CTA_ObsDescr *hobsdescrout); + +/** \brief Get properties/values that correspond to a given key. + * + * \param hobsdscr I handle of observation description + * \param Key I key for which the value is asked + * \param Properties IO COSTA-vector that is to receive the values + * \param datatype I data type of elements in properties vector, must be the same as of queried properties + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_ObsDescr_Get_ValueProperties( CTA_ObsDescr hobsdscr, const char* Key, + CTA_Vector Properties, CTA_Datatype datatype); + +/** \brief Get all keys names. + * + * \param hobsdscr I handle of observation description + * \param Keys O receives all keys (COSTA-string vector); must exist before calling and be large enough + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_ObsDescr_Get_PropertyKeys(CTA_ObsDescr hobsdscr, CTA_Vector Keys); + +/** \brief Get number of properties/keys. + * + * \param hobsdscr I handle of observation description + * \param nkeys O receives number of properties/keys + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_ObsDescr_Property_Count( CTA_ObsDescr hobsdscr, int *nkeys); + +/** \brief Get number of observations. + * + * \param hobsdscr I handle of observation description + * \param nobs O receives the number of observations + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_ObsDescr_Observation_Count( CTA_ObsDescr hobsdscr, int *nobs); + +/** \brief Export observation description. + * + * The default observation description CTA_DEFAULT_OBSDESC supports exporting to:\n + * TODO + * + * \param hdescr I handle of observation description + * \param usrdat IO export configuration/medium + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_ObsDescr_Export(CTA_ObsDescr hdescr, CTA_Handle usrdat); + +/** \brief Free observation description object. + * + * \param hobsdscr IO handle of observation description, replaced by CTA_NULL on return. + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_ObsDescr_Free( + CTA_ObsDescr *hobsdscr /* Handle of observation description */ + ); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/costa/native/cta/include/cta_obsdescr_sqlite3.h b/costa/native/cta/include/cta_obsdescr_sqlite3.h new file mode 100644 index 000000000..1cd51afad --- /dev/null +++ b/costa/native/cta/include/cta_obsdescr_sqlite3.h @@ -0,0 +1,158 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_obsdescr_sqlite3.h +\brief SQLite3 user implementation of the observation descriptor interface. +*/ + +#ifndef CTA_OBSDESCR_SQLITE3_H +#define CTA_OBSDESCR_SQLITE3_H + +#include "cta_f77blas.h" +#include "cta_datatypes.h" +#include "cta_handles.h" +#include "cta_functions.h" +#include "cta_obsdescr.h" +#include "cta_reltable.h" +#include "cta_util_sqlite3.h" +#include "cta_sobs_sqlite3.h" + +typedef struct { +CTA_Handle myhandle; +CTAI_util_sqlite3_database *database; +char* condition; +int n_keys; +int nmeasr; +CTA_String *Keys; +} CTAI_ObsDescr_sqlite3; + + +#ifdef __cplusplus +extern "C" { +#endif +/** \brief Initialization function for defining ObsDescr_SQLite3 class + * + * \return no return value + */ +CTANOEXPORT void CTA_ObsDescr_sqlite3_initialise(); + + +/** \brief Implementation function as part of the create process. + * + * \param memsize O receives the size of object data of new observation description + * \param retval O receives return value + * \return no return value + */ +void CTAI_ObsDescr_sqlite3_Create_Size( int *memsize,int *retval ); + + +/** \brief Implementation function as part of the create process. + Makes object ready to use. + * + * \note userdata must contain handle of COSTA stochastic observer belonging to observation description + * + * \param myhandle I Handle assigned by COSTA + * \param descr IO pointer to object data of observation description + * \param usrdat I user data (see note) + * \param retval O receives the return value + * \return no return value + */ +void CTAI_ObsDescr_sqlite3_Create_Init(CTA_ObsDescr *myhandle, + CTAI_ObsDescr_sqlite3 *descr, + CTA_Handle *usrdat, int *retval); + + +/** \brief Count the number of properties in the observation descriptor object. + * + * \param descr I pointer to object data of observation description + * \param nkeys O receives number of property keys + * \param retval O receives return value + * \return no return value + */ +void CTAI_ObsDescr_sqlite3_Property_Count(CTAI_ObsDescr_sqlite3 *descr, + int* nkeys, int *retval); + + +/** \brief Count the number of observations in the observation descriptor object. + * + * \param descr I pointer to object data of observation description + * \param nobs O receives the number of observations + * \param retval O receives return value + * \return no return value + */ +void CTAI_ObsDescr_sqlite3_Observation_Count(CTAI_ObsDescr_sqlite3 *descr, + int* nobs, int *retval); + + +/** \brief Get keys of observation descriptor object. + * + * \param descr I pointer to object data of observation description + * \param Keys I handle of string vector that receives key descriptions, must exist before calling and have enough elements + * \param retval O receives return value + * \return no return value + */ +void CTAI_ObsDescr_sqlite3_Get_Keys(CTAI_ObsDescr_sqlite3 *descr, + CTA_Vector * Keys, int *retval); + + +/** \brief Get properties of CTAI_ObsDescr object. + * + * \param descr I pointer to object data of observation description + * \param Key I pointer to C string describing key of which to get the properties + * \param Properties I handle of vector that receives properties, must exist before calling and have enough elements + * \param datatype O data type of vector elements + * \param retval O receives return value + * \return no return value + */ +void CTAI_ObsDescr_sqlite3_Get_Properties(CTAI_ObsDescr_sqlite3 *descr, + const char *Key, CTA_Vector *Properties, CTA_Datatype *datatype, + int *retval); + + +/** \brief Free the object data and associated resources. + * + * \param descr IO object data of observation description to be freed + * \param retval O receives return value + * \return no return value + */ +void CTAI_ObsDescr_sqlite3_Free(CTAI_ObsDescr_sqlite3 *descr, + int *retval); + +/** \brief Create a new observation description that is subset of existing observation description. + * + * \param descr I object data of observation description to be freed + * \param selection I selection criterion (subset of SQL) + * \param reltab O Relation table specifying the relation between + * the original and new observation description + * component. Note no relation table is created when + * reltab==CTA_NULL on entry + * \param my_handle_out I Handle of new relation table + * \param descrout O new observation description created subset + * \param retval O receives return value + */ +void CTAI_ObsDescr_sqlite3_CreateSel(CTAI_ObsDescr_sqlite3 *descr, + CTA_String *selection, CTA_RelTable *reltab, + CTA_ObsDescr *myhandle_out, + CTAI_ObsDescr_sqlite3 *descrout, int *retval); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/cta/include/cta_obsdescr_table.h b/costa/native/cta/include/cta_obsdescr_table.h new file mode 100644 index 000000000..0802afe2d --- /dev/null +++ b/costa/native/cta/include/cta_obsdescr_table.h @@ -0,0 +1,132 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include +#include +#include "cta.h" + +typedef struct { +int nkeys; +int nmeasr; +char **Keys; +char ***Columns; +} CTAI_ObsDescr_table; + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Explanation + * + * \param memsize O The number of bytes which are necessary to + * store one CTAI_SObs_sqlite3, with a + * pointer to the contents (data), + * but without the contents themselves. + * \param retval O Error code (see cta_datatypes.h for possible + * error codes) + * \return no return value + */ +void CTAI_ObsDescr_table_Create_Size(int *memsize, int *retval); + + /** \brief Allocate the memory which is necessary to + * store the data necessary for a sqlite3-observer + * + * \param myhandle I Handle assigned by COSTA + * \param descr I The sqlite3-observation description + * for which the memory must be + * allocated + * \param usrdat I User data + * \param retval O Error code + * \return no return value + */ +void CTAI_ObsDescr_table_Create_Init(CTA_ObsDescr myhandle, CTAI_ObsDescr_table *descr, +CTA_Handle this, CTA_Handle *usrdat, int *retval); + +/** \brief Explanation + * + * \param IO descr + * \param IO Keys + * \param retval O Error code + * \return no return value + */ +void CTAI_ObsDescr_table_Get_Keys(CTAI_ObsDescr_table *descr, CTA_Vector *Keys, int *retval); + +/** \brief Explanation + * + * \param IO descr + * \param IO nkeys + * \param retval O Error code + * \return no return value + */ +void CTAI_ObsDescr_table_Property_Count(CTAI_ObsDescr_table *descr, int *nkeys, int *retval); + +/** \brief Explanation + * + * \param descr I + * \param nobs I + * \param retval O Error code + * \return no return value + */ +void CTAI_ObsDescr_table_Observation_Count(CTAI_ObsDescr_table *descr, int *nobs, int *retval); + +/** \brief Explanation + * + * \param descr I + * \param Key I + * \param Properties I + * \param datatype I + * \param nobs I + * \param retval O Error code + * \return no return value + */ +void CTAI_ObsDescr_table_Get_Properties(CTAI_ObsDescr_table *descr, const char *Key, CTA_Vector *Properties, + CTA_Datatype *datatype,int *retval); + +/** \brief Explanation + * + * \param descr I + * \param nobs I + * \param usrdat I + * \param retval O Error code + * \return no return value + */ +void CTAI_ObsDescr_table_Export(CTAI_ObsDescr_table *descr, CTA_Handle *usrdat, int *retval); + +/** \brief Explanation + * + * \param descr I + * \param retval O Error code + * \return no return value + */ +void CTAI_ObsDescr_table_Free(CTAI_ObsDescr_table *descr, int *retval); + + +/** \brief The vector h_func is filled with COSTA-function handles of the + * implementations in this file. + * \param hobsdescrcl I + * \return no return value + */ +CTANOEXPORT void CTA_ObsDescr_table_initialise(CTA_ObsDescrClass *hobsdescrcl); + +#ifdef __cplusplus +} +#endif + diff --git a/costa/native/cta/include/cta_pack.h b/costa/native/cta/include/cta_pack.h new file mode 100644 index 000000000..8430dfde3 --- /dev/null +++ b/costa/native/cta/include/cta_pack.h @@ -0,0 +1,149 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#ifndef CTA_PACK_H +#define CTA_PACK_H +#include "cta_system.h" +#include "cta_handles.h" +#include "cta_datatypes.h" + +/** +\file cta_pack.h + +\brief The interface description of the COSTA pack component. + +The pack component is used for storing non-sequential data before it is +saved to file communicated in a parallel environment. +The pack component contains a memory buffer that can be filled with data. The size of the buffer is +automatically increased when new data is added. + +The pack component uses the FIFO princeple. Data that is added first can be retreved first. + +*/ + +/* Function Handle */ +typedef CTA_Handle CTA_Pack; + +/*! Reset pack/unpack pointer of pack object */ +#define CTA_PACK_RESET (-1) + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Create a pack instance. + * + * \param initsize I the initial size >=0 of the buffer + * \param hpack O receives handle of new pack object + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Pack_Create(int initsize, CTA_Pack *hpack); + +/** \brief Free a pack instance. + * + * \param hpack IO handle of pack object, replaced by CTA_NULL on return + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Pack_Free(CTA_Pack *hpack); + +/** \brief Add data to pack object. + * + * \param hpack IO handle of pack object + * \param data I data that must be packed + * \param lendat I size of the data to be packed (chars) + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Pack_Add(CTA_Pack hpack, void *data, int lendat); + +/** \brief Unpack (get) data from pack object. + * + * \param hpack IO handle of pack object + * \param data O buffer that receives data that is unpacked from pack-buffer (buffer length must be >= lendat) + * \param lendat I size of the data to be unpacked (chars) + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Pack_Get(CTA_Pack hpack, void *data, int lendat); + +/** \brief Get pointer to pack-buffer. + * + * \param hpack I handle of pack object + * + * \return pointer to buffer + */ +CTANOF90 CTAEXPORT char* CTA_Pack_GetPtr(CTA_Pack hpack); + +/** \brief Get length of packed data in pack-buffer. + * + * \param hpack I handle of pack object + * + * \return length packed data + */ +CTAEXPORT int CTA_Pack_GetLen(CTA_Pack hpack); + +/** \brief Only update administration for added elements + * + * This function can be used to update the administration after the + * pack-buffer is filled externally (e.g. using an mpi_recv) + * + * \param hpack I handle of pack object + * \param lendat I number of added elements (chars) + * + * \return length packed data + */ +CTAEXPORT int CTA_Pack_AddCnt(CTA_Pack hpack, int lendat); + +/** \brief Get the internal pack and unpack pointers + * + * This function can be used to save to pointers and + * reset the state of the pack component after unpacking or adding + * some data + * + * \param hpack I handle of pack object + * \param ip1 O unpack pointer + * \param ip2 O pack pointer + * + * \return length packed data + */ +CTAEXPORT int CTA_Pack_GetIndx(CTA_Pack hpack, int *ip1, int *ip2); + +/** \brief Set the internal pack and unpack pointers + * + * This function can be used to restore the pointers and + * reset the state of the pack component after unpacking or adding + * some data + * + * \param hpack I handle of pack object + * \param ip1 I unpack pointer. In order to reset all unpackin + * set to CTA_PACK_RESET + * \param ip2 I pack pointer. In order to reset the whole pack object + * set to CTA_PACK_RESET + * + * \return length packed data + */ + + +CTAEXPORT int CTA_Pack_SetIndx(CTA_Pack hpack, int ip1, int ip2); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/cta/include/cta_par.h b/costa/native/cta/include/cta_par.h new file mode 100644 index 000000000..ab9032cfc --- /dev/null +++ b/costa/native/cta/include/cta_par.h @@ -0,0 +1,138 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#ifndef CTA_PAR_H +#define CTA_PAR_H +#include "cta_system.h" +#include "cta_handles.h" +#include "cta_datatypes.h" + +#ifdef USE_MPI +#include "mpi.h" +#endif + +/** +\file cta_par.h + +\brief Interface for creating parallel applications with COSTA */ + +/** Type of processes */ +enum CTA_ParProcType {CTA_ParMaster, CTA_ParWorker, CTA_ParOther}; + + +#ifdef __cplusplus +extern "C" { +#endif + +/** Flag to indicate whether this is a filter process or model/worker process */ +CTAEXPORT extern int CTA_FILTER_PROCESS; + + +/** Flag CTA_TRUE/CTA_FALSE to indicate whether run is in a parallel + * environment */ +CTAEXPORT extern int CTA_IS_PARALLEL; + +/** Rank in world of all processes Note this variable is only + * used for debugging and output. */ +CTAEXPORT extern int CTA_PAR_MY_RANK; + +/** Kind of this process */ +CTAEXPORT extern enum CTA_ParProcType CTA_MY_PROC_TYPE; + +#ifdef USE_MPI +/** The whole communication universe */ +CTAEXPORT extern MPI_Comm CTA_COMM_WORLD; + +/** My own communication group */ +CTAEXPORT extern MPI_Comm CTA_COMM_MYWORLD; + +/** Group consisting of master process and all worker processes the master + * communicates with */ +CTAEXPORT extern MPI_Comm CTA_COMM_MASTER_WORKER; + +#endif + +/** \brief Initialises parallel environment for a process that spawned + * The executable is spawned using MPI_COMM_SPAWN or MPI_COMM_SPAWN_MULTIPLE + * + * It will set up the communication groups and optionally starts the parallel + * model builder + * + * \param StartPar I CTA_TRUE/CTA_FALSE start parallel model builder + * + * Note when a worker process is part of a Master-Worker model and it does + * not implement the COSTA model interface it should not start the parallel model builder + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Par_WorkerSpawn(int StartPar); + + +/** \brief Initialises parallel environment and create process groups + * + * \param parConfig I configuration input from XML-file + * \param StartPar I CTA_TRUE/CTA_FALSE start parallel model builder + * + * Note when a worker process is part of a Master-Worker model and it does + * not implement the COSTA model interface it should not start the parallel model builder + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Par_CreateGroups(int parConfig, int StartPar); + +#ifdef USE_MPI +/** \brief Get the Fortran (integer handles) of the communicators + * + * \param cta_comm_world O Fortran communcator CTA_COMM_WORLD + * \param cta_comm_myworld O Fortran communcator CTA_COMM_MYWORLD + * \param cta_comm_master_worker O Fortran communcator CTA_COMM_MASTER_WORKER + * + */ +CTANOF90 CTAEXPORT int CTA_Par_CreateNewCreateGetComm(CTA_ModelClass modelCls, MPI_Comm *comm); +#endif + +#ifdef USE_MPI +/** \brief Get a communicator for a process group by index + * + * \param indx I indx of communicator + * \param comm O MPI communicator + * \return CTA_OK when succesfull. The value is CTA_CANNOT_FIND_PROCESS_GROUP is + * returned when the communicator cannot be found + * + */ +CTANOF90 CTAEXPORT int CTA_Par_GetAllCommByIndex(int indx, MPI_Comm *comm); +#endif + +/** \brief Get the global number of COSTA process group and index of this process + * + * \param itime O The index of this process in the group + * \param iGroup O The group number of this process belongs to (1..nGroups) + * + * \note If this function is called by the COSTA master process or in a sequential run it will return 0 for iGroup + * + */ +CTAEXPORT void CTA_Par_GetGroupInfo(int *iGroup, int *itime); + + + +#ifdef __cplusplus +} +#endif +#endif + + diff --git a/costa/native/cta/include/cta_reltable.h b/costa/native/cta/include/cta_reltable.h new file mode 100644 index 000000000..eb9043a70 --- /dev/null +++ b/costa/native/cta/include/cta_reltable.h @@ -0,0 +1,172 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_reltable.h +\brief Relation table component that defines a relation between two + elements (ordered) sets of elements. + + Relation tables are used to define a relation between elements in sets. + Examples are the elements of a vector, matrix, tree vector or in a + stochastic observer. + + The Relation table can be used for copying elements from one set to the + other optionally using interpolation (not yet supported). + +*/ + +#ifndef CTA_RELTABLE_H +#define CTA_RELTABLE_H +#include "cta_system.h" +#include "cta_handles.h" +#include "cta_datatypes.h" +#include "cta_vector.h" + +/* Function Handle */ +typedef CTA_Handle CTA_RelTable; +#ifdef __cplusplus +extern "C" { +#endif + +/* functions */ + +/** \brief Create a relation table + * + * \param hreltable O created relation table + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_RelTable_Create(CTA_RelTable *hreltable); + +/** \brief Free a relation table object. + * + * \param hreltable IO relation table to be freed, + * value is set to CTA_NULL on return. + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_RelTable_Free(CTA_RelTable *hreltable); + +/** \brief Copy elements according to relation table + * + * \note we currently only support copying of elements + * between two vector instances. Other types of + * COSTA object will be supported when needed + * in later versions + * + * \param hreltable I handle of relation table + * \param hfrom I Origin object to copy data from + * \param hto I Target object to copy data to + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_RelTable_Apply(CTA_RelTable hreltable, + CTA_Handle hfrom, CTA_Handle hto); + +/** \brief Copy elements according to inverse of relation table + * + * \note we currently only support copying of elements + * between two vector instances. Other types of + * COSTA object will be supported when needed + * in later versions + * + * \param hreltable I handle of relation table + * \param hfrom I Origin object to copy data from + * \param hto I Target object to copy data to + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_RelTable_ApplyInv(CTA_RelTable hreltable, + CTA_Handle hfrom, CTA_Handle hto); + + +/** \brief Set a relation table + * A Set a relation table that defines a selection of elements + * + * \param hreltable O relation table that is set + * \param vselect I (integer) vector with indices of elements + from the target set that are selected. + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_RelTable_SetSelect(CTA_RelTable hreltable, CTA_Vector vselect); + +/** \brief Get the number of elements that are copied when the table is applied + * + * + * \param hreltable I relation table + * \param nelt O number of elements that are copied + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_RelTable_Count(CTA_RelTable hreltable, int *nelt); + + +/** \brief Set a relation table that is combination of two + * relation tables. + * + * Set a relation table that is the combination of two exisiting relation + * tables. It is possible to use the inverse of the relation tables when + * needed + *. + * A usefull application of this method is to create a relation table that + * defines a relation between a subset of elements from set1 and a subset of + * the elements of set2. In order to set a relation table of this kind first + * create two relation tables: + * hrel1 elements from set 1 that have a relation with the elements from set 2, + * hrel2 elements from set 2 that have a relation with the elements from set 1 +* + * The combined relation table of hrel1 and inverse(hrel2) is a relation + * table that spcifies the relation of a subset of elements from set1 and a + * subset of elements from set2. + * + * \param hreltable O relation table that is set + * \param hrel1 I first relation table + * \param inverse1 I use inverse of hrel1 (CTA_TRUE/CTA_FALSE) + * \param hrel2 I first relation table + * \param inverse2 I use inverse of hrel2 (CTA_TRUE/CTA_FALSE) + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_RelTable_SetTableCombine(CTA_RelTable hreltable, + CTA_RelTable hrel1, int inverse1, + CTA_RelTable hrel2, int inverse2 ); + + + + +/** \brief Apply relation table to two components + * + * \param reltable (I) Data of relation table + * \param hrom (I) Source set of elements + * \param hto (IO) Target set (some will be overwitten) + * \param iverse (I) CTA_TRUE/CTA_FALSE apply inverse table + * + * \return error status: CTA_OK if successful + * \note Internal routine not a user routine + */ +int CTAI_RelTable_Apply(CTA_RelTable hreltable, + CTA_Handle hfrom, CTA_Handle hto, int inverse); + + + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/cta/include/cta_resultwriter.h b/costa/native/cta/include/cta_resultwriter.h new file mode 100644 index 000000000..38f2d6109 --- /dev/null +++ b/costa/native/cta/include/cta_resultwriter.h @@ -0,0 +1,187 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/cta/cta_time.c $ +$Revision: 2751 $, $Date: 2011-09-09 08:58:46 +0200 (Fri, 09 Sep 2011) $ + +OpenDA +Copyright (C) 2013 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** + * OpenDA support the concept of resultwriters. The Data assimilation algorithm + * will presents intermediate results and information on the computations to the + * result writer. Each implementation of a resultwriter can then handle this data + * in its own way. + * + * To implement an native resultwriter the following methods need to be implemented. + * Note: for explanation on variables see resultwriter function methods. All strings + * have a len_... containing the result of strlen (used for using c-strings in Fortran) + * + * C: + * + * void putmessage(int iDWriter, CTA_Tree config, char* workingDir, char* message, int *retval, + * int len_workingDir, int len_message); + * + * void putvalue(int iDWriter, CTA_Tree config, char* workingDir, int id, int handle, + * int outputlevel, char* context, int iteration, int &retval, + * len_workingDir, len_id, len_context); + * + * void putiterationreport(int iDWriter, CTA_Tree config, char *workingDir, int iteration, + * double cost, CTA_Vector handle, int *retval, int len_workingDir); + * + * void freewriter(int iDWriter); + * + * Fortran 2003: + * + * interface + * subroutine putmessage(iDWriter, config, workingDir, message, retval, & + * len_workingDir, len_message) & + * bind(C, NAME='putmessage') + * use iso_c_binding + * implicit none + * include 'cta_f90.inc' + * + * integer(C_INT), VALUE ::iDWriter, config, len_workingDir, len_message + * integer(C_INT) ::retval + * character(kind=c_char) ::workingdir(*), message(*) + * end subroutine putmessage + * end interface + * + * + * interface + * subroutine putvalue(iDWriter, config, workingDir, id, handle, & + * outputlevel, context, iteration, retval, & + * len_workingDir, len_id, len_context) & + * bind(C, NAME='putvalue') + * use iso_c_binding + * implicit none + * include 'cta_f90.inc' + * integer(C_INT), VALUE ::iDWriter, config, handle, outputlevel, iteration, & + * len_workingDir, len_id, len_context + * integer(C_INT) ::retval + * character(kind=c_char) ::workingdir(*), id(*), context(*) + * + * end subroutine putvalue + * end interface + * + * interface + * subroutine putiterationreport(iDWriter, config, workingDir, iteration, cost, & + * handle, retval, len_workingDir) & + * bind(C, NAME='putiterationreport') + * use iso_c_binding + * implicit none + * include 'cta_f90.inc' + * integer(C_INT), VALUE ::iDWriter, config, iteration, handle, len_workingDir + * real(C_DOUBLE), VALUE ::cost + * integer(C_INT) ::retval + * character(kind=c_char) ::workingdir(*) + * end subroutine putiterationreport + * end interface + * + * interface + * subroutine freewriter(iDWriter) & + * bind(C, NAME='freewriter') + * use iso_c_binding + * implicit none + * include 'cta_f90.inc' + * integer(C_INT), VALUE ::iDWriter + * end subroutine freewriter + * end interface + + * + * The resultwriter must have an xml-configuration file containing at least the following + * (additional field may be used for private usage by the implementation) + * Note name of library and funtion name must correspond to the user implementation. + * + * + * + * + * + * + * + * + * + * + * + * @author nils van velzen + * + */ + +#ifndef CTA_RESULTWRITER_H +#define CTA_RESULTWRITER_H +#include "cta_system.h" +#include "cta_handles.h" +#include "cta_datatypes.h" +/* Function Handle */ +#ifdef __cplusplus +extern "C" { +#endif + + +/** \brief Handle a string message send to the resultwriter + * + * \param idWriter I ID of this resultwriter (Counter of number of native result writers) + * \param config I Name of XML configuration file containting the function pointers and additional information + * \param workingDir I Full path to working directory + * \param message I Message send to resultwriter + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Resultwriter_putmessage(int idWriter, char *config, char *workingDir, char *message); + +/** \brief Handle a string message send to the resultwriter + * + * \param idWriter I ID of this resultwriter (Counter of number of native result writers) + * \param config I Name of XML configuration file containting the function pointers and additional information + * \param workingDir I Full path to working directory + * \param id I Name of the variable/array send to the resultwriter + * \param handle I Handle (Vector or TreeVector) of variable + * \param outputLevel I Selected output level (see opendabridge for possible values) + * \param context I Location from which the resultwriter was called + * \param iteration I Iteration number from which the resultwriter was called + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Resultwriter_putvalue(int idWriter, char *config, char *workingDir, char *id, int handle, int outputLevel, char *context, int iteration); + +/** \brief Handle a string message send to the resultwriter + * + * \param idWriter I ID of this resultwriter (Counter of number of native result writers) + * \param config I Name of XML configuration file containting the function pointers and additional information + * \param workingDir I Full path to working directory + * \param iteration I Iteration number from which the resultwriter was called + * \param cost I Value of cost function + * \param handle I Handle (Vector or TreeVector) of the current parameters + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Resultwriter_putiterationreport(int idWriter, char *config, char *workingDir, int iteration, double cost, int handle); + +/** \brief Free a resultwriter (close output files etc). + * + * \param idWriter I ID of this resultwriter (Counter of number of native result writers) + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Resultwriter_free(int idWriter); + + +#ifdef __cplusplus +} +#endif +#endif + + diff --git a/costa/native/cta/include/cta_sobs.h b/costa/native/cta/include/cta_sobs.h new file mode 100644 index 000000000..2b667e3ee --- /dev/null +++ b/costa/native/cta/include/cta_sobs.h @@ -0,0 +1,228 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_sobs.h +\brief Interface description of the COSTA default stochastic observer component. For user implementation see cta_usr_sobs.h. +*/ + +#ifndef CTA_SOBS_H +#define CTA_SOBS_H +#include "cta_system.h" +#include "cta_handles.h" +#include "cta_datatypes.h" +#include "cta_functions.h" +#include "cta_vector.h" +#include "cta_matrix.h" +#include "cta_time.h" + +/* StochObs and Decription Handles */ +typedef CTA_Handle CTA_StochObs; +typedef CTA_Handle CTA_SObsClass; + +#include "cta_obsdescr.h" + +/* parameters for different functions */ +#define CTA_SOBS_CREATE_SIZE ( 1) +#define CTA_SOBS_CREATE_INIT ( 2) +#define I_CTA_SOBS_FREE ( 3) +#define CTA_SOBS_CREATE_SELECTION ( 4) +#define I_CTA_SOBS_COUNT ( 5) +#define CTA_SOBS_GET_OBS_DESCRIPTION ( 6) +#define CTA_SOBS_GET_VALUES ( 7) +#define CTA_SOBS_GET_REALISATION ( 8) +#define CTA_SOBS_GET_EXPECTATION ( 9) +#define CTA_SOBS_EVALUATE_PDF (10) +#define CTA_SOBS_GET_COV_MATRIX (11) +#define CTA_SOBS_GET_VARIANCE (12) +#define I_CTA_SOBS_EXPORT (13) +#define CTA_SOBS_GET_TIMES (14) +#define CTA_SOBS_NUMFUNC (15) + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Create a new class (=implementation) of a COSTA stochastic observer component. + * + * \param name I name of the new stochastic observer class + * \param h_func I COSTA function handles for functions that implement class, + * missing functions must have value CTA_NULL + * \param descrcl I class of the observation description that is created by stochastic observer + * \param hstochobscl O handle of new stochastic observer class + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_SObs_DefineClass(const char *name, const CTA_Func h_func[CTA_SOBS_NUMFUNC], + CTA_ObsDescrClass descrcl, CTA_SObsClass *hstochobscl); + +/** \brief Create an instance of a stocastic observer + * + * \param hstochobscl I stochastic observer class of new stochastic observer + * \param userdata IO userdata for creation (depends on class) + * \param hstochobs O receives handle of new stochastic observer object + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_SObs_Create(CTA_SObsClass hstochobscl, CTA_Handle userdata, + CTA_StochObs *hstochobs); + +/** \brief Create a new stochastic observer that is subset of existing stochastic observer. + * + * \param hsobsin I handle of the existing stochastic observer of + * which a selection is to be made + * \param userdata IO inputs necessary for making a selection (depends on user implementation) + * \param hsobsout O receives handle of the new COSTA-stochastic observer, empty before calling, caller responsible for freeing after use + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_SObs_CreateSel(CTA_StochObs hsobsin, CTA_Handle userdata, + CTA_StochObs *hsobsout); + +/** \brief Create a new stoch observer that is subset in time of existing stochastic observer. + * + * All observations in the closed interval [t1,t2] of the time span are selected. + * + * \param hsobsin I handle of the stochastic observer of + * which a selection is to be made + * \param timespan I time span over which selection has to be made + * \param hsobsout O receives handle of the new COSTA-stochastic observer, empty before calling + * \return error states: CTA_OK if successful + */ +CTAEXPORT int CTA_SObs_CreateTimSel(CTA_StochObs hsobsin, CTA_Time timespan, + CTA_StochObs *hsobsout); + + +/** \brief Count the number of elements in stochastic observer. + * \param hsobs I handle of the stochastic observer + * \param nmeasr O receives number of measurements in this observer + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_SObs_Count(CTA_StochObs hsobs, int *nmeasr); + + +/** \brief Get a vector with the measurements. + * + * \param hsobs I handle of the stochastic observer + * \param hvec IO handle of vector that receives the measurements; must exist before calling + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_SObs_GetVal(CTA_StochObs hsobs,CTA_Vector hvec); + + +/** \brief Count the times associated to the measurements. + * + * \param hsobs I handle of the stochastic observer + * \param hvec IO handle to vector that receives the times; must exist before calling + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_SObs_GetTimes( CTA_StochObs hsobs,CTA_Vector hvec); + + +/** \brief Draw random values (measurements) according to the probability density + * function of the mesurements. + * + * \param hsobs I handle of the stochastic observer + * \param hvec IO handle of vector that receives the draw (measurements); must exist before calling + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_SObs_GetRealisation(CTA_StochObs hsobs, CTA_Vector hvec); + + +/** \brief Get expectation of the probability density function of the mesurements. + * + * \param hsobs I handle of the stochastic observer + * \param hvec IO handle of vector that receives the expectation values; must exist before calling + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_SObs_GetExpectation(CTA_StochObs hsobs, CTA_Vector hvec); + + +/** \brief Get the value of the probability density function of the mesurements at given location. + * + * \param hsobs I handle of the stochastic observer + * \param hvecx I handle of vector with location for evaluating pdf + * \param hvecy IO handle of vector that is to contain the pdf-value; must exist before calling + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_SObs_EvalPDF(CTA_StochObs hsobs, CTA_Vector hvecx, CTA_Vector hvecy); + + +/** \brief Get covariance matrix of probability density function of the measurements. + * + * \param hsobs I handle of the stochastic observer + * \param hmat IO handle of matrix that receives the covariance matrix; must exist before calling + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_SObs_GetCovMat(CTA_StochObs hsobs, CTA_Matrix hmat); + + +/** \brief Get variance of probability density function of the mesurements. + * + * \param hsobs I handle of the stochastic observer + * \param hvec IO handle of vector that receives the variance; must exist before calling + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_SObs_GetVar( CTA_StochObs hsobs, CTA_Vector hvec); + + +/** \brief Get standard deviation of probability density function of the measurements. + * + * \param hsobs I handle of the stochastic observer + * \param hvec IO handle of vector that is to contain the standard deviation + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_SObs_GetStd( CTA_StochObs hsobs, CTA_Vector hvec); + + +/** \brief Create the observation description corresponding to the stochastic observer. + * + * \note Caller is responsible for freeing the here created observation description + * + * \param hsobs I handle of the stochastic observer + * \param hobsdescr O receives handle of newly created observation description class, empty before calling + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_SObs_GetDescription(CTA_StochObs hsobs, CTA_ObsDescr *hobsdescr); + + +/** \brief Export the stochastic observer. + * + * \note Supported by CTA_DEFAULT_SOBS:\n + * output to file (userdata must contain handle of COSTA file)\n + * + * \param hsobs I handle of the stochastic observer + * \param userdata I configuration of output + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_SObs_Export(CTA_StochObs hsobs, CTA_Handle userdata); + +/** \brief Free the stochastic observer + * + * \Note hsobs=CTA_NULL is allowed + * + * \param hsobs IO handle of the stochastic observer, replaced by CTA_NULL on return + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_SObs_Free( + CTA_StochObs *hsobs /* Handle of stochastic observer */ + ); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/costa/native/cta/include/cta_sobs_combine.h b/costa/native/cta/include/cta_sobs_combine.h new file mode 100644 index 000000000..fefafd394 --- /dev/null +++ b/costa/native/cta/include/cta_sobs_combine.h @@ -0,0 +1,48 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_sobs_combine.h +\brief COMBINE implementation of the stochastic observer interface. +*/ + + +#include "cta_f77blas.h" +#include "cta_datatypes.h" +#include "cta_handles.h" +#include "cta_functions.h" +#include "cta_util_sqlite3.h" +#include "cta_sobs.h" +#include "cta_obsdescr.h" + + +/* Dit is een struct om wat op te slaan wat we nodig hebben + Enigszins analoog aan CTAI_SOBS en de datablock van de modelcombiner */ + +typedef struct { +int nstations; +double timeoffset; +int nmeasr; +int * stations; +int nofsubsobs; // number of stochastic observers +CTA_Vector subsobs; // list of handles of stochastic observers +} CTAI_SObs_combine; + + +void CTA_SObs_combine_initialise(CTA_SObsClass *hsobscl); diff --git a/costa/native/cta/include/cta_sobs_netcdf.h b/costa/native/cta/include/cta_sobs_netcdf.h new file mode 100644 index 000000000..d906ec306 --- /dev/null +++ b/costa/native/cta/include/cta_sobs_netcdf.h @@ -0,0 +1,200 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_sobs_netcdf.h +\brief Netcdf user implementation of the stochastic observer interface. +*/ + +#ifndef CTA_SOBS_NETCDF_H +#define CTA_SOBS_NETCDF_H + +#include "cta_f77blas.h" +#include "cta_datatypes.h" +#include "cta_handles.h" +#include "cta_functions.h" +#include "cta_sobs.h" +#include "cta_obsdescr.h" +#if HAVE_LIBNETCDF +//#include +#endif + +typedef struct { +int nusers; +int ncid; +char *dbname; +} CTAI_OMI_database; + +typedef struct { + CTAI_OMI_database *database; + int nstations; + double timeoffset; + int nmeasr; + int nmeasr_orig; //number of observations in the netcdf-file + CTA_RelTable selectionReltab; + CTA_Time tspan; + float bb_lon[2]; + float bb_lat[2]; +} CTAI_SObs_netcdf; + + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Initialization function for defining the SObs_netcdf class. + * + * \return no return value + */ +CTANOEXPORT void CTA_SObs_netcdf_initialise(); + + +/** \brief Implementation function that is part of object create process + * + * \param memsize O receives size of new CTAI_SObs_netcdf object + * \param retval O receives return value + * \return no return value + */ +CTANOEXPORT void CTAI_SObs_netcdf_Create_Size( int *memsize,int *retval ); + + +/** \brief Implementation function that is part of object create process. + * + * \note userdata must contain handle of COSTA string giving the NETCDF database name.\n + * + * \param x IO pointer to object data of stochastic observer + * \param userdata I pointer to user data + * \param retval O receives return value + * \return no return value + */ +CTANOEXPORT void CTAI_SObs_netcdf_Create_Init(CTAI_SObs_netcdf *x, + CTA_Handle userdata, int *retval); + + +/** \brief Create a new stochastic observer that is subset of existing stochastic observer. + * + * \note userdata must contain handle of COSTA string giving the selection condition.\n + * \note This function fills in the object data of the new stochastic observer. The handle is created by CTA_SObs_CreateSel(). + * + * \param obsin I pointer to object data of the existing stochastic observer + * \param userdata I pointer to userdata + * \param obsout O pointer to object data of the new COSTA-stochastic observer, must exist before calling + * \param retval I receives return value + * \return no return value + */ +CTANOEXPORT void CTAI_SObs_netcdf_CreateSel( CTAI_SObs_netcdf *obsin, + CTA_Handle *userdata, CTAI_SObs_netcdf *obsout, int *retval); + + +/** \brief Count the number of measurements + * + * \param x I pointer to object data of stochastic observer + * \param nmeasr O receives number of measurements + * \param retval O receives the return value + * \return no return value + */ +CTANOEXPORT void CTAI_SObs_netcdf_Count( CTAI_SObs_netcdf *x, int *nmeasr, + int *retval); + + +/** \brief Get a vector with the measurements. + * + * \param x I pointer to object data of stochastic observer + * \param hvec O handle of vector that receives the measurements, must exist before calling + * \param retval O receives the return value + * \return no return value + */ +CTANOEXPORT void CTAI_SObs_netcdf_GetVals( CTAI_SObs_netcdf *x, CTA_Vector *hvec, + int *retval); + + +/** \brief Get times associated with the measurements. + * + * \param x I pointer to object data of stochastic observer + * \param hvec O handle of vector that receives the associated times, must exist before calling + * \param retval O receives the return value + * \return no return value + */ +CTANOEXPORT void CTAI_SObs_netcdf_GetTimes( CTAI_SObs_netcdf *x, CTA_Vector *hvec, + int *retval); + + +/** \brief Get variance of probability density function of the mesurements. + * + * \param x I pointer to object data of stochastic observer + * \param hvec I handle of the vector that receives the variances, must exist before calling + * \param returnvar O flag: TRUE for getting variance, FALSE for getting standard deviation + * \param retval O receives return value + * \return no return value + */ +CTANOEXPORT void CTAI_SObs_netcdf_GetVariances( CTAI_SObs_netcdf *x, + CTA_Vector *hvec, int *returnvar, int *retval); + + +/** \brief Draw random values (measurements) according to the probability density + * function of the mesurements. + * + * \param x I pointer to object data of stochastic observer + * \param hvec O handle of vector that receives the draw (measurements); must exist before calling + * \param retval O receives return value + * \return no return value + */ +CTANOEXPORT void CTAI_SObs_netcdf_GetRealisation( CTAI_SObs_netcdf *x, + CTA_Vector *hvec, int *retval); + + +/** \brief Get covariance matrix of probability density function of the measurements. + * + * \param x I pointer to object data of stochastic observer + * \param hmat O handle of matrix that receives the covariance matrix; must exist before calling + * \param retval O receives return value + * \return no return value + */ +CTANOEXPORT void CTAI_SObs_netcdf_GetCovMat( CTAI_SObs_netcdf *x, CTA_Matrix *hmat, + int *retval); + + +/** \brief Export a CTAI_SObs object to file or standard output. + * + * \note Supported:\n + * output to file (userdata must contain handle of COSTA file)\n + * + * \param x I pointer to object data of stochastic observer + * \param userdata I user data + * \param retval O receives return value + * \return no return value + */ +CTANOEXPORT void CTAI_SObs_netcdf_export( CTAI_SObs_netcdf *x, CTA_Handle *userdata, + int *retval); + + +/** \brief Free the object data and associated resources. + * + * \param x I pointer to object data of stochastic observer + * \param retval O receives return value + * \return no return value + */ +CTANOEXPORT void CTAI_SObs_netcdf_Free( CTAI_SObs_netcdf *x, int *retval); + +#ifdef __cplusplus +extern } +#endif + +#endif + diff --git a/costa/native/cta/include/cta_sobs_sqlite3.h b/costa/native/cta/include/cta_sobs_sqlite3.h new file mode 100644 index 000000000..cd9902b28 --- /dev/null +++ b/costa/native/cta/include/cta_sobs_sqlite3.h @@ -0,0 +1,188 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_sobs_sqlite3.h +\brief SQLite3 user implementation of the stochastic observer interface. +*/ + +#ifndef CTA_SOBS_SQLITE3_H +#define CTA_SOBS_SQLITE3_H + +#include "cta_f77blas.h" +#include "cta_datatypes.h" +#include "cta_handles.h" +#include "cta_functions.h" +#include "cta_util_sqlite3.h" +#include "cta_sobs.h" +#include "cta_obsdescr.h" +#include + +typedef struct { +CTAI_util_sqlite3_database *database; +char* condition; +int nstations; +int nmeasr; +int * stations; +} CTAI_SObs_sqlite3; + + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Initialization function for defining the SObs_sqlite3 class. + * + * \return no return value + */ +CTANOEXPORT void CTA_SObs_sqlite3_initialise(); + + +/** \brief Implementation function that is part of object create process + * + * \param memsize O receives size of new CTAI_SObs_sqlite3 object + * \param retval O receives return value + * \return no return value + */ +void CTAI_SObs_sqlite3_Create_Size( int *memsize,int *retval ); + + +/** \brief Implementation function that is part of object create process. + * + * \note userdata must contain handle of COSTA string giving the SQLITE3 database name.\n + * + * \param x IO pointer to object data of stochastic observer + * \param userdata I pointer to user data + * \param retval O receives return value + * \return no return value + */ +void CTAI_SObs_sqlite3_Create_Init(CTAI_SObs_sqlite3 *x, + CTA_Handle userdata, int *retval); + + +/** \brief Create a new stochastic observer that is subset of existing stochastic observer. + * + * \note userdata must contain handle of COSTA string giving the selection condition.\n + * \note This function fills in the object data of the new stochastic observer. The handle is created by CTA_SObs_CreateSel(). + * + * \param obsin I pointer to object data of the existing stochastic observer + * \param userdata I pointer to userdata + * \param obsout O pointer to object data of the new COSTA-stochastic observer, must exist before calling + * \param retval I receives return value + * \return no return value + */ +void CTAI_SObs_sqlite3_CreateSel( CTAI_SObs_sqlite3 *obsin, + CTA_Handle *userdata, CTAI_SObs_sqlite3 *obsout, int *retval); + + +/** \brief Count the number of measurements + * + * \param x I pointer to object data of stochastic observer + * \param nmeasr O receives number of measurements + * \param retval O receives the return value + * \return no return value + */ +void CTAI_SObs_sqlite3_Count( CTAI_SObs_sqlite3 *x, int *nmeasr, + int *retval); + + +/** \brief Get a vector with the measurements. + * + * \param x I pointer to object data of stochastic observer + * \param hvec O handle of vector that receives the measurements, must exist before calling + * \param retval O receives the return value + * \return no return value + */ +void CTAI_SObs_sqlite3_GetVals( CTAI_SObs_sqlite3 *x, CTA_Vector *hvec, + int *retval); + + +/** \brief Get times associated with the measurements. + * + * \param x I pointer to object data of stochastic observer + * \param hvec O handle of vector that receives the associated times, must exist before calling + * \param retval O receives the return value + * \return no return value + */ +void CTAI_SObs_sqlite3_GetTimes( CTAI_SObs_sqlite3 *x, CTA_Vector *hvec, + int *retval); + + +/** \brief Get variance of probability density function of the mesurements. + * + * \param x I pointer to object data of stochastic observer + * \param hvec I handle of the vector that receives the variances, must exist before calling + * \param returnvar O flag: TRUE for getting variance, FALSE for getting standard deviation + * \param retval O receives return value + * \return no return value + */ +void CTAI_SObs_sqlite3_GetVariances( CTAI_SObs_sqlite3 *x, + CTA_Vector *hvec, int *returnvar, int *retval); + + +/** \brief Draw random values (measurements) according to the probability density + * function of the mesurements. + * + * \param x I pointer to object data of stochastic observer + * \param hvec O handle of vector that receives the draw (measurements); must exist before calling + * \param retval O receives return value + * \return no return value + */ +void CTAI_SObs_sqlite3_GetRealisation( CTAI_SObs_sqlite3 *x, + CTA_Vector *hvec, int *retval); + + +/** \brief Get covariance matrix of probability density function of the measurements. + * + * \param x I pointer to object data of stochastic observer + * \param hmat O handle of matrix that receives the covariance matrix; must exist before calling + * \param retval O receives return value + * \return no return value + */ +void CTAI_SObs_sqlite3_GetCovMat( CTAI_SObs_sqlite3 *x, CTA_Matrix *hmat, + int *retval); + + +/** \brief Export a CTAI_SObs object to file or standard output. + * + * \note Supported:\n + * output to file (userdata must contain handle of COSTA file)\n + * + * \param x I pointer to object data of stochastic observer + * \param userdata I user data + * \param retval O receives return value + * \return no return value + */ +void CTAI_SObs_sqlite3_export( CTAI_SObs_sqlite3 *x, CTA_Handle *userdata, + int *retval); + + +/** \brief Free the object data and associated resources. + * + * \param x I pointer to object data of stochastic observer + * \param retval O receives return value + * \return no return value + */ +void CTAI_SObs_sqlite3_Free( CTAI_SObs_sqlite3 *x, int *retval); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/costa/native/cta/include/cta_string.h b/costa/native/cta/include/cta_string.h new file mode 100644 index 000000000..b60cc31be --- /dev/null +++ b/costa/native/cta/include/cta_string.h @@ -0,0 +1,184 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_string.h +\brief Interface description of the default COSTA string component. + +Utilities including most of the basic string operations and access to the string data itself. +*/ + +#ifndef CTA_STRING_H +#define CTA_STRING_H +#include "cta_system.h" +#include "cta_handles.h" +#include "cta_datatypes.h" + +/* String instance handle declared in cta_handles.h */ +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Create a new COSTA string instance. + * + * \param hstring O handle of created string + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_String_Create(CTA_String *hstring); + +/** \brief Create a new COSTA string that is a copy of an existing one + * + * \param hto O receives the handle of the created string + * \param hfrom I handle of string to copy + * + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_String_Copy(CTA_String *hto, CTA_String hfrom); + +/** \brief Free the COSTA string instance. + * + * \note + * + * \param hstring IO handle of the string instance, replaced by CTA_NULL on return + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_String_Free(CTA_String *hstring); + +/** \brief Get the number of characters in string. + * + * \note The returned length is the number of characters excluding the + * 0-character. + * + * \param hstring I handle of the string + * \param len O receives the number of characters in string + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_String_GetLength(CTA_String hstring, int *len); + +/** \brief Set the string to new content. + * + * \param hstring IO handle of the string + * \param str I new content + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_String_Set(CTA_String hstring, const char *str); + +/** \brief Get a copy of the string. + * + * \note It is the responsibility of the caller making str large enough to + * hold the string and trailing 0-character. + * + * \param hstring I handle of the string + * \param str O buffer that receives a copy of the string including trailing 0-character + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_String_Get(CTA_String hstring, char *str); + +/** \brief Get the (scalar) value of a string + * + * \note It is the responsibility of the caller that parameter value is large enough to + * hold the value as specified by the datatype. + * + * \param hstring I handle of the string + * \param value O receives the value + * \param datatype I data type of value + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_String_GetValue(CTA_String hstring, void *value, CTA_Datatype datatype); + + +/** \brief Create new string that is a concatination of existing strings. + * + * \param istring IO handle of the string (first string in concatination) + * and whole concatinated string on return + * \param xstring I handle of the second string (extension string) + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_String_Conc(CTA_String istring, CTA_String xstring); + + +/** \brief Get a pointer to the contents of the string (INTERNAL USE) + * + * \param hstring I handle of the string + * \return pointer to the string contents + */ +CTAEXPORT char* CTAI_String_GetPtr(CTA_String hstring); + + +/** \brief Imports string. + * + * Supports: pack objects (usrdata must be handle of pack object to import from) + * + * \param hstring IO handle of the string + * \param usrdata I configuration of import + * \return CTA_OK if successful + * + * \note Only CTA_Pack is currently supported fot usrdata + */ +CTAEXPORT int CTA_String_Import(CTA_String hstring, CTA_Handle usrdata); + +/** \brief Exports length of string and string itself. + * + * Supports: pack objects (usrdata must be handle of pack object to export to) + * + * \param hstring I handle of the string + * \param usrdata IO configuration of export + * \return CTA_OK if successful + * + * \note Only CTA_Pack is currently supported fot usrdata + */ +CTAEXPORT int CTA_String_Export(CTA_String hstring, CTA_Handle usrdata); + +/** \brief Check whether string is equal to COSTA string. + * + * + * \param hstring I handle of the string + * \param str0 I string to compare hsting with + * \return CTA_TRUE/CTA_FALSE + * + * \note Only CTA_Pack is currently supported fot usrdata + */ +CTAEXPORT int CTA_String_Equals_Char(CTA_String hstring, const char *str0); + +/** \brief Check whether two COSTA strings are equal. + * + * + * \param hstring1 I handle of first string + * \param hstring2 I handle of second string + * \return CTA_TRUE/CTA_FALSE + * + * \note Only CTA_Pack is currently supported fot usrdata + */ +CTAEXPORT int CTA_Strings_Equal(CTA_String hstring1, CTA_String hstring2); + + +/** \brief Create a duplication of a COSTA string + * + * \param hfrom I handle of string to copy + * \param hto O handle of created string + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_String_Duplicate(CTA_String hfrom, CTA_String *hto); + +/*=====================================================================*/ +#ifdef __cplusplus +} +#endif +#endif + diff --git a/costa/native/cta/include/cta_system.h b/costa/native/cta/include/cta_system.h new file mode 100644 index 000000000..9b7c4a312 --- /dev/null +++ b/costa/native/cta/include/cta_system.h @@ -0,0 +1,71 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2007 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_system.h +\brief Macros for handing system dependent issues +*/ + +#ifndef CTA_SYSTEM_H +#define CTA_SYSTEM_H + +#define CTANOEXPORT /* does not do anything yet, but it makes clear which routines are internal */ +#define CTANOF90 /* does not do anything yet, but it makes clear if no fortran90 header is needed */ +#ifdef WIN32 + + #ifdef CTALIB + #define CTAEXPORT __declspec(dllexport) + #else + #define CTAEXPORT __declspec(dllimport) + #endif + + #ifdef FTN_CAPITAL + #define F77_FUNC(X,Y) Y + #define F77_FUNC_NOEXP(X,Y) Y + #else + #define F77_FUNC(X,Y) X + #define F77_FUNC_NOEXP(X,Y) X + #endif + + #ifdef FTN_CAPITAL + #define F77_CALL(X,Y) __cdecl Y + #define CF77_CALL(X,Y) Y + #else + #define F77_CALL(X,Y) __cdecl X + #define CF77_CALL(X,Y) X + #endif + + +#else + + #define CTAEXPORT + #ifdef FTN_CAPITAL + #define F77_FUNC(X,Y) Y ## _ + #define F77_CALL(X,Y) Y ## _ + #define CF77_CALL(X,Y) Y ## _ + #else + #define F77_FUNC(X,Y) X ## _ + #define F77_CALL(X,Y) X ## _ + #define CF77_CALL(X,Y) X ## _ + #endif + +#endif + +#endif + diff --git a/costa/native/cta/include/cta_time.h b/costa/native/cta/include/cta_time.h new file mode 100644 index 000000000..f62b4147b --- /dev/null +++ b/costa/native/cta/include/cta_time.h @@ -0,0 +1,189 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_time.h +\brief Interface description of the default COSTA time component. + +A time object describes a time span (time of start, time of end). Utility functions are provided for changing and evaluating or comparing time spans. +*/ + +#ifndef CTA_TIME_H +#define CTA_TIME_H +#include "cta_system.h" +#include "cta_handles.h" +#include "cta_datatypes.h" +/* Function Handle */ +#ifdef __cplusplus +extern "C" { +#endif + +typedef CTA_Handle CTA_Time; + +/* functions */ + +/** \brief Create a time object. + * + * \param htime O receives handle of newly created time object + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Time_Create(CTA_Time *htime); + +/** \brief Free a time object. + * + * \param htime IO handle of time object to be freed, replaced by CTA_NULL on return. + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Time_Free(CTA_Time *htime); + +/** \brief Set the time span. + * + * \param htime IO time object of which to set time span + * \param tstart I starting time + * \param tend I ending time + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Time_SetSpan(CTA_Time htime,double tstart, double tend); + +/** \brief Get the time span. + * + * \param htime I time object of which to get time span + * \param tstart O receives the starting time + * \param tend O receives ending time + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Time_GetSpan(CTA_Time htime,double *tstart, double *tend); + +/** \brief Set the time step. + * + * \param htime IO time object of which to set time step + * \param tstep I new time step + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Time_SetStep(CTA_Time htime,double tstep); + +/** \brief Get time step. + * + * \param htime IO time object of which to get time step + * \param tstep O receives time step + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Time_GetStep(CTA_Time htime,double *tstep); + +/** \brief Count number of timesteps in time + * + * \param htime I time object (see function description) + * \param nsteps O number of timesteps + * + * \return error status: CTA_OK if successful + * + * \note number of steps is rounded to nearest integer + */ +CTAEXPORT int CTA_Time_CountSteps(CTA_Time htime, int *nsteps); + +/** \brief Get interval of i-th step + * + * \param htime I time object (see function description) + * \param istep I interval of step + * \param hstep O time step of model + * + * \return error status: CTA_OK if successful + * + * \note intervals are counted from 1 to nsteps + */ +CTAEXPORT int CTA_Time_GetTimeStep(CTA_Time htime, int istep, CTA_Time hstep); + +/** \brief Check whether htimesub is within time span of htime. + * + * \param htimesub I time object (see function description) + * \param htime I time object (see function description) + * \param inspan O receives TRUE if htimesub is within time span of htime or FALSE otherwise + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Time_InSpan(CTA_Time htimesub, CTA_Time htime, BOOL *inspan); + +/** \brief Check whether time step of time object equals t + * + * \param htime I time object + * \param t I time step to compare + * \param isstep O receives TRUE if t equals time step of time object or FALSE otherwise + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Time_IsStep(CTA_Time htime, double t, BOOL *isstep); + +/** \brief Copy a time object. + * + * \param hfrom I time object to copy from + * \param hto O handle of time object that receives copy, must exist before calling + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Time_Copy(CTA_Time hfrom, CTA_Time hto); + +/** \brief Export a time object. + * exports the whole internal state of the time object to given target + * CTA_FILE will export the time component in a MATLAB/OCTAVE readable form + * CTA_PACK will pack the content + * + * \param htime I time object to export + * \param hexport I target for export (CTA_FILE or CTA_PACK) + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Time_Export(CTA_Time htime, CTA_Handle hexport); + +/** \brief Import a time object. + * imports the whole internal state of the time object from given source + * + * \param htime I time object to import to + * \param himport I source of import (CTA_PACK) + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Time_Import(CTA_Time htime, CTA_Handle himport); + + +/** \brief Returns whether time object describes an timespan or a single + * instance. + * + * \param htime I time object to import to + * \param isspan O time object is a time timespan (CTA_TRUE/CTA_FALSE) + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Time_IsSpan(CTA_Time htime, int *isspan); + + + + + + + + + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/costa/native/cta/include/cta_tree.h b/costa/native/cta/include/cta_tree.h new file mode 100644 index 000000000..25d6472cc --- /dev/null +++ b/costa/native/cta/include/cta_tree.h @@ -0,0 +1,187 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_tree.h + +\brief Interface description of the COSTA tree component. + +Store data in a CTA_Tree object in tree form. Access the members in the following way: + branch1\\subbranch\\member or branch1/branch2/member +*/ + +#ifndef CTA_TREE_H +#define CTA_TREE_H +#include "cta_system.h" +#include "cta_handles.h" +#include "cta_datatypes.h" + +/* Tree instance handle */ +typedef CTA_Handle CTA_Tree; +#ifdef __cplusplus +extern "C" { +#endif +/** \brief Create a new COSTA tree instance + * + * \note + * + * \param htree O receives handle of created tree + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_Tree_Create(CTA_Tree *htree); + + +/** \brief Free the COSTA tree instance + * + * \note + * + * \param htree IO handle of the tree instance, replaced by CTA_NULL on return + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_Tree_Free(CTA_Tree *htree); + + +/** \brief Add a COSTA handle to the COSTA tree + * + * \note + * + * \param htree IO handle of the tree object (parent) + * \param name I name of the COSTA item + * \param hitem I handle of the COSTA item to add (do not free the object after adding it to the tree) + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_Tree_AddHandle(CTA_Tree htree, const char *name, CTA_Handle hitem); + + +/** \brief Count the number of COSTA handles specified by the given path. + * + * \param htree I handle of the tree object + * \param path I path of the item, separated by / or \\ + * \param count O receives the number of items found + * \return CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found + */ +CTAEXPORT int CTA_Tree_CountHandles(CTA_Tree htree, CTA_String path, int *count); + +/** \brief Count the number of COSTA handles specified by the given path. + * + * \param htree I handle of the tree object + * \param path I path of the item, separated by / or \ + * \param count O receives the number of items found + * \return CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found + */ +CTAEXPORT int CTA_Tree_CountHandlesStr(CTA_Tree htree, char *path, int *count); + + +/** \brief Get a COSTA handle from the COSTA tree (by path) + * + * \note In case of trees with default values, returns the default value. + * \note The returned handle must not be freed. + * + * \param htree I handle of the tree object + * \param path I path of the item, separated by / or \\ + * \param hitem O receives the handle of the COSTA item, or CTA_NULL in case not found, do not free this handle. + * \return CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found + */ +CTAEXPORT int CTA_Tree_GetHandle(CTA_Tree htree, CTA_String path, CTA_Handle *hitem); + + +/** \brief Get the value of a COSTA handle from the COSTA tree (by path) + * + * \note In case of trees with default values, returns the default value. + * + * \param htree I handle of the tree object + * \param path I COSTA string describing path of the item, separated by / or \ + * \param value O receives the value of the COSTA item, or CTA_NULL in case of not found + * \param datatype I data type of parameter value, must be the same as item in tree + * \return CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found + */ +CTAEXPORT int CTA_Tree_GetValue(CTA_Tree htree, CTA_String path, void *value, CTA_Datatype datatype); + +/** \brief Get a COSTA handle from the COSTA tree (by path) + * + * \note In case of trees with default values, returns the default value. + * \note The returned handle must not be freed. + * + * \param htree I handle of the tree object + * \param str I C string describing path of the item, separated by / or \ + * \param hitem O receives the handle of the COSTA item, or CTA_NULL in case of not found, do not free this handle + * \return CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found + */ +CTAEXPORT int CTA_Tree_GetHandleStr(CTA_Tree htree, char* str, CTA_Handle *hitem); + + +/** \brief Get the value of a COSTA handle from the COSTA tree (by path) + * + * \note In case of trees with default values, returns the default value. + * + * \param htree I handle of the tree instance + * \param str I C string describing path of the item, separated by / or \ + * \param value O receives the value of the COSTA item, or CTA_NULL in case of not found + * \param datatype I data type of the value specified + * \return CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found + */ +CTAEXPORT int CTA_Tree_GetValueStr(CTA_Tree htree, char* str, void *value, CTA_Datatype datatype); + + +/** \brief Count the number of elements on the current level of the COSTA tree + * + * \param htree I handle of the tree level + * \param count O receives the number of elements on the current tree level + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_Tree_CountItems(CTA_Tree htree, int *count); + + +/** \brief Get a handle (by index) on the current level of the COSTA tree + * + * \param htree I handle of the tree level + * \param index I index of the item to return, 1 <= index <= CTA_Tree_CountItems() + * \param hitem O receives handle of the item at given index + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_Tree_GetItem(CTA_Tree htree, int index, CTA_Handle *hitem); + + +/** \brief Get the value of a COSTA handle from the COSTA tree (by index) + * + * \note In case of trees with default values, returns the default value. + * + * \param htree I handle of the tree instance + * \param index I index of the item + * \param value O receives value of the COSTA item, or CTA_NULL in case of not found + * \param datatype I data type of the value specified + * \return CTA_OK if successful or CTA_ITEM_NOT_FOUND in case not found + */ +CTAEXPORT int CTA_Tree_GetItemValue(CTA_Tree htree, int index, void *value, CTA_Datatype datatype); + + +/** \brief Print a COSTA tree to STDOUT + * + * \note + * + * \param htree I handle of the tree + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_Tree_Print(CTA_Tree htree); + +/*=====================================================================*/ +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/cta/include/cta_treevector.h b/costa/native/cta/include/cta_treevector.h new file mode 100644 index 000000000..cd4c8ab45 --- /dev/null +++ b/costa/native/cta/include/cta_treevector.h @@ -0,0 +1,512 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#ifndef CTA_TREEVECTOR_H +#define CTA_TREEVECTOR_H +#include "cta_system.h" +#include "cta_datatypes.h" +#include "cta_handles.h" +#include "cta_vector.h" +#include "cta_matrix.h" +#include "cta_metainfo.h" + +/** +\file cta_treevector.h +\brief Interface description of the COSTA default tree-vector component. + +The tree-vector is an extension of a vector component. A tree-vector +either contains a single vector or is a concatenation of a number of +tree-vectors, called sub-tree-vectors in this context. The usage of +sub-tree-vectors makes is possible to concatenate models or extend models +as is done when a deterministic model is extended into a stochastic model. +The sub-tree-vectors are also very useful inside the model source code where the +whole state of the model is not represented by a single vector. +The default tree-vector component uses COSTA vector-components for storing the values. + +Each (sub-)tree-vector has a tag. Tree-vectors having the same tag are considered +to be the same, meaning they have the same buildup in sub-tree-vectors, length and +datatypes. +*/ + + +/** + Type definition of a handle to a COSTA tree-vector instance +*/ +typedef CTA_Handle CTA_TreeVector; + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Create a tree-vector. + * + * \param name I name of the tree-vector, this is a human readable + * string used for (debug) output and not by the algorithms + * itself + * \param tag I tag of this tree-vector + * \param treevec O new tree-vector + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_Create(const char *name, const char *tag, CTA_TreeVector *treevec); + +/** \brief Duplicate a tree-vector. + * + * \note Duplication means that a new tree-vector is created that is identical to + * the originating tree-vector. All data in the original tree-vector is also copied. + * + * \param treevec1 I handle of treevector to be duplicated + * \param treevec2 O receives handle to duplicate + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_Duplicate(CTA_TreeVector treevec1, CTA_TreeVector *treevec2 ); + + +/** \brief Define a tree-vector to be a concatination of other tree-vectors. + * + * \note The concatenation is done by reference (handle). The sub-tree-vectors that + * are concatenated are not copied. + * + * \param treevec1 I tree-vector that will be concatenation of the sub-tree-vectors provided in parameter treevecs + * \param treevecs I array of the sub-tree-vectors + * \param ntreevecs I number of sub-tree-vectors in treevecs + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_Conc(CTA_TreeVector treevec1, CTA_TreeVector *treevecs, int ntreevecs); + +/** \brief Get the handle of a sub-tree-vectors using its tag. + * + * \note This is done by reference (handle). The handle of the + * returned sub-tree-vector is not a copy + * + * \param treevec I Tree-vector + * \param tag I tag of the requested sub-tree-vector + * \param subtreevec O receives handle of the requested sub-tree-vectors, this is by reference, not a copy + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_GetSubTreeVec(CTA_TreeVector treevec, const char *tag, CTA_TreeVector *subtreevec); + +/** \brief Get the tag of a sub-tree-vector using its index (starting with 0). + * + * \param treevec I Tree-vector + * \param index I index of the requested sub-tree-vector + * \param tag O String of standard length containnig the tag + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_GetSubTreeVecId(CTA_TreeVector treevec, int index, char tag[CTA_STRLEN_TAG]); + + +/** \brief Get the handle of a first-layer sub-tree-vector using its index. + * + * \note The concatination is done by reference (handle). The handle of the + * returned sub-tree-vector is not a copy + * + * \param treevec I Tree-vector + * \param index I index of requested sub-tree-vector. Note that the first sub-tree-vector has index 1. + * \param subtreevec O receives handle of the requested sub-tree-vector, this is by reference, not a copy + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_GetSubTreeVecIndex(CTA_TreeVector treevec, int index, + CTA_TreeVector *subtreevec); + +/** \brief Get number of sub-treevectors + * + * \note In case of a leaf 0 is returned + * + * \param treevec I Tree-vector + * \param numSubTrees O Number of sub-treevectors + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_GetNumSubTree(CTA_TreeVector treevec, int* numSubTrees); + + +/** \brief Get the tag of the tree-vector. + * + * Note tag should be large enough to hold the result + * length of CTA_STRLEN_TAG is always save (no internal protection) + * + * \param treevec I Tree-vector + * \param tag O receives the tag of the requested sub-tree-vector (see note) + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_GetTag(CTA_TreeVector treevec, char *tag); + + +/** \brief Set the values of the tree-vector + * + * \note This operation is only possible when all data elements in the tree-vector + * are of the same type and the size of the tree-vector corresponds to the + * size of the input vector. + * + * \param treevec IO TreeVector + * \param hvec I handle of the vector containing new values (see note) + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_SetVec(CTA_TreeVector treevec, CTA_Vector hvec); + +/** \brief Get the values of the tree-vector. + * + * \note This operation is only possible when all data elements in the tree-vector + * are of the same type and the size of the tree-vector corresponds to the + * vector size. + * + * \param treevec I Tree-vector + * \param hvec O Vector that is receiving the values; must exist before calling + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_GetVec(CTA_TreeVector treevec, CTA_Vector hvec); + +/** \brief Axpy operation between two tree-vectors. + * + * \note Axpy: y=alpha*x+y. Add alpha times tree-vector x to + * this tree-vector (y). + * + * \param y IO Tree-vector (y) + * \param alpha I scalar + * \param x I Tree-vector (x) + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_Axpy(CTA_TreeVector y, double alpha, CTA_TreeVector x); + +/** \brief Compute dot product of two tree-vectors. + * + * \note dotprod = sum[all i] (treevec1_i * treevec2_i) + * + * \param treevec1 I first tree-vector + * \param treevec2 I second tree-vector + * \param dotprod O receives the dot product + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_Dot(CTA_TreeVector treevec1, CTA_TreeVector treevec2, double *dotprod); + +/** \brief Compute the 2-norm of a tree-vector. + * + * \param treevec1 I Tree-vector + * \param nrm2 O receives the 2-norm + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_Nrm2(CTA_TreeVector treevec1, double *nrm2); + + +/** \brief Copy a tree-vector + * + * \note The two tree-vectors must be compatible: same structure and datatypes. + * + * \param treevec1 I sending tree-vector + * \param treevec2 O receiving tree-vector + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_Copy(CTA_TreeVector treevec1, CTA_TreeVector treevec2); + +/** \brief Set whole tree-vector equal to a constant value. + * + * \note This method can only be used if all elements of the tree-vector + * have the same data type. + * + * \param treevec IO TreeVector + * \param val I value to set + * \param datatype I data type of val, must be same as data type of tree-vector + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_SetConstant(CTA_TreeVector treevec, void *val,CTA_Datatype datatype); + +/** \brief Scale tree-vector. + * + * \param treevec IO handle of tree-vector + * \param alpha I scalar + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_Scal(CTA_TreeVector treevec, double alpha); + +/** \brief Set all values of the tree-vector. + * + * \note This method can only be used if all elements of the tree-vector + * are of the same data type. + * + * \param treevec IO Tree-vector + * \param val I values to be set + * \param nval I number of elements in val + * \param datatype I data type of *val, must be the same as data type of elements in tree-vector + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_SetVals(CTA_TreeVector treevec, void *val,int nval, CTA_Datatype datatype); + + +/** \brief Get all values of the tree-vector. + * + * \note This method can only be used if all elements of the tree-vector + * are of the same data type. + * + * \param treevec I Tree-vector + * \param val O receives the values + * \param nval I number of elements in val + * \param datatype I data type of *val, must be the same as data type of elements in tree-vector + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_GetVals(CTA_TreeVector treevec, void *val,int nval,CTA_Datatype datatype); + +/** \brief Set single value of the tree-vector. + * + * \param treevec IO Tree-Vector + * \param i I index of value in tree-vector + * \param val I value to be set + * \param datatype I data type of *val, must be the same as data type of element in tree-vector + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_SetVal(CTA_TreeVector treevec, int i, void *val, CTA_Datatype datatype); + + +/** \brief Get single value of the tree-vector. + * + * \param treevec I Tree-vector + * \param i I index in value in tree-vector + * \param val O returned value + * \param datatype I data type of *val, must be the same as data type of element in tree-vector + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_GetVal(CTA_TreeVector treevec, int i, void *val,CTA_Datatype datatype); + +/** \brief Get size of tree-vector. + * + * \param treevec I Tree-vector + * \param n O receives size of tree-vector + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_GetSize(CTA_TreeVector treevec, int *n); + +/** \brief Export tree-vector. + * + * Can export tree-vector to file or pack object.\n + * usrdata must contain a handle of the file or pack object to be used.\n + * Dependency: CTA_Vector_Export() + * + * + * \param treevec I Tree-vector + * \param usrdata I export properties + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_Export(CTA_TreeVector treevec, CTA_Handle usrdata); + +/** \brief Import Tree-vector. + * + * Can import tree-vector from file or pack object.\n + * usrdata must contain a handle of the file or pack object to be used.\n + * Dependency: CTA_Vector_Import() + * + * + * \param treevec I Tree-vector + * \param usrdata I import properties + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_Import(CTA_TreeVector treevec, CTA_Handle usrdata); + +/** \brief Import Tree-vector as flat vector. + * + * Can import tree-vector from netcdf file.\n + * usrdata must contain a handle of the file .\n + * Dependency: CTA_Vector_VImport() + * + * + * \param treevec I Tree-vector + * \param usrdata I import properties + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_VImport(CTA_TreeVector treevec, CTA_Handle usrdata); + + +/** \brief Free Tree-vector. + * + * \param treevec I handle of tree-vector + * \param recursive I also free all sub-tree-vectors, yes: CTA_TRUE or no: CTA_FALSE + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_Free(CTA_TreeVector *treevec, int recursive); + +/** \brief Print tree-vector information. + * + * Gives following information:\n\n + * Tree-vector information:\n + * tag: [tag]\n + * nsubtreevecs: [number of sub-tree-vectors]\n + * + * If nsubtreevecs > 0: recursively prints all sub-tree-vectors + * Else prints:\n + * leaf: yes\n + * tree-vector size (leaf) + * + * \param treevec I tree-vector + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_Info(CTA_TreeVector treevec); + +/** \brief Perform the matrix multiplication C:=alpha*op(A)*op(B)+beta*C + where op(X)=X, X^T. However C and A are matrices of wich the columns are + tree-vectors + * + * \param sC IO array of tree-vector (matrix C) + * \param nc I number of columns of C (dimension of sC) + * \param transa I transpose flag CTA_TRUE/CTA_FALSE for matrix A (not supported) + * \param transb I transpose flag CTA_TRUE/CTA_FALSE for matrix B + * \param alpha I scalar + * \param sA I handle of matrix A + * \param na I number of columns of A (dimension of sA) + * \param mB I handle of matrix B + * \param beta I scalar + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_TreeVector_Gemm(CTA_TreeVector *sC, int nc, int transa, int transb, double alpha, CTA_TreeVector *sA, int na, + CTA_Matrix mB, double beta); + + +/** \brief Generate XML from one COSTA tree-vector +* +* \param treevec I handle of a COSTA tree-vector +* \param writer I the XML text writer +*/ +CTAEXPORT void CTAI_XML_WriteTreeVec(CTA_TreeVector treevec, xmlTextWriter *writer); + +/** \brief Create a COSTA tree-vector from XML. +* +* \param cur_node I Current XML node +* \return Handle to create or CTA_NULL in case of an error. +*/ +CTAEXPORT CTA_TreeVector CTAI_XML_CreateTreeVec(xmlNode *cur_node); + + +/** \brief Perform given operation on all leafs of the treevector +* +* \param treevec1 I handle of first COSTA tree-vector +* \param treevec2 I handle of second COSTA tree-vector +* \param treevec I handle of a COSTA tree-vector +* \param op I operation to perform on the leafs +* \param arg I additional argument of operation + * \return error status: CTA_OK if successful +*/ +CTAEXPORT int CTA_TreeVector_OpOnLeafs(CTA_TreeVector treevec1, CTA_TreeVector treevec2, CTA_Func op, CTA_Handle arg); + +/** \brief Elementwise division of two vectors +* \note y:=y./x +* +* \param y I handle of a COSTA tree-vector (y) +* \param x I handle of a COSTA tree-vector (y) + * \return error status: CTA_OK if successful +*/ +CTAEXPORT int CTA_TreeVector_ElmDiv(CTA_TreeVector y, CTA_TreeVector x); + +/** \brief Elementwise multiplication of two vectors +* \note y:=y.*x +* +* \param y I handle of a COSTA tree-vector (y) +* \param x I handle of a COSTA tree-vector (y) +* \return error status: CTA_OK if successful +*/ +CTAEXPORT int CTA_TreeVector_ElmProd(CTA_TreeVector y, CTA_TreeVector x); + +/** \brief Elementwise sqare root +* \note y:=sqrt(y) +* +* \param y I handle of a COSTA tree-vector (y) +* \return error status: CTA_OK if successful +*/ +CTAEXPORT int CTA_TreeVector_ElmSqrt(CTA_TreeVector y); + + +/** \brief Set nocompute flag of a sub-tree vector +* +* When this flag is set, the values of the sub-treevector will +* be ignored in all basic vector operations (including asking the +* total length of the tree-vector). This propertie is used for +* additionally adding some meta information +* +* \note the nocompute flag is set at the level of the parent! +* so the "isolated" sub-treevector can be used in basic vector +* operations. +* +* \param x I handle of a COSTA tree-vector (y) +* \param tag I tag of sub-treevector +* \return error status: CTA_OK if successful +*/ +CTAEXPORT int CTA_TreeVector_SetSubTreeNocompute(CTA_TreeVector x, const char *tag); + +/** \brief Increase the reference count of a treevector and all subtrevectors + * + * \param treevec I handle of a COSTA tree-vector +* \return error status: CTA_OK if successful + * + */ +CTAEXPORT int CTA_TreeVector_IncRefCount(CTA_TreeVector treevec); + + + +CTAEXPORT int CTA_TreeVector_SetMetainfo(CTA_TreeVector treevec, CTA_Metainfo minfo); +CTAEXPORT int CTA_TreeVector_GetMetainfo(CTA_TreeVector treevec, CTA_Metainfo minfo); + + +CTAEXPORT int CTAI_TreeVec_GetVecNumHandles(CTA_TreeVector treevec); + +CTAEXPORT int CTAI_TreeVec_List(CTA_TreeVector treevec, CTA_Vector taglist, int *indx); + +CTAEXPORT int CTA_TreeVector_List(CTA_TreeVector treevec, CTA_Vector taglist ); + +CTAEXPORT int CTA_TreeVector_GetVecNumHandles(CTA_TreeVector treevec); + +CTAEXPORT void CTAI_Treevector_Operation_ScaledRMS(char *tag, CTA_Vector v1, + CTA_Vector vscal, CTA_Handle hdum, int *retval); + +CTAEXPORT void CTAI_Treevector_Operation_Amax(char *tag, CTA_Vector v1, + CTA_Vector * v2, CTA_Handle hdum, int *retval); + +CTAEXPORT void CTAI_Treevector_Operation_PrintEntry(char *tag, CTA_Vector v1, + CTA_Vector v2, CTA_Handle hdum, int *retval); + +CTAEXPORT void CTAI_Treevector_Operation_ScaledSSQ(char *tag, CTA_Vector v1, + CTA_Vector vscal, CTA_Handle hdum, int *retval); + +CTAEXPORT void CTAI_Treevector_Operation_MaxAbs(char *tag, CTA_Vector v1, + CTA_Vector , CTA_Handle hdum, int *retval); + +#ifdef __cplusplus +} +#endif +#endif + diff --git a/costa/native/cta/include/cta_usr_matrix.h b/costa/native/cta/include/cta_usr_matrix.h new file mode 100644 index 000000000..dd1e0a234 --- /dev/null +++ b/costa/native/cta/include/cta_usr_matrix.h @@ -0,0 +1,456 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_usr_matrix.h + +\brief In this file a description is given of the interface of user matrix functions. +When creating your own user matrix class use the following as template. + +The Usr_Matrix to which is being referred in this template can be substituted by your own user matrix. + +Step 1: for creating your own user matrix class call the function CTA_Matrix_DefineClass(). + +Example: + +\code +typedef CTA_Handle CTA_MatrixClass; + +CTA_Func h_func[CTA_MATRIX_NUMFUNC]; +CTA_MatrixClass my_own_matrix_class; + +ierr=CTA_Func_Create(" ",&usr_matrix_create_size, hintf, &h_func[CTA_MATRIX_CREATE_SIZE]); +//...for all implementation functions... + +CTA_Matrix_DefineClass("classname", h_func, &my_own_matrix_class);\endcode + + + +Making a user matrix class involves the implementation of the following functions: + +CTA_MATRIX_CREATE_SIZE \n +CTA_MATRIX_CREATE_INIT \n +CTA_MATRIX_GETVALS \n +CTA_MATRIX_GETVAL \n +CTA_MATRIX_SETCOL \n +CTA_MATRIX_SETVALS \n +CTA_MATRIX_SETVAL \n +CTA_MATRIX_SETCONST \n +CTA_MATRIX_FREE \n +CTA_MATRIX_EXPORT \n +CTA_MATRIX_GER \n +CTA_MATRIX_INV \n +CTA_MATRIX_GEMV \n +CTA_MATRIX_GEMM \n +CTA_MATRIX_AXPY \n +CTA_MATRIX_NUMFUNC + +For creating an implementation function see documentation of CTA_Func_Create(). + +Step 2: to create an object of the newly defined matrix class call CTA_Matrix_Create() in the +same way as creating a CTA_Matrix but with a different class handle, i.e. the user class handle from step 1 above. + +Example: + +\code +Usr_Matrix usrmat; //user matrix object +int n = 10; +int m = 10; +CTA_Datatype datatype = CTAI_String2Type("CTA_STRING"); +CTA_Handle userdata = CTA_NULL; +CTA_Matrix_Create(my_own_matrix_class, m, n, datatype, &userdata, &usrmat); +\endcode +\n +Note 1: with object data is meant only the object itself including pointer(s) to its contents, but +not the contents of the matrix.\n\n +Note 2: matrix indices start from 1, m and n for rows and columns respectively.\n\n +*/ + +/* parameters for different functions */ +//#define CTA_MATRIX_CREATE_SIZE ( 1) +//#define CTA_MATRIX_CREATE_INIT ( 2) +//#define CTA_MATRIX_GETVALS ( 4) +//#define CTA_MATRIX_GETVAL ( 5) +//#define CTA_MATRIX_SETCOL ( 6) +//#define CTA_MATRIX_SETVALS ( 7) +//#define CTA_MATRIX_SETVAL ( 8) +//#define CTA_MATRIX_SETCONST ( 9) +//#define CTA_MATRIX_FREE (10) +//#define CTA_MATRIX_EXPORT (11) +//#define CTA_MATRIX_GER (12) +//#define CTA_MATRIX_INV (13) +//#define CTA_MATRIX_GEMV (14) +//#define CTA_MATRIX_GEMM (15) +//#define CTA_MATRIX_AXPY (16) +//#define CTA_MATRIX_NUMFUNC (17) + + + + +//#define CTA_MATRIX_CREATE_SIZE ( 1) + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \brief Implementation that forms part of the create process. + * + * Must give the memory size of a new user matrix object. + * + * Example: + * \code +//in header file: +typedef struct { + //your own user object data goes here... +}USR_MATRIX; + +//user implementation: +void usr_matrix_create_size(...){ + *memsize = sizeof(USR_MATRIX); + *retval = CTA_OK; +} + \endcode + * \note At index CTA_MATRIX_CREATE_SIZE in the function list of the class descriptor. + * + * \param m I dimension, m (rows) + * \param n I dimension, n (columns) + * \param datatype I data type of matrix elements + * \param userdata IO user data + * \param retval O must receive return value of user implementation function + * \param memsize O must receive the number of bytes which are necessary to store one + user matrix class, with a pointer to the contents (data), but without the + contents themselves + * \return no return value + */ +void usr_matrix_create_size( + int *m, + int *n, + CTA_Datatype *datatype, + void *userdata, + int *retval, + int *memsize + ); + + +//#define CTA_MATRIX_CREATE_INIT ( 2) +/** \brief Implementation that forms part of the create process. + * + * The user matrix object needs to be made ready for use. + * + * \note At index CTA_MATRIX_CREATE_INIT in the function list of the class descriptor. + * + * \param objectdata IO pointer to object data of user matrix + * \param m I dimension, m (rows) + * \param n I dimension, n (columns) + * \param datatype I data type of the matrix elements + * \param *userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_matrix_create_init( + Usr_Matrix *objectdata, + int *m, + int *n, + CTA_Datatype *datatype, + void *userdata, + int *retval + ); + + +//#define CTA_MATRIX_GETVALS ( 4) +/** \brief Implementation for getting all values from the user matrix. + * + * \note At index CTA_MATRIX_GETVALS in the function list of the class descriptor. + * + * \param objectdata I pointer to object data of user matrix + * \param vals O must receive values; must exist before calling + * \param m I number of rows of *vals, must be the same as number of rows in matrix + * \param n I number of columns of *vals, must be the same as number of columns in matrix + * \param datatype I data type of value; must be the same as data type of matrix elements + * \param *userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_matrix_getvals( + Usr_Matrix *objectdata, + void *vals, + int *m, + int *n, + CTA_Datatype *datatype, + void *userdata, + int *retval + ); + + +//#define CTA_MATRIX_GETVAL ( 5) +/** \brief Implementation for getting a single value from the user matrix. + * + * \note At index CTA_MATRIX_GETVAL in the function list of the class descriptor. + * + * \param objectdata I pointer to object data of user matrix + * \param val O must receive value; must exist before calling + * \param m I row index of matrix value to get + * \param n I column index of matrix value to get + * \param datatype I data type of val; must be the same as data type of matrix elements + * \param *userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_matrix_getval( + Usr_Matrix *objectdata, + void *val, + int *m, + int *n, + CTA_Datatype *datatype, + void *userdata, + int *retval + ); + + +//#define CTA_MATRIX_SETCOL ( 6) +/** \brief Implementation for setting column of user matrix. + * + * \note At index CTA_MATRIX_SETCOL in the function list of the class descriptor. + * + * \param objectdata IO pointer to object data of user matrix + * \param n I index of column to set + * \param hvec I handle of sending column; dimensions must be compatible + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_matrix_setcol( + Usr_Matrix *objectdata, + int *n, + CTA_Vector *hvec, + int *retval + ); + + +//#define CTA_MATRIX_SETVALS ( 7) +/** \brief Implementation for setting all values of user matrix. + * + * \note At index CTA_MATRIX_SETVALS in the function list of the class descriptor. + * + * \param objectdata IO pointer to object data of user matrix + * \param vals I values to set to + * \param m I number of rows of *vals, must be the same as number of rows in matrix + * \param n I number of columns of *vals, must be the same as number of columns in matrix + * \param datatype I data type of value; must be the same as data type of matrix elements + * \param *userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_matrix_setvals( + Usr_Matrix *objectdata, + void *vals, + int *m, + int *n, + CTA_Datatype *datatype, + void *userdata, + int *retval + ); + + +//#define CTA_MATRIX_SETVAL ( 8) +/** \brief Implementation for setting a single value of user matrix. + * + * \note At index CTA_MATRIX_SETVAL in the function list of the class descriptor. + * + * \param objectdata IO pointer to object data of user matrix + * \param val I value to set to + * \param m I row index of matrix value to set + * \param n I column index of matrix value to set + * \param datatype I data type of val; must be the same as data type of matrix elements + * \param *userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_matrix_setval( + Usr_Matrix *objectdata, + void *val, + int *m, + int *n, + CTA_Datatype *datatype, + void *userdata, + int *retval + ); + +//#define CTA_MATRIX_SETCONST ( 9) +/** \brief Implementation for setting all user matrix elements to constant value. + * + * \note At index CTA_MATRIX_SETCONST in the function list of the class descriptor. + * + * \param objectdata IO pointer to object data of user matrix + * \param val I constant value to set to + * \param datatype I data type of val; must be the same as data type of matrix elements + * \param *userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_matrix_setconst( + Usr_Matrix *objectdata, + void *val, + CTA_Datatype *datatype, + void *userdata, + int *retval + ); + +//#define CTA_MATRIX_FREE (10) +/** \brief Implementation for freeing the object data and associated resources. + * + * \note At index CTA_MATRIX_FREE in the function list of the class descriptor. + * + * \param objectdata IO pointer to object data of user matrix + * \param *userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_matrix_free( + Usr_Matrix *objectdata, + CTA_Handle *userdata, + int *retval + ); + +//#define CTA_MATRIX_EXPORT (11) +/** \brief Implementation for exporting user matrix. + * + * \note At index CTA_MATRIX_EXPORT in the function list of the class descriptor. + * + * \param objectdata I pointer to object data of user matrix + * \param *userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_matrix_export( + Usr_Matrix *objectdata, + CTA_Handle *userdata, + int *retval); + + +//#define CTA_MATRIX_GER (12) +/** \brief Implementation for applying the BLAS operation GER: A=A+(alpha)x(y(T)) + * + * i.e. for matrix A, vectors x and y and scalar alpha + * + * \note At index CTA_MATRIX_GER in the function list of the class descriptor. + * + * \param A IO pointer to object data of user matrix A; must exist before calling + * \param alpha I scalar + * \param vx I handle of vector x + * \param vy I handle of vector y + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_matrix_ger( + Usr_Matrix *A, + double *alpha, + CTA_Vector *vx, + CTA_Vector *vy, + int *retval); + +//#define (13) +/** \brief Implementation for inverting user matrix. + * + * \note At index CTA_MATRIX_INV in the function list of the class descriptor. + * + * \param A IO pointer to object data of user matrix + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_matrix_inv( + Usr_Matrix *A, + int *retval + ); + +//#define CTA_MATRIX_GEMV (14) +/** \brief Implementation for applying the BLAS operation GEMV: A=(alpha)Ax+(beta)y + * + * i.e. for matrix A, vectors x (optionally transposed) and y and scalars alpha and beta + * + * \note At index CTA_MATRIX_GEMV in the function list of the class descriptor. + * + * \param A IO pointer to object data of user matrix + * \param trans I transpose flag CTA_TRUE/CTA_FALSE for matrix A + * \param alpha I scalar + * \param vx I handle of vector x + * \param beta I scalar + * \param vy I handle of vector y + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_matrix_gemv( + Usr_Matrix *A, + int *trans, + double *alpha, + CTA_Vector *vx, + double *beta, + CTA_Vector *vy, + int *retval); + +//#define CTA_MATRIX_GEMM (15) +/** \brief Implementation for applying the BLAS operation GEMM: C=(alpha)AB+(beta)C + * + * i.e. for matrices A,B (both optionally transposed) and C, scalars alpha and beta + * + * \note At index CTA_MATRIX_GEMM in the function list of the class descriptor. + * + * \param C IO pointer to object data of user matrix C + * \param transa I transpose flag for matrix A (CTA_TRUE for transposing or CTA_FALSE otherwise) + * \param transb I transpose flag for matrix B (CTA_TRUE for transposing or CTA_FALSE otherwise) + * \param alpha I scalar + * \param A I pointer to object data of user matrix A + * \param B I pointer to object data of user matrix B + * \param beta I scalar + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_matrix_gemm( + Usr_Matrix *C, + int *transa, + int *transb, + double *alpha, + Usr_Matrix *A, + Usr_Matrix *B, + double *beta, + int *retval); + +//#define CTA_MATRIX_AXPY (16) +/** \brief Implementation for applying the BLAS operation AXPY: Y=Y+(alpha)X + * + * i.e. for matrices X and Y, scalar alpha + * + * \note At index CTA_MATRIX_AXPY in the function list of the class descriptor. + * + * \param y IO pointer to object data of user matrix y + * \param alpha I scalar + * \param x I pointer to object data of user matrix x + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_matrix_axpy( + Usr_Matrix *y, + double *alpha, + Usr_Matrix *x, + int *retval + ); + +#ifdef __cplusplus +} +#endif diff --git a/costa/native/cta/include/cta_usr_method.h b/costa/native/cta/include/cta_usr_method.h new file mode 100644 index 000000000..19ea485a3 --- /dev/null +++ b/costa/native/cta/include/cta_usr_method.h @@ -0,0 +1,151 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_usr_method.h + +\brief In this file a description is given of the interface of user method functions. +When creating your own user method class use the following as template. + +The Usr_Meth to which is being referred in this template can be substituted by your own user method object. + +Step 1: for creating your own user method class call the function CTA_Meth_DefineClass(). + +Example: + +\code +typedef CTA_Handle CTA_MethClass; + +CTA_Func h_func[CTA_METH_NUMFUNC]; +CTA_MethClass my_own_meth_class; + +ierr=CTA_Func_Create(" ",&usr_meth_create_size, hintf, &h_func[CTA_METH_CREATE_SIZE]); +//...for all implementation functions... + +CTA_Meth_DefineClass("classname", h_func, &my_own_meth_class);\endcode + +Making a user method class involves the implementation of the following functions: + +CTA_METH_CREATE_SIZE \n +CTA_METH_CREATE_INIT \n +CTA_METH_RUN \n +CTA_METH_FREE \n + +For creating an implementation function see documentation of CTA_Func_Create(). + +Step 2: to create an object of the newly defined method class call CTA_Meth_Create() in the +same way as creating a CTA_Meth object but with a different class handle, i.e. the user class handle from step 1 above. + +Example: + +\code +Usr_Meth usrmeth; //user method object +CTA_Handle userdata = CTA_NULL; +CTA_Meth_Create(my_own_meth_class, &userdata, &usrmeth); +\endcode +\n +Note 1: with object data is meant only the object itself including pointer(s) to its contents, but +not the contents themselves.\n\n +*/ + + +//#define CTA_METH_CREATE_SIZE ( 1) +/** \brief Implementation that forms part of the create process. + * + * Must give the memory size of a new method user object. + * + * Example: + * \code +//in header file: +typedef struct { + //your own user object data goes here... +}USR_METH; + +//user implementation: +void usr_meth_create_size(...){ + *memsize = sizeof(USR_METH); + *retval = CTA_OK; +} + \endcode + * + * \note At index CTA_METH_CREATE_SIZE in the function list of the class descriptor. + * + * \param memsize O must receive the number of bytes which are necessary to store one + user method class, with a pointer to the contents (data), but without the + contents themselves + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_meth_create_size(int *memsize, int *retval); + + + +//#define CTA_METH_CREATE_INIT ( 2) +/** \brief Implementation that forms part of the create process. + * + * The user method object needs to be made ready for use. + * + * \note At index CTA_METH_CREATE_INIT in the function list of the class descriptor. + * + * \param objectdata I pointer to user method object data + * \param userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_meth_create_init(Usr_Meth *objectdata, void *userdata, int *retval); + + + +//#define CTA_METH_RUN ( 3) +/** \brief Implementation for running user method. + * + * \note At index CTA_METH_RUN in the function list of the class descriptor. + * + * \param objectdata I pointer to user method object data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_meth_run(Usr_Meth *objectdata, int *retval); + + + +//#define CTA_METH_FREE ( 4) +/** \brief Implementation for freeing the object data and associated resources. + * + * \note At index CTA_METH_FREE in the function list of the class descriptor. + * + * \param objectdata I pointer to user method object data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_meth_free(Usr_Meth *objectdata, int *retval); + + + + + + + + + + + + + + diff --git a/costa/native/cta/include/cta_usr_model.h b/costa/native/cta/include/cta_usr_model.h new file mode 100644 index 000000000..0b53a6666 --- /dev/null +++ b/costa/native/cta/include/cta_usr_model.h @@ -0,0 +1,437 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_usr_model.h + +\brief In this file a description is given of the interface of user model functions. +When creating your own user model class use the following as template. + +The Usr_Model to which is being referred in this template can be substituted by your own user model object. + +Step 1: for creating your own user model class call the function CTA_Model_DefineClass(). + +Example: + +\code +typedef CTA_Handle CTA_ModelClass; + +CTA_Func h_func[CTA_MODEL_NUMFUNC]; +CTA_ModelClass my_own_model_class; + +ierr=CTA_Func_Create(" ",&usr_model_create_size, hintf, &h_func[I_CTA_MODEL_CREATE_SIZE]); +//...for all implementation functions... + +CTA_Model_DefineClass("classname", h_func, &my_own_model_class);\endcode + +Making a user method class involves the implementation of the following functions: + +CTA_MODEL_CREATE_SIZE \n +CTA_MODEL_CREATE_INIT \n +CTA_MODEL_FREE \n +CTA_MODEL_COMPUTE \n +CTA_MODEL_SET_STATE \n +CTA_MODEL_GET_STATE \n +CTA_MODEL_AXPY_STATE \n +CTA_MODEL_AXPY_MODEL \n +CTA_MODEL_SET_FORC \n +CTA_MODEL_GET_FORC \n +CTA_MODEL_AXPY_FORC \n +CTA_MODEL_SET_PARAM \n +CTA_MODEL_GET_PARAM \n +CTA_MODEL_AXPY_PARAM \n +CTA_MODEL_IMPORT \n +CTA_MODEL_EXPORT \n +CTA_MODEL_GET_STATESCALING + +For creating an implementation function see documentation of CTA_Func_Create(). + +Step 2: to create an object of the newly defined model class call CTA_Model_Create() in the +same way as creating a CTA_Model object but with a different class handle, i.e. the user class handle from step 1 above. + +Example: + +\code +Usr_Model usrmodel; //user model object +CTA_Handle userdata = CTA_NULL; +CTA_Model_Create(my_own_model_class, &userdata, &usrmodel); +\endcode +\n +Note 1: with object data is meant only the object itself including pointer(s) to its contents, but +not the contents themselves.\n\n +*/ + + +//#define CTA_MODEL_CREATE_SIZE +/** \brief Implementation that forms part of the create process. + * + * Must give the memory size of a new user model object. + * + * Example: + * \code +//in header file: +typedef struct { + //your own user object data goes here... +}USR_MODEL; + +//user implementation: +void usr_model_create_size(...){ + *memsize = sizeof(USR_MODEL); + *retval = CTA_OK; +} + \endcode + * + * \note At index I_CTA_MODEL_CREATE_SIZE in the function list of the class descriptor. + * + * \param userdata IO pointer to user data + * \param memsize O must receive the number of bytes which are necessary to store one + user method class, with a pointer to the contents (data), but without the + contents themselves + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_model_create_size(void *userdata, int *memsize, int *retval); + + +//#define CTA_MODEL_CREATE_INIT +/** \brief Implementation that forms part of the create process. + * + * The user model object needs to be made ready for use. + * + * \note At index CTA_MODEL_CREATE_INIT in the function list of the class descriptor. + * + * \param hmodel I handle of own model instance + * \param objectdata IO pointer to object data + * \param userdata IO pointer to user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_model_create_init(CTA_Model *hmodel, Usr_Model *objectdata, void *userdata, int *retval); + + +//#define CTA_MODEL_FREE +/** \brief Implementation for freeing the object data and associated resources. + * + * \note At index CTA_MODEL_FREE in the function list of the class descriptor. + * + * \param objectdata IO pointer to object data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_model_free(Usr_Model *objectdata, int *retval); + + +//#define CTA_MODEL_COMPUTE +/** \brief Implementation for computing a simulation of user model. A simulation is computed at the given time span. + * + * + * + * \note At index CTA_MODEL_COMPUTE in the function list of the class descriptor. + * + * \param objectdata IO pointer to object data + * \param htime I handle of time object describing time span of simulation + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_model_compute(Usr_Model *objectdata, CTA_Time *htime, int *retval); + + +//#define CTA_MODEL_SET_STATE +/** \brief Implementation for setting model state. + * + * \note At index I_CTA_MODEL_SET_STATE in the function list of the class descriptor. + * + * \param objectdata IO pointer to object data + * \param hstate I handle of tree-vector describing new model state + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_model_set_state(Usr_Model *objectdata, CTA_TreeVector *hstate, int *retval); + + +//#define CTA_MODEL_GET_STATE +/** \brief Implementation for getting a model state. + * + * \note At index I_CTA_MODEL_GET_STATE in the function list of the class descriptor. + * + * \param objectdata I pointer to object data + * \param hstate O handle of tree-vector (if CTA_NULL a new object must be created and caller is responsible for freeing) that must receive model state description + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_model_get_state(Usr_Model *objectdata, CTA_Handle *hstate, int *retval); + + +//#define CTA_MODEL_AXPY_STATE +/** \brief Implementation for: modelstate=alpha*state+modelstate + * + * \note At index CTA_MODEL_AXPY_STATE in the function list of the class descriptor. + * + * \note If not implemented, a default implementation will be used. + * \param objectdatay IO pointer to object data of model y + * \param alpha I scalar + * \param hstatex I handle of tree-vector x + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_model_axpy_state(Usr_Model *objectdatay, double *alpha, CTA_TreeVector *hstatex, int *retval); + + +//#define CTA_MODEL_AXPY_MODEL +/** \brief Implementation for: modelstatey=alpha*modelstatex+modelstatey + * + * \note At index CTA_MODEL_AXPY_MODEL in the function list of the class descriptor. + * + * \note If not implemented, a default implementation will be used. + * \param alpha I scalar + * \param objectdatax IO pointer to object data of model x + * \param objectdatay I pointer to object data of model y + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_model_axpy_model(Usr_Model *objectdatay, double *alpha, Usr_Model *objectdatax, int *retval); + + +//#define CTA_MODEL_SET_FORC +/** \brief Implementation for setting the forcing values of the user model + * + * \note At index CTA_MODEL_SET_FORC in the function list of the class descriptor. + * + * \param objectdata IO pointer to object data + * \param tspan I time span on which to set the forcing values + * \param forc I handle of tree-vector object with new forcings + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_model_set_forc(Usr_Model *objectdata, CTA_Time *tspan, CTA_Handle *forc, int *retval); + + +//#define CTA_MODEL_GET_FORC +/** \brief Implementation for getting the forcing values. + * + * \note At index CTA_MODEL_GET_FORC in the function list of the class descriptor. + * \note If forc equals CTA_NULL a new object is created, the caller is responsible for freeing it after usage + * + * \param objectdata I pointer to object data + * \param alpha I scalar + * \param forc I handle of tree-vector object that must receive forcing values (if equal to CTA_NULL, it must be created and caller is responsible for freeing) + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_model_get_forc(Usr_Model *objectdata, double *alpha, CTA_TreeVector *forc, int *retval); + + +//#define CTA_MODEL_AXPY_FORC +/** \brief Implementation for performing axpy operation on the models forcings. + * + * \note At index CTA_MODEL_AXPY_FORC in the function list of the class descriptor. + * + * \note AXPY: y=alpha*x+y. y corresponds to the models + * internal forcings. + * The adjustment to the forcings (alpha*x) is only valid for the given + * time span. Note that the model will use y(t)+x for the given time span + * where y(t) denotes the default forcings of the model. + * + * \param objectdata IO pointer to object data + * \param tspan I handle of time span object + * \param alpha I scalar + * \param forc I handle of forcings x + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_model_axpy_forc(Usr_Model *objectdata, CTA_Time *tspan, int *alpha, CTA_TreeVector *forc, int *retval); + + +//#define CTA_MODEL_SET_PARAM +/** \brief Implementation for setting model parameters. + * + * \note At index CTA_MODEL_SET_PARAM in the function list of the class descriptor. + * + * \param objectdata I pointer to object data + * \param hparam I handle of parameter object + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_model_set_param(Usr_Model *objectdata, CTA_TreeVector *hparam, int *retval); + + +//#define CTA_MODEL_GET_PARAM +/** \brief Implementation for getting model parameters. + * + * \note At index CTA_MODEL_GET_PARAM in the function list of the class descriptor. + * + * \param objectdata I pointer to object data + * \param hparam O handle of object (if equal to CTA_NULL it must be created and caller is responsible for freeing) that must receive model parameters + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_model_get_param(Usr_Model *objectdata, CTA_TreeVector *hparam, int *retval); + + +//#define CTA_MODEL_AXPY_PARAM +/** \brief Implementation for performing axpy operation on the models parameters. + * + * \note At index CTA_MODEL_AXPY_PARAM in the function list of the class descriptor. + * + * \note AXPY: y=alpha*x+y where y corresponds to the models + * internal parameters. + * + * \param objectdata IO pointer to object data + * \param alpha I scalar + * \param hparamx I handle of parameter x that is added to parameters of the model + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_model_axpy_param(Usr_Model *objectdata, double *alpha, CTA_TreeVector *hparamx, int *retval); + +//#define CTA_MODEL_GET_NOISE_COUNT +/** \brief Implementation for returning the number of noise parameters: + * the number of columns of the noise covariance matrix. + * + * \note ONLY for Stochastic models. + * + * \param objectdata IO pointer to object data + * \param nnoise O receives number of noise parameters + * \param retval O error status: CTA_OK if successful + */ +void usr_model_get_noisecount(Usr_Model *objectdata, int *nnoise, int *retval); + +//#define CTA_MODEL_GET_NOISE_COVAR +/** \brief Implementation of returning covariance matrix of noise parameters. + * + * \note ONLY for Stochastic models. + * The covariance matrix is represented by an array + * of tree-vectors (columns of the matrix) + * optionally a tree-vector is created in that case the caller of this + * method is responsible for freeing that tree-vector. The input tree-vector + * must be compatible (same size and or composition) as the models + * internal tree-vector. + * \note If hstmat[icol] == CTA_NULL a new object is created, user is responsible for freeing this object. + * + * \param objectdata IO pointer to object data + * \param hstmat O receives array of tree-vectors, *hstmat can equal CTA_NULL on calling (see note) + * \param retval O error status: CTA_OK if successful + */ +void usr_model_get_noisecovar(Usr_Model *objectdata, CTA_TreeVector *hstmat, int *retval); + +//#define CTA_MODEL_GET_OBSVALUES +/** \brief Implementations to get (interpolate) the models internal state to the + * observations described as specified in the observation + * description component. + * + * \note The interface supports a the time instance for time-interpolation. + * It depends on the model whether and how this is supported. + * + * \param objectdata IO pointer to object data + * \param htime I time instance (for checking and time-interpolation if + * supported by model) + * \param hdescr I observation description component + * \param values O receives values of the models internal state corresponding to + * observations as described in hdescr + * \param retval O error status: CTA_OK if successful + */ +void usr_model_getobsvalues(Usr_Model *objectdata, CTA_Time *htime, CTA_ObsDescr *hdescr, CTA_Vector *values, int *retval); + +//#define CTA_MODEL_GET_OBSSELECT +/** \brief Implements to get a query for the stochastic observer in order to + * filter out the observations that can actually be provided by the model. + * + * \param objectdata IO pointer to object data + * \param htime I time instance + * \param hdescr I observation description component + * \param sselect O receives a query to filter out the observations, must exist before calling + * \param retval O error status: CTA_OK if successful + */ +void usr_model_getobsselect(Usr_Model *objectdata, CTA_Time *htime, CTA_ObsDescr *hdescr, CTA_String *sselect, int *retval); + +//#define CTA_MODEL_ADD_NOISE +/** \brief Add noise during during the given timespan at + * the Compute + * + * \note Noise is added in the compute-method + * + * \param objectdata IO pointer to object data + * \param htime I timespan for which to compute adding noise + * \param retval O error status: CTA_OK if successful + */ +void usr_model_addnoise(Usr_Model *objectdata, CTA_Time *htime, int *retval); + +//#define CTA_MODEL_EXPORT +/** \brief Implements the export the whole internal state of a model + * This export function will export the whole state of the model such that + * a so called "restart" start from this point yielding the same results. + * There are no ruled on the format that is used to store the data. + * Various extra otions are valid but a model will in most cases support an export + * to a file and to a COSTA pack object. + * + * + * \param objectdata IO pointer to object data + * \param hexport I target for export e.g. CTA_File or CTA_Pack + * \param retval O error status: CTA_OK if successful + */ +void usr_model_export(Usr_Model *objectdata, CTA_Time *hexport, int *retval); + +//#define CTA_MODEL_IMPORT +/** \brief Implements the import the whole internal state of a model + * After the inport the models internal state is exactly the same as the point that + * the export was created using CTA_Model_Export. + * + * + * \param objectdata IO pointer to object data + * \param himport I handle with data created by CTA_MODEL_Export e.g. CTA_File or CTA_Pack + * \param retval O error status: CTA_OK if successful + */ +void usr_model_import(Usr_Model *objectdata, CTA_Time *himport, int *retval); + + +//#define cta_model_get_statescaling +/** \brief implements the return of an element wise scaling vector for the + * model state (method cta_model_getstatescaling) + * + * + * \param objectdata io pointer to object data + * \param hscale o tree-vector containing scaling vector. is created + * when hscale==cta_null on input + * \param retval o error status: cta_ok if successful + */ +void usr_model_getstatescaling(Usr_Model *objectdata, CTA_TreeVector *hscale, int *retval); + +//#define CTA_MODEL_GETCURRENTTIME +/** \brief implements the return of the current time of the model instance + * (method CTA_Model_GetCurrentTime) + * + * + * \param objectdata io pointer to object data + * \param tCurrent o Current time of the model instance + * \param retval o error status: cta_ok if successful + */ +void usr_model_getcurrenttime(Usr_Model *objectdata, int* tCurrent, int* retval); + +//#define CTA_MODEL_GETTIMEHORIZON +/** \brief implements the return of the time horizon of the model instance + * (method CTA_Model_GetTimeHorizon) + * + * + * \param objectdata io pointer to object data + * \param tCurrent o Time horizon of the model instance + * \param retval o error status: cta_ok if successful + */ +void usr_model_gettimehorizon(Usr_Model *objectdata, int* tHorizon, int* retval); + + diff --git a/costa/native/cta/include/cta_usr_obs_desc.h b/costa/native/cta/include/cta_usr_obs_desc.h new file mode 100644 index 000000000..a6a5adab2 --- /dev/null +++ b/costa/native/cta/include/cta_usr_obs_desc.h @@ -0,0 +1,220 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_usr_obs_desc.h + +\brief In this file a description is given of the interface of user observation descriptor functions. +When creating your own user observation descriptor class use the following as template. + +The CTA_Usr_ObsDescr to which is being referred in this template can be substituted by your own user observation descriptor object. + +Step 1: for creating your own user model class call the function CTA_Model_DefineClass(). + +Example: + +\code +typedef CTA_Handle CTA_ObsDescrClass; + +CTA_Func h_func[CTA_OBSDESCR_NUMFUNC]; +CTA_ObsDescrClass my_own_obsdescr_class; + +ierr=CTA_Func_Create(" ",&usr_obsdescr_create_size, hintf, &h_func[CTA_OBSDESCR_CREATE_SIZE]); +//...for all implementation functions... + +CTA_ObsDescr_DefineClass("classname", h_func, &my_own_obsdescr_class);\endcode + +Making a new observation descriptor class involves the implementation of the following functions: + +CTA_OBSDESCR_CREATE_SIZE \n +CTA_OBSDESCR_CREATE_INIT \n +CTA_OBSDESCR_FREE \n +CTA_OBSDESCR_GET_PROPERTIES \n +CTA_OBSDESCR_GET_KEYS \n +CTA_OBSDESCR_COUNT_OBSERVATIONS \n +CTA_OBSDESCR_COUNT_PROPERTIES \n +CTA_OBSDESCR_EXPORT + +For creating an implementation function see documentation of CTA_Func_Create(). + +Step 2: to create an object of the newly defined observation descriptor class call CTA_ObsDescr_Create() in the +same way as creating a CTA_ObsDescr object but with a different class handle, i.e. the user class handle from step 1 above. + +Example: + +\code +Usr_ObsDescr usrobsdescr; //user observation descriptor object +CTA_Handle userdata = CTA_NULL; +CTA_ObsDescr_Create(my_own_obsdescr_class, &userdata, &usrobsdescr); +\endcode +\n +Note 1: with object data is meant only the object itself including pointer(s) to its contents, but +not the contents of the observation descriptor.\n\n +*/ + + + +//#define CTA_OBSDESCR_CREATE_SIZE ( 1) +/** \brief Implementation that forms part of the create process. + * + * Must give the memory size of a new object. + * + * Example: + * \code +//in header file: +typedef struct { + //your own user object data goes here... +}USR_OBSDESCR; + +//user implementation: +void usr_obsdescr_create_size(...){ + *memsize = sizeof(USR_OBSDESCR); + *retval = CTA_OK; +} + \endcode + * + * \note At index CTA_OBSDESCR_CREATE_SIZE in the function list of the class descriptor. + * + * \param memsize O must receive the number of bytes which are necessary to store one + user stochastic observer class, with a pointer to the contents (data), but without the + contents themselves + * \param retval O must receive return value of user implementation function + + * \return no return value + */ +void usr_obsdescr_create_size(int *memsize, int *retval); + + +//#define CTA_OBSDESCR_CREATE_INIT ( 2) +/** \brief Implementation that forms part of the create process. + * + * The user observation descriptor object needs to be made ready for use. + * + * \note At index CTA_OBSDESCR_CREATE_INIT in the function list of the class descriptor. + * + * \param myhandle I Handle assigned by COSTA + * \param objectdata I pointer to object data of user stochastic observer + * \param userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_obsdescr_create_init(CTA_ObsDescr myhandle, Usr_ObsDescr *objectdata, + void *userdata, int *retval); + + +//#define CTA_OBSDESCR_FREE ( 3) +/** \brief Implementation for freeing the object data and associated resources. + * + * \note At index CTA_OBSDESCR_FREE in the function list of the class descriptor. + * + * \param objectdata I pointer to object data of user stochastic observer + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_obsdescr_free(Usr_ObsDescr *objectdata, int *retval); + + +//#define CTA_OBSDESCR_GET_PROPERTIES ( 4) +/** \brief Implementation for gettings properties associated with given key. + * + * \note At index CTA_OBSDESCR_GET_PROPERTIES in the function list of the class descriptor. + * + * \param objectdata I pointer to object data of user stochastic observer + * \param key I description of property key + * \param properties O vector that must receive properties associated with given key; must exist before calling + * \param datatype I data type of elements in properties vector + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_obsdescr_get_properties(Usr_ObsDescr *objectdata, + const char *key, CTA_Vector *properties, CTA_Datatype *datatype, int *retval); + + +//#define CTA_OBSDESCR_GET_KEYS ( 5) +/** \brief Implementation for getting all key names of user observation descriptor. + * + * \note At index CTA_OBSDESCR_GET_KEYS in the function list of the class descriptor. + * + * \param objectdata I pointer to object data of user stochastic observer + * \param keys O handle vector that must receive key descriptions; must exist before calling + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_obsdescr_get_keys(Usr_ObsDescr *objectdata, CTA_Vector *keys, int *retval); + + +//#define CTA_OBSDESCR_COUNT_OBSERVATIONS ( 6) +/** \brief Implementation for counting number of observations. + * + * \note At index CTA_OBSDESCR_COUNT_OBSERVATIONS in the function list of the class descriptor. + * + * \param objectdata I pointer to object data of user stochastic observer + * \param nobs O must receive number of observations + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_obsdescr_count_observations(Usr_ObsDescr *objectdata, int* nobs, int *retval); + + +//#define CTA_OBSDESCR_COUNT_PROPERTIES ( 7) +/** \brief Implementation for counting number of properties. + * + * \note At index CTA_OBSDESCR_COUNT_PROPERTIES in the function list of the class descriptor. + * + * \param objectdata I pointer to object data of user stochastic observer + * \param nkeys O must receive number of property keys + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_obsdescr_count_properties(Usr_ObsDescr *objectdata, int* nkeys, int *retval); + + +//#define CTA_OBSDESCR_EXPORT ( 8) +/** \brief Implementation for exporting user observation descriptor. + * + * \note At index CTA_OBSDESCR_EXPORT in the function list of the class descriptor. + * + * \param objectdata I pointer to object data of user stochastic observer + * \param userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_obsdescr_export(Usr_ObsDescr *objectdata, void *userdata, int *retval); + + +//#define CTA_OBSDESCR_SELECTION ( 9) +/** \brief Create a new observation description that is subset of existing + * observation description. + * + * \param objectdata I object data of observation description of the + * initial observation description insntance + * \param selection I selection criterion (subset of SQL) + * \param reltab O Relation table specifying the relation between + * the original and new observation description + * component. Note no relation table is created when + * reltab==CTA_NULL on enty + * \param objectdata_out O new observation description created subset + * \param retval O receives return value + */ +void usr_obsdescr_createsel(Usr_ObsDescr *descr, + CTA_String *selection, CTA_RelTable reltab, + CTA_ObsDescr myhandle_out, + Usr_ObsDescr *descrout, int *retval); + + diff --git a/costa/native/cta/include/cta_usr_stoch_observer.h b/costa/native/cta/include/cta_usr_stoch_observer.h new file mode 100644 index 000000000..cca48d63b --- /dev/null +++ b/costa/native/cta/include/cta_usr_stoch_observer.h @@ -0,0 +1,294 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_usr_stoch_observer.h + +\brief In this file a description is given of the interface of user stochastic observer functions. +When creating your own user stochastic observer class use the following as template. + +The Usr_SObs to which is being referred in this template can be substituted by your own user stochastic observer object. + +Step 1: for creating your own user stochastic observer class call the function CTA_SObs_DefineClass(). + +Example: + +\code +typedef CTA_Handle CTA_SObsClass; + +CTA_Func h_func[CTA_SOBS_NUMFUNC]; +CTA_SObsClass my_own_sobs_class; +CTA_ObsDescrClass obsdescrclass; //observation descriptor class that is being used by stochastic observer + +ierr=CTA_Func_Create(" ",&usr_sobs_create_size, hintf, &h_func[CTA_SOBS_CREATE_SIZE]); +//...for all implementation functions... + +CTA_SObs_DefineClass("classname", h_func, obsdescrclass, &my_own_sobs_class);\endcode + + + +Making a user stochastic observer class involves the implementation of the following functions: + +CTA_SOBS_CREATE_SIZE \n +CTA_SOBS_CREATE_INIT \n +CTA_SOBS_FREE \n +CTA_SOBS_CREATE_SELECTION \n +CTA_SOBS_COUNT \n +CTA_SOBS_GET_OBS_DESCRIPTION \n +CTA_SOBS_GET_VALUES \n +CTA_SOBS_GET_REALISATION \n +CTA_SOBS_GET_EXPECTATION \n +CTA_SOBS_EVALUATE_PDF \n +CTA_SOBS_GET_COV_MATRIX \n +CTA_SOBS_GET_VARIANCE \n +CTA_SOBS_EXPORT \n +CTA_SOBS_GET_TIMES \n + +For creating an implementation function see documentation of CTA_Func_Create(). + +Step 2: to create an object of the newly defined stochastic observer class call CTA_SObs_Create() in the +same way as creating a CTA_SObs object but with a different class handle, i.e. the user class handle from step 1 above. + +Example: + +\code +Usr_SObs usrsobs; //user stochastic observer object +CTA_Handle userdata = CTA_NULL; +CTA_SObs_Create(my_own_sobs_class, &userdata, &usrsobs); +\endcode +\n +Note 1: with object data is meant only the object itself including pointer(s) to its contents, but +not the contents of the stochastic observer.\n\n +*/ + + + +//#define CTA_SOBS_CREATE_SIZE ( 1) +/** \brief Implementation that forms part of the create process. + * + * Must give the memory size of a new user stochastic observer object. + * + * Example: + * \code +//in header file: +typedef struct { + //your own user object data goes here... +}USR_SOBS; + +//user implementation: +void usr_sobs_create_size(...){ + *memsize = sizeof(USR_SOBS); + *retval = CTA_OK; +} + \endcode + * + * \note At index CTA_SOBS_CREATE_SIZE in the function list of the class descriptor. + * + * \param memsize O must receive the number of bytes which are necessary to store one + user stochastic observer class, with a pointer to the contents (data), but without the + contents themselves + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_sobs_create_size(int *memsize, int *retval); + + +//#define CTA_SOBS_CREATE_INIT ( 2) +/** \brief Implementation that forms part of the create process. + * + * The user stochastic observer object needs to be made ready for use. + * + * \note At index CTA_SOBS_CREATE_INIT in the function list of the class descriptor. + * + * \param sobsdata I pointer to user object data + * \param userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_sobs_create_init(Usr_SObs *sobsdata, void *userdata, int *retval); + + +//#define CTA_SOBS_FREE ( 3) +/** \brief Implementation for freeing the object data and associated resources. + * + * \note At index CTA_SOBS_FREE in the function list of the class descriptor. + * + * \param sobsdata IO pointer to user object data + * \param userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_sobs_free(Usr_SObs *sobsdata, void *userdata, int *retval); + + +//#define CTA_SOBS_CREATE_SELECTION ( 4) +/** \brief Implementation for creating a new stochastic observer that is subset of existing stochastic observer. + * + * \note At index CTA_SOBS_CREATE_SELECTION in the function list of the class descriptor. + * \note CTA_SObs_CreateTimSel() calls this implementation with userdata a handle of a time selection string. + * + * \param sobsdatain I pointer to object data of in-object + * \param userdata IO user data + * \param sobsdataout IO pointer to object data of out-object + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_sobs_create_selection(Usr_SObs *sobsdatain, void *userdata, Usr_SObs *sobsdataout, int *retval); + + +//#define CTA_SOBS_COUNT ( 5) +/** \brief Implementation for counting the number of measures +in user stochastic observer object. + * + * \note At index CTA_SOBS_COUNT in the function list of the class descriptor. + * + * \param sobsdata IO pointer to user object data + * \param nmeasr O must receive number of measures in object + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_sobs_count(Usr_SObs *sobsdata, int *nmeasr, int *retval); + + +//#define CTA_SOBS_GET_OBS_DESCRIPTION ( 6) +/** \brief Implementation for creating the observation description corresponding to the stochastic observer. + * + * \note At index CTA_SOBS_GET_DESCRIPTION in the function list of the class descriptor. + * \note Caller is responsible for freeing the here created observation description + * \note THIS IMPLEMENTATION FUNCTION IS NOT SUPPORTED + * + * \param sobsdata IO pointer to user object data + * \return no return value + */ +void usr_sobs_get_description(Usr_SObs *sobsdata); + + +//#define CTA_SOBS_GET_VALUES ( 7) +/** \brief Implementation for getting all values of user stochastic observer object. + * + * \note At index CTA_SOBS_GET_VALUES in the function list of the class descriptor. + * + * \param sobsdata IO pointer to user object data + * \param hvec O handle of vector that must receive the values; must exist before calling; + must be of appropriate data type + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_sobs_get_values(Usr_SObs *sobsdata, CTA_Vector *hvec, int *retval); + + +//#define CTA_SOBS_GET_REALISATION ( 8) +/** \brief Implementation for calculating stochastic realizations for all the + measurements in a user stochastic observer. + * + * \note At index CTA_SOBS_GET_REALISATION in the function list of the class descriptor. + * + * \param sobsdata I pointer to user object data + * \param hvec O handle of vector that must receive realisation; must exist before calling + * \param retval O must receive return value of implementation function + * \return no return value + */ +void usr_sobs_get_realisation(Usr_SObs *sobsdata, CTA_Vector *hvec, int *retval); + + +//#define CTA_SOBS_GET_EXPECTATION ( 9) +/** \brief Implementation for getting the expectation of the probability density function of the mesurements. + * + * \note At index CTA_SOBS_GET_EXPECTATION in the function list of the class descriptor. + * + * \param sobsdata I pointer to user object data + * \param hvec O handle of vector that must receive expectation; must exist before calling + * \param retval O must receive return value of implementation function + * \return no return value + */ +void usr_sobs_get_expectation(Usr_SObs *sobsdata, CTA_Vector *hvec, int *retval); + + +//#define CTA_SOBS_EVALUATE_PDF (10) +/** \brief Implementation for getting the value of the probability density function of the mesurements at given location. + * + * \note At index CTA_SOBS_EVALUATE_PDF in the function list of the class descriptor. + * + * \param sobsdata I pointer to user object data + * \param location I handle of vector with location for evaluating pdf + * \param pdfvalue O handle of vector that must receive result of evaluation; must exist before calling + * \param retval O must receive return value of implementation function + * \return no return value + */ +void usr_sobs_evaluate_pdf(Usr_SObs *sobsdata, CTA_Vector *location, CTA_Vector *pdfvalue, int *retval); + + +//#define CTA_SOBS_GET_COV_MATRIX (11) +/** \brief Implementation for getting all the variances of the measurements in a + user stochastic observer. + * + * \note At index CTA_SOBS_GET_COV_MATRIX in the function list of the class descriptor. + * + * \param sobsdata I pointer to user object data + * \param hmatrix O handle of matrix object that must receive the covariance matrix; must exist before calling + * \param retval O must receive return value of implementation function + * \return no return value + */ +void usr_sobs_get_cov_matrix(Usr_SObs *sobsdata, CTA_Matrix *hmatrix, int *retval); + + +//#define CTA_SOBS_GET_VARIANCE (12) +/** \brief Implementation for calculating all variances or standard deviations. + * + * \note At index CTA_SOBS_GET_VARIANCE in the function list of the class descriptor. + * + * \param sobsdata I pointer to user object data + * \param variance O handle of vector that must receive variances / standard + deviations; must exist before calling + * \param varflag I variance flag: CTA_TRUE for calculating variances, + CTA_FALSE for standard deviations + * \param retval O must receive return value of implementation function + * \return no return value + */ +void usr_sobs_get_variance(Usr_SObs *sobsdata, CTA_Vector *variance, int *varflag, int *retval); + + +//#define CTA_SOBS_EXPORT (13) +/** \brief Implementation for exporting user stochastic observer object. + * + * \note At index CTA_SOBS_EXPORT in the function list of the class descriptor. + * + * \param sobsdata I pointer to user object data + * \param userdata IO pointer to user data + * \param retval O must receive return value of implementation function + * \return no return value + */ +void usr_sobs_export(Usr_SObs *sobsdata, void *userdata, int *retval); + + +//#define CTA_SOBS_GET_TIMES (14) +/** \brief Implementation for getting all times associated with the measurements. + * + * \note At index CTA_SOBS_GET_TIMES in the function list of the class descriptor. + * + * \param sobsdata I pointer to user object data + * \param times O handle of vector that must receive times; must exist before calling + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_sobs_get_times(Usr_SObs *sobsdata, CTA_Vector *times, int *retval); + + + + diff --git a/costa/native/cta/include/cta_usr_vector.h b/costa/native/cta/include/cta_usr_vector.h new file mode 100644 index 000000000..1e0a3913d --- /dev/null +++ b/costa/native/cta/include/cta_usr_vector.h @@ -0,0 +1,440 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/* parameters for different functions */ +#define CTA_VECTOR_CREATE_SIZE ( 1) +#define CTA_VECTOR_CREATE_INIT ( 2) +#define CTA_VECTOR_GETVALS ( 3) +#define CTA_VECTOR_SETVALS ( 4) +#define CTA_VECTOR_SETCONST ( 5) +#define CTA_VECTOR_SCAL ( 6) +#define CTA_VECTOR_COPY ( 7) +#define CTA_VECTOR_AXPY ( 8) +#define CTA_VECTOR_DOT ( 9) +#define CTA_VECTOR_NRM2 (10) +#define CTA_VECTOR_AMAX (11) +#define CTA_VECTOR_GETMAXLEN (12) +#define CTA_VECTOR_FREE (13) +#define CTA_VECTOR_GETVAL (14) +#define CTA_VECTOR_SETVAL (15) +#define CTA_VECTOR_EXPORT (16) +#define CTA_VECTOR_PRINT_TABLE (17) +#define CTA_VECTOR_APPENDVAL (18) +#define CTA_VECTOR_ELMDIV (19) +#define CTA_VECTOR_IMPORT (20) +#define CTA_VECTOR_NUMFUNC (21) + +/** +\file cta_usr_vector.h + +\brief In this file a description is given of the interface of user vector functions. +When creating your own vector class use the following as template. + +The Usr_Vector to which is being referred in this template can be substituted by your own user vector object. + +Step 1: for creating your own user vector class call the function CTA_Vector_DefineClass(). + +Example: + +\code +typedef CTA_Handle CTA_VectorClass; + +CTA_Func h_func[CTA_VECTOR_NUMFUNC]; +CTA_VectorClass my_own_vector_class; + +ierr=CTA_Func_Create(" ",&usr_vector_create_size, hintf, &h_func[CTA_VECTOR_CREATE_SIZE]); +//...for all implementation functions... + +CTA_Vector_DefineClass("classname", h_func, &my_own_vector_class);\endcode + +Making a user vector class involves the implementation of the following functions: + +CTA_VECTOR_CREATE_SIZE \n +CTA_VECTOR_CREATE_INIT \n +CTA_VECTOR_GETVALS \n +CTA_VECTOR_SETVALS \n +CTA_VECTOR_SETCONST \n +CTA_VECTOR_SCAL \n +CTA_VECTOR_COPY \n +CTA_VECTOR_AXPY \n +CTA_VECTOR_DOT \n +CTA_VECTOR_NRM2 \n +CTA_VECTOR_AMAX \n +CTA_VECTOR_GETMAXLEN \n +CTA_VECTOR_FREE \n +CTA_VECTOR_GETVAL \n +CTA_VECTOR_SETVAL \n +CTA_VECTOR_EXPORT \n +CTA_VECTOR_PRINT_TABLE \n +CTA_VECTOR_APPENDVAL \n +CTA_VECTOR_ELMDIV \n +CTA_VECTOR_IMPORT \n + +For creating an implementation function see documentation of CTA_Func_Create(). + +Step 2: to create an object of the newly defined vector class call CTA_Vector_Create() in the +same way as creating a CTA_Vector but with a different class handle, i.e. the user class handle from step 1 above. + +Example: + +\code +Usr_Vector usrvec; //user vector object +int nelements = 10; +CTA_Datatype datatype = CTAI_String2Type("CTA_STRING"); +CTA_Handle userdata = CTA_NULL; +CTA_Vector_Create(my_own_vector_class, nelements, datatype, &userdata, &usrvec); +\endcode +\n +Note 1: with object data is meant only the object itself including pointer(s) to its contents, but +not the contents of the vector.\n\n +Note 2: vector indices start from 1.\n\n +*/ + + +//CTA_VECTOR_CREATE_SIZE ( 1) +/** \brief Implementation that forms part of the create process. + * + * Must give the memory size of a new object. + * + * Example: + * \code +//in header file: +typedef struct { + //your own user object data goes here... +}USR_VECTOR; + +//user implementation: +void usr_vector_create_size(...){ + *memsize = sizeof(USR_VECTOR); + *retval = CTA_OK; +} + \endcode + * + * \note At index CTA_VECTOR_CREATE_SIZE in the function list of the class descriptor. + * + * \param n I dimension + * \param datatype I data type of the vector elements + * \param userdata IO user data + * \param retval O must receive return value of user implementation function + * \param memsize O must receive the number of bytes which are necessary to store one + user vector class, with a pointer to the contents (data), but without the + contents themselves + * \return no return value + */ +void usr_vector_create_size(int *n, CTA_Datatype *datatype, + void *userdata, int *retval, int *memsize); + + +//CTA_VECTOR_CREATE_INIT ( 2) +/** \brief Implementation that forms part of the create process. + * + * The user vector object needs to be made ready for use. + * + * \note At index CTA_VECTOR_CREATE_INIT in the function list of the class descriptor. + * + * \param objectdata IO pointer to object data of user vector + * \param n I size of vector data + * \param datatype I data type of the vector elements + * \param userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_vector_create_init(Usr_Vector *objectdata, int *n, CTA_Datatype *datatype, + void *userdata, int *retval); + + +//CTA_VECTOR_GETVALS ( 3) +/** \brief Implementation for retrieving all values of the user vector. + * + * \note At index CTA_VECTOR_GETVALS in the function list of the class descriptor. + * + * \param x I pointer to object data of the user vector + * \param vals O must receive the values of the user vector + * \param n I number of elements in the vals buffer, must equal number of elements in user vector + * \param datatype I data type of elements of vals, must be the same as data type of user vector x + * \param userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_vector_getvals(Usr_Vector *x, void *vals, int *n, CTA_Datatype *datatype, + void *userdata, int *retval); + + +//CTA_VECTOR_SETVALS ( 4) +/** \brief Implementation for setting all values of the user vector. + * + * \note At index CTA_VECTOR_SETVALS in the function list of the class descriptor. + * + * \param x I pointer to object data of user vector + * \param vals I new values + * \param n I number of elements in the vals buffer, must equal number of elements in user vector + * \param datatype I data type of elements of vals, must be the same as data type of user vector x + * \param userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_vector_setvals(Usr_Vector *x, void *vals, int *n, CTA_Datatype *datatype, + void *userdata, int *retval); + + +//CTA_VECTOR_SETCONST ( 5) +/** \brief Implementation for setting all values of user vector to same value. + * + * \note At index CTA_VECTOR_SETCONST in the function list of the class descriptor. + * + * \param x I pointer to object data of user vector + * \param val I value to set vector elements to + * \param datatype I data type of elements of vals, must be the same as data type of user vector x + * \param userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_vector_setconst(Usr_Vector *x, void *val, CTA_Datatype *datatype, + void *userdata, int *retval); + + +//CTA_VECTOR_SCAL ( 6) +/** \brief Implementation for scaling the user vector: x=ax + * + * \note At index CTA_VECTOR_SCAL in the function list of the class descriptor. + * + * \param x IO pointer to object data of user vector + * \param alpha I scaling factor + * \param userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_vector_scal(Usr_Vector *x, double *alpha, void *userdata, int *retval); + + +//CTA_VECTOR_COPY ( 7) +/** \brief Implementation for copying the user vector. + * + * \note At index CTA_VECTOR_COPY in the function list of the class descriptor. + * + * \param x I pointer to object data of a user vector + * \param y O pointer to object data of user vector y that must receive copy of x; must exist before calling + * \param userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_vector_copy(Usr_Vector *x, Usr_Vector *y, void *userdata, + int *retval); + + +//CTA_VECTOR_AXPY ( 8) +/** \brief Implementation for applying the BLAS operation axpy: y=y+ax + * + * \note At index CTA_VECTOR_AXPY in the function list of the class descriptor. + * + * \param y IO pointer to object data of user vector y; must exist before calling + * \param alpha I scaling factor + * \param x I pointer to object data of user vector x + * \param userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_vector_axpy(Usr_Vector *y, double *alpha, Usr_Vector *x, + void *userdata, int *retval); + +//CTA_VECTOR_DOT ( 9) +/** \brief Implementation for calculating the dot product of two user vectors x.y + * + * \note At index CTA_VECTOR_DOT in the function list of the class descriptor. + * + * \param y I pointer to object data of user vector y + * \param x I pointer to object data of user vector x + * \param userdata IO user data + * \param dotprod O must receive the dot product + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_vector_dot(Usr_Vector *x, Usr_Vector *y, void *userdata, + double *dotprod, int *retval); + +//CTA_VECTOR_NRM2 (10) +/** \brief Implementation for calculating the 2-norm of a user vector + * + * \note At index CTA_VECTOR_NRM2 in the function list of the class descriptor. + * + * \param x I pointer to object data of user vector + * \param userdata IO user data + * \param norm2 O must receive the 2-norm + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_vector_nrm2(Usr_Vector *x, void *userdata, double *norm2, int *retval); + +//CTA_VECTOR_AMAX (11) +/** \brief Implementation for getting the index of the element with the largest maximum value. First index is 1. + * + * \note At index CTA_VECTOR_AMAX in the function list of the class descriptor. + * + * \param x I pointer to object data of user vector + * \param userdata IO user data + * \param iloc O must receive index of maximum value + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_vector_amax(Usr_Vector *x, void *userdata, int *iloc, int *retval); + + +//CTA_VECTOR_GETMAXLEN (12) +/** \brief Implementation for getting the maximum length in case of vector with string elements. + * + * \note At index CTA_VECTOR_GETMAXLEN in the function list of the class descriptor. + * + * \param x I pointer to object data of user vector + * \param userdata IO user data + * \param maxlen O must receive maximum length + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_vector_getmaxlen(Usr_Vector *x, void *userdata, int *maxlen, int *retval); + + +//CTA_VECTOR_FREE (13) +/** \brief Implementation for freeing the user vector. + * + * \note At index CTA_VECTOR_FREE in the function list of the class descriptor. + * + * \param x IO pointer to object data of user vector + * \param userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_vector_free(Usr_Vector *x, void *userdata, int *retval); + + +//CTA_VECTOR_GETVAL (14) +/** \brief Implementation for retrieving a single value of user vector at given index + * + * \note At index CTA_VECTOR_GETVAL in the function list of the class descriptor. + * + * \param x I pointer to object data of user vector + * \param i I index of value to retrieve + * \param val O must receive value; must exist before calling + * \param datatype I data type of *val, must be the same as data type of user vector x + * \param userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_vector_getval(Usr_Vector *x, int *i, void *val, CTA_Datatype *datatype, + void *userdata, int *retval); + + +//CTA_VECTOR_SETVAL (15) +/** \brief Implementation for setting a single value of a user vector. + * + * \note At index CTA_VECTOR_SETVAL in the function list of the class descriptor. + * + * \param x I pointer to object data of user vector + * \param i I index of value to set + * \param val I new data of value + * \param datatype I data type of *val, must be the same as data type of user vector x + * \param userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_vector_setval(Usr_Vector *x, int *i, void *val, CTA_Datatype *datatype, + void *userdata, int *retval); + + +//CTA_VECTOR_EXPORT (16) +/** \brief Implementation for exporting a user vector. + * + * \note At index CTA_VECTOR_EXPORT in the function list of the class descriptor. + * \n For example implementation for exporting a user vector to a pack object or to screen. + * + * \param x I pointer to object data of user vector + * \param userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_vector_export(Usr_Vector *x, void userdata, int *retval); + + +//CTA_VECTOR_PRINT_TABLE (17) +/** \brief Implementation for printing a table. + * + * \note At index CTA_VECTOR_PRINT_TABLE in the function list of the class descriptor. + * + * \param table IO array of vectors that forms the table to be filled + * \param ncolumns I number of columns + * \param formats I user vector + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_vector_print_table(Usr_Vector **table, int *ncolumns, + Usr_Vector * formats, int *retval); + + +//CTA_VECTOR_APPENDVAL (18) +/** \brief Implementation for appending a single value to the user vector. + * + * \note At index CTA_VECTOR_APPENDVAL in the function list of the class descriptor. + * + * \param x IO pointer to object data of user vector + * \param val I value to append + * \param datatype I data type of value to append, must be the same as data type of user vector x + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_vector_appendval(Usr_Vector *x, void *val, CTA_Datatype *datatype, + int *retval); + + +//CTA_VECTOR_ELMDIV (19) +/** \brief Implementation for element-wise division of user vector. + * + * \note At index CTA_VECTOR_ELMDIV in the function list of the class descriptor. + * + * \param y IO pointer to object data of receiving user vector + * \param x I pointer to object data of user vector x + * \param userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_vector_elmdiv(Usr_Vector *y, CTAI_Vector_blas *x, + void *userdata, int *retval); + + + +//CTA_VECTOR_IMPORT (20) +/** \brief Implementation for importing a user vector. + * + * \note At index CTA_VECTOR_IMPORT in the function list of the class descriptor. + * + * \param x O pointer to object data of user vector that must receive import result + * \param userdata IO user data + * \param retval O must receive return value of user implementation function + * \return no return value + */ +void usr_vector_import(Usr_Vector *x, void userdata, int *retval); + + + + + + + + + + + diff --git a/costa/native/cta/include/cta_util_methods.h b/costa/native/cta/include/cta_util_methods.h new file mode 100644 index 000000000..f96211336 --- /dev/null +++ b/costa/native/cta/include/cta_util_methods.h @@ -0,0 +1,90 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_util_methods.h +\brief Utility routines for creating data assimilation methods. +\note This is just a set of utility routines it does not define any COSTA component + +*/ + +#ifndef CTA_UTIL_METHODS_H +#define CTA_UTIL_METHODS_H + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief print the predicted values and the observed values + * + * \param fgModel O Model of foreground run (with data + * assimilation) or Vector with the predicted + * values of the foreground run. + * if set to CTA_NULL NaN will be printed as result + * \param bgModel I Model of background run (without data + * assimilation) or Vector with the predicted + * values of the background run. + * if set to CTA_NULL NaN will be printed as result + * \param sObs I Stochastic observer + * \param time I Corresponding time + * \param file I Output file (note CTA_FILE_STDOUT prints to screen) + * \param printHeader I Print header CTA_TRUE/CTA_FALSE + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Util_MethodsPrintObservations(CTA_Handle fgModel, + CTA_Handle bgModel, CTA_StochObs sObs, CTA_Time time, CTA_File file, + int printHeader); + +/** \brief Make an initial selection of the observations + * + * The selection of observations is based on the given simulation timespan + * and the criterion provided by the model (CTA_Model_GetObsSelect) + * + * \note sObsSel is created and should be freed by the caller of this routine + * + * \param model I Model (for CTA_Model_GetObsSelect) + * \param sObsAll I All observations + * \param spanSim I Simulation timespan + * \param sObsSel O Selection of observations + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Util_MethodsSelectObservations(CTA_Model model, CTA_StochObs sObsAll, CTA_Time spanSim, CTA_StochObs *sObsSel); + +/** \brief Create an output file for filter predictions at station + * locations + * + * The routine CTA_Util_MethodsPrintObservations can be used for writing + * the results. + * \note the header is written by this call + * + * \param stationFile (I) Name of result file + * \param fStationFile (O) Handle to result file + * + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Util_MethodsOpenResultFile(char *stationFile, + CTA_File *fStationFile); + +#ifdef __cplusplus +} +#endif +#endif + diff --git a/costa/native/cta/include/cta_util_sort.h b/costa/native/cta/include/cta_util_sort.h new file mode 100644 index 000000000..2833bdc16 --- /dev/null +++ b/costa/native/cta/include/cta_util_sort.h @@ -0,0 +1,53 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2007 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_util_sort.h +\brief Interface to the sorting utility functions of COSTA. + This header file handles the C/FORTRAN interfacing +*/ + +#ifndef CTA_UTILSORT_H +#define CTA_UTILSORT_H +#include "cta_system.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Sort an integer array using the Quicksort algorithm. + * An additional interger array is permutated in way as the unsorted array + * + * + * \param list (input/output) array, dimension (N) + * On entry, the array to be sorted. + * On exit, list has been sorted into increasing order + * + * \param indx (input/output) array, dimension (N) + + * \param n (input) INTEGER + * The length of the array D. + * + */ +CTAEXPORT void CTA_Util_IQSort2(int *list, int *indx, int n); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/cta/include/cta_util_sqlite3.h b/costa/native/cta/include/cta_util_sqlite3.h new file mode 100644 index 000000000..d6a5677a9 --- /dev/null +++ b/costa/native/cta/include/cta_util_sqlite3.h @@ -0,0 +1,91 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_util_sqlite3.h +\brief Callback functions used for SQL access. +\note This is just a set of routines it does not define any COSTA component +*/ + +#ifndef CTA_UTIL_SQLITE3_H +#define CTA_UTIL_SQLITE3_H +#include +#include "cta_string.h" +#include "cta_datatypes.h" + +typedef struct { +int index; +int dimension; +CTA_Datatype datatype; +void* values; +} CTAI_counter_vector; + +typedef struct { +sqlite3 *db; +int nusers; +char *name; +} CTAI_util_sqlite3_database; + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Utility for returning values. Callback-function for sqlite3. + * + * \param vout I See SQLITE3 documentation + * \param argc I See SQLITE3 documentation + * \param argv I See SQLITE3 documentation + * \param azColName I See SQLITE3 documentation + * \return non-zero if an error occured (see SQLITE3 documentation) + */ +int CTAI_util_sqlite3_return_values(void *vout, + int argc, char **argv, char **azColName); + + +/** \brief Utility for selecting values. Callback-function for sqlite3. + * + * \param out O selected values + * \param nout I dimension of the output array + * \param datatype I data type of the output array + * \param db I the database + * \param selection I the column-name which is returned + * \param condition I the selection creterion + * \return non-zero if an error occured (see SQLITE3 documentation) + */ +int CTAI_util_sqlite3_select_values( void *out, + int nout, CTA_Datatype datatype, sqlite3 *db, + const char *selection, const char *condition); + + +/** \brief Utility for returning keys. Callback-function for sqlite3. + * + * \param n_keys O number of keys in table + * \param Keys O Keys of table + * \param db I pointer to sql database + * \param condition I the selection creterion + * \return non-zero if an error occured (see SQLITE3 documentation) + */ +int CTAI_util_sqlite3_return_keys( int *n_keys, CTA_String **Keys, + sqlite3 *db, const char *condition); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/costa/native/cta/include/cta_util_statistics.h b/costa/native/cta/include/cta_util_statistics.h new file mode 100644 index 000000000..edeff84b0 --- /dev/null +++ b/costa/native/cta/include/cta_util_statistics.h @@ -0,0 +1,62 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_util_statistics.h +\brief Utility for calculating a realization of a standard normal distribution. +\note This is just a set of utility routines it does not define any COSTA component + +*/ + +#ifndef CTA_UTIL_STATISTICS_H +#define CTA_UTIL_STATISTICS_H +#include "cta_system.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Initialize the random generator. + * \note only initialize the random generator once. + * \param seed I some positive initial seed + */ +CTAEXPORT void CTA_rand_seed(int seed); + + +/** \brief Get an uniform random number from the interval [0 1]. + * + * \param x O receives the random number + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_rand_u(double *x); + + +/** \brief Get a random number from a normal distribution whit mean 0 and + * standard deviation 1. + * + * \param x O receives the random number + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_rand_n(double *x); + +#ifdef __cplusplus +} +#endif +#endif + diff --git a/costa/native/cta/include/cta_vector.h b/costa/native/cta/include/cta_vector.h new file mode 100644 index 000000000..d166b7577 --- /dev/null +++ b/costa/native/cta/include/cta_vector.h @@ -0,0 +1,363 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_vector.h +\brief Interface description of the default COSTA vector componennt. For user implementation see cta_usr_vector.h. + +Basic functions and utilities for vector operations on CTA_VECTOR objects. +*/ + +#ifndef CTA_VECTOR_H +#define CTA_VECTOR_H +#include "cta_system.h" +#include "cta_handles.h" +#include "cta_datatypes.h" +#include "cta_functions.h" + +/* Function Handle */ +typedef CTA_Handle CTA_Vector; +typedef CTA_Handle CTA_VecClass; + +/* parameters for different functions */ +#define I_CTA_VECTOR_CREATE_SIZE ( 1) +#define I_CTA_VECTOR_CREATE_INIT ( 2) +#define I_CTA_VECTOR_GETVALS ( 3) +#define I_CTA_VECTOR_SETVALS ( 4) +#define I_CTA_VECTOR_SETCONST ( 5) +#define I_CTA_VECTOR_SCAL ( 6) +#define I_CTA_VECTOR_COPY ( 7) +#define I_CTA_VECTOR_AXPY ( 8) +#define I_CTA_VECTOR_DOT ( 9) +#define I_CTA_VECTOR_NRM2 (10) +#define I_CTA_VECTOR_AMAX (11) +#define I_CTA_VECTOR_GETMAXLEN (12) +#define I_CTA_VECTOR_FREE (13) +#define I_CTA_VECTOR_GETVAL (14) +#define I_CTA_VECTOR_SETVAL (15) +#define I_CTA_VECTOR_EXPORT (16) +#define I_CTA_VECTOR_PRINT_TABLE (17) +#define I_CTA_VECTOR_APPENDVAL (18) +#define I_CTA_VECTOR_ELMDIV (19) +#define I_CTA_VECTOR_IMPORT (20) +#define I_CTA_VECTOR_ELMPROD (21) +#define I_CTA_VECTOR_ELMSQRT (22) +#define I_CTA_VECTOR_NUMFUNC (23) + +// CTA_Vector_Create Create a new COSTA-vector: +// CTA_Vector_DefineClass ... + +/* function interfaces */ +#ifdef __cplusplus +extern "C" { +#endif +/** \brief Create a new class (=implementation) of a COSTA vector component. + * + * \param name I name of the new vector class + * \param h_func I COSTA function handles for functions that implement class. + * Missing functions must have value CTA_NULL + * \param hveccl O receives handle of new vector class + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_DefineClass( + const char *name, + const CTA_Func h_func[I_CTA_VECTOR_NUMFUNC], + CTA_VecClass *hveccl + ); + +/** \brief Duplicate a vector object. + * + * \note Only size, data type and type (class) are duplicated, the content is not + * copied. + * + * \param hvector1 I handle of vector to be duplicated + * \param hvector2 O receives handle of new duplicate vector, empty before calling + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_Duplicate(CTA_Vector hvector1, CTA_Vector *hvector2); + +/** \brief Create a new vector. + * + * \note + * + * \param hveccl I vector class of new vector + * \param n I number of elements + * \param datatype I data type of elements in vector + * \param userdata IO user data for creation (depends on class) + * \param hvector O receives handle of new vector + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_Create(CTA_VecClass hveccl, const int n, CTA_Datatype datatype, + CTA_Handle userdata, CTA_Vector *hvector); + +/** \brief Get size of vector. + * + * \param hvec I handle of vector + * \param n O receives number of elements + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_GetSize(CTA_Vector hvec, int *n); + +/** \brief Get a copy of a single element in the vector. + * + * \param hvec I handle of vector + * \param i I index of element + * \param vals O receives copy of value in vector, must exist before calling + * \param datatype I data type of *vals, must be the same as data type of elements in vector + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_GetVal(CTA_Vector hvec, int i, void *vals, + CTA_Datatype datatype); + +/** \brief Get a copy of all elements in the vector. + * + * \param hvec I handle of vector + * \param vals O receives copy of all elements in vector, must exist before calling + * \param n I length of array vals + * \param datatype I data type of *vals, must be the same as data type of elements in vector + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_GetVals(CTA_Vector hvec, void *vals, int n, + CTA_Datatype datatype); + +/** \brief Set a copy of an element in the vector. + * + * \note The value is copied in the vector. + * + * \param hvec IO handle of vector + * \param i I index of element + * \param val I new value that is copied into element at given index + * \param datatype I data type of *val, must be the same as data type of elements in vector + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_SetVal(CTA_Vector hvec, int i, void *val, + CTA_Datatype datatype); + +/** \brief Append a copy of an element to a vector. + * + * \note The value is copied into the vector, size of vector is increased. + * + * \param hvec IO handle of vector + * \param val I value that needs to be added to vector + * \param datatype I data type of *val, must be the same as data type of elements in vector + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_AppendVal(CTA_Vector hvec, const void *val, CTA_Datatype datatype); + +/** \brief Set all elementes in the vector. + * + * \note The values are copied in the vector. + * + * \param hvec IO handle of vector + * \param vals I values that need to be copied to vector + * \param n I number of elements in vals + * \param datatype I data type of *vals, must be the same as data type of elements in vector + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_SetVals(CTA_Vector hvec, void *vals, int n, + CTA_Datatype datatype); + +/** \brief Scale a vector. + * + * \note scale: x=alpha*x + * + * \param hvec IO handle of vector + * \param alpha I scalar + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_Scal(CTA_Vector hvec, double alpha); + +/** \brief Copy a vector + * + * \note This function copies the vector content (y=x), but does not make a new vector. + * + * \param hvec_x I handle of sending vector + * \param hvec_y O handle of receiving vector + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_Copy(CTA_Vector hvec_x, CTA_Vector hvec_y); + +/** \brief Operation axpy for two vectors x and y. + * + * \note axpy: y=alpha*x+y + * + * \param hvec_y IO handle of vector y + * \param alpha I scalar + * \param hvec_x I handle of vector x + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_Axpy(CTA_Vector hvec_y, double alpha, CTA_Vector hvec_x); + +/** \brief Dot product operation of two vectors. + * + * \note dot: dotprod=x^t*y + * + * \param hvec_x I handle of vector x + * \param hvec_y I handle of vector y + * \param dotprod O receives dot product + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_Dot(CTA_Vector hvec_x, CTA_Vector hvec_y, double *dotprod); + +/** \brief Compute 2-norm of vector. + * + * \note 2-norm: sqrt(x^t*x) + * + * \param hvec_x I handle of vector x + * \param norm2 O receives 2-norm + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_Nrm2(CTA_Vector hvec_x, double *norm2); + +/** \brief Find index of element in vector with largest absolute value + * + * \param hvec_x I handle of vector + * \param iloc O receives index of largest absolute value + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_Amax(CTA_Vector hvec_x, int *iloc); + +/** \brief Find largest length of elements in vector. + * + * \note e.g. length of string in a vector of strings + * + * \param hvec_x I handle of vector + * \param maxlen O receives largest length of elements in vector + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_GetMaxLen( + CTA_Vector hvec_x, /* Handle of the first vector */ + int *maxlen + ); + +/** \brief Export a vector to file, stdout or pack object. + * + * \Note CTA_DEFAULT_VECTOR supports exporting to:\n + * file (usrdata a handle of COSTA file)\n + * pack object (usrdata a handle of COSTA pack object)\n + * + * \param hvec I handle of vector + * \param usrdata I configuration of output + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_Export( + CTA_Vector hvec, /* Handle of the vector */ + CTA_Handle usrdata + ); + +/** \brief Import a vector. + * + * \note CTA_DEFAULT_VECTOR supports importing from pack object (usrdata[0] a pack handle). + * \note Data type and size of vector can be changed due to this action + * + * \param hvec I handle of vector + * \param usrdata I configuration of output + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_Import( CTA_Vector hvec, CTA_Handle usrdata); + + +/** \brief Print table, each column built up by a vector. + * + * \note CTA_DEFAULT_VECTOR TODO + * + * \param table I array of vector handles, these vectors form the table to be filled + * \param ncolumns I number of columns in table + * (number of vector handles in table) + * \param vformats I handle of vector of string (size ncolumns) containing the + * formats for printing each column (C-format) + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_Print_Table(CTA_Vector* table, int ncolumns, + CTA_Vector vformats); + +/** \brief Free the vector object. + * + * + * \Note hvec_x=CTA_NULL is allowed + * + * \param hvec_x IO handle of vector, replaced by CTA_NULL on return + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_Free(CTA_Vector *hvec_x); + + +/** \brief Get type of vector. + * + * \param hvec I handle of vector + * \param datatype O receives data type of vector + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_GetDatatype( + CTA_Vector hvec, /* Handle of the vector */ + CTA_Datatype *datatype /* Returned data type */ + ); + +/** \brief Set whole vector to one single value. + * + * \Note hvec=CTA_NULL is allowed, function returns error code + * + * \param hvec IO handle of vector + * \param val I value that must be set + * \param datatype I data type of *val, must be the same as data type of elements in vector + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_SetConstant( + CTA_Vector hvec, /* Handle of the vector */ + void *val, /* value that must be set */ + CTA_Datatype datatype /* Data type */ + ); + +/** \brief Element wise division + * + * \note y=x./y + * + * \param hvec_y IO handle of vector y + * \param hvec_x I handle of vector x + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_ElmDiv(CTA_Vector hvec_y, CTA_Vector hvec_x); + + +/** \brief Element wise product + * + * \note y=x.y + * + * \param hvec_y IO handle of vector y + * \param hvec_x I handle of vector x + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_ElmProd(CTA_Vector hvec_y, CTA_Vector hvec_x); + +/** \brief Element wise square root + * + * \note y=y.^0.5 + * + * \param hvec_x IO handle of vector y + * \return error status: CTA_OK if successful + */ +CTAEXPORT int CTA_Vector_ElmSqrt(CTA_Vector hvec_x); + + + + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/cta/include/cta_vector_blas.h b/costa/native/cta/include/cta_vector_blas.h new file mode 100644 index 000000000..370af0910 --- /dev/null +++ b/costa/native/cta/include/cta_vector_blas.h @@ -0,0 +1,322 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef CTA_VECTOR_BLAS_H +#define CTA_VECTOR_BLAS_H + +#include "cta_f77blas.h" +#include "cta_datatypes.h" +#include "cta_handles.h" +#include "cta_vector.h" +#include "cta_functions.h" +#include "cta_pack.h" + +/** +\file cta_vector_blas.h +\brief Interface description of the COSTA BLAS-vector component (a standard implementation class of CTA_Vector) that can be used for standard BLAS operations. + + +BLAS matrix object data: + +\code +typedef struct { +CTA_Datatype datatype; +void *values; +int n; +int size; +} CTAI_Vector_blas; +\endcode +\n + + +*/ + +typedef struct { +CTA_Datatype datatype; +void *values; +int n; // total number of elements +int nDimensions; // total number of dimensions +int dimensions[7]; // number of elements in each dimension +int size; +} CTAI_Vector_blas; + +#ifdef __cplusplus +extern "C" { +#endif +/** \brief Define the BLAS vector class. Use the resulting class object when calling CTA_Vector_Create(). + * + * \param hveccl IO receives class BLAS vector class object, must be empty before calling + * \return no return value + */ +CTANOEXPORT void CTA_Vector_blas_initialise(CTA_VecClass *hveccl); + + +/** \brief Part of the create process, get size of a new object. + * + * \param n O receives the size of a new vector blas object + * \param datatype I data type + * \param usrdata IO user data (not being used) + * \param retval O receives return value of the function, CTA_OK if succesful + * \param memsize O the number of bytes which are necessary to store one + CTAI_Vector_blas, with a pointer to the contents (data), but without the + contents themselves + * \return no return value + */ +void CTAI_Vector_Create_Size(int *n, CTA_Datatype *datatype, + CTA_Handle *usrdata, int *retval, int *memsize); + + +/** \brief Part of the create process, initialize vector object. + * + * \param x IO pointer to object data of user vector + * \param n I size of vector data + * \param datatype I data type of the vector elements + * \param usrdata IO user data (not being used) + * \param retval O receives return value + * \return no return value + */ +void CTAI_Vector_Create_Init(CTAI_Vector_blas *x, int *n, + CTA_Datatype *datatype, + CTA_Handle *usrdata, int *retval); + + +/** \brief Retrieve all values of the BLAS vector. + * + * \param x I pointer to object data of the vector + * \param vals O receives the values of the vector + * \param n I number of elements in the vals buffer, must equal number of elements in BLAS vector + * \param datatype I data type of elements of vals, must be the same as data type of BLAS vector x + * \param retval O receives return value + * \return no return value + */ +void CTAI_Vector_getvals(CTAI_Vector_blas *x, void *vals, int *n, CTA_Datatype *datatype, int *retval); + + +/** \brief Retrieve a single value of the BLAS vector at given index. + * + * \param x I pointer to object data of BLAS vector + * \param i I index of value to retrieve + * \param val O receives value; must exist before calling + * \param datatype I data type of *val, must be the same as data type of BLAS vector x + * \param retval O receives return value + * \return no return value + */ +void CTAI_Vector_getval(CTAI_Vector_blas *x, int *i, void *val, CTA_Datatype *datatype, int *retval); + + +/** \brief Set all values of a BLAS vector. + * + * \param x I pointer to object data of BLAS vector + * \param vals I new values + * \param n I number of elements in the vals buffer, must equal number of elements in BLAS vector + * \param datatype I data type of elements of vals, must be the same as data type of BLAS vector x + * \param retval O receives return value + * \return no return value + */ +void CTAI_Vector_setvals(CTAI_Vector_blas *x, void *vals, int *n, CTA_Datatype *datatype, int *retval); + + +/** \brief Set a single value of a BLAS vector. + * + * \param x I pointer to object data of BLAS vector + * \param i I index of value to set + * \param val I new data of value + * \param datatype I data type of *val, must be the same as data type of BLAS vector x + * \param retval O receives return value + * \return no return value + */ +void CTAI_Vector_setval(CTAI_Vector_blas *x, int *i, void *val, CTA_Datatype *datatype, int *retval); + + +/** \brief Set all values of BLAS vector to the same value. + * + * \param x I pointer to object data of BLAS vector + * \param val I value to set vector elements to + * \param datatype I data type of elements of vals, must be the same as data type of BLAS vector x + * \param retval O receives return value + * \return no return value + */ +void CTAI_Vector_setconst(CTAI_Vector_blas *x, void *val, CTA_Datatype *datatype, int *retval); + + +/** \brief Scale the BLAS vector: x=ax + * + * \param x IO pointer to object data of user vector + * \param alpha I scalar + * \param retval O receives return value + * \return no return value + */ +void CTAI_Vector_scal(CTAI_Vector_blas *x, double *alpha, int *retval); + + +/** \brief Copy the BLAS vector. + * + * \param x I pointer to object data of a BLAS vector + * \param y O pointer to object data of BLAS vector y that receives copy of x; must exist before calling + * \param retval O receives return value + * \return no return value + */ +void CTAI_Vector_copy(CTAI_Vector_blas *x, CTAI_Vector_blas *y, int *retval); + + +/** \brief Apply the BLAS operation axpy: y=y+a*x + * + * \param y IO pointer to object data of BLAS vector y; must exist before calling + * \param alpha I scalar + * \param x I pointer to object data of BLAS vector x + * \param retval O receives return value + * \return no return value + */ +void CTAI_Vector_axpy(CTAI_Vector_blas *y, double *alpha, CTAI_Vector_blas *x, int *retval); + + +/** \brief Calculate the dot product of two BLAS vectors: x.y + * + * \param y I pointer to object data of BLAS vector y + * \param x I pointer to object data of BLAS vector x + * \param dotprod O receives the dot product + * \param retval O receives return value + * \return no return value + */ +void CTAI_Vector_dot(CTAI_Vector_blas *x, CTAI_Vector_blas *y, double *dotprod, int *retval); + + +/** \brief Calculate the 2-norm of a BLAS vector + * + * \param x I pointer to object data of BLAS vector + * \param norm2 O receives 2-norm + * \param retval O receives return value + * \return no return value + */ +void CTAI_Vector_nrm2(CTAI_Vector_blas *x, double *norm2, int *retval); + + +/** \brief Get the index of the maximum vector value. First index is 1. + * + * \param x I pointer to object data of BLAS vector + * \param iloc O receives index of maximum value + * \param retval O receives return value + * \return no return value + */ +void CTAI_Vector_amax(CTAI_Vector_blas *x, int *iloc, int *retval); + + +/** \brief Get the maximum length in case of BLAS vector with string elements. + * + * \param x I pointer to object data of BLAS vector + * \param maxlen O receives maximum length + * \param retval O receives return value + * \return no return value + */ +void CTAI_Vector_GetMaxLen(CTAI_Vector_blas *x, int *maxlen, int *retval); + +/** \brief Export a BLAS vector. + * + * \note Supports exporting float, double, int and CTA_String values.\n + * Possibilities: + * Export to file (usrdata contains handle of COSTA file).\n + * Export to pack object (usrdata contains handle of COSTA pack object).\n + * + * + * \param x I pointer to object data of BLAS vector + * \param usrdata IO user data + * \param retval O receives return value + * \return no return value + */ +void CTAI_Vector_export(CTAI_Vector_blas *x, CTA_Handle *usrdata, int *retval); + + +/** \brief Import a BLAS vector. + * + * \note Supported:\n + Import from pack object (usrdata contains handle of COSTA pack object).\n + * + * \param x O pointer to object data of BLAS vector that receives import result + * \param usrdata IO user data + * \param retval O receives return value + * \return no return value + */ +void CTAI_Vector_import(CTAI_Vector_blas *x, CTA_Handle *usrdata, int *retval); + + +/** \brief Print table. TODO + * + * \param table IO + * \param ncolumns I number of columns + * \param formats I user vector + * \param retval O receives return value + * \return no return value + */ +void CTAI_Vector_Print_Table(CTAI_Vector_blas **table, int *ncolumns, + CTAI_Vector_blas * formats, int *retval); + + +/** \brief Append single value to the BLAS vector. + * + * \param x IO pointer to object data of BLAS vector + * \param val I value to append + * \param datatype I data type of value to append, must be the same as data type of user vector x + * \param retval O receives return value + * \return no return value + */ +void CTAI_Vector_appendval(CTAI_Vector_blas *x, void *val, CTA_Datatype *datatype, + int *retval); + + +/** \brief Free the BLAS vector object data and associated resources. + * + * \param x IO pointer to object data of BLAS vector + * \param retval O receives return value + * \return no return value + */ +void CTAI_Vector_free(CTAI_Vector_blas *x, int *retval); + + +/** \brief Element-wise division of BLAS vector. + * + * \param y IO pointer to object data of receiving BLAS vector + * \param x I pointer to object data of user vector x + * \param retval O receives return value + * \return no return value + */ +void CTAI_Vector_elmdiv(CTAI_Vector_blas *y, CTAI_Vector_blas *x, int *retval); + +/** \brief Element-wise product of BLAS vector. + * + * \param y IO pointer to object data of receiving BLAS vector + * \param x I pointer to object data of user vector x + * \param retval O receives return value + * \return no return value + */ +void CTAI_Vector_elmprod(CTAI_Vector_blas *y, CTAI_Vector_blas *x, int *retval); + +/** \brief Element-wise square root of BLAS vector. + * + * \param y IO pointer to object data of receiving BLAS vector + * \param retval O receives return value + * \return no return value + */ +void CTAI_Vector_elmsqrt(CTAI_Vector_blas *y, int *retval); + + +#ifdef __cplusplus +} +#endif +#endif + diff --git a/costa/native/cta/include/cta_xml.h b/costa/native/cta/include/cta_xml.h new file mode 100644 index 000000000..9ac327728 --- /dev/null +++ b/costa/native/cta/include/cta_xml.h @@ -0,0 +1,57 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_xml.h +\brief Utilities for XML access: reading/writing of trees from/to a XML file. +\note This is just a set of utility routines it does not define any COSTA component + +*/ + +#ifndef CTA_XML_H +#define CTA_XML_H +#include "cta_system.h" +#include "cta_tree.h" +#include "cta_string.h" + +/* tree functions */ +#ifdef __cplusplus +extern "C" { +#endif +/** \brief Read a COSTA XML file into a new tree. + * + * \param hfname I file name of XML file to read + * \param hroot O handle of a new COSTA tree + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_XML_Read(CTA_String hfname, CTA_Tree *hroot); + + +/** \brief Write a tree to a COSTA XML file + * + * \param hfname I file name of XML file to write + * \param hroot I handle of a COSTA tree + * \return CTA_OK if successful + */ +CTAEXPORT int CTA_XML_Write(CTA_String hfname, CTA_Tree hroot); + +#ifdef __cplusplus +} +#endif +#endif /* CTA_XML_H */ diff --git a/costa/native/cta/include/ctai.h b/costa/native/cta/include/ctai.h new file mode 100644 index 000000000..ae6266632 --- /dev/null +++ b/costa/native/cta/include/ctai.h @@ -0,0 +1,37 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file ctai.h + +\brief Definition of MIN(a,b) and MAX(a,b) +*/ + +#ifndef NL_VORTECH_CTAI_H +#define NL_VORTECH_CTAI_H + +#ifndef MAX +#define MAX(a,b) (a>b ? a: b) +#endif + +#ifndef MIN +#define MIN(a,b) (a +#include +#include +#include +#include + + +/* COSTA-specific element properties */ +#define CTAI_XML_DATABASE (xmlChar*)("database") +#define CTAI_XML_SELECT (xmlChar*)("select") +#define CTAI_XML_TIMEOFFSET (xmlChar*)("timeoffset") +#define CTAI_XML_ID (xmlChar*)("id") +#define CTAI_XML_START (xmlChar*)("start") +#define CTAI_XML_STEP (xmlChar*)("step") +#define CTAI_XML_STOP (xmlChar*)("stop") +#define CTAI_XML_TAG (xmlChar*)("tag") +#define CTAI_XML_NAME (xmlChar*)("name") +#define CTAI_XML_DATATYPE (xmlChar*)("datatype") +#define CTAI_XML_DIMENSION (xmlChar*)("dimension") +#define CTAI_XML_VALUES (xmlChar*)("values") +#define CTAI_XML_VECTOR (xmlChar*)("vector") +#define CTAI_XML_VALUE (xmlChar*)("value") +#define CTAI_XML_LIBRARY (xmlChar*)("library") +#define CTAI_XML_FUNCTION (xmlChar*)("function") +#define CTAI_XML_UNIT (xmlChar*)("unit") +#define CTAI_XML_MISSINGVALUE (xmlChar*)("missingValue") +#define CTAI_XML_GRID (xmlChar*)("grid") +#define CTAI_XML_CAPTION (xmlChar*)("caption") +#define CTAI_XML_LENGTH (xmlChar*)("length") +#define CTAI_XML_EXCLUDEFROMVECTOR (xmlChar*)("excludeFromVector") +#define CTAI_XML_IMPLEMENTS (xmlChar*)("implements") +#define CTAI_XML_NPROC (xmlChar*)("nproc") +#define CTAI_XML_PARALLEL_TYPE (xmlChar*)("parallel_type") +#define CTAI_XML_SPAWN_WORKERS (xmlChar*)("spawn_workers") +#define CTAI_XML_NTIMES (xmlChar*)("ntimes") +#define CTAI_XML_DUMPROCS (xmlChar*)("dumproc") +#define CTAI_XML_SOBSCLASSNAME (xmlChar*)("class") +#define CTAI_XML_FLAG_BARRIER (xmlChar*)("flag_barrier") +#define CTAI_XML_T_STEP (xmlChar*)("T_step") + +#define MY_ENCODING (xmlChar*)("ISO-8859-1") + + + +#endif /* CTA_XML_H */ diff --git a/costa/native/cta/include/f_cta_utils.h b/costa/native/cta/include/f_cta_utils.h new file mode 100644 index 000000000..a7b44eea2 --- /dev/null +++ b/costa/native/cta/include/f_cta_utils.h @@ -0,0 +1,61 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file f_cta_utils.h + +\brief String utility functions for FORTRAN / C compatibility. +*/ + +#include +#include "cta_system.h" +#include "cta_datatypes.h" +#include "cta_errors.h" + +#ifndef F_CTA_UTILS_H +#define F_CTA_UTILS_H +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Create C string from FORTRAN string + * + * \param f_str I pointer to FORTRAN string + * \param c_str O buffer for receiving C string + * \param len_f I length of FORTRAN string + * + * \return error status: always CTA_OK (no internal error check) + */ +int CTA_fstr2cstr(char *f_str,char *c_str, int len_f); + +/** \brief Create FORTRAN string from C string + * + * \param c_str I pointer to C string + * \param f_str O buffer for receiving FORTRAN string + * \param len_f I length of C string + * + * \return error status: always CTA_OK (no internal error check) + */ +int CTA_cstr2fstr(char *c_str,char *f_str, int len_f); + + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/cta/include/modbuild_sp_model_template.h b/costa/native/cta/include/modbuild_sp_model_template.h new file mode 100644 index 000000000..9694f2165 --- /dev/null +++ b/costa/native/cta/include/modbuild_sp_model_template.h @@ -0,0 +1,123 @@ +/* +COSTA: Problem solving environment for data assimilation +Copyright (C) 2007 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_modbuild_sp_model_template.h + +\brief Interfaces of the user-routines that can be provided in order to create a model using the SP-modelbuilder + +Copy this file to your_model.c and fill in the routines. + + +*/ +#include "cta.h" +#include "f_cta_utils.h" + +#define CREATEFUNC_F77 CF77_CALL(usr_createfunc,USR_CREATEFUNC) + + + + +/** \brief Creates a new model instance + * This routine creates and initialises a new model instance. + * + * \param hinput I Model configuration CTA_Tree of CTA_String + * \param state O Model state (initialized to initial value. Note this statevector must be created + * \param sbound O State-vector for the offset on the forcings. Set CTA_NULL if not used. Note this statevector must be created + * \param nnoise O The number of noise parameters in model state. Set 0 in case of a deterministic model + * \param time0 O Time instance of the initial state state. The time object is already created + * \param snamnoise O Name of the substate containing the noise parameters. The string is already created + * \param husrdata O Handle that can be used for storing instance specific data + * \param ierr O Return flag CTA_OK if successful +*/ +void usr_create(CTA_Handle *hinput, CTA_TreeVector *state, CTA_TreeVector *sbound, + CTA_TreeVector *sparam, int *nnoise, CTA_Time *time0, + CTA_String *snamnoise, CTA_Handle *husrdata, int *ierr); + +/** \brief Compute timestep(s) + * This routine is computes several timesteps over a giving timespan. + * + * \param timespan I Timespan to simulate + * \param state IO State vector + * \param saxpyforc I Offset on models forcings + * \param baddnoise I flag (CTA_TRUE/CTA_FALSE) whether to add noise + * \param sparam I Model parameters + * \param husrdata IO Instance specific data + * \param ierr O Return flag CTA_OK if successful +*/ +void usr_compute(CTA_Time *timespan, CTA_TreeVector *state, CTA_TreeVector *saxpyforc, + int *baddnoise, CTA_TreeVector *sparam, CTA_Handle *husrdata, + int *ierr); + +/** \brief Return the root of noise covariance matrix + * This routine is responsible for returning the covariance matrix of the noise parameters. + * + * \param colsvar O Covariance of noise parameters array of noise. + * Represented als an array (nnoise) of tree-vectors. + * Note the sub-tree-vectors are already allocated. + * \param nnoise I Number of noise parameters + * \param husrdata IO Instance specific data + * \param ierr O Return flag CTA_OK if successful +*/ +void usr_covar(CTA_TreeVector *colsvar, int *nnoise, CTA_Handle *husrdata, int *ierr); + +/** \brief Return values that correspond to observed values + * This routine is responsible for the transformation of the state-vector to the observations. + * + * \param state I state vector + * \param hdescr I Observation description of observations + * \param vval O Model (state) values corresponding to observations in hdescr + * \param husrdata IO Instance specific data + * \param ierr O Return flag CTA_OK if successful +*/ +void usr_obs(CTA_TreeVector *state, CTA_ObsDescr *hdescr, CTA_Vector *vval, + CTA_Handle *husrdata, int *ierr); + + +/** \brief Select criterion for observations that can be used for the model. + * This routine is responsible for producing a selection criterion that will filter out all invalid observations. + * Invalid observations are observations for which the model cannot produce a corresponding value. For example + * observations that are outside the computational domain. + * + * \param state I state vector + * \param ttime I timespan for selection + * \param hdescr I observation description of all available observations + * \param sselect O The select criterion to filter out all invalid observations + * \param husrdata IO Instance specific data + * \param ierr O Return flag CTA_OK if successful +*/ +void usr_obssel(CTA_TreeVector *state, CTA_Time *ttime, CTA_ObsDescr *hdescr, + CTA_String *sselect, CTA_Handle *husrdata, int* ierr); + + +void usr_SP_Model_CreateFunc(){ +CTA_Intf hintf; +CTA_Func func; + hintf = CTA_NULL; + CTA_Func_Create("usr_create", usr_create, hintf, &func); + CTA_Func_Create("usr_compute", usr_compute, hintf, &func); + CTA_Func_Create("usr_covar", usr_covar, hintf, &func); + CTA_Func_Create("usr_obs", usr_obs, hintf, &func); + CTA_Func_Create("usr_obssel", usr_obssel, hintf, &func); +} + +void CREATEFUNC_F77(){ + usr_CreateFunc(); +} + diff --git a/costa/native/cta/src/CMakeLists.txt b/costa/native/cta/src/CMakeLists.txt new file mode 100644 index 000000000..afd3dcd6a --- /dev/null +++ b/costa/native/cta/src/CMakeLists.txt @@ -0,0 +1,23 @@ +cmake_minimum_required(VERSION 3.9.1) + +set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/lib) +set(SOURCES + cta_array.c cta_matrix_blas.c cta_obsdescr_combine.c cta_sobs_combine.c cta_util_sort.c + cta_datatypes.c cta_mem.c cta_obsdescr_maori.c cta_sobs_factory.c cta_util_sqlite3.c + cta_datetime.c cta_message.c cta_obsdescr_netcdf.c cta_sobs_maori.c cta_util_statistics.c + cta_defaults.c cta_metainfo.c cta_obsdescr_sqlite3.c cta_sobs_netcdf.c cta_vector.c + cta_file.c cta_method.c cta_obsdescr_table.c cta_sobs_sqlite3.c cta_vector_blas.c + cta_flush.c cta_modbuild_par.c cta_obsdescr_user.c cta_sobs_user.c cta_xml.c + cta_functions.c cta_modbuild_sp.c cta_pack.c cta_string.c f_cta_defaults.c + cta_handles.c cta_model.c cta_par.c cta_time.c f_cta_utils.c + cta_initialise.c cta_model_factory.c cta_reltable.c cta_tree.c + cta_interface.c cta_model_utilities.c cta_resultwriter.c cta_treevector.c + cta_matrix.c cta_obsdescr.c cta_sobs.c cta_util_methods.c +) + +add_library(cta SHARED ${SOURCES}) +target_include_directories(cta PUBLIC ../include) +target_include_directories(cta PUBLIC ${CMAKE_SOURCE_DIR}/external) + +#target_link_libraries(cta INTERFACE libxml) + diff --git a/costa/native/cta/src/cta_array.c b/costa/native/cta/src/cta_array.c new file mode 100644 index 000000000..064ac8db2 --- /dev/null +++ b/costa/native/cta/src/cta_array.c @@ -0,0 +1,1293 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/cta/cta_time.c $ +$Revision: 2751 $, $Date: 2011-09-09 08:58:46 +0200 (Fri, 09 Sep 2011) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2012 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include "cta_array.h" + +#include "cta_mem.h" +#include "f_cta_utils.h" +#include "cta_errors.h" +#include "cta_message.h" + +#define CTA_ARRAY_FREE_F77 F77_CALL(CTA_ARRAY_FREE,CTA_ARRAY_FREE) +#define CTA_ARRAY_CREATEASDOUBLES_F77 F77_CALL(CTA_ARRAY_CREATEASDOUBLES,CTA_ARRAY_CREATEASDOUBLES) +#define CTA_ARRAY_GETNDIMENSIONS_F77 F77_CALL(CTA_ARRAY_GETNDIMENSIONS,CTA_ARRAY_GETNDIMENSIONS) +#define CTA_ARRAY_GETDIMENSIONS_F77 F77_CALL(CTA_ARRAY_GETDIMENSIONS,CTA_ARRAY_GETDIMENSIONS) +#define CTA_ARRAY_LENGTH_F77 F77_CALL(CTA_ARRAY_LENGTH,CTA_ARRAY_LENGTH) +#define CTA_ARRAY_GETVALUESASDOUBLES_F77 F77_CALL(CTA_ARRAY_GETVALUESASDOUBLES,CTA_ARRAY_GETVALUESASDOUBLES) +#define CTA_ARRAY_GETVALUESASDOUBLES_INDEXRANGE_F77 F77_CALL(CTA_ARRAY_GETVALUESASDOUBLES_INDEXRANGE,CTA_ARRAY_GETVALUESASDOUBLES_INDEXRANGE) +#define CTA_ARRAY_GETVALUEASDOUBLES_INDEX_F77 F77_CALL(CTA_ARRAY_GETVALUEASDOUBLES_INDEX,CTA_ARRAY_GETVALUEASDOUBLES_INDEX) +#define CTA_ARRAY_GETVALUEASDOUBLE_INDICES_F77 F77_CALL(CTA_ARRAY_GETVALUEASDOUBLE_INDICES,CTA_ARRAY_GETVALUEASDOUBLE_INDICES) +#define CTA_ARRAY_SETVALUESASDOUBLES_F77 F77_CALL(CTA_ARRAY_SETVALUESASDOUBLES,CTA_ARRAY_SETVALUESASDOUBLES) +#define CTA_ARRAY_SETVALUEASDOUBLE_INDICES_F77 F77_CALL(CTA_ARRAY_SETVALUEASDOUBLE_INDICES,CTA_ARRAY_SETVALUEASDOUBLE_INDICES) +#define CTA_ARRAY_SETVALUESASDOUBLES_INDEXRANGE_F77 F77_CALL(CTA_ARRAY_SETVALUESASDOUBLES_INDEXRANGE,CTA_ARRAY_SETVALUESASDOUBLES_INDEXRANGE) +#define CTA_ARRAY_SETVALUEASDOUBLE_INDEX_F77 F77_CALL(CTA_ARRAY_SETVALUEASDOUBLE_INDEX,CTA_ARRAY_SETVALUEASDOUBLE_INDEX) +#define CTA_ARRAY_SETCONSTANT_F77 F77_CALL(CTA_ARRAY_SETCONSTANT,CTA_ARRAY_SETCONSTANT) +#define CTA_ARRAY_AXPYONVALUES_F77 F77_CALL(CTA_ARRAY_AXPYONVALUES,CTA_ARRAY_AXPYONVALUES) +#define CTA_ARRAY_MULTIPLYVALUES_F77 F77_CALL(CTA_ARRAY_MULTIPLYVALUES,CTA_ARRAY_MULTIPLYVALUES) +#define CTA_ARRAY_RESHAPE_F77 F77_CALL(CTA_ARRAY_RESHAPE,CTA_ARRAY_RESHAPE) +#define CTA_ARRAY_GETSLICE_F77 F77_CALL(CTA_ARRAY_GETSLICE,CTA_ARRAY_GETSLICE) +#define CTA_ARRAY_GETSLICE_RANGE_F77 F77_CALL(CTA_ARRAY_GETSLICE_RANGE,CTA_ARRAY_GETSLICE_RANGE) +#define CTA_ARRAY_GETSLICEASDOUBLES_RANGE_F77 F77_CALL(CTA_ARRAY_GETSLICEASDOUBLES_RANGE,CTA_ARRAY_GETSLICEASDOUBLES_RANGE) +#define CTA_ARRAY_SETSLICEASDOUBLES_F77 F77_CALL(CTA_ARRAY_SETSLICEASDOUBLES,CTA_ARRAY_SETSLICEASDOUBLES) +#define CTA_ARRAY_SETSLICEASARRAY_F77 F77_CALL(CTA_ARRAY_SETSLICEASARRAY,CTA_ARRAY_SETSLICEASARRAY) +#define CTA_ARRAY_SETSLICEASARRAY_RANGE_F77 F77_CALL(CTA_ARRAY_SETSLICEASARRAY_RANGE,CTA_ARRAY_SETSLICEASARRAY_RANGE) +#define CTA_ARRAY_SETSLICEASDOUBLES_RANGE_F77 F77_CALL(CTA_ARRAY_SETSLICEASDOUBLES_RANGE,CTA_ARRAY_SETSLICEASDOUBLES_RANGE) +#define CTA_ARRAY_VALUEINDEX_F77 F77_CALL(CTA_ARRAY_VALUEINDEX,CTA_ARRAY_VALUEINDEX) + + +#define CLASSNAME "CTA_Array" + +/** + * This is an implementation of an array of doubles with multiple dimensions, a is eg available in fortran or + * matlab. It is written as an interface to allow for other implementations with highly optimized code or using parallel + * computing. + * + * Assumptions: + * - The ordering of double[] java-arrays is with the first dimension running slowest, eg the array [[1,2],[3,4],[5,6]] + * is returned/set as [1,2,3,4,5,6] , this is consistent with conventions in c or java. + * - Indices start at 0, consistent with c/java conventions. + * - The index for dimension starts counting at 0, thus 0 denotes the first dimension. + * + * @author nils van velzen + * + */ + +/* Struct holding all data associated to an native array */ +typedef struct { + double *values; + int n; + int nValues; + int *dimensions; + int nDimensions; +} CTAI_Array; + +#define MaxDimensions (20) + + +/* Some utility routines */ + +#undef METHOD +#define METHOD "CTAI_Array_getData" +int CTAI_Array_getData(CTA_Array h, CTAI_Array **cthis){ + + int retval=CTA_Handle_Check((CTA_Handle) h,CTA_ARRAY); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_array handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) h,(void **) cthis); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + return CTA_OK; +} + + +#undef METHOD +#define METHOD "CTAI_Array_getData" +int CTAI_Array_valueIndex(CTAI_Array *cthis, int nIndices, int *indices){ + + int blockSize=1; + int result=0; + int i; + + if (cthis->nDimensions!=nIndices){ + char msg[120]; + sprintf(msg, "number of indices (%d) does not match dimensions of array (%d)",nIndices, cthis->nDimensions); + CTA_WRITE_ERROR(msg); + return 0; + } + + for(i=cthis->nDimensions-1;i>=0;i--){ + int indexI=indices[i]; + if(indexI<0){indexI+=cthis->dimensions[i];} + result+=indexI*blockSize; + blockSize*=cthis->dimensions[i]; + } + return result; +} + + + + +int CTAI_Array_Create(CTA_Array *h){ + CTAI_Array *cthis; + int retval; + + cthis=CTA_Malloc(sizeof(CTAI_Array)); + cthis->values=NULL; + cthis->n=0; + cthis->nValues=0; + cthis->nDimensions=0; + retval=CTA_Handle_Create("array",CTA_ARRAY,cthis,h); + if (retval) { + CTA_WRITE_ERROR("Cannot create time handle"); + CTA_Free(cthis); + return retval; + } + return CTA_OK; +} + +int CTA_Array_Free(CTA_Array *h){ + + CTAI_Array *cthis; + int ierr=CTAI_Array_getData(*h, &cthis); + if (ierr==CTA_OK){ + if (cthis->values){ + CTA_Free(cthis->values); + } + CTA_Free(cthis); + ierr=CTA_Handle_Free(h); + } + return ierr; +} + + +int CTAI_Array_CreateDim(int nDimensions, int *dimensions, CTA_Array *h){ + CTAI_Array *cthis; + + int ierr=CTAI_Array_Create(h); + + ierr=CTAI_Array_getData(*h, &cthis); + if (ierr==CTA_OK){ + int i; + int n=dimensions[0]; + cthis->nDimensions=nDimensions; + cthis->dimensions=CTA_Malloc(nDimensions*sizeof(int)); + for (i=0;idimensions[i]=dimensions[i]; + } + for (i=1;in=n; + cthis->values=CTA_Malloc(n*sizeof(double)); + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + return ierr; + } + return CTA_OK; +} + +int CTA_Array_CreateAsDoubles(double *values, int nDimensions, int *dimensions, CTA_Array *h){ + CTAI_Array *cthis; + int ierr; + CTAI_Array_CreateDim(nDimensions, dimensions, h); + ierr=CTAI_Array_getData(*h, &cthis); + if (ierr==CTA_OK){ + int i; + for (i=0;in;i++){ + cthis->values[i]=values[i]; + } + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + } + return ierr; +} + + + + //storage array + + /** + * Constructor based on java arrays as input. + * @param values + * @param dimensions + * @param copyValues + */ +//// public Array(double[] values, int[] dimensions, boolean copyValues){ +//// //check size +//// int rank=dimensions.length; +// int numberOfValues=dimensions[0]; +// for(int i=1;i valuesList= new ArrayList(); +// int[] counter = new int[maxDimensions]; +// int[] dims = new int[maxDimensions]; +// int curDim=-1; //current dimension +// int rank=0; +// while(stringIndexdims[curDim]) dims[curDim]=counter[curDim]; +// counter[curDim]=0; +// curDim--; +// if(curDim<-1){ +// throw new RuntimeException("Too many closing brackets in array at position="+stringIndex +// +" in string="+source); +// } +// stringIndex++; +// }else if(source.charAt(stringIndex)==','){ +// counter[curDim]++; +// stringIndex++; +// }else{ //try to find a number +// int indexEnd=source.indexOf('}', stringIndex); +// // if a comma comes first +// int indexComma=source.indexOf(',', stringIndex); +// if((indexComma>=0)&(indexCommanDimensions; + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + return 0; + } +} + + +/** + * Get the number of array dimensions + * @return size of array, eg 2 for the array [[3,4,10],[3,4,10]] + */ +#undef METHOD +#define METHOD "CTA_Array_getnDimensions" +int CTA_Array_getnDimensions(CTA_Array h, int *nDimensions){ + CTAI_Array *cthis=NULL; + int ierr=CTAI_Array_getData(h, &cthis); + if (ierr==CTA_OK){ + *nDimensions=cthis->nDimensions; + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + } + return ierr; +} + + +/** + * Get the size of the array for each dimension. The length of the return value equals + * the number of dimensions. + * @return size of array, eg [3,4,10] + */ +#undef METHOD +#define METHOD "CTA_Array_getDimensions" +int CTA_Array_getDimensions(CTA_Array h, int *dimensions){ + CTAI_Array *cthis=NULL; + int ierr=CTAI_Array_getData(h, &cthis); + if (ierr==CTA_OK){ + int i; + for (i=0; inDimensions; i++){ + dimensions[i]=cthis->dimensions[i]; + } + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + } + return ierr; +} + +/** + * Total number of elements in an array. this is equal to the product of the values + * returned by getDimension and is thus just for c + * @return + */ +#undef METHOD +#define METHOD "CTA_Array_length" +int CTA_Array_length(CTA_Array h){ + CTAI_Array *cthis=NULL; + int ierr=CTAI_Array_getData(h, &cthis); + if (ierr==CTA_OK){ + return cthis->n; + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + } + return ierr; +} + +/** + * Get all values of an array. The values are guaranteed be a copy if + * copyValues==true or may be a reference if copyValues==false. + * Note that the values are never guaranteed to be a reference and can not be used + * to change the array. + * @param copyValues + * @return + */ +#undef METHOD +#define METHOD "CTA_Array_getValuesAsDoubles" +int CTA_Array_getValuesAsDoubles(CTA_Array h, double *values){ + CTAI_Array *cthis=NULL; + int ierr=CTAI_Array_getData(h, &cthis); + if (ierr==CTA_OK){ + int i; + for (i=0; in; i++){ + values[i]=cthis->values[i]; + } + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + } + return ierr; +} + +/** + * Get a part of the values from the array. + * Eg let a=[[1,2,3][4,5,6]] then getValuesAsDoubles(1,2) + * returns [2,3] + * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value. + * @param firstIndex + * @param lastIndex + * @return + */ +#undef METHOD +#define METHOD "CTA_Array_getValuesAsDoubles_indexrange" +int CTA_Array_getValuesAsDoubles_indexrange(CTA_Array h, int firstIndex, int lastIndex, double *values){ + CTAI_Array *cthis=NULL; + int ierr=CTAI_Array_getData(h, &cthis); + if (ierr==CTA_OK){ + int resultLength; + int i; + int n=cthis->n; + if(firstIndex<0){firstIndex+=n;} + if(lastIndex<0){lastIndex+=n;} + resultLength=lastIndex-firstIndex+1; + for (i=0;ivalues[firstIndex+i]; + } + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + } + return ierr; +} + +/** + * Get a value from an array for specific indices. + * Eg let a=[[1,2,3][4,5,6]] then getValueAsDouble(5) + * returns 6 + * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value. + * @param indices index specifier + * @return double value at the specific indices + */ + +#undef METHOD +#define METHOD "CTA_Array_getValueAsDoubles_index" +int CTA_Array_getValueAsDoubles_index(CTA_Array h, int index, double *value) { + CTAI_Array *cthis=NULL; + int ierr=CTAI_Array_getData(h, &cthis); + if (ierr==CTA_OK){ + int n=cthis->n; + if(index<0){index+=n;} + *value=cthis->values[index]; + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + } + return ierr; +} + + +/** + * Get a value from an array for specific indices. + * Eg let a=[[1,2,3][4,5,6]] then getValueAsDouble([1,2]) + * returns 6 + * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value. + * @return + */ +#undef METHOD +#define METHOD "CTA_Array_getValueAsDoubles_indices" +int CTA_Array_getValueAsDouble_indices(CTA_Array h, int nIndices, int* indices, double *value){ + CTAI_Array *cthis=NULL; + int ierr=CTAI_Array_getData(h, &cthis); + if (ierr==CTA_OK){ + int index = CTAI_Array_valueIndex(cthis, nIndices, indices); + *value=cthis->values[index]; + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + } + return ierr; +} + +/** + * Set the values of this array. + * @param values the values as an array of doubles + */ +#undef METHOD +#define METHOD "CTA_Array_setValuesAsDoubles" +int CTA_Array_setValuesAsDoubles(CTA_Array h, double *values, int length) { + CTAI_Array *cthis=NULL; + int ierr=CTAI_Array_getData(h, &cthis); + if (ierr==CTA_OK){ + if (cthis->n==length){ + int i; + for (i=0;ivalues[i]=values[i]; + } + } + else { + char msg[120]; + sprintf(msg,"The length of the array is %d but the user tries to set %d values\n",cthis->n,length); + CTA_WRITE_ERROR(msg); + } + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + } + return ierr; +} + +/** + * Set a value from an array for specific indices. + * Eg let a=[[1,2,3][4,5,6]] then setValueAsDouble([1,2],60.) + * results in a=[[1,2,3][4,5,60.]] + * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value. + * @param indices index specifier + * @param value at the specific indices + */ +#undef METHOD +#define METHOD "CTA_Array_setValueAsDouble_indices" +int CTA_Array_setValueAsDouble_indices(CTA_Array h, int nIndices, int *indices, double value){ + CTAI_Array *cthis=NULL; + int ierr=CTAI_Array_getData(h, &cthis); + if (ierr==CTA_OK){ + int index = CTAI_Array_valueIndex(cthis, nIndices, indices); + cthis->values[index]=value; + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + } + return ierr; +} + +/** + * Set part of the values of this array. + * Let a=[[1,2,3][4,5,6]] then setValuesAsDoubles(1,2,[20,30]) will result in a=[[1,20,30][4,5,6]] + * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value. + * @param firstIndex specifies the start of the selection + * @param lastIndex specifies the end of the selection + * @param values that will replace the selected range of numbers + */ +#undef METHOD +#define METHOD "CTA_Array_setValuesAsDoubles_indexrange" +int CTA_Array_setValuesAsDoubles_indexrange(CTA_Array h, int firstIndex, int lastIndex, double *values){ + CTAI_Array *cthis=NULL; + int ierr=CTAI_Array_getData(h, &cthis); + if (ierr==CTA_OK){ + int resultLength; + int i; + int n=cthis->n; + if(firstIndex<0){firstIndex+=n;} + if(lastIndex<0){lastIndex+=n;} + resultLength=lastIndex-firstIndex+1; + for (i=0;ivalues[firstIndex+i]=values[i]; + } + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + } + return ierr; +} + + /** + * Get a value from an array for specific indices. + * Eg let a=[[1,2,3][4,5,6]] then setValueAsDouble(5,60.) + * results in a=[[1,2,3][4,5,60.]] + * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value. + * @param indices index specifier + * @param value at the specific indices + */ +#undef METHOD +#define METHOD "CTA_Array_setValueAsDouble_index" +int CTA_Array_setValueAsDouble_index(CTA_Array h, int index, double value) { + CTAI_Array *cthis=NULL; + int ierr=CTAI_Array_getData(h, &cthis); + if (ierr==CTA_OK){ + int n=cthis->n; + if(index<0){index+=n;} + cthis->values[index]=value; + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + } + return ierr; +} + +/** + * Set whole vector equal to a constant value. + *

+ * Note: This method can only be used if all elements of the vector + * have the same data type. + * + * @param value value to set + */ +#undef METHOD +#define METHOD "CTA_Array_setConstant" +int CTA_Array_setConstant(CTA_Array h, double value) { + CTAI_Array *cthis=NULL; + int ierr=CTAI_Array_getData(h, &cthis); + if (ierr==CTA_OK){ + int i; + for (i=0;in;i++){ + cthis->values[i]=value; + } + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + } + return ierr; +} + +/** + * Perform a values += alpha * axpyValues operation on each value in this array. + * @param alpha the alpha in state variable += alpha * vector. + * @param axpyValues the values for the axpy-operation on all values in this array. + */ +#undef METHOD +#define METHOD "CTA_Array_axpyOnValues" +int CTA_Array_axpyOnValues(CTA_Array h, double alpha, double *axpyValues) { + CTAI_Array *cthis=NULL; + int ierr=CTAI_Array_getData(h, &cthis); + if (ierr==CTA_OK){ + int i; + for (i=0;in;i++){ + cthis->values[i] += alpha*axpyValues[i]; + } + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + } + return ierr; +} + +/** + * Multiply each value in this array with the corresponding multiplication factor. + * @param multiplicationFactors the multiplication factors for all array values. + */ +#undef METHOD +#define METHOD "CTA_Array_multiplyValues" +int CTA_Array_multiplyValues(CTA_Array h, double* multiplicationFactors) { + CTAI_Array *cthis=NULL; + int ierr=CTAI_Array_getData(h, &cthis); + if (ierr==CTA_OK){ + int i; + for (i=0;in;i++){ + cthis->values[i] *= multiplicationFactors[i]; + } + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + } + return ierr; +} + +/** + * Change the dimensions of an array. The new array should have the same length + * @param dimensions + */ +#undef METHOD +#define METHOD "CTA_Array_reshape" +int CTA_Array_reshape(CTA_Array h, int nDimensions, int *dimensions){ + CTAI_Array *cthis=NULL; + int ierr=CTAI_Array_getData(h, &cthis); + if (ierr==CTA_OK){ + int n=dimensions[0]; + int i; + for(i=1;in){ + char msg[120]; + sprintf(msg,"Can not reshape to a different length. Length %d to %d.",cthis->n, n); + CTA_WRITE_ERROR(msg); + return CTA_DIMENSION_ERROR; + } + else { + int iDim; + cthis->nDimensions=nDimensions; + for (iDim=0; iDimdimensions[i]=dimensions[i]; + } + } + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + } + return ierr; +} + + +///** +// * Is it allowed to write beyond the current end of the array. Some implementations, especially ones writing to disk +// * can increase the size of the first dimension of the array. +// * +// * Multiple growing dimensions requires moving the existing data if it implemented with a one-dimensional storage, +// * eg as double[]. This is NOT SUPPORTED. +// * +// * @return +// */ +//// boolean allowsGrowingFirstDimension(){ +//// return false; +////} +// + + + + + +/** + * Get part of the array by selection of a subset in one dimension. + * Eg. a=[[1,2,3],[4,5,6]] a.getSlice(0,0) returns [1,2,3] + * Note that the number of dimensions IS reduced by one. + * @param dimension + * @param index + * @return + */ +#undef METHOD +#define METHOD "CTA_Array_getSlice" +int CTA_Array_getSlice(CTA_Array h, int dimension, int index, CTA_Array *h_out){ + + CTAI_Array *cthis=NULL; + CTAI_Array *cthis_out=NULL; + int ierr; + + *h_out=CTA_NULL; + ierr=CTAI_Array_getData(h, &cthis); + + if (ierr==CTA_OK){ + // indexing eg A(:,: ,index,:,: ) + // A(left,at ,right) + int nDimsLeft=dimension; + int dimsLeft[MaxDimensions]; + int dims[MaxDimensions]; + int rankRight; + int dimsRight[MaxDimensions]; + int nDims; + int i; + for (i=0; idimensions[i]; + dims[i]=dimsLeft[i]; + } + rankRight=cthis->nDimensions-dimension-1; + for (i=0;idimensions[dimension+1+i]; + dims[i+dimension]=dimsRight[i]; + } + + nDims=nDimsLeft+rankRight; + ierr=CTAI_Array_CreateDim(nDims, dims, h_out); + if (ierr==CTA_OK){ + ierr=CTAI_Array_getData(*h_out, &cthis_out); + } + if (ierr==CTA_OK){ + + // prepare for copy + int blockSize = 1; + int offset; + int i; + int srcPos; + int destPos; + int stride; + int count; + + for(i=0;idimensions[dimension]*blockSize; + count = 1; + for(i=0;ivalues[destPos+j]=cthis->values[srcPos+j]; + } + srcPos+=stride; + destPos+=blockSize; + } + } + else { + CTA_WRITE_ERROR("Cannot allocate return array"); + } + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + } + return ierr; +} + +/** + * Get part of the array by selection of a subset in one dimension. + * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] a.getSlice(0,0,1) returns [[1,2],[3,4]] + * Note that the number of dimensions is NOT reduced by one. + * @param dimension + * @param minIndex + * @param maxIndex + * @return + */ +#undef METHOD +#define METHOD "CTA_Array_getSlice_range" +int CTA_Array_getSlice_range(CTA_Array h, int dimension, int minIndex, int maxIndex, CTA_Array *h_out){ + // indexing eg A(:,: ,index,:,: ) + // A(left,at ,right) + CTAI_Array *cthis=NULL; + CTAI_Array *cthis_out=NULL; + int ierr=CTAI_Array_getData(h, &cthis); + if (ierr==CTA_OK){ + int rankRight; + int dimsLeft[MaxDimensions]; + int dimsRight[MaxDimensions]; + int dims[MaxDimensions]; + int nDims; + int i; + int nDimsLeft=dimension; + for (i=0;idimensions[i]; + } + rankRight=cthis->nDimensions-dimension-1; + for (i=0;idimensions[dimension+1+i]; + } + for (i=0; inDimensions;i++){ + dims[i]=cthis->dimensions[i]; + } + dims[dimension]=maxIndex-minIndex+1; + nDims=cthis->nDimensions; + + ierr=CTAI_Array_CreateDim(nDims, dims, h_out); + if (ierr==CTA_OK){ + ierr=CTAI_Array_getData(*h_out, &cthis_out); + } + if (ierr==CTA_OK){ + // prepare for copy + int minOffset; + int stride; + int count; + int srcPos; + int destPos; + int j,k; + + int blockSize = 1; + for(i=0;idimensions[dimension]*blockSize; + count = 1; + for(i=0;ivalues[destPos+k]=cthis->values[srcPos+k]; + } + srcPos+=blockSize; + destPos+=blockSize; + } + srcPos+=stride; + destPos+=blockSize; + } + } + else { + CTA_WRITE_ERROR("Cannot allocate return array"); + } + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + } + return ierr; +} + +/** + * Get part of the array by selection of a subset in one dimension. + * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] a.getSlice(0,0,1) returns [1,2,3,4] + * @param dimension + * @param minIndex + * @param maxIndex + * @return + */ +#undef METHOD +#define METHOD "CTA_Array_getSliceAsDoubles_range" +int CTA_Array_getSliceAsDoubles_range(CTA_Array h, int dimension, int minIndex, int maxIndex, double *values){ + // indexing eg A(:,: ,index,:,: ) + // A(left,at ,right) + CTAI_Array *cthis=NULL; + int ierr=CTAI_Array_getData(h, &cthis); + if (ierr==CTA_OK){ + int dimsLeft[MaxDimensions]; + int rankRight; + int dimsRight[MaxDimensions]; + int i; + int blockSize; + int count; + int minOffset; + int stride; + int srcPos; + int destPos; + int j,k; + int nDimsLeft=dimension; + int dims[MaxDimensions]; + for (i=0;idimensions[i]; + } + rankRight=cthis->nDimensions-dimension-1; + for (i=0;idimensions[dimension+1+i]; + } + for (i=0; inDimensions;i++){ + dims[i]=cthis->dimensions[i]; + } + dims[dimension]=maxIndex-minIndex+1; + + // prepare for copy + blockSize = 1; + for(i=0;idimensions[dimension]*blockSize; + count = 1; + for(i=0;ivalues[srcPos+k]; + } + srcPos+=blockSize; + destPos+=blockSize; + } + srcPos+=stride; + destPos+=blockSize; + } + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + } + return ierr; +} + + + +/** + * Set the values of a part of an array. + * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([11,12,13],1,1) + * sets the second column a=[[1,11,3],[4,12,6],[7,13,9]] + * Note that the dimension of the slice is one smaller than for the array. + * @param slice + * @param dimension + * @param index + */ +#undef METHOD +#define METHOD "CTA_Array_setSliceAsDoubles" +int CTA_Array_setSliceAsDoubles(CTA_Array h, double *slice, int dimension, int index){ + CTAI_Array *cthis=NULL; + int ierr=CTAI_Array_getData(h, &cthis); + if (ierr==CTA_OK){ + // indexing eg A(:,: ,index,:,: ) + // A(left,at ,right) + int dimsLeft[MaxDimensions]; + int rankRight; + int dimsRight[MaxDimensions]; + int nDimsLeft=dimension; + int i; + int blockSize; + int offset; + int stride; + int count; + int srcPos; + int destPos; + + for (i=0;idimensions[i];} + + rankRight=cthis->nDimensions-dimension-1; + + for (i=0;idimensions[dimension+1+i]; + } + + // prepare for copy + blockSize = 1; + for(i=0;idimensions[dimension]*blockSize; + count = 1; + for(i=0;ivalues[destPos+j]=slice[srcPos+j]; + } + destPos+=stride; + srcPos+=blockSize; + } + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + } + return ierr; +} + + +/** + * Set the values of a part of an array. + * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([11,12,13],1,1) + * sets the second column a=[[1,11,3],[4,12,6],[7,13,9]] + * Note that the dimension of the slice is one smaller than for the array. + * @param slice + * @param dimension + * @param index + */ +#undef METHOD +#define METHOD "CTA_Array_setSliceAsArray" +int CTA_Array_setSliceAsArray(CTA_Array h, CTA_Array slice_h, int dimension, int index){ + CTAI_Array *slice; + int ierr=CTAI_Array_getData(slice_h, &slice); + if (ierr==CTA_OK){ + ierr=CTA_Array_setSliceAsDoubles(h, slice->values, dimension, index); + if (ierr!=CTA_OK){ + CTA_WRITE_ERROR("Error setting the values of the slice"); + } + } + else { + CTA_WRITE_ERROR("Handle slice_h is not correct"); + } + return CTA_OK; +} + + + +/** + * Set the values of a part of an array. + * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([[11,12,13],[14,15,16]],1,1,2) + * sets the second and third columns a=[[1,11,14],[4,12,15],[7,13,16]] + * Note that the dimension of the slice is the same as for the array. + * @param slice + * @param dimension + * @param minIndex + * @param maxIndex + */ +#undef METHOD +#define METHOD "CTA_Array_setSliceAsArray_range" +int CTA_Array_setSliceAsArray_range(CTA_Array h, CTA_Array slice_h, int dimension, int minIndex, int maxIndex){ + CTAI_Array *slice; + int ierr=CTAI_Array_getData(slice_h, &slice); + if (ierr==CTA_OK){ + ierr=CTA_Array_setSliceAsDoubles_range(h, slice->values, dimension, minIndex, maxIndex); + if (ierr!=CTA_OK){ + CTA_WRITE_ERROR("Error setting the values of the slice"); + } + } + else { + CTA_WRITE_ERROR("Handle slice_h is not correct"); + } + return ierr; + +} + + +/** + * Set the values of a part of an array. + * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([11,12,13,14,15,16],1,1,2) + * sets the second and third columns a=[[1,11,14],[4,12,15],[7,13,16]] + * Note that the dimension of the slice is the same as for the array. + * @param slice + * @param dimension + * @param minIndex + * @param maxIndex + */ +#undef METHOD +#define METHOD "CTA_Array_setSliceAsDoubles_range" +int CTA_Array_setSliceAsDoubles_range(CTA_Array h, double *slice, int dimension, int minIndex, int maxIndex){ + CTAI_Array *cthis=NULL; + int dimsRight[MaxDimensions]; + + + int ierr=CTAI_Array_getData(h, &cthis); + if (ierr==CTA_OK){ + + // indexing eg A(:,: ,index,:,: ) + // A(left,at ,right) + int dimsLeft[MaxDimensions]; + int dims[MaxDimensions]; + int nDimsLeft=dimension; + int i; + int blockSize; + int minOffset; + int stride; + int count; + int destPos; + int srcPos; + int rankRight=cthis->nDimensions-1; + + for (i=0; idimensions[i];} + for (i=0; idimensions[dimension+1+i];} + for (i=0; inDimensions; i++){dims[i]=cthis->dimensions[i];} + dims[dimension]=maxIndex-minIndex+1; + + // prepare for copy + blockSize = 1; + for(i=0;idimensions[dimension]*blockSize; + count = 1; + for(i=0;ivalues[destPos+k]=slice[srcPos+k]; + } + destPos+=blockSize; + srcPos+=blockSize; + } + destPos+=stride; + srcPos+=blockSize; + } + } + else { + CTA_WRITE_ERROR("Handle is not correct"); + } + return ierr; +} + + +/** + * Convert indices in multiple dimensions to position in the one-dimensional array as + * returned eg by getValuesAsDoubles + * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] then valueIndex([1,0]) returns 3 which points to the value 4 here. + * @param indices + * @return position + */ +#undef METHOD +#define METHOD "CTA_Array_reshape" +int CTA_Array_valueIndex(CTA_Array h, int nIndices, int *indices, int *index){ + CTAI_Array *cthis=NULL; + int ierr=CTAI_Array_getData(h, &cthis); + if (ierr==CTA_OK){ + *index = CTAI_Array_valueIndex(cthis, nIndices, indices); + } + else { + *index=0; + CTA_WRITE_ERROR("Handle is not correct"); + } + return ierr; +} + + + +CTAEXPORT void CTA_ARRAY_FREE_F77(CTA_Array *h, int *ierr){ + *ierr = CTA_Array_Free(h); +} +CTAEXPORT void CTA_ARRAY_CREATEASDOUBLES_F77(double *values, int *nDimensions, int *dimensions, CTA_Array *h, int *ierr){ + *ierr = CTA_Array_CreateAsDoubles(values, *nDimensions, dimensions, h); +} + +//void CTA_Array_getNumberOfDimensions(CTA_Array h); + +CTAEXPORT void CTA_ARRAY_GETNDIMENSIONS_F77(CTA_Array *h, int *nDimensions, int *ierr){ + *ierr = CTA_Array_getnDimensions(*h, nDimensions); +} + +CTAEXPORT void CTA_ARRAY_GETDIMENSIONS_F77(CTA_Array *h, int *dimensions, int *ierr){ + *ierr = CTA_Array_getDimensions(*h, dimensions); +} + +CTAEXPORT void CTA_ARRAY_LENGTH_F77(CTA_Array *h, int *length, int *ierr){ + *length = CTA_Array_length(*h); + *ierr = CTA_OK; +} + +CTAEXPORT void CTA_ARRAY_GETVALUESASDOUBLES_F77(CTA_Array *h, double *values, int *ierr){ + *ierr = CTA_Array_getValuesAsDoubles(*h, values); +} + +CTAEXPORT void CTA_ARRAY_GETVALUESASDOUBLES_INDEXRANGE_F77(CTA_Array *h, int *firstIndex, int *lastIndex, double *values, int *ierr){ + *ierr = CTA_Array_getValuesAsDoubles_indexrange(*h, *firstIndex, *lastIndex, values); +} + +CTAEXPORT void CTA_ARRAY_GETVALUEASDOUBLES_INDEX_F77(CTA_Array *h, int *index, double *value, int *ierr){ + *ierr = CTA_Array_getValueAsDoubles_index(*h, *index, value); +} + +CTAEXPORT void CTA_ARRAY_GETVALUEASDOUBLE_INDICES_F77(CTA_Array *h, int *nIndices, int* indices, double *value, int *ierr){ + *ierr = CTA_Array_getValueAsDouble_indices(*h, *nIndices, indices, value); +} + +CTAEXPORT void CTA_ARRAY_SETVALUESASDOUBLES_F77(CTA_Array *h, double *values, int *length, int *ierr){ + *ierr = CTA_Array_setValuesAsDoubles(*h, values, *length); +} + +CTAEXPORT void CTA_ARRAY_SETVALUEASDOUBLE_INDICES_F77(CTA_Array *h, int *nIndices, int *indices, double *value, int *ierr){ + *ierr = CTA_Array_setValueAsDouble_indices(*h, *nIndices, indices, *value); +} + +CTAEXPORT void CTA_ARRAY_SETVALUESASDOUBLES_INDEXRANGE_F77(CTA_Array *h, int *firstIndex, int *lastIndex, double *values, int *ierr){ + *ierr = CTA_Array_setValuesAsDoubles_indexrange(*h, *firstIndex, *lastIndex, values); +} + +CTAEXPORT void CTA_ARRAY_SETVALUEASDOUBLE_INDEX_F77(CTA_Array *h, int *index, double *value, int *ierr){ + *ierr = CTA_Array_setValueAsDouble_index(*h, *index, *value); +} + +CTAEXPORT void CTA_ARRAY_SETCONSTANT_F77(CTA_Array *h, double *value, int *ierr){ + *ierr = CTA_Array_setConstant(*h, *value); +} + +CTAEXPORT void CTA_ARRAY_AXPYONVALUES_F77(CTA_Array *h, double *alpha, double *axpyValues, int *ierr){ + *ierr = CTA_Array_axpyOnValues(*h, *alpha, axpyValues); +} + +CTAEXPORT void CTA_ARRAY_MULTIPLYVALUES_F77(CTA_Array *h, double* multiplicationFactors, int *ierr){ + *ierr = CTA_Array_multiplyValues(*h, multiplicationFactors); +} + +CTAEXPORT void CTA_ARRAY_RESHAPE_F77(CTA_Array *h, int *nDimensions, int *dimensions, int *ierr){ + *ierr = CTA_Array_reshape(*h, *nDimensions, dimensions); +} + +CTAEXPORT void CTA_ARRAY_GETSLICE_F77(CTA_Array *h, int *dimension, int *index, CTA_Array *h_out, int *ierr){ + *ierr = CTA_Array_getSlice(*h, *dimension, *index, h_out); +} + +CTAEXPORT void CTA_ARRAY_GETSLICE_RANGE_F77(CTA_Array *h, int *dimension, int *minIndex, int *maxIndex, CTA_Array *h_out, int *ierr){ + *ierr = CTA_Array_getSlice_range(*h, *dimension, *minIndex, *maxIndex, h_out); +} + +CTAEXPORT void CTA_ARRAY_GETSLICEASDOUBLES_RANGE_F77(CTA_Array *h, int *dimension, int *minIndex, int *maxIndex, double *values, int *ierr){ + *ierr = CTA_Array_getSliceAsDoubles_range(*h, *dimension, *minIndex, *maxIndex, values); +} + +CTAEXPORT void CTA_ARRAY_SETSLICEASDOUBLES_F77(CTA_Array *h, double *slice, int *dimension, int *index, int *ierr){ + *ierr = CTA_Array_setSliceAsDoubles(*h, slice, *dimension, *index); +} + +CTAEXPORT void CTA_ARRAY_SETSLICEASARRAY_F77(CTA_Array *h, CTA_Array *slice_h, int *dimension, int *index, int *ierr){ + *ierr = CTA_Array_setSliceAsArray(*h, *slice_h, *dimension, *index); +} + +CTAEXPORT void CTA_ARRAY_SETSLICEASARRAY_RANGE_F77(CTA_Array *h, CTA_Array *slice_h, int *dimension, int *minIndex, int *maxIndex, int *ierr){ + *ierr = CTA_Array_setSliceAsArray_range(*h, *slice_h, *dimension, *minIndex, *maxIndex); +} + +CTAEXPORT void CTA_ARRAY_SETSLICEASDOUBLES_RANGE_F77(CTA_Array *h, double *slice, int *dimension, int *minIndex, int *maxIndex, int *ierr){ + *ierr = CTA_Array_setSliceAsDoubles_range(*h, slice, *dimension, *minIndex, *maxIndex); +} + +CTAEXPORT void CTA_ARRAY_VALUEINDEX_F77(CTA_Array *h, int *nIndices, int *indices, int *index, int *ierr){ + *ierr = CTA_Array_valueIndex(*h, *nIndices, indices, index); +} + + diff --git a/costa/native/cta/src/cta_datatypes.c b/costa/native/cta/src/cta_datatypes.c new file mode 100644 index 000000000..c808dd33b --- /dev/null +++ b/costa/native/cta/src/cta_datatypes.c @@ -0,0 +1,224 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_datatypes.c $ +$Revision: 3406 $, $Date: 2012-08-16 15:25:53 +0200 (Thu, 16 Aug 2012) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include "f_cta_utils.h" +#include "cta_errors.h" +#include "cta_datatypes.h" +#include "cta_message.h" + +#define CTA_SIZEOF_F77 F77_CALL(cta_sizeof,CTA_SIZEOF) +#define CLASSNAME "CTA_Datatypes" + +#undef METHOD +#define METHOD "SizeOf" +int CTA_SizeOf(CTA_Datatype datatype, int *size){ + + switch(datatype){ + case CTA_HANDLE: + case CTA_INTEGER: + case CTA_STRING: + *size=sizeof(int); + break; + case CTA_REAL: + *size=sizeof(float); + break; + case CTA_DOUBLE: + *size=sizeof(double); + break; + default: + CTA_WRITE_ERROR("This datatype is unknown."); + return CTA_ILLEGAL_DATATYPE; + break; + } + return CTA_OK; +} + +const char *CTAI_Type2String(CTA_Datatype datatype) { + switch(datatype){ + case CTA_HANDLE: + return "CTA_HANDLE"; + case CTA_INTERFACE: + return "CTA_INTERFACE"; + case CTA_FUNCTION: + return "CTA_FUNCTION"; + case CTA_VECTOR: + return "treeVectorLeaf"; + case CTA_VECTORCLASS: + return "CTA_VECTORCLASS"; + case CTA_TREEVECTOR: + return "treeVector"; + case CTA_SUBTREEVECTOR: + return "subTreeVector"; + case CTA_MATRIXCLASS: + return "CTA_MATRIXCLASS"; + case CTA_MATRIX: + return "CTA_MATRIX"; + case CTA_COVMATCLASS: + return "CTA_COVMATCLASS"; + case CTA_COVMAT: + return "CTA_COVMAT"; + case CTA_INTPOL: + return "CTA_INTPOL"; + case CTA_OBS: + return "CTA_OBS"; + case CTA_MODELCLASS: + return "CTA_MODELCLASS"; + case CTA_MODEL: + return "CTA_MODEL"; + case CTA_TIME: + return "CTA_TIME"; + case CTA_SOBS: + return "CTA_SOBS"; + case CTA_SOBSCLASS: /* and CTA_OBSDESCRCLASS */ + return "CTA_SOBSCLASS"; + case CTA_OBSDESCR: + return "CTA_OBSDESCR"; + case CTA_METHODCLASS: + return "CTA_METHODCLASS"; + case CTA_METHOD: + return "CTA_METHOD"; + case CTA_TREE: + return "CTA_TREE"; + case CTA_VOID: + return "CTA_VOID"; + case CTA_INTEGER: + return "CTA_INTEGER"; + case CTA_REAL: + return "CTA_REAL"; + case CTA_DOUBLE: + return "CTA_DOUBLE"; + case CTA_FSTRING: + return "CTA_FSTRING"; + case CTA_CSTRING: + return "CTA_CSTRING"; + case CTA_STRING: + return "CTA_STRING"; + case CTA_FILE: + return "CTA_FILE"; + case CTA_1DINTEGER : + return "CTA_1DINTEGER"; + case CTA_1DREAL: + return "CTA_1DREAL"; + case CTA_1DDOUBLE: + return "CTA_1DDOUBLE"; + case CTA_1DFSTRING: + return "CTA_1DFSTRING"; + case CTA_1DCSTRING: + return "CTA_1DCSTRING"; + case CTA_ARRAY: + return "CTA_ARRAY"; + } + return "CTA_ILLEGAL_DATATYPE"; +} + +CTA_Datatype CTAI_String2Type(const char *dt) { + if (0 == strcmp("CTA_HANDLE", dt)) { + return CTA_HANDLE; + } else if (0 == strcmp("CTA_INTERFACE", dt)) { + return CTA_INTERFACE; + } else if (0 == strcmp("CTA_FUNCTION", dt)) { + return CTA_FUNCTION; + } else if (0 == strcmp("CTA_VECTOR", dt)) { + return CTA_VECTOR; + } else if (0 == strcmp("treeVectorLeaf", dt)) { + return CTA_VECTOR; + } else if (0 == strcmp("CTA_VECTORCLASS", dt)) { + return CTA_VECTORCLASS; + } else if (0 == strcmp("CTA_TREEVECTOR", dt)) { + return CTA_TREEVECTOR; + } else if (0 == strcmp("treeVector", dt)) { + return CTA_TREEVECTOR; + } else if (0 == strcmp("subTreeVector", dt)) { + return CTA_TREEVECTOR; + } else if (0 == strcmp("CTA_MATRIXCLASS", dt)) { + return CTA_MATRIXCLASS; + } else if (0 == strcmp("CTA_MATRIX", dt)) { + return CTA_MATRIX; + } else if (0 == strcmp("CTA_COVMATCLASS", dt)) { + return CTA_COVMATCLASS; + } else if (0 == strcmp("CTA_COVMAT", dt)) { + return CTA_COVMAT; + } else if (0 == strcmp("CTA_INTPOL", dt)) { + return CTA_INTPOL; + } else if (0 == strcmp("CTA_OBS", dt)) { + return CTA_OBS; + } else if (0 == strcmp("CTA_MODELCLASS", dt)) { + return CTA_MODELCLASS; + } else if (0 == strcmp("CTA_MODEL", dt)) { + return CTA_MODEL; + } else if (0 == strcmp("CTA_TIME", dt)) { + return CTA_TIME; + } else if (0 == strcmp("CTA_SOBS", dt)) { + return CTA_SOBS; + } else if (0 == strcmp("CTA_SOBSCLASS", dt)) { + return CTA_SOBSCLASS; + } else if (0 == strcmp("CTA_OBSDESCRCLASS", dt)) { + return CTA_OBSDESCRCLASS; + } else if (0 == strcmp("CTA_OBSDESCR", dt)) { + return CTA_OBSDESCR; + } else if (0 == strcmp("CTA_METHODCLASS", dt)) { + return CTA_METHODCLASS; + } else if (0 == strcmp("CTA_METHOD", dt)) { + return CTA_METHOD; + } else if (0 == strcmp("CTA_TREE", dt)) { + return CTA_TREE; + } else if (0 == strcmp("CTA_VOID", dt)) { + return CTA_VOID; + } else if (0 == strcmp("CTA_INTEGER", dt)) { + return CTA_INTEGER; + } else if (0 == strcmp("CTA_REAL", dt)) { + return CTA_REAL; + } else if (0 == strcmp("CTA_DOUBLE", dt)) { + return CTA_DOUBLE; + } else if (0 == strcmp("CTA_FSTRING", dt)) { + return CTA_FSTRING; + } else if (0 == strcmp("CTA_CSTRING", dt)) { + return CTA_CSTRING; + } else if (0 == strcmp("CTA_STRING", dt)) { + return CTA_STRING; + } else if (0 == strcmp("CTA_FILE", dt)) { + return CTA_FILE; + } else if (0 == strcmp("CTA_1DINTEGER ", dt)) { + return CTA_1DINTEGER; + } else if (0 == strcmp("CTA_1DREAL", dt)) { + return CTA_1DREAL; + } else if (0 == strcmp("CTA_1DDOUBLE", dt)) { + return CTA_1DDOUBLE; + } else if (0 == strcmp("CTA_1DFSTRING", dt)) { + return CTA_1DFSTRING; + } else if (0 == strcmp("CTA_1DCSTRING", dt)) { + return CTA_1DCSTRING; + } else if (0 == strcmp("CTA_ARRAY", dt)) { + return CTA_ARRAY; + } + return CTA_NULL; +} + +/* Interfacing with Fortran */ + +CTAEXPORT void CTA_SIZEOF_F77 (int *datatype, int *size, int *ierr){ + *ierr=CTA_SizeOf((CTA_Datatype) *datatype, size); +} + + + diff --git a/costa/native/cta/src/cta_datetime.c b/costa/native/cta/src/cta_datetime.c new file mode 100644 index 000000000..bf0d2a206 --- /dev/null +++ b/costa/native/cta/src/cta_datetime.c @@ -0,0 +1,130 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_datetime.c $ +$Revision: 3407 $, $Date: 2012-08-17 13:50:50 +0200 (Fri, 17 Aug 2012) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2007 Johan Ansink + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include + +#include "f_cta_utils.h" +#include "ctai.h" +#include "cta_datetime.h" +#include "cta_errors.h" + +#define CTA_DATETIME_MODIFIEDJULIANTOGREGORIAN_F77 F77_CALL(cta_datetime_modifiedjuliantogregorian,CTA_DATETIME_MODIFIEDJULIANTOGREGORIAN) +#define CTA_DATETIME_GREGORIANTOMODIFIEDJULIAN_F77 F77_CALL(cta_datetime_gregoriantomodifiedjulian,CTA_DATETIME_GREGORIANTOMODIFIEDJULIAN) + +int CTA_DateTime_GregorianToJulian(int year, int month, int day, int hour, int minute, int second, double *jd){ + + /*Julian day number from Gregorian date */ + double a, y, m; + + a = floor((14 - month)/12); + y = year + 4800 - a; + m = month + 12*a - 3; + + /* For a date in the Gregorian calendar */ + + *jd = day + floor((153.*m + 2.)/5.) + y*365. + floor(y/4.) - floor(y/100.) + floor(y/400.) + - 32045. + ( second + 60.*minute + 3600.*(hour - 12.) )/86400.0; + + return CTA_OK; +} + +int CTA_DateTime_GregorianToModifiedJulian(int year, int month, int day, int hour, int minute, int second, double *mjd){ + double jd; + + CTA_DateTime_GregorianToJulian(year,month,day,hour,minute,second,&jd); + *mjd = jd- MJDREF; + return CTA_OK; +} + +int CTA_DateTime_DaysToHMS (double days, int *h, int *m, int *s){ + int hour, minute, second; + + second = (int)(86400 * days); + /* get number of hours */ + hour = second/3600; + /* remove the hours */ + second = second - 3600*hour; + /* get number of minutes */ + minute = second/60; + /* remove the minutes */ + second = second - 60*minute; + + *h = hour; + *m = minute; + *s = second; + + return CTA_OK; +} + +int CTA_DateTime_ModifiedJulianToJulian(double mjd, double *jd){ + *jd = mjd + MJDREF; + return CTA_OK; +} + +int CTA_DateTime_JulianToGregorian(double jd, int *year, int *month, int *day, int *hour, int *minute, int *second){ + + double ijd; // integer part + double fjd; // fraction part + double a,b,c,d,e,m; + + ijd = floor(jd + 0.5); + + fjd = jd - ijd + 0.5; + CTA_DateTime_DaysToHMS(fjd, hour, minute, second); + + a = ijd + 32044; + b = floor((4 * a + 3) / 146097); + c = a - floor((b * 146097) / 4); + + d = floor((4 * c + 3) / 1461); + e = c - floor((1461 * d) / 4); + m = floor((5 * e + 2) / 153); + + *day = (int)(e - floor((153 * m + 2) / 5) + 1); + *month = (int)(m + 3 - 12 * floor(m / 10)); + *year = (int)(b * 100 + d - 4800 + floor(m / 10)); + + return CTA_OK; +} + +int CTA_DateTime_ModifiedJulianToGregorian(double mjd, int *year, int *month, int *day, int *hour, int *minute, int *second){ + + int ierr; // error code + double jd; // Julian date + + ierr=CTA_DateTime_ModifiedJulianToJulian(mjd, &jd); + if (ierr == CTA_OK){ + ierr=CTA_DateTime_JulianToGregorian(jd, year, month, day, hour, minute, second); + } + return ierr; +} + +/* Interfacing with Fortran */ + +CTAEXPORT void CTA_DATETIME_GREGORIANTOMODIFIEDJULIAN_F77(int *year, int *month, int *day, int *hour, int *minute, int *second, double *mjd, int *ierr){ + *ierr=CTA_DateTime_GregorianToModifiedJulian(*year, *month, *day, *hour, *minute, *second, mjd); +} + +CTAEXPORT void CTA_DATETIME_MODIFIEDJULIANTOGREGORIAN_F77(double *mjd, int *year, int *month, int *day, int *hour, int *minute, int *second, int *ierr){ + *ierr=CTA_DateTime_ModifiedJulianToGregorian(*mjd, year, month, day, hour, minute, second); +} + diff --git a/costa/native/cta/src/cta_defaults.c b/costa/native/cta/src/cta_defaults.c new file mode 100644 index 000000000..203727e6a --- /dev/null +++ b/costa/native/cta/src/cta_defaults.c @@ -0,0 +1,91 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_defaults.c $ +$Revision: 3406 $, $Date: 2012-08-16 15:25:53 +0200 (Thu, 16 Aug 2012) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "cta_defaults.h" + +/** default dense vector class implementation */ +CTA_VecClass CTA_DEFAULT_VECTOR; + +/** default stochastic observer class implementation */ +CTA_SObsClass CTA_DEFAULT_SOBS; + +/** observer description class implementation based on a simple table */ +CTA_ObsDescrClass CTA_OBSDESCR_TABLE; + +/** stochastic observer class implementation + * that concatenates stochastic observers of arbitrary kind */ +CTA_SObsClass CTA_COMBINE_SOBS; + +/** stochastic observer class implementation + * based on NetCDF input files */ +CTA_SObsClass CTA_NETCDF_SOBS; + +/** stochastic observer class implementation + * based on MAORI observations library */ +CTA_SObsClass CTA_MAORI_SOBS; + +/** stochastic observer class implementation + * based on user provided library */ +CTA_SObsClass CTA_USER_SOBS; + +/** default dense matrix class implementation */ +CTA_MatClass CTA_DEFAULT_MATRIX; + +/** The SP (Single processor) model builder */ +CTA_ModelClass CTA_MODBUILD_SP; + +/** The PAR (multiple processor parallel) model builder */ +CTA_ModelClass CTA_MODBUILD_PAR; + +/** File handle that actually prints to standard out */ +CTA_File CTA_FILE_STDOUT; + +/** The modelcombiner */ +CTA_ModelClass CTA_MODELCOMBINER; + +/** The B3B (Black box) model builder */ +CTA_ModelClass CTA_MODBUILD_B3B; + +/** The 'New' (Black box) model builder */ +CTA_ModelClass CTA_MODBUILD_BB; + +/** Operator for dispaying the scaled RMS of the roots of a treevector to use with method + * CTA_TreeVector_OpOnLeafs */ +CTA_Func CTA_OP_ROOT_RMS; + +/** Operator for displaying the values in given indices of the roots of a + * treevector to use with method CTA_TreeVector_OpOnLeafs */ +CTA_Func CTA_OP_ROOT_PRINTI; + +/** Operator for calculating the locations of maxabs-values of the roots + * of a treevector to use with method CTA_TreeVector_OpOnLeafs */ +CTA_Func CTA_OP_ROOT_AMAX; + +/** Operator for dispaying the scaled Sum-of-Squares of the roots of a + * treevector to use with method CTA_TreeVector_OpOnLeafs */ +CTA_Func CTA_OP_ROOT_SSQ; + +/** Initial random seed */ +long int CTA_INITIAL_RANDOM_SEED; + +/** Name of the user provided dynamic library with user functions */ +char userDefaultDynamicLibrary[256]; diff --git a/costa/native/cta/src/cta_file.c b/costa/native/cta/src/cta_file.c new file mode 100644 index 000000000..0b4517493 --- /dev/null +++ b/costa/native/cta/src/cta_file.c @@ -0,0 +1,399 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_file.c $ +$Revision: 3286 $, $Date: 2012-05-02 18:15:05 +0200 (Wed, 02 May 2012) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#include +#include +#if HAVE_LIBNETCDF +#include +#endif +#include "cta_mem.h" +#include "f_cta_utils.h" +#include "cta_file.h" +#include "cta_errors.h" +#include "cta_message.h" + +#define IDEBUG (0) + +#define CTA_FILE_CREATE_F77 F77_CALL(cta_file_create,CTA_FILE_CREATE) +#define CTA_FILE_FREE_F77 F77_CALL(cta_file_free,CTA_FILE_FREE) +//#define CTA_FILE_GET_F77 F77_CALL(cta_file_get,CTA_FILE_GET) +//#define CTA_FILE_SET_F77 F77_CALL(cta_file_set,CTA_FILE_SET) +#define CTA_FILE_OPEN_F77 F77_CALL(cta_file_open,CTA_FILE_OPEN) +#define CTA_FILE_WRITESTR_F77 F77_CALL(cta_file_writestr,CTA_FILE_WRITESTR) + +#define CTA_FILE_GETNETCDF_F77 F77_CALL(cta_file_getnetcdf,CTA_FILE_GETNETCDF) +#define CTA_File_ISNETCDF_F77 F77_CALL(cta_fILE_isnetcdf,CTA_FILE_ISNETCDF) +#define CLASSNAME "CTA_File" + +typedef enum {text, netcdf} cta_filetype; + + +/* Struct holding all data associated to a COSTA file */ + +typedef struct { +cta_filetype fileType; +FILE *file; +int ncid; +} CTAI_File; + + + +int CTA_File_Create(CTA_File *hfile){ + + CTAI_File *cta_file; + int retval; + + /* allocate memory for new File object */ + cta_file=CTA_Malloc(sizeof(CTAI_File)); + cta_file->fileType = text; + cta_file->file = NULL; + cta_file->ncid = 0; + + /* Allocate new handle and return eror when unsuccesfull */ + retval=CTA_Handle_Create("file",CTA_FILE,cta_file,hfile); + return retval; +}; + + +#undef METHOD +#define METHOD "Open" +int CTA_File_Open(CTA_File hfile, CTA_String spath, CTA_String smode){ + FILE *file; + char *path; + char *mode; + int lenpath, lenmode, retval; + int ncid; + + CTAI_File *cta_file; + + if (IDEBUG>0){printf("Start of CTA_File_Open\n");} + path=NULL; + mode=NULL; + + /* Chech handle and get data struct */ + retval=CTA_Handle_Check((CTA_Handle) hfile,CTA_FILE); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_time handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hfile,(void**) &cta_file); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + // handle path + retval=CTA_String_GetLength(spath, &lenpath); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get length of string"); + return retval; + } + path=CTA_Malloc((lenpath+1)*sizeof(char)); + retval=CTA_String_Get(spath, path); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get string"); + return retval; + } + + // determine the filetype + if (strlen(path)>3){ + if (0==strncmp(path+strlen(path)-3,".nc",3)){ + cta_file->fileType=netcdf; + } + } + if (cta_file->fileType==netcdf){ +#if HAVE_LIBNETCDF + if (IDEBUG>0){printf("Open netcdf file\n");} +{ + // handle mode + if (smode==CTA_NULL){ + mode=CTA_Malloc(3*sizeof(char)); + strcpy(mode,"w"); + } else { + retval=CTA_String_GetLength(smode, &lenmode); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get length of string"); return retval; + } + mode=CTA_Malloc((lenmode+1)*sizeof(char)); + retval=CTA_String_Get(smode, mode); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get string"); + return retval; + } + } + // first check if file already exists;so we first try nc_open!!! + if (0==strcmp(mode,"w")) { + retval = nc_create(path, NC_CLOBBER, &ncid); + if (retval != NC_NOERR) { // no success; so just create it + char message[1024]; + sprintf(message,"Cannot create netCDF-file for no-append writing %s: %s\n", path, nc_strerror(retval)); + CTA_WRITE_ERROR(message); + } + if (IDEBUG>0){printf("Created new netcdf file\n");} + } else { //"r" or "a" + retval = nc_open(path, NC_WRITE, &ncid); + if (retval!=0){ + char message[1024]; + sprintf(message,"Cannot open netCDF-file %s for appending or reading: %s\n", path, nc_strerror(retval)); + CTA_WRITE_ERROR(message); + } + if (IDEBUG>0){printf("Open existing netcdf file\n");} + } + cta_file->ncid=ncid; +} + +#else + printf("CTA_File_Open: WARNING COSTA is compiled without NETCDF support\n"); + printf(" Will open file %s as normal text file \n",path); + cta_file->fileType=text; +#endif + } + + // Open normal text file + if (cta_file->fileType==text){ + + // handle mode + if (smode==CTA_NULL){ + mode=CTA_Malloc(3*sizeof(char)); + strcpy(mode,"w+"); + } else { + retval=CTA_String_GetLength(smode, &lenmode); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get length of string"); + return retval; + } + mode=CTA_Malloc((lenmode+1)*sizeof(char)); + retval=CTA_String_Get(smode, mode); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get string"); + return retval; + } + } + + // Open the file + file=fopen(path,mode); + CTA_File_Set(hfile,file); + } + if (path) free(path); + if (mode) free(mode); + if (IDEBUG>0){printf("End of CTA_File_Open");} + return CTA_OK; +} + + #undef METHOD + #define METHOD "WriteStr" + int CTA_File_WriteStr(CTA_File hfile, char *str, int eol){ + CTAI_File *cta_file; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) hfile,CTA_FILE); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_time handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hfile,(void**) &cta_file); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + fprintf(cta_file->file,"%s",str); + if (eol==CTA_TRUE){ + fprintf(cta_file->file,"\n"); + } + return CTA_OK; +} + + +#undef METHOD +#define METHOD "File_Free" +int CTA_File_Free(CTA_File *hfile) +{ + CTAI_File *cta_file; + int retval; + + + retval=CTA_Handle_Check((CTA_Handle) *hfile,CTA_FILE); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_file handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) *hfile,(void**) &cta_file); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + //Close netcdf file + if (cta_file->fileType==netcdf){ +#if HAVE_LIBNETCDF + if (cta_file->ncid != 0) { + if ((retval = nc_close(cta_file->ncid))){ + printf("CTA_File_Free: cannot close netCDF-file: %s\n", + nc_strerror(retval)); + } + } +#endif + } + + free(cta_file); + + retval=CTA_Handle_Free(hfile); + + return retval; +} + +#undef METHOD +#define METHOD "IsNetcdf" +int CTA_File_IsNetcdf(CTA_File hfile, int *isnetcdf) { + CTAI_File *cta_file; + int retval; + + *isnetcdf=CTA_FALSE; + + retval=CTA_Handle_Check((CTA_Handle) hfile,CTA_FILE); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_file handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hfile,(void**) &cta_file); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + if (cta_file->fileType==netcdf){ + *isnetcdf=CTA_TRUE; + } + return CTA_OK; +} + +#undef METHOD +#define METHOD "File_Set" +int CTA_File_Set(CTA_File hfile, FILE* file) +{ + CTAI_File *cta_file; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) hfile,CTA_FILE); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_file handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hfile,(void**) &cta_file); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + cta_file->file = file; + return CTA_OK; +} + +#undef METHOD +#define METHOD "File_Get" +int CTA_File_Get(CTA_File hfile,FILE** file) +{ + CTAI_File *cta_file; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) hfile,CTA_FILE); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_time handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hfile,(void**) &cta_file); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + *file = cta_file->file; + return CTA_OK; +} + +#undef METHOD +#define METHOD "File_GetNetcdf" +int CTA_File_GetNetcdf(CTA_File hfile,int *ncid) +{ + CTAI_File *cta_file; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) hfile,CTA_FILE); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_time handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hfile,(void**) &cta_file); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + *ncid = cta_file->ncid; + return CTA_OK; +} + + + + +/* Interfacing with Fortran */ + +CTAEXPORT void CTA_FILE_CREATE_F77(int *hfile, int *ierr){ + *ierr=CTA_File_Create((CTA_File*) hfile); +} + +CTAEXPORT void CTA_FILE_FREE_F77(int *hfile, int *ierr){ + *ierr=CTA_File_Free ((CTA_File*) hfile); +} + +//NOT avaialble for Fortran: +//int CTA_File_Get (CTA_File hfile, FILE **file); +//int CTA_File_Set (CTA_File hfile, FILE *file); + +CTAEXPORT void CTA_FILE_OPEN_F77(int *hfile, int *sname, int *smode, int *ierr){ + *ierr=CTA_File_Open((CTA_File) *hfile, (CTA_String) *sname, + (CTA_String) *smode); +} + + +CTAEXPORT void CTA_FILE_WRITESTR_F77(int *hfile, char *str, int *eol, int *ierr, + int len_str){ + + char *c_str; + /* create a c-string equivalent to name */ + c_str=CTA_Malloc((len_str+1)*sizeof(char)); + CTA_fstr2cstr(str,c_str,len_str); + + *ierr=CTA_File_WriteStr((CTA_File) *hfile, c_str, *eol); + + free(c_str); +} + + +CTAEXPORT void CTA_FILE_GETNETCDF_F77(int *hfile,int *ncid, int *ierr){ + *ierr=CTA_File_GetNetcdf((CTA_File) *hfile, ncid); +} + + +CTAEXPORT void CTA_File_ISNETCDF_F77(int *hfile, int *isnetcdf, int *ierr) { + *ierr=CTA_File_IsNetcdf((CTA_File) *hfile, isnetcdf); +} diff --git a/costa/native/cta/src/cta_flush.c b/costa/native/cta/src/cta_flush.c new file mode 100644 index 000000000..bd9d270b2 --- /dev/null +++ b/costa/native/cta/src/cta_flush.c @@ -0,0 +1,32 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_flush.c $ +$Revision: 3453 $, $Date: 2012-09-04 12:10:55 +0200 (Tue, 04 Sep 2012) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "f_cta_utils.h" +#include "cta.h" + +void CTA_Flush() +{ +#ifndef WIN32 + fflush(stdout); + fflush(stderr); +#endif +} diff --git a/costa/native/cta/src/cta_functions.c b/costa/native/cta/src/cta_functions.c new file mode 100644 index 000000000..62f1068c9 --- /dev/null +++ b/costa/native/cta/src/cta_functions.c @@ -0,0 +1,606 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_functions.c $ +$Revision: 3797 $, $Date: 2013-02-05 16:05:31 +0100 (Tue, 05 Feb 2013) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +// Header files for loading dynamic librarys and functions +#ifdef HAVE_DLFCN_H +#include +#endif +#ifdef WIN32 +#include +#endif + +#include "ctype.h" +#include +#include "cta_mem.h" +#include "f_cta_utils.h" +#include "cta_functions.h" +#include "cta_errors.h" +#include "ctai_xml.h" +#include "cta_message.h" + +#define CTA_FUNC_CREATE_F77 F77_CALL(cta_func_create,CTA_FUNC_CREATE) +#define CTA_FUNC_FREE_F77 F77_CALL(cta_func_free,CTA_FUNC_FREE) +#define CTA_FUNC_GETINTF_F77 F77_CALL(cta_func_getintf,CTA_FUNC_GETINTF) +#define CTA_FUNC_GETNAME_F77 F77_CALL(cta_func_getname,CTA_FUNC_GETNAME) +#define CLASSNAME "CTA_Functions" + + +#define IDEBUG (0) +// Struct containing specific data associated to a COSTA function handle +typedef struct { + void *function; //pointer to function + CTA_Intf hintf; //handle of corresponding interface + CTA_Handle userdata; //userdata (array of COSTA handles) + void *libhandle; //Handle of dynamic library + int refcount; //Number of references to the function +} CTAI_Function; + + +/** \brief Creates a new user defined function + * + * \param name I name of the new user function + * \param function I pointer to user funtion + * \param hintf I COSTA interface handle + * \param hfunc O COSTA user function handle + * \return error status: CTA_OK + */ +#undef METHOD +#define METHOD "Func_Create" +int CTA_Func_Create(const char *name, CTA_Function *function, + const CTA_Intf hintf, CTA_Func *hfunc){ + + int retval; //Return value of a call + CTAI_Function *data; //Function specific data + + // Allocate data and set properties + data=CTA_Malloc(sizeof(CTAI_Function)); + + // Set properties + if (IDEBUG>0) { + printf("CTA_Func_Create: creating function name=%s\n",name); + printf("function pointer is %p\n",function); + } + data->function=function; + data->userdata=CTA_NULL; + data->libhandle=NULL; + data->refcount=1; + + // Allocate new handle and return eror when unsuccesfull + retval=CTA_Handle_Create(name,CTA_FUNCTION,data,hfunc); + if (retval) { + CTA_WRITE_ERROR("Cannot create handle"); + return retval; + } + if (IDEBUG>0) printf("function handle is %d\n",*hfunc); + + return CTA_OK; +} + +/** \brief Duplicates a user defined function + * + * \param hfunc I COSTA user function handle + * \param hdupl I duplication of hfunc + * \return error status: CTA_OK + */ +#undef METHOD +#define METHOD "Duplicate" +int CTA_Func_Duplicate(CTA_Func hfunc, CTA_Func *hdupl){ + + int retval; //Return value of a call + CTAI_Function *data; //Function specific data + + // Set default return value + *hdupl=CTA_NULL; + + // If handle is CTA_NULL -> nothing to be done + if (hfunc==CTA_NULL) return CTA_OK; + + // Check Handle and return error if handle is not valid + retval=CTA_Handle_Check(hfunc,CTA_FUNCTION); + if (retval) { + CTA_WRITE_ERROR("Handle is not a cta_function handle"); + return retval; + } + + // get data + CTA_Handle_GetData(hfunc,(void*) &data); + + // increase reference count + data->refcount++; + + // copy handle + *hdupl=hfunc; + + return CTA_OK; +} + +/** \brief Frees a user defined function + * + * \note Freeing CTA_NULL is allowed. + * + * \param hfunc IO handle of user function. The value is + * CTA_NULL on return + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE + */ +#undef METHOD +#define METHOD "Free" +int CTA_Func_Free(CTA_Func *hfunc){ + + int retval; //Return value of a call + CTAI_Function *data; //Function specific data + char msg[256]; + + // If handle is CTA_NULL -> nothing to be done + if (*hfunc==CTA_NULL) return CTA_OK; + + // Check Handle and return error if handle is not valid + retval=CTA_Handle_Check(*hfunc,CTA_FUNCTION); + if (retval) { + CTA_WRITE_ERROR("Handle is not a cta_function handle"); + return retval; + } + + // get data + CTA_Handle_GetData(*hfunc,(void*) &data); + + // decrease reference count + data->refcount--; + + // If last reference is cleared, delete function + if (data->refcount==0){ + // Check whether to unload dll + if (data->libhandle) { +#ifdef HAVE_DLFCN_H + if (dlclose(data->libhandle)){ + sprintf(msg, "Cannot close library: %s\n",dlerror()); + CTA_WRITE_WARNING(msg); + } +#endif + } + + // Free data item + free(data); + + //Free Handle + CTA_Handle_Free(hfunc); + } else { + *hfunc=CTA_NULL; + } + return CTA_OK; +} + + +/** \brief Returns interface handle of a user defined function + * + * \param hfunc I handle of user function + * \param hintf O handle of interface + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE + */ +#undef METHOD +#define METHOD "GetIntf" +int CTA_Func_GetIntf(const CTA_Func hfunc, CTA_Intf *hintf){ + + int retval; //Return value of a call + CTAI_Function *data; //Function specific data + + // Check Handle and return error if handle is not valid + retval=CTA_Handle_Check(hfunc,CTA_FUNCTION); + if (retval) { + CTA_WRITE_ERROR("Handle is not a cta_function handle"); + return retval; + } + + // get data-item + CTA_Handle_GetData(hfunc,(void*) &data); + + // return interface + *hintf=data->hintf; + + return CTA_OK; +} + +/** \brief Returns function pointer of a user function + * + * \param hfunc handle of user function + * \param function pointer to user defined function + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE + */ +#undef METHOD +#define METHOD "GetFunc" +int CTA_Func_GetFunc(const CTA_Func hfunc, CTA_Function **function){ + + int retval; //Return value of a call + CTAI_Function *data; //Function specific data + + // Check Handle and return error if handle is not valid + retval=CTA_Handle_Check(hfunc,CTA_FUNCTION); + if (retval) { + CTA_WRITE_ERROR("Handle is not a cta_function handle"); + return retval; + } + // Get data-item + CTA_Handle_GetData(hfunc,(void*) &data); + + // Set function pointer + *function= (CTA_Function*) data->function; + return CTA_OK; +} + +/** \brief Returns name of a user function + * + * \param hfunc I handle of user function + * \param name O name of user function, + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE + */ +#undef METHOD +#define METHOD "GetName" +int CTA_Func_GetName(const CTA_Func hfunc, CTA_String name){ + + int retval; //Return value of a call + + // Check Handle and return error if handle is not valid + retval=CTA_Handle_Check(hfunc,CTA_FUNCTION); + if (retval) { + CTA_WRITE_ERROR("Handle is not a cta_function handle"); + return retval; + } + + retval=CTA_Handle_GetName(hfunc,name); + return retval; +} + + +/** \brief Set userdata for a user defined function + * + * \param hfunc I handle of user function + * \param userdata I handles to userdata + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE + */ +#undef METHOD +#define METHOD "SetUSerData" +int CTA_Func_SetUserdata(const CTA_Func hfunc, const CTA_Handle userdata ){ + + int retval; //Return value of a call + CTAI_Function *data; //Function specific data + + + // Check Handle and return error if handle is not valid + retval=CTA_Handle_Check(hfunc,CTA_FUNCTION); + if (retval) { + CTA_WRITE_ERROR("Handle is not a cta_function handle"); + return retval; + } + + // Get data-item + CTA_Handle_GetData(hfunc,(void*) &data); + + // Allocate userdata and copy values + data->userdata=userdata; + + return CTA_OK; +} +/** \brief Returns the userdata array of a user defined function + * + * \param hfunc I handle of user function + * \param userdata O handles to userdata + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE, + * CTA_ARRAY_TOO_SHORT + */ +#undef METHOD +#define METHOD "GetUserdata" +int CTA_Func_GetUserdata(const CTA_Func hfunc, CTA_Handle userdata ){ + + int retval; //Return value of a call + CTAI_Function *data; //Function specific data + + // Check Handle and return error if handle is not valid + retval=CTA_Handle_Check(hfunc,CTA_FUNCTION); + if (retval) { + CTA_WRITE_ERROR("Handle is not a cta_function handle"); + return retval; + } + + // Get data-item + CTA_Handle_GetData(hfunc,(void*) &data); + + // Copy userdata + userdata=data->userdata; + + return CTA_OK; +} + + +/** \brief Change the name of the library into a platform specific name +* +* \param libname_in Suggested name of the library +* \param libname_out Name of library on platform +* \return Handle to create or CTA_NULL in case of an error. +*/ +void CTAI_DLLName(const char *libname_in, char *libname_out){ + + int i; + /* step1 strip of extension (if any) */ + for (i= strlen(libname_in)-1;i>=0;i--){ + if (libname_in[i]=='.') {break;} + } + if (i>0) { + strncpy(libname_out,libname_in,i); + libname_out[i]='\0'; + } + else { + strcpy(libname_out,libname_in); + } + +#ifdef WIN32 + strcat(libname_out,".dll"); +#else +#ifdef MAC_OS_X + strcat(libname_out,".dylib"); +#else + strcat(libname_out,".so"); +#endif +#endif + +} + + +/** \brief Change the name of the function into a Fortran function name* +* \param funcname I name of the function +* \param f_funcname O Fortran name of the function +* +* \note f_funcname is allocated by this function and should be freed by +* the calling routine. + +*/ +void CTAI_FunctionNameFortran(char *funcname, char **f_funcname, int mode){ + + int i, lenName, lenName2; + // Make different variants of the function name + /* 1: Name + 2: Name_ + 3: name + 4: name_ + 5: NAME + 6: NAME_ + */ + lenName=strlen(funcname); + lenName2 = lenName + 1; + if (mode == 2 || mode == 4 || mode ==6) {lenName2 = lenName + 2;} + *f_funcname=CTA_Malloc((lenName2)*sizeof(char)); + + for (i=0;ifunction = dlsym(data->libhandle, funcname); + #else + // windows + data->function = GetProcAddress(data->libhandle, funcname); + #endif + return CTA_OK; +} + + +CTA_Func CTA_CreateFuncDynamicLib(char *libraryName, char *functionName, char *name, char *id){ + CTA_Func hfunc; /* the new function */ +// xmlChar *id = NULL; /* id of function in XML-tree */ +// xmlChar *library_inp = NULL; /* name of the dynamic link library */ +// xmlChar *funcname =NULL; /* name of the function */ + char* f_funcname=NULL; /* Fortran name of funcname */ + int retval; /* return status of creation */ + char library[256]; /* name of the dynamic link library */ +#if defined WIN32 + WCHAR wLibrary[256]; + int i; +#endif + + CTAI_Function *data; //Function specific data + int nerror; + char msg[256]; + nerror=0; + + /* Add the dynamic library extention to the library name as used on this OS (so, dll, dylib,...) */ + CTAI_DLLName(libraryName, library); + + /* Create a new COSTA function instance */ + retval=CTA_Func_Create((char *) name, NULL , CTA_NULL, &hfunc); + if (retval!=CTA_OK){ + CTA_WRITE_WARNING("Cannot create a new COSTA function"); + return CTA_NULL; + } + + /* Get data block */ + CTA_Handle_GetData(hfunc,(void*) &data); + + /* WINDOWS VERSION FOR LOADING A FUNCTION FROM A SHARED LIBRARY: */ + #if defined HAVE_DLFCN_H || defined WIN32 + /* Open Dynamic link library */ + #ifdef HAVE_DLFCN_H + data->libhandle=dlopen(library, RTLD_LAZY); + #else + /* convert string wLibrary to WCHAR, not sure this is the right way. + but it seems to work */ + for (i=0;i<256;i++){wLibrary[i]=library[i];} + data->libhandle=LoadLibrary(wLibrary); + #endif + + /* Check library handle */ + if (data->libhandle==NULL){ + sprintf(msg,"Error in CTAI_XML_CreateFunc:Cannot open library %s\n",library); + CTA_WRITE_WARNING(msg); + #ifdef HAVE_DLFCN_H + sprintf(msg, "error is: |%s| \n",dlerror()); + CTA_WRITE_WARNING(msg); + #else + sprintf(msg, "GetLastError code is %d\n",GetLastError()); + CTA_WRITE_WARNING(msg); + #endif + nerror++; + } + + /* Load function and try all kinds of typical fortran variants */ + /* 1: original approach: Name */ + retval = CTAI_GetProcAddress(data, (char *) functionName); + if (data->function == NULL){ /* 2: Name_ */ + CTAI_FunctionNameFortran((char *) functionName, &f_funcname,2); + retval=CTAI_GetProcAddress(data, f_funcname); + free(f_funcname); + } + if (data->function == NULL){ /* 3: name */ + CTAI_FunctionNameFortran((char *) functionName, &f_funcname,3); + retval=CTAI_GetProcAddress(data, f_funcname); + free(f_funcname); + } + if (data->function == NULL){ /* 4: name_ */ + CTAI_FunctionNameFortran((char *) functionName, &f_funcname,4); + retval=CTAI_GetProcAddress(data, f_funcname); + free(f_funcname); + } + if (data->function == NULL){ /* 5: NAME */ + CTAI_FunctionNameFortran((char *) functionName, &f_funcname,5); + retval=CTAI_GetProcAddress(data, f_funcname); + free(f_funcname); + } + if (data->function == NULL){ /* 6: NAME_ */ + CTAI_FunctionNameFortran((char *) functionName, &f_funcname,6); + retval=CTAI_GetProcAddress(data, f_funcname); + free(f_funcname); + } + + /* Check function pointer */ + if (data->function==NULL){ + sprintf(msg, "WARNING in CTAI_XML_CreateFunc:Cannot load function %s\n",functionName); + CTA_WRITE_WARNING(msg); + #ifdef HAVE_DLFCN_H + sprintf(msg,"%s\n",dlerror()); + CTA_WRITE_WARNING(msg); + #else + sprintf(msg,"GetLastError code is %d\n",GetLastError()); + CTA_WRITE_WARNING(msg); + #endif + nerror++; + } + #else + sprintf(msg, "Warning: Cannot load functions dynamically\n"); + CTA_WRITE_WARNING(msg); + sprintf(msg, "Compile COSTA with support for Dynamic loading \n"); + CTA_WRITE_WARNING(msg); + nerror++; + #endif + + + //finaly Free created function handle in case of an error or set the id of the handle + if (nerror>0) { + retval=CTA_Func_Free(&hfunc); + } else { + /* Set id (=name) of handle */ + CTAI_Handle_SetName(hfunc, (char *) id); + } + + return hfunc; +} + + +/** \brief Create a COSTA function from XML +* (load from dynamic load library). +* +* \param cur_node I Current XML node +* \return Handle to create or CTA_NULL in case of an error. +*/ +CTA_Func CTAI_XML_CreateFunc(xmlNode *cur_node) { + + CTA_Func hfunc; /* the new function */ + xmlChar *id = NULL; /* id of function in XML-tree */ + xmlChar *name = NULL; /* (lookup) name of the function */ + xmlChar *libraryName = NULL; /* name of the dynamic link library */ + xmlChar *functionName =NULL; /* name of the function */ + + /* Parse this node's attributes */ + /* Get id */ + id = xmlGetProp(cur_node, CTAI_XML_ID); + + /* Get name */ + name = xmlGetProp(cur_node, CTAI_XML_NAME); + + /* Get tag */ + libraryName = xmlGetProp(cur_node, CTAI_XML_LIBRARY); + + /* Get function name */ + functionName = xmlGetProp(cur_node, CTAI_XML_FUNCTION); + + hfunc=CTA_CreateFuncDynamicLib((char*) libraryName, (char*) functionName, (char*) name, (char*) id); + + return hfunc; +} + +/* Interfacing with Fortran */ + +CTAEXPORT void CTA_FUNC_CREATE_F77(char *name, void *function , int *hintf, int *hfunc, + int *ierr, int len_name){ + char *c_name; + // create a c-string equivalent to name + c_name=CTA_Malloc((len_name+1)*sizeof(char)); + CTA_fstr2cstr(name,c_name,len_name); + + *ierr=CTA_Func_Create(c_name, (CTA_Function*) function , (CTA_Intf) *hintf, + (CTA_Func *) hfunc); + free(c_name); +} + + +CTAEXPORT void CTA_FUNC_FREE_F77(CTA_Func *hfunc, int* ierr){ + + *ierr=CTA_Func_Free((CTA_Func*) hfunc); +} + +CTAEXPORT void CTA_FUNC_GETINTF_F77(int *hfunc, int *hintf, int *ierr){ + + *ierr=CTA_Func_GetIntf((CTA_Func) *hfunc, (CTA_Intf*) hintf); +}; + +// It is not possible to create a Fortran interface for CTA_Func_GetFunc +//int CTA_Func_GetFunc(const CTA_Func hfunc, CTA_Function **function); + + +CTAEXPORT void CTA_FUNC_GETNAME_F77(int *hfunc, int *hname, int *ierr){ +/* + char *c_name; //C-string copy of name + c_name=CTA_Malloc((len_name+1)*sizeof(char)); + *ierr=CTA_Func_GetName((CTA_Func) *hfunc, c_name); + CTA_cstr2fstr(c_name,name,len_name); + free(c_name); +*/ + *ierr=CTA_Func_GetName((CTA_Func) *hfunc, (CTA_String) *hname); +}; + diff --git a/costa/native/cta/src/cta_handles.c b/costa/native/cta/src/cta_handles.c new file mode 100644 index 000000000..f27b958c1 --- /dev/null +++ b/costa/native/cta/src/cta_handles.c @@ -0,0 +1,975 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_handles.c $ +$Revision: 3406 $, $Date: 2012-08-16 15:25:53 +0200 (Thu, 16 Aug 2012) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#include +#include +#include + +#include "cta_mem.h" +#include "f_cta_utils.h" +#include "cta_string.h" +#include "cta_handles.h" +#include "cta_errors.h" +#include "cta_message.h" + +#define CTA_HANDLE_CREATE_F77 F77_CALL(cta_handle_create,CTA_HANDLE_CREATE) +#define CTA_HANDLE_FREE_F77 F77_CALL(cta_handle_free,CTA_HANDLE_FREE) +#define CTA_HANDLE_CHECK_F77 F77_CALL(cta_handle_check,CTA_HANDLE_CHECK) +#define CTA_HANDLE_GETNAME_F77 F77_CALL(cta_handle_getname,CTA_HANDLE_GETNAME) +#define CTA_HANDLE_GETVALUE_F77 F77_CALL(cta_handle_getvalue,CTA_HANDLE_GETVALUE) +#define CTA_HANDLE_GETDATATYPE_F77 F77_CALL(cta_handle_getdatatype,CTA_HANDLE_GETDATATYPE) +#define CTA_HANDLE_FIND_F77 F77_CALL(cta_handle_find,CTA_HANDLE_FIND) +#define CTA_HANDLE_PRINTALL_F77 F77_CALL(cta_handle_printall,CTA_HANDLE_PRINTALL) +#define CTA_HANDLE_PRINTINFO_F77 F77_CALL(cta_handle_printinfo,CTA_HANDLE_PRINTINFO) + +#define CLASSNAME "CTA_Handles" + +#define IDEBUG (1) + +int counter =0; +static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + +CTAEXPORT extern int CTA_PAR_MY_RANK; + + +long CTAI_Vector_GetMemsize(); + + + + +/* Struct holding all data associated to an COSTA Handle */ +typedef struct { + char *name; // Name of handle for debugging and informational messages + CTA_Datatype datatype; // Data type associated with this handle + void *data; // Pointer to the type dependent data + int refCount; // Number of references from which we expect a free + // (only used for a limited number of components used from java) + int globCount; //DEBUG + char debugTag[80]; +} CTAI_HandleInfo; + +/* + Handle administration: list of all COSTA handles and their data. + The array of pointers to CTAI_Handles contains all data. The COSTA handles + are the indices in this array. + + Finding our way in the administration: + The size information is the most important and holds the current length of + CTA_Handles. The other two CTAI_Handles_free, holding the number of free + elements and CTAI_Handles_last are only usefull for performance. + CTAI_Handles_free is used to easy check whether the array CTAI_HandleInfo must + be enlarged when a new handle is to be created. + CTAI_Handles_last points to the postion where the last element is added or one + position before the last handle that has been freed. It is the starting position + for finding a new free position in CTAI_Handles. + +*/ +static CTAI_HandleInfo **CTAI_Handles=NULL; //List of handles +static int CTAI_Handles_size=0; // Size of list +static int CTAI_Handles_free=0; // Number of free handles +static int CTAI_Handles_last=0; // Position of last allocated handle handle + // or just before last freed handle + + +/** \brief Creates a new COSTA handle + * + * \param name I name associated to handle + * \param datatype I datatype of handle + * \param data I block of data associated to handle + * \param handle O COSTA handle + * \return error status: CTA_OK + */ + +#undef METHOD +#define METHOD "Create" +int CTA_Handle_Create(const char *name, const CTA_Datatype datatype, + void *data, CTA_Handle *handle){ + + int i; //index counter + + // Initialisation + *handle=CTA_NULL; + + pthread_mutex_lock( &mutex ); + + // Some additional debug stuff when we expect a memleak + if (counter%10000 ==0){ + pthread_mutex_unlock( &mutex ); + // CTA_Handle_PrintInfo("CTA_handle_Create "); + pthread_mutex_lock( &mutex ); + } + counter++; + + // Find a new handle: + if (CTAI_Handles_size == 0) + { + // IF (list does not exist yet) THEN + // Create a list + int newlen=1000; + //int newlen=100000; + CTAI_Handles=CTA_Malloc(newlen*sizeof(CTA_Handle*)); + + // Initialise list with null-pointers + for (i=0;i extent list and choose first new handle + int oldlen=CTAI_Handles_size; + //int newlen=oldlen*2; + int newlen=oldlen*2; + + + CTA_Handle **Handles_new=realloc(CTAI_Handles,newlen*sizeof(CTA_Handle*)); + if (Handles_new==NULL){ + CTA_WRITE_ERROR("FATAL Cannot allocate new memory to store handels"); + exit(-1); + } + else { + CTAI_Handles = Handles_new; + } + + // Initialise reallocated part + for (i=oldlen;iname = CTA_Malloc(1 + strlen(name)); + strcpy(CTAI_Handles[*handle]->name, name); + CTAI_Handles[*handle]->name[strlen(name)] = '\0'; + } else { + CTAI_Handles[*handle]->name = CTA_Malloc(1); + CTAI_Handles[*handle]->name[0] = '\0'; + } + CTAI_Handles[*handle]->datatype=datatype; + CTAI_Handles[*handle]->data=data; + CTAI_Handles[*handle]->refCount=1; + + pthread_mutex_unlock( &mutex ); + + return CTA_OK; +} + + +#undef METHOD +#define METHOD "GetRefCount" +int CTA_Handle_GetRefCount(const CTA_Handle handle, int *refCount){ + + pthread_mutex_lock( &mutex ); + + if (handle>=0 && handlerefCount; + // else, function handle is not used ->return CTA_ILLEGAL_HANDLE + } + else { + char message[1024]; + pthread_mutex_unlock( &mutex ); + sprintf(message,"Handle nr %d is NOT in list of %d handles",handle,CTAI_Handles_size); + CTA_WRITE_ERROR(message); + + + return CTA_ILLEGAL_HANDLE; + } + + + pthread_mutex_unlock( &mutex ); + + return CTA_OK; +} + +#undef METHOD +#define METHOD "IncRefCount" +int CTA_Handle_IncRefCount(const CTA_Handle handle){ + + pthread_mutex_lock( &mutex ); + + if (handle>=0 && handlerefCount++; + + // else, function handle is not used ->return CTA_ILLEGAL_HANDLE + } + else { + + char message[1024]; + pthread_mutex_unlock( &mutex ); + sprintf(message,"Handle nr %d is not in list of %d handles",handle,CTAI_Handles_size); + CTA_WRITE_ERROR(message); + + + return CTA_ILLEGAL_HANDLE; + } + pthread_mutex_unlock( &mutex ); + + return CTA_OK; +} + +#undef METHOD +#define METHOD "DecrRefCount" +int CTA_Handle_DecrRefCount(const CTA_Handle handle){ + + pthread_mutex_lock( &mutex ); + + if (handle>=0 && handlerefCount--; + // else, function handle is not used ->return CTA_ILLEGAL_HANDLE + } + else { + + char message[1024]; + pthread_mutex_unlock( &mutex ); + + sprintf(message,"Handle nr %d is not in list of %d handles",handle,CTAI_Handles_size); + CTA_WRITE_ERROR(message); + + + return CTA_ILLEGAL_HANDLE; + } + + pthread_mutex_unlock( &mutex ); + + return CTA_OK; +} + + +/** \brief Checks whether a handle is valid and checks type + * + * \note The handle CTA_NULL is not valid. + * + * \param handle I COSTA handle + * \param datatype I The datatype the handle must be compatible with + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE + * + */ + +#undef METHOD +#define METHOD "Check" +int CTA_Handle_Check(const CTA_Handle handle, + const CTA_Datatype datatype){ + + pthread_mutex_lock( &mutex ); + + if (handle>=0 && handledatatype!=datatype){ + // datatype does not correspond -> return CTA_INCOMPATIBLE_HANDLE + + char message[1024]; + pthread_mutex_unlock( &mutex ); + sprintf(message,"Handle is of type %d instead of %d .\nSee cta_datatypes.h for more information.",CTAI_Handles[handle]->datatype, datatype); + CTA_WRITE_ERROR(message); + + + return CTA_INCOMPATIBLE_HANDLE; + } + // else, function handle is not used ->return CTA_ILLEGAL_HANDLE + }else{ + + char message[1024]; + pthread_mutex_unlock( &mutex ); + + sprintf(message,"Handle nr %d is not in list of %d handles",handle,CTAI_Handles_size); + CTA_WRITE_ERROR(message); + return CTA_ILLEGAL_HANDLE; + } + + pthread_mutex_unlock( &mutex ); + + return CTA_OK; +} + +/** \brief Frees a COSTA handle + * + * \note The data part of the handle is NOT freed, freeing CTA_NULL is + * + * \param handle I/O handle that must be freed, CTA_NULL on return. + * allowed + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +#undef METHOD +#define METHOD "Free" +int CTA_Handle_Free(CTA_Handle *handle){ + + // If handle is CTA_FUNC_NULL -> nothing to be done + if (*handle==CTA_NULL) return CTA_OK; + // if handle is not ok -> return CTA_ILLEGAL_HANDLE + if (CTA_OK != CTA_Handle_Check(*handle,CTA_HANDLE)) { + CTA_WRITE_ERROR("Handle is not a cta_handle handle"); + return CTA_ILLEGAL_HANDLE; + } + + pthread_mutex_lock( &mutex ); + + // Free associated data + free(CTAI_Handles[*handle]->name); + free(CTAI_Handles[*handle]); + CTAI_Handles[*handle]=NULL; + // Adjust pointer administration + CTAI_Handles_free++; + CTAI_Handles_last=*handle-1; + // Set value of hfunc + *handle=CTA_NULL; + + pthread_mutex_unlock( &mutex ); + + return CTA_OK; +} + + +/** \brief Returns pointer to data element of handle + * + * \param handle I COSTA handle + * \param data O pointer to data element + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +#undef METHOD +#define METHOD "GetData" +int CTA_Handle_GetData(const CTA_Handle handle, void **data){ + + // if handle is not ok -> return CTA_ILLEGAL_HANDLE + if (CTA_OK != CTA_Handle_Check(handle,CTA_HANDLE)) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return CTA_ILLEGAL_HANDLE; + } + + pthread_mutex_lock( &mutex ); + + // return data + *data=CTAI_Handles[handle]->data; + + pthread_mutex_unlock( &mutex ); + + return CTA_OK; +} + + +/** \brief Returns data associated with handle (INTERNAL USE ONLY) + * + * \param handle I COSTA handle + * \return handle name + */ +const void *CTAI_Handle_GetData(const CTA_Handle handle){ + void *data; + + pthread_mutex_unlock( &mutex ); + + data = CTAI_Handles[handle]->data; + + pthread_mutex_unlock( &mutex ); + + return data; +} + + +/** \brief Returns name associated with handle + * + * \param handle I COSTA handle + * \param name O name + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +#undef METHOD +#define METHOD "GetName" +int CTA_Handle_GetName(const CTA_Handle handle, CTA_String hname){ + int retVal; + char *name; + + // if handle is not ok -> return CTA_ILLEGAL_HANDLE + if (CTA_OK != CTA_Handle_Check(handle,CTA_HANDLE)) { + CTA_WRITE_ERROR("Handle is not a cta_handle handle"); + return CTA_ILLEGAL_HANDLE; + } + + if (CTA_OK != CTA_Handle_Check(hname,CTA_STRING)){ + CTA_WRITE_ERROR("Handle is not a cta_string handle"); + return CTA_ILLEGAL_HANDLE; + } + + // return name + + pthread_mutex_lock( &mutex ); + name = CTAI_Handles[handle]->name; + pthread_mutex_unlock( &mutex ); + + retVal=CTA_String_Set(hname, name); + + + return retVal; +} + +/** \brief Set name associated with handle (INTERNAL ONLY) + * + * \param handle I COSTA handle + * \param hname I COSTA string + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +int CTAI_Handle_SetName(const CTA_Handle handle, const char *name) +{ + pthread_mutex_lock( &mutex ); + + if (strcmp(CTAI_Handles[handle]->name,name)!=0){ + CTAI_Handles[handle]->name = realloc(CTAI_Handles[handle]->name, 1 + strlen(name)); + strcpy(CTAI_Handles[handle]->name, name); + } + + pthread_mutex_unlock( &mutex ); + + return CTA_OK; +} + + +/** \brief Returns name associated with handle (INTERNAL USE ONLY) + * + * \param handle I COSTA handle + * \return handle name + */ +const char *CTAI_Handle_GetName(const CTA_Handle handle){ + char *retStr; + + pthread_mutex_lock( &mutex ); + + retStr = CTAI_Handles[handle]->name; + + pthread_mutex_unlock( &mutex ); + + return retStr; +} + + +/** \brief Returns datatype associated with handle + * + * \param handle I COSTA handle + * \param datatype O name of data type + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +#undef METHOD +#define METHOD "CGetDatatype" +int CTA_Handle_GetDatatype(const CTA_Handle handle, CTA_Datatype *datatype){ + + + // if handle is not ok -> return CTA_ILLEGAL_HANDLE + if (CTA_OK != CTA_Handle_Check(handle,CTA_HANDLE)) { + CTA_WRITE_ERROR("Handle is not a cta_handle handle"); + + + return CTA_ILLEGAL_HANDLE; + } + pthread_mutex_lock( &mutex ); + + // return datatype + *datatype=CTAI_Handles[handle]->datatype; + + pthread_mutex_unlock( &mutex ); + + return CTA_OK; +} + + +/** \brief Returns datatype associated with handle + * + * \param handle I COSTA handle + * \return name of data type, + */ +CTA_Datatype CTAI_Handle_GetDatatype(const CTA_Handle handle){ + + CTA_Datatype retVal; + + pthread_mutex_lock( &mutex ); + retVal = CTAI_Handles[handle]->datatype; + + + pthread_mutex_unlock( &mutex ); + + return retVal; +} + + +/** \brief Frees a COSTA handle and its associated data + * + * \note The data part of the handle is also freed + * + * \param handle I/O handle that must be freed, CTA_NULL on return. + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE + */ +int CTA_Intf_Free(CTA_Handle *handle); +int CTA_Func_Free(CTA_Handle *handle); +int CTA_Vector_Free(CTA_Handle *handle); +int CTA_TreeVector_Free(CTA_Handle *handle, int recursive); +int CTA_Matrix_Free(CTA_Handle *handle); +int CTA_Model_Free(CTA_Handle *handle); +int CTA_SObs_Free(CTA_Handle *handle); +int CTA_Tree_Free(CTA_Handle *handle); +int CTA_String_Free(CTA_Handle *handle); +int CTA_File_Free(CTA_Handle *handle); +int CTA_Time_Free(CTA_Handle *handle); +int CTA_ObsDescr_Free(CTA_Handle *handle); +int CTA_Pack_Free(CTA_Handle *handle); +int CTA_Metainfo_Free(CTA_Handle *handle); +int CTA_RelTable_Free(CTA_Handle *handle); +int CTA_Array_Free(CTA_Handle *handle); + +int CTA_Handle_Free_All(CTA_Handle *handle){ + + /* If handle is CTA_FUNC_NULL -> nothing to be done */ + if (*handle==CTA_NULL) return CTA_OK; + + switch (CTAI_Handles[*handle]->datatype){ + case CTA_HANDLE : + if (IDEBUG>0) printf("Delete CTA_HANDLE\n"); + return CTA_Handle_Free(handle); + case CTA_INTERFACE : + if (IDEBUG>0) printf("Delete CTA_INTERFACE\n"); + return CTA_Intf_Free(handle); + case CTA_FUNCTION : + if (IDEBUG>0) printf("Delete CTA_FUNCTION\n"); + return CTA_Func_Free(handle); + case CTA_VECTOR : + if (IDEBUG>0) printf("Delete CTA_VECTOR\n"); + return CTA_Vector_Free(handle); + case CTA_VECTORCLASS : + if (IDEBUG>0) { + printf("Delete CTA_VECTORCLASS\n"); + printf("WARNING: Cannot delete CTA_VECTORCLASS\n"); + } + return CTA_Handle_Free(handle); + case CTA_TREEVECTOR : + if (IDEBUG>0) printf("Delete CTA_TREEVECTOR\n"); + return CTA_TreeVector_Free(handle,CTA_TRUE); + case CTA_MATRIXCLASS : + if (IDEBUG>0) { + printf("Delete CTA_MAXTRIXCLASS\n"); + printf("WARNING: Cannot delete CTA_MATRIXCLASS\n"); + } + return CTA_Handle_Free(handle); + case CTA_MATRIX : + if (IDEBUG>0) printf("Delete CTA_MATRIX\n"); + return CTA_Matrix_Free(handle); + case CTA_COVMATCLASS : + if (IDEBUG>0) { + printf("Delete CTA_COVMATCLASS\n"); + printf("WARNING: Cannot delete CTA_COVMATCLASS\n"); + } + return CTA_Handle_Free(handle); + case CTA_COVMAT : + if (IDEBUG>0) { + printf("Delete CTA_COVMAT\n"); + printf("WARNING: Cannot delete CTA_COVMAT\n"); + } + return CTA_OK; + // return CTA_Handle_Free(handle); + case CTA_INTPOL : + if (IDEBUG>0) { + printf("Delete CTA_INTPOL\n"); + printf("WARNING: Cannot delete CTA_INTPOL\n"); + } + return CTA_OK; + // return CTA_Handle_Free(handle); + case CTA_OBS : + if (IDEBUG>0) { + printf("Delete CTA_OBS\n"); + printf("WARNING: Cannot delete CTA_OBS\n"); + } + return CTA_OK; + case CTA_MODELCLASS : + if (IDEBUG>0) { + printf("Delete CTA_MODELCLASS\n"); + printf("WARNING: Cannot delete CTA_MODELCLASS\n"); + } + return CTA_OK; + // return CTA_Handle_Free(handle); + case CTA_MODEL : + if (IDEBUG>0) printf("Delete CTA_MODEL\n"); + return CTA_Model_Free(handle); + case CTA_TIME : + if (IDEBUG>0) printf("Delete CTA_TIME\n"); + return CTA_Time_Free(handle); + case CTA_SOBS : + if (IDEBUG>0) printf("Delete CTA_SOBS\n"); + return CTA_SObs_Free(handle); + case CTA_SOBSCLASS : + if (IDEBUG>0) { + printf("Delete CTA_SOBCLASS\n"); + printf("WARNING: Cannot delete CTA_OBSCLASS\n"); + } + return CTA_OK; + case CTA_OBSDESCR : + if (IDEBUG>0) printf("Delete CTA_OBSDESCR\n"); + return CTA_ObsDescr_Free(handle); + case CTA_METHODCLASS : + if (IDEBUG>0) { + printf("Delete CTA_METHODCLASS\n"); + printf("WARNING: Cannot delete CTA_METHODCLASS\n"); + } + return CTA_OK; + case CTA_METHOD : + if (IDEBUG>0) { + printf("Delete CTA_METHOD\n"); + printf("WARNING: Cannot delete CTA_METHOD\n"); + } + return CTA_OK; + case CTA_TREE : + return CTA_Tree_Free(handle); + case CTA_PACK : + if (IDEBUG>0) printf("Delete CTA_PACK\n"); + return CTA_Pack_Free(handle); + case CTA_DATABLOCK : + if (IDEBUG>0) { + printf("Delete CTA_DATABLOCK\n"); + printf("WARNING: Cannot delete CTA_DATABLOCK\n"); + } + return CTA_OK; + case CTA_METAINFO : + if (IDEBUG>0) printf("Delete CTA_METAINFO\n"); + return CTA_Metainfo_Free(handle); + case CTA_METAINFOCLASS : + if (IDEBUG>0) { + printf("Delete CTA_METAINFOCLASS\n"); + printf("WARNING: Cannot delete CTA_METAINFOCLASS\n"); + } + return CTA_OK; + case CTA_RELTABLE : + if (IDEBUG>0) printf("Delete CTA_RELTABLE\n"); + return CTA_RelTable_Free(handle); + case CTA_SUBTREEVECTOR : + if (IDEBUG>0) { + printf("Delete CTA_SUBTREEVECTOR\n"); + printf("WARNING: Cannot delete CTA_SUBTREEVECTOR\n"); + } + return CTA_OK; + case CTA_STRING : + if (IDEBUG>0) printf("Delete CTA_STRING\n"); + return CTA_String_Free(handle); + case CTA_FILE : + if (IDEBUG>0) printf("Delete CTA_FILE\n"); + return CTA_File_Free(handle); + case CTA_ARRAY : + if (IDEBUG>0) printf("Delete CTA_ARRAY\n"); + return CTA_Array_Free(handle); + default: + printf("INTERNAL ERROR IN CTA_Handle_Free_All\n"); + printf("WARNING: Cannot delete handle of datatype %d\n", + CTAI_Handles[*handle]->datatype); + exit(-1); + } +} + + + + +/** \brief Returns the value a handle points to + * + * \param handle I COSTA handle + * \param data O pointer to data element + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE + */ +int CTA_Handle_GetValue(const CTA_Handle handle, void *value, CTA_Datatype datatype) +{ + CTA_Datatype mytype; + + /* If handle is CTA_FUNC_NULL -> nothing to be done */ + if (handle==CTA_NULL) return CTA_OK; + + mytype = CTAI_Handle_GetDatatype(handle); + + /* Return the handle value in case of identical types or CTA_HANDLE */ + if (mytype == datatype || datatype == CTA_HANDLE) { + *(CTA_Handle*)value = handle; + return CTA_OK; + } + + /* In case of string, attempt to convert */ + if (mytype == CTA_STRING) { + return CTA_String_GetValue(handle, value, datatype); + } + + /* Otherwise, return CTA_INCOMPATIBLE_HANDLE */ + return CTA_INCOMPATIBLE_HANDLE; +} + +#undef METHOD +#define METHOD "Find" +int CTA_Handle_Find(CTA_String sname, CTA_Datatype datatype, int *handlenr) +{ + int i,retval, len; + char *sstr; + + + /* Copy the input string into a C-string */ + retval = CTA_String_GetLength(sname,&len); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get Length"); + return retval; + } + sstr=CTA_Malloc((len+1)*sizeof(char)); + retval = CTA_String_Get(sname,sstr); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get String"); + + free(sstr); + return retval; + } + + + pthread_mutex_lock( &mutex ); + + for (i=0; i < CTAI_Handles_size;i++){ + + if (CTAI_Handles[i] != NULL) { + + if (!strncmp(CTAI_Handles[i]->name,sstr,len)) { + + //printf("gevonden! %d \n",i); + *handlenr = i; + + pthread_mutex_unlock( &mutex ); + + free(sstr); + return CTA_OK; } + + } + } + + + pthread_mutex_unlock( &mutex ); + + free(sstr); + return CTA_HANDLE_NOT_FOUND; +} + + +int CTA_Handle_Printall() +{ + int i,ierr; + char *sstr; + int len; + + pthread_mutex_lock( &mutex ); + + for (i=0; i < CTAI_Handles_size;i++) + { + if (CTAI_Handles[i] != NULL) + { + printf("Name of handle %d is '%s'", i, CTAI_Handles[i]->name); + + if (CTAI_Handles[i]->datatype == CTA_STRING) { + + pthread_mutex_unlock( &mutex ); + ierr = CTA_String_GetLength(i,&len); + pthread_mutex_lock( &mutex ); + + sstr=CTA_Malloc((len+1)*sizeof(char)); + + pthread_mutex_unlock( &mutex ); + + ierr=CTA_String_Get(i, sstr); + + pthread_mutex_lock( &mutex ); + + printf(": %s",sstr); + } + printf("\n"); + } + } + pthread_mutex_unlock( &mutex ); + return ierr; +} + +int CTA_Handle_PrintInfo(const char *location) +{ + int i; + int ntotal; + int ncount[32]; + int nrefcount[32]; + int datatype; + long memvec; + float fmemvec; + + pthread_mutex_lock( &mutex ); + + memvec=CTAI_Vector_GetMemsize(); + fmemvec=(float) memvec/(1024.0*1024.0); + + /* initialise counting variables */ + ntotal=0; + for (i=0;i<32;i++){ncount[i]=0; nrefcount[i]=0;} + + for (i=0; i < CTAI_Handles_size;i++) + { + if (CTAI_Handles[i] != NULL) + { + ntotal++; + datatype=-CTAI_Handles[i]->datatype; + if ( datatype<0 || datatype >31 ){ + printf("ERROR HANDLE %d has datatype %d",i ,-datatype); + } + else { + ncount[datatype]++; + nrefcount[datatype] = nrefcount[datatype] + CTAI_Handles[i]->refCount; + } + } + else{ + ncount[31]++; + } + } + fflush(stdout); + printf("#%d CTA_Handle_PrintInfo: NOTE: SPECIAL VERSION OF CTA_HANDLES.c:\n", CTA_PAR_MY_RANK); + printf("#%d CTA_Handle_PrintInfo: Overview of handles:\n", CTA_PAR_MY_RANK); + printf("#%d location=%s\n", CTA_PAR_MY_RANK,location); + printf("#%d #handles including empty slots: %d\n", CTA_PAR_MY_RANK,CTAI_Handles_size); + printf("#%d empty slots : %d \n", CTA_PAR_MY_RANK,ncount[31]); + printf("#%d type number of handles number of refcounts\n", CTA_PAR_MY_RANK ); + printf("#%d CTA_HANDLE : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_HANDLE]); + printf("#%d CTA_INTERFACE : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_INTERFACE ]); + printf("#%d CTA_FUNCTION : %d %d\n", CTA_PAR_MY_RANK,ncount[-CTA_FUNCTION ], nrefcount[-CTA_FUNCTION ]); + printf("#%d CTA_VECTOR : %d %d %f Mb\n", CTA_PAR_MY_RANK,ncount[-CTA_VECTOR ], nrefcount[-CTA_VECTOR ], fmemvec); + printf("#%d CTA_VECTORCLASS : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_VECTORCLASS ]); + printf("#%d CTA_TREEVECTOR : %d %d\n", CTA_PAR_MY_RANK,ncount[-CTA_TREEVECTOR], nrefcount[-CTA_TREEVECTOR ]); + printf("#%d CTA_MATRIXCLASS : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_MATRIXCLASS ]); + printf("#%d CTA_MATRIX : %d %d\n", CTA_PAR_MY_RANK,ncount[-CTA_MATRIX ], nrefcount[-CTA_MATRIX ]); + printf("#%d CTA_COVMATCLASS : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_COVMATCLASS ]); + printf("#%d CTA_COVMAT : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_COVMAT ]); + printf("#%d CTA_INTPOL : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_INTPOL ]); + printf("#%d CTA_OBS : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_OBS ]); + printf("#%d CTA_MODELCLASS : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_MODELCLASS ]); + printf("#%d CTA_MODEL : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_MODEL ]); + printf("#%d CTA_TIME : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_TIME ]); + printf("#%d CTA_SOBS : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_SOBS ]); + printf("#%d CTA_SOBSCLASS : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_SOBSCLASS ]); + printf("#%d CTA_OBSDESCR : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_OBSDESCR ]); + printf("#%d CTA_METHODCLASS : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_METHODCLASS ]); + printf("#%d CTA_METHOD : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_METHOD ]); + printf("#%d CTA_TREE : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_TREE ]); + printf("#%d CTA_PACK : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_PACK ]); + printf("#%d CTA_DATABLOCK : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_DATABLOCK ]); + printf("#%d CTA_METAINFO : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_METAINFO ]); + printf("#%d CTA_METAINFOCLASS : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_METAINFOCLASS]); + printf("#%d CTA_RELTABLE : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_RELTABLE ]); + printf("#%d CTA_SUBTREEVECTOR : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_SUBTREEVECTOR]); + printf("#%d CTA_STRING : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_STRING ]); + printf("#%d CTA_FILE : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_FILE ]); + printf("#%d -----+\n", CTA_PAR_MY_RANK); + printf("#%d Total : %d \n", CTA_PAR_MY_RANK,ntotal); + fflush(stdout); + + pthread_mutex_unlock( &mutex ); + + return CTA_OK; +} + + + + +/* Interfacing with Fortran */ +CTAEXPORT void CTA_HANDLE_CREATE_F77(char *name, int *datatype, int *data, + int *handle, int *ierr, int len_name){ + char *c_name; + // create a c-string equivalent to name + c_name=CTA_Malloc((len_name+1)*sizeof(char)); + CTA_fstr2cstr(name,c_name,len_name); + + *ierr=CTA_Handle_Create(c_name, (CTA_Datatype) *datatype, (void *) data, + (CTA_Handle*) handle); + + free(c_name); +} + +CTAEXPORT void CTA_HANDLE_FREE_F77(int *handle, int *ierr){ + + *ierr=CTA_Handle_Free((CTA_Handle *) handle); +} + +CTAEXPORT void CTA_HANDLE_FREE_ALLF77(int *handle, int *ierr){ + + *ierr=CTA_Handle_Free_All((CTA_Handle *) handle); +} + +CTAEXPORT void CTA_HANDLE_CHECK_F77(int *handle, int *datatype, int *ierr){ + + *ierr=CTA_Handle_Check((CTA_Handle) *handle, (CTA_Datatype) *datatype); +} + + +// Note it is not possible to handle a CTA_Handle_GetData operation +// In fortran due to the lack of poiters +// void CTA_Handle_GetData(int *handle, void **data ,int *ierr); + + +CTAEXPORT void CTA_HANDLE_GETNAME_F77(int *handle, int *name, int*ierr){ +/* + char * c_name; //C-string copy of name + c_name=CTA_Malloc((len_name+1)*sizeof(char)); + *ierr=CTA_Handle_GetName((CTA_Handle) *handle, name); + CTA_cstr2fstr(c_name,name,len_name); + free(c_name); +*/ + *ierr=CTA_Handle_GetName((CTA_Handle) *handle, (CTA_String)*name); +} + +/* Try to get the value from the given handle */ +CTAEXPORT void CTA_HANDLE_GETVALUE_F77(int *handle, void *value, int *datatype, int*ierr){ + *ierr=CTA_Handle_GetValue((CTA_Handle) *handle, value, *datatype); +} + +/* Try to get the value from the given handle */ +CTAEXPORT void CTA_HANDLE_GETDATATYPE_F77(int *handle, int *datatype, int*ierr){ + *ierr=CTA_Handle_GetDatatype((CTA_Handle) *handle, datatype); +} + + +CTAEXPORT void CTA_HANDLE_FIND_F77(int *sname, int *datatype, int *handlenr, + int *ierr){ + *ierr=CTA_Handle_Find((CTA_String) *sname, (CTA_Datatype) *datatype, + handlenr); +} + +CTAEXPORT void CTA_HANDLE_PRINTALL_F77(int*ierr){ + *ierr=CTA_Handle_Printall(); +} + +CTAEXPORT void CTA_HANDLE_PRINTINFO_F77(char *location, int*ierr, int len_str){ + char *c_str; + + c_str=CTA_Malloc((len_str+1)*sizeof(char)); + CTA_fstr2cstr(location,c_str,len_str); + + *ierr=CTA_Handle_PrintInfo(c_str); + free(c_str); +} diff --git a/costa/native/cta/src/cta_initialise.c b/costa/native/cta/src/cta_initialise.c new file mode 100644 index 000000000..11cd4e28f --- /dev/null +++ b/costa/native/cta/src/cta_initialise.c @@ -0,0 +1,144 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_initialise.c $ +$Revision: 4055 $, $Date: 2013-07-03 10:16:57 +0200 (Wed, 03 Jul 2013) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include "f_cta_utils.h" +#include "cta.h" +#include "cta_obsdescr_table.h" +#include "cta_modbuild_b3b.h" +#include "cta_sobs_netcdf.h" +#include "cta_sobs_combine.h" +#include "cta_sobs_sqlite3.h" +#include "cta_message.h" + +#define CLASSNAME "CTA_Core" +#define CTA_INITIALISE_F77 F77_CALL(cta_core_initialise,CTA_CORE_INITIALISE) +#define CTA_FINALISE_F77 F77_CALL(cta_core_finalise,CTA_CORE_FINALISE) +#define CTA_SOBS_MAORI_INITIALISE_F77 F77_CALL(cta_sobs_maori_initialise, CTA_SOBS_MAORI_INITIALISE) + +#define IDEBUG (0) + +int is_initialised=0; + +#define METHOD "Initialise" +int CTA_Core_Initialise() +{ + + //Fixes a very very nasty bug that results in non working atof (string -> double) conversions + setlocale(LC_ALL, "C"); + + CTA_INITIAL_RANDOM_SEED=16111970; //Bas ;-) +// CTA_INITIAL_RANDOM_SEED=2101975; //Nils ;-) + + // Default library name for user routines + strcpy(userDefaultDynamicLibrary,"libuseropenda"); + + char message[64]; + sprintf(message,"OpenDA Native initialize: Initial random seed %ld\n",CTA_INITIAL_RANDOM_SEED); + CTA_WRITE_INFO(message); + + CTA_rand_seed((long) CTA_INITIAL_RANDOM_SEED); + if (! is_initialised) + { + is_initialised=1; + CTA_File_Create(&CTA_FILE_STDOUT); + CTA_File_Set(CTA_FILE_STDOUT, stdout); + CTA_SObs_sqlite3_initialise(&CTA_DEFAULT_SOBS); + + CTA_SObs_netcdf_initialise(&CTA_NETCDF_SOBS); + + CTA_SObs_combine_initialise(&CTA_COMBINE_SOBS); + + CTA_ObsDescr_table_initialise(&CTA_OBSDESCR_TABLE); + + CTA_Vector_blas_initialise (&CTA_DEFAULT_VECTOR); + + CTA_Matrix_blas_initialise (&CTA_DEFAULT_MATRIX); + CTA_Modbuild_sp_CreateClass(&CTA_MODBUILD_SP); + + CTA_Func_Create("CTAI_Treevector_Operation_ScaledRMS", + &CTAI_Treevector_Operation_ScaledRMS, CTA_NULL, + &CTA_OP_ROOT_RMS); + + CTA_Func_Create("CTAI_Treevector_Operation_Amax", + &CTAI_Treevector_Operation_Amax, CTA_NULL, + &CTA_OP_ROOT_AMAX); + + CTA_Func_Create("CTAI_Treevector_Operation_PrintEntry", + &CTAI_Treevector_Operation_PrintEntry, CTA_NULL, + &CTA_OP_ROOT_PRINTI); + + CTA_Func_Create("CTAI_Treevector_Operation_ScaledSSQ", + &CTAI_Treevector_Operation_ScaledSSQ, CTA_NULL, + &CTA_OP_ROOT_SSQ); + + CTA_Message_Quiet(1); + // Additional observers: + // 1) MAORI + CTA_SObs_maori_initialize(&CTA_MAORI_SOBS); + // 2) USER + CTA_SObs_user_initialize(&CTA_USER_SOBS); + CTA_Message_Quiet(0); + + CTA_MODBUILD_PAR=CTA_NULL; + + if (IDEBUG) { + printf("cta_initialise :CTA_MODBUILD_PAR =%d\n",CTA_MODBUILD_PAR); + printf("cta_initialise :CTA_FILE_STDOUT =%d\n",CTA_FILE_STDOUT); + printf("cta_initialise :CTA_DEFAULT_SOBS =%d\n",CTA_DEFAULT_SOBS); + printf("cta_initialise :CTA_NETCDF_SOBS =%d\n",CTA_NETCDF_SOBS); + printf("cta_initialise :CTA_COMBINE_SOBS =%d\n",CTA_COMBINE_SOBS); + printf("cta_initialise :CTA_OBSDESCR_TABLE=%d\n",CTA_OBSDESCR_TABLE); + printf("cta_initialise :CTA_DEFAULT_VECTOR=%d\n",CTA_DEFAULT_VECTOR); + printf("cta_initialise :CTA_DEFAULT_MATRIX=%d\n",CTA_DEFAULT_MATRIX); + printf("cta_initialise :CTA_MODBUILD_SP =%d\n",CTA_MODBUILD_SP); + printf("cta_initialise :CTA_MODELCOMBINER =%d\n",CTA_MODELCOMBINER); + printf("cta_initialise :CTA_MODBUILD_B3B =%d\n",CTA_MODBUILD_B3B); + printf("cta_initialise :CTA_MODBUILD_BB =%d\n",CTA_MODBUILD_BB); + printf("cta_initialise :CTA_OP_ROOT_RMS =%d\n",CTA_OP_ROOT_RMS); + printf("cta_initialise :CTA_OP_ROOT_AMAX =%d\n",CTA_OP_ROOT_AMAX); + printf("cta_initialise :CTA_OP_ROOT_PRINTI=%d\n",CTA_OP_ROOT_PRINTI); + printf("cta_initialise :CTA_OP_ROOT_SSQ =%d\n",CTA_OP_ROOT_SSQ); + printf("cta_initialise :CTA_MAORI_SOBS =%d\n",CTA_MAORI_SOBS); + printf("cta_initialise :CTA_USER_SOBS =%d\n",CTA_USER_SOBS); + } + }; + return CTA_OK; +} + + +int CTA_Core_Finalise(){ + if (IDEBUG) printf("Calling CTA_Finalise\n"); + CTA_Modbuild_par_Finalize(); + return CTA_OK; +} + + +/* Interfacing with Fortran */ + +CTAEXPORT void CTA_INITIALISE_F77 (int *ierr){ + *ierr=CTA_Core_Initialise(); +} + +CTAEXPORT void CTA_FINALISE_F77 (int *ierr){ + *ierr=CTA_Core_Finalise(); +} diff --git a/costa/native/cta/src/cta_interface.c b/costa/native/cta/src/cta_interface.c new file mode 100644 index 000000000..a035d9da1 --- /dev/null +++ b/costa/native/cta/src/cta_interface.c @@ -0,0 +1,228 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_interface.c $ +$Revision: 2751 $, $Date: 2011-09-09 08:58:46 +0200 (Fri, 09 Sep 2011) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "cta_mem.h" +#include "cta_interface.h" +#include "cta_errors.h" +#include "cta_message.h" + +#define CLASSNAME "CTA_Interface" + +/* Struct holding all data associated to an COSTA interface */ +// Struct containing specific data associated to a COSTA interface +typedef struct { +int narg; +CTA_Datatype *argtyp; +} CTAI_Interface; + + +/** \brief Creates a new interface + * + * \param name I name of the new interface + * \param narg I number of arguments of interface + * \param argtyp I list with the datatypes of arguments + * \param hintf O COSTA interface handle + * \return error status: CTA_OK + */ +#undef METHOD +#define METHOD "Create" +int CTA_Intf_Create(const char *name, const CTA_Datatype *argtyp, + const int narg,CTA_Intf *hintf){ + + int retval; //Return value of a call + CTAI_Interface *data; //Interface specific data + int i; //loop counter + + // Allocate data and set properties + data=CTA_Malloc(sizeof(CTAI_Interface)); + + // Set properties + data->narg=narg; + data->argtyp=CTA_Malloc(narg*sizeof(CTA_Datatype)); + + // Copy values + for (i=0;iargtyp[i]=argtyp[i]; + }; + + // Allocate new handle and return eror when unsuccesfull + retval=CTA_Handle_Create(name,CTA_INTERFACE,data,hintf); + if (retval) { + CTA_WRITE_ERROR("Cannot create handle"); + return retval; + } + + return CTA_OK; +} + + +/** \brief Frees an interface + * + * \note Freeing CTA_NULL is allowed. + * + * \param hintf I/O handle of interface. The value is + * CTA_NULL on return + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE + */ +#undef METHOD +#define METHOD "Free" +int CTA_Intf_Free(CTA_Intf *hintf){ + + int retval; //Return value of a call + CTAI_Interface *data; //Interface specific data + + // If handle is CTA_NULL -> nothing to be done + if (*hintf==CTA_NULL) return CTA_OK; + + // Check Handle and return error if handle is not valid + retval=CTA_Handle_Check(*hintf,CTA_INTERFACE); + if (retval) { + CTA_WRITE_ERROR("Handle is not a cta_interface handle"); + return retval; + } + + // Free data item + CTA_Handle_GetData(*hintf,(void*) &data); + if (data->argtyp) free(data->argtyp); + free(data); + + //Free Handle + CTA_Handle_Free(hintf); + + return CTA_OK; +} + +/** \brief Mactches two interfaces for compatibility argumentlist-argumentlist + * + * \note two interfaces are compatible if all arguments have the same datatype + * CTA_VOID is compatible with all other arguments except for CTA_FSTRING + + * \param argtyp1 I list with the datatypes of arguments of first interface + * \param narg1 I number of argumetns in first interface + * \param argtyp2 I list with the datatypes of arguments of second interface + * \param narg2 I number of argumetns in second interface + * \param flag O TRUE if interfaces are compatible FALSE ortherwise + * \return error status: CTA_OK + */ +int CTA_Intf_Match_aa(const CTA_Datatype *argtyp1, const int narg1, + const CTA_Datatype *argtyp2, const int narg2, + BOOL *flag){ + + // Check all arguments: + // note CTA_VOID can be combined with all other datatypes except for + // CTA_FSTRING + *flag=FALSE; + + if (narg1==narg2){ + int i; + BOOL allok=TRUE; + for (i=0;iargtyp, data1->narg, + argtyp2, narg2, flag); + return retval; +} + + +/** \brief Mactches two interfaces for compatibility handle-handle + * + * \note two interfaces are compatible if all arguments have the same datatype + * CTA_VOID is compatible with all other arguments except for CTA_FSTRING + + * \param hintf1 I Handle of first interface + * \param hintf2 I Handle of second + * \param flag O TRUE if interfaces are compatible FALSE ortherwise + * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE + */ +#undef METHOD +#define METHOD "Match_hh" +int CTA_Intf_Match_hh(const CTA_Intf hintf1, const CTA_Intf hintf2, BOOL *flag){ + + int retval; + CTAI_Interface *data1, *data2; + + // Check Handle and return error if handle is not valid + retval=CTA_Handle_Check(hintf1,CTA_INTERFACE); + if (retval) { + CTA_WRITE_ERROR("Handle is not a cta_interface handle"); + return retval; + } + retval=CTA_Handle_Check(hintf2,CTA_INTERFACE); + if (retval) { + CTA_WRITE_ERROR("Handle is not a cta_interface handle"); + return retval; + } + + // Get data for both interfaces + CTA_Handle_GetData(hintf1,(void*) &data1); + CTA_Handle_GetData(hintf2,(void*) &data2); + + retval=CTA_Intf_Match_aa(data1->argtyp, data1->narg, + data2->argtyp, data2->narg, flag); + return retval; +} + diff --git a/costa/native/cta/src/cta_matrix.c b/costa/native/cta/src/cta_matrix.c new file mode 100644 index 000000000..aa7e3521b --- /dev/null +++ b/costa/native/cta/src/cta_matrix.c @@ -0,0 +1,960 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_matrix.c $ +$Revision: 3407 $, $Date: 2012-08-17 13:50:50 +0200 (Fri, 17 Aug 2012) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#include +#include "cta_mem.h" +#include "f_cta_utils.h" +#include "cta_f77lapack.h" +#include "cta_flush.h" +#include "cta_matrix.h" +#include "cta_errors.h" +#include "ctai.h" +#include "cta_message.h" + +#define CTA_MATRIX_DEFINECLASS_F77 F77_CALL(cta_matrix_defineclass,CTA_MATRIX_DEFINECLASS) +#define CTA_MATRIX_DUPLICATE_F77 F77_CALL(cta_matrix_duplicate,CTA_MATRIX_DUPLICATE) +#define CTA_MATRIX_CREATE_F77 F77_CALL(cta_matrix_create,CTA_MATRIX_CREATE) +#define CTA_MATRIX_GETSIZE_F77 F77_CALL(cta_matrix_getsize,CTA_MATRIX_GETSIZE) +#define CTA_MATRIX_GETDATATYPE_F77 F77_CALL(cta_matrix_getdatatype,CTA_MATRIX_GETDATATYPE) +#define CTA_MATRIX_GETVALS_F77 F77_CALL(cta_matrix_getvals,CTA_MATRIX_GETVALS) +#define CTA_MATRIX_GETVAL_F77 F77_CALL(cta_matrix_getval,CTA_MATRIX_GETVAL) +#define CTA_MATRIX_SETCONSTANT_F77 F77_CALL(cta_matrix_setconstant,CTA_MATRIX_SETCONSTANT) +#define CTA_MATRIX_SETCOL_F77 F77_CALL(cta_matrix_setcol,CTA_MATRIX_SETCOL) +#define CTA_MATRIX_SETVALS_F77 F77_CALL(cta_matrix_setvals,CTA_MATRIX_SETVALS) +#define CTA_MATRIX_SETVAL_F77 F77_CALL(cta_matrix_setval,CTA_MATRIX_SETVAL) +#define CTA_MATRIX_EXPORT_F77 F77_CALL(cta_matrix_export,CTA_MATRIX_EXPORT) +#define CTA_MATRIX_GER_F77 F77_CALL(cta_matrix_ger,CTA_MATRIX_GER) +#define CTA_MATRIX_GEMV_F77 F77_CALL(cta_matrix_gemv,CTA_MATRIX_GEMV) +#define CTA_MATRIX_GEMM_F77 F77_CALL(cta_matrix_gemm,CTA_MATRIX_GEMM) +#define CTA_MATRIX_INV_F77 F77_CALL(cta_matrix_inv,CTA_MATRIX_INV) +#define CTA_MATRIX_AXPY_F77 F77_CALL(cta_matrix_axpy,CTA_MATRIX_AXPY) +#define CTA_MATRIX_EIGVALS_F77 F77_CALL(cta_matrix_eigvals,CTA_MATRIX_EIGVALS) +#define CTA_MATRIX_FREE_F77 F77_CALL(cta_matrix_free,CTA_MATRIX_FREE) +#define CLASSNAME "CTA_Matrix" + +/* Struct holding all data associated to an COSTA Vector */ + +typedef struct { +CTA_Func functions[CTA_MATRIX_NUMFUNC]; +CTA_MatClass hmatcl; +int m; +int n; +CTA_Datatype datatype; +CTA_Handle userdata; +void *data; /*implementation specific data */ +} CTAI_Matrix; + +typedef struct { +CTA_Func functions[CTA_MATRIX_NUMFUNC]; +} CTAI_MatrixClass; + + +int CTA_Matrix_DefineClass( + const char *name, + const CTA_Func h_func[CTA_MATRIX_NUMFUNC], + CTA_MatClass *hmatcl + ){ + + CTAI_MatrixClass *data; + int retval; + + /* Allocate new Vector object */ + data=CTA_Malloc(sizeof(CTAI_MatrixClass)); + + data->functions[CTA_MATRIX_CREATE_SIZE]=h_func[CTA_MATRIX_CREATE_SIZE]; + data->functions[CTA_MATRIX_CREATE_INIT]=h_func[CTA_MATRIX_CREATE_INIT]; + data->functions[I_CTA_MATRIX_GETVALS ]=h_func[I_CTA_MATRIX_GETVALS ]; + data->functions[I_CTA_MATRIX_GETVAL ]=h_func[I_CTA_MATRIX_GETVAL ]; + data->functions[I_CTA_MATRIX_SETCOL ]=h_func[I_CTA_MATRIX_SETCOL ]; + data->functions[I_CTA_MATRIX_SETVALS ]=h_func[I_CTA_MATRIX_SETVALS ]; + data->functions[I_CTA_MATRIX_SETVAL ]=h_func[I_CTA_MATRIX_SETVAL ]; + data->functions[I_CTA_MATRIX_SETCONST ]=h_func[I_CTA_MATRIX_SETCONST ]; + data->functions[I_CTA_MATRIX_EXPORT ]=h_func[I_CTA_MATRIX_EXPORT ]; + data->functions[I_CTA_MATRIX_GER ]=h_func[I_CTA_MATRIX_GER ]; + data->functions[I_CTA_MATRIX_INV ]=h_func[I_CTA_MATRIX_INV ]; + data->functions[I_CTA_MATRIX_GEMV ]=h_func[I_CTA_MATRIX_GEMV ]; + data->functions[I_CTA_MATRIX_GEMM ]=h_func[I_CTA_MATRIX_GEMM ]; + data->functions[I_CTA_MATRIX_AXPY ]=h_func[I_CTA_MATRIX_AXPY ]; + data->functions[I_CTA_MATRIX_FREE ]=h_func[I_CTA_MATRIX_FREE ]; + + // Allocate new handle and return eror when unsuccesfull + retval=CTA_Handle_Create(name,CTA_MATRIXCLASS,data,hmatcl); + return retval; +} + +#undef METHOD +#define METHOD "Duplicate" +int CTA_Matrix_Duplicate(CTA_Matrix hmatrix1, CTA_Matrix *hmatrix2){ + + CTAI_Matrix *data; /* All data of vector hvec */ + int retval; + + /* Get class data containing all function pointers */ + retval=CTA_Handle_Check((CTA_Handle) hmatrix1,CTA_MATRIX); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_matrix handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hmatrix1,(void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + retval=CTA_Matrix_Create(data->hmatcl, data->m, data->n, data->datatype, + data->userdata, hmatrix2); + + return retval; +} + +#undef METHOD +#define METHOD "Create" +int CTA_Matrix_Create(CTA_MatClass hmatcl, const int m, const int n, + CTA_Datatype datatype, CTA_Handle userdata, + CTA_Matrix *hmatrix){ + + CTAI_Matrix *matrix; + int memsize; + int retval; + CTAI_MatrixClass *clsdata; + CTA_Function *function; + int i; + + /* Get class data containing all function pointers */ + retval=CTA_Handle_Check((CTA_Handle) hmatcl,CTA_MATRIXCLASS); + if (retval!=CTA_OK){ + CTA_WRITE_ERROR("Handle is not a cta_matrixclass handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hmatcl,(void*) &clsdata); + if (retval!=CTA_OK){ + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* determine size of data object (CTA_MATRIX_CREATE_SIZE)*/ + retval=CTA_Func_GetFunc(clsdata->functions[CTA_MATRIX_CREATE_SIZE], + &function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function CTA_MATRIX_CREATE_SIZE"); + return retval; + } + + (void) function(&m, &n,&datatype,userdata,&retval,&memsize); + if (retval){ + CTA_WRITE_ERROR("Error in function"); + return retval; + } + + /* allocate memory for new matrix object */ + matrix=CTA_Malloc(sizeof(CTAI_Matrix)); + matrix->data=CTA_Malloc(memsize); + + /* copy function pointers */ + for (i=0;ifunctions[i]=clsdata->functions[i]; + } + /* set other general information */ + matrix->hmatcl=hmatcl; + matrix->m=m; + matrix->n=n; + matrix->datatype=datatype; + matrix->userdata=userdata; + + /* Initilise and fill new vector */ + retval=CTA_Func_GetFunc(clsdata->functions[CTA_MATRIX_CREATE_INIT], + &function); + if (retval!=CTA_OK){ + CTA_WRITE_ERROR("Cannot get function CTA_MATRIX_CREATE_INIT"); + return retval; + } + (void) function(matrix->data, &m, &n, &datatype, userdata, &retval); + if (retval) { + CTA_WRITE_ERROR("Error in function"); + return retval; + } + + /* Allocate new handle and return eror when unsuccesfull */ + retval=CTA_Handle_Create("vector",CTA_MATRIX,matrix,hmatrix); + if (retval) { + CTA_WRITE_ERROR("Cannot create handle for CTA_MATRIX"); + return retval; + } + + return CTA_OK; +} +#undef METHOD +#define METHOD "GetSize" +int CTA_Matrix_GetSize( + CTA_Matrix hmat, /* Handle of the matrix */ + int *m, /* number of rows */ + int *n /* number of cols */ + ){ + + CTAI_Matrix *data; /* All data of matrix hmat */ + int retval; /* Return value of COSTA call */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_matrix handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hmat,(void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retreive handle data"); + return retval; + } + + *m=data->m; + *n=data->n; + return CTA_OK; +}; +#undef METHOD +#define METHOD "GetDatatype" +int CTA_Matrix_GetDatatype( + CTA_Matrix hmat, /* Handle of the matrix */ + CTA_Datatype *datatype /* Returned data type */ + ){ + + CTAI_Matrix *data; /* All data of matrix hmat */ + int retval; /* Return value of COSTA call */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_matrix handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hmat,(void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retreive handle data"); + return retval; + } + + *datatype=data->datatype; + return CTA_OK; +}; + +#undef METHOD +#define METHOD "GetVals" +int CTA_Matrix_GetVals( + CTA_Matrix hmat, /* Handle of the matrix */ + void *vals, /* Retured value */ + int m, /* number of rows (fortran) matrix vals */ + int n, /* number of columns (fortran) matrix vals */ + CTA_Datatype datatype /* Data type */ + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Matrix *data; /* All data of matrix hmat */ + CTA_Function *function; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_matrix handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hmat,(void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retreive handle data"); + return retval; + } + + CTA_Func_GetFunc(data->functions[I_CTA_MATRIX_GETVALS],&function); + + /* Call (user) implementation */ + function(data->data,vals,&m,&n,&datatype,&retval); + return retval; +}; + + + +#undef METHOD +#define METHOD "GetVal" +int CTA_Matrix_GetVal( + CTA_Matrix hmat, /* Handle of the matrix */ + void *val, /* Returned value */ + int m, /* number of rows (fortran) matrix vals */ + int n, /* number of columns (fortran) matrix vals */ + CTA_Datatype datatype /* Data type */ + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Matrix *data; /* All data of matrix hmat */ + CTA_Function *function; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_matrix handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hmat,(void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retreive handle data"); + return retval; + } + CTA_Func_GetFunc(data->functions[I_CTA_MATRIX_GETVAL],&function); + + /* Call (user) implementation */ + function(data->data,val,&m,&n,&datatype,&retval); + return retval; +}; + + +#undef METHOD +#define METHOD "SetConstant" +int CTA_Matrix_SetConstant( + CTA_Matrix hmat, /* Handle of the matrix */ + void *val, /* value that must be set */ + CTA_Datatype datatype /* Data type */ + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Matrix *data; /* All data of matrix hmat */ + CTA_Function *function; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_matrix handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hmat, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retreive handle data"); + return retval; + } + + CTA_Func_GetFunc(data->functions[I_CTA_MATRIX_SETCONST],&function); + + /* Call (user) implementation */ + function(data->data,val,&datatype,&retval); + return retval; +}; + +#undef METHOD +#define METHOD "Export" +int CTA_Matrix_Export( + CTA_Matrix hmat, /* Handle of the matrix */ + CTA_Handle userdata /* user-data */ + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Matrix *data; /* All data of matrix hmat */ + CTA_Function *function; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_matrix handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hmat, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retreive handle data"); + return retval; + } + + CTA_Func_GetFunc(data->functions[I_CTA_MATRIX_EXPORT],&function); + + /* Call (user) implementation */ + function(data->data,userdata,&retval); + return retval; +}; + + +#undef METHOD +#define METHOD "SetCol" +int CTA_Matrix_SetCol( + CTA_Matrix hmat, /* Handle of the matrix */ + int n, /* column number */ + CTA_Vector hvec /* new values for column */ + ) +{ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Matrix *data; /* All data of matrix hmat */ + CTA_Function *function; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_matrix handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hmat, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retreive handle data"); + return retval; + } + + CTA_Func_GetFunc(data->functions[I_CTA_MATRIX_SETCOL],&function); + + /* Call (user) implementation */ + function(data->data,&n,&hvec,&retval); + return retval; +}; + +#undef METHOD +#define METHOD "SetVals" +int CTA_Matrix_SetVals( + CTA_Matrix hmat, /* Handle of the matrix */ + void *vals, /* value that must be set */ + int m, /* number of rows */ + int n, /* number of cols */ + CTA_Datatype datatype /* Data type */ + ){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Matrix *data; /* All data of matrix hmat */ + CTA_Function *function; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_matrix handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hmat, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retreive handle data"); + return retval; + } + + CTA_Func_GetFunc(data->functions[I_CTA_MATRIX_SETVALS],&function); + + /* Call (user) implementation */ + function(data->data,vals,&m, &n,&datatype,&retval); + return retval; +}; + + +#undef METHOD +#define METHOD "SetVal" +int CTA_Matrix_SetVal( + CTA_Matrix hmat, /* Handle of the matrix */ + void *vals, /* value that must be set */ + int m, /* row index */ + int n, /* col index */ + CTA_Datatype datatype /* Data type */ + ){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Matrix *data; /* All data of matrix hmat */ + CTA_Function *function; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_matrix handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hmat, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retreive handle data"); + return retval; + } + + CTA_Func_GetFunc(data->functions[I_CTA_MATRIX_SETVAL],&function); + + /* Call (user) implementation */ + function(data->data,vals,&m, &n,&datatype,&retval); + return retval; +}; + + +#undef METHOD +#define METHOD "Ger" +int CTA_Matrix_Ger(CTA_Matrix hmat, double alpha, CTA_Vector vx, + CTA_Vector vy){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Matrix *data; /* All data of matrix hmat */ + CTA_Function *function; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_matrix handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hmat, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retreive handle data"); + return retval; + } + + CTA_Func_GetFunc(data->functions[I_CTA_MATRIX_GER],&function); + + /* Call (user) implementation */ + function(data->data,&alpha,&vx,&vy,&retval); + return retval; +} + +#undef METHOD +#define METHOD "Inv" +int CTA_Matrix_Inv(CTA_Matrix hmat){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Matrix *data; /* All data of matrix hmat */ + CTA_Function *function; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_matrix handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hmat, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retreive handle data"); + return retval; + } + + CTA_Func_GetFunc(data->functions[I_CTA_MATRIX_INV],&function); + + /* Call (user) implementation */ + function(data->data,&retval); + return retval; +} + +#undef METHOD +#define METHOD "Gemv" +int CTA_Matrix_Gemv(CTA_Matrix hmat, int trans, double alpha, CTA_Vector vx, + double beta, CTA_Vector vy){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Matrix *data; /* All data of matrix hmat */ + CTA_Function *function; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_matrix handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hmat, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retreive handle data"); + return retval; + } + + CTA_Func_GetFunc(data->functions[I_CTA_MATRIX_GEMV],&function); + + /* Call (user) implementation */ + function(data->data, &trans, &alpha,&vx,&beta,&vy,&retval); + return retval; +} + + +#undef METHOD +#define METHOD "Gemm" +int CTA_Matrix_Gemm(CTA_Matrix mC, int transa, int transb, double alpha, + CTA_Matrix mA, CTA_Matrix mB, double beta){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Matrix *dataA; /* All data of matrix A */ + CTAI_Matrix *dataB; /* All data of matrix B */ + CTAI_Matrix *dataC; /* All data of matrix C */ + CTA_Function *function; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) mA,CTA_MATRIX); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_matrix handle"); + return retval; + } + + retval=CTA_Handle_Check((CTA_Handle) mB,CTA_MATRIX); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_matrix handle"); + return retval; + } + + retval=CTA_Handle_Check((CTA_Handle) mC,CTA_MATRIX); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_matrix handle"); + return retval; + } + + + retval=CTA_Handle_GetData((CTA_Handle) mA, (void*) &dataA); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retreive handle data"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) mB, (void*) &dataB); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retreive handle data"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) mC, (void*) &dataC); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retreive handle data"); + return retval; + } + + if (dataA->functions[I_CTA_MATRIX_GEMV]!= + dataB->functions[I_CTA_MATRIX_GEMV] || + dataA->functions[I_CTA_MATRIX_GEMV]!= + dataC->functions[I_CTA_MATRIX_GEMV]){ + return CTA_INCOMPATIBLE_MATRICES; + } + + CTA_Func_GetFunc(dataC->functions[I_CTA_MATRIX_GEMM],&function); + + /* Call (user) implementation */ + function(dataC->data, &transa, &transb, &alpha, dataA->data, + dataB->data, &beta, &retval); + return retval; +} + + +#undef METHOD +#define METHOD "Free" +int CTA_Matrix_Free( + CTA_Matrix *hmat /* Handle of matrix */ + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Matrix *data; /* All data of matrix hmat */ + CTA_Function *function; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) *hmat,CTA_MATRIX); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_matrix handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) *hmat, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retreive handle data"); + return retval; + } + + CTA_Func_GetFunc(data->functions[I_CTA_MATRIX_FREE],&function); + + /* Call (user) implementation */ + function(data->data,&retval); + free(data->data); + free(data); + retval=CTA_Handle_Free(hmat); + + return retval; +}; + +#undef METHOD +#define METHOD "Axpy" +int CTA_Matrix_Axpy(CTA_Matrix mY, double alpha, CTA_Matrix mX){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Matrix *dataX; /* All data of matrix A */ + CTAI_Matrix *dataY; /* All data of matrix B */ + CTA_Function *function; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) mY,CTA_MATRIX); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_matrix handle"); + return retval; + } + + retval=CTA_Handle_Check((CTA_Handle) mX,CTA_MATRIX); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_matrix handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) mY, (void*) &dataY); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retreive handle data"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) mX, (void*) &dataX); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retreive handle data"); + return retval; + } + + if (dataY->functions[I_CTA_MATRIX_AXPY]!= + dataX->functions[I_CTA_MATRIX_AXPY]){ + return CTA_INCOMPATIBLE_MATRICES; + } + + CTA_Func_GetFunc(dataY->functions[I_CTA_MATRIX_AXPY],&function); + + /* Call (user) implementation */ + function(dataY->data, &alpha, dataX->data, &retval); + return retval; +} + + + +int CTA_Matrix_EigVals(CTA_Matrix hmat, CTA_Vector eigvals, + CTA_Matrix Reigvecs){ + +char leigv, reigv; + +int nrows, ncols, neig, n; +void *A, *VL, *VR, *eigR, *eigI, *work; +CTA_Datatype datatype; +int size, lwork, info; +CTA_Matrix Leigvecs=CTA_NULL; +double dlength; +float slength; + + /* get dimensions and check them */ + CTA_Matrix_GetSize(hmat, &nrows, &ncols); + if (nrows!=ncols) { + return CTA_MATRIX_IS_NOT_SQUARE; + } + n=ncols; + CTA_Matrix_GetDatatype(hmat,&datatype); + CTA_SizeOf(datatype,&size); + + /* Check vector of eigen values */ + CTA_Vector_GetSize(eigvals,&neig); + if (neig!=n){ + return CTA_DIMENSION_ERROR; + } + + /* Check matrix of left eigenvectors */ + leigv='N'; + if (Leigvecs!=CTA_NULL){ + leigv='V'; + CTA_Matrix_GetSize(Leigvecs, &nrows, &ncols); + if (ncols!=n || nrows!=n) { + return CTA_DIMENSION_ERROR; + } + } + + /* Check matrix of right eigenvectors */ + reigv='N'; + if (Reigvecs!=CTA_NULL){ + reigv='V'; + CTA_Matrix_GetSize(Reigvecs, &nrows, &ncols); + if (ncols!=n || nrows!=n) { + return CTA_DIMENSION_ERROR; + } + } + + A=CTA_Malloc(size*n*n); + VL=CTA_Malloc(size*n*n); + VR=CTA_Malloc(size*n*n); + CTA_Matrix_GetVals(hmat, A, n, n, datatype); + + eigR=CTA_Malloc(size*n); + eigI=CTA_Malloc(size*n); + + switch (datatype){ + case CTA_REAL : + /* Determine amount of work memory */ + lwork=-1; + SGEEV_F77( &leigv, &reigv, &n, A, &n, eigR, eigI, VL, &n, VR, &n, &slength, + &lwork, &info); + + /* Allocate work memory and start compute */ + lwork=(int) slength; + work=CTA_Malloc(size*lwork); + + SGEEV_F77( &leigv, &reigv, &n, A, &n, eigR, eigI, VL, &n, VR, &n, work, + &lwork, &info); + break; + case CTA_DOUBLE : + lwork=-1; + DGEEV_F77( &leigv, &reigv, &n, A, &n, eigR, eigI, VL, &n, VR, &n, + &dlength, &lwork, &info); + + /* Allocate work memory and start compute */ + lwork=(int) dlength; + work=CTA_Malloc(size*lwork); + + DGEEV_F77( &leigv, &reigv, &n, A, &n, eigR, eigI, VL, &n, VR, &n, work, + &lwork, &info); + + break; + } + + /* Set left eigenvalues */ + if (Leigvecs!=CTA_NULL){ + CTA_Matrix_SetVals(Leigvecs, VL, n, n, datatype); + } + + /* Set right eigenvalues */ + if (Reigvecs!=CTA_NULL){ + CTA_Matrix_SetVals(Reigvecs, VR, n, n, datatype); + } + + /* Set real-part eigenvalues */ + CTA_Vector_SetVals(eigvals, eigR, n, datatype); + + free(A); + free(VL); + free(VR); + free(eigR); + free(eigI); + free(work); + return CTA_OK; +} + +/* Interfacing with Fortran */ + +CTAEXPORT void CTA_MATRIX_DEFINECLASS_F77(char *name, int *h_func,int *hmatcl, int *ierr, + int len_name){ + + char *c_name; + // create a c-string equivalent to name + c_name=CTA_Malloc((len_name+1)*sizeof(char)); + CTA_fstr2cstr(name,c_name,len_name); + *ierr=CTA_Matrix_DefineClass(name, (CTA_Func*) h_func, + (CTA_MatClass*) hmatcl); + free(c_name); +} + +CTAEXPORT void CTA_MATRIX_DUPLICATE_F77(int *hmatrix1, int *hmatrix2, int *ierr){ + *ierr=CTA_Matrix_Duplicate((CTA_Matrix) *hmatrix1, (CTA_Matrix*) hmatrix2); +} + +CTAEXPORT void CTA_MATRIX_CREATE_F77(int *hmatcl, int *m, int *n, int *datatype, + int *userdata, int *hmatrix, int *ierr){ + *ierr=CTA_Matrix_Create((CTA_MatClass) *hmatcl, *m, *n, + (CTA_Datatype) *datatype, (CTA_Handle) *userdata, (CTA_Matrix*) hmatrix); +} + +CTAEXPORT void CTA_MATRIX_GETSIZE_F77(int *hmat, int *m, int *n, int *ierr){ + *ierr=CTA_Matrix_GetSize((CTA_Matrix) *hmat, m, n); +} + +CTAEXPORT void CTA_MATRIX_GETDATATYPE_F77(int *hmat, int *datatype ,int *ierr){ + *ierr=CTA_Matrix_GetDatatype((CTA_Matrix) *hmat, (CTA_Datatype*) datatype); +} + +CTAEXPORT void CTA_MATRIX_GETVALS_F77(int *hmat, void *vals, int *m, int *n, + int *datatype, int *ierr){ + *ierr=CTA_Matrix_GetVals((CTA_Matrix) *hmat, vals, *m, *n, + (CTA_Datatype) *datatype); +} + +CTAEXPORT void CTA_MATRIX_GETVAL_F77(int *hmat, void *vals, int *m, int *n, + int *datatype, int *ierr){ + *ierr=CTA_Matrix_GetVal((CTA_Matrix) *hmat, vals, *m, *n, + (CTA_Datatype) *datatype); +} + + +CTAEXPORT void CTA_MATRIX_SETCONSTANT_F77(int *hmat, void *val, int *datatype, int *ierr){ + *ierr=CTA_Matrix_SetConstant((CTA_Matrix) *hmat, val, + (CTA_Datatype) *datatype); +} + + +CTAEXPORT void CTA_MATRIX_SETCOL_F77(int *hmat, int *n, int *hvec, int *ierr){ + *ierr=CTA_Matrix_SetCol((CTA_Matrix) *hmat, *n, (CTA_Vector) *hvec); +} + +CTAEXPORT void CTA_MATRIX_SETVALS_F77(int *hmat, void *vals, int *m, int *n, + int *datatype, int *ierr){ + *ierr=CTA_Matrix_SetVals((CTA_Matrix) *hmat, vals, *m, *n, + (CTA_Datatype) *datatype); +} + +CTAEXPORT void CTA_MATRIX_SETVAL_F77(int *hmat, void *val, int *m, int *n, + int *datatype, int *ierr){ + *ierr=CTA_Matrix_SetVal((CTA_Matrix) *hmat, val, *m, *n, + (CTA_Datatype) *datatype); +} + +CTAEXPORT void CTA_MATRIX_EXPORT_F77(int *hmat, int *usedoc, int *ierr){ + *ierr=CTA_Matrix_Export((CTA_Matrix) *hmat, (CTA_Handle) *usedoc); + +} + +CTAEXPORT void CTA_MATRIX_GER_F77(int *hmat, double *alpha, int *vx, int *vy, int *ierr){ + *ierr=CTA_Matrix_Ger((CTA_Matrix) *hmat, (double) *alpha, (CTA_Vector) *vx, + (CTA_Vector) *vy); +} + +CTAEXPORT void CTA_MATRIX_INV_F77(int *hmat, int *ierr){ + *ierr=CTA_Matrix_Inv((CTA_Matrix) *hmat); +} + +CTAEXPORT void CTA_MATRIX_GEMV_F77(int *hmat, int *trans, double *alpha, int *vx, double *beta, int *vy, int *ierr){ + *ierr=CTA_Matrix_Gemv((CTA_Matrix) *hmat, *trans, (double) *alpha, (CTA_Vector) *vx, + (double) *beta, (CTA_Vector) *vy); +} + +CTAEXPORT void CTA_MATRIX_GEMM_F77(int *mC, int *transa, int *transb, double *alpha, + int *mA, int *mB, double *beta, int *ierr){ + *ierr=CTA_Matrix_Gemm((CTA_Matrix) *mC, *transa, *transb, (double) *alpha, + (CTA_Matrix) *mA, (CTA_Matrix) *mB, (double) *beta); +} + + + +CTAEXPORT void CTA_MATRIX_AXPY_F77(int *mY, double *alpha, int *mX, int *ierr){ + *ierr=CTA_Matrix_Axpy((CTA_Matrix) *mY, *alpha, (CTA_Matrix) *mX); +} + + +CTAEXPORT void CTA_MATRIX_EIGVALS_F77(int *hmat, int *eigvals, int +*eigvecs, int *ierr){ + *ierr=CTA_Matrix_EigVals((CTA_Matrix) *hmat, (CTA_Vector) *eigvals, (CTA_Matrix) *eigvecs ); +} + + + + +CTAEXPORT void CTA_MATRIX_FREE_F77(int *hmat, int *ierr){ + *ierr=CTA_Matrix_Free((CTA_Matrix*) hmat); +} + + diff --git a/costa/native/cta/src/cta_matrix_blas.c b/costa/native/cta/src/cta_matrix_blas.c new file mode 100644 index 000000000..d06b74ad6 --- /dev/null +++ b/costa/native/cta/src/cta_matrix_blas.c @@ -0,0 +1,805 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_matrix_blas.c $ +$Revision: 3361 $, $Date: 2012-07-04 16:52:30 +0200 (Wed, 04 Jul 2012) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#include +#include "cta_mem.h" +#include "cta_system.h" +#include "cta_flush.h" +#include "cta_matrix_blas.h" +#include "cta_errors.h" +#include "cta_file.h" +#include "ctai.h" +#include "cta_message.h" + +#define CLASSNAME "CTA_Matrix_blas" + + +void CTA_Matrix_blas_initialise(CTA_MatClass *hmatcl) +{ + CTA_Intf hintf=0; + CTA_Func h_func[CTA_MATRIX_NUMFUNC]; + + CTA_Func_Create(" ",&CTAI_Matrix_Create_Size, hintf,&h_func[CTA_MATRIX_CREATE_SIZE]); + CTA_Func_Create(" ",&CTAI_Matrix_Create_Init, hintf,&h_func[CTA_MATRIX_CREATE_INIT]); + CTA_Func_Create(" ",&CTAI_Matrix_getvals, hintf,&h_func[I_CTA_MATRIX_GETVALS] ); + CTA_Func_Create(" ",&CTAI_Matrix_getval, hintf,&h_func[I_CTA_MATRIX_GETVAL] ); + CTA_Func_Create(" ",&CTAI_Matrix_setcol, hintf,&h_func[I_CTA_MATRIX_SETCOL] ); + CTA_Func_Create(" ",&CTAI_Matrix_setvals, hintf,&h_func[I_CTA_MATRIX_SETVALS] ); + CTA_Func_Create(" ",&CTAI_Matrix_setval, hintf,&h_func[I_CTA_MATRIX_SETVAL] ); + CTA_Func_Create(" ",&CTAI_Matrix_setconst, hintf,&h_func[I_CTA_MATRIX_SETCONST] ); + CTA_Func_Create(" ",&CTAI_Matrix_Export, hintf,&h_func[I_CTA_MATRIX_EXPORT] ); + CTA_Func_Create(" ",&CTAI_Matrix_Ger, hintf,&h_func[I_CTA_MATRIX_GER] ); + CTA_Func_Create(" ",&CTAI_Matrix_Inv, hintf,&h_func[I_CTA_MATRIX_INV] ); + CTA_Func_Create(" ",&CTAI_Matrix_Gemv, hintf,&h_func[I_CTA_MATRIX_GEMV] ); + CTA_Func_Create(" ",&CTAI_Matrix_Gemm, hintf,&h_func[I_CTA_MATRIX_GEMM] ); + CTA_Func_Create(" ",&CTAI_Matrix_Axpy, hintf,&h_func[I_CTA_MATRIX_AXPY] ); + CTA_Func_Create(" ",&CTAI_Matrix_free, hintf,&h_func[I_CTA_MATRIX_FREE] ); + + CTA_Matrix_DefineClass("cta_matrix_blas",h_func,hmatcl); +} + +void CTAI_Matrix_Create_Size( + int *m, + int *n, + CTA_Datatype *datatype, + CTA_Handle userdata, + int *retval, + int *memsize + ){ + + *memsize=(int) sizeof(CTAI_Matrix_blas); + *retval=CTA_OK; +}; + +void CTAI_Matrix_Create_Init( + CTAI_Matrix_blas *x, + int *m, + int *n, + CTA_Datatype *datatype, + CTA_Handle userdata, + int *retval + ){ + if (*datatype==CTA_REAL){ + x->values=CTA_Malloc(sizeof(float)*(*m)*(*n)); + } + else if (*datatype==CTA_DOUBLE){ + x->values=CTA_Malloc(sizeof(double)*(*m)*(*n)); + } + else if (*datatype==CTA_INTEGER){ + x->values=CTA_Malloc(sizeof(int)*(*m)*(*n)); + } + else { + *retval=CTA_ILLEGAL_DATATYPE; + } + x->datatype=*datatype; + x->n=*n; + x->m=*m; + x->inverse=FALSE; + x->perm=NULL; +}; + + +void CTAI_Matrix_construct_inverse(CTAI_Matrix_blas *A) +{ + void *lu; + double *dA; + int i, length, info; + int one=1; + char notrans='N'; + + length=A->n*A->m; + if (A->datatype==CTA_REAL){ + float *sA; + lu=CTA_Malloc(sizeof(float)*length); + SCOPY_F77(&length,A->values,&one,(float *) lu,&one); + // Create identity matrix + sA=A->values; + for (i=0;in+1){ sA[i]=1.0;} + // Compute inverse + SGETRS_F77(¬rans,&(A->n), &A->n, lu, &(A->n), A->perm, A->values, + &(A->n), &info,1); + free(lu); + + } + else if (A->datatype==CTA_DOUBLE){ + lu=CTA_Malloc(sizeof(double)*length); + DCOPY_F77(&length,A->values,&one,(double *) lu,&one); + // Create identity matrix + dA=A->values; + for (i=0;in+1){ dA[i]=1.0;} + // Compute inverse + DGETRS_F77(¬rans,&(A->n), &A->n, lu, &(A->n), A->perm, A->values, + &(A->n), &info,1); + free(lu); + } + else { + printf("INTERNAL ERROR IN CTAI_Matrix_construct_inverse\n"); + exit(-1); + } + A->inverse=FALSE; + +} + + +void CTAI_Matrix_getvals( + CTAI_Matrix_blas *A, + void *vals, + int *m, + int *n, + CTA_Datatype *datatype, + int *retval + ){ + /* Local variables */ + int one=1; + int length; + + /* check dimensions */ + if (*m!=A->m){ + *retval=CTA_DIMENSION_ERROR; + return; + } + if (*n!=A->n){ + *retval=CTA_DIMENSION_ERROR; + return; + } + + /* check data types */ + if (*datatype!=A->datatype){ + *retval=CTA_INCOMPATIBLE_MATRICES; + return; + } + + if (A->inverse) { + /* OK we are now going to really going to invert the matrix */ + CTAI_Matrix_construct_inverse(A); + } + + /* copy values using BLAS copy */ + length=(*n)*(*m); + if (*datatype==CTA_REAL || *datatype == CTA_INTEGER){ + SCOPY_F77(&length,A->values,&one,vals,&one); + } + else { + DCOPY_F77(&length,A->values,&one,vals,&one); + } + *retval=CTA_OK; +}; + +void CTAI_Matrix_getval( + CTAI_Matrix_blas *x, + void *val, + int *m, + int *n, + CTA_Datatype *datatype, + int *retval + ){ + /* Local variables */ + + if (x->inverse) { + /* OK we are now going to really going to invert the matrix */ + CTAI_Matrix_construct_inverse(x); + } + /* check dimensions */ + if (*m>x->m || *m<1){ + *retval=CTA_DIMENSION_ERROR; + return; + } + if (*n>x->n || *n<1){ + *retval=CTA_DIMENSION_ERROR; + return; + } + + /* check data types */ + if (*datatype!=x->datatype){ + *retval=CTA_INCOMPATIBLE_MATRICES; + return; + } + + if (*datatype==CTA_REAL) + { + float * values = (float *) x->values; + float * value = (float *) val; + *value = values[ (*m-1) + (x->m) * (*n-1) ]; + } + else if (*datatype==CTA_INTEGER) + { + int * values = (int *) x->values; + int * value = (int *) val; + *value = values[ (*m-1) + (*n-1) * (x->m) ]; + } + else if (*datatype==CTA_DOUBLE) + { + double * values = (double *) x->values; + double * value = (double *) val; + *value = values[ (*m-1) + (x->m) * (*n-1) ]; + } + else + { + *retval = CTA_ILLEGAL_DATATYPE; + return; + } + *retval=CTA_OK; +}; + + + +void CTAI_Matrix_setcol( + CTAI_Matrix_blas *x, + int *n, + CTA_Vector *hvec, + int *retval + ){ + /* Local variables */ + CTA_Datatype datatype; + int length; + void * values; + + if (x->inverse){ + /* OK we are now going to really going to invert the matrix */ + CTAI_Matrix_construct_inverse(x); + } + /* Check datatype: must be same as matrix' */ + *retval = CTA_Vector_GetDatatype( *hvec, &datatype); + if (*retval != CTA_OK) return; + + if (datatype!=x->datatype) + { + *retval = CTA_ILLEGAL_DATATYPE; + return; + } + + /* Check the dimensions */ + *retval = CTA_Vector_GetSize(*hvec, &length); + if (*retval != CTA_OK) return; + if (length != x->m) + { + *retval = CTA_DIMENSION_ERROR; return; + } + + /* Find the location where the values will be stored */ + if (datatype==CTA_REAL) + { + float * xvalues = (float*) x->values; + values = &xvalues[ (x->m) * (*n-1)]; + } + else if (datatype==CTA_INTEGER) + { + int * xvalues = (int *) x->values; + values = &xvalues[ (x->m) * (*n-1)]; + } + else if (datatype == CTA_DOUBLE) + { + double * xvalues = (double*) x->values; + values = &xvalues[ (x->m) * (*n-1)]; + } + else + { + *retval = CTA_ILLEGAL_DATATYPE; + return; + } + + *retval = CTA_Vector_GetVals( *hvec, values, length, datatype); + if (*retval != CTA_OK) return; + + *retval=CTA_OK; +}; + + + +void CTAI_Matrix_setvals( + CTAI_Matrix_blas *x, + void *vals, + int *m, + int *n, + CTA_Datatype *datatype, + int *retval + ){ + /* Local variables */ + int one=1; + int length; + + if (x->inverse){ + /* OK we are now going to really going to invert the matrix */ + CTAI_Matrix_construct_inverse(x); + } + /* check dimensions */ + if (*m!=x->m){ + *retval=CTA_DIMENSION_ERROR; + return; + } if (*n!=x->n){ + *retval=CTA_DIMENSION_ERROR; + return; + } + + /* check data types */ + if (*datatype!=x->datatype){ + *retval=CTA_INCOMPATIBLE_MATRICES; + return; + } + + length=(*n)*(*m); + /* copy values using BLAS copy */ + if (*datatype==CTA_REAL || *datatype==CTA_INTEGER){ + SCOPY_F77(&length,vals,&one,x->values,&one); + } + else { + DCOPY_F77(&length,vals,&one,x->values,&one); + } + *retval=CTA_OK; +}; + + + +void CTAI_Matrix_setval( + CTAI_Matrix_blas *x, + void *val, + int *m, + int *n, + CTA_Datatype *datatype, + int *retval + ){ + + if (x->inverse){ + /* OK we are now going to really going to invert the matrix */ + CTAI_Matrix_construct_inverse(x); + } + /* check dimensions */ + if ( *m > x->m || *m<1){ + *retval=CTA_DIMENSION_ERROR; + return; + } if (*n > x->n || *n<1){ + *retval=CTA_DIMENSION_ERROR; + return; + } + + /* check data types */ + if (*datatype!=x->datatype){ + *retval=CTA_INCOMPATIBLE_MATRICES; + return; + } + + /* copy values using BLAS copy */ + if (*datatype==CTA_REAL) + { + float * values= (float*) x->values; + float * value = (float*) val; + values[ (*m-1) + (*n-1) * (x->m) ] = *value; + } + else if (*datatype==CTA_DOUBLE) + { + double * values= (double*) x->values; + double * value = (double*) val; + values[ (*m-1) + (*n-1)* (x->m) ] = *value; + } + else if (*datatype==CTA_INTEGER) + { + int * values= (int*) x->values; + int * value = (int*) val; + values[ (*m-1) + (*n-1)* (x->m) ] = *value; + } + *retval=CTA_OK; +}; + +void CTAI_Matrix_setconst( + CTAI_Matrix_blas *x, + void *val, + CTA_Datatype *datatype, + int *retval + ){ + /* Local variables */ + int length; + int i; + + if (x->inverse) { + /* OK we are now going to really going to invert the matrix */ + CTAI_Matrix_construct_inverse(x); + } + /* check data types */ + if (*datatype!=x->datatype){ + *retval=CTA_INCOMPATIBLE_MATRICES; + return; + } + + /* set values */ + length=x->m*x->n; + if (*datatype==CTA_REAL) + { + float *value = (float *) val; + float *values= (float *) x->values; + for (i=0;ivalues; + for (i=0;ivalues; + for (i=0;iinverse){ + /* OK we are now going to really going to invert the matrix */ + CTAI_Matrix_construct_inverse(x); + } + + if (CTA_Handle_Check(userdata,CTA_FILE)==CTA_OK) { + int i0; + *retval = CTA_File_Get(userdata,&file); + + if (CTA_FLUSH_ALWAYS) CTA_Flush(); + + for (i0=0; i0n; i0+=5) + { + int j; + fprintf(file,"matrix(:,%d:%d)=[",i0+1,MIN(x->n,i0+5)); + for (j=0; jm; j++) + { + int i; + fprintf(file,"\n "); + for (i=i0; in,i0+5);i++) + { + if (x->datatype == CTA_DOUBLE) + { + double *values = (double*) x->values; + //fprintf(file," %10.3lg",values[j+x->m*i]); + fprintf(file," %18.8le",values[j+x->m*i]); + } + else if (x->datatype == CTA_REAL) + { + float *values = (float *) x->values; + fprintf(file," %10.3g",values[j+x->m*i]); + } + else if (x->datatype == CTA_INTEGER) + { + int *values = (int *) x->values; + fprintf(file," %10d",values[j+x->m*i]); + } + } + } + fprintf(file,"];\n\n"); + if (CTA_FLUSH_ALWAYS) CTA_Flush(); + } + } else { + *retval=CTA_FORMAT_NOT_SUPPORTED; + return; + } + + *retval = CTA_OK; +} + +void CTAI_Matrix_Ger( + CTAI_Matrix_blas *A, + double *alpha, + CTA_Vector *vx, + CTA_Vector *vy, + int *retval) +{ + void *x, *y; + int nx, ny; + int sizeoftyp; + int one; + float salpha; + one=1; + + if (A->inverse){ + /* OK we are now going to really going to invert the matrix */ + CTAI_Matrix_construct_inverse(A); + } + *retval=CTA_SizeOf(A->datatype, &sizeoftyp); + if (*retval!=CTA_OK) return; + *retval=CTA_Vector_GetSize(*vx,&nx); + if (*retval!=CTA_OK) return; + *retval=CTA_Vector_GetSize(*vy,&ny); + if (*retval!=CTA_OK) return; + x=CTA_Malloc(sizeoftyp*nx); + y=CTA_Malloc(sizeoftyp*ny); + *retval=CTA_Vector_GetVals(*vx,x,nx,A->datatype); + if (*retval!=CTA_OK) return; + *retval=CTA_Vector_GetVals(*vy,y,ny,A->datatype); + if (*retval!=CTA_OK) return; + if (A->datatype==CTA_REAL){ + salpha=(float) *alpha; + SGER_F77(&(A->m),&(A->n),&salpha,x,&one,y,&one,A->values,&(A->m)); + *retval=CTA_OK; + } + else if (A->datatype==CTA_DOUBLE){ + DGER_F77(&(A->m),&(A->n),alpha,x,&one,y,&one,A->values,&(A->m)); + *retval=CTA_OK; + } + else { + *retval=CTA_ILLEGAL_DATATYPE; + } + free(x); + free(y); + + + return; +} + +void CTAI_Matrix_Gemv( + CTAI_Matrix_blas *A, + int *trans, + double *alpha, + CTA_Vector *vx, + double *beta, + CTA_Vector *vy, + int *retval) +{ + void *x, *y; + int nx, ny, ma, na; + int sizeoftyp; + int one; + float salpha; + float sbeta; + char ctrans; + one=1; + + if (A->inverse) { + /* OK we are now going to really going to invert the matrix */ + CTAI_Matrix_construct_inverse(A); + } + + if (*trans) {ctrans='T'; ma=A->n; na=A->m;} + else {ctrans='N'; ma=A->m; na=A->n;} + + *retval=CTA_SizeOf(A->datatype, &sizeoftyp); + if (*retval!=CTA_OK) return; + *retval=CTA_Vector_GetSize(*vx,&nx); + if (*retval!=CTA_OK) return; + *retval=CTA_Vector_GetSize(*vy,&ny); + if (*retval!=CTA_OK) return; + + /* check dimensions */ + if (ny!=ma || nx!=na){ + *retval=CTA_DIMENSION_ERROR; + } + + x=CTA_Malloc(sizeoftyp*nx); + y=CTA_Malloc(sizeoftyp*ny); + *retval=CTA_Vector_GetVals(*vx,x,nx,A->datatype); + if (*retval!=CTA_OK) return; + *retval=CTA_Vector_GetVals(*vy,y,ny,A->datatype); + if (*retval!=CTA_OK) return; + if (A->datatype==CTA_REAL){ + salpha=(float) *alpha; + sbeta=(float) *beta; + SGEMV_F77(&ctrans,&(A->m),&(A->n),&salpha,A->values,&(A->m),x,&one,&sbeta,y,&one,1); + *retval=CTA_OK; + } + else if (A->datatype==CTA_DOUBLE){ + DGEMV_F77(&ctrans,&(A->m),&(A->n),alpha,A->values,&(A->m),x,&one,beta,y,&one,1); + *retval=CTA_OK; + } + else { + *retval=CTA_ILLEGAL_DATATYPE; + } + *retval=CTA_Vector_SetVals(*vy,y,ny,A->datatype); + if (*retval!=CTA_OK) return; + + free(x); + free(y); + + + return; +} + + +void CTAI_Matrix_Gemm( + CTAI_Matrix_blas *C, + int *transa, + int *transb, + double *alpha, + CTAI_Matrix_blas *A, + CTAI_Matrix_blas *B, + double *beta, + int *retval) +{ + int ma, mb, mc, na, nb, nc; + float salpha; + float sbeta; + char ctransa; + char ctransb; + + // check dimensions + mc=C->m; + nc=C->n; + if (*transa) {ctransa='T'; ma=A->n; na=A->m;} + else {ctransa='N'; ma=A->m; na=A->n;} + if (*transb) {ctransb='T'; mb=B->n; nb=B->m;} + else {ctransb='N'; mb=B->m; nb=B->n;} + + if (ma!=mc || nb!=mc || na!=mb) { + *retval=CTA_DIMENSION_ERROR; + } + + if (A->datatype != B->datatype || A->datatype != C->datatype){ + *retval=CTA_NOT_YET_SUPPORTED; return; + } + + if (A->inverse) { + /* OK we are now going to really going to invert the matrix */ + CTAI_Matrix_construct_inverse(A); + } + if (B->inverse) { + /* OK we are now going to really going to invert the matrix */ + CTAI_Matrix_construct_inverse(B); + } + if (C->inverse) { + /* OK we are now going to really going to invert the matrix */ + CTAI_Matrix_construct_inverse(C); + } + + if (C->datatype==CTA_REAL){ + salpha=(float) *alpha; + sbeta=(float) *beta; + SGEMM_F77(&ctransa,&ctransb,&mc,&nc,&na,&salpha,A->values,&(A->m),B->values,&(B->m), + &sbeta,C->values,&(C->m),1,1); + *retval=CTA_OK; + } + else if (C->datatype==CTA_DOUBLE){ + DGEMM_F77(&ctransa,&ctransb,&mc,&nc,&na,alpha,A->values,&(A->m),B->values,&(B->m), + beta,C->values,&(C->m),1,1); + *retval=CTA_OK; + } + else { + *retval=CTA_ILLEGAL_DATATYPE; + } + return; +} + + + + + + + + + + +void CTAI_Matrix_Inv( + CTAI_Matrix_blas *A, + int *retval + ) +{ + int nul=0; + double *ddum; + float *sdum; + int info; + + info = 0; + if (A->inverse) { + /* OK we are now going to really going to invert the matrix */ + CTAI_Matrix_construct_inverse(A); + } + if (A->m != A->m) {*retval=CTA_DIMENSION_ERROR; return;} + + // Create permutation array + if (!A->perm) A->perm=CTA_Malloc(sizeof(int)*A->n); + + // Compute LU-decomposition of A + if (A->datatype==CTA_REAL){ + sdum=NULL; + SGESV_F77(&(A->n), &nul, A->values, &(A->n), A->perm, sdum, &(A->n), &info); + } + else if (A->datatype==CTA_DOUBLE){ + ddum=NULL; + DGESV_F77(&(A->n), &nul, A->values, &(A->n), A->perm, ddum, &(A->n), &info); + } + else { + *retval=CTA_ILLEGAL_DATATYPE; + } + A->inverse=TRUE; + + if (info>0) { + *retval=CTA_SINGULAR_MATRIX; + } + else if (info<0){ + *retval=CTA_INTERNAL_ERROR; + } + else { + *retval=CTA_OK; + } +}; + +void CTAI_Matrix_Axpy( + CTAI_Matrix_blas *y, + double *alpha, + CTAI_Matrix_blas *x, + int *retval + ){ + /* Local variables */ + int one=1; + int length; + float salpha; + + if (y->inverse) { + CTAI_Matrix_construct_inverse(y); + } + if (x->inverse) { + CTAI_Matrix_construct_inverse(x); + } + /* check dimensions */ + if (x->m!=y->m){ + *retval=CTA_DIMENSION_ERROR; + return; + } + if (x->n!=y->n){ + *retval=CTA_DIMENSION_ERROR; + return; + } + + /* check data types */ + if (y->datatype!=x->datatype){ + *retval=CTA_INCOMPATIBLE_MATRICES; + return; + } + + length=(y->n)*(y->m); + /* copy values using BLAS copy */ + if (y->datatype==CTA_REAL || y->datatype==CTA_INTEGER){ + salpha=(float) *alpha; + SAXPY_F77(&length,&salpha,x->values,&one,y->values,&one); + } + else { + DAXPY_F77(&length,alpha,x->values,&one,y->values,&one); + } + *retval=CTA_OK; +}; + + +void CTAI_Matrix_free( + CTAI_Matrix_blas *x, + int *retval + ) +{ + free(x->values); + if (x->perm); free(x->perm); + *retval=CTA_OK; +}; + + + + + diff --git a/costa/native/cta/src/cta_mem.c b/costa/native/cta/src/cta_mem.c new file mode 100644 index 000000000..c7ea295ea --- /dev/null +++ b/costa/native/cta/src/cta_mem.c @@ -0,0 +1,59 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/cta/cta_handles.c $ +$Revision: 2738 $, $Date: 2011-09-05 10:48:32 +0200 (Mon, 05 Sep 2011) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2011 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#define DEBUG 0 +#include +#include +#include "cta_mem.h" + + + +void CTA_Free(void *ptr){ + free(ptr); +} + +void * CTA_Malloc(size_t size){ + void *ptr = malloc(size); +#ifdef DEBUG + if (! ptr){ + printf("CTA_Malloc: Warning: malloc returns NULL. (size=%zd)\n",size); + } + else { +// memset(ptr,'@', size); + } +#endif + return ptr; + +} + +void * CTA_Realloc(void *ptr, size_t size){ + void *ptrOut = realloc(ptr, size); +#ifdef DEBUG + if (! ptrOut){ + printf("CTA_Malloc: Warning: malloc returns NULL. (size=%zd)\n",size); + } +#endif + return ptrOut; +} + + + diff --git a/costa/native/cta/src/cta_message.c b/costa/native/cta/src/cta_message.c new file mode 100644 index 000000000..01df3e8e2 --- /dev/null +++ b/costa/native/cta/src/cta_message.c @@ -0,0 +1,111 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/openda_core/native/src/cta/cta_interface.c $ +$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (di, 07 okt 2008) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "cta_message.h" +#include "ctai_handles.h" +#include "cta_errors.h" + + +#define CTA_MESSAGE_QUIET_F77 F77_CALL(cta_message_quiet,CTA_MESSAGE_QUIET) + +#define CLASSNAME "CTA_Message" + +// Handle to an external writer +CTA_Func CTAI_Message_externalWriter=CTA_NULL; + +int CTAI_Quiet=0; + +void CTA_Message_Quiet(int setting){ + CTAI_Quiet=setting; +} + + +#define METHOD "Write" +void CTA_Message_Write(const char *className, const char *method, const char *message, char type){ + + + CTA_Function *externalFunction; + int ierr; + + // if program is in quiet mode return directly + if (CTAI_Quiet) return; + + if (CTAI_Message_externalWriter==CTA_NULL) { + if (type=='m' || type=='M' ) { + printf("%s\n",message); + } + else if (type=='i' || type=='I') { + printf("Info: (%s.%s) %s\n",className, method, message); + } + else if (type=='w' || type=='W') { + printf("Warning: (%s.%s) %s\n",className, method, message); + } + else if (type=='e' || type=='E') { + printf("Error: (%s.%s) %s\n",className, method, message); + } + else if (type=='f' || type=='F') { + + printf("FATAL ERROR: (%s.%s) %s\n",className, method, message); + printf("APPLICATION WILL BE TERMINATED\n"); + exit(-1); + } + else { + printf("Error: (cta_msg.CTA_Message_Write) illegal message type (%c)\n", type); + printf("Message: (%s.%s) %s\n",className, method, message); + } + } + else { + ierr=CTA_Func_GetFunc(CTAI_Message_externalWriter, &externalFunction); + if (ierr != CTA_OK) { + // Error, first "kill" external writer before writing error + CTAI_Message_externalWriter=CTA_NULL; + printf("Cannot get pointer to external writer"); + } + else { + // Call external writer + (void) externalFunction(className, method, message, type); + } + } +} + +#undef METHOD +#define METHOD "SetExternalWriter" +void CTA_Message_SetExternalWriter(CTA_Func externalWriter){ + + if (externalWriter==CTA_NULL){ + CTAI_Message_externalWriter=CTA_NULL; + } + else { + if (CTAI_Handle_GetDatatype(externalWriter)!=CTA_FUNCTION){ + CTA_WRITE_ERROR("Input handle is not a function handle.\n External writer is not changed\n"); + } + else { + CTAI_Message_externalWriter=externalWriter; + } + } +} + +/* Interfacing with Fortran */ + +CTAEXPORT void CTA_MESSAGE_QUIET_F77(int *setting){ + CTA_Message_Quiet(*setting); +} diff --git a/costa/native/cta/src/cta_metainfo.c b/costa/native/cta/src/cta_metainfo.c new file mode 100644 index 000000000..c428d25cf --- /dev/null +++ b/costa/native/cta/src/cta_metainfo.c @@ -0,0 +1,1147 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_metainfo.c $ +$Revision: 3407 $, $Date: 2012-08-17 13:50:50 +0200 (Fri, 17 Aug 2012) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include "cta_mem.h" +#include "f_cta_utils.h" +#include "cta_sobs.h" +#include "cta_obsdescr.h" +#include "cta_metainfo.h" +#include "cta_errors.h" +#include "cta_handles.h" +#include "ctai.h" +#include "cta_file.h" +#include "cta_defaults.h" +#include "ctai_xml.h" +#include "cta_message.h" +#include "cta_pack.h" + +#define CTA_METAINFO_DEFINEClASS_F77 F77_CALL(cta_metainfo_definecLass,CTA_METAINFO_DEFINECLASS) +#define CTA_METAINFO_CREATE_F77 F77_CALL(cta_metainfo_create,CTA_METAINFO_CREATE) +#define CTA_METAINFO_CHECK_SOBS_F77 F77_CALL(cta_metainfo_check_sobs,CTA_METAINFO_CHECK_SOBS) +#define CTA_METAINFO_GET_PROPERTIES_F77 F77_CALL(cta_metainfo_get_properties,CTA_METAINFO_GET_PROPERTIES) +#define CTA_METAINFO_GET_KEYS_F77 F77_CALL(cta_metainfo_get_keys,CTA_METAINFO_GET_KEYS) +#define CTA_METAINFO_PROPERTY_COUNT_F77 F77_CALL(cta_metainfo_property_count,CTA_METAINFO_PROPERTY_COUNT) +#define CTA_METAINFO_OBSERVATION_COUNT_F77 F77_CALL(cta_metainfo_observation_count,CTA_METAINFO_OBSERVATION_COUNT) +#define CTA_METAINFO_EXPORT_F77 F77_CALL(cta_metainfo_export,CTA_METAINFO_EXPORT) +#define CTA_METAINFO_IMPORT_F77 F77_CALL(cta_metainfo_import,CTA_METAINFO_IMPORT) +#define CTA_METAINFO_FREE_F77 F77_CALL(cta_metainfo_free,CTA_METAINFO_FREE) +#define CTA_METAINFO_SETTAG_F77 F77_CALL(cta_metainfo_settag,CTA_METAINFO_SETTAG) +#define CTA_METAINFO_SETBELONGSTO_F77 F77_CALL(cta_metainfo_setbelongsto,CTA_METAINFO_SETBELONGSTO) +#define CTA_METAINFO_SETGRID_F77 F77_CALL(cta_metainfo_setgrid,CTA_METAINFO_SETGRID) + +#define CTA_METAINFO_SETREGGRID_F77 F77_CALL(cta_metainfo_setreggrid,CTA_METAINFO_SETREGGRID) +#define CTA_METAINFO_GETREGGRID_F77 F77_CALL(cta_metainfo_getreggrid,CTA_METAINFO_GETREGGRID) +#define CLASSNAME "CTA_Metainfo" + +#define IDEBUG (0) + + +/* Struct holding all data associated to an COSTA Vector */ +typedef struct { +CTA_Func functions[CTA_METAINFO_NUMFUNC]; +} CTAI_MetainfoClass; // A MetainfoClass contains a list of the member-functions + + + + + +typedef struct { +char unit[CTA_STRLEN_TAG]; +char tag[CTA_STRLEN_TAG]; +char belongs_to[CTA_STRLEN_TAG]; +char *description; +int rest; +CTAI_Gridm *thisgrid; +CTA_Func functions[CTA_METAINFO_NUMFUNC]; // See cta_vector.h for a list of +} CTAI_Metainfo; + + + + +int CTAI_Grid_Init( + CTAI_Gridm *hgrid, /* Handle of the grid */ + int nx, int ny,int nz ) + { + /* Local variables */ + int retval; /* Return value of COSTA call */ + + /* copy */ + strcpy(hgrid->name,"gridname") ; + hgrid->type = -99; //undefined + hgrid->nx = nx; hgrid->ny = ny; hgrid->nz = nz; + hgrid->x_origin = 0.1; hgrid->y_origin = 0.0; + hgrid->z_origin = 0.0; + hgrid->dx = 1.0; hgrid->dy = 1.0; hgrid->dz = 1.0; + strcpy(hgrid->refdimp[1],"none"); + strcpy(hgrid->refdimp[2],"none"); + strcpy(hgrid->refdimp[3],"none"); + retval = 0; + + return retval; +}; + + +int CTAI_Grid_IsEqual( + CTAI_Gridm *hgrid1, /* Handle of the grid */ + CTAI_Gridm *hgrid2 ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + int dimseq, chareq, realeq; + double eps1=1.0E-4; + double eps2=1.0E-6; + /* compare */ + retval = CTA_DIMENSION_ERROR; + chareq = (strcmp(hgrid1->name, hgrid2->name)==0); + dimseq = ((hgrid1->type == hgrid2->type) && \ + hgrid1->nx == hgrid2->nx && hgrid1->ny == hgrid2->ny \ + && hgrid1->nz == hgrid2->nz ); + realeq = ((fabs(hgrid1->x_origin - hgrid2->x_origin) + \ + fabs(hgrid1->y_origin - hgrid2->y_origin) + \ + fabs(hgrid1->z_origin - hgrid2->z_origin) < eps1) && \ + (fabs(hgrid1->dx - hgrid2->dx) + \ + fabs(hgrid1->dy - hgrid2->dy) + \ + fabs(hgrid1->dz - hgrid2->dz) < eps2)); + if (dimseq && realeq) { + if (!chareq) { printf("Grids are equal, but names differ \n");} + retval = 0;} + return retval; +}; + + +int CTAI_Grid_Copy( + CTAI_Gridm *hgrid1, /* Handle of the sending grid */ + CTAI_Gridm *hgrid2 /* Handle of the receiving grid */ + ){ + + /* Local variables */ + int retval=0; /* Return value of COSTA call */ + + /* copy */ + // printf("hgrid1->name=%s\n",hgrid1->name); + if ( + (strcmp(hgrid1->name,"gridname") != 0) & + (strcmp(hgrid1->name,"AR(1)-process") != 0) + ) {// return -1234567; +} + strcpy(hgrid2->name, hgrid1->name) ; + hgrid2->type = hgrid1->type; + hgrid2->nx = hgrid1->nx; hgrid2->ny = hgrid1->ny; hgrid2->nz = hgrid1->nz; + hgrid2->x_origin = hgrid1->x_origin; hgrid2->y_origin = hgrid1->y_origin; + hgrid2->z_origin = hgrid1->z_origin; + hgrid2->dx = hgrid1->dx; hgrid2->dy = hgrid1->dy; hgrid2->dz = hgrid1->dz; + hgrid2->nsize = hgrid1->nsize; + strcpy(hgrid2->refdimp[1],hgrid1->refdimp[1]); + strcpy(hgrid2->refdimp[2],hgrid1->refdimp[2]); + strcpy(hgrid2->refdimp[3],hgrid1->refdimp[3]); + + return retval; +}; + + +/* ------------------------------------------------ */ + +int CTAI_Grid_Interpolate(CTAI_Gridm gridy, //desired grid + CTAI_Gridm gridx, // grid where vecx lives + CTA_Vector vecx, // the vector to be interpolated + // from gridx to gridy + CTA_Vector vecx_to_y ) // the resulting vector, living on gridy +{ + int retval, i,j,k,mi,mj,mk; + int size_vecx; + double px,py,pz; + double xL, xR, yL, yR, zL, zR; + int ixL, ixR, iyL, iyR, izL, izR; + double fLLL, fRLL, fLRL, fRRL, fLLR, fRLR, fLRR, fRRR; + double fyLL, fyRL, fyLR, fyRR, fzL, fzR, fint; + + + if (CTAI_Grid_IsEqual(&gridx,&gridy) == CTA_OK) { + // immediately finished; copy vecx_to_y to vecx + if (IDEBUG>0) { printf("interpolating vectors on grid: grids are equal! \n");} + retval = CTA_Vector_Copy(vecx, vecx_to_y); + return retval; + } + + + CTA_Vector_GetSize(vecx, &size_vecx); + if (size_vecx != gridx.nsize) return CTA_DIMENSION_ERROR; + + + // dit werkt alleen goed voor uniform en rectilinear I en II, + // niet voor irregular/ curvilinear! + // verder: eenheidsvectoren van grids moeten gelijk zijn! (niet gedraaid tov elkaar) + // anders moet gridx sowieso eerst getransformeerd naar irregular grid + // met basisvectoren van gridy + + + + /* loop along grid points of desired gridy */ + for (i=1; i <= gridy.nx ;i++){ + for (j=1; j <= gridy.ny ;j++){ + for (k=1; k <= gridy.nz ;k++){ + px = gridy.x_origin + (i-1)*gridy.dx ; py = gridy.y_origin + (j-1)*gridy.dy ; + pz = gridy.z_origin + (k-1)*gridy.dz ; + + + /* search for grid points around px and values in x-direction */ + if (gridx.nx ==1) {xL = gridx.x_origin; xR = xL + 1.0; ixL = 1; ixR=1;} + else if (gridx.x_origin > px) {xL = gridx.x_origin; xR = xL +gridx.dx; ixL=1;ixR=2; } + else if (gridx.x_origin+gridx.dx*(gridx.nx-1)< px) { + xR = gridx.x_origin+gridx.dx*(gridx.nx-1); xL = xR -gridx.dx;ixL=gridx.nx-1;ixR=gridx.nx; } + else { + for (mi=1; mi <= gridx.nx-1; mi++) { + if (gridx.x_origin + mi*gridx.dx > px ) { + xR = gridx.x_origin+mi*gridx.dx ; xL = xR - gridx.dx; ixL=mi; ixR=mi+1; + break;} + } + } + /* same for y direction */ + if (gridx.ny ==1) {yL = gridx.y_origin; yR = yL + 1.0; iyL = 1; iyR=1;} + else if (gridx.y_origin > py) {yL = gridx.y_origin; yR = yL +gridx.dy; iyL=1;iyR=2; } + else if (gridx.y_origin+gridx.dy*(gridx.ny-1)< py) { + yR = gridx.y_origin+gridx.dy*(gridx.ny-1); yL = yR -gridx.dy;iyL=gridx.ny-1;iyR=gridx.ny; } + else { + for (mj=1; mj <= gridx.ny-1; mj++) { + if (gridx.y_origin + mj*gridx.dy > py ) { + yR = gridx.y_origin+mj*gridx.dy ; yL = yR - gridx.dy; iyL=mj; iyR=mj+1; + break;} + } + } + /* same for z direction */ + if (gridx.nz ==1) {zL = gridx.z_origin; zR = zL + 1.0; izL = 1; izR=1;} + else if (gridx.z_origin > pz) {zL = gridx.z_origin; zR = zL +gridx.dz; izL=1;izR=2; } + else if (gridx.z_origin+gridx.dz*(gridx.nz-1)< pz) { + zR = gridx.z_origin+gridx.dz*(gridx.nz-1); zL = zR -gridx.dz;izL=gridx.nz-1;izR=gridx.nz; } + else { + for (mk=1; mk <= gridx.nz-1; mk++) { + if (gridx.z_origin + mk*gridx.dz > pz ) { + zR = gridx.z_origin+mk*gridx.dz ; zL = zR - gridx.dz; izL=mk; izR=mk+1; + break;} + } + } + + // is it always possible to convert to double? + CTA_Vector_GetVal(vecx,ixL + (iyL-1)*gridx.nx +(izL-1)*gridx.nx*gridx.ny, &fLLL, CTA_DOUBLE); + CTA_Vector_GetVal(vecx,ixR + (iyL-1)*gridx.nx +(izL-1)*gridx.nx*gridx.ny, &fRLL, CTA_DOUBLE); + CTA_Vector_GetVal(vecx,ixL + (iyR-1)*gridx.nx +(izL-1)*gridx.nx*gridx.ny, &fLRL, CTA_DOUBLE); + CTA_Vector_GetVal(vecx,ixR + (iyR-1)*gridx.nx +(izL-1)*gridx.nx*gridx.ny, &fRRL, CTA_DOUBLE); + + CTA_Vector_GetVal(vecx,ixL + (iyL-1)*gridx.nx +(izR-1)*gridx.nx*gridx.ny, &fLLR, CTA_DOUBLE); + CTA_Vector_GetVal(vecx,ixR + (iyL-1)*gridx.nx +(izR-1)*gridx.nx*gridx.ny, &fRLR, CTA_DOUBLE); + CTA_Vector_GetVal(vecx,ixL + (iyR-1)*gridx.nx +(izR-1)*gridx.nx*gridx.ny, &fLRR, CTA_DOUBLE); + CTA_Vector_GetVal(vecx,ixR + (iyR-1)*gridx.nx +(izR-1)*gridx.nx*gridx.ny, &fRRR, CTA_DOUBLE); + + + // printf("distances %f %f %f %f %f %f \n",xR-px, yR-py, zR-pz,xR-xL,yR-yL,zR-zL); + + + // lower z- layer + fyLL = ((xR-px)/(xR-xL))*fLLL + ((px-xL)/(xR-xL))*fRLL ; + fyRL = ((xR-px)/(xR-xL))*fLRL + ((px-xL)/(xR-xL))*fRRL ; + fzL = ((yR-py)/(yR-yL))*fyLL + ((py-yL)/(yR-yL))*fyRL ; + + // upper z-layer + fyLR = ((xR-px)/(xR-xL))*fLLR + ((px-xL)/(xR-xL))*fRLR ; + fyRR = ((xR-px)/(xR-xL))*fLRR + ((px-xL)/(xR-xL))*fRRR ; + fzR = ((yR-py)/(yR-yL))*fyLR + ((py-yL)/(yR-yL))*fyRR ; + + fint = ((zR-pz)/(zR-zL))*fzL + ((pz-zL)/(zR-zL))*fzR; + + + retval = CTA_Vector_SetVal(vecx_to_y,(i + (j-1)*gridy.nx), &fint, CTA_DOUBLE); + + }; + + + } + } + + + return CTA_OK; + + +}; + +/* ------------------------------------------------ */ +int CTAI_XML_CreateGrid(xmlNode *cur_node, CTAI_Gridm *thisgrid) { + xmlNode *values_node = NULL; /* values child node */ + xmlNode *val_node = NULL; /* value child node */ + xmlChar *idtmp; + xmlChar *lentmp; + xmlChar *axestmp; + xmlChar *reftmp; + + BOOL isphysicalSpace = FALSE; + int ndims_comp = 0; + int ndims_phys = 0; + char dimidc[10][CTA_STRLEN_TAG]; // names of comp. dimensions + char dimidp[10][CTA_STRLEN_TAG]; // names of physical dimensions + char axes[10][CTA_STRLEN_TAG]; // list of referenced comp. dimensions + int dimlength[10]; // number of elements of comp. dimension + + + /* fill some properties of the grid */ + thisgrid->type = -1; // actual coordinates not yet read + strcpy(thisgrid->name,"xml_grid"); + thisgrid->nsize = 1; + + for (values_node = cur_node->children; values_node; values_node = values_node->next) { + if (0 == strcmp("computationalSpace", (char *) values_node->name)) { + /* read dimensions */ + + for (val_node = values_node->children; val_node; val_node = val_node->next) { + if (0 == strcmp("dimension", (char *) val_node->name)) { + ndims_comp = ndims_comp + 1; + + idtmp = xmlGetProp(val_node, CTAI_XML_ID); + strcpy(dimidc[ndims_comp], (char *) idtmp); + lentmp = xmlGetProp(val_node, CTAI_XML_LENGTH); + dimlength[ndims_comp] = atoi((char *) lentmp); + thisgrid->nsize = thisgrid->nsize*dimlength[ndims_comp]; + // printf("grid comp dims: %d %d %s \n",ndims_comp,dimlength[ndims_comp],dimidc[ndims_comp]); + } + } + if (ndims_comp < 1 || ndims_comp > 3) {printf("wrong number of computational dimensions\n"); + exit(-1); + } + } + + /* the physicalSpace part is not always present. In that case an integer grid can be formed using + the computationalSpace. */ + + if (0 == strcmp("physicalSpace", (char *) values_node->name)) { + isphysicalSpace = TRUE; + /* read dimensions */ + for (val_node = values_node->children; val_node; val_node = val_node->next) { + if (0 == strcmp("dimension", (char *) val_node->name)) { + ndims_phys = ndims_phys + 1; + + idtmp = xmlGetProp(val_node, CTAI_XML_ID); + strcpy(dimidp[ndims_phys], (char *) idtmp); + + axestmp = xmlGetProp(val_node, (xmlChar *) "axes"); + strcpy(axes[ndims_phys], (char *) axestmp); + + reftmp = xmlGetProp(val_node, (xmlChar *) "ref"); + strcpy(thisgrid->refdimp[ndims_phys], (char *) reftmp); + + // printf("grid phys dims: %d |%s|%s|%s| \n", + // ndims_phys,dimidp[ndims_phys],axes[ndims_phys],thisgrid->refdimp[ndims_phys]); + } + } + } + } + if (isphysicalSpace == FALSE) { // make an integer, regular grid + thisgrid->nx = dimlength[1]; + if (ndims_comp == 1) { + thisgrid->type = 1; thisgrid->ny = 1; //degenerated 2D + thisgrid->x_origin = 0.0; thisgrid->y_origin = 0.0; thisgrid->z_origin = 0.0; + thisgrid->dx = 1.0; thisgrid->dy = 1.0; thisgrid->dz = 1.0; + }; + if (ndims_comp == 2 ) { + thisgrid->type = 1; thisgrid->ny=dimlength[2]; // 2D + thisgrid->x_origin = 1.0; thisgrid->y_origin = 1.0; thisgrid->z_origin = 0.0; + thisgrid->dx = 1.0; thisgrid->dy = 1.0; thisgrid->dz = 1.0; + }; + if (ndims_comp == 3 ) { thisgrid->type = 2; + thisgrid->ny = dimlength[2];thisgrid->nz=dimlength[3]; // 3D + thisgrid->x_origin = 1.0; thisgrid->y_origin = 1.0; thisgrid->z_origin = 1.0; + thisgrid->dx = 1.0; thisgrid->dy = 1.0; thisgrid->dz = 1.0; + } + } else { ; + /* the reference to the coordinate vectors is contained in the grid. + We have to wait until the whole tree has been read to be able to access it. */ + thisgrid->type = -ndims_comp; // this denotes (minus) the number of refernces to coordinates + } + + return CTA_OK; + }; + +/* ------------------------------------------------ */ + +#undef METHOD +#define METHOD "DefineClass" +int CTA_Metainfo_DefineClass( + // INPUTS: + const char *name, // Name of the new stochobs class + const CTA_Func h_func[CTA_METAINFO_NUMFUNC], // function handles to + // the implementations of the + // stochobs-class' functions. + // OUTPUTS: + CTA_MetainfoClass *hdescrcl) // The (handle to) the new + // observation descriptor-class +{ + + CTAI_MetainfoClass *data; + int retval; + + /* Allocate new Metainfo object */ + data=CTA_Malloc(sizeof(CTAI_MetainfoClass)); + data->functions[CTA_METAINFO_CREATE_SIZE ]= + h_func[CTA_METAINFO_CREATE_SIZE ]; + data->functions[CTA_METAINFO_CREATE_INIT ]= + h_func[CTA_METAINFO_CREATE_INIT ]; + data->functions[I_CTA_METAINFO_FREE ]= + h_func[I_CTA_METAINFO_FREE ]; + data->functions[I_CTA_METAINFO_GET_REST ]= + h_func[I_CTA_METAINFO_GET_REST ]; + data->functions[I_CTA_METAINFO_GET_KEYS ]= + h_func[I_CTA_METAINFO_GET_KEYS ]; + data->functions[I_CTA_METAINFO_COUNT_OBSERVATIONS ]= + h_func[I_CTA_METAINFO_COUNT_OBSERVATIONS ]; + data->functions[I_CTA_METAINFO_COUNT_PROPERTIES ]= + h_func[I_CTA_METAINFO_COUNT_PROPERTIES ]; + data->functions[I_CTA_METAINFO_EXPORT ]= + h_func[I_CTA_METAINFO_EXPORT ]; + + // Allocate new handle + retval=CTA_Handle_Create(name,CTA_METAINFOCLASS,data,hdescrcl); + retval=CTA_Handle_GetData((CTA_Handle) *hdescrcl,(void**) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + // return error when unsuccesfull + return retval; +} + +int CTA_Metainfo_Create(CTA_Metainfo *hmetainfo){ + + CTAI_Metainfo *metainfo; + CTAI_Gridm initgrid; + int retval; + + /* allocate memory for new metainfo object */ + metainfo=CTA_Malloc(sizeof(CTAI_Metainfo)); + metainfo->rest=0; + strcpy(metainfo->unit,"-"); + strcpy(metainfo->tag,"_"); + strcpy(metainfo->belongs_to,"NONE"); + metainfo->description = CTA_Malloc(9*sizeof(char)); + strcpy(metainfo->description,"none yet"); + + retval=CTAI_Grid_Init(&initgrid,3,2,1); + if (retval) { + CTA_WRITE_ERROR("Cannot initialize Grid"); + return retval; + } + // printf("cta_metainfo_create 1: initgrid.nx %d \n ",initgrid.nx); + //printf("cta_metainfo_create 1: initgrid.x_origin %f \n ",initgrid.x_origin); + + metainfo->thisgrid=CTA_Malloc(sizeof(CTAI_Gridm)); + + retval=CTAI_Grid_Copy(&initgrid, metainfo->thisgrid); + // printf("cta_metainfo_create 2: %d \n ",initgrid.nx); + + if (retval) { + CTA_WRITE_ERROR("Cannot copy grid"); + return retval; + } + + /* Allocate new handle and return error when unsuccesfull */ + retval=CTA_Handle_Create("metainfo",CTA_METAINFO,metainfo,hmetainfo); + + if (retval){ + CTA_WRITE_ERROR("Handle is not a cta_metainfo handle"); + return retval; + } + + return CTA_OK; +} + + + + +#undef METHOD +#define METHOD "Copy" +int CTA_Metainfo_Copy( + CTA_Metainfo hmeta_x, /* Handle of the sending metainfo */ + CTA_Metainfo hmeta_y /* Handle of the receiving metainfo */ + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Metainfo *data_x; /* All data of vector hvec_x */ + CTAI_Metainfo *data_y; /* All data of vector hvec_y */ + + + retval=CTA_Handle_Check((CTA_Handle) hmeta_x,CTA_METAINFO); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_metainfo handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hmeta_x, (void*) &data_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + retval=CTA_Handle_Check((CTA_Handle) hmeta_y,CTA_METAINFO); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_metainfo handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hmeta_y, (void*) &data_y); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + + /* copy */ + // printf("!!!controle copy:|%s|%s| \n", data_x->tag, data_y->tag); + strcpy(data_y->unit, data_x->unit) ; + strcpy(data_y->tag, data_x->tag) ; + strcpy(data_y->belongs_to, data_x->belongs_to) ; + data_y->rest = data_x->rest ; + // data_y->thisgrid=CTA_Malloc(sizeof(CTAI_Gridm)); + retval = CTAI_Grid_Copy(data_x->thisgrid, data_y->thisgrid); + + + return retval; +}; + + +#undef METHOD +#define METHOD "IsEqual" +int CTA_Metainfo_IsEqual( + CTA_Metainfo hmeta_x, /* Handle of the metainfo1 */ + CTA_Metainfo hmeta_y /* Handle of the metainfo2 */ + ){ + + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Metainfo *data_x; /* All data of vector hvec_x */ + CTAI_Metainfo *data_y; /* All data of vector hvec_y */ + + retval=CTA_Handle_Check((CTA_Handle) hmeta_x,CTA_METAINFO); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_metainfo handle"); + return CTA_FALSE; + } + retval=CTA_Handle_GetData((CTA_Handle) hmeta_x, (void*) &data_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return CTA_FALSE; + } + + retval=CTA_Handle_Check((CTA_Handle) hmeta_y,CTA_METAINFO); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_metainfo handle"); + return CTA_FALSE; + } + retval=CTA_Handle_GetData((CTA_Handle) hmeta_y, (void*) &data_y); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return CTA_FALSE; + } + + + /* compare */ + retval = CTAI_Grid_IsEqual(data_x->thisgrid, data_y->thisgrid); + if (retval) return CTA_FALSE; + + retval = (strcmp(data_x->unit, data_y->unit)==0); + if (retval) return CTA_FALSE; + + retval = (strcmp(data_x->tag, data_y->tag)==0); + if (retval) return CTA_FALSE; + + retval = (strcmp(data_x->belongs_to, data_y->belongs_to)==0); + if (retval) return CTA_FALSE; + + retval = (data_x->rest == data_y->rest); + if (retval) return CTA_TRUE; + + return CTA_FALSE; +}; + + + + + +#undef METHOD +#define METHOD "SetUnit" +int CTA_Metainfo_SetUnit(CTA_Metainfo hobsdescr, char* nameofunit){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Metainfo *metainfo; /* All data of observer hmetainfo */ + + /* Get pointer to struct with observation description data */ + retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo); + // printf("metainfo_setunit getdata %d \n", retval); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + strcpy(metainfo->unit,nameofunit); + return retval; +} + +#undef METHOD +#define METHOD "SetTag" +int CTA_Metainfo_SetTag(CTA_Metainfo hobsdescr, char* tagname){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Metainfo *metainfo; /* All data of observer hmetainfo */ + + /* Get pointer to struct with observation description data */ + retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + strcpy(metainfo->tag,tagname); + return retval; +} + +#undef METHOD +#define METHOD "SetDescription" +int CTA_Metainfo_SetDescription(CTA_Metainfo hobsdescr, char* description){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Metainfo *metainfo; /* All data of observer hmetainfo */ + + /* Get pointer to struct with observation description data */ + retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + if (metainfo->description !=NULL) { + free(metainfo->description); + } + metainfo->description = CTA_Malloc((1+strlen(description))*sizeof(char)); + strcpy(metainfo->description,description); + return retval; +} + +#undef METHOD +#define METHOD "GetDescription" +int CTA_Metainfo_GetDescription(CTA_Metainfo hobsdescr, char **description){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Metainfo *metainfo; /* All data of observer hmetainfo */ + + /* Get pointer to struct with observation description data */ + retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo); + if (retval!=CTA_OK){ + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + /* Note: description is supposed not be allocated yet */ + if (*description!=NULL) { + free(*description); + } + *description = CTA_Malloc((1+strlen(metainfo->description))*sizeof(char)); + strcpy(*description, metainfo->description); + return retval; +} + +#undef METHOD +#define METHOD "SetBelongsTo" +int CTA_Metainfo_SetBelongsTo(CTA_Metainfo hobsdescr, char* tagname){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Metainfo *metainfo; /* All data of observer hmetainfo */ + + /* Get pointer to struct with observation description data */ + retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo); + if (retval!=CTA_OK){ + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + strcpy(metainfo->belongs_to,tagname); + return retval; +} + + + +#undef METHOD +#define METHOD "GetUnit" +int CTA_Metainfo_GetUnit(CTA_Metainfo hobsdescr, char *nameofunit){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Metainfo *metainfo; /* All data of observer hmetainfo */ + + /* Get pointer to struct with observation description data */ + retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + strcpy(nameofunit,metainfo->unit); + return retval; +} + +#undef METHOD +#define METHOD "GetTag" +int CTA_Metainfo_GetTag(CTA_Metainfo hobsdescr, char *tagname){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Metainfo *metainfo; /* All data of observer hmetainfo */ + + /* Get pointer to struct with observation description data */ + retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + strcpy(tagname,metainfo->tag); + return retval; +} + +#undef METHOD +#define METHOD "GetBelongsTo" +int CTA_Metainfo_GetBelongsTo(CTA_Metainfo hobsdescr, char *tagname){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Metainfo *metainfo; /* All data of observer hmetainfo */ + + /* Get pointer to struct with observation description data */ + retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + strcpy(tagname,metainfo->belongs_to); + return retval; +} + + +#undef METHOD +#define METHOD "GetRest" +int CTA_Metainfo_GetRest(CTA_Metainfo hobsdescr, int *rest){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Metainfo *metainfo; /* All data of observer hmetainfo */ + + + retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + *rest = metainfo->rest; + if (retval) { + CTA_WRITE_ERROR("Error in CTA_Metainfo_GetRest"); + return retval; + } + + // retval = CTAI_Metainfo_member_function(hobsdescr, CTA_METAINFO_GET_REST, + // &metainfo, &memfun); + //if (retval) return retval; + + /* Call (user) implementation */ + //memfun(metainfo,rest,&retval); + return retval; +} + +#undef METHOD +#define METHOD "SetRest" +int CTA_Metainfo_SetRest(CTA_Metainfo hobsdescr, int *rest){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Metainfo *metainfo; /* All data of observer hmetainfo */ + + + retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + metainfo->rest = *rest; + if (retval) { + CTA_WRITE_ERROR("Error in CTA_Metainfo_SetRest"); + return retval; + } + return retval; +} + + +#undef METHOD +#define METHOD "GetGrid" +int CTA_Metainfo_GetGrid(CTA_Metainfo hobsdescr, CTAI_Gridm *hgrid){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Metainfo *metainfo; /* All data of observer hmetainfo */ + + /* Get pointer to struct with observation description data */ + retval=CTA_Handle_Check((CTA_Handle) hobsdescr,CTA_METAINFO); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_metainfo handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo); + // printf("metainfo_getgrid%d \n", retval); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + retval = CTAI_Grid_Copy(metainfo->thisgrid, hgrid); + //printf("metainfo_getgrid%d \n", retval); + + if (retval) { + CTA_WRITE_ERROR("Error in CTA_Metainfo_GetGrid"); + return retval; + } + return retval; +} + +#undef METHOD +#define METHOD "SetGrid" +int CTA_Metainfo_SetGrid(CTA_Metainfo hobsdescr, CTAI_Gridm *hgrid){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Metainfo *metainfo; /* All data of observer hmetainfo */ + + /* Get pointer to struct with observation description data */ + retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + retval = CTAI_Grid_Copy(hgrid, metainfo->thisgrid); + if (retval) { + CTA_WRITE_ERROR("Error in CTA_Metainfo_SetGrid"); + return retval; + } + return retval; +} + + + +int CTA_Metainfo_Export(CTA_Metainfo minfo, CTA_Handle usrdata){ + int retval; + CTAI_Gridm thisgrid; + char unitname[CTA_STRLEN_TAG]; + FILE *file; //File pointer + CTA_Datatype userDataType; + + retval = CTA_Metainfo_GetUnit(minfo,unitname); + retval = CTA_Metainfo_GetGrid(minfo,&thisgrid); + + retval = CTA_Handle_GetDatatype(usrdata, &userDataType); + if (retval != CTA_OK) return retval; + + if (userDataType == CTA_PACK) { + if (IDEBUG>0) printf("Metainfo: packing:\n"); + retval=CTA_Pack_Add(usrdata,unitname,sizeof(char)*CTA_STRLEN_TAG); + retval=CTA_Pack_Add(usrdata,&thisgrid,sizeof(CTAI_Gridm)); + } + + else if (userDataType == CTA_FILE) { + retval=CTA_File_Get(usrdata,&file); + if (thisgrid.type > -99 ) { // grid is defined + fprintf(file," Metainfo: gridtype, size: %d %d\n",thisgrid.type, thisgrid.nsize); + fprintf(file," Metainfo: phys. dimensions: %d %d %d\n",thisgrid.nx, thisgrid.ny,thisgrid.nz); + if (thisgrid.type < 0) {fprintf(file," Metainfo: coordinates-reference : %s\n", + thisgrid.refdimp[1]);} + else { + fprintf(file," Metainfo: grid dx,dy,dz: %f %f %f\n",thisgrid.dx, thisgrid.dy, thisgrid.dz); + fprintf(file," Metainfo: grid origin: %f %f %f\n", + thisgrid.x_origin, thisgrid.y_origin, thisgrid.z_origin); + } + } + } + else { + return retval=CTA_FORMAT_NOT_SUPPORTED; + } + + + + return retval; + +} + + + +int CTA_Metainfo_Import(CTA_Metainfo minfo, CTA_Handle usrdata){ + int retval; + CTAI_Gridm thisgrid; + char unitname[CTA_STRLEN_TAG]; + + + if (CTA_Handle_Check(usrdata,CTA_PACK)==CTA_OK) { + + if (IDEBUG>0) printf("Metainfo: unpacking:\n"); + retval=CTA_Pack_Get(usrdata,unitname,sizeof(char)*CTA_STRLEN_TAG); + retval=CTA_Pack_Get(usrdata,&thisgrid,sizeof(CTAI_Gridm)); + + retval = CTA_Metainfo_SetUnit(minfo,unitname); + retval = CTA_Metainfo_SetGrid(minfo,&thisgrid); + + // controle: + if (IDEBUG> 4) { + printf("CONTROLE: metainfo import:\n -------------------------- \n"); + retval = CTA_Metainfo_Export(minfo,CTA_FILE_STDOUT); + printf("controle: end -------------------------- \n"); + } + + } + + else { + return CTA_FORMAT_NOT_SUPPORTED; + } + + + + return retval; + +} + + + +#undef METHOD +#define METHOD "Free" +int CTA_Metainfo_Free(CTA_Metainfo *hdescr){ + int retval; /* Return value of COSTA call */ + CTAI_Metainfo *descr; /* All data of observer hmetainfo */ + + /* Check for quick return */ + if (*hdescr==CTA_NULL) return CTA_OK; + + /* Get pointer to struct with observation description data */ + retval=CTA_Handle_GetData((CTA_Handle) *hdescr,(void*) &descr); + if (retval!=CTA_OK) { + char message[1024]; + sprintf(message,"metainfo_free: error %d \n",retval); + CTA_WRITE_ERROR(message); + return retval; + } + + free(descr->thisgrid); + free(descr); + retval = CTA_Handle_Free(hdescr); + if (retval!=CTA_OK) { + char message[1024]; + sprintf(message,"metainfo_free: handle_free : error %d \n",retval); + CTA_WRITE_ERROR(message); + } + return retval; +} + + +#undef METHOD +#define METHOD "SetRegGrid" +int CTA_Metainfo_setRegGrid(CTA_Metainfo hdescr, char *name, + int nx, int ny, int nz, + double x_origin, double y_origin, + double z_origin, + double dx, double dy, double dz){ + + int retval; /* Return value of COSTA call */ + CTAI_Metainfo* descr; /* All data of observer hmetainfo */ + CTAI_Gridm *grid; + int typeGrid; + + + /* Check for quick return */ + if (hdescr==CTA_NULL) return CTA_OK; + + /* Get pointer to struct with observation description data */ + retval=CTA_Handle_GetData((CTA_Handle) hdescr,(void**) &descr); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* Just set the grid info */ + grid=descr->thisgrid; + strcpy(grid->name,name); + + if (ny==0){ + typeGrid = 1; + } else if (nz==0) { + typeGrid = 2; + } else { + typeGrid = 3; + } + + + + + grid->type =typeGrid; + grid->nx =nx; + grid->ny =ny; + grid->nz =nz; + grid->nsize =nx*ny; + grid->x_origin =x_origin; + grid->y_origin =y_origin; + grid->z_origin =z_origin; + grid->dx =dx; + grid->dy =dy; + grid->dz =dz; + + return CTA_OK; + /*refdimp[10][80]*/ +} + +#undef METHOD +#define METHOD "getRegGrid" +int CTA_Metainfo_getRegGrid(CTA_Metainfo hdescr, char *name, + int *nx, int *ny, int *nz, + double *x_origin, double *y_origin, + double *z_origin, + double *dx, double *dy, double *dz){ + + int retval; /* Return value of COSTA call */ + CTAI_Metainfo* descr; /* All data of observer hmetainfo */ + CTAI_Gridm *grid; + + + /* Check for quick return */ + if (hdescr==CTA_NULL) return CTA_OK; + + /* Get pointer to struct with observation description data */ + retval=CTA_Handle_GetData((CTA_Handle) hdescr,(void**) &descr); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* Just set the grid info */ + grid=descr->thisgrid; + strcpy(name,grid->name); + *nx = grid->nx; + *ny = grid->ny; + *nz = grid->nz; + *x_origin = grid->x_origin; + *y_origin = grid->y_origin; + *z_origin = grid->z_origin; + *dx = grid->dx; + *dy = grid->dy; + *dz = grid->dz; + + return CTA_OK; + /*refdimp[10][80]*/ +} + + + +/* Interfacing with Fortran */ + +CTAEXPORT void CTA_METAINFO_DEFINECLASS_F77( char *name, int *h_func, int *hobsdscrcl,int *ierr, int len_name){ + + char *c_name; + // create a c-string equivalent to name + c_name=CTA_Malloc((len_name+1)*sizeof(char)); + CTA_fstr2cstr(name,c_name,len_name); + + *ierr=CTA_Metainfo_DefineClass(name, (CTA_Func*) h_func, (CTA_MetainfoClass*) hobsdscrcl); + + free(c_name); +} +/* +void CTA_METAINFO_CREATE_F77(int *hsobscl, int *usrdat, + int *hmetainfo, int *ierr){ + + *ierr=CTA_Metainfo_Create((CTA_MetainfoClass) *hsobscl, (CTA_Handle) *usrdat, + (CTA_Metainfo*) hmetainfo); +*/ + +CTAEXPORT void CTA_METAINFO_CREATE_F77(int *hmetainfo, int *ierr){ + + *ierr=CTA_Metainfo_Create((CTA_Metainfo*) hmetainfo); +} + + +CTAEXPORT void CTA_METAINFO_SETTAG_F77(int *hobsdescr, char *tag, int *ierr, int len_tag) { + char *c_tag; + // create a c-string equivalent to tag + c_tag = CTA_Malloc((len_tag+1)*sizeof(char)); + CTA_fstr2cstr(tag,c_tag,len_tag); + + *ierr = CTA_Metainfo_SetTag((CTA_Metainfo) *hobsdescr, c_tag); + free(c_tag); +}; + +CTAEXPORT void CTA_METAINFO_SETBELONGSTO_F77(int *hobsdescr, char *tag, int *ierr, int len_tag) { + char *c_tag; + // create a c-string equivalent to tag + c_tag = CTA_Malloc((len_tag+1)*sizeof(char)); + CTA_fstr2cstr(tag,c_tag,len_tag); + + *ierr = CTA_Metainfo_SetBelongsTo((CTA_Metainfo) *hobsdescr, c_tag); + free(c_tag); +}; + + +CTAEXPORT void CTA_METAINFO_SETGRID_F77(int *hobsdescr, int *hgrid, int *ierr) { + *ierr= CTA_Metainfo_SetGrid((CTA_Metainfo) *hobsdescr, (CTAI_Gridm*) hgrid); +} + + +CTAEXPORT void CTA_METAINFO_EXPORT_F77(int *hdescr, int *usrdat, int *ierr){ + *ierr=CTA_Metainfo_Export((CTA_Metainfo) *hdescr, (CTA_Handle) *usrdat); +} + +CTAEXPORT void CTA_METAINFO_IMPORT_F77(int *hdescr, int *usrdat, int *ierr){ + *ierr=CTA_Metainfo_Import((CTA_Metainfo) *hdescr, (CTA_Handle) *usrdat); +} + + +CTAEXPORT void CTA_METAINFO_FREE_F77(int *hobsdscr, int *ierr){ + *ierr=CTA_Metainfo_Free((CTA_Metainfo *) hobsdscr); +} + + +CTAEXPORT void CTA_METAINFO_GETREGGRID_F77(int *hdescr, char *name, + int *nx, int *ny, int *nz, + double *x_origin, double *y_origin, + double *z_origin, + double *dx, double *dy, double *dz, int *ierr, int lenstr){ + char c_name[CTA_STRLEN_TAG]; + *ierr = CTA_Metainfo_getRegGrid((CTA_Metainfo) *hdescr, c_name, nx, ny, nz, + x_origin, y_origin, z_origin, dx, dy, dz); + + CTA_cstr2fstr(c_name,name,lenstr); +} + +CTAEXPORT void CTA_METAINFO_SETREGGRID_F77(int *hdescr, char *name, + int *nx, int *ny, int *nz, + double *x_origin, double *y_origin, + double *z_origin, + double *dx, double *dy, double *dz, int *ierr, int strlen){ + + char *c_name; + // create a c-string equivalent to name + c_name=CTA_Malloc((strlen+1)*sizeof(char)); + CTA_fstr2cstr(name,c_name,strlen); + + *ierr=CTA_Metainfo_setRegGrid((CTA_Metainfo) *hdescr, c_name, + *nx, *ny, *nz, + *x_origin, *y_origin, + *z_origin, + *dx, *dy, *dz); + free(c_name); + +} + + + diff --git a/costa/native/cta/src/cta_method.c b/costa/native/cta/src/cta_method.c new file mode 100644 index 000000000..373ab3781 --- /dev/null +++ b/costa/native/cta/src/cta_method.c @@ -0,0 +1,208 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_method.c $ +$Revision: 2751 $, $Date: 2011-09-09 08:58:46 +0200 (Fri, 09 Sep 2011) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "cta_mem.h" +#include "cta_method.h" +#include "cta_message.h" + +#define CLASSNAME "CTA_Method" + + +/* Struct holding all data associated to an COSTA Vector */ +typedef struct { +CTA_Func functions[CTA_METH_NUMFUNC]; +} CTAI_MethodClass; // A SObsClass contains a list of the member-functions + + + +typedef struct { +CTA_Func functions[CTA_METH_NUMFUNC]; +void *data; // pointer to the implementation-specific data. +} CTAI_Method; + + + +#undef METHOD +#define METHOD "DefineClass" +int CTA_Meth_DefineClass(const char *name, const CTA_Func h_func[CTA_METH_NUMFUNC], + CTA_MethClass *hmethcl){ + + CTAI_MethodClass *data; + int retval; + + /* Allocate new StochObs object */ + data=CTA_Malloc(sizeof(CTAI_MethodClass)); + data->functions[CTA_METH_CREATE_SIZE]=h_func[CTA_METH_CREATE_SIZE]; + data->functions[CTA_METH_CREATE_INIT]=h_func[CTA_METH_CREATE_INIT]; + data->functions[CTA_METH_RUN ] =h_func[CTA_METH_RUN ]; + data->functions[CTA_METH_FREE ]=h_func[CTA_METH_FREE ]; + + // Allocate new handle + retval=CTA_Handle_Create(name,CTA_METHOD,data,hmethcl); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_method handle"); + return retval; + } + // return error when unsuccesfull + return retval; +} + + +#undef METHOD +#define METHOD "Create" +int CTA_Meth_Create(CTA_Method hmethcl, CTA_Handle userdata, + CTA_Method *hmeth){ + + CTAI_Method *meth; + int memsize; + int retval; + CTAI_MethodClass *clsdata; + CTA_Function *my_Create_Size, *my_Create_Init; + int i; + + /* Get class data containing all function pointers */ + retval=CTA_Handle_Check((CTA_Handle) hmethcl,CTA_METHODCLASS); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_methodclass handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hmethcl,(void**) &clsdata); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + + /* determine size of data object (CTA_SOBS_CREATE_SIZE)*/ + retval=CTA_Func_GetFunc(clsdata->functions[CTA_METH_CREATE_SIZE], + &my_Create_Size); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function CTA_METH_CREATE_SIZE"); + return retval; + } + my_Create_Size(&memsize,&retval); + if (retval) { + CTA_WRITE_ERROR("Error in my_Create_Size"); + return retval; + } + + /* allocate memory for new stochobs object */ + meth=CTA_Malloc(sizeof(CTAI_Method)); + meth->data=CTA_Malloc(memsize); + + /* copy function pointers */ + for (i=0;ifunctions[i]=clsdata->functions[i]; + } + + /* Initialise and fill new stochobs */ + retval=CTA_Func_GetFunc(clsdata->functions[CTA_METH_CREATE_INIT], + &my_Create_Init); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function CTA_METH_CREATE_INIT"); + return retval; + } + my_Create_Init(meth->data, userdata, &retval); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in my_Create_Init"); + return retval; + } + + /* Allocate new handle and return error when unsuccesfull */ + retval=CTA_Handle_Create("method",CTA_METHOD,meth,hmeth); + if (retval) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + return CTA_OK; +} + + + + +int CTAI_Meth_member_function( + // INPUTS + CTA_Method hmeth, /* Handle of the method of + which a member function is wanted */ + int member, /* Code of the member function */ + // OUTPUT + CTAI_Method **meth, /* All data of method */ + CTA_Function **memfun /* Member-Function pointer */ +) +{ + int retval; + /* Check that the given handle is indeed an observer */ + retval=CTA_Handle_Check((CTA_Handle) hmeth, CTA_METHOD); + if (retval!=CTA_OK) return retval; + + /* Get pointer to struct with observer data */ + retval=CTA_Handle_GetData((CTA_Handle) hmeth,(void**) meth); + if (retval!=CTA_OK) return retval; + + /* Get pointer to implementation of this function */ + retval=CTA_Func_GetFunc((*meth)->functions[member],memfun); + return retval; +} + + +int CTA_Meth_Run(CTA_Method hmeth) { + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Method *meth; /* All data of method */ + CTA_Function *memfun; /* Function that must be called */ + + /* Look up member function and method data */ + retval = CTAI_Meth_member_function(hmeth, CTA_METH_RUN, + &meth, &memfun); + if (retval!=CTA_OK) return retval; + + /* Call (user) implementation */ + memfun(meth->data,&retval); + return retval; +}; + +int CTA_Meth_Free(CTA_Method *hmeth){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Method *meth; /* All data of meth */ + CTA_Function *my_free; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + + if (*hmeth==CTA_NULL) return CTA_OK; + + /* Look up member function and method data */ + retval = CTAI_Meth_member_function(*hmeth, CTA_METH_FREE, + &meth, &my_free); + if (retval!=CTA_OK) return retval; + + /* Call (user) implementation */ + my_free(meth->data,&retval); + free(meth->data); + free(meth); + retval=CTA_Handle_Free(hmeth); + + return retval; +}; + + diff --git a/costa/native/cta/src/cta_modbuild_par.c b/costa/native/cta/src/cta_modbuild_par.c new file mode 100644 index 000000000..e4c7d32c7 --- /dev/null +++ b/costa/native/cta/src/cta_modbuild_par.c @@ -0,0 +1,3296 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_modbuild_par.c $ +$Revision: 4056 $, $Date: 2013-07-03 14:19:55 +0200 (Wed, 03 Jul 2013) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2006 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "cta_mem.h" +#include "cta.h" +#include "f_cta_utils.h" +#include "cta_model_utilities.h" + +#ifdef USE_MPI +#include "mpi.h" +#include "cta_par.h" + +#define IDEBUG (1) +#define EXITONERROR (1) +#define ALL_WORKERS (-1) +#define MASTER (0) +#define NOOSOBS (1) + +#define CTA_PACK_DEFAULT_SIZE (1000000) + + +double MISVAL=-1E20; + +enum TypeState {STATE, FORC, PARAM, SCAL}; + + +typedef struct { +CTA_Model mthis; /* Handles of this model (local handle) */ +int *mremote; /* Remote handle numbers of the model for each process 1..nworker */ +MPI_Comm comm; /* Communicator to use for this model instance */ +int is_intercomm; /* Flag indicating whether comm is an intercommunication group */ +int nworker; /* Number of workers to adress to */ +CTA_String *SelectObs; /* Selection for observations for all models */ +} CTAI_Modbuild_par; + + +int ctai_rank_ind = 1; /* Rank of new worker to be created */ +int ctai_rank =-1; /* My rank */ +int ctai_rank_master= 0; /* rank of master */ +int ctai_comm_size =-1; /* Total number of workers */ +int hmodel_scratch =CTA_NULL; /* handle of extra model instance on worker */ + +CTA_ObsDescr last_hdescr=CTA_NULL; +CTA_ObsDescr last_hdescr_tab=CTA_NULL; + +CTA_Model *createdModels =NULL; +int numCreatedModels=0; + +void modbuild_par_getobslocalizationdomain(CTAI_Modbuild_par *data, + CTA_ObsDescr *hdescr, double *distance, int *idomain, CTA_Vector *locVecs, int *ierr); + + +void modbuild_models_add(CTA_Model hmodel){ + int iModel; + CTA_Model *newList; + + printf("Adding model %d\n",hmodel); + numCreatedModels++; + newList= (CTA_Model*) CTA_Malloc(numCreatedModels*sizeof(CTA_Model)); + for (iModel=0; iModel=0){ + numCreatedModels--; + if (numCreatedModels>0){ + CTA_Model *newList= (CTA_Model*) CTA_Malloc(numCreatedModels*sizeof(CTA_Model)); + for (iModel=0;iModel0){ + printf("modbuild_par_sendtaskandhandle\n"); + printf("task=%s rank=%d, \n",task,rank); + printf("nworker=%d\n",data->nworker); + } + + + /* Loop over all worker processes of this model */ + for (irank=1;irank<=data->nworker;irank++){ + if (irank==rank || rank<0) { + if (data->is_intercomm){ + jrank=irank-1; + } + else { + jrank=irank; + } + + + /* Send Task */ + lentask=strlen(task)+1; + MPI_Send(task,lentask, MPI_CHAR, jrank, tag, data->comm); + + /* Send remote model handle */ + if (IDEBUG>0) printf("# %d modbuild_par_sendtaskandhandle: send task (%s) and handle (%d) to worker %d\n", + CTA_PAR_MY_RANK, task, data->mremote[irank],irank); + MPI_Send((void*) &data->mremote[irank], 1, MPI_INT, jrank, tag, data->comm); + } + } +} + + +/* Send a state-vector */ +void modbuild_par_sendstate(CTA_TreeVector state, CTAI_Modbuild_par *data, int tag){ + int lentask; + CTA_Pack hpack; + char *pack; + CTA_TreeVector state_sub; + MPI_Comm comm; + int irank,jrank, nworker; + + /* Handle to send to master if data==NULL) */ + if (data) { + nworker=data->nworker; + comm=data->comm; + } else { + nworker=1; + comm=CTA_COMM_MASTER_WORKER; + } + + + /* Loop over all worker processes of this model */ + for (irank=1;irank<=nworker;irank++){ + /* Get substate (when necessary) */ + if (nworker>1){ + CTA_TreeVector_GetSubTreeVecIndex (state, irank, &state_sub); + } else { + state_sub=state; + } + + CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&hpack); + + CTA_TreeVector_Export(state_sub,hpack); + lentask= CTA_Pack_GetLen(hpack); + pack = CTA_Pack_GetPtr(hpack); + + /* Handle to send to master if data==NULL) */ + if (data){ jrank=irank;} else {jrank=0;} + + if (data && data->is_intercomm) jrank--; + + + /* Send length of packed state */ + MPI_Send(&lentask, 1, MPI_INT, jrank, tag, comm); + + /* Send packed state */ + MPI_Send(pack, lentask, MPI_CHAR, jrank, tag, comm); + + /* Free pack-buffer */ + CTA_Pack_Free(&hpack); + } +} + +/* Receive a state-vector */ +void modbuild_par_recvstate(CTA_TreeVector *state, CTAI_Modbuild_par *data, int tag){ + MPI_Status status; + int retval; + int lenpack; + CTA_Pack hpack; + char *pack; + char sNum[20]; + char rootwrk[30]; + MPI_Comm comm; + + CTA_TreeVector *state_subs, state_sub; + int nworker, irank, jrank; + + /* Handle to send to master if data==NULL) */ + if (data) { + nworker=data->nworker; + comm=data->comm; + } else { + nworker=1; + comm=CTA_COMM_MASTER_WORKER; + } + + /* Create state including sub-states when needed */ + if (*state==CTA_NULL) { + CTA_TreeVector_Create("MASTERroot","MASTERroot",state); + + /* Create sub-states as well when we have multiple workers */ + if (nworker>1) { + state_subs=(CTA_Vector*) CTA_Malloc(sizeof(CTA_TreeVector)*nworker); + + + for (irank=1;irank<=nworker;irank++){ + + sprintf(sNum,"%d",irank); + strcpy(rootwrk,"rtwrk"); + strcat(rootwrk,sNum); + CTA_TreeVector_Create("root_worker",rootwrk,&(state_subs[irank-1])); + } + CTA_TreeVector_Conc (*state, state_subs, nworker); + free(state_subs); + } + //printf("modbuild_par_recstate: state did not exist yet; created \n"); + } + + /* Receive all states from worker */ + for (irank=1;irank<=nworker;irank++){ + if (nworker>1){ + retval=CTA_TreeVector_GetSubTreeVecIndex (*state, irank, &state_sub); + } else { + state_sub=*state; + } + + /* Handle receive from master if data==NULL) */ + if (data){ jrank=irank;} else {jrank=0;} + + if (data && data->is_intercomm) jrank--; + + /* Receive state-vector */ + retval=MPI_Recv(&lenpack, 1, MPI_INT, jrank,1234, comm, &status); + retval=CTA_Pack_Create(lenpack,&hpack); + pack =CTA_Pack_GetPtr(hpack); + + retval=MPI_Recv(pack, lenpack, MPI_CHAR, jrank,1234, comm, &status); + retval=CTA_Pack_AddCnt(hpack,lenpack); + + /* unpack the received-state */ + retval=CTA_TreeVector_Import(state_sub,hpack); + if (retval!=CTA_OK) { + printf("modbuild_par_recvstate: error (%d) in CTA_TreeVector_Import\n",retval); + exit(-1); + } + + /* Free pack-buffer */ + retval=CTA_Pack_Free(&hpack); + + } + if (IDEBUG>0) { + printf("---------------------------------modbuild_par_recstate: whole state DONE \n"); + printf("................................ whole state is: \n"); + retval = CTA_TreeVector_Info(*state); + } +} + + +void modbuild_par_sendobsdescr(CTAI_Modbuild_par *data, int krank, CTA_Time ttime, + CTA_ObsDescr hdescr, int tag, int *ierr){ + + double span[2]; + int lentask; + CTA_Pack hpack; + char *pack; + int irank, jrank; + + + hpack=CTA_NULL; + /* handle an empty desciption */ + if (hdescr==CTA_NULL){ + hpack=CTA_NULL; + lentask= 0; + } + else { + if (IDEBUG>0) printf("modbuild_par_sendobsdescr :Start\n"); + /* First delete an existing obsdescr */ + if (! last_hdescr_tab == CTA_NULL) { + if (IDEBUG>0) printf("modbuild_par_sendobsdescr: MEMLEAK FIXED\n"); + CTA_ObsDescr_Free(&last_hdescr_tab); + } + /* Create a new table obsdescr */ + *ierr=CTA_ObsDescr_Create(CTA_OBSDESCR_TABLE,hdescr, &last_hdescr_tab); + if (IDEBUG>0) printf("Created table version ierr=%d\n",*ierr); + if (*ierr!=CTA_OK) return; + + /* Pack hdescr_tab */ + *ierr=CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&hpack); + if (*ierr!=CTA_OK) return; + *ierr=CTA_ObsDescr_Export(last_hdescr_tab, hpack); + if (*ierr!=CTA_OK) return; + + /* Send hdescr_tab */ + lentask= CTA_Pack_GetLen(hpack); + pack = CTA_Pack_GetPtr(hpack); + } + + /* Send to all workers */ + for (irank=1;irank<=data->nworker;irank++){ + if (irank==krank || krank<0){ + + jrank=irank; + if (data && data->is_intercomm) jrank--; + + + /* Send timespan */ + *ierr=CTA_Time_GetSpan(ttime, &span[0], &span[1]); + + if (*ierr!=CTA_OK) return; + if (IDEBUG>0) {printf("#%d Sending timespan to %d\n",CTA_PAR_MY_RANK,irank);} + *ierr=MPI_Send((void*) span, 2, MPI_DOUBLE, jrank, tag, data->comm); + if (*ierr!=CTA_OK) return; + if (IDEBUG>0) {printf("#%d Sending done\n",CTA_PAR_MY_RANK);} + + + if (IDEBUG>0) {printf("#%d Sending lenpack to %d\n",CTA_PAR_MY_RANK,irank);} + /* Send length of packed obsdescr */ + MPI_Send(&lentask, 1, MPI_INT, jrank, tag, + data->comm); + if (lentask>0){ + if (IDEBUG>0) {printf("#%d Sending packed obsdescr to %d\n",CTA_PAR_MY_RANK,irank);} + /* Send packed obsdescr */ + MPI_Send(pack, lentask, MPI_CHAR, jrank, tag, data->comm); + } + } + } + + /* Free vars we do not need */ + *ierr=CTA_Pack_Free(&hpack); + if (*ierr!=CTA_OK) return; + + *ierr=CTA_OK; + if (IDEBUG>0) printf("modbuild_par_sendobsdescr :end\n"); +} + + + + +void modbuild_par_recvobsdescr(CTA_Time *timespan, CTA_ObsDescr *hdescr_tab, + int tag){ + + MPI_Status status; + CTA_Pack hpack; + double span[2]; + int lenpack, retval; + char *pack; + + /* Receive the simulation timespan */ + retval=CTA_Time_Create(timespan); + + if (IDEBUG>0) {printf("#%d Receiving timespan from master\n",CTA_PAR_MY_RANK);} + retval=MPI_Recv(span, 2, MPI_DOUBLE, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + retval=CTA_Time_SetSpan(*timespan, span[0], span[1]); + + /* Receive the observation description */ + if (IDEBUG>0) {printf("#%d Receiving lenpack from master\n",CTA_PAR_MY_RANK);} + retval=MPI_Recv(&lenpack, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + if (IDEBUG>0) {printf("#%d lenpack =%d\n",CTA_PAR_MY_RANK, lenpack);} + + if (lenpack==0){ + CTA_ObsDescr_Free(hdescr_tab); + *hdescr_tab=CTA_NULL; + } + else { + retval=CTA_Pack_Create(lenpack,&hpack); + pack =CTA_Pack_GetPtr(hpack); + + if (IDEBUG>0) {printf("#%d Receiving packed obsdescr from master\n",CTA_PAR_MY_RANK);} + retval=MPI_Recv(pack, lenpack, MPI_CHAR, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + if (IDEBUG>0) {printf("#%d Done Receiving packed obsdescr from master\n",CTA_PAR_MY_RANK); } + + retval=CTA_Pack_AddCnt(hpack,lenpack); + + /* Create the observation description */ + if (! *hdescr_tab == CTA_NULL){ + printf("modbuild_par_sendobsdescr :MEMLEAK (2) FIXED\n"); + CTA_ObsDescr_Free(hdescr_tab); + } + retval=CTA_ObsDescr_Create(CTA_OBSDESCR_TABLE,hpack, hdescr_tab); + if (IDEBUG>0) {printf("#%d Done Creating obsdescr %d\n",CTA_PAR_MY_RANK, retval);} + retval=CTA_Pack_Free(&hpack); + } + if (IDEBUG>0) {printf("#%d End of modbuild_par_recvobsdescr\n",CTA_PAR_MY_RANK);} +} + +/* Determine the size of the datablock of one model instance */ +void modbuild_par_create_size(CTA_Handle *userdata, int *memsize, + int *ierr){ + *memsize=(int) sizeof(CTAI_Modbuild_par); + *ierr=CTA_OK; +} + +/* Create a model-instance */ +void modbuild_par_create_init(CTA_Handle *this, CTAI_Modbuild_par *data , + CTA_Handle *hinput, int *ierr){ + + MPI_Status status; + CTA_String sclsnam; + CTA_ModelClass modcls; + CTA_Handle tmodel; + const char *clsnam, *modinp; + int lennamcls, lenmodel; + int rank, jrank; + int tag, lentask; + int isintercomm; + char *task; + + int cleanup; /*Flag indicating whether tinput must be freed */ + CTA_Tree tinput; + + /* Convert input into a COSTA-tree if it is not done already */ + *ierr=CTA_Model_Util_InputTree(*hinput, &tinput, &cleanup); + if (*ierr!=CTA_OK) return; + + /* Handle input :*/ + + /* Determine name of model class to worker process */ + *ierr=CTA_Tree_GetHandleStr(tinput,"modelclass", &modcls); + if (*ierr!=CTA_OK) return; + + /* get name of model class */ + CTA_String_Create(&sclsnam); + CTA_Handle_GetName(modcls,sclsnam); + clsnam=CTAI_String_GetPtr(sclsnam); + + /* Get parallel information */ + *ierr=CTA_Par_CreateNewCreateGetComm(modcls, &(data->comm)); + if (*ierr!=CTA_OK){ + printf("FATAL ERROR CTA_Par_CreateNewCreateGetComm -failed \n"); + exit(-1); + } + + /* Test whether this is an inter communicator */ + /* Test whether this is an inter communicator */ + MPI_Comm_test_inter(data->comm, &(data->is_intercomm)); + + /* Determine the number of workers */ + if (data->is_intercomm){ + MPI_Comm_remote_size(data->comm,&(data->nworker)); + } + else { + MPI_Comm_size(data->comm,&(data->nworker)); + //printf("DE GROEP BESTAAT UIT %d processes \n", data->nworker); + data->nworker--; + //printf("DE GROEP BESTAAT UIT %d workers \n", data->nworker); + } + + /* Allocate array mremote holding remote handle numbers create one + * larger in order to avoind 0-indexing */ + data->mremote=CTA_Malloc((data->nworker+1)*sizeof(int)); + + /* Determine the model input name of model class to worker process */ + *ierr=CTA_Tree_GetHandleStr(tinput,"model", &tmodel); + if (*ierr!=CTA_OK) return; + + /* We cannot pack and send whole costa trees. + We only support a single string */ + *ierr=CTA_String_GetLength(tmodel, &lenmodel); + if (*ierr==CTA_OK) { + modinp=CTAI_String_GetPtr(tmodel); + } else { + printf("modbuild_par_create_init: input /modelbuild_par/model "); + printf("must be a string.\n"); + printf("packing and sending of COSTA-tree objects is not yet "); + printf("supported.\n"); + exit(-1); + } + /* Send data to worker (s)*/ + tag=1234; + task="cta_model_create"; + lentask=strlen(task)+1; + + + /* Communicate to all workers Note rank 0 is master! */ + for (rank=1;rank<=data->nworker;rank++){ + if (IDEBUG>0) printf("#%d Creating model instance on worker %d\n",CTA_PAR_MY_RANK, rank); + + jrank=rank; + if (data && data->is_intercomm) jrank--; + + /* Send task */ + + MPI_Comm_test_inter(data->comm,&isintercomm); + + MPI_Send("cta_model_create",lentask, MPI_CHAR, jrank,tag, + data->comm); + + /* Send name of model class */ + lennamcls=strlen(clsnam); + MPI_Send((void*) clsnam, lennamcls+1, MPI_CHAR, jrank,tag, + data->comm); + /* Send name of input */ + lenmodel=strlen(modinp); + MPI_Send((void*) modinp, lenmodel+1, MPI_CHAR, jrank,tag, + data->comm); + } + /* Receive local handle if new model-instances */ + for (rank=1;rank<=data->nworker;rank++){ + + jrank=rank; + if (data && data->is_intercomm) jrank--; + + MPI_Recv(&(data->mremote[rank]),1,MPI_INT, jrank, 1234, + data->comm,&status); + if (IDEBUG>0) printf("#%d Done creating model instance on worker\n",CTA_PAR_MY_RANK); + } + /* Set rest of data */ + data->mthis=*this; + + /* Set selections */ + data->SelectObs=NULL; + + *ierr=CTA_OK; + + CTA_String_Free(&sclsnam); + /* Clean-up input tree */ + if (cleanup==CTA_TRUE){ + CTA_Tree_Free(&tinput); + } +} + +int receive_pack_object( int rank, int tag, BOOL add_length, CTA_Pack hpack, MPI_Comm comm) +{ + int retval, lenpack; + char* buffer; + MPI_Status status; + + if (IDEBUG>0) printf("receive_pack_object: ervoor \n"); + + retval=MPI_Recv(&lenpack,1,MPI_INT,rank,tag,comm,&status); + + if (IDEBUG>0) printf("receive_pack_object: lenpack %d\n", lenpack); + + buffer= (char*) CTA_Malloc(lenpack*sizeof(char)); + retval=MPI_Recv(buffer,lenpack,MPI_CHAR,rank,tag,comm,&status); + + /* Additional add length in pack object */ + if (add_length) { + retval=CTA_Pack_Add(hpack,&lenpack,sizeof(int)); + } + + /* Add packed model state */ + retval=CTA_Pack_Add(hpack,buffer,lenpack); + free(buffer); + return retval; +} + +void modbuild_par_export( + CTAI_Modbuild_par *data, + CTA_Handle *hpack, /* export object (e.q. handle to file or pack instance) */ + int *ierr) +{ + int tag; + int irank, jrank; + + if (IDEBUG>0) printf("modbuild_par_export\n"); + + tag=1234; + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_export", ALL_WORKERS, data, tag); + + for (irank=1;irank<=data->nworker;irank++){ + + jrank=irank; + if (data && data->is_intercomm) jrank--; + + /* Send receiving rank */ + + if (IDEBUG>0) printf("modbuild_par_export: send rank of receiving process (%d) to process %d\n",ctai_rank_master, irank); + MPI_Send(&ctai_rank_master, 1, MPI_INT, jrank,tag, data->comm); + } + + for (irank=1;irank<=data->nworker;irank++){ + + jrank=irank; + if (data && data->is_intercomm) jrank--; + +// Hmmmmmmm what is the global rank??? + /* Receive the export (currently only supporting COSTA pack object) */ + *ierr=receive_pack_object(jrank,tag, TRUE, *hpack, data->comm); + + } + + + + +} + +void CTA_Modbuild_par_worker_export(){ + + MPI_Status status; + CTA_Model hmodel; + int lentask, retval, tag, rank; + CTA_Pack hpack; + char *pack; + + if (IDEBUG>0) printf("# %d modbuild_par_worker_export\n",CTA_PAR_MY_RANK); + + tag=1234; + + /* Receive the model handle */ + retval=MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag,CTA_COMM_MASTER_WORKER,&status); + if (IDEBUG>0) printf("# %d modbuild_par_worker_export: receiving model handle %d\n",CTA_PAR_MY_RANK, hmodel); + /* Receive receiving rank */ + retval=MPI_Recv(&rank, 1, MPI_INT, ctai_rank_master,tag,CTA_COMM_MASTER_WORKER,&status); + + if (IDEBUG>0) printf("# %d modbuild_par_worker_export: receiving rank %d\n",CTA_PAR_MY_RANK, rank); + + /* Create local pack object */ + retval = CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&hpack); + retval = CTA_Model_Export(hmodel,hpack); + if (retval!=CTA_OK) { + printf("#%d modbuild_par_worker_exort: FATAL ERROR in CTA_Model_Export ierr=%d\n",CTA_PAR_MY_RANK, retval); + exit(-1); + } + + lentask = CTA_Pack_GetLen(hpack); + pack = CTA_Pack_GetPtr(hpack); + + /* Send length of packed state */ + retval=MPI_Send(&lentask, 1, MPI_INT, rank, tag, CTA_COMM_MASTER_WORKER); + /* Send packed state */ + retval=MPI_Send(pack, lentask, MPI_CHAR, rank, tag, CTA_COMM_MASTER_WORKER); + /* Free pack-buffer */ + retval=CTA_Pack_Free(&hpack); +} + + +void modbuild_par_import( + CTAI_Modbuild_par *data, + CTA_Handle *himport, /* import object (e.q. handle to file or pack instance) */ + int *ierr) +{ + + int tag, lentask; + char *pack; + int irank, jrank, ip1, ip2; + + + if (IDEBUG>0) printf("modbuild_par_import\n"); + + tag=1234; + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_import", ALL_WORKERS, data, tag); + for (irank=1;irank<=data->nworker;irank++){ + + jrank=irank; + if (data && data->is_intercomm) jrank--; + + /* Unpack length of packed data for this worker */ + *ierr=CTA_Pack_Get(*himport, &lentask, sizeof(int)); + if (*ierr!=CTA_OK) return; + + /* Get pointer to data */ + pack = CTA_Pack_GetPtr(*himport); + + /* Send length of data to worker */ + *ierr=MPI_Send(&lentask, 1, MPI_INT, jrank,tag,data->comm); + if (*ierr!=CTA_OK) return; + + /* Send packed state */ + *ierr=MPI_Send(pack, lentask, MPI_CHAR, jrank,tag,data->comm); + if (*ierr!=CTA_OK) return; + + /* Set pointer in packed data */ + *ierr=CTA_Pack_GetIndx(*himport, &ip1, &ip2); + if (*ierr!=CTA_OK) return; + *ierr=CTA_Pack_SetIndx(*himport, ip1+lentask, ip2); + if (*ierr!=CTA_OK) return; + } +} + +void CTA_Modbuild_par_worker_import() +{ + CTA_Pack himport; /* import object (e.q. handle to file or pack instance)*/ + CTA_Model hmodel; + MPI_Status status; + int retval, tag; + + if (IDEBUG>0) printf("#%d modbuild_par_worker_import\n",CTA_PAR_MY_RANK ); + + tag=1234; + + /* Receive the model handle */ + retval = MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag,CTA_COMM_MASTER_WORKER,&status); + + retval = CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&himport); + if (retval!=CTA_OK) { + printf("#%d modbuild_par_worker_import: FATAL ERROR in COSTA METHOD ierr=%d\n",CTA_PAR_MY_RANK, retval); + exit(-1); + } + + retval = receive_pack_object(ctai_rank_master,tag,FALSE,himport,CTA_COMM_MASTER_WORKER); + retval = CTA_Model_Import(hmodel,himport); + if (retval!=CTA_OK) { + printf("#%d modbuild_par_worker_import: FATAL ERROR in CTA_Model_Import ierr=%d\n",CTA_PAR_MY_RANK, retval); + exit(-1); + } + retval = CTA_Pack_Free(&himport); +} + + +void modbuild_par_free(CTAI_Modbuild_par *data ,int *ierr){ + int tag; + + tag=1234; + + if (IDEBUG>0) printf("#%d modbuild_par_free\n",CTA_PAR_MY_RANK); + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_free", ALL_WORKERS, data, tag); + + *ierr=CTA_OK; +} + +void modbuild_par_compute( + CTAI_Modbuild_par *data, + CTA_Time *timespan, /* current time, "now" */ + int *ierr) +{ + double span[2]; + int tag; + int irank, jrank; + + if (IDEBUG>0) printf("#%d modbuild_par_compute\n",CTA_PAR_MY_RANK); + + tag=1234; + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_compute", ALL_WORKERS, data, tag); + + /* Send timespan */ + CTA_Time_GetSpan(*timespan, &span[0], &span[1]); + for (irank=1;irank<=data->nworker;irank++){ + + jrank=irank; + if (data && data->is_intercomm) jrank--; + + MPI_Send((void*) span, 2, MPI_DOUBLE,jrank,tag,data->comm); + } + *ierr=CTA_OK; +} + + +void modbuild_par_setstate(CTAI_Modbuild_par *data, CTA_TreeVector *state, int *ierr){ + + int tag; + + if (IDEBUG>0) printf("#%d modbuild_par_setstate\n",CTA_PAR_MY_RANK); + tag=1234; + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_setstate", ALL_WORKERS, data, tag); + + /* Send the state */ + modbuild_par_sendstate(*state, data, 1234); + + *ierr=CTA_OK; +} + +void modbuild_par_getstate(CTAI_Modbuild_par *data, CTA_TreeVector *state, int *ierr){ + + int tag; + + if (IDEBUG>0) printf("#%d modbuild_par_getstate\n",CTA_PAR_MY_RANK); + + tag=1234; + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_getstate", ALL_WORKERS, data, tag); + + /* Receive the state */ + modbuild_par_recvstate(state, data, tag); + + *ierr=CTA_OK; + +} + +void modbuild_par_getstatescaling(CTAI_Modbuild_par *data, CTA_TreeVector *state, int *ierr){ + + int tag; + + if (IDEBUG>0) printf("#%d modbuild_par_getstatescaling\n",CTA_PAR_MY_RANK); + + tag=1234; + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_getstatescaling", ALL_WORKERS, data, tag); + + /* Receive the state */ + modbuild_par_recvstate(state, data, tag); + + *ierr=CTA_OK; + +} + + + void modbuild_par_axpymodel(CTAI_Modbuild_par *datay,double *alpha, + CTAI_Modbuild_par *datax, int *ierr) + { + int tag; + int OneWorker; + int irank, jrank; + +// printf("AXPYMODEL NOT IMPLEMENTED\n"); +// exit(-1); + +// if (ctai_comm_size>1) { + tag=1234; + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_axpymodel", ALL_WORKERS, datay, tag); + /* Send alpha */ + for (irank=1;irank<=datay->nworker;irank++){ + jrank=irank; + if (datay && datay->is_intercomm) jrank--; + + MPI_Send(alpha, 1, MPI_DOUBLE, jrank,tag, datay->comm); + } + + /* Check whether the two models are on the same worker */ + OneWorker=datay->comm==datax->comm; + + /* Send flag "Oneworker" */ + for (irank=1;irank<=datay->nworker;irank++){ + jrank=irank; + if (datay && datay->is_intercomm) jrank--; + + MPI_Send(&OneWorker, 1, MPI_INT, jrank,tag, datay->comm); + } + + + + + + + if (OneWorker) { + /* Both models are on the same worker, the worker can handle it by + * itself + */ + + /* Send model handle of x */ + for (irank=1;irank<=datay->nworker;irank++){ + + jrank=irank; + if (datay && datay->is_intercomm) jrank--; + + MPI_Send(&datax->mremote[irank], 1, MPI_INT, jrank,tag, datay->comm); + } + } else { + printf("AXPYMODEL NOT FOR MODELS IN DIFFERENT WORKER GROUPS IS NOT (YET) IMPLEMENTED\n"); + exit(-1); + +// /* Send sending rank */ +// retval=MPI_Send(&datax->rank, 1, MPI_INT, datay->rank,tag, +// CTA_COMM_WORKER); +// +// /* Ask second worker to export to worker that holds model y */ +// modbuild_par_sendtaskandhandle("cta_model_export", datax, tag); +// +// /* Send receiving rank */ +// retval=MPI_Send(&datay->rank, 1, MPI_INT, datax->rank,tag, +// CTA_COMM_WORKER); + + } +// } else { +// *ierr=CTA_Model_AxpyState(datay->mremote,*alpha,datax->mremote); +// } + +} + + +void modbuild_par_axpystate(CTAI_Modbuild_par *data, double *alpha, + CTA_TreeVector *statex, int *ierr){ + + int tag, irank, jrank; + + if (IDEBUG>0) printf("#%d modbuild_par_axpystate\n",CTA_PAR_MY_RANK); + + tag=1234; + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_axpystate", ALL_WORKERS, data, tag); + + /* Send alpha */ + for (irank=1;irank<=data->nworker;irank++){ + jrank=irank; + if (data && data->is_intercomm) jrank--; + + MPI_Send(alpha, 1, MPI_DOUBLE, jrank, tag, + data->comm); + } + /* Send the state */ + modbuild_par_sendstate(*statex, data, 1234); + + *ierr=CTA_OK; +} + +void modbuild_par_setforc(CTAI_Modbuild_par *data, CTA_Time *timespan, + CTA_TreeVector *state, int *ierr){ + + int tag, irank, jrank; + double span[2]; + + tag=1234; + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_setstate", ALL_WORKERS, data, tag); + + /* Send timespan */ + CTA_Time_GetSpan(*timespan, &span[0], &span[1]); + for (irank=1;irank<=data->nworker;irank++){ + + jrank=irank; + if (data && data->is_intercomm) jrank--; + + MPI_Send((void*) span, 2, MPI_DOUBLE, jrank,tag,data->comm); + } + /* Send the state */ + modbuild_par_sendstate(*state, data, 1234); + + *ierr=CTA_OK; + +} + +void modbuild_par_getforc(CTAI_Modbuild_par *data, CTA_Time *timespan, + CTA_TreeVector *state, int *ierr){ + int tag, irank, jrank; + double span[2]; + + tag=1234; + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_getforc", ALL_WORKERS, data, tag); + + /* Send timespan */ + CTA_Time_GetSpan(*timespan, &span[0], &span[1]); + for (irank=1;irank<=data->nworker;irank++){ + + jrank=irank; + if (data && data->is_intercomm) jrank--; + + MPI_Send((void*) span, 2, MPI_DOUBLE,jrank,tag,data->comm); + } + + /* Receive the state */ + modbuild_par_recvstate(state, data, 1234); + + *ierr=CTA_OK; + +} + +void modbuild_par_axpyforc(CTAI_Modbuild_par *data, CTA_Time *timespan, + double *alpha, CTA_TreeVector *statex, int *ierr){ + int tag, irank, jrank; + double span[2]; + + tag=1234; + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_axpyforc", ALL_WORKERS, data, tag); + + /* Send timespan */ + CTA_Time_GetSpan(*timespan, &span[0], &span[1]); + + for (irank=1;irank<=data->nworker; irank++){ + + jrank=irank; + if (data && data->is_intercomm) jrank--; + + MPI_Send((void*) span, 2, MPI_DOUBLE, jrank,tag,data->comm); + + /* Send alpha */ + MPI_Send(alpha, 1, MPI_DOUBLE, jrank,tag, data->comm); + } + + /* Send the state */ + modbuild_par_sendstate(*statex, data, 1234); + + *ierr=CTA_OK; +} + +void modbuild_par_setparam(CTAI_Modbuild_par *data, CTA_TreeVector *state, + int *ierr){ + int tag; + + tag=1234; + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_setparam", ALL_WORKERS, data, tag); + + /* Send the state */ + modbuild_par_sendstate(*state, data, 1234); + + *ierr=CTA_OK; + +} + +void modbuild_par_getparam(CTAI_Modbuild_par *data, CTA_TreeVector *state, + int *ierr){ + int tag; + + tag=1234; + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_getparam", ALL_WORKERS, data, tag); + + /* Create root of state-vector */ + if (*state==CTA_NULL) CTA_TreeVector_Create("root","root",state); + + /* Receive the state */ + modbuild_par_recvstate(state, data, 1234); + + *ierr=CTA_OK; +} + +void modbuild_par_axpyparam(CTAI_Modbuild_par *data, double *alpha, + CTA_TreeVector *statex, int *ierr){ + int tag, irank, jrank; + + tag=1234; + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_axpyparam", ALL_WORKERS, data, tag); + + /* Send alpha */ + for (irank=1;irank<=data->nworker;irank++){ + + jrank=irank; + if (data && data->is_intercomm) jrank--; + + MPI_Send(alpha, 1, MPI_DOUBLE, jrank,tag, data->comm); + } + + /* Send the state */ + modbuild_par_sendstate(*statex, data, 1234); + + *ierr=CTA_OK; +} + +void modbuild_par_getnoisecount(CTAI_Modbuild_par *data, int* nnoise, int* ierr) { + + MPI_Status status; + int tag; + int nnoise_sub, irank, jrank; + + *nnoise=0; + + if (IDEBUG>0) printf("#%d modbuild_par_getnoisecount\n",CTA_PAR_MY_RANK); + + tag=1234; + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_getnoisecount", ALL_WORKERS, data, tag); + + for (irank=1;irank<=data->nworker;irank++){ + + jrank=irank; + if (data && data->is_intercomm) jrank--; + + + /* Receive number of noise parameters */ + MPI_Recv(&nnoise_sub,1,MPI_INT, jrank, 1234, data->comm,&status); + if (irank==1) { + *nnoise=nnoise_sub; + } else { + if (*nnoise!=nnoise_sub) { + printf("Error number of noise parameters of models is not the same\n"); + printf("#%d rank 1 has %d parameters\n", CTA_PAR_MY_RANK, *nnoise); + printf("#%d rank %d has %d parameters\n",CTA_PAR_MY_RANK, irank, nnoise_sub); + exit(-1); + } + } + } + + if (IDEBUG>0) printf("#%d Number of noise parameters %d\n",CTA_PAR_MY_RANK,*nnoise); + *ierr=CTA_OK; +} + +void modbuild_par_getnoisecovar(CTAI_Modbuild_par *data, CTA_TreeVector *colsvar, int* ierr){ + + + MPI_Status status; + int tag, nnoise, icol; + int irank, jrank; + + if (IDEBUG>0) printf("#%d modbuild_par_getnoisecovar\n",CTA_PAR_MY_RANK); + + tag=1234; + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_getnoisecovar", ALL_WORKERS, + data, tag); + + /* Receive number of noise parameters from all workers + * actually one worker is enough but for the time beeing they all send + * it */ + for (irank=1;irank<=data->nworker;irank++){ + + jrank=irank; + if (data && data->is_intercomm) jrank--; + + /* Receive number of noise parameters */ + MPI_Recv(&nnoise,1,MPI_INT, jrank, 1234, + data->comm,&status); + + } + + /* Receive the columns of the covariance matrix one-by-one */ + for (icol=0;icol0) {printf("Reveived column of covar"); +// retval=CTA_TreeVector_Export(colsvar[icol],CTA_FILE_STDOUT); +// } + } + *ierr=CTA_OK; +} + +void modbuild_par_getobsselect_communicate(CTAI_Modbuild_par *data, int krank, CTA_Time *ttime, CTA_ObsDescr *hdescr, + CTA_String *sselect, int* ierr){ + + MPI_Status status; + int tag; + char select[1024]; + CTA_String sselect_sub, sor; + + int irank, jrank; + int retval_method; + int retval_all; + CTA_String_Create(&sselect_sub); + CTA_String_Create(&sor); + CTA_String_Set(sor," OR "); + + strcpy(select,"( "); + + tag=1234; + + if (IDEBUG>0) printf("#%d modbuild_par_getobsselect: krank=%d\n",CTA_PAR_MY_RANK, krank); + if (IDEBUG>0) printf("#%d modbuild_par_getobsselect: ttime=%d\n",CTA_PAR_MY_RANK, *ttime); + + /* Send Task and model handle */ + + modbuild_par_sendtaskandhandle("cta_model_getobsselect", krank, data, tag); + /* Send time and obsdescr (to all workers) */ + if (IDEBUG>0) printf("#%d Send timespan to krank=%d\n",CTA_PAR_MY_RANK, krank); + modbuild_par_sendobsdescr(data, krank, *ttime, *hdescr, tag, ierr); + if (*ierr!=CTA_OK){ + printf("ERROR in modbuild_par_getobsselect: modbuild_par_sendobsdescr returned with error %d\n",*ierr); if (EXITONERROR) exit(-1); + return; + } + retval_all=CTA_OK; + for (irank=1;irank<=data->nworker;irank++){ + if (irank==krank || krank<0){ + + jrank=irank; + if (data && data->is_intercomm) jrank--; + + /* Receive the error string */ + MPI_Recv(&retval_method,1,MPI_INT, jrank, 1234, data->comm,&status); + if (retval_all==CTA_OK && retval_method!=CTA_OK) retval_all=retval_method; + + /* Receive the select string */ + MPI_Recv(select+2,1022,MPI_CHAR, jrank, 1234, + data->comm,&status); + strcpy(select+strlen(select)," )"); + + + if (IDEBUG>0) printf("#%d modbuild_par_getobsselect: selection rank %d is %s\n",CTA_PAR_MY_RANK, + irank, select); + + if (irank==1 || krank>0){ + CTA_String_Set(*sselect,select); + + } else { + CTA_String_Set(sselect_sub,select); + CTA_String_Conc(*sselect,sor); + CTA_String_Conc(*sselect,sselect_sub); + } + } + if (IDEBUG>0) printf("#%d modbuild_par_getobsselect: selection string is %s\n",CTA_PAR_MY_RANK, CTAI_String_GetPtr(*sselect)); + *ierr=retval_all; + + if (IDEBUG>0) printf("#%d end of modbuild_par_getobsselect\n",CTA_PAR_MY_RANK); + } + CTA_String_Free(&sselect_sub); + CTA_String_Free(&sor); +} + + +void modbuild_par_announceobsvalues(CTAI_Modbuild_par *data, CTA_ObsDescr *hdescr, int* ierr){ + + CTA_Time timedum; + int retval; + int tag; + int irank, jrank; + CTA_ObsDescr *subObsDescr; + CTA_RelTable *reltab; + int nobs, nobs_sub; + + CTA_ObsDescr_Observation_Count (*hdescr,&nobs); + if (IDEBUG>0) printf("#%d modbuild_par_announceobsvalues; nobs tot:%d \n",CTA_PAR_MY_RANK, nobs); + + tag=1234; + *ierr=CTA_OK; + + // Create dummy time + CTA_Time_Create(&timedum); + + if (NOOSOBS < 1) { + // We need to know what observations the various processes can provide + // + if (IDEBUG>0) printf("#%d modbuild_par_getobsvalues: asking for getobsselect to processes \n",CTA_PAR_MY_RANK); + if (data->nworker>1 && !data->SelectObs){ + data->SelectObs= (CTA_ObsDescr*) CTA_Malloc((data->nworker+1)*sizeof(CTA_ObsDescr)); + for (irank=1;irank<=data->nworker;irank++){ + CTA_String_Create(&(data->SelectObs[irank])); + // printf("calling modbuild_par_getobsselect_communicate... \n"); + modbuild_par_getobsselect_communicate(data, irank, &timedum, hdescr, &(data->SelectObs[irank]), &retval); + } + } + } + /* Create stochastic observers for different process */ + subObsDescr= (CTA_ObsDescr*) CTA_Malloc(sizeof(CTA_ObsDescr)*(data->nworker+1)); + + if (data->nworker==1){ + subObsDescr[1]=*hdescr; + } else { + if (NOOSOBS < 1) { + + reltab=CTA_Malloc(sizeof(CTA_RelTable)*(data->nworker+1)); + for (irank=1;irank<=data->nworker;irank++){ + if (IDEBUG>0) printf("#%d Creating Obs selection for process %d\n",CTA_PAR_MY_RANK, irank); + if (IDEBUG>0) printf("#%d Creating Obs selection='%s'\n",CTA_PAR_MY_RANK, CTAI_String_GetPtr(data->SelectObs[irank])); + CTA_RelTable_Create(&(reltab[irank])); + retval=CTA_ObsDescr_CreateSel(*hdescr, data->SelectObs[irank], reltab[irank], &subObsDescr[irank]); + if (retval!=CTA_OK) { + printf("#%d Error creating selection for worker %d process error code is %d\n",CTA_PAR_MY_RANK, irank, retval); + exit(-1); + } + + } + } else { + for (irank=1;irank<=data->nworker;irank++){ + subObsDescr[irank]=*hdescr; + } + // reltab not defined + } + } + + /* Send observation descriptions of the various sub processes */ + for (irank=1;irank<=data->nworker;irank++){ + + jrank=irank; + if (data && data->is_intercomm) jrank--; + + if (IDEBUG>0) printf("#%d Prepare to send Obsdescr to worker %d\n",CTA_PAR_MY_RANK, irank); + + CTA_ObsDescr_Observation_Count (subObsDescr[irank],&nobs_sub); + if (IDEBUG>0) printf("#%d Number of observations in obsdescr for worker %d is %d\n",CTA_PAR_MY_RANK, irank, nobs_sub); + if (nobs_sub>0) { + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_announceobsvalues", irank, data, tag); + if (IDEBUG>0) printf("#%d done sending task and handle \n",CTA_PAR_MY_RANK); + + /* Send time and obsdescr */ + if (IDEBUG>0) printf("#%d Sending observation description to worker %d\n",CTA_PAR_MY_RANK, irank); + modbuild_par_sendobsdescr(data, irank, timedum, subObsDescr[irank], tag, ierr); + if (*ierr!=CTA_OK) return; + + } + } + /* Free obsdescr of sub-domains */ + if (data->nworker>1){ + if (NOOSOBS < 1) { + for (irank=1;irank<=data->nworker;irank++){ + CTA_ObsDescr_Free(&(subObsDescr[irank])); + CTA_RelTable_Free(&(reltab[irank])); + } + free(reltab); + } else { // do nothing + + } + } + + CTA_Time_Free(&timedum); + free(subObsDescr); + *ierr=CTA_OK; +} + + + +void modbuild_par_getobsvalues(CTAI_Modbuild_par *data, CTA_Time *ttime, + CTA_ObsDescr *hdescr, CTA_Vector *vval, + int* ierr){ + + int retval; + int tag; + int lenpack; + int irank, jrank; + CTA_Pack hpack; + char *pack; + MPI_Status status; + CTA_ObsDescr *subObsDescr; + CTA_RelTable *reltab; + CTA_Vector vsub; + int nobs, nobs_sub; + double dmisval; + CTA_Datatype datatype; + int size_type; + double value; + int iobs; + + dmisval = -1E-20; + + + + CTA_ObsDescr_Observation_Count (*hdescr,&nobs); + CTA_Vector_SetConstant(*vval,&dmisval,CTA_DOUBLE); + if (IDEBUG>0) printf("#%d modbuild_par_getobsvalues; nobs tot:%d \n",CTA_PAR_MY_RANK, nobs); + + tag=1234; + *ierr=CTA_OK; + + if (NOOSOBS < 1) { + // We need to know what observations the various processes can provide + // + if (IDEBUG>0) printf("#%d modbuild_par_getobsvalues: asking for getobsselect to processes \n",CTA_PAR_MY_RANK); + if (data->nworker>1 && !data->SelectObs){ + data->SelectObs=CTA_Malloc((data->nworker+1)*sizeof(CTA_ObsDescr)); + for (irank=1;irank<=data->nworker;irank++){ + CTA_String_Create(&(data->SelectObs[irank])); + // printf("calling modbuild_par_getobsselect_communicate... \n"); + modbuild_par_getobsselect_communicate(data, irank, ttime, hdescr, &(data->SelectObs[irank]), &retval); + } + } + } + /* Create stochastic observers for different process */ + subObsDescr=CTA_Malloc(sizeof(CTA_ObsDescr)*(data->nworker+1)); + + if (data->nworker==1){ + subObsDescr[1]=*hdescr; + } else { + if (NOOSOBS < 1) { + + reltab=CTA_Malloc(sizeof(CTA_RelTable)*(data->nworker+1)); + for (irank=1;irank<=data->nworker;irank++){ + if (IDEBUG>0) printf("#%d Creating Obs selection for process %d\n",CTA_PAR_MY_RANK, irank); + if (IDEBUG>0) printf("#%d Creating Obs selection='%s'\n",CTA_PAR_MY_RANK, CTAI_String_GetPtr(data->SelectObs[irank])); + CTA_RelTable_Create(&(reltab[irank])); + retval=CTA_ObsDescr_CreateSel(*hdescr, data->SelectObs[irank], reltab[irank], &subObsDescr[irank]); + if (retval!=CTA_OK) { + printf("#%d Error creating selection for worker %d process error code is %d\n",CTA_PAR_MY_RANK, irank, retval); + exit(-1); + } + + } + } else { + for (irank=1;irank<=data->nworker;irank++){ + subObsDescr[irank]=*hdescr; + } + // reltab not defined + } + } + + /* Send observation descriptions of the various sub processes */ + for (irank=1;irank<=data->nworker;irank++){ + + jrank=irank; + if (data && data->is_intercomm) jrank--; + + if (IDEBUG>0) printf("#%d Prepare to send Obsdescr to worker %d\n",CTA_PAR_MY_RANK, irank); + + CTA_ObsDescr_Observation_Count (subObsDescr[irank],&nobs_sub); + if (IDEBUG>0) printf("#%d Number of observations in obsdescr for worker %d is %d\n",CTA_PAR_MY_RANK, irank, nobs_sub); + if (nobs_sub>0) { + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_getobsvalues", irank, data, tag); + if (IDEBUG>0) printf("#%d done sending task and handle \n",CTA_PAR_MY_RANK); + + /* create vector for individual processes */ + if (data->nworker>1) { + if (NOOSOBS < 1) { // probably unneccessary since nobs_sub = nobs; + CTA_Vector_Create(CTA_DEFAULT_VECTOR,nobs_sub,CTA_DOUBLE,CTA_NULL,&vsub); + } else { + CTA_Vector_Create(CTA_DEFAULT_VECTOR,nobs,CTA_DOUBLE,CTA_NULL,&vsub); + } + } else { + vsub=*vval; + } + + /* Send time and obsdescr */ + if (IDEBUG>0) printf("#%d Sending observation description to worker %d\n",CTA_PAR_MY_RANK, irank); + modbuild_par_sendobsdescr(data, irank, *ttime, subObsDescr[irank], tag, ierr); + if (*ierr!=CTA_OK) return; + + /* Receive vector */ + if (IDEBUG>0) printf("#%d Waiting for a vector form worker %d\n",CTA_PAR_MY_RANK, irank); + retval=MPI_Recv(&lenpack, 1, MPI_INT, jrank,tag, + data->comm, &status); + if (IDEBUG>0) printf("lenpack %d\n", lenpack); + retval=CTA_Pack_Create(lenpack,&hpack); + pack =CTA_Pack_GetPtr(hpack); + + retval=MPI_Recv(pack, lenpack, MPI_CHAR, jrank,tag, + data->comm, &status); + *ierr=CTA_Pack_AddCnt(hpack,lenpack); + + if (IDEBUG>0) printf("#%d Received vector from worker %d\n",CTA_PAR_MY_RANK, irank); + if (*ierr!=CTA_OK) return; + + if (IDEBUG>0) printf("modbuild_par_getobsvalues: receiving vectors done\n"); + *ierr=CTA_Vector_Import(vsub,hpack); + if (*ierr!=CTA_OK) return; + + /* Free workvars */ + *ierr=CTA_Pack_Free(&hpack); + if (*ierr!=CTA_OK) return; + + /* if noosobs, then vsub has the same size as vval (all observations) but contains MISVALS. + These MISVALS have to be filtered out. */ + + /* Copy observations of domain to global vector */ + if (data->nworker>1) { + if (NOOSOBS < 1) { + retval=CTA_RelTable_ApplyInv(reltab[irank], vsub, *vval); + if (*ierr!=CTA_OK) { + printf("modbuild_par_getobsvalues ERROR in CTA_RelTable_ApplyInv\n"); + return; + } + } else { + + *ierr=CTA_Vector_GetDatatype(vsub,&datatype); + *ierr=CTA_SizeOf(datatype,&size_type); + + for ( iobs = 0; iobs < nobs; iobs++) { + *ierr = CTA_Vector_GetVal(vsub,iobs+1, &value,datatype); + if ( value > MISVAL+1.0) { + *ierr = CTA_Vector_SetVal(*vval, iobs+1, &value,datatype); + if (*ierr!=CTA_OK) { + printf("modbuild_par_getobsvalues ERROR %d in filling vvals from vsub\n",*ierr); + return; + } + } + } + + } + + CTA_Vector_Free(&vsub); + } + } + else { + if (IDEBUG>0) printf("#%d No need to ask observations since there are none \n",CTA_PAR_MY_RANK); + } + } + /* Free obsdescr of sub-domains */ + if (data->nworker>1){ + if (NOOSOBS < 1) { + for (irank=1;irank<=data->nworker;irank++){ + CTA_ObsDescr_Free(&(subObsDescr[irank])); + CTA_RelTable_Free(&(reltab[irank])); + } + free(reltab); + } else { // do nothing + + } + } + free(subObsDescr); + // printf("@@ getobsvalues end; final array of predictions:\n"); + //CTA_Vector_Export(*vval, CTA_FILE_STDOUT); + + *ierr=CTA_OK; +} + + +void modbuild_par_getobslocalization(CTAI_Modbuild_par *data, CTA_ObsDescr *hdescr, + double *distance, CTA_Vector *locVecs, int* ierr) { + int minone=-1; + modbuild_par_getobslocalizationdomain(data, hdescr, distance, &minone, locVecs, ierr); + +} + + +void modbuild_par_getobsselect(CTAI_Modbuild_par *data, CTA_Time *ttime, CTA_ObsDescr *hdescr, + CTA_String *sselect, int* ierr){ + + CTA_ObsDescr hdescr_null=CTA_NULL; + + if (IDEBUG>0) printf("#%d modbuild_par_getobsselect hdescr=%d\n",CTA_PAR_MY_RANK, *hdescr); + + /* First try do not send the observation description */ + modbuild_par_getobsselect_communicate(data, ALL_WORKERS, ttime, &hdescr_null, sselect, ierr); + if (ierr!=CTA_OK) { + /* Second try and send the observation description */ + modbuild_par_getobsselect_communicate(data, ALL_WORKERS, ttime, hdescr, sselect, ierr); + } + if (IDEBUG>0) printf("#%d END if modbuild_par_getobsselect ierr=%d\n",CTA_PAR_MY_RANK, *ierr); +} + + +void modbuild_par_gettime(CTAI_Modbuild_par *data, CTA_Time *timespan, char* task, int* ierr){ + + double span[2]; + int tag; + int irank, jrank; + MPI_Status status; + + tag=1234; + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle(task, ALL_WORKERS, data, tag); + + /* Send timespan */ + for (irank=1;irank<=data->nworker;irank++){ + + jrank=irank; + if (data && data->is_intercomm) jrank--; + + MPI_Recv(span, 2, MPI_DOUBLE, jrank,1234, data->comm, &status); + CTA_Time_SetSpan(*timespan, span[0], span[1]); + } + + *ierr=CTA_OK; +} + + +void modbuild_par_getcurrenttime(CTAI_Modbuild_par *data, CTA_Time *timespan, int* ierr){ + + if (IDEBUG>0) printf("#%d modbuild_par_getcurrenttime\n",CTA_PAR_MY_RANK); + modbuild_par_gettime(data, timespan, "cta_model_getcurrenttime", ierr); + +} + +void modbuild_par_gettimehorizon(CTAI_Modbuild_par *data, CTA_Time *timespan, int* ierr){ + + if (IDEBUG>0) printf("#%d modbuild_par_gettimehorizon\n",CTA_PAR_MY_RANK); + modbuild_par_gettime(data, timespan, "cta_model_gettimehorizon", ierr); + +} + + +void modbuild_par_addnoise(CTAI_Modbuild_par *data, CTA_Time *timespan, int* ierr){ + + double span[2]; + int tag; + int irank, jrank; + + if (IDEBUG>0) printf("#%d modbuild_par_addnoise\n",CTA_PAR_MY_RANK); + + tag=1234; + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_addnoise", ALL_WORKERS, data, tag); + + /* Send timespan */ + CTA_Time_GetSpan(*timespan, &span[0], &span[1]); + for (irank=1;irank<=data->nworker;irank++){ + jrank=irank; + if (data && data->is_intercomm) jrank--; + + MPI_Send((void*) span, 2, MPI_DOUBLE,jrank,tag,data->comm); + } + + *ierr=CTA_OK; +} + +void modbuild_par_savepersistentstate(CTAI_Modbuild_par *data, CTA_String *filename, CTA_String *instanceID, int *ierr){ + MPI_Status status; + CTA_Pack hpack; + int irank, jrank; + int tag, lenpack, dumval; + char str1[256], str2[256],fname[256]; + char *pack; + + if (IDEBUG>0) printf("#%d modbuild_par_savepersistentstate\n",CTA_PAR_MY_RANK); + + tag=1234; + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_savepersistentstate", ALL_WORKERS, data, tag); + + /* Create pack object for instance ID */ + CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&hpack); + CTA_String_Export(*instanceID,hpack); + tag=7321; + for (irank=1;irank<=data->nworker;irank++){ + jrank=irank; + if (data && data->is_intercomm) jrank--; + + /*Send packed instanceID to each worker process */ + lenpack= CTA_Pack_GetLen(hpack); + pack=CTA_Pack_GetPtr(hpack); + MPI_Send(&lenpack, 1, MPI_INT, jrank, tag, + data->comm); + MPI_Send(pack, lenpack, MPI_CHAR, jrank , tag, + data->comm); + } + /* Free packed object */ + CTA_Pack_Free(&hpack); + + /* Check extension NetCDF and temporarily remove it */ + CTA_String_Get(*filename,str1); + if (strlen(str1)>3){ + if (0==strcmp( ".nc",(str1+strlen(str1)-3))){ + str1[strlen(str1)-3]='\0'; + } else { + printf("#%d Error: only export to NetCDF possible '%s'\n", + CTA_PAR_MY_RANK, str1); + exit(-1); + } + } + tag=3217; + for (irank=1;irank<=data->nworker;irank++){ + jrank=irank; + if (data && data->is_intercomm) jrank--; + + /*Add rank of worker process to string containing filename*/ + strcpy(fname,str1); + sprintf(str2,"_part%04d",jrank); + strcat(fname,str2); + strcat(fname,".nc"); + if (IDEBUG>0) printf("send filename '%s' to process #%d \n",fname, jrank); + CTA_String_Set(*filename,fname); + + /* send packed filename to each worker process */ + CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&hpack); + CTA_String_Export(*filename,hpack); + + lenpack= CTA_Pack_GetLen(hpack); + pack=CTA_Pack_GetPtr(hpack); + MPI_Send(&lenpack, 1, MPI_INT, jrank, tag, + data->comm); + MPI_Send(pack, lenpack, MPI_CHAR, jrank , tag, + data->comm); + CTA_Pack_Free(&hpack); + } + + /* Receive CTA_OK to prevent the master from collecting zip files + before they are written */ + tag = 2173; + for (irank=1;irank<=data->nworker;irank++){ + jrank=irank; + if (data && data->is_intercomm) jrank--; + + MPI_Recv(&dumval, 1, MPI_INT, jrank, tag, data->comm, &status); + } + *ierr=CTA_OK; +} + +void modbuild_par_loadpersistentstate(CTAI_Modbuild_par *data, CTA_String *filename, CTA_String *instanceID, int *ierr){ + + MPI_Status status; + CTA_Pack hpack; + CTA_String hID; + int irank, jrank; + int tag, lenpack; + char str1[256], str2[256],fname[256]; + char *pack; + + if (IDEBUG>0) printf("#%d modbuild_par_loadpersistentstate\n",CTA_PAR_MY_RANK); + + tag=1234; + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_loadpersistentstate", ALL_WORKERS, data, tag); + + /* Check extension NetCDF and temporarily remove it */ + CTA_String_Get(*filename,str1); + if (strlen(str1)>3){ + if (0==strcmp( ".nc",(str1+strlen(str1)-3))){ + str1[strlen(str1)-3]='\0'; + } else { + printf("#%d Error: only export to NetCDF possible '%s'\n", + CTA_PAR_MY_RANK, str1); + exit(-1); + } + } + tag=3218; + for (irank=1;irank<=data->nworker;irank++){ + jrank=irank; + if (data && data->is_intercomm) jrank--; + + /*Add rank of worker process to string containing filename*/ + strcpy(fname,str1); + sprintf(str2,"_part%04d",jrank); + strcat(fname,str2); + strcat(fname,".nc"); + if (IDEBUG>0) printf("send filename '%s' to process #%d \n",fname, jrank); + CTA_String_Set(*filename,fname); + + /* send packed filename to each worker process */ + CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&hpack); + CTA_String_Export(*filename,hpack); + + lenpack= CTA_Pack_GetLen(hpack); + pack=CTA_Pack_GetPtr(hpack); + MPI_Send(&lenpack, 1, MPI_INT, jrank, tag, + data->comm); + MPI_Send(pack, lenpack, MPI_CHAR, jrank , tag, + data->comm); + CTA_Pack_Free(&hpack); + } + + /* Receive instanceID from each worker process */ + CTA_String_Create(&hID); + tag=8321; + for (irank=1;irank<=data->nworker;irank++){ + jrank=irank; + if (data && data->is_intercomm) jrank--; + + /* Receive string: TODO: teach CTA_Pack this simple and repetitive task: + CTA_Pack hpack = CTA_Pack_MPI_Receive(jrank,tag,comm); + */ + MPI_Recv(&lenpack, 1, MPI_INT, jrank,tag, data->comm, &status); + CTA_Pack_Create(lenpack,&hpack); + pack =CTA_Pack_GetPtr(hpack); + MPI_Recv(pack, lenpack, MPI_CHAR, jrank, tag, data->comm, &status); + CTA_Pack_AddCnt(hpack,lenpack); + + /* Get string from pack object*/ + CTA_String_Import(hID,hpack); + CTA_Pack_Free(&hpack); + + if (irank==1) { + *instanceID=hID; + } + if (IDEBUG>0) { + char *tmpstr = CTAI_String_GetPtr(hID); + printf("# %d LPS: received instanceID '%s'\n",CTA_PAR_MY_RANK, tmpstr); + } + } + *ierr=CTA_OK; +} + +void modbuild_par_saveinternalstate(CTAI_Modbuild_par *data, CTA_String *instanceID, int *ierr){ + + MPI_Status status; + CTA_String hID; + CTA_Pack hpack; + int tag, lenpack; + int irank, jrank; + char *pack; + + if (IDEBUG>0) printf("#%d modbuild_par_saveinternalstate\n",CTA_PAR_MY_RANK); + + tag=1234; + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_saveinternalstate", ALL_WORKERS, data, tag); + + /* Receive instanceID from each worker process */ + CTA_String_Create(&hID); + tag=6321; + for (irank=1;irank<=data->nworker;irank++){ + jrank=irank; + if (data && data->is_intercomm) jrank--; + + /* Receive string: TODO: teach CTA_Pack this simple and repetitive task: + CTA_Pack hpack = CTA_Pack_MPI_Receive(jrank,tag,comm); + */ + MPI_Recv(&lenpack, 1, MPI_INT, jrank,tag, data->comm, &status); + CTA_Pack_Create(lenpack,&hpack); + pack =CTA_Pack_GetPtr(hpack); + MPI_Recv(pack, lenpack, MPI_CHAR, jrank, tag, data->comm, &status); + CTA_Pack_AddCnt(hpack,lenpack); + + /* Get string from pack object*/ + CTA_String_Import(hID,hpack); + CTA_Pack_Free(&hpack); + + if (irank==1) { + *instanceID=hID; + } + if (IDEBUG>0) { + char *tmpstr = CTAI_String_GetPtr(hID); + printf("# %d SIS: received instanceID '%s'\n",CTA_PAR_MY_RANK, tmpstr); + } + } + *ierr=CTA_OK; +} + +void modbuild_par_restoreinternalstate(CTAI_Modbuild_par *data, CTA_String *instanceID, int *ierr){ + + int irank, jrank; + int tag, lenpack; + CTA_Pack hpack; + char *pack; + + if (IDEBUG>0) printf("#%d modbuild_par_restoreinternalstate\n",CTA_PAR_MY_RANK); + + tag=1234; + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_restoreinternalstate", ALL_WORKERS, data, tag); + + tag=5321; + /* Create pack object for instance ID */ + CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&hpack); + CTA_String_Export(*instanceID,hpack); + + for (irank=1;irank<=data->nworker;irank++){ + jrank=irank; + if (data && data->is_intercomm) jrank--; + + /*Send packed instanceID to each worker process */ + lenpack= CTA_Pack_GetLen(hpack); + pack=CTA_Pack_GetPtr(hpack); + MPI_Send(&lenpack, 1, MPI_INT, jrank, tag, + data->comm); + MPI_Send(pack, lenpack, MPI_CHAR, jrank , tag, + data->comm); + } + /* Free local objects */ + CTA_Pack_Free(&hpack); + + *ierr=CTA_OK; +} + +void modbuild_par_releaseinternalstate(CTAI_Modbuild_par *data, CTA_String *instanceID, int *ierr){ + + int irank, jrank; + int tag, lenpack; + CTA_Pack hpack; + char *pack; + + if (IDEBUG>0) printf("#%d modbuild_par_releaseinternalstate\n",CTA_PAR_MY_RANK); + + tag=1234; + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_releaseinternalstate", ALL_WORKERS, data, tag); + + tag=9321; + /* Create pack object for instance ID */ + CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&hpack); + CTA_String_Export(*instanceID,hpack); + + for (irank=1;irank<=data->nworker;irank++){ + jrank=irank; + if (data && data->is_intercomm) jrank--; + + /*Send packed instanceID to each worker process */ + lenpack= CTA_Pack_GetLen(hpack); + pack=CTA_Pack_GetPtr(hpack); + MPI_Send(&lenpack, 1, MPI_INT, jrank, tag, + data->comm); + MPI_Send(pack, lenpack, MPI_CHAR, jrank , tag, + data->comm); + } + /* Free local objects */ + CTA_Pack_Free(&hpack); + + *ierr=CTA_OK; +} + +/** \brief Get the number of domains for local analysis + * + * \param data IO instance data block + * \param distance I characteristic distance + * \param ndomains O number of domains + * \param ierr O error status: CTA_OK if successful + */ +void modbuild_par_getnumdomains(CTAI_Modbuild_par *data, double *distance, int *ndomains, int *ierr){ +} + + +/** \brief Get selection of observations that are relevnet for assimilation in the given domain + * + * \param data IO instance data block + * \param hdescr I observation description of all observations + * \param distance I characteristic distance + * \param idomain I domain number + * \param selection O costa vector with the indices of the relevant observations (0 based) + * \param ierr O error status: CTA_OK if successful + */ +void modbuild_par_getobsselector(CTAI_Modbuild_par *data, + CTA_ObsDescr *hdescr, double *distance, int *idomain, CTA_Vector *selection, int *ierr){ +} + + + + + +/** \brief Get for each observation a localization scaling vector for single domain + * + * \param data IO instance data block + * \param hdescr I observation description for which we want localization scaling vectors + * \param distance I characteristic distance + * \param idomain I domain number + * \param locVecs O costa vector of handles to treevectors (scaling vectors). The treevectors + * are created when the indices are CTA_NULL on entry + * \param ierr O error status: CTA_OK if successful + */ +void modbuild_par_getobslocalizationdomain(CTAI_Modbuild_par *data, + CTA_ObsDescr *hdescr, double *distance, int *idomain, CTA_Vector *locVecs, int *ierr){ + + //NB locvecs (size: nobs) is vector containing handles + + int tag; + int lenpack; + int irank, jrank, lentask; + CTA_Pack hpack; + CTA_Time ttime; + char *pack; + CTA_ObsDescr *subObsDescr; + int nobs, iobs; + CTA_TreeVector *locstate; + int locIDEBUG = 0; + + if (locIDEBUG>0) { printf("modbuild_par_getobslocalization START\n");} + + tag = 1234; + + CTA_ObsDescr_Observation_Count (*hdescr,&nobs); + + if (nobs>0){ + /* Create stochastic observers for different process */ + subObsDescr=CTA_Malloc(sizeof(CTA_ObsDescr)*(data->nworker+1)); + + if (data->nworker==1){ + subObsDescr[1]=*hdescr; + } else { + for (irank=1;irank<=data->nworker;irank++){ + subObsDescr[irank]=*hdescr; + } + } + + /* Pack the vector containing handles*/ + lenpack=nobs*sizeof(int)+100; + CTA_Pack_Create(lenpack,&hpack); + CTA_Vector_Export(*locVecs, hpack); + + /* Send vector to workers */ + lentask= CTA_Pack_GetLen(hpack); + pack = CTA_Pack_GetPtr(hpack); + + /* Send observation descriptions of the various sub processes */ + for (irank=1;irank<=data->nworker;irank++){ + + jrank=irank; + if (data && data->is_intercomm) jrank--; + + if (locIDEBUG>0) printf("#%d Prepare to send Obsdescr to worker %d\n",CTA_PAR_MY_RANK, irank); + + /* Send Task and model handle */ + modbuild_par_sendtaskandhandle("cta_model_getobslocalization", irank, data, tag); + if (locIDEBUG>0) printf("#%d done sending task and handle \n",CTA_PAR_MY_RANK); + + /* Send time and obsdescr */ + /* note: time is not needed but we want to use the sendobsdescr function */ + CTA_Time_Create(&ttime); + CTA_Time_SetSpan(ttime, 0.0,0.0); + + if (locIDEBUG>0) printf("#%d Sending observation description to worker %d\n",CTA_PAR_MY_RANK, irank); + modbuild_par_sendobsdescr(data, irank, ttime, subObsDescr[irank], tag, ierr); + if (*ierr!=CTA_OK) return; + + CTA_Time_Free(&ttime); + + /* send characteristic distance */ + MPI_Send(distance, 1, MPI_DOUBLE, jrank, tag, + data->comm); + + /* send domain number */ + MPI_Send(distance, 1, MPI_INT, jrank, tag, + data->comm); + + /* Send length of packed vector */ + if (locIDEBUG>0) printf("#%d Send vector to worker (lentask)\n", irank); + MPI_Send(&lentask, 1, MPI_INT, jrank, tag, + data->comm); + + /* Send packed vector */ + MPI_Send(pack, lentask, MPI_CHAR, jrank, tag, + data->comm); + if (locIDEBUG>0) printf("#%d Done sending vector\n",CTA_PAR_MY_RANK); + + } + + locstate=CTA_Malloc(sizeof(CTA_TreeVector) *nobs); + + /* receive for each observation the localization state from all workers */ + for ( iobs = 0; iobs < nobs; iobs++) { + if (locIDEBUG>0) printf("modbuild_par_getobslocalization: receiving state of observation %d \n",iobs); + locstate[iobs] = CTA_NULL; + modbuild_par_recvstate(&(locstate[iobs]), data, 1234); + + /* put the handles in the cta-vector */ + CTA_Vector_SetVal(*locVecs,iobs+1,&(locstate[iobs]),CTA_HANDLE); + } + + + if (locIDEBUG>0) {printf("modbuild_par_getobslocalization: returning vector of handles: \n"); + CTA_Vector_Export(*locVecs, CTA_FILE_STDOUT); + } + + /* Free work variables/objects */ + CTA_Pack_Free(&hpack); + free(subObsDescr); + free(locstate); + } + + + + + +} + + +/** \brief Get a copy of the internal state. + * + * \note Optionally a tree-vector is created. In that case the caller of this + * method is responsible for freeing that tree-vector. The input state must be compatible + * (same size and or composition) as the models internal state. + * \note If *hstate == CTA_NULL a new object is created, user is responsible for freeing this object. + * + * \param data IO instance data block + * \param idomain I domain number + * \param hstate IO receives state of the model, *hstate can be CTA_NULL on calling (see note) + * \param ierr O error status: CTA_OK if successful + */ +void modbuild_par_getstatedomain(CTAI_Modbuild_par *data, int *idomain, CTA_TreeVector *hstate, int *ierr){ +} + +/** \brief Perform axpy operation on the internal state for a single domain + * + * \note AXPY: y=alpha*x+y. y corresponds to the models + * internal state and x can be a state vector or a model + + * \param data IO instance data block + * \param alpha I alpha + * \param idomain I domain number + * \param hx I handle of x (state or model) + * \param ierr O error status: CTA_OK if successful + */ +void modbuild_par_model_axpystatedomain(CTAI_Modbuild_par *data, double *alpha, int *idomain, CTA_Handle *hx, int *ierr){ +} + + + + + + + + + + +void CTA_Modbuild_par_worker_create(){ + MPI_Status status; + int retval; + CTA_Model hmodel; + /* cta_model_create */ + char clsnam[256], modinp[256]; + CTA_String sclsnam, smodinp; + CTA_ModelClass hmodcl; + + /* Create some work-variables */ + CTA_String_Create(&sclsnam); + CTA_String_Create(&smodinp); + /* Receive the name of the modelclass */ + retval=MPI_Recv(clsnam,256,MPI_CHAR, ctai_rank_master, 1234, + CTA_COMM_MASTER_WORKER,&status); + CTA_String_Set(sclsnam,clsnam); + if (IDEBUG>0) printf("#%d name of model-class is '%s'\n",CTA_PAR_MY_RANK,clsnam); + /* Receive the name of the models input-file */ + retval=MPI_Recv(modinp,256,MPI_CHAR, ctai_rank_master, 1234, + CTA_COMM_MASTER_WORKER,&status); + CTA_String_Set(smodinp,modinp); + if (IDEBUG>0) printf("#%d model-input is '%s'\n",CTA_PAR_MY_RANK, modinp); + + /* Find the handle of the modelclass */ + retval=CTA_Handle_Find(sclsnam, CTA_MODELCLASS, &hmodcl); + if (retval!=CTA_OK) { + printf("#%d Cannot find model-class with name '%s'\n", + CTA_PAR_MY_RANK, CTAI_String_GetPtr(sclsnam)); + exit(-1); + } + /* Create the model */ + if (IDEBUG>0) printf("#%d Calling model-create for modell class %d\n",CTA_PAR_MY_RANK, hmodcl); + + retval=CTA_Model_Create(hmodcl,smodinp,&hmodel); + if (retval!=CTA_OK) { + printf("#%d CTA_Modbuild_par_worker:Error creating model instance ",CTA_PAR_MY_RANK); + printf("ierr='%d'\n",retval); + exit(-1); + } + + /* add new model to list */ + modbuild_models_add(hmodel); + + + /* Do we need to create our sratch model? */ + if (hmodel_scratch==CTA_NULL) { + if (IDEBUG>0){ printf("WARNING NOT CREATING SCRATCH MODEL\n"); } +// retval=CTA_Model_Create(hmodcl,smodinp,&hmodel_scratch); +// if (retval!=CTA_OK) { +// printf("#%d Failed to create scratch model\n",CTA_PAR_MY_RANK); +// printf("#%d CTA_Modbuild_par_worker:Error creating model instance ",CTA_PAR_MY_RANK); +// printf("ierr='%d'\n",retval); +// exit(-1); +// } + } + + + /* Send the handle of new model instance to master */ + retval=MPI_Send(&hmodel,1, MPI_INT,ctai_rank_master,1234,CTA_COMM_MASTER_WORKER); + + /* Free work variables */ + CTA_String_Free(&sclsnam); + CTA_String_Free(&smodinp); +} + +void CTA_Modbuild_par_worker_compute(){ + MPI_Status status; + CTA_Time timespan; + CTA_Model hmodel; + double span[2]; + + + if (IDEBUG>0) printf("#%d start CTA_Modbuild_par_worker_compute\n",CTA_PAR_MY_RANK); + + /* Receive the model handle */ + MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,1234, + CTA_COMM_MASTER_WORKER, &status); + + /* Receive the simulation timespan */ + CTA_Time_Create(×pan); + MPI_Recv(span, 2, MPI_DOUBLE, ctai_rank_master,1234, + CTA_COMM_MASTER_WORKER, &status); + CTA_Time_SetSpan(timespan, span[0], span[1]); + + /* Perform the compute */ + CTA_Model_Compute(hmodel,timespan); + + /* Free work variables */ + CTA_Time_Free(×pan); + + if (IDEBUG>0) printf("#%d end CTA_Modbuild_par_worker_compute\n",CTA_PAR_MY_RANK); + +} + + +void CTA_Modbuild_par_worker_setstate(int typeset){ + MPI_Status status; + CTA_Time timespan; + CTA_Model hmodel; + double span[2]; + CTA_TreeVector state; + + /* Receive the model handle */ + MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,1234, + CTA_COMM_MASTER_WORKER, &status); + + /* Receive the simulation timespan (Forcings) */ + timespan=CTA_NULL; + if (typeset==FORC) { + CTA_Time_Create(×pan); + MPI_Recv(span, 2, MPI_DOUBLE, ctai_rank_master,1234, + CTA_COMM_MASTER_WORKER, &status); + CTA_Time_SetSpan(timespan, span[0], span[1]); + } + + /* Receive the state */ + state=CTA_NULL; + modbuild_par_recvstate(&state, NULL, 1234); + + /* Perform the setstate */ + if (typeset==STATE){ + CTA_Model_SetState(hmodel,state); + } else if (typeset==FORC){ + CTA_Model_SetForc(hmodel,timespan,state); + } else if (typeset==PARAM){ + CTA_Model_SetParam(hmodel,state); + } else { + printf("Internal error in CTA_Modbuild_par_worker_setstate\n"); + exit(-1); + } + CTA_Time_Free(×pan); + + /* Free work variables */ + CTA_TreeVector_Free(&state,CTA_TRUE); +} + +void CTA_Modbuild_par_worker_getstate(int typeget){ + MPI_Status status; + CTA_Time timespan; + CTA_Model hmodel; + double span[2]; + int retval; + CTA_TreeVector state; + + /* Receive the model handle */ + retval=MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,1234, + CTA_COMM_MASTER_WORKER, &status); + + /* Receive the simulation timespan (Forcings) */ + timespan=CTA_NULL; + if (typeget==FORC) { + retval=CTA_Time_Create(×pan); + retval=MPI_Recv(span, 2, MPI_DOUBLE, ctai_rank_master,1234, + CTA_COMM_MASTER_WORKER, &status); + retval=CTA_Time_SetSpan(timespan, span[0], span[1]); + } + + /* Perform the getstate */ + state=CTA_NULL; + if (typeget==STATE) { + if (IDEBUG>0) printf("#%d Calling CTA_Model_GetState\n",CTA_PAR_MY_RANK); + retval=CTA_Model_GetState(hmodel,&state); + if (retval!=CTA_OK) { + printf("#%d CTA_Modbuild_par_worker_getstate: error (%d) calling CTA_Model_GetState\n",CTA_PAR_MY_RANK,retval ); + exit(-1); + } + } else if (typeget==FORC){ + retval=CTA_Model_GetForc(hmodel,timespan, &state); + } else if (typeget==PARAM){ + retval=CTA_Model_GetParam(hmodel,&state); + } else if (typeget==SCAL){ + retval=CTA_Model_GetStateScaling(hmodel,&state); + } else { + printf("#%d Internal error in CTA_Modbuild_par_worker_getstate\n",CTA_PAR_MY_RANK); + exit(-1); + } + + /* Send the state */ + + modbuild_par_sendstate(state, NULL, 1234); + + /* Free work variables */ + retval=CTA_TreeVector_Free(&state,CTA_TRUE); + retval=CTA_Time_Free(×pan); +} + +void CTA_Modbuild_par_worker_axpymodel(){ + MPI_Status status; + double alpha; + int OneWorker; + int retval; + CTA_Model hmodely, hmodelx; + int rank; + CTA_Pack himport; + int tag; + + tag=1234; + + /* Receive the model handle */ + retval=MPI_Recv(&hmodely, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + + /* Receive alpha */ + retval=MPI_Recv(&alpha, 1, MPI_DOUBLE, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + + /* Receive Oneworker option */ + retval=MPI_Recv(&OneWorker, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + + if (OneWorker) { + /* Receive handle of other model */ + retval=MPI_Recv(&hmodelx, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + + } else { + /* Receive rank of worker that will send the export */ + retval=MPI_Recv(&rank, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + + /* Receive export */ + printf("ERROR SCRATCH MODEL DOES NOT WORK!\n"); + exit(-1); + hmodelx=hmodel_scratch; + retval = CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&himport); + retval = receive_pack_object(rank,tag,FALSE, himport, CTA_COMM_MASTER_WORKER); + retval = CTA_Model_Import(hmodelx,himport); + retval = CTA_Pack_Free(&himport); + } + + retval=CTA_Model_AxpyState(hmodely,alpha,hmodelx); + + if (retval!=CTA_OK) { + printf("#%d CTA_Modbuild_par_worker_axpymodel: error (%d) calling CTA_Model_Axpy\n",CTA_PAR_MY_RANK,retval ); + exit(-1); + } +} + +void CTA_Modbuild_par_worker_axpy(int typeaxpy){ + MPI_Status status; + CTA_Time timespan; + CTA_Model hmodel; + CTA_TreeVector state; + double alpha; + double span[2]; + + /* Receive the model handle */ + MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,1234, + CTA_COMM_MASTER_WORKER, &status); + + /* Receive the simulation timespan (Forcings) */ + timespan=CTA_NULL; + if (typeaxpy==FORC) { + CTA_Time_Create(×pan); + MPI_Recv(span, 2, MPI_DOUBLE, ctai_rank_master,1234, + CTA_COMM_MASTER_WORKER, &status); + CTA_Time_SetSpan(timespan, span[0], span[1]); + } + + /* Receive alpha */ + MPI_Recv(&alpha, 1, MPI_DOUBLE, ctai_rank_master,1234, + CTA_COMM_MASTER_WORKER, &status); + + /* Receive the state */ + state=CTA_NULL; + modbuild_par_recvstate(&state, NULL, 1234); + + /* Perform the axpy */ + if (typeaxpy==STATE) { + CTA_Model_AxpyState(hmodel,alpha,state); + } else if (typeaxpy==FORC) { + CTA_Model_AxpyForc(hmodel,timespan,alpha,state); + } else if (typeaxpy==PARAM) { + CTA_Model_AxpyParam(hmodel,alpha,state); + } else { + printf("#%d Internal error in CTA_Modbuild_par_worker_axpy\n",CTA_PAR_MY_RANK); + exit(-1); + } + + /* Free work variables */ + CTA_TreeVector_Free(&state,CTA_TRUE); + CTA_Time_Free(×pan); +} + + +void CTA_Modbuild_par_worker_gettime(char *task){ + MPI_Status status; + CTA_Time timespan; + CTA_Model hmodel; + double span[2]; + int tag; + + tag=1234; + + /* Receive the model handle */ + MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + + timespan=CTA_NULL; + CTA_Time_Create(×pan); + + if (strcmp(task, "cta_model_getcurrenttime") ==0) { + CTA_Model_GetCurrentTime(hmodel,timespan); + } else if (strcmp(task, "cta_model_gettimehorizon") ==0) { + CTA_Model_GetTimeHorizon(hmodel,timespan); + } else { + printf("#%d CTA_Modbuild_par_worker_gettime: Unkown task '%s'",CTA_PAR_MY_RANK,task); + exit(-1); + } + + /* Sent timespan */ + CTA_Time_GetSpan(timespan, &span[0], &span[1]); + + MPI_Send(span, 2, MPI_DOUBLE, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER); + + /* Free workvars */ + CTA_Time_Free(×pan); +} + + +void CTA_Modbuild_par_worker_addnoise(){ + + MPI_Status status; + CTA_Time timespan; + CTA_Model hmodel; + double span[2]; + int tag; + + tag=1234; + + /* Receive the model handle */ + MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + + /* Receive the simulation timespan */ + timespan=CTA_NULL; + CTA_Time_Create(×pan); + MPI_Recv(span, 2, MPI_DOUBLE, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + CTA_Time_SetSpan(timespan, span[0], span[1]); + + /* Perform the interpolation */ + CTA_Model_AddNoise(hmodel, timespan); + + /* Free workvars */ + CTA_Time_Free(×pan); +} + +void CTA_Modbuild_par_worker_savepersistentstate(){ + + MPI_Status status; + CTA_Model hmodel; + CTA_Pack hpacki, hpackf; + CTA_String hID, fname; + int tag, lenpack, dumval; + char *pack; + + if (IDEBUG>0) printf("#%d start CTA_Modbuild_par_worker_savepersistentstate\n",CTA_PAR_MY_RANK); + + tag=1234; + /* Receive the model handle */ + MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + + /* Receive instanceID */ + tag = 7321; + MPI_Recv(&lenpack, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + CTA_Pack_Create(lenpack,&hpacki); + pack =CTA_Pack_GetPtr(hpacki); + MPI_Recv(pack, lenpack, MPI_CHAR, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + CTA_Pack_AddCnt(hpacki,lenpack); + + /* Get string from pack object*/ + CTA_String_Create(&hID); + CTA_String_Import(hID,hpacki); + CTA_Pack_Free(&hpacki); + if (IDEBUG>0) { + char *ID = CTAI_String_GetPtr(hID); + printf("#%d worker SPS received instance ID = '%s'\n",CTA_PAR_MY_RANK,ID); + } + + tag = 3217; + /* Receive filename */ + MPI_Recv(&lenpack, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + CTA_Pack_Create(lenpack,&hpackf); + pack =CTA_Pack_GetPtr(hpackf); + MPI_Recv(pack, lenpack, MPI_CHAR, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + CTA_Pack_AddCnt(hpackf,lenpack); + + /* Get string from pack object*/ + CTA_String_Create(&fname); + CTA_String_Import(fname,hpackf); + CTA_Pack_Free(&hpackf); + if (IDEBUG>0) { + char *fstr = CTAI_String_GetPtr(fname); + printf("#%d worker SPS received filename = '%s'\n",CTA_PAR_MY_RANK,fstr); + } + + /* Write state to file for own domain*/ + CTA_Model_SavePersistentState(hmodel, fname, hID); + + /* Return CTA_OK for each worker process to prevent the master to start + zipping files before they are written */ + tag = 2173; + dumval = CTA_OK; + MPI_Send(&dumval, 1, MPI_INT, ctai_rank_master, tag, + CTA_COMM_MASTER_WORKER); +} + +void CTA_Modbuild_par_worker_loadpersistentstate(){ + + MPI_Status status; + CTA_Model hmodel; + CTA_Pack hpack; + CTA_Pack hpackf; + CTA_String hID, fname; + int tag, lenpack; + char *pack; + + tag=1234; + + /* Receive the model handle */ + MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + + if (IDEBUG>0) { + printf("#%d LPS received model handle = '%d'\n",CTA_PAR_MY_RANK,hmodel); + } + + tag = 3218; + /* Receive filename */ + MPI_Recv(&lenpack, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + CTA_Pack_Create(lenpack,&hpackf); + pack =CTA_Pack_GetPtr(hpackf); + MPI_Recv(pack, lenpack, MPI_CHAR, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + CTA_Pack_AddCnt(hpackf,lenpack); + + /* Get string from pack object*/ + CTA_String_Create(&fname); + CTA_String_Import(fname,hpackf); + CTA_Pack_Free(&hpackf); + if (IDEBUG>0) { + char *fstr = CTAI_String_GetPtr(fname); + printf("#%d worker LPS received filename = '%s'\n",CTA_PAR_MY_RANK,fstr); + } + + /* Read state from file for own domain*/ + CTA_String_Create(&hID); + CTA_Model_LoadPersistentState(hmodel, fname, &hID); + + /* Print the ID string for debugging reasons*/ + if (IDEBUG>0) { + char *select = CTAI_String_GetPtr(hID); + printf("#%d: Debugging: my instanceID is '%s'\n",CTA_PAR_MY_RANK, select); + } + + /* Send instance ID */ + CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&hpack); + CTA_String_Export(hID,hpack); + + /* Send package: TODO: teach CTA_Pack this simple repetitive task */ + tag = 8321; + lenpack= CTA_Pack_GetLen(hpack); + pack=CTA_Pack_GetPtr(hpack); + MPI_Send(&lenpack, 1, MPI_INT, ctai_rank_master, tag, + CTA_COMM_MASTER_WORKER); + MPI_Send(pack, lenpack, MPI_CHAR,ctai_rank_master, tag, + CTA_COMM_MASTER_WORKER); + + /* Free local objects */ + CTA_Pack_Free(&hpack); +} + +void CTA_Modbuild_par_worker_saveinternalstate(){ + + MPI_Status status; + CTA_Model hmodel; + CTA_Pack hpack; + CTA_String hID; + int tag, lenpack; + char *pack; + + if (IDEBUG>0) printf("#%d start CTA_Modbuild_par_worker_saveinternalstate\n",CTA_PAR_MY_RANK); + + tag=1234; + /* Receive the model handle */ + MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + if (IDEBUG>0) printf("# %d modbuild_par_worker_saveinternalstate: receiving model handle %d\n",CTA_PAR_MY_RANK, hmodel); + + /* Save internal state for each worker process and get ID back*/ + CTA_String_Create(&hID); + CTA_Model_SaveInternalState(hmodel,&hID); + + /* Print the ID string for debugging reasons*/ + if (IDEBUG>0) { + char *select = CTAI_String_GetPtr(hID); + printf("#%d: Debugging: my instanceID is '%s'\n",CTA_PAR_MY_RANK, select); + } + + /* Send instance ID */ + CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&hpack); + CTA_String_Export(hID,hpack); + + /* Send package: TODO: teach CTA_Pack this simple repetitive task */ + tag = 6321; + lenpack= CTA_Pack_GetLen(hpack); + pack=CTA_Pack_GetPtr(hpack); + MPI_Send(&lenpack, 1, MPI_INT, ctai_rank_master, tag, + CTA_COMM_MASTER_WORKER); + MPI_Send(pack, lenpack, MPI_CHAR,ctai_rank_master, tag, + CTA_COMM_MASTER_WORKER); + + /* Free local objects */ + CTA_Pack_Free(&hpack); + } + +void CTA_Modbuild_par_worker_restoreinternalstate(){ + + MPI_Status status; + CTA_Model hmodel; + CTA_String hID; + CTA_Pack hpack; + int tag, lenpack; + char *pack; + + if (IDEBUG>0) printf("#%d start CTA_Modbuild_par_worker_restoreinternalstate\n",CTA_PAR_MY_RANK); + + tag=1234; + /* Receive the model handle */ + MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + + /* Receive instanceID to be restored */ + tag = 5321; + MPI_Recv(&lenpack, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + CTA_Pack_Create(lenpack,&hpack); + pack =CTA_Pack_GetPtr(hpack); + MPI_Recv(pack, lenpack, MPI_CHAR, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + CTA_Pack_AddCnt(hpack,lenpack); + + /* Get string from pack object*/ + CTA_String_Create(&hID); + CTA_String_Import(hID,hpack); + if (IDEBUG>0) { + char *ID = CTAI_String_GetPtr(hID); + printf("#%d worker RIS received instance ID = '%s'\n",CTA_PAR_MY_RANK, ID); + } + /* Release the internal state */ + CTA_Model_RestoreInternalState(hmodel, hID); + + /* Free local objects */ + CTA_Pack_Free(&hpack); + CTA_String_Free(&hID); +} + +void CTA_Modbuild_par_worker_releaseinternalstate(){ + + MPI_Status status; + CTA_Model hmodel; + CTA_String hID; + CTA_Pack hpack; + int tag, lenpack; + char *pack; + + if (IDEBUG>0) printf("#%d start CTA_Modbuild_par_worker_releaseinternalstate\n",CTA_PAR_MY_RANK); + + tag=1234; + /* Receive the model handle */ + MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + + /* Receive instanceID to be released */ + tag = 9321; + MPI_Recv(&lenpack, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + CTA_Pack_Create(lenpack,&hpack); + pack =CTA_Pack_GetPtr(hpack); + MPI_Recv(pack, lenpack, MPI_CHAR, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + CTA_Pack_AddCnt(hpack,lenpack); + + /* Get string from pack object*/ + CTA_String_Create(&hID); + CTA_String_Import(hID,hpack); + if (IDEBUG>0) { + char *ID = CTAI_String_GetPtr(hID); + printf("#%d worker RLS received instance ID = '%s'\n",CTA_PAR_MY_RANK, ID); + } + /* Release the internal state */ + CTA_Model_ReleaseInternalState(hmodel, hID); + + /* Free local objects */ + CTA_Pack_Free(&hpack); + CTA_String_Free(&hID); +} + +void CTA_Modbuild_par_worker_free(){ + + MPI_Status status; + CTA_Model hmodel; + int tag; + + tag=1234; + + /* Receive the model handle */ + MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + + /* Free model */ + modbuild_models_delete(hmodel); + CTA_Model_Free(&hmodel); +} + +void CTA_Modbuild_par_worker_getnoisecount(){ + MPI_Status status; + CTA_Model hmodel; + int tag, retval, nnoise; + + tag=1234; + + /* Receive the model handle */ + retval=MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + + /* Get number of noise parameters */ + retval=CTA_Model_GetNoiseCount(hmodel, &nnoise); + if (retval!=CTA_OK) { + printf("#%d CTA_Modbuild_par_worker_getnoisecount: error (%d) calling CTA_Model_GetNoiseCount\n",CTA_PAR_MY_RANK,retval ); + exit(-1); + } + + /* Send number of noise parameters */ + retval=MPI_Send(&nnoise,1, MPI_INT,ctai_rank_master,1234, + CTA_COMM_MASTER_WORKER); + +} + +void CTA_Modbuild_par_worker_getnoisecovar(){ + MPI_Status status; + CTA_Model hmodel; + int tag, retval, nnoise, inoise; + CTA_TreeVector *hcovar; + tag=1234; + + /* Receive the model handle */ + retval=MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + + /* Get number of noise parameters */ + retval=CTA_Model_GetNoiseCount(hmodel, &nnoise); + if (retval!=CTA_OK) { + printf("#%d CTA_Modbuild_par_worker_getnoisecount: error (%d) calling CTA_Model_GetNoiseCount\n", + CTA_PAR_MY_RANK,retval ); + exit(-1); + } + + /* Send number of noise parameters */ + retval=MPI_Send(&nnoise,1, MPI_INT,ctai_rank_master,1234, + CTA_COMM_MASTER_WORKER); + + if (IDEBUG>0){printf ("#%d Number of noise parameters is %d",CTA_PAR_MY_RANK,nnoise);} + + /* Allocate workspace */ + hcovar=CTA_Malloc(nnoise*sizeof(CTA_TreeVector)); + for (inoise=0;inoise0) printf("CREATE THE CTA_OBSDESCR_TABLE1\n"); + + /* Create the observation description */ +// retval=CTA_ObsDescr_Create(CTA_OBSDESCR_TABLE,hpack, &hdescr_tab); +// retval=CTA_Pack_Free(&hpack); +// if (IDEBUG>0) printf("Send vector to master\n"); + + /* Create vector for holding values */ + retval=CTA_ObsDescr_Observation_Count(hdescr_tab, &nmeasr); + if (IDEBUG>0) printf("#%d Number of observations %d\n",CTA_PAR_MY_RANK,nmeasr ); + retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nmeasr, CTA_DOUBLE, CTA_NULL, &values); + + /* Perform the interpolation */ + retval=CTA_Model_GetObsValues(hmodel, timespan, hdescr_tab, values); + if (retval!=CTA_OK) { + printf("#%d CTA_Modbuild_par_worker_getobsvalues Error (%d) calling CTA_Model_GetObsValues \n",CTA_PAR_MY_RANK, retval); + exit(-1); + } + /* Pack the vector */ + lenpack=nmeasr*sizeof(double)+100; + retval=CTA_Pack_Create(lenpack,&hpack); + retval=CTA_Vector_Export(values, hpack); + + /* Send vector to master */ + lentask= CTA_Pack_GetLen(hpack); + pack = CTA_Pack_GetPtr(hpack); + + /* Send length of packed state */ + if (IDEBUG>0) printf("#%d Send vector to master (lentask)\n",CTA_PAR_MY_RANK); + retval=MPI_Send(&lentask, 1, MPI_INT, ctai_rank_master, tag, + CTA_COMM_MASTER_WORKER); + if (IDEBUG>0) printf("#%d Send vector to master (packed vector)\n",CTA_PAR_MY_RANK); + + /* Send packed state */ + retval=MPI_Send(pack, lentask, MPI_CHAR, ctai_rank_master, tag, + CTA_COMM_MASTER_WORKER); + if (IDEBUG>0) printf("#%d Done sending vector\n",CTA_PAR_MY_RANK); + + + /* Free workvars */ + retval=CTA_Pack_Free(&hpack); + retval=CTA_Time_Free(×pan); + retval=CTA_Vector_Free(&values); + retval=CTA_ObsDescr_Free(&hdescr_tab); + + +} + +/* ----------------------------------------- */ + + +void CTA_Modbuild_par_worker_getobslocalization(){ + + MPI_Status status; + CTA_Pack hpack; + CTA_ObsDescr hdescr_tab; + CTA_Model hmodel; + CTA_Vector values; + int lenpack, retval, nmeasr, tag; + char *pack; + CTA_Time timespan; + int iobs; + CTA_Handle hstate; + int locIDEBUG = 0; + double distance; + + tag=1234; + + /* Receive the model handle */ + retval=MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + + timespan=CTA_NULL; + hdescr_tab=CTA_NULL; + modbuild_par_recvobsdescr(×pan, &hdescr_tab, tag); + if (locIDEBUG>0) printf("#%d received obsdescr %d\n",CTA_PAR_MY_RANK,hdescr_tab ); + + /* Create vector for holding values */ + retval=CTA_ObsDescr_Observation_Count(hdescr_tab, &nmeasr); + if (locIDEBUG>0) printf("#%d Number of observations %d\n",CTA_PAR_MY_RANK,nmeasr ); + + retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nmeasr, CTA_HANDLE, CTA_NULL, &values); + + /* receive distance */ + if (IDEBUG>0) {printf("#%d Receiving characteristic distance from master\n",CTA_PAR_MY_RANK);} + retval=MPI_Recv(&distance, 1, MPI_DOUBLE, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + + + /* Receive the vector with handles */ + if (locIDEBUG>0) {printf("#%d Receiving lenpack from master\n",CTA_PAR_MY_RANK);} + retval=MPI_Recv(&lenpack, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + if (locIDEBUG>0) {printf("#%d vector locvals lenpack =%d\n",CTA_PAR_MY_RANK, lenpack);} + + if (lenpack==0){ + // free something? + } + else { + retval=CTA_Pack_Create(lenpack,&hpack); + pack =CTA_Pack_GetPtr(hpack); + + if (locIDEBUG>0) {printf("#%d Receiving packed locvals from master\n",CTA_PAR_MY_RANK);} + retval=MPI_Recv(pack, lenpack, MPI_CHAR, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + if (IDEBUG>0) {printf("#%d Done Receiving packed locvals from master\n",CTA_PAR_MY_RANK); } + + retval=CTA_Pack_AddCnt(hpack,lenpack); + + /* Create the vector */ + retval = CTA_Vector_Import(values,hpack); + if (IDEBUG>0) {printf("#%d Done Creating vector %d\n",CTA_PAR_MY_RANK, retval);} + retval=CTA_Pack_Free(&hpack); + } + + if (locIDEBUG>0) printf("#%d now obtained null vector from master %d\n",CTA_PAR_MY_RANK,values ); + + /* Perform the interpolation */ + retval=CTA_Model_GetObsLocalization(hmodel, hdescr_tab, distance, values); + if (retval!=CTA_OK) { + printf("#%d CTA_Modbuild_par_worker_getobslocalization Error (%d) calling CTA_Model_GetOblocalization \n",CTA_PAR_MY_RANK, retval); + exit(-1); + } + + /* The vector cannot be packed since the state handles are only known locally. Therefore, + we have to pack and send the corresponding states. We do this for each observation.*/ + for (iobs=0;iobs0) printf("#%d sending localization tv (handle %d) of obs %d\n",CTA_PAR_MY_RANK,hstate,iobs+1 ); + modbuild_par_sendstate(hstate, NULL, 1234); + } + + + /* Free workvars */ + retval=CTA_Vector_Free(&values); + retval=CTA_ObsDescr_Free(&hdescr_tab); + retval=CTA_Time_Free(×pan); +} + + +void CTA_Modbuild_par_worker_getobsselect(){ + + MPI_Status status; + CTA_Time timespan; + CTA_String sselect; + CTA_ObsDescr hdescr_tab; + CTA_Model hmodel; + int lenstr, tag; + int retval_method; + char *select; + + tag=1234; + + /* Receive the model handle */ + MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER, &status); + + timespan=CTA_NULL; + hdescr_tab=CTA_NULL; + modbuild_par_recvobsdescr(×pan, &hdescr_tab, tag); + + /* Call obsselect */ + CTA_String_Create(&sselect); + retval_method=CTA_Model_GetObsSelect(hmodel,timespan,hdescr_tab,sselect); + + /* Send the error code back to the master */ + MPI_Send(&retval_method,1, MPI_INT,ctai_rank_master,tag, CTA_COMM_MASTER_WORKER); + + /* Send the select string */ + select=CTAI_String_GetPtr(sselect); + if (IDEBUG>0) printf("#%d CTA_Modbuild_par_worker_getobsselect: My @@ selection is '%s'\n",CTA_PAR_MY_RANK, select); + lenstr=strlen(select)+1; + MPI_Send(select,lenstr, MPI_CHAR,ctai_rank_master,tag, + CTA_COMM_MASTER_WORKER); + + /* Free workvars */ + CTA_String_Free(&sselect); + CTA_ObsDescr_Free(&hdescr_tab); + CTA_Time_Free(×pan); +} + +void CTA_Modbuild_par_worker(){ + MPI_Status status; + char task[256]; + + /* Note we have to set our own random-seed */ + CTA_INITIAL_RANDOM_SEED=CTA_INITIAL_RANDOM_SEED+ctai_rank; + if (IDEBUG>0) printf("#%d Set initial random seed: %ld\n", CTA_PAR_MY_RANK, CTA_INITIAL_RANDOM_SEED); + CTA_rand_seed((long) CTA_INITIAL_RANDOM_SEED); + + if (IDEBUG>0) printf("#%d I am the listening process of worker %d\n", CTA_PAR_MY_RANK, ctai_rank); + for (;1;){ + if (IDEBUG>0) printf("#%d waiting for message of master...\n",CTA_PAR_MY_RANK); + + MPI_Recv(task,256,MPI_CHAR, ctai_rank_master, 1234, + CTA_COMM_MASTER_WORKER ,&status); + + + if (IDEBUG>0) {printf("%d My task is: %s \n",CTA_PAR_MY_RANK,task);} + + if (strcmp(task, "cta_model_create") ==0) { + CTA_Modbuild_par_worker_create(); + } else if (strcmp(task, "cta_model_compute") ==0) { + CTA_Modbuild_par_worker_compute(); + } else if (strcmp(task, "cta_model_setstate") ==0) { + CTA_Modbuild_par_worker_setstate(STATE); + } else if (strcmp(task, "cta_model_getstate") ==0) { + CTA_Modbuild_par_worker_getstate(STATE); + } else if (strcmp(task, "cta_model_axpystate") ==0) { + CTA_Modbuild_par_worker_axpy(STATE); + } else if (strcmp(task, "cta_model_axpymodel") ==0) { + CTA_Modbuild_par_worker_axpymodel(); + } else if (strcmp(task, "cta_model_setforc") ==0) { + CTA_Modbuild_par_worker_setstate(FORC); + } else if (strcmp(task, "cta_model_getforc") ==0) { + CTA_Modbuild_par_worker_getstate(FORC); + } else if (strcmp(task, "cta_model_getstatescaling")==0) { + CTA_Modbuild_par_worker_getstate(SCAL); + } else if (strcmp(task, "cta_model_axpyforc") ==0) { + CTA_Modbuild_par_worker_axpy(FORC); + } else if (strcmp(task, "cta_model_getnoisecount") ==0) { + CTA_Modbuild_par_worker_getnoisecount(); + } else if (strcmp(task, "cta_model_getnoisecovar") ==0) { + CTA_Modbuild_par_worker_getnoisecovar(); + } else if (strcmp(task, "cta_model_getobsselect") ==0) { + CTA_Modbuild_par_worker_getobsselect(); + } else if (strcmp(task, "cta_model_addnoise") ==0) { + CTA_Modbuild_par_worker_addnoise(); + } else if (strcmp(task, "cta_model_export") ==0) { + CTA_Modbuild_par_worker_export(); + } else if (strcmp(task, "cta_model_import") ==0) { + CTA_Modbuild_par_worker_import(); + } else if (strcmp(task, "cta_model_getobsvalues") ==0) { + CTA_Modbuild_par_worker_getobsvalues(); + } else if (strcmp(task, "cta_model_getobslocalization") ==0) { + CTA_Modbuild_par_worker_getobslocalization(); + } else if (strcmp(task, "cta_model_getcurrenttime") ==0) { + CTA_Modbuild_par_worker_gettime(task); + } else if (strcmp(task, "cta_model_gettimehorizon") ==0) { + CTA_Modbuild_par_worker_gettime(task); + } else if (strcmp(task, "cta_model_free") ==0) { + CTA_Modbuild_par_worker_free(); + } else if (strcmp(task, "cta_model_savepersistentstate") ==0) { + CTA_Modbuild_par_worker_savepersistentstate(); + } else if (strcmp(task, "cta_model_loadpersistentstate") ==0) { + CTA_Modbuild_par_worker_loadpersistentstate(); + } else if (strcmp(task, "cta_model_saveinternalstate") ==0) { + CTA_Modbuild_par_worker_saveinternalstate(); + } else if (strcmp(task, "cta_model_restoreinternalstate") ==0) { + CTA_Modbuild_par_worker_restoreinternalstate(); + } else if (strcmp(task, "cta_model_releaseinternalstate") ==0) { + CTA_Modbuild_par_worker_releaseinternalstate(); + } else if (strcmp(task, "cta_model_announceobsvalues") ==0) { + CTA_Modbuild_par_worker_announceobsvalues(); + } else if (strcmp(task, "finalize") ==0) { + if (IDEBUG>0) printf("Task is finalize\n"); + // Free all models (in order to finalize workers as well) + modbuild_models_delete_all_models(); + MPI_Finalize(); + exit(0); + } else { + printf("#%d Unkown task '%s'",CTA_PAR_MY_RANK,task); + exit(-1); + } + } + printf("#%d CTA_Modbuild_par_worker: Internal error\n",CTA_PAR_MY_RANK); + exit(-1); +} + +#endif +void CTA_Modbuild_par_Finalize(){ +#if USE_MPI + + /* Send Task */ + char *task; + int lentask, irank; + int tag=1234; + int comSize; + MPI_Comm comm; + int is_intercomm; + int indx; + int isInit; + int rank_first; + + + if (IDEBUG>0) printf("#%d CTA_Modbuild_par_Finalize\n",CTA_PAR_MY_RANK); + // Loop over all communicators + task="finalize"; + lentask=strlen(task)+1; + + comm=MPI_COMM_WORLD; + for (indx=0; comm!=MPI_COMM_NULL; indx++){ + CTA_Par_GetAllCommByIndex(indx, &comm); + if (comm!=MPI_COMM_NULL){ + /* Determine processes to send finalize task + But first we have to check the kind of communicater + */ + MPI_Comm_test_inter(comm, &is_intercomm); + if (is_intercomm){ + rank_first=0; + MPI_Comm_remote_size(comm,&comSize); + } + else { + rank_first=1; + MPI_Comm_size(comm,&comSize); + } + + if (IDEBUG>0) printf("Size of communication group :%d\n", comSize); + if (IDEBUG>0) printf("Communicator is inter comm :%d\n",is_intercomm); + for (irank=rank_first;irank0) printf("Sending finalize to process %d\n", irank); + MPI_Send(task,lentask, MPI_CHAR, irank,tag, + comm); + } + } + } + + // Only finalize when mpi is initialized + if (IDEBUG>0) printf("#%d CTA_Modbuild_par_Finalize calling MPI_Finalize\n",CTA_PAR_MY_RANK); + MPI_Initialized(&isInit); + if (isInit){ + MPI_Finalize(); + } + exit(0); +#endif +} + + + +void CTA_Modbuild_par_CreateClass(CTA_ModelClass *modelcls){ + +#ifdef USE_MPI + + int ierr; //COSTA return value + int aierr[CTA_MODEL_NUMFUNC]; + int i; + CTA_Func hfunc[CTA_MODEL_NUMFUNC]; + CTA_Intf intf; + + + /* Calculate number of nodes */ + MPI_Comm_size(CTA_COMM_WORLD,&ctai_comm_size); + + /* Determine whether you are master or worker */ + MPI_Comm_rank(CTA_COMM_WORLD,&ctai_rank); + + if (CTA_FILTER_PROCESS) { + if (IDEBUG>0) printf("#%d I'm a filter process; my rank is %d\n",CTA_PAR_MY_RANK, ctai_rank); + } else { + /* I am a worker starting to wait for new tasks */ + if (IDEBUG>0) printf("#%d I'm a worker my rank is %d\n",CTA_PAR_MY_RANK, ctai_rank); + CTA_Modbuild_par_worker(); + } + + // Create a COSTA model component from my own implementation + intf=CTA_NULL; + for (i=0; i +#include "cta_mem.h" +#include "cta.h" +#include "ctai.h" +#include "f_cta_utils.h" +#include "cta_model_utilities.h" +#include "cta_util_statistics.h" +#include "cta_metainfo.h" +#include "cta_message.h" + +#define INDX_THIS ( 0) /* Handle of instance */ +#define INDX_TIME ( 1) /* Time instance of model (state) */ +#define INDX_TIMEHORIZON ( 2) /* Time instance of model (state) */ +#define INDX_STATE ( 3) /* State vector of model */ +#define INDX_ADD_NOISE_SPAN ( 4) +#define INDX_AXPY_FORC_SPAN ( 5) /* Timespan to add given offset to forcings */ +#define INDX_AXPY_FORC ( 6) /* offset to be added to model forcings */ +#define INDX_PARAM ( 7) /* State vector of model parameters */ +#define INDX_NAME_NOISE ( 8) +#define INDX_NNOISE ( 9) +#define INDX_ZERO_FORC (10) /* zero noise vector */ +#define INDX_USRDATA (11) +#define INDX_USRFUNC_CREATE (12) +#define INDX_USRFUNC_COVAR (13) +#define INDX_USRFUNC_OBS (14) +#define INDX_USRFUNC_OBSSEL (15) +#define INDX_USRFUNC_COMPUTE (16) +#define INDX_USRFUNC_FREE (17) +#define INDX_FORC (18) /* State vector of forcings */ +#define INDX_ADJ_SPAN (19) /* Time span to prepaire adjoint for */ +#define INDX_ADJ_DESCR_FORC (20) /* Observation description describing the forcings of the adjoint */ +#define INDX_ADJ_VFORC (21) /* Vector The forcings of the adjoint run that have been set */ +#define SIZE_DATABLK (22) + +#define IDEBUG (0) +#define CLASSNAME "CTA_Modbuild_sp" + +void modbuild_sp_create_size(CTA_Handle userdata, int *memsize, int *ierr){ + /* Create size for datablock holding: + 0 CTA_Model: mthis Handle of this model + 1 CTA_Time : time Time instance of model + 2 CTA_TreeVector: state Model state + 3 CTA_TreeVector: snoise Noise offset state-vector + 4 CTA_TreeVector: sparam Parameter state-vector + 5 CTA_Handle: tusrmod User data for model + 6... CTA_Func: func User functions + */ + *memsize=sizeof(CTA_Handle)*(SIZE_DATABLK); +} + +/** \brief Get a user function as specified in the input-tree + * \note internal function + * + * \param tinput I input-configuration tree + * \param path I path in input tree to user function + * \param func O handle to user function + * \return CTA_OK if successful + */ +#undef METHOD +#define METHOD "get_user_function" +int modbuild_sp_getusrfunction(CTA_Tree tinput, char *path, CTA_Func *func){ + CTA_Handle hfunc; + CTA_Func func_wrk; + CTA_Datatype datatype; + int retval; + + retval=CTA_Tree_GetHandleStr(tinput,path, &hfunc); + if (retval==CTA_OK) { + CTA_Handle_GetDatatype(hfunc, &datatype); + if (datatype==CTA_STRING) { + retval=CTA_Handle_Find(hfunc, CTA_FUNCTION, &func_wrk); + if (retval!=CTA_OK){ + char message[1024]; + sprintf(message,"Cannot find the function that is specified in %s \n",path); + CTA_WRITE_INFO(message); + return retval; + } + retval=CTA_Func_Duplicate(func_wrk,func); + if (retval!=CTA_OK) return retval; + } else if (datatype==CTA_FUNCTION) { + retval=CTA_Func_Duplicate(hfunc,func); + if (retval!=CTA_OK) return retval; + } else { + char message[1024]; + sprintf(message,"Specification in \"%s\" is neither CTA_STRING nor CTA_FUNC \n",path); + CTA_WRITE_ERROR(message); + return CTA_INCOMPATIBLE_HANDLE; + } + } else { + char message[1024]; + sprintf(message,"Path \"%s\" is not specified in input\n",path); + CTA_WRITE_INFO(message); + } + return CTA_OK; +} + +#undef METHOD +#define METHOD "Create" +void modbuild_sp_create_init(CTA_Handle *this, CTA_Handle *data, CTA_Handle *hinput, int *ierr){ + int cleanup; /*Flag indicating whether tinput must be freed */ + CTA_Func fusrcreate; /* Function handle of user implementation */ + CTA_Function *usrcreate; /* Function pointer of user implementation */ + CTA_Tree tinput; /* Input configuration tree */ + int retval; /* return status of COSTA method */ + CTA_String funcar1; /* Name of ar(1)-function */ + CTA_Handle hmodelinput; /* Handle to model specific input */ + CTA_Handle hspecialinput; /* Handle to special model input */ + double dzero; /* double with value 0.0 */ + double tstart, tend; /* start and end time of time horizon of model */ + + if (IDEBUG) printf("Start of modbuild_sp_create_init\n"); + + dzero=0.0; + + + /* Convert input into a COSTA-tree if it is not done already */ + *ierr=CTA_Model_Util_InputTree(*hinput, &tinput, &cleanup); + if (*ierr!=CTA_OK) return; + + /* Initialise DATA */ + + data[INDX_THIS] =*this; + data[INDX_STATE] =CTA_NULL; + data[INDX_AXPY_FORC] =CTA_NULL; + data[INDX_PARAM] =CTA_NULL; + data[INDX_NNOISE] =0; + data[INDX_ZERO_FORC] =CTA_NULL; + data[INDX_USRDATA] =CTA_NULL; + data[INDX_USRFUNC_CREATE] =CTA_NULL; + data[INDX_USRFUNC_COVAR] =CTA_NULL; + data[INDX_USRFUNC_OBS] =CTA_NULL; + data[INDX_USRFUNC_OBSSEL] =CTA_NULL; + data[INDX_USRFUNC_COMPUTE] =CTA_NULL; + data[INDX_USRFUNC_FREE] =CTA_NULL; + + + *ierr=CTA_Time_Create(&data[INDX_TIME]); + *ierr=CTA_Time_Create(&data[INDX_TIMEHORIZON]); + *ierr=CTA_Time_Create(&data[INDX_ADD_NOISE_SPAN]); + *ierr=CTA_Time_Create(&data[INDX_AXPY_FORC_SPAN]); + *ierr=CTA_String_Create(&data[INDX_NAME_NOISE]); + *ierr=CTA_Time_Create(&data[INDX_ADJ_SPAN]); + + /* First, check for a special 'model builder'-model: the AR(1) process model */ + + retval=CTA_Tree_GetHandleStr(tinput,"/modelbuild_sp/special_ar1", + &hspecialinput); + + if (retval ==CTA_OK) { + /* skip the remainder and provide the special ar(1) model */ + /* define the following functions directly: + modbuild_sp_ar1_compute + modbuild_sp_ar1_covar + + */ + *ierr = CTA_String_Create(&funcar1); + + *ierr = CTA_String_Set(funcar1, "modbuild_sp_ar1_create"); + *ierr=CTA_Handle_Find(funcar1, CTA_FUNCTION, &data[INDX_USRFUNC_CREATE]); + *ierr = CTA_String_Set(funcar1, "modbuild_sp_ar1_compute"); + *ierr=CTA_Handle_Find(funcar1, CTA_FUNCTION, &data[INDX_USRFUNC_COMPUTE]); + *ierr = CTA_String_Set(funcar1, "modbuild_sp_ar1_covar"); + *ierr=CTA_Handle_Find(funcar1, CTA_FUNCTION, &data[INDX_USRFUNC_COVAR]); + *ierr = CTA_String_Set(funcar1, "modbuild_sp_ar1_getobsval"); + *ierr=CTA_Handle_Find(funcar1, CTA_FUNCTION, &data[INDX_USRFUNC_OBS]); + + retval=CTA_Tree_GetHandleStr(tinput,"/modelbuild_sp/special_ar1", &hmodelinput); + + + } + else { + /* Get the provided function names from the input and store the + corresponding function handles in data-array */ + retval=modbuild_sp_getusrfunction(tinput, "/modelbuild_sp/functions/create", &data[INDX_USRFUNC_CREATE]); + retval=modbuild_sp_getusrfunction(tinput,"/modelbuild_sp/functions/covariance", &data[INDX_USRFUNC_COVAR]); + retval=modbuild_sp_getusrfunction(tinput, "/modelbuild_sp/functions/getobsvals", &data[INDX_USRFUNC_OBS]); + retval=modbuild_sp_getusrfunction(tinput, "/modelbuild_sp/functions/getobssel", &data[INDX_USRFUNC_OBSSEL]); + retval=modbuild_sp_getusrfunction(tinput, "/modelbuild_sp/functions/compute", &data[INDX_USRFUNC_COMPUTE]); + retval=modbuild_sp_getusrfunction(tinput, "/modelbuild_sp/functions/free", &data[INDX_USRFUNC_FREE]); + + + if (IDEBUG>0) { + printf("Modbuild_sp: the following function handles have been created:\n"); + printf("create: %d \n",data[INDX_USRFUNC_CREATE]); + printf("covariance: %d \n",data[INDX_USRFUNC_COVAR]); + printf("getobsvals: %d \n",data[INDX_USRFUNC_OBS]); + printf("getobssel: %d \n",data[INDX_USRFUNC_OBSSEL]); + printf("compute: %d \n",data[INDX_USRFUNC_COMPUTE]); + printf("free: %d \n",data[INDX_USRFUNC_FREE]); + printf("end message.\n"); + } + + /* Get model input from the input tree */ + retval=CTA_Tree_GetHandleStr(tinput,"/modelbuild_sp/model", &hmodelinput); + + } // end if: if ar1 then use standard function else read from xml-file + + /* Call the user supplied create function */ + fusrcreate=data[INDX_USRFUNC_CREATE]; + // printf("indx_usrfunc_create %d\n",INDX_USRFUNC_CREATE); + if (fusrcreate==CTA_NULL) { + CTA_WRITE_ERROR("Usrfunc_create function does not exist! \n"); + *ierr=CTA_NOT_IMPLEMENTED; + return; + } + + /* Get function pointer */ + *ierr=CTA_Func_GetFunc(fusrcreate,&usrcreate); + + if (*ierr!=CTA_OK) return; + + /* CALL USR_CREATE(CTA_Handle *hmodelinput, CTA_TreeVector *state, + CTA_TreeVector *sbound, CTA_TreeVector *sparam, int *nnoise, + CTA_Time *time0, CTA_String *snamnoise, + CTA_Handle *husrdata, int* ierr) */ + + /* Get input of the model itself */ + + /* call user function */ + if (IDEBUG) printf("Calling user function 'usrcreate'\n"); + usrcreate(&hmodelinput, &data[INDX_STATE], &data[INDX_AXPY_FORC], + &data[INDX_PARAM], &data[INDX_NNOISE], &data[INDX_TIMEHORIZON], + &data[INDX_NAME_NOISE], &data[INDX_USRDATA], ierr); + if (IDEBUG) printf("End of user function 'usrcreate'\n"); + if (*ierr!=CTA_OK) return; + + /* Set time (Start of the time horizon) */ + *ierr=CTA_Time_GetSpan(data[INDX_TIMEHORIZON],&tstart, &tend); + if (*ierr!=CTA_OK) return; + *ierr=CTA_Time_SetSpan(data[INDX_TIME], tstart, tend); + if (*ierr!=CTA_OK) return; + + /* Some additional initialisation: */ + //printf(" additional initialisation \n"); + + /* Set forcings axpy state vectors */ + if (data[INDX_AXPY_FORC]!=CTA_NULL) { + *ierr=CTA_TreeVector_SetConstant(data[INDX_AXPY_FORC], &dzero,CTA_DOUBLE); + if (*ierr!=CTA_OK) return; + *ierr=CTA_TreeVector_Duplicate(data[INDX_AXPY_FORC],&data[INDX_ZERO_FORC]); + if (*ierr!=CTA_OK) return; + } + + *ierr=CTA_Time_SetSpan(data[INDX_ADD_NOISE_SPAN],0.0,-1.0); /* empty span */ + if (*ierr!=CTA_OK) return; + *ierr=CTA_Time_SetSpan(data[INDX_AXPY_FORC_SPAN],0.0,-1.0); /* empty span */ + if (*ierr!=CTA_OK) return; + *ierr=CTA_Time_SetSpan(data[INDX_ADJ_SPAN],0.0,-1.0); /* empty span */ + if (*ierr!=CTA_OK) return; + + if (IDEBUG) printf("before cleaning up the input tree\n"); + + + /* Clean-up input tree */ + if (cleanup==CTA_TRUE){ + *ierr=CTA_Tree_Free(&tinput); + } + *ierr=CTA_OK; + if (IDEBUG) printf(" end of create_init \n"); +} + +void modbuild_sp_export(CTA_Handle *data, CTA_Handle *hexport, int *ierr){ + + CTA_Datatype datatype; + + /* Get type of handle of hexport */ + *ierr=CTA_Handle_GetDatatype(*hexport, &datatype); + if (*ierr != CTA_OK) return; + + /* Export header */ + if (datatype==CTA_PACK){ + CTA_Pack_Add(*hexport,data,sizeof(CTA_HANDLE)*SIZE_DATABLK); + } + else if (datatype==CTA_FILE){ + *ierr=CTA_FORMAT_NOT_SUPPORTED; + return; + } + else { + *ierr=CTA_FORMAT_NOT_SUPPORTED; + return; + } + + /* Pack current time */ + if (data[INDX_TIME]!=CTA_NULL){ + CTA_Time_Export(data[INDX_TIME], *hexport); + } + + /* Pack time horizon */ + if (data[INDX_TIMEHORIZON]!=CTA_NULL){ + CTA_Time_Export(data[INDX_TIMEHORIZON], *hexport); + } + + + /* Pack state-vector */ + if (data[INDX_STATE]!=CTA_NULL){ + CTA_TreeVector_Export(data[INDX_STATE], *hexport); + } + /* Pack noise span */ + if (data[INDX_ADD_NOISE_SPAN]!=CTA_NULL){ + CTA_Time_Export(data[INDX_ADD_NOISE_SPAN], *hexport); + } + /* Pack forcings span */ + if (data[INDX_AXPY_FORC_SPAN]!=CTA_NULL){ + CTA_Time_Export(data[INDX_AXPY_FORC_SPAN], *hexport); + } + + /* Pack adjoint span */ + if (data[INDX_ADJ_SPAN]!=CTA_NULL){ + CTA_Time_Export(data[INDX_ADJ_SPAN], *hexport); + } + + /* Pack forcings addition */ + if (data[INDX_AXPY_FORC]!=CTA_NULL){ + CTA_TreeVector_Export(data[INDX_AXPY_FORC], *hexport); + } + + /* Pack model parameters */ + if (data[INDX_PARAM]!=CTA_NULL){ + CTA_TreeVector_Export(data[INDX_PARAM], *hexport); + } +} + + +void modbuild_sp_import(CTA_Handle *data, CTA_Handle *himport, int *ierr){ + + CTA_Handle header[SIZE_DATABLK]; + CTA_Datatype datatype; + + /* Get type of handle of hexport */ + *ierr=CTA_Handle_GetDatatype(*himport, &datatype); + if (*ierr != CTA_OK) return; + + /* Import header */ + if (datatype==CTA_PACK){ + CTA_Pack_Get(*himport,header,sizeof(CTA_HANDLE)*SIZE_DATABLK); + } + else if (datatype==CTA_FILE){ + *ierr=CTA_FORMAT_NOT_SUPPORTED; + return; + } + else { + *ierr=CTA_FORMAT_NOT_SUPPORTED; + return; + } + + /* Unpack current time */ + if (header[INDX_TIME]!=CTA_NULL){ + CTA_Time_Import(data[INDX_TIME], *himport); + } + + /* Unpack time horizon */ + if (header[INDX_TIMEHORIZON]!=CTA_NULL){ + CTA_Time_Import(data[INDX_TIMEHORIZON], *himport); + } + + /* Unpack state-vector */ + if (header[INDX_STATE]!=CTA_NULL){ + CTA_TreeVector_Import(data[INDX_STATE], *himport); + } + + /* Unpack noise span */ + if (header[INDX_ADD_NOISE_SPAN]!=CTA_NULL){ + CTA_Time_Import(data[INDX_ADD_NOISE_SPAN], *himport); + } + + /* Unpack forcings span */ + if (header[INDX_AXPY_FORC_SPAN]!=CTA_NULL){ + CTA_Time_Import(data[INDX_AXPY_FORC_SPAN], *himport); + } + + /* Unpack adjoint span */ + if (header[INDX_ADJ_SPAN]!=CTA_NULL){ + CTA_Time_Import(data[INDX_ADJ_SPAN], *himport); + } + + /* Unpack forcings addition */ + if (header[INDX_AXPY_FORC]!=CTA_NULL){ + CTA_TreeVector_Import(data[INDX_AXPY_FORC], *himport); + } + + /* Unpack model parameters */ + if (header[INDX_PARAM]!=CTA_NULL){ + CTA_TreeVector_Import(data[INDX_PARAM], *himport); + } +} + + +void modbuild_sp_adjcompute(CTA_Handle *data ,int* time, int *ierr){ + *ierr=CTA_NOT_IMPLEMENTED; +} + +void modbuild_sp_adjprepare(CTA_Handle *data ,int* time, int *ierr){ + /* do we already have a add noise timespan? */ + if (data[INDX_ADJ_SPAN]==CTA_NULL) { + *ierr=CTA_Time_Create(&data[INDX_ADJ_SPAN]); + if (*ierr!=CTA_OK) return; + } + /* Just copy time span */ + *ierr=CTA_Time_Copy(*time,data[INDX_ADJ_SPAN]); +} + + +void modbuild_sp_adjsetforc(CTA_Handle *data ,int* hdescr, int *vforc, int *ierr){ + int nmeasr; + CTA_Vector vforc_temp; + + *ierr=CTA_ObsDescr_Observation_Count(*hdescr, &nmeasr); + if (*ierr!=CTA_OK) return; + if (nmeasr<0) return; + + /* Create work variables */ + *ierr=CTA_Vector_Duplicate(*vforc, &vforc_temp); + if (*ierr!=CTA_OK) return; + + /* TODO WE HAVE TO COPY THE OBJECT HOMEWORK FOR NILS FOR TIME BEEING HOPE THAT OBJECT IS NOT DELETED AND ONLY SAVE HANDLE!!!!!! */ + if (data[INDX_ADJ_DESCR_FORC]!=CTA_NULL){ + *ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, 1, CTA_HANDLE, CTA_NULL, &data[INDX_ADJ_DESCR_FORC]); + *ierr=CTA_Vector_SetVal(data[INDX_ADJ_DESCR_FORC], 1, hdescr, CTA_HANDLE); + + *ierr=CTA_Vector_Duplicate(*vforc, &data[INDX_ADJ_VFORC]); + } + *ierr=CTA_Vector_AppendVal(data[INDX_ADJ_DESCR_FORC], hdescr, CTA_HANDLE); + + *ierr=CTA_Vector_AppendVal(data[INDX_ADJ_VFORC], &vforc_temp, CTA_HANDLE); +} + + +void modbuild_sp_free(CTA_Handle *data ,int *ierr){ + + CTA_Func fusrfree; /* Function handle of user implementation */ + CTA_Function *usrfree; /* Function pointer of user implementation */ + CTA_Handle husrdata; /* Handle to user data */ + + fusrfree = data[INDX_USRFUNC_FREE]; + husrdata = data[INDX_USRDATA]; + + if (fusrfree!=CTA_NULL){ + /* Get function pointer */ + *ierr=CTA_Func_GetFunc(fusrfree,&usrfree); + if (*ierr!=CTA_OK) return; + usrfree(&husrdata, ierr); + } + *ierr=CTA_OK; +} + +#undef METHOD +#define METHOD "Compute" +void modbuild_sp_compute( + CTA_Handle *data, + CTA_Time *timSim, /* current time, "now" */ + int *ierr) +{ + CTA_TreeVector state; /* state vector of model */ + CTA_TreeVector sparam; /* state vector of model parameters */ + CTA_TreeVector saxpyforc; /* state vector of added forcings */ + CTA_Func fusrcompute; /* Function handle of user implementation */ + CTA_Function *usrcompute; /* Function pointer of user implementation */ + CTA_Handle husrdata; /* Handle to model-spefic data */ + CTA_Time timemod; /* Current time instance of model */ + CTA_Time timeaddnoise; /* Time instance for model to add noise */ + CTA_Time timeaxpy; /* Timespan to apply given noise axpy */ + double t1, t2, t1Sim, tDum; /* start and end of simulation timespan */ + BOOL inspan; /* simulation timespan in inside given span */ + BOOL baddnoise; /* Model must add its own noise or not */ + CTA_Time timespan; /* Timespan of computations */ + int isSpan; /* Flag indicating whether timSim is a timespan */ + double eps; /* Tollerence for comparing time instances */ + + + fusrcompute = data[INDX_USRFUNC_COMPUTE]; + + /* check whether we have an implementation and number of noise + parameters is >0 */ + if (fusrcompute==CTA_NULL) { + CTA_WRITE_ERROR("user compute function does not exist \n"); + *ierr=CTA_NOT_IMPLEMENTED; + return; + } + + state = data[INDX_STATE]; + timemod = data[INDX_TIME]; + timeaddnoise = data[INDX_ADD_NOISE_SPAN]; + timeaxpy = data[INDX_AXPY_FORC_SPAN]; + sparam = data[INDX_PARAM]; + husrdata = data[INDX_USRDATA]; + + + /* time management */ + *ierr=CTA_Time_GetSpan(*timSim,&t1Sim, &t2); + *ierr=CTA_Time_GetSpan(timemod,&t1, &tDum); + + *ierr=CTA_Time_IsSpan(*timSim, &isSpan); + if (isSpan == CTA_TRUE){ + eps=M_EPS*fabs(t1Sim)+M_EPS; + if (fabs(t1Sim-t1)>eps) { + printf("Error in modbuild_sp_compute: current time of model is %f; start of simulation span is %f\n", + t1Sim,t1); + exit(-1); + } + } + + /* Set the simulation span of this compute */ + *ierr=CTA_Time_Create(×pan); + *ierr=CTA_Time_SetSpan(timespan, t1, t2); + + /* Addition axpy of noise */ + *ierr=CTA_Time_InSpan(timespan, timeaxpy, &inspan); /* is "now" inside timeasxpy? */ + if (*ierr!=CTA_OK) { + CTA_WRITE_ERROR("Timespan is not in interval"); + return; + } + if (inspan) { + saxpyforc=data[INDX_AXPY_FORC]; + } else { + saxpyforc=data[INDX_ZERO_FORC]; + } + + /* Addition of noise by model itself */ + + *ierr=CTA_Time_InSpan(timespan, timeaddnoise, &baddnoise); + if (*ierr!=CTA_OK) { + printf("modbuild_sp_compute:cta_time_inspan noise: ierr: %d \n",*ierr); + return; + } + + /* Get function pointer */ + *ierr=CTA_Func_GetFunc(fusrcompute,&usrcompute); + if (*ierr!=CTA_OK) return; + + *ierr=CTA_Time_GetSpan(timespan, &t1, &t2); +// printf("cta_modbuild_compute:before call my_compute t1, t2: %f %f \n", t1,t2); + fflush(stdout); + + + /* CALL USR_COMPUTE(CTA_Time *timesspan,CTA_TreeVector *state, + CTA_TreeVector *saxpyforc, BOOL *baddnoise, + CTA_TreeVector *sparam, CTA_Handle husrdata, + int* ierr) */ + usrcompute(×pan, &state, &saxpyforc, &baddnoise, &sparam, + &husrdata, ierr); + + if (*ierr!=CTA_OK) { + printf("modbuild_sp_compute:usr_compute: WRONG: %d \n",*ierr); + fflush(stdout); + return; + } + + // printf("Back in modbuild_sp_compute; adjust timespan \n"); + + *ierr=CTA_Time_SetSpan(timemod, t2, t2); + if (*ierr!=CTA_OK) { + printf("modbuild_sp_compute:CTA_Time_SetSpan: %d ",*ierr); + fflush(stdout); + return; + } + + *ierr=CTA_Time_Free(×pan); + +} + + +void modbuild_sp_setstate(CTA_Handle *data, CTA_TreeVector *state, int *ierr){ + + *ierr=CTA_TreeVector_Copy(*state,data[INDX_STATE]); +} + +void modbuild_sp_getstate(CTA_Handle *data, CTA_TreeVector *state, int *ierr){ + + if (IDEBUG>9) {printf("DEBUG: modbuild_sp_getstate:state : %d \n",*state);} + + /* Duplicate the state if necessary */ + if (*state==CTA_NULL){ + + *ierr=CTA_TreeVector_Duplicate(data[INDX_STATE], state); + } + else { + /* Just copy the state */ + + *ierr=CTA_TreeVector_Copy(data[INDX_STATE], *state); + } +} + + +void modbuild_sp_axpymodel(CTA_Handle *datay, double *alpha, CTA_Handle *datax, + int *ierr){ + + *ierr=CTA_TreeVector_Axpy(datay[INDX_STATE], *alpha, datax[INDX_STATE]); +} + + +void modbuild_sp_axpystate(CTA_Handle *data, double *alpha, CTA_TreeVector *statex, + int *ierr){ + + *ierr=CTA_TreeVector_Axpy(data[INDX_STATE], *alpha, *statex); +} + +void modbuild_sp_setforc(CTA_Handle *data, CTA_TreeVector *state, int *ierr){ + + *ierr=CTA_TreeVector_Copy(*state,data[INDX_FORC]); +} + +void modbuild_sp_getforc(CTA_Handle *data, CTA_TreeVector *state, int *ierr){ + + /* Duplicate the state if necessary */ + if (*state==CTA_NULL){ + *ierr=CTA_TreeVector_Duplicate(data[INDX_FORC], state); + } + else { + /* Just copy the state */ + *ierr=CTA_TreeVector_Copy(data[INDX_FORC], *state); + } + +} + +void modbuild_sp_axpyforc(CTA_Handle *data, CTA_Time *tspan, double *alpha, + CTA_TreeVector *statex, int *ierr){ + + + /* Do we have a time object ? */ + if (data[INDX_AXPY_FORC_SPAN]==CTA_NULL) { + *ierr=CTA_Time_Create(&data[INDX_AXPY_FORC_SPAN]); + if (*ierr!=CTA_OK) return; + } + /* Copy time */ + *ierr=CTA_Time_Copy(data[INDX_AXPY_FORC_SPAN],*tspan); + if (*ierr!=CTA_OK) return; + + /* Copy state */ + *ierr=CTA_TreeVector_Copy(*statex,data[INDX_AXPY_FORC]); + if (*ierr!=CTA_OK) return; + + /* apply scaling factor alpha */ + *ierr=CTA_TreeVector_Scal(data[INDX_AXPY_FORC], *alpha); + if (*ierr!=CTA_OK) return; +} + +void modbuild_sp_setparam(CTA_Handle *data, CTA_TreeVector *state, int *ierr){ + *ierr=CTA_TreeVector_Copy(*state,data[INDX_PARAM]); + /* *ierr=CTA_NOT_IMPLEMENTED; */ /*wednesday, 27-09-06 jhs*/ +} + +void modbuild_sp_getparam(CTA_Handle *data, CTA_TreeVector *state, int *ierr){ + + /* Duplicate the state if necessary */ + if (*state==CTA_NULL){ + *ierr=CTA_TreeVector_Duplicate(data[INDX_PARAM], state); + } + else { + /* Just copy the state */ + *ierr=CTA_TreeVector_Copy(data[INDX_PARAM], *state); + } + + /* *ierr=CTA_NOT_IMPLEMENTED;*/ /*wednesday, 27-09-06 jhs*/ +} + +void modbuild_sp_axpyparam(CTA_Handle *data, double *alpha, CTA_TreeVector *statex, + int *ierr){ + *ierr=CTA_TreeVector_Axpy(data[INDX_PARAM], *alpha, *statex); + /* *ierr=CTA_NOT_IMPLEMENTED; */ /*wednesday, 27-09-06 jhs*/ +} + +void modbuild_sp_getnoisecount(CTA_Handle *data, int* nnoise, int* ierr) { + *nnoise=data[INDX_NNOISE]; + *ierr=CTA_OK; +} + +void modbuild_sp_getcurrenttime(CTA_Handle *data, int* tCurrent, int* ierr) { + + *ierr=CTA_Time_Copy(data[INDX_TIME],*tCurrent); +} + +void modbuild_sp_gettimehorizon(CTA_Handle *data, int* tHorizon, int* ierr) { + + *ierr=CTA_Time_Copy(data[INDX_TIMEHORIZON],*tHorizon); +} + + +void modbuild_sp_getnoisecovar(CTA_Handle *data, CTA_TreeVector *colsvar, int* ierr){ + int nnoise; /* number of noise parameters */ + CTA_Handle husrdata; /* Handle to model-spefic data */ + CTA_TreeVector state; /* state vector of model */ + CTA_TreeVector snoise; /* noise state vector */ + CTA_TreeVector snoisesub; /* name of noise state vector */ + int inoise; /* loop counter over noise parameters */ + CTA_Func fusrcovar; /* Function handle of user implementation */ + CTA_Function *usrcovar;/* Function pointer of user implementation */ + char *tag; /* name/tag of substate with noise */ + int len; /* length of string tag (excluding char(0))*/ + + nnoise = data[INDX_NNOISE]; + state = data[INDX_STATE]; + snoisesub = data[INDX_NAME_NOISE]; + snoise = CTA_NULL; + fusrcovar = data[INDX_USRFUNC_COVAR]; + husrdata = data[INDX_USRDATA]; + + /* check whether we have an implementation and number of noise + parameters is >0 */ + + + if (nnoise==0 || fusrcovar==CTA_NULL) { + *ierr=CTA_NOT_IMPLEMENTED; + return; + } + + + /* create state-vectors when colsvar contains CTA_NULL */ + for (inoise=0; inoise0 */ + if (fusrobs==CTA_NULL) { + *ierr=CTA_NOT_IMPLEMENTED; + return; + } + + /* Get function pointer */ + *ierr=CTA_Func_GetFunc(fusrobs,&usrobs); + if (*ierr!=CTA_OK) return; + + /* CALL USR_OBS(CTA_TreeVector *state ,CTA_ObsDescr *hdescr CTA_Vector *vval, + CTA_Handle husrdata, int* ierr) */ + usrobs(&state, hdescr, vval, &husrdata, ierr); + +} + +void modbuild_sp_getobsselect(CTA_Handle *data, CTA_Time *ttime, CTA_ObsDescr *hdescr, + CTA_String *sselect, int* ierr){ + + CTA_TreeVector state; /* state vector of model */ + CTA_Handle husrdata; /* Handle to model-spefic data */ + CTA_Func fusrobssel; /* Function handle of user implementation */ + CTA_Function *usrobssel; /* Function pointer of user implementation */ + + if (IDEBUG>10){printf("modbuild_sp_getobsselect START \n");} + + state = data[INDX_STATE]; + fusrobssel = data[INDX_USRFUNC_OBSSEL]; + husrdata = data[INDX_USRDATA]; + + /* check whether we have an implementation and number of noise + parameters is >0 */ + if (fusrobssel==CTA_NULL) { + if (IDEBUG>10){printf("modbuild_sp_getobsselect ; returning 1>0 \n");} + *ierr=CTA_String_Set(*sselect," 1 >= 0 "); + return; + } + + /* Get function pointer */ + *ierr=CTA_Func_GetFunc(fusrobssel,&usrobssel); + if (*ierr!=CTA_OK) return; + + /* CALL USR_OBSSEL(CTA_TreeVector *state, CTA_Time *ttime, CTA_ObsDescr *hdescr, + CTA_String *sselect, CTA_Handle husrdata, + int* ierr) */ + usrobssel(&state, ttime, hdescr, sselect, &husrdata, ierr); + +} + +void modbuild_sp_addnoise(CTA_Handle *data, CTA_Time *ttime, int* ierr){ + + /* do we already have a add noise timespan? */ + if (data[INDX_ADD_NOISE_SPAN]==CTA_NULL) { + *ierr=CTA_Time_Create(&data[INDX_ADD_NOISE_SPAN]); + if (*ierr!=CTA_OK) return; + } + /* Just copy time span */ + *ierr=CTA_Time_Copy(*ttime,data[INDX_ADD_NOISE_SPAN]); +} + + + + + + + /* CALL USR_CREATE(CTA_Handle *hmodelinput, CTA_TreeVector *state, + CTA_TreeVector *sbound, CTA_TreeVector *sparam, int *nnoise, + CTA_Time *time0, CTA_String *snamnoise, + CTA_Handle *husrdata, int* ierr) */ + + +/* +typedef struct { +char name[20]; + BOOL is2D; + int nx, ny, nz,nsize; +double x_origin, y_origin, z_origin; +double dx,dy,dz; +} CTAI_Grid; + +typedef struct { +char name[20]; + int npoints; +double x_origin, y_origin; +CTA_Vector h_x_coords,h_y_coords; +} CTAI_curve; +*/ + + +void modbuild_sp_ar1_create(CTA_Handle *hinput, CTA_TreeVector *state, + CTA_TreeVector *sbound, CTA_TreeVector *sparam, int *nnoise, + CTA_Time *tHorizon, CTA_String *snamnoise, + CTA_Handle *husrdata, int *ierr){ + double p0[3]; + double zeroes = 0.0; + CTA_Vector hvec1,hvec2, h_x_coords,h_y_coords; + CTA_String htag, hgridtype; + int len, retval; + char *str_grid, *str_tag; + CTAI_Gridm thisgrid, hgrid2; + CTA_Metainfo hdescr_state; + CTA_Handle hhandle; + int rest0=12; + CTA_Time inputTimeHorizon; + + *ierr=CTA_Tree_GetValueStr(*hinput,"parameters/std_dev",&p0[0],CTA_DOUBLE); + *ierr=CTA_Tree_GetValueStr(*hinput,"parameters/kar_t",&p0[1],CTA_DOUBLE); + *ierr=CTA_Tree_GetValueStr(*hinput,"parameters/kar_l",&p0[2],CTA_DOUBLE); + if (*ierr != CTA_OK) { + printf("Error in ar1_create. Initial parameters not specified in input\n"); + return; + } + + *ierr=CTA_Metainfo_Create(&hdescr_state); + // printf("metainfo_create: retval %d \n", *ierr); + + /* get the tag of the metainfo of the noise model */ + *ierr = CTA_Tree_GetHandleStr(*hinput,"tag",&htag); + *ierr = CTA_String_GetLength (htag, &len); + str_tag=CTA_Malloc((len+1)*sizeof(char)); + *ierr = CTA_String_Get(htag, str_tag); + + *ierr=CTA_Metainfo_SetRest(hdescr_state,&rest0); + + *ierr=CTA_Metainfo_SetTag(hdescr_state,str_tag); + + // *ierr=CTA_Metainfo_SetTag(hdescr_state,"ar1-noise"); + + *ierr = CTA_Tree_GetHandleStr(*hinput,"grid/type_id",&hgridtype); + if (*ierr != CTA_OK) { + printf("Error in ar1_create. Grid type not given. \n"); + return; } + + *ierr = CTA_String_GetLength (hgridtype, &len); + str_grid=CTA_Malloc((len+1)*sizeof(char)); + *ierr = CTA_String_Get(hgridtype, str_grid); + if (!strncmp("2D",str_grid,len) || !strncmp("3D",str_grid,len )) { + // printf("grid type is 2D/3D, now reading more parameters \n"); + + strcpy(thisgrid.name,"AR(1)-process"); + thisgrid.type = 2; + thisgrid.nz = 1; thisgrid.dz = 1.0; thisgrid.z_origin = 0.0; + *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridsize/nx", &thisgrid.nx, CTA_INTEGER); + + *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridsize/ny", &thisgrid.ny, CTA_INTEGER); + + *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridparams/x_origin", + &thisgrid.x_origin, CTA_DOUBLE); + *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridparams/y_origin", + &thisgrid.y_origin, CTA_DOUBLE); + *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridparams/dx",&thisgrid.dx, CTA_DOUBLE); + *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridparams/dy",&thisgrid.dy, CTA_DOUBLE); + + if (!strncmp("3D",str_grid,len )) { + thisgrid.type = 3; + *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridsize/nz", &thisgrid.nz, CTA_INTEGER); + *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridparams/z_origin", + &thisgrid.z_origin, CTA_DOUBLE); + *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridparams/dz",&thisgrid.dz,CTA_DOUBLE); + } + thisgrid.nsize = thisgrid.nx * thisgrid.ny * thisgrid.nz; + + + } // end of reading 2D/3D grid info + + + if (!strncmp("curve",str_grid,len)) { + // printf("grid type is curve, now reading more parameters \n"); + + strcpy(thisgrid.name,"AR(1)-process"); + thisgrid.type = 10; + *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridsize/nsize", &thisgrid.nsize, CTA_INTEGER); + *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridparams/x_origin", + &thisgrid.x_origin, CTA_DOUBLE); + *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridparams/y_origin", + &thisgrid.y_origin, CTA_DOUBLE); + // read vector with x-coords and y-coords + *ierr=CTA_Tree_GetHandleStr(*hinput,"grid/x-coords",&h_x_coords); + *ierr=CTA_Tree_GetHandleStr(*hinput,"grid/y-coords",&h_y_coords); + } // end of reading curve info + // printf("end of reading grid info \n"); + + *ierr = CTA_TreeVector_Create("ar1 model","ar1_model",state); + if (*ierr != CTA_OK) {printf("Error in ar1_create. no state \n");return; } + *ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR,thisgrid.nsize, CTA_DOUBLE, CTA_NULL, &hvec1); + + CTA_Vector_SetConstant(hvec1,&zeroes,CTA_DOUBLE); + CTA_TreeVector_SetVec(*state,hvec1); + + + if (*ierr != CTA_OK) { printf("Error in ar1_create. State not initialised \n"); + return; + } + + + /* attach grid information to state meta-info */ + // printf("Now grid will be attached to metainfo \n"); + *ierr=CTA_Metainfo_SetGrid(hdescr_state, &thisgrid); + // printf("QQ metainfo_setgrid: nsize(=8*12) %d |%d| \n", *ierr ,thisgrid.nsize); + if (*ierr != CTA_OK) { printf("Error in ar1_create.grid not attached \n"); + return; + } + + // attach meta-info to state + + // printf("Now meta_info will be attached to state \n"); + *ierr = CTA_TreeVector_SetMetainfo(*state, hdescr_state); + if (*ierr != CTA_OK) { printf("Error in ar1_create.meta-info not attached \n"); + return; + } + + *sbound = CTA_NULL; + + /* fill husrdata with grid and parameters; necessary for ar1_covar */ + /* do this by constructing a tree */ + *ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, 3, CTA_DOUBLE, CTA_NULL, &hvec2); + *ierr=CTA_Vector_SetVals(hvec2,p0,3,CTA_DOUBLE); + + CTA_Tree_Create(husrdata); + + CTA_Tree_AddHandle(*husrdata, "p0", hvec2); + + *ierr=CTA_Handle_Create("grid",CTA_DATABLOCK,&hgrid2,&hhandle); + if (*ierr!=CTA_OK) {printf("ar1_create: error husrdata tree \n");} + *ierr=CTA_Tree_AddHandle(*husrdata, "hgrid", hhandle); + if (*ierr!=CTA_OK) {printf("ar1_create: error husrdata tree \n");} + + + + /* fill sparam with parameters */ + *ierr = CTA_TreeVector_Create(" ","ar1_params",sparam); + *ierr=CTA_TreeVector_SetVec(*sparam,hvec2); + if (*ierr!=CTA_OK) {printf("error vector params create %d\n",*ierr);} + // *sparam = CTA_NULL; + + + *nnoise = thisgrid.nsize; + //printf("ar1_create: nnoise %d \n",*nnoise); + + /* Read time from the input configuration */ + retval=CTA_Tree_GetHandleStr (*hinput, "timehorizon", &inputTimeHorizon); + if (retval==CTA_OK){ + *ierr=CTA_Time_Copy(inputTimeHorizon,*tHorizon); + } else { + printf("WARNING: No time horizon has been set for AR(1) model\n"); + printf("WARNING: Using default horizon [0, 1.0e6]\n"); + *ierr=CTA_Time_SetSpan(*tHorizon,0.0,1.0e6); /* empty span */ + } + if (*ierr!=CTA_OK) {printf("error handling the time horizon of the model %d\n",*ierr);} + + + *ierr=CTA_String_Set(*snamnoise,"ar1_model"); + + // printf("End of modbuild_sp_ar1_create \n"); + printf("-------------------------- \n"); +} + +/*------------------------------------------------------------------------------------*/ + + /* CALL USR_COVAR(CTA_TreeVector *colsvar,int* nnoise, CTA_Handle husrdata, + int* ierr) */ +void modbuild_sp_ar1_covar(CTA_TreeVector *colsvar,int* nnoise, CTA_Handle husrdata,int* ierr){ + int i,j,ier2; + double p0[3]; + int nmodel; + CTA_Handle hhandle; + CTAI_Gridm *hgrid; + CTA_Vector hvec1, hvec2; + double *Lar1; + double *col1; + + /* NB gridinfo moet uit de husrdata gehaald! */ + /* en ook de parameters in p0! */ + /* note: nnoise equals nmodel */ + + nmodel = *nnoise; + Lar1=CTA_Malloc(nmodel*nmodel*sizeof(double)); + col1=CTA_Malloc(nmodel*sizeof(double)); + hgrid=CTA_Malloc(sizeof(CTAI_Gridm)); + + CTA_Vector_Create(CTA_DEFAULT_VECTOR, 3, CTA_DOUBLE, CTA_NULL, &hvec1); + ier2 = CTA_Tree_GetHandleStr(husrdata,"/p0",&hvec1); + if (IDEBUG) printf ("-----------ar1_covar:tree: ier2=%d \n",ier2); + ier2=CTA_Vector_GetVals(hvec1,p0,3,CTA_DOUBLE); + if (IDEBUG) printf("ar1_covar: getvals ier p0 %d %f \n",ier2, p0[1]); + + ier2 = CTA_Tree_GetHandleStr(husrdata,"/hgrid",&hhandle); + + ier2=CTA_Handle_GetData((CTA_Handle) hhandle,(void**) &hgrid); + + if (IDEBUG) printf("ar1_covar: ier2 grid.nx %d %d \n",ier2,hgrid->nx); + + + ier2 = modbuild_sp_compute_covars(nmodel,Lar1,p0, *hgrid); + + for (i=0;ib ? a: b) +#define IDEBUG (1) + + +#define CTA_MODEL_DEFINECLASS_F77 F77_CALL(cta_model_defineclass,CTA_MODEL_DEFINECLASS) +#define CTA_MODEL_CREATE_F77 F77_CALL(cta_model_create,CTA_MODEL_CREATE) +#define CTA_MODEL_COMPUTE_F77 F77_CALL(cta_model_compute,CTA_MODEL_COMPUTE) +#define CTA_MODEL_SETSTATE_F77 F77_CALL(cta_model_setstate,CTA_MODEL_SETSTATE) +#define CTA_MODEL_GETSTATE_F77 F77_CALL(cta_model_getstate,CTA_MODEL_GETSTATE) +#define CTA_MODEL_AXPYSTATE_F77 F77_CALL(cta_model_axpystate,CTA_MODEL_AXPYSTATE) +#define CTA_MODEL_GETSTATESCALING_F77 F77_CALL(cta_model_getstatescaling,CTA_MODEL_GETSTATESCALING) +#define CTA_MODEL_SETFORC_F77 F77_CALL(cta_model_setforc,CTA_MODEL_SETFORC) +#define CTA_MODEL_GETFORC_F77 F77_CALL(cta_model_getforc,CTA_MODEL_GETFORC) +#define CTA_MODEL_AXPYFORC_F77 F77_CALL(cta_model_axpyforc,CTA_MODEL_AXPYFORC) +#define CTA_MODEL_SETPARAM_F77 F77_CALL(cta_model_setparam,CTA_MODEL_SETPARAM) +#define CTA_MODEL_GETPARAM_F77 F77_CALL(cta_model_getparam,CTA_MODEL_GETPARAM) +#define CTA_MODEL_AXPYPARAM_F77 F77_CALL(cta_model_axpyparam,CTA_MODEL_AXPYPARAM) +#define CTA_MODEL_GETTIMEHORIZON_F77 F77_CALL(cta_model_gettimehorizon,CTA_MODEL_GETTIMEHORIZON) +#define CTA_MODEL_GETCURRENTTIME_F77 F77_CALL(cta_model_getcurrenttime,CTA_MODEL_GETCURRENTTIME) +#define CTA_MODEL_GETNOISECOVAR_F77 F77_CALL(cta_model_getnoisecovar,CTA_MODEL_GETNOISECOVAR) +#define CTA_MODEL_GETNOISECOUNT_F77 F77_CALL(cta_model_getnoisecount,CTA_MODEL_GETNOISECOUNT) +#define CTA_MODEL_FREE_F77 F77_CALL(cta_model_free,CTA_MODEL_FREE) +#define CTA_MODEL_ANNOUNCEOBSVALUES_F77 F77_CALL(cta_model_announceobsvalues,CTA_MODEL_ANNOUNCEOBSVALUES) +#define CTA_MODEL_GETOBSVALUES_F77 F77_CALL(cta_model_getobsvalues,CTA_MODEL_GETOBSVALUES) +#define CTA_MODEL_GETOBSSELECT_F77 F77_CALL(cta_model_getobsselect,CTA_MODEL_GETOBSSELECT) +#define CTA_MODEL_ADDNOISE_F77 F77_CALL(cta_model_addnoise,CTA_MODEL_ADDNOISE) +#define CTA_MODEL_IMPORT_F77 F77_CALL(cta_model_import,CTA_MODEL_IMPORT) +#define CTA_MODEL_EXPORT_F77 F77_CALL(cta_model_export,CTA_MODEL_EXPORT) +#define CTA_MODEL_GETOBSLOCALIZATION_F77 F77_CALL(cta_model_getobslocalization,CTA_MODEL_GETOBSLOCALIZATION) +#define CTA_MODEL_ADJSETFORC_F77 F77_CALL(cta_model_adjsetforc,CTA_MODEL_ADJSETFORC) +#define CTA_MODEL_ADJCOMPUTE_F77 F77_CALL(cta_model_adjcompute,CTA_MODEL_ADJCOMPUTE) +#define CTA_MODEL_ADJPREPARE_F77 F77_CALL(cta_model_adjprepare,CTA_MODEL_ADJPREPARE) +#define CTA_MODEL_LOADPERSISTENTSTATE_F77 F77_CALL(cta_model_loadpersistentstate,CTA_MODEL_LOADPERSISTENTSTATE) +#define CTA_MODEL_SAVEINTERNALSTATE_F77 F77_CALL(cta_model_saveinternalstate,CTA_MODEL_SAVEINTERNALSTATE) +#define CTA_MODEL_RESTOREINTERNALSTATE_F77 F77_CALL(cta_model_restoreinternalstate,CTA_MODEL_RESTOREINTERNALSTATE) +#define CTA_MODEL_RELEASEINTERNALSTATE_F77 F77_CALL(cta_model_releaseinternalstate,CTA_MODEL_RELEASEINTERNALSTATE) +#define CTA_MODEL_SAVEPERSISTENTSTATE_F77 F77_CALL(cta_model_savepersistentstate,CTA_MODEL_SAVEPERSISTENTSTATE) + +#define CLASSNAME "CTA_Model" + + +/* Struct holding all data associated to an COSTA Model */ +typedef struct{ + CTA_ObsDescr ObsDescr; + double *times; + int ntimes; + int isset; + CTA_Vector values; +} CTAI_AnnoinceObs; + +typedef struct { + int flag; + double t_step; +} CTAI_Barrier; + +typedef struct { +CTA_Func functions[CTA_MODEL_NUMFUNC]; +CTA_ModelClass hmodcl; +void *data; /*implementation specific data */ +CTAI_AnnoinceObs announced; +CTAI_Barrier barrier; +} CTAI_Model; + +typedef struct { +CTA_Func functions[CTA_MODEL_NUMFUNC]; +} CTAI_ModelClass; + + +void CTAI_Clear_Announced(CTAI_AnnoinceObs *announced, BOOL init){ + + /* Free existing data */ + if (!init){ + if (announced->times) { + free(announced->times); + announced->times=NULL; + } + if (announced->values!=CTA_NULL){ + CTA_Vector_Free(&(announced->values)); + } + if (announced->ObsDescr !=CTA_NULL){ + CTA_ObsDescr_Free(&(announced->ObsDescr)); + } + } + + announced->ObsDescr=CTA_NULL; + announced->values=CTA_NULL; + announced->times=NULL; + announced->ntimes=0; + announced->isset=FALSE; +} + + + +/** \brief Check model handle and return data of a model instance + * + * \param hmodel I Model instance handle + * \param findex I Index/ID of user function + * \param data O Data block of model + * \param function O function pointer to user function + * \return error status: CTA_OK if successful + */ +int CTAI_Model_GetDataAndFunc(CTA_Model hmodel, int findex, CTAI_Model **data, CTA_Function **function){ + int retval; /* Return status of COSTA method */ + + /* Check type of handle */ + retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL); + if (retval!=CTA_OK) return retval; + + /* Get Modeldata object */ + retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void**) data); + if (retval!=CTA_OK) return retval; + + /* Check whether user function is implemented */ + if ((*data)->functions[findex]==CTA_NULL) return CTA_NOT_IMPLEMENTED; + + /* Get function */ + retval=CTA_Func_GetFunc((*data)->functions[findex],function); + if (retval!=CTA_OK) return retval; + + return CTA_OK; +} + +#undef METHOD +#define METHOD "GetSet" +/* function used by the different get and set functions + (all user implementations have the same interface) */ +int CTAI_Model_GetSet( + CTA_Model hmodel, /* handle of model */ + CTA_Time tspan, + CTA_Handle *hthing, /* set or returned item */ + int Func_ID, /* ID (parameter) of user function */ + int idomain +){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data associated to model */ + double one; /* value 1.0 in double */ + CTA_ModelClass hmodcl; /* handle to ModelClass */ + int isBlocked; /* flag indicating tha model is currently blocked */ + + /* Check type of handle */ + retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL); + if (retval!=CTA_OK) return retval; + + /* Get Modeldata object */ + retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void*) &data); + if (retval!=CTA_OK) return retval; + + /* Check if model is currently blocked */ + hmodcl = data->hmodcl; + isBlocked = CTAI_ModelFac_GetBlock(hmodcl, hmodel); + if (isBlocked==CTA_TRUE) { + if (Func_ID==I_CTA_MODEL_GET_STATE || Func_ID==CTA_MODEL_GET_STATEDOMAIN ){ + CTA_WRITE_WARNING("CTA_MODEL_GETSTATE: model is currently blocked.\n"); + } + else { + CTA_WRITE_ERROR("This operation is not permitted while model is blocked\n"); + return -1; + } + } + + /* Clear the announced administration in case of I_CTA_MODEL_SET_STATE */ + if (Func_ID==I_CTA_MODEL_SET_STATE){ + CTAI_Clear_Announced(&(data->announced), FALSE); + } + + /* Check existence of Function pointer :*/ + + /* -special treatment for CTA_MODEL_GET_STATESCALING */ + /* if function does not exist return state containing 1.0 */ + if (Func_ID==CTA_MODEL_GET_STATESCALING){ + if (data->functions[Func_ID]==CTA_NULL){ + /* create a state-vector when necessary by calling getstate */ + if (*hthing==CTA_NULL) { + if (Func_ID==CTA_MODEL_GET_STATESCALING){ + retval=CTAI_Model_GetSet(hmodel, CTA_NULL, hthing, + I_CTA_MODEL_GET_STATE, idomain); + } + if (retval!=CTA_OK) return retval; + } + /* set identity */ + one=1.0; + return CTA_TreeVector_SetConstant((CTA_TreeVector) *hthing,&one,CTA_DOUBLE); + } + } + + if (data->functions[Func_ID]==CTA_NULL) return CTA_NOT_IMPLEMENTED; + + retval=CTA_Func_GetFunc(data->functions[Func_ID],&function); + if (retval!=CTA_OK) return retval; + + /* Call function */ + if (Func_ID== CTA_MODEL_SET_FORC || Func_ID== CTA_MODEL_GET_FORC ){ + function(data->data, &tspan, hthing,&retval); + } + else if (Func_ID == CTA_MODEL_GET_STATEDOMAIN){ + function(data->data, &idomain, hthing, &retval); + } else { + function(data->data, hthing,&retval); + } + return retval; +} + +#undef METHOD +#define METHOD "SetBarrier" +/* Set Barrier information for this model */ +int CTAI_Model_SetBarrier(CTA_ModelClass hmodcl, CTAI_Barrier *barrier ) { + + int retval; /* Return value of COSTA call */ + char *flag_barrier; /* flag */ + double t_step; /* maximum interval that model may run */ + + if (IDEBUG>0) {printf("CTA_Model_SetBarrier: Start of function \n");} + + /* get barrier information from the model class */ + retval = CTAI_ModelFac_GetBarrierData(hmodcl, &flag_barrier, &t_step); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error: cannot get barrier information."); + return retval; + } + + barrier->flag = CTA_FALSE; + barrier->t_step = 0.0; + + if (flag_barrier != NULL) { + if ( (0 == strcmp("true", flag_barrier)) && t_step ) { + /* Set flag_barrier */ + barrier->flag = CTA_TRUE; + /* Set t_step */ + barrier->t_step = t_step; + } + } + if (IDEBUG>0) {printf("CTA_Model_SetBarrier: End of function \n");} + return CTA_OK; +} + +int CTAI_Model_Axpy( + CTA_Model hmodel, /* y element in axpy (handle of model) */ + CTA_Time tspan, + double alpha, /* scalar multyplication factor for x */ + CTA_TreeVector h_x, /* x element in axpy state handle of model handle */ + int axpy_id, /* ID (parameter) of axpy user function */ + int get_id, /* ID (parameter) of get user function */ + int set_id /* ID (parameter) of set user function */ + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data associated to model hmodel */ + CTA_TreeVector y; /* internal y in model state */ + + + /* Check type of model handle */ + retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL); + if (retval!=CTA_OK) return retval; + + /* Get data object from model */ + retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void*) &data); + if (retval!=CTA_OK) return retval; + + /* Check whether user implementation is available */ + if (axpy_id==CTA_NULL){ + /* Try default implementation :*/ + if (get_id==CTA_NULL || set_id==CTA_NULL) return CTA_NOT_IMPLEMENTED; + + /* get y-vector from model */ + y=CTA_NULL; + retval=CTAI_Model_GetSet(hmodel,tspan, &y,get_id, -1); + if (retval!=CTA_OK) return retval; + + /* do a state axpy */ + retval=CTA_TreeVector_Axpy(y, alpha,h_x); + if (retval!=CTA_OK) return retval; + + /* set new y-vector in model */ + retval=CTAI_Model_GetSet(hmodel,tspan, &y,set_id, -1); + if (retval!=CTA_OK) return retval; + + /* free work variable */ + retval=CTA_TreeVector_Free(&y,CTA_TRUE); + if (retval!=CTA_OK) return retval; + + } + else{ + /* Use user implementation */ + retval=CTA_Func_GetFunc(data->functions[axpy_id],&function); + if (retval!=CTA_OK) return retval; + if (axpy_id==CTA_MODEL_AXPY_FORC){ + function(data->data, &tspan, &alpha, &h_x, &retval); + } + else { + function(data->data, &alpha, &h_x, &retval); + } + if (retval!=CTA_OK) return retval; + } + return CTA_OK; +} + + +//int CTA_Model_DefineClass( +// const char *name, +// const CTA_Func h_func[CTA_MODEL_NUMFUNC], +// CTA_ModelClass *hmodcl +// ){ +// +// CTAI_ModelClass *data; +// int retval; +// int i; +// +// /* Allocate new Vector object */ +// data=CTA_Malloc(sizeof(CTAI_ModelClass)); +// +// for (i=0;ifunctions[i]=h_func[i]; +// } +// +// // Allocate new handle and return eror when unsuccesfull +// retval=CTA_Handle_Create(name,CTA_MODELCLASS,data,hmodcl); +// return retval; +//} + +#undef METHOD +#define METHOD "Create" +int CTA_Model_Create(CTA_ModelClass hmodcl, CTA_Handle userdata, CTA_Model *hmodel){ + + CTAI_Model *model; + int memsize; + int retval; + CTAI_ModelClass *clsdata; + CTA_Function *function; + CTA_Tree tConfig; + + if (IDEBUG>0) { + printf("Start of CTA_Model_Create hmodcl=%d userdata=%d\n",hmodcl,userdata); + } + + /* Check in what context we are */ + if (!(hmodcl==CTA_MODBUILD_PAR) && + CTA_IS_PARALLEL==CTA_TRUE && + CTA_MY_PROC_TYPE==CTA_ParMaster) { + + /* We are not creating an instance of the parallel model builder */ + /* We are running parallel and this is the master process */ + /* We create the model on one of the worker processes */ + CTA_Tree_Create(&tConfig); + CTA_Tree_AddHandle(tConfig, "model", userdata); + CTA_Tree_AddHandle(tConfig, "modelclass", hmodcl); + /* Create model (using parallel modelbuild model */ + //printf("CREATE A MODEL USING PARALLE MODEL BUILDER\n"); + retval=CTA_Model_Create(CTA_MODBUILD_PAR, tConfig, hmodel); + if (retval!=CTA_OK){ + char message[1024]; + sprintf(message,"#%d Error in model create %d\n",CTA_PAR_MY_RANK, retval); + } + + // DON't free since we will lose the input then!!! + // Have to fix this in Tree-object // + // retval=CTA_Tree_Free(&tConfig); + + //printf("WE ZIJN NA DE PARALLEL MODEL CREATE!!!\n"); + } else { + int i; + //printf("CREATING A MODEL LOCALLY\n"); + + /* We are running sequential or we are a worker process */ + /* We need to create the model instance locally */ + + //printf("WE ZIJN BIJ DE SEQUENTIAL MODEL CREATE!!!\n"); + //exit(-1); + + + if (IDEBUG>0) printf("#%d Start of cta_model_create\n", CTA_PAR_MY_RANK); + //if (IDEBUG>0) printf("#%d hmodcls=%d type=%d",CTA_PAR_MY_RANK, hmodcl,CTAI_Handle_GetDatatype(hmodcl)); + if (IDEBUG>0) printf("#%d hmodcls=%d type=%d \n",CTA_PAR_MY_RANK, hmodcl,999); + + retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_modelclass handle"); + return retval; + } + + /* Get class data containing all function pointers */ + retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + if (IDEBUG>0) printf("#%d cta_model_create: Initialise model (size)\n", CTA_PAR_MY_RANK); + /* determine size of data object (CTA_VECTOR_CREATE_SIZE)*/ + if (clsdata->functions[I_CTA_MODEL_CREATE_SIZE]==CTA_NULL) + { + printf("#%d ERROR in CTA_Model_Create\n", CTA_PAR_MY_RANK); + printf("#%d There is no handle for the function 'Create_size'\n", CTA_PAR_MY_RANK); + return CTA_NOT_IMPLEMENTED; + } + retval=CTA_Func_GetFunc(clsdata->functions[I_CTA_MODEL_CREATE_SIZE],&function); + if (retval!=CTA_OK) { + char message[1024]; + sprintf(message,"#%d ERROR in CTA_Model_Create;\nCannot get handle for function 'Create_size'\n", CTA_PAR_MY_RANK); + CTA_WRITE_ERROR(message); + return retval; + } + (void) function(&userdata,&memsize,&retval); + if (retval) { + CTA_WRITE_ERROR("Error in function CTA_MODEL_CREATE_SIZE"); + return retval; + } + + /* allocate memory for new model object */ + model=CTA_Malloc(sizeof(CTAI_Model)); + model->data=CTA_Malloc(memsize); + + /* copy function pointers */ + for (i=0;ifunctions[i]=clsdata->functions[i]; + } + /* set other general information */ + model->hmodcl=hmodcl; + + // set barrier information for current model + retval = CTAI_Model_SetBarrier( hmodcl, &model->barrier); + if (retval) { + CTA_WRITE_ERROR("Error in CTA_Model_SetBarrierData"); + return retval; + } + + /* Initialise announced observation admin */ + CTAI_Clear_Announced(&(model->announced), TRUE); + + /* Copy user's function handles */ + if (clsdata->functions[I_CTA_MODEL_CREATE_INIT]==CTA_NULL) return CTA_NOT_IMPLEMENTED; + + /* Allocate new handle and return error when unsuccesfull */ + retval=CTA_Handle_Create("model",CTA_MODEL,model,hmodel); + if (retval) { + CTA_WRITE_ERROR("Handle CTA_MODEL cannot be created"); + return retval; + } + + if (IDEBUG>0) printf("#%d cta_model_create: Initialise model (init)\n", CTA_PAR_MY_RANK); + /* Initialise a new model */ + retval=CTA_Func_GetFunc(clsdata->functions[I_CTA_MODEL_CREATE_INIT],&function); + + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function CTA_MODEL_CREATE_INIT"); + return retval; + } + + (void) function(hmodel, model->data, &userdata, &retval); + if (IDEBUG>0) printf("#%d cta_model_create: Initialise model (done) retval=%d\n", CTA_PAR_MY_RANK,retval); + if (retval) { + CTA_WRITE_ERROR("Error in function CTA_MODEL_CREATE_INIT"); + return retval; + } + } + /* The model has been created. Administrate this model in the modelclass object */ + CTAI_ModelFac_AddModelInstance(hmodcl, *hmodel); + + if (IDEBUG>0) printf("#%d cta_model_create: end of function\n", CTA_PAR_MY_RANK); + return CTA_OK; +} + + +#undef METHOD +#define METHOD "AddNoise" +int CTA_Model_AddNoise( + CTA_Model hmodel, /* handle of model */ + CTA_Time htime /* timespan of simulation */ +){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data assosiated to model */ + + /* Check type of handle */ + retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_model handle"); + return retval; + } + + /* Get Modeldata object */ + retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* Get Function pointer */ + if (data->functions[CTA_MODEL_ADD_NOISE]==CTA_NULL) return CTA_NOT_IMPLEMENTED; + retval=CTA_Func_GetFunc(data->functions[CTA_MODEL_ADD_NOISE],&function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function CTA_MODEL_ADD_NOISE"); + return retval; + } + + /* Call function */ + function(data->data,&htime,&retval); + return retval; +} + +#undef METHOD +#define METHOD "PerformTimesteps" +/* handle barrier information and advance the model in time */ +int CTAI_Model_PerformTimesteps( + CTA_Model hmodel, /* handle of model */ + CTA_Function *function, /* Function that must be called */ + CTA_Time htime, /* timespan to compute */ + int mindBarrier /* Flag to activate barrier check fixed timestepping */ +){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Model *data; /* Data associated to model */ + double tstart, tstop; /* Begin and end time of htime */ + double tstep; /* Maximum number of steps that model can proceed */ + int isspan; /* flag indicating whether input is timespan */ + int isBlocked; /* flag indicating that maximum number of model steps is exceeded */ + + if (IDEBUG>0) { + printf("CTA_MODEL DEBUG: Calling performTimeStep function \n"); + } + + /* Check type of handle */ + retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_model handle"); + return retval; + } + + /* Get Modeldata object */ + retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + isBlocked = CTA_FALSE; + if (mindBarrier == CTA_TRUE){ + /* Time management */ + retval=CTA_Time_GetSpan(htime,&tstart,&tstop); + retval=CTA_Time_IsSpan(htime, &isspan); + /* Is there a possible barrier? */ + if (data->barrier.flag==CTA_TRUE) { + int nstep; + tstep = data->barrier.t_step; + nstep = (int) ((tstop-tstart)/tstep+0.5); + if (IDEBUG>0) { + printf("CTA_performTimesteps: Time information: \n"); + printf("Tstart= %f, Tstep=%f, Tstop=%f, nStep=%d\n ", tstart, tstep, tstop, nstep); + } + if (nstep>1) { + if (IDEBUG>0) printf("CTA_performTimesteps: set block; I have to wait\n"); + isBlocked = CTA_TRUE; + } + else { + if (IDEBUG>0) printf("CTA_performTimesteps: no barrier in this time interval\n"); + } + } + } + + if (isBlocked == CTA_TRUE) { + if (IDEBUG>0) printf("CTA_performTimesteps: TimeStepAllModels\n"); + /* set block for this model in modelclass administration */ + retval=CTAI_ModelFac_SetBlock(data->hmodcl,hmodel); + /* advance all childmodels */ + retval=CTAI_ModelFac_TimeStepAllModels(data->hmodcl,function, tstart, tstop); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("problem in TimeStepAllModels \n"); + return retval; + } + } else { + if (IDEBUG>0) printf("CTA_performTimesteps: compute for model %d \n", hmodel); + function(data->data,&htime,&retval); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("problem in compute"); + return retval; + } + } + return CTA_OK; +} + + + + +#undef METHOD +#define METHOD "Compute" +/* handle announced observations if necessary and advance the model in + time by calling method PerformTimeSteps */ +int CTA_Model_Compute( + CTA_Model hmodel, /* handle of model */ + CTA_Time htime /* timespan of simulation */ +){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data associated to model */ + double tstart, tstop; /* Begin and end time of htime */ + double t1, t2; /* begin and end time of sub-step */ + CTA_Time hstep; /* Singe step in compute */ + int ntimes; /* number of times where observations are + * available */ + int ntimes_loop; /* number time intervals for compute */ + CTA_ObsDescr obsdescr_sub;/* Observation description of subset of + observations */ + int nobs; /* number of observations in obsdescr_sub */ + CTA_RelTable reltab; /* Relation table for selection of + observations within a give timespan */ + CTA_Vector vals; /* predicted values in a sub-step */ + double eps; /* small value for comparing doubles */ + CTA_Time currentTime; /* time instance of the model */ + int isspan; /* flag indicating whether input is timespan */ + + /* Time management */ + retval=CTA_Time_GetSpan(htime,&tstart,&tstop); + retval=CTA_Time_IsSpan(htime, &isspan); + if (isspan!=CTA_TRUE) { + // Get the current time of the model + CTA_Time_Create(¤tTime); + CTA_Model_GetCurrentTime(hmodel,currentTime); + retval=CTA_Time_GetSpan(currentTime,&t1,&t2); + tstart=t1; + CTA_Time_SetSpan(htime,tstart,tstop); + CTA_Time_Free(¤tTime); + } + + if (IDEBUG) printf("entering cta_model_compute: tstart,tstop: %d %f %f \n",htime,tstart,tstop); + + if (IDEBUG) printf("CTA_MODEL DEBUG: Calling compute function \n"); + + /* Check type of handle */ + retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_model handle"); + return retval; + } + + /* Get Modeldata object */ + retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* Get Function pointer */ + if (data->functions[I_CTA_MODEL_COMPUTE]==CTA_NULL) return CTA_NOT_IMPLEMENTED; + retval=CTA_Func_GetFunc(data->functions[I_CTA_MODEL_COMPUTE],&function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_MODEL_COMPUTE"); + return retval; + } + + /* Do we need to handle announced observations? */ + ntimes=data->announced.ntimes; + if (IDEBUG) printf("We have %d times with announced observations\n",ntimes); + if (ntimes>0){ + int itime; + /* Do we need to interrupt the computations? */ + + if (IDEBUG>0) { + for (itime=0;itimeannounced.times[itime]); + } + printf("First announced time %f\n",data->announced.times[0]); + printf("Last announced time %f\n",data->announced.times[ntimes-1]); + printf("compute interval [%f,%f]\n",tstart,tstop); + } + /* check whether announce is within this compute interval */ + if (data->announced.times[0]<=tstart || + tstop announced.times[ntimes-1]){ + fprintf(stderr,"Error: Interval of announced observations lies before current time of the model\n"); + fprintf(stderr,"Tstart= %g, first announced=%g\n", + tstart,data->announced.times[0]); + fprintf(stderr,"Tstop= %g, last announced=%g\n", + tstop,data->announced.times[ntimes-1]); + return CTA_ANNOUNCED_OBS_INTERVAL_ERROR; + } + + /* Create relation table for observations */ + retval=CTA_RelTable_Create(&reltab); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot create RelTable"); + return retval; + } + + /* determine number of steps in the time-loop by checking whether + the last observation is at time tstop */ + eps=M_EPS*MAX(fabs(tstart),fabs(tstop))+M_EPS; + if (fabs(tstop-data->announced.times[ntimes-1])0) {printf("Number of steps in compute is %d\n",ntimes_loop);} + + /* loop over all intervals of observations */ + CTA_Time_Create(&hstep); + CTA_Time_SetSpan(hstep,tstart,data->announced.times[0]); + + for (itime=0;itime0){ + CTA_Time_GetSpan(hstep,&t1,&t2); + printf("running from %f to %f \n",t1,t2); + } + + /* create an obs-selection for this timestep */ + + retval=CTA_ObsDescr_CreateTimSel(data->announced.ObsDescr, hstep, + reltab, &obsdescr_sub); + if (retval!=CTA_OK) { + char message[1024]; + sprintf(message,"Cannot create obs-selection for timestep %d",hstep); + CTA_WRITE_ERROR(message); + return retval; + } + + /* Determine the number of observations */ + retval=CTA_ObsDescr_Observation_Count(obsdescr_sub, &nobs); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot determine the number of observations"); + return retval; + } + + if (IDEBUG) {printf("Number of obs in this step are %d\n",nobs);} + + /* perform timestep(s) */ + retval = CTAI_Model_PerformTimesteps(hmodel,function,hstep,CTA_TRUE); + if (retval!=CTA_OK) { + char message[1024]; + sprintf(message,"Error in performing timestep %d",hstep); + CTA_WRITE_ERROR(message); + return retval; + } + + /* create vector for holding the observations */ + retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR,nobs,CTA_DOUBLE,CTA_NULL, + &vals); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot create vector"); + return retval; + } + + /* get observed values */ + retval=CTA_Model_GetObsValues(hmodel,hstep,obsdescr_sub,vals); + if (retval!=CTA_OK) { + char message[1024]; + sprintf(message,"Cannot get observated values of timestep %d",hstep); + CTA_WRITE_ERROR(message); + return retval; + } + + /* copy observed values in global vector */ + retval=CTA_RelTable_ApplyInv(reltab, vals, data->announced.values); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot copy observed values in global vector"); + return retval; + } + + if (IDEBUG) { + printf("predicted values:\n"); + CTA_Vector_Export(data->announced.values,CTA_FILE_STDOUT); + } + /* Free variables */ + retval=CTA_Vector_Free(&vals); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot free vector"); + return retval; + } + + retval=CTA_ObsDescr_Free(&obsdescr_sub); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot free ObsDescr??"); + return retval; + } + + + /* set timespan for next loop */ + if (itimeannounced.times[itime], + data->announced.times[itime+1]); + } else { + CTA_Time_SetSpan(hstep,data->announced.times[ntimes-1],tstop); + } + } + + /* Mark announced observations as being set */ + data->announced.isset=TRUE; + + /* Free variables */ + retval=CTA_Time_Free(&hstep); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot free time"); + return retval; + } + + retval=CTA_RelTable_Free(&reltab); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot free RelTable"); + return retval; + } + + if (IDEBUG) printf("CTA_MODEL DEBUG: (1) Return code export function %d \n", retval); + + } else { + /* No model handles, we only need to call the compute method */ + retval=CTA_Time_GetSpan(htime,&tstart,&tstop); + retval = CTAI_Model_PerformTimesteps(hmodel,function,htime,CTA_TRUE); + if (IDEBUG) printf("CTA_MODEL DEBUG: (2) Return code compute function %d \n", + retval); + return retval; + } + return CTA_OK; +} + + + +int CTA_Model_SetState( + CTA_Model hmodel, /* handle of model */ + CTA_TreeVector hstate /* model state that must be set */ +){ + + return CTAI_Model_GetSet(hmodel, CTA_NULL, &hstate,I_CTA_MODEL_SET_STATE, -1); +} + +int CTA_Model_GetStateScaling( + CTA_Model hmodel, /* handle of model */ + CTA_TreeVector *hstate /* returned scaling vector for model state */ +){ + return CTAI_Model_GetSet(hmodel, CTA_NULL, hstate, + CTA_MODEL_GET_STATESCALING, -1); +} + + +int CTAI_Model_GetObsLocalization( + CTA_Model hmodel, /* handle of model */ + CTA_ObsDescr hdescr, /* observation description */ + double distance, /* characteristic distance */ + int iDomain, /* sub domain number (<0 whole model) */ + CTA_Vector locVecs +){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data associated to model */ + int iObs, nObs; + double one=1.0; + CTA_TreeVector hState; + + int domain_version; + + + domain_version=iDomain>=0; + + if (domain_version){ + retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_GET_OBSLOCALIZATIONDOMAIN, &data, &function); + } + else { + retval=CTAI_Model_GetDataAndFunc(hmodel, I_CTA_MODEL_GETOBSLOCALIZATION, &data, &function); + } + + if (retval == CTA_NOT_IMPLEMENTED){ + printf("Debug: CTA_Model_GetObsLocalization: No localization function is implemented for this model\n"); + printf("Debug: Consider running method without localization (if possible), this will improve performance \n"); + /* Return identity localization */ + + retval = CTA_Vector_GetSize(locVecs, &nObs); + printf("Lengte van de obs-vector is %d\n",nObs); + + for (iObs=1; iObs<=nObs; iObs++){ + CTA_Vector_GetVal(locVecs, iObs, &hState, CTA_HANDLE); + if (domain_version){ + CTA_Model_GetStateDomain(hmodel,iDomain, &hState); + } + else { + CTA_Model_GetState(hmodel,&hState); + } + CTA_TreeVector_SetConstant(hState, &one, CTA_DOUBLE ); + CTA_Vector_SetVal(locVecs, iObs, &hState, CTA_HANDLE); + } + return retval; + } + else if (retval == CTA_OK) { + /* Call user supplied implementation */ + if (IDEBUG) printf("CTA_MODEL DEBUG: Calling Localization function \n"); + if (domain_version){ + function(data->data, &hdescr, &distance, iDomain, &locVecs, &retval); + } + else { + function(data->data, &hdescr, &distance, &locVecs, &retval); + } + if (IDEBUG) printf("CTA_MODEL DEBUG: Return code Localization function %d \n", + retval); + return retval; + } + else { + /* Error in retrieving function handle */ + CTA_WRITE_ERROR("Error in CTAI_Model_GetDataAndFunc"); + return retval; + } +} + + +int CTA_Model_GetObsLocalization( + CTA_Model hmodel, /* handle of model */ + CTA_ObsDescr hdescr, /* observation description */ + double distance, /* characteristic distance */ + CTA_Vector locVecs +){ + + return CTAI_Model_GetObsLocalization(hmodel,hdescr, distance, -1, locVecs); + +} + + + + + +int CTA_Model_GetState( + CTA_Model hmodel, /* handle of model */ + CTA_TreeVector *hstate /* returned model state */ +){ + // printf("cta_model_getstate : model %d state %d\n ",hmodel,*hstate); + return CTAI_Model_GetSet(hmodel, CTA_NULL, hstate, I_CTA_MODEL_GET_STATE, -1); +} + +int CTA_Model_SetForc( + CTA_Model hmodel, /* handle of model */ + CTA_Time tspan, + CTA_TreeVector hforc /* model forcings values that must be set */ +){ + return CTAI_Model_GetSet(hmodel,tspan, &hforc,CTA_MODEL_SET_FORC, -1); +} + +int CTA_Model_GetForc( + CTA_Model hmodel, /* handle of model */ + CTA_Time tspan, + CTA_TreeVector *hforc /* returned model forcings values */ +){ + return CTAI_Model_GetSet(hmodel,tspan, hforc,CTA_MODEL_GET_FORC, -1); + +} + +int CTA_Model_AxpyForc( + CTA_Model hmodel, /* handle of model */ + CTA_Time tspan, + double alpha, /* alpha of axpy */ + CTA_TreeVector h_x /* x-vector of axpy */ +){ + return CTAI_Model_Axpy(hmodel, tspan, alpha, h_x, CTA_MODEL_AXPY_FORC, + CTA_MODEL_GET_FORC,CTA_MODEL_SET_FORC); +} + +int CTA_Model_SetParam( + CTA_Model hmodel, /* handle of model */ + CTA_TreeVector hparam /* model parameters that must be set */ +){ + return CTAI_Model_GetSet(hmodel,CTA_NULL, &hparam,CTA_MODEL_SET_PARAM, -1); +} + +int CTA_Model_GetParam( + CTA_Model hmodel, /* handle of model */ + CTA_TreeVector *hparam /* returned model parameters */ +){ + return CTAI_Model_GetSet(hmodel,CTA_NULL, hparam,CTA_MODEL_GET_PARAM, -1); +} + +int CTA_Model_AxpyParam( + CTA_Model hmodel, /* handle of model */ + double alpha, /* alpha of axpy */ + CTA_TreeVector h_x /* x-vector of axpy */ +){ + return CTAI_Model_Axpy(hmodel, CTA_NULL, alpha, h_x, CTA_MODEL_AXPY_PARAM, + CTA_MODEL_GET_PARAM,CTA_MODEL_SET_PARAM); +} + +/* AXPY operation for models: add alpha times a vector to a model + Importante notice: the usual order of the arguments differs from + the order we use in this function. In BLAS we have: + y=alpha*x+y, yielding in a call like axpy(alpha,x,y) + + For the model axpy, the model state is y.! Because the model handle + is the first argument in all COSTA model calls. axpy for models + is defined by; + axpy(y,alpha,x) + + The "vector" x can have two different types + 1) CTA_TreeVector (COSTA state vector) + 2) CTA_MODEL (COSTA model) +*/ +#undef METHOD +#define METHOD "AxpyState" +int CTA_Model_AxpyState( + CTA_Model hmodel, /* y element in axpy (handle of model) */ + double alpha, /* scalar multyplication factor for x */ + CTA_Handle h_x /* x element in axpy state handle of model handle */ +){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data_y; /* Data assosiated to model hmodel */ + CTAI_Model *data_x; /* Data assosiated to model h_x */ + CTA_TreeVector hstate_x; /* State vector (x) */ + CTA_TreeVector hstate_y; /* State vector (y) */ + BOOL same_models; /* flag: model class of hmode and h_x are the same */ + CTA_Handle dt; + + /* Check type of model handle */ + retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_model handle"); + return retval; + } + + /* Get Modeldata object */ + retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void*) &data_y); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* Clear the announced administration */ + CTAI_Clear_Announced(&(data_y->announced), FALSE); + retval = CTA_Handle_GetDatatype(h_x, &dt); + if (retval != CTA_OK) {return retval; } + + if (dt == CTA_MODEL) { + + /* This is a MODEL+alpha*MODEL axpy */ + /* Get Modeldata object of x */ + retval=CTA_Handle_GetData((CTA_Handle) h_x, (void*) &data_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* check whether models are of the same model class */ + same_models=(data_y->hmodcl==data_x->hmodcl); + + /* If models are not the same or no user implementation is available: */ + if (same_models && data_y->functions[CTA_MODEL_AXPY_MODEL]==CTA_NULL) + { + /* Use default implementation : */ + /* -> get state from model h_x */ + /* -> call CTA_Model_Axpy with state */ + + hstate_x=CTA_NULL; + retval=CTA_Model_GetState(h_x,&hstate_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get state"); + return retval; + } + + retval=CTA_Model_AxpyState(hmodel,alpha,hstate_x); + if (retval!=CTA_OK){ + CTA_WRITE_ERROR("Cannot get AxpyState"); + return retval; + } + + retval=CTA_TreeVector_Free(&hstate_x, CTA_TRUE); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot free tree vector"); + return retval; + } + } + else + { + /* Use user implementation */ + retval=CTA_Func_GetFunc(data_y->functions[CTA_MODEL_AXPY_MODEL],&function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function CTA_MODEL_AXPY_MODEL"); + return retval; + } + function(data_y->data, &alpha, data_x->data, &retval); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in function CTA_MODEL_AXPY_MODEL"); + return retval; + } + } + } + else{ + /* This is a MODEL+alpha*STATE axpy */ + + + if (dt != CTA_TREEVECTOR) { + + CTA_WRITE_ERROR("Handle is not a cta_treevector"); + return CTA_ILLEGAL_HANDLE; + } + + /* Check whether user implementation is available */ + if (data_y->functions[CTA_MODEL_AXPY_STATE]==CTA_NULL) + { + /* Use default implementation */ + hstate_y=CTA_NULL; + retval=CTA_Model_GetState(hmodel,&hstate_y); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get state"); + return retval; + } + + retval=CTA_TreeVector_Axpy(hstate_y,alpha, h_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_TreeVector_Axpy"); + return retval; + } + retval=CTA_Model_SetState(hmodel,hstate_y); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_Model_SetState"); + return retval; + } + retval=CTA_TreeVector_Free(&hstate_y, CTA_TRUE); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot free TreeVector"); + return retval; + } + } + else + { + /* Use user implementation */ + retval=CTA_Func_GetFunc(data_y->functions[CTA_MODEL_AXPY_STATE],&function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function CTA_MODEL_AXPY_STATE"); + return retval; + } + function(data_y->data, &alpha, &h_x, &retval); +// function(&alpha, &h_x, data_y->data,&retval); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in function CTA_MODEL_AXPY_STATE"); + return retval; + } + } + } + return CTA_OK; +} + + + + +#undef METHOD +#define METHOD "GetNoiseCovar" +int CTA_Model_GetNoiseCovar( + CTA_Model hmodel, /* handle of model */ + CTA_TreeVector *hstmat /* returned state-matrix */ +){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data assosiated to model */ + + + /* Check type of handle */ + retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_model handle"); + return retval; + } + + /* Get Modeldata object */ + retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* Get Function pointer */ + if (data->functions[CTA_MODEL_GET_NOISE_COVAR]==CTA_NULL){ + CTA_WRITE_ERROR("Error in function ..."); + return CTA_NOT_IMPLEMENTED; + } + + retval=CTA_Func_GetFunc(data->functions[CTA_MODEL_GET_NOISE_COVAR],&function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function CTA_MODEL_GET_NOISE_COVAR"); + return CTA_NOT_IMPLEMENTED; + } + + /* Call function */ + function(data->data,hstmat,&retval); + + return retval; +} + +#undef METHOD +#define METHOD "GetNoiseCount" +int CTA_Model_GetNoiseCount( + CTA_Model hmodel, /* handle of model */ + int *nnoise /* returned number of noise parameters */ +){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data assosiated to model */ + + /* Check type of handle */ + retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_model handle"); + return retval; + } + + /* Get Modeldata object */ + retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* Get Function pointer */ + if (data->functions[CTA_MODEL_GET_NOISE_COUNT]==CTA_NULL) return CTA_NOT_IMPLEMENTED; + retval=CTA_Func_GetFunc(data->functions[CTA_MODEL_GET_NOISE_COUNT],&function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function CTA_MODEL_GET_COISE_COUNT"); + return retval; + } + + /* Call function */ + function(data->data,nnoise,&retval); + return retval; +} + +#undef METHOD +#define METHOD "Free" +int CTA_Model_Free( + CTA_Model *hmodel /* Handle of model */ + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Model *data; /* All data of hmodel */ + CTA_Function *function; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + + if (*hmodel==CTA_NULL) return CTA_OK; + + retval=CTA_Handle_Check((CTA_Handle) *hmodel,CTA_MODEL); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_model handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) *hmodel, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* call user implementation if available */ + if (data->functions[I_CTA_MODEL_FREE]!=CTA_NULL){ + retval=CTA_Func_GetFunc(data->functions[I_CTA_MODEL_FREE],&function); + if (retval!=CTA_OK) return retval; + function(data->data,&retval); + } + free(data->data); + free(data); + retval=CTA_Handle_Free(hmodel); + + return retval; +}; + +/** \brief Fill the struct announced with info from the given + * observation description. + * + * \param hdescr I Observation description with annonced observations + * \param announced O handle of model instance + * \return error status: CTA_OK if successful + */ +#undef METHOD +#define METHOD "AnnounceObsValues_Set" +int CTA_Model_AnnounceObsValues_Set( + CTA_ObsDescr hdescr, /* observation description */ + CTAI_AnnoinceObs *announced /* data-block of model */ +){ + /* Local variables */ + int nobs; /* Number of observations */ + double *allTimes; /* Array with all available times */ + CTA_Vector vtimes; /* Vector with all available times */ + int info; + int nunique; /* Number of unique times of observations */ + double lastTime; /* Previous time in list */ + int i,itime; /* Loop counters */ + char direc; /* Sorting direction */ + + /* free all administration that might be available */ + CTAI_Clear_Announced(announced, FALSE); + + if (hdescr!=CTA_NULL) { + int retval; /* Return value of COSTA call */ + + /* If there are any observations in the description */ + retval=CTA_ObsDescr_Observation_Count(hdescr, &nobs); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("There are not any observations in the description"); + return retval; + } + + if (IDEBUG) {printf("CTA_Model_AnnounceObsValues_Set: nobs=%d\n",nobs);} + + if (nobs>0){ + /* Allocate an vector for holding all predicted values */ + retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR,nobs, + CTA_DOUBLE,CTA_NULL,&(announced->values)); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot create vector"); + return retval; + } + + /* Allocate vector for holding all times of the observations */ + retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR,nobs, + CTA_DOUBLE,CTA_NULL,&vtimes); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot create vector"); + return retval; + } + + /* Get times associated to all observations */ + retval=CTA_ObsDescr_Get_ValueProperties(hdescr, "TIME", vtimes, CTA_DOUBLE); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get times associated to all observations"); + return retval; + } + + /* Get all values in a C-array */ + allTimes=CTA_Malloc(nobs*sizeof(double)); + retval=CTA_Vector_GetVals(vtimes,allTimes,nobs,CTA_DOUBLE); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get values in array"); + return retval; + } + + /* Sort times */ + direc='I'; + DLASRT_F77(&direc, &nobs, allTimes, &info ,1); + + /* count the number of different times in allTimes */ + nunique=1; + lastTime=allTimes[0]; + for (i=1;i0) { + printf("CTA_Model_AnnounceObsValues_Set: observations at %d times\n", + nunique); + printf("CTA_Model_AnnounceObsValues_Set: times are: \n"); + + for (itime=0;itimetimes=CTA_Malloc(nunique*sizeof(double)); + announced->ntimes=nunique; + announced->isset=FALSE; + announced->times[0]=allTimes[0]; + nunique=0; + for (i=1;itimes[nunique]!=allTimes[i]){ + nunique++; + announced->times[nunique]=allTimes[i]; + } + } + free(allTimes); + CTA_Vector_Free(&vtimes); + CTA_ObsDescr_Create(CTA_OBSDESCR_TABLE, hdescr, + &(announced->ObsDescr)); + } + } + return CTA_OK; +} + + +#undef METHOD +#define METHOD "AnnounceObsValues" +int CTA_Model_AnnounceObsValues( + CTA_Model hmodel, /* handle of model */ + CTA_ObsDescr hdescr /* observation description */ +){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data assosiated to model */ + + if (IDEBUG) {printf("CTA_Model_AnnounceObsValues: start\n");} + if (IDEBUG) {printf("CTA_Model_AnnounceObsValues: hmodel=%d, hdescr=%d\n", + hmodel,hdescr);} + + /* Check type of handle */ + retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_model handle"); + return retval; + } + + /* Get Modeldata object */ + retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* Get Function pointer */ + if (data->functions[CTA_MODEL_ANNOUNCE_OBSVALUES]!=CTA_NULL) { + if (IDEBUG) {printf("CTA_Model_AnnounceObsValues: using user-implementation\n");} + + /* User implementation is available of this method */ + retval=CTA_Func_GetFunc(data->functions[CTA_MODEL_ANNOUNCE_OBSVALUES],&function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function CTA_Model_AnnoundeObsValues"); + return retval; + } + + /* Call function */ + function(data->data, &hdescr, &retval); + } else { + if (IDEBUG) {printf("CTA_Model_AnnounceObsValues: using costa-implementation\n");} + + /* Not available, then COSTA will try to do it for you :*/ + /* COSTA will fill the announced administration: */ + retval=CTA_Model_AnnounceObsValues_Set(hdescr, &(data->announced)); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot fill announced administation"); + return retval; + } + } + return CTA_OK; +} + + +#undef METHOD +#define METHOD "GetObsValues" +int CTA_Model_GetObsValues( + CTA_Model hmodel, /* handle of model */ + CTA_Time htime, /* timespan of simulation */ + CTA_ObsDescr hdescr, /* observation description */ + CTA_Vector values /* returned (interpolated) values */ +){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data assosiated to model */ + + /* Check type of handle */ + retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_model handle"); + return retval; + } + + /* Get Modeldata object */ + retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* Do we have announced obs */ + if (data->announced.isset){ + + /* We shoud check whether obsdescr is same as announced + But for now we will not check just return the values + */ + + CTA_Vector_Copy(data->announced.values,values); + + /* We will now free the internal administration */ + CTAI_Clear_Announced(&(data->announced), FALSE); + + return CTA_OK; + } else { + /* We do not have announced observations just call user function */ + + /* Get Function pointer */ + if (data->functions[CTA_MODEL_GET_OBSVALUES]==CTA_NULL){ + return CTA_NOT_IMPLEMENTED; + } + retval=CTA_Func_GetFunc(data->functions[CTA_MODEL_GET_OBSVALUES], + &function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function CTA_MODEL_GET_OBSVALUES"); + return retval; + } + + /* Call function */ + function(data->data,&htime, &hdescr, &values, &retval); + + return retval; + } +} + +int CTA_Model_GetObsSelect( + CTA_Model hmodel, /* handle of model */ + CTA_Time htime, /* timespan of simulation */ + CTA_ObsDescr hdescr, /* observation description */ + CTA_String sselect /* query to filter out the observations */ +){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data associated to model */ + + if (IDEBUG>10) {printf("cta_model_getobsselect START \n");} + + /* Check type of handle */ + retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_model handle"); + return retval; + } + /* Get Modeldata object */ + retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* Get Function pointer */ + if (data->functions[CTA_MODEL_GET_OBSSELECT]==CTA_NULL) { + /* Obsselect is not implemented : return "*" */ + retval=CTA_String_Set(sselect," 1 >= 0 "); + return retval; + + } else { + retval=CTA_Func_GetFunc(data->functions[CTA_MODEL_GET_OBSSELECT],&function); + + if (retval!=CTA_OK) { + char message[1024]; + sprintf(message,"cta_model_getobsselect: user function not found %d \n",retval); + CTA_WRITE_ERROR(message); + return retval; + } + + if (IDEBUG>10) {printf("cta_model_getobsselect call user function \n");} + + /* Call function */ + function(data->data,&htime, &hdescr, &sselect, &retval); + return retval; + }} + + +#undef METHOD +#define METHOD "Export" +int CTA_Model_Export( + CTA_Model hmodel, /* handle of model */ + CTA_Handle hexport /* export object (e.q. handle to file or pack instance) */ +){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data associated to model */ + + retval=CTAI_Model_GetDataAndFunc(hmodel, I_CTA_MODEL_EXPORT, &data, &function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTAI_Model_Get_DataAndFunk"); + return retval; + } + + if (IDEBUG) printf("CTA_MODEL DEBUG: Calling export function \n"); + function(data->data, &hexport, &retval); + if (IDEBUG) printf("CTA_MODEL DEBUG: Return code export function %d \n", + retval); + return retval; +} + +#undef METHOD +#define METHOD "Import" +int CTA_Model_Import( + CTA_Model hmodel, /* handle of model */ + CTA_Handle himport /* import object (e.q. handle to file or pack instance) */ +){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data associated to model */ + + retval=CTAI_Model_GetDataAndFunc(hmodel, I_CTA_MODEL_IMPORT, &data, &function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in GetDataAndFunc"); + return retval; + } + + if (IDEBUG) printf("CTA_MODEL DEBUG: Calling import function \n"); + function(data->data, &himport, &retval); + if (IDEBUG) printf("CTA_MODEL DEBUG: Return code import function %d \n", + retval); + return retval; +} + + +#undef METHOD +#define METHOD "GetTimeHorizon" +int CTA_Model_GetTimeHorizon( + CTA_Model hmodel, + CTA_Time tHorizon +){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data associated to model */ + + retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_GET_TIMEHORIZON, &data, &function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTAI_Model_GetDataAndFunc"); + return retval; + } + + if (IDEBUG) printf("CTA_MODEL DEBUG: Calling TimeHorizon function \n"); + function(data->data, &tHorizon, &retval); + if (IDEBUG) printf("CTA_MODEL DEBUG: Return code TimeHorizon function %d \n", + retval); + return retval; +} + +#undef METHOD +#define METHOD "GetCurrentTime" +int CTA_Model_GetCurrentTime( + CTA_Model hmodel, + CTA_Time tCurrent +){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data associated to model */ + + retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_GET_CURRENTTIME, &data, &function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTAI_Model_GetDataAndFunc"); + return retval; + } + + if (IDEBUG) printf("CTA_MODEL DEBUG: Calling CurrentTime function \n"); + function(data->data, &tCurrent, &retval); + if (IDEBUG) printf("CTA_MODEL DEBUG: Return code CurrentTime function %d \n", + retval); + return retval; +} + + +#undef METHOD +#define METHOD "AdjCompute" +int CTA_Model_AdjCompute(CTA_Model hmodel, CTA_Time time){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data associated to model */ + + retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_ADJ_COMPUTE, &data, &function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTAI_Model_GetDataAndFunc"); + return retval; + } + + function(data->data, &time, &retval); + return retval; +} + + +#undef METHOD +#define METHOD "AdjPrepare" +int CTA_Model_AdjPrepare(CTA_Model hmodel, CTA_Time time){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data associated to model */ + + retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_ADJ_PREPARE, &data, &function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTAI_Model_GetDataAndFunc"); + return retval; + } + + function(data->data, &time, &retval); + return retval; +} + + +#undef METHOD +#define METHOD "AdjSetForc" +int CTA_Model_AdjSetForc(CTA_Model hmodel, CTA_ObsDescr hdescr, CTA_Vector vforc){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data associated to model */ + + retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_ADJ_SET_FORC, &data, &function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTAI_Model_GetDataAndFunc"); + return retval; + } + function(data->data, &hdescr, &vforc, &retval); + return retval; + +} + +#undef METHOD +#define METHOD "LoadPersistentState" +int CTA_Model_LoadPersistentState(CTA_Model hmodel, CTA_String filename, CTA_String *instanceID){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data associated to model */ + + retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_LOAD_PERSISTENTSTATE, &data, &function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_Model_LoadPersistentState"); + return retval; + } + function(data->data, &filename, instanceID, &retval); + return retval; +} + +#undef METHOD +#define METHOD "SaveInternalState" +int CTA_Model_SaveInternalState(CTA_Model hmodel, CTA_String *instanceID){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data associated to model */ + + if (IDEBUG>0) { + printf("Start of CTA_Model_SaveInternalState \n"); + printf("Model handle = %d\n",hmodel); + } + + retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_SAVE_INTERNALSTATE, &data, &function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_Model_SaveInternalState"); + return retval; + } + function(data->data, instanceID, &retval); + if (IDEBUG>0) { + printf("End of CTA_Model_SaveInternalState \n"); + } + return retval; +} + +#undef METHOD +#define METHOD "RestoreInternalState" +int CTA_Model_RestoreInternalState(CTA_Model hmodel, CTA_String instanceID){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data associated to model */ + + retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_RESTORE_INTERNALSTATE, &data, &function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_Model_RestoreState"); + return retval; + } + function(data->data, &instanceID, &retval); + return retval; +} + +#undef METHOD +#define METHOD "ReleaseInternalState" +int CTA_Model_ReleaseInternalState(CTA_Model hmodel, CTA_String instanceID){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data associated to model */ + + if (IDEBUG>0) { + printf("Start of CTA_Model_ReleaseInternalState \n"); + } + retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_RELEASE_INTERNALSTATE, &data, &function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_Model_ReleaseInternalState"); + return retval; + } + function(data->data, &instanceID, &retval); + return retval; +} + +#undef METHOD +#define METHOD "SavePersistentState" +// DE fop methode die eigenlijk bij een ImodelState zou moeten horen +int CTA_Model_SavePersistentState(CTA_Model hmodel, CTA_String filename, CTA_String instanceID){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data associated to model */ + + if (IDEBUG>0) { + printf("Start of CTA_Model_SavePersistentState \n"); + } + + retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_SAVE_PERSISTENTSTATE, &data, &function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_Model_SavePersistentState"); + return retval; + } + /* Create new handle with usrdata */ + //retval=CTA_Handle_Create("file",CTA_FILE,filename,hfile); + + function(data->data, &filename, &instanceID, &retval); + return retval; +} + +/** \brief Get the number of domains for local analysis + * + * \param hmodel I handle of model instance + * \param distance I characteristic distance + * \param ndomain O number of domains + * \param locVecs O costa vector of handles to treevectors (scaling vectors). The treevectors + * are created when the indices are CTA_NULL on entry + * + * \return error status: CTA_OK if successful + */ +#undef METHOD +#define METHOD "GetNumDomains" +int CTA_Model_GetNumDomains(CTA_Model hmodel, double distance, int *nDomains){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data associated to model */ + + retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_GET_NUMDOMAINS, &data, &function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in retreving function pointer of CTA_MODEL_GET_NUMDOMAINS"); + return retval; + } + + if (IDEBUG) printf("CTA_MODEL DEBUG: Callling CTA_MODEL_GET_NUMDOMAINS function \n"); + function(data->data, &distance, nDomains, &retval); + if (IDEBUG) printf("CTA_MODEL DEBUG: Return code CTA_MODEL_GET_NUMDOMAINS function %d\n", + retval); + return retval; +} + + +/** \brief Get selection of observations that are relevnet for assimilation in the given domain + * + * \param hmodel I handle of model instance + * \param hdescr I observation description of all observations + * \param distance I characteristic distance + * \param idomain I domain number + * \param locVecs O costa vector of handles to treevectors (scaling vectors). The treevectors + * are created when the indices are CTA_NULL on entry + * + * \return error status: CTA_OK if successful + */ +#undef METHOD +#define METHOD "GetObsSelector" +int CTA_Model_GetObsSelector( CTA_Model hmodel, + CTA_ObsDescr hdescr, double distance, int iDomain, CTA_Vector *selection){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data associated to model */ + + retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_GET_OBSSELECTOR, &data, &function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in retreving function pointer of CTA_MODEL_GET_OBSSELECTOR"); + return retval; + } + + if (IDEBUG) printf("CTA_MODEL DEBUG: Callling CTA_MODEL_GET_OBSSELECTOR function \n"); + function(data->data, hdescr, &distance, &iDomain, selection, &retval); + if (IDEBUG) printf("CTA_MODEL DEBUG: Return code CTA_MODEL_GET_OBSSELECTOR function %d\n", + retval); + return retval; +} + + + +/** \brief Get for each observation a localization scaling vector for single domain + * + * \param hmodel I handle of model instance + * \param hdescr I observation description for which we want localization scaling vectors + * \param distance I characteristic distance + * \param idomain I domain number + * \param locVecs O costa vector of handles to treevectors (scaling vectors). The treevectors + * are created when the indices are CTA_NULL on entry + * + * \return error status: CTA_OK if successful + */ +#undef METHOD +#define METHOD "GetObsLocalizationDomain" +int CTA_Model_GetObsLocalizationDomain( CTA_Model hmodel, + CTA_ObsDescr hdescr, double distance, int iDomain, CTA_Vector locVecs){ + + return CTAI_Model_GetObsLocalization(hmodel, hdescr, distance, iDomain, locVecs); + +} + + +/** \brief Get a copy of the internal state. + * + * \note Optionally a tree-vector is created. In that case the caller of this + * method is responsible for freeing that tree-vector. The input state must be compatible + * (same size and or composition) as the models internal state. + * \note If *hstate == CTA_NULL a new object is created, user is responsible for freeing this object. + * + * \param hmodel I handle of model instance + * \param iDomain I domain number + * \param hstate IO receives state of the model, *hstate can be CTA_NULL on calling (see note) + * \return error status: CTA_OK if successful + */ +#undef METHOD +#define METHOD "GetStateDomain" +int CTA_Model_GetStateDomain(CTA_Model hmodel, int iDomain, CTA_TreeVector *hstate){ + return CTAI_Model_GetSet(hmodel, CTA_NULL, hstate, I_CTA_MODEL_GET_STATE, iDomain); +} + +/** \brief Perform axpy operation on the internal state for a single domain + * + * \note AXPY: y=alpha*x+y. y corresponds to the models + * internal state and x can be a state vector or a model + + * \param hmodel IO handle of model instance (y) + * \param alpha I alpha + * \param hx I handle of x (state vector ) + * \param iDomain I domain number + * \return error status: CTA_OK if successful + */ +#undef METHOD +#define METHOD "AxpyStateDomain" +int CTA_Model_AxpyStateDomain(CTA_Model hmodel, double alpha, int iDomain, CTA_Handle hx){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTA_Function *function; /* Function that must be called */ + CTAI_Model *data; /* Data associated to model */ + + + retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_AXPY_STATEDOMAIN, &data, &function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in retreving function pointer of CTA_MODEL_GET_OBSSELECTOR"); + return retval; + } + function(data->data, &alpha, &hx, &iDomain, &retval); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in function CTA_MODEL_AXPY_STATEDOMAIN"); + return retval; + } + return CTA_OK; +} + + +CTAEXPORT void CTA_MODEL_CREATE_F77(int *hmodcl, int *userdata, int *hmodel, int *ierr){ + *ierr=CTA_Model_Create((CTA_ModelClass) *hmodcl, (CTA_Handle) *userdata, (CTA_Model*) hmodel); +} + +CTAEXPORT void CTA_MODEL_COMPUTE_F77(int *hmodel, int *htime, int *ierr){ + *ierr=CTA_Model_Compute((CTA_Model) *hmodel, (CTA_Time) *htime); +} + +CTAEXPORT void CTA_MODEL_ADDNOISE_F77(int *hmodel, int *htime, int *ierr){ + *ierr=CTA_Model_AddNoise((CTA_Model) *hmodel, (CTA_Time) *htime); +} +CTAEXPORT void CTA_MODEL_SETSTATE_F77(int *hmodel, int *hstate, int *ierr){ + *ierr=CTA_Model_SetState((CTA_Model) *hmodel, (CTA_TreeVector) *hstate); +} + +CTAEXPORT void CTA_MODEL_GETSTATE_F77(int *hmodel, int *hstate, int *ierr){ + *ierr=CTA_Model_GetState((CTA_Model) *hmodel, (CTA_TreeVector*) hstate); +} + +CTAEXPORT void CTA_MODEL_AXPYSTATE_F77(int *hmodel, double *alpha, int *hstate_x, int *ierr){ + *ierr=CTA_Model_AxpyState((CTA_Model) *hmodel, (double) *alpha, (CTA_Handle) *hstate_x); +} + +CTAEXPORT void CTA_MODEL_GETSTATESCALING_F77(int *hmodel, int *hscal, int *ierr){ + *ierr=CTA_Model_GetStateScaling((CTA_Model) *hmodel, (CTA_TreeVector*) hscal); +} + +CTAEXPORT void CTA_MODEL_SETFORC_F77(int *hmodel, int *tspan, int *hstate, int *ierr){ + *ierr=CTA_Model_SetForc((CTA_Model) *hmodel, (CTA_Time) *tspan, (CTA_TreeVector) *hstate); +} + +CTAEXPORT void CTA_MODEL_GETFORC_F77(int *hmodel, int *tspan, int *hstate, int *ierr){ + *ierr=CTA_Model_GetForc((CTA_Model) *hmodel, (CTA_Time) *tspan, (CTA_TreeVector*) hstate); +} + +CTAEXPORT void CTA_MODEL_AXPYFORC_F77(int *hmodel, int *tspan, double *alpha, int *hstate_x, int *ierr){ + *ierr=CTA_Model_AxpyForc((CTA_Model) *hmodel, (CTA_Time) *tspan, (double) *alpha, (CTA_TreeVector) *hstate_x); +} + +CTAEXPORT void CTA_MODEL_SETPARAM_F77(int *hmodel, int *hstate, int *ierr){ + *ierr=CTA_Model_SetParam((CTA_Model) *hmodel, (CTA_TreeVector) *hstate); +} + +CTAEXPORT void CTA_MODEL_GETPARAM_F77(int *hmodel, int *hstate, int *ierr){ + *ierr=CTA_Model_GetParam((CTA_Model) *hmodel, (CTA_TreeVector*) hstate); +} + +CTAEXPORT void CTA_MODEL_AXPYPARAM_F77(int *hmodel, double *alpha, int *hstate_x, int *ierr){ + *ierr=CTA_Model_AxpyParam((CTA_Model) *hmodel, (double) *alpha, (CTA_TreeVector) *hstate_x); +} + +CTAEXPORT void CTA_MODEL_GETNOISECOVAR_F77(int *hmodel, int *hstmat, int *ierr){ + *ierr=CTA_Model_GetNoiseCovar((CTA_Model) *hmodel, (CTA_TreeVector*) hstmat); +} + +CTAEXPORT void CTA_MODEL_GETNOISECOUNT_F77(int *hmodel,int *nnoise, int *ierr){ + *ierr=CTA_Model_GetNoiseCount((CTA_Model) *hmodel,(CTA_Time *) nnoise); +}; + +CTAEXPORT void CTA_MODEL_FREE_F77(int *hmodel, int *ierr){ + *ierr=CTA_Model_Free((CTA_Model*) hmodel); +}; + +CTAEXPORT void CTA_MODEL_EXPORT_F77(int *hmodel, int *hexport, int *ierr){ + *ierr=CTA_Model_Export((CTA_Model) *hmodel, (CTA_Handle) *hexport); +}; + +CTAEXPORT void CTA_MODEL_IMPORT_F77(int *hmodel, int *himport, int *ierr){ + *ierr=CTA_Model_Import((CTA_Model) *hmodel, (CTA_Handle) *himport); +}; + +CTAEXPORT void CTA_MODEL_GETTIMEHORIZON_F77(int *hmodel, int *tHorizon, + int *ierr){ + *ierr=CTA_Model_GetTimeHorizon((CTA_Model) *hmodel, (CTA_Time) *tHorizon); +}; + +CTAEXPORT void CTA_MODEL_GETCURRENTTIME_F77(int *hmodel, int *tHorizon, + int *ierr){ + *ierr=CTA_Model_GetCurrentTime((CTA_Model) *hmodel, (CTA_Time) *tHorizon); +}; + +CTAEXPORT void CTA_MODEL_ANNOUNCEOBSVALUES_F77(int *hmodel, int *hdescr, int *ierr){ + *ierr=CTA_Model_AnnounceObsValues((CTA_Model) *hmodel, + (CTA_ObsDescr) *hdescr); +}; + +CTAEXPORT void CTA_MODEL_GETOBSVALUES_F77(int *hmodel, int *htime, int *hdescr, int *values, int *ierr){ + + *ierr=CTA_Model_GetObsValues((CTA_Model) *hmodel, (CTA_Time) *htime, + (CTA_ObsDescr) *hdescr, (CTA_Vector) *values); +} + +CTAEXPORT void CTA_MODEL_GETOBSLOCALIZATION_F77 (int *hmodel, int *hdescr, double *distance, int *locVecs, int*ierr){ + + *ierr=CTA_Model_GetObsLocalization((CTA_Model) *hmodel,(CTA_ObsDescr) *hdescr, (double) *distance, (CTA_Vector) *locVecs); +} + +CTAEXPORT void CTA_MODEL_GETOBSSELECT_F77(int *hmodel, int *htime, int *hdescr, int *sselect, int *ierr){ + + *ierr=CTA_Model_GetObsSelect((CTA_Model) *hmodel, (CTA_Time) *htime, + (CTA_ObsDescr) *hdescr, (CTA_String) *sselect); +}; + +CTAEXPORT void CTA_MODEL_SAVEINTERNALSTATE_F77(int *hmodel, int *instanceID, int *ierr){ + *ierr=CTA_Model_SaveInternalState((CTA_Model) *hmodel, (CTA_String *) instanceID); +}; + +CTAEXPORT void CTA_MODEL_RESTOREINTERNALSTATE_F77(int *hmodel, int *instanceID, int *ierr){ + *ierr=CTA_Model_RestoreInternalState((CTA_Model) *hmodel, (CTA_String) *instanceID); +}; + +CTAEXPORT void CTA_MODEL_RELEASEINTERNALSTATE_F77(int *hmodel, int *instanceID, int *ierr){ + *ierr=CTA_Model_ReleaseInternalState((CTA_Model) *hmodel, (CTA_String) *instanceID); +}; + +CTAEXPORT void CTA_MODEL_SAVEPERSISTENTSTATE_F77(int *hmodel, int *filename, int *instanceID, int *ierr){ + *ierr=CTA_Model_SavePersistentState((CTA_Model) *hmodel, (CTA_String) *filename, (CTA_String) *instanceID); +}; + +CTAEXPORT void CTA_MODEL_LOADPERSISTENTSTATE_F77(int *hmodel, int *filename, int *instanceID, int *ierr){ + *ierr=CTA_Model_LoadPersistentState((CTA_Model) *hmodel, (CTA_String) *filename, (CTA_String *) instanceID); +}; diff --git a/costa/native/cta/src/cta_model_factory.c b/costa/native/cta/src/cta_model_factory.c new file mode 100644 index 000000000..dfbfe46dd --- /dev/null +++ b/costa/native/cta/src/cta_model_factory.c @@ -0,0 +1,983 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_model_factory.c $ +$Revision: 4029 $, $Date: 2013-06-13 16:30:19 +0200 (Thu, 13 Jun 2013) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "cta.h" + +#include "cta_string.h" +#include "cta_mem.h" +#include "cta_datatypes.h" +#include "ctai_xml.h" +#include "cta_handles.h" +#include "ctai_handles.h" +#include "f_cta_utils.h" +#include "cta_model.h" +#include "cta_errors.h" +#include "cta_f77lapack.h" +#include "cta_reltable.h" +#include "cta_par.h" +#include "cta_message.h" +#include "ctai.h" + +#if defined(_WIN32) || defined(_WIN64) + #define snprintf _snprintf + #define vsnprintf _vsnprintf + #define strcasecmp _stricmp + #define strncasecmp _strnicmp +#endif + + +#define MAX(a,b) (a>b ? a: b) +#define IDEBUG (1) + +#define CTA_MODEL_DEFINECLASS_F77 F77_CALL(cta_model_defineclass,CTA_MODEL_DEFINECLASS) + +#define CLASSNAME "CTA_Model_Factory" + +typedef struct { +CTA_Func functions[CTA_MODEL_NUMFUNC]; +char *implements; +char *parallel_type; +char *spawn_workers; +char *nproc; +char *ntimes; +char *dumProcs; +char *flag_barrier; +double t_step; +int nChildModels; /* Number of child models that are created (and still alive) from this class. */ +CTA_Model *ChildModels; /* Handles of all child models */ +int *ChildComputeIsBlocked; /* Flags indication which child models are blocked due to a compute over interval > t_step */ +} CTAI_ModelClass; + + +int CTAI_ModelFac_SetImplements(CTA_ModelClass hmodcl, char *implements){ + + CTAI_ModelClass *clsdata; + int retval; + + /* Get class data containing all function pointers */ + retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS); + if (retval!=CTA_OK) return retval; + retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata); + if (retval!=CTA_OK) return retval; + + if (clsdata->implements) free(clsdata->implements); + if (implements) { + clsdata->implements=CTA_Malloc(sizeof(char)*(strlen(implements)+1)); + strcpy(clsdata->implements,implements); + } else { + clsdata->implements=CTA_Malloc(sizeof(char)*(strlen("unknown")+1)); + strcpy(clsdata->implements,"unknown"); + } + return CTA_OK; +} + +int CTAI_Model_DuplicateClass(CTA_ModelClass hmodcl, + CTA_ModelClass *hmodcl_dup){ + + CTAI_ModelClass *clsdata; + CTAI_ModelClass *clsdata_dup; + CTA_Func hfunc; + CTA_String sname; + char *implements, *parallel_type, *nproc, *ntimes, *dumProcs, *spawn_workers; + char *flag_barrier; + double t_step; + int nChildModels; + CTA_Model *ChildModels; + int *ChildComputeIsBlocked; + int retval; + int i; + + /* Get class data containing all function pointers */ + retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS); + if (retval!=CTA_OK) return retval; + retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata); + if (retval!=CTA_OK) return retval; + + /* Allocate new ModelClass object */ + clsdata_dup=CTA_Malloc(sizeof(CTAI_ModelClass)); + clsdata_dup->implements = NULL; + clsdata_dup->parallel_type = NULL; + clsdata_dup->spawn_workers = NULL; + clsdata_dup->nproc = NULL; + clsdata_dup->ntimes = NULL; + clsdata_dup->dumProcs = NULL; + clsdata_dup->flag_barrier = NULL; + clsdata_dup->t_step = 0.0; + clsdata_dup->nChildModels = 0; + clsdata_dup->ChildModels = NULL; + clsdata_dup->ChildComputeIsBlocked = NULL; + + /* Duplicate all function pointers */ + for (i=0;ifunctions[i], &hfunc); + clsdata_dup->functions[i]=hfunc; + } + + /* Duplicate implements */ + implements=clsdata->implements; + if (implements) { + clsdata_dup->implements=CTA_Malloc(sizeof(char)*(strlen(implements)+1)); + strcpy(clsdata_dup->implements,implements); + } + + /* Duplicate parallel_type */ + parallel_type=clsdata->parallel_type; + if(parallel_type) { + clsdata_dup->parallel_type=CTA_Malloc(sizeof(char)*(strlen(parallel_type)+1)); + strcpy(clsdata_dup->parallel_type,parallel_type); + } + + /* Duplicate spawn_workers */ + spawn_workers=clsdata->spawn_workers; + if(spawn_workers) { + clsdata_dup->spawn_workers=CTA_Malloc(sizeof(char)*(strlen(spawn_workers)+1)); + strcpy(clsdata_dup->spawn_workers,spawn_workers); + } + + /* Duplicate nproc */ + nproc=clsdata->parallel_type; + if (nproc) { + clsdata_dup->nproc=CTA_Malloc(sizeof(char)*(strlen(nproc)+1)); + strcpy(clsdata_dup->nproc,nproc); + } + + /* Duplicate ntimes */ + ntimes=clsdata->ntimes; + if (ntimes) { + clsdata_dup->ntimes=CTA_Malloc(sizeof(char)*(strlen(ntimes)+1)); + strcpy(clsdata_dup->ntimes,ntimes); + } + + /* Duplicate dumProcs */ + dumProcs=clsdata->dumProcs; + if (dumProcs) { + clsdata_dup->dumProcs=CTA_Malloc(sizeof(char)*(strlen(dumProcs)+1)); + strcpy(clsdata_dup->dumProcs,dumProcs); + } + + /* Duplicate flag_barrier */ + flag_barrier=clsdata->flag_barrier; + if (flag_barrier) { + clsdata_dup->flag_barrier=CTA_Malloc(sizeof(char)*(strlen(flag_barrier)+1)); + strcpy(clsdata_dup->flag_barrier,flag_barrier); + } + + /* Duplicate t_step */ + t_step=clsdata->t_step; + clsdata_dup->t_step=t_step; + + /* Duplicate administration of child models */ + nChildModels = clsdata->nChildModels; + clsdata_dup->nChildModels=nChildModels; + + ChildModels = clsdata->ChildModels; + if (ChildModels) { + clsdata_dup->ChildModels = CTA_Malloc(nChildModels*sizeof(CTA_Model)); + memcpy(clsdata_dup->ChildModels, ChildModels, sizeof(clsdata_dup->ChildModels)); + } + + ChildComputeIsBlocked = clsdata->ChildComputeIsBlocked; + if (ChildComputeIsBlocked) { + clsdata_dup->ChildComputeIsBlocked = CTA_Malloc(nChildModels*sizeof(int)); + memcpy(clsdata_dup->ChildComputeIsBlocked, ChildComputeIsBlocked, sizeof(clsdata_dup->ChildComputeIsBlocked)); + } + + // Allocate new handle and return error when unsuccesfull + CTA_String_Create(&sname); + CTA_Handle_GetName(hmodcl, sname); + if (IDEBUG>0) printf("CTAI_Model_DuplicateClass: NAME OF MODEL CLASS IS %s\n", CTAI_String_GetPtr(sname)); + + + retval=CTA_Handle_Create(CTAI_String_GetPtr(sname), CTA_MODELCLASS, + clsdata_dup, hmodcl_dup); + CTA_String_Free(&sname); + return retval; +} + +/* Add new model instance to list of all models */ +int CTAI_ModelFac_AddModelInstance(CTA_ModelClass hmodcl, CTA_Model hmodel){ + + /* Local variables */ + CTAI_ModelClass *clsdata; /* Data associated to modelclass */ + int retval; /* Return value of COSTA call */ + int imodel; /* loop variable */ + + if (IDEBUG>0) printf("CTA_ModelFac_AddModelInstance: Start of function \n"); + + /* Get class data */ + retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS); + if (retval!=CTA_OK) return retval; + retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata); + if (retval!=CTA_OK) return retval; + + // reallocate 2 arrays + initialisation + clsdata->nChildModels++; + + if (clsdata->ChildModels==NULL) { + clsdata->ChildModels = CTA_Malloc(clsdata->nChildModels*sizeof(CTA_Model)); + } else { + CTA_Model *tmp1 = CTA_Realloc(clsdata->ChildModels, clsdata->nChildModels*sizeof(CTA_Model)); + if (tmp1!=NULL) { + clsdata->ChildModels = tmp1; + }else{ + free(clsdata->ChildModels); + printf("error allocating"); + return 1; + } + } + + if (clsdata->ChildComputeIsBlocked==NULL) { + clsdata->ChildComputeIsBlocked = CTA_Malloc(clsdata->nChildModels*sizeof(int)); + } else { + CTA_Model *tmp2 = CTA_Realloc(clsdata->ChildComputeIsBlocked, clsdata->nChildModels*sizeof(int)); + if (tmp2!=NULL) { + clsdata->ChildComputeIsBlocked = tmp2; + } else{ + free(clsdata->ChildComputeIsBlocked); + printf("error allocating"); + return 1; + } + } + + clsdata->ChildModels[clsdata->nChildModels-1] = hmodel; + clsdata->ChildComputeIsBlocked[clsdata->nChildModels-1] = CTA_FALSE; + + if (IDEBUG>0) { + printf("number of Child models: %d \n", clsdata->nChildModels); + for (imodel=0;imodelnChildModels;imodel++){ + printf("i, hmodel,flag: %d, %d, %d \n",imodel,clsdata->ChildModels[imodel], + clsdata->ChildComputeIsBlocked[imodel]); + } + } + if (IDEBUG>0) printf("CTA_ModelFac_AddModelInstance: End of function \n"); + return CTA_OK; +} + +/* Remove model instance from list of all models */ +int CTAI_ModelFac_DelModelInstance(CTA_ModelClass hmodcl, CTA_Model hmodel){ + + /* Local variables */ + CTAI_ModelClass *clsdata; /* Data associated to modelclass */ + int retval; /* Return value of COSTA call */ + int imodel; /* loop variable */ + int ifound; /* flag indicating model was found in list */ + + if (IDEBUG>0) printf("CTA_ModelFac_DelModelInstance: Start of function \n"); + + /* Get class data */ + retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS); + if (retval!=CTA_OK) return retval; + retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata); + if (retval!=CTA_OK) return retval; + + ifound = -1; + /* find index of model to be removed from list*/ + for (imodel=0;imodelnChildModels;imodel++){ + if (clsdata->ChildModels[imodel] == hmodel) { + ifound = imodel; + } + } + + /* overwrite information of this model */ + if (ifound>0) { + clsdata->nChildModels--; + for (imodel=ifound;imodelnChildModels;imodel++) { + clsdata->ChildModels[imodel] = clsdata->ChildModels[imodel+1]; + clsdata->ChildComputeIsBlocked[imodel] = clsdata->ChildComputeIsBlocked[imodel+1]; + } + } + if (IDEBUG>0) printf("CTA_ModelFac_DelModelInstance: End of function \n"); + return CTA_OK; +} + +#undef METHOD +#define METHOD "SetBlock" +/* Set block for own model if barrier falls within current compute interval */ +int CTAI_ModelFac_SetBlock(CTA_ModelClass hmodcl, CTA_Model hmodel){ + + /* Local variables */ + CTAI_ModelClass *clsdata; /* Data associated to modelclass */ + int retval; /* Return value of COSTA call */ + int imodel; /* loop variable */ + + if (IDEBUG>0) printf("CTA_ModelFac_SetBlock: Start of function \n"); + + /* Get class data */ + retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS); + if (retval!=CTA_OK) return retval; + retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata); + if (retval!=CTA_OK) return retval; + + for (imodel=0;imodelnChildModels;imodel++){ + if (clsdata->ChildModels[imodel]==hmodel){ + clsdata->ChildComputeIsBlocked[imodel]=CTA_TRUE; + } + } + if (IDEBUG>0) printf("CTA_ModelFac_SetBlock: End of function \n"); + return CTA_OK; +} + +#undef METHOD +#define METHOD "GetBlock" +/* Return value of clsdata->ChildComputeIsBlocked[hmodel] */ +int CTAI_ModelFac_GetBlock(CTA_ModelClass hmodcl, CTA_Model hmodel){ + + /* Local variables */ + CTAI_ModelClass *clsdata; /* Data associated to modelclass */ + int retval; /* Return value of COSTA call */ + int imodel; /* loop variable */ + + if (IDEBUG>0) printf("CTA_ModelFac_GetBlock: Start of function \n"); + + /* Get class data */ + retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS); + if (retval!=CTA_OK) return retval; + retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata); + if (retval!=CTA_OK) return retval; + + for (imodel=0;imodelnChildModels;imodel++){ + if (clsdata->ChildModels[imodel]==hmodel){ + return clsdata->ChildComputeIsBlocked[imodel]; + } + } + CTA_WRITE_ERROR("Model was not found in list of Child Models\n"); + return -1; +} + +#undef METHOD +#define METHOD "TimeStepAllModels" +int CTAI_ModelFac_TimeStepAllModels(CTA_ModelClass hmodcl, CTA_Function *function, double tstart, double tstop){ + + /* Local variables */ + CTAI_ModelClass *clsdata; /* Data associated to modelclass */ + CTA_Model hmodel; /* Handle of model */ + CTA_Time hstep; /* timespan to compute */ + double t1,t2; /* Begin and end time of hstep */ + int retval; /* Return value of COSTA call */ + int imodel,istep; /* loop counters */ + int nStep; /* number of sub-intervals to step */ + int nblock; /* number of models currently blocked */ + double tstep; /* maximum interval that model may run */ + + if (IDEBUG>0) printf("CTA_ModelFac_TimeStepAllModels: Start of function \n"); + + /* Get class data */ + retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS); + if (retval!=CTA_OK) return retval; + retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata); + if (retval!=CTA_OK) return retval; + + /* Find number of blocked models */ + nblock = 0; + for (imodel = 0;imodel < clsdata->nChildModels;imodel++){ + nblock = nblock + clsdata->ChildComputeIsBlocked[imodel]; + } + if (nblock==clsdata->nChildModels) { + /* all child models are blocked*/ + if (IDEBUG>0) printf("CTA_ModelFac_TimeStepAllModels: All model instances are waiting\n"); + + /* determine number of barrier steps in compute interval */ + tstep = clsdata->t_step; + nStep=(int) ((tstop-tstart)/tstep + 0.5); + if (IDEBUG>0) printf("CTA_ModelFac_TimeStepAllModels: number of timesteps: nStep = %d\n",nStep); + + CTA_Time_Create(&hstep); + + /* loop over all barrier intervals */ + t1 = tstart; + for (istep=0; istep < nStep; istep++){ + t2 = MIN(t1 + tstep,tstop); + retval=CTA_Time_SetSpan(hstep,t1,t2); + if (IDEBUG>0) printf("CTA_ModelFac_TimeStepAllModels: iStep=%d of nStep=%d tstep=%f\n",istep, nStep,tstep); + if (IDEBUG>0) printf("CTA_ModelFac_TimeStepAllModels: tstart=%f tstop=%f\n",t1,t2); + + /* loop over all models */ + for (imodel = 0;imodel < clsdata->nChildModels;imodel++){ + if (IDEBUG>0) printf("model %d: from t=%f to t=%f\n",imodel,t1,t2); + + /* Check type of handle */ + hmodel = clsdata->ChildModels[imodel]; + retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_model handle"); + return retval; + } + retval = CTAI_Model_PerformTimesteps(hmodel,function,hstep,CTA_FALSE); + } + t1 = t2; + } + + //reset barrier information for all child models + if (IDEBUG>0) {printf("CTA_ModelFac_TimeStepAllModels: barrier information is reset. \n");} + for (imodel = 0;imodel < clsdata->nChildModels;imodel++){ + clsdata->ChildComputeIsBlocked[imodel] = CTA_FALSE; + } + CTA_Time_Free(&hstep); + } + if (IDEBUG>0) {printf("CTA_ModelFac_TimeStepAllModels: End of function \n");} + return CTA_OK; +} + +int CTAI_ModelFac_SetBarrierData(CTA_ModelClass hmodcl, char *flag_barrier, char *t_step ) { + + CTAI_ModelClass *clsdata; /* Data associated to modelclass */ + int retval; /* Return value of COSTA call */ + double fac; /* multiply factor */ + double tval; /* help variable */ + + if (IDEBUG>0) { + printf("CTA_ModelFac_SetBarrierData: Start of function \n"); + printf("flag = %s, t_step = %s \n", flag_barrier, t_step); + } + + /* Get class data containing all function pointers */ + retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS); + if (retval!=CTA_OK) return retval; + retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata); + if (retval!=CTA_OK) return retval; + + if (clsdata->flag_barrier) free(clsdata->flag_barrier); + + clsdata->flag_barrier = CTA_FALSE; + clsdata->t_step = 0.0; + + if (flag_barrier != NULL) { + if ( (0 == strcmp("true", flag_barrier)) && t_step ) { + /* Set flag_barrier */ + clsdata->flag_barrier=CTA_Malloc(sizeof(char)*(strlen(flag_barrier)+1)); + strcpy(clsdata->flag_barrier,flag_barrier); + + /* Set t_step. */ + /* Possibly remove units MJD/HOUR/MIN/SEC */ + fac = 1.0; + if (strlen(t_step)>4) { + if (0==strcasecmp( "HOUR",(t_step+strlen(t_step)-4))) { + t_step[strlen(t_step)-4]='\0'; + fac = 1.0/24; + } + } + if (strlen(t_step)>3) { + if (0==strcasecmp( "MJD",(t_step+strlen(t_step)-3))) { + t_step[strlen(t_step)-3]='\0'; + } + if (0==strcasecmp( "SEC",(t_step+strlen(t_step)-3))) { + t_step[strlen(t_step)-3]='\0'; + fac = 1.0/(24*60*60); + } + if (0==strcasecmp( "MIN",(t_step+strlen(t_step)-3))) { + t_step[strlen(t_step)-3]='\0'; + fac = 1.0/(24*60); + } + } + tval = atof(t_step); + clsdata->t_step=fac*tval; + } else { + if (0 == strcmp("false", flag_barrier)) { + /* all ok no barrier info is specified */ + } else { + CTA_WRITE_ERROR("Error: Incomplete specification of barrier information.\n"); + exit(-1); + } + } + } + if (IDEBUG>0) { + printf("CTA_ModelFac_SetBarrierData: End of function \n"); + printf("clsdata->flag_barrier = %s, clsdata->t_step = %f \n", clsdata->flag_barrier, clsdata->t_step); + } + return CTA_OK; +} + +int CTAI_ModelFac_SetParallelData(CTA_ModelClass hmodcl, char *implements, char *parallel_type, char *spawn_workers, char *nproc, char *ntimes, char *dumProcs ){ + + CTAI_ModelClass *clsdata; + int retval; + + /* Get class data containing all function pointers */ + retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS); + if (retval!=CTA_OK) return retval; + retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata); + if (retval!=CTA_OK) return retval; + + if (clsdata->parallel_type) free(clsdata->parallel_type); + if (clsdata->spawn_workers) free(clsdata->spawn_workers); + if (clsdata->nproc) free(clsdata->nproc); + if (clsdata->ntimes) free(clsdata->ntimes); + if (clsdata->implements) free(clsdata->implements); + if (clsdata->dumProcs) free(clsdata->dumProcs); + + clsdata->parallel_type =NULL; + clsdata->spawn_workers =NULL; + clsdata->nproc =NULL; + clsdata->ntimes =NULL; + clsdata->implements =NULL; + clsdata->dumProcs =NULL; + + /* Set implements */ + retval=CTAI_ModelFac_SetImplements(hmodcl, implements); + +// printf("%p, %p, %p, %p\n",parallel_type, nproc, ntimes, implements); + if (parallel_type && nproc && ntimes && implements) { + + /* Set parallel_type */ + clsdata->parallel_type=CTA_Malloc(sizeof(char)*(strlen(parallel_type)+1)); + strcpy(clsdata->parallel_type,parallel_type); + + /* Set spawn_workers */ + clsdata->spawn_workers=CTA_Malloc(sizeof(char)*(strlen(spawn_workers)+1)); + strcpy(clsdata->spawn_workers,spawn_workers); + + /* Set nproc */ + clsdata->nproc=CTA_Malloc(sizeof(char)*(strlen(nproc)+1)); + strcpy(clsdata->nproc,nproc); + + /* Set ntimes */ + clsdata->ntimes=CTA_Malloc(sizeof(char)*(strlen(ntimes)+1)); + strcpy(clsdata->ntimes,ntimes); + + if (dumProcs){ + clsdata->dumProcs=CTA_Malloc(sizeof(char)*(strlen(dumProcs)+1)); + strcpy(clsdata->dumProcs,dumProcs); + } + } else if (!parallel_type && !nproc && !ntimes) { + /* all ok no parallel info is specified */ + } else { + printf("Error: Incomplete specification of parallel information of model_factory\n"); + printf("All or none of the fields 'parallel_type', 'nproc', 'ntimes' must be specified in the input\n"); + printf("specified are:\n"); + if (parallel_type) printf("parallel_type='%s'\n",parallel_type); + if (spawn_workers) printf("spawn_workers='%s'\n",spawn_workers); + if (nproc) printf("nproc ='%s'\n",nproc); + if (ntimes) printf("ntimes ='%s'\n",ntimes); + if (implements) printf("implements ='%s'\n",implements); + exit(-1); + } + + return CTA_OK; + +} + +int CTAI_ModelFac_GetParallelData(CTA_ModelClass hmodcl, char **implements, char **parallel_type, char **spawn_workers, char **nproc, char **ntimes, char **dumProcs ){ + + CTAI_ModelClass *clsdata; + int retval; + + /* Get class data containing all function pointers */ + retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS); + if (retval!=CTA_OK) return retval; + retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata); + if (retval!=CTA_OK) return retval; + + *parallel_type = clsdata->parallel_type; + *spawn_workers = clsdata->spawn_workers; + *nproc = clsdata->nproc; + *ntimes = clsdata->ntimes; + *implements = clsdata->implements; + *dumProcs = clsdata->dumProcs; + + return CTA_OK; + +} + +int CTAI_ModelFac_GetBarrierData(CTA_ModelClass hmodcl, char **flag_barrier, double *t_step ) { + + CTAI_ModelClass *clsdata; + int retval; + + /* Get class data containing all function pointers */ + retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS); + if (retval!=CTA_OK) return retval; + + retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata); + if (retval!=CTA_OK) return retval; + + *flag_barrier = clsdata->flag_barrier; + *t_step = clsdata->t_step; + + if (IDEBUG>0) { + printf("CTA_ModelFac_GetBarrierData: flag_barrier = %s, t_step = %f \n", *flag_barrier, *t_step); + } + return CTA_OK; + +} + +const char *CTAI_ModelFac_GetImplements(CTA_ModelClass hmodcl){ + + CTAI_ModelClass *clsdata; + int retval; + + /* Get class data containing all function pointers */ + retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS); + if (retval!=CTA_OK) return NULL; + retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata); + if (retval!=CTA_OK) return NULL; + + return clsdata->implements; +} + +#undef METHOD +#define METHOD "New" +int CTA_ModelFactory_New(const char *fName, CTA_ModelClass *modelClass){ + + int retval; + CTA_String sFile; + + CTA_Tree hTree; + CTA_ModelClass modelClassFromTree; + + /* Read the model configuration file */ + if (IDEBUG>0) {printf("CTA_ModelFactory_New: Start of function \n");} + if (IDEBUG>0) {printf("CTA_ModelFactory_New: name of configuration file: %s\n",fName);} + retval=CTA_String_Create(&sFile); + retval=CTA_String_Set(sFile,fName); + retval=CTA_XML_Read(sFile, &hTree); + retval=CTA_String_Free(&sFile); + + /* Get model class from tree */ + if (IDEBUG>0) {printf("CTA_ModelFactory_New: Get the modelclass from the tree \n");} + retval=CTA_Tree_GetItem(hTree,1,&modelClassFromTree); + if (retval!=CTA_OK){ + char message[1024]; + sprintf(message,"CTA_ModelFactory_New cannot get first item from input-tree \n"); + CTA_WRITE_ERROR(message); + return retval; + } + + /* Duplicate the modelclass since it will be deleted + when the tree is deleted*/ + if (IDEBUG>0) {printf("CTA_ModelFactory_New: Duplicate this model class \n");} + retval=CTAI_Model_DuplicateClass(modelClassFromTree, modelClass); + if (retval!=CTA_OK){ + char message[1024]; + sprintf(message,"CTA_ModelFactory_New cannot duplicate class from input-file\n"); + CTA_WRITE_ERROR(message); + return retval; + } + /* Delete the input-tree */ + if (IDEBUG>0) {printf("CTA_ModelFactory_New: Delete input tree \n");} + retval=CTA_Tree_Free(&hTree); + if (retval!=CTA_OK){ + char message[1024]; + sprintf(message,"CTA_ModelFactory_New cannot free input tree\n"); + CTA_WRITE_ERROR(message); + return retval; + } + + if (IDEBUG>0) {printf("CTA_ModelFactory_New: end of function class handle is %d\n",*modelClass);} + return CTA_OK; +} + +int CTA_Model_DefineClass( + const char *name, + const CTA_Func h_func[CTA_MODEL_NUMFUNC], + CTA_ModelClass *hmodcl + ){ + + CTAI_ModelClass *data; + int retval; + int i; + + /* Allocate new Vector object */ + data=CTA_Malloc(sizeof(CTAI_ModelClass)); + data->implements = NULL; + data->parallel_type = NULL; + data->spawn_workers = NULL; + data->nproc = NULL; + data->ntimes = NULL; + data->dumProcs = NULL; + data->flag_barrier = NULL; + data->t_step = 0.0; + data->nChildModels = 0; + data->ChildModels = NULL; + data->ChildComputeIsBlocked = NULL; + + for (i=0;ifunctions[i]=h_func[i]; + } + data->implements=CTA_Malloc(sizeof(char)*(strlen("unknown")+1)); + strcpy(data->implements,"unknown"); + + // Allocate new handle and return eror when unsuccesfull + retval=CTA_Handle_Create(name,CTA_MODELCLASS,data,hmodcl); + return retval; +} + + +int CTA_Model_DefineClass2( + const char *name, + const char *implements, + const CTA_Func h_func[CTA_MODEL_NUMFUNC], + CTA_ModelClass *hmodcl + ){ + + CTAI_ModelClass *data; + int retval; + int i; + + if (IDEBUG>0) printf("CTA_Model_DefineClass2\n"); + + /* Allocate new Vector object */ + data=CTA_Malloc(sizeof(CTAI_ModelClass)); + data->implements = NULL; + data->parallel_type = NULL; + data->spawn_workers = NULL; + data->nproc = NULL; + data->ntimes = NULL; + data->dumProcs = NULL; + data->flag_barrier = NULL; + data->t_step = 0.0; + data->nChildModels = 0; + data->ChildModels = NULL; + data->ChildComputeIsBlocked = NULL; + + for (i=0;ifunctions[i]=h_func[i]; + } + + /* Implements model */ + if (implements) { + data->implements=CTA_Malloc(sizeof(char)*(strlen(implements)+1)); + strcpy(data->implements,implements); + } else { + data->implements=CTA_Malloc(sizeof(char)*(strlen("unknown")+1)); + strcpy(data->implements,"unknown"); + } + + // Allocate new handle and return error when unsuccesfull + retval=CTA_Handle_Create(name,CTA_MODELCLASS,data,hmodcl); + return retval; +} + + +/* Interfacing with Fortran */ +CTAEXPORT void CTA_MODEL_DEFINECLASS_F77(char *name, int *h_func, int *hmodcl, int *ierr, int len_name){ + char *c_name; + + /* create a c-string equivalent to name */ + c_name=CTA_Malloc((len_name+1)*sizeof(char)); + CTA_fstr2cstr(name,c_name,len_name); + + *ierr=CTA_Model_DefineClass(c_name, (CTA_Func*) h_func, (CTA_ModelClass*) hmodcl); + free(c_name); +} + + +/** \brief Create a COSTA modell class from XML +* (load from methods from dynamic load library). +* +* \param cur_node I Current XML node +* \return Handle to create or CTA_NULL in case of an error. +*/ +CTA_ModelClass CTAI_XML_CreateModelClass(xmlNode *cur_node) { + + CTA_Func hfunc; /* the new function */ + xmlChar *id = NULL; /* id of function in XML-tree */ + xmlChar *clsname = NULL; /* (lookup) name of the model class */ + xmlChar *flag_barrier = NULL; /* flag indicating that model cannot be advanced unlimited*/ + xmlChar *t_step = NULL; /* maximum number of time-steps that model may perform */ + xmlChar *implements = NULL; /* Name of model that is implemented */ + xmlChar *parallel_type = NULL; /* Type of parallelism that is used */ + xmlChar *spawn_workers = NULL; /* Spawn worker processes (1) yes/ (0) no */ + xmlChar *nproc = NULL; /* number of processes for a single instance */ + xmlChar *ntimes = NULL; /* number of times the workers will be assinged to processors */ + xmlChar *dumProcs = NULL; /* ranks of the dummy processes in the worker-worker model */ + int retval; /* return status of creation */ + CTA_Func h_func[CTA_MODEL_NUMFUNC]; /* List of functions */ + xmlNode *func_node = NULL; /* values of children nodes */ + int i; + CTA_ModelClass hmodcl; /* function class */ + CTA_ModelClass hmodcl_old; + const char *name; + CTA_String sclsname; + + if (IDEBUG>0) printf("CTAI_XML_CreateModelClass: Start of function\n"); + /* Parse this node's attributes */ + /* Get id */ + id = xmlGetProp(cur_node, CTAI_XML_ID); + + /* Get class name */ + clsname = xmlGetProp(cur_node, CTAI_XML_NAME); + + /* Get implements_model */ + implements = xmlGetProp(cur_node, CTAI_XML_IMPLEMENTS); + + /* parallel options: */ + /*-number of processes */ + nproc = xmlGetProp(cur_node, CTAI_XML_NPROC); + + /*-type of parallel run */ + parallel_type = xmlGetProp(cur_node, CTAI_XML_PARALLEL_TYPE); + + /*-type of parallel run */ + spawn_workers = xmlGetProp(cur_node, CTAI_XML_SPAWN_WORKERS); + + /*-Number of times the parallel group must be created */ + ntimes = xmlGetProp(cur_node, CTAI_XML_NTIMES); + + /*-Process numbers (ranks) of dummy processes */ + dumProcs = xmlGetProp(cur_node, CTAI_XML_DUMPROCS); + + /* Flag for barrier moment */ + flag_barrier = xmlGetProp(cur_node, CTAI_XML_FLAG_BARRIER); + + /* Get T_step */ + t_step = xmlGetProp(cur_node, CTAI_XML_T_STEP); + + /* Check whether this is a known model-class */ + CTA_String_Create(&sclsname); + + CTA_String_Set(sclsname,(char *) clsname); + + retval=CTA_Handle_Find(sclsname, CTA_MODELCLASS, &hmodcl_old); + + CTA_String_Free(&sclsname); + + if (retval==CTA_OK) { + /* duplicate model class */ + CTAI_Model_DuplicateClass(hmodcl_old, &hmodcl); + + /* Set implements and any parallel information set for this model? */ + CTAI_ModelFac_SetParallelData(hmodcl, (char *) implements, (char *) parallel_type, (char *) spawn_workers, (char *) nproc, (char *) ntimes, (char *) dumProcs); + + /* model-class is known */ + if (IDEBUG>0) printf("CTAI_XML_CreateModelClass: found modelclass '%s'\n",clsname); + if (IDEBUG>0) printf("CTAI_XML_CreateModelClass: duplication is created \n"); + + } else { + /* Set all function handles to CTA_NULL */ + for (i=0;ichildren; func_node; + func_node = func_node->next) { + if (0 == strcmp("CTA_FUNCTION", (char *) func_node->name)){ + hfunc=CTAI_XML_CreateFunc(func_node); + if (hfunc!=CTA_NULL){ + name = CTAI_Handle_GetName(hfunc); + if (0 == strcmp("CTA_MODEL_CREATE_SIZE", name)){ + h_func[I_CTA_MODEL_CREATE_SIZE ]=hfunc; + } else if (0 == strcmp("CTA_MODEL_CREATE_INIT", name)){ + h_func[I_CTA_MODEL_CREATE_INIT ]=hfunc; + } else if (0 == strcmp("CTA_MODEL_FREE", name)){ + h_func[I_CTA_MODEL_FREE ]=hfunc; + } else if (0 == strcmp("CTA_MODEL_COMPUTE", name)){ + h_func[I_CTA_MODEL_COMPUTE ]=hfunc; + } else if (0 == strcmp("CTA_MODEL_SET_STATE", name)){ + h_func[I_CTA_MODEL_SET_STATE ]=hfunc; + } else if (0 == strcmp("CTA_MODEL_GET_STATE", name)){ + h_func[I_CTA_MODEL_GET_STATE ]=hfunc; + } else if (0 == strcmp("CTA_MODEL_AXPY_STATE", name)){ + h_func[CTA_MODEL_AXPY_STATE ]=hfunc; + } else if (0 == strcmp("CTA_MODEL_AXPY_MODEL", name)){ + h_func[CTA_MODEL_AXPY_MODEL ]=hfunc; + } else if (0 == strcmp("CTA_MODEL_SET_FORC", name)){ + h_func[CTA_MODEL_SET_FORC ]=hfunc; + } else if (0 == strcmp("CTA_MODEL_GET_FORC", name)){ + h_func[CTA_MODEL_GET_FORC ]=hfunc; + } else if (0 == strcmp("CTA_MODEL_AXPY_FORC", name)){ + h_func[CTA_MODEL_AXPY_FORC ]=hfunc; + } else if (0 == strcmp("CTA_MODEL_SET_PARAM", name)){ + h_func[CTA_MODEL_SET_PARAM ]=hfunc; + } else if (0 == strcmp("CTA_MODEL_GET_PARAM", name)){ + h_func[CTA_MODEL_GET_PARAM ]=hfunc; + } else if (0 == strcmp("CTA_MODEL_AXPY_PARAM", name)){ + h_func[CTA_MODEL_AXPY_PARAM ]=hfunc; + } else if (0 == strcmp("CTA_MODEL_GET_NOISE_COUNT", name)){ + h_func[CTA_MODEL_GET_NOISE_COUNT]=hfunc; + } else if (0 == strcmp("CTA_MODEL_GET_NOISE_COVAR", name)){ + h_func[CTA_MODEL_GET_NOISE_COVAR]=hfunc; + } else if (0 == strcmp("CTA_MODEL_GET_OBSVALUES", name)){ + h_func[CTA_MODEL_GET_OBSVALUES ]=hfunc; + } else if (0 == strcmp("CTA_MODEL_ANNOUNCE_OBSVALUES", name)){ + h_func[CTA_MODEL_ANNOUNCE_OBSVALUES ]=hfunc; + } else if (0 == strcmp("CTA_MODEL_GET_OBSSELECT", name)){ + h_func[CTA_MODEL_GET_OBSSELECT ]=hfunc; + } else if (0 == strcmp("CTA_MODEL_ADD_NOISE", name)){ + h_func[CTA_MODEL_ADD_NOISE ]=hfunc; + } else if (0 == strcmp("CTA_MODEL_IMPORT", name)){ + h_func[I_CTA_MODEL_IMPORT ]=hfunc; + } else if (0 == strcmp("CTA_MODEL_EXPORT", name)){ + h_func[I_CTA_MODEL_EXPORT ]=hfunc; + } else if (0 == strcmp("CTA_MODEL_GET_STATESCALING", name)){ + h_func[CTA_MODEL_GET_STATESCALING]=hfunc; + } else if (0 == strcmp("CTA_MODEL_GETTIMEHORIZON", name)){ + h_func[CTA_MODEL_GET_TIMEHORIZON]=hfunc; + } else if (0 == strcmp("CTA_MODEL_GETCURRENTTIME", name)){ + h_func[CTA_MODEL_GET_CURRENTTIME]=hfunc; + } else if (0 == strcmp("CTA_MODEL_GETOBSLOCALIZATION", name)){ + h_func[I_CTA_MODEL_GETOBSLOCALIZATION]=hfunc; + } else if (0 == strcmp("CTA_MODEL_SAVEINTERNALSTATE", name)){ + h_func[CTA_MODEL_SAVE_INTERNALSTATE]=hfunc; + } else if (0 == strcmp("CTA_MODEL_RESTOREINTERNALSTATE", name)){ + h_func[CTA_MODEL_RESTORE_INTERNALSTATE]=hfunc; + } else if (0 == strcmp("CTA_MODEL_RELEASEINTERNALSTATE", name)){ + h_func[CTA_MODEL_RELEASE_INTERNALSTATE]=hfunc; + } else if (0 == strcmp("CTA_MODEL_SAVEPERSISTENTSTATE", name)){ + h_func[CTA_MODEL_SAVE_PERSISTENTSTATE]=hfunc; + } else if (0 == strcmp("CTA_MODEL_LOADPERSISTENTSTATE", name)){ + h_func[CTA_MODEL_LOAD_PERSISTENTSTATE]=hfunc; + } else if (0 == strcmp("CTA_MODEL_GET_NUMDOMAINS", name)){ + h_func[CTA_MODEL_GET_NUMDOMAINS]=hfunc; + } else if (0 == strcmp("CTA_MODEL_GET_OBSSELECTOR", name)){ + h_func[CTA_MODEL_GET_OBSSELECTOR]=hfunc; + } else if (0 == strcmp("CTA_MODEL_GET_OBSLOCALIZATIONDOMAIN", name)){ + h_func[CTA_MODEL_GET_OBSLOCALIZATIONDOMAIN]=hfunc; + } else if (0 == strcmp("CTA_MODEL_GET_STATEDOMAIN", name)){ + h_func[CTA_MODEL_GET_STATEDOMAIN]=hfunc; + } else if (0 == strcmp("CTA_MODEL_AXPY_STATEDOMAIN", name)){ + h_func[CTA_MODEL_AXPY_STATEDOMAIN]=hfunc; + } else { + printf("CTAI_XML_CreateModelClass :Warning found unknown node %s\n", name); + } + } + } + } + /* Create new function class */ + retval=CTA_Model_DefineClass2((char *) clsname, (char *) implements, h_func, &hmodcl); + /* Set barrier information */ + CTAI_ModelFac_SetBarrierData(hmodcl, (char *) flag_barrier, (char *) t_step); + /* Set parallel info */ + CTAI_ModelFac_SetParallelData(hmodcl, (char *) implements, (char *) parallel_type, (char *) spawn_workers, (char *) nproc, (char *) ntimes, (char *) dumProcs); + + /* Set id (=name) of handle */ + CTAI_Handle_SetName(hmodcl, (char *) id); + + } + CTAI_Handle_SetName(hmodcl, (char *) id); + xmlFree(id); + xmlFree(clsname); + xmlFree(implements); + xmlFree(parallel_type); + xmlFree(spawn_workers); + xmlFree(nproc); + xmlFree(ntimes); + xmlFree(dumProcs); + xmlFree(flag_barrier); + xmlFree(t_step); + + if (IDEBUG>0) printf("CTAI_XML_CreateModelClass: End of function\n"); + return hmodcl; +} + + + diff --git a/costa/native/cta/src/cta_model_utilities.c b/costa/native/cta/src/cta_model_utilities.c new file mode 100644 index 000000000..3fef1b1f1 --- /dev/null +++ b/costa/native/cta/src/cta_model_utilities.c @@ -0,0 +1,78 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_model_utilities.c $ +$Revision: 2288 $, $Date: 2011-05-11 16:18:21 +0200 (Wed, 11 May 2011) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#include +#include +#include "f_cta_utils.h" +#include "cta_errors.h" +#include "cta_model_utilities.h" +#include "cta_xml.h" +#include "cta_message.h" + +#define CTA_MODEL_UTIL_INPUTTREE_F77 F77_CALL(cta_model_util_inputtree,CTA_MODEL_UTIL_INPUTTREE) + +#define CLASSNAME "CTA_Model_Utilities" +/* Several utility routines that can be of usage when creating COSTA model components */ + +#undef METHOD +#define METHOD "Util_Inputtree" +int CTA_Model_Util_InputTree(CTA_Handle hinput, CTA_Tree *tinput, int *cleanup){ + int ierr; /*Return value of a costa method */ + int retval; + CTA_Handle dt; + + /* check handle it must be a tree or an input-filename */ + retval = CTA_Handle_GetDatatype(hinput, &dt); + if (retval != CTA_OK) {return retval; + } else { + if (dt == CTA_TREE) { + + //printf("ik denk dat dit een boom is \n"); + /* It is a tree only copy handle of root of tree */ + *tinput=hinput; + *cleanup=CTA_FALSE; + return CTA_OK; + } else if (dt == CTA_STRING) { + /* It is a string. Parse input-file with this name */ + // printf("ik denk dat dit een cta-string is \n"); + ierr=CTA_XML_Read(hinput, tinput); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("xml_read failed"); + return ierr; + } + *cleanup=CTA_TRUE; + return CTA_OK; + } else { + /* It is nor a tree nor a string */ + char message[1024]; + sprintf(message,"Input is neither tree nor string! \n"); + CTA_WRITE_ERROR(message); + return CTA_INPUT_OBJECT_NOT_SUPPORTED; + } + } +} + +/* Interfacing with Fortran */ + +CTAEXPORT void CTA_MODEL_UTIL_INPUTTREE_F77(int *hinput, int *tinput, int *cleanup, int *ierr){ + *ierr= CTA_Model_Util_InputTree((CTA_Handle) *hinput, (CTA_Tree*) tinput, cleanup); +} + diff --git a/costa/native/cta/src/cta_obsdescr.c b/costa/native/cta/src/cta_obsdescr.c new file mode 100644 index 000000000..39f963b92 --- /dev/null +++ b/costa/native/cta/src/cta_obsdescr.c @@ -0,0 +1,576 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_obsdescr.c $ +$Revision: 3730 $, $Date: 2012-12-19 11:07:29 +0100 (Wed, 19 Dec 2012) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include "f_cta_utils.h" +#include "cta_mem.h" +#include "cta_sobs.h" +#include "cta_obsdescr.h" +#include "cta_errors.h" +#include "cta_handles.h" +#include "ctai.h" +#include "cta_message.h" + +#define CTA_OBSDESCR_DEFINECLASS_F77 F77_CALL(cta_obsdescr_defineclass,CTA_OBSDESCR_DEFINECLASS) +#define CTA_OBSDESCR_CREATE_F77 F77_CALL(cta_obsdescr_create,CTA_OBSDESCR_CREATE) +#define CTA_OBSDESCR_CHECK_SOBS_F77 F77_CALL(cta_obsdescr_check_sobs,CTA_OBSDESCR_CHECK_SOBS) +#define CTA_OBSDESCR_GET_VALUEPROPERTIES_F77 F77_CALL(cta_obsdescr_get_valueproperties,CTA_OBSDESCR_GET_VALUEPROPERTIES) +#define CTA_OBSDESCR_GET_PROPERTY_KEYS_F77 F77_CALL(cta_obsdescr_get_propertykeys,CTA_OBSDESCR_GET_PROPERTYKEYS) +#define CTA_OBSDESCR_PROPERTY_COUNT_F77 F77_CALL(cta_obsdescr_property_count,CTA_OBSDESCR_PROPERTY_COUNT) +#define CTA_OBSDESCR_OBSERVATION_COUNT_F77 F77_CALL(cta_obsdescr_observation_count,CTA_OBSDESCR_OBSERVATION_COUNT) +#define CTA_OBSDESCR_EXPORT_F77 F77_CALL(cta_obsdescr_export,CTA_OBSDESCR_EXPORT) +#define CTA_OBSDESCR_FREE_F77 F77_CALL(cta_obsdescr_free,CTA_OBSDESCR_FREE) + +#define CLASSNAME "CTA_Obsdescr" + +/* Struct holding all data associated to an COSTA Vector */ +typedef struct { +CTA_Func functions[I_CTA_OBSDESCR_NUMFUNC]; +} CTAI_ObsDescrClass; // A ObsDescrClass contains a list of the member-functions + + +typedef struct { +CTA_Func functions[I_CTA_OBSDESCR_NUMFUNC]; // See cta_obsdescr.h for a list of + // available stochobs-functions +CTA_ObsDescrClass hdescrcl; // ObsDescr-class +void *data; // pointer to the implementation-specific data. +//CTA_StochObs hsobs; // The observer of which this is a description +} CTAI_ObsDescr; + + + +#undef METHOD +#define METHOD "DefineClass" +int CTA_ObsDescr_DefineClass( + // INPUTS: + const char *name, // Name of the new stochobs class + const CTA_Func h_func[I_CTA_OBSDESCR_NUMFUNC], // function handles to + // the implementations of the + // stochobs-class' functions. + // OUTPUTS: + CTA_ObsDescrClass *hdescrcl) // The (handle to) the new + // observation descriptor-class +{ + + CTAI_ObsDescrClass *data; + int retval; + int i; + + /* Allocate new ObsDescr object */ + data=CTA_Malloc(sizeof(CTAI_ObsDescrClass)); + + for (i=0;ifunctions[i]=h_func[i];} + + // Allocate new handle + retval=CTA_Handle_Create(name,CTA_OBSDESCRCLASS,data,hdescrcl); + retval=CTA_Handle_GetData((CTA_Handle) *hdescrcl,(void**) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + // return error when unsuccesfull + return retval; +} + + +int CTAI_ObsDescr_member_function( + // INPUTS + CTA_ObsDescr hdescr, /* Handle of the stochastic observer of + which a member function is wanted */ + int member, /* Code of the member function */ + // OUTPUT + CTAI_ObsDescr **descr, /* All data of obs-descr hdescr */ + CTA_Function **memfun /* Member-Function pointer */ +) +{ + int retval; + + if (hdescr==CTA_NULL){ + printf("CTAI_ObsDescr_member_function WARNING NULL handle\n"); + } + /* Check that the given handle is indeed an observation description */ + retval=CTA_Handle_Check((CTA_Handle) hdescr, CTA_OBSDESCR); + if (retval!=CTA_OK) return retval; + + /* Get pointer to struct with observation description data */ + retval=CTA_Handle_GetData((CTA_Handle) hdescr,(void**) descr); + if (retval!=CTA_OK) return retval; + + /* Get pointer to implementation of this function */ + retval=CTA_Func_GetFunc((*descr)->functions[member],memfun); + return retval; +} + + +#undef METHOD +#define METHOD "Create" +int CTA_ObsDescr_Create( + // INPUTS: + CTA_ObsDescrClass hdescrcl, // class of observer hsobs + CTA_Handle usrdat, // Data of the stochastic observer for which + // a descriptor is to be created + // OUTPUTS: + CTA_ObsDescr *hdescr) // The new COSTA-stochastic observer + // (handle) +{ + CTAI_ObsDescr *descr; + int memsize; + int retval; + CTAI_ObsDescrClass *clsdata; + CTA_Function *my_Create_Size, *my_Create_Init; + int i; + + /* Get class data containing all function pointers */ + retval=CTA_Handle_Check((CTA_Handle) hdescrcl,CTA_OBSDESCRCLASS); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a CTA_OBSDESCRCLASS handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hdescrcl,(void**) &clsdata); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* determine size of data object (CTA_OBSDESCR_CREATE_SIZE)*/ + retval=CTA_Func_GetFunc(clsdata->functions[I_CTA_OBSDESCR_CREATE_SIZE], + &my_Create_Size); + + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function CTA_OBSDESCR_CREATE_SIZE"); + return retval; + } + + my_Create_Size(&memsize,&retval); + if (retval) { + CTA_WRITE_ERROR("Error in Create_Size"); + return retval; + } + + /* allocate memory for new observation description object */ + descr=CTA_Malloc(sizeof(CTAI_ObsDescr)); + descr->data=CTA_Malloc(memsize); + + /* copy function pointers */ + for (i=0;ifunctions[i]=clsdata->functions[i]; + } + + /* set other general information */ + descr->hdescrcl=hdescrcl; +// descr->hsobs=hsobs; + + /* Initialise and fill new obsdescr */ + retval=CTA_Func_GetFunc(clsdata->functions[I_CTA_OBSDESCR_CREATE_INIT], + &my_Create_Init); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function CTA_OBSDESCR_CREATE_INIT"); + return retval; + } + + /* Allocate new handle and return error when unsuccesfull */ + retval=CTA_Handle_Create("obsdescr",CTA_OBSDESCR,descr,hdescr); + if (retval) { + CTA_WRITE_ERROR("Cannot create handle"); + return retval; + } + + my_Create_Init(hdescr, descr->data, &usrdat, &retval); + if (retval!=CTA_OK) { + printf("error in create init: %d \n",retval); + CTA_WRITE_ERROR("Error in Create Init"); + return retval; + } + + return CTA_OK; +} + +#undef METHOD +#define METHOD "CreateSel" +int CTA_ObsDescr_CreateSel( CTA_StochObs hobsdescr, CTA_String selection, + CTA_RelTable reltab, CTA_StochObs *hobsdescrout) +{ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_ObsDescr* sobsin; /* All data of observer hsobsin */ + CTAI_ObsDescr* sobsout; /* All data of observer hsobsout */ + CTA_Function *memfun; /* Function that must be called */ + CTA_Function *my_Create_Size; /* Function for determining mem-block */ + int memsize,i; + + /* Look up the Create-size function and the data of the input observer*/ + retval=CTA_Handle_Check((CTA_Handle) hobsdescr,CTA_OBSDESCR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_obsdescr handle"); + return retval; + } + + /* Get the create-size member function */ + retval = CTA_Handle_GetData(hobsdescr,(void **) &sobsin); + retval = CTA_Func_GetFunc(sobsin->functions[I_CTA_OBSDESCR_CREATE_SIZE], + &my_Create_Size); + + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function CTA_OBSDESCR_CREATE_SIZE"); + return retval; + } + + my_Create_Size(&memsize,&retval); + if (retval) { + CTA_WRITE_ERROR("Error in Create_Size"); + return retval; + } + + sobsout=CTA_Malloc(sizeof(CTAI_ObsDescr)); + sobsout->data=CTA_Malloc(memsize); + + /* copy function pointers and observation class handle */ + for (i=0;ifunctions[i]=sobsin->functions[i]; } + + + /* Look up member function and observer data */ + retval = CTA_Func_GetFunc(sobsin->functions[I_CTA_OBSDESCR_SELECTION], + &memfun); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function CTA_OBSDESCR_SELECTION"); + return retval; + } + + /* Allocate new handle and return error when unsuccesfull */ + retval=CTA_Handle_Create("obsdescr",CTA_OBSDESCR,sobsout,hobsdescrout); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot create handle"); + return retval; + } + + /* Call (user) implementation */ + memfun(sobsin->data,&selection,&reltab, hobsdescrout, sobsout->data,&retval); + + return retval; +}; + +#undef METHOD +#define METHOD "CreateTimeSe1" +int CTA_ObsDescr_CreateTimSel(CTA_ObsDescr hobsdescr, CTA_Time timespan, + CTA_RelTable reltab, CTA_ObsDescr *hobsdescrout) +{ + int ierr; + char str[80]; + double t1,t2; + double eps; + + CTA_String sselect; + + + *hobsdescrout=CTA_NULL; + if (hobsdescr!=CTA_NULL){ + // Get interval of timespan (t1,t2) + ierr=CTA_Time_GetSpan(timespan,&t1,&t2); + if (ierr!=CTA_OK) { + char message[1024]; + sprintf(message,"Cannot get interval of timespan (%g,%g)",t1,t2); + CTA_WRITE_ERROR(message); + return ierr; + } + // Create the Selection string (SQL-statement) + ierr=CTA_String_Create(&sselect); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Cannot create string"); + return ierr; + } + + eps=(t2-t1)*1.0e-4+1.0e-16; + t1=t1+eps; + t2=t2+eps; + sprintf(str, "time BETWEEN %f AND %f",t1, t2 ); + ierr=CTA_String_Set(sselect,str); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Cannot set string"); + return ierr; + } + + // Create the selcection + ierr=CTA_ObsDescr_CreateSel(hobsdescr, sselect, reltab, hobsdescrout); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Cannot create selection"); + return ierr; + } + + // Free work variables + ierr=CTA_String_Free(&sselect); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Cannot free string"); + return ierr; + } + } + return CTA_OK; +}; + + + + +#undef METHOD +#define METHOD "Get_PropertyKeys" +int CTA_ObsDescr_Get_PropertyKeys( + // INPUTS: + CTA_ObsDescr hobsdescr, /* Handle of the observation description of + which a property is to be returned */ + // OUTPUTS: + CTA_Vector Keys) /* Name of the key */ +{ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_ObsDescr* obsdescr; /* All data of observer hobsdescr */ + CTA_Function *memfun; /* Function that must be called */ + + /* trivial return we accept CTA_NULL -> empty vector */ + if (hobsdescr==CTA_NULL){ + Keys = CTA_NULL; + return CTA_OK; + } + + /* Look up the member function and the data of the observation descriptor*/ + retval = CTAI_ObsDescr_member_function(hobsdescr, I_CTA_OBSDESCR_GET_KEYS, + &obsdescr, &memfun); + if (retval) { + CTA_WRITE_ERROR("Error in function CTA_OBSDESCR_GET_KEYS"); + return retval; + } + + /* Call (user) implementation */ + memfun(obsdescr->data,&Keys,&retval); + + return retval; +} + +#undef METHOD +#define METHOD "Property_Count" +int CTA_ObsDescr_Property_Count(CTA_ObsDescr hobsdscr, int *nkeys){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_ObsDescr* obsdescr; /* All data of observer hobsdescr */ + CTA_Function *memfun; /* Function that must be called */ + char msg[256]; + + /* trivial return: CTA_NULL means empty description: no keys */ + if (hobsdscr==CTA_NULL){ + *nkeys=0; + return CTA_OK; + } + + /* Look up the member function and the data of the observation descriptor*/ + retval = CTAI_ObsDescr_member_function(hobsdscr, I_CTA_OBSDESCR_COUNT_PROPERTIES, + &obsdescr, &memfun); + if (retval) { + sprintf(msg,"Cannot find the member function I_CTA_OBSDESCR_COUNT_PROPERTIES retval=%d",retval); + CTA_WRITE_ERROR(msg); + return retval; + } + + /* Call (user) implementation */ + memfun(obsdescr->data,nkeys,&retval); + + return retval; +} + +#undef METHOD +#define METHOD "Observation_Count" +int CTA_ObsDescr_Observation_Count(CTA_ObsDescr hobsdscr, int *nobs){ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_ObsDescr* obsdescr; /* All data of observer hobsdescr */ + CTA_Function *memfun; /* Function that must be called */ + char msg[256]; + + /* trivial return we accept CTA_NULL -> nobs=0 */ + if (hobsdscr==CTA_NULL){ + *nobs=0; + return CTA_OK; + } + + /* Look up the member function and the data of the observation descriptor*/ + retval = CTAI_ObsDescr_member_function(hobsdscr, I_CTA_OBSDESCR_COUNT_OBSERVATIONS, + &obsdescr, &memfun); + if (retval) { + sprintf(msg,"Cannot find the member function I_CTA_OBSDESCR_COUNT_OBSERVATIONS retval=%d",retval); + CTA_WRITE_ERROR(msg); + return retval; + } + + /* Call (user) implementation */ + memfun(obsdescr->data,nobs,&retval); + + return retval; +} + +#undef METHOD +#define METHOD "Get_ValueProperties" +int CTA_ObsDescr_Get_ValueProperties( + CTA_ObsDescr hobsdscr, + const char* Key, + CTA_Vector Properties, + CTA_Datatype datatype) +{ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_ObsDescr* obsdescr; /* All data of observer hobsdescr */ + CTA_Function *memfun; /* Function that must be called */ + char msg[256]; + + + /* Look up the member function and the data of the observation descriptor*/ + retval = CTAI_ObsDescr_member_function(hobsdscr, I_CTA_OBSDESCR_GET_PROPERTIES, + &obsdescr, &memfun); + if (retval) { + sprintf(msg,"Cannot find the member function I_CTA_OBSDESCR_GET_PROPERTIES retval=%d",retval); + CTA_WRITE_ERROR(msg); + return retval; + } + + + /* Call (user) implementation */ + memfun(obsdescr->data,Key,&Properties,&datatype,&retval); + + return retval; +} + + +#undef METHOD +#define METHOD "Export" +int CTA_ObsDescr_Export(CTA_ObsDescr hdescr, CTA_Handle usrdat){ + int retval; /* Return value of COSTA call */ + CTAI_ObsDescr* descr; /* All data of observer hobsdescr */ + CTA_Function *memfun; /* Function that must be called */ + char msg[256]; + + /* Look up the member function and the data of the observation descriptor*/ + retval = CTAI_ObsDescr_member_function(hdescr, I_CTA_OBSDESCR_EXPORT, + &descr, &memfun); + if (retval) { + sprintf(msg,"Cannot find the member function I_CTA_OBSDESCR_EXPORT retval=%d",retval); + CTA_WRITE_ERROR(msg); + return retval; + } + + /* Call (user) implementation */ + memfun(descr->data,&usrdat,&retval); + return retval; +} + + + + +#undef METHOD +#define METHOD "Free" +int CTA_ObsDescr_Free(CTA_ObsDescr *hdescr){ + int retval; /* Return value of COSTA call */ + CTAI_ObsDescr* descr; /* All data of observer hobsdescr */ + CTA_Function *memfun; /* Function that must be called */ + char msg[256]; + + /* Check for quick return */ + if (*hdescr==CTA_NULL) return CTA_OK; + + /* Look up the member function and the data of the observation descriptor*/ + retval = CTAI_ObsDescr_member_function(*hdescr, I_CTA_OBSDESCR_FREE, + &descr, &memfun); + if (retval) { + sprintf(msg,"Cannot find the member function I_CTA_OBSDESCR_FREE retval=%d",retval); + CTA_WRITE_ERROR(msg); + return retval; + } + + /* Call (user) implementation */ + memfun(descr->data,&retval); + + free(descr->data); + free(descr); + retval = CTA_Handle_Free(hdescr); + return retval; +} + +/* Interfacing with Fortran */ + +CTAEXPORT void CTA_OBSDESCR_DEFINECLASS_F77( char *name, int *h_func, int *hobsdscrcl,int *ierr, int len_name){ +//CTAEXPORT void cta_obsdescr_defineclass_( char *name, int *h_func, int *hobsdscrcl,int *ierr, int len_name){ + + char *c_name; + // create a c-string equivalent to name + c_name=CTA_Malloc((len_name+1)*sizeof(char)); + CTA_fstr2cstr(name,c_name,len_name); + + *ierr=CTA_ObsDescr_DefineClass(name, (CTA_Func*) h_func, (CTA_ObsDescrClass*) hobsdscrcl); + + free(c_name); +} + +CTAEXPORT void CTA_OBSDESCR_CREATE_F77(int *hsobscl, int *usrdat, + int *hobsdescr, int *ierr){ + + *ierr=CTA_ObsDescr_Create((CTA_ObsDescrClass) *hsobscl, (CTA_Handle) *usrdat, + (CTA_ObsDescr*) hobsdescr); +} + +//void CTA_OBSDESCR_CHECK_SOBS_F77(int *hobsdescr, int *hsobs ,int *ierr){ +// *ierr=CTA_ObsDescr_Check_SObs((CTA_ObsDescr) *hobsdescr, (CTA_StochObs) *hsobs); +//} + +CTAEXPORT void CTA_OBSDESCR_GET_VALUEPROPERTIES_F77( + int *hobsdscr, char *key, int *Property, int *datatype, + int *ierr, int len_key) +{ + char *c_Key; + c_Key=CTA_Malloc((len_key+1)*sizeof(char)); + CTA_fstr2cstr(key,c_Key,len_key); + *ierr=CTA_ObsDescr_Get_ValueProperties( + (CTA_ObsDescr) *hobsdscr, + c_Key, + (CTA_Vector) *Property, + (CTA_Datatype) *datatype); + free(c_Key); +} + +CTAEXPORT void CTA_OBSDESCR_GET_PROPERTY_KEYS_F77(int *hobsdscr, int *Keys, int *ierr){ + *ierr=CTA_ObsDescr_Get_PropertyKeys( (CTA_ObsDescr) *hobsdscr, (CTA_Vector) *Keys); +} + +CTAEXPORT void CTA_OBSDESCR_PROPERTY_COUNT_F77(int *hobsdscr, int *nkeys, int *ierr){ + *ierr=CTA_ObsDescr_Property_Count( (CTA_ObsDescr) *hobsdscr, nkeys); +} + +CTAEXPORT void CTA_OBSDESCR_OBSERVATION_COUNT_F77(int *hobsdscr, int *nobs, int *ierr){ + *ierr=CTA_ObsDescr_Observation_Count((CTA_ObsDescr) *hobsdscr, nobs); +} + +CTAEXPORT void CTA_OBSDESCR_EXPORT_F77(int *hdescr, int *usrdat, int *ierr){ + *ierr=CTA_ObsDescr_Export((CTA_ObsDescr) *hdescr, (CTA_Handle) *usrdat); +} + +CTAEXPORT void CTA_OBSDESCR_FREE_F77(int *hobsdscr, int *ierr){ + *ierr=CTA_ObsDescr_Free((CTA_ObsDescr *) hobsdscr); +} + + + diff --git a/costa/native/cta/src/cta_obsdescr_combine.c b/costa/native/cta/src/cta_obsdescr_combine.c new file mode 100644 index 000000000..b0dbbc302 --- /dev/null +++ b/costa/native/cta/src/cta_obsdescr_combine.c @@ -0,0 +1,392 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_obsdescr_combine.c $ +$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include +#include +#include "cta_mem.h" +#include "cta_file.h" +#include "cta_errors.h" +#include "cta_string.h" +//#include "cta_obsdescr_combine.h" +#include "cta_defaults.h" + +#include "cta_sobs_combine.h" + +#define IDEBUG (0) + +typedef struct { +CTA_Handle myhandle; +char* condition; +int n_keys; +int nmeasr; +CTA_String *Keys; +int nofsubdescr; // number of descriptions +CTA_Vector subdescr; // list of handles of descriptions +} CTAI_ObsDescr_combine; + + +void CTAI_ObsDescr_combine_Create_Size( + // OUTPUTS: + int *memsize, // The number of bytes which are necessary to + // store one CTAI_SObs_combine, with a + // pointer to the contents (data), + // but without the contents themselves. + int *retval // error code (see cta_datatypes.h for possible + // error codes) + ){ +// *memsize=(int) sizeof(CTAI_SObs_combine); + *memsize=(int) sizeof(CTAI_ObsDescr_combine); + *retval=CTA_OK; +}; + +void CTAI_ObsDescr_combine_Create_Init( +/* + Allocate the memory which is necessary to store the data necessary for a + combine-observer +*/ + // INPUT: + CTA_ObsDescr *myhandle, /* Handle assigned by COSTA */ + // IN-OUTPUTS + CTAI_ObsDescr_combine *descr,// The combine-observation description + // for which the memory must be + // allocated + // INPUTS: + CTA_Handle *usrdat, // User data: de data from the stochobs. Here it is the CTAI_Sobs_combine + // OUTPUTS + int *retval) // Error code. +{ + CTAI_SObs_combine *x; + int nofsubdescr, idescr, ierr; + CTA_Handle subsob; + CTA_Vector hvec1; + CTA_ObsDescr hsubdescr; // description of sub-stochobs subsob + + // get the CTAI_Sobs_Combine + *retval= CTA_Handle_GetData(*usrdat, (void **) &x); + if (*retval!=CTA_OK) return; + + /* Copy the information about the number of subsobs */ + /* Note: the number of subdescr is, by definition, equal to the number of subsobs. */ + descr->nofsubdescr = x->nofsubsobs ; + + nofsubdescr = descr->nofsubdescr; + /* create a vector for the handles of ghe observation descriptions */ + ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nofsubdescr, CTA_HANDLE, CTA_NULL, &hvec1); + + for (idescr=1; idescr<=nofsubdescr; idescr++) { + //get the sub-stochastic observer + ierr=CTA_Vector_GetVal(x->subsobs,idescr,&subsob,CTA_HANDLE); + /* check if handle is a stochastic observation? (superfluous check) */ + ierr=CTA_Handle_Check((CTA_Handle) subsob,CTA_SOBS); + if (IDEBUG > 0) + printf("sobs_obsdescr_combine_create_init: TEST if subsobs %d is real (sub-)sobs : retval %d \n",idescr,ierr); + /* Each subsobs already has its own description created. */ + CTA_SObs_GetDescription(subsob,&hsubdescr); + + /* put handle in cta-vector of handles */ + CTA_Vector_SetVal(hvec1,idescr,&hsubdescr,CTA_HANDLE); + + } + + + /* Finally, set the vector with sub descriptions */ + descr->subdescr = hvec1; + +}; + +void CTAI_ObsDescr_combine_CreateSel(CTAI_ObsDescr_combine *descr, + CTA_String *selection, CTA_RelTable *reltab, + CTA_ObsDescr *myhandle_out, + CTAI_ObsDescr_combine *descrout, int *retval){ + + int nofsubdescr,idescr; + CTA_Handle subdescr_in ; + int ierr; + CTA_Vector hvec1; + CTA_ObsDescr subdescr_out; + + nofsubdescr = descr->nofsubdescr; + + /* Warning: only the memory of descrout has been created already, + the nofsubdescr and vector have to be defined now. */ + descrout->nofsubdescr = nofsubdescr; + ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nofsubdescr, CTA_HANDLE, CTA_NULL, &hvec1); + + for (idescr=1; idescr<=nofsubdescr; idescr++) { + ierr=CTA_Vector_GetVal(descr->subdescr,idescr,&subdescr_in,CTA_HANDLE); + /* call the createsel function for each individual sub-sobdescr */ + if (ierr == CTA_OK) ierr = CTA_ObsDescr_CreateSel(subdescr_in, *selection, + *reltab, &subdescr_out); + + /* put the selected subdescr in the vector descrout->subdescr */ + if (ierr == CTA_OK) ierr=CTA_Vector_SetVal(hvec1,idescr,&subdescr_out,CTA_HANDLE); + if (ierr != CTA_OK) { + printf("ctai_obsdescr_createsel: ERROR %d \n",ierr);} + + } + /* set the cta_vector of the descrout */ + descrout->subdescr = hvec1; + + *retval = ierr ; + /* TODO: think about reltab and myhandle. */ + + +} + + + +void CTAI_ObsDescr_combine_Get_Keys( + CTAI_ObsDescr_combine *descr, + CTA_Vector *Keys, + int *retval) +{ + + int nofsubdescr,idescr; + CTA_Handle subdescr; + int nsubkeys,ierr, offset, j; + CTA_Vector hsubvec; + CTA_String hstr1; + + nofsubdescr = descr->nofsubdescr; + offset=0; + for (idescr=1; idescr<=nofsubdescr; idescr++) { + ierr=CTA_Vector_GetVal(descr->subdescr,idescr,&subdescr,CTA_HANDLE); + /* count the number of keys in sub-obsdescr */ + ierr=CTA_ObsDescr_Property_Count(subdescr, &nsubkeys); + + /* create a vector holding the keys of the sub-obsdescr */ + ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nsubkeys, CTA_STRING, CTA_NULL, &hsubvec); + /* call the get_propertykeys function for each individual sub-sobdescr */ + ierr = CTA_ObsDescr_Get_PropertyKeys(subdescr, hsubvec); + + /* add the new keys to the existing keys */ + ierr = CTA_String_Create(&hstr1); + for (j=1; j<= nsubkeys; j++){ + CTA_Vector_GetVal(hsubvec,j, &hstr1,CTA_STRING); + CTA_Vector_SetVal(*Keys,offset+j, &hstr1,CTA_STRING); + } + offset = offset + nsubkeys; + ierr = CTA_Vector_Free(&hsubvec); + } + *retval = ierr; + + // TODO: hier moeten wel de dubbele uit! +}; + +void CTAI_ObsDescr_combine_Property_Count( + CTAI_ObsDescr_combine *descr, + int *nkeys, + int *retval) +{ + int nofsubdescr,idescr; + CTA_Handle subdescr; + int nsubkeys,ierr; + + *nkeys = 0; + nofsubdescr = descr->nofsubdescr; + for (idescr=1; idescr<=nofsubdescr; idescr++) { + ierr=CTA_Vector_GetVal(descr->subdescr,idescr,&subdescr,CTA_HANDLE); + /* call the propertycount function for each individual sub-obsdescr */ + ierr = CTA_ObsDescr_Property_Count(subdescr, &nsubkeys); + *nkeys = *nkeys + nsubkeys; + } + *retval = ierr; +} +; + +void CTAI_ObsDescr_combine_Observation_Count( + CTAI_ObsDescr_combine *descr, + int *nobs, + int *retval) +{ + + int nofsubdescr,idescr; + CTA_Handle subdescr; + int nsubobs,ierr; + + *nobs = 0; + nofsubdescr = descr->nofsubdescr; + for (idescr=1; idescr<=nofsubdescr; idescr++) { + ierr=CTA_Vector_GetVal(descr->subdescr,idescr,&subdescr,CTA_HANDLE); + /* check if handle is a observation description? */ + ierr=CTA_Handle_Check((CTA_Handle) subdescr,CTA_OBSDESCR); + + if (IDEBUG > 0) {printf("ctai_obsdescr_combine_observation_count: "); + printf("TEST if subdescr %d is a real (sub-)descr : retval %d \n",idescr,ierr);} + + /* call the count function for each individual sub-sobdescr */ + ierr = CTA_ObsDescr_Observation_Count(subdescr, &nsubobs); + *nobs = *nobs + nsubobs; + } + *retval = ierr; +}; + + +void CTAI_ObsDescr_combine_Get_Properties( + CTAI_ObsDescr_combine *descr, + const char *Key, + CTA_Vector *Properties, + CTA_Datatype *datatype, + int *retval) +{ + int nofsubdescr,idescr,j; + CTA_Handle subdescr; + int ierr, nsubobs, ntotobs,offset; + CTA_Vector hsubvec; + CTA_String *strval, *substrval; + double *val, *subval; + + // CTA_String *strval, *substrval; + + /* first ask the total number of values of the vector to be filled*/ + ierr = CTA_Vector_GetSize(*Properties,&ntotobs); + +/* allocate the main array containing all the values */ + if(*datatype == CTA_STRING) {strval = CTA_Malloc(sizeof(CTA_String)*ntotobs); + if (IDEBUG > 0) printf("obsdescr_combine_get_properties: Warning: string as propertydatatype!\n");} + else { val = CTA_Malloc(sizeof(double)*ntotobs);} + + + offset = 0; + nofsubdescr = descr->nofsubdescr; + for (idescr=1; idescr<=nofsubdescr; idescr++) { + ierr=CTA_Vector_GetVal(descr->subdescr,idescr,&subdescr,CTA_HANDLE); + /* First count the number of observations in sub descr */ + ierr = CTA_ObsDescr_Observation_Count(subdescr, &nsubobs); + + if (nsubobs > 0) { + + /* create the subvector to receive the values of the subdescr */ + ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nsubobs, *datatype, CTA_NULL, &hsubvec); + + /* call the get_valueproperties function for each individual sub-sobdescr */ + ierr = CTA_ObsDescr_Get_ValueProperties(subdescr, Key, hsubvec, *datatype); + + /* put the values in the sub-array. In case of String-output, create the strings */ + if (*datatype == CTA_STRING) { + substrval = CTA_Malloc(sizeof(CTA_String)*nsubobs); + for (j=0; jnofsubdescr; + ierr = CTA_OK; + for (idescr=1; idescr<=nofsubdescr; idescr++) { + if (ierr == CTA_OK) ierr=CTA_Vector_GetVal(descr->subdescr,idescr,&subdescr,CTA_HANDLE); + if (ierr == CTA_OK) ierr = CTA_ObsDescr_Free(&subdescr); + } + *retval = ierr; + +} + + +void CTA_ObsDescr_combine_initialise(CTA_ObsDescrClass *hobsdescrcl) +{ + CTA_Intf hintf=0; + CTA_Func h_func[I_CTA_OBSDESCR_NUMFUNC]; + + // The vector h_func is filled with COSTA-function handles of the + // implementations in this file. + CTA_Func_Create(" ",&CTAI_ObsDescr_combine_Create_Size, hintf, + &h_func[I_CTA_OBSDESCR_CREATE_SIZE]); + CTA_Func_Create(" ",&CTAI_ObsDescr_combine_Create_Init, hintf, + &h_func[I_CTA_OBSDESCR_CREATE_INIT]); + CTA_Func_Create(" ",&CTAI_ObsDescr_combine_Property_Count, hintf, + &h_func[I_CTA_OBSDESCR_COUNT_PROPERTIES]); + CTA_Func_Create(" ",&CTAI_ObsDescr_combine_Get_Properties, hintf, + &h_func[I_CTA_OBSDESCR_GET_PROPERTIES]); + CTA_Func_Create(" ",&CTAI_ObsDescr_combine_Observation_Count, hintf, + &h_func[I_CTA_OBSDESCR_COUNT_OBSERVATIONS]); + CTA_Func_Create(" ",&CTAI_ObsDescr_combine_Get_Keys, hintf, + &h_func[I_CTA_OBSDESCR_GET_KEYS]); + CTA_Func_Create(" ",&CTAI_ObsDescr_combine_Free, hintf, + &h_func[I_CTA_OBSDESCR_FREE]); + CTA_Func_Create(" ",&CTAI_ObsDescr_combine_CreateSel, hintf, + &h_func[I_CTA_OBSDESCR_SELECTION]); + CTA_ObsDescr_DefineClass("cta_obsdescr_combine",h_func,hobsdescrcl); + +} + + + + diff --git a/costa/native/cta/src/cta_obsdescr_maori.c b/costa/native/cta/src/cta_obsdescr_maori.c new file mode 100644 index 000000000..8cf96d6d0 --- /dev/null +++ b/costa/native/cta/src/cta_obsdescr_maori.c @@ -0,0 +1,59 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_obsdescr_netcdf.c $ +$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include "cta.h" + +void CTA_ObsDescr_maori_initialize(CTA_ObsDescrClass *hobsdescrcl) +{ + CTA_Func h_func[I_CTA_OBSDESCR_NUMFUNC]; + char *libraryName; + char *functionName; + + // The vector h_func is filled with function read from the maori user dynamic library + + libraryName="libleoda.so"; + functionName="ctai_obsdescr_maori_create_size"; + h_func[I_CTA_OBSDESCR_CREATE_SIZE] = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName); + functionName="ctai_obsdescr_maori_create_init"; + h_func[I_CTA_OBSDESCR_CREATE_INIT] = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName); + functionName="ctai_obsdescr_maori_property_count"; + h_func[I_CTA_OBSDESCR_COUNT_PROPERTIES] = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName); + functionName="ctai_obsdescr_maori_get_properties"; + h_func[I_CTA_OBSDESCR_GET_PROPERTIES] = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName); + functionName="ctai_obsdescr_maori_observations_count"; + h_func[I_CTA_OBSDESCR_COUNT_OBSERVATIONS] = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName); + functionName="ctai_obsdescr_maori_get_keys"; + h_func[I_CTA_OBSDESCR_GET_KEYS] = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName); + functionName="ctai_obsdescr_maori_free"; + h_func[I_CTA_OBSDESCR_FREE] = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName); + functionName="ctai_obsdescr_maori_selection"; + h_func[I_CTA_OBSDESCR_SELECTION] = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName); + + CTA_ObsDescr_DefineClass("cta_obsdescr_maori",h_func,hobsdescrcl); + + +} + + + diff --git a/costa/native/cta/src/cta_obsdescr_netcdf.c b/costa/native/cta/src/cta_obsdescr_netcdf.c new file mode 100644 index 000000000..0b3d012a8 --- /dev/null +++ b/costa/native/cta/src/cta_obsdescr_netcdf.c @@ -0,0 +1,550 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_obsdescr_netcdf.c $ +$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include +#include +#include "cta.h" +#include "cta_mem.h" +#include "cta_sobs_netcdf.h" +#include "cta_defaults.h" + +#define IDEBUG (0) + +#define MAX(a,b) (a>b ? a: b) +#define MIN(a,b) (a>b ? b: a) + + +/* Dit is een struct om wat op te slaan wat we nodig hebben */ +typedef struct { +#if HAVE_LIBNETCDF + CTA_Handle myhandle; + char* condition; + int n_keys; + int size_1Dproperties; + int nmeasr; + int nmeasr_orig; + CTA_String *Keys; + CTA_RelTable selectionReltab; + CTA_Time tspan; + float bb_lon[2]; + float bb_lat[2]; + CTAI_OMI_database *database; + CTAI_SObs_netcdf *sobs; +#else + int dum; +#endif +} CTAI_ObsDescr_netcdf; + + + + +void CTAI_ObsDescr_netcdf_Create_Size( + // OUTPUTS: + int *memsize, // The number of bytes which are necessary to + // store one CTAI_SObs_netcdf, with a + // pointer to the contents (data), + // but without the contents themelves. + int *retval // error code (see cta_datatypes.h for possible + // error codes) + ){ +#if HAVE_LIBNETCDF + +// *memsize=(int) sizeof(CTAI_SObs_netcdf); + *memsize=(int) sizeof(CTAI_ObsDescr_netcdf); + *retval=CTA_OK; +#else + printf("Error: CTAI_ObsDescr_netcdf_Create_Size: COSTA is compiled without NETCD suport\n"); +#endif +}; + +void CTAI_ObsDescr_netcdf_Create_Init( +/* + Allocate the memory which is necesary to store the data necessary for a + netcdf-observer +*/ + // INPUT: + CTA_ObsDescr *myhandle, /* Handle assigned by COSTA */ + // IN-OUTPUTS + CTAI_ObsDescr_netcdf *descr,// The netcdf-observation description + // for which the memory must be + // allocated + // INPUTS: + CTA_Handle *usrdat, // User data: database-name + // OUTPUTS + int *retval) // Error code. +{ +#if HAVE_LIBNETCDF + + CTAI_SObs_netcdf *sobs; // User data: database-name + int ierr,i,j ; + CTA_Time tspan0; + char buffer[20]; + + descr->myhandle=*myhandle; + + *retval= CTA_Handle_GetData(*usrdat, (void **) &sobs); + if (*retval!=CTA_OK) { + printf("error: ctai_obsdescr-netcdf_create_init : handle_getdata \n"); + return; + } + + ierr=CTA_Time_Create(&tspan0); + ierr=CTA_Time_SetSpan(tspan0, 0.0, 24.0); + descr->tspan = tspan0; + + // store the handle of the stochastic observer + descr->sobs = sobs; + + descr->database = sobs->database; + (descr->database->nusers)++; + + // Copy the conditionCTA_RelTable *hreltable ? + ierr = CTA_RelTable_Create(&(descr->selectionReltab)); + descr->selectionReltab = sobs->selectionReltab; + + // Store the keys ? + // Here: do it ad hoc since our OMI-netcdf database is very specific + descr->size_1Dproperties = 30; //here: for kernel and pressure_levels + descr->n_keys=2 + descr->size_1Dproperties + descr->size_1Dproperties+1 ; + descr->Keys = CTA_Malloc(sizeof(CTA_String)* (descr->n_keys)); + for (i=0; i< descr->n_keys; i++) + { + ierr = CTA_String_Create( &(descr->Keys[i]) ); + if (ierr != CTA_OK) {printf("error in cta_obsdecr_init %d \n",ierr);return ; } + } + /* Key: 0:long + 1: lat + 2-31 kernel + 32-62 pressure_levels + */ + + ierr = CTA_String_Set(descr->Keys[0],"longitude"); + ierr = CTA_String_Set(descr->Keys[1],"latitude"); + for (j=1; j<= descr->size_1Dproperties; j++){ + sprintf(buffer,"kernel_%d",j); + ierr = CTA_String_Set(descr->Keys[1+j],buffer); + } + for (j=1; j<= descr->size_1Dproperties + 1; j++){ + sprintf(buffer,"pressure_levels_%d",j); + ierr = CTA_String_Set(descr->Keys[31+j],buffer); + } + + if (ierr != CTA_OK) {printf("error cta_string_set%d\n",ierr ); return;} + + // Copy the dimension of the observer + descr->nmeasr = sobs->nmeasr; + descr->nmeasr_orig = sobs->nmeasr_orig; + *retval=CTA_OK; +#else + printf("Error: CTAI_ObsDescr_netcdf_Create_Init: COSTA is compiled without NETCD suport\n"); + *retval=CTA_NOT_IMPLEMENTED; +#endif + +}; + + +void CTAI_ObsDescr_netcdf_CreateSel(CTAI_ObsDescr_netcdf *descr, + CTA_String *selection, CTA_RelTable *reltab, + CTA_ObsDescr *myhandle_out, + CTAI_ObsDescr_netcdf *descrout, int *retval){ + +#if HAVE_LIBNETCDF + + int len, ierr,i ; + char *condition; + double t1_in,t2_in, t1,t2, t1_out, t2_out; + int nstations, count_out; + CTA_Vector hvec_time; + double *vec_time; + int *flag; + + // Get the condition + // Allocate a name-string + *retval = CTA_String_GetLength(*selection, &len); + if (*retval!=CTA_OK) return; + + // Get the condition + condition = CTA_Malloc((len+1)*sizeof(char)); + + *retval = CTA_String_Get(*selection, condition); + if (IDEBUG>0) printf("CTAI_ObsDescr_netcdf_CreateSel: given condition is '%s'\n",condition); + if (*retval!=CTA_OK) return; + + // Set handle of new observation description + descrout->myhandle=*myhandle_out; + + + // IF the callling sequence has been : cta_sobs_createtimsel -> cta_sobs_creatsel -> + // ctai_sobs_netcdf_createsel, THEN + // The condition is the string starting with 'time BETWEEN' + if (strstr(condition,"time BETWEEN") != NULL) { + ierr = sscanf(condition,"%*s %lf %lf",&t1,&t2); + + } else { + t1 = 0.0; t2 = 24.0; + } + printf("ctai_obsdescr_createsel: time selection: %f %f \n",t1,t2); + + // It may be possible that a spatial restriction is given. We will implement that later. TODO + + + // Combine the input-condition and the condition of the input observer. + // In our netcdf-case, the condition is a combination of spatial window and timespan. + ierr = CTA_Time_GetSpan(descr->tspan, &t1_in, &t2_in); + t1_out = MAX(t1_in,t1); + t2_out = MIN(t2_in,t2); + ierr = CTA_Time_SetSpan(descrout->tspan, t1_out, t2_out); + + + + // first get the time vector. + ierr = CTA_Vector_Create(CTA_DEFAULT_VECTOR, descr->nmeasr, CTA_DOUBLE, CTA_NULL, &hvec_time); + CTAI_SObs_netcdf_GetTimes(descr->sobs, &hvec_time, &ierr); + + printf("cta_obsdescr_netcdf: manipulation sobs %p %d \n",descr->sobs,ierr); + + vec_time = CTA_Malloc(descr->sobs->nmeasr * sizeof(double)); + ierr=CTA_Vector_GetVals(hvec_time,vec_time, descr->sobs->nmeasr, CTA_DOUBLE); + + // We use the relation table to investigate if the current selection should be restricted more. + count_out = 0; + flag=CTA_Malloc((descr->sobs->nmeasr)*sizeof(int)); + for (i=0; i < descr->sobs->nmeasr; i++) { + flag[i]= 0; + if (vec_time[i] >= t1_out && vec_time[i] <=t2_out) { + flag[i] = 1; + + count_out = count_out + 1; + } + } + nstations = count_out; + + descrout->nmeasr = nstations; + descrout->n_keys = descr->n_keys; + descrout->size_1Dproperties = descr->size_1Dproperties; + + if (*reltab!=CTA_NULL) { + *retval=CTAI_ObsDescr_CreateRelTable(descr->myhandle, + descrout->myhandle, *reltab); + } else { + *retval=CTA_OK; + } + + descrout->database = descr->database; + descrout->database->nusers++; + + free(vec_time); + free(flag); + *retval = CTA_OK; + +#else + printf("Error: CTAI_ObsDescr_netcdf_CreateSel: COSTA is compiled without NETCD support\n"); + *retval=CTA_NOT_IMPLEMENTED; +#endif + + +} + + + +void CTAI_ObsDescr_netcdf_Get_Keys( + CTAI_ObsDescr_netcdf *descr, + CTA_Vector *Keys, + int *retval) +{ +#if HAVE_LIBNETCDF + int i; + for (i=1; i<=descr->n_keys; i++) + { + *retval = CTA_Vector_SetVal(*Keys,i,&(descr->Keys[i-1]),CTA_STRING); + }; + + *retval = CTA_OK; + + +#else + printf("Error: CTAI_ObsDescr_netcdf_Get_Keys: COSTA is compiled without NETCD suport\n"); + *retval=CTA_NOT_IMPLEMENTED; +#endif + +}; + +void CTAI_ObsDescr_netcdf_Property_Count( + CTAI_ObsDescr_netcdf *descr, + int *nkeys, + int *retval) +{ +#if HAVE_LIBNETCDF + *nkeys = descr->n_keys; + *retval = CTA_OK; +#else + printf("Error: CTAI_ObsDescr_netcdf_Property_Count: COSTA is compiled without NETCD suport\n"); + *retval=CTA_NOT_IMPLEMENTED; +#endif +}; + +void CTAI_ObsDescr_netcdf_Observation_Count( + CTAI_ObsDescr_netcdf *descr, + int *nobs, + int *retval) +{ +#if HAVE_LIBNETCDF + *nobs = descr->nmeasr; + *retval = CTA_OK; +#else + printf("Error: CTAI_ObsDescr_netcdf_Observation_Count: COSTA is compiled without NETCD suport\n"); + *retval=CTA_NOT_IMPLEMENTED; +#endif +}; + + +void CTAI_ObsDescr_netcdf_Get_Properties( + CTAI_ObsDescr_netcdf *descr, + const char *Key, + CTA_Vector *Properties, + CTA_Datatype *datatype, + int *retval) +{ +#if HAVE_LIBNETCDF + + int ierr,ncid,varid; + int i, ival, dim2; + float *nc_values, *selected_nc_values, *nc_tot_values, *nc_values_lon,*nc_values_lat; + char tmpchar[20]; + CTA_String hstr; + + if (IDEBUG>0) {printf("ctai_obsdescr_netcdf_get_properties; START\n");} + if (IDEBUG>0) {printf("ctai_obsdescr_netcdf_get_properties: key: %s of: |%s| \n",Key,descr->database->dbname);} + + + // first allocate space for temporary arrays + dim2 = 1; + if (strstr(Key,"kernel")!=NULL){dim2 = descr->size_1Dproperties;} + if (strstr(Key,"pressure_levels")!=NULL){dim2 = descr->size_1Dproperties+1 ;} + nc_tot_values = CTA_Malloc(descr->nmeasr_orig * dim2 * sizeof(float)); + + + nc_values = CTA_Malloc(descr->nmeasr_orig * sizeof(float)); + nc_values_lon = CTA_Malloc(descr->nmeasr_orig * sizeof(float)); + nc_values_lat = CTA_Malloc(descr->nmeasr_orig * sizeof(float)); + selected_nc_values = CTA_Malloc(descr->nmeasr * sizeof(float)); + + + // Get the netcdf-id of the database + ncid = descr->database->ncid; + + + /* now obtain the desired secondary variable, given by the keyname */ + /* For our specific OMI-netcdf-file, we need: */ + + /* always: get the lon and lat */ + ierr = nc_inq_varid(ncid, "longitude", &varid); + ierr = nc_get_var_float(ncid, varid, &nc_values_lon[0]); + ierr = nc_inq_varid(ncid, "latitude", &varid); + ierr = nc_get_var_float(ncid, varid, &nc_values_lat[0]); + if (ierr != CTA_OK) + {printf("Error: could not read the property lat/lon \n"); + *retval = -1; return; + } + + if (strcmp(Key,"NAME")==0){ + /* this key should not be necessary, but it is naively asked by certain COSTA-routines + like CTA_Util_MethodsPrintObservations */ + for (i=0; i < descr->nmeasr_orig; i++){ + nc_values[i] = i*1.0; + } + +} else if (strcmp(Key,"longitude")==0){ + + if (IDEBUG>0) {printf("ctai_obsdescr_netcdf_get_properties: LONGITUDE \n");} + + // read the lon vector. We know the size: nmeasr_orig. + // ierr = nc_inq_varid(ncid, "longitude", &varid); + //ierr = nc_get_var_float(ncid, varid, &nc_values[0]); + for (i=0; i < descr->nmeasr_orig; i++){ + nc_values[i] = nc_values_lon[i]; } + + } else if (strcmp(Key,"latitude")==0) { + // ierr = nc_inq_varid(ncid, "latitude", &varid); + //ierr = nc_get_var_float(ncid, varid, &nc_values[0]); + for (i=0; i < descr->nmeasr_orig; i++){ + nc_values[i] = nc_values_lat[i]; } + + } else if (strstr(Key,"pressure_levels")!=NULL) { + ierr = nc_inq_varid(ncid, "pressure_levels", &varid); + sscanf(&Key[16] ,"%d",&ival); + if (IDEBUG>0) {printf("Getting key %s, number: %d \n",Key,ival);} + + ierr = nc_get_var_float(ncid, varid, &nc_tot_values[0]); + + if (ierr != CTA_OK) + {printf("Error: could not read the property %s %d \n",Key,ierr); + *retval = -1; return; + } + for (i=0; i < descr->nmeasr_orig; i++){ + nc_values[i] = nc_tot_values[ival-1 + (i* (descr->size_1Dproperties+1))]; + } + + + + } else if (strstr(Key,"kernel")!=NULL) { + + ierr = nc_inq_varid(ncid, "kernel", &varid); + sscanf(&Key[7] ,"%d",&ival); + if (IDEBUG>0) {printf("Getting key %s, number: %d \n",Key,ival);} + + ierr = nc_get_var_float(ncid, varid, &nc_tot_values[0]); + + if (ierr != CTA_OK) + {printf("Error: could not read the property %s %d \n",Key,ierr); + *retval = -1; return; + } + for (i=0; i < descr->nmeasr_orig; i++){ + nc_values[i] = nc_tot_values[ival-1 + (i* descr->size_1Dproperties)]; + } + + + }else { + printf("Key %s is NOT YET IMPLEMENTED!!! \n",Key); + *retval = CTA_NOT_IMPLEMENTED; + return; + + } + + // now fill the properties vector with the obtained values. The relation table is used to get only the selection. + ierr = CTA_RelTable_ApplyVal(descr->selectionReltab, nc_values,descr->nmeasr_orig, + selected_nc_values,descr->nmeasr,CTA_REAL ); + + if (IDEBUG>0) {printf("ctai_obsdescr_netcdf_get_properties:ierr %d \n",ierr);} + if (IDEBUG>0) {printf("selected values %f %f %d %d \n",selected_nc_values[0], selected_nc_values[1],descr->nmeasr, *datatype);} + + + + if (*datatype == CTA_STRING){ + if (IDEBUG>0) {printf("cta-string-expected;workaround \n");} + ierr = CTA_String_Create(&hstr); + for (i=0; inmeasr; i++){ + sprintf(tmpchar,"%f",selected_nc_values[i]); + ierr = CTA_String_Set(hstr,tmpchar); + ierr=CTA_Vector_SetVal(*Properties,i+1,&hstr, CTA_STRING); + } + ierr = CTA_String_Free(&hstr); + } else { + + ierr=CTA_Vector_SetVals(*Properties,selected_nc_values, descr->nmeasr, CTA_REAL); + } + + if (IDEBUG>0) {printf("ctai_obsdescr_netcdf_get_properties:setvals %d \n",ierr);} + + if (ierr != CTA_OK) + {printf("Error: could not fill the property vector %s \n",Key); + *retval = -1; return; + } + + free(nc_values); + free(nc_values_lon); + free(nc_values_lat); + free(selected_nc_values); + free(nc_tot_values); + + if (*retval!=CTA_OK) return; + + *retval = CTA_OK; +#else + printf("CTAI_ObsDescr_netcdf_Get_Properties :Version is compiled without NETCDF support.\n"); + *retval=CTA_NOT_IMPLEMENTED; +#endif + +}; + + + + + +void CTAI_ObsDescr_netcdf_Free( + CTAI_ObsDescr_netcdf *descr, + int *retval) +{ +#if HAVE_LIBNETCDF + int i, ierr; + + + descr->database->nusers--; + + // printf("cta_obsdescr_free (netcdf): now %d users remaining of database %s \n",descr->database->nusers, descr->database->dbname); + + if (descr->database->nusers==0) + { + + if ((ierr = nc_close(descr->database->ncid))){ + printf("CTA_Sobs_netcdf_Free: cannot close netCDF-file: %d\n", + nc_strerror(ierr)); + } + free(descr->database->dbname); + free(descr->database); + + } + + + + for (i=0; in_keys; i++){ CTA_String_Free(&(descr->Keys[i]));} + free(descr->Keys); + + *retval = CTA_OK; +#else + printf("CTAI_ObsDescr_netcdf_Get_Properties :Version is compiled without NETCDF support.\n"); + *retval=CTA_NOT_IMPLEMENTED; +#endif +} + + +void CTA_ObsDescr_netcdf_initialise(CTA_ObsDescrClass *hobsdescrcl) +{ + CTA_Intf hintf=0; + CTA_Func h_func[I_CTA_OBSDESCR_NUMFUNC]; + + // The vector h_func is filled with COSTA-function handles of the + // implementations in this file. + CTA_Func_Create(" ",&CTAI_ObsDescr_netcdf_Create_Size, hintf, + &h_func[I_CTA_OBSDESCR_CREATE_SIZE]); + CTA_Func_Create(" ",&CTAI_ObsDescr_netcdf_Create_Init, hintf, + &h_func[I_CTA_OBSDESCR_CREATE_INIT]); + CTA_Func_Create(" ",&CTAI_ObsDescr_netcdf_Property_Count, hintf, + &h_func[I_CTA_OBSDESCR_COUNT_PROPERTIES]); + CTA_Func_Create(" ",&CTAI_ObsDescr_netcdf_Get_Properties, hintf, + &h_func[I_CTA_OBSDESCR_GET_PROPERTIES]); + CTA_Func_Create(" ",&CTAI_ObsDescr_netcdf_Observation_Count, hintf, + &h_func[I_CTA_OBSDESCR_COUNT_OBSERVATIONS]); + CTA_Func_Create(" ",&CTAI_ObsDescr_netcdf_Get_Keys, hintf, + &h_func[I_CTA_OBSDESCR_GET_KEYS]); + CTA_Func_Create(" ",&CTAI_ObsDescr_netcdf_Free, hintf, + &h_func[I_CTA_OBSDESCR_FREE]); + CTA_Func_Create(" ",&CTAI_ObsDescr_netcdf_CreateSel, hintf, + &h_func[I_CTA_OBSDESCR_SELECTION]); + CTA_ObsDescr_DefineClass("cta_obsdescr_netcdf",h_func,hobsdescrcl); + +} + + + diff --git a/costa/native/cta/src/cta_obsdescr_sqlite3.c b/costa/native/cta/src/cta_obsdescr_sqlite3.c new file mode 100644 index 000000000..e7c90aab6 --- /dev/null +++ b/costa/native/cta/src/cta_obsdescr_sqlite3.c @@ -0,0 +1,564 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_obsdescr_sqlite3.c $ +$Revision: 3361 $, $Date: 2012-07-04 16:52:30 +0200 (Wed, 04 Jul 2012) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include +#include +#include "cta_mem.h" +#include "cta_file.h" +#include "cta_errors.h" +#include "cta_string.h" +#include "cta_util_sqlite3.h" +#include "cta_obsdescr_sqlite3.h" +#include "cta_defaults.h" + +#define IDEBUG (0) +/* + Fills the relation table for the elements in hobsdescr2 that are also in + hobsdescr1. + + Note hobsdescr2 MUST be a subset of hobsdescr1! + +*/ +int CTAI_ObsDescr_CreateRelTable(CTA_ObsDescr hobsdescr1, CTA_ObsDescr hobsdescr2, + CTA_RelTable reltab){ + int nobs1, nobs2; + CTA_Vector vtime1, vtime2, vid1, vid2, vselect; + int ilo, iup, imid, i2; + double t1, t2; + int id1, id2; + int retval; + BOOL found; + + /* Count number of observations */ + CTA_ObsDescr_Observation_Count(hobsdescr1,&nobs1); + CTA_ObsDescr_Observation_Count(hobsdescr2,&nobs2); + + /* Check whether nobs1>=nobs2 since hobsdecr2 must be a subset of + * hobsdecr1 + */ + if (nobs1looking for %f, %d\n",t2,id2); + + /* Use bi-section in order to find the elements */ + ilo=1; + iup=nobs1; + if (IDEBUG) printf("ilo=%d, iup=%d \n",ilo,iup); + + found=FALSE; + for (;;){ + imid=(iup+ilo)/2; + CTA_Vector_GetVal(vtime1, imid, &t1, CTA_DOUBLE); + CTA_Vector_GetVal(vid1, imid, &id1, CTA_INTEGER); + if (IDEBUG) printf("imid= %d found %f, %d\n",imid, t1,id1); + if (t1==t2 && id1==id2){ + found=TRUE; + break; + } + + /* exit loop when element is not there :*/ + if(ilo==iup){break;} /*Ony one element there */ + + /* check whether element (t1,id1)<(t2,id2) and + * set new values for ilo and iup + */ + if (t1myhandle=*myhandle; + + *retval= CTA_Handle_GetData(*usrdat, (void **) &sobs); + if (*retval!=CTA_OK) return; + + // Link the data base to this descriptor; increase number of users + descr->database = sobs->database; + (descr->database->nusers)++; + + // Copy the conditionCTA_RelTable *hreltable + descr->condition = CTA_Malloc(sizeof(char)*(1+strlen(sobs->condition))); + strcpy(descr->condition,sobs->condition); + + // Store the keys + *retval = CTAI_util_sqlite3_return_keys( + &(descr->n_keys), + &(descr->Keys), + descr->database->db, + descr->condition); + + if (*retval != CTA_OK) return; + + // Copy the dimension of the observer + descr->nmeasr = sobs->nmeasr; + *retval=CTA_OK; +}; + +void CTAI_ObsDescr_sqlite3_CreateSel(CTAI_ObsDescr_sqlite3 *descr, + CTA_String *selection, CTA_RelTable *reltab, + CTA_ObsDescr *myhandle_out, + CTAI_ObsDescr_sqlite3 *descrout, int *retval){ + + + int i,len; + char *condition; + + // Get the condition + // Allocate a name-string + *retval = CTA_String_GetLength(*selection, &len); + if (*retval!=CTA_OK) return; + + // Get the condition + condition = CTA_Malloc((len+1)*sizeof(char)); + *retval = CTA_String_Get(*selection, condition); + if (IDEBUG>0) printf("CTAI_ObsDescr_sqlite3_CreateSel: given condition is '%s'\n",condition); + if (IDEBUG>0) printf("CTAI_ObsDescr_sqlite3_CreateSel: Internal condition is '%s'\n",descr->condition); + if (*retval!=CTA_OK) return; + + // Set handle of new observation description + descrout->myhandle=*myhandle_out; + + // Link the database also to this observer and keep track of the number of + // observers using this database + descrout->database = descr->database; + descrout->database->nusers++; + + // Combine the input-condition and the condition of the input observer. + if (strcmp(descr->condition,"")==0) + { + descrout->condition = condition; + } + else + { + descrout->condition = CTA_Malloc(sizeof(char)* + ( strlen(condition) + + strlen(descr->condition) + + strlen(" ( ) AND ( ) ") + ) ); + sprintf(descrout->condition,"(%s) AND (%s)",condition, + descr->condition); + free(condition); + } + + // copy number of keys + descrout->n_keys=descr->n_keys; + + // Count the measurements in this observer + *retval = CTAI_util_sqlite3_select_values( + &(descrout->nmeasr), 1, CTA_INTEGER, descrout->database->db, + "count(stations.station_id)",descrout->condition); + + // copy the Keys + descrout->Keys=CTA_Malloc(descr->n_keys*sizeof(CTA_String)); + for (i=0;in_keys;i++){ + *retval=CTA_String_Duplicate(descr->Keys[i],&descrout->Keys[i]); + if (*retval!=CTA_OK) return; + } + + if (*reltab!=CTA_NULL) { + *retval=CTAI_ObsDescr_CreateRelTable(descr->myhandle, + descrout->myhandle, *reltab); + } else { + *retval=CTA_OK; + } +} + + + +void CTAI_ObsDescr_sqlite3_Get_Keys( + CTAI_ObsDescr_sqlite3 *descr, + CTA_Vector *Keys, + int *retval) +{ + int i; + for (i=1; i<=descr->n_keys; i++) + { + *retval = CTA_Vector_SetVal(*Keys,i,&(descr->Keys[i-1]),CTA_STRING); + }; + + *retval = CTA_OK; +}; + +void CTAI_ObsDescr_sqlite3_Property_Count( + CTAI_ObsDescr_sqlite3 *descr, + int *nkeys, + int *retval) +{ + *nkeys = descr->n_keys; + *retval = CTA_OK; +}; + +void CTAI_ObsDescr_sqlite3_Observation_Count( + CTAI_ObsDescr_sqlite3 *descr, + int *nobs, + int *retval) +{ + *nobs = descr->nmeasr; + *retval = CTA_OK; +}; + + +BOOL isNaN(double x) +{ + return (!(x>0 || x<=0)); +} + +void CTAI_ObsDescr_sqlite3_Get_Properties( + CTAI_ObsDescr_sqlite3 *descr, + const char *Key, + CTA_Vector *Properties, + CTA_Datatype *datatype, + int *retval) +{ + // Find the key among the key names + int i; + int len; + BOOL found=0; + int isize; + void * StatProp; + void * TimeProp; + char *KeyLong; + int retStat; + int retTime; + void * PropArray; + + + for (i=0; !found && in_keys; i++) + { + char *str; + *retval = CTA_String_GetLength(descr->Keys[i],&len); + if (*retval!=CTA_OK) return; + + str=CTA_Malloc((len+1)*sizeof(char)); + *retval = CTA_String_Get(descr->Keys[i],str); + if (*retval!=CTA_OK){ + free(str); + return; + } + found = strcmp(Key,str)==0; + free(str); + } + i--; + + // Error if the key was not found + if (!found) {*retval= CTA_ITEM_NOT_FOUND; return;} + + // Declare an array for the properties + if (*datatype == CTA_INTEGER) {isize = sizeof(int);} + else if(*datatype == CTA_REAL) {isize = sizeof(float);} + else if(*datatype == CTA_DOUBLE) {isize = sizeof(double);} + else if(*datatype == CTA_STRING) {isize = sizeof(CTA_String);} + else {*retval = CTA_ILLEGAL_DATATYPE; return;} + + StatProp = CTA_Malloc(isize*descr->nmeasr); + TimeProp = CTA_Malloc(isize*descr->nmeasr); + + // In case of String-output, create the strings + if (*datatype == CTA_STRING) + { + CTA_String * PropStr=StatProp; + for (i=0; inmeasr; i++) + { + *retval = CTA_String_Create(&PropStr[i]); + if (*retval != CTA_OK) return; + } + + PropStr=TimeProp; + for (i=0; inmeasr; i++) + { + *retval = CTA_String_Create(&PropStr[i]); + if (*retval != CTA_OK) return; + } + } + + + // Get the properties from the stationary information + KeyLong=CTA_Malloc((strlen(Key)+strlen("stations.")+1)); + sprintf(KeyLong,"stations.%s",Key); + retStat = CTAI_util_sqlite3_select_values( + StatProp, descr->nmeasr, *datatype, + descr->database->db, KeyLong, descr->condition); + + // Get the properties from the time dependent information + sprintf(KeyLong,"data.%s",Key); + retTime = CTAI_util_sqlite3_select_values( + TimeProp, descr->nmeasr, *datatype, + descr->database->db, KeyLong, descr->condition); + + free(KeyLong); + if (retStat != CTA_OK && retTime != CTA_OK) {*retval = retStat; return;} + + // Merge the two answers + + if (retStat == CTA_OK && retTime == CTA_OK) + { + // Column found in stationary and in time-dependent information + + // Supplement the Stationary data array with instationary data, where + // these are available. Check that the stationary and instationary data do + // not conflict. + if (*datatype == CTA_DOUBLE) + { + double * dStatProp = StatProp; + double * dTimeProp = TimeProp; + for (i=0; inmeasr; i++) + { + if (isNaN(dStatProp[i]) && !isNaN(dTimeProp[i])) + { dStatProp[i] = dTimeProp[i]; } + else if ( !isNaN(dStatProp[i]) && !isNaN(dTimeProp[i]) && + dStatProp[i] != dTimeProp[i] ) + { *retval = CTA_INCOMPATIBLE_VECTORS; return;} + } + + } + else if (*datatype == CTA_INTEGER) + { + int * iStatProp = StatProp; + int * iTimeProp = TimeProp; + int iNAN =-987654; + for (i=0; inmeasr; i++) + { + if (iStatProp[i] == iNAN && iTimeProp[i] != iNAN) + { iStatProp[i] = iTimeProp[i]; } + else if (iStatProp[i] != iNAN && iTimeProp[i] != iNAN && + iStatProp[i] != iTimeProp[i] ) + { *retval = CTA_INCOMPATIBLE_VECTORS; return;} + } + } + else if (*datatype == CTA_STRING) + { + CTA_String * sStatProp = StatProp; + CTA_String * sTimeProp = TimeProp; + for (i=0; inmeasr; i++) + { + int statlen, timelen; + *retval = CTA_String_GetLength(sStatProp[i],&statlen); + if (*retval != CTA_OK) return; + *retval = CTA_String_GetLength(sTimeProp[i],&timelen); + if (*retval != CTA_OK) return; + + if (statlen == 0 && timelen != 0) + { + CTA_String help = sStatProp[i]; + sStatProp[i] = sTimeProp[i]; + sTimeProp[i] = help; + } + else if (statlen != 0 && timelen != 0 ) + { + char *statstr; + char *timestr; + if (statlen != timelen) + { *retval = CTA_INCOMPATIBLE_VECTORS; return;} + + statstr=CTA_Malloc((statlen+1)*sizeof(char)); + timestr=CTA_Malloc((timelen+1)*sizeof(char)); + *retval = CTA_String_Get(sStatProp[i],statstr); + if (*retval != CTA_OK) return; + *retval = CTA_String_Get(sTimeProp[i],timestr); + if (*retval != CTA_OK) return; + if (strcmp(statstr,timestr) != 0) + { *retval = CTA_INCOMPATIBLE_VECTORS; return;} + free(statstr); + free(timestr); + } + } + } + else + { + *retval = CTA_ILLEGAL_DATATYPE; return; + } + + PropArray = StatProp; + } + else if (retStat == CTA_OK) + { + PropArray = StatProp; + } + else + { + PropArray = TimeProp; + } + *retval = CTA_Vector_SetVals(*Properties, PropArray, descr->nmeasr, *datatype); + if (*retval != CTA_OK) return; + + + // In case of String-output, erase the strings + if (*datatype == CTA_STRING) + { + CTA_String * PropStr=StatProp; + for (i=0; inmeasr; i++) + { + *retval = CTA_String_Free(&PropStr[i]); + if (*retval != CTA_OK) return; + } + PropStr=TimeProp; + for (i=0; inmeasr; i++) + { + *retval = CTA_String_Free(&PropStr[i]); + if (*retval != CTA_OK) return; + } + } + + free(StatProp); + free(TimeProp); + *retval = CTA_OK; +} + + +void CTAI_ObsDescr_sqlite3_Free( + CTAI_ObsDescr_sqlite3 *descr, + int *retval) +{ + int i; + + descr->database->nusers--; + if (descr->database->nusers==0) + { + + sqlite3_close(descr->database->db); + free(descr->database->name); + free(descr->database); + + } + free(descr->condition); + + for (i=0; in_keys; i++){ CTA_String_Free(&(descr->Keys[i]));} + free(descr->Keys); + + + *retval = CTA_OK; +} + diff --git a/costa/native/cta/src/cta_obsdescr_table.c b/costa/native/cta/src/cta_obsdescr_table.c new file mode 100644 index 000000000..872567f04 --- /dev/null +++ b/costa/native/cta/src/cta_obsdescr_table.c @@ -0,0 +1,611 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_obsdescr_table.c $ +$Revision: 3407 $, $Date: 2012-08-17 13:50:50 +0200 (Fri, 17 Aug 2012) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#define IDEBUG (0) + +#include +#include +#include +#include +#include +#include "cta.h" +#include "cta_mem.h" + +typedef struct { +int nkeys; +int nmeasr; +char **Keys; +char ***Columns; +} CTAI_ObsDescr_table; + + +void CTAI_ObsDescr_table_Create_Size( + // OUTPUTS: + int *memsize, // The number of bytes which are necessary to + // store one CTAI_SObs_sqlite3, with a + // pointer to the contents (data), + // but without the contents themselves. + int *retval // error code (see cta_datatypes.h for possible + // error codes) + ){ + *memsize=(int) sizeof(CTAI_ObsDescr_table); + *retval=CTA_OK; +}; + +void CTAI_ObsDescr_table_Create_Init( +/* + Allocate the memory which is necessary to store the data necessary for a + table-observer +*/ + // INPUT: + CTA_ObsDescr myhandle, /* Handle assigned by COSTA */ + // IN-OUTPUTS + CTAI_ObsDescr_table *descr,// The table-observation description + // for which the memory must be + // allocated + // INPUTS: + CTA_Handle *usrdat, // User data + // OUTPUTS + int *retval) // Error code. +{ + CTA_Datatype datatype; + int ikey, imeasr, nkeys, nmeasr, lenkey, lencol, len; + CTA_Vector vKeys, vCol; + CTA_String sKey; + CTA_String sCol; + const char *key; + + if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init: Start handle=%d\n",*usrdat); + /* Check input we only support a pack array input and an observation + * description component */ + *retval=CTA_Handle_GetDatatype(*usrdat, &datatype); + if (datatype==CTA_PACK) { + + /* Get number of keys and number of measurements */ + *retval=CTA_Pack_Get(*usrdat,&nkeys,sizeof(int)); + if (*retval!=CTA_OK) return; + if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init: nkeys=%d\n",nkeys); + *retval=CTA_Pack_Get(*usrdat,&nmeasr,sizeof(int)); + if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init: nmeasr=%d\n",nmeasr); + if (*retval!=CTA_OK) return; + } else if (datatype==CTA_OBSDESCR) { + if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init: datatype==CTA_OBSDESCR\n"); + /* Get number of keys and number of measurements */ + *retval=CTA_ObsDescr_Property_Count(*usrdat, &nkeys); + if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init: nkeys=%d ierr=%d\n",nkeys, *retval); + if (*retval!=CTA_OK) return; + *retval=CTA_ObsDescr_Observation_Count(*usrdat, &nmeasr); + if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init: nmeasr=%d ierr=%d\n",nmeasr, *retval); + if (*retval!=CTA_OK) return; + } else { + *retval=CTA_NOT_IMPLEMENTED; + return; + } + /* Allocate arrays */ + descr->Keys=CTA_Malloc(nkeys*sizeof(char*)); + descr->Columns=CTA_Malloc(nkeys*sizeof(char**)); + + for (ikey=0;ikeyColumns[ikey]=CTA_Malloc(nmeasr*sizeof(char*)); + } + /* Nullify the strings */ + for (ikey=0;ikeyKeys[ikey]=NULL; + for (imeasr=0;imeasrColumns[ikey][imeasr]=NULL; + } + } + /* Set nkeys and nmeasr */ + descr->nkeys=nkeys; + descr->nmeasr=nmeasr; + + if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init: datatype=%d \n",datatype); + + + if (datatype==CTA_PACK) { + /* Unpack all keys */ + if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init: unpack-keys\n"); + for (ikey=0; ikeynkeys; ikey++){ + /* unpack stringlength */ + *retval=CTA_Pack_Get(*usrdat,&len,sizeof(int)); + if (*retval!=CTA_OK) return; + /* pack string */ + descr->Keys[ikey]=CTA_Malloc(len*sizeof(char)); + *retval=CTA_Pack_Get(*usrdat,descr->Keys[ikey],len*sizeof(char)); + if (*retval!=CTA_OK) return; + if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init: Key[%d]=%s\n", + ikey, descr->Keys[ikey]); + } + + /* Unpack all Columns */ + for (ikey=0; ikeynkeys; ikey++){ + for (imeasr=0; imeasrnmeasr; imeasr++){ + if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init\n"); + /* pack stringlength */ + *retval=CTA_Pack_Get(*usrdat,&len,sizeof(int)); + if (*retval!=CTA_OK) return; + /* pack string */ + descr->Columns[ikey][imeasr]=CTA_Malloc(len*sizeof(char)); + *retval=CTA_Pack_Get(*usrdat,descr->Columns[ikey][imeasr], + len*sizeof(char)); + if (*retval!=CTA_OK) return; + if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init: Val[%d][%d]=%s\n", + ikey,imeasr,descr->Columns[ikey][imeasr]); + } + } + + + + + + } else if (datatype==CTA_OBSDESCR) { + + + if (IDEBUG>0) {printf("CTAI_ObsDescr_table_Create_Init: start \n");} + + /* Allocate vectors for getting out the information */ + + *retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nkeys, CTA_STRING, CTA_NULL, &vKeys); + if (*retval!=CTA_OK) return; + *retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nmeasr, CTA_STRING, CTA_NULL, &vCol); + if (*retval!=CTA_OK) return; + + *retval=CTA_String_Create(&sKey); + if (*retval!=CTA_OK) return; + *retval=CTA_String_Create(&sCol); + if (*retval!=CTA_OK) return; + + /* Get all Keys */ + *retval=CTA_ObsDescr_Get_PropertyKeys(*usrdat, vKeys); + if (*retval!=CTA_OK) return; + + /* For all keys */ + for (ikey=0; ikeyKeys[ikey]=CTA_Malloc((lenkey+1)*sizeof(char)); + *retval=CTA_String_Get(sKey,descr->Keys[ikey]); + if (*retval!=CTA_OK) return; + + /* Get the properties of this key */ + key=CTAI_String_GetPtr(sKey); + if (IDEBUG>0) {printf("CTAI_ObsDescr_table_Create_Init: get data for key '%s'\n",key);} + // NOTE this is were performance goes lost! + *retval=CTA_ObsDescr_Get_ValueProperties(*usrdat, key, vCol, CTA_STRING); + if (*retval!=CTA_OK) return; +// printf("done\n"); + + /* For all measurement */ + for (imeasr=0; imeasrColumns[ikey][imeasr]=CTA_Malloc((lencol+1)*sizeof(char)); + *retval=CTA_String_Get(sCol,descr->Columns[ikey][imeasr]); + if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init: Val[%d][%d]=%s\n", + ikey,imeasr,descr->Columns[ikey][imeasr]); + if (*retval!=CTA_OK) return; + } + } + + /* Deallocate work variables */ + CTA_Vector_Free(&vKeys); + CTA_Vector_Free(&vCol); + CTA_String_Free(&sKey); + CTA_String_Free(&sCol); + } else { + *retval=CTA_NOT_IMPLEMENTED; + return; + } + + *retval=CTA_OK; +}; + +void CTAI_ObsDescr_table_Get_Keys( + CTAI_ObsDescr_table *descr, + CTA_Vector *Keys, + int *retval) +{ + int i; + CTA_String hlpstr; + + CTA_String_Create(&hlpstr); + for (i=1; i<=descr->nkeys; i++) + { + CTA_String_Set(hlpstr,descr->Keys[i-1]); + *retval = CTA_Vector_SetVal(*Keys,i,&hlpstr,CTA_STRING); + if (IDEBUG>0) {printf("CTAI_ObsDescr_table_Get_Keys: set key '%s' ierr=%d",descr->Keys[i-1],*retval);} + }; + CTA_String_Free(&hlpstr); + + *retval = CTA_OK; +}; + +void CTAI_ObsDescr_table_Property_Count( + CTAI_ObsDescr_table *descr, + int *nkeys, + int *retval) +{ + *nkeys = descr->nkeys; + *retval = CTA_OK; +}; + +void CTAI_ObsDescr_table_Observation_Count( + CTAI_ObsDescr_table *descr, + int *nobs, + int *retval) +{ + *nobs = descr->nmeasr; + *retval = CTA_OK; +}; + + +void CTAI_ObsDescr_table_Get_Properties( + CTAI_ObsDescr_table *descr, + const char *Key, + CTA_Vector *Properties, + CTA_Datatype *datatype, + int *retval) +{ + // Find the key among the key names + BOOL found=0; + int isize, ikey, imeasr; + void *vals; + int *ivals; + float *rvals; + double *dvals; + CTA_String str; + + + + for (ikey=0; !found && ikeynkeys; ikey++) + { + found = strcmp(Key,descr->Keys[ikey])==0; + } + ikey--; + + // Error if the key was not found + if (!found) {*retval= CTA_ITEM_NOT_FOUND; return;} + + // Declare an array for the properties + if (*datatype == CTA_INTEGER) {isize = sizeof(int);} + else if(*datatype == CTA_REAL) {isize = sizeof(float);} + else if(*datatype == CTA_DOUBLE) {isize = sizeof(double);} + else if(*datatype == CTA_STRING) {isize = 0;} + else {*retval = CTA_ILLEGAL_DATATYPE; return;} + + vals=NULL; + if (isize>0) { + isize=isize*descr->nmeasr; + vals=CTA_Malloc(isize); + } else if (*datatype == CTA_STRING) { + CTA_String_Create(&str); + } + + + ivals=vals; + rvals=vals; + dvals=vals; + + for (imeasr=0;imeasrnmeasr;imeasr++){ + + if (*datatype == CTA_INTEGER) { + // Add an integer to the output vector: + + // Read the value + if (descr->Columns[ikey][imeasr] != NULL) { + int nscan = sscanf(descr->Columns[ikey][imeasr],"%d",&ivals[imeasr]); + if (nscan != 1) { + *retval=CTA_ILLEGAL_DATATYPE; + free(vals); + return; + } + } + } else if(*datatype == CTA_DOUBLE) { + // Add a double to the output vector: + + // Read the value + double NaN; + + NaN = 0; NaN = 1.0/NaN; NaN=(1.0+NaN)/NaN; + dvals[imeasr]=NaN; + + if (descr->Columns[ikey][imeasr] != NULL) { + int nscan = sscanf(descr->Columns[ikey][imeasr],"%lf",&dvals[imeasr]); + if (nscan != 1) { + *retval=CTA_ILLEGAL_DATATYPE; + free(vals); + return; + } + } + } else if(*datatype == CTA_REAL) { + // Add a float to the output vector: + + // Read the value + float NaN; + NaN = 0; + NaN = (float) 1.0/NaN; + NaN=NaN/NaN; + rvals[imeasr]=NaN; + if (descr->Columns[ikey][imeasr] != NULL) { + int nscan = sscanf(descr->Columns[ikey][imeasr],"%f",&rvals[imeasr]); + if (nscan != 1) { + *retval=CTA_ILLEGAL_DATATYPE; + free(vals); + return; + } + } + } else if(*datatype == CTA_STRING) { + // Add a string to the output vector: + if (descr->Columns[ikey][imeasr] != NULL) { + // Set value in array of Strings + *retval = CTA_String_Set(str, descr->Columns[ikey][imeasr]); + if (*retval!=CTA_OK) return; + } else { + *retval = CTA_String_Set(str,""); + if (*retval!=CTA_OK) return; + } + *retval=CTA_Vector_SetVal(*Properties,imeasr+1,&str,*datatype); + } else { + // No other data types supported (yet) + *retval=CTA_ILLEGAL_DATATYPE; + }; + } + /* Values in the return vector (except for characters) */ + if (*datatype == CTA_INTEGER || *datatype == CTA_REAL || + *datatype == CTA_DOUBLE){ + *retval=CTA_Vector_SetVals(*Properties,vals,descr->nmeasr,*datatype); + } + + if (vals) { + free(vals); + } else if (*datatype == CTA_STRING) { + CTA_String_Free(&str); + } + +} + +void CTAI_ObsDescr_table_Export(CTAI_ObsDescr_table *descr, CTA_Handle +*usrdat, int *retval) +{ + int ikey, imeasr, len; + BOOL packout; + + packout = (CTA_Handle_Check(*usrdat,CTA_PACK)==CTA_OK); + if (packout) { + + /* pack nkeys and nmeasr */ + *retval=CTA_Pack_Add(*usrdat,&descr->nkeys,sizeof(int)); + if (*retval!=CTA_OK) return; + *retval=CTA_Pack_Add(*usrdat,&descr->nmeasr,sizeof(int)); + if (*retval!=CTA_OK) return; + + /* Pack all keys */ + for (ikey=0; ikeynkeys; ikey++){ + /* pack stringlength */ + len=strlen(descr->Keys[ikey])+1; + *retval=CTA_Pack_Add(*usrdat,&len,sizeof(int)); + if (*retval!=CTA_OK) return; + /* pack string */ + *retval=CTA_Pack_Add(*usrdat,descr->Keys[ikey],len*sizeof(char)); + if (*retval!=CTA_OK) return; + } + + /* Pack all Columns */ + for (ikey=0; ikeynkeys; ikey++){ + for (imeasr=0; imeasrnmeasr; imeasr++){ + /* pack stringlength */ + len=strlen(descr->Columns[ikey][imeasr])+1; + *retval=CTA_Pack_Add(*usrdat,&len,sizeof(int)); + if (*retval!=CTA_OK) return; + /* pack string */ + *retval=CTA_Pack_Add(*usrdat,descr->Columns[ikey][imeasr], + len*sizeof(char)); + if (*retval!=CTA_OK) return; + } + } + } else { + *retval=CTA_FORMAT_NOT_SUPPORTED; + return; + } + *retval=CTA_OK; + return; +} + +void CTAI_ObsDescr_table_Free( + CTAI_ObsDescr_table *descr, + int *retval) +{ + int ikey, imeasr; + + for (ikey=0; ikeynkeys; ikey++) { + + free (descr->Keys[ikey]); + for (imeasr=0; imeasrnmeasr; imeasr++) { + free(descr->Columns[ikey][imeasr]); + } + free(descr->Columns[ikey]); + } + free(descr->Keys); + free(descr->Columns); + + *retval=CTA_OK; +} + + + +void CTAI_ObsDescr_table_CreateSel(CTAI_ObsDescr_table *descr, + CTA_String *selection, CTA_RelTable *reltab, + CTA_ObsDescr *myhandle_out, + CTAI_ObsDescr_table *descrout, int *retval){ + + + int i, len, imeasr, nmeasr; + int ikey, nkeys, irow; + int datatype; + int *index; + char *condition; + double t1,t2,t; + CTA_Vector vtimes, vselect; + + // Get the condition + // Allocate a name-string + *retval = CTA_String_GetLength(*selection, &len); + if (*retval!=CTA_OK) return; + + // Get the condition + condition = CTA_Malloc((len+1)*sizeof(char)); + *retval = CTA_String_Get(*selection, condition); + if (*retval!=CTA_OK) return; + + *retval = CTA_String_Get(*selection, condition); + if (*retval!=CTA_OK) return; + + + // convert to upper case + for (i=0;inmeasr, datatype, CTA_NULL, + &vtimes); + CTAI_ObsDescr_table_Get_Properties(descr,"TIME", &vtimes, &datatype, + retval); + if (*retval!=CTA_OK) return; + + + // find observations are in given timespan + nmeasr=-1; + index=CTA_Malloc(descr->nmeasr*sizeof(int)); + for (i=1;i<=descr->nmeasr;i++){ + CTA_Vector_GetVal(vtimes, i, &t, CTA_DOUBLE); + if (t1nkeys; + + descrout->nkeys=nkeys; + descrout->nmeasr=nmeasr; + + /* Allocate table */ + descrout->Keys=CTA_Malloc(nkeys*sizeof(char*)); + descrout->Columns=CTA_Malloc(nkeys*sizeof(char**)); + for (ikey=0;ikeyColumns[ikey]=CTA_Malloc(nmeasr*sizeof(char*)); + } + /* Copy Keys */ + for (ikey=0;ikeyKeys[ikey]); + descrout->Keys[ikey]=CTA_Malloc(sizeof(char)*(len+1)); + strcpy(descrout->Keys[ikey],descr->Keys[ikey]); + } + + /* Copy rows of table */ + for (imeasr=0;imeasrColumns[ikey][irow]==NULL){ + descrout->Columns[ikey][imeasr]=NULL; + } else { + len=strlen(descr->Columns[ikey][irow]); + descrout->Columns[ikey][imeasr]=CTA_Malloc(sizeof(char)*(len+1)); + strcpy(descrout->Columns[ikey][imeasr], + descr->Columns[ikey][irow]); + } + } + } + + // Clean up memory + free(index); + CTA_Vector_Free(&vtimes); +} + + + + +void CTA_ObsDescr_table_initialise(CTA_ObsDescrClass *hobsdescrcl) +{ + CTA_Intf hintf=0; + CTA_Func h_func[I_CTA_OBSDESCR_NUMFUNC]; + + // The vector h_func is filled with COSTA-function handles of the + // implementations in this file. + CTA_Func_Create(" ",&CTAI_ObsDescr_table_Create_Size, hintf, + &h_func[I_CTA_OBSDESCR_CREATE_SIZE]); + CTA_Func_Create(" ",&CTAI_ObsDescr_table_Create_Init, hintf, + &h_func[I_CTA_OBSDESCR_CREATE_INIT]); + CTA_Func_Create(" ",&CTAI_ObsDescr_table_Property_Count, hintf, + &h_func[I_CTA_OBSDESCR_COUNT_PROPERTIES]); + CTA_Func_Create(" ",&CTAI_ObsDescr_table_Get_Properties, hintf, + &h_func[I_CTA_OBSDESCR_GET_PROPERTIES]); + CTA_Func_Create(" ",&CTAI_ObsDescr_table_Observation_Count, hintf, + &h_func[I_CTA_OBSDESCR_COUNT_OBSERVATIONS]); + CTA_Func_Create(" ",&CTAI_ObsDescr_table_Get_Keys, hintf, + &h_func[I_CTA_OBSDESCR_GET_KEYS]); + CTA_Func_Create(" ",&CTAI_ObsDescr_table_Export, hintf, + &h_func[I_CTA_OBSDESCR_EXPORT]); + CTA_Func_Create(" ",&CTAI_ObsDescr_table_Free, hintf, + &h_func[I_CTA_OBSDESCR_FREE]); + CTA_Func_Create(" ",&CTAI_ObsDescr_table_CreateSel, hintf, + &h_func[I_CTA_OBSDESCR_SELECTION]); + + CTA_ObsDescr_DefineClass("cta_obsdescr_table",h_func,hobsdescrcl); +} diff --git a/costa/native/cta/src/cta_obsdescr_user.c b/costa/native/cta/src/cta_obsdescr_user.c new file mode 100644 index 000000000..81c05a587 --- /dev/null +++ b/costa/native/cta/src/cta_obsdescr_user.c @@ -0,0 +1,59 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_obsdescr_netcdf.c $ +$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include "cta.h" + +void CTA_ObsDescr_user_initialize(CTA_ObsDescrClass *hobsdescrcl) +{ + CTA_Func h_func[I_CTA_OBSDESCR_NUMFUNC]; + char *libraryName; + char *functionName; + + // The vector h_func is filled with function read from the default user dynamic library + + libraryName=userDefaultDynamicLibrary; + functionName="user_obsdescr_create_size"; + h_func[I_CTA_OBSDESCR_CREATE_SIZE] = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName); + functionName="user_obsdescr_create_size"; + h_func[I_CTA_OBSDESCR_CREATE_INIT] = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName); + functionName="user_obsdescr_count_properties"; + h_func[I_CTA_OBSDESCR_COUNT_PROPERTIES] = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName); + functionName="user_obsdescr_get_properties"; + h_func[I_CTA_OBSDESCR_GET_PROPERTIES] = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName); + functionName="user_obsdescr_count_observations"; + h_func[I_CTA_OBSDESCR_COUNT_OBSERVATIONS] = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName); + functionName="user_obsdescr_get_keys"; + h_func[I_CTA_OBSDESCR_GET_KEYS] = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName); + functionName="user_obsdescr_free"; + h_func[I_CTA_OBSDESCR_FREE] = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName); + functionName="user_obsdescr_selection"; + h_func[I_CTA_OBSDESCR_SELECTION] = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName); + + CTA_ObsDescr_DefineClass("cta_obsdescr_user",h_func,hobsdescrcl); + + +} + + + diff --git a/costa/native/cta/src/cta_pack.c b/costa/native/cta/src/cta_pack.c new file mode 100644 index 000000000..a66a73839 --- /dev/null +++ b/costa/native/cta/src/cta_pack.c @@ -0,0 +1,381 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_pack.c $ +$Revision: 2932 $, $Date: 2011-11-30 17:10:24 +0100 (Wed, 30 Nov 2011) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include "cta_mem.h" +#include "f_cta_utils.h" +#include "ctai.h" +#include "cta_pack.h" +#include "cta_errors.h" +#include "cta_message.h" + +#define CTA_PACK_CREATE_F77 F77_CALL(cta_pack_create,CTA_PACK_CREATE) +#define CTA_PACK_FREE_F77 F77_CALL(cta_pack_free,CTA_PACK_FREE) +#define CTA_PACK_ADD_F77 F77_CALL(cta_pack_add,CTA_PACK_ADD) +#define CTA_PACK_GET_F77 F77_CALL(cta_pack_get,CTA_PACK_GET) +#define CTA_PACK_GETPTR_F77 F77_CALL(cta_pack_getptr,CTA_PACK_GETPTR) +#define CTA_PACK_GETLEN_F77 F77_CALL(cta_pack_getlen,CTA_PACK_GETLEN) +#define CTA_PACK_ADDCNT_F77 F77_CALL(cta_pack_addcnt,CTA_PACK_ADDCNT) +#define CTA_PACK_GETINDX_F77 F77_CALL(cta_pack_getindx,CTA_PACK_GETINDX) +#define CTA_PACK_SETINDX_F77 F77_CALL(cta_pack_setindx,CTA_PACK_SETINDX) + +#define CLASSNAME "CTA_Pack" + +/* Struct holding all data associated to a COSTA package */ + +typedef struct { +char *pack_array; +int lenpack; +int ip1; +int ip2; +} CTAI_Pack; + + +#undef METHOD +#define METHOD "Create" +int CTA_Pack_Create(int initsize, CTA_Pack *hpack){ + + CTAI_Pack *pack; + int retval; + + /* allocate memory for new pack object */ + pack=CTA_Malloc(sizeof(CTAI_Pack)); + pack->pack_array=NULL; + pack->lenpack=0; + pack->ip1=0; + pack->ip2=0; + + /* Allocate new handle and return eror when unsuccesfull */ + retval=CTA_Handle_Create("pack",CTA_PACK,pack,hpack); + if (retval) { + CTA_WRITE_ERROR("Cannot create handle"); + return retval; + } + + if (initsize>0) { + pack->pack_array=CTA_Malloc(initsize); + pack->lenpack=initsize; + } + + return CTA_OK; +} + +#undef METHOD +#define METHOD "Free" +int CTA_Pack_Free(CTA_Pack *hpack){ + + CTAI_Pack *pack; + int retval; + + if (*hpack==CTA_NULL) return CTA_OK; + + retval=CTA_Handle_Check((CTA_Handle) *hpack,CTA_PACK); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_pack handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) *hpack,(void**) &pack); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + if (pack->pack_array) {free(pack->pack_array);} + free(pack); + retval=CTA_Handle_Free(hpack); + return retval; + +} + +#undef METHOD +#define METHOD "Add" +int CTA_Pack_Add(CTA_Pack hpack, void *data, int lendat){ + + CTAI_Pack *pack; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) hpack,CTA_PACK); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_pack handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hpack,(void**) &pack); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + +// printf("pack->lenpack=%d\n",pack->lenpack); +// printf("lendat=%d",lendat); +// printf("pack->ip2=%d"); + // Check whether buffer is long enough and reallocate when necessary + if (pack->lenpackip2) { + pack->lenpack = (int)(pack->lenpack*1.5) + lendat; + // printf("CTA_Pack_Add: reallocate new size %d\n", pack->lenpack); + pack->pack_array=realloc(pack->pack_array, pack->lenpack); + } + + // pack new data + memcpy(pack->pack_array+pack->ip2, data, lendat); + pack->ip2=pack->ip2+lendat; + return CTA_OK; +} + +#undef METHOD +#define METHOD "Get" +int CTA_Pack_Get(CTA_Pack hpack, void *data, int lendat){ + + CTAI_Pack *pack; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) hpack,CTA_PACK); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_pack handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hpack,(void**) &pack); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + // Check whether requested data can be in buffer + if (pack->ip1+lendat>pack->lenpack) { + return CTA_BUFFER_TOO_SMALL; + } + + // pack new data + memcpy(data, pack->pack_array+pack->ip1, lendat); + pack->ip1=pack->ip1+lendat; + return CTA_OK; +} + + + +#undef METHOD +#define METHOD "GetPtr" +char* CTA_Pack_GetPtr(CTA_Pack hpack){ + + CTAI_Pack *pack; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) hpack,CTA_PACK); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_pack handle"); + return NULL; + } + retval=CTA_Handle_GetData((CTA_Handle) hpack,(void**) &pack); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return NULL; + } + + return &(pack->pack_array[pack->ip1]); +} + +#undef METHOD +#define METHOD "GetLen" +int CTA_Pack_GetLen(CTA_Pack hpack){ + + CTAI_Pack *pack; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) hpack,CTA_PACK); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_pack handle"); + return -1; + } + retval=CTA_Handle_GetData((CTA_Handle) hpack,(void**) &pack); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return -1; + } + + return pack->ip2-pack->ip1; +} + +#undef METHOD +#define METHOD "AddCnt" +int CTA_Pack_AddCnt(CTA_Pack hpack, int lendat){ + CTAI_Pack *pack; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) hpack,CTA_PACK); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_pack handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hpack,(void**) &pack); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + if (pack->ip2+lendat>pack->lenpack) { + return CTA_BUFFER_TOO_SMALL; + } + + pack->ip2=pack->ip2+lendat; + + return CTA_OK; +} + + +#undef METHOD +#define METHOD "Debug" +int CTA_Pack_Debug(CTA_Pack hpack){ + + CTAI_Pack *pack; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) hpack,CTA_PACK); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_pack handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hpack,(void**) &pack); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + // Check whether requested data can be in buffer + printf("DEBUG OF PACK OBJECT %d\n",hpack ); + printf("geheugen adress %p\n",&pack); + printf("ip1=%d lenpack=%d\n",pack->ip1, pack->lenpack); + printf("END OF PACK OBJECT %d\n",hpack ); + + return CTA_NULL; +} + +#undef METHOD +#define METHOD "GEtIndx" +int CTA_Pack_GetIndx(CTA_Pack hpack, int *ip1, int *ip2){ + + CTAI_Pack *pack; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) hpack,CTA_PACK); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_pack handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hpack,(void**) &pack); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + *ip1=pack->ip1; + *ip2=pack->ip2; + + return CTA_OK; +} + + +#undef METHOD +#define METHOD "SetIndex" +int CTA_Pack_SetIndx(CTA_Pack hpack, int ip1, int ip2){ + + CTAI_Pack *pack; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) hpack,CTA_PACK); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_pack handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hpack,(void**) &pack); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + +// printf("ip1, ip2 =%d %d\n",pack->ip1, pack->ip2); + if (ip1==CTA_PACK_RESET) { + pack->ip1=0; + } + else if (ip2==CTA_PACK_RESET) { + pack->ip1=0; + pack->ip2=0; + } + else { + pack->ip2=ip2; + pack->ip1=ip1; + } + + return CTA_OK; +} + + +CTAEXPORT void CTA_PACK_CREATE_F77(int *initsize, int *hpack, int *ierr){ + *ierr=CTA_Pack_Create(*initsize, (CTA_Pack*) hpack); +} + + +CTAEXPORT void CTA_PACK_FREE_F77(int *hpack, int *ierr){ + *ierr=CTA_Pack_Free((CTA_Pack*) hpack); +} + +CTAEXPORT void CTA_PACK_ADD_F77(int *hpack, void *data, int *lendat, int *ierr){ + *ierr=CTA_Pack_Add((CTA_Pack) *hpack, data, *lendat); +} + +CTAEXPORT void CTA_PACK_GET_F77(int *hpack, void *data, int *lendat, int *ierr){ + *ierr=CTA_Pack_Get((CTA_Pack) *hpack, data, *lendat); +} + + +CTAEXPORT char* CTA_PACK_GETPTR_F77(int *hpack){ + return CTA_Pack_GetPtr((CTA_Pack) *hpack); +} + +CTAEXPORT void CTA_PACK_GETLEN_F77(int *hpack, int *ierr){ + *ierr=CTA_Pack_GetLen((CTA_Pack) *hpack); +} + +CTAEXPORT void CTA_PACK_ADDCNT_F77(int *hpack, int *lendat, int *ierr){ + *ierr=CTA_Pack_AddCnt((CTA_Pack) *hpack, *lendat); +} + +CTAEXPORT void CTA_PACK_GETINDX_F77(int *hpack, int *ip1, int *ip2, int *ierr){ + *ierr=CTA_Pack_GetIndx((CTA_Pack) *hpack, ip1, ip2); +} + +CTAEXPORT void CTA_PACK_SETINDX_F77(int *hpack, int *ip1, int *ip2, int *ierr){ + *ierr=CTA_Pack_SetIndx((CTA_Pack) *hpack, *ip1, *ip2); +} + + + + + + + diff --git a/costa/native/cta/src/cta_par.c b/costa/native/cta/src/cta_par.c new file mode 100644 index 000000000..f0b86abdf --- /dev/null +++ b/costa/native/cta/src/cta_par.c @@ -0,0 +1,905 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_par.c $ +$Revision: 4073 $, $Date: 2013-07-31 15:43:55 +0200 (Wed, 31 Jul 2013) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2008 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "cta_mem.h" +#include "cta.h" +#include +#ifdef USE_MPI +//#include "cta_util_mpi.h" +#include "mpi.h" +#endif +#include "cta_par.h" + +#define CLASSNAME "CTA_Par" + +#define CTA_PAR_WORKERSPAWN_F77 F77_CALL(cta_par_workerspawn,CTA_PAR_WORKERSPAWN) +#define CTA_PAR_GETFCOMM_F77 F77_CALL(cta_par_getfcomm,CTA_PAR_GETFCOMM) +#define CTA_PAR_CREATEGROUPS_F77 F77_CALL(cta_par_creategroups,CTA_PAR_CREATEGROUPS) +#define CTA_PAR_GETGROUPINFO_F77 F77_CALL(cta_par_getgroupinfo,CTA_PAR_GETGROUPINFO) + +#define IDEBUG (0) +enum CTAI_ParType {WorkerWorker,MasterWorker}; +enum CTA_ParProcType CTA_MY_PROC_TYPE=CTA_ParMaster; +int CTA_IS_PARALLEL=CTA_FALSE; +int CTA_PAR_MY_RANK=0; +#ifdef USE_MPI + +typedef struct { +int spawn_workers; // spawn worker processes 0 (false) 1 (true) +char *name; // Name of group +char *forModel; // Model (tag) to use this model for +int nProc; // Number of processes in instance of group +int nTimes; // Number of instances of this group +int iTime; // Index in communicators used for providing new groups when models are created +enum CTAI_ParType parType; // Type of parallel model +MPI_Comm *CommNoMaster; +MPI_Comm *CommWithMaster; +int nDumProcs; +int *dumProcs; +} CTAI_Group; + +/* Public variables */ + +MPI_Comm CTA_COMM_WORLD=MPI_COMM_WORLD; // Communication group of all processes in the COSTA universe. +MPI_Comm CTA_COMM_MYWORLD=MPI_COMM_WORLD; // Private world of this process (excluding COSTA master). +MPI_Comm CTA_COMM_MASTER_WORKER=MPI_COMM_WORLD; // Communcication group between the master and this worker. +int CTA_FILTER_PROCESS=0; + +int myRankInWorld=-1; + +int CTAI_iGroup=0; // Group number I belong to (not valid for the master) +int CTAI_nGroups=0; // Global number of groups + +CTAI_Group *CTAI_AllGroups; + +/* Local interfaces */ +void CTAI_Par_FatalError(int ierr, char* msg); + + + +#undef METHOD +#define METHOD "CTAI_Par_NumGroups" +void CTAI_Par_NumGroups(int parConfig, int *ngroups1, int *ngroups2){ + int ierr; + CTA_Handle hitem; + int i, ncount, datatype; + char *implements, *parallel_type, *spawn_workers, *nproc, *ntimes, *dumProcs; + + *ngroups1=0; + *ngroups2=0; + + /* first count the groups in parallel configuration */ + ierr=CTA_Tree_CountHandlesStr(parConfig,"parallel/process_groups/group", + ngroups1); + if (ierr!=CTA_OK){ + *ngroups1=0; + } + + /* Second look for all model classes in configuration */ + ierr=CTA_Tree_CountItems (parConfig, &ncount); + for (i=1;i<=ncount;i++){ + CTA_Tree_GetItem (parConfig, i, &hitem); + CTA_Handle_GetDatatype(hitem, &datatype); + /* check whether it is a model class */ + if (datatype==CTA_MODELCLASS){ + /* Get paralel information */ + CTAI_ModelFac_GetParallelData(hitem, &implements, ¶llel_type, &spawn_workers, &nproc, &ntimes, &dumProcs); + /* increase number of parallel groups when parallel information is available */ + if (implements && parallel_type && nproc && ntimes) (*ngroups2)++; + } + } +} + +#undef METHOD +#define METHOD "CTAI_Par_GetGroupConfig2" +void CTAI_Par_GetGroupConfig2(CTA_Tree parConfig, int iGroup, CTA_ModelClass *groupConfig){ + int icount,i; + int ncount; + int datatype; + CTA_Handle hitem; + char *implements, *parallel_type, *spawn_workers, *nproc, *ntimes, *dumProcs; + + icount=0; + *groupConfig=CTA_NULL; + + + /* Second look for all model clases in configuration */ + CTA_Tree_CountItems (parConfig, &ncount); + for (i=1;i<=ncount;i++){ + CTA_Tree_GetItem (parConfig, i, &hitem); + CTA_Handle_GetDatatype(hitem, &datatype); + /* check whether it is a model class */ + if (datatype==CTA_MODELCLASS){ + /* Get paralel information */ + CTAI_ModelFac_GetParallelData(hitem, &implements, ¶llel_type, &spawn_workers, &nproc, &ntimes, &dumProcs); + /* increase number of parallel groups when parallel information is available */ + if (implements && parallel_type && nproc && ntimes) { + if (iGroup==icount) { + *groupConfig=hitem; + return; + } + icount++; + } + } + } +} + + + +#undef METHOD +#define METHOD "CTAI_Par_GetGroupConfig" +void CTAI_Par_GetGroupConfig(CTA_Tree parConfig, int iGroup, CTA_Tree *groupConfig){ + int ierr; + CTA_Tree tGroups; + + ierr=CTA_Tree_GetHandleStr(parConfig,"parallel/process_groups", &tGroups); + if (ierr==CTA_OK) { + ierr=CTA_Tree_GetItem(tGroups, iGroup+1, groupConfig); + } + + if (ierr!=CTA_OK){ + *groupConfig=CTA_NULL; + } +} + +/* Distribute the groups over the available processes */ +/* We set field nTimes for groups with free multiplicity */ +#undef METHOD +#define METHOD "CTAI_Par_SetNtimes" +void CTAI_Par_SetNtimes(CTAI_Group *allGroups, int nGroups, int nProcWorld){ + int nodes_left; /* Number of processes that are left */ + BOOL didAddGroup; /* Assigned group to nodes in last cycle */ + BOOL first; /* Flag indicating first time in loop distributing processes to groups */ + int iLoop; /* Assigment loop counter */ + int nProcGroup; /* Number of processes in a group */ + int nTimes; /* multiplicity of group <=0 yet unknown number */ + int iGroup; /* Counter over all groups */ + int myRank; + int mySize; + char msg[256]; /* String for writing messages */ + + nodes_left=nProcWorld-1; + didAddGroup=TRUE; + + MPI_Comm_rank(CTA_COMM_WORLD, &myRank); + MPI_Comm_size(CTA_COMM_WORLD, &mySize); + if (IDEBUG) { + printf("#%d DEBUG: CTAI_Par_SetNtimes Size of CTA_COMM_WORLD =%d:\n",myRank, mySize); + MPI_Comm_size(MPI_COMM_WORLD, &mySize); + printf("#%d DEBUG: CTAI_Par_SetNtimes Size of MPI_COMM_WORLD =%d:\n",myRank, mySize); + printf("#%d DEBUG: CTAI_Par_SetNtimes nProcWorld=%d:\n",myRank, nProcWorld); + printf("#%d DEBUG: CTAI_Par_SetNtimes nGroups=%d:\n",nGroups, nProcWorld); + for (iGroup=0; iGroup0) || first;iLoop++){ + first=FALSE; + didAddGroup=FALSE; + /* Loop over all groups */ + for (iGroup=0;iGroup0) { + if (iLoop==0) { + nodes_left=nodes_left-nProcGroup*nTimes; + didAddGroup=TRUE; + } + } + else { + if ((nodes_left>=nProcGroup) | (iLoop==0)){ + nodes_left=nodes_left-nProcGroup; + allGroups[iGroup].nTimes=nTimes-1; + didAddGroup=TRUE; + } + } + } + } + } + + /* Check for more nodes requested than available */ + if (nodes_left<0){ + sprintf(msg,"%d spawned processes too less. Please spawn more processes or cange parallel configuration\n", + nProcWorld-nodes_left); + CTA_WRITE_ERROR(msg); + exit(-1); + } + + /* Processes have been spawned that cannot be assigned + Probably, the user has made an error so we give an error as well. + Note: we even cannot continue since we will not be able to terminate + this process in a correct way at the end of the program + */ + if (nodes_left>0){ + sprintf(msg,"%d spawned processes cannot be assigned please spawn less processes or change parallel configuration.\n", + nodes_left); + CTA_WRITE_ERROR(msg); + exit(-1); + } + + /* set nTimes field positive for all groups */ + if (IDEBUG) printf("#%d CTAI_Par_SetNtimes nGroups is %d\n",myRank,nGroups); + for (iGroup=0;iGroup0) { + /* This is the complicated case were there are a number of dummy processes + * that do not communicate with the COSTA master */ + iOff=0; + for (iProc=0;iProcnDumProcs=0; + group->dumProcs=NULL; + + + if (datatype==CTA_MODELCLASS){ + /* get information from the modelcls */ + CTAI_ModelFac_GetParallelData(parConfig, &implements, ¶llel_type, &spawn_workers, &nproc, &ntimes, &dumProcs); + + // NAME (using implements) + group->name=CTA_Malloc((strlen(implements)+1)*sizeof(char)); + strcpy(group->name,implements); + + // NPROC + group->nProc = atoi(nproc); + + // SPAWN WORKERS + group->spawn_workers = (spawn_workers && (spawn_workers[1]=='Y' || spawn_workers[1]=='y')); + + //FORMODEL (implements) + group->forModel=CTA_Malloc((strlen(implements)+1)*sizeof(char)); + strcpy(group->forModel,implements); + + // PARALLEL_TYPE + if (strcmp(parallel_type,"Worker-Worker")==0){ + group->parType=WorkerWorker; + } else if (strcmp(parallel_type,"Master-Worker")==0){ + group->parType=MasterWorker; + } else { + CTAI_Par_FatalError(CTA_ILLEGAL_INPUT_ARGUMENT, + "parallel_type must have value 'Worker-Worker' or 'Master-Worker'"); + } + // NTIMES + group->nTimes = atoi(ntimes); + + // ITIMES + group->iTime=0; + + + + } else { + + // NAME + ierr=CTA_Tree_GetHandleStr(parConfig,"group/name", &sName); + CTAI_Par_FatalError(ierr, "cannot find group/name in input"); + ierr=CTA_String_GetLength(sName, &len); + group->name=CTA_Malloc((len+1)*sizeof(char)); + ierr=CTA_String_Get(sName, group->name); + + //NPROC + ierr=CTA_Tree_GetValueStr(parConfig,"group/nproc", &(group->nProc), CTA_INTEGER); + CTAI_Par_FatalError(ierr, "cannot find group/nproc in input"); + + //FORMODEL + ierr=CTA_Tree_GetHandleStr(parConfig,"group/use_for_model", &sUseFor); + CTAI_Par_FatalError(ierr, "cannot find group/use_for_model in input"); + CTA_String_GetLength(sUseFor, &len); + group->forModel=CTA_Malloc((len+1)*sizeof(char)); + CTA_String_Get(sUseFor, group->forModel); + + // PARALLEL_TYPE + ierr=CTA_Tree_GetHandleStr(parConfig,"group/parallel_type", &sParType); + CTAI_Par_FatalError(ierr, "cannot find group/parallel_type in input"); + if (strcmp(CTAI_String_GetPtr(sParType),"Worker-Worker")==0){ + group->parType=WorkerWorker; + } else if (strcmp(CTAI_String_GetPtr(sParType),"Master-Worker")==0){ + group->parType=MasterWorker; + } else { + CTAI_Par_FatalError(CTA_ILLEGAL_INPUT_ARGUMENT, + "group/parallel_type must have value 'Worker-Worker' or 'Master-Worker'"); + } + + // SPAWN_WORKER + group->spawn_workers=FALSE; + ierr=CTA_Tree_GetHandleStr(parConfig,"group/spawn_workers", &sSpawn_workers); + if (ierr==CTA_OK){ + spawn_workers=CTAI_String_GetPtr(sSpawn_workers); + group->spawn_workers=(spawn_workers[0]=='Y' || spawn_workers[0]=='y'); + } + + // NTIMES + ierr=CTA_Tree_GetValueStr(parConfig,"group/ntimes", &(group->nTimes), CTA_INTEGER); + if (ierr!=CTA_OK){ + group->nTimes=0; + } + + // ITIMES + group->iTime=0; + + + // Set dummy processes (only in case of Worker-Worker processes + if ( group->parType==WorkerWorker){ + // Set message writer in quiet mode since it is ok if this info + // does not exist + CTA_Message_Quiet(CTA_TRUE); + ierr=CTA_Tree_GetHandleStr(parConfig,"group/dumproc", &sDumProcs); + CTA_Message_Quiet(CTA_FALSE); + if (ierr==CTA_OK){ + txtstr=CTAI_String_GetPtr(sDumProcs); + s = txtstr; + for (i=0; s[i]; s[i]==',' ? i++ : *s++); + group->nDumProcs = strlen(txtstr) == 0 ? 0 : i+1; + if (group->nDumProcs > 0) { + group->dumProcs = CTA_Malloc(sizeof(int)*group->nDumProcs); + for (i=0; ; i++,txtstr=NULL) { + txtpart = strtok(txtstr,","); + if (txtpart == NULL) break; + group->dumProcs[i]=atoi(txtpart); + } + } + if (IDEBUG) printf("#%d Dummy process is selected in Worker-Worker concept\n", myRank); + } + } + } + if (CTA_FILTER_PROCESS) { + if (IDEBUG) { + printf("----- OpenDA communication/model group information ------\n"); + printf("Name of group =%s\n",group->name); + printf("number of proccesses =%d\n",group->nProc); + printf("Used for model =%s\n",group->forModel); + printf("Number of replications =%d\n",group->nTimes); + printf("Kind of parallelization ="); + if (group->parType==WorkerWorker){ + printf("WorkerWorker\n"); + } else { + printf("MasterWorker\n"); + } + printf("Spawn workers ="); + if (group->spawn_workers){ + printf("Yes\n"); + } else { + printf("No\n"); + } + } + } + return CTA_OK; +} + + + + +#undef METHOD +#define METHOD "CTAI_Par_FatalError" +void CTAI_Par_FatalError(int ierr, char* msg){ + if (ierr!=CTA_OK){ + printf("FATAL ERROR:\n"); + printf("%s\n", msg); + exit(-1); + } +} + +#endif + +#undef METHOD +#define METHOD "CTA_Par_WorkerSpawn" +int CTA_Par_WorkerSpawn(int StartPar){ +#ifdef USE_MPI + int retval; /** return value of MPI-call */ + int comm_size; /** Size of communcation group/number of spawned processes */ + + if (IDEBUG) printf("Debug: start of %s\n",METHOD); + /* initialise MPI. */ + retval=MPI_Init(NULL,NULL); + retval=MPI_Comm_size(CTA_COMM_WORLD, &comm_size); + retval=MPI_Comm_rank(CTA_COMM_WORLD, &myRankInWorld); + + if (IDEBUG) { + if (myRankInWorld==0) printf("%s Number of processes that are spawned :%d\n",METHOD, comm_size); + } + /* Set rank of this process, note: this is not in the whole world! */ + CTA_PAR_MY_RANK=myRankInWorld; + + /* By definition: we are running parallel */ + CTA_IS_PARALLEL=TRUE; + + /* By definition: this process is a worker process */ + CTA_FILTER_PROCESS=FALSE; + CTA_MY_PROC_TYPE=CTA_ParWorker; + + /* Set communicators */ + CTA_COMM_MYWORLD = MPI_COMM_WORLD; + retval=MPI_Comm_get_parent(&CTA_COMM_MASTER_WORKER); + if (retval){ + printf("Error in MPI_Comm_get_parent\n"); + exit(-1); + } + + /* Do we need to start the modelbuilder */ + if (StartPar==CTA_TRUE) { + if(IDEBUG) { + printf("=================================================\n"); + printf("Starting the modelbuilder\n"); + printf("Inter communicator is %p\n",CTA_COMM_MASTER_WORKER); + printf("=================================================\n"); + } + CTA_Modbuild_par_CreateClass(&CTA_MODBUILD_PAR); + } +#endif + return CTA_OK; + + } + + +#undef METHOD +#define METHOD "CTA_Par_CreateGroups" +int CTA_Par_CreateGroups(int parConfig, int StartPar){ + +#ifdef USE_MPI + + int comm_size; + int iGroup; + CTA_Tree groupConfig; + int nGroups1, nGroups2; + + /* initialise MPI. We now assume that COSTA is the whole world. + * This might change in the future */ + MPI_Init(NULL,NULL); + MPI_Comm_size(CTA_COMM_WORLD, &comm_size); + + /* When this function is called we will always run in parallel */ + CTA_IS_PARALLEL=CTA_TRUE; + + if (IDEBUG) { + if (myRankInWorld==0) printf("CTA_Par_CreateGroups: Size of CTA_COMM_WORLD is %d\n",comm_size); + } + + /* Get the rank of this process */ + MPI_Comm_rank(CTA_COMM_WORLD, &myRankInWorld); + CTA_PAR_MY_RANK=myRankInWorld; + + CTA_FILTER_PROCESS=(myRankInWorld==0); + + /* Set master and worker */ + if (CTA_FILTER_PROCESS) { + CTA_MY_PROC_TYPE=CTA_ParMaster; + } else { + CTA_MY_PROC_TYPE=CTA_ParWorker; + } + + /* Read configuration */ + if (IDEBUG) { + if (CTA_FILTER_PROCESS) printf("CTA_Par_CreateGroups: number of processes is %d\n",comm_size); + } + + /* Count number of process groups */ + CTAI_Par_NumGroups(parConfig, &nGroups1, &nGroups2); + CTAI_nGroups=nGroups1+nGroups2; + if (IDEBUG) { + if (CTA_FILTER_PROCESS) printf("CTA_Par_CreateGroups: number groups defined globally =%d\n",nGroups1); + if (CTA_FILTER_PROCESS) printf("CTA_Par_CreateGroups: number groups defined locally =%d\n",nGroups2); + if (CTA_FILTER_PROCESS) printf("CTA_Par_CreateGroups: total number of defined ngroups =%d\n",CTAI_nGroups); + } + + /* Get properties of the groups and store them in CTAI_AllGroups */ + CTAI_AllGroups=NULL; + if (CTAI_nGroups>0) { + CTAI_AllGroups=CTA_Malloc(CTAI_nGroups*sizeof(CTAI_Group)); + + for (iGroup=0;iGroupnsel2; + *nsel2=reltable->nsel1; + *sel1=reltable->sel2; + *sel2=reltable->sel1; + } + else { + *nsel1=reltable->nsel1; + *nsel2=reltable->nsel2; + *sel1=reltable->sel1; + *sel2=reltable->sel2; + } +} + + +/** \brief Apply relation table to two components + * + * \param reltable (I) Data of relation table + * \param hrom (I) Source set of elements + * \param hto (IO) Target set (some will be overwitten) + * \param iverse (I) CTA_TRUE/CTA_FALSE apply inverse table + * + * \return error status: CTA_OK if successful + */ +int CTAI_RelTable_Apply(CTA_RelTable hreltable, + CTA_Handle hfrom, CTA_Handle hto, int inverse){ + + int retval; + CTAI_RelTable *reltable; + int size_of_datatype; + void *val; + int isel, isel1, isel2; + int nsel, nsel1, nsel2; + int *sel1, *sel2; + CTA_Datatype type_from, type_to, datatype; + + /* Get data object of relation table */ + retval=CTAI_RelTable_GetData(hreltable, &reltable); + if (retval!=CTA_OK) return retval; + + /* Set relation table administration depending on inverse option */ + CTAI_GetRelTableInfo(reltable, inverse, &nsel1, &sel1, &nsel2, &sel2); + + /* check kind of object to copy elements from: */ + retval=CTA_Handle_GetDatatype(hfrom, &type_from); + if (retval!=CTA_OK) return retval; + retval=CTA_Handle_GetDatatype(hto, &type_to); + if (retval!=CTA_OK) return retval; + + /* Are they both of the same datatype? */ + if (type_to!=type_from) return CTA_INPUT_OBJECTS_ARE_INCOMPATIBLE; + + /* At this time we use an expensive way of applying the relation tables + we need to make changes (extentions to the interface) of the + supported objects in order to improve the performance */ + switch(type_from){ + case CTA_VECTOR: + /* get datatype of vector object */ + retval=CTA_Vector_GetDatatype(hfrom,&datatype); + /* strings are not yet supported */ + + + /* allocate memory block for storing values */ + retval=CTA_SizeOf(datatype, &size_of_datatype); + if (retval!=CTA_OK) return retval; + val=CTA_Malloc(size_of_datatype); + + /* Create string instance for holding copied values + when vector contains strings */ + if (datatype==CTA_STRING){ + retval=CTA_String_Create(val); + if (retval!=CTA_OK) return retval; + }; + + /* Loop over all elements that need to be copied */ + nsel=MAX(nsel1,nsel2); + for (isel=1;isel<=nsel;isel++){ + if (nsel1==0) {isel1=isel;} else {isel1=sel1[isel-1];} + if (nsel2==0) {isel2=isel;} else {isel2=sel2[isel-1];} + + retval=CTA_Vector_GetVal(hfrom,isel1,val,datatype); + if (retval!=CTA_OK) {return retval;} + retval=CTA_Vector_SetVal(hto,isel2,val,datatype); + if (retval!=CTA_OK) {return retval;} + } + + /* Free string instance (when strings have been copied) */ + if (datatype==CTA_STRING){ + retval=CTA_String_Free(val); + if (retval!=CTA_OK) return retval; + }; + free(val); + + break; + case CTA_TREEVECTOR: + return CTA_NOT_YET_SUPPORTED; + break; + default: + return CTA_INPUT_OBJECT_NOT_SUPPORTED; + break; + } + return CTA_OK; +} + +void CTAI_RelTable_SetWorkingCopy(int nsel1, int *sel1, int nsel2, int *sel2, + int *nelm, int **from, int **to){ + + int i; + + + *nelm=MAX(nsel1,nsel2); + *from=CTA_Malloc((*nelm)*sizeof(int)); + *to =CTA_Malloc((*nelm)*sizeof(int)); + + /* fill the from table */ + if (nsel1==0) { + for (i=0;i<*nelm;i++){(*from)[i]=i+1;} + } else { + for (i=0;i<*nelm;i++){(*from)[i]=sel1[i];} + } + + /* fill the to table */ + if (nsel2==0) { + for (i=0;i<*nelm;i++){(*to)[i]=i+1;} + } else { + for (i=0;i<*nelm;i++){(*to)[i]=sel2[i];} + } + + /* now we have a table; shall we sort the first or second column? */ + + + +} + + + + + +#undef METHOD +#define METHOD "Create" +int CTA_RelTable_Create(CTA_RelTable *hreltable){ + + CTAI_RelTable *reltable; + int retval; + + /* allocate memory for new time object */ + reltable=CTA_Malloc(sizeof(CTAI_RelTable)); + + /* initialise administration */ + reltable->nsel1=0; + reltable->nsel2=0; + reltable->sel1=NULL; + reltable->sel2=NULL; + + /* Allocate new handle and return error when unsuccesfull */ + retval=CTA_Handle_Create("reltable",CTA_RELTABLE,reltable,hreltable); + if (retval) { + CTA_WRITE_ERROR("Cannot create handle"); + return retval; + } + + return CTA_OK; +} + +#undef METHOD +#define METHOD "Create" +int CTA_RelTable_Free(CTA_RelTable *hreltable){ + + CTAI_RelTable *reltable; + int retval; + + /* Get data object of relation table */ + retval=CTAI_RelTable_GetData(*hreltable, &reltable); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* free selection arrays */ + if (reltable->sel1){free(reltable->sel1);} + if (reltable->sel2){free(reltable->sel2);} + + /* free data block and handle */ + free(reltable); + retval=CTA_Handle_Free(hreltable); + return retval; +} + +#undef METHOD +#define METHOD "ApplyVal" +int CTA_RelTable_ApplyVal(CTA_RelTable hreltable, + void *vfrom, int nfrom, + void *vto, int nto, CTA_Datatype datatype){ + int retval; + CTA_Vector hfrom, hto; + + /* create and fill the 'from' vector */ + retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nfrom, datatype, CTA_NULL, &hfrom); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot create vector"); + return retval; + } + + retval=CTA_Vector_SetVals(hfrom, vfrom, nfrom, datatype); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot set values"); + return retval; + } + + /* create the 'to' vector */ + retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nto, datatype, CTA_NULL, &hto); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot create vector"); + return retval; + } + + retval = CTA_RelTable_Apply(hreltable, hfrom, hto); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot apply RelTabkle"); + return retval; + } + + /* fill the 'vto' array and clean up */ + retval=CTA_Vector_GetVals(hto, vto, nto, datatype); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get values"); + return retval; + } + + retval=CTA_Vector_Free(&hfrom); + retval=CTA_Vector_Free(&hto); + return retval; + +} + +int CTA_RelTable_ApplyInvVal(CTA_RelTable hreltable, + void *vfrom, int nfrom, + void *vto, int nto, CTA_Datatype datatype){ + + return CTA_RelTable_ApplyVal(hreltable, vto,nto, vfrom,nfrom, datatype); + +} + +int CTA_RelTable_Apply(CTA_RelTable hreltable, + CTA_Handle hfrom, CTA_Handle hto){ + + return CTAI_RelTable_Apply(hreltable, hfrom, hto, FALSE); + +} + +int CTA_RelTable_ApplyInv(CTA_RelTable hreltable, + CTA_Handle hfrom, CTA_Handle hto){ + + return CTAI_RelTable_Apply(hreltable, hfrom, hto, TRUE); +} + + +#undef METHOD +#define METHOD "SetSelectVal" +int CTA_RelTable_SetSelectVal(CTA_RelTable hreltable, void *val, int nval, CTA_Datatype datatype){ + CTA_Vector vwork; + int retval; + + retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nval, datatype, CTA_NULL, &vwork); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot create verctor handle"); + return retval; + } + retval=CTA_Vector_SetVals(vwork, val, nval, datatype); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot set values in vector"); + return retval; + } + retval=CTA_RelTable_SetSelect(hreltable,vwork); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_RelTable_SetSelect"); + return retval; + } + retval=CTA_Vector_Free(&vwork); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot free vector"); + return retval; + } + + return CTA_OK; +} + + + +#undef METHOD +#define METHOD "SetSelect" +int CTA_RelTable_SetSelect(CTA_RelTable hreltable, CTA_Vector vselect){ + int retval; + int i, n; + CTAI_RelTable *reltable; + + /* Get data object of relation table */ + retval=CTAI_RelTable_GetData(hreltable, &reltable); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* free memory if relation table is used in past */ + if (reltable->sel1){free(reltable->sel1);} + if (reltable->sel2){free(reltable->sel2);} + reltable->nsel2=0; + + /* Set cardinality */ + retval=CTA_Vector_GetSize(vselect, &n); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get size of vector"); + return retval; + } + reltable->nsel1=n; + + /* copy values of vselect into local administration */ + reltable->sel1=CTA_Malloc(n*sizeof(int)); + retval=CTA_Vector_GetVals(vselect, reltable->sel1, n, CTA_INTEGER); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get values from vector"); + return retval; + } + + if (IDEBUG) { + printf("CTA_RelTable_SetSelect: set a new relation table\n"); + printf("reltable->nsel1=%d\n",reltable->nsel1); + printf("reltable->nsel2=%d\n",reltable->nsel2); + printf("reltable->sel1="); + for (i=0;insel1;i++){ + printf("%d ",reltable->sel1[i]); + } + printf("\n"); + printf("reltable->sel2="); + for (i=0;insel2;i++){ + printf("%d ",reltable->sel1[i]); + } + printf("\n"); + } + + return CTA_OK; +} + + +#undef METHOD +#define METHOD "SetTableCombine" +int CTA_RelTable_SetTableCombine(CTA_RelTable hreltable, + CTA_RelTable hrel1, int inverse1, + CTA_RelTable hrel2, int inverse2 ){ + +int retval, i,j; +BOOL found; +int nsel1_rel1, nsel2_rel1, nsel1_rel2, nsel2_rel2; +int *sel1_rel1, *sel2_rel1, *sel1_rel2, *sel2_rel2; +int nelm1, nelm2, *from1, *to1, *from2, *to2; +int *from; + +CTAI_RelTable *reltable, *reltable1, *reltable2; + + /* Get data object of relation tables */ + retval=CTAI_RelTable_GetData(hreltable, &reltable); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + retval=CTAI_RelTable_GetData(hrel1, &reltable1); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + retval=CTAI_RelTable_GetData(hrel2, &reltable2); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + +/* first check for empty tables */ + if (MAX(reltable1->nsel1,reltable1->nsel2) == 0) { + reltable = reltable1; return CTA_OK; //mag deze toekenning? + } + if (MAX(reltable2->nsel1,reltable2->nsel2) == 0) { + reltable = reltable2; return CTA_OK; //mag deze toekenning? + } + + + + /* Set local variables and pointers in order to handle the inverse + * options + */ + CTAI_GetRelTableInfo(reltable1, inverse1, + &nsel1_rel1, &sel1_rel1, + &nsel2_rel1, &sel2_rel1); + + CTAI_GetRelTableInfo(reltable2, inverse2, + &nsel1_rel2, &sel1_rel2, + &nsel2_rel2, &sel2_rel2); + + + /* Create a local copy of administration of relation tables */ + CTAI_RelTable_SetWorkingCopy(nsel1_rel1, sel1_rel1, + nsel2_rel1, sel2_rel1, + &nelm1, &from1, &to1); + + CTAI_RelTable_SetWorkingCopy(nsel1_rel2, sel1_rel2, + nsel2_rel2, sel2_rel2, + &nelm2, &from2, &to2); + + if (IDEBUG > 0) {printf("reltable_combine1 %d %d %d %d \n",nsel1_rel1,nsel2_rel1,nsel1_rel2,nsel2_rel2);} + // printf("from2 01: %d %d \n",from2[0], from2[1]); + + /* permutate towards target of first relation table */ + CTA_Util_IQSort2(to1, from1, nelm1); + + if (IDEBUG > 0) {printf("reltable_combine2-1 %d %d\n",nelm1,from2[0]);} + + /* permutate towards origin of second relation table */ + CTA_Util_IQSort2(to2, from2, nelm2); + + if (IDEBUG > 0) {printf("reltable_combine2-2; nelm1,2:%d %d \n", nelm1, from2[0]);} + + /* find all target elements of first relation table that corresponds + that are an origin element of second relation table. */ + from=CTA_Malloc(sizeof(int)*nelm2); + j=0; + for (i=0;i 0) {printf("reltable_combine4 %d %d\n",nelm1,nelm2);} + + + /* free memory if relation table is used in past */ + if (reltable->sel1){free(reltable->sel1);} + if (reltable->sel2){free(reltable->sel2);} + reltable->nsel2=0; + reltable->sel1=CTA_Malloc(nelm2*sizeof(int)); + reltable->sel2=CTA_Malloc(nelm2*sizeof(int)); + reltable->nsel1=nelm2; + reltable->nsel2=nelm2; + + if (IDEBUG > 0) {printf("reltable_combine6\n");} + + /* Copy the relation table */ + for (i=0;isel1[i]=from[i]; + reltable->sel2[i]=to2[i]; + } + + + /* free all work variables */ + free(to1); free(from1); free(to2); free(from2); free(from); + + return CTA_OK; + +} + + +#undef METHOD +#define METHOD "Count" +int CTA_RelTable_Count(CTA_RelTable hreltable, int *nelt){ + + CTAI_RelTable *reltable; + int retval; + + /* Get data object of relation table */ + retval=CTAI_RelTable_GetData(hreltable, &reltable); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + *nelt=MAX(reltable->nsel1,reltable->nsel2); + + return CTA_OK; +} + + + + + +/* Interfacing with Fortran */ + +CTAEXPORT void CTA_RELTABLE_COUNT_F77(int *hreltable, int *nelt, int *ierr){ + *ierr=CTA_RelTable_Count((CTA_RelTable) *hreltable, nelt); +} + +CTAEXPORT void CTA_RELTABLE_CREATE_F77(int *hreltable, int *ierr){ + *ierr=CTA_RelTable_Create((CTA_RelTable*) hreltable); +} + +CTAEXPORT void CTA_RELTABLE_FREE_F77(int *hreltable, int *ierr){ + *ierr=CTA_RelTable_Free((CTA_RelTable*) hreltable); +} + +CTAEXPORT void CTA_RELTABLE_APPLY_F77(int *hreltable, int *hfrom, int *hto, int *ierr){ + *ierr=CTA_RelTable_Apply((CTA_RelTable) *hreltable, + (CTA_Handle) *hfrom, (CTA_Handle) *hto); +} + +CTAEXPORT void CTA_RELTABLE_APPLY_VAL_F77(int *hreltable, void *hfrom, int *nfrom, + void *hto, int *nto, int *datatype, int *ierr){ + *ierr=CTA_RelTable_ApplyVal((CTA_RelTable) *hreltable, + hfrom, *nfrom, hto, *nto, (CTA_Datatype) *datatype); +} + + +CTAEXPORT void CTA_RELTABLE_APPLYINV_F77(int *hreltable, int *hfrom, int *hto, + int *ierr){ + *ierr=CTA_RelTable_ApplyInv((CTA_RelTable) *hreltable, + (CTA_Handle) *hfrom, (CTA_Handle) *hto); +} + +CTAEXPORT void CTA_RELTABLE_APPLYINV_VAL_F77(int *hreltable, void *hfrom, int *nfrom, + void *hto, int *nto, int *datatype, int *ierr){ + *ierr=CTA_RelTable_ApplyInvVal((CTA_RelTable) *hreltable, + hfrom, *nfrom, hto, *nto, (CTA_Datatype) *datatype); + +} + + +CTAEXPORT void CTA_RELTABLE_SETSELECT_F77(int *hreltable, int *selorig, int *ierr){ + *ierr=CTA_RelTable_SetSelect((CTA_RelTable) *hreltable, + (CTA_Vector) *selorig); +} + +CTAEXPORT void CTA_RELTABLE_SETSELECT_VAL_F77(int *hreltable, void *val, int *nval, int*datatype, int *ierr){ + *ierr=CTA_RelTable_SetSelectVal((CTA_RelTable) *hreltable, val, *nval, + (CTA_Datatype) *datatype); +} + + +CTAEXPORT void CTA_SETTABLECOMBINE_F77(int *hreltable, + int *hrel1, int *inverse1, + int *hrel2, int *inverse2, + int *ierr ){ + *ierr=CTA_RelTable_SetTableCombine((CTA_RelTable) *hreltable, + (CTA_RelTable) *hrel1, *inverse1, + (CTA_RelTable) *hrel2, *inverse2); +} + diff --git a/costa/native/cta/src/cta_resultwriter.c b/costa/native/cta/src/cta_resultwriter.c new file mode 100644 index 000000000..bd334f69f --- /dev/null +++ b/costa/native/cta/src/cta_resultwriter.c @@ -0,0 +1,251 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/cta/cta_time.c $ +$Revision: 2751 $, $Date: 2011-09-09 08:58:46 +0200 (Fri, 09 Sep 2011) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2013 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include "cta_resultwriter.h" + +#include "cta_mem.h" +#include "cta_tree.h" +#include "cta_string.h" +#include "cta_xml.h" +#include "f_cta_utils.h" +#include "cta_errors.h" +#include "cta_message.h" + +/** + * OpenDA support the concept of resultwriters. The Data assimilation algorithm + * will presents intermediate results and information on the computations to the + * result writer. Each implementation of a resultwriter can then handle this data + * in its own way. + * + * See header file cta_resultwriter.h for more information + * + * @author nils van velzen + * + */ +#define CLASSNAME "CTA_ResultWriter" +#ifdef WIN32 +#define FILESEP "\\" +#else +#define FILESEP "/" +#endif + + +/* Struct holding all different data associated to an native array */ +#define MaxNativeResultWriters (20) + +static int not_initialized=1; + +typedef struct { + int not_initialized; + int h_config; + CTA_Func h_putmessage; + CTA_Func h_putvalue; + CTA_Func h_putiterationreport; + CTA_Func h_free; +} CTAI_ResultWriterAdmin; + +static CTAI_ResultWriterAdmin resultWriterAdmin[MaxNativeResultWriters]; + + + +#undef METHOD +#define METHOD "CTAI_Resultwriter_handleID" +int CTAI_Resultwriter_handleID(int iDWriter, char *config, char *workingDir){ + char msg[256]; + char filename[265]; + int iID, retval; + CTA_String hfname; + CTA_Tree htree=CTA_NULL; + + + //Check Input + if (iDWriter<0){ + sprintf(msg,"The Id of the resultwriter is negative (%d). This is a programming error in OpenDA",iDWriter); + CTA_WRITE_ERROR(msg); + return 0; + } + if (iDWriter>=MaxNativeResultWriters){ + sprintf(msg,"The ID of the resultwriter is %d >= MaxNativeResultWriters (=%d). This might be caused by a programming error in OpenDA or you are trying to use too many resultwriters.",iDWriter,MaxNativeResultWriters); + CTA_WRITE_ERROR(msg); + return 0; + } + + //Initialize the local administration when needed + if (not_initialized){ + for (iID=0;iID +#include +#include "cta_mem.h" +#include "f_cta_utils.h" +#include "cta_sobs.h" +#include "cta_errors.h" +#include "cta_handles.h" +#include "ctai.h" +#include "ctai_handles.h" +#include "cta_message.h" + +#define CTA_SOBS_CREATE_F77 F77_CALL(cta_sobs_create,CTA_SOBS_CREATE) +#define CTA_SOBS_CREATESEL_F77 F77_CALL(cta_sobs_createsel,CTA_SOBS_CREATESEL) +#define CTA_SOBS_CREATETIMSEL_F77 F77_CALL(cta_sobs_createtimsel,CTA_SOBS_CREATETIMSEL) +#define CTA_SOBS_COUNT_F77 F77_CALL(cta_sobs_count,CTA_SOBS_COUNT) +#define CTA_SOBS_GETVAL_F77 F77_CALL(cta_sobs_getval,CTA_SOBS_GETVAL) +#define CTA_SOBS_GETTIMES_F77 F77_CALL(cta_sobs_gettimes,CTA_SOBS_GETTIMES) +#define CTA_SOBS_GETREALISATION_F77 F77_CALL(cta_sobs_getrealisation,CTA_SOBS_GETREALISATION) +#define CTA_SOBS_GETEXPECTATION_F77 F77_CALL(cta_sobs_getexpectation,CTA_SOBS_GETEXPECTATION) +#define CTA_SOBS_EVALPDF_F77 F77_CALL(cta_sobs_evalpdf,CTA_SOBS_EVALPDF) +#define CTA_SOBS_GETCOVMAT_F77 F77_CALL(cta_sobs_getcovmat,CTA_SOBS_GETCOVMAT) +#define CTA_SOBS_GETVAR_F77 F77_CALL(cta_sobs_getvar,CTA_SOBS_GETVAR) +#define CTA_SOBS_GETSTD_F77 F77_CALL(cta_sobs_getstd,CTA_SOBS_GETSTD) +#define CTA_SOBS_GETDESCRIPTION_F77 F77_CALL(cta_sobs_getdescription,CTA_SOBS_GETDESCRIPTION) +#define CTA_SOBS_EXPORT_F77 F77_CALL(cta_sobs_export,CTA_SOBS_EXPORT) +#define CTA_SOBS_FREE_F77 F77_CALL(cta_sobs_free,CTA_SOBS_FREE) + +#define IDEBUG (0) + +#define CLASSNAME "CTA_Sobs" +/* Struct holding all data associated to an COSTA Vector */ +typedef struct { +CTA_Func functions[CTA_SOBS_NUMFUNC]; +CTA_ObsDescrClass descrcl; +} CTAI_SObsClass; // A SObsClass contains a list of the member-functions + + +typedef struct { +CTA_Func functions[CTA_SOBS_NUMFUNC]; // See cta_sobs.h for a list of + // available stochobs-functions +CTA_SObsClass hsobscl; // StochObs-class +void *data; // pointer to the implementation-specific data. +CTA_ObsDescr hdescr; // The observation-descriptor of this observer +CTA_Handle *userdata; // Copy of the userdata handles +} CTAI_SObs; + + + + + +int CTAI_SObs_member_function( + // INPUTS + CTA_StochObs hsobs, /* Handle of the stochastic observer of + which a member function is wanted */ + int member, /* Code of the member function */ + // OUTPUT + CTAI_SObs **sobs, /* All data of observer hsobs */ + CTA_Function **memfun /* Member-Function pointer */ +) +{ + + int retval; + /* Check that the given handle is indeed an observer */ + retval=CTA_Handle_Check((CTA_Handle) hsobs, CTA_SOBS); + if (retval!=CTA_OK) return retval; + + /* Get pointer to struct with observer data */ + retval=CTA_Handle_GetData((CTA_Handle) hsobs,(void**) sobs); + if (retval!=CTA_OK) return retval; + + /* Get pointer to implementation of this function */ + retval=CTA_Func_GetFunc((*sobs)->functions[member],memfun); + return retval; +} + + +#undef METHOD +#define METHOD "Create" +int CTA_SObs_Create( + // INPUTS: + CTA_SObsClass hsobscl, // stochastic observer class + CTA_Handle userdata, // e.g. the name of the database + // OUTPUTS: + CTA_StochObs *hsobs) // The new COSTA-stochastic observer + // (handle) +{ + CTAI_SObs *sobs; + int memsize; + int retval; + CTAI_SObsClass *clsdata; + CTA_Function *my_Create_Size, *my_Create_Init; + int i; + char message[256]; + + /* Get class data containing all function pointers */ + retval=CTA_Handle_Check((CTA_Handle) hsobscl,CTA_SOBSCLASS); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_sobsclass handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hsobscl,(void**) &clsdata); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* determine size of data object (CTA_SOBS_CREATE_SIZE)*/ + if (IDEBUG>0){ printf("CTA_SOBS: get CREATE_SIZE \n"); } + retval=CTA_Func_GetFunc(clsdata->functions[CTA_SOBS_CREATE_SIZE], + &my_Create_Size); + if (IDEBUG>0){ printf("CTA_SOBS: done calling CREATE_SIZE %p\n",my_Create_Size); } + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function CTA_SOB_CREATE_SIZE"); + return retval; + } + if (IDEBUG>0){ printf("Calling my_create_size\n"); } + my_Create_Size(&memsize,&retval); + if (IDEBUG>0){ printf("Done Calling my_create_size\n"); } + if (retval) { + CTA_WRITE_ERROR("Error in my_Create_Size"); + return retval; + } + + /* allocate memory for new stochobs object */ + sobs=CTA_Malloc(sizeof(CTAI_SObs)); + sobs->data=CTA_Malloc(memsize); + sobs->userdata = NULL; + + + /* copy function pointers */ + for (i=0;i0) { printf("Member function[%d]=%d\n",i,clsdata->functions[i]); } + sobs->functions[i]=clsdata->functions[i]; + } + + /* set other general information */ + sobs->hsobscl = hsobscl; + sobs->hdescr = CTA_NULL; + + /* Initialise and fill new stochobs */ + retval=CTA_Func_GetFunc(clsdata->functions[CTA_SOBS_CREATE_INIT], + &my_Create_Init); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function CTA_SOBS_CREATE_INIT"); + return retval; + } + my_Create_Init(sobs->data, userdata, &retval); + if (retval!=CTA_OK) { + sprintf(message,"my_Create_Init returned with (COSTA) error code %d",retval); + CTA_WRITE_ERROR(message); + return retval; + } + + /* Allocate new handle and return eror when unsuccesfull */ + retval=CTA_Handle_Create("stochobs",CTA_SOBS,sobs,hsobs); + if (retval) { + CTA_WRITE_ERROR("Cannot create handle"); + return retval; + } + + return CTA_OK; +} + +#undef METHOD +#define METHOD "CreateTimSel" +int CTA_SObs_CreateTimSel( + CTA_StochObs hsobsin, /* Handle of the stochastic observer of + which a selection is to be made*/ + CTA_Time timespan, /* Timespan over which selection has te be made*/ + CTA_StochObs *hsobsout) /* The new COSTA-stochastic observer + (handle) */ +{ + int ierr; + char str[80]; + double t1,t2; + double eps; + + CTA_String sselect; + + if (IDEBUG>0) { printf("DEBUG: CTA_SObs_CreateTimSel: START \n");} + + *hsobsout=CTA_NULL; + if (hsobsin!=CTA_NULL){ + // Get interval of timespan (t1,t2) + ierr=CTA_Time_GetSpan(timespan,&t1,&t2); + if (ierr!=CTA_OK) { + char message[1024]; + sprintf(message,"Cannot retrieve interval of timespan (%g,%g)",t1,t2); + CTA_WRITE_ERROR(message); + return ierr; + } + // Create the Selection string (SQL-statement) + ierr=CTA_String_Create(&sselect); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Cannot create string"); + return ierr; + } + + eps=(t2-t1)*1.0e-4+1.0e-16; + t1=t1+eps; + t2=t2+eps; + sprintf(str, "time BETWEEN %f AND %f",t1, t2 ); + ierr=CTA_String_Set(sselect,str); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Cannot set string"); + return ierr; + } + + // Create the selcection + ierr=CTA_SObs_CreateSel(hsobsin, sselect, hsobsout); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Error in CreateSel"); + return ierr; + } + + // Free work variables + ierr=CTA_String_Free(&sselect); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Cannot free string"); + return ierr; + } + } + return CTA_OK; +}; + +#undef METHOD +#define METHOD "CreateSel" +int CTA_SObs_CreateSel( + // INPUTS: + CTA_StochObs hsobsin, /* Handle of the stochastic observer of + which a selection is to be made*/ + CTA_Handle userdata, /* Inputs necessary for making a selection */ + // OUTPUTS: + CTA_StochObs *hsobsout) /* The new COSTA-stochastic observer + (handle) */ +{ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_SObs* sobsin; /* All data of observer hsobsin */ + CTAI_SObs* sobsout; /* All data of observer hsobsout */ + CTA_Function *memfun; /* Function that must be called */ + int memsize,i; + + if (hsobsin==CTA_NULL){ + *hsobsout=CTA_NULL; + return CTA_OK; + } + /* Look up the Create-size function and the data of the input observer*/ + retval = CTAI_SObs_member_function(hsobsin, CTA_SOBS_CREATE_SIZE, + &sobsin, &memfun); + if (retval) { + CTA_WRITE_ERROR("Error while looking up the Create-size function and the data of the input observer"); + return retval; + } + + /* Determine the necessary memory size and allocate memory */ + memfun(&memsize,&retval); + if (retval) { + CTA_WRITE_ERROR("Error while determining the necessary memory size and allocate memory"); + return retval; + } + + sobsout=CTA_Malloc(sizeof(CTAI_SObs)); + sobsout->data=CTA_Malloc(memsize); + sobsout->userdata=NULL; + + + /* copy function pointers and observation class handle */ + sobsout->hsobscl=sobsin->hsobscl; + for (i=0;ifunctions[i]=sobsin->functions[i]; } + + /* Look up member function and observer data */ + retval = CTA_Func_GetFunc(sobsout->functions[CTA_SOBS_CREATE_SELECTION], + &memfun); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error while loking up member functions and observer data"); + return retval; + } + + /* Call (user) implementation */ + // printf("memfun, sobsout->functions[i], %d %d\n",&memfun,sobsout->functions[CTA_SOBS_CREATE_SELECTION]); + // printf("cta_sobs_createsel: calling member function with userdata %d \n",userdata); + memfun(sobsin->data,&userdata,sobsout->data,&retval); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error while calling (user) implementation"); + return retval; + } + + /* Allocate new handle and return error when unsuccesfull */ + retval=CTA_Handle_Create("stochobs",CTA_SOBS,sobsout,hsobsout); + if (IDEBUG>0) printf("end of cta_sobs_createsel: retval %d handle %d \n",retval,*hsobsout); + + return retval; +}; + + + +#undef METHOD +#define METHOD "Count" +int CTA_SObs_Count( + // INPUTS: + CTA_StochObs hsobs, /* Handle of the stochastic observer */ + // OUTPUTS: + int *nmeasr /* number of measurements in this observer */ + ) +{ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_SObs *sobs; /* All data of observer hsobs */ + CTA_Function *memfun; /* Function that must be called */ + + /* trivial return we accept CTA_NULL -> nmeasr=0 */ + if (hsobs==CTA_NULL){ + *nmeasr=0; + return CTA_OK; + } + /* Look up member function and sobserver data */ + retval = CTAI_SObs_member_function(hsobs, I_CTA_SOBS_COUNT, + &sobs, &memfun); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error while looking up member function and sobserver data"); + return retval; + } + + /* Call (user) implementation */ + memfun(sobs->data,nmeasr,&retval); + return retval; +}; + +#undef METHOD +#define METHOD "GetvVl" +int CTA_SObs_GetVal( + // INPUTS: + CTA_StochObs hsobs, /* Handle of the stochastic observer */ + // OUTPUTS: + CTA_Vector hvec /* measurements */ + ) +{ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_SObs *sobs; /* All data of observer hsobs */ + CTA_Function *memfun; /* Function that must be called */ + + /* Look up member function and sobserver data */ + retval = CTAI_SObs_member_function(hsobs, CTA_SOBS_GET_VALUES, + &sobs, &memfun); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error while looking up member function and sobserver data"); + return retval; + } + + /* Call (user) implementation */ + memfun(sobs->data,&hvec,&retval); + return retval; +}; + +#undef METHOD +#define METHOD "GetTimes" +int CTA_SObs_GetTimes( + // INPUTS: + CTA_StochObs hsobs, /* Handle of the stochastic observer */ + // OUTPUTS: + CTA_Vector hvec /* time instances associated to measurements */ + ) +{ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_SObs *sobs; /* All data of observer hsobs */ + CTA_Function *memfun; /* Function that must be called */ + + /* Look up member function and sobserver data */ + retval = CTAI_SObs_member_function(hsobs, CTA_SOBS_GET_TIMES, + &sobs, &memfun); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error while looking up member function and sobserver data"); + return retval; + } + + /* Call (user) implementation */ + memfun(sobs->data,&hvec,&retval); + return retval; +}; + + +#undef METHOD +#define METHOD "GetRealisation" +int CTA_SObs_GetRealisation( + // INPUTS: + CTA_StochObs hsobs, /* Handle of the stochastic observer */ + // OUTPUTS: + CTA_Vector hvec /* measurements */ + ) +{ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_SObs *sobs; /* All data of observer hsobs */ + CTA_Function *memfun; /* Function that must be called */ + + /* Look up member function and sobserver data */ + retval = CTAI_SObs_member_function(hsobs, CTA_SOBS_GET_REALISATION, + &sobs, &memfun); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error while looking up member function and sobserver data"); + return retval; + } + + /* Call (user) implementation */ + memfun(sobs->data,&hvec,&retval); + return retval; +}; + + + + +#undef METHOD +#define METHOD "GetExpectation" +int CTA_SObs_GetExpectation( + // INPUTS: + CTA_StochObs hsobs, /* Handle of the stochastic observer */ + // OUTPUTS: + CTA_Vector hvec /* measurements */ + ) +{ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_SObs *sobs; /* All data of observer hsobs */ + CTA_Function *memfun; /* Function that must be called */ + + /* Look up member function and sobserver data */ + retval = CTAI_SObs_member_function( hsobs, CTA_SOBS_GET_EXPECTATION, + &sobs, &memfun); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error while looking up member function and sobserver data"); + return retval; + } + + /* Call (user) implementation */ + memfun(sobs->data,&hvec,&retval); + return retval; +}; + + + +#undef METHOD +#define METHOD "EvalPDF" +int CTA_SObs_EvalPDF( +/* Ik vind dit nogal een vage functie. Ik weet niet wat een mens + hiermee moet gaan doen. In sommige gevallen kan ook geen eerlijk + antwoord worden gegeven (Dirac-achtige dingen en zo) */ + + // INPUTS: + CTA_StochObs hsobs, /* Handle of the stochastic observer */ + CTA_Vector hvecx, /* location for evaluating PDF */ + // OUTPUTS: + CTA_Vector hvecy /* PDF-value */ + ) +{ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_SObs *sobs; /* All data of observer hsobs */ + CTA_Function *memfun; /* Function that must be called */ + + /* Look up member function and sobserver data */ + retval = CTAI_SObs_member_function(hsobs, CTA_SOBS_EVALUATE_PDF, + &sobs, &memfun); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error while looking up member function and sobserver data"); + return retval; + } + + /* Call (user) implementation */ + memfun(sobs->data,&hvecx,&hvecy,&retval); + return retval; +}; + + + + +#undef METHOD +#define METHOD "GetCovMat" +int CTA_SObs_GetCovMat( + // INPUTS: + CTA_StochObs hsobs, /* Handle of the stochastic observer */ + // OUTPUTS: + CTA_Matrix hmat /* Covariance matrix */ + ) +{ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_SObs *sobs; /* All data of observer hsobs */ + CTA_Function *memfun; /* Function that must be called */ + + /* Look up member function and sobserver data */ + retval = CTAI_SObs_member_function( hsobs, CTA_SOBS_GET_COV_MATRIX, + &sobs, &memfun); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error while looking up member function and sobserver data"); + return retval; + } + + /* Call (user) implementation */ + memfun(sobs->data,&hmat,&retval); + return retval; +}; + + + + +#undef METHOD +#define METHOD "GetVar" +int CTA_SObs_GetVar( + // INPUTS: + CTA_StochObs hsobs, /* Handle of the stochastic observer */ + // OUTPUTS: + CTA_Vector hvec /* Variances of measurements */ + ) +{ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_SObs *sobs; /* All data of observer hsobs */ + CTA_Function *memfun; /* Function that must be called */ + int varflag; + + /* Look up member function and sobserver data */ + retval = CTAI_SObs_member_function( hsobs, CTA_SOBS_GET_VARIANCE, + &sobs, &memfun); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error while looking up member function and sobserver data"); + return retval; + } + + /* Call (user) implementation */ + varflag=CTA_TRUE; + memfun(sobs->data,&varflag,&hvec,&retval); + return retval; +}; + +#undef METHOD +#define METHOD "GetStd" +int CTA_SObs_GetStd( + // INPUTS: + CTA_StochObs hsobs, /* Handle of the stochastic observer */ + // OUTPUTS: + CTA_Vector hvec /* Variances of measurements */ + ) +{ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_SObs *sobs; /* All data of observer hsobs */ + CTA_Function *memfun; /* Function that must be called */ + int varflag; + + /* Look up member function and sobserver data */ + retval = CTAI_SObs_member_function( hsobs, CTA_SOBS_GET_VARIANCE, + &sobs, &memfun); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error while looking up member function and sobserver data"); + return retval; + } + + /* Call (user) implementation */ + varflag=CTA_FALSE; + memfun(sobs->data, &varflag, &hvec, &retval); + return retval; +}; + + + + +#undef METHOD +#define METHOD "GetDescription" +int CTA_SObs_GetDescription( + CTA_StochObs hsobs, /* Handle of the stochastic observer */ + CTA_ObsDescr *hobsdescr /* descriptor of the stochastic observer*/ + ) +{ + int retval; + int nmeasr; /* number of mesurements in hsobs */ + CTAI_SObs *sobs; /* All data of observer hsobs */ + CTAI_SObsClass *clsdata; + CTA_Handle hsobs_data; + + /* Check that the given handle is indeed an observer */ + retval=CTA_Handle_Check((CTA_Handle) hsobs, CTA_SOBS); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_sobs handle"); + return retval; + } + /* Get pointer to struct with observer data */ + retval=CTA_Handle_GetData((CTA_Handle) hsobs,(void**) &sobs); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* If stochastic observer does not contain any observations + Return CTA_NULL */ + retval=CTA_SObs_Count(hsobs, &nmeasr); + if (nmeasr==0) { + if (IDEBUG > 0) printf("cta_stoch_observer:CTA_SObs_GetDescription: Warning: you asked observations description of empty stochastic observer %d !\n",hsobs); + *hobsdescr=CTA_NULL; + return CTA_OK; + } + retval=CTA_Handle_GetData((CTA_Handle) sobs->hsobscl, + (void**) &clsdata); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* Create a handle for holding pointer to data-object "sobs" */ + retval=CTA_Handle_Create("", CTA_DATABLOCK, sobs->data, &hsobs_data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot create handle"); + return retval; + } + + retval = CTA_ObsDescr_Create(clsdata->descrcl, hsobs_data, hobsdescr); + if (retval!=CTA_OK){ + CTA_WRITE_ERROR("Cannot create observation description"); + } + + /* Free handle (note data is not deallocated!) */ + retval=CTA_Handle_Free(&hsobs_data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot free handle"); + return retval; + } + +return CTA_OK; +} + +#undef METHOD +#define METHOD "Export" +int CTA_SObs_Export( + CTA_StochObs hsobs, /* Handle of the stochastic observer */ + CTA_Handle userdata + ) +{ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_SObs *sobs; /* All data of observer hsobs */ + CTA_Function *memfun; /* Function that must be called */ + + /* Look up member function and sobserver data */ + retval = CTAI_SObs_member_function( hsobs, I_CTA_SOBS_EXPORT, + &sobs, &memfun); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error while looking up member function and sobserver data"); + return retval; + } + + /* Call (user) implementation */ + memfun(sobs->data,&userdata,&retval); + return retval; +}; + + + + +#undef METHOD +#define METHOD "Free" +int CTA_SObs_Free( + CTA_StochObs *hsobs /* Handle of stochastic observer */ + ) +{ + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_SObs *sobs; /* All data of stochastic observer */ + CTA_Function *my_free; /* Function that must be called */ + CTA_Handle *userdata; /* User data of function call */ + + + /* Check for quick return */ + if (*hsobs==CTA_NULL) return CTA_OK; + + /* Look up member function and sobserver data */ + retval = CTAI_SObs_member_function(*hsobs, I_CTA_SOBS_FREE, + &sobs, &my_free); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error while looking up member function and sobserver data"); + return retval; + } + + /* Call (user) implementation */ + my_free(sobs->data,&userdata,&retval); + free(sobs->data); + if (sobs->userdata) free(sobs->userdata); + free(sobs); + retval=CTA_Handle_Free(hsobs); + + return retval; +}; + + +CTA_Handle CTAI_SObs_GetUserData(CTA_StochObs hsobs, int index) { +// CTAI_SObs *sobs; /* All data of stochastic observer */ + +// sobs = (CTAI_SObs *)CTAI_Handle_GetData(hsobs); + return CTA_NULL; +} + +/* Interfacing with Fortran */ + +CTAEXPORT void CTA_SOBS_CREATE_F77(int *hstochobscl, int *userdata, + int *hstochobs, int *ierr){ + + *ierr=CTA_SObs_Create((CTA_SObsClass) *hstochobscl, + (CTA_Handle) *userdata, (CTA_StochObs *) hstochobs); +} + + + +CTAEXPORT void CTA_SOBS_CREATESEL_F77(int *hsobsin, int *userdata, + int *hsobsout, int *ierr){ + + *ierr=CTA_SObs_CreateSel((CTA_StochObs) *hsobsin, + (CTA_Handle) *userdata, + (CTA_StochObs*) hsobsout); +} + +CTAEXPORT void CTA_SOBS_CREATETIMSEL_F77(int *hsobsin, int *timespan, + int *hsobsout, int *ierr){ + + *ierr=CTA_SObs_CreateTimSel((CTA_StochObs) *hsobsin, + (CTA_Time) *timespan, + (CTA_StochObs*) hsobsout); +} + + + +CTAEXPORT void CTA_SOBS_COUNT_F77(int *hsobs, int *nmeasr, int *ierr){ + + *ierr=CTA_SObs_Count((CTA_StochObs) *hsobs, nmeasr); +} + + +CTAEXPORT void CTA_SOBS_GETVAL_F77(int *hsobs, int *hvec, int *ierr){ + *ierr=CTA_SObs_GetVal((CTA_StochObs) *hsobs, (CTA_Vector) *hvec); +} + +CTAEXPORT void CTA_SOBS_GETTIMES_F77(int *hsobs, int *hvec, int *ierr){ + *ierr=CTA_SObs_GetTimes((CTA_StochObs) *hsobs, (CTA_Vector) *hvec); +} + +CTAEXPORT void CTA_SOBS_GETREALISATION_F77(int *hsobs, int *hvec, int *ierr){ + *ierr=CTA_SObs_GetRealisation((CTA_StochObs) *hsobs, + (CTA_Vector) *hvec); +} + + +CTAEXPORT void CTA_SOBS_GETEXPECTATION_F77(int *hsobs, int *hvec, int *ierr){ + *ierr=CTA_SObs_GetExpectation((CTA_StochObs) *hsobs, + (CTA_Vector) *hvec); +} + +CTAEXPORT void CTA_SOBS_EVALPDF_F77(int *hsobs, int *hvecx, int *hvecy, int *ierr){ + + *ierr=CTA_SObs_EvalPDF((CTA_StochObs) *hsobs, (CTA_Vector) *hvecx, + (CTA_Vector) *hvecy); +} + +CTAEXPORT void CTA_SOBS_GETCOVMAT_F77(int *hsobs, int *hmat, int *ierr){ + *ierr=CTA_SObs_GetCovMat((CTA_StochObs) *hsobs, (CTA_Matrix) *hmat); +} + + +CTAEXPORT void CTA_SOBS_GETVAR_F77(int *hsobs, int *hvec, int *ierr){ + *ierr=CTA_SObs_GetVar((CTA_StochObs) *hsobs, (CTA_Matrix) *hvec); +} + +CTAEXPORT void CTA_SOBS_GETSTD_F77(int *hsobs, int *hvec, int *ierr){ + *ierr=CTA_SObs_GetStd((CTA_StochObs) *hsobs, (CTA_Matrix) *hvec); +} + +CTAEXPORT void CTA_SOBS_GETDESCRIPTION_F77(int *hsobs, int *hobsdescr, int *ierr){ + *ierr=CTA_SObs_GetDescription((CTA_StochObs) *hsobs, + (CTA_ObsDescr*) hobsdescr); +} + + +CTAEXPORT void CTA_SOBS_EXPORT_F77(int *hsobs, int *userdata, int *ierr){ + *ierr=CTA_SObs_Export((CTA_StochObs) *hsobs, (CTA_Handle) *userdata); +} + +CTAEXPORT void CTA_SOBS_FREE_F77( int *hsobs, int *ierr ){ + *ierr=CTA_SObs_Free( (CTA_StochObs*) hsobs); +} + + diff --git a/costa/native/cta/src/cta_sobs_combine.c b/costa/native/cta/src/cta_sobs_combine.c new file mode 100644 index 000000000..fed3d9f42 --- /dev/null +++ b/costa/native/cta/src/cta_sobs_combine.c @@ -0,0 +1,704 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_stoch_observer_combine.c $ +$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include "cta_mem.h" +#include "cta.h" +#include "ctai.h" +#include "cta_flush.h" +#include "cta_util_statistics.h" +//#include "cta_util_combine.h" +#include "cta_file.h" +#include "cta_string.h" +//#include "cta_obsdescr_combine.h" +//#include "cta_sobs_combine.h" +#include "cta_sobs.h" +#include "cta_errors.h" +#include "cta_reltable.h" + +#include "cta_sobs_combine.h" + +#define IDEBUG (0) + + + +void CTAI_SObs_combine_Create_Size( + // OUTPUTS: + int *memsize, // The number of bytes which are necessary to store one + // CTAI_SObs_combine, with a pointer to the contents (data), + // but without the contents themselves. + int *retval // error code (see cta_datatypes.h for possible error codes) + ){ + *memsize=(int) sizeof(CTAI_SObs_combine); + *retval=CTA_OK; +}; + + + + + +void CTAI_SObs_combine_Create_Init( +/* + Allocate the memory which is necessary to store the data necessary for a + combined observer +*/ + // IN-OUTPUTS + CTAI_SObs_combine *x, // The combine-observer for which the memory must + // be allocated + // INPUTS: + CTA_Handle userdata, // User data: vector of size 2 containing tree with SObs and timeoffset + // OUTPUTS + int *retval) // Error code. Possible error: Illegal data type +{ + //CTA_Tree hsubsobs_tr; + CTA_Handle *hsubsobsstr; + int ierr; + int nofsubsobs,isob; + CTA_String h_ss_name; + char ss_name[100]; + CTA_Vector hvec1; + double timeoffset; + CTA_Handle userdata1, userdata2; + + ierr=CTA_Handle_Check((CTA_Handle) userdata,CTA_VECTOR); + if (ierr!=CTA_OK) { + printf("CTAI_SObs_combine_Create_Init: userdata is not a vector! \n");exit(-1); + } + ierr=CTA_Vector_GetVal(userdata,1,&userdata1,CTA_HANDLE); + ierr=CTA_Vector_GetVal(userdata,2,&userdata2,CTA_HANDLE); + + ierr=CTA_Handle_Check((CTA_Handle) userdata1,CTA_TREE); + if (ierr!=CTA_OK) { + printf("CTAI_SObs_combine_Create_Init: userdata1 is not a tree! \n"); + } + printf("CTAI_SObs_combine_Create_Init: -----------boom: \n"); + ierr = CTA_Tree_Print(userdata1); + printf("------ \n"); + + + /* determine number of sub-SObs (nofsubsobs) */ + ierr = CTA_Tree_CountItems(userdata1, &nofsubsobs ); + printf("combine-SObs: count subsobs %d %d\n",ierr, nofsubsobs); + x->nofsubsobs = nofsubsobs; + + /* Create array with input configuration of all sobs */ + hsubsobsstr = CTA_Malloc((nofsubsobs+1)*sizeof(CTA_Tree)); + + + + /* Create and store handles to all subsobs */ + for (isob=1; isob<=nofsubsobs; isob++) { + ierr = CTA_Tree_GetItem (userdata1, isob, &hsubsobsstr[isob]); + printf("get substochobs: number :%d ,retval %d \n",isob,ierr); + ierr=CTA_Handle_Check((CTA_Handle) hsubsobsstr[isob],CTA_SOBS); + printf("get substochobs TEST if it is a real sobs : retval %d \n",ierr); + + ierr = CTA_String_Create(&h_ss_name); + ierr=CTA_Handle_GetName(hsubsobsstr[isob],h_ss_name); + // printf("getname: %d \n",ierr); + ierr=CTA_String_Get(h_ss_name, ss_name); + //printf("string get: %d \n",ierr); + //printf("naam is %s \n",ss_name); + } + + /* put array in cta-vector of handles */ + ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nofsubsobs, CTA_HANDLE, CTA_NULL, &hvec1); + + for (isob=1; isob<=nofsubsobs; isob++) { + ierr=CTA_Vector_SetVal(hvec1,isob,&hsubsobsstr[isob],CTA_HANDLE); + } + x->subsobs = hvec1; + + ierr=CTA_Vector_GetVal(userdata2,1,&timeoffset,CTA_DOUBLE); + if (ierr!=CTA_OK) { + printf("CTAI_SObs_combine_Create_Init: userdata2 is not a vector of doubles! \n"); + } + x->timeoffset = timeoffset; + + *retval = ierr; +}; + + + +void CTAI_SObs_combine_CreateSel( +/* + Allocate the memory which is neccesary to store the data necessary for a + combine-observer +*/ + // INPUTS + CTAI_SObs_combine *obsin,// The combine-observer of which a selection + // is to be made + CTA_Handle *userdata_in, // User data: condition + // OUTPUTS + CTAI_SObs_combine *obsout, // The combine-observer which is a selection + // of observer obsin + int *retval) // Error code +{ + CTA_Handle userdata=*userdata_in; + int nofsubsobs,isob; + CTA_Handle subsob; + CTA_StochObs hobsout; + int ierr, totcount, subcount, selected_subcount, sel2count, i; + CTA_Vector hvec1; + CTA_RelTable reltab2; + int * shadow_vals; + int * selected_shadow_vals; + int * sel2_shadow_vals; + + CTA_Datatype datatype; + + CTA_Handle_GetDatatype(userdata, &datatype); + // if (datatype==CTA_STRING){printf("CTAI_SOBS_Combine_Createsel(0): userdata STRING\n"); } + // if (datatype==CTA_RELTABLE){printf("CTAI_SOBS_Combine_Createsel(0): userdata RELTABLE\n"); } + + + /* Warning: if userdata is a relation table, then calling the subsobs with this userdata + will not work. Solution: make a, adjusted relation table + */ + + nofsubsobs = obsin->nofsubsobs; + + /* Warning: only the memory of obsout has been created already, + the nofsubsobs and vector have to be defined now. */ + obsout->nofsubsobs = nofsubsobs; + ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nofsubsobs, CTA_HANDLE, CTA_NULL, &hvec1); + + // printf("CTAI_SOBS_Combine_Createsel(1): nofsubsobs: %d ierr: %d \n",nofsubsobs,ierr); + //printf("CTAI_SOBS_Combine_Createsel(2): userdata: %d \n",userdata); + + + totcount = 0; + for (isob=1; isob<=nofsubsobs; isob++) { + // printf("-------------------sobs_combine_createsel: subsob %d of %d \n",isob, nofsubsobs); + ierr=CTA_Vector_GetVal(obsin->subsobs,isob,&subsob,CTA_HANDLE); + /* check if handle is a stochobs? */ + ierr=CTA_Handle_Check((CTA_Handle) subsob,CTA_SOBS); + if (IDEBUG > 5) { + printf("sobs_combine_createsel: TEST if subsobs %d is a real (sub-)sobs : retval %d \n",isob,ierr);} + /* call the createsel function for each individual sub-sobserver. Adjust userdata in case of reltable! */ + + ierr = CTA_SObs_Count(subsob, &subcount); + + + if (datatype!=CTA_RELTABLE || subcount == 0) { + ierr = CTA_SObs_CreateSel(subsob,userdata, &hobsout); + } + else { /* relation tables has to be redefined for each subsob ! */ + // use the relation table + CTA_RelTable_Count(userdata,&selected_subcount); + + shadow_vals = CTA_Malloc(subcount * sizeof(int)); + + selected_shadow_vals = CTA_Malloc(selected_subcount* sizeof(int)); + sel2_shadow_vals = CTA_Malloc(selected_subcount* sizeof(int)); + for (i=0; i < subcount; i++) { shadow_vals[i] = i+1 ;} + + CTA_RelTable_ApplyVal(userdata, shadow_vals, subcount, + selected_shadow_vals, selected_subcount , CTA_INTEGER); + sel2count = 0; + for (i=0; i < selected_subcount; i++) { + if (selected_shadow_vals[i] - totcount > 0) { + sel2_shadow_vals[sel2count] = selected_shadow_vals[i] - totcount ; + sel2count = sel2count + 1; + } + } + CTA_RelTable_Create(&reltab2); + CTA_RelTable_SetSelectVal(reltab2, sel2_shadow_vals, sel2count, CTA_INTEGER); + + // printf("sobcombine_createsel: nieuwe reltab: van: %d naar %d; %d \n",selected_subcount, sel2count, sel2_shadow_vals[0]); + + ierr = CTA_SObs_CreateSel(subsob,reltab2, &hobsout); + free(shadow_vals); free(selected_shadow_vals); free(sel2_shadow_vals); + } + + /* put the selected subsobs in the vector subsout->subsobs */ + ierr=CTA_Vector_SetVal(hvec1,isob,&hobsout,CTA_HANDLE); + if (ierr != CTA_OK) { + printf("ctai_sobs_createsel: ERROR %d \n",ierr); + + + totcount = totcount + subcount; + } + } + /* set the cta_vector of the obsout */ + obsout->subsobs = hvec1; + + *retval = ierr; + +}; + + +void CTAI_SObs_combine_Count( +// Return the number of measurements in the observer + // INPUTS + CTAI_SObs_combine *x, // The StochObserver of which the number of measurements is + // returned + // OUTPUTS + int *nmeasr, + int *retval + ) +{ + int nofsubsobs, isob, ierr; + CTA_Handle subsob; + int nsubmeasr; + + *nmeasr = 0; + nofsubsobs = x->nofsubsobs; + + for (isob=1; isob<=nofsubsobs; isob++) { + ierr=CTA_Vector_GetVal(x->subsobs,isob,&subsob,CTA_HANDLE); + ierr = CTA_SObs_Count(subsob, &nsubmeasr); + *nmeasr = *nmeasr + nsubmeasr; + } + if (IDEBUG>10) printf("ctai_sobs_combine_count; total: %d\n",*nmeasr); + + *retval = ierr; +}; + + + + +void CTAI_SObs_combine_GetVals( +// Get all the values from a combine-StochObserver + // INPUTS + CTAI_SObs_combine *x, // StochObserver from which the measurements are + // returned + // OUTPUTS + CTA_Vector *hvec, // COSTA-vector containing the values + int *retval + ) +{ + int nsubmeasr, totmeasr,j; + int nofsubsobs, offset, isob, ierr; + CTA_Handle subsob; + CTA_Vector hsubvec; + double *val, *subval; + CTA_String *strval, *substrval; + + CTA_Datatype dt; + + nofsubsobs = x->nofsubsobs; + /* first ask the total number of values and the datatype*/ + ierr = CTA_Vector_GetSize(*hvec,&totmeasr); + ierr = CTA_Vector_GetDatatype(*hvec,&dt); + if (IDEBUG > 5) printf("ctai_sobs_combine_getvals: subsobs;ask total length %d %d \n",nofsubsobs,totmeasr); + + /* allocate the main array containing all the values */ + if(dt == CTA_STRING) {strval = CTA_Malloc(sizeof(CTA_String)*totmeasr); + printf("sobs_combine_getvals: PAS OP: string als propertydatatype!\n");} + else { val = CTA_Malloc(sizeof(double)*totmeasr);} + + offset = 0; + for (isob=1; isob<=nofsubsobs; isob++) { + ierr=CTA_Vector_GetVal(x->subsobs,isob,&subsob,CTA_HANDLE); + /* first count the the measurements in the subsobs */ + ierr = CTA_SObs_Count(subsob, &nsubmeasr); + + if (nsubmeasr > 0) { + + /* create the vector which receives the values */ + ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nsubmeasr, dt, CTA_NULL, &hsubvec); + /* receive the values */ + ierr = CTA_SObs_GetVal(subsob, hsubvec); + + /* put the values in the sub-array. In case of String-output, create the strings */ + if (dt == CTA_STRING) { + substrval = CTA_Malloc(sizeof(CTA_String)*nsubmeasr); + for (j=0; jnofsubsobs; + /* first ask the total number of values and the datatype*/ + ierr = CTA_Vector_GetSize(*hvec,&totmeasr); + ierr = CTA_Vector_GetDatatype(*hvec,&dt); + if (IDEBUG > 0) printf("ctai_sobs_combine_gettimes: subsobs;ask total length %d %d \n",nofsubsobs,totmeasr); + + /* allocate the main array containing all the values */ + val = CTA_Malloc(sizeof(double)*totmeasr); + + offset = 0; + for (isob=1; isob<=nofsubsobs; isob++) { + ierr=CTA_Vector_GetVal(x->subsobs,isob,&subsob,CTA_HANDLE); + /* first count the the measurements in the subsobs */ + ierr = CTA_SObs_Count(subsob, &nsubmeasr); + /* create the vector which receives the values */ + ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nsubmeasr, dt, CTA_NULL, &hsubvec); + /* receive the values */ + ierr = CTA_SObs_GetTimes(subsob, hsubvec); + + /* put the values in the sub-array */ + subval = CTA_Malloc(sizeof(double)*nsubmeasr); + ierr=CTA_Vector_GetVals(hsubvec,subval, nsubmeasr, dt); + + for (j=0; j < nsubmeasr; j++) { val[offset+j] = subval[j] ; } + offset = offset + nsubmeasr; + free(subval); + ierr = CTA_Vector_Free(&hsubvec); + } + /* fill hvec with the main array */ + ierr=CTA_Vector_SetVals(*hvec,val, totmeasr, dt); + free(val); + if (IDEBUG>0) {printf("sobs_combine_gettimes: total %d, first,last:%f %f \n",totmeasr,val[0],val[totmeasr-1]);} + + // for (j=0; j < totmeasr; j++) {printf("%d %f\n",j, val[j]); } + + *retval = ierr; + +}; + +void CTAI_SObs_combine_GetVariances( +// Get all the variances of the measurements in a combine-StochObserver + // INPUTS + CTAI_SObs_combine *x, // StochObserver from which the measurements are + // returned + int *returnvar, //return variance (CTA_TRUE) or std (CTA_FALSE) + // OUTPUTS + CTA_Vector *hvec, // COSTA-vector containing the values + int *retval + ) +{ + int nsubmeasr, offset, totmeasr; + int nofsubsobs, isob, j,ierr; + CTA_Handle subsob; + CTA_Vector hsubvec; + double *val, *subval; + CTA_Datatype dt; + + + nofsubsobs = x->nofsubsobs; + /* first ask the total number of values and the datatype*/ + ierr = CTA_Vector_GetSize(*hvec,&totmeasr); + ierr = CTA_Vector_GetDatatype(*hvec,&dt); + /* allocate the main array containing all the values */ + val = CTA_Malloc(sizeof(double)*totmeasr); + offset = 0; + + for (isob=1; isob<=nofsubsobs; isob++) { + ierr=CTA_Vector_GetVal(x->subsobs,isob,&subsob,CTA_HANDLE); + /* first count the the measurements in the subsobs */ + ierr = CTA_SObs_Count(subsob, &nsubmeasr); + /* create the vector which receives the values */ + ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nsubmeasr, dt, CTA_NULL, &hsubvec); + /* receive the values */ + if (*returnvar == CTA_TRUE) { + ierr = CTA_SObs_GetVar(subsob,hsubvec); + } + else { + ierr = CTA_SObs_GetStd(subsob,hsubvec); + } + /* put the values in the sub-array */ + subval = CTA_Malloc(sizeof(double)*nsubmeasr); + ierr=CTA_Vector_GetVals(hsubvec,subval, nsubmeasr, dt); + /* fill the main vector */ + for (j=0; j < nsubmeasr; j++) { val[offset+j] = subval[j]; } + offset = offset + nsubmeasr; + free(subval); + ierr = CTA_Vector_Free(&hsubvec); + } + /* fill hvec with the main array */ + ierr=CTA_Vector_SetVals(*hvec,val, totmeasr, dt); + free(val); + + *retval = ierr; + +}; + + +void CTAI_SObs_combine_GetRealisation( +// Calculate stochastic realizations for all the measurements in a combine-StochObserver + // INPUTS + CTAI_SObs_combine *x, // StochObserver from which the measurements are + // returned + // OUTPUTS + CTA_Vector *hvec, // COSTA-vector containing the realizations + int *retval + ) +{ + int nsubmeasr, offset, totmeasr; + int nofsubsobs, isob, j,ierr; + CTA_Handle subsob; + CTA_Vector hsubvec; + double *val, *subval; + CTA_Datatype dt; + + + + nofsubsobs = x->nofsubsobs; + /* first ask the total number of values and the datatype*/ + ierr = CTA_Vector_GetSize(*hvec,&totmeasr); + ierr = CTA_Vector_GetDatatype(*hvec,&dt); + + /* allocate the main array containing all the values */ + val = CTA_Malloc(sizeof(double)*totmeasr); + + offset = 0; + for (isob=1; isob<=nofsubsobs; isob++) { + ierr=CTA_Vector_GetVal(x->subsobs,isob,&subsob,CTA_HANDLE); + /* first count the the measurements in the subsobs */ + ierr = CTA_SObs_Count(subsob, &nsubmeasr); + /* create the vector which receives the values */ + ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nsubmeasr, dt, CTA_NULL, &hsubvec); + /* Call the getRealisation of the subsob */ + ierr = CTA_SObs_GetRealisation(subsob,hsubvec); + + /* put the values in the sub-array */ + subval = CTA_Malloc(sizeof(double)*nsubmeasr); + ierr=CTA_Vector_GetVals(hsubvec,subval, nsubmeasr, dt); + /* fill the main vector */ + for (j=0; j < nsubmeasr; j++) { val[offset+j] = subval[j]; } + offset = offset + nsubmeasr; + free(subval); + ierr = CTA_Vector_Free(&hsubvec); + } + /* fill hvec with the main array */ + ierr=CTA_Vector_SetVals(*hvec,val, totmeasr, dt); + free(val); + // for (j=0; j < totmeasr; j++) {printf("%d %f\n",j, val[j]); } + + *retval = ierr; + +}; + +void CTAI_SObs_combine_GetCovMat( +// Get all the variances of the measurements in a combine-StochObserver + // INPUTS + CTAI_SObs_combine *x, // StochObserver from which the measurements are + // returned + // OUTPUTS + CTA_Matrix *hmat, /* Covariance matrix */ + int *retval + ) +{ + int nofsubsobs, isob, offset, j,k,ierr; + int nsubmeasr, nmeasr_tot,n,m; + CTA_Handle subsob; + CTA_Matrix hsubmat; + double *val, *subval, dzero; + CTA_Datatype dt; + + nofsubsobs = x->nofsubsobs; + /* first ask the total number of values and the datatype*/ + ierr = CTA_Matrix_GetSize(*hmat,&m, &n); + if (n != m ) printf("ctai_sobs_combine_getcovmat: ERROR: matrix is not square \n"); + nmeasr_tot = m; + ierr = CTA_Matrix_GetDatatype(*hmat,&dt); + + /* allocate the main array containing all the values */ + val = CTA_Malloc(sizeof(double)*nmeasr_tot*nmeasr_tot); + + dzero = 0.0; + offset = 0; + + /* initialize the combined matrix */ + ierr = CTA_Matrix_SetConstant(*hmat, &dzero, dt); + + for (isob=1; isob<=nofsubsobs; isob++) { + ierr=CTA_Vector_GetVal(x->subsobs,isob,&subsob,CTA_HANDLE); + /* first count the the measurements in the subsobs */ + ierr = CTA_SObs_Count(subsob, &nsubmeasr); + // printf("sobs_combine_getcovmat: sobs_count %d %d \n",nsubmeasr,ierr);CTA_Matrix_Export(*hmat,CTA_FILE_STDOUT); + + if (nsubmeasr > 0) { + /* create the matrix which receives the values */ + ierr=CTA_Matrix_Create(CTA_DEFAULT_MATRIX, nsubmeasr, nsubmeasr, dt, CTA_NULL, &hsubmat); + ierr = CTA_Matrix_SetConstant(hsubmat, &dzero, dt); + + /* Call the getcovmat of the subsob */ + ierr = CTA_SObs_GetCovMat(subsob,hsubmat); + + /* Fill the combined matrix. It is a block matrix, so the first and third quadrant + should contain zeros. This is the case since we initialized the matrix in the beginning. + */ + subval = CTA_Malloc(sizeof(double)*nsubmeasr*nsubmeasr); + ierr = CTA_Matrix_GetVals(hsubmat, subval, nsubmeasr, nsubmeasr, dt); + + /* fill the main vector */ + + for (j=0; j < nsubmeasr; j++) { + for (k=0; k < nsubmeasr; k++) { + val[(offset+j)*nmeasr_tot + offset + k] = subval[j*nsubmeasr + k]; } + } + + offset = offset + nsubmeasr; + free(subval); + ierr = CTA_Matrix_Free(&hsubmat); + } + } + /* fill hmat with the main array */ + ierr=CTA_Matrix_SetVals(*hmat,val, nmeasr_tot, nmeasr_tot, dt); + free(val); + + + *retval = ierr; +}; + + +void CTAI_SObs_combine_export( + CTAI_SObs_combine *x, + CTA_Handle *userdata, + int *retval + ) +{ + int nofsubsobs, isob, ierr; + CTA_Handle subsob; + nofsubsobs = x->nofsubsobs; + for (isob=1; isob<=nofsubsobs; isob++) { + ierr=CTA_Vector_GetVal(x->subsobs,isob,&subsob,CTA_HANDLE); + ierr = CTA_SObs_Export(subsob, *userdata); + } + *retval = ierr; +}; + + +void CTAI_SObs_combine_Free( + CTAI_SObs_combine *x, + int *retval + ) +{ + int nofsubsobs, isob, ierr; + CTA_Handle subsob; + nofsubsobs = x->nofsubsobs; + for (isob=1; isob<=nofsubsobs; isob++) { + ierr=CTA_Vector_GetVal(x->subsobs,isob,&subsob,CTA_HANDLE); + ierr = CTA_SObs_Free(&subsob); + } + *retval = ierr; +} + + +/* + +*/ +void CTA_SObs_combine_initialise(CTA_SObsClass *hsobscl) +{ + CTA_Intf hintf=0; + CTA_Func h_func[CTA_SOBS_NUMFUNC]; + CTA_ObsDescrClass descrcl; + int i; + + // The vector h_func is filled with COSTA-function handles of the + // implementations in this file. + for (i=0;i +//#include +//#include +//#include +//#include + +#include "cta.h" +#include "cta_mem.h" + +#include "cta_datatypes.h" +#include "ctai_xml.h" +#include "cta_handles.h" +#include "ctai_handles.h" +#include "f_cta_utils.h" +#include "cta_model.h" +#include "cta_errors.h" +#include "cta_f77lapack.h" +#include "cta_reltable.h" +#include "cta_par.h" +#include "cta_message.h" + +#define MAX(a,b) (a>b ? a: b) +#define IDEBUG (0) + +#define CTA_SOBS_DEFINECLASS_F77 F77_CALL(cta_sobs_defineclass,CTA_SOBS_DEFINECLASS) + +#define CLASSNAME "CTA_Sobs_Factory" + +typedef struct { +CTA_Func functions[CTA_SOBS_NUMFUNC]; + CTA_ObsDescrClass descrcl; +} CTAI_SObsClass; + + +#undef METHOD +#define METHOD "CTAI_SObs_DuplicateClass" +int CTAI_SObs_DuplicateClass(CTA_SObsClass hsobscl, + CTA_SObsClass *hsobscl_dup){ + + CTAI_SObsClass *clsdata; + CTAI_SObsClass *clsdata_dup; + CTA_Func hfunc; + CTA_String sname; + int retval; + int i; + + /* Get class data containing all function pointers */ + retval=CTA_Handle_Check((CTA_Handle) hsobscl,CTA_SOBSCLASS); + if (retval!=CTA_OK) return retval; + retval=CTA_Handle_GetData((CTA_Handle) hsobscl,(void*) &clsdata); + if (retval!=CTA_OK) return retval; + + + /* Allocate new SObsClass object */ + clsdata_dup=CTA_Malloc(sizeof(CTAI_SObsClass)); + + + /* Duplicate all function pointers */ + for (i=0;ifunctions[i], &hfunc); + clsdata_dup->functions[i]=hfunc; + } + + + // Allocate new handle and return eror when unsuccesfull + CTA_String_Create(&sname); + CTA_Handle_GetName(hsobscl, sname); + if (IDEBUG>0) printf("CTAI_SObs_DuplicateClass: NAME OF SOBS CLASS IS %s\n", CTAI_String_GetPtr(sname)); + + + retval=CTA_Handle_Create(CTAI_String_GetPtr(sname), CTA_SOBSCLASS, + clsdata_dup, hsobscl_dup); + if (retval!=CTA_OK) { + char msg[256]; + sprintf(msg,"Cannot create a handle of type CTA_SOBSCLASS with name '%s'",CTAI_String_GetPtr(sname)); + CTA_WRITE_ERROR(msg); + } + CTA_String_Free(&sname); + return retval; +} + + +#undef METHOD +#define METHOD "DefineClass" +int CTA_SObs_DefineClass( + // INPUTS: + const char *name, // Name of the new stochobs class + const CTA_Func h_func[CTA_SOBS_NUMFUNC], // function handles to + // the implementations of the + // stochobs-class' functions. + CTA_ObsDescrClass descrcl, // + // OUTPUTS: + CTA_SObsClass *hsobscl // The (handle to) the new stochobs-class + ){ + + CTAI_SObsClass *data; + int retval; + int i; + + + /* Allocate new StochObs object */ + data=CTA_Malloc(sizeof(CTAI_SObsClass)); + + data->descrcl = descrcl; + for (i=0;ifunctions[i]=h_func[i]; + } + + // Allocate new handle and return eror when unsuccesfull + retval=CTA_Handle_Create(name,CTA_SOBSCLASS,data,hsobscl); + retval=CTA_Handle_GetData((CTA_Handle) *hsobscl,(void**) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + // return error when unsuccesfull + return retval; +} + +#undef METHOD +#define METHOD "DefineClass2" +int CTA_SObs_DefineClass2( + // INPUTS: + const char *name, // Name of the new stochobs class + const CTA_Func h_func[CTA_SOBS_NUMFUNC], // function handles to + // the implementations of the + // stochobs-class' functions. + CTA_ObsDescrClass descrcl, // + // OUTPUTS: + CTA_SObsClass *hsobscl // The (handle to) the new stochobs-class + ){ + + CTAI_SObsClass *data; + int retval; + int i; + + + /* Allocate new StochObs object */ + data=CTA_Malloc(sizeof(CTAI_SObsClass)); + + data->descrcl = descrcl; + for (i=0;ifunctions[i]=h_func[i]; + } + + // Allocate new handle and return eror when unsuccesfull + retval=CTA_Handle_Create(name,CTA_SOBSCLASS,data,hsobscl); + retval=CTA_Handle_GetData((CTA_Handle) *hsobscl,(void**) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + // return error when unsuccesfull + return retval; +} + + +/* Interfacing with Fortran */ + +CTAEXPORT void CTA_SOBS_DEFINECLASS_F77(char *name, int *h_func, int *descrcl, + int *hstochobscl, int *ierr, + int len_name){ + + char *c_name; + + /* create a c-string equivalent to name */ + c_name=CTA_Malloc((len_name+1)*sizeof(char)); + CTA_fstr2cstr(name,c_name,len_name); + + *ierr=CTA_SObs_DefineClass( name, (CTA_Func*) h_func, + (CTA_ObsDescrClass) *descrcl, (CTA_SObsClass*) hstochobscl); + + free(c_name); +} + +/** \brief Create a COSTA sobsl class from XML +* (load from methods from dynamic load library). +* +* \param cur_node I Current XML node +* \return Handle to create or CTA_NULL in case of an error. +*/ +CTA_SObsClass CTAI_XML_CreateSObsClass(xmlNode *cur_node) { + + CTA_Func hfunc; /* the new function */ + xmlChar *id = NULL; /* id of function in XML-tree */ + xmlChar *clsname = NULL; /* (lookup) name of the sobs class */ + int retval; /* return status of creation */ + CTA_Func h_func[CTA_SOBS_NUMFUNC];/* List of functions */ + xmlNode *func_node = NULL; /* values of children nodes */ + int i; + CTA_SObsClass hsobscl; /* function class */ + CTA_SObsClass hsobscl_old; + const char *name; + CTA_String sclsname; + CTA_ObsDescrClass descrcl; + + + if (IDEBUG>0) printf("CTAI_XML_CreateSObsClass: Start of function\n"); + /* Parse this node's attributes */ + /* Get id */ + id = xmlGetProp(cur_node, CTAI_XML_ID); + + /* Get class name */ + clsname = xmlGetProp(cur_node, CTAI_XML_NAME); + + /* Check whether this is a known sobs-class */ + CTA_String_Create(&sclsname); + CTA_String_Set(sclsname,(char *) clsname); + retval=CTA_Handle_Find(sclsname, CTA_SOBSCLASS, &hsobscl_old); + CTA_String_Free(&sclsname); + + if (retval==CTA_OK) { + /* duplicate sobs class */ + + CTAI_SObs_DuplicateClass(hsobscl_old, &hsobscl); + + /* sobs-class is known */ + if (IDEBUG>0) printf("CTAI_XML_CreateSObsClass: found sobsclass '%s'\n",clsname); + if (IDEBUG>0) printf("CTAI_XML_CreateSObsClass: duplication is created \n"); + + } else { + /* Set all function handles to CTA_NULL */ + for (i=0;ichildren; func_node; + func_node = func_node->next) { + if (0 == strcmp("CTA_FUNCTION", (char *) func_node->name)){ + hfunc=CTAI_XML_CreateFunc(func_node); + if (hfunc!=CTA_NULL){ + name = CTAI_Handle_GetName(hfunc); + if (0 == strcmp("CTA_SOBS_CREATE_SIZE", name)){ + h_func[CTA_SOBS_CREATE_SIZE ]=hfunc; + } else if (0 == strcmp("CTA_SOBS_CREATE_INIT", name)){ + h_func[CTA_SOBS_CREATE_INIT ]=hfunc; + } else if (0 == strcmp("CTA_SOBS_FREE", name)){ + h_func[I_CTA_SOBS_FREE ]=hfunc; + } else if (0 == strcmp("CTA_SOBS_CREATE_SELECTION", name)){ + h_func[CTA_SOBS_CREATE_SELECTION ]=hfunc; + } else if (0 == strcmp("CTA_SOBS_COUNT", name)){ + h_func[I_CTA_SOBS_COUNT ]=hfunc; + } else if (0 == strcmp("CTA_SOBS_OBS_DESCRIPTION", name)){ + h_func[CTA_SOBS_GET_OBS_DESCRIPTION ]=hfunc; + } else if (0 == strcmp("CTA_SOBS_GET_VALUES", name)){ + h_func[CTA_SOBS_GET_VALUES ]=hfunc; + } else if (0 == strcmp("CTA_SOBS_GET_TIMES", name)){ + h_func[CTA_SOBS_GET_TIMES ]=hfunc; + } else if (0 == strcmp("CTA_SOBS_GET_REALISATION", name)){ + h_func[CTA_SOBS_GET_REALISATION ]=hfunc; + } else if (0 == strcmp("CTA_SOBS_GET_EXPECTATION", name)){ + h_func[CTA_SOBS_GET_EXPECTATION ]=hfunc; + } else if (0 == strcmp("CTA_SOBS_EVALUATE_PDF", name)){ + h_func[CTA_SOBS_EVALUATE_PDF ]=hfunc; + } else if (0 == strcmp("CTA_SOBS_GET_COV_MATRIX", name)){ + h_func[CTA_SOBS_GET_COV_MATRIX ]=hfunc; + } else if (0 == strcmp("CTA_SOBS_GET_VARIANCE", name)){ + h_func[CTA_SOBS_GET_VARIANCE ]=hfunc; + } else if (0 == strcmp("CTA_SOBS_EXPORT", name)){ + h_func[I_CTA_SOBS_EXPORT ]=hfunc; + } else { + printf("CTAI_XML_CreateSObsClass :Warning found unknown node %s\n", func_node->name); + } + } + } + } + + /* Create a default observation description. Should be changed??? */ + CTA_ObsDescr_sqlite3_initialise(&descrcl); + + /* Create new function class */ + retval=CTA_SObs_DefineClass2((char *)clsname, h_func, descrcl, &hsobscl); + + /* Set id (=name) of handle */ + CTAI_Handle_SetName(hsobscl, (char *) id); + + } + CTAI_Handle_SetName(hsobscl, (char *) id); + xmlFree(id); + xmlFree(clsname); + + if (IDEBUG>0) printf("CTAI_XML_CreateSObsClass: End of function\n"); + return hsobscl; +} + + + diff --git a/costa/native/cta/src/cta_sobs_maori.c b/costa/native/cta/src/cta_sobs_maori.c new file mode 100644 index 000000000..9c647c698 --- /dev/null +++ b/costa/native/cta/src/cta_sobs_maori.c @@ -0,0 +1,74 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_obsdescr_netcdf.c $ +$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include "cta.h" + +void CTA_SObs_maori_initialize(CTA_SObsClass *hsobscl) +{ + CTA_Func h_func[CTA_SOBS_NUMFUNC]; + char *libraryName; + char *functionName; + CTA_ObsDescrClass hobsdescrcl; + int i; + + // The vector h_func is filled with function read from the default user dynamic library + libraryName="libleoda.so"; + for (i=0;i +#include +#include "cta_flush.h" + +#if HAVE_LIBNETCDF +#include +#endif + +#include "cta_sobs_netcdf.h" + + +#define MAX(a,b) (a>b ? a: b) +#define IDEBUG (0) + +/* This is a struct to store what we need */ +/* Specific for our case of Netcdf-OMI-data: The model will only ask two aspects: + - certain time selections (typically one-hour intervals) + -It should be situated in a certain rectangular area (lat/lon) +*/ + + + + +void CTAI_SObs_netcdf_Create_Size( + // OUTPUTS: + int *memsize, // The number of bytes which are necessary to store one + // CTAI_SObs_netcdf, with a pointer to the contents (data), + // but without the contents themselves. + int *retval // error code (see cta_datatypes.h for possible error codes) + ){ + *memsize=(int) sizeof(CTAI_SObs_netcdf); + *retval=CTA_OK; +}; + + + + + +void CTAI_SObs_netcdf_Create_Init( + + + +/* + Allocate the memory which is neccesary to store the data necessary for a + netcdf-observer +*/ + // IN-OUTPUTS + CTAI_SObs_netcdf *x, // The netcdf-observer for which the memory must + // be allocated + // INPUTS: + CTA_Handle userdata, // User data: database-name. Note: the string ends with the date + // It is possible that userdata is vector of two handles: (name,timeoffset) + // OUTPUTS + int *retval) // Error code. Possible error: Illegal data type +{ +#if HAVE_LIBNETCDF + char *dbname; + int ncid, varid, nmeasr, intNStations; + size_t nstations; + int ierr, i, len; + int dimids[1]; + CTA_Time tspan0; + float lon1,lon2,lat1,lat2; + int *flag; + CTAI_OMI_database *database; + double timeoffset; + CTA_Handle userdata1, userdata2; + + database = CTA_Malloc(sizeof(CTAI_OMI_database)); + + // Get the name of the database file: + + // Check if userdata is a string (database name) or a vector (name and timeoffset) + ierr=CTA_Handle_Check((CTA_Handle) userdata,CTA_VECTOR); + if (ierr!=CTA_OK) { + //then it should be only the string with the database name + ierr=CTA_Handle_Check((CTA_Handle) userdata,CTA_STRING); + if (ierr!=CTA_OK) { printf("sobs_netcdf: create :: userdata is neither a database name or vector\n");exit(-1);} + userdata1 = userdata; timeoffset = 0.0; + + if (IDEBUG>0) {printf("sobs_netcdf_create 1: timeoffset %f ",timeoffset);} + + } else { //it is a vector: deconstruct it into the database string and the timeoffset + + CTA_Vector_GetVal(userdata,1,&userdata1,CTA_HANDLE); + CTA_Vector_GetVal(userdata,2,&userdata2,CTA_HANDLE); + ierr=CTA_Vector_GetVal(userdata2,1,&timeoffset,CTA_DOUBLE); + if (ierr!=CTA_OK) { + printf("CTAI_SObs_netcdf :: Create: userdata2 is not a vector of doubles! \n"); + } + if (IDEBUG>0) {printf("sobs_netcdf_create 2: timeoffset %f ",timeoffset);} + } + + + // Allocate a name-string + *retval = CTA_String_GetLength(userdata1, &len); + if (*retval!=CTA_OK) return; + // Get the name of the database + dbname = CTA_Malloc((len+1)*sizeof(char)); + *retval = CTA_String_Get(userdata1, dbname); + if (*retval!=CTA_OK) return; + + // Open the database + ierr = nc_open(dbname, NC_NOWRITE, &ncid); + if (ierr != CTA_OK) + {printf("Error: could not open netcdf-file \n"); + *retval = -1; + return;} + + // Count the stations in this observer. + // Note: we denote each valid observation of the satellite as one station with a specific pace and time. + // The number of valid observations is O(10^4), while the total grid is typical O(10^5), in the case of + // OMI-data and our part of Europe: 350x501 and a time series of length 8000. + ierr = nc_inq_varid(ncid, "longitude", &varid); + if (ierr != CTA_OK) + {printf("Error: could not find datetime \n");*retval = -1; return;} + + ierr = nc_inq_vardimid(ncid, varid, dimids); // get ID of time dimension + if (ierr != CTA_OK) + {printf("Error: could not find dimension ID \n");*retval = -1; return;} + + ierr = nc_inq_dimlen(ncid, dimids[0], &nstations); /* get length of time series */ + if (ierr != CTA_OK) + {printf("Error: could not length of time series \n");*retval = -1; return;} + + + // initialise the selection relation table + CTA_RelTable_Create(&(x->selectionReltab)); + + flag=CTA_Malloc(nstations*sizeof(int)); + for (i=0; i < (int) nstations; i++) {flag[i] = i+1;} + + intNStations=(int) nstations; + CTA_RelTable_SetSelectVal(x->selectionReltab, flag, intNStations,CTA_INTEGER); + if (IDEBUG>0) {printf("ctai_sobs_netcdf_init: number of stations/length of time series: %d \n",intNStations);} + + + // initialise the time + CTA_Time_Create(&tspan0); + CTA_Time_SetSpan(tspan0, 0.0, 24.0); + + // initialise the spatial window. Maybe it should be read from netcdf-file (global attribute) + lon1 = -90.0; // far west + lon2 = 90.0; // far east + lat1 = 0.0; //equator + lat2= 90.0; //north pole + + // Measurements in this observer = Number of stations in our case! + nmeasr = nstations; + + // Set the observer-fields + x->tspan = tspan0; + x->bb_lon[0] = lon1; x->bb_lon[1] = lon2; + x->bb_lat[0] = lat1; x->bb_lat[1] = lat2; + + + + database->dbname = dbname; + database->ncid = ncid; + database->nusers = 1; + + x->database = database; + x->nstations = nstations; + x->nmeasr = nmeasr; + x->nmeasr_orig = nmeasr; + + + x->timeoffset = timeoffset; + +if (IDEBUG>0) {printf("END of cta_sobs_netcdf_create_init \n");} + *retval=CTA_OK; + +#else + printf("Error: CTAI_SObs_netcdf_Create_Init: COSTA is compiled without NETCDF support\n"); + *retval=CTA_NOT_IMPLEMENTED; +#endif + +}; + + + +void CTAI_SObs_netcdf_CreateSelString( +/* + Allocate the memory which is necessary to store the data necessary for a + netcdf-observer +*/ + // INPUTS + CTAI_SObs_netcdf *obsin,// The netcdf-observer of which a selection + // is to be made + CTA_Handle userdata, // User data: condition + // OUTPUTS + CTAI_SObs_netcdf *obsout, // The netcdf-observer which is a selection + // of observer obsin + int *retval) // Error code +{ + #if HAVE_LIBNETCDF + int len, ierr,i ; + char *condition; + double t1_in,t2_in, t1,t2, t1_out, t2_out; + float t1_f, t2_f; + int nstations, count_out, varid; + CTA_Vector hvec_time; + double *vec_time; + int *selvec, *flag; + CTA_RelTable reltab1; + float bbx_1, bbx_2, bby_1, bby_2; + float *nc_values1, *nc_values2, *nc_sel_values1, *nc_sel_values2; + + if (IDEBUG>0) {printf("DEBUG: CTAI_SObs_netcdf_CreateSelString START\n");} + if (IDEBUG>0) {printf("DEBUG: *obsin: %p \n", obsin);} + if (IDEBUG>0) {printf("DEBUG: *obsout: %p \n", obsout);} + + // Allocate a name-string + *retval = CTA_String_GetLength(userdata, &len); + if (*retval!=CTA_OK) return; + + // Get the condition + condition = CTA_Malloc((len+1)*sizeof(char)); + ierr = CTA_String_Get(userdata, condition); + if (ierr!=CTA_OK) return; + + // Link the database also to this observer and keep track of the number of + // observers using this database + obsout->database = obsin->database; + obsout->database->nusers++; + + + // IF the callling sequence has been : cta_sobs_createtimsel -> cta_sobs_createsel -> + // ctai_sobs_netcdf_createsel, THEN + // The condition is the string starting with 'time BETWEEN' + if (strstr(condition,"time BETWEEN") != NULL) { + //printf("condition string is: |%s| \n",condition); + ierr = sscanf(condition,"%*s %*s %f %*s %f",&t1_f,&t2_f); t1 = t1_f; t2=t2_f; + + } else { + t1 = 0.0; t2 = 24.0; + } + if (IDEBUG>0){ + printf("ctai_sobs_createselstring: time selection: %s %f %f %d \n",obsout->database->dbname,t1,t2,ierr); + } + + // It may be possible that a spatial restriction is given. + for (i=0; i < 2; i++) { + obsout->bb_lon[i] = obsin->bb_lon[i]; + obsout->bb_lat[i] = obsin->bb_lat[i]; + } + + /* spatial selection: */ + if (strstr(condition,"bounding box") !=NULL) { + ierr = sscanf(condition,"%*s %*s %f %f %f %f ",&bbx_1,&bbx_2, &bby_1,&bby_2); + obsout->bb_lon[0] = bbx_1; obsout->bb_lon[1] = bbx_2; + obsout->bb_lat[0] = bby_1; obsout->bb_lat[1] = bby_2; + // printf("SOBS_NETCDF_CREATESEL: condition |%s| \n",condition); + //printf("SOBS_NETCDF_CREATESEL: condition window is: %f %f %f %f \n",bbx_1,bbx_2,bby_1,bby_2); + } + + // Combine the input-condition and the condition of the input observer. + // In our netcdf-case, the condition is a combination of spatial window and timespan. + ierr = CTA_Time_GetSpan(obsin->tspan, &t1_in, &t2_in); + t1_out = MAX(t1_in,t1); + t2_out = MIN(t2_in,t2); + ierr = CTA_Time_Create(&(obsout->tspan)); + ierr = CTA_Time_SetSpan(obsout->tspan, t1_out, t2_out); + if (ierr != CTA_OK) {printf("time_create in ctai_sobs_netcdf_createsel FAILED!\n");} + + + + // We must recompute the number of stations after selection. Also, the relation table should be adjusted. + + + // first get the time vector. + ierr = CTA_Vector_Create(CTA_DEFAULT_VECTOR, obsin->nmeasr, CTA_DOUBLE, CTA_NULL, &hvec_time); + CTAI_SObs_netcdf_GetTimes(obsin, &hvec_time, &ierr); + vec_time = CTA_Malloc(obsin->nmeasr * sizeof(double)); + ierr=CTA_Vector_GetVals(hvec_time,vec_time, obsin->nmeasr, CTA_DOUBLE); + + + if (IDEBUG>5){ + printf("createsel: ierr time vector %d ; timespan: %f %f \n",ierr, t1_out, t2_out); + printf("vectime, %f %f \n",vec_time[0],vec_time[1]); + } + + // We use the relation table to investigate if the current selection should be restricted more. + + + nc_values1 = CTA_Malloc(obsin->nmeasr_orig * sizeof(float)); + nc_values2 = CTA_Malloc(obsin->nmeasr_orig * sizeof(float)); + nc_sel_values1 = CTA_Malloc(obsin->nmeasr * sizeof(float)); + nc_sel_values2 = CTA_Malloc(obsin->nmeasr * sizeof(float)); + nc_inq_varid(obsin->database->ncid, "longitude", &varid); + nc_get_var_float(obsin->database->ncid, varid, &nc_values1[0]); + nc_inq_varid(obsin->database->ncid, "latitude", &varid); + nc_get_var_float(obsin->database->ncid, varid, &nc_values2[0]); + CTA_RelTable_ApplyVal(obsin->selectionReltab, nc_values1, obsin->nmeasr_orig, + nc_sel_values1,obsin->nmeasr, CTA_REAL); + CTA_RelTable_ApplyVal(obsin->selectionReltab, nc_values2, obsin->nmeasr_orig, + nc_sel_values2,obsin->nmeasr, CTA_REAL); + + if (IDEBUG>10){ + printf("latitudes of obsin: \n"); + for (i=0; i < MIN(15,obsin->nmeasr); i++) { + printf(" %d %f\n ",i, nc_values2[i]); + } + printf("------------%f %f ------\n",obsout->bb_lat[0],obsout->bb_lat[1]); + } + + count_out = 0; + flag=CTA_Malloc((obsin->nmeasr)*sizeof(int)); + for (i=0; i < obsin->nmeasr; i++) { + flag[i]= 0; + if ((vec_time[i] >= t1_out) && (vec_time[i] <=t2_out) && + (nc_sel_values1[i] > obsout->bb_lon[0]) && (nc_sel_values1[i] < obsout->bb_lon[1]) + && (nc_sel_values2[i] > obsout->bb_lat[0]) && (nc_sel_values2[i] < obsout->bb_lat[1]) + ) { + flag[i] = 1; + //printf("selected: %d = %f ; lat %f \n",i,vec_time[i], nc_sel_values2[i]); + count_out = count_out + 1; + } + } + + free(nc_values1); free(nc_values2);free(nc_sel_values1);free(nc_sel_values2); + + nstations = count_out; + + + obsout->nstations = nstations; + obsout->nmeasr = nstations; + + + if (IDEBUG>0) {printf("selection made: now %d stations \n",nstations);} + + // make the relation table for this restriction + ierr = CTA_RelTable_Create(&reltab1); + selvec = CTA_Malloc(obsout->nmeasr * sizeof(int)); + + + count_out = 0; + for (i=0; i < obsin->nmeasr; i++) { + if (flag[i] > 0 && count_out < obsout->nmeasr) { + selvec[count_out] = i+1; + count_out = count_out + 1; + } + } + + ierr = CTA_RelTable_SetSelectVal(reltab1, selvec, obsout->nmeasr, CTA_INTEGER); + + if (IDEBUG>5){printf("createselstring: setselect ierr %d \n",ierr);} + + // now combine the two relation tables to get the whole selection + ierr = CTA_RelTable_Create(&(obsout->selectionReltab)); //moet dit????? + + ierr=CTA_RelTable_SetTableCombine(obsout->selectionReltab, obsin->selectionReltab, CTA_FALSE, + reltab1, CTA_FALSE); + + obsout->nmeasr_orig = obsin->nmeasr_orig; + obsout->timeoffset = obsin->timeoffset; + + //printf("createsel: ierr reltable_combine: end %d \n",ierr); + + free(vec_time);free(selvec); + free(flag); + ierr = CTA_RelTable_Free(&reltab1); + *retval = CTA_OK; + + if (IDEBUG>0){printf("createselstring %s : END: from %d to %d \n",obsin->database->dbname,obsin->nmeasr, obsout->nmeasr);} + if (IDEBUG>10){printf("createsel: END: lat.window %f to %f \n",obsout->bb_lat[0], obsout->bb_lat[1]);} +#else + printf("Error: CTAI_SObs_netcdf_CreateSelString: COSTA is compiled without NETCDF support\n"); + *retval=CTA_NOT_IMPLEMENTED; +#endif + +}; + + +void CTAI_SObs_netcdf_CreateSelRelTab(CTAI_SObs_netcdf *obsin, CTA_RelTable reltab, + CTAI_SObs_netcdf *obsout, int *retval) +{ +#if HAVE_LIBNETCDF + int nsel, ierr, i; + double t1_in,t2_in; + + + // printf(" CTAI_SObs_netcdf_CreateSelRelTab: START: NMEASR %d \n",obsin->nmeasr); + CTA_RelTable_Count(obsin->selectionReltab,&nsel); + + /* Note: this is a quick implementation. The selection is not again checked w.r.t. the spatial window. Use with care! */ + + // First: combine the two relation tables to get the whole selection + ierr = CTA_RelTable_Create(&(obsout->selectionReltab)); //moet dit????? + // printf("createselRelTab: ierr reltable_create van obsout: %d \n",ierr); + ierr=CTA_RelTable_SetTableCombine(obsout->selectionReltab, obsin->selectionReltab, CTA_FALSE, + reltab, CTA_FALSE); + + if (ierr != CTA_OK) { + printf("ERROR in CTAI_SObs_netcdf_CreateSelRelTab: %d \n",ierr); + *retval = ierr; + return; + } + + obsout->nmeasr_orig = obsin->nmeasr_orig; + obsout->timeoffset = obsin->timeoffset; + + CTA_RelTable_Count(obsout->selectionReltab,&nsel); + obsout->nmeasr = nsel; + // will this also work in case of empty selection? Yes. + + + // printf(" CTAI_SObs_netcdf_CreateSelRelTab: %s SELECTION NMEASR %d \n",obsin->database->dbname,nsel); + // printf(" more: handles *obsin %d \n", *obsin); + //printf(" more: handles *obsout %d \n", *obsout); + //printf(" more: handles obsin->reltab ;reltab %d %d \n",obsin->selectionReltab,reltab); + + for (i=0; i < 2; i++) { + obsout->bb_lon[i] = obsin->bb_lon[i]; + obsout->bb_lat[i] = obsin->bb_lat[i]; + } + + + // Link the database also to this observer and keep track of the number of + // observers using this database + obsout->database = obsin->database; + obsout->database->nusers++; + + obsout->nstations = obsout->nmeasr; + + + CTA_Time_GetSpan(obsin->tspan, &t1_in, &t2_in); + CTA_Time_Create(&(obsout->tspan)); + CTA_Time_SetSpan(obsout->tspan, t1_in, t2_in); + + + +#else + printf("Error: CTAI_SObs_netcdf_CreateSelRelTab: COSTA is compiled without NETCDF support\n"); + *retval=CTA_NOT_IMPLEMENTED; +#endif + +}; + + + + + +void CTAI_SObs_netcdf_CreateSel( +/* + Allocate the memory which is neccesary to store the data necessary for a + netcdf-observer +*/ + // INPUTS + CTAI_SObs_netcdf *obsin,// The netcdf-observer of which a selection + // is to be made + CTA_Handle *userdata_in, // User data: condition + // OUTPUTS + CTAI_SObs_netcdf *obsout, // The netcdf-observer which is a selection + // of observer obsin + int *retval) // Error code +{ + CTA_Handle userdata=*userdata_in; +#if HAVE_LIBNETCDF + CTA_Datatype datatype; + + if (IDEBUG > 0) { printf("ctai_sobs_netcf_createsel: obsin : %p \n", obsin);} + if (IDEBUG > 0) { printf("ctai_sobs_netcf_createsel: obsout : %p \n", obsout);} + + + CTA_Handle_GetDatatype(userdata, &datatype); + if (datatype==CTA_STRING){ + if (IDEBUG > 0) { printf("ctai_sobs_netcf_createsel: userdata STRING : %d \n",userdata);} + CTAI_SObs_netcdf_CreateSelString(obsin,userdata,obsout,retval); + } else if (datatype==CTA_RELTABLE){ + if (IDEBUG > 0) { printf("ctai_sobs_netcf_createsel: userdata RELTAB : %d \n",userdata);} + CTAI_SObs_netcdf_CreateSelRelTab(obsin,userdata,obsout,retval); + } else { + printf("Error in CTAI_SObs_netcdf_CreateSel: datatype (%d) of selection is not supported\n", datatype); + *retval=CTA_INPUT_OBJECT_NOT_SUPPORTED; + } +#else + printf("Error: CTAI_SObs_netcdf_CreateSel: COSTA is compiled without NETCDFsupport\n"); + *retval=CTA_NOT_IMPLEMENTED; +#endif +}; + + +void CTAI_SObs_netcdf_Count( +// Return the number of measurements in the observer + // INPUTS + CTAI_SObs_netcdf *x, // The StochObserver of which the number of measurements is + // returned + // OUTPUTS + int *nmeasr, + int *retval + ) +{ +#if HAVE_LIBNETCDF + *nmeasr = x->nmeasr; + *retval = CTA_OK; +#else + printf("Error: CTAI_SObs_netcdf_Count: COSTA is compiled without NETCDFsupport\n"); + *retval=CTA_NOT_IMPLEMENTED; +#endif +}; + + +void CTAI_SObs_netcdf_GetVals( +// Get all the values from a netcdf-StochObserver + // INPUTS + CTAI_SObs_netcdf *x, // StochObserver from which the measurements are + // returned + // OUTPUTS + CTA_Vector *hvec, // COSTA-vector containing the values + int *retval + ) +{ +#if HAVE_LIBNETCDF + int ierr, ncid, varid; + CTA_Datatype datatype; + float *nc_vcd_values; + CTA_Vector hvcd; + + /* Get datatype of vector */ + *retval=CTA_Vector_GetDatatype(*hvec,&datatype); + if (*retval!=CTA_OK) return; + + + /* read values from database. First read entire vcd-array. Note: netcdf file should first be opened */ + // Get the netcdf-id of the database + ncid = x->database->ncid; + // read the vcd vector. We know the size: nmeasr_orig. OMI-vcd vector contains one float per entry. + + // first allocate space for temporary arrays + nc_vcd_values = CTA_Malloc(x->nmeasr_orig * sizeof(float)); + + ierr = nc_inq_varid(ncid, "vcd_trop", &varid); + ierr = nc_get_var_float(ncid, varid, &nc_vcd_values[0]); + if (ierr != CTA_OK) + {printf("Error: could not read vcd_trop \n"); + *retval = -1; + return;} + +// now fill the values array with the vcd_trop. The +// relation table is used to get only the selection. + ierr = CTA_Vector_Create(CTA_DEFAULT_VECTOR, x->nmeasr_orig, CTA_REAL, CTA_NULL,&hvcd); + ierr = CTA_Vector_SetVals(hvcd, nc_vcd_values, x->nmeasr_orig, CTA_REAL); + ierr = CTA_RelTable_Apply(x->selectionReltab, hvcd, *hvec); + + + // TODO: use the kernel and lon/lat info to couple the no2-conc to specific imerid,izonal and iverti + // This is done in the model_obs! + + + + CTA_Vector_Free(&hvcd); + free(nc_vcd_values); + if (*retval!=CTA_OK) return; + + *retval = CTA_OK; +#else + printf("Error: CTAI_SObs_netcdf_GetVals: COSTA is compiled without NETCDFsupport\n"); + *retval=CTA_NOT_IMPLEMENTED; +#endif + + + +}; + +void CTAI_SObs_netcdf_GetTimes( +// Get all the time values from a netcdf-StochObserver + // INPUTS + CTAI_SObs_netcdf *x, // StochObserver from which the measurements are + // returned + // OUTPUTS + CTA_Vector *hvec, // COSTA-vector containing the values + int *retval + ) +{ +#if HAVE_LIBNETCDF + int i, ierr, ncid, varid, idx; + CTA_Datatype datatype; + int * nc_time_values; + double *selected_values; + int * shadow_vals; + int * selected_shadow_vals; + + /* Get datatype of vector */ + *retval=CTA_Vector_GetDatatype(*hvec,&datatype); + if (*retval!=CTA_OK) return; + + /* read values from database. First read entire time array. Note: netcdf file should first be opened */ + // Get the netcdf-id of the database + ncid = x->database->ncid; + // read the time vector. We know the size: nmeasr_orig. OMI-Time vector contains of six integers per entry. + + // first allocate space for temporary arrays + nc_time_values = CTA_Malloc(x->nmeasr_orig * 6 * sizeof(int)); + selected_values = CTA_Malloc(x->nmeasr * sizeof(double)); + shadow_vals = CTA_Malloc(x->nmeasr_orig * sizeof(int)); + selected_shadow_vals = CTA_Malloc(x->nmeasr * sizeof(int)); + + // use the relation table + for (i=0; i < x->nmeasr_orig; i++) { shadow_vals[i] = i+1 ;} + + ierr = CTA_RelTable_ApplyVal(x->selectionReltab, shadow_vals, x->nmeasr_orig, + selected_shadow_vals,x->nmeasr, CTA_INTEGER); + + ierr = nc_inq_varid(ncid, "date_time", &varid); + if (ierr != CTA_OK) + {printf("Error: could not read date_time : %d error ; ncid %d \n",ierr,ncid); + *retval = -1; + return;} + ierr = nc_get_var_int(ncid, varid, &nc_time_values[0]); + + +// now fill the values array with the times. The times have to be re-computed to hours and +// the relation table is used to get only the selection. + + + /* NOTE: times are adjusted with a certain offset! */ + for (i=0; i < x->nmeasr; i++) { + idx = selected_shadow_vals[i] ; + selected_values[i] = 1.0*nc_time_values[(idx-1)*6+3] + + nc_time_values[(idx-1)*6+4]/60.0 + + nc_time_values[(idx-1)*6+5]/3600.0 + x->timeoffset ; + + + } + + // printf("selected_shadow_vals %d \n",selected_shadow_vals[0]); + if (IDEBUG>0) {printf("sobs_netcdf_gettimes: time values: count, first 3: %s %d %f %f %f \n", + x->database->dbname,x->nmeasr, selected_values[0],selected_values[1],selected_values[2]);} + + + /* Set values in vector and clean work-memory */ + *retval = CTA_Vector_SetVals( *hvec, selected_values, x->nmeasr, datatype); + + free(selected_values); + free(nc_time_values); + free(selected_shadow_vals); + free(shadow_vals); + + if (*retval!=CTA_OK) return; + + *retval = CTA_OK; +#else + printf("Error: CTAI_SObs_netcdf_GetTimes: COSTA is compiled without NETCDFsupport\n"); + *retval=CTA_NOT_IMPLEMENTED; +#endif + +}; + +void CTAI_SObs_netcdf_GetVariances( +// Get all the variances of the measurements in a netcdf-StochObserver + // INPUTS + CTAI_SObs_netcdf *x, // StochObserver from which the measurements are + // returned + int *returnvar, //return variance (CTA_TRUE) or std (CTA_FALSE) + // OUTPUTS + CTA_Vector *hvec, // COSTA-vector containing the values + + int *retval + ) +{ +#if HAVE_LIBNETCDF + int ierr, ncid, varid; + float *stdv; + float *nc_values; + + // first allocate space for temporary arrays + nc_values = CTA_Malloc(x->nmeasr_orig * sizeof(float)); + stdv=CTA_Malloc(x->nmeasr*sizeof(float)); + + // Look up all the standard deviations of the measurements in the observer: + + /* read values from database. First read entire vcd-array. Note: netcdf file should first be opened */ + + // Get the netcdf-id of the database + ncid = x->database->ncid; + // read the vcd vector. We know the size: nmeasr_orig. OMI-vcd vector contains one float per entry. + ierr = nc_inq_varid(ncid, "sigma_vcd_trop", &varid); + if (ierr != 0) ierr = nc_get_var_float(ncid, varid, &nc_values[0]); + if (ierr != 0) + {printf("Error: could not read sigma_vcd_trop \n"); + *retval = -1; return;} + +// now fill the stdv array. The relation table is used to get only the selection. + ierr = CTA_RelTable_ApplyVal(x->selectionReltab, nc_values, x->nmeasr_orig, + stdv, x->nmeasr, CTA_REAL); + if (ierr != CTA_OK) + {printf("Error: reltable_applyval in netcdf_getvariances \n"); + *retval = -1; return;} + + // Fill in the variances in the variable *hvec + + *retval = CTA_Vector_SetVals( *hvec, stdv, x->nmeasr, CTA_REAL); + if (*retval!=CTA_OK) return; + + *returnvar = CTA_FALSE; //it is the std, not the var ! + *retval = CTA_OK; +#else + printf("Error: CTAI_SObs_netcdf_GetVariances: COSTA is compiled without NETCDFsupport\n"); + *retval=CTA_NOT_IMPLEMENTED; +#endif + +}; + + +void CTAI_SObs_netcdf_GetRealisation( +// Calculate stochastic realizations for all the measurements in a netcdf-StochObserver + // INPUTS + CTAI_SObs_netcdf *x, // StochObserver from which the measurements are + // returned + // OUTPUTS + CTA_Vector *hvec, // COSTA-vector containing the realizations + int *retval + ) +{ +#if HAVE_LIBNETCDF + double *values, *stdv; + int i, ierr, returnvar; + CTA_Vector hvalues, hstdv; + + values=CTA_Malloc(x->nmeasr*sizeof(double)); + stdv=CTA_Malloc(x->nmeasr*sizeof(double)); + + ierr = CTA_Vector_Create(CTA_DEFAULT_VECTOR, x->nmeasr, CTA_DOUBLE, CTA_NULL, &hvalues); + ierr = CTA_Vector_Create(CTA_DEFAULT_VECTOR, x->nmeasr, CTA_DOUBLE, CTA_NULL, &hstdv); + + // Look up the expectations + CTAI_SObs_netcdf_GetVals( x, &hvalues,&ierr); + if (ierr != CTA_OK) return; + + // Look up the standard deviations + CTAI_SObs_netcdf_GetVariances( x, &hstdv,&returnvar, &ierr); + if (ierr!=CTA_OK) return; + + ierr = CTA_Vector_GetVals(hvalues, values, x->nmeasr, CTA_DOUBLE); + ierr = CTA_Vector_GetVals(hstdv, stdv, x->nmeasr, CTA_DOUBLE); + + // Calculate realizations of all the measurements + for (i=0;inmeasr;i++) + { + double r; + *retval = CTA_rand_n(&r); + if (*retval!=CTA_OK) return; + + if (IDEBUG>0) {printf("cta_sobs_getrealisations:%d: val: %f std: %f r: %f \n", i, values[i],stdv[i],r);} + + if (returnvar == CTA_TRUE) { + values[i] = values[i] + r*stdv[i];} + else { + values[i] = values[i] + r*stdv[i]*stdv[i]; + } + + + } + + // Fill in the results in the vector + *retval = CTA_Vector_SetVals( *hvec, values, x->nmeasr, CTA_DOUBLE); + free(values); + free(stdv); + ierr = CTA_Vector_Free(&hvalues); + ierr = CTA_Vector_Free(&hstdv); + + + if (*retval!=CTA_OK) return; + + *retval = CTA_OK; +#else + printf("Error: CTAI_SObs_netcdf_GetRealisation: COSTA is compiled without NETCDFsupport\n"); + *retval=CTA_NOT_IMPLEMENTED; +#endif +}; + +void CTAI_SObs_netcdf_GetCovMat( +// Get all the variances of the measurements in a netcdf-StochObserver + // INPUTS + CTAI_SObs_netcdf *x, // StochObserver from which the measurements are + // returned + // OUTPUTS + CTA_Matrix *hmat, /* Covariance matrix */ + int *retval + ) +{ +#if HAVE_LIBNETCDF + double *matval; + int i, ierr, ncid, varid; + float *stdv; + float *nc_values; + + // first allocate space for temporary arrays + nc_values = CTA_Malloc(x->nmeasr_orig * sizeof(float)); + stdv=CTA_Malloc(x->nmeasr*sizeof(float)); + + // Look up all the standard deviations of the measurements in the observer: + + /* read values from database. First read entire vcd-array. Note: netcdf file should first be opened */ + // Get the netcdf-id of the database + ncid = x->database->ncid; + // read the vcd vector. We know the size: nmeasr_orig. OMI-vcd vector contains one float per entry. + ierr = nc_inq_varid(ncid, "sigma_vcd_trop", &varid); + ierr = nc_get_var_float(ncid, varid, &nc_values[0]); + if (IDEBUG>0) {printf("variance has been read %f \n",nc_values[0]);} + + + if (ierr != CTA_OK) + {printf("Error: could not read sigma_vcd_trop \n"); + *retval = -1; return;} + +// now fill the stdv array. The relation table is used to get only the selection. + ierr = CTA_RelTable_ApplyVal(x->selectionReltab, nc_values, x->nmeasr_orig, + stdv, x->nmeasr, CTA_REAL); + if (ierr != CTA_OK) + {printf("Error: reltable_applyval in netcdf_getcovmat; nobs:%d of %d \n",x->nmeasr,x->nmeasr_orig); + *retval = -1; return;} + + // Fill in the variances in the variable matval (matrix values) + + if (IDEBUG>0) {printf("filling matrix of size %d...%f %f \n",x->nmeasr,stdv[0],stdv[1]);} + + matval=CTA_Malloc(x->nmeasr * x->nmeasr * sizeof(double)); + for (i=0;i < x->nmeasr * x->nmeasr ; i++){matval[i]=0.0;} + + for (i=0;i < x->nmeasr ; i++){ + matval[i * x->nmeasr + i] = stdv[i]*stdv[i]; + } + free(stdv); + free(nc_values); + + *retval = CTA_Matrix_SetVals( *hmat, matval, x->nmeasr, x->nmeasr, + CTA_DOUBLE); + free(matval); + + if (IDEBUG>0) {printf("getcovmat completed! \n");} + + if (*retval!=CTA_OK) return; + *retval=CTA_OK; + + *retval = CTA_OK; +return ; +#else + printf("Error: CTAI_SObs_netcdf_GetCovMat: COSTA is compiled without NETCDFsupport\n"); + *retval=CTA_NOT_IMPLEMENTED; +#endif +}; + + +void CTAI_SObs_netcdf_export( + CTAI_SObs_netcdf *x, + CTA_Handle *userdata, + int *retval + ) +{ +#if HAVE_LIBNETCDF + // The user-data must be 0, 1 or 2 items: + // the output file (default: stdout) + // and a format-string (default: empty). + // + // Look up the output file + FILE *file; + int i, ierr; + CTA_Vector hvec_time, hvec_vals; + double dval1,dval2; + + if (CTA_Handle_Check(*userdata,CTA_FILE)==CTA_OK) { + *retval = CTA_File_Get(*userdata,&file); + + if (CTA_FLUSH_ALWAYS) CTA_Flush(); + + + // Print the contents of the observer. + fprintf(file,"%% This Observer has the following properties: \n"); + + if (x->nmeasr == 0 ) { + fprintf(file,"This is observer of an EMPTY database\n");} + else { + + + fprintf(file," dbasename='%s';\n",x->database->dbname); + fprintf(file," nusers=%d;\n",x->database->nusers); + fprintf(file," nmeasr_orig; %d nmeasr=%d;\n\n",x->nmeasr_orig, x->nmeasr); + + ierr = CTA_Vector_Create(CTA_DEFAULT_VECTOR, x->nmeasr, CTA_DOUBLE, CTA_NULL,&hvec_time); + ierr = CTA_Vector_Create(CTA_DEFAULT_VECTOR, x->nmeasr, CTA_DOUBLE, CTA_NULL,&hvec_vals); + CTAI_SObs_netcdf_GetTimes(x, &hvec_time, &ierr); + CTAI_SObs_netcdf_GetVals(x, &hvec_vals, &ierr); + + for (i=1; i <= x->nmeasr; i++) { + + CTA_Vector_GetVal(hvec_time, i, &dval1, CTA_DOUBLE); + CTA_Vector_GetVal(hvec_vals, i, &dval2, CTA_DOUBLE); + + fprintf(file," nr:%d time: %f value: %f \n",i,dval1, dval2); + } + CTA_Vector_Free(&hvec_time); + CTA_Vector_Free(&hvec_vals); + + } + }; +#else + printf("Error: CTAI_SObs_netcdf_export: COSTA is compiled without NETCDFsupport\n"); + *retval=CTA_NOT_IMPLEMENTED; +#endif + +}; + +void CTAI_SObs_netcdf_Free( + CTAI_SObs_netcdf *x, + int *retval + ) +{ +#if HAVE_LIBNETCDF + int ierr; + /* We only are allowed to close the database if all users (including all selectioned observers) are gone! */ + // if (x->ncid != 0) { + // if ((ierr = nc_close(x->ncid))){ + // printf("CTA_Sobs_netcdf_Free: cannot close netCDF-file: %s\n", + // nc_strerror(retval)); + // } + // } + x->database->nusers--; + // printf("cta_sobs_free (netcdf): now %d users remaining of database %s \n",x->database->nusers, x->database->dbname); + + ierr = CTA_RelTable_Free(&(x->selectionReltab)); + if (ierr !=CTA_OK) + {printf("CTA_Sobs_netcdf_Free: cannot close relation table \n");} + + if (x->database->nusers == 0) { + if ((ierr = nc_close(x->database->ncid))){ + printf("CTA_Sobs_netcdf_Free: cannot close netCDF-file: %s\n", + nc_strerror(ierr)); + } + free(x->database->dbname); + free(x->database); + + } + + *retval = CTA_OK; +#else + printf("Error: CTAI_SObs_netcdf_Free: COSTA is compiled without NETCDFsupport\n"); + *retval=CTA_NOT_IMPLEMENTED; +#endif +} + + + +/* + +*/ +void CTA_SObs_netcdf_initialise(CTA_SObsClass *hsobscl) +{ + CTA_Intf hintf=0; + CTA_Func h_func[CTA_SOBS_NUMFUNC]; + CTA_ObsDescrClass descrcl; + int i; + + // The vector h_func is filled with COSTA-function handles of the + // implementations in this file. + for (i=0;i +#include +#include "cta_mem.h" +#include "cta_defaults.h" +#include "cta_flush.h" +#include "cta_util_statistics.h" +#include "cta_util_sqlite3.h" +#include "cta_file.h" +#include "cta_string.h" +#include "cta_obsdescr_sqlite3.h" +#include "cta_sobs_sqlite3.h" +#include "cta_errors.h" +#include "cta_message.h" + +#define IDEBUG (0) + +#define CLASSNAME "CTA_SObs_sqlite3" +/* + +*/ +void CTA_SObs_sqlite3_initialise(CTA_SObsClass *hsobscl) +{ + CTA_Intf hintf=0; + CTA_Func h_func[CTA_SOBS_NUMFUNC]; + CTA_ObsDescrClass descrcl; + int i; + + // The vector h_func is filled with COSTA-function handles of the + // implementations in this file. + for (i=0;idb = db; + database->name = dbname; + database->nusers = 1; + + x->database = database; + x->nstations = nstations; + x->stations = stations; + x->nmeasr = nmeasr; + x->condition = condition; + *retval=CTA_OK; +}; + + +void CTAI_SObs_sqlite3_CreateSelString( +/* + Allocate the memory which is neccesary to store the data necessary for a + sqlite3-observer +*/ + // INPUTS + CTAI_SObs_sqlite3 *obsin,// The sqlite3-observer of which a selection + // is to be made + CTA_Handle userdata, // User data: condition + // OUTPUTS + CTAI_SObs_sqlite3 *obsout, // The sqlite3-observer which is a selection + // of observer obsin + int *retval) // Error code +{ + int len; + char *condition; + + // Get the condition + // Allocate a name-string + *retval = CTA_String_GetLength(userdata, &len); + if (*retval!=CTA_OK) return; + + // Get the condition + condition = CTA_Malloc((len+1)*sizeof(char)); + *retval = CTA_String_Get(userdata, condition); + if (*retval!=CTA_OK) return; + + // Link the database also to this observer and keep track of the number of + // observers using this database + obsout->database = obsin->database; + obsout->database->nusers++; + + // Combine the input-condition and the condition of the input observer. + if (strcmp(obsin->condition,"")==0) + { + obsout->condition = condition; + } + else + { + obsout->condition = CTA_Malloc(sizeof(char)* + ( strlen(condition) + + strlen(obsin->condition) + + strlen(" ( ) AND ( ) ") + ) ); + sprintf(obsout->condition,"(%s) AND (%s)",condition,obsin->condition); + if (IDEBUG) printf("CTAI_SObs_sqlite3_CreateSel: DEBUG condition=%s\n", + obsout->condition); + free(condition); + } + + // Count the stations in this observer + *retval = CTAI_util_sqlite3_select_values( + &(obsout->nstations), 1, CTA_INTEGER, obsout->database->db, + "count(distinct stations.station_id)",obsout->condition); + if (*retval!=CTA_OK) return; + + // Save the stations_ids of this observer in array stations + obsout->stations=CTA_Malloc((obsout->nstations)*sizeof(int)); + *retval = CTAI_util_sqlite3_select_values( + obsout->stations, obsout->nstations, CTA_INTEGER, obsout->database->db, + "distinct stations.station_id",obsout->condition); + if (*retval!=CTA_OK) return; + + // Count the measurements in this observer + *retval = CTAI_util_sqlite3_select_values( + &(obsout->nmeasr), 1, CTA_INTEGER, obsout->database->db, + "count(stations.station_id)",obsout->condition); + + if (*retval!=CTA_OK) return; + + *retval=CTA_OK; +}; + + +void CTAI_SObs_sqlite3_CreateSelRelTab( CTAI_SObs_sqlite3 *obsin, CTA_RelTable reltab, + CTAI_SObs_sqlite3 *obsout, int *retval) +{ + int nmeasr; + int isel, nsel; + int *rowIds, *selRowIds; + CTA_Handle hdum=0; + CTA_Vector vRowIds, vSelRowIds; + CTA_String sSel; + char sNum[10]; + char *sel; + + /* Get number of observations */ + CTAI_SObs_sqlite3_Count(obsin, &nmeasr, retval); + if (*retval!=CTA_OK) return; + + /* Create a vector with the rowID's */ + rowIds=CTA_Malloc(nmeasr*sizeof(int)); + *retval = CTAI_util_sqlite3_select_values( + rowIds, nmeasr, CTA_INTEGER, obsin->database->db, + "data._ROWID_", obsin->condition); + if (*retval!=CTA_OK) return; + + *retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nmeasr, CTA_INTEGER, hdum, &vRowIds); + if (*retval!=CTA_OK) return; + *retval=CTA_Vector_SetVals(vRowIds,rowIds,nmeasr,CTA_INTEGER); + if (*retval!=CTA_OK) return; + free(rowIds); + + /* Create a vector with the selected rowID's */ + CTA_RelTable_Count(reltab,&nsel); + selRowIds=CTA_Malloc(nsel*sizeof(int)); + *retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nsel, CTA_INTEGER, hdum, &vSelRowIds); + if (*retval!=CTA_OK) return; + + /* Apply the relation table */ + *retval=CTAI_RelTable_Apply(reltab, vRowIds, vSelRowIds, CTA_FALSE); + if (*retval!=CTA_OK){ + printf("Error in CTAI_SObs_sqlite3_CreateSelRelTab: Cannot apply selection relation table \n"); + return; + } + + /* Create a selection criterion for */ + *retval=CTA_Vector_GetVals(vSelRowIds,selRowIds,nsel,CTA_INTEGER); + if (*retval!=CTA_OK) return; + + sel=CTA_Malloc((2+nsel*(4+10+16)+2+1)*sizeof(char)); + strcpy(sel,"( "); //2 + for (isel=0;isel0) { + strcat(sel," OR "); //4 + } + sprintf(sNum,"%d",selRowIds[isel]); //10 + strcat(sel," data._ROWID_ = "); //16 + strcat(sel,sNum); + } + strcat(sel," )"); //2 + + printf("========================================\n"); + printf("%s\n",sel); + printf("========================================\n"); + + /* Create the actual selection */ + CTA_String_Create(&sSel); + CTA_String_Set(sSel,sel); + CTAI_SObs_sqlite3_CreateSelString(obsin, sSel, obsout, retval); + CTA_String_Free(&sSel); + if (*retval !=CTA_OK){ + printf("CTAI_SObs_sqlite3_CreateSelRelTab: Error creating selection '%s' \n",sel); + } + + /* Free work variables */ + free(selRowIds); + free(sel); +}; + + + + + + + + + + + + +void CTAI_SObs_sqlite3_CreateSel( +/* + Allocate the memory which is neccesary to store the data necessary for a + sqlite3-observer +*/ + // INPUTS + CTAI_SObs_sqlite3 *obsin,// The sqlite3-observer of which a selection + // is to be made + CTA_Handle *userdata_in, // User data: condition + // OUTPUTS + CTAI_SObs_sqlite3 *obsout, // The sqlite3-observer which is a selection + // of observer obsin + int *retval) // Error code +{ + CTA_Handle userdata=*userdata_in; + CTA_Datatype datatype; + + CTA_Handle_GetDatatype(userdata, &datatype); + if (datatype==CTA_STRING){ + CTAI_SObs_sqlite3_CreateSelString(obsin,userdata,obsout,retval); + } else if (datatype==CTA_RELTABLE){ + CTAI_SObs_sqlite3_CreateSelRelTab(obsin,userdata,obsout,retval); + } else { + printf("Error in CTAI_SObs_sqlite3_CreateSel: datatype (%d) of selection is not supported\n",datatype); + *retval=CTA_INPUT_OBJECT_NOT_SUPPORTED; + } +}; + + +void CTAI_SObs_sqlite3_Count( +// Return the number of measurements in the observer + // INPUTS + CTAI_SObs_sqlite3 *x, // The StochObesrver of which the number of measurements is + // returned + // OUTPUTS + int *nmeasr, + int *retval + ) +{ + /* Local variables */ + *nmeasr = x->nmeasr; + *retval = CTA_OK; +}; + + +void CTAI_SObs_sqlite3_GetVals( +// Get all the values from a sqlite3-StochObserver + // INPUTS + CTAI_SObs_sqlite3 *x, // StochObserver from which the measurements are + // returned + // OUTPUTS + CTA_Vector *hvec, // COSTA-vector containing the values + int *retval + ) +{ + void * values; + int size; + CTA_Datatype datatype; + + /* Get datatype of vector */ + *retval=CTA_Vector_GetDatatype(*hvec,&datatype); + if (*retval!=CTA_OK) return; + + /* allocate space for holding result */ + *retval = CTA_SizeOf(datatype,&size); + if (*retval!=CTA_OK) return; + + values=CTA_Malloc(x->nmeasr*size); + + /* read values from database */ + *retval = CTAI_util_sqlite3_select_values( + values, x->nmeasr, datatype, x->database->db, + "value", x->condition); + if (*retval!=CTA_OK) return; + + /* Set values in vector en clean work-memory */ + *retval = CTA_Vector_SetVals( *hvec, values, x->nmeasr, datatype); + free(values); + if (*retval!=CTA_OK) return; + + + + *retval = CTA_OK; +}; + +void CTAI_SObs_sqlite3_GetTimes( +// Get all the times from a sqlite3-StochObserver + // INPUTS + CTAI_SObs_sqlite3 *x, // StochObserver from which the measurements are + // returned + // OUTPUTS + CTA_Vector *hvec, // COSTA-vector containing the values + int *retval + ) +{ + void * values; + int size; + CTA_Datatype datatype; + + /* Get datatype of vector */ + *retval=CTA_Vector_GetDatatype(*hvec,&datatype); + if (*retval!=CTA_OK) return; + + /* allocate space for holding result */ + *retval = CTA_SizeOf(datatype,&size); + if (*retval!=CTA_OK) return; + + values=CTA_Malloc(x->nmeasr*size); + + /* read values from database */ + *retval = CTAI_util_sqlite3_select_values( + values, x->nmeasr, datatype, x->database->db, + "time", x->condition); + if (*retval!=CTA_OK) return; + + /* Set values in vector en clean work-memory */ + *retval = CTA_Vector_SetVals( *hvec, values, x->nmeasr, datatype); + free(values); + if (*retval!=CTA_OK) return; + + *retval = CTA_OK; +}; + +void CTAI_SObs_sqlite3_GetVariances( +// Get all the variances of the measurements in a sqlite3-StochObserver + // INPUTS + CTAI_SObs_sqlite3 *x, // StochObserver from which the measurements are + // returned + int *returnvar, //return variance (CTA_TRUE) or std (CTA_FALSE) + // OUTPUTS + CTA_Vector *hvec, // COSTA-vector containing the values + int *retval + ) +{ + double *var; + float *rvar; + int i; + int size; + CTA_Datatype datatype; + + /* Get datatype of vector */ + *retval=CTA_Vector_GetDatatype(*hvec,&datatype); + if (*retval!=CTA_OK) return; + + /* allocate space for holding result */ + *retval = CTA_SizeOf(datatype,&size); + if (*retval!=CTA_OK) return; + + var=CTA_Malloc(x->nmeasr*size); + // Look up all the standard deviations of the measurements in the observer + *retval = CTAI_util_sqlite3_select_values( + var, x->nmeasr, datatype, x->database->db, + "standarddeviation",x->condition); + if (*retval!=CTA_OK) return; + + // Calculate the variances (square of the standard deviations) + if (*returnvar==CTA_TRUE){ + if (datatype==CTA_REAL) { + rvar=(float*) var; + for (i=0;inmeasr;i++) + { rvar[i] = rvar[i]*rvar[i]; }; + }else + { + for (i=0;inmeasr;i++) + { var[i] = var[i]*var[i]; }; + } + } + + // Fill in the variances in the vector + *retval = CTA_Vector_SetVals( *hvec, var, x->nmeasr, datatype); + free(var); + if (*retval!=CTA_OK) return; + *retval=CTA_OK; + +}; + + +void CTAI_SObs_sqlite3_GetRealisation( +// Calculate stochastic realizations for all the measurements in a sqlite3-StochObserver + // INPUTS + CTAI_SObs_sqlite3 *x, // StochObserver from which the measurements are + // returned + // OUTPUTS + CTA_Vector *hvec, // COSTA-vector containing the realizations + int *retval + ) +{ + double *values, *stdv; + int i; + + values=CTA_Malloc(x->nmeasr*sizeof(double)); + stdv=CTA_Malloc(x->nmeasr*sizeof(double)); + // Look up the expectations + *retval = CTAI_util_sqlite3_select_values( + values, x->nmeasr, CTA_DOUBLE, x->database->db, + "value",x->condition); + if (*retval!=CTA_OK) return; + + // Look up the standard deviations + *retval = CTAI_util_sqlite3_select_values( + stdv, x->nmeasr, CTA_DOUBLE, x->database->db, + "standarddeviation",x->condition); + if (*retval!=CTA_OK) return; + + // Calculate realizations of all the measurements + for (i=0;inmeasr;i++) + { + double r; + *retval = CTA_rand_n(&r); + if (*retval!=CTA_OK) return; + values[i] = values[i] + r*stdv[i]; + } + + // Fill in the results in the vector + *retval = CTA_Vector_SetVals( *hvec, values, x->nmeasr, CTA_DOUBLE); + free(values); + free(stdv); + + if (*retval!=CTA_OK) return; + + *retval=CTA_OK; +}; + +void CTAI_SObs_sqlite3_GetCovMat( +// Get all the variances of the measurements in a sqlite3-StochObserver + // INPUTS + CTAI_SObs_sqlite3 *x, // StochObserver from which the measurements are + // returned + // OUTPUTS + CTA_Matrix *hmat, /* Covariance matrix */ + int *retval + ) +{ + double *matval; + int i; + double *stdv; + stdv=CTA_Malloc(x->nmeasr*sizeof(double)); + + // Look up all the standard deviations of the measurements in the observer + *retval = CTAI_util_sqlite3_select_values( + stdv, x->nmeasr, CTA_DOUBLE, x->database->db, + "standarddeviation",x->condition); + if (*retval!=CTA_OK) return; + + // Fill in the variances in the variable matval (matrix values) + + matval=CTA_Malloc(x->nmeasr * x->nmeasr * sizeof(double)); + for (i=0;i < x->nmeasr * x->nmeasr ; i++){matval[i]=0.0;} + + for (i=0;i < x->nmeasr ; i++){ + matval[i * x->nmeasr + i] = stdv[i]*stdv[i]; + } + free(stdv); + *retval = CTA_Matrix_SetVals( *hmat, matval, x->nmeasr, x->nmeasr, + CTA_DOUBLE); + free(matval); + if (*retval!=CTA_OK) return; + *retval=CTA_OK; + +}; + + + + +static int CTAI_SObs_printen(void *vfile, int argc, char **argv, char **azColName){ + int i; + FILE * file = *((FILE **) vfile); + for(i=0; inmeasr == 0 ) { + fprintf(file,"This is observer of an EMPTY database\n");} + else { + + + fprintf(file," dbasename='%s';\n",x->database->name); + fprintf(file," nusers=%d;\n",x->database->nusers); + fprintf(file," condition='%s';\n",x->condition); + fprintf(file," stations="); + + for (i=0; i < x->nstations; i++) + { fprintf(file,"%d ",x->stations[i]); } + fprintf(file,"\n"); + fprintf(file," nmeasr=%d;\n\n",x->nmeasr); + + // Optionally, print also the values, using the appropriate SQL-command + //if (strcmp(format,"values also")==0) + { + char *command; + char *zErrMsg; + + int rc; + // Set the constants necessary + const char *whole_table = // The expression which + // produces the complete obs-table + "stations inner join data on stations.station_id = data.station_id"; + + command=CTA_Malloc(( + strlen("select * from where") + + strlen(whole_table) + + strlen(x->condition))*sizeof(char)); + + if (strcmp(x->condition,"")==0) + { + sprintf(command,"select * from %s",whole_table); + } + else + { + sprintf(command,"select * from %s where %s", + whole_table,x->condition); + } + + zErrMsg = NULL; + + rc = sqlite3_exec(x->database->db, command, CTAI_SObs_printen, + (void *) &file, &zErrMsg); + // if (zErrMsg!=NULL){ + // printf("Error message from sqlite3: %s\n", &zErrMsg); + // } + free(command); + if( rc != SQLITE_OK ) {*retval=CTA_INVALID_COMMAND;return;}; + + } + } + + if (CTA_FLUSH_ALWAYS) CTA_Flush(); + + *retval=CTA_OK; + } + else { + *retval=CTA_FORMAT_NOT_SUPPORTED; + } +}; + +void CTAI_SObs_sqlite3_Free( + CTAI_SObs_sqlite3 *x, + int *retval + ) +{ + x->database->nusers--; + if (x->database->nusers==0) + { + sqlite3_close(x->database->db); + free(x->database->name); + free(x->database); + } + free(x->stations); + free(x->condition); + + *retval = CTA_OK; +} diff --git a/costa/native/cta/src/cta_sobs_user.c b/costa/native/cta/src/cta_sobs_user.c new file mode 100644 index 000000000..b7e022d65 --- /dev/null +++ b/costa/native/cta/src/cta_sobs_user.c @@ -0,0 +1,75 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_obsdescr_netcdf.c $ +$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include "cta.h" + + +void CTA_ObsDescr_user_initialize(CTA_ObsDescrClass *hobsdescrcl); + +void CTA_SObs_user_initialize(CTA_SObsClass *hsobscl) +{ + CTA_Func h_func[CTA_SOBS_NUMFUNC]; + char *libraryName; + char *functionName; + CTA_ObsDescrClass hobsdescrcl; + int i; + + // The vector h_func is filled with function read from the default user dynamic library + libraryName=userDefaultDynamicLibrary; + for (i=0;i +#include +#include "cta_mem.h" +#include "f_cta_utils.h" +#include "cta_errors.h" +#include "cta_vector.h" +#include "cta_string.h" +#include "cta_errors.h" +#include "cta_pack.h" +#include "cta_message.h" + +#define CTA_STRING_CREATE_F77 F77_CALL(cta_string_create,CTA_STRING_CREATE) +#define CTA_STRING_FREE_F77 F77_CALL(cta_string_free,CTA_STRING_FREE) +#define CTA_STRING_GETLENGTH_F77 F77_CALL(cta_string_getlength,CTA_STRING_GETLENGTH) +#define CTA_STRING_SET_F77 F77_CALL(cta_string_set,CTA_STRING_SET) +#define CTA_STRING_GET_F77 F77_CALL(cta_string_get,CTA_STRING_GET) +#define CTA_STRING_GETVALUE_F77 F77_CALL(cta_string_getvalue,CTA_STRING_GETVALUE) +#define CTA_STRING_CONC_F77 F77_CALL(cta_string_conc,CTA_STRING_CONC) +#define CTA_STRING_DUPLICATE_F77 F77_CALL(cta_string_duplicate,CTA_STRING_DUPLICATE) +#define CTA_STRING_EXPORT_F77 F77_CALL(cta_string_export,CTA_STRING_EXPORT) +#define CTA_STRING_IMPORT_F77 F77_CALL(cta_string_import,CTA_STRING_IMPORT) + +#define CLASSNAME "CTA_String" + +/* Struct holding all data associated to an COSTA String */ + +typedef struct { +int len; +char *str; +} CTAI_String; + +int CTA_String_Create(CTA_String *hstring){ + + CTAI_String *string; + int retval; + + /* allocate memory for new string object */ + string=CTA_Malloc(sizeof(CTAI_String)); + string->len=0; + string->str=CTA_Malloc(sizeof(char)); + string->str[0]=(char)0; + + /* Allocate new handle and return eror when unsuccesfull */ + retval=CTA_Handle_Create("string",CTA_STRING,string,hstring); + return retval; +} + +#undef METHOD +#define METHOD "Free" +int CTA_String_Free(CTA_String *hstring) +{ + CTAI_String *string; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) *hstring,CTA_STRING); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_string handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) *hstring,(void**) &string); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + free(string->str); + free(string); + + retval=CTA_Handle_Free(hstring); + return retval; +} + +#undef METHOD +#define METHOD "Set" +int CTA_String_Set(CTA_String hstring, const char* str) +{ +// Deallocate the current contents of a cta-string, then +// allocate a new string and fill it with the contents in the input + CTAI_String *string; + int retval; + int len; + + retval=CTA_Handle_Check((CTA_Handle) hstring,CTA_STRING); + + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_string handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hstring,(void**) &string); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + len=(int) strlen(str); + string->len=len; + free(string->str); + string->str=CTA_Malloc((len+1)*sizeof(char)); + + strncpy(string->str,str,len+1); + + return CTA_OK; +} + +#undef METHOD +#define METHOD "Get" +int CTA_String_Get(CTA_String hstring, char *str) +{ + CTAI_String *string; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) hstring,CTA_STRING); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_string handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) hstring,(void**) &string); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + strncpy(str,string->str,string->len+1); + + return CTA_OK; +} + +int CTA_String_Equals_Char(CTA_String hstring, const char *str0) +{ + char *str; + int retval, len; + + retval = CTA_String_GetLength(hstring,&len); + str=CTA_Malloc((len+1)*sizeof(char)); + retval = CTA_String_Get(hstring, str); + retval= CTA_FALSE; + if (strcmp(str,str0)==0) retval = CTA_TRUE; + return retval; +} + + +int CTA_Strings_Equal(CTA_String hstring1, CTA_String hstring2) +{ + char *str1, *str2; + int retval, len1, len2; + + retval = CTA_String_GetLength(hstring1,&len1); + str1=CTA_Malloc((len1+1)*sizeof(char)); + retval = CTA_String_Get(hstring1, str1); + retval = CTA_String_GetLength(hstring2,&len2); + str2=CTA_Malloc((len2+1)*sizeof(char)); + retval = CTA_String_Get(hstring2, str2); + retval= CTA_FALSE; + if (strcmp(str1,str2)==0) retval = CTA_TRUE; + return retval; +} + + + + +#undef METHOD +#define METHOD "GetValue" +int CTA_String_GetValue(CTA_String hstring, void *value, CTA_Datatype datatype) +{ + CTAI_String *string; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) hstring, CTA_STRING); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_time handle"); + return retval; + } + + CTA_Handle_GetData((CTA_Handle) hstring,(void**) &string); + switch (datatype) { + case CTA_INTEGER: + *(int*)value = atoi(string->str); + return CTA_OK; + case CTA_REAL: + *(float*)value = (float)atof(string->str); + return CTA_OK; + case CTA_DOUBLE: + *(double*)value = atof(string->str); + return CTA_OK; + case CTA_STRING: + return CTA_String_Get(hstring, (char*)value); + } + return CTA_INCOMPATIBLE_HANDLE; +} + +#undef METHOD +#define METHOD "Conc" +int CTA_String_Conc(CTA_String istring, CTA_String xstring) +{ + int retval, len1, len2, newlen; + char *hlpstr; + CTAI_String *string1; + CTAI_String *string2; + + retval=CTA_Handle_Check((CTA_Handle) istring, CTA_STRING); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_string handle"); + return retval; + } + retval=CTA_Handle_Check((CTA_Handle) xstring, CTA_STRING); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_string handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) istring, (void**) &string1); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) xstring, (void**) &string2); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + len1=string1->len; + len2=string2->len; + newlen=len1+len2; + + hlpstr=CTA_Malloc((newlen+1)*sizeof(char)); + strncpy(hlpstr, string1->str,len1); + strncpy(hlpstr+len1,string2->str,len2+1); + + string1->len=newlen; + free(string1->str); + string1->str=hlpstr; + + return CTA_OK; +} + +#undef METHOD +#define METHOD "GetLength" +int CTA_String_GetLength(CTA_String hstring, int *len) +{ + CTAI_String *string; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) hstring,CTA_STRING); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_string handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hstring,(void**) &string); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + *len = string->len; + + return CTA_OK; +} + +/** \brief Get a pointer to the contents of the string (INTERNAL USE) + * + * \param hstring I handle of the string + * \return Pointer to the string contents + */ +char *CTAI_String_GetPtr(CTA_String hstring) +{ + CTAI_String *str; + CTA_Handle_GetData((CTA_Handle) hstring,(void**) &str); + return str->str; +} + +/** \brief Allocate a string, created from a COSTA string (INTERNAL USE). + * + * \param hstr I COSTA string + * \return C string allocated by this function + */ +char *CTAI_String_Allocate(CTA_String hstr) { + int len; + char* str; + + str=NULL; + if (CTA_OK == CTA_String_GetLength(hstr, &len)){ + str = (char *)CTA_Malloc((1 + len) * sizeof(char)); + CTA_String_Get(hstr, str); + } + return str; +} + +/** \brief Create a duplication of a COSTA string + * + * \param hfrom I handle of string to copy + * \param hto O handle of created string + * \return CTA_OK if successful + */ +#undef METHOD +#define METHOD "Duplicate" +int CTA_String_Duplicate(CTA_String hfrom, CTA_String *hto) { + int retval; + + retval = CTA_String_Create(hto); + if (retval != CTA_OK) { + CTA_WRITE_ERROR("Cannot create string"); + return retval; + } + return CTA_String_Set(*hto, CTAI_String_GetPtr(hfrom)); +} + +#undef METHOD +#define METHOD "Export" +int CTA_String_Export(CTA_String hstring, CTA_Handle usrdata) +{ + int retval; + BOOL packout; + + packout = (CTA_Handle_Check(usrdata,CTA_PACK)==CTA_OK); + if (packout) { + CTAI_String *str; + CTA_Handle_GetData((CTA_Handle) hstring,(void**) &str); + + /* pack length and string itself */ + retval=CTA_Pack_Add(usrdata,&str->len,sizeof(int)); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_Pack_Add "); + return retval; + } + + retval=CTA_Pack_Add(usrdata,str->str,(str->len+1)*sizeof(char)); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_Pack_Add "); + return retval; + } + + } else { + return CTA_FORMAT_NOT_SUPPORTED; + } + return CTA_OK; +} + +#undef METHOD +#define METHOD "Import" +int CTA_String_Import(CTA_String hstring, CTA_Handle usrdata) +{ + int retval; + BOOL packout; + + packout = (CTA_Handle_Check(usrdata,CTA_PACK)==CTA_OK); + if (packout) { + CTAI_String *str; + CTA_Handle_GetData((CTA_Handle) hstring,(void**) &str); + + /* unpack length and string itself */ + retval=CTA_Pack_Get(usrdata,&(str->len),sizeof(int)); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_Pack_Get "); + return retval; + } + str->str=realloc(str->str,sizeof(int)*(str->len+1)); + retval=CTA_Pack_Get(usrdata,str->str,(str->len+1)*sizeof(char)); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_Pack_Get "); + return retval; + } + + } else { + return CTA_FORMAT_NOT_SUPPORTED; + } + return CTA_OK; +} + + +/* Interfacing with Fortran */ + +CTAEXPORT void CTA_STRING_CREATE_F77(int *hstring, int *ierr){ + *ierr=CTA_String_Create((CTA_String*) hstring); +} + +CTAEXPORT void CTA_STRING_FREE_F77(int *hstring, int *ierr){ + *ierr=CTA_String_Free((CTA_String*) hstring); +} + +CTAEXPORT void CTA_STRING_GETLENGTH_F77(int *hstring,int *len, int *ierr){ + *ierr=CTA_String_GetLength((CTA_String) *hstring, len); +} + +CTAEXPORT void CTA_STRING_SET_F77(int *hstring,char *str, int *ierr, int len_str){ + char *c_str; + + /* create a c-string equivalent to name */ + c_str=CTA_Malloc((len_str+1)*sizeof(char)); + CTA_fstr2cstr(str,c_str,len_str); + + *ierr=CTA_String_Set((CTA_String) *hstring,c_str); + + free(c_str); +} + + +CTAEXPORT void CTA_STRING_GET_F77(int *hstring,char *str, int *ierr, int len_str){ + char *c_str; + + c_str=CTA_Malloc((len_str+1)*sizeof(char)); + *ierr=CTA_String_Get((CTA_String) *hstring,c_str); + if (*ierr!=CTA_OK) return; + *ierr=CTA_cstr2fstr(c_str,str, len_str); + free(c_str); + +} + +CTAEXPORT void CTA_STRING_GETVALUE_F77(int *hstring,void *value,int *datatype, int *ierr){ + *ierr=CTA_String_GetValue((CTA_String) *hstring, value, *datatype); +} + +CTAEXPORT void CTA_STRING_CONC_F77(int *istring, int *xstring, int *ierr){ + *ierr=CTA_String_Conc((CTA_String) *istring, (CTA_String) *xstring); +} + + +CTAEXPORT void CTA_STRING_EXPORT_F77(int *hstring, int *usrdata, int *ierr){ + *ierr=CTA_String_Export((CTA_String) *hstring, (CTA_Handle) *usrdata); +} + +CTAEXPORT void CTA_STRING_IMPORT_F77(int *hstring, int *usrdata, int *ierr){ + *ierr=CTA_String_Import((CTA_String) *hstring, (CTA_Handle) *usrdata); +} + +CTAEXPORT void CTA_STRING_DUPLICATE_F77(int *hfrom, int *hto, int *ierr){ + *ierr=CTA_String_Duplicate((CTA_String) *hfrom, (CTA_Handle*) hto); +} + + diff --git a/costa/native/cta/src/cta_time.c b/costa/native/cta/src/cta_time.c new file mode 100644 index 000000000..80362174a --- /dev/null +++ b/costa/native/cta/src/cta_time.c @@ -0,0 +1,634 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_time.c $ +$Revision: 2751 $, $Date: 2011-09-09 08:58:46 +0200 (Fri, 09 Sep 2011) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include "cta_mem.h" +#include "f_cta_utils.h" +#include "ctai.h" +#include "cta_time.h" +#include "cta_pack.h" +#include "cta_errors.h" +#include "cta_file.h" +#include "cta_message.h" + +#define CTA_TIME_CREATE_F77 F77_CALL(cta_time_create,CTA_TIME_CREATE) +#define CTA_TIME_FREE_F77 F77_CALL(cta_time_free,CTA_TIME_FREE) +#define CTA_TIME_SETSPAN_F77 F77_CALL(cta_time_setspan,CTA_TIME_SETSPAN) +#define CTA_TIME_GETSPAN_F77 F77_CALL(cta_time_getspan,CTA_TIME_GETSPAN) +#define CTA_TIME_SETSTEP_F77 F77_CALL(cta_time_setstep,CTA_TIME_SETSTEP) +#define CTA_TIME_GETSTEP_F77 F77_CALL(cta_time_getstep,CTA_TIME_GETSTEP) +#define CTA_TIME_GETSTEP_F77 F77_CALL(cta_time_getstep,CTA_TIME_GETSTEP) +#define CTA_TIME_INSPAN_F77 F77_CALL(cta_time_inspan,CTA_TIME_INSPAN) +#define CTA_TIME_ISSTEP_F77 F77_CALL(cta_time_isstep,CTA_TIME_ISSTEP) +#define CTA_TIME_COPY_F77 F77_CALL(cta_time_copy,CTA_TIME_COPY) +#define CTA_TIME_COUNTSTEPS_F77 F77_CALL(cta_time_countsteps,CTA_TIME_COUNTSTEPS) +#define CTA_TIME_GETTIMESTEP_F77 F77_CALL(cta_time_gettimestep,CTA_TIME_GETTIMESTEP) +#define CTA_TIME_EXPORT_F77 F77_CALL(cta_time_export,CTA_TIME_EXPORT) +#define CTA_TIME_IMPORT_F77 F77_CALL(cta_time_import,CTA_TIME_IMPORT) +#define CTA_TIME_ISSPAN_F77 F77_CALL(cta_time_isspan,CTA_TIME_ISSPAN) + + +#define CLASSNAME "CTA_Time" + +/* Struct holding all data associated to an COSTA time span */ + +typedef struct { +BOOL isspan; +double t1; +double t2; +double step; +} CTAI_Time; + + +#undef METHOD +#define METHOD "Create" +int CTA_Time_Create(CTA_Time *htime){ + + CTAI_Time *time; + int retval; + + /* allocate memory for new time object */ + time=CTA_Malloc(sizeof(CTAI_Time)); + time->isspan=FALSE; + time->t1=0.0; + time->t2=0.0; + time->step=1.0; + + /* Allocate new handle and return error when unsuccesfull */ + retval=CTA_Handle_Create("time",CTA_TIME,time,htime); + if (retval) { + CTA_WRITE_ERROR("Cannot create time handle"); + return retval; + } + return CTA_OK; +} + +#undef METHOD +#define METHOD "Free" +int CTA_Time_Free(CTA_Time *htime){ + + CTAI_Time *time; + int retval; + + if (*htime==CTA_NULL) return CTA_OK; + + retval=CTA_Handle_Check((CTA_Handle) *htime,CTA_TIME); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_time handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) *htime,(void**) &time); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + free(time); + retval=CTA_Handle_Free(htime); + return retval; +} + +#undef METHOD +#define METHOD "IsSpan" +int CTA_Time_IsSpan(CTA_Time htime, int *isspan){ + + CTAI_Time *time; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) htime,CTA_TIME); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_Time handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) htime,(void**) &time); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + + if (time->isspan){ + *isspan=CTA_TRUE; + } else { + *isspan=CTA_FALSE; + } + return CTA_OK; +} + +#undef METHOD +#define METHOD "SetSpan" +int CTA_Time_SetSpan(CTA_Time htime,double tstart, double tend){ + + CTAI_Time *time; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) htime,CTA_TIME); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_time handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) htime,(void**) &time); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + time->t1 = tstart; + time->t2 = tend; + time->isspan = TRUE; + + if (tstart==tend){ + time->isspan = FALSE; + } + + + return CTA_OK; +} + +#undef METHOD +#define METHOD "GetSpan" +int CTA_Time_GetSpan(CTA_Time htime,double *tstart, double *tend){ + + CTAI_Time *time; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) htime,CTA_TIME); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_time handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) htime,(void**) &time); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + *tstart=time->t1; + *tend=time->t2; + + return CTA_OK; +} + +#undef METHOD +#define METHOD "SetStep" +int CTA_Time_SetStep(CTA_Time htime, double tstep){ + + CTAI_Time *time; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) htime,CTA_TIME); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_time handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) htime,(void**) &time); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + time->step=tstep; + return CTA_OK; +} + +#undef METHOD +#define METHOD "CountSteps" +int CTA_Time_CountSteps(CTA_Time htime, int *nsteps){ + + CTAI_Time *time; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) htime,CTA_TIME); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_time handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) htime,(void**) &time); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + if (time->isspan) { + *nsteps = (int) ((time->t2-time->t1)/time->step +0.5); + } else { + *nsteps = 0; + } + return CTA_OK; +} +#undef METHOD +#define METHOD "GetTimeStep" +int CTA_Time_GetTimeStep(CTA_Time htime, int istep, CTA_Time htime_step){ + CTAI_Time *time1, *time2; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) htime,CTA_TIME); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_time handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) htime,(void**) &time1); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + retval=CTA_Handle_Check((CTA_Handle) htime_step,CTA_TIME); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_time handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) htime_step,(void**) &time2); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + time2->step=time1->step; + if (time1->isspan) { + time2->t1=time1->t1+(double)(istep-1)*time1->step; + time2->t2=time1->t1+(double)(istep) *time1->step; + if (time2->t2>time1->t2) time2->t2=time1->t2; + } else { + time2->step=time1->step; + time2->t1= 0.0; + time2->t2=-1.0; + } + return CTA_OK; + +} + +#undef METHOD +#define METHOD "GetStep" +int CTA_Time_GetStep(CTA_Time htime, double *tstep){ + + CTAI_Time *time; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) htime,CTA_TIME); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_time handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) htime,(void**) &time); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + if (time->isspan) { + *tstep = time->step; + } else { + *tstep = 0; + } + return CTA_OK; +} + +#undef METHOD +#define METHOD "InSpan" +/* is timesub inside time */ +int CTA_Time_InSpan(CTA_Time htimesub, CTA_Time htime, BOOL *inspan){ + + CTAI_Time *time ; + CTAI_Time *timesub ; + int retval; + double tstartsub, tendsub, tstart, tend; + + retval=CTA_Handle_Check((CTA_Handle) htimesub,CTA_TIME); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_time handle"); + return retval; + } + retval=CTA_Handle_Check((CTA_Handle) htime,CTA_TIME); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_time handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) htime,(void**) &time); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) htimesub,(void**) ×ub); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + tstartsub = timesub->t1; + tendsub = timesub->t2; + tstart = time->t1; + tend = time->t2; + + if (tstartsub>tendsub || tstart> tend){ + *inspan=FALSE; + } + else { + *inspan = (tstartsub >= tstart && tendsub <= tend) ; + } + return CTA_OK; +} + +#undef METHOD +#define METHOD "IsStep" + /* is timesub inside time */ +int CTA_Time_IsStep(CTA_Time htime, double t, BOOL *isstep){ + + CTAI_Time *time ; + int retval, ifrac; + double eps, frac, diff; + + retval=CTA_Handle_Check((CTA_Handle) htime,CTA_TIME); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_time handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) htime,(void**) &time); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* Don't know what is really nice just try something */ + eps=M_EPS*100.0*MAX(fabs(time->t1),fabs(time->t2))+M_EPS; + + *isstep=FALSE; + if ((time->t1-eps)t2+eps)){ + frac=(t-time->t1)/time->step; + ifrac=(int) (frac +0.5); + diff=fabs(((double) ifrac) - frac); + if ( difft1 = tstart; + to->t2 = tend; + to->step = tstep; + to->isspan = TRUE; + retval = CTA_OK; + + return retval; + } + +#undef METHOD +#define METHOD "Export" +int CTA_Time_Export(CTA_Time htime, CTA_Handle hexport){ + CTA_Datatype datatype; + int retval; + FILE *file; //File pointer + CTAI_Time *time; + + /* Check handle */ + retval=CTA_Handle_Check((CTA_Handle) htime,CTA_TIME); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_time handle"); + return retval; + } + + /* Get data */ + retval=CTA_Handle_GetData((CTA_Handle) htime,(void**) &time); + if (retval!=CTA_OK) return retval; + + /* Get type of handle of hexport */ + retval=CTA_Handle_GetDatatype(hexport, &datatype); + if (retval != CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* perform the export depending on hexport */ + if (datatype==CTA_PACK){ + retval=CTA_Pack_Add(hexport,time,sizeof(CTAI_Time)); + } + else if (datatype==CTA_FILE){ + retval=CTA_File_Get(hexport,&file); + if (retval != CTA_OK) { + CTA_WRITE_ERROR("Cannot get file"); + return retval; + } + fprintf(file,"[%lg %lg %lg %d];\n", time->t1, time->t2, time->step, time->isspan); + } + else { + return CTA_FORMAT_NOT_SUPPORTED; + } + return CTA_OK; +} + +#undef METHOD +#define METHOD "Import" +int CTA_Time_Import(CTA_Time htime, CTA_Handle himport){ + CTA_Datatype datatype; + int retval; + CTAI_Time *time; + + /* Check handle */ + retval=CTA_Handle_Check((CTA_Handle) htime,CTA_TIME); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_time handle"); + return retval; + } + + /* Get data */ + retval=CTA_Handle_GetData((CTA_Handle) htime,(void**) &time); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* Get type of handle of himport */ + retval=CTA_Handle_GetDatatype(himport, &datatype); + if (retval != CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle datatype"); + return retval; + } + + /* perform the export depending on hexport */ + if (datatype==CTA_PACK){ + retval=CTA_Pack_Get(himport,time,sizeof(CTAI_Time)); + } + else if (datatype==CTA_FILE){ + return CTA_FORMAT_NOT_SUPPORTED; + } + else { + return CTA_FORMAT_NOT_SUPPORTED; + } + return CTA_OK; +} + + +void ctai_gregor(int ijuld, int *iy, int *im, int *id){ +/* id o gregorian day */ +/* ijuld i julian day */ +/* im o gregorian month */ +/* iy o gregorian year */ + + int l,n; +// put julian day in auxiliary variable + l = ijuld + 68569; + n = 4*l / 146097; + l = l - ( 146097*n + 3 ) / 4; + *iy = 4000 * ( l+1 ) / 1461001; + l = l - 1461 * *iy / 4 + 31; + *im = 80 * l / 2447; + +// calculate gregorian day + *id = l - 2447 * *im / 80; + l = *im / 11; + +// calculate gregorian month + *im = *im + 2 - 12 * l; + +// calculate gregorian year + *iy = 100 * ( n- 49 ) + *iy + l; +} + +int ctai_julian ( int iyear, int imonth, int iday ){ + +/* iday i gregorian day + imonth i gregorian month + iyear i gregorian year + julian name of this function +*/ + int ijuld, imon1; +/* + id gregorian day + ijuld julian daynumber + im gregorian month + imon1 auxiliary variable + iy gregorian year +*/ + + imon1 = (imonth-14)/12; + +// calculate julian day number + + ijuld = iday - 32075 + 1461 * ( iyear + 4800 + imon1 ) / 4 + + 367 * ( imonth - 2 - imon1 * 12 ) / 12 + - 3 * ( ( iyear + 4900 + imon1 ) / 100 ) / 4; + return ijuld; +} + +/* Interfacing with Fortran */ + +CTAEXPORT void CTA_TIME_CREATE_F77(int *htime, int *ierr){ + *ierr=CTA_Time_Create((CTA_Time *) htime); +} + +CTAEXPORT void CTA_TIME_FREE_F77(int *htime, int *ierr){ + *ierr=CTA_Time_Free((CTA_Time *) htime); +} +CTAEXPORT void CTA_TIME_SETSPAN_F77(int *htime,double *tstart, double *tend, int *ierr){ + *ierr=CTA_Time_SetSpan((CTA_Time) *htime,*tstart,*tend); +} +CTAEXPORT void CTA_TIME_GETSPAN_F77(int *htime,double *tstart, double *tend, int *ierr){ + *ierr=CTA_Time_GetSpan((CTA_Time) *htime,tstart, tend); +} +CTAEXPORT void CTA_TIME_SETSTEP_F77(int *htime,double *tstep, int *ierr){ + *ierr=CTA_Time_SetStep((CTA_Time) *htime,*tstep); +} +CTAEXPORT void CTA_TIME_GETSTEP_F77(int *htime,double *tstep, int *ierr){ + *ierr=CTA_Time_GetStep((CTA_Time) *htime,tstep); +} + +CTAEXPORT void CTA_TIME_INSPAN_F77(int *htimesub, int *htime, int *inspan, int *ierr){ + *ierr=CTA_Time_InSpan((CTA_Time) *htimesub, (CTA_Time) *htime, (BOOL*) inspan); +} + + +CTAEXPORT void CTA_TIME_ISSTEP_F77(int *htime, double *t, int *isstep, int *ierr){ + *ierr=CTA_Time_IsStep((CTA_Time) *htime, *t, (BOOL*) isstep); +} + +CTAEXPORT void CTA_TIME_COPY_F77(int *hfrom, int *hto, int *ierr){ + *ierr=CTA_Time_Copy((CTA_Time) *hfrom, (CTA_Time) *hto); +} + +CTAEXPORT void CTA_TIME_COUNTSTEPS_F77(int *htime, int *nsteps, int *ierr){ + *ierr=CTA_Time_CountSteps((CTA_Time) *htime, nsteps); +} + + +CTAEXPORT void CTA_TIME_GETTIMESTEP_F77(int *htime, int *istep, int *htime_step, + int *ierr){ + *ierr=CTA_Time_GetTimeStep((CTA_Time) *htime, *istep, + (CTA_Time) *htime_step); +} + + +CTAEXPORT void CTA_TIME_EXPORT_F77(int *htime, int *hexport, int *ierr){ + *ierr=CTA_Time_Export((CTA_Time) *htime, (CTA_Handle) *hexport); +} + + +CTAEXPORT void CTA_TIME_IMPORT_F77(int *htime, int *himport, int *ierr){ + *ierr=CTA_Time_Import((CTA_Time) *htime, (CTA_Handle) *himport); +} + + + +CTAEXPORT void CTA_TIME_ISSPAN_F77(int *htime, int *isspan, int *ierr){ + *ierr=CTA_Time_IsSpan((CTA_Time) *htime, isspan); +} + + + diff --git a/costa/native/cta/src/cta_tree.c b/costa/native/cta/src/cta_tree.c new file mode 100644 index 000000000..63ac6d365 --- /dev/null +++ b/costa/native/cta/src/cta_tree.c @@ -0,0 +1,771 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_tree.c $ +$Revision: 3361 $, $Date: 2012-07-04 16:52:30 +0200 (Wed, 04 Jul 2012) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include "cta_mem.h" +#include "f_cta_utils.h" +#include "cta_defaults.h" +#include "cta_handles.h" +#include "cta_time.h" +#include "cta_vector.h" +#include "cta_errors.h" +#include "cta_tree.h" +#include "ctai_datatypes.h" +#include "ctai_handles.h" +#include "ctai_string.h" +#include "ctai_vector.h" +#include "ctai_sobs.h" +#include "cta_message.h" + +#define SIGNAL_COUNT (-222) + +#define CTA_TREE_CREATE_F77 F77_CALL(cta_tree_create,CTA_TREE_CREATE) +#define CTA_TREE_FREE_F77 F77_CALL(cta_tree_free,CTA_TREE_FREE) +#define CTA_TREE_ADDHANDLE_F77 F77_CALL(cta_tree_addhandle,CTA_TREE_ADDHANDLE) +#define CTA_TREE_COUNT_HANDLES_F77 F77_CALL(cta_tree_counthandles,CTA_TREE_COUNTHANDLES) +#define CTA_TREE_COUNT_HANDLESSTR_F77 F77_CALL(cta_tree_counthandlesstr,CTA_TREE_COUNTHANDLESSTR) +#define CTA_TREE_GET_HANDLESTR_F77 F77_CALL(cta_tree_gethandlestr,CTA_TREE_GETHANDLESTR) +#define CTA_TREE_GET_VALUESTR_F77 F77_CALL(cta_tree_getvaluestr,CTA_TREE_GETVALUESTR) +#define CTA_TREE_GET_HANDLE_F77 F77_CALL(cta_tree_gethandle,CTA_TREE_GETHANDLE) +#define CTA_TREE_GET_VALUE_F77 F77_CALL(cta_tree_getvalue,CTA_TREE_GETVALUE) +#define CTA_TREE_COUNT_ITEMS_F77 F77_CALL(cta_tree_countitems,CTA_TREE_COUNTITEMS) +#define CTA_TREE_GET_ITEM_F77 F77_CALL(cta_tree_getitem,CTA_TREE_GETITEM) +#define CTA_TREE_GET_ITEMVALUE_F77 F77_CALL(cta_tree_getitemvalue,CTA_TREE_GETITEMVALUE) +#define CTA_TREE_PRINT_F77 F77_CALL(cta_tree_print,CTA_TREE_PRINT) + +#define CLASSNAME "CTA_Tree" + + +/* Struct holding all data associated to an COSTA tree */ +typedef struct { + CTA_Vector hvector; +} CTAI_Tree; + + +/* Create a COSTA tree */ +int CTA_Tree_Create(CTA_Tree *htree){ + + CTAI_Tree *tree; + int retval; + + /* Allocate memory for new tree object */ + tree=CTA_Malloc(sizeof(CTAI_Tree)); + retval = CTA_Vector_Create(CTA_DEFAULT_VECTOR, 0, CTA_HANDLE, CTA_HANDLE, &(tree->hvector)); + + /* Allocate new handle and return error when unsuccessful */ + retval=CTA_Handle_Create("tree",CTA_TREE,tree,htree); + return retval; +} + + +/* Release a COSTA tree */ +#undef METHOD +#define METHOD "Free" +int CTA_Tree_Free(CTA_Tree *htree) +{ + CTAI_Tree *tree; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) *htree,CTA_TREE); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_tree handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) *htree,(void**) &tree); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* Remove the actual tree */ + retval = CTA_Vector_Free(&(tree->hvector)); + free(tree); + retval=CTA_Handle_Free(htree); + return retval; +} + + +/* Add a COSTA handle to the COSTA tree */ +#undef METHOD +#define METHOD "AddHandle" +int CTA_Tree_AddHandle(CTA_Tree htree, const char *name, CTA_Handle hitem) +{ + CTAI_Tree *tree; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) htree, CTA_TREE); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_tree handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) htree,(void**) &tree); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + retval = CTAI_Handle_SetName(hitem, name); + if (retval != CTA_OK) { + CTA_WRITE_ERROR("Cannot set name"); + return retval; + } + + return CTA_Vector_AppendVal(tree->hvector, &hitem, CTA_HANDLE); +} + + +/* Get the next name from a path */ +/* A path must be separated by / or \ */ +static void CTAI_Tree_GetNextName(const char *path, char *name, const char **psplitpos) { + const char *splitpos; + const char *slashpos; + const char *bslashpos; + const char *srchpath; + + /* If the first character of the path is a separator, skip it */ + srchpath = path; + if (*srchpath == '/' || *srchpath == '\\') { + ++srchpath; + } + + /* Find first path component */ + slashpos = strchr(srchpath, '/'); + bslashpos = strchr(srchpath, '\\'); + if (slashpos && !bslashpos) { + splitpos = slashpos; + } else if (bslashpos && !slashpos) { + splitpos = bslashpos; + } else if (slashpos && bslashpos) { + splitpos = (slashpos < bslashpos ? slashpos : bslashpos); + } else{ + splitpos = NULL; + } + + if (splitpos) { + /* Get the path part upto the split position */ + strncpy(name, srchpath, splitpos - srchpath); + name[splitpos - srchpath] = '\0'; + } else { + /* No split position, take the whole path */ + strcpy(name, srchpath); + } + + *psplitpos = splitpos; +} + + +/* Find a COSTA handle in the COSTA tree */ +/* A path must be separated by / or \ */ +/* Returns the handle, or CTA_NULL if not found */ +static CTA_Handle CTAI_Tree_FindHandle(CTA_Tree htree, const char *path, BOOL skiproot) +{ + const CTAI_Tree *tree; + const char *splitpos; + char *name; + CTA_Handle h; + CTA_Handle hdefault; + CTA_Datatype dt; + int retval; + + /* If path is NULL, return CTA_NULL */ + if (!path) return CTA_NULL; + + /* Get vector */ + retval=CTA_Handle_GetData((CTA_Handle) htree,(void**) &tree); + if (retval!=CTA_OK) return retval; + + /* Allocate and get name */ + name = (char*)CTA_Malloc(1 + strlen(path)); + CTAI_Tree_GetNextName(path, name, &splitpos); + + /* If this name is the root, skip it */ + if (skiproot && splitpos && (*(splitpos + 1) != '\0') && + 0 == strcmp(CTAI_Handle_GetName(htree), name)) { + CTAI_Tree_GetNextName(splitpos + 1, name, &splitpos); + } + + /* Find the given element on this level */ + h = CTAI_Vector_FindHandle(tree->hvector, name); + free(name); + if (h == CTA_NULL) return h; + + + /* Check whether there is path remaining */ + if (splitpos && (*(splitpos + 1) != '\0')) + { + /* Check whether it's a tree */ + retval = CTA_Handle_GetDatatype(h, &dt); + if (retval != CTA_OK) return retval; + if (dt == CTA_TREE) { + /* It's a tree, recurse */ + return CTAI_Tree_FindHandle(h, splitpos + 1, FALSE); + } + } + + /* If the handle found is a tree with a default value, + return the default value */ + retval = CTA_Handle_GetDatatype(h, &dt); + if (retval != CTA_OK) return retval; + if (dt == CTA_TREE) { + /* It's a tree, try to find a default value */ + hdefault = CTAI_Tree_FindHandle(h, "", FALSE); + if (hdefault != CTA_NULL) { + h = hdefault; + } + } + + /* Return the handle found */ + return h; +} + + + +#undef METHOD +#define METHOD "GetHandleStr" +int CTA_Tree_GetHandleStr(CTA_Tree htree, char* str, CTA_Handle *hitem){ + CTA_String hstr; + int retval; + + retval=CTA_String_Create(&hstr); + if (retval != CTA_OK) { + CTA_WRITE_ERROR("Cannot create string"); + return retval; + } + retval=CTA_String_Set(hstr,str); + if (retval != CTA_OK) { + CTA_WRITE_ERROR("Cannot set string"); + return retval; + } + retval=CTA_Tree_GetHandle(htree, hstr, hitem); + if (retval != CTA_OK) { + char message[1024]; + sprintf(message,"Cannot get tree handle: %s \n",str); + CTA_WRITE_INFO(message); + return retval; + } + retval=CTA_String_Free(&hstr); + if (retval != CTA_OK) { + CTA_WRITE_ERROR("Cannot free string"); + return retval; + } + return CTA_OK; +}; + + +#undef METHOD +#define METHOD "GetValueStr" +int CTA_Tree_GetValueStr(CTA_Tree htree, char* str, void *value, CTA_Datatype datatype){ + CTA_String hstr; + int retval; + char msg[256]; + + retval=CTA_String_Create(&hstr); + if (retval != CTA_OK) { + CTA_WRITE_ERROR("Cannot create string"); + return retval; + } + retval=CTA_String_Set(hstr,str); + if (retval != CTA_OK) { + CTA_WRITE_ERROR("Cannot set string"); + return retval; + } + retval=CTA_Tree_GetValue(htree, hstr, value, datatype); + if (retval != CTA_OK) { + sprintf(msg,"Cannot get value '%s' from tree",str); + CTA_WRITE_ERROR(msg); + return retval; + } + retval=CTA_String_Free(&hstr); + if (retval != CTA_OK) { + CTA_WRITE_ERROR("Cannot free string"); + return retval; + } + return CTA_OK; +}; + +/* Find a COSTA handle in the COSTA tree */ +/* A path must be separated by / or \ */ +#undef METHOD +#define METHOD "GetHandle" +int CTA_Tree_GetHandle(CTA_Tree htree, CTA_String path, CTA_Handle *hitem) +{ + int retval; + const char *pstr; + + + retval=CTA_Handle_Check((CTA_Handle) htree, CTA_TREE); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_tree handle"); + return retval; + } + + pstr = CTAI_String_GetPtr(path); + if (pstr) { + *hitem = CTAI_Tree_FindHandle(htree, pstr, TRUE); + return (*hitem == CTA_NULL ? CTA_ITEM_NOT_FOUND : CTA_OK); + } + *hitem = 0; + return CTA_ITEM_NOT_FOUND; +} + + +/** \brief Return the value of a COSTA handle from the COSTA tree (by path) + * + * \note In case of trees with default values, returns the default value. + * + * \param htree I handle of the tree instance + * \param path I path of the item, separated by / or \ + * \param value O value of the COSTA item, or CTA_NULL in case of not found + * \param datatype I datatype of the value specified + * \return CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found + */ +int CTA_Tree_GetValue(CTA_Tree htree, CTA_String path, void *value, CTA_Datatype datatype) +{ + int retval; + CTA_Handle hitem; + + retval = CTA_Tree_GetHandle(htree, path, &hitem); + if (retval == CTA_OK) { + retval = CTA_Handle_GetValue(hitem, value, datatype); + } else { + *(CTA_Handle *)value = CTA_NULL; + } + return retval; +} + +/* Count the number of COSTA handles in the COSTA tree */ +/* A path must be separated by / or \ */ +static int CTAI_Tree_CountHandles(CTA_Tree htree, const char *path, BOOL skiproot) +{ + const CTAI_Tree *tree; + const CTAI_Tree *subtree; + const char *splitpos; + const char *skippos = NULL; + char *name; + CTA_Handle h; + CTA_Datatype dt; + int retval; + int count = 0; + BOOL root_skipped = FALSE; + + /* If path is NULL, return CTA_NULL */ + if (!path) return CTA_NULL; + + /* Get vector */ + retval=CTA_Handle_GetData((CTA_Handle) htree,(void**) &tree); + if (retval!=CTA_OK) return -1; + + /* Allocate and get name */ + name = (char*)CTA_Malloc(1 + strlen(path)); + CTAI_Tree_GetNextName(path, name, &splitpos); + + /* If this name is the root, skip it */ + if (skiproot && splitpos && (*(splitpos + 1) != '\0') && + 0 == strcmp(CTAI_Handle_GetName(htree), name)) { + root_skipped = TRUE; + skippos = splitpos; + CTAI_Tree_GetNextName(splitpos + 1, name, &splitpos); + } + + /* Find the given element on this level */ + h = CTAI_Vector_FindHandle(tree->hvector, name); + if (h == CTA_NULL) { + count = -1; + goto return_pos; + } + + /* Check whether there is path remaining */ + if (splitpos && (*(splitpos + 1) != '\0')) + { + /* Check whether it's a tree */ + retval = CTA_Handle_GetDatatype(h, &dt); + if (retval != CTA_OK) { + count = -1; + goto return_pos; + } + if (dt == CTA_TREE) { + /* It's a tree, recurse */ + count = CTAI_Tree_CountHandles(h, splitpos + 1, FALSE); + if (count == SIGNAL_COUNT) { + retval=CTA_Handle_GetData((CTA_Handle) h,(void**) &subtree); + if (retval != CTA_OK) { + count = -1; + goto return_pos; + } + count = CTAI_Vector_CountHandles(subtree->hvector, splitpos + 1); + } + } + } else { + if (root_skipped) { + if (skippos && (*(skippos + 1) != '\0')) { + /* No recursion yet: count elements on root level */ + retval=CTA_Handle_GetData((CTA_Handle)htree, (void**)&subtree); + if (retval != CTA_OK) { + count = -1; + goto return_pos; + } + count = CTAI_Vector_CountHandles(subtree->hvector, skippos + 1); + } else { + /* There is only one root */ + count = 1; + } + } else { + /* Return SIGNAL_COUNT to signal that the count should be determined */ + count = SIGNAL_COUNT; + } + } + +return_pos: + free(name); + return count; +} + + +/** \brief Counts the number of COSTA handles specified by the given path. + * + * \param htree I handle of the tree instance + * \param path I path of the item, separated by / or \ + * \param hitem O number of items found + * \return CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found + */ +#undef METHOD +#define METHOD "CountHandles" +int CTA_Tree_CountHandles(CTA_Tree htree, CTA_String path, int *count) { + int retval; + const char *pstr; + + retval=CTA_Handle_Check((CTA_Handle) htree, CTA_TREE); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_tree handle"); + return retval; + } + + pstr = CTAI_String_GetPtr(path); + if (pstr) { + *count = CTAI_Tree_CountHandles(htree, pstr, TRUE); + return (*count < 0 ? CTA_INTERNAL_ERROR : CTA_OK); + } + *count = -1; + return CTA_INTERNAL_ERROR; +} + +/** \brief Counts the number of COSTA handles specified by the given path. + * + * \param htree I handle of the tree instance + * \param path I path of the item, separated by / or \ + * \param hitem O number of items found + * \return CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found + */ +#undef METHOD +#define METHOD "CountHandleStr" +int CTA_Tree_CountHandlesStr(CTA_Tree htree, char *path, int *count) { + int retval; + + retval=CTA_Handle_Check((CTA_Handle) htree, CTA_TREE); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_tree handle"); + return retval; + } + + if (path) { + *count = CTAI_Tree_CountHandles(htree, path, TRUE); + return (*count < 0 ? CTA_INTERNAL_ERROR : CTA_OK); + } + *count = -1; + return CTA_INTERNAL_ERROR; +} + + + +/** \brief Return the number of elements on the current level of the COSTA tree + * + * \param htree I handle of the tree level + * \param count O number of elements on the current tree level + * \return CTA_OK if successful + */ +#undef METHOD +#define METHOD "CountItems" +int CTA_Tree_CountItems(CTA_Tree htree, int *count) { + const CTAI_Tree *tree; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) htree, CTA_TREE); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("cta_handle_check in cta_tree_countItems failed."); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) htree,(void**) &tree); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("cta_handle_getdata in cta_tree_countItems failed."); + return retval; + } + return CTA_Vector_GetSize(tree->hvector, count); + } + + +/** \brief Returns an element on the current level of the COSTA tree + * + * \param htree I handle of the tree level + * \param index I index of the item to return. 1 <= index <= CTA_Tree_CountItems() + * \param hitem O handle of the item to return + * \return CTA_OK if successful + */ +#undef METHOD +#define METHOD "GetItem" +int CTA_Tree_GetItem(CTA_Tree htree, int index, CTA_Handle *hitem) { + const CTAI_Tree *tree; + int retval; + + retval=CTA_Handle_Check((CTA_Handle) htree, CTA_TREE); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_tree handle"); + return retval; + }; + retval=CTA_Handle_GetData((CTA_Handle) htree,(void**) &tree); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + return CTA_Vector_GetVal(tree->hvector, index, hitem, CTA_HANDLE); +} + + +/** \brief Return the value of a COSTA handle from the COSTA tree (by index) + * + * \note In case of trees with default values, returns the default value. + * + * \param htree I handle of the tree instance + * \param index I index of the item + * \param value O value of the COSTA item, or CTA_NULL in case of not found + * \param datatype I datatype of the value specified + * \return CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found + */ +int CTA_Tree_GetItemValue(CTA_Tree htree, int index, void *value, CTA_Datatype datatype) +{ + int retval; + CTA_Handle hitem; + + retval = CTA_Tree_GetItem(htree, index, &hitem); + if (retval == CTA_OK) { + retval = CTA_Handle_GetValue(hitem, value, datatype); + } else { + *(CTA_Handle *)value = CTA_NULL; + } + return retval; +} + + +/** \brief Print a COSTA tree to STDOUT +* +* \param htree I handle of the tree +* \return CTA_OK if successful +*/ +static void ctatree_print(CTA_Handle h, int depth) +{ + int i; + int j; + int n; + int count; + double t1, t2, ts; + CTA_Datatype dt; + CTAI_Tree* tree; + CTA_Handle val; + const char* text; + + /* Print depth */ + + + for (i = 0; i < depth; ++i) { + printf("| "); + } + + CTA_Handle_GetDatatype(h, &dt); + /* Print handle name */ + printf("name: '%s': ", CTAI_Handle_GetName(h)); + + /* If this handle is a tree, handle each argument at depth + 1 */ + switch (dt) { + case CTA_TREE: + printf("tree\n"); + + /* Get vector */ + CTA_Handle_GetData((CTA_Handle) h,(void**) &tree); + + // ce test of het wel vector is + CTA_Handle_GetDatatype(h, &dt); + //if (dt == CTA_TREE) printf(" inderdaad een tree %d \n",retval ); + + CTA_Tree_CountItems(h, &count) ; + + /* Get number of elements in the current tree */ + CTA_Vector_GetSize(tree->hvector, &n); + + /* Recurse */ + for (i = 1; i <= n; ++i) { + CTA_Vector_GetVal(tree->hvector, i, &val, CTA_HANDLE); + ctatree_print(val, depth + 1); + } + break; + case CTA_STRING: + /* Print string text */ + printf("string: '%s'\n", CTAI_String_GetPtr(h)); + break; + case CTA_TIME: + /* Print time span */ + CTA_Time_GetSpan(h, &t1, &t2); + CTA_Time_GetStep(h, &ts); + printf("time: from %lg to %lg, step %lg\n", t1, t2, ts); + break; + case CTA_VECTOR: + /* Print vector */ + CTA_Vector_GetSize(h, &n); + CTA_Vector_GetDatatype(h, &dt); + printf("vector: type='%s', dim=%d\n", CTAI_Type2String(dt), n); + for (j = 1; j <= n; ++j) { + for (i = 0; i <= depth; ++i) { + printf("| "); + } + text = CTAI_Vector_GetStringVal(h, j); + printf("element[%d] = '%s'\n", j, text); + free((char*)text); + } + break; + case CTA_SOBS: + /* Print stochastic observer */ + val = CTAI_SObs_GetUserData(h, 1); + if (val != CTA_NULL) { + printf("stochastic observer: database='%s'\n", CTAI_String_GetPtr(val)); + } else { + printf("stochastic observer\n"); + } + break; + default: + printf("unknown data type\n"); + break; + } +} + + +/** \brief Print a COSTA tree to STDOUT +* +* \param htree I handle of the tree +* \return CTA_OK if successful +*/ +#undef METHOD +#define METHOD "Print" +int CTA_Tree_Print(CTA_Tree htree) +{ + int retval; + + retval=CTA_Handle_Check((CTA_Handle) htree, CTA_TREE); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_tree handle"); + return retval; + } + + ctatree_print(htree, 0); + return CTA_OK; +} + +/* Interfacing with Fortran */ +CTAEXPORT void CTA_TREE_CREATE_F77(int *htree, int *ierr) { + *ierr=CTA_Tree_Create((CTA_Tree*) htree); +} + +CTAEXPORT void CTA_TREE_FREE_F77(int *htree, int *ierr) { + *ierr=CTA_Tree_Free((CTA_Tree*) htree); +} + +CTAEXPORT void CTA_TREE_ADDHANDLE_F77(int *htree, int *hname, int *hitem, int *ierr) { + *ierr=CTA_Tree_AddHandle((CTA_Tree)*htree, CTAI_String_GetPtr(*hname), (CTA_Handle)*hitem); +} + +CTAEXPORT void CTA_TREE_COUNT_HANDLES_F77(int *htree, int *hpath, int *count, int *ierr) { + *ierr=CTA_Tree_CountHandles((CTA_Tree)*htree, (CTA_String)*hpath, count); +} + +CTAEXPORT void CTA_TREE_COUNT_HANDLESSTR_F77(int *htree, char *path, int *count, int *ierr, int len_str) { + char *c_str; + + /* create a c-string equivalent to name */ + c_str=CTA_Malloc((len_str+1)*sizeof(char)); + CTA_fstr2cstr(path,c_str,len_str); + + + *ierr=CTA_Tree_CountHandlesStr((CTA_Tree)*htree, c_str, count); + free(c_str); +} + + + +CTAEXPORT void CTA_TREE_GET_HANDLESTR_F77(int *htree, char* str, int *hitem, int* ierr, int len_str){ + char *c_str; + + /* create a c-string equivalent to name */ + c_str=CTA_Malloc((len_str+1)*sizeof(char)); + CTA_fstr2cstr(str,c_str,len_str); + + *ierr=CTA_Tree_GetHandleStr((CTA_Tree) *htree, c_str, (CTA_Handle*) hitem); + + free(c_str); +} + + +CTAEXPORT void CTA_TREE_GET_VALUESTR_F77(int *htree, char* str, void *value, int *datatype, + int *ierr, int len_str){ + char *c_str; + + /* create a c-string equivalent to name */ + c_str=CTA_Malloc((len_str+1)*sizeof(char)); + CTA_fstr2cstr(str,c_str,len_str); + + *ierr=CTA_Tree_GetValueStr( (CTA_Tree)*htree, c_str, value, (CTA_Datatype)*datatype); + + free(c_str); +} + + +CTAEXPORT void CTA_TREE_GETHANDLE_F77(int *htree, int *hpath, int *hitem, int *ierr) { + *ierr=CTA_Tree_GetHandle((CTA_Tree)*htree, (CTA_String)*hpath, (CTA_Handle*)hitem); +} + +CTAEXPORT void CTA_TREE_GET_VALUE_F77(int *htree, int *hpath, void *value, int *datatype, int *ierr) { + *ierr=CTA_Tree_GetValue((CTA_Tree)*htree, (CTA_String)*hpath, value, *datatype); +} + +CTAEXPORT void CTA_TREE_COUNT_ITEMS_F77(int *htree, int *count, int *ierr) { + *ierr=CTA_Tree_CountItems((CTA_Tree)*htree, count); +} + +CTAEXPORT void CTA_TREE_GET_ITEM_F77(int *htree, int *index, int *hitem, int *ierr) { + *ierr=CTA_Tree_GetItem((CTA_Tree)*htree, *index, (CTA_Handle*)hitem); +} + +CTAEXPORT void CTA_TREE_GET_ITEMVALUE_F77(int *htree, int *index, void *value, int *datatype, int *ierr) { + *ierr=CTA_Tree_GetItemValue((CTA_Tree)*htree, *index, value, *datatype); +} + +CTAEXPORT void CTA_TREE_PRINT_F77(int *htree, int *ierr) { + *ierr=CTA_Tree_Print((CTA_Tree)*htree); +} + diff --git a/costa/native/cta/src/cta_treevector.c b/costa/native/cta/src/cta_treevector.c new file mode 100644 index 000000000..63df1280d --- /dev/null +++ b/costa/native/cta/src/cta_treevector.c @@ -0,0 +1,4301 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_treevector.c $ +$Revision: 3792 $, $Date: 2013-01-31 12:34:05 +0100 (Thu, 31 Jan 2013) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#define IDEBUG (0) + +#define ELMDIV (1) +#define ELMPROD (2) + +#include +#include +#if HAVE_LIBNETCDF +#include +#endif + + +#include +#include "cta_mem.h" +#include "f_cta_utils.h" +#include "cta_errors.h" +#include "cta_defaults.h" +#include "cta_treevector.h" +#include "ctai_vector.h" +#include "ctai_datatypes.h" +#include "cta_handles.h" +#include "ctai_handles.h" +#include "cta_file.h" +#include "cta_metainfo.h" +#include "ctai_xml.h" +#include "cta_pack.h" +#include "cta_message.h" +#include "cta_metainfo.h" + +#define CTA_TREEVECTOR_CREATE_F77 F77_CALL(cta_treevector_create,CTA_TREEVECTOR_CREATE) +#define CTA_TREEVECTOR_DUPLICATE_F77 F77_CALL(cta_treevector_duplicate,CTA_TREEVECTOR_DUPLICATE) +#define CTA_TREEVECTOR_CONC_F77 F77_CALL(cta_treevector_conc,CTA_TREEVECTOR_CONC) +#define CTA_TREEVECTOR_GETSUBTREEVEC_F77 F77_CALL(cta_treevector_getsubtreevec,CTA_TREEVECTOR_GETSUBTREEVEC) +#define CTA_TREEVECTOR_SETSUBTREENOCOMPUTE_F77 F77_CALL(cta_treevector_setsubtreenocompute,CTA_TREEVECTOR_SETSUBTREENOCOMPUTE) +#define CTA_TREEVECTOR_GETSUBTREEVECINDEX_F77 F77_CALL(cta_treevector_getsubtreevecindex,CTA_TREEVECTOR_GETSUBTREEVECINDEX) +#define CTA_TREEVECTOR_GETTAG_F77 F77_CALL(cta_treevector_gettag,CTA_TREEVECTOR_GETTAG) +#define CTA_TREEVECTOR_SETVEC_F77 F77_CALL(cta_treevector_setvec,CTA_TREEVECTOR_SETVEC) +#define CTA_TREEVECTOR_GETVEC_F77 F77_CALL(cta_treevector_getvec,CTA_TREEVECTOR_GETVEC) +#define CTA_TREEVECTOR_SETVAL_F77 F77_CALL(cta_treevector_setval,CTA_TREEVECTOR_SETVAL) +#define CTA_TREEVECTOR_GETVAL_F77 F77_CALL(cta_treevector_getval,CTA_TREEVECTOR_GETVAL) +#define CTA_TREEVECTOR_SETVALS_F77 F77_CALL(cta_treevector_setvals,CTA_TREEVECTOR_SETVALS) +#define CTA_TREEVECTOR_GETVALS_F77 F77_CALL(cta_treevector_getvals,CTA_TREEVECTOR_GETVALS) +#define CTA_TREEVECTOR_GETSIZE_F77 F77_CALL(cta_treevector_getsize,CTA_TREEVECTOR_GETSIZE) +#define CTA_TREEVECTOR_GETMETAINFO_F77 F77_CALL(cta_treevector_getmetainfo,CTA_TREEVECTOR_GETMETAINFO) +#define CTA_TREEVECTOR_SETMETAINFO_F77 F77_CALL(cta_treevector_setmetainfo,CTA_TREEVECTOR_SETMETAINFO) +#define CTA_TREEVECTOR_COPY_F77 F77_CALL(cta_treevector_copy,CTA_TREEVECTOR_COPY) +#define CTA_TREEVECTOR_AXPY_F77 F77_CALL(cta_treevector_axpy,CTA_TREEVECTOR_AXPY) +#define CTA_TREEVECTOR_DOT_F77 F77_CALL(cta_treevector_dot,CTA_TREEVECTOR_DOT) +#define CTA_TREEVECTOR_NRM2_F77 F77_CALL(cta_treevector_nrm2,CTA_TREEVECTOR_NRM2) +#define CTA_TREEVECTOR_SETCONSTANT_F77 F77_CALL(cta_treevector_setconstant,CTA_TREEVECTOR_SETCONSTANT) +#define CTA_TREEVECTOR_SCAL_F77 F77_CALL(cta_treevector_scal,CTA_TREEVECTOR_SCAL) +#define CTA_TREEVECTOR_EXPORT_F77 F77_CALL(cta_treevector_export,CTA_TREEVECTOR_EXPORT) +#define CTA_TREEVECTOR_IMPORT_F77 F77_CALL(cta_treevector_import,CTA_TREEVECTOR_IMPORT) +#define CTA_TREEVECTOR_FREE_F77 F77_CALL(cta_treevector_free,CTA_TREEVECTOR_FREE) +#define CTA_TREEVECTOR_INFO_F77 F77_CALL(cta_treevector_info,CTA_TREEVECTOR_INFO) +#define CTA_TREEVECTOR_LIST_F77 F77_CALL(cta_treevector_list,CTA_TREEVECTOR_LIST) +#define CTA_TREEVECTOR_GEMM_F77 F77_CALL(cta_treevector_gemm,CTA_TREEVECTOR_GEMM) +#define CTA_TREEVECTOR_OPONLEAFS_F77 F77_CALL(cta_treevector_oponleafs,CTA_TREEVECTOR_OPONLEAFS) +#define CTA_TREEVECTOR_ELMSQRT_F77 F77_CALL(cta_treevector_elmsqrt,CTA_TREEVECTOR_ELMSQRT) +#define CTA_TREEVECTOR_ELMPROD_F77 F77_CALL(cta_treevector_elmprod,CTA_TREEVECTOR_ELMPROD) +#define CTA_TREEVECTOR_GETSUBTREEVECINDEX_F77 F77_CALL(cta_treevector_getsubtreevecindex,CTA_TREEVECTOR_GETSUBTREEVECINDEX) +#define CTA_TREEVECTOR_GETNUMSUBTREE_F77 F77_CALL(cta_treevector_getnumsubtree,CTA_TREEVECTOR_GETNUMSUBTREE) +#define CTA_TREEVECTOR_GETSUBTREEVECID_F77 F77_CALL(cta_treevector_getsubtreevecid,CTA_TREEVECTOR_GETSUBTREEVECID) + + +#define CLASSNAME "CTA_TreeVector" + +/* --------------------------------------------------------------------- */ + +/* Struct holding all data associated to an COSTA Tree-vector */ + + +typedef struct { +char tag[CTA_STRLEN_TAG]; +char name[CTA_STRLEN_NAME]; +int size; +CTA_Vector *v; +int nsubtrees; +CTA_Metainfo *metainfo; +CTA_TreeVector *treevecs; +BOOL *nocompute; +} CTAI_TreeVec; + + +/* Local headers */ + +int CTAI_TreeVector_ExcludeFromVector(CTA_TreeVector treevec, BOOL *excludefv); + + + +#undef METHOD +#define METHOD "Create" +int CTA_TreeVector_Create(const char *name, const char *tag, CTA_TreeVector *treevec){ + int retval; //Return value of a call + CTAI_TreeVec *data; //Treevector specific data + + // Allocate data and set properties + data=CTA_Malloc(sizeof(CTAI_TreeVec)); + + // Set properties + strcpy(data->name,name); + strcpy(data->tag,tag); + data->v=NULL; + data->nsubtrees=0; + data->treevecs=NULL; + data->nocompute=NULL; + + // Allocate new handle and return error when unsuccesfull + retval=CTA_Handle_Create("tree-vector",CTA_TREEVECTOR,data,treevec); + if (retval) { + CTA_WRITE_ERROR("Cannot create tree-vector"); + return retval; + } + + data->metainfo=NULL; + + return CTA_OK; +}; + +#undef METHOD +#define METHOD "Duplicate" +int CTA_TreeVector_Duplicate(CTA_TreeVector treevec1, CTA_TreeVector *treevec2 ){ + int retval; //Return value of a call + CTAI_TreeVec *data1, *data2; //Tree-vector specific data + int istate; // counter over all substates + const char *name; + + if (IDEBUG>10){printf("CTA_TreeVector_Duplicate start\n");} + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec1,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec1,(void*) &data1); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + name = CTAI_Handle_GetName((CTA_Handle) treevec1); + if (IDEBUG>10){printf("CTA_TreeVector_Duplicate name of state '%s'\n",name);} + if (IDEBUG>10){printf("CTA_TreeVector_Duplicate adress data1=%p\n",data1);} + + /* create new state vector and get data object */ + retval=CTA_TreeVector_Create(name, data1->tag, treevec2); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) *treevec2,(void*) &data2); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + + /* check whether this is a leaf */ + if (data1->nsubtrees==0){ + + if (IDEBUG>10){printf("CTA_TreeVector_Duplicate duplicate vector \n");} + /* Duplicate and copy vector */ + + if (data1->v) { + data2->v=CTA_Malloc(sizeof(CTA_Vector)); + retval=CTA_Vector_Duplicate(*data1->v, data2->v); + if (IDEBUG>10){printf("CTA_TreeVector_Duplicate retval Vector ok %d\n",retval);} + + if (retval!=CTA_OK) return retval; + } + if (data1->metainfo) { + // make data2->metainfo. + // *data2->metainfo has been made (and set at NULL) in cta_treevector_create. + data2->metainfo=CTA_Malloc(sizeof(CTA_Metainfo)); + retval=CTA_Metainfo_Create(data2->metainfo); + retval = CTA_Metainfo_Copy(*data1->metainfo,*data2->metainfo); + } + } + + else { + if (IDEBUG>10){printf("CTA_TreeVector_Duplicate duplicate substates \n");} + /* duplicate sub-states */ + data2->treevecs=CTA_Malloc(sizeof(CTA_TreeVector)*data1->nsubtrees); + data2->nocompute=CTA_Malloc(sizeof(BOOL)*data1->nsubtrees); + for (istate=0;istatensubtrees;istate++){ + retval=CTA_TreeVector_Duplicate(data1->treevecs[istate], + &data2->treevecs[istate] ); + if (retval!=CTA_OK) return retval; + data2->nocompute[istate]=data1->nocompute[istate]; + } + data2->nsubtrees=data1->nsubtrees; + + + if (data1->metainfo) { + data2->metainfo=CTA_Malloc(sizeof(CTA_Metainfo)); + retval=CTA_Metainfo_Create(data2->metainfo); + retval = CTA_Metainfo_Copy(*data1->metainfo,*data2->metainfo); + } + + } + if (IDEBUG>10){printf("CTA_TreeVector_Duplicate end of function\n");} + return CTA_OK; +}; + +/* ------------------------------------------------------------*/ + +int CTAI_axpy_grid(CTA_TreeVector y, CTAI_Gridm gridy, double alpha, CTA_TreeVector x, CTAI_Gridm gridx){ + CTAI_TreeVec *datax, *datay; //Tree-vector specific data + CTA_Vector vecx_to_y; + int retval; + + retval=CTA_Handle_GetData((CTA_Handle) x,(void*) &datax); + retval=CTA_Handle_GetData((CTA_Handle) y,(void*) &datay); + + retval=CTA_Vector_Duplicate(*datay->v, &vecx_to_y); + + + + /* interpolate values of vector x on gridx towards gridy. That means the following: + - for each point on gridy: + use bi/trilinear interpolation using 4/8 gridpoints of gridx which surround + gridpoints of y */ + + + + if (!datax->v) return CTA_TREEVECTORS_NOT_COMPATIBLE; + retval = CTAI_Grid_Interpolate(gridy, gridx, *datax->v, vecx_to_y ); + + + + /* use now the standard vector axpy */ + + if (!datay->v) return CTA_TREEVECTORS_NOT_COMPATIBLE; + + retval=CTA_Vector_Axpy(*datay->v, alpha, vecx_to_y); + + if (retval!=CTA_OK) return retval; + + /* free vectors */ + retval = CTA_Vector_Free(&vecx_to_y); + + + return retval; +} + +/* ------------------------------------------------------------*/ + + +#undef METHOD +#define METHOD "Conc" +int CTA_TreeVector_Conc(CTA_TreeVector treevec1, CTA_TreeVector *treevecs, int nsubtrees){ + + int retval; //Return value of a call + CTAI_TreeVec *data; //Tree-vector specific data + int istate; //counter of the states + + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec1,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec1,(void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* Check handles */ + for (istate=0;istatensubtrees || data->treevecs){ + return CTA_CONCAT_NOT_POSSIBLE; + } + + /* Add handles */ + data->treevecs=CTA_Malloc(sizeof(CTA_TreeVector)*nsubtrees); + data->nocompute=CTA_Malloc(sizeof(BOOL)*nsubtrees); + for (istate=0;istatetreevecs[istate]=treevecs[istate]; + data->nocompute[istate]=FALSE; + } + data->nsubtrees=nsubtrees; + + return CTA_OK; +}; + + +#undef METHOD +#define METHOD "GetSubTreeVec" +int CTA_TreeVector_GetSubTreeVec(CTA_TreeVector treevec, const char *tag, + CTA_TreeVector *hsubstate){ + + int retval; //Return value of a call + CTAI_TreeVec *data; //Tree-vector specific data + int istate; //counter of the states + + *hsubstate=CTA_NULL; + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + if (strcmp(data->tag,tag)==0 || strcmp(".",tag)==0){ + *hsubstate=treevec; + return CTA_OK; + } + + /* look in all substates */ + + for (istate=0;istatensubtrees;istate++){ + + retval=CTA_TreeVector_GetSubTreeVec(data->treevecs[istate], tag,hsubstate); + if (retval==CTA_OK) return CTA_OK; + }; + + if (IDEBUG>10){ + char msg[1024]; + strcpy(msg,"Cannot find Treevector Item : "); + strncat(msg,tag,1023); + CTA_WRITE_ERROR(msg); + } + return CTA_ITEM_NOT_FOUND; +}; + + +#undef METHOD +#define METHOD "SetSubTreeNocompute" +int CTA_TreeVector_SetSubTreeNocompute(CTA_TreeVector treevec, const char *tag){ + int retval; //Return value of a call + CTAI_TreeVec *data; //Tree-vector specific data + int istate; //counter of the states + char tagsub[CTA_STRLEN_TAG]; + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + if (data->nsubtrees == 0) return CTA_OK; + + /* look in all substates */ + + for (istate=0;istatensubtrees;istate++){ + retval = CTA_TreeVector_GetTag(data->treevecs[istate],tagsub); + if (strcmp(tagsub,tag)==0){ + data->nocompute[istate] = TRUE; + } + + /* look in substates of substate */ + retval=CTA_TreeVector_SetSubTreeNocompute(data->treevecs[istate], tag); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_TreeVector_SetSubTreeNocompute"); + return retval; + } + }; + return CTA_OK; +} + +#undef METHOD +#define METHOD "GetSubTreeVecIndex" +int CTA_TreeVector_GetSubTreeVecIndex(CTA_TreeVector treevec, int index, + CTA_TreeVector *hsubstate){ + int retval; //Return value of a call + CTAI_TreeVec *data; //Tree-vector specific data + char msg[1024]; + + *hsubstate=CTA_NULL; + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* get the substate.Note that we have to subtract 1 because of our counting convention */ + if (index > 0 && index <= data->nsubtrees) { + *hsubstate = data->treevecs[index-1]; + return CTA_OK; + } + if (IDEBUG>10){ + sprintf(msg,"Cannot find sub treevector with index :%d",index); + CTA_WRITE_ERROR(msg); + } + return CTA_ITEM_NOT_FOUND; +}; + +#undef METHOD +#define METHOD "GetSubTreeVecId" +int CTA_TreeVector_GetSubTreeVecId(CTA_TreeVector treevec, int index, char tag[CTA_STRLEN_TAG]){ + int retval; //Return value of a call + CTAI_TreeVec *data; //Tree-vector specific data + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + /* get the substate-id. Note that we DO NOT have to subtract 1 because of Java counting convention + (this method is only used by JAVA users!) */ + retval = CTA_TreeVector_GetTag(data->treevecs[index],tag); + + return retval; +}; + + + +#undef METHOD +#define METHOD "GetNumSubTree" +int CTA_TreeVector_GetNumSubTree(CTA_TreeVector treevec, int *numSubTrees){ + int retval; //Return value of a call + CTAI_TreeVec *data; //Tree-vector specific data + + *numSubTrees=0; + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* get number of substates */ + *numSubTrees=data->nsubtrees; + return CTA_OK; +}; + + +#undef METHOD +#define METHOD "GetTag" +int CTA_TreeVector_GetTag(CTA_TreeVector treevec, char *tag){ + + int retval; //Return value of a call + CTAI_TreeVec *data; //Tree-vector specific data + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* Note this copy is not protected */ + strcpy(tag,data->tag); + + return CTA_OK; +}; + + +#undef METHOD +#define METHOD "GetMetainfo" +int CTA_TreeVector_GetMetainfo(CTA_TreeVector treevec, CTA_Metainfo minfo){ + + int retval; //Return value of a call + CTAI_TreeVec *data; //Tree-vector specific data + int rest; + + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR); + // printf("cta_treevector_getmetainfo0: %d \n ",retval); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data); + // printf("cta_treevector_getmetainfo1: %d \n ",retval); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + if (!data->metainfo ){ + printf("cta_treevector_getmetainfo: NO METAINFO! \n "); + return CTA_ILLEGAL_HANDLE; + } + + + retval=CTA_Handle_Check((CTA_Handle) *data->metainfo, CTA_METAINFO); + //printf("cta_treevector_getmetainfo2: %d \n ",retval); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + + retval=CTA_Handle_Check((CTA_Handle) minfo, CTA_METAINFO); + //printf("cta_treevector_getmetainfo3: %d \n ",retval); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + + retval = CTA_Metainfo_Copy(*data->metainfo, minfo); + retval = CTA_Metainfo_GetRest(minfo,&rest); + + if (retval!=CTA_OK){ + CTA_WRITE_ERROR("cta_treevector_getmetainfo: error"); + return retval; + } + + return CTA_OK; +}; + + +#undef METHOD +#define METHOD "SetMetainfo" +int CTA_TreeVector_SetMetainfo(CTA_TreeVector treevec, CTA_Metainfo minfo){ + + int retval; //Return value of a call + CTAI_TreeVec *data; //Tree-vector specific data + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR); + // printf("cta_treevector_setmetainfo0: %d \n ",retval); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + retval=CTA_Handle_Check((CTA_Handle) minfo, CTA_METAINFO); + //printf("cta_treevector_setmetainfo1: %d \n ",retval); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + + /* er wordt verondersteld dat metainfo van state nog niet is gevuld*/ + /* dus state_setmetainfo is eenmalig per state; anders copy gebruiken! */ + + if (!data->metainfo){ + data->metainfo=CTA_Malloc(sizeof(CTA_Metainfo)); + } + retval=CTA_Metainfo_Create(data->metainfo); + //printf("cta_treevector_setmetainfo2: %d \n ",retval); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_Metainfo_Create"); + return retval; + } + + //dit kan niet met een toekenning; er is een functie cta_metainfo_copy nodig!!! + + retval = CTA_Metainfo_Copy(minfo,*data->metainfo); + //printf("cta_treevector_setmetainfo3: %d \n ",retval); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_Metainfo_Copy"); + return retval; + } + + // data->metainfo = minfo; + + return CTA_OK; +}; + +/* ------------------------------------------------------- */ +//int CTA_TreeVector_SetConnection(CTA_TreeVector treevec1, CTA_TreeVector treevec2){ +// int retval; //Return value of a call +// CTA_Metainfo minfo1, minfo2; +// char tag1[CTA_STRLEN_TAG], tag2[CTA_STRLEN_TAG]; +// +// /* get both metainfos; checking occurs in called routine */ +// retval = CTA_TreeVector_GetMetainfo(treevec1, minfo1); +// if (retval!=CTA_OK) return retval; +// retval = CTA_TreeVector_GetMetainfo(treevec2, minfo2); +// if (retval!=CTA_OK) return retval; +// +// retval = CTA_Metainfo_GetTag(minfo1, tag1); +// if (retval!=CTA_OK) return retval; +// retval = CTA_Metainfo_GetTag(minfo2, tag2); +// if (retval!=CTA_OK) return retval; +// +// /* let both metainfos connect to each other */ +// retval = CTA_Metainfo_SetBelongsTo(minfo1, tag2); +// if (retval!=CTA_OK) return retval; +// retval = CTA_Metainfo_SetBelongsTo(minfo2, tag1); +// if (retval!=CTA_OK) return retval; +// +// /* put the changed metainfo back */ +// retval = CTA_TreeVector_SetMetainfo(treevec1, minfo1); +// if (retval!=CTA_OK) return retval; +// retval = CTA_TreeVector_SetMetainfo(treevec2, minfo2); +// if (retval!=CTA_OK) return retval; +// +// return CTA_OK; +//} + +/* ------------------------------------------------------- */ + +// return the total number of leaves/vectors that form the state +int CTA_TreeVector_GetVecNumHandles(CTA_TreeVector treevec){ + return CTAI_TreeVec_GetVecNumHandles(treevec); +}; + + + +// gives a list of all tags of state and substates +int CTA_TreeVector_List(CTA_TreeVector treevec, CTA_Vector taglist ){ + int indx, retval; + indx = 0; + retval=CTAI_TreeVec_List(treevec, taglist, &indx); + return retval; +}; + + +// gives a list of all tags of state and substates +int CTAI_TreeVec_List(CTA_TreeVector treevec, CTA_Vector taglist, int *indx){ + int retval,istate; + CTA_String str1; + CTAI_TreeVec *data; //Tree-vector specific data + + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR); + if (retval!=CTA_OK) return retval; + retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data); + if (retval!=CTA_OK) return retval; + + printf("State-vector information:\n"); + printf("tag :%s\n",data->tag); + + retval = CTA_String_Create(&str1); + retval = CTA_String_Set(str1,data->tag); + + retval = CTA_Vector_SetVal(taglist, *indx + 1, &str1,CTA_STRING); + + *indx = *indx + 1; + + for (istate=0;istatensubtrees;istate++){ + retval=CTAI_TreeVec_List(data->treevecs[istate], taglist, indx); + if (retval!=CTA_OK) return retval; + } + + return CTA_OK; +}; + + + + + + + + + + +// return the total number of leaves/vectors that form the state +// do not count 'nocompute'-substates +int CTAI_TreeVec_GetVecNumHandles(CTA_TreeVector treevec){ + + CTAI_TreeVec *data; //Tree-vector specific data + int nvec; + int istate; + + CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data); + + // We are in a leaf + if (data->nsubtrees==0){ + return 1; + } else { + nvec=0; + for (istate=0;istatensubtrees;istate++){ + if (data->nocompute[istate] == FALSE) { + nvec=nvec+CTAI_TreeVec_GetVecNumHandles(data->treevecs[istate]); + } + } + return nvec; + } +}; + +int CTAI_TreeVec_GetVecHandles(CTA_TreeVector treevec, CTA_Vector **hvecs, int *indx){ + + int retval; //Return value of a call + CTAI_TreeVec *data; //Tree-vector specific data + int istate; //Counter over substates + + retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data); + if (retval) return retval; + + // We are in a leaf + if (data->nsubtrees==0){ + hvecs[*indx]=data->v; + (*indx)++; + + } else { + for (istate=0;istatensubtrees;istate++){ + if (data->nocompute[istate] == FALSE) { + CTAI_TreeVec_GetVecHandles(data->treevecs[istate], hvecs, indx); + } + } + } + return CTA_OK; +}; + + +#undef METHOD +#define METHOD "SetVec" +int CTA_TreeVector_SetVec(CTA_TreeVector treevec, CTA_Vector hvec){ + + int retval; //Return value of a call + CTAI_TreeVec *data; //Tree-vector specific data + CTA_Vector **hvecs; //array with pointers to all vectors of substates + int nvecs; //number of sub vectors state is build of + int ivec; //counter over sub vectors + int indx; + void *values; //values of hvec + int nglob; //dimension of vector hvec + int nsub; //dimension of a vector of a substate + int size_type; //C- sizeof of data items in hvec + int ioff; //offset in input vector to start of subvector + CTA_Datatype datatype; //Data Type of hvec + char *cvalues; + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* we are in a leaf -> copy vector */ + if (data->nsubtrees==0) { + if (data->v){ + /* copy vector */ + retval=CTA_Vector_Copy(hvec,*data->v); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot copy vector"); + return retval; + } + }else{ + /* duplicate vector */ + data->v=CTA_Malloc(sizeof(CTA_Vector)); + retval=CTA_Vector_Duplicate(hvec,data->v); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot duplicate vector"); + return retval; + } + } + }else{ + /* we are not in a leaf, try to break vector up */ + /* This is only allowed if all vectors have already been filled-in */ + /* Note: the 'excludefromvector'-leafs are excluded */ + nvecs=CTAI_TreeVec_GetVecNumHandles(treevec); + hvecs=CTA_Malloc(sizeof(CTA_Vector*)*nvecs); + indx=0; + retval=CTAI_TreeVec_GetVecHandles(treevec, hvecs, &indx); + if (retval) { + CTA_WRITE_ERROR("Cannot get treevector handles"); + return retval; + } + /* no NULL vectors are allowed */ + for (ivec=0;ivecnglob) return CTA_INCOMPATIBLE_VECTORS; + if (retval) return retval; + cvalues=values; + + ptr=cvalues; + retval=CTA_Vector_SetVals(*hvecs[ivec],ptr+ioff*size_type,nsub, + datatype); + if (retval) { + CTA_WRITE_ERROR("Cannot set values of vector"); + return retval; + } + ioff=ioff+nsub; + + } + if (ioff!=nglob) return CTA_INCOMPATIBLE_VECTORS; + + /* free work variables */ + free(values); + free(hvecs); + } + return CTA_OK; +}; + +void zet999(double *vals, int n){ + +int i; + + for (i=0;i copy vector */ + if (data->nsubtrees==0) { + if (data->v){ + /* copy vector */ + retval=CTA_Vector_Copy(*data->v,hvec); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot copy vector"); + return retval; + } + } + }else{ + /* we are not in a leave, try to break vector up */ + /* This is only allowed if all vectors have already been filled-in */ + nvecs=CTAI_TreeVec_GetVecNumHandles(treevec); + hvecs=CTA_Malloc(sizeof(CTA_Vector*)*nvecs); + indx=0; + retval=CTAI_TreeVec_GetVecHandles(treevec, hvecs, &indx); + /* no NULL vecors are allowed */ + for (ivec=0;ivecnglob) return CTA_INCOMPATIBLE_VECTORS; + if (retval) return retval; + + ptr=values; + + retval=CTA_Vector_GetVals(*hvecs[ivec],ptr+ioff*size_type,nsub, + datatype); + if (retval) { + CTA_WRITE_ERROR("Cannot get values of vector"); + return retval; + } + ioff=ioff+nsub; + } + if (ioff!=nglob) return CTA_INCOMPATIBLE_VECTORS; + + + /* set values in hvec */ +// zet999(values,nglob); + retval=CTA_Vector_SetVals(hvec,values,nglob,datatype); + if (retval) { + CTA_WRITE_ERROR("Cannot set values of vector"); + return retval; + } + + /* free work variables */ +// zet999(values,nglob); + free(values); + free(hvecs); + } + return CTA_OK; +}; + + +#undef METHOD +#define METHOD "Copy" +int CTA_TreeVector_Copy(CTA_TreeVector treevec1, CTA_TreeVector treevec2){ +// DIT IS EEN ERG KORT DOOR DE BOCHT GEDEELTELIJK INGEKLAPTE ' +// SUBSTATES MOETEN OOK ONDERSTEUND WORDEN !!!! + + + int retval; //Return value of a call + CTAI_TreeVec *data1, *data2; //Tree-vector specific data + int istate; // counter over all substates + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec1,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec1,(void*) &data1); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + retval=CTA_Handle_Check((CTA_Handle) treevec2,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec2,(void*) &data2); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + // check if there is any meta-information; if yes, make a copy of it. + + + + /* check whether this is a leaf */ + if (data1->nsubtrees==0){ + if (data2->nsubtrees!=0) return CTA_TREEVECTORS_NOT_COMPATIBLE; + + /* copy vector */ + if (data1->v) { + if (!data2->v) return CTA_TREEVECTORS_NOT_COMPATIBLE; + retval=CTA_Vector_Copy(*data1->v, *data2->v); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot copy vector"); + return retval; + } + } + + /* copy metainfo of leaf */ + if (data1->metainfo) { + // check if the other state has a metainfo + // if (!data2->metainfo) printf("leaf: data2 has no metainfo \n" ); + if (!data2->metainfo) return CTA_TREEVECTORS_NOT_COMPATIBLE; + // assume that the other metainfo is of the right size and copy + + retval = CTA_Metainfo_Copy(*data1->metainfo,*data2->metainfo); + } + + + } + else { /* we are in a node. Copy the metainfo of the node, then the substates.*/ + + + if (data1->metainfo) { + // check if the other state has a metainfo + // if (!data2->metainfo) printf("node: data2 has no metainfo \n" ); + if (!data2->metainfo) return CTA_TREEVECTORS_NOT_COMPATIBLE; + // assume that the other metainfo is of the right size and copy + + retval = CTA_Metainfo_Copy(*data1->metainfo,*data2->metainfo); + } + + + /* copy sub-states */ + if (data1->nsubtrees!=data2->nsubtrees) return CTA_TREEVECTORS_NOT_COMPATIBLE; + for (istate=0;istatensubtrees;istate++){ + retval=CTA_TreeVector_Copy(data1->treevecs[istate],data2->treevecs[istate]); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot copy treevector"); + return retval; + } + } + } + return CTA_OK; +}; + + + + +#undef METHOD +#define METHOD "SetConstant" +int CTA_TreeVector_SetConstant(CTA_TreeVector treevec, void *val,CTA_Datatype datatype){ + + int retval; //Return value of a call + CTAI_TreeVec *data; //Tree-vector specific data + int istate; // counter over all substates +// char name[CTA_STRLEN_NAME]; + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* check whether this is a leaf */ + if (data->nsubtrees==0){ + + /* set constant vector */ + if (data->v) { + retval=CTA_Vector_SetConstant(*data->v,val,datatype); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_Vector_SetConstant"); + return retval; + } + } + } + else { + /* copy sub-states */ + for (istate=0;istatensubtrees;istate++){ + if (data->nocompute[istate] == FALSE) { + retval=CTA_TreeVector_SetConstant(data->treevecs[istate],val,datatype); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_TreeVector_SetConstant"); + return retval; + } + } + } + } + + return CTA_OK; +}; + + +#undef METHOD +#define METHOD "Scal" +int CTA_TreeVector_Scal(CTA_TreeVector treevec, double val){ + + int retval; //Return value of a call + CTAI_TreeVec *data; //Tree-vector specific data + int istate; // counter over all substates +// char name[CTA_STRLEN_NAME]; + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* check whether this is a leaf */ + if (data->nsubtrees==0){ + + /* set constant vector */ + if (data->v) { + retval=CTA_Vector_Scal(*data->v,val); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_Vector_Scal"); + return retval; + } + } + } + else { + /* copy sub-states */ + for (istate=0;istatensubtrees;istate++){ + if (data->nocompute[istate] == FALSE) { + retval=CTA_TreeVector_Scal(data->treevecs[istate],val); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_TreeVector_Scal"); + return retval; + } + } + } + } + return CTA_OK; +}; + + +#undef METHOD +#define METHOD "GetSetVal" +int CTAI_TreeVec_GetSetVal(CTA_TreeVector treevec1, int i, void *val, + CTA_Datatype datatype, BOOL lget ){ + int retval; //Return value of a call + CTAI_TreeVec *data1; //Tree-vector specific data + int istate; // counter over all substates + int nstate, offset; + char msg[128]; + + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec1,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec1,(void*) &data1); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* check whether this is a leaf */ + if (data1->nsubtrees==0){ + /* Set value in vector vector */ + if (data1->v) { + if (lget){ + retval=CTA_Vector_GetVal(*data1->v, i,val, datatype); + } else { + retval=CTA_Vector_SetVal(*data1->v, i,val, datatype); + } + if (retval!=CTA_OK) { + sprintf(msg, "Error %d in getting or setting element %d vals", retval, i); + CTA_WRITE_ERROR(msg); + return retval; + } + } + } + else { + /* Find corresponding substate */ + offset=0; + for (istate=0;istatensubtrees;istate++){ + if (data1->nocompute[istate] == FALSE) { + retval=CTA_TreeVector_GetSize(data1->treevecs[istate], &nstate); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get size of TreeVector"); + return retval; + } + if (nstate>=i-offset) { + retval=CTAI_TreeVec_GetSetVal(data1->treevecs[istate],i-offset, val, + datatype, lget); + return retval; + } + offset=offset+nstate; + } + } + } + return CTA_OK; +}; + + +int CTA_TreeVector_GetVal(CTA_TreeVector treevec, int i, void *val,CTA_Datatype datatype){ + return CTAI_TreeVec_GetSetVal(treevec, i, val, datatype, TRUE ); +}; + +int CTA_TreeVector_SetVal(CTA_TreeVector treevec, int i, void *val,CTA_Datatype datatype){ + return CTAI_TreeVec_GetSetVal(treevec, i, val, datatype, FALSE ); +}; + + + + +#undef METHOD +#define METHOD "SetVals" +int CTA_TreeVector_SetVals(CTA_TreeVector treevec, void *val,int nval, CTA_Datatype datatype){ +// DIT IS EEN ERG KORT DOOR DE BOCHT VERSIE' + CTA_Vector vwork; + int retval; + + retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nval, datatype, CTA_NULL, &vwork); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot create vector"); + return retval; + } + retval=CTA_Vector_SetVals(vwork, val, nval, datatype); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot set values in vector"); + return retval; + } + retval=CTA_TreeVector_SetVec(treevec,vwork); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot set vector in vectortree"); + return retval; + } + retval=CTA_Vector_Free(&vwork); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot free vector"); + return retval; + } + + return CTA_OK; +}; + +#undef METHOD +#define METHOD "GetVals" +int CTA_TreeVector_GetVals(CTA_TreeVector treevec, void *val,int nval,CTA_Datatype datatype){ +// DIT IS EEN ERG KORT DOOR DE BOCHT VERSIE' + CTA_Vector vwork; + int retval; + char msg[128]; + + retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nval, datatype, CTA_NULL, &vwork); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + + retval=CTA_TreeVector_GetVec(treevec,vwork); + if (retval!=CTA_OK) { + CTA_Vector_Free(&vwork); + sprintf(msg,"Cannot get vector from treevector (length=%d) error code is %d", nval,retval); + CTA_WRITE_ERROR(msg); + return retval; + } + + retval=CTA_Vector_GetVals(vwork, val, nval, datatype); + if (retval!=CTA_OK) { + CTA_Vector_Free(&vwork); + CTA_WRITE_ERROR("Cannot get values from vector"); + return retval; + } + + retval=CTA_Vector_Free(&vwork); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot free vector"); + + + return retval; + } + return CTA_OK; +}; + +/* ----------------------------------------------------------------- */ + +#undef METHOD +#define METHOD "Axpy" +int CTA_TreeVector_Axpy(CTA_TreeVector y, double alpha, CTA_TreeVector x){ +// DIT IS EEN ERG KORT DOOR DE BOCHT GEDEELTELIJK INGEKLAPTE ' +// SUBSTATES MOETEN OOK ONDERSTEUND WORDEN !!!! + + + int retval, retvalA; //Return value of a call + CTAI_TreeVec *datax, *datay; //Tree-vector specific data + int istate, jstate; // counter over all substates + CTA_Metainfo minfox, minfoy; + CTAI_Gridm hgridx, hgridy; + char tagx[CTA_STRLEN_TAG], tagy[CTA_STRLEN_TAG]; + char xbelongsto[CTA_STRLEN_TAG], ybelongsto[CTA_STRLEN_TAG]; + CTA_TreeVector substate_x, substate_y; + CTA_Datatype datatype; + CTA_Vector vecy; + int nx,ny, ierr; + + if (IDEBUG) { + printf("Start of CTA_TreeVector_Axpy(%d,%f,%d)\n",y,alpha,x); + } + // + // + // printf("CTA_TreeVector_Axpy: y=\n"); + // retval=CTA_TreeVector_Export(y,CTA_FILE_STDOUT); + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) x,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Argument x must be a CTA_TreeVector"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) x,(void*) &datax); + if (retval!=CTA_OK) + { printf("ERROR in CTA_TreeVector_Axpy: "); + printf("Handle-getdata of argument x fails\n"); + return retval; + } + retval=CTA_Handle_Check((CTA_Handle) y,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + char message[1024]; + sprintf(message,"Argument y (handle=%d) must be a CTA_TreeVector\n",y); + CTA_WRITE_ERROR(message); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) y,(void*) &datay); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle-getdata of argument y fails"); + return retval; + } + +/* FIRST RULE: if x is flat, y not flat, then if size(x)==size(y) do a crude axpy with one datatype only + This is handy for steady-state filters, when KG has an unknown tree structure */ + retval=CTA_TreeVector_GetSize(x,&nx); + retval=CTA_TreeVector_GetSize(y,&ny); + if (datax->nsubtrees==0 && datay->nsubtrees > 0 && nx == ny){ + retval=CTA_Vector_GetDatatype(*datax->v,&datatype); // usually: cta_double + retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, ny, datatype, CTA_NULL, &vecy); + retval=CTA_TreeVector_GetVec(y,vecy); + retval=CTA_Vector_Axpy(vecy, alpha,*datax->v); + retval=CTA_TreeVector_SetVec(y,vecy); + retval=CTA_Vector_Free(&vecy); + return retval; + } +/* END OF first rule ; now exploring other possibilities. */ + + + /* decide what to do according to metainfo */ + if (!datax->metainfo || !datay->metainfo ) { + if (IDEBUG) { + printf("CTA_TreeVector_Axpy: NO meta info in one (or both) of the treevectors %s and %s \n", + datax->tag, datay->tag); + } + /* check whether this is a leaf */ + if (datax->nsubtrees==0){ + + if (datay->nsubtrees!=0) { + char message[1024]; + sprintf(message,"Argument y (size %d, handle=%d) contains subtrees while x (size %d) is a leaf.\n",ny,y,nx); + CTA_WRITE_ERROR(message); + return CTA_TREEVECTORS_NOT_COMPATIBLE; + } + + /* axpy operation for vector */ + if (datax->v) { + if (!datay->v) return CTA_TREEVECTORS_NOT_COMPATIBLE; + retval=CTA_Vector_Axpy(*datay->v, alpha,*datax->v); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_Vector_Axpy"); + return retval; + } + } + } + else { + /* axpy operation for sub-states */ + if (datax->nsubtrees!=datay->nsubtrees){ + char message[1024]; + sprintf(message,"arguments y and x have different number of subtrees \n"); + CTA_WRITE_ERROR(message); + return CTA_TREEVECTORS_NOT_COMPATIBLE; + } + for (istate=0;istatensubtrees;istate++){ + if (datax->nocompute[istate] == FALSE) { + retval=CTA_TreeVector_Axpy(datay->treevecs[istate], alpha,datax->treevecs[istate]); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_TreeVector_Axpy"); + return retval; + } + } + } + } + return CTA_OK; + } + else { + if (IDEBUG) { + printf("CTA_TreeVector_Axpy meta info available for both "); + printf("treevectors %s and %s \n",datax->tag, datay->tag); + retval=CTA_TreeVector_GetSize(x,&nx); + retval=CTA_TreeVector_GetSize(y,&ny); + printf("sizes: x %d y %d \n",nx,ny); + } + + /* ---------- metainfo exists for both x and y -------------- */ + + retvalA = CTA_OK; + retval=CTA_Metainfo_Create(&minfox); retval=CTA_Metainfo_Create(&minfoy); + + retval = CTA_Metainfo_Copy(*datax->metainfo, minfox); + retval = CTA_Metainfo_Copy(*datay->metainfo, minfoy); + retval = CTA_Metainfo_GetGrid(minfox, &hgridx); + retval = CTA_Metainfo_GetGrid(minfoy, &hgridy); + retval = CTA_Metainfo_GetTag(minfox, tagx); + retval = CTA_Metainfo_GetTag(minfoy, tagy); + retval = CTA_Metainfo_GetBelongsTo(minfox, xbelongsto); + retval = CTA_Metainfo_GetBelongsTo(minfoy, ybelongsto); + + /* -------------------------- */ + /* search for equal tags */ + + if (IDEBUG) { + printf("CTA_TreeVector_Axpy: tagx=%s,tagy=%s\n",tagx, tagy); + } + + if (0==strcmp(tagx, tagy)) { // y and x have same tag ; finished scanning! + + if (IDEBUG) { + printf("CTA_TreeVector_Axpy: equal tags; now calling ctai_axpy_grid\n"); + } + + retvalA = CTAI_axpy_grid(y,hgridy,alpha,x,hgridx); + if (retvalA) { + char message[1024]; + sprintf(message," ERROR cta_treevector_axpy: code %d \n",retval); + CTA_WRITE_ERROR(message); + } + + } + else { + + /* check if tag of y equals tag of substate of x */ + + if (datax->nsubtrees != 0) { + for (istate=0; istatensubtrees;istate++){ + if (datax->nocompute[istate] == FALSE) { + substate_x= datax->treevecs[istate]; + retval = CTA_TreeVector_Axpy(y, alpha, substate_x); + } + // retvalA = max(retvalA, retval); + } + } // loop along substates of x + + + /* check if tag of substate of y equals tag of x */ + if (datay->nsubtrees!=0) { + + for (jstate=0; jstatensubtrees;jstate++){ + if (datay->nocompute[jstate] == FALSE) { + substate_y= datay->treevecs[jstate]; + retval = CTA_TreeVector_Axpy(substate_y, alpha, x); + } + // retvalA = max(retvalA, retval); + } + } + } + /* free variables */ + ierr = CTA_Metainfo_Free(&minfox); + if (IDEBUG>0) {printf("end of ctai_treevector_axpy: free metainfo %d\n",ierr);} + ierr = CTA_Metainfo_Free(&minfoy); + + + + return retvalA; + } // else metainfo exists + + + return CTA_OK; +}; + + +#undef METHOD +#define METHOD "ElmOp" +int CTA_TreeVector_ElmOp(CTA_TreeVector y, CTA_TreeVector x, int oper){ +// DIT IS EEN ERG KORT DOOR DE BOCHT GEDEELTELIJK INGEKLAPTE ' +// SUBSTATES MOETEN OOK ONDERSTEUND WORDEN !!!! + + + int retval; //Return value of a call + CTAI_TreeVec *datax, *datay; //Tree-vector specific data + int istate; // counter over all substates + BOOL noInterpolation; + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) x,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + char message[1024]; + sprintf(message, "ERROR in CTA_TreeVector_Axpy: argument x must be a CTA_TreeVector"); + CTA_WRITE_ERROR(message); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) x,(void*) &datax); + if (retval!=CTA_OK) { + char message[1024]; + sprintf(message, "ERROR in CTA_TreeVector_Axpy: Handle-getdata of argument x fails"); + CTA_WRITE_ERROR(message); + return retval; + } + retval=CTA_Handle_Check((CTA_Handle) y,CTA_TREEVECTOR); + if (retval!=CTA_OK){ + char message[1024]; + sprintf(message, "ERROR in CTA_TreeVector_Axpy: argument y must be a CTA_TreeVector"); + CTA_WRITE_ERROR(message); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) y,(void*) &datay); + if (retval!=CTA_OK) { + char message[1024]; + sprintf(message, "ERROR in CTA_TreeVector_Axpy: Handle-getdata of argument y fails"); + CTA_WRITE_ERROR(message); + return retval; + } + + /* decide what to do according to metainfo */ + /* Note :if only one has meta info just continue without interpolation */ + /* If both meta info's are the same we can continue as well without interpolation */ + + noInterpolation=TRUE; + /* If both have meta info check whether we need interpolation */ + if (datax->metainfo && datay->metainfo){ + if (CTA_Metainfo_IsEqual(*(datax->metainfo), + *(datay->metainfo))==CTA_TRUE){ + noInterpolation=FALSE; + } + } + + if (noInterpolation) { + + /* check whether this is a leaf */ + if (datax->nsubtrees==0){ + if (datay->nsubtrees!=0) { + printf("Error: Vector x is a leaf but vector y has %d children\n",datay->nsubtrees); + return CTA_TREEVECTORS_NOT_COMPATIBLE; // hoeft niet! + } + + /* axpy operation for vector */ + if (datax->v) { + if (!datay->v) { + printf("Error: Vector x is allocated (leaf) but vector v not\n"); + return CTA_TREEVECTORS_NOT_COMPATIBLE; + } + if (oper==ELMDIV){ + retval=CTA_Vector_ElmDiv(*datay->v, *datax->v); + } else { + retval=CTA_Vector_ElmProd(*datay->v, *datax->v); + } + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_Vector_ElmDiv or CTA_VEctor_ElmProd"); + return retval; + } + } + } + else { + /* axpy operation for sub-states */ + if (datax->nsubtrees!=datay->nsubtrees) { + printf("Error: Number of sub-trees is not the same x(%d) y(%d)\n", + datax->nsubtrees,datay->nsubtrees); + return CTA_TREEVECTORS_NOT_COMPATIBLE; + } + for (istate=0;istatensubtrees;istate++){ + if (datax->nocompute[istate] == FALSE) { + retval=CTA_TreeVector_ElmOp(datay->treevecs[istate], datax->treevecs[istate], oper); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in CTA_TreeVector_ElmOp"); + return retval; + } + } + } + } + return CTA_OK; + } + else { + printf("Error: CTA_TreeVector_ElmDiv with meta info is not supported\n"); + return CTA_TREEVECTORS_NOT_COMPATIBLE; + } // else metainfo exists + + + return CTA_OK; +}; + + +int CTA_TreeVector_ElmDiv(CTA_TreeVector y, CTA_TreeVector x){ + return CTA_TreeVector_ElmOp(y, x, ELMDIV); + +}; + + +int CTA_TreeVector_ElmProd(CTA_TreeVector y, CTA_TreeVector x){ + return CTA_TreeVector_ElmOp(y, x, ELMPROD); +}; + + +/* ------------------------------------------------------------*/ + + +#undef METHOD +#define METHOD "Dot" +int CTA_TreeVector_Dot(CTA_TreeVector treevec1, CTA_TreeVector treevec2, double *dotprod){ +// DIT IS EEN ERG KORT DOOR DE BOCHT GEDEELTELIJK INGEKLAPTE ' +// SUBSTATES MOETEN OOK ONDERSTEUND WORDEN !!!! + + + int retval; //Return value of a call + CTAI_TreeVec *data1, *data2; //Tree-vector specific data + int istate; // counter over all substates +// char name[CTA_STRLEN_NAME]; + double dotwrk; //dot product of substate + + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec1,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec1,(void*) &data1); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + retval=CTA_Handle_Check((CTA_Handle) treevec2,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec2,(void*) &data2); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + /* check whether this is a leaf */ + if (data1->nsubtrees==0){ + if (data2->nsubtrees!=0) return CTA_TREEVECTORS_NOT_COMPATIBLE; + + /* dot vector */ + if (data1->v) { + if (!data2->v) return CTA_TREEVECTORS_NOT_COMPATIBLE; + retval=CTA_Vector_Dot(*data1->v, *data2->v,dotprod); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in using CTA_Vector_Dot"); + return retval; + } + } + } + else { + /* dot sub-states */ + *dotprod=0.0; + if (data1->nsubtrees!=data2->nsubtrees) return CTA_TREEVECTORS_NOT_COMPATIBLE; + for (istate=0;istatensubtrees;istate++){ + if (data1->nocompute[istate] == FALSE) { + retval=CTA_TreeVector_Dot(data1->treevecs[istate],data2->treevecs[istate], + &dotwrk); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in using CTA_TreeVector_Dot"); + return retval; + } + *dotprod=*dotprod+dotwrk; + } + } + } + return CTA_OK; +}; + +#undef METHOD +#define METHOD "Nrm2" +int CTA_TreeVector_Nrm2(CTA_TreeVector treevec1, double *nrm2){ + + int retval; //Return value of a call + CTAI_TreeVec *data1; //Tree-vector specific data + int istate; // counter over all substates + double nrmwrk; //dot product of substate + + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec1,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec1,(void*) &data1); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* check whether this is a leaf */ + if (data1->nsubtrees==0){ + /* dot vector */ + if (data1->v) { + retval=CTA_Vector_Nrm2(*data1->v,nrm2); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in using CTA_Vector_Nrm2"); + return retval; + } + } + } + else { + /* dot sub-states */ + *nrm2=0.0; + for (istate=0;istatensubtrees;istate++){ + if (data1->nocompute[istate] == FALSE) { + retval=CTA_TreeVector_Nrm2(data1->treevecs[istate],&nrmwrk); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error in using CTA_TreeVector_Nrm2"); + return retval; + } + *nrm2=*nrm2+(nrmwrk*nrmwrk); + } + } + *nrm2=sqrt(*nrm2); + } + return CTA_OK; +}; + + +#undef METHOD +#define METHOD "GetSize" +int CTA_TreeVector_GetSize(CTA_TreeVector treevec, int *n){ + int retval; //Return value of a call + CTAI_TreeVec *data; //Tree-vector specific data + int nsub; + int istate; + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + // We are in a leaf + if (data->nsubtrees==0){ + if (data->v){ + retval=CTA_Vector_GetSize(*data->v,n); + if (retval) { + CTA_WRITE_ERROR("Error while using CTA_Vector_GetSize"); + return retval; + } + } + else { + return CTA_UNINITIALISED_SUBTREEVECTOR; + } + } else { + *n=0; + for (istate=0;istatensubtrees;istate++){ + if (data->nocompute[istate] == FALSE) { + retval=CTA_TreeVector_GetSize(data->treevecs[istate], &nsub); + if (retval) { + CTA_WRITE_ERROR("Error while using CTA_TreeVector_GetSize"); + return retval; + } + *n=*n+nsub; + } + } + + } + return CTA_OK; +} + +// This is a try-out function that writes a tree-vector in +// a NETCDF file + +char *CTAI_AddNameToPath(const char *path,const char *name, const char *sep){ + size_t len1, len2, len3; + char *retStr; + + len1=strlen(path); + len2=strlen(name); + len3=strlen(sep); + retStr=CTA_Malloc(sizeof(char)*(len1+len2+len3+1)); + + strcpy(retStr,path); + strcat(retStr,name); + strcat(retStr,sep); + + return retStr; +} + +//------------------------------------------- +// small function for string manipulation +int replace_char_of_string(char *s, char old, char new){ + + char *p = s; + + while(*p){ + if(*p == old) + *p = new; + ++p; + } + return CTA_OK; +} +//-------------------------------------------- + +int CTAI_CTA_TreeVector_ToNetCDF(CTA_TreeVector treeVec, char *path, int ncid, int timeDimid, BOOL append, int ntime, BOOL isdefvar){ +#if HAVE_LIBNETCDF + +#define ERR(e) {printf("Error: %s\n", nc_strerror(e)); exit(2);} + + int nSize; //Size of the whole Tree-vector + int retval; //Return value of a call + int varid; + void *values; + int dimids[5]; + int dimidsRev[5]; + + char *nameDim1, *nameDim2, *nameDim3, *nameDim4; + char *nameVar; + char *nameVar_; + char *extendedPath; + char nameGrid[20]; + char tvtag[CTA_STRLEN_NAME]; + char modtag[CTA_STRLEN_NAME]; + + /* Grid information */ + int nx; + int ny; + int nz; + int nquant; + double x_origin; + double y_origin; + double z_origin; + double dx; + double dy; + double dz; + + CTA_Datatype datatype; + CTA_Metainfo meta; + CTA_Vector v; + CTAI_TreeVec *data1; //Tree-vector specific data + int istate; // counter over all substates + int ndims[4]; + int numDims; + int idim; + int i; + int size_type; + int locIDEBUG = 0; + + size_t *start; + size_t *count; + + if (IDEBUG>0) {printf("start treevector_netcdf_export; append, isdefvar %d %d\n",append,isdefvar);} + + nameDim1 = NULL; + nameDim2 = NULL; + nameDim3 = NULL; + nameDim4 = NULL; + nameVar_ = NULL; + nameVar = NULL; + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treeVec,CTA_TREEVECTOR); + if (retval!=CTA_OK) return retval; + retval=CTA_Handle_GetData((CTA_Handle) treeVec,(void*) &data1); + if (retval!=CTA_OK) return retval; + + /* if we are in a leaf */ + if (data1->nsubtrees==0){ + /* export the treevector to netCDF */ + if (data1->v){ + v=*data1->v; + + /* Get the size of the vector */ + retval=CTA_Vector_GetSize(v,&nSize); + if (IDEBUG>0) {printf("nSize=%d %d\n",nSize,retval);} + /* also get the datatype */ + retval=CTA_Vector_GetDatatype(v,&datatype); + + /* if we have meta information */ + if (data1->metainfo){ + meta=*data1->metainfo; + retval=CTA_Metainfo_getRegGrid(meta, nameGrid, &nx, &ny, &nz, &x_origin, &y_origin, &z_origin, &dx, &dy, &dz); + ndims[0]=nx; + ndims[1]=ny; + ndims[2]=nz; + + if (nx*ny*nz == nSize) {numDims=3;} + + if (nz<=1) numDims=2; + if (ny<=1) numDims=1; + + if (numDims == 3 && nx*ny*nz < nSize) { + nquant = nSize / (nx*ny*nz) ; + if (nquant*nx*ny*nz != nSize) { + printf("Error: grid dimension and state dimension do not match %d %d %d %d %d \n",nx,ny,nz,nSize,nquant); + return CTA_DIMENSION_ERROR; + } + + if (locIDEBUG>0) {printf("State is larger than product of grid dimensions. (%d %d %d) \n" + "We assume the state can be divided into %d separate quantities \n",nx,ny,nz,nquant);} + numDims=4; + ndims[3]=nquant; + + } + } else { + if (locIDEBUG>0) {printf("We do not have meta information; so we assume 1D array of size %d\n",nSize);} + numDims=1; + ndims[0]=nSize; + } + + /* Now write the vector to netcdf */ + + /* First construct the names of the dimensions */ + + /* for safety reasons, underscores in tags are replaced by '-' + since underscores play a dividing role in importing netcdf from scratch + */ + strcpy(modtag,data1->tag); + replace_char_of_string(modtag,'_','-'); + + nameVar_=CTAI_AddNameToPath(path,modtag,"_"); + nameDim1=CTAI_AddNameToPath(nameVar_,"x",""); + nameDim2=CTAI_AddNameToPath(nameVar_,"y",""); + nameDim3=CTAI_AddNameToPath(nameVar_,"z",""); + nameDim4=CTAI_AddNameToPath(nameVar_,"n",""); + nameVar=CTAI_AddNameToPath(path,modtag,""); + + if (append || !isdefvar ){ + /* appending a state-vector; we need to get the dimensions from the file */ + /* and the data array */ + + dimids[0]=timeDimid; + if (numDims>=1){ + if ((retval = nc_inq_dimid(ncid, nameDim1, &dimids[1]))) ERR(retval); + } + if (numDims>=2){ + if ((retval = nc_inq_dimid(ncid, nameDim2, &dimids[2]))) ERR(retval); + } + if (numDims>=3){ + if ((retval = nc_inq_dimid(ncid, nameDim3, &dimids[3]))) ERR(retval); + } + if (numDims>=4){ + if ((retval = nc_inq_dimid(ncid, nameDim4, &dimids[4]))) ERR(retval); + } + if ((retval = nc_inq_varid(ncid, nameVar, &varid ))) ERR(retval); + } + else { + if (locIDEBUG>0) {printf("%s: netcdf dimensions are defined: %d \n",data1->tag, numDims);} + + + /* defining a tree-vector so we have to define the dimensions y*/ + retval = nc_redef(ncid); + + dimids[0]=timeDimid; + if (locIDEBUG>0) {printf(" r1977 dimids[0] %d namedim1 %s\n",timeDimid, nameDim1);} + + + if (numDims>=1){ + if ((retval = nc_def_dim(ncid, nameDim1, ndims[0], &dimids[1]))) ERR(retval); + } + if (numDims>=2){ + if ((retval = nc_def_dim(ncid, nameDim2, ndims[1], &dimids[2]))) ERR(retval); + } + if (numDims>=3){ + if ((retval = nc_def_dim(ncid, nameDim3, ndims[2], &dimids[3]))) ERR(retval); + } + if (numDims>=4){ + if ((retval = nc_def_dim(ncid, nameDim4, ndims[3], &dimids[4]))) ERR(retval); + } + } + + /* Reverse all dimensions since NETCDF accesses arrays in the C way */ + dimidsRev[0]=dimids[0]; + for (idim=0;idim0) {printf("netcdf dimensions have been reversed \n");} + + if (!append){ + + if (isdefvar) { + + if (locIDEBUG>0) {printf("ctai_cta_exporttonetcdf: defining new variable %s \n",nameVar);} + + /* Define the array */ + if (datatype == CTA_REAL) { + if ((retval = nc_def_var(ncid, nameVar, NC_FLOAT, numDims+1, dimidsRev, &varid))) ERR(retval); + } else if (datatype == CTA_INTEGER) { + if ((retval = nc_def_var(ncid, nameVar, NC_INT, numDims+1, dimidsRev, &varid))) ERR(retval); + + } else { //default: double + if ((retval = nc_def_var(ncid, nameVar, NC_DOUBLE, numDims+1, dimidsRev, &varid))) ERR(retval); + + } + } + } + free(nameDim1); + free(nameDim2); + free(nameDim3); + free(nameDim4); + free(nameVar_); + free(nameVar); + + if (!isdefvar) { // write the data itself + + /* Get the values of the vector */ + /* Note: we will export all values as float for space considerations. + Output files are mostly meant for visualisation purposes, so floats will be accurate enough. */ + /* NO! for non-visualisation purposes, this gives problems! */ + + retval=CTA_SizeOf(datatype,&size_type); + + values=CTA_Malloc(nSize * size_type); + + retval=CTA_TreeVector_GetTag(treeVec, tvtag); + if (datatype != CTA_STRING) { + retval = CTA_Vector_GetVals(v,values,nSize, datatype); + if (retval !=CTA_OK) { + printf("cta_treevector.c: export netcdf: getting values of %s failed %d \n",&tvtag[0],retval); + } + } + + // At what level must we add our new values??? + count = CTA_Malloc((numDims+1)*sizeof(size_t)); + start = CTA_Malloc((numDims+1)*sizeof(size_t)); + count[0] = 1; + + //reverse dimensions as well + for (idim=0;idim0) { printf("RECURSIVELY calling ctai_cta_treevectortonetcdf! \n");} + + /* create the path name */ + strcpy(modtag,data1->tag); + replace_char_of_string(modtag,'_','-'); + extendedPath=CTAI_AddNameToPath(path,modtag,"_"); + + /* iterate over all sub-tree-vectors */ + for (istate=0;istatensubtrees;istate++){ + CTAI_CTA_TreeVector_ToNetCDF(data1->treevecs[istate], extendedPath, + ncid, timeDimid, append, ntime, isdefvar); + } + free(extendedPath); + return CTA_OK; + } +#else + printf("CTAI_CTA_TreeVector_ToNetCDF :Version is compiled without NETCDF support. This function should never have been called!!!!\n"); + exit(-1); +#endif + return CTA_INTERNAL_ERROR; +} + + + +/* +Import a treevector from netcdf. The caller has no idea yet of +the tree-structure, so treevec is not yet filled/structured (it has just been created, nothing more) +and all variables are concatenated in one large CTA_DOUBLE vector. +In the near future, the ctai_treevector_importfromnetcdf will be made smarter +such that the tree-structure will be created from the netcdf-file. +*/ +int CTA_TreeVector_VImport(CTA_TreeVector treeVec, CTA_File hfile) { +#if HAVE_LIBNETCDF + + int retval; //Return value of a call + int ncid; + int nvars; + int varid; + char var_name[NC_MAX_NAME+1]; + nc_type data_type; + int numdims; //total number of dimensions + int ndimsvar ;// number of dimensions for a certain variable + int natts, i, ivar; + int dimids[5]; + + double *values_d; + float *values_f; + int *values_i; + double *values_tot; + int nsize, nsize_tot; + size_t dimlength; + int size_type; //C- sizeof of data items in hvec + int indx; + CTA_Datatype datatype; //Data Type of hvec + CTA_File_GetNetcdf(hfile,&ncid); + + /* Assume that the treevector has just been created. This routine + simply reads each variable in the netcdf-file and places it in one long vector. + */ + + // first get all dimensions + retval = nc_inq_ndims(ncid, &numdims); + /* the dim-ids are simply in the range 0..numdims-1 */ + + + retval = nc_inq_nvars(ncid, &nvars); + /* The var-ids are simply in the range 0..nvars-1 + We skip the first two, since they are ALWAYS (when created here in the native code) + time and ntime. */ + + + /* first sweep through the netcdf-file to determine the total size */ + nsize_tot = 0; + for (ivar=2; ivar < nvars ; ivar ++) { + varid = ivar; + + retval = nc_inq_var(ncid, varid, var_name, &data_type, &ndimsvar, dimids, &natts); + if (retval != NC_NOERR ) { + // printf("ctai_tv_importfromnetcdf : error nc_inq_var %d :error nr: %d\n",ncid, retval); + } + nsize = 1; //0-th dimension + for (i=0; i < ndimsvar; i++) { + retval = nc_inq_dimlen(ncid, dimids[i], &dimlength); + nsize = nsize * dimlength; + } + nsize_tot = nsize_tot + nsize; //incremented length of receiving vector + } + + /* create receiving vector. Note: this is ALWAYS a double! */ + values_tot = CTA_Malloc(nsize_tot * sizeof(double)); + + /* second sweep through the netcdf-file; read everything */ + + indx = 0; + for (ivar=2; ivar < nvars ; ivar ++) { + varid = ivar; + + retval = nc_inq_var(ncid, varid, var_name, &data_type, &ndimsvar, dimids, &natts); + nsize = 1; //0-th dimension + for (i=0; i < ndimsvar; i++) { + retval = nc_inq_dimlen(ncid, dimids[i], &dimlength); + nsize = nsize * dimlength; + } + + if (data_type == NC_DOUBLE) {datatype = CTA_DOUBLE; + } else if (data_type == NC_FLOAT) {datatype = CTA_REAL; + } else if (data_type == NC_INT) {datatype = CTA_INTEGER; + } else { printf("unknown datatype in netcdf file \n"); + return CTA_ILLEGAL_DATATYPE; + } + retval=CTA_SizeOf(datatype,&size_type); + + + values_d=CTA_Malloc(nsize*sizeof(double)); + values_f=CTA_Malloc(nsize*sizeof(float)); + values_i=CTA_Malloc(nsize*sizeof(int)); + + if (datatype == CTA_REAL) { + retval = nc_get_var_float(ncid, varid, values_f); + for (i=0; i< nsize; i++) { + values_tot[indx + i] = values_f[i]; + } + } else if (datatype == CTA_INTEGER) { + retval = nc_get_var_int(ncid, varid, values_i); + for (i=0; i< nsize; i++) { + values_tot[indx + i] = values_i[i]; + } + + } else if (datatype == CTA_DOUBLE){ + retval = nc_get_var_double(ncid, varid, values_d); + for (i=0; i< nsize; i++) { + values_tot[indx + i] = values_d[i]; + } + } + + + indx = indx + nsize; + free(values_d); + free(values_i); + free(values_f); + + } + + // set the treevector + retval = CTA_TreeVector_SetVals(treeVec, values_tot,nsize_tot, CTA_DOUBLE); + free(values_tot); + + return retval; + +#else + printf("CTAI_TreeVector_VImport :Version is compiled without NETCDF support. This function should never have been called!!!!\n"); + exit(-1); +#endif +}; + +/* ------------------------------------------------------- */ + +int CTAI_TreeVector_ImportFromNetCDF(CTA_TreeVector treeVec, CTA_File hfile){ + + +#if HAVE_LIBNETCDF + + int retval; //Return value of a call + int ncid; + int nvars; + int varid; + char var_name[NC_MAX_NAME+1]; + nc_type data_type; + int numdims; //total number of dimensions + int ndimsvar ;// number of dimensions for a certain variable + int natts, i, ivar; + int dimids[5]; + void *values; + CTA_TreeVector hsubstate; + char *tag, *tag0; + int nsize, tv_size; + size_t dimlength; + int size_type; //C- sizeof of data items in hvec + int indx; + CTA_Datatype datatype; //Data Type of hvec + CTA_Vector **hvec; //array (of length 1) with pointer to vector of hsubstate + + tag0 = NULL; + CTA_File_GetNetcdf(hfile,&ncid); + + /* Assume that the treevector already contains the desired sub-treevectors. This routine + simply reads each variable in the netcdf-file and places it in the proper subtreevector. + */ + + // first get all dimensions + retval = nc_inq_ndims(ncid, &numdims); + /* the dim-ids are simply in the range 0..numdims-1 */ + + + retval = nc_inq_nvars(ncid, &nvars); + /* The var-ids are simply in the range 0..nvars-1 */ + + + for (ivar=0; ivar < nvars ; ivar ++) { + varid = ivar; + + retval = nc_inq_var(ncid, varid, var_name, &data_type, &ndimsvar, dimids, &natts); + if (retval != NC_NOERR ) { + //printf("ctai_tv_importfromnetcdf : error nc_inq_var %d :error nr: %d\n",ncid, retval); + } + nsize = 1; //0-th dimension + for (i=0; i < ndimsvar; i++) { + retval = nc_inq_dimlen(ncid, dimids[i], &dimlength); + nsize = nsize * dimlength; + } + /* Now find the subtreevec matching the var-name */ + // printf("variable : name %s \n",&var_name); + + tag = CTA_Malloc((strlen(var_name)+1)*sizeof(char)); + tag0 = strrchr(&var_name[0],'_'); + if (tag0 != NULL) { + tag = tag0 + 1 ; + } else { + strcpy(tag, "novar"); + if (nvars == 3) { // simple case: no subtrees, only time,ntime,vec + strcpy(tag , &var_name[0]); + } + } + + //printf("last part of variable name = %s, nvars = %d \n",tag, nvars); + + retval = CTA_TreeVector_GetSubTreeVec(treeVec, tag, &hsubstate); + + if (retval == CTA_ITEM_NOT_FOUND) { + //printf(".. no subtreevector found; ignoring the variable %s \n",&var_name[0]); + } else { + retval = CTA_TreeVector_GetSize(hsubstate, &tv_size); + if (tv_size != nsize) { + printf("ctai_tv_importfromnetcdf: size subtreevec (%s) of %d does not match netcdf-size %d \n",tag, tv_size,nsize); + return CTA_DIMENSION_ERROR; + } + /* now determine the datatype */ + + hvec=CTA_Malloc(sizeof(CTA_Vector*)); + indx=0; + retval=CTAI_TreeVec_GetVecHandles(hsubstate, hvec, &indx); + retval=CTA_Vector_GetDatatype(*hvec[0],&datatype); + retval=CTA_SizeOf(datatype,&size_type); + + values = CTA_Malloc(tv_size * size_type); + + if (datatype == CTA_REAL) { + retval = nc_get_var_float(ncid, varid, values); + } else if (datatype == CTA_INTEGER) { + retval = nc_get_var_int(ncid, varid, values); + } else if (datatype == CTA_DOUBLE){ + retval = nc_get_var_double(ncid, varid, values); + } else { + printf("no valid datatype to import %s from netcdf \n",&var_name[0]); + retval = -1; + } + + /* now fill the subtreevector */ + if (retval == CTA_OK) { + retval = CTA_TreeVector_SetVals(hsubstate, values,tv_size, datatype); + } + retval = CTA_OK; + free(values); + printf("variable %s done \n",var_name); + } + } + return retval; + +#else + printf("CTAI_TreeVector_ImportFromNetCDF :Version is compiled without NETCDF support. This function should never have been called!!!!\n"); + exit(-1); +#endif + + + +}; + + + +int CTAI_TreeVector_ExportToNetCDF(CTA_TreeVector treeVec, CTA_File hfile){ + + +#if HAVE_LIBNETCDF + + int retval; //Return value of a call + int timeDimid; + int oneDimid; + int ncid; + BOOL append; + BOOL isdefvar; + size_t start[1]; + int one; + int timeVarid; + int ntimeVarid; + int ntime; + + int idebug=0; + + if (idebug>0) {printf("CTAI_TreeVector_ExportToNetCDF: Export treevector to netcdf\n");} + + CTA_File_GetNetcdf(hfile,&ncid); + + /* Check whether the file already contains the time dimension */ + append=TRUE; + retval = nc_inq_dimid(ncid, "time", &timeDimid); + + if (retval != NC_NOERR){ + if (idebug>0) {printf("CTAI_TreeVector_ExportToNetCDF: First write to this file; ncid: %d\n",ncid);} + if (idebug>0) {printf("CTAI_TreeVector_ExportToNetCDF: creating time and ntime variables\n");} + + /* Hmm could not find it so let's create it */ + if ((retval = nc_def_dim(ncid, "time", NC_UNLIMITED, &timeDimid))) ERR(retval); + + if ((retval = nc_def_dim(ncid, "one", 1, &oneDimid))) ERR(retval); + + + append=FALSE; + + /* Define the dimensions and the array*/ + retval = nc_redef(ncid); + + /* Now create time and counter array */ + if ((retval = nc_def_var(ncid, "time", NC_INT, 1, &timeDimid, &timeVarid))) ERR(retval); + if ((retval = nc_def_var(ncid, "ntime", NC_INT, 1, &oneDimid, &ntimeVarid))) ERR(retval); + + retval = nc_enddef(ncid); + /* Set first value in time array */ + + start[0] = 0; + one = 1; + ntime = 1; + if ((retval = nc_put_var1_int (ncid, timeVarid, start, &one))) { + ERR(retval); + } + if ((retval = nc_put_var1_int (ncid, ntimeVarid, start, &ntime))) ERR(retval); + } + + /* Call the actual write routine */ + if ((retval = nc_inq_varid(ncid, "time", &timeVarid))) ERR(retval); + if ((retval = nc_inq_varid(ncid, "ntime", &ntimeVarid))) ERR(retval); + + start[0] = 0; + retval = nc_get_var1_int (ncid, ntimeVarid, start, &ntime); + + if (append) { + ntime=ntime+1; + start[0] = 0; + if ((retval = nc_put_var1_int (ncid, ntimeVarid, start, &ntime))) ERR(retval); + start[0] = ntime-1; + if ((retval = nc_put_var1_int (ncid, timeVarid, start, &ntime))) ERR(retval); + } + /* first write only the dimensions, if it is the first time */ + + isdefvar = TRUE; + + + if (idebug>0){printf("NOW 1st NETCDF sweep: writing dimensions\n");} + retval=CTAI_CTA_TreeVector_ToNetCDF(treeVec,"",ncid, timeDimid, append, ntime, isdefvar); + /* now end the definition, if it is the first time */ + + if (!append){ + if ((retval = nc_enddef(ncid))) ERR(retval); + } + + /* in the second sweep , actually write the data. */ + isdefvar = FALSE; + if (IDEBUG>0) {printf("NOW 2nd NETCDF sweep: writing data\n");} + retval=CTAI_CTA_TreeVector_ToNetCDF(treeVec,"",ncid, timeDimid, append, ntime, isdefvar); + + return CTA_OK; + +#else + printf("CTAI_TreeVector_ExportToNetCDF :Version is compiled without NETCDF support. This function should never have been called!!!!\n"); + exit(-1); +#endif + + + + +}; + + + +#undef METHOD +#define METHOD "Export" +int CTA_TreeVector_Export(CTA_TreeVector treevec1, CTA_Handle usrdata){ + int retval; //Return value of a call + CTAI_TreeVec *data1; //Tree-vector specific data + CTAI_TreeVec *data2; //subTree-vector specific data + int istate; // counter over all substates + FILE *file; //File pointer + int n; + int isnetcdf; + int locIDEBUG=0; + CTA_Datatype datatype; + BOOL excludefv; + + /* check handle and get data object */ + + if (IDEBUG>10){printf("\nCTA_TreeVector_Export Start\n");} + retval=CTA_Handle_Check((CTA_Handle) treevec1,CTA_TREEVECTOR); + if (locIDEBUG>0){printf("\nCTA_TreeVector_Export1 %d\n",retval);} + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) treevec1,(void*) &data1); + if (locIDEBUG>0){printf("\nCTA_TreeVector_Export2 %d\n",retval);} + if (retval!=CTA_OK) return retval; + + CTA_Message_Quiet(CTA_TRUE); + if (CTA_Handle_Check(usrdata,CTA_FILE)==CTA_OK) { + CTA_Message_Quiet(CTA_FALSE); + CTA_File_IsNetcdf(usrdata,&isnetcdf); + if (isnetcdf==CTA_TRUE){ + if (locIDEBUG>10){printf("\nCTA_TreeVector_Export: NETCDF\n");} + return CTAI_TreeVector_ExportToNetCDF(treevec1, usrdata); + } + else { + if (locIDEBUG>10){printf("\nCTA_TreeVector_Export: NOT :NETCDF\n");} + retval=CTA_File_Get(usrdata,&file); + /* check whether this is a leaf */ + if (data1->nsubtrees==0){ + /* print tag of this state */ + fprintf(file,"%s=",data1->tag); + /* Print vector */ + if (data1->v) { + retval=CTA_Vector_Export(*data1->v, usrdata); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error using CTA_Vector_Export"); + return retval; + } + } + /* print metainfo, if available */ + if (locIDEBUG>0 || 1 ) { + if (data1->metainfo) { + retval=CTA_Metainfo_Export(*data1->metainfo,usrdata); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error using CTA_Metainfo_Export"); + return retval; + } + } + } + } + else { + + /* first print metainfo of node, if available */ + if (data1->metainfo) { + if (locIDEBUG>1) {printf("METAINFO node: %p \n ",data1->metainfo);} + retval=CTA_Metainfo_Export(*data1->metainfo,usrdata); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error using CTA_Metainfo_Export"); + return retval; + } + } + + /* export sub-states */ + for (istate=0;istatensubtrees;istate++){ + /* print tag of this state */ + retval=CTA_TreeVector_Export(data1->treevecs[istate], usrdata); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error using CTA_TreeVector_Export"); + return retval; + } + + /* Get data object of substate (in order to get its name) */ + retval=CTA_Handle_GetData((CTA_Handle) data1->treevecs[istate], + (void*) &data2); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + excludefv = data1->nocompute[istate]; + + fprintf(file,"%s.%s=%s;",data1->tag,data2->tag,data2->tag); + if (excludefv == FALSE) { + fprintf(file,"part %s\n",data2->tag); + } + else { + fprintf(file,"part %s, EXCLUDEFROMVECTOR\n",data2->tag); + } + fprintf(file,"\n"); + } + } + } + } /* next section: handle export to pack object */ + else if (CTA_Handle_Check(usrdata,CTA_PACK)==CTA_OK){ + CTA_Message_Quiet(CTA_FALSE); + if (locIDEBUG>0){printf("\nCTA_TreeVector_Export packing state\n");} + retval=CTA_Pack_Add(usrdata,data1,sizeof(CTAI_TreeVec)); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error using CTA_Pack_Add"); + return retval; + } + /* add metainfo, if available. This can be the case for both node and leaves. */ + if (data1->metainfo) { + //Now pack the metainfo + retval=CTA_Metainfo_Export(*data1->metainfo, usrdata); + if (retval!=CTA_OK) { + printf("cta_treevector_export: metainfo could not be added: %d\n",retval); + return retval; + } + } + if (data1->nsubtrees==0) { + if (data1->v) { + if (locIDEBUG>0){printf("\nCTA_TreeVector_Export packing leaf tag %s \n",data1->tag);} + retval=CTA_Vector_GetSize(*data1->v,&n); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get size of vector"); + return retval; + } + retval=CTA_Pack_Add(usrdata,&n,sizeof(int)); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error using CTA_Pack_Add"); + return retval; + } + retval=CTA_Vector_GetDatatype(*data1->v,&datatype); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get datatype of vector"); + return retval; + } + retval=CTA_Pack_Add(usrdata,&datatype,sizeof(CTA_Datatype)); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error using CTA_Pack_Add"); + return retval; + } + + if (locIDEBUG>0){printf("\nCTA_TreeVector_Export Export vector\n");} + retval=CTA_Vector_Export(*data1->v, usrdata); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error using CTA_Vector_Export"); + return retval; + } + if (locIDEBUG>0){printf("\nCTA_TreeVector_Export Export done\n");} + } + } else { + if (locIDEBUG>0){printf("\nCTA_TreeVector_Export Packing substates of %s\n",data1->tag);} + + /* add excludevector information */ + for (istate=0;istatensubtrees;istate++){ + retval=CTA_Pack_Add(usrdata,&data1->nocompute[istate],sizeof(int)); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error using CTA_Pack_Add"); + return retval; + } + } + + for (istate=0;istatensubtrees;istate++){ + + retval=CTA_TreeVector_Export(data1->treevecs[istate], usrdata); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error using CTA_TreeVector_Export"); + return retval; + } + } + } + } else { + CTA_Message_Quiet(CTA_FALSE); + return CTA_FORMAT_NOT_SUPPORTED; + } + if (IDEBUG>0){printf("CTA_TreeVector_Export End\n");} + return CTA_OK; +}; + + +#undef METHOD +#define METHOD "Import" +int CTA_TreeVector_Import(CTA_TreeVector treevec1, CTA_Handle usrdata){ + int retval; //Return value of a call + CTAI_TreeVec *data1; //Tree-vector specific data + int istate; // counter over all substates + BOOL packout, isfile; + + CTA_Datatype datatype; + int nsubtrees; + CTA_Vector *v; + CTA_TreeVector *treevecs; + CTA_Metainfo *minfo; + BOOL *nocompute; + int n; + int isnetcdf; + char oldtag[CTA_STRLEN_TAG]; + int locIDEBUG = 0; + + + if (locIDEBUG>4) { + printf("\nCTA_TreeVector_Import: Start\n"); + } + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec1,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec1,(void*) &data1); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + CTA_Message_Quiet(CTA_TRUE); + packout = (CTA_Handle_Check(usrdata,CTA_PACK)==CTA_OK); + CTA_Message_Quiet(CTA_FALSE); + if (packout){ + + if (locIDEBUG>4) { + printf("CTA_TreeVector_Import: importing from pack array %d \n",usrdata ); + } + /* Store some data of current state */ + nsubtrees=data1->nsubtrees; + v=data1->v; + treevecs=data1->treevecs; + nocompute=data1->nocompute; + minfo=data1->metainfo; + strcpy(oldtag,data1->tag); + /* unpack the administration */ + retval=CTA_Pack_Get(usrdata,data1,sizeof(CTAI_TreeVec)); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + if (locIDEBUG>9) { + printf("CTA_TreeVector_Import: data->tag=%s while original tag was %s\n ",data1->tag, oldtag); + printf("CTA_TreeVector_Import: data->nsubtrees=%d\n",data1->nsubtrees); + printf("CTA_TreeVector_Import: data->v =%p\n",data1->v); + printf("CTA_TreeVector_Import: data->metainfo =%p\n",data1->metainfo); + } + + if (strlen(oldtag)>0 && strcmp(oldtag,"unknown")!=0) { strcpy(data1->tag, oldtag) ;} + + /* unpack metainfo */ + if (minfo && !data1->metainfo) { // remove existing meta-info + retval = CTA_Metainfo_Free(minfo); + }; + + if (data1->metainfo) { + /* Check first if a metainfo of the receiving treevector already exists. + If not, create it first. */ + + if (!minfo) { + if (locIDEBUG>4) { + printf("receiving state has no metainfo yet \n"); + printf("old minfo: %p, data1->metainfo: %p\n",minfo,data1->metainfo); + } + //CTA_Malloc? + data1->metainfo = CTA_Malloc(sizeof(CTA_Metainfo)); + retval=CTA_Metainfo_Create(data1->metainfo); + + if (locIDEBUG>4){printf("... therefore it is created. %d \n",retval);} + } else{ //data1->metainfo and minfo + + data1->metainfo = minfo; + } + + retval=CTA_Metainfo_Import(*data1->metainfo,usrdata); + if (retval!=CTA_OK) { + printf("cta_treevector_import: could not get metainfo : %d\n",retval); + return retval; + } + } + + + /* unpack size and datatype of vector */ + if (data1->v) { + retval=CTA_Pack_Get(usrdata,&n,sizeof(int)); + retval=CTA_Pack_Get(usrdata,&datatype,sizeof(CTA_Datatype)); + } + /* do both have a vector (leaf) */ + if (!v && data1->v) { + if (locIDEBUG>4) { + printf("CTA_TreeVector_Import: this is a leaf\n"); + } + /* we need to create a leaf-vector */ + data1->v=CTA_Malloc(sizeof(CTA_Vector)); + /* NOTE SIMPLIFICATION.. CREATING CTA_DEFAULT_VECTOR !! */ + + retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, n, datatype, CTA_NULL, data1->v); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot create Vector"); + return retval; + } + } else if (v && !data1->v) { + retval=CTA_Vector_Free(v); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot free vector"); + return retval; + } + } else { + data1->v=v; + } + + /* do we have substates */ + if (nsubtrees==data1->nsubtrees) { + data1->treevecs=treevecs; + data1->nocompute=nocompute; /*CVV*/ + + } else { + /* Deallocate all substates (if there are any) */ + if (treevecs) { + for (istate=0;istatensubtrees>0) { + data1->treevecs=CTA_Malloc(sizeof(CTA_TreeVector)*data1->nsubtrees); + data1->nocompute=CTA_Malloc(sizeof(BOOL)*data1->nsubtrees); + for (istate=0;istatensubtrees;istate++){ + retval=CTA_TreeVector_Create("import_state", "unknown", + &data1->treevecs[istate]); + data1->nocompute[istate] = FALSE; + } + } + } + /* unpack all data */ + if (data1->nsubtrees>0) { + if (locIDEBUG>0) { + printf("CTA_TreeVector_Import: this is a node\n"); + } + /* Unpack all substates */ + + for (istate=0;istatensubtrees;istate++){ + retval=CTA_Pack_Get(usrdata,&data1->nocompute[istate],sizeof(int)); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error using CTA_Pack_Get"); + return retval; + } + } + + for (istate=0;istatensubtrees;istate++){ + retval=CTA_TreeVector_Import(data1->treevecs[istate], usrdata); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error using CTA_TreeVector_Import"); + return retval; + } + } + } else if (data1->v) { + retval=CTA_Vector_Import(*data1->v,usrdata); + } + } else { + CTA_Message_Quiet(CTA_TRUE); + isfile = (CTA_Handle_Check(usrdata,CTA_FILE)==CTA_OK) ; + CTA_Message_Quiet(CTA_FALSE); + if (isfile) { + CTA_File_IsNetcdf(usrdata,&isnetcdf); + if (isnetcdf==CTA_TRUE){ + return CTAI_TreeVector_ImportFromNetCDF(treevec1, usrdata); + } else { + return CTA_FORMAT_NOT_SUPPORTED; + } + } else { + return CTA_FORMAT_NOT_SUPPORTED; + } + } + if (locIDEBUG>4) { + printf("CTA_TreeVector_Import: End\n"); + } + return CTA_OK; +}; + + + +#undef METHOD +#define METHOD "Free" +int CTA_TreeVector_Free(CTA_TreeVector *treevec, int recursive){ + + int retval; //Return value of a call + CTAI_TreeVec *data; //Tree-vector specific data + int istate; //Counter over substates + int refCount; //Number of referencs to this treevector + + + /* Check for quick return */ + if (*treevec==CTA_NULL) return CTA_OK; + + /* Get the reference count */ + refCount=1; + CTA_Handle_GetRefCount(*treevec,&refCount); + CTA_Handle_DecrRefCount(*treevec); + + retval=CTA_Handle_GetData((CTA_Handle) *treevec,(void*) &data); + if (retval) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + // We are in a leaf + if (data->nsubtrees==0){ + + if (refCount<=1){ + if (data->v){ + retval=CTA_Vector_Free(data->v); + if (retval) { + CTA_WRITE_ERROR("Cannot free vector"); + return retval; + } + } + if (data->metainfo){ + retval=CTA_Metainfo_Free(data->metainfo); + if (retval) { + CTA_WRITE_ERROR("Cannot free metainfo"); + return retval; + } + } + } + + } + else { + + if (recursive){ + for (istate=0;istatensubtrees;istate++){ + + retval=CTA_TreeVector_Free(&data->treevecs[istate], recursive); + if (retval) { + CTA_WRITE_ERROR("Cannot free treevector"); + return retval; + } + } + } + + + } + if (refCount<=1){ + if (data->v) free(data->v); + if (data->nocompute) free(data->nocompute); + if (data->treevecs) free(data->treevecs); + free(data); + retval=CTA_Handle_Free((CTA_Handle*) treevec); + if (retval) { + CTA_WRITE_ERROR("Cannot free handle"); + return retval; + } + } + + return CTA_OK; +}; + + +#undef METHOD +#define METHOD "Info" +int CTA_TreeVector_Info(CTA_TreeVector treevec){ + + int retval; //Return value of a call + CTAI_TreeVec *data; + int istate; // counter over all substates + int n; + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + printf("State-vector information:\n"); + printf("tag :%s\n",data->tag); + printf("nsubtrees :%d\n",data->nsubtrees); + if (data->nsubtrees>0){ + printf("--> nocompute :"); + for (istate=0;istatensubtrees;istate++){ + printf("%d ",data->nocompute[istate]); + } + printf("\n"); + } + + if (data->nsubtrees==0) { + printf("leaf :yes\n"); + n=0; + if (data->v) { + retval=CTA_Vector_GetSize(*data->v,&n); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get size of vector"); + return retval; + } + } + printf("dim :%d\n",n); + } + else { + for (istate=0;istatensubtrees;istate++){ + retval=CTA_TreeVector_Info(data->treevecs[istate]); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve info from treevector"); + return retval; + } + } + } + return CTA_OK; +}; + + +#undef METHOD +#define METHOD "OpOnLeafs" +int CTA_TreeVector_OpOnLeafs( + CTA_TreeVector treevec1, // (First) input treevector + CTA_TreeVector treevec2, // If (input value == empty treevector): + // output treevector + // Else if (input value != CTA_NULL) + // second input treevector + CTA_Func op, // Function to be called for every leaf + CTA_Handle arg) // Extra arguments +{ + int retval; //Return value of a call + + CTA_Function * operator; // the operator given in the input + CTAI_TreeVec *data1, *data2; // contents of the treevectors + int istate; // counter over all substates + BOOL empty2=FALSE; // treevector2 is empty yes/no + CTA_Vector v2; + CTA_TreeVector * t2array; + CTA_TreeVector t2; + int nsubtrees; + char tag2[CTA_STRLEN_TAG]; + + int jdebug=0; + if (jdebug) + { + printf("CTA_TreeVector_OpOnLeafs: start of function %d %d\n", + treevec1, treevec2 ); + } + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec1,CTA_TREEVECTOR); + if (retval!=CTA_OK) + { + char message[1024]; + sprintf(message,"CTA_TreeVector_OpOnLeafs: handle %d is not a TreeVector", + treevec1); + CTA_WRITE_ERROR(message); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec1,(void*) &data1); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + if (treevec2 != CTA_NULL) + { + retval=CTA_Handle_Check((CTA_Handle) treevec2,CTA_TREEVECTOR); + if (retval!=CTA_OK) + { + char message[1024]; + sprintf(message,"CTA_TreeVector_OpOnLeafs: handle %d is not a TreeVector", + treevec2); + CTA_WRITE_ERROR(message); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) treevec2,(void*) &data2); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + } + + if (jdebug) + { + printf("CTA_TreeVector_OpOnLeafs: input is OK"); + } + + /* consistency check */ + if (treevec2 == CTA_NULL) + { + // There is no second treevector + } + if (data2->nsubtrees==0 && data2->v == NULL) + { + // The second treevector is an empty treevector: fill it + // with the output. + empty2 = TRUE; + } + else if (data1->nsubtrees!=data2->nsubtrees) + { + return CTA_TREEVECTORS_NOT_COMPATIBLE; + } + + if (data1->nsubtrees==0) + { + /* No subtrees: this treevector is a leaf treevector + call operator */ + retval = CTA_Func_GetFunc(op, &operator); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error using CTA_Func_GetFunc"); + return retval; + } + + if (treevec2 == CTA_NULL) + { + operator(data1->tag, *data1->v, CTA_NULL, arg, &retval); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error using operator ..."); + return retval; + } + } + else if (empty2) + { + operator(data1->tag, *data1->v, &v2, arg, &retval); + if (retval!=CTA_OK) + { + char message[1024]; + sprintf(message, "CTA_TreeVector_OpOnLeafs: operator %d kapot",op); + CTA_WRITE_ERROR(message); + return retval; + } + if (jdebug) + { + printf("setting vector %d into treevector %d\n",treevec2,v2); + } + retval = CTA_TreeVector_SetVec(treevec2, v2); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot set vector in treevector"); + return retval; + } + } + else + { + operator(data1->tag, *data1->v, *data2->v, arg, &retval); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error using operator ..."); + return retval; + } + } + } + else + { + nsubtrees = data1->nsubtrees; + if (empty2) + { + if (jdebug) + { + printf("CTA_TreeVector_OpOnLeafs: creating %d subleafs\n", + nsubtrees); + } + t2array = CTA_Malloc(nsubtrees*sizeof(CTA_TreeVector)); + } + + /* recursion for all sub-states */ + for (istate=0; istatetreevecs[istate], tag2); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get tag from treevector"); + return retval; + } + + if (jdebug) + { + printf("CTA_TreeVector_OpOnLeafs: creating subleaf %d: '%s'\n", + istate, tag2); + } + + retval=CTA_TreeVector_Create( tag2, tag2, &t2array[istate]); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot create TreeVector"); + return retval; + } + + t2 = t2array[istate]; + } + else + { + t2 = data2->treevecs[istate]; + } + + retval=CTA_TreeVector_OpOnLeafs( + data1->treevecs[istate], t2, op, arg); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error using CTA_TreeVector_OpOnLeafs"); + return retval; + } + } + + if (empty2) + { + if (jdebug) + { + printf("CTA_TreeVector_OpOnLeafs: concatenatng %d leafs\n", + nsubtrees); + } + retval=CTA_TreeVector_Conc( treevec2, t2array, nsubtrees); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error using CTA_TreeVector_COnc"); + return retval; + } + + free(t2array); + } + } + return CTA_OK; +} + + +#undef METHOD +#define METHOD "ElmSqrt" +int CTA_TreeVector_ElmSqrt(CTA_TreeVector y){ + + int retval; //Return value of a call + CTAI_TreeVec *data; + int istate; // counter over all substates + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) y ,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) y,(void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + if (data->nsubtrees==0) + { + retval=CTA_Vector_ElmSqrt(*(data->v)); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error using CTA_Vector_ElmSqrt"); + return retval; + } + + } else { + /* recursion for all sub-states */ + for (istate=0;istatensubtrees;istate++){ + retval=CTA_TreeVector_ElmSqrt(data->treevecs[istate]); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error using CTA_TreeVector_ElmSqrt"); + return retval; + } + } + } + return CTA_OK; +} + + +void CTAI_Treevector_Operation_ScaledRMS( + char *tag, + CTA_Vector v1, + CTA_Vector vscal, + CTA_Handle hdum, int *retval) +{ + + int jdebug; + CTA_Vector vwrk; + double dv2, ds2, drms; + + jdebug=0; + + if (jdebug){ + printf("CTAI_Treevector_Operation_ScaledRMS computing rms "); + printf("of leaf of treevector\n"); + } + + /* create working copy of the vector: vwrk = v1; */ + *retval=CTA_Vector_Duplicate(v1,&vwrk); + if (*retval!=CTA_OK) return; + + /* Scale the vector vwrk = v1.*vscal */ + *retval=CTA_Vector_ElmProd(vwrk,vscal); + if (*retval!=CTA_OK) return; + + /* compute sum of squares */ + /* dv2 = | vwrk |^2 = sum( (v1 .* vscal).^2); */ + *retval=CTA_Vector_Nrm2(vwrk,&dv2); + if (*retval!=CTA_OK) return; + dv2=dv2*dv2; + + /* Delete work vector */ + *retval=CTA_Vector_Free(&vwrk); + if (*retval!=CTA_OK) return; + + /* compute scaling vector's sum-of-squares */ + /* ds2 = | vscal |^2 = sum( vscal.^2); */ + *retval=CTA_Vector_Nrm2(vscal,&ds2); + if (*retval!=CTA_OK) return; + ds2=ds2*ds2; + + if (jdebug) + { + printf("scaling vector's sum-of-squares is %g\n",ds2); + printf("rms=sqrt(%g/%g)\n",dv2,ds2); + } + + /* compute RMS */ + drms=sqrt(dv2/ds2); + + /* output the RMS */ + printf(" %32s: %18.8le\n", tag, drms); +} + + + +void CTAI_Treevector_Operation_ScaledSSQ( + char *tag, + CTA_Vector v1, + CTA_Vector vscal, + CTA_Handle hdum, int *retval) +{ + + int jdebug; + CTA_Vector vwrk; + double dv2; + + jdebug=0; + + if (jdebug){ + printf("CTAI_Treevector_Operation_ScaledSSQ computing "); + printf("ssq of leaf of treevector\n"); + } + + /* create working copy of the vector: vwrk = v1; */ + *retval=CTA_Vector_Duplicate(v1,&vwrk); + if (*retval!=CTA_OK) return; + + /* Scale the vector vwrk = v1.*vscal */ + *retval=CTA_Vector_ElmProd(vwrk,vscal); + if (*retval!=CTA_OK) return; + + /* compute sum of squares */ + /* dv2 = | vwrk |^2 = sum( (v1 .* vscal).^2); */ + *retval=CTA_Vector_Nrm2(vwrk,&dv2); + if (*retval!=CTA_OK) return; + dv2=dv2*dv2; + + /* Delete work vector */ + *retval=CTA_Vector_Free(&vwrk); + if (*retval!=CTA_OK) return; + + /* output the SSQ */ + printf(" %32s: %18.8le\n", tag, dv2); +} + + + + + + +void CTAI_Treevector_Operation_Amax( + char *tag, + CTA_Vector vin, + CTA_Vector *vout, + CTA_Handle hdum, int *retval) +{ + + int jdebug=0; + int iloc; + + if (jdebug){ + printf("CTAI_Treevector_Operation_Amax "); + printf("finding maxabs of leaf of treevector %d\n",vin); + } + + *retval = CTA_Vector_Amax(vin, &iloc); + if (*retval!=CTA_OK) return; + + if (jdebug){ + printf("CTAI_Treevector_Operation_Amax "); + printf("maxabs found at %d\n",iloc); + } + + *retval = CTA_Vector_Create( CTA_DEFAULT_VECTOR, 1, CTA_INTEGER, + CTA_NULL, vout); + if (*retval!=CTA_OK) return; + + *retval = CTA_Vector_SetVals( *vout, &iloc, 1, CTA_INTEGER); + if (*retval!=CTA_OK) return; + + if (jdebug){ + printf("CTAI_Treevector_Operation_Amax: succesful ending\n"); + *retval = CTA_Vector_Export( *vout, CTA_FILE_STDOUT); + if (*retval!=CTA_OK) return; + } + +} + +void CTAI_Treevector_Operation_PrintEntry( + char *tag, + CTA_Vector vin, + CTA_Vector viloc, + CTA_Handle hdum, int *retval) +{ + + int jdebug; + double value; + int iloc; + double NaN; + + // Create a NaN + NaN = 0; NaN = 1.0/NaN; NaN=NaN/NaN; + + + jdebug=0; + + if (jdebug){ + printf("CTAI_Treevector_Operation_PrintEntry "); + printf("print an entry of leaf of treevector\n"); + } + + *retval = CTA_Vector_GetVal(viloc, 1, &iloc, CTA_INTEGER); + if (*retval!=CTA_OK) return; + + if (iloc==0){ + value=NaN; + } + else { + *retval = CTA_Vector_GetVal(vin, iloc, &value, CTA_DOUBLE); + if (*retval!=CTA_OK) return; + } + + /* output the location and value */ + printf(" %32s(%6d): %18.8le\n", tag, iloc, value); + + if (jdebug){ + printf("CTAI_Treevector_Operation_PrintEntry: succesful ending\n"); + } + +} + + + + + + + + + +#undef METHOD +#define METHOD "Gemm" +int CTA_TreeVector_Gemm(CTA_TreeVector *hC, int nc, int transa, int transb, double alpha, CTA_TreeVector *hA, int na, + CTA_Matrix mB, double beta){ + int ma, mb, mc, nb, m; + double *B, fac; + double nul=0.0; + int ierr, ic, i, j, indx, ia; + + if (IDEBUG>10) {printf("debug: entering cta_treevector_gemm\n");} + + /* check dimensions */ + if (transa) return CTA_NOT_YET_SUPPORTED; + + ierr=CTA_TreeVector_GetSize(hC[0],&mc); + for (ic=1;ic10) {printf("debug: cta_treevector_gemm END \n");} + + free(B); + return CTA_OK; +} + + +/* -------------------------- */ +void XML_newline(int spc, xmlTextWriter *writer){ + int i; + char spaces[80]; + char str2[5]; + strcpy(str2," "); + strcpy(spaces,""); + + for (i=0; itag); + + /* Write name */ + xmlTextWriterWriteAttribute(writer, CTAI_XML_CAPTION, (xmlChar *) data->name); + + /* write excludefromvector. this information is available as argument! */ + if (isxfv) { + xmlTextWriterWriteAttribute(writer, CTAI_XML_EXCLUDEFROMVECTOR, (xmlChar *) "true" ); + } + + level = level + 3; XML_newline(level,writer); + xmlTextWriterStartElement(writer, (xmlChar *) "description"); + + /* Check for meta-info */ + if (data->metainfo) { + CTA_Metainfo_GetDescription(*data->metainfo,&desctext); + xmlTextWriterWriteString(writer, (xmlChar *) desctext); + xmlTextWriterEndElement(writer); XML_newline(level,writer); + + } else { + /* write (empty) description */ + xmlTextWriterWriteString(writer, (xmlChar *) "no description yet"); + xmlTextWriterEndElement(writer); XML_newline(level,writer); + } + + /* If we have a vector write the vector */ + if (data->v) { + CTAI_XML_WriteVector(*data->v, "","",*data->metainfo,level+3, writer); + } else { + for (isub=0;isubnsubtrees;isub++){ + subtreevector = data->treevecs[isub]; + /* determine nocompute of substate */ + subisxfv = 0; + if (data->nocompute[isub] == TRUE) {subisxfv = 1;} + + CTA_Handle_Check((CTA_Handle) subtreevector,CTA_TREEVECTOR); + CTA_Handle_GetData((CTA_Handle) subtreevector,(void*) &subdata); + /* If the substate is a leaf, write the vector, i.e. skip the statelevel */ + if (subdata->v) { + + if (subdata->metainfo) { + // printf("!!!calling xml_writevector; sub-metainfo: %s %d\n",subdata->tag, *subdata->metainfo); + CTAI_XML_WriteVector(*subdata->v, subdata->tag, subdata->name,*subdata->metainfo,level+3, writer); + // here: possibly add state for coordinates + } + if (!subdata->metainfo) { + // printf("!!!calling xml_writevector; NO sub-metainfo: %s %d\n",subdata->tag,subdata->metainfo); + CTAI_XML_WriteVector(*subdata->v, subdata->tag, subdata->name,0,level+3,writer);} + + } else { + CTAI_XML_WriteTreeVector(1,subisxfv,subtreevector, level, writer); + } + } + } + level=level-3;XML_newline(level,writer); xmlTextWriterEndElement(writer); + XML_newline(level,writer); +} + + +/* -------------------------------------------------------------------- */ +/** \OBSOLETE brief Generate XML from one COSTA state vector +* +* \param hvec I handle of a COSTA state vector +* \param writer I the XML text writer +*/ +void CTAI_XML_WriteState(CTA_TreeVector treevec, xmlTextWriter *writer) { + CTAI_TreeVec *data; //Tree-vector specific data + const char* name; + int isub; + + /* check handle and get data object */ + CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR); + CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data); + + /* Check for meta-info */ + if (data->metainfo) { + printf("WARNING in CTAI_XML_WriteState\n"); + printf("State-vector contains META information\n"); + printf("XML-export for META information is not (yet) available \n"); + } + + /* Start an element the the name of the tree handle */ + xmlTextWriterStartElement(writer, (xmlChar *) CTAI_Type2String(CTA_TREEVECTOR)); + + /* Write id (if any) */ + name = CTAI_Handle_GetName(treevec); + if (name && *name) { + xmlTextWriterWriteAttribute(writer, CTAI_XML_ID, (xmlChar *) name); + } + + /* Write name */ + xmlTextWriterWriteAttribute(writer, CTAI_XML_NAME, (xmlChar *) data->name); + + /* Write tag */ + xmlTextWriterWriteAttribute(writer, CTAI_XML_TAG, (xmlChar *) data->tag); + + /* If we a vector write the vector */ + if (data->v) { + CTAI_XML_WriteVector(*data->v,"", "", CTA_NULL, 0,writer); + } else { + for (isub=0;isubnsubtrees;isub++){ + + CTAI_XML_WriteState(data->treevecs[isub], writer); + } + } + xmlTextWriterEndElement(writer); +} + +//------------------------------ + +/** \brief Create a treeVector from XML. +* +* \param cur_node I Current XML node +* \return Handle to create or CTA_NULL in case of an error. +*/ +CTA_TreeVector CTAI_XML_CreatetreeVector(xmlNode *cur_node) { + + CTA_TreeVector treevec; /* new state vector */ + CTA_Vector hsubvec; + xmlChar *name = NULL; /* name of the vector to create */ + xmlChar *tag; /* element/property value */ + xmlNode *sub_node = NULL; /* values child node */ + xmlNode *txt_node = NULL; /* values child node */ + int nsubstates; /* number of sub state vectors */ + int nsubvectors; /* number of subvectors */ + CTA_TreeVector *substates; /* Array of handles of sub state vectors */ + int isub; /* loop counter in substates */ + xmlChar *excludefromvector = NULL; /* name of the vector to create */ + char *desctext = NULL; + CTA_Metainfo minfo; + CTA_Metainfo sub_minfo; + BOOL *excludefv; + /* Parse this node's attributes */ + /* Get id */ + + + /* Get name */ + name = xmlGetProp(cur_node, CTAI_XML_CAPTION); + if (!name){ + name= (xmlChar*) CTA_Malloc(5*sizeof(xmlChar)); + strcpy((char *) name, (char *) "none"); + } + + /* Get tag */ + tag = xmlGetProp(cur_node, CTAI_XML_ID); + + /* Create the new state-vector */ + CTA_TreeVector_Create((char *) name, (char *) tag, &treevec); + + /* Create metainfo */ + CTA_Metainfo_Create(&minfo); + + /* Set id (=name) of handle */ + CTAI_Handle_SetName(treevec, "tree-vector"); + + /* Look for a Vector */ + /* Count number of "TREEVECTOR"-elements */ + nsubstates=0; nsubvectors=0; + for (sub_node = cur_node->children; sub_node; sub_node = sub_node->next) { + if (0 == strcmp("description",(char *) sub_node->name)) { + + for (txt_node = sub_node->children; txt_node; txt_node = txt_node->next) { + if (txt_node->type == XML_TEXT_NODE) { + if (desctext !=NULL) { + free(desctext); + } + desctext = (char *) CTA_Malloc((1+strlen((char *) txt_node->content))*sizeof(char)); + strcpy( desctext, (char *) txt_node->content); + CTA_Metainfo_SetDescription(minfo, (char *) txt_node->content); + + // controle + // retval = CTA_Metainfo_GetDescription(minfo,&desctext); + // printf("controle: desctext: %s\n",desctext); + // free(desctext); + // free(minfo); + } + } + /* attach the metainfo (with only the description) to this state */ + /* note: grid and unit information are in a metainfo on a lower level: */ + /* in the state where the actual vector is */ + CTA_TreeVector_SetMetainfo(treevec, minfo); + + } + else if (CTA_VECTOR == CTAI_String2Type((char *) sub_node->name)) { + /* if a treeVectorLeaf is found */ + // now COSTA needs to build an extra state-level at this point. + nsubvectors++; + } + else if (CTA_TREEVECTOR == CTAI_String2Type((char *) sub_node->name)) { + nsubstates++; + } else { + /* continue */ + } + } + + + isub = 0; + if (nsubvectors+nsubstates>0) { + substates=CTA_Malloc((nsubvectors+nsubstates)*sizeof(CTA_TreeVector)); + excludefv=CTA_Malloc((nsubvectors+nsubstates)*sizeof(BOOL)); + + /* first, loop along all subvectors (COSTA-states with one vector) + We need therefore to make an extra state */ + if (nsubvectors>0) { + for (sub_node = cur_node->children; sub_node; sub_node = sub_node->next) { + + if (CTA_VECTOR == CTAI_String2Type((char *) sub_node->name)) { + + /* Get name */ + name = xmlGetProp(sub_node, CTAI_XML_CAPTION); + if (!name){ + name= (xmlChar *) CTA_Malloc(5*sizeof(xmlChar)); strcpy((char *) name,"none"); + } + + /* Get tag */ + tag = xmlGetProp(sub_node, CTAI_XML_ID); + + /* get excludefromVector */ + excludefromvector = xmlGetProp(sub_node, CTAI_XML_EXCLUDEFROMVECTOR); + excludefv[isub] = FALSE; + + if (excludefromvector) { //it exists + if (0 == strcmp("true", (char *) excludefromvector)) { + excludefv[isub] = TRUE; + } + } + + + CTA_Metainfo_Create(&sub_minfo); + hsubvec = CTAI_XML_CreateVector_New(sub_node,sub_minfo); + + /* now make the extra state to attach the vector and the metainfo */ + /* essentially, we create an extra layer that the xml-description does not need */ + CTA_TreeVector_Create((char *) name, (char *) tag, &substates[isub]); + CTA_TreeVector_SetVec(substates[isub],hsubvec); + + /* here the metainfo with grid should be set */ + CTA_TreeVector_SetMetainfo(substates[isub], sub_minfo); + + isub++; + + } + } + + } + + /* now, add the treevectorsleafs to substatepositions isub+1..(nsubvectors+nsubstates) + (these are COSTA substates) */ + if (nsubstates>0) { + for (sub_node = cur_node->children; sub_node; sub_node = sub_node->next) { + if (CTA_TREEVECTOR == CTAI_String2Type((char *) sub_node->name)) { + + /* get excludefromVector */ + excludefromvector = xmlGetProp(sub_node, CTAI_XML_EXCLUDEFROMVECTOR); + excludefv[isub] = FALSE; + if (excludefromvector) { //it exists + if (0 == strcmp("true", (char *) excludefromvector)) { + excludefv[isub] = TRUE; + } + } + + substates[isub]= CTAI_XML_CreatetreeVector(sub_node); + isub++; + } + } + + } + CTA_TreeVector_Conc(treevec, substates, nsubvectors+nsubstates); + /* fill the excludefromvector list */ + CTAI_TreeVector_ExcludeFromVector(treevec,excludefv); + // CTA_TreeVector_Export(treevec,CTA_FILE_STDOUT); + free(substates); + } + + xmlFree(name); + xmlFree(tag); + xmlFree(excludefromvector); + + return treevec; +} +//------------------------ +/* sweep through the treevector and fill the regular grids using the references to its own + (nocompute)-coordinate-vectors */ +int CTAI_XML_TreeVector_regular_grid(CTA_TreeVector treevec, CTA_TreeVector root ){ + int retval, istate; + CTA_TreeVector coords; + CTAI_TreeVec *data; //Tree-vector specific data + CTAI_Gridm thisgrid; + char *tag0,*tag; + int ndims_phys, j,ncoords, found; + double *vals = NULL; + + /* Parse the whole tree and look for grids */ + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR); + if (retval!=CTA_OK) return retval; + retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data); + if (retval!=CTA_OK) return retval; + + if (data->metainfo) { + retval = CTA_Metainfo_GetGrid(*data->metainfo,&thisgrid); + if (thisgrid.type > -4 && thisgrid.type <0) { // reference to coords; not yet filled in + ndims_phys = -thisgrid.type; + + /* the first dimension (x) */ + tag0 = strrchr(thisgrid.refdimp[1],'/'); + if (tag0 == NULL) { + tag = thisgrid.refdimp[1];} + else { + tag = tag0 + 1 ; + } + // printf("ctai-xml-tv_reg_grid: searching for tag |%s| \n",tag); + retval = CTA_TreeVector_GetSubTreeVec(root, tag, &coords); + if (retval != CTA_OK) { + printf("ctai_xml_treevector_regular_grid: tag %s not found! \n",tag);exit(-1); + } + retval = CTA_TreeVector_GetSize(coords,&ncoords); + if (ncoords != thisgrid.nsize) { + printf("ctai_xml_treevector_regular_grid: number of coordinates %d is wrong! \n",ncoords);exit(-1); + } + vals = CTA_Malloc(ncoords*sizeof(double)); // allocate for first time + retval = CTA_TreeVector_GetVals(coords, vals, ncoords, CTA_DOUBLE); + thisgrid.x_origin = vals[0]; + for (j=2; j<= ncoords; j++) { + if (abs(vals[j-1] - vals[0]) < 1.0E-5) {break;} + } + thisgrid.nx = j-1; + thisgrid.dx = vals[1]-vals[0]; + if (j >= ncoords) { + if (ndims_phys > 1) {printf("for regular grids a repeating x-sequence is required!\n"); exit(-1);} + thisgrid.nx=ncoords; + } + + /* now, the second dimension (y) */ + if (ndims_phys > 1) { + + tag0 = strrchr(thisgrid.refdimp[2],'/') ; + if (tag0 == NULL) { + tag = thisgrid.refdimp[1];} + else { + tag = tag0 + 1 ; + } + // printf("ctai-xml-tv_reg_grid: searching for tag |%s| \n",tag); + retval = CTA_TreeVector_GetSubTreeVec(root, tag, &coords); + if (retval != CTA_OK) { + printf("ctai_xml_treevector_regular_grid: tag %s not found! \n",tag); + exit(-1); + } + retval = CTA_TreeVector_GetSize(coords,&ncoords); + if (ncoords != thisgrid.nsize) { + printf("ctai_xml_treevector_regular_grid: number of coordinates %d is wrong! \n",ncoords);exit(-1); + } + retval = CTA_TreeVector_GetVals(coords, vals, ncoords, CTA_DOUBLE); + thisgrid.y_origin = vals[0]; + found = 0; + for (j=1; j < ncoords/thisgrid.nx; j++) { + if (abs(vals[0+j*thisgrid.nx] - vals[0]) < 1.0E-5) {found=1; break;} + } + if (found == 1){ + thisgrid.ny = j; + } else { + thisgrid.ny = ncoords/thisgrid.nx; + if (ndims_phys > 2) {printf("for regular grids a repeating y-sequence is required!\n"); exit(-1);} + } + thisgrid.dy = vals[0+thisgrid.nx]-vals[0]; + } + /* now, the third and last dimension (z) */ + if (ndims_phys > 2) { + tag0 = strrchr(thisgrid.refdimp[3],'/') ; + if (tag0 == NULL) { + tag = thisgrid.refdimp[1];} + else { + tag = tag0 + 1 ; + } + //printf("ctai-xml-tv_reg_grid: searching for tag |%s| \n",tag); + retval = CTA_TreeVector_GetSubTreeVec(root, tag, &coords); + if (retval != CTA_OK) { + printf("ctai_xml_treevector_regular_grid: tag %s not found! \n",tag); + exit(-1); + } + retval = CTA_TreeVector_GetSize(coords,&ncoords); + if (ncoords != thisgrid.nsize) { + printf("ctai_xml_treevector_regular_grid: number of coordinates %d is wrong! \n",ncoords);exit(-1); + } + retval = CTA_TreeVector_GetVals(coords, vals, ncoords, CTA_DOUBLE); + thisgrid.z_origin = vals[0]; + found = 0; + for (j=2; j <= thisgrid.nx*thisgrid.ny; j++) { + if (abs(vals[j-1] - vals[0]) > 1.0E-5) {found=1;} + } + if (found == 0){ + thisgrid.nz = ncoords / (thisgrid.nx*thisgrid.ny); + } else { + printf("for regular grids a repeating y-sequence is required!\n"); exit(-1); + } + thisgrid.dz = vals[0+thisgrid.nx*thisgrid.ny]-vals[0]; + + } + thisgrid.type = ndims_phys; + retval = CTA_Metainfo_SetGrid(*data->metainfo,&thisgrid); + // retval = CTA_Metainfo_Copy(minfo,*data->metainfo); + } + } + + + + + /* parse subtrees */ + for (istate=0;istatensubtrees;istate++){ + retval = CTAI_XML_TreeVector_regular_grid(data->treevecs[istate], root); + } + return CTA_OK; +} + + +//--------------------------- +int CTAI_TreeVector_ExcludeFromVector(CTA_TreeVector treevec, BOOL *excludefv){ + + int retval; //Return value of a call + CTAI_TreeVec *data; + int istate; // counter over all substates + + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR); + if (retval!=CTA_OK) return retval; + retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data); + if (retval!=CTA_OK) return retval; + + if (data->nsubtrees==0) { + printf("CTAI:TreeVector_ExcludeFromVector: ERROR: no substates available.\n"); + return CTA_CONCAT_NOT_POSSIBLE; + } + else { + for (istate=0;istatensubtrees;istate++){ + data->nocompute[istate] = excludefv[istate]; + } + // printf("ctai_treevec_XFV: state: %s %d %d %d %d \n",data->tag,data->nsubtrees,data->nocompute[0],data->nocompute[1],data->nocompute[2]); + } + return CTA_OK; +}; + + + +#undef METHOD +#define METHOD "IncRefCount" +int CTA_TreeVector_IncRefCount(CTA_TreeVector treevec){ + + int retval; //Return value of a call + CTAI_TreeVec *data1; //Tree-vector specific data + int istate; // counter over all substates + + /* check handle and get data object */ + retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + + /* Increase reference count */ + CTA_Handle_IncRefCount((CTA_Handle) treevec); + + + retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data1); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* Increase reference count of all sub treevectors as well */ + for (istate=0;istatensubtrees;istate++){ + retval=CTA_TreeVector_IncRefCount(data1->treevecs[istate]); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error setting reference count"); + return retval; + } + + } + return CTA_OK; +}; + + + + + + + + + + + + + + + + +//---------------------------- + +/* Interfacing with Fortran */ +CTAEXPORT void CTA_TREEVECTOR_CREATE_F77(char *name, char *tag, int *treevec, int *ierr, + int len_name, int len_tag){ + + char *c_name; + char *c_tag; + // create a c-string equivalent to name + c_name=CTA_Malloc((len_name+1)*sizeof(char)); + CTA_fstr2cstr(name,c_name,len_name); + // create a c-string equivalent to tag + c_tag=CTA_Malloc((len_tag+1)*sizeof(char)); + CTA_fstr2cstr(tag,c_tag,len_tag); + + *ierr=CTA_TreeVector_Create(c_name, c_tag, (CTA_TreeVector*) treevec); + + free(c_name); + free(c_tag); + +}; + +CTAEXPORT void CTA_TREEVECTOR_DUPLICATE_F77(int *treevec1, int *treevec2, int *ierr ){ + *ierr=CTA_TreeVector_Duplicate((CTA_TreeVector) *treevec1, (CTA_TreeVector*) treevec2 ); +}; + + +CTAEXPORT void CTA_TREEVECTOR_CONC_F77(int *treevec1, int *treevecs, int *nsubtrees, int *ierr){ + + *ierr=CTA_TreeVector_Conc((CTA_TreeVector) *treevec1, (CTA_TreeVector*) treevecs, *nsubtrees); +}; + + +CTAEXPORT void CTA_TREEVECTOR_GETSUBTREEVEC_F77(int *treevec, char *tag, int *hsubstate, int *ierr, + int len_tag){ + char *c_tag; + // create a c-string equivalent to tag + c_tag=CTA_Malloc((len_tag+1)*sizeof(char)); + CTA_fstr2cstr(tag,c_tag,len_tag); + + *ierr=CTA_TreeVector_GetSubTreeVec((CTA_TreeVector) *treevec, c_tag, + (CTA_TreeVector*) hsubstate) ; + free(c_tag); +}; + +CTAEXPORT void CTA_TREEVECTOR_SETSUBTREENOCOMPUTE_F77(int *treevec, char *tag, int *ierr, + int len_tag){ + char *c_tag; + // create a c-string equivalent to tag + c_tag=CTA_Malloc((len_tag+1)*sizeof(char)); + CTA_fstr2cstr(tag,c_tag,len_tag); + + *ierr=CTA_TreeVector_SetSubTreeNocompute((CTA_TreeVector) *treevec, c_tag) ; + free(c_tag); +}; + + +CTAEXPORT void CTA_TREEVECTOR_GETSUBTREEVECINDEX_F77(int *treevec, int *index, int *hsubstate, int *ierr){ + + *ierr=CTA_TreeVector_GetSubTreeVecIndex((CTA_TreeVector) *treevec, *index, + (CTA_TreeVector*) hsubstate) ; +}; + +CTAEXPORT void CTA_TREEVECTOR_GETSUBTREEVECID_F77(int *treevec, int *index, char *tag, int *ierr, int len_tag){ + char c_tag[CTA_STRLEN_TAG]; + *ierr=CTA_TreeVector_GetSubTreeVecId((CTA_TreeVector) *treevec, *index, + c_tag) ; + CTA_cstr2fstr(c_tag,tag,len_tag); +}; + +CTAEXPORT void CTA_TREEVECTOR_GETTAG_F77(int *treevec, char *tag, int *ierr, int len_tag){ + char c_tag[CTA_STRLEN_TAG]; + + *ierr=CTA_TreeVector_GetTag((CTA_TreeVector) *treevec, c_tag) ; + CTA_cstr2fstr(c_tag,tag,len_tag); +}; + +CTAEXPORT void CTA_TREEVECTOR_SETVEC_F77(int *treevec, int *hvec, int *ierr){ + *ierr=CTA_TreeVector_SetVec((CTA_TreeVector) *treevec, (CTA_Vector) *hvec); +}; + +CTAEXPORT void CTA_TREEVECTOR_GETVEC_F77(int *treevec, int *hvec, int *ierr){ + *ierr=CTA_TreeVector_GetVec((CTA_TreeVector)* treevec, (CTA_Vector) *hvec); +}; + +CTAEXPORT void CTA_TREEVECTOR_SETVALS_F77(int *treevec, void *val,int *nval, int *datatype, int *ierr){ + *ierr=CTA_TreeVector_SetVals((CTA_TreeVector) *treevec, val, *nval, (CTA_Datatype) *datatype); +}; + +CTAEXPORT void CTA_TREEVECTOR_GETVALS_F77(int *treevec, void *val,int *nval, int *datatype, int *ierr){ + *ierr=CTA_TreeVector_GetVals((CTA_TreeVector) *treevec, val, *nval, (CTA_Datatype) *datatype); +}; + + +CTAEXPORT void CTA_TREEVECTOR_GETVAL_F77(int *treevec, int *i, void *val, int *datatype, int *ierr){ + *ierr=CTA_TreeVector_GetVal((CTA_TreeVector) *treevec, *i, val, (CTA_Datatype) *datatype); +}; + +CTAEXPORT void CTA_TREEVECTOR_SETVAL_F77(int *treevec, int *i, void *val, int *datatype, int *ierr){ + *ierr=CTA_TreeVector_SetVal((CTA_TreeVector) *treevec, *i, val, (CTA_Datatype) *datatype); +}; + + + +CTAEXPORT void CTA_TREEVECTOR_GETSIZE_F77(int *treevec, int *n, int *ierr){ + *ierr=CTA_TreeVector_GetSize((CTA_TreeVector) *treevec, n); +}; + +CTAEXPORT void CTA_TREEVECTOR_SETMETAINFO_F77(int *treevec, int *minfo, int *ierr){ + *ierr= CTA_TreeVector_SetMetainfo((CTA_TreeVector) *treevec, (CTA_Metainfo) *minfo); +} + +CTAEXPORT void CTA_TREEVECTOR_GETMETAINFO_F77(int *treevec, int *minfo, int *ierr){ + *ierr= CTA_TreeVector_GetMetainfo((CTA_TreeVector) *treevec, (CTA_Metainfo) *minfo); +} + + +CTAEXPORT void CTA_TREEVECTOR_COPY_F77(int *treevec1, int *treevec2, int *ierr){ + *ierr=CTA_TreeVector_Copy((CTA_TreeVector) *treevec1, (CTA_TreeVector) *treevec2); +} + +CTAEXPORT void CTA_TREEVECTOR_AXPY_F77(int *treevecy, double *alpha, int *treevecx, int *ierr){ + *ierr=CTA_TreeVector_Axpy((CTA_TreeVector) *treevecy, *alpha, (CTA_TreeVector) *treevecx); +} + +CTAEXPORT void CTA_TREEVECTOR_DOT_F77(int *treevec1, int *treevec2, double *dotprod, int *ierr){ + *ierr=CTA_TreeVector_Dot((CTA_TreeVector) *treevec1, (CTA_TreeVector) *treevec2, + dotprod); +} + +CTAEXPORT void CTA_TREEVECTOR_NRM2_F77(int *treevec1, double *nrm2, int *ierr){ + *ierr=CTA_TreeVector_Nrm2((CTA_TreeVector) *treevec1, nrm2); +} + +CTAEXPORT void CTA_TREEVECTOR_SETCONSTANT_F77(int *treevec, void *val, int *datatype, int *ierr){ + *ierr=CTA_TreeVector_SetConstant((CTA_TreeVector) *treevec, val, (CTA_Datatype) *datatype); +} + +CTAEXPORT void CTA_TREEVECTOR_SCAL_F77(int *treevec, double *val, int *ierr){ + *ierr=CTA_TreeVector_Scal((CTA_TreeVector) *treevec, *val); +} + + +CTAEXPORT void CTA_TREEVECTOR_EXPORT_F77(int *hvec_x, int *usrdata, int *ierr){ + *ierr=CTA_TreeVector_Export((CTA_TreeVector) *hvec_x, (CTA_Handle) *usrdata); +} + +CTAEXPORT void CTA_TREEVECTOR_IMPORT_F77(int *hvec_x, int *usrdata, int *ierr){ + *ierr=CTA_TreeVector_Import((CTA_TreeVector) *hvec_x, (CTA_Handle) *usrdata); +} + +CTAEXPORT void CTA_TREEVECTOR_FREE_F77(int *hvec_x, int *recursive, int *ierr){ + *ierr=CTA_TreeVector_Free((CTA_TreeVector *) hvec_x, *recursive); +} + + +CTAEXPORT void CTA_TREEVECTOR_INFO_F77(int *hvec_x, int *ierr){ + *ierr=CTA_TreeVector_Info((CTA_TreeVector) *hvec_x); + +} + +CTAEXPORT void CTA_TREEVECTOR_LIST_F77(int *hvec_x, int *hvec, int *ierr){ + *ierr=CTA_TreeVector_List((CTA_TreeVector) *hvec_x, (CTA_Vector) *hvec); +} + +CTAEXPORT void CTA_TREEVECTOR_GEMM_F77(int *sC, int *nc, int *transa, int *transb, double *alpha, int *sA, int *na, + int *mB, double *beta, int *ierr){ + *ierr=CTA_TreeVector_Gemm((CTA_TreeVector*) sC, *nc, *transa, *transb, *alpha, (CTA_TreeVector*) sA, *na, + (CTA_Matrix) *mB, *beta); +} + + +CTAEXPORT void CTA_TREEVECTOR_OPONLEAFS_F77( int *treevec1, int* treevec2, + int *op, int *arg, int *ierr) +{ + *ierr = CTA_TreeVector_OpOnLeafs( (CTA_TreeVector) *treevec1, + (CTA_TreeVector) *treevec2, (CTA_Func) *op, + (CTA_Handle) *arg); +} + + + +CTAEXPORT void CTA_TREEVECTOR_ELMSQRT_F77(int *y, int *ierr){ + *ierr=CTA_TreeVector_ElmSqrt((CTA_TreeVector) *y); +} + +CTAEXPORT void CTA_TREEVECTOR_ELMPROD_F77(int *y, int *x, int *ierr){ + *ierr=CTA_TreeVector_ElmProd((CTA_TreeVector) *y, (CTA_TreeVector) *x); +} + +CTAEXPORT void CTA_TREEVECTOR_GETNUMSUBTREE_F77(int *treevec, int* numSubTrees, int *ierr){ + *ierr=CTA_TreeVector_GetNumSubTree((CTA_TreeVector) *treevec, numSubTrees); +} + diff --git a/costa/native/cta/src/cta_util_methods.c b/costa/native/cta/src/cta_util_methods.c new file mode 100644 index 000000000..4473f9daf --- /dev/null +++ b/costa/native/cta/src/cta_util_methods.c @@ -0,0 +1,453 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_util_methods.c $ +$Revision: 2751 $, $Date: 2011-09-09 08:58:46 +0200 (Fri, 09 Sep 2011) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2007 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#include "cta.h" +#include "cta_mem.h" +#include "cta_util_methods.h" +#include "cta_message.h" +#include "f_cta_utils.h" + +#define CTA_UTIL_METHODSPRINTOBSERVATIONS_F77 F77_CALL(cta_util_methodsprintobservations,CTA_UTIL_METHODSPRINTOBSERVATIONS) +#define CTA_UTIL_METHODSSELECTOBSERVATIONS_F77 F77_CALL(cta_util_methodsselectobservations,CTA_UTIL_METHODSSELECTOBSERVATIONS) +#define CTA_UTIL_METHODSOPENRESULTFILE_F77 F77_CALL(cta_util_methodsopenresultfile,CTA_UTIL_METHODSOPENRESULTFILE) + +#define IDEBUG (0) + +#define CLASSNAME "CTA_Util_Methods" + +/** \brief Internal routine prints the predicted values and the observed values + * + * \param fgModel :predicted values of foreground run + * \param bgModel :predicted values of background run + * \param + * if set to CTA_NULL NaN will be printed as result + * \param vObs :Observed values + * \param time :Corresponding time + * \param file :Output file + * + * \return error status: CTA_OK if successful + */ +int CTAI_Util_MethodsPrintObservations(CTA_Vector fgModel, CTA_Vector bgModel, + CTA_Vector vObs, CTA_Vector stationNames, CTA_Time time, + CTA_File file){ + + int iObs, nObs; + int ierr; + CTA_String vStationNames; + double vFgModel, vBgModel, vObservations, vTime; + double t1, t2; + char outStr[1024]; + + ierr=CTA_String_Create(&vStationNames); + + /* Get the number of observations */ + ierr=CTA_Vector_GetSize(fgModel, &nObs); + if (ierr!=CTA_OK) return ierr; + + /* Get time Note we will print end-time of span */ + ierr=CTA_Time_GetSpan(time, &t1, &t2); + if (ierr!=CTA_OK) return ierr; + vTime=t2; + + for (iObs=1;iObs<=nObs;iObs++){ + /* Get values */ + ierr=CTA_Vector_GetVal(fgModel, iObs, &vFgModel, CTA_DOUBLE); + if (ierr!=CTA_OK) return ierr; + ierr=CTA_Vector_GetVal(bgModel, iObs, &vBgModel, CTA_DOUBLE); + if (ierr!=CTA_OK) return ierr; + ierr=CTA_Vector_GetVal(vObs, iObs, &vObservations, CTA_DOUBLE); + if (ierr!=CTA_OK) return ierr; + ierr=CTA_Vector_GetVal(stationNames, iObs, &vStationNames, CTA_STRING); + if (ierr!=CTA_OK) return ierr; + /* Write Line of output */ + + sprintf(outStr,"%18.8le, %18s, %18.8le, %18.8le, %18.8le ", vTime, + CTAI_String_GetPtr(vStationNames), vFgModel, vBgModel, + vObservations); + ierr=CTA_File_WriteStr(file, outStr, CTA_TRUE); + if (ierr!=CTA_OK) return ierr; + } + + /* Free Local work variables */ + ierr=CTA_String_Free(&vStationNames); + if (ierr!=CTA_OK) return ierr; + + return CTA_OK; +} + + +#undef METHOD +#define METHOD "MethodsPrintObservations" +int CTA_Util_MethodsPrintObservations(CTA_Handle fgModel, CTA_Handle bgModel, + CTA_StochObs sObs, CTA_Time time, CTA_File file, int printHeader){ + + CTA_Datatype tFgModel, tBgModel; + int ierr; + BOOL lConvertFg, lConvertBg; + int nobs; + CTA_ObsDescr obsDescr; + CTA_Vector vStationNames, vObs, vFgModel, vBgModel; + double NaN; + char outStr[1024]; + + /* print header */ + if (printHeader) { + sprintf(outStr,"%18s, %18s, %18s, %18s, %18s ", "Time", "Station Name", "Fg-prediction", "Bg-prediction", "Observed"); + ierr=CTA_File_WriteStr(file, outStr, CTA_TRUE); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("cannot write struct to file"); + return ierr; + } + } + + /* quick return when sobs is CTA_NULL */ + if (sObs==CTA_NULL) return CTA_OK; + + /* Set value NaN */ + NaN = 0; NaN = 1.0/NaN; NaN=NaN/NaN; + + /* Get the types of the various handles */ + ierr=CTA_Handle_GetDatatype(fgModel, &tFgModel); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get datatype from fgModel"); + return ierr; + } + ierr=CTA_Handle_GetDatatype(bgModel, &tBgModel); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get datatype from bgModlel"); + return ierr; + } + + /* set number of observations (nobs) */ + nobs=0; + ierr=CTA_SObs_Count(sObs,&nobs); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Cannot set number of observations"); + return ierr; + } + if (nobs==0) return CTA_OK; + + /* Do we need to convert handles */ + if (fgModel==CTA_NULL || tFgModel==CTA_MODEL){ + lConvertFg=TRUE; + } else if (tFgModel==CTA_VECTOR) { + lConvertFg=FALSE; + } else { + printf("Error in CTA_Util_MethodsPrintObservations:\n"); + printf(" illegal input argument fgModel \n"); + return CTA_ILLEGAL_HANDLE; + } + if (bgModel==CTA_NULL || tBgModel==CTA_MODEL){ + lConvertBg=TRUE; + } else if (tBgModel==CTA_VECTOR) { + lConvertBg=FALSE; + } else { + printf("Error in CTA_Util_MethodsPrintObservations:\n"); + printf(" illegal input argument bgModel \n"); + return CTA_ILLEGAL_HANDLE; + + } + + /* Get the observation description */ + ierr=CTA_SObs_GetDescription(sObs, &obsDescr); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("CAnnot get observation description"); + return ierr; + } + + /* Create and set work arrays */ + /* - Station names */ + ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR,nobs,CTA_STRING,CTA_NULL, + &vStationNames); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return ierr; + } + ierr=CTA_ObsDescr_Get_ValueProperties(obsDescr, "NAME", vStationNames, CTA_STRING); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Error using CTA_ObsDescr_Get_ValueProperties"); + return ierr; + } + + /*- observed values */ + ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR,nobs,CTA_DOUBLE,CTA_NULL, + &vObs); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return ierr; + } + ierr=CTA_SObs_GetVal (sObs, vObs); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get values form observation"); + return ierr; + } + + /*- predicted values: */ + /* Foreground run */ + if (lConvertFg) { + ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR,nobs,CTA_DOUBLE,CTA_NULL, + &vFgModel); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Cannot create vector"); + return ierr; + } + if (fgModel==CTA_NULL) { + ierr=CTA_Vector_SetConstant(vFgModel,&NaN,CTA_DOUBLE); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Cannot set constant in vector"); + return ierr; + } + } else { + ierr=CTA_Model_GetObsValues (fgModel, time, obsDescr, vFgModel); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get observed values from fgModel"); + return ierr; + } + } + } else { + vFgModel = fgModel; + } + + /* Background run */ + if (lConvertBg) { + ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR,nobs,CTA_DOUBLE,CTA_NULL, + &vBgModel); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Cannot create an vector"); + return ierr; + } + if (bgModel==CTA_NULL) { + ierr=CTA_Vector_SetConstant(vBgModel,&NaN,CTA_DOUBLE); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Cannot set constants in vector"); + return ierr; + } + } else { + ierr=CTA_Model_GetObsValues (bgModel, time, obsDescr, vBgModel); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get observations from bgModel"); + return ierr; + } + } + } else { + vBgModel = bgModel; + } + + /* print output */ + ierr=CTAI_Util_MethodsPrintObservations(vFgModel, vBgModel, vObs, + vStationNames, time, file); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Error using CTAI_Util_MethodsPrintObservations"); + return ierr; + } + + /* Free work arrays */ + if (lConvertFg) { + CTA_Vector_Free(&vFgModel); + } + if (lConvertBg) { + CTA_Vector_Free(&vBgModel); + } + CTA_Vector_Free(&vStationNames); + CTA_Vector_Free(&vObs); + + CTA_ObsDescr_Free(&obsDescr); + + return CTA_OK; + +} + +#undef METHOD +#define METHOD "Util_MethodsSelectObservations" +int CTA_Util_MethodsSelectObservations(CTA_Model model, CTA_StochObs sObsAll, CTA_Time spanSim, CTA_StochObs *sObsSel){ + +CTA_StochObs sObsSpanSim; +CTA_ObsDescr obsDescrSpanSim; +CTA_String modSelect; +int ierr; + + *sObsSel=CTA_NULL; + + if (sObsAll == CTA_NULL) { + *sObsSel = CTA_NULL; + return CTA_OK; + } + + if (IDEBUG>0) {printf("CTA_Util_MethodsSelectObservations START");} + +/* Select all observations that fit in the simulation time span */ + ierr=CTA_SObs_CreateTimSel(sObsAll,spanSim,&sObsSpanSim); + if (ierr!=CTA_OK) { + char message[1024]; + sprintf(message, "Error %d selecting observations for simulation timespan", + ierr); + CTA_WRITE_ERROR(message); + return ierr; + } + + ierr=CTA_SObs_GetDescription(sObsSpanSim,&obsDescrSpanSim); + if (ierr!=CTA_OK) { + char message[1024]; + sprintf(message, "Error %d in CTA_SObs_GetDescription", ierr); + CTA_WRITE_ERROR(message); + return ierr; + } + ierr=CTA_String_Create(&modSelect); + + if (IDEBUG>0) {printf("CTA_Util_MethodsSelectObservations 2 \n");} + + ierr=CTA_Model_GetObsSelect(model,spanSim,obsDescrSpanSim,modSelect); + + if (IDEBUG>0) {printf("CTA_Util_MethodsSelectObservations 3 \n");} + + if (ierr!=CTA_OK) { + char message[1024]; + sprintf(message, "Error %d in CTA_Model_GetObsSelect", ierr); + CTA_WRITE_ERROR(message); + return ierr; + } + + + ierr=CTA_SObs_CreateSel(sObsSpanSim,modSelect,sObsSel); + if (ierr!=CTA_OK) { + char message[1024]; + sprintf(message, "Error %d in CTA_SObs_CreateSel", ierr); + CTA_WRITE_ERROR(message); + return ierr; + } + + if (IDEBUG>0) {printf("CTA_Util_MethodsSelectObservations 4 \n");} + + CTA_String_Free(&modSelect); + CTA_ObsDescr_Free(&obsDescrSpanSim); + CTA_SObs_Free(&sObsSpanSim); + + return CTA_OK; +} + + +#undef METHOD +#define METHOD "Util_MethodsOpenResultFile" +int CTA_Util_MethodsOpenResultFile(char *stationFile, + CTA_File *fStationFile){ + + CTA_String sfile; + int ierr; + + CTA_String_Create(&sfile); + CTA_String_Set(sfile,stationFile); + CTA_File_Create(fStationFile); + ierr=CTA_File_Open(*fStationFile ,sfile,CTA_NULL); + CTA_String_Free(&sfile); + if (ierr!=CTA_OK) { + CTA_WRITE_ERROR("Cannot free string"); + return ierr; + } + ierr=CTA_Util_MethodsPrintObservations(CTA_NULL, CTA_NULL, CTA_NULL, + CTA_NULL, *fStationFile, CTA_TRUE); + return ierr; +} + + +//int CTA_Util_RankReduction(CTA_State scaling, int *rankIn, int *rankOut, CTA_State *QmatL){ +// +// int icol, ierr; +// CTA_Datatype datatype; +// double *LTL; +// +// /* Check Handles */ +// for (icol=0;icol +#include +#include "cta_util_sort.h" + +void swap(int *x,int *y){ + + int temp; + temp = *x; + *x = *y; + *y = temp; +} + + + +int choose_pivot(int i,int j ){ + return((i+j) /2); +} + + + +void CTAI_Util_IQSort2(int *list, int *indx, int m,int n){ + int key,i,j,k; + if( m < n) + { + k = choose_pivot(m,n); + swap(&list[m],&list[k]); + swap(&indx[m],&indx[k]); + key = list[m]; + i = m+1; + j = n; + while(i <= j) + { + while((i <= n) && (list[i] <= key)) + i++; + while((j >= m) && (list[j] > key)) + j--; + if( i < j){ + swap(&list[i],&list[j]); + swap(&indx[i],&indx[j]); + } + } + // swap two elements + swap(&list[m],&list[j]); + swap(&indx[m],&indx[j]); + // recursively sort the lesser list + CTAI_Util_IQSort2(list,indx,m,j-1); + CTAI_Util_IQSort2(list,indx,j+1,n); + } +} + + +void CTA_Util_IQSort2(int *list, int *indx, int n){ + CTAI_Util_IQSort2(list, indx, 0, n-1); +} + + + diff --git a/costa/native/cta/src/cta_util_sqlite3.c b/costa/native/cta/src/cta_util_sqlite3.c new file mode 100644 index 000000000..b0f2e8d33 --- /dev/null +++ b/costa/native/cta/src/cta_util_sqlite3.c @@ -0,0 +1,372 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_util_sqlite3.c $ +$Revision: 4304 $, $Date: 2014-01-14 07:29:40 +0100 (Tue, 14 Jan 2014) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include +#include "cta_mem.h" +#include "cta_flush.h" +#include "cta_util_sqlite3.h" +#include "cta_file.h" +#include "cta_string.h" +#include "cta_errors.h" + +#define IDEBUG (0) + + +int CTAI_util_sqlite3_return_values(void *vout, + int argc, char **argv, char **azColName) +// Callback-function for sqlite3 +{ + int retval; + int *values; + + // Cast the input to a counter-and-vector-struct + CTAI_counter_vector *out = (CTAI_counter_vector*) vout; + + // Perform checks + if (argc != 1) {return CTA_DIMENSION_ERROR;}; + if (out->index >= out->dimension) {return CTA_DIMENSION_ERROR;}; + + if (out->datatype == CTA_INTEGER) + { + // Add an integer to the output vector: + + // Read the value + int iNAN =-987654; + int value= iNAN; + if (argv[0] != NULL) + { + int nscan = sscanf(argv[0],"%d",&value); + if (nscan != 1) {return CTA_ILLEGAL_DATATYPE;} + } + + // Cast the values-field to an integer-array and store the value + values = (int *) out->values; + values[out->index] = value; + } + else if(out->datatype == CTA_DOUBLE) + { + // Add a double to the output vector: + + // Read the value + double NaN; + double value; + double *values; + + NaN = 0; NaN = 1.0/NaN; NaN=NaN/NaN; + value=NaN; + + if (argv[0] != NULL) + { + int nscan = sscanf(argv[0],"%lf",&value); + if (nscan != 1) {return CTA_ILLEGAL_DATATYPE;} + } + + // Cast the values-field to a double-array and store the value + values = (double *) out->values; + values[out->index] = value; + } + else if(out->datatype == CTA_REAL) + { + // Add a float to the output vector: + + // Read the value + float NaN; + float value; + float *values; + NaN = 0; + NaN = (float) 1.0/NaN; + NaN=NaN/NaN; + value=NaN; + if (argv[0] != NULL) + { + int nscan = sscanf(argv[0],"%f",&value); + if (nscan != 1) {return CTA_ILLEGAL_DATATYPE;} + } + + // Cast the values-field to a double-array and store the value + values = (float *) out->values; + values[out->index] = value; + } + else if(out->datatype == CTA_STRING) + { + // Add a string to the output vector: + + // Read the value + CTA_String *values = (CTA_String *) out->values; + if (argv[0] != NULL) + { + // Cast the values-field to a double-array and store the value + retval = CTA_String_Set(values[out->index],argv[0]); + if (retval!=CTA_OK) return retval; + } + else + { + retval = CTA_String_Set(values[out->index],""); + if (retval!=CTA_OK) return retval; + } + } + else + { + // No other data types supported (yet) + return CTA_ILLEGAL_DATATYPE; + }; + + // Increase the counter, so next value is put in the next spot + out->index++; + return SQLITE_OK; +} + + + + + +int CTAI_util_sqlite3_keynames(void *vout, + int argc, char **argv, char **azColName) +// Callback-function for sqlite3 +{ + // Cast the input to a COSTA-string array + CTA_String *out = (CTA_String*) vout; + + // Copy all the column names into the string array + int i; + for (i=0; i0) printf("Doing query '%s'\n",command); + zErrMsg=NULL; + rc = sqlite3_exec(db, command, CTAI_util_sqlite3_return_values, + (void *) &cvout, &zErrMsg); +// if (zErrMsg!=NULL){ +// printf("Error message from sqlite3: %s\n", zErrMsg); +// } + free(command); + if( rc != SQLITE_OK ) + {return CTA_INVALID_COMMAND;}; + return CTA_OK; +} + + + + +int CTAI_util_sqlite3_return_keys( +/* + Return the keys in the sqlite3-database. Allocate a string-array + to store the names +*/ + // OUTPUTS: + int *n_keys, + CTA_String **Keys, + // INPUTS: + sqlite3 *db, // the database + const char *condition) // the selection creterion +{ + // Set the constants necessary + const char *whole_table = // The expression which produces the + // complete obs-table + "stations inner join data on stations.station_id = data.station_id"; + + char *command; + char *zErrMsg; + int rc; + int i; + + command=CTA_Malloc( sizeof(char) * ( strlen("select * from where ") + + strlen(whole_table) + + strlen(condition) + + 100 + ) ); + + // Construct the query-command + if (strcmp(condition,"") == 0) + { + sprintf(command,"select * from %s",whole_table); + } + else + { + sprintf(command,"select * from %s where %s", + whole_table,condition); + }; + + // Carry out the query + zErrMsg = NULL; + + CTA_Flush(); +// printf("Doing query '%s'\n",command); + CTA_Flush(); + rc = sqlite3_exec(db, command, CTAI_util_sqlite3_count_columns, + (void *) n_keys, &zErrMsg); +// if (zErrMsg!=NULL){ +// printf("Error message from sqlite3: %s\n", zErrMsg); +// } +// printf("Return code '%d'\n",rc); + CTA_Flush(); + if ( rc != 4 ) { + printf("Return code of sqlite3_exec %d see sqlite.h\n",rc); + return CTA_INVALID_COMMAND; + }; + + *Keys = CTA_Malloc(sizeof(CTA_String)* (*n_keys)); + for (i=0; i<*n_keys; i++) + { + rc = CTA_String_Create( (*Keys+i) ); + if (rc != CTA_OK) return rc; + } + + // Carry out the query + zErrMsg = 0; + //printf("Doing query '%s'\n",command); + rc = sqlite3_exec(db, command, CTAI_util_sqlite3_keynames, + (void *) *Keys, &zErrMsg); +// if (zErrMsg!=NULL){ +// printf("Error message from sqlite3: %s\n", zErrMsg); +// } + free(command); + if( rc != 4 ) + {return CTA_INVALID_COMMAND;}; + + // Remove the double entries and the VALUE-entry + for (i=0; i<*n_keys; i++) + { + int ilen; + char *stri; + + rc = CTA_String_GetLength( *(*Keys+i), &ilen); + if (rc != CTA_OK) return rc; + stri=CTA_Malloc((ilen+1)*sizeof(char)); + rc = CTA_String_Get( *(*Keys+i), stri); + if (rc != CTA_OK) return rc; + + // Remove word if it is the key 'VALUE' + if (strcmp(stri,"VALUE")==0) + { + rc = CTA_String_Free(*Keys+i); + if (rc != CTA_OK) return rc; + if (i<*n_keys-1) { *(*Keys+i) = *(*Keys+*n_keys-1); } + (*n_keys)--; + i--; + } + else + { + // Check all the words up to to see if word is already in the list + int j; + for (j=0; j from the list: it is already there. + rc = CTA_String_Free(*Keys+i); + if (rc != CTA_OK) return rc; + if (i<*n_keys-1) { *(*Keys+i) = *(*Keys+*n_keys-1); } + (*n_keys)--; + i--; + j=i; + } + free(strj); + } + } + } + free(stri); + } + + *Keys = realloc(*Keys, sizeof(CTA_String)* (*n_keys)); + + return CTA_OK; +} + diff --git a/costa/native/cta/src/cta_util_statistics.c b/costa/native/cta/src/cta_util_statistics.c new file mode 100644 index 000000000..abaf715fe --- /dev/null +++ b/costa/native/cta/src/cta_util_statistics.c @@ -0,0 +1,151 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_util_statistics.c $ +$Revision: 4200 $, $Date: 2013-11-01 10:59:20 +0100 (Fri, 01 Nov 2013) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include "f_cta_utils.h" +#include "cta_datatypes.h" +#include "cta_errors.h" +#include "cta_util_statistics.h" +#include "cta_message.h" + +#define CTA_RAND_U_F77 F77_CALL(cta_rand_u,CTA_RAND_U) +#define CTA_RAND_N_F77 F77_CALL(cta_rand_n,CTA_RAND_N) + +#define IM1 2147483563 +#define IM2 2147483399 +#define AM (1.0/IM1) +#define IMM1 (IM1-1) +#define IA1 40014 +#define IA2 40692 +#define IQ1 53668 +#define IQ2 52774 +#define IR1 12211 +#define IR2 3791 +#define NTAB 32 +#define NDIV (1+IMM1/NTAB) +#define EPS 1.2e-7 +#define RNMX (1.0-EPS) + +#define CLASSNAME "CTA_Utils_Statisctics" + +static long CTAI_current_state_random=-21075; + +float ran2(long *idum) +{ + int j; + long k; + static long idum2=123456789; static long iy=0; + static long iv[NTAB]; + float temp; + if (*idum <= 0) { + if (-(*idum) < 1) *idum=1; else *idum = -(*idum); idum2=(*idum); + for (j=NTAB+7;j>=0;j--) { + k=(*idum)/IQ1; + *idum=IA1*(*idum-k*IQ1)-k*IR1; + if (*idum < 0) *idum += IM1; + if (j < NTAB) iv[j] = *idum; + } + iy=iv[0]; + } + k=(*idum)/IQ1; + *idum=IA1*(*idum-k*IQ1)-k*IR1; + if (*idum < 0) *idum += IM1; + k=idum2/IQ2; + idum2=IA2*(idum2-k*IQ2)-k*IR2; + if (idum2 < 0) idum2 += IM2; + j=iy/NDIV; + iy=iv[j]-idum2; + iv[j] = *idum; + if (iy < 1) iy += IMM1; + if ((temp= (float) AM*iy) > RNMX){ + return (float) RNMX; + } + else { + return (float) temp; + } +} + +#define METHOD "CTA_rand_seed" +void CTA_rand_seed(int seed){ + char message[64]; + sprintf(message,"Set initial seed: %ld\n",seed); + CTA_WRITE_INFO(message); + if (seed=0) seed=-21075; + if (seed>0) seed=-seed; + CTAI_current_state_random=(long) seed; + ran2(&CTAI_current_state_random); +} + +int CTA_rand_u(double *x) +// Calculate a realization from a uniform [0 1] distribution +{ + //*x=(double) (random()/(double)(RAND_MAX)); + *x=(double) ran2(&CTAI_current_state_random); + return CTA_OK; +} + +int CTA_rand_n(double *x) +// Calculate a realization from a standard normal distribution +{ +// use the box-muller scheme, which calculates two standard +// normal random numbers from two standard uniform random +// numbers. + + // Remember the extra value in variable value2; + // remember whether an extra value is available in variable hebnog + static BOOL hebnog=FALSE; + static double value2; + + if (hebnog) + { + // extra value still available: return it + hebnog = FALSE; + *x = value2; + } + else + { + // no extra value available: calculate 2 normal rando:m numbers + // and return only one + // double r1=(double) (random()+1.0)/(double) (RAND_MAX+1.0); + // double r2=(double) (random()+1.0)/(double) (RAND_MAX+1.0); + double r1= (double) ran2(&CTAI_current_state_random); + double r2= (double) ran2(&CTAI_current_state_random); + double hlp = sqrt(-2*log(r1)); + + *x = hlp * cos(2.0*M_PI*r2); + value2 = hlp * sin(2.0*M_PI*r2); + hebnog = TRUE; + } + return CTA_OK; +} + +/* Interfacing with Fortran */ + +CTAEXPORT void CTA_RAND_U_F77(double *x, int *ierr){ + *ierr=CTA_rand_u(x); +} + +CTAEXPORT void CTA_RAND_N_F77(double *x, int *ierr){ + *ierr=CTA_rand_n(x); +} + diff --git a/costa/native/cta/src/cta_vector.c b/costa/native/cta/src/cta_vector.c new file mode 100644 index 000000000..c28e9c394 --- /dev/null +++ b/costa/native/cta/src/cta_vector.c @@ -0,0 +1,1747 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_vector.c $ +$Revision: 3361 $, $Date: 2012-07-04 16:52:30 +0200 (Wed, 04 Jul 2012) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include "cta_mem.h" +#include "f_cta_utils.h" +#include "cta_errors.h" +#include "cta_datatypes.h" +#include "ctai_datatypes.h" +#include "cta_string.h" +#include "cta_vector.h" +#include "ctai.h" +#include "ctai_string.h" +#include "cta_pack.h" +#include "ctai_xml.h" +#include "cta_defaults.h" +#include "ctai_handles.h" +#include "cta_message.h" +#include "cta_metainfo.h" + +#define CTA_VECTOR_DEFINECLASS_F77 F77_CALL(cta_vector_defineclass,CTA_VECTOR_DEFINECLASS) +#define CTA_VECTOR_DUPLICATE_F77 F77_CALL(cta_vector_duplicate,CTA_VECTOR_DUPLICATE) +#define CTA_VECTOR_CREATE_F77 F77_CALL(cta_vector_create,CTA_VECTOR_CREATE) +#define CTA_VECTOR_GETSIZE_F77 F77_CALL(cta_vector_getsize,CTA_VECTOR_GETSIZE) +#define CTA_VECTOR_GETVALS_F77 F77_CALL(cta_vector_getvals,CTA_VECTOR_GETVALS) +#define CTA_VECTOR_GETVAL_F77 F77_CALL(cta_vector_getval,CTA_VECTOR_GETVAL) +#define CTA_VECTOR_SETVALS_F77 F77_CALL(cta_vector_setvals,CTA_VECTOR_SETVALS) +#define CTA_VECTOR_SETVAL_F77 F77_CALL(cta_vector_setval,CTA_VECTOR_SETVAL) +#define CTA_VECTOR_SETCONSTANT_F77 F77_CALL(cta_vector_setconstant,CTA_VECTOR_SETCONSTANT) +#define CTA_VECTOR_SCAL_F77 F77_CALL(cta_vector_scal,CTA_VECTOR_SCAL) +#define CTA_VECTOR_COPY_F77 F77_CALL(cta_vector_copy,CTA_VECTOR_COPY) +#define CTA_VECTOR_AXPY_F77 F77_CALL(cta_vector_axpy,CTA_VECTOR_AXPY) +#define CTA_VECTOR_DOT_F77 F77_CALL(cta_vector_dot,CTA_VECTOR_DOT) +#define CTA_VECTOR_NRM2_F77 F77_CALL(cta_vector_nrm2,CTA_VECTOR_NRM2) +#define CTA_VECTOR_AMAX_F77 F77_CALL(cta_vector_amax,CTA_VECTOR_AMAX) +#define CTA_VECTOR_GETMAXLEN_F77 F77_CALL(cta_vector_getmaxlen,CTA_VECTOR_GETMAXLEN) +#define CTA_VECTOR_EXPORT_F77 F77_CALL(cta_vector_export,CTA_VECTOR_EXPORT) +#define CTA_VECTOR_IMPORT_F77 F77_CALL(cta_vector_import,CTA_VECTOR_IMPORT) +#define CTA_VECTOR_PRINT_TABLE_F77 F77_CALL(cta_vector_print_table,CTA_VECTOR_PRINT_TABLE) +#define CTA_VECTOR_FREE_F77 F77_CALL(cta_vector_free,CTA_VECTOR_FREE) +#define CTA_VECTOR_ELMDIV_F77 F77_CALL(cta_vector_elmdiv,CTA_VECTOR_ELMDIV) +#define CTA_VECTOR_ELMPROD_F77 F77_CALL(cta_vector_elmprod,CTA_VECTOR_ELMPROD) + +#define CLASSNAME "CTA_Vector" + +/* Struct holding all data associated to an COSTA Vector */ + +static long CTAI_Vector_Memsize=0; + + +typedef struct { +CTA_Func functions[I_CTA_VECTOR_NUMFUNC]; // See cta_vector.h for a list of + // available vector functions +CTA_VecClass hveccl; // Vector class +int n; // dimension +CTA_Datatype datatype; // data type of the vector elements +CTA_Handle usrdata; // ??? +long memsize; // size of allocated memblock +void *data; // pointer to the data. The data are not only + // the vector values. In the case of a BLAS-vector + // for instance, data is a struct consisting of + // the fields datatype, values and n (dimension) + // Some information is stored twice (once directly in a CTA_Vector + // and once in its data. +} CTAI_Vector; + + + +typedef struct { +CTA_Func functions[I_CTA_VECTOR_NUMFUNC]; +} CTAI_VectorClass; // A VectorClass contains a list of + // functions ??? + +/* Local interfaces */ +int CTAI_XML_read_txt_node_size(char *txtstr); +int CTAI_XML_read_txt_node_content(char *txtstr, double *vals); + +/* Global interface from a shared routine from cta_treevector.c */ +void XML_newline(int spc, xmlTextWriter *writer); + + + +int CTA_Vector_DefineClass( + // INPUTS: + const char *name, // Name of the new vector class + const CTA_Func h_func[I_CTA_VECTOR_NUMFUNC], // function handles to + // the implementations of the + // vector class' functions. + // OUTPUTS: + CTA_VecClass *hveccl) // The (handle to) the new vector class +{ + + CTAI_VectorClass *data; + int retval; + + /* Allocate new Vector object */ + data=CTA_Malloc(sizeof(CTAI_VectorClass)); + + data->functions[I_CTA_VECTOR_CREATE_SIZE]=h_func[I_CTA_VECTOR_CREATE_SIZE]; + data->functions[I_CTA_VECTOR_CREATE_INIT]=h_func[I_CTA_VECTOR_CREATE_INIT]; + data->functions[I_CTA_VECTOR_GETVALS] =h_func[I_CTA_VECTOR_GETVALS ]; + data->functions[I_CTA_VECTOR_GETVAL] =h_func[I_CTA_VECTOR_GETVAL ]; + data->functions[I_CTA_VECTOR_SETVALS] =h_func[I_CTA_VECTOR_SETVALS ]; + data->functions[I_CTA_VECTOR_SETVAL] =h_func[I_CTA_VECTOR_SETVAL ]; + data->functions[I_CTA_VECTOR_SETCONST] =h_func[I_CTA_VECTOR_SETCONST ]; + data->functions[I_CTA_VECTOR_SCAL ] =h_func[I_CTA_VECTOR_SCAL ]; + data->functions[I_CTA_VECTOR_COPY ] =h_func[I_CTA_VECTOR_COPY ]; + data->functions[I_CTA_VECTOR_AXPY ] =h_func[I_CTA_VECTOR_AXPY ]; + data->functions[I_CTA_VECTOR_DOT ] =h_func[I_CTA_VECTOR_DOT ]; + data->functions[I_CTA_VECTOR_NRM2 ] =h_func[I_CTA_VECTOR_NRM2 ]; + data->functions[I_CTA_VECTOR_AMAX ] =h_func[I_CTA_VECTOR_AMAX ]; + data->functions[I_CTA_VECTOR_GETMAXLEN] =h_func[I_CTA_VECTOR_GETMAXLEN ]; + data->functions[I_CTA_VECTOR_EXPORT ] =h_func[I_CTA_VECTOR_EXPORT ]; + data->functions[I_CTA_VECTOR_IMPORT ] =h_func[I_CTA_VECTOR_IMPORT ]; + data->functions[I_CTA_VECTOR_PRINT_TABLE]=h_func[I_CTA_VECTOR_PRINT_TABLE]; + data->functions[I_CTA_VECTOR_FREE ] =h_func[I_CTA_VECTOR_FREE ]; + data->functions[I_CTA_VECTOR_APPENDVAL] =h_func[I_CTA_VECTOR_APPENDVAL ]; + data->functions[I_CTA_VECTOR_ELMDIV] =h_func[I_CTA_VECTOR_ELMDIV ]; + data->functions[I_CTA_VECTOR_ELMPROD] =h_func[I_CTA_VECTOR_ELMPROD ]; + data->functions[I_CTA_VECTOR_ELMSQRT] =h_func[I_CTA_VECTOR_ELMSQRT ]; + + + // Allocate new handle + retval=CTA_Handle_Create(name,CTA_VECTORCLASS,data,hveccl); + + // return error when unsuccesfull + return retval; +} + + +#undef METHOD +#define METHOD "Duplicate" +int CTA_Vector_Duplicate(CTA_Vector hvector1, CTA_Vector *hvector2) +{ + CTAI_Vector *data; /* All data of vector hvec */ + int retval; + + /* Get class data containing all function pointers */ + retval=CTA_Handle_Check((CTA_Handle) hvector1,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_treevector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvector1,(void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + retval=CTA_Vector_Create(data->hveccl, data->n, data->datatype, + data->usrdata, hvector2); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot create vector"); + return retval; + } + retval=CTA_Vector_Copy(hvector1,*hvector2); + return retval; +} + + + + +#undef METHOD +#define METHOD "Create" +int CTA_Vector_Create( + // INPUTS: + CTA_VecClass hveccl, + const int n, // dimension + CTA_Datatype datatype, // datatype of the vector elements + CTA_Handle usrdata, // ??? + // OUTPUTS + CTA_Vector *hvector) // The new COSTA-function (handle) +{ + CTAI_Vector *vector; + int memsize; + int retval; + CTAI_VectorClass *clsdata; + CTA_Function *my_Create_Size, *my_Create_Init; + int i; + int elSize; + + /* Get class data containing all function pointers */ + retval=CTA_Handle_Check((CTA_Handle) hveccl,CTA_VECTORCLASS); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vectorclass handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hveccl,(void*) &clsdata); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* determine size of data object (CTA_VECTOR_CREATE_SIZE)*/ + retval=CTA_Func_GetFunc(clsdata->functions[I_CTA_VECTOR_CREATE_SIZE],&my_Create_Size); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function"); + return retval; + } + (void) my_Create_Size(&n,&datatype,&usrdata,&retval,&memsize); + if (retval) { + CTA_WRITE_ERROR("Error using my_Create_Size"); + return retval; + } + + /* allocate memory for new vector object */ + vector=CTA_Malloc(sizeof(CTAI_Vector)); + vector->data=CTA_Malloc(memsize); + + /* Count allocated memory local and global */ + CTA_SizeOf(datatype, &elSize); + vector->memsize=sizeof(CTAI_Vector)+memsize+n*elSize; + CTAI_Vector_Memsize+=vector->memsize; + + /* copy function pointers */ + for (i=0;i< I_CTA_VECTOR_NUMFUNC;i++){ + vector->functions[i]=clsdata->functions[i]; + } + /* set other general information */ + vector->hveccl=hveccl; + vector->n=n; + vector->datatype=datatype; + + vector->usrdata=usrdata; + + /* Initialise and fill new vector */ + retval=CTA_Func_GetFunc(clsdata->functions[I_CTA_VECTOR_CREATE_INIT],&my_Create_Init); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function"); + return retval; + } + (void) my_Create_Init(vector->data, &n, &datatype, &usrdata, &retval); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Error using my_Create_Init"); + return retval; + } + + /* Allocate new handle and return eror when unsuccesfull */ + retval=CTA_Handle_Create("vector",CTA_VECTOR,vector,hvector); + if (retval) { + CTA_WRITE_ERROR("Cannot create handle"); + return retval; + } + + return CTA_OK; + + +} + + +#undef METHOD +#define METHOD "GetSize" +int CTA_Vector_GetSize( + CTA_Vector hvec, /* Handle of the vector */ + int *n /* size of vector */ + ){ + + CTAI_Vector *data; /* All data of vector hvec */ + int retval; /* Return value of COSTA call */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hvec,CTA_VECTOR); + if (retval!=CTA_OK) { + char message[1024]; + sprintf(message, "handle check in cta_vector_getsize failed"); + CTA_WRITE_ERROR(message); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec,(void*) &data); + if (retval!=CTA_OK) { + char message[1024]; + sprintf(message, "handle _getdata in cta_vector_getsize failed"); + CTA_WRITE_ERROR(message); + return retval; + } + + *n=data->n; + return CTA_OK; +}; + +#undef METHOD +#define METHOD "GetDatatype" +int CTA_Vector_GetDatatype( + CTA_Vector hvec, /* Handle of the vector */ + CTA_Datatype *datatype /* Returned data type */ + ){ + + CTAI_Vector *data; /* All data of vector hvec */ + int retval; /* Return value of COSTA call */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hvec,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vectorclass handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec,(void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + *datatype=data->datatype; + return CTA_OK; +}; + + + + +#undef METHOD +#define METHOD "GetVals" +int CTA_Vector_GetVals( + CTA_Vector hvec, /* Handle of the vector */ + void *vals, /* Returned value */ + int n, /* length of array vals */ + CTA_Datatype datatype /* Data type */ + ) +{ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Vector *data; /* All data of vector hvec */ + CTA_Function *function; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hvec,CTA_VECTOR); + if (retval!=CTA_OK) + { + char message[1024]; + sprintf(message,"cta_vector: Handle %d is not a vector handle", hvec); + CTA_WRITE_ERROR(message); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec,(void*) &data); + if (retval!=CTA_OK) + { + char message[1024]; + sprintf(message,"cta_vector: Handle %d has no data", hvec); + CTA_WRITE_ERROR(message); + return retval; + } + retval=CTA_Func_GetFunc(data->functions[I_CTA_VECTOR_GETVALS],&function); + if (retval!=CTA_OK) + { + char message[1024]; + sprintf(message,"cta_vector: getvals function %d, handle %d is not valid", + I_CTA_VECTOR_GETVALS,data->functions[I_CTA_VECTOR_GETVALS]); + CTA_WRITE_ERROR(message); + return retval; + } + + /* Call (user) implementation */ + function(data->data,vals,&n,&datatype,&retval); + return retval; +}; + + + +#undef METHOD +#define METHOD "GetVal" +int CTA_Vector_GetVal( + CTA_Vector hvec, /* Handle of the vector */ + int i, /* position of value */ + void *val, /* Returned value */ + CTA_Datatype datatype /* Data type */ + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Vector *data; /* All data of vector hvec */ + CTA_Function *function; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hvec,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec,(void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + retval=CTA_Func_GetFunc(data->functions[I_CTA_VECTOR_GETVAL],&function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_GETVAL"); + return retval; + } + + /* Call (user) implementation */ + function(data->data,&i,val,&datatype,&retval); + + return retval; +}; + + +/* Get value as string c; free pointer after use */ +const char *CTAI_Vector_GetStringVal(CTA_Vector hvec, int i) +{ + char *buf = NULL; + double dval; + float fval; + int ival; + int len; + CTA_String sval; + int retval; + CTA_Datatype dt; + + retval = CTA_Vector_GetDatatype(hvec, &dt); + if (retval == CTA_OK) { + switch (dt) { + case CTA_DOUBLE: + retval = CTA_Vector_GetVal(hvec, i, &dval, dt); + if (retval == CTA_OK) { + buf = (char*)CTA_Malloc(64); + sprintf(buf, "%lf", dval); + } + break; + case CTA_REAL: + retval = CTA_Vector_GetVal(hvec, i, &fval, dt); + if (retval == CTA_OK) { + buf = (char*)CTA_Malloc(64); + sprintf(buf, "%g", fval); + } + break; + case CTA_INTEGER: + case CTA_HANDLE: + retval = CTA_Vector_GetVal(hvec, i, &ival, dt); + if (retval == CTA_OK) { + buf = (char*)CTA_Malloc(64); + sprintf(buf, "%d", ival); + } + break; + case CTA_STRING: + retval = CTA_String_Create(&sval); + if (retval != CTA_OK) break; + retval = CTA_Vector_GetVal(hvec, i, &sval, dt); + if (retval == CTA_OK) { + CTA_String_GetLength(sval, &len); + buf = (char*)CTA_Malloc(len + 1); + strcpy(buf, CTAI_String_GetPtr(sval)); + buf[len] = '\0'; + } + CTA_String_Free(&sval); + break; + } + } + return buf; +}; + + +#undef METHOD +#define METHOD "SetConstant" +int CTA_Vector_SetConstant( + CTA_Vector hvec, /* Handle of the vector */ + void *val, /* value that must be set */ + CTA_Datatype datatype /* Data type */ + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Vector *data; /* All data of vector hvec */ + CTA_Function *function; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hvec,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + retval=CTA_Func_GetFunc(data->functions[I_CTA_VECTOR_SETCONST],&function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_SETCONST"); + return retval; + } + /* Call (user) implementation */ + function(data->data,val,&datatype,&retval); + return retval; +}; + + + + + +#undef METHOD +#define METHOD "Export" +int CTA_Vector_Export( + CTA_Vector hvec, /* Handle of the vector */ + CTA_Handle usrdata + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Vector *data; /* All data of vector hvec */ + CTA_Function *function; /* Function that must be called */ + + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hvec,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + retval=CTA_Func_GetFunc(data->functions[I_CTA_VECTOR_EXPORT],&function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_EXPORT"); + return retval; + } + + /* Call (user) implementation */ + function(data->data,&usrdata,&retval); + return retval; +}; + +#undef METHOD +#define METHOD "Import" +int CTA_Vector_Import( + CTA_Vector hvec, /* Handle of the vector */ + CTA_Handle usrdata + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Vector *data; /* All data of vector hvec */ + CTA_Function *function; /* Function that must be called */ + + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hvec,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + retval=CTA_Func_GetFunc(data->functions[I_CTA_VECTOR_IMPORT],&function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_IMPORT"); + return retval; + } + + /* Call (user) implementation */ + function(data->data,&usrdata,&retval); + return retval; +}; + + +#undef METHOD +#define METHOD "Print_Table" +int CTA_Vector_Print_Table( + CTA_Vector* vtable, /* Handle of the vector */ + int ncolumns, + CTA_Vector vformats + ) +{ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Vector *data; /* All data of vector hvec */ + void *formats; /* data of vector formats */ + CTA_Function *function; /* Function that must be called */ + void **table = /* All data of vectors in table */ + CTA_Malloc(sizeof(CTAI_Vector *)*ncolumns) ; + + int i; /* loop counter */ + + /* Get pointer to implementation of this function */ + for (i=0; idata; + } + + retval=CTA_Handle_Check((CTA_Handle) vformats, CTA_VECTOR); + if (retval!=CTA_OK) + { + char message[1024]; + sprintf(message, "ERROR in CTA_Vector_Print_Table: last argument is not a CTA_Vector"); + CTA_WRITE_ERROR(message); + return retval; + } + + retval=CTA_Handle_GetData((CTA_Handle) vformats, (void*) &data); + if (retval!=CTA_OK){ + CTA_WRITE_ERROR("ERROR in CTA_Vector_Print_Table: last argument would not return its data"); + return retval; + } + + formats = data->data; + + retval=CTA_Func_GetFunc(data->functions[I_CTA_VECTOR_PRINT_TABLE],&function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("ERROR in CTA_Vector_Print_Table: could not obtain a PRINT_TABLE function for last argument"); + return retval; + } + + /* Call (user) implementation */ + function(table,&ncolumns,formats,&retval); + if (retval!=CTA_OK) + { + CTA_WRITE_ERROR("ERROR in CTA_Vector_Print_Table: PRINT_TABLE function returned an error code"); + } + + + free(table); + return retval; +}; + + +#undef METHOD +#define METHOD "SetVals" +int CTA_Vector_SetVals( + CTA_Vector hvec, /* Handle of the vector */ + void *vals, /* value that must be set */ + int n, /* Length of array vals */ + CTA_Datatype datatype /* Data type */ + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Vector *data; /* All data of vector hvec */ + CTA_Function *function; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hvec,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + retval=CTA_Func_GetFunc(data->functions[I_CTA_VECTOR_SETVALS],&function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_SETVALS"); + return retval; + } + + /* Call (user) implementation */ + function(data->data,vals,&n,&datatype,&retval); + return retval; +}; + +#undef METHOD +#define METHOD "SetVal" +int CTA_Vector_SetVal( + CTA_Vector hvec, /* Handle of the vector */ + int i, /* position of value that must be set */ + void *val, /* value that must be set */ + CTA_Datatype datatype /* Data type */ + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Vector *data; /* All data of vector hvec */ + CTA_Function *function; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hvec,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + retval=CTA_Func_GetFunc(data->functions[I_CTA_VECTOR_SETVAL],&function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_SETVAL"); + return retval; + } + + /* Call (user) implementation */ + function(data->data,&i,val,&datatype,&retval); + return retval; +}; + + +/* Set value from string */ +int CTAI_Vector_SetStringVal(CTA_Vector hvec, int i, const char *val, CTA_Datatype datatype) { + void *buf = NULL; /* Buffer to store the converted value */ + int size = 0; /* Buffer size */ + int retval; + + if (CTA_OK == CTA_SizeOf(datatype, &size)) { + buf = CTA_Malloc(size); + switch (datatype) { + case CTA_REAL: + *((float *)buf) = (float)atof(val); + break; + case CTA_DOUBLE: + *((double *)buf) = atof(val); + break; + case CTA_INTEGER: + case CTA_HANDLE: + *((int *)buf) = atoi(val); + break; + case CTA_STRING: + retval = CTA_String_Create((CTA_String *)buf); + if (retval == CTA_OK) { + CTA_String_Set(*((CTA_String *)buf), val); + break; + } + free(buf); + return retval; + default: + free(buf); + return CTA_ILLEGAL_DATATYPE; + } + /* Set the actual value */ + retval = CTA_Vector_SetVal(hvec, i, buf, datatype); + free(buf); + return retval; + } + return CTA_ILLEGAL_DATATYPE; +}; + + +#undef METHOD +#define METHOD "AppendVal" +int CTA_Vector_AppendVal( + CTA_Vector hvec, /* Handle of the vector */ + const void *val, /* value that must be set */ + CTA_Datatype datatype /* Data type */ + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Vector *data; /* All data of vector hvec */ + CTA_Function *function; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hvec, CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + retval=CTA_Func_GetFunc(data->functions[I_CTA_VECTOR_APPENDVAL],&function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_APPENDVAL"); + return retval; + } + + /* Update vector length */ + ++data->n; + + /* Call (user) implementation */ + function(data->data,val,&datatype,&retval); + return retval; +}; + + + +#undef METHOD +#define METHOD "Scal" +int CTA_Vector_Scal( + CTA_Vector hvec, /* Handle of the vector */ + double alpha /* scaling factor */ + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Vector *data; /* All data of vector hvec */ + CTA_Function *function; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hvec,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec, (void*) &data); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + retval=CTA_Func_GetFunc(data->functions[I_CTA_VECTOR_SCAL],&function); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_SCAL"); + return retval; + } + + /* Call (user) implementation */ + function(data->data,&alpha,&retval); + return retval; +}; + +#undef METHOD +#define METHOD "Copy" +int CTA_Vector_Copy( + CTA_Vector hvec_x, /* Handle of the sending vector */ + CTA_Vector hvec_y /* Handle of the receiving vector */ + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Vector *data_x; /* All data of vector hvec_x */ + CTAI_Vector *data_y; /* All data of vector hvec_y */ + CTA_Function *function_x; /* Function that must be called */ + CTA_Function *function_y; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hvec_x,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec_x, (void*) &data_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + retval=CTA_Handle_Check((CTA_Handle) hvec_y,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec_y, (void*) &data_y); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + retval=CTA_Func_GetFunc(data_x->functions[I_CTA_VECTOR_COPY],&function_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_COPY"); + return retval; + } + retval=CTA_Func_GetFunc(data_y->functions[I_CTA_VECTOR_COPY],&function_y); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_COPY"); + return retval; + } + + /* Check whether implementions match */ + if (function_x==function_y){ + + /* Call (user) implementation */ + function_x(data_x->data,data_y->data,&retval); + return retval; + } + else { + return CTA_INCOMPATIBLE_VECTORS; + } +}; + +#undef METHOD +#define METHOD "ElmSqrt" +int CTA_Vector_ElmSqrt( + CTA_Vector hvec_y /* Handle of the receiving vector */ + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Vector *data_y; /* All data of vector hvec_y */ + CTA_Function *function_y; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hvec_y,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec_y, (void*) &data_y); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + retval=CTA_Func_GetFunc(data_y->functions[I_CTA_VECTOR_ELMSQRT],&function_y); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_ELMSQRT"); + return retval; + } + + /* Call (user) implementation */ + function_y(data_y->data,&retval); + return retval; +}; + + +#undef METHOD +#define METHOD "ElmDiv" +int CTA_Vector_ElmDiv( + CTA_Vector hvec_y, /* Handle of the receiving vector */ + CTA_Vector hvec_x /* Handle of the sending vector */ + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Vector *data_x; /* All data of vector hvec_x */ + CTAI_Vector *data_y; /* All data of vector hvec_y */ + CTA_Function *function_x; /* Function that must be called */ + CTA_Function *function_y; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hvec_x,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec_x, (void*) &data_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + retval=CTA_Handle_Check((CTA_Handle) hvec_y,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec_y, (void*) &data_y); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + retval=CTA_Func_GetFunc(data_x->functions[I_CTA_VECTOR_ELMDIV],&function_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_ELMDIV"); + return retval; + } + retval=CTA_Func_GetFunc(data_y->functions[I_CTA_VECTOR_ELMDIV],&function_y); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_ELMDIV"); + return retval; + } + /* Check whether implementions match */ + if (function_x==function_y){ + + /* Call (user) implementation */ + function_x(data_y->data,data_x->data,&retval); + return retval; + } + else { + return CTA_INCOMPATIBLE_VECTORS; + } +}; + + +#undef METHOD +#define METHOD "ElmProd" +int CTA_Vector_ElmProd( + CTA_Vector hvec_y, /* Handle of the receiving vector */ + CTA_Vector hvec_x /* Handle of the sending vector */ + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Vector *data_x; /* All data of vector hvec_x */ + CTAI_Vector *data_y; /* All data of vector hvec_y */ + CTA_Function *function_x; /* Function that must be called */ + CTA_Function *function_y; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hvec_x,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec_x, (void*) &data_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + retval=CTA_Handle_Check((CTA_Handle) hvec_y,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec_y, (void*) &data_y); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + retval=CTA_Func_GetFunc(data_x->functions[I_CTA_VECTOR_ELMPROD],&function_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_ELMPROD"); + return retval; + } + retval=CTA_Func_GetFunc(data_y->functions[I_CTA_VECTOR_ELMPROD],&function_y); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_ELMPROD"); + return retval; + } + /* Check whether implementions match */ + if (function_x==function_y){ + + /* Call (user) implementation */ + function_x(data_y->data,data_x->data,&retval); + return retval; + } + else { + return CTA_INCOMPATIBLE_VECTORS; + } +}; + + +#undef METHOD +#define METHOD "Axpy" +int CTA_Vector_Axpy( + CTA_Vector hvec_y, /* Handle of the receiving vector */ + double alpha, /* value of alpha */ + CTA_Vector hvec_x /* Handle of the sending vector */ + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Vector *data_x; /* All data of vector hvec_x */ + CTAI_Vector *data_y; /* All data of vector hvec_y */ + CTA_Function *function_x; /* Function that must be called */ + CTA_Function *function_y; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hvec_x,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec_x, (void*) &data_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + retval=CTA_Handle_Check((CTA_Handle) hvec_y,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec_y, (void*) &data_y); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + retval=CTA_Func_GetFunc(data_x->functions[I_CTA_VECTOR_AXPY],&function_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_AXPY"); + return retval; + } + retval=CTA_Func_GetFunc(data_y->functions[I_CTA_VECTOR_AXPY],&function_y); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_AXPY"); + return retval; + } + + /* Check whether implementions match */ + if (function_x==function_y){ + + /* Call (user) implementation */ + function_x(data_y->data,&alpha,data_x->data,&retval); + return retval; + } + else { + return CTA_INCOMPATIBLE_VECTORS; + } +}; + + +#undef METHOD +#define METHOD "Dot" +int CTA_Vector_Dot( + CTA_Vector hvec_x, /* Handle of the first vector */ + CTA_Vector hvec_y, /* Handle of the second vector */ + double *dotprod + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Vector *data_x; /* All data of vector hvec_x */ + CTAI_Vector *data_y; /* All data of vector hvec_y */ + CTA_Function *function_x; /* Function that must be called */ + CTA_Function *function_y; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hvec_x,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec_x, (void*) &data_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + retval=CTA_Handle_Check((CTA_Handle) hvec_y,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec_y, (void*) &data_y); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + retval=CTA_Func_GetFunc(data_x->functions[I_CTA_VECTOR_DOT],&function_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_DOT"); + return retval; + } + retval=CTA_Func_GetFunc(data_y->functions[I_CTA_VECTOR_DOT],&function_y); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_DOT"); + return retval; + } + + /* Check whether implementions match */ + if (function_x==function_y){ + + /* Call (user) implementation */ + function_x(data_x->data,data_y->data,dotprod,&retval); + return retval; + } + else { + return CTA_INCOMPATIBLE_VECTORS; + } +}; + +#undef METHOD +#define METHOD "Dot" +int CTA_Vector_Nrm2( + CTA_Vector hvec_x, /* Handle of the vector */ + double *norm2 + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Vector *data_x; /* All data of vector hvec_x */ + CTA_Function *function_x; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hvec_x,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec_x, (void*) &data_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + retval=CTA_Func_GetFunc(data_x->functions[I_CTA_VECTOR_NRM2],&function_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_NRM2"); + return retval; + } + + /* Call (user) implementation */ + function_x(data_x->data,norm2,&retval); + return retval; +}; + +#undef METHOD +#define METHOD "Amax" +int CTA_Vector_Amax( + CTA_Vector hvec_x, /* Handle of the first vector */ + int *iloc + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Vector *data_x; /* All data of vector hvec_x */ + CTA_Function *function_x; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hvec_x,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec_x, (void*) &data_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + retval=CTA_Func_GetFunc(data_x->functions[I_CTA_VECTOR_AMAX],&function_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_AMAX"); + return retval; + } + /* Call (user) implementation */ + function_x(data_x->data,iloc, &retval); + return retval; +}; + +#undef METHOD +#define METHOD "GetMaxLen" +int CTA_Vector_GetMaxLen( + CTA_Vector hvec_x, /* Handle of the first vector */ + int *maxlen + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Vector *data_x; /* All data of vector hvec_x */ + CTA_Function *function_x; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + retval=CTA_Handle_Check((CTA_Handle) hvec_x,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) hvec_x, (void*) &data_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + retval=CTA_Func_GetFunc(data_x->functions[I_CTA_VECTOR_GETMAXLEN],&function_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_GETMAXLEN"); + return retval; + } + + /* Call (user) implementation */ + function_x(data_x->data, maxlen, &retval); + return retval; +}; + +#undef METHOD +#define METHOD "Free" +int CTA_Vector_Free( + CTA_Vector *hvec_x /* Handle of vector */ + ){ + + /* Local variables */ + int retval; /* Return value of COSTA call */ + CTAI_Vector *data_x; /* All data of vector hvec_x */ + CTA_Function *function_x; /* Function that must be called */ + + /* Get pointer to implementation of this function */ + + if (*hvec_x==CTA_NULL) return CTA_OK; + retval=CTA_Handle_Check((CTA_Handle) *hvec_x,CTA_VECTOR); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_GetData((CTA_Handle) *hvec_x, (void*) &data_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + retval=CTA_Func_GetFunc(data_x->functions[I_CTA_VECTOR_FREE],&function_x); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_FREE"); + return retval; + } + + + /* Count allocated memory local and global */ + CTAI_Vector_Memsize-=data_x->memsize; + + /* Call (user) implementation */ + function_x(data_x->data,&retval); + free(data_x->data); + free(data_x); + retval=CTA_Handle_Free(hvec_x); + + return retval; +}; + + +/** \brief Generate XML from one COSTA vector +* +* \param hvec I handle of a COSTA vector +* \param writer I the XML text writer +*/ + +void CTAI_XML_WriteVector(CTA_Vector hvec, const char *id, const char *caption, CTA_Metainfo minfo, int level, xmlTextWriter *writer) { + const char *text; + int dimension; + int i; + char unitname[CTA_STRLEN_TAG]; + int misval=0; + CTAI_Gridm thisgrid; + char misvaltxt[20]; + char valtotxt[10]; + + if (minfo) { + // note: description according to xml-scheme should be at a state node, so it is not read here. + CTA_Metainfo_GetUnit(minfo,unitname); + CTA_Metainfo_GetRest(minfo,&misval); + CTA_Metainfo_GetGrid(minfo,&thisgrid); + } + + + /* Start an element the the name of the tree handle */ + xmlTextWriterStartElement(writer, (xmlChar *) CTAI_Type2String(CTA_VECTOR)); + + /* write id (COSTA: tag of parent state) */ + xmlTextWriterWriteAttribute(writer, CTAI_XML_ID, (xmlChar *) id); + + /* Write caption (COSTA: name of parent state */ + xmlTextWriterWriteAttribute(writer, CTAI_XML_CAPTION, (xmlChar *) caption); + XML_newline(level,writer); + + /* Write unit */ + xmlTextWriterStartElement(writer, (xmlChar *) "unit"); + xmlTextWriterWriteString(writer, (xmlChar *) unitname); + xmlTextWriterEndElement(writer); XML_newline(level,writer); + /* Write missingvalue */ + xmlTextWriterStartElement(writer, (xmlChar *) "missingValue"); + sprintf(misvaltxt,"%d",misval); + xmlTextWriterWriteString(writer, (xmlChar *) misvaltxt); + xmlTextWriterEndElement(writer); XML_newline(level,writer); + + if (thisgrid.type > -99) { + /* write grid; always computational dimensions */ + xmlTextWriterStartElement(writer, (xmlChar *) "grid"); level=level+3;XML_newline(level,writer); + xmlTextWriterStartElement(writer, (xmlChar *) "computationalSpace"); level=level+3;XML_newline(level,writer); + + xmlTextWriterStartElement(writer, (xmlChar *) "dimension"); + xmlTextWriterWriteAttribute(writer, CTAI_XML_ID, (xmlChar *) "xi"); + sprintf(valtotxt,"%d",thisgrid.nx); + xmlTextWriterWriteAttribute(writer, (xmlChar *) "length", (xmlChar *) valtotxt); + xmlTextWriterEndElement(writer); + if (thisgrid.type > 1) { + XML_newline(level,writer); + xmlTextWriterStartElement(writer, (xmlChar *) "dimension"); + xmlTextWriterWriteAttribute(writer, CTAI_XML_ID, (xmlChar *) "eta"); + sprintf(valtotxt,"%d",thisgrid.ny); + xmlTextWriterWriteAttribute(writer, (xmlChar *) "length", (xmlChar *) valtotxt); + xmlTextWriterEndElement(writer); + } + if (thisgrid.type > 2) { + XML_newline(level,writer); + xmlTextWriterStartElement(writer, (xmlChar *) "dimension"); + xmlTextWriterWriteAttribute(writer, CTAI_XML_ID, (xmlChar *) "psi"); + sprintf(valtotxt,"%d",thisgrid.nz); + xmlTextWriterWriteAttribute(writer, (xmlChar *) "length", (xmlChar *) valtotxt); + xmlTextWriterEndElement(writer); + } + level=level-3; XML_newline(level,writer); + xmlTextWriterEndElement(writer); XML_newline(level,writer); //computationalSpace + + if (0==strcmp(thisgrid.refdimp[1],"none")) { + printf("no reference for physical space given.\n"); + } else { + xmlTextWriterStartElement(writer, (xmlChar *) "physicalSpace"); level=level+3; XML_newline(level,writer); + + xmlTextWriterStartElement(writer, (xmlChar *) "dimension"); + xmlTextWriterWriteAttribute(writer, (xmlChar *) "id", (xmlChar *) "x"); + xmlTextWriterWriteAttribute(writer, (xmlChar *) "axes", (xmlChar *) "xi"); + xmlTextWriterWriteAttribute(writer, (xmlChar *) "ref", (xmlChar *) thisgrid.refdimp[1]); + xmlTextWriterEndElement(writer); + + if (thisgrid.type > 1) { + XML_newline(level,writer); + xmlTextWriterStartElement(writer, (xmlChar *) "dimension"); + xmlTextWriterWriteAttribute(writer, (xmlChar *) "id", (xmlChar *) "y"); + xmlTextWriterWriteAttribute(writer, (xmlChar *) "axes",(xmlChar *) "eta"); + xmlTextWriterWriteAttribute(writer, (xmlChar *) "ref", (xmlChar *) thisgrid.refdimp[2]); + xmlTextWriterEndElement(writer); + } + if (thisgrid.type > 2) { + XML_newline(level,writer); + xmlTextWriterStartElement(writer, (xmlChar *) "dimension"); + xmlTextWriterWriteAttribute(writer, (xmlChar *) "id", (xmlChar *) "z"); + xmlTextWriterWriteAttribute(writer, (xmlChar *) "axes",(xmlChar *) "psi"); + xmlTextWriterWriteAttribute(writer, (xmlChar *) "ref",(xmlChar *) thisgrid.refdimp[3]); + xmlTextWriterEndElement(writer); + } + level=level-3;XML_newline(level,writer); + + xmlTextWriterEndElement(writer); level=level-3;XML_newline(level,writer); //physicalSpace + } + + + + xmlTextWriterEndElement(writer); XML_newline(level,writer); //grid + } + + + /* Get dimension */ + CTA_Vector_GetSize(hvec, &dimension); + + /* Write values */ + xmlTextWriterStartElement(writer, CTAI_XML_VECTOR); + + /* Write each value */ + for (i = 1; i <= dimension; ++i) { + + text = CTAI_Vector_GetStringVal(hvec, i); + xmlTextWriterWriteString(writer, (xmlChar *) text); + xmlTextWriterWriteString(writer, (xmlChar *) " "); + free((char *)text); + + } + + /* End the tree level elements */ + xmlTextWriterEndElement(writer); + level=level-3; XML_newline(level,writer); + xmlTextWriterEndElement(writer); + XML_newline(level,writer); +} + +/* -------------------------------------------------------------------- */ + +/** \brief Create a COSTA vector from NEW xml-input +* +* \param cur_node I Current XML node +* \return Handle to create or CTA_NULL in case of an error. +*/ +CTA_Vector CTAI_XML_CreateVector_New(xmlNode *cur_node, CTA_Metainfo minfo) { + xmlNode *values_node = NULL; /* values child node */ + xmlNode *txt_node = NULL; /* text node */ + CTA_Vector hnew = CTA_NULL; /* The new COSTA handle */ + xmlChar *val; /* element/property value */ + CTA_Datatype datatype = CTA_DOUBLE; /* datatype of the vector */ + CTA_VecClass vecclass = CTA_DEFAULT_VECTOR; + int dim = 0; /* vector dim */ + char *teststr, *teststr2; + int misval; + double *vals; + CTAI_Gridm thisgrid; + + + /* Parse this node's attributes */ + + /* Get datatype */ + val = xmlGetProp(cur_node, CTAI_XML_DATATYPE); + if (val) { + datatype = CTAI_String2Type((char *) val); + xmlFree(val); + if (datatype != CTA_DOUBLE && datatype != CTA_REAL && + datatype != CTA_INTEGER && datatype != CTA_STRING) return CTA_NULL; + } else { + /* default: */ + datatype = CTA_DOUBLE ; + } + + + /* Get dimension */ + dim = 0; + val = xmlGetProp(cur_node, CTAI_XML_DIMENSION); + if (val) { + dim = atoi((char *) val); + xmlFree(val); + } + + /* First: get all modes other than the 'vector' containing the values */ + for (values_node = cur_node->children; values_node; values_node = values_node->next) { + if (0 == strcmp((char *) CTAI_XML_UNIT, (char *) values_node->name)) { + for (txt_node = values_node->children; txt_node; txt_node = txt_node->next) { + if (txt_node->type == XML_TEXT_NODE) { + // printf("unit: |%s| %d \n",txt_node->content,strlen(txt_node->content)); + CTA_Metainfo_SetUnit(minfo,(char *) txt_node->content); + } + } + } + if (0 == strcmp((char *) CTAI_XML_MISSINGVALUE, (char *) values_node->name)) { + for (txt_node = values_node->children; txt_node; txt_node = txt_node->next) { + if (txt_node->type == XML_TEXT_NODE) { + // printf("missingvalue: |%s| %d \n",txt_node->content,strlen(txt_node->content)); + if (0==strcmp((char *) txt_node->content,"NaN")) {misval=-9999;} + else {misval=atoi((char *) txt_node->content);} + CTA_Metainfo_SetRest(minfo,&misval); + } + } + } + if (0 == strcmp((char *) CTAI_XML_GRID, (char *) values_node->name)) { + // printf("entering ctai_xml_grid ...\n"); + CTAI_XML_CreateGrid(values_node, &thisgrid); + // printf("... done; retval; type: %d %d \n",retval,thisgrid.type); + CTA_Metainfo_SetGrid(minfo, &thisgrid); + } + + } + + + /* TODO: read vector class when it can be something else than CTA_DEFAULT_VECTOR */ + + + /* Get values node. NOTE: it is denoted as 'vector'*/ + values_node = NULL; + for (values_node = cur_node->children; values_node; values_node = values_node->next) { + if (0 == strcmp((char *) CTAI_XML_VECTOR, (char *) values_node->name)) break; + } + + if (values_node) { + + for (txt_node = values_node->children; txt_node; txt_node = txt_node->next) { + if (txt_node->type == XML_TEXT_NODE) { + if (txt_node->content) { + + + teststr = CTA_Malloc((strlen((char *) txt_node->content)+1)*sizeof(char)); + teststr2 = CTA_Malloc((strlen((char *) txt_node->content)+1)*sizeof(char)); + strcpy(teststr,(char *) txt_node->content); + strcpy(teststr2,teststr); + + dim = CTAI_XML_read_txt_node_size(teststr); + + + if (dim > 0) { + /* Create vector */ + CTA_Vector_Create(vecclass, dim, datatype, CTA_NULL, &hnew); + vals = CTA_Malloc((dim)*sizeof(double)); + dim = CTAI_XML_read_txt_node_content(teststr2,vals); + // printf("vals: %f %f %f\n",vals[0],vals[1],vals[2]); + + free(teststr); + free(teststr2); + CTA_Vector_SetVals(hnew, vals,dim, datatype); + + return hnew; + } + + } + } + } + } + + return CTA_NULL; +} + + +/* ---------------------- */ +int CTAI_XML_read_txt_node_size(char *txtstr){ + int i; + // char *saveptr; + char *txtpart; + + //for (i=0; txtpart != NULL; i++) + for (i=0; ; i++,txtstr=NULL) + { + txtpart = strtok(txtstr," "); + if (txtpart == NULL) break; + } + return i ; +} + +int CTAI_XML_read_txt_node_content(char *txtstr, double *vals){ + int i; + // char *saveptr; + char *txtpart; + + //for (i=0; txtpart != NULL; i++) + for (i=0; ; i++,txtstr=NULL) + { + txtpart = strtok(txtstr," "); + // txtpart = strtok_r(txtstr," ",&saveptr); + if (txtpart == NULL) break; + // printf(" %d: %s -> %f\n",i,txtpart, atof(txtpart)); + vals[i]=atof(txtpart); + } + return i ; +} + + +long CTAI_Vector_GetMemsize(){ + return CTAI_Vector_Memsize; +} + + + + +/* ------------------------------------------------- */ + +/* Interfacing with Fortran */ +CTAEXPORT void CTA_VECTOR_DEFINECLASS_F77(char *name,int *h_func,int *hveccl, + int *ierr, int len_name){ + char *c_name; + // create a c-string equivalent to name + c_name=CTA_Malloc((len_name+1)*sizeof(char)); + CTA_fstr2cstr(name,c_name,len_name); + + *ierr=CTA_Vector_DefineClass(c_name, (CTA_Func*) h_func, + (CTA_VecClass*) hveccl); + free(c_name); +}; + +CTAEXPORT void CTA_VECTOR_DUPLICATE_F77(int *hvector1, int *hvector2, int *ierr){ + *ierr=CTA_Vector_Duplicate(*hvector1, hvector2); +}; +CTAEXPORT void CTA_VECTOR_CREATE_F77(int *hveccl, const int *n, int *datatype, + int *usrdata, int *hvector, int *ierr){ + + *ierr=CTA_Vector_Create((CTA_VecClass) *hveccl, *n, (CTA_Datatype) *datatype, + (CTA_Handle) *usrdata, (CTA_Vector*) hvector); + +}; + +CTAEXPORT void CTA_VECTOR_GETSIZE_F77 (int *hvec, int *n, int *ierr){ + + *ierr= CTA_Vector_GetSize(*hvec, n); +}; + +CTAEXPORT void CTA_VECTOR_GETVALS_F77(int *hvec, void *vals, int *n, int *datatype, + int *ierr){ + + *ierr=CTA_Vector_GetVals(*hvec, vals, *n, *datatype); +}; + +CTAEXPORT void CTA_VECTOR_GETVAL_F77(int *hvec, int *i, void *val, int *datatype, + int *ierr){ + + *ierr=CTA_Vector_GetVal(*hvec, *i, val, *datatype); +}; + +CTAEXPORT void CTA_VECTOR_SETVALS_F77(int *hvec, void *vals, int *n, int *datatype, + int *ierr){ + + *ierr=CTA_Vector_SetVals(*hvec, vals, *n, *datatype); +}; + + +CTAEXPORT void CTA_VECTOR_SETVAL_F77(int *hvec, int *i, void *val, int *datatype, + int *ierr){ + + *ierr =CTA_Vector_SetVal((CTA_Vector) *hvec, *i, val, + (CTA_Datatype) *datatype); +} + +CTAEXPORT void CTA_VECTOR_SETCONSTANT_F77(int *hvec, void *val, int *datatype, + int *ierr){ + + *ierr=CTA_Vector_SetConstant(*hvec, val, *datatype); +}; + +CTAEXPORT void CTA_VECTOR_SCAL_F77(int *hvec, double *alpha, int *ierr){ + + *ierr=CTA_Vector_Scal((CTA_Vector) *hvec,*alpha); +}; + +CTAEXPORT void CTA_VECTOR_COPY_F77( int *hvec_x, int *hvec_y, int *ierr){ + *ierr=CTA_Vector_Copy( (CTA_Vector) *hvec_x, (CTA_Vector) *hvec_y); +}; + +CTAEXPORT void CTA_VECTOR_AXPY_F77( int *hvec_y, double *alpha, int *hvec_x, int *ierr){ + *ierr=CTA_Vector_Axpy( (CTA_Vector) *hvec_y, *alpha, (CTA_Vector) *hvec_x); +}; + +CTAEXPORT void CTA_VECTOR_DOT_F77( int *hvec_x, int *hvec_y, double *dotprod, int *ierr){ + *ierr=CTA_Vector_Dot( (CTA_Vector) *hvec_x, (CTA_Vector) *hvec_y, + dotprod); +}; + +CTAEXPORT void CTA_VECTOR_NRM2_F77( int *hvec_x,double *norm2, int *ierr){ + *ierr=CTA_Vector_Nrm2( (CTA_Vector) *hvec_x, norm2); +}; + +CTAEXPORT void CTA_VECTOR_AMAX_F77( int *hvec_x, int *iloc, int *ierr){ + *ierr=CTA_Vector_Amax( (CTA_Vector) *hvec_x, iloc); +}; + +CTAEXPORT void CTA_VECTOR_GETMAXLEN_F77( int *hvec_x, int *maxlen, int *ierr){ + *ierr=CTA_Vector_GetMaxLen( (CTA_Vector) *hvec_x, maxlen); +}; + +CTAEXPORT void CTA_VECTOR_EXPORT_F77(int *hvec_x, int *usrdata, int *ierr){ + *ierr=CTA_Vector_Export((CTA_Vector) *hvec_x, (CTA_Handle) *usrdata); +}; + +CTAEXPORT void CTA_VECTOR_IMPORT_F77(int *hvec_x, int *usrdata, int *ierr){ + *ierr=CTA_Vector_Import((CTA_Vector) *hvec_x, (CTA_Handle) *usrdata); +}; + + + +CTAEXPORT void CTA_VECTOR_PRINT_TABLE_F77(int *table, int *ncolumns, int *vformats, int *ierr){ + *ierr=CTA_Vector_Print_Table((CTA_Vector *) table, *ncolumns, (CTA_Vector) *vformats); +}; + +CTAEXPORT void CTA_VECTOR_FREE_F77(int *hvec_x, int *ierr){ + *ierr=CTA_Vector_Free((CTA_Vector*) hvec_x); +}; + +CTAEXPORT void CTA_VECTOR_ELMDIV_F77( int *hvec_y, int *hvec_x, int *ierr){ + *ierr=CTA_Vector_ElmDiv( (CTA_Vector) *hvec_y, (CTA_Vector) *hvec_x); +}; + +CTAEXPORT void CTA_VECTOR_ELMPROD_F77( int *hvec_y, int *hvec_x, int *ierr){ + *ierr=CTA_Vector_ElmProd( (CTA_Vector) *hvec_y, (CTA_Vector) *hvec_x); +}; + + + diff --git a/costa/native/cta/src/cta_vector_blas.c b/costa/native/cta/src/cta_vector_blas.c new file mode 100644 index 000000000..03132d919 --- /dev/null +++ b/costa/native/cta/src/cta_vector_blas.c @@ -0,0 +1,1357 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_vector_blas.c $ +$Revision: 4445 $, $Date: 2014-06-03 08:56:12 +0200 (Tue, 03 Jun 2014) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#include +#include +#include +#include +#include "cta_mem.h" +#include "cta_flush.h" +#include "cta_file.h" +#include "cta_errors.h" +#include "cta_string.h" +#include "cta_vector_blas.h" +#include "ctai.h" +#include "ctai_handles.h" +#include "cta_message.h" + +/* Maximum length of a path index */ +#define MAXIXLEN (31) + +#define IDEBUG (0) +#define CLASSNAME "CTA_Vector_blas" + +/* +This file contains an implementation of the CTA_VECTOR_NUMFUNC operations +which a COSTA-vector should be able to perform. These implementations +have been created using BLAS. + + The function CTA_Vector_blas_initialise creates a CTA_VecClass, which + uses these implementations. + + The user may create different vector-classes, which use a different set of + implementations. To do so, a function must be written which s similar to + CTA_Vector_blas_initialise, and implementations must be supplied for + those functions which are differently implemented than the BLAS-ones. + + CTA_Vector_Create_Size return memory-size required for creating a vector + CTA_Vector_blas_initialise create a vector class: set function pointers to all + member variables. + +*/ +void CTA_Vector_blas_initialise(CTA_VecClass *hveccl) +{ + CTA_Intf hintf=0; + CTA_Func h_func[I_CTA_VECTOR_NUMFUNC]; + + // The vector h_func is filled with COSTA-function handles of the implementations in this + // file. + CTA_Func_Create(" ",&CTAI_Vector_Create_Size, hintf,&h_func[I_CTA_VECTOR_CREATE_SIZE]); + CTA_Func_Create(" ",&CTAI_Vector_Create_Init, hintf,&h_func[I_CTA_VECTOR_CREATE_INIT]); + CTA_Func_Create(" ",&CTAI_Vector_getvals, hintf,&h_func[I_CTA_VECTOR_GETVALS] ); + CTA_Func_Create(" ",&CTAI_Vector_getval, hintf,&h_func[I_CTA_VECTOR_GETVAL] ); + CTA_Func_Create(" ",&CTAI_Vector_setvals, hintf,&h_func[I_CTA_VECTOR_SETVALS] ); + CTA_Func_Create(" ",&CTAI_Vector_setval, hintf,&h_func[I_CTA_VECTOR_SETVAL] ); + CTA_Func_Create(" ",&CTAI_Vector_setconst, hintf,&h_func[I_CTA_VECTOR_SETCONST] ); + CTA_Func_Create(" ",&CTAI_Vector_scal, hintf,&h_func[I_CTA_VECTOR_SCAL] ); + CTA_Func_Create(" ",&CTAI_Vector_copy, hintf,&h_func[I_CTA_VECTOR_COPY] ); + CTA_Func_Create(" ",&CTAI_Vector_axpy, hintf,&h_func[I_CTA_VECTOR_AXPY] ); + CTA_Func_Create(" ",&CTAI_Vector_dot, hintf,&h_func[I_CTA_VECTOR_DOT] ); + CTA_Func_Create(" ",&CTAI_Vector_nrm2, hintf,&h_func[I_CTA_VECTOR_NRM2] ); + CTA_Func_Create(" ",&CTAI_Vector_amax, hintf,&h_func[I_CTA_VECTOR_AMAX] ); + CTA_Func_Create(" ",&CTAI_Vector_GetMaxLen, hintf,&h_func[I_CTA_VECTOR_GETMAXLEN] ); + CTA_Func_Create(" ",&CTAI_Vector_export, hintf,&h_func[I_CTA_VECTOR_EXPORT] ); + CTA_Func_Create(" ",&CTAI_Vector_import, hintf,&h_func[I_CTA_VECTOR_IMPORT] ); + CTA_Func_Create(" ",&CTAI_Vector_Print_Table,hintf,&h_func[I_CTA_VECTOR_PRINT_TABLE]); + CTA_Func_Create(" ",&CTAI_Vector_free, hintf,&h_func[I_CTA_VECTOR_FREE] ); + CTA_Func_Create(" ",&CTAI_Vector_appendval, hintf,&h_func[I_CTA_VECTOR_APPENDVAL] ); + CTA_Func_Create(" ",&CTAI_Vector_elmdiv, hintf,&h_func[I_CTA_VECTOR_ELMDIV] ); + CTA_Func_Create(" ",&CTAI_Vector_elmprod, hintf,&h_func[I_CTA_VECTOR_ELMPROD] ); + CTA_Func_Create(" ",&CTAI_Vector_elmsqrt, hintf,&h_func[I_CTA_VECTOR_ELMSQRT] ); + + CTA_Vector_DefineClass("cta_vector_blas",h_func,hveccl); +} + +void CTAI_Vector_Create_Size( + // INPUTS (all unused): + int *n, // + CTA_Datatype *datatype, // + CTA_Handle *usrdata, // + // OUTPUTS: + int *retval, // error code (see cta_datatypes.h for possible error codes) + int *memsize // The number of bytes which are necessary to store one + // CTAI_Vector_blas, with a pointer to the contents (data), but without the + // contents themselves. + ){ + + *memsize=(int) sizeof(CTAI_Vector_blas); + *retval=CTA_OK; +}; + + +void CTAI_Vector_Create_Init( +/* +Allocate the memory which is neccesary to store the vector values +*/ +// IN-OUTPUTS +CTAI_Vector_blas *x, // The COSTA-BLAS-vector for which memory is reserved. +// INPUTS: +int *n, // dimension of the vector +CTA_Datatype *datatype, // data type +CTA_Handle *usrdata, // ??? +// OUTPUTS +int *retval) // Error code. Possible error: Illegal data type +{ + *retval = CTA_OK; + x->n=*n; + x->size=*n; + x->values = NULL; + + // allocate an array + if (*datatype==CTA_REAL){ + if (n>0){ + x->values=CTA_Malloc(*n * sizeof(float)); + } + } + else if (*datatype==CTA_DOUBLE){ + if (n>0) { + x->values=CTA_Malloc(*n * sizeof(double)); + } + } + else if (*datatype==CTA_INTEGER || *datatype==CTA_HANDLE){ + if (n>0){ + x->values=CTA_Malloc(*n * sizeof(int)); + } + } + else if (*datatype==CTA_STRING){ + // For strings: allocate the CTA_String array AND create the strings + if (n>0){ + int i; + CTA_String *StrVal; + + x->values=CTA_Malloc(*n * sizeof(CTA_String)); + StrVal = (CTA_String*) x->values; + for (i=0; i<*n; i++) + { + *retval = CTA_String_Create(&(StrVal[i])); + if (*retval != CTA_OK) return; + } + } + } + else { + x->datatype = CTA_NULL; + x->n=0; + x->size=0; + + *retval=CTA_ILLEGAL_DATATYPE; + return; + } + + // set datatype + x->datatype=*datatype; +}; + +/* Copy single real array to double real array */ +static void CTAI_Single2Double(float *rx, double *dx, int n){ + int i; + for (i=0;ix->n){ + *retval=CTA_DIMENSION_ERROR; + return; + } + //printf("CTAI_Vector_getval: returning the %d-th value\n",*i); + + /* check data types and handle conversion */ + if (*datatype!=x->datatype){ + if (x->datatype==CTA_DOUBLE && *datatype==CTA_REAL) { + double *fptr= (double *)x->values; + CTAI_Double2Single(fptr+(*i-1), val, 1); + } else if (x->datatype==CTA_REAL && *datatype==CTA_DOUBLE){ + float *fptr= (float *)x->values; + CTAI_Single2Double(fptr+(*i-1),val, 1); + } else { + *retval=CTA_INCOMPATIBLE_VECTORS; + return; + } + } + else if (*datatype==CTA_REAL){ + /* copy REAL-values using memcpy */ + fptr=x->values; + memcpy(val,fptr+(*i-1),sizeof(float)); + } + else if (*datatype==CTA_INTEGER || *datatype==CTA_HANDLE){ + /* copy INTEGER-values using memcpy */ + iptr=x->values; + memcpy(val,iptr+(*i-1),sizeof(int)); + } + else if (*datatype==CTA_DOUBLE){ + /* copy DOUBLE-values using memcpy */ + dptr=x->values; + memcpy(val,dptr+(*i-1),sizeof(double)); + } + else if (*datatype==CTA_STRING){ + /* copy CTA_STRING-values using COSTA-functions */ + char *str; + int len; + // Cast the vector data to string-handles + CTA_String *StrPtr = (CTA_String*) x->values; + + // Get the string length + *retval = CTA_String_GetLength(StrPtr[*i-1],&len); + if (*retval!=CTA_OK) return; + + // Get the string contents from the COSTA-string + str=CTA_Malloc((len+1)*sizeof(char)); + *retval = CTA_String_Get(StrPtr[*i-1],str); + if (*retval!=CTA_OK) return; + + // Set the string contents into output COSTA-string + StrPtr = (CTA_String*) val; + *retval = CTA_String_Set( *StrPtr ,str); + if (*retval!=CTA_OK) return; + free(str); + } + else { + *retval=CTA_ILLEGAL_DATATYPE; return; + } + *retval=CTA_OK; +}; + + +void CTAI_Vector_getvals( + // Get all the values from a COSTA-BLAS-vector + CTAI_Vector_blas *x, + void *vals, + int *n, + CTA_Datatype *datatype, + int *retval + ){ + /* Local variables */ + int one=1; + + + /* check dimensions */ + if (*n!=x->n){ + *retval=CTA_DIMENSION_ERROR; + return; + } + + /* check data types and handle conversion */ + if (*datatype!=x->datatype){ + if (x->datatype==CTA_DOUBLE && *datatype==CTA_REAL) { + CTAI_Double2Single(x->values, vals, *n); + } else if (x->datatype==CTA_REAL && *datatype==CTA_DOUBLE){ + CTAI_Single2Double(x->values, vals, *n); + } else { + *retval=CTA_INCOMPATIBLE_VECTORS; + return; + } + } + /* copy values using BLAS copy */ + else if (*datatype==CTA_INTEGER || *datatype==CTA_HANDLE){ + SCOPY_F77(n,x->values,&one,vals,&one); + } + else if (*datatype==CTA_REAL){ + SCOPY_F77(n,x->values,&one,vals,&one); + } + else if (*datatype==CTA_DOUBLE){ + DCOPY_F77(n,x->values,&one,vals,&one); + } + else if (*datatype==CTA_STRING){ + /* copy values using GetVal*/ + + // Cast the vector data to string-handles + CTA_String *StrPtr = (CTA_String*) vals; + + int i; + for (i=1; i<=*n; i++) + { + CTAI_Vector_getval( x, &i, &(StrPtr[i-1]), datatype, retval); + if (*retval!=CTA_OK) return; + } + } + else { + *retval=CTA_ILLEGAL_DATATYPE; return; + } + *retval=CTA_OK; +}; + + + +void CTAI_Vector_setval( + // Set one value of a COSTA-BLAS-vector + CTAI_Vector_blas *x, + int *i, + void *val, + CTA_Datatype *datatype, + int *retval + ){ + /* Local variables */ + + /* check dimensions */ + if (*i<1 || *i>x->n){ + *retval=CTA_DIMENSION_ERROR; + return; + } + + /* check data types and handle conversion */ + if (*datatype!=x->datatype){ + if (*datatype==CTA_REAL && x->datatype==CTA_DOUBLE ) { + double *fptr= (double *)x->values; + CTAI_Single2Double(val, fptr+(*i-1), 1); + } else if (*datatype==CTA_DOUBLE && x->datatype==CTA_REAL){ + float *fptr= (float *)x->values; + CTAI_Double2Single(val, fptr+(*i-1), 1); + } else { + *retval=CTA_INCOMPATIBLE_VECTORS; + return; + } + /* copy values using memcpy */ + } else if (*datatype==CTA_REAL){ + float *fptr= (float *)x->values; + memcpy(fptr+(*i-1),val,sizeof(float)); + } + else if (*datatype==CTA_INTEGER || *datatype==CTA_HANDLE){ + int * iptr= (int *)x->values; + memcpy(iptr+(*i-1),val,sizeof(int)); + } + else if (*datatype==CTA_DOUBLE){ + double *dptr=(double *)x->values; + memcpy(dptr+(*i-1),val,sizeof(double)); + } + else if (*datatype==CTA_STRING){ + int len; + char *str; + CTA_String str_in; + CTA_String *StrPtr; + + /* copy CTA_STRING-values using COSTA-functions */ + // Cast the vector data to string-handles + str_in = *((CTA_String*)val); + + // Get the string length + *retval = CTA_String_GetLength(str_in,&len); + if (*retval!=CTA_OK) return; + + // Get the string contents from the COSTA-string + str=CTA_Malloc((len+1)*sizeof(char)); + *retval = CTA_String_Get(str_in,str); + if (*retval!=CTA_OK) return; + + // Set the string contents into output COSTA-string + StrPtr = (CTA_String*) x->values; + *retval = CTA_String_Set( StrPtr[*i-1] ,str); + if (*retval!=CTA_OK) return; + free(str); + } + else { + *retval=CTA_ILLEGAL_DATATYPE; return; + } + *retval=CTA_OK; +}; + +void CTAI_Vector_setvals( + // Set all values of a COSTA-BLAS-vector + CTAI_Vector_blas *x, + void *vals, + int *n, + CTA_Datatype *datatype, + int *retval + ){ + /* Local variables */ + int one=1; + + /* check dimensions */ + if (*n!=x->n){ + *retval=CTA_DIMENSION_ERROR; + return; + } + + /* check data types and handle conversion */ + if (*datatype!=x->datatype){ + if (*datatype==CTA_REAL && x->datatype==CTA_DOUBLE ) { + CTAI_Single2Double(vals, x->values, *n); + } else if (*datatype==CTA_DOUBLE && x->datatype==CTA_REAL){ + CTAI_Double2Single(vals, x->values, *n); + } else { + *retval=CTA_INCOMPATIBLE_VECTORS; + return; + } + + /* copy values using BLAS copy or memcpy*/ + } else if (*datatype==CTA_INTEGER || *datatype==CTA_HANDLE){ + SCOPY_F77(n,vals,&one,x->values,&one); + } + else if (*datatype==CTA_REAL){ + SCOPY_F77(n,vals,&one,x->values,&one); + } + else if (*datatype==CTA_DOUBLE){ + DCOPY_F77(n,vals,&one,x->values,&one); + } + else if (*datatype==CTA_STRING){ + /* copy CTA_STRING-values using COSTA-functions */ + // Cast the vector data to string-handles + CTA_String *StrPtr = (CTA_String*) vals; + + int i; + for (i=1; i<=*n; i++) + { + CTAI_Vector_setval( x, &i, &(StrPtr[i-1]), datatype, retval); + if (*retval!=CTA_OK) return; + } + + } + else { + *retval=CTA_ILLEGAL_DATATYPE; return; + } + *retval=CTA_OK; +}; + + +#undef METHOD +#define METHOD "CTAI_Vector_setconst" +void CTAI_Vector_setconst( + // Set all values of a COSTA-BLAS-vector to the same value + CTAI_Vector_blas *x, + void *val, + CTA_Datatype *datatype, + int *retval + ){ + /* Local variables */ + float sval; + double dval; + + /* check data types */ + if (*datatype==CTA_REAL) { + memcpy(&sval,val,sizeof(float)); + } else if (*datatype==CTA_DOUBLE) { + memcpy(&dval,val,sizeof(double)); + } + + + if (x->datatype==CTA_REAL && *datatype==CTA_DOUBLE) { + sval=(float) dval; + } else if (x->datatype==CTA_DOUBLE && *datatype==CTA_REAL) { + dval=(double) sval; + } else if (*datatype!=x->datatype){ + CTA_WRITE_ERROR("DataType of vector and constant value is not compatible"); + *retval=CTA_INCOMPATIBLE_VECTORS; + return ; + } + + /* copy values using BLAS copy */ + if (x->datatype==CTA_REAL){ + float *svals = (float *) x->values; + int i; + for (i=0;in;i++){ svals[i]=sval; } + } + else if (x->datatype==CTA_INTEGER || x->datatype==CTA_HANDLE){ + int *ival = (int *) val; + int *ivals = (int *) x->values; + int i; + for (i=0;in;i++){ ivals[i]=*ival; } + } + else if (x->datatype==CTA_DOUBLE){ + double * dvals= (double *) x->values; + int i; + for (i=0;in;i++){ dvals[i]=dval; } + } + else if (x->datatype==CTA_STRING){ + /* copy CTA_STRING-values using COSTA-functions */ + char *str; + // Cast the vector data to string-handles + CTA_String *StrPtr = (CTA_String*) val; + + // Get the string length + int len; + int i; + *retval = CTA_String_GetLength(*StrPtr,&len); + if (*retval!=CTA_OK) return; + + // Get the string contents from the COSTA-string + str=CTA_Malloc((len+1)*sizeof(char)); + *retval = CTA_String_Get(*StrPtr,str); + if (*retval!=CTA_OK) return; + + StrPtr = (CTA_String*) x->values; + for (i=0;in;i++) + { + // Set the string contents into output COSTA-string + *retval = CTA_String_Set( StrPtr[i] ,str); + if (*retval!=CTA_OK) return; + } + free(str); + } + else { + CTA_WRITE_ERROR("DataType is not supported"); + *retval=CTA_ILLEGAL_DATATYPE; return; + } + *retval=CTA_OK; +}; + + + + +void CTAI_Vector_scal( + // Multiply a COSTA-BLAS-vector with a scalar + CTAI_Vector_blas *x, + double *alpha, + int *retval + ){ + /* Local variables */ + int one=1; + float salpha; + + if (x->datatype==CTA_REAL){ + salpha=(float) *alpha; + SSCAL_F77(&x->n,&salpha, x->values,&one); + } + else if (x->datatype==CTA_INTEGER){ + int i; + int *ival = (int *)x->values; + for (i=0; in; i++) + { + ival[i] = (int) (*alpha * ival[i]); + } + } + else if (x->datatype==CTA_DOUBLE){ + DSCAL_F77(&x->n,alpha, x->values,&one); + } + else { + *retval=CTA_ILLEGAL_DATATYPE; return; + } + *retval=CTA_OK; +}; + + +void CTAI_Vector_copy( + // Copy the values of a COSTA-BLAS-vector to another existing one. + CTAI_Vector_blas *x, + CTAI_Vector_blas *y, + int *retval + ){ + /* Local variables */ + int one=1; + + /* check dimensions */ + if (y->n!=x->n){ + *retval=CTA_DIMENSION_ERROR; + return; + } + + /* check data types and handle conversion */ + if (x->datatype!=y->datatype){ + if (x->datatype==CTA_DOUBLE && y->datatype==CTA_REAL) { + CTAI_Double2Single(x->values, y->values, x->n); + + } else if (x->datatype==CTA_REAL && y->datatype==CTA_DOUBLE){ + CTAI_Single2Double(x->values, y->values, x->n); + } else { + *retval=CTA_INCOMPATIBLE_VECTORS; + return; + } + } else if (x->datatype==CTA_REAL){ + SCOPY_F77(&x->n,x->values,&one,y->values,&one); + } + else if (x->datatype==CTA_INTEGER || x->datatype==CTA_HANDLE ){ + SCOPY_F77(&x->n,x->values,&one,y->values,&one); + } + else if (x->datatype==CTA_DOUBLE){ + DCOPY_F77(&x->n,x->values,&one,y->values,&one); + } + else if (x->datatype==CTA_STRING){ + int i; + CTA_String *yval = (CTA_String *)y->values; + for (i=1; i<=x->n; i++) + { + CTAI_Vector_getval( x, &i, &(yval[i-1]), &y->datatype, retval); + if (*retval!=CTA_OK) return; + } + } + else { + *retval=CTA_ILLEGAL_DATATYPE; return; + } + + *retval=CTA_OK; +}; + +void CTAI_Vector_axpy( + CTAI_Vector_blas *y, + double *alpha, + CTAI_Vector_blas *x, + int *retval + ){ + /* Local variables */ + int one=1; + float salpha; + + /* check dimensions */ + if (y->n!=x->n){ + *retval=CTA_DIMENSION_ERROR; + return; + } + + /* check data types */ + if (y->datatype!=x->datatype){ + printf("datatype y=%d datatype y=%d", y->datatype,x->datatype); + *retval=CTA_INCOMPATIBLE_VECTORS; + return; + } + + if (x->datatype==CTA_REAL){ + salpha=(float) *alpha; + SAXPY_F77(&x->n,&salpha,x->values,&one,y->values,&one); + } + else if (x->datatype==CTA_INTEGER){ + int i; + int * xval = (int *) x->values; + int * yval = (int *) y->values; + for (i=0; in; i++) { yval[i] = (int) (yval[i] + *alpha * xval[i]);} + } + else if (x->datatype == CTA_DOUBLE) { + DAXPY_F77(&x->n,alpha,x->values,&one,y->values,&one); + } + else { + *retval=CTA_ILLEGAL_DATATYPE; return; + } + + *retval=CTA_OK; +}; + +void CTAI_Vector_dot( + // Calculate the dot-product of two COSTA-BLAS-vectors. + CTAI_Vector_blas *x, + CTAI_Vector_blas *y, + double *dotprod, + int *retval + ){ + /* Local variables */ + int one=1; + + /* check dimensions */ + if (y->n!=x->n){ + *retval=CTA_DIMENSION_ERROR; + return; + } + + /* check data types */ + if (y->datatype!=x->datatype){ + *retval=CTA_INCOMPATIBLE_VECTORS; + return; + } + + if (x->datatype==CTA_REAL){ + *dotprod= (double) SDOT_F77(&x->n,x->values,&one,y->values,&one); + } + else if (x->datatype==CTA_INTEGER){ + int i; + int * xval = (int *) x->values; + int * yval = (int *) y->values; + *dotprod=0.0; + for (i=0; in; i++) { *dotprod += xval[i]*yval[i];} + } + else if (x->datatype==CTA_DOUBLE){ + *dotprod=DDOT_F77(&x->n,x->values,&one,y->values,&one); + } + else { + *retval=CTA_ILLEGAL_DATATYPE; return; + } + + *retval=CTA_OK; +}; + +void CTAI_Vector_nrm2( + CTAI_Vector_blas *x, + double *norm2, + int *retval + ){ + + /* Local variables */ + int one=1; + + if (x->datatype==CTA_REAL){ + *norm2= (double) SNRM2_F77(&x->n,x->values,&one); + } + else if (x->datatype==CTA_INTEGER){ + int i; + int * xval = (int *) x->values; + *norm2=0.0; + for (i=0; in; i++) { *norm2 += xval[i]*xval[i];} + *norm2 = sqrt(*norm2); + } + else if (x->datatype==CTA_DOUBLE){ + *norm2=DNRM2_F77(&x->n,x->values,&one); + } + else { + *retval=CTA_ILLEGAL_DATATYPE; return; + } + *retval=CTA_OK; +}; + +void CTAI_Vector_amax( + CTAI_Vector_blas *x, + int *iloc, + int *retval + ){ + + /* Local variables */ + int one=1; + + + /* quick return in case of 0-length vector*/ + if (x->n==0){ + *iloc=0; + *retval=CTA_OK; + } + + if (x->datatype==CTA_REAL){ + *iloc=ISAMAX_F77(&x->n,x->values,&one); + } + else if (x->datatype==CTA_INTEGER){ + int i; + int * xval = (int *) x->values; + + // Find the location in C-notation (counting from zero) + *iloc=0; + for (i=1; in; i++) + { + if (abs(xval[i])>abs(xval[*iloc])) {*iloc = i;} + } + + // Increase the index to get a vector-index (counting from one) + (*iloc)++; + } + else if (x->datatype==CTA_DOUBLE){ + *iloc=IDAMAX_F77(&x->n,x->values,&one); + } + else { + *retval=CTA_ILLEGAL_DATATYPE; return; + } + + *retval=CTA_OK; +}; + +void CTAI_Vector_GetMaxLen( + CTAI_Vector_blas *x, + int *maxlen, + int *retval + ) +{ + int i; + int length; + CTA_String * values = (CTA_String*) x->values; + if (x->datatype!=CTA_STRING) + { + *retval = CTA_ILLEGAL_DATATYPE; return; + } + + *maxlen = 0; + for (i=0; in; i++) + { + *retval = CTA_String_GetLength(values[i], &length); + if (*retval != CTA_OK) return; + + if (length>*maxlen) {*maxlen=length;} + } + + *retval=CTA_OK; +}; + +void CTAI_Vector_export( + CTAI_Vector_blas *x, + CTA_Handle *usrdata, + int *retval + ) +{ + /* Local variables */ + int i; + FILE *file; + int size; + + /* set pointers for easy printing */ + float *xreal = (float *) x->values; + double *xdouble = (double *) x->values; + int *xint = (int *) x->values; + CTA_String *xstr = (CTA_String *) x->values; + + if (IDEBUG>0) printf("Vector_blas: Start of export\n"); + if (IDEBUG>0) printf("Vector_blas: size of n %d\n",x->n); + + CTA_Message_Quiet(CTA_TRUE); + if (CTA_Handle_Check(*usrdata,CTA_FILE)==CTA_OK) { + CTA_Message_Quiet(CTA_FALSE); + /* We have to export to a file */ + *retval=CTA_File_Get(*usrdata,&file); + if (*retval != CTA_OK) return; + + if (CTA_FLUSH_ALWAYS) CTA_Flush(); + + if (x->datatype == CTA_STRING) { + fprintf(file,"{"); + } else { + fprintf(file,"["); + } + /* Print the values */ + for (i=0;in;i++){ + if (x->datatype==CTA_REAL){ + fprintf(file,"%10.5g",xreal[i]); + } + else if (x->datatype==CTA_DOUBLE){ + fprintf(file,"%18.8le ",xdouble[i]); + } + else if (x->datatype == CTA_INTEGER){ + fprintf(file,"%d ",xint[i]); + } + else if (x->datatype == CTA_STRING){ + int len; + char *str; + *retval = CTA_String_GetLength(xstr[i],&len); + if (*retval != CTA_OK) return; + + str=CTA_Malloc((len+1)*sizeof(char)); + *retval = CTA_String_Get(xstr[i],str); + if (*retval != CTA_OK) return; + + fprintf(file,"'%s'\n",str); + free(str); + } + if (i+1n) { + fprintf(file,"\n"); + } + } + if (x->datatype == CTA_STRING) { + fprintf(file,"};\n"); + } else { + fprintf(file,"];\n"); + } + if (CTA_FLUSH_ALWAYS) CTA_Flush(); + } + else if (CTA_Handle_Check(*usrdata,CTA_PACK)==CTA_OK) { + CTA_Message_Quiet(CTA_FALSE); + if (IDEBUG>0) printf("Vector_blas: packing vector\n"); + *retval=CTA_Pack_Add(*usrdata,x,sizeof(CTAI_Vector_blas)); + if (x->datatype==CTA_STRING) { + for (i=0;in;i++){ + *retval=CTA_String_Export(xstr[i],*usrdata); + if (retval!=CTA_OK) return; + } + } else { + if (IDEBUG>0) printf("Vector_blas: packing values\n"); + *retval=CTA_SizeOf(x->datatype, &size); + if (IDEBUG>0) printf("Vector_blas: size of data-type %d\n",size); + if (IDEBUG>0) printf("Vector_blas: size of n %d\n",x->n); + *retval=CTA_Pack_Add(*usrdata,x->values,x->n*size); + if (IDEBUG>0) printf("Vector_blas: packing done\n"); + } + } else { + CTA_Message_Quiet(CTA_FALSE); + *retval=CTA_FORMAT_NOT_SUPPORTED; + return; + } + *retval=CTA_OK; +}; + + +void CTAI_Vector_import( + CTAI_Vector_blas *x, + CTA_Handle *usrdata, + int *retval + ) +{ + /* Local variables */ + BOOL packout; + int size; + + void *values; + int n; + int datatype; + + /* set pointers for easy printing */ + + packout = (CTA_Handle_Check(*usrdata,CTA_PACK)==CTA_OK); + + datatype=x->datatype; + n=x->n; + values=x->values; + + if (packout) { + *retval=CTA_Pack_Get(*usrdata,x,sizeof(CTAI_Vector_blas)); + x->values=values; + *retval=CTA_SizeOf(x->datatype, &size); + size=size*x->n; + + // Realloc values part when needed + if (x->datatype!=datatype || n!=x->n) { + x->values=realloc(x->values,size); + } + *retval=CTA_Pack_Get(*usrdata,x->values,size); + } else { + *retval=CTA_FORMAT_NOT_SUPPORTED; + } + *retval=CTA_OK; +}; + + + +void CTAI_Vector_Print_Table( + CTAI_Vector_blas **table, + int *ncolumns, + CTAI_Vector_blas *formats, + int *retval) +{ + int nrows = /* number of rows in the table */ + table[0]->n; + int irow, icol; /* loop counters for rows and columns */ + char *str; + + CTA_String * formvalues = /* */ + (CTA_String *) formats->values; + + char** formstr = /* string array with the format strings */ + CTA_Malloc(sizeof(char *) * (*ncolumns)); + + if (CTA_FLUSH_ALWAYS) CTA_Flush(); + + // Check dimensions + if (formats->n != *ncolumns) + { + *retval = CTA_DIMENSION_ERROR; return; + } + + for (icol=1; icol<*ncolumns; icol++) + { + if (table[icol]->n != nrows) + { + *retval = CTA_DIMENSION_ERROR; return; + } + } + + // Retrieve the format strings + for (icol=0; icol<*ncolumns; icol++) + { + int length; + *retval = CTA_String_GetLength(formvalues[icol],&length); + if (*retval != CTA_OK) return; + + formstr[icol] = CTA_Malloc(sizeof(char)*(length+1)); + *retval = CTA_String_Get(formvalues[icol],formstr[icol]); + if (*retval != CTA_OK) return; + } + + // Print the table + for (irow=0; irow < nrows; irow++) + { + for (icol=0; icol<*ncolumns; icol++) + { + // print item (irow,icol) of the table + if (table[icol]->datatype == CTA_INTEGER || table[icol]->datatype == CTA_HANDLE) + { + int * tabvalues =(int *) table[icol]->values; + printf(formstr[icol],tabvalues[irow]); + } + else if (table[icol]->datatype == CTA_REAL) + { + float * tabvalues =(float *) table[icol]->values; + printf(formstr[icol],tabvalues[irow]); + } + else if (table[icol]->datatype == CTA_DOUBLE) + { + double * tabvalues =(double *) table[icol]->values; + printf(formstr[icol],tabvalues[irow]); + } + else if (table[icol]->datatype == CTA_STRING) + { + int length; + CTA_String * tabvalues =(CTA_String *) table[icol]->values; + + *retval = CTA_String_GetLength(tabvalues[irow],&length); + if (*retval != CTA_OK) return; + + str = CTA_Malloc(sizeof(char)*(length+1)); + *retval = CTA_String_Get(tabvalues[irow],str); + if (*retval != CTA_OK) return; + + printf(formstr[icol],str); + free(str); + } + } + printf("\n"); + } + + // Delete items which are no longer necessary + for (icol=1; icol<*ncolumns; icol++) + { + free(formstr[icol]); + } + free(formstr); + + if (CTA_FLUSH_ALWAYS) CTA_Flush(); + + *retval=CTA_OK; +}; + + +/* Append a value to a vector */ +void CTAI_Vector_appendval( + CTAI_Vector_blas *x, + void *val, + CTA_Datatype *datatype, + int *retval + ){ + int i = 1 + x->n; /* Index of the new element */ + + /* Update n */ + ++(x->n); + + /* Ensure the vector is long enough and update n */ + if (x->n > x->size) + { + x->size = MAX((int)(1.5 * (float)(x->size) + 0.5), 1 + x->n); + + /* Reallocate the values array */ + if (x->datatype==CTA_INTEGER || x->datatype==CTA_HANDLE || x->datatype==CTA_STRING){ + x->values=realloc(x->values, x->size * sizeof(int)); + } + else if (x->datatype==CTA_REAL){ + x->values=realloc(x->values, x->size * sizeof(float)); + } + else if (x->datatype==CTA_DOUBLE){ + x->values=realloc(x->values, x->size * sizeof(double)); + } + else { + *retval=CTA_ILLEGAL_DATATYPE; + return; + } + } + + /* Call setval() to set the value at the new element */ + CTAI_Vector_setval(x, &i, val, datatype, retval); +}; + + +void CTAI_Vector_free( + // The idea of re-alloating a vector seems not to have been considered. + // It would not be so difficult to allow reallocation. + CTAI_Vector_blas *x, + int *retval + ){ + int i; + CTA_String *xstr; + CTA_Handle *xhdl; + + switch (x->datatype) + { + case CTA_STRING: + xstr = (CTA_String *) x->values; + for (i = 0; in; i++) + { + *retval = CTA_String_Free(&xstr[i]); + if (*retval != CTA_OK) return; + } + break; + case CTA_HANDLE: + xhdl = (CTA_Handle *) x->values; + for (i = 0; in; i++) + { + *retval = CTA_Handle_Free_All(&xhdl[i]); + if (*retval != CTA_OK) return; + } + break; + } + + free(x->values); + *retval=CTA_OK; +}; + + +/* Get the index from a path component */ +/* Updates name so it will be without index */ +/* The index is indicated with an ending on '' */ +static int CTAI_Vector_GetPathIndex(const char *name) { + char *open; + const char *close; + char szix[MAXIXLEN+1]; + + /* Find open and close brackets */ + open = strrchr(name, '<'); + close = strrchr(name, '>'); + if (!open || !close || (open + 2) > close || + (close - open) >= MAXIXLEN) { + /* Default index: 1 */ + return 1; + } + + /* Remove index from name */ + *open = '\0'; + + /* Determine the index */ + strncpy(szix, open + 1, close - open - 1); + szix[close - open - 1] = '\0'; + return atoi(szix); +} + + +/* Find the handle with the given name (INTERNAL USE) */ +/* Returns the handle or CTA_NULL if not found or in case of an error */ +CTA_Handle CTAI_Vector_FindHandle(CTA_Vector hvec, const char* name) +{ + CTA_Vector *vector; + CTA_Handle h; + int i; + int n; + int counter; + int index; + + if (CTA_OK != CTA_Handle_GetData(hvec, (void*) &vector)) return CTA_NULL; + if (CTA_OK != CTA_Vector_GetSize(hvec, &n)) return CTA_NULL; + + /* Get index from name */ + index = CTAI_Vector_GetPathIndex(name); + + /* Search for handle */ + counter = 0; + for (i = 1; i <= n; ++i) { + if (CTA_OK != CTA_Vector_GetVal(hvec, i, &h, CTA_HANDLE)) return CTA_NULL; + if (0 == strcmp(CTAI_Handle_GetName(h), name)) { + ++counter; + if (counter == index) return h; + } + } + return CTA_NULL; +} + +/* Count the number of handles with the given name (INTERNAL USE) */ +/* Returns the count or -1 in case of an error */ +int CTAI_Vector_CountHandles(CTA_Vector hvec, const char* name) +{ + CTA_Vector *vector; + CTA_Handle h; + int i; + int n; + int counter; + + if (CTA_OK != CTA_Handle_GetData(hvec, (void*) &vector)) return -1; + if (CTA_OK != CTA_Vector_GetSize(hvec, &n)) return -1; + + counter = 0; + for (i = 1; i <= n; ++i) { + if (CTA_OK != CTA_Vector_GetVal(hvec, i, &h, CTA_HANDLE)) return -1; + if (0 == strcmp(CTAI_Handle_GetName(h), name)) { + ++counter; + } + } + return counter; +} + +void CTAI_Vector_elmdiv( + CTAI_Vector_blas *y, + CTAI_Vector_blas *x, + int *retval + ){ + /* Local variables */ + float *svec_y, *svec_x; + double *dvec_y, *dvec_x; + int i,n; + + /* check dimensions */ + if (y->n!=x->n){ + *retval=CTA_DIMENSION_ERROR; + return; + } + + /* Handle data-type stuff for x */ + if (x->datatype ==CTA_DOUBLE) { + dvec_x=(double*) x->values; + } + else if (x->datatype ==CTA_REAL){ + svec_x=(float*) x->values; + } else { + *retval=CTA_ILLEGAL_DATATYPE; return; + } + + /* Handle data-type stuff for y */ + if (y->datatype ==CTA_DOUBLE) { + dvec_y=(double*) y->values; + } + else if (y->datatype ==CTA_REAL){ + svec_y=(float*) y->values; + } else { + *retval=CTA_ILLEGAL_DATATYPE; return; + } + + n=x->n; + if (x->datatype ==CTA_DOUBLE && y->datatype ==CTA_DOUBLE){ + for (i=0;idatatype ==CTA_REAL && y->datatype ==CTA_REAL){ + for (i=0;idatatype ==CTA_DOUBLE && y->datatype ==CTA_REAL){ + for (i=0;idatatype ==CTA_REAL && y->datatype ==CTA_DOUBLE){ + for (i=0;in!=x->n){ + *retval=CTA_DIMENSION_ERROR; + return; + } + + /* Handle data-type stuff for x */ + if (x->datatype ==CTA_DOUBLE) { + dvec_x=(double*) x->values; + } + else if (x->datatype ==CTA_REAL){ + svec_x=(float*) x->values; + } else { + *retval=CTA_ILLEGAL_DATATYPE; return; + } + + /* Handle data-type stuff for y */ + if (y->datatype ==CTA_DOUBLE) { + dvec_y=(double*) y->values; + } + else if (y->datatype ==CTA_REAL){ + svec_y=(float*) y->values; + } else { + *retval=CTA_ILLEGAL_DATATYPE; return; + } + + n=x->n; + if (x->datatype ==CTA_DOUBLE && y->datatype ==CTA_DOUBLE){ + for (i=0;idatatype ==CTA_REAL && y->datatype ==CTA_REAL){ + for (i=0;idatatype ==CTA_DOUBLE && y->datatype ==CTA_REAL){ + for (i=0;idatatype ==CTA_REAL && y->datatype ==CTA_DOUBLE){ + for (i=0;idatatype ==CTA_DOUBLE) { + dvec_y=(double*) y->values; + } + else if (y->datatype ==CTA_REAL){ + svec_y=(float*) y->values; + } else { + *retval=CTA_ILLEGAL_DATATYPE; return; + } + + n=y->n; + if (y->datatype ==CTA_DOUBLE){ + for (i=0;idatatype ==CTA_REAL){ + for (i=0;i +#include "cta_mem.h" +#include "ctai_xml.h" +#include "f_cta_utils.h" +#include "cta_datatypes.h" +#include "cta_defaults.h" +#include "cta_errors.h" +#include "cta_time.h" +#include "cta_vector.h" +#include "cta_sobs.h" +#include "cta_xml.h" +#include "ctai.h" +#include "ctai_datatypes.h" +#include "ctai_handles.h" +#include "ctai_string.h" +#include "ctai_vector.h" +#include "ctai_sobs.h" +#include "cta_message.h" + + +#define CTA_XML_READ_F77 F77_CALL(cta_xml_read,CTA_XML_READ) +#define CTA_XML_WRITE_F77 F77_CALL(cta_xml_write,CTA_XML_WRITE) + +#define CLASSNAME "CTA_XML" + +#define DEBUG 0 +/* Local interfaces */ +CTA_Handle CTAI_XML_CreateObject_Comb(xmlNode *cur_node); +CTA_TreeVector CTAI_XML_CreatetreeVector(xmlNode *cur_node); +int CTAI_XML_TreeVector_regular_grid(CTA_TreeVector treevec, CTA_TreeVector root ); +CTA_SObsClass CTAI_XML_CreateSObsClass(xmlNode *cur_node); +CTA_ModelClass CTAI_XML_CreateModelClass(xmlNode *cur_node); +void CTAI_XML_WriteTreeVector(int issubtree, int isxfv,CTA_TreeVector treevec, int level,xmlTextWriter *writer); + +/*************************************************************** +READ +***************************************************************/ + +/** \brief Get content of a text element; remove training spaces, tabs etc. +* +* \param cur_node I Text node. +* \return Text content of the given (TEXT) node. To free(). +*/ +static char *CTAI_XML_GetContent(xmlNode *cur_node) { + char *outtxt; + const xmlChar *intxt; + int pos; + + intxt = cur_node->content; + /* Find position of the last non-special character */ + pos = strlen((char *) intxt) - 1; + while (pos >= 0 && intxt[pos] < 33) { + --pos; + } + /* Allocate return string */ + if (pos < 0) { + /* Empty string */ + outtxt = (char*)CTA_Malloc(1); + outtxt[0] = '\0'; + } else { + /* Non-empty string */ + outtxt = (char*)CTA_Malloc(2 + pos); + memcpy(outtxt, intxt, pos + 1); + outtxt[pos + 1]='\0'; + } + return outtxt; +} + + +/** \brief Create a default COSTA tree. +* +* \param cur_node I Current XML node +* \return Handle to create or CTA_NULL in case of an error. +*/ +static CTA_Tree CTAI_XML_CreateDefaultTree(xmlNode *cur_node) { + xmlChar *val; /* element value */ + xmlAttr *cur_prop; /* property loop counter */ + CTA_Tree htree; /* COSTA tree */ + CTA_String hstr; /* COSTA string */ + int retval; + + /* Get node text */ + retval = CTA_Tree_Create(&htree); + if (retval == CTA_OK) { + CTAI_Handle_SetName(htree, (char *) cur_node->name); + + + if (0!=strcmp((char *) cur_node->name,"treeVectorFile")) { + + /* Parse this node's attributes (only if it is not treeVectorFile*/ + + for (cur_prop = cur_node->properties; cur_prop; cur_prop = cur_prop->next) { + + val = xmlGetProp(cur_node, cur_prop->name); + CTA_String_Create(&hstr); + CTA_String_Set(hstr, (char *) val); + xmlFree(val); + CTA_Tree_AddHandle(htree, (char *) cur_prop->name, hstr); + } + } + return htree; + } + return CTA_NULL; +} + + + +static CTA_Tree CTAI_XML_Create_Combine_SObs(xmlNode *startnode, CTA_Tree hparent) { + xmlNode *cur_node; /* node loop counter */ + CTA_Handle hobj; /* COSTA object */ + + + for (cur_node = startnode; cur_node; cur_node = cur_node->next) { + if (cur_node->type == XML_ELEMENT_NODE || cur_node->type == XML_TEXT_NODE) { + /* Create COSTA object from the current node */ + hobj = CTAI_XML_CreateObject_Comb(cur_node); + if (hobj != CTA_NULL) { + if (hparent == CTA_NULL) { + /* Create tree */ + hparent = hobj; + } else { + /* Add object to the parent */ + CTA_Tree_AddHandle(hparent, CTAI_Handle_GetName(hobj), hobj); + } + if (CTAI_Handle_GetDatatype(hobj) == CTA_TREE) { + /* In case of a tree, handle nested COSTA objects */ + CTAI_XML_Create_Combine_SObs(cur_node->children, hobj); + } + } + } + } + + + return hparent; + +} + + +/** \brief Create a COSTA stochastic observer. +* +* \param cur_node I Current XML node +* \return Handle to create or CTA_NULL in case of an error. +*/ +static CTA_StochObs CTAI_XML_CreateSObs(xmlNode *cur_node) { + CTA_StochObs hnew, hnew2; /* The new COSTA handle */ + int retval; + xmlChar *name = NULL; + xmlChar *database = NULL; + xmlChar *selection = NULL; + xmlChar *timeoffset = NULL; + xmlChar *sobs_class_name = NULL; + CTA_SObsClass sobsclass = CTA_DEFAULT_SOBS; + CTA_String hsel; + int is_combined; + CTA_Tree hnew_sub; + CTA_Handle hdb; + CTA_Vector hvec1, htimeoffset; + double timeoffsetval; + + + is_combined = 0; + /* Parse this node's attributes */ + name = xmlGetProp(cur_node, CTAI_XML_ID); + sobs_class_name = xmlGetProp(cur_node, CTAI_XML_SOBSCLASSNAME); + database = xmlGetProp(cur_node, CTAI_XML_DATABASE); + selection = xmlGetProp(cur_node, CTAI_XML_SELECT); + timeoffset = xmlGetProp(cur_node, CTAI_XML_TIMEOFFSET); + + /* first check if the stochobs is a 'combined' one */ + + if (sobs_class_name) { + if (0==strcmp((char *) sobs_class_name,"CTA_COMBINE_SOBS")) { + sobsclass = CTA_COMBINE_SOBS ; + if (DEBUG) printf("-- ctai_create_sobs: combiner! --\n"); + + // read a subtree using the sobs-combiner-xml-read + // hnew_sub is a valid cta_subtree with the cta_sobs already created + hnew_sub = CTAI_XML_Create_Combine_SObs(cur_node->children,CTA_NULL); + is_combined = 1; + } else { //sobsclass can be sql or netcdf or ... + // TODO: better: a handle_find or sobsclass_find action + if (DEBUG) printf("-- ctai_create_sobs: class name given ,NO combiner! --\n"); + if (0==strcmp((char *) sobs_class_name,"CTA_NETCDF_SOBS")) { + sobsclass = CTA_NETCDF_SOBS ; + } + } + } else { // sobsclass remains default + if (DEBUG) printf("-- ctai_create_sobs: NO class name given! --\n"); + }; + + + hnew=CTA_NULL; + + + /* Create stochastic observer */ + + CTA_String_Create(&hdb); + if (database) { + + + CTA_String_Set(hdb, (char *) database); + + + if (timeoffset){ + timeoffsetval = atof((char *) timeoffset); + + // make a vector of two handles: the subtree and the offset value + retval = CTA_Vector_Create(CTA_DEFAULT_VECTOR, 2, CTA_HANDLE, CTA_NULL, &hvec1); + // fill first element (name of database) + retval = CTA_Vector_SetVal(hvec1,1,&hdb,CTA_HANDLE); + //fill second element + retval = CTA_Vector_Create(CTA_DEFAULT_VECTOR, 1, CTA_DOUBLE, CTA_NULL, &htimeoffset); + + retval = CTA_Vector_SetVal(htimeoffset,1,&timeoffsetval,CTA_DOUBLE); + retval = CTA_Vector_SetVal(hvec1,2,&htimeoffset,CTA_HANDLE); + // create sobs + + retval = CTA_SObs_Create(sobsclass, hvec1, &hnew); + + } else { + + // 'old' situation: userdata is name of database + retval = CTA_SObs_Create(sobsclass, hdb, &hnew); + } + } + else { //combined CTA_OBS + if (is_combined == 1) { + /* create the combined SObs with for the 2nd argument not a database + but the subsobs subtree and the timeoffset */ + // make a vector of two handles: the subtree and the offset value + retval = CTA_Vector_Create(CTA_DEFAULT_VECTOR, 2, CTA_HANDLE, CTA_NULL, &hvec1); + // fill first element + retval = CTA_Vector_SetVal(hvec1,1,&hnew_sub,CTA_HANDLE); + //fill second element + retval = CTA_Vector_Create(CTA_DEFAULT_VECTOR, 1, CTA_DOUBLE, CTA_NULL, &htimeoffset); + if (timeoffset){ + timeoffsetval = atof((char *) timeoffset); + } else { + timeoffsetval = 0.0; + } + retval = CTA_Vector_SetVal(htimeoffset,1,&timeoffsetval,CTA_DOUBLE); + retval = CTA_Vector_SetVal(hvec1,2,&htimeoffset,CTA_HANDLE); + + + retval = CTA_SObs_Create(sobsclass, hvec1, &hnew); + + } else { + printf("ERROR: neither database given nor combined observer\n"); + exit(-1); + } + + } + + + CTA_String_Free(&hdb); + if (retval == CTA_OK) { + /* Perform initial selection */ + if (selection) { + CTA_String_Create(&hsel); + CTA_String_Set(hsel, (char *) selection); + retval=CTA_SObs_CreateSel(hnew, hsel, &hnew2); + CTA_String_Free(&hsel); + CTA_SObs_Free(&hnew); + if (retval != CTA_OK){ + return CTA_NULL; + } + hnew=hnew2; + } + + /* Set name (if any) */ + if (name) { + CTAI_Handle_SetName(hnew, (char *) name); + // xmlFree(name); + } else { + CTAI_Handle_SetName(hnew, ""); + } + } + + // if (is_combined == 1) { + // attach combined-subtree to sobs + + //} + + if (name) xmlFree(name); + if (database) xmlFree(database); + if (selection) xmlFree(selection); + + return hnew; +} + + + + CTA_Handle CTAI_XML_CreateObject_Comb(xmlNode *cur_node) { + xmlChar *val; /* Element value */ + CTA_Handle hnew = CTA_NULL; /* The return value */ + + // read objects in part of the xml-file containing the subSObs. The objects are either SObs or default trees. + + if (cur_node->type == XML_TEXT_NODE) { + /* If this node is a non-empty text node, create a string */ + val = (xmlChar *) CTAI_XML_GetContent(cur_node); + + + if (strlen((char *) val) < 1) { + if (val) xmlFree(val); + return CTA_NULL; + } + CTA_String_Create(&hnew); + CTA_String_Set(hnew, (char *) val); + xmlFree(val); + CTAI_Handle_SetName(hnew, ""); + } else { + /* This is not a text node */ + switch (CTAI_String2Type((char *) cur_node->name)) { + case CTA_SOBS: + /* Create a COSTA stochastic observer */ + hnew = CTAI_XML_CreateSObs(cur_node); + break; + } + if (hnew == CTA_NULL) { + /* Default: create a COSTA subtree */ + hnew = CTAI_XML_CreateDefaultTree(cur_node); + } + } + return hnew; +} + + + + +/** \brief Create a COSTA time. +* +* \param cur_node I Current XML node +* \return Handle to create or CTA_NULL in case of an error. +*/ +static CTA_Time CTAI_XML_CreateTime(xmlNode *cur_node) { + CTA_Time hnew; /* The new COSTA handle */ + int retval; + xmlChar *name = NULL; + xmlChar *sstart = NULL; + xmlChar *sstop = NULL; + xmlChar *sstep = NULL; + double dstart; + double dstop; + double dstep; + double tmp; + + /* Parse this node's attributes */ + name = xmlGetProp(cur_node, CTAI_XML_ID); + sstart = xmlGetProp(cur_node, CTAI_XML_START); + sstop = xmlGetProp(cur_node, CTAI_XML_STOP); + sstep = xmlGetProp(cur_node, CTAI_XML_STEP); + + /* Convert to numbers and/or fill defaults */ + if (sstart) { + dstart = atof((char *) sstart); + xmlFree(sstart); + } else { + dstart = 0.0; + } + if (sstop) { + dstop = atof((char *) sstop); + xmlFree(sstop); + } else { + dstop = dstart; + } + if (dstop < dstart) { + tmp = dstart; + dstart = dstop; + dstop = tmp; + } + if (sstep) { + dstep = atof((char *) sstep); + xmlFree(sstep); + } else { + /* Default */ + dstep = MIN(1.0, (dstop - dstart)); + } + + /* Create time */ + retval = CTA_Time_Create(&hnew); + if (retval == CTA_OK) { + /* Set name (if any) */ + if (name) { + CTAI_Handle_SetName(hnew, (char *) name); + xmlFree(name); + } else { + CTAI_Handle_SetName(hnew, ""); + } + + /* Set values */ + CTA_Time_SetSpan(hnew, dstart, dstop); + CTA_Time_SetStep(hnew, dstep); + return hnew; + } + return CTA_NULL; +} + + + + +/** \brief Create a COSTA object. +* +* \param cur_node I Current XML node +* \return Handle for a COSTA node to create or 0 in case of an error. +*/ + static CTA_Handle CTAI_XML_CreateObject(xmlNode *cur_node) { + char *val; /* Element value */ + CTA_Handle hnew = CTA_NULL; /* The return value */ + CTA_Metainfo minfo; + + if (cur_node->type == XML_TEXT_NODE) { + /* If this node is a non-empty text node, create a string */ + val = CTAI_XML_GetContent(cur_node); + + + if (strlen(val) < 1) { + if (val) free(val); + return CTA_NULL; + } + CTA_String_Create(&hnew); + CTA_String_Set(hnew, val); + free(val); + CTAI_Handle_SetName(hnew, ""); + } else { + /* This is not a text node */ + switch (CTAI_String2Type((char *) cur_node->name)) { + case CTA_TIME: + /* Create a COSTA time */ + hnew = CTAI_XML_CreateTime(cur_node); + break; + case CTA_VECTOR: + /* Create a COSTA vector */ + CTA_Metainfo_Create(&minfo); + hnew = CTAI_XML_CreateVector_New(cur_node,minfo); + break; + case CTA_TREEVECTOR: + /* Create a (general) treeVector */ + hnew = CTAI_XML_CreatetreeVector(cur_node); + // retval = CTA_TreeVector_Export(hnew, CTA_FILE_STDOUT); + CTAI_XML_TreeVector_regular_grid(hnew, hnew); + break; + case CTA_FUNCTION: + /* Create a COSTA function */ + hnew = CTAI_XML_CreateFunc(cur_node); + break; + case CTA_SOBS: + /* Create a COSTA stochastic observer */ + hnew = CTAI_XML_CreateSObs(cur_node); + break; + case CTA_SOBSCLASS: + /* Create a COSTA stochastic observer class */ + hnew = CTAI_XML_CreateSObsClass(cur_node); + break; + case CTA_MODELCLASS: + hnew = CTAI_XML_CreateModelClass(cur_node); + break; + } + if (hnew == CTA_NULL) { + /* Default: create a COSTA subtree */ + hnew = CTAI_XML_CreateDefaultTree(cur_node); + } + } + return hnew; +} + + +/** \brief Recursively read elements +* +* \param startnode I XML node to start from +* \param hparent I parent COSTA handle +* \return tree root +*/ +static CTA_Tree CTAI_XML_Read(xmlNode *startnode, CTA_Tree hparent) { + xmlNode *cur_node; /* node loop counter */ + CTA_Handle hobj; /* COSTA object */ + + for (cur_node = startnode; cur_node; cur_node = cur_node->next) { + if (cur_node->type == XML_ELEMENT_NODE || cur_node->type == XML_TEXT_NODE) { + /* Create COSTA object from the current node */ + hobj = CTAI_XML_CreateObject(cur_node); + if (hobj != CTA_NULL) { + if (hparent == CTA_NULL) { + /* Create tree */ + hparent = hobj; + } else { + /* Add object to the parent */ + CTA_Tree_AddHandle(hparent, CTAI_Handle_GetName(hobj), hobj); + } + if (CTAI_Handle_GetDatatype(hobj) == CTA_TREE) { + /* In case of a tree, handle nested COSTA objects */ + CTAI_XML_Read(cur_node->children, hobj); + } + } + } + } + return hparent; +} + + +/** \brief Read a COSTA XML file into a tree +* +* \param hfname I file name of XML file to read +* \param hroot O handle of a COSTA tree +* \return CTA_OK if successful +*/ +int CTA_XML_Read(CTA_String hfname, CTA_Tree *hroot) { + char *fname; + xmlDoc *doc; + xmlNode *root_element; + int retval; + + /* Check handle types */ + retval=CTA_Handle_Check((CTA_Handle) hfname, CTA_STRING); + if (retval!=CTA_OK) return retval; + /* Initialize libxml */ + LIBXML_TEST_VERSION; + + /* Parse the file and get the DOM */ + fname = CTAI_String_Allocate(hfname); + doc = xmlReadFile(fname, NULL, 0); + free(fname); + if (!doc) { + // OLD CODEhroot = NULL; + *hroot = CTA_NULL; + return CTA_CANNOT_OPEN_FILE; + } + +#ifdef LIBXML_XINCLUDE_ENABLED + /* Handle xinclude */ + xmlXIncludeProcess(doc); +#endif + + /* Parse the XML tree */ + root_element = xmlDocGetRootElement(doc); + *hroot = CTAI_XML_Read(root_element, CTA_NULL); + + /* Clean up */ + xmlFreeDoc(doc); + return CTA_OK; +} + + +/*************************************************************** +WRITE +***************************************************************/ +#undef METHOD +#define METHOD "ConvertInput" +/** +* ConvertInput: +* @in: string in a given encoding +* @encoding: the encoding used +* +* Converts @in into UTF-8 for processing with libxml2 APIs +* +* Returns the converted UTF-8 string, or NULL in case of error. +*/ +xmlChar *ConvertInput(const char *in, const char *encoding) +{ + xmlChar *out; + int ret; + int size; + int out_size; + int temp; + xmlCharEncodingHandlerPtr handler; + + if (in == 0) + return 0; + + handler = xmlFindCharEncodingHandler(encoding); + + if (!handler) { + char message[128]; + sprintf(message,"ConvertInput: no encoding handler found for '%s'\n", + encoding ? encoding : ""); + CTA_WRITE_ERROR(message); + return 0; + } + + size = (int) strlen(in) + 1; + out_size = size * 2 - 1; + out = (unsigned char *) xmlMalloc((size_t) out_size); + + if (out != 0) { + temp = size - 1; + ret = handler->input(out, &out_size, (const xmlChar *) in, &temp); + if ((ret < 0) || (temp - size + 1)) { + if (ret < 0) { + CTA_WRITE_ERROR("Conversion is not successful.\n"); + } else { + char message[128]; + sprintf + ("Conversion is not successful. converted: %i octets.\n", + temp); + CTA_WRITE_ERROR(message); + } + + xmlFree(out); + out = 0; + } else { + out = (unsigned char *) xmlRealloc(out, out_size + 1); + out[out_size] = 0; /*null terminating out */ + } + } else { + CTA_WRITE_ERROR("Cannot allocate memory\n"); + } + + return out; +} + + +/** \brief Generate XML from COSTA stochastic observer +* +* \param h I COSTA handle +* \param writer I the XML text writer +*/ +static void CTAI_XML_WriteSObs(CTA_StochObs h, xmlTextWriter *writer) { + const xmlChar *name; + CTA_String hdb; + /* Start an element the the name of the tree handle */ + xmlTextWriterStartElement(writer, (xmlChar *) CTAI_Type2String(CTA_SOBS)); + /* Write name (if any) */ + name = (xmlChar *) CTAI_Handle_GetName(h); + if (name && *name) { + xmlTextWriterWriteAttribute(writer, CTAI_XML_ID, name); + } + /* Write database */ + hdb = CTAI_SObs_GetUserData(h, 1); + if (hdb == CTA_NULL) { + xmlTextWriterWriteComment(writer, + ConvertInput("Cannot find database in stochastic observer", (char *) MY_ENCODING)); + } else { + xmlTextWriterWriteAttribute(writer, CTAI_XML_DATABASE, (xmlChar *) CTAI_String_GetPtr(hdb)); + } + /* End the tree level elements */ + xmlTextWriterEndElement(writer); +} + + +/** \brief Generate XML from COSTA time span +* +* \param htime I COSTA handle +* \param writer I the XML text writer +*/ +static void CTAI_XML_WriteTime(CTA_Time htime, xmlTextWriter *writer) { + const xmlChar *name; + double t1, t2, ts; + xmlChar sz[64]; + + /* Start an element the the name of the tree handle */ + xmlTextWriterStartElement(writer, (xmlChar *) CTAI_Type2String(CTA_TIME)); + + /* Write name (if any) */ + name = (xmlChar *) CTAI_Handle_GetName(htime); + if (name && *name) { + xmlTextWriterWriteAttribute(writer, CTAI_XML_ID, name); + } + + /* Get time span */ + CTA_Time_GetSpan(htime, &t1, &t2); + CTA_Time_GetStep(htime, &ts); + + /* Write time span */ + sprintf((char *) sz, "%lg", t1); + xmlTextWriterWriteAttribute(writer, CTAI_XML_START, sz); + sprintf((char *) sz, "%lg", t2); + xmlTextWriterWriteAttribute(writer, CTAI_XML_STOP, sz); + sprintf((char *) sz, "%lg", ts); + xmlTextWriterWriteAttribute(writer, CTAI_XML_STEP, sz); + + /* End the tree level elements */ + xmlTextWriterEndElement(writer); +} + + + + + +/** \brief Generate XML from one COSTA string +* +* \param hstr I handle of a COSTA string +* \param writer I the XML text writer +*/ +static void CTAI_XML_WriteString(CTA_String hstr, xmlTextWriter *writer) { + const xmlChar *name; + const xmlChar *text; + + name = (xmlChar *) CTAI_Handle_GetName(hstr); + text = (xmlChar *) CTAI_String_GetPtr(hstr); + if (!text) { + xmlTextWriterWriteComment(writer, + ConvertInput("Cannot retrieve string value", (char *) MY_ENCODING)); + return; + } + + if (strlen((char *) name) == 0) { + /* If the name is empty, it's a text element */ + xmlTextWriterWriteString(writer, text); + } else { + /* If the name is not empty, it's an attribute */ + xmlTextWriterWriteAttribute(writer, name, text); + } +} + + +/** \brief Generate XML from one tree level (recursively) +* +* \param htree I handle of a COSTA tree +* \param writer I the XML text writer +*/ +static void CTAI_XML_WriteTree(CTA_Tree htree, xmlTextWriter *writer) { + int n; + int i; + CTA_Handle hitem; + + /* Start an element the the name of the tree handle */ + xmlTextWriterStartElement(writer, (xmlChar *) CTAI_Handle_GetName(htree)); + + /* Walk through all items on the current level */ + CTA_Tree_CountItems(htree, &n); + for (i = 1; i <= n; ++i) { + CTA_Tree_GetItem(htree, i, &hitem); + if (hitem != CTA_NULL) { + switch (CTAI_Handle_GetDatatype(hitem)) { + case CTA_TREE: + /* Recurse */ + CTAI_XML_WriteTree(hitem, writer); + break; + case CTA_STRING: + /* Write a string */ + CTAI_XML_WriteString(hitem, writer); + break; + case CTA_TIME: + /* Write a time span */ + CTAI_XML_WriteTime(hitem, writer); + break; + case CTA_VECTOR: + /* Write a vector */ + CTAI_XML_WriteVector(hitem,"","",0,0, writer); + break; + case CTA_TREEVECTOR: + /* Write a state vector */ + CTAI_XML_WriteTreeVector(0,0,hitem,0, writer); + break; + case CTA_SOBS: + /* Write a stochastic observer */ + CTAI_XML_WriteSObs(hitem, writer); + break; + default: + /* Write a comment */ + xmlTextWriterWriteComment(writer, + ConvertInput("Cannot write unknown data type", (char *) MY_ENCODING)); + break; + } + } + } + + /* End the tree level element */ + xmlTextWriterEndElement(writer); +} + + +/** \brief Build an XML document to write to a COSTA XML file +* +* \param hroot I handle of a COSTA tree +* \param writer I the XML text writer +*/ +static void CTAI_XML_WriteDocument(CTA_Tree hroot, xmlTextWriter *writer) { +/* Start the document with the xml default for the version, +* encoding ISO 8859-1 and the default for the standalone + * declaration. */ + xmlTextWriterStartDocument(writer, NULL, (char *) MY_ENCODING, NULL); + + /* Write the tree */ + CTAI_XML_WriteTree(hroot, writer); + + /* End the document */ + xmlTextWriterEndDocument(writer); +} + + +/** \brief Write a tree to a COSTA XML file +* +* \param fname I file name of XML file to write +* \param hroot I handle of a COSTA tree +* \return CTA_OK if successful +*/ +#undef METHOD +#define METHOD "Write" +int CTA_XML_Write(CTA_String hfname, CTA_Tree hroot) { + char *fname; + xmlDoc *doc; + xmlTextWriter *writer; + int retval; + + /* Check handle types */ + retval=CTA_Handle_Check((CTA_Handle) hfname, CTA_STRING); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Handle is not a cta_vector handle"); + return retval; + } + retval=CTA_Handle_Check((CTA_Handle) hroot, CTA_TREE); + if (retval!=CTA_OK) { + CTA_WRITE_ERROR("Cannot retrieve handle data"); + return retval; + } + + /* Initialize libxml */ + LIBXML_TEST_VERSION; + + /* Create a new xmlWriter for DOM, with no compression. */ + writer = xmlNewTextWriterDoc(&doc, 0); + if (!writer) { + return CTA_EXTERNAL_ERROR; + } + + /* Create document */ + CTAI_XML_WriteDocument(hroot, writer); + + /* Write document and clean up */ + xmlFreeTextWriter(writer); + fname = CTAI_String_Allocate(hfname); + xmlSaveFileEnc(fname, doc, (char *) MY_ENCODING); + xmlFreeDoc(doc); + free(fname); + return CTA_OK; +} + +/* Interfacing with Fortran */ +CTAEXPORT void CTA_XML_READ_F77(int *hfname, int *hroot, int *ierr) { + *ierr=CTA_XML_Read((CTA_String)*hfname, (CTA_Tree*)hroot); +} + +CTAEXPORT void CTA_XML_WRITE_F77(int *hfname, int *hroot, int *ierr) { + *ierr=CTA_XML_Write((CTA_String)*hfname, (CTA_Tree)*hroot); +} + + diff --git a/costa/native/cta/src/f_cta_defaults.c b/costa/native/cta/src/f_cta_defaults.c new file mode 100644 index 000000000..da0dabcdb --- /dev/null +++ b/costa/native/cta/src/f_cta_defaults.c @@ -0,0 +1,47 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/f_cta_defaults.c $ +$Revision: 1400 $, $Date: 2010-03-18 16:03:08 +0100 (Thu, 18 Mar 2010) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "cta_defaults.h" + + +#define CTA_DEFAULT_VECTOR_F77 F77_CALL(cta_default_vector,CTA_DEFAULT_VECTOR_F) +#define CTA_DEFAULT_SOBS_F77 F77_CALL(cta_default_sobs,CTA_DEFAULT_SOBS_F) +#define CTA_OBSDESCR_TABLE_F77 F77_CALL(cta_obsdescr_table,CTA_OBSDESCR_TABLE_F) +#define CTA_DEFAULT_MATRIX_F77 F77_CALL(cta_default_matrix,CTA_DEFAULT_MATRIX_F) +#define CTA_MODBUILD_SP_F77 F77_CALL(cta_modbuild_sp,CTA_MODBUILD_SP_F) +#define CTA_MODBUILD_PAR_F77 F77_CALL(cta_modbuild_par,CTA_MODBUILD_PAR_F) +#define CTA_FILE_STDOUT_F77 F77_CALL(cta_file_stdout,CTA_FILE_STDOUT_F) +#define CTA_MODELCOMBINER_F77 F77_CALL(cta_modelcombiner,CTA_MODELCOMBINER_F) +#define CTA_MODBUILD_B3B_F77 F77_CALL(cta_modbuild_b3b,CTA_MODBUILD_B3B_F) + +CTAEXPORT int CTA_DEFAULT_VECTOR_F77(){return CTA_DEFAULT_VECTOR;}; +CTAEXPORT int CTA_DEFAULT_SOBS_F77() {return CTA_DEFAULT_SOBS;}; +CTAEXPORT int CTA_OBSDESCR_TABLE_F77(){return CTA_OBSDESCR_TABLE;}; +CTAEXPORT int CTA_DEFAULT_MATRIX_F77(){return CTA_DEFAULT_MATRIX;}; +CTAEXPORT int CTA_MODBUILD_SP_F77() {return CTA_MODBUILD_SP;}; +CTAEXPORT int CTA_MODBUILD_PAR_F77() {return CTA_MODBUILD_PAR;}; +CTAEXPORT int CTA_FILE_STDOUT_F77() {return CTA_FILE_STDOUT;}; +CTAEXPORT int CTA_MODELCOMBINER_F77() {return CTA_MODELCOMBINER;}; +CTAEXPORT int CTA_MODBUILD_B3B_F77() {return CTA_MODBUILD_B3B;}; + + + diff --git a/costa/native/cta/src/f_cta_utils.c b/costa/native/cta/src/f_cta_utils.c new file mode 100644 index 000000000..5899b2847 --- /dev/null +++ b/costa/native/cta/src/f_cta_utils.c @@ -0,0 +1,62 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/f_cta_utils.c $ +$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2005 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "f_cta_utils.h" +#include "ctai.h" + +/*! \brief Copies a Fortran-string into a zero terminated C-string + + Traling blanks in the Fortran-string are not copied. + Note: it is the programmers responsibility that the C-string is long enough + to hold the copy of the fortran sting. + \param f_str I fortran string + \param c_str O c-string, copy of fstr (is allocated in this function) + \param len_f I length of Fortran string (hidden argument in Fortran) +*/ +int CTA_fstr2cstr(char *f_str,char *c_str, int len_f){ + + int len_c; //lengt of the c equivalent of f_str + + //determine "strlen"-length of fortran string + for (len_c=len_f-1;len_c>=0 && f_str[len_c]==' ';len_c--){}; + //copy f-string into c-string + if (len_c>=0) { + strncpy(c_str,f_str,len_c+1); + } + //add "0"-character + c_str[len_c+1]='\0'; + return CTA_OK; +}; + +int CTA_cstr2fstr(char *c_str,char *f_str, int len_f){ + int len_cpy; //numver of characters that must/can be copied + int i; // loopcouter over traling elements of f_str + + // avoid overflow by checking length + len_cpy=MIN((int) strlen(c_str),len_f); + strncpy(f_str,c_str,len_cpy); + // put spaces in trailing elements of f_str + for (i=len_cpy;i operation on each value in this array. + ! \param h I Array handle + ! \param alpha I The alpha in state variable += alpha * vector. + ! \param axpyValues I the values for the axpy-operation on all values in this array. + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Array_axpyOnValues + subroutine CTA_Array_axpyOnValues( h, alpha, axpyValues, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: h + real(8) , intent(in ) :: alpha + real(8) , intent(in ) :: axpyValues(*) + integer , intent(out ) :: status + end subroutine CTA_Array_axpyOnValues + end interface + + ! \brief Multiply each value in this array with the corresponding multiplication factor. + ! \param h I Array handle + ! \param multiplicationFactors I the multiplication factors for all array values. + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Array_multiplyValues + subroutine CTA_Array_multiplyValues( h, multiplicationFactors, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: h + real(8) , intent(in ) :: multiplicationFactors(*) + integer , intent(out ) :: status + end subroutine CTA_Array_multiplyValues + end interface + + ! \brief Change the dimensions of an array. The new array should have the same length + ! \param h I Array handle + ! \param nDimensions I length of array dimensions + ! \param dimensions I new dimensions of array + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Array_reshape + subroutine CTA_Array_reshape( h, nDimensions, dimensions, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: h + integer , intent(in ) :: nDimensions + integer , intent(in ) :: dimensions(*) + integer , intent(out ) :: status + end subroutine CTA_Array_reshape + end interface + + ! \brief Get part of the array by selection of a subset in one dimension. + ! Eg. a=[[1,2,3],[4,5,6]] a.getSlice(0,0) returns [1,2,3] + ! Note that the number of dimensions IS reduced by one. + ! \param h I Array handle + ! \param dimension I Dimension to make selection in + ! \param index I Element in dimension that is selected + ! \param h_out O new Array with selected selection + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Array_getSlice + subroutine CTA_Array_getSlice( h, dimension, index, h_out, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: h + integer , intent(in ) :: dimension + integer , intent(in ) :: index + integer(CTA_HANDLE_IKIND) , intent(out ) :: h_out + integer , intent(out ) :: status + end subroutine CTA_Array_getSlice + end interface + + ! \brief Get part of the array by selection of a subset in one dimension. + ! Eg. a=[[1,2,3],[4,5,6],[7,8,9]] a.getSlice(0,0,1) returns [[1,2],[3,4]] + ! Note that the number of dimensions is NOT reduced by one. + ! \param h I Array handle + ! \param dimension I Dimension to make selection in + ! \param minIndex I start of range to select + ! \param maxIndex I end of range to select + ! \param h_out O new Array with selected selection + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Array_getSlice_range + subroutine CTA_Array_getSlice_range( h, dimension, minIndex, maxIndex, h_out, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: h + integer , intent(in ) :: dimension + integer , intent(in ) :: minIndex + integer , intent(in ) :: maxIndex + integer(CTA_HANDLE_IKIND) , intent(out ) :: h_out + integer , intent(out ) :: status + end subroutine CTA_Array_getSlice_range + end interface + + ! \brief Get part of the array by selection of a subset in one dimension. + ! Eg. a=[[1,2,3],[4,5,6],[7,8,9]] a.getSlice(0,0,1) returns [1,2,3,4] + ! \param h I Array handle + ! \param dimension I Dimension to make selection in + ! \param minIndex I start of range to select + ! \param maxIndex I end of range to select + ! \param values O selected values + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Array_getSliceAsDoubles_range + subroutine CTA_Array_getSliceAsDoubles_range( h, dimension, minIndex, maxIndex, values, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: h + integer , intent(in ) :: dimension + integer , intent(in ) :: minIndex + integer , intent(in ) :: maxIndex + real(8) , intent(out ) :: values + integer , intent(out ) :: status + end subroutine CTA_Array_getSliceAsDoubles_range + end interface + + ! \brief Set the values of a part of an array. + ! Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([11,12,13],1,1) + ! sets the second column a=[[1,11,3],[4,12,6],[7,13,9]] + ! Note that the dimension of the slice is one smaller than for the array. + ! \param h I Array handle + ! \param slice I Values to set + ! \param dimension I Dimension that is replaced + ! \param index I Index of selected dimension to replace + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Array_setSliceAsDoubles + subroutine CTA_Array_setSliceAsDoubles( h, slice, dimension, index, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: h + real(8) , intent(in ) :: slice(*) + integer , intent(in ) :: dimension + integer , intent(in ) :: index + integer , intent(out ) :: status + end subroutine CTA_Array_setSliceAsDoubles + end interface + + ! \brief Set the values of a part of an array. + ! Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([11,12,13],1,1) + ! sets the second column a=[[1,11,3],[4,12,6],[7,13,9]] + ! Note that the dimension of the slice is one smaller than for the array. + ! \param h I Array handle + ! \param slice_h I Values to set + ! \param dimension I Dimension that is replaced + ! \param index I Index of selected dimension to replace + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Array_setSliceAsArray + subroutine CTA_Array_setSliceAsArray( h, slice_h, dimension, index, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: h + integer(CTA_HANDLE_IKIND) , intent(in ) :: slice_h + integer , intent(in ) :: dimension + integer , intent(in ) :: index + integer , intent(out ) :: status + end subroutine CTA_Array_setSliceAsArray + end interface + + ! \brief Set the values of a part of an array. + ! Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([[11,12,13],[14,15,16]],1,1,2) + ! sets the second and third columns a=[[1,11,14],[4,12,15],[7,13,16]] + ! Note that the dimension of the slice is the same as for the array. + ! \param h I Array handle + ! \param slice_h I Values to set + ! \param dimension I Dimension that is replaced + ! \param minIndex I Start of range in Dimension to be replaced + ! \param maxIndex I End of range in Dimension to be replaced + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Array_setSliceAsArray_range + subroutine CTA_Array_setSliceAsArray_range( h, slice_h, dimension, minIndex, maxIndex, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: h + integer(CTA_HANDLE_IKIND) , intent(in ) :: slice_h + integer , intent(in ) :: dimension + integer , intent(in ) :: minIndex + integer , intent(in ) :: maxIndex + integer , intent(out ) :: status + end subroutine CTA_Array_setSliceAsArray_range + end interface + + ! \brief Set the values of a part of an array. + ! Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([11,12,13,14,15,16],1,1,2) + ! sets the second and third columns a=[[1,11,14],[4,12,15],[7,13,16]] + ! Note that the dimension of the slice is the same as for the array. + ! \param h I Array handle + ! \param slice I Values to set + ! \param dimension I Dimension that is replaced + ! \param minIndex I Start of range in Dimension to be replaced + ! \param maxIndex I End of range in Dimension to be replaced + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Array_setSliceAsDoubles_range + subroutine CTA_Array_setSliceAsDoubles_range( h, slice, dimension, minIndex, maxIndex, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: h + real(8) , intent(in ) :: slice(*) + integer , intent(in ) :: dimension + integer , intent(in ) :: minIndex + integer , intent(in ) :: maxIndex + integer , intent(out ) :: status + end subroutine CTA_Array_setSliceAsDoubles_range + end interface + + ! \brief Convert indices in multiple dimensions to position in the one-dimensional array as + ! returned eg by getValuesAsDoubles + ! Eg. a=[[1,2,3],[4,5,6],[7,8,9]] then valueIndex([1,0]) returns 3 which points to the value 4 here. + ! \param h I Array handle + ! \param nIndices I length of indices + ! \param indices I indices of element + ! \param index O position of element + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Array_valueIndex + subroutine CTA_Array_valueIndex( h, nIndices, indices, index, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: h + integer , intent(in ) :: nIndices + integer , intent(in ) :: indices(*) + integer , intent(out ) :: index + integer , intent(out ) :: status + end subroutine CTA_Array_valueIndex + end interface + + +end module cta_f90_array + diff --git a/costa/native/cta_f90/generated/cta_f90_datatypes.f90 b/costa/native/cta_f90/generated/cta_f90_datatypes.f90 new file mode 100644 index 000000000..a8a49e53e --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_datatypes.f90 @@ -0,0 +1,23 @@ +module cta_f90_datatypes + + implicit none + + public + + ! \brief Get the result of the C-function sizeof for a COSTA datatype + ! + ! \param datatype I COSTA data type + ! \param size O receives result sizeof-function + ! \param status O CTA_OK if successful + ! + interface CTA_F90_SizeOf + subroutine CTA_SizeOf( datatype, size, status ) + integer , intent(in ) :: datatype + integer , intent(out ) :: size + integer , intent(out ) :: status + end subroutine CTA_SizeOf + end interface + + +end module cta_f90_datatypes + diff --git a/costa/native/cta_f90/generated/cta_f90_datetime.f90 b/costa/native/cta_f90/generated/cta_f90_datetime.f90 new file mode 100644 index 000000000..ddbf8cc92 --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_datetime.f90 @@ -0,0 +1,150 @@ +module cta_f90_datetime + + implicit none + + public + + ! \brief Julian day number from Gregorian date. + ! + ! \param year I Year + ! \param month I Month + ! \param day I Day + ! \param hour I Hour + ! \param minute I Minute + ! \param second I Second + ! \param jd O Julian day number + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_DateTime_GregorianToJulian + subroutine CTA_DateTime_GregorianToJulian( year, month, day, hour, minute, second, jd, status ) + use CTA_F90_Parameters, only : CTA_TIME_RKIND + integer , intent(in ) :: year + integer , intent(in ) :: month + integer , intent(in ) :: day + integer , intent(in ) :: hour + integer , intent(in ) :: minute + integer , intent(in ) :: second + real(CTA_TIME_RKIND) , intent(out ) :: jd + integer , intent(out ) :: status + end subroutine CTA_DateTime_GregorianToJulian + end interface + + ! \brief Modified Julian day number from Gregorian date. + ! + ! \param year I Year + ! \param month I Month + ! \param day I Day + ! \param hour I Hour + ! \param minute I Minute + ! \param second I Second + ! \param mjd O Modified Julian day number + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_DateTime_GregorianToModifiedJulian + subroutine CTA_DateTime_GregorianToModifiedJulian( year, month, day, hour, minute, second, mjd, status ) + use CTA_F90_Parameters, only : CTA_TIME_RKIND + integer , intent(in ) :: year + integer , intent(in ) :: month + integer , intent(in ) :: day + integer , intent(in ) :: hour + integer , intent(in ) :: minute + integer , intent(in ) :: second + real(CTA_TIME_RKIND) , intent(out ) :: mjd + integer , intent(out ) :: status + end subroutine CTA_DateTime_GregorianToModifiedJulian + end interface + + ! \brief Convert days into hours, minutes, and seconds. + ! + ! \param days I Year + ! \param hour O Hour + ! \param minute O Minute + ! \param second O Second + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_DateTime_DaysToHMS + subroutine CTA_DateTime_DaysToHMS( days, hour, minute, second, status ) + use CTA_F90_Parameters, only : CTA_TIME_RKIND + real(CTA_TIME_RKIND) , intent(in ) :: days + integer , intent(out ) :: hour + integer , intent(out ) :: minute + integer , intent(out ) :: second + integer , intent(out ) :: status + end subroutine CTA_DateTime_DaysToHMS + end interface + + ! \brief Julian day number from Modified Julian day number + ! + ! \param mjd I Modified Julian day number + ! \param jd O Julian day number + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_DateTime_ModifiedJulianToJulian + subroutine CTA_DateTime_ModifiedJulianToJulian( mjd, jd, status ) + use CTA_F90_Parameters, only : CTA_TIME_RKIND + real(CTA_TIME_RKIND) , intent(in ) :: mjd + real(CTA_TIME_RKIND) , intent(out ) :: jd + integer , intent(out ) :: status + end subroutine CTA_DateTime_ModifiedJulianToJulian + end interface + + ! \brief Gregorian calendar date from Julian day number + ! + ! \param jd I Julian day number + ! \param year O Year + ! \param month O Month + ! \param day O Day + ! \param hour O Hour + ! \param minute O Minute + ! \param second O Second + ! \param jd O Julian day number + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_DateTime_JulianToGregorian + subroutine CTA_DateTime_JulianToGregorian( jd, year, month, day, hour, minute, second, status ) + use CTA_F90_Parameters, only : CTA_TIME_RKIND + real(CTA_TIME_RKIND) , intent(out ) :: jd + integer , intent(out ) :: year + integer , intent(out ) :: month + integer , intent(out ) :: day + integer , intent(out ) :: hour + integer , intent(out ) :: minute + integer , intent(out ) :: second + integer , intent(out ) :: status + end subroutine CTA_DateTime_JulianToGregorian + end interface + + ! \brief Gregorian calendar date from Modified Julian day number + ! + ! \param mjd I Modified Julian day number + ! \param year O Year + ! \param month O Month + ! \param day O Day + ! \param hour O Hour + ! \param minute O Minute + ! \param second O Second + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_DateTime_ModifiedJulianToGregorian + subroutine CTA_DateTime_ModifiedJulianToGregorian( mjd, year, month, day, hour, minute, second, status ) + use CTA_F90_Parameters, only : CTA_TIME_RKIND + real(CTA_TIME_RKIND) , intent(in ) :: mjd + integer , intent(out ) :: year + integer , intent(out ) :: month + integer , intent(out ) :: day + integer , intent(out ) :: hour + integer , intent(out ) :: minute + integer , intent(out ) :: second + integer , intent(out ) :: status + end subroutine CTA_DateTime_ModifiedJulianToGregorian + end interface + + +end module cta_f90_datetime + diff --git a/costa/native/cta_f90/generated/cta_f90_file.f90 b/costa/native/cta_f90/generated/cta_f90_file.f90 new file mode 100644 index 000000000..b7038defe --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_file.f90 @@ -0,0 +1,149 @@ +module cta_f90_file + + implicit none + + public + + ! \brief Create a new COSTA file + ! for holding a C file descriptor of a FORTRAN file LUN + ! + ! \note This call does not open a file. + ! No FORTRAN support in this version + ! + ! \param hfile O receives handle of created file + ! \param status O CTA_OK if successful + ! + interface CTA_F90_File_Create + subroutine CTA_File_Create( hfile, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(out ) :: hfile + integer , intent(out ) :: status + end subroutine CTA_File_Create + end interface + + ! \brief Free a new COSTA file-handle + ! + ! \note The File is not closed (in this version) + ! + ! \param hfile IO handle of COSTA file CTA_NULL on return + ! \param status O CTA_OK if successful + ! + interface CTA_F90_File_Free + subroutine CTA_File_Free( hfile, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hfile + integer , intent(out ) :: status + end subroutine CTA_File_Free + end interface + + ! \brief Get the C-file descriptor of the COSTA file + ! + ! \note + ! + ! \param hfile I handle of COSTA file-handle + ! \param file O receives file descriptor + ! \param status O CTA_OK if successful + ! + interface CTA_F90_File_Get + subroutine CTA_File_Get( hfile, file, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hfile + integer , intent(out ) :: file + integer , intent(out ) :: status + end subroutine CTA_File_Get + end interface + + ! \brief Get the NETCDF file id of the COSTA file + ! + ! \note + ! + ! \param hfile I handle of COSTA file-handle + ! \param ncid O receives NETCDF file id + ! \param status O CTA_OK if successful + ! + interface CTA_F90_File_GetNetcdf + subroutine CTA_File_GetNetcdf( hfile, ncid, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hfile + integer , intent(out ) :: ncid + integer , intent(out ) :: status + end subroutine CTA_File_GetNetcdf + end interface + + ! \brief Set the C-file descriptor of the COSTA file + ! + ! \note + ! + ! \param hfile IO handle of COSTA file-handle + ! \param file I file descriptor + ! \param status O CTA_OK if successful + ! + interface CTA_F90_File_Set + subroutine CTA_File_Set( hfile, file, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hfile + integer , intent(in ) :: file + integer , intent(out ) :: status + end subroutine CTA_File_Set + end interface + + ! \brief Open a C-file and set descriptor + ! + ! \note + ! + ! \param hfile I provide a valid handle of COSTA file + ! \param sname I file path + ! \param smode I open-mode (see C fopen documentation) + ! if CTA_NULL is provided, the file will be + ! opened with read/write access (file pointer at begin + ! of file) + ! \param status O CTA_OK if successful + ! + interface CTA_F90_File_Open + subroutine CTA_File_Open( hfile, sname, smode, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hfile + integer(CTA_HANDLE_IKIND) , intent(in ) :: sname + integer(CTA_HANDLE_IKIND) , intent(in ) :: smode + integer , intent(out ) :: status + end subroutine CTA_File_Open + end interface + + ! \brief check whether file is a NETCDF file + ! + ! \param hfile I provide a valid handle of COSTA file + ! \param isnetcdf O CTA_TRUE if file is NETCDF file CTA_FALSE otherwise + ! + ! \param status O CTA_OK if successful + ! + interface CTA_F90_File_IsNetcdf + subroutine CTA_File_IsNetcdf( hfile, isnetcdf, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hfile + integer , intent(out ) :: isnetcdf + integer , intent(out ) :: status + end subroutine CTA_File_IsNetcdf + end interface + + ! \brief Write a string to file + ! + ! \note + ! + ! \param hfile I handle of COSTA file + ! \param str I string that must be written to file + ! \param eol I Add end of line, CTA_TRUE for adding end of line or CTA_FALSE otherwise + ! \param status O CTA_OK if successful + ! + interface CTA_F90_File_WriteStr + subroutine CTA_File_WriteStr( hfile, str, eol, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hfile + character(len=*) , intent(in ) :: str(*) + integer , intent(in ) :: eol + integer , intent(out ) :: status + end subroutine CTA_File_WriteStr + end interface + + +end module cta_f90_file + diff --git a/costa/native/cta_f90/generated/cta_f90_flush_mod.f90 b/costa/native/cta_f90/generated/cta_f90_flush_mod.f90 new file mode 100644 index 000000000..090d6798b --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_flush_mod.f90 @@ -0,0 +1,22 @@ +module cta_f90_flush_mod + + implicit none + + public + + ! \brief Flushes STDOUT and STDERR both for C as FORTRAN + ! + ! \note Function is introduced because some FORTRAN compilers use + ! different flushing than the C compiler. As a result the order of output + ! generated by both languages is "random". This routine makes sure that + ! all buffers have been flushed. + ! + ! + interface CTA_F90_Flush + subroutine CTA_Flush( ) + end subroutine CTA_Flush + end interface + + +end module cta_f90_flush_mod + diff --git a/costa/native/cta_f90/generated/cta_f90_functions.f90 b/costa/native/cta_f90/generated/cta_f90_functions.f90 new file mode 100644 index 000000000..b9e9a8ae9 --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_functions.f90 @@ -0,0 +1,145 @@ +module cta_f90_functions + + implicit none + + public + + ! \brief Create a new COSTA function. + ! + ! \note Argument name is only used for debugging and output. + ! + ! \param name I name of the new function for debugging purposes + ! \param function I pointer to function that has to be associated + ! with new COSTA function + ! \param hintf I handle of associated interface + ! \param hfunc O receives handle of created COSTA function + ! \param status O CTA_OK if successful + ! + interface CTA_F90_Func_Create + subroutine CTA_Func_Create( name, function, hintf, hfunc, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + character(len=*) , intent(in ) :: name + integer(CTA_HANDLE_IKIND) , intent(in ) :: function(*) + integer(CTA_HANDLE_IKIND) , intent(in ) :: hintf + integer(CTA_HANDLE_IKIND) , intent(out ) :: hfunc + integer , intent(out ) :: status + end subroutine CTA_Func_Create + end interface + + ! \brief Duplicates a user defined function + ! + ! \param hfunc I COSTA user function handle + ! \param hdupl I duplication of hfunc + ! \param status O error status: CTA_OK + ! + interface CTA_F90_Func_Duplicate + subroutine CTA_Func_Duplicate( hfunc, hdupl, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hfunc + integer(CTA_HANDLE_IKIND) , intent(in ) :: hdupl(*) + integer , intent(out ) :: status + end subroutine CTA_Func_Duplicate + end interface + + ! \brief Free a new COSTA function. + ! + ! \note + ! + ! \param hfunc IO handle of COSTA function, replaced by CTA_NULL on return + ! \param status O CTA_OK if sucessful + ! + interface CTA_F90_Func_Free + subroutine CTA_Func_Free( hfunc, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hfunc + integer , intent(out ) :: status + end subroutine CTA_Func_Free + end interface + + ! \brief Get interface of COSTA function. + ! + ! \note For performance reasons, the interface is not a copy but a handle + ! to the actual interface, it should NOT be freed by the calling routine! + ! + ! \param hfunc I handle of COSTA function + ! \param hintf O receives handle of interface of function + ! \param status O CTA_OK if successful + ! + interface CTA_F90_Func_GetIntf + subroutine CTA_Func_GetIntf( hfunc, hintf, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hfunc + integer(CTA_HANDLE_IKIND) , intent(out ) :: hintf + integer , intent(out ) :: status + end subroutine CTA_Func_GetIntf + end interface + + ! \brief Get function pointer of function + ! + ! \note There is no FORTRAN verion of this function available + ! + ! \param hfunc I handle of COSTA function. + ! \param function O receives pointer to function + ! \param status O CTA_OK if successful + ! + interface CTA_F90_Func_GetFunc + subroutine CTA_Func_GetFunc( hfunc, function, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hfunc + integer(CTA_HANDLE_IKIND) , intent(out ) :: function + integer , intent(out ) :: status + end subroutine CTA_Func_GetFunc + end interface + + ! \brief Get name of function + ! + ! \note Future versions will return a COSTA string handle. + ! + ! \param hfunc I handle of COSTA function. + ! \param name O handle of string object that is to receive function name, must exist before calling + ! \param status O CTA_OK if successful + ! + interface CTA_F90_Func_GetName + subroutine CTA_Func_GetName( hfunc, name, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hfunc + integer(CTA_HANDLE_IKIND) , intent(out ) :: name + integer , intent(out ) :: status + end subroutine CTA_Func_GetName + end interface + + ! \brief Set userdata of function + ! + ! \note Frees existing user data and replaces it with userdata + ! + ! \param hfunc IO handle of COSTA function. + ! \param userdata I new userdata handles + ! \param status O CTA_OK if successful + ! + interface CTA_F90_Func_SetUserdata + subroutine CTA_Func_SetUserdata( hfunc, userdata, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hfunc + integer(CTA_HANDLE_IKIND) , intent(in ) :: userdata + integer , intent(out ) :: status + end subroutine CTA_Func_SetUserdata + end interface + + ! \brief Get userdata of function + ! + ! \param hfunc I handle of COSTA function. + ! \param userdata O userdata handle + ! \param status O CTA_OK if successful + ! + interface CTA_F90_Func_GetUserdata + subroutine CTA_Func_GetUserdata( hfunc, userdata, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hfunc + integer(CTA_HANDLE_IKIND) , intent(out ) :: userdata + integer , intent(out ) :: status + end subroutine CTA_Func_GetUserdata + end interface + + +end module cta_f90_functions + diff --git a/costa/native/cta_f90/generated/cta_f90_handles.f90 b/costa/native/cta_f90/generated/cta_f90_handles.f90 new file mode 100644 index 000000000..60e232b56 --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_handles.f90 @@ -0,0 +1,223 @@ +module cta_f90_handles + + implicit none + + public + + ! \brief Create a new COSTA handle + ! + ! \param name I name associated with handle + ! \param datatype I data type of handle + ! \param data I block of data associated to handle + ! \param handle O receives COSTA handle + ! \param status O error status: CTA_OK + ! + !interface CTA_F90_Handle_Create + ! subroutine CTA_Handle_Create( name, datatype, data, handle, status ) + ! use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + ! character(len=*) , intent(in ) :: name + ! integer(CTA_HANDLE_IKIND) , intent(in ) :: datatype + ! void , intent(in ) :: data(*) + ! integer(CTA_HANDLE_IKIND) , intent(out ) :: handle + ! integer , intent(out ) :: status + ! end subroutine CTA_Handle_Create + !end interface + + ! \brief Free a COSTA handle + ! + ! \note The data part of the handle is NOT freed. + ! + ! \param handle IO handle that is to be freed, replaced by CTA_NULL on return. + ! \param status O error status: CTA_OK, CTA_ILLEGAL_HANDLE + ! + interface CTA_F90_Handle_Free + subroutine CTA_Handle_Free( handle, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: handle + integer , intent(out ) :: status + end subroutine CTA_Handle_Free + end interface + + ! \brief Free a COSTA handle + ! + ! \note Calls datatype-specific free methods, if available. + ! + ! \param handle IO handle that must be freed, replaced by CTA_NULL on return. + ! \param status O error status: CTA_OK, CTA_ILLEGAL_HANDLE + ! + interface CTA_F90_Handle_Free_All + subroutine CTA_Handle_Free_All( handle, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: handle + integer , intent(out ) :: status + end subroutine CTA_Handle_Free_All + end interface + + ! \brief Check whether a handle is valid and checks type + ! + ! \note The handle CTA_NULL is not valid. + ! + ! \param handle I COSTA handle + ! \param datatype I data type to compare handle with + ! \param status O error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE + ! + ! + interface CTA_F90_Handle_Check + subroutine CTA_Handle_Check( handle, datatype, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: handle + integer(CTA_HANDLE_IKIND) , intent(in ) :: datatype + integer , intent(out ) :: status + end subroutine CTA_Handle_Check + end interface + + ! \brief Get pointer to data element of handle + ! + ! \param handle I COSTA handle + ! \param data O receives pointer to data element + ! \param status O error status: CTA_OK, CTA_ILLEGAL_HANDLE + ! + !interface CTA_F90_Handle_GetData + ! subroutine CTA_Handle_GetData( handle, data, status ) + ! use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + ! integer(CTA_HANDLE_IKIND) , intent(in ) :: handle + ! void , intent(out ) :: data + ! integer , intent(out ) :: status + ! end subroutine CTA_Handle_GetData + !end interface + + ! \brief Get the value the handle points to + ! + ! \param handle I COSTA handle + ! \param value O receives pointer to data element + ! \param datatype I specify the data type of *value, must be the same as data type of handle + ! \param status O error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE + ! + !interface CTA_F90_Handle_GetValue + ! subroutine CTA_Handle_GetValue( handle, value, datatype, status ) + ! use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + ! integer(CTA_HANDLE_IKIND) , intent(in ) :: handle + ! void , intent(out ) :: value + ! integer , intent(in ) :: datatype + ! integer , intent(out ) :: status + ! end subroutine CTA_Handle_GetValue + !end interface + + ! \brief Get name associated with handle + ! + ! \param handle I COSTA handle + ! \param hname O receives name of data type + ! \param status O error status: CTA_OK, CTA_ILLEGAL_HANDLE + ! + interface CTA_F90_Handle_GetName + subroutine CTA_Handle_GetName( handle, hname, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: handle + integer(CTA_HANDLE_IKIND) , intent(out ) :: hname + integer , intent(out ) :: status + end subroutine CTA_Handle_GetName + end interface + + ! \brief Get data type associated with handle + ! + ! \param handle I COSTA handle + ! \param datatype O receives data type of handle + ! \param status O error status: CTA_OK, CTA_ILLEGAL_HANDLE + ! + interface CTA_F90_Handle_GetDatatype + subroutine CTA_Handle_GetDatatype( handle, datatype, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: handle + integer , intent(out ) :: datatype + integer , intent(out ) :: status + end subroutine CTA_Handle_GetDatatype + end interface + + ! \brief Find a handle by name and data type in the COSTA handle administration + ! + ! \param sname I name of handle + ! \param datatype I data type of handle + ! \param handlenr O receives the handle (only if return value is CTA_OK) + ! \param status O error status: CTA_OK, CTA_HANDLE_NOT_FOUND + ! + interface CTA_F90_Handle_Find + subroutine CTA_Handle_Find( sname, datatype, handlenr, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: sname + integer , intent(in ) :: datatype + integer , intent(out ) :: handlenr + integer , intent(out ) :: status + end subroutine CTA_Handle_Find + end interface + + ! \brief Print overview of all COSTA handles + ! + ! \param status O error status: CTA_OK + ! + interface CTA_F90_Handle_Printall + subroutine CTA_Handle_Printall( status ) + integer , intent(out ) :: status + end subroutine CTA_Handle_Printall + end interface + + ! \brief Counts all handles sorts them by type and prints overview. + ! This function can be usefull for detecting memory leaks that are + ! the result of not freeing instances of COSTA objects costa objects. + ! + ! \param location I String to indicate location of call + ! \param status O error status: CTA_OK + ! + interface CTA_F90_Handle_PrintInfo + subroutine CTA_Handle_PrintInfo( location, status ) + character(len=*) , intent(in ) :: location + integer , intent(out ) :: status + end subroutine CTA_Handle_PrintInfo + end interface + + ! \brief Get the reference count of the handle + ! + ! \param handle I COSTA handle + ! \param refCount O reference count of handle + ! \param status O error status: CTA_OK + ! + ! + interface CTA_F90_Handle_GetRefCount + subroutine CTA_Handle_GetRefCount( handle, refCount, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: handle + integer , intent(out ) :: refCount + integer , intent(out ) :: status + end subroutine CTA_Handle_GetRefCount + end interface + + ! \brief Increase the reference count of the handle + ! + ! \param handle I COSTA handle + ! \param status O error status: CTA_OK + ! + ! + interface CTA_F90_Handle_IncRefCount + subroutine CTA_Handle_IncRefCount( handle, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: handle + integer , intent(out ) :: status + end subroutine CTA_Handle_IncRefCount + end interface + + ! \brief Decrease the reference count of the handle + ! + ! \param handle I COSTA handle + ! \param status O error status: CTA_OK + ! + ! + interface CTA_F90_Handle_DecrRefCount + subroutine CTA_Handle_DecrRefCount( handle, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: handle + integer , intent(out ) :: status + end subroutine CTA_Handle_DecrRefCount + end interface + + +end module cta_f90_handles + diff --git a/costa/native/cta_f90/generated/cta_f90_initialise.f90 b/costa/native/cta_f90/generated/cta_f90_initialise.f90 new file mode 100644 index 000000000..c481ce448 --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_initialise.f90 @@ -0,0 +1,27 @@ +module cta_f90_initialise + + implicit none + + public + + ! \brief Initialize the COSTA environment + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Core_Initialise + subroutine CTA_Core_Initialise( status ) + integer , intent(out ) :: status + end subroutine CTA_Core_Initialise + end interface + + ! \brief Finalise the COSTA environment + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Core_Finalise + subroutine CTA_Core_Finalise( status ) + integer , intent(out ) :: status + end subroutine CTA_Core_Finalise + end interface + + +end module cta_f90_initialise + diff --git a/costa/native/cta_f90/generated/cta_f90_interface.f90 b/costa/native/cta_f90/generated/cta_f90_interface.f90 new file mode 100644 index 000000000..eed032e3b --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_interface.f90 @@ -0,0 +1,109 @@ +module cta_f90_interface + + implicit none + + public + + ! \brief Create a new interface + ! + ! \param name I name of the new interface + ! \param argtyp I list with the data types of arguments + ! \param narg I number of arguments of interface + ! \param hintf O receives the new COSTA interface handle + ! \param status O error status: CTA_OK + ! + interface CTA_F90_Intf_Create + subroutine CTA_Intf_Create( name, argtyp, narg, hintf, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + character(len=*) , intent(in ) :: name + integer(CTA_HANDLE_IKIND) , intent(in ) :: argtyp(*) + integer , intent(in ) :: narg + integer(CTA_HANDLE_IKIND) , intent(out ) :: hintf + integer , intent(out ) :: status + end subroutine CTA_Intf_Create + end interface + + ! \brief Free an interface + ! + ! \note Freeing CTA_NULL is allowed. + ! + ! \param hintf IO handle of interface, replaced by CTA_NULL on return + ! \param status O error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE + ! + interface CTA_F90_Intf_Free + subroutine CTA_Intf_Free( hintf, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hintf + integer , intent(out ) :: status + end subroutine CTA_Intf_Free + end interface + + ! \brief Match two interfaces for compatibility argumentlist-argumentlist + ! + ! \note Two interfaces are compatible if all arguments have the same datatype, + ! CTA_VOID is compatible with all other arguments except for CTA_FSTRING + ! + ! \param argtyp1 I list with the data types of arguments of first interface + ! \param narg1 I number of arguments in first interface + ! \param argtyp2 I list with the data types of arguments of second interface + ! \param narg2 I number of arguments in second interface + ! \param flag O receives TRUE if interfaces are compatible FALSE ortherwise + ! \param status O error status: CTA_OK + ! + interface CTA_F90_Intf_Match_aa + subroutine CTA_Intf_Match_aa( argtyp1, narg1, argtyp2, narg2, flag, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: argtyp1(*) + integer , intent(in ) :: narg1 + integer(CTA_HANDLE_IKIND) , intent(in ) :: argtyp2(*) + integer , intent(in ) :: narg2 + integer , intent(out ) :: flag + integer , intent(out ) :: status + end subroutine CTA_Intf_Match_aa + end interface + + ! \brief Match two interfaces for compatibility handle-argumentlist + ! + ! \note Two interfaces are compatible if all arguments have the same datatype, + ! CTA_VOID is compatible with all other arguments except for CTA_FSTRING + ! + ! \param hintf1 I handle of first interface + ! \param argtyp2 I list with the data types of arguments of second interface + ! \param narg2 I number of arguments in second interface + ! \param flag O receives TRUE if interfaces are compatible FALSE ortherwise + ! \param status O error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE + ! + interface CTA_F90_Intf_Match_ha + subroutine CTA_Intf_Match_ha( hintf1, argtyp2, narg2, flag, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hintf1 + integer(CTA_HANDLE_IKIND) , intent(in ) :: argtyp2(*) + integer , intent(in ) :: narg2 + integer , intent(out ) :: flag + integer , intent(out ) :: status + end subroutine CTA_Intf_Match_ha + end interface + + ! \brief Match two interfaces for compatibility handle-handle + ! + ! \note two interfaces are compatible if all arguments have the same datatype, + ! CTA_VOID is compatible with all other arguments except for CTA_FSTRING + ! + ! \param hintf1 I handle of first interface + ! \param hintf2 I handle of second interface + ! \param flag O receives TRUE if interfaces are compatible FALSE ortherwise + ! \param status O error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE + ! + interface CTA_F90_Intf_Match_hh + subroutine CTA_Intf_Match_hh( hintf1, hintf2, flag, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hintf1 + integer(CTA_HANDLE_IKIND) , intent(in ) :: hintf2 + integer , intent(out ) :: flag + integer , intent(out ) :: status + end subroutine CTA_Intf_Match_hh + end interface + + +end module cta_f90_interface + diff --git a/costa/native/cta_f90/generated/cta_f90_matrix.f90 b/costa/native/cta_f90/generated/cta_f90_matrix.f90 new file mode 100644 index 000000000..2b67f46bd --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_matrix.f90 @@ -0,0 +1,373 @@ +module cta_f90_matrix + + implicit none + + public + + ! \brief Create a new class (=implementation) of a COSTA matrix component. + ! + ! \param name I name of the new matrix class + ! \param h_func I COSTA function handles for functions that implement class, + ! missing functions must have value CTA_NULL + ! \param hmatcl O receives handle of new matrix class + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Matrix_DefineClass + subroutine CTA_Matrix_DefineClass( name, h_func, hmatcl, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + character(len=*) , intent(in ) :: name + integer(CTA_HANDLE_IKIND) , intent(in ) :: h_func(*) + integer(CTA_HANDLE_IKIND) , intent(out ) :: hmatcl + integer , intent(out ) :: status + end subroutine CTA_Matrix_DefineClass + end interface + + ! \brief Duplicate a matrix instance. + ! + ! \note Only size, data type and class type are duplicated, the values are not + ! copied. + ! + ! \param hmat1 I handle of matrix to be duplicated + ! \param hmat2 O receives handle of duplicate matrix, empty handle before calling + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Matrix_Duplicate + subroutine CTA_Matrix_Duplicate( hmat1, hmat2, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmat1 + integer(CTA_HANDLE_IKIND) , intent(out ) :: hmat2 + integer , intent(out ) :: status + end subroutine CTA_Matrix_Duplicate + end interface + + ! \brief Create a new matrix. + ! + ! \note + ! + ! \param hmatcl I matrix class of new matrix + ! \param m I number of rows + ! \param n I number of columns + ! \param datatype I datatype of elements in matrix + ! \param userdata IO userdata for creation (depends on class) + ! \param hmat O receives handle of new matrix + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Matrix_Create + subroutine CTA_Matrix_Create( hmatcl, m, n, datatype, userdata, hmat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmatcl + integer , intent(in ) :: m + integer , intent(in ) :: n + integer , intent(in ) :: datatype + integer(CTA_HANDLE_IKIND) , intent(inout) :: userdata + integer(CTA_HANDLE_IKIND) , intent(out ) :: hmat + integer , intent(out ) :: status + end subroutine CTA_Matrix_Create + end interface + + ! \brief Get datatype of matrix + ! + ! \note + ! + ! \param hmat I handle of matrix + ! \param datatype O receives data type of matrix + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Matrix_GetDatatype + subroutine CTA_Matrix_GetDatatype( hmat, datatype, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmat + integer , intent(out ) :: datatype + integer , intent(out ) :: status + end subroutine CTA_Matrix_GetDatatype + end interface + + ! \brief Get copy of all values in matrix. + ! + ! \note The elements in the matrix are returned column-wise + ! (FORTRAN matrix representation) + ! + ! \param hmat I handle of matrix + ! \param vals O copy of values in matrix + ! \param m I number of rows of vals (must be the same as for the matrix) + ! \param n I number of columns of vals (must be the same as for the matrix) + ! \param datatype I data type of *vals, must be the same as data type of matrix elements + ! \param status O error status: CTA_OK if successful + ! + !interface CTA_F90_Matrix_GetVals + ! subroutine CTA_Matrix_GetVals( hmat, vals, m, n, datatype, status ) + ! use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + ! integer(CTA_HANDLE_IKIND) , intent(in ) :: hmat + ! void , intent(out ) :: vals + ! integer , intent(in ) :: m + ! integer , intent(in ) :: n + ! integer , intent(in ) :: datatype + ! integer , intent(out ) :: status + ! end subroutine CTA_Matrix_GetVals + !end interface + + ! \brief Get copy of single value in the matrix. + ! + ! \note Counting of the indices starts from 1. + ! \param hmat I handle of matrix + ! \param val O receives copy of value in matrix + ! \param m I row index of value to be copied + ! \param n I column index of value to be copied + ! \param datatype I data type of *val, must be the same as data type of matrix elements + ! \param status O error status: CTA_OK if successful + ! + !interface CTA_F90_Matrix_GetVal + ! subroutine CTA_Matrix_GetVal( hmat, val, m, n, datatype, status ) + ! use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + ! integer(CTA_HANDLE_IKIND) , intent(in ) :: hmat + ! void , intent(out ) :: val + ! integer , intent(in ) :: m + ! integer , intent(in ) :: n + ! integer , intent(in ) :: datatype + ! integer , intent(out ) :: status + ! end subroutine CTA_Matrix_GetVal + !end interface + + ! \brief Set whole matrix to one single value. + ! + ! \note + ! \param hmat IO handle of matrix + ! \param val I value that must be set + ! \param datatype I data type of *val, must be the same as data type of matrix elements + ! \param status O error status: CTA_OK if successful + ! + !interface CTA_F90_Matrix_SetConstant + ! subroutine CTA_Matrix_SetConstant( hmat, val, datatype, status ) + ! use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + ! integer(CTA_HANDLE_IKIND) , intent(inout) :: hmat + ! void , intent(in ) :: val(*) + ! integer , intent(in ) :: datatype + ! integer , intent(out ) :: status + ! end subroutine CTA_Matrix_SetConstant + !end interface + + ! \brief Set values of a single column of the matrix. + ! + ! \note Counting of the indices starts from 1. + ! \param hmat IO handle of matrix + ! \param n I index of matrix column to set + ! \param hvec I handle of COSTA vector the values have to be set to, length must be the same as number of rows of matrix + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Matrix_SetCol + subroutine CTA_Matrix_SetCol( hmat, n, hvec, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hmat + integer , intent(in ) :: n + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + integer , intent(out ) :: status + end subroutine CTA_Matrix_SetCol + end interface + + ! \brief Set all values of the matrix. + ! + ! \note The elements in vals should be column-wise + ! (FORTRAN matrix representation) + ! \param hmat IO handle of matrix + ! \param vals I copy of values in matrix + ! \param m I number of rows of vals (must be the same as for the matrix) + ! \param n I number of columns of vals (must be the same as for the matrix) + ! \param datatype I data type of vals + ! \param status O error status: CTA_OK if successful + ! + !interface CTA_F90_Matrix_SetVals + ! subroutine CTA_Matrix_SetVals( hmat, vals, m, n, datatype, status ) + ! use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + ! integer(CTA_HANDLE_IKIND) , intent(inout) :: hmat + ! void , intent(in ) :: vals(*) + ! integer , intent(in ) :: m + ! integer , intent(in ) :: n + ! integer , intent(in ) :: datatype + ! integer , intent(out ) :: status + ! end subroutine CTA_Matrix_SetVals + !end interface + + ! \brief Set a single value in the matrix. + ! + ! \param hmat IO handle of matrix + ! \param val I value to be set at position (m,n) + ! \param m I row index + ! \param n I column index + ! \param datatype I data type of *val, must be the same as data type of matrix elements + ! \param status O error status: CTA_OK if successful + ! + !interface CTA_F90_Matrix_SetVal + ! subroutine CTA_Matrix_SetVal( hmat, val, m, n, datatype, status ) + ! use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + ! integer(CTA_HANDLE_IKIND) , intent(inout) :: hmat + ! void , intent(in ) :: val(*) + ! integer , intent(in ) :: m + ! integer , intent(in ) :: n + ! integer , intent(in ) :: datatype + ! integer , intent(out ) :: status + ! end subroutine CTA_Matrix_SetVal + !end interface + + ! \brief Export a matrix. + ! + ! \note CTA_DEFAULT_MATRIX supports exporting to:\n + ! file (usedoc is handle of COSTA file)\n + ! + ! \param hmat I handle of matrix + ! \param usedoc I configuration of output + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Matrix_Export + subroutine CTA_Matrix_Export( hmat, usedoc, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmat + integer(CTA_HANDLE_IKIND) , intent(in ) :: usedoc + integer , intent(out ) :: status + end subroutine CTA_Matrix_Export + end interface + + ! \brief Perform a rank 1 operation A:=alpha*x*y'+A + ! + ! \param hmat IO handle of matrix A + ! \param alpha I scalar + ! \param vx I vector x + ! \param vy I vector y + ! \param status O error status: CTA_OK if successful + ! \note it is allowed that are the same object (*vx==*vy) + ! + interface CTA_F90_Matrix_Ger + subroutine CTA_Matrix_Ger( hmat, alpha, vx, vy, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hmat + real(8) , intent(in ) :: alpha + integer(CTA_HANDLE_IKIND) , intent(in ) :: vx + integer(CTA_HANDLE_IKIND) , intent(in ) :: vy + integer , intent(out ) :: status + end subroutine CTA_Matrix_Ger + end interface + + ! \brief Compute inverse of a square matrix A:=inv(A) + ! + ! \param hmat IO handle of matrix A + ! \param status O error status: CTA_OK if successful + ! + ! + interface CTA_F90_Matrix_Inv + subroutine CTA_Matrix_Inv( hmat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hmat + integer , intent(out ) :: status + end subroutine CTA_Matrix_Inv + end interface + + ! \brief Perform the matrix multiplication y:=alpha*OP(A)*x+beta*y + ! where op(X)=X, X^T + ! + ! \param hmat I handle of matrix (A from the equation above) + ! \param trans I transpose flag CTA_TRUE/CTA_FALSE for matrix A + ! \param alpha I scalar + ! \param vx I vector x + ! \param beta I scalar + ! \param vy IO vector y + ! \param status O error status: CTA_OK if successful + ! \note it is allowed that vectors are the same object (*vx==*vy) + ! + interface CTA_F90_Matrix_Gemv + subroutine CTA_Matrix_Gemv( hmat, trans, alpha, vx, beta, vy, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmat + integer , intent(in ) :: trans + real(8) , intent(in ) :: alpha + integer(CTA_HANDLE_IKIND) , intent(in ) :: vx + real(8) , intent(in ) :: beta + integer(CTA_HANDLE_IKIND) , intent(inout) :: vy + integer , intent(out ) :: status + end subroutine CTA_Matrix_Gemv + end interface + + ! \brief Perform the matrix multication C:=alpha*op(A)*op(B)+beta*C + ! where op(X)=X, X^T + ! + ! \param mC IO handle of matrix C + ! \param transa I transpose flag CTA_TRUE/CTA_FALSE for matrix A + ! \param transb I transpose flag CTA_TRUE/CTA_FALSE for matrix A + ! \param alpha I scalar + ! \param mA I handle of matrix A + ! \param mB I handle of matrix B + ! \param beta I scalar + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Matrix_Gemm + subroutine CTA_Matrix_Gemm( mC, transa, transb, alpha, mA, mB, beta, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: mC + integer , intent(in ) :: transa + integer , intent(in ) :: transb + real(8) , intent(in ) :: alpha + integer(CTA_HANDLE_IKIND) , intent(in ) :: mA + integer(CTA_HANDLE_IKIND) , intent(in ) :: mB + real(8) , intent(in ) :: beta + integer , intent(out ) :: status + end subroutine CTA_Matrix_Gemm + end interface + + ! \brief Perform the matrix addition Y:=alpha*X+Y + ! + ! \param mY IO handle of matrix Y + ! \param alpha I scalar + ! \param mX I handle of matrix X + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Matrix_Axpy + subroutine CTA_Matrix_Axpy( mY, alpha, mX, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: mY + real(8) , intent(in ) :: alpha + integer(CTA_HANDLE_IKIND) , intent(in ) :: mX + integer , intent(out ) :: status + end subroutine CTA_Matrix_Axpy + end interface + + ! \brief Computes the eigenvalues and optionally the eigenvectors + ! of a general matrix A + ! + ! The computed eigenvectors are normalized to have Euclidean norm + ! equal to 1 and largest component real. + ! + ! \param A I Matrix A + ! \param eigvals O Vector with eigenvalues + ! \param eigvecs O Matrix with eigenvalues. The eigenvectors are + ! not computed when eigvecs in CTA_NULL on entry + ! \param status O error status: CTA_OK if successful + ! \note the eigenvalues can be complex. Since COSTA does not yet support + ! complex vectors, only the real part of the eigenvalues is + ! returned. + ! + interface CTA_F90_Matrix_EigVals + subroutine CTA_Matrix_EigVals( A, eigvals, eigvecs, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: A + integer(CTA_HANDLE_IKIND) , intent(out ) :: eigvals + integer(CTA_HANDLE_IKIND) , intent(out ) :: eigvecs + integer , intent(out ) :: status + end subroutine CTA_Matrix_EigVals + end interface + + ! \brief Free the matrix object + ! + ! \Note hmat=CTA_NULL is allowed + ! + ! \param hmat IO handle of matrix, replaced by CTA_NULL on return. + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Matrix_Free + subroutine CTA_Matrix_Free( hmat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hmat + integer , intent(out ) :: status + end subroutine CTA_Matrix_Free + end interface + + +end module cta_f90_matrix + diff --git a/costa/native/cta_f90/generated/cta_f90_mem.f90 b/costa/native/cta_f90/generated/cta_f90_mem.f90 new file mode 100644 index 000000000..dd05e3fd8 --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_mem.f90 @@ -0,0 +1,19 @@ +module cta_f90_mem + + implicit none + + public + + ! \brief Free memory that has been allocated with CTA_Malloc or CTA_Realloc + ! + ! \param ptr I pointer to memory block to deallocate + ! + !interface CTA_F90_Free + ! subroutine CTA_Free( ptr ) + ! void , intent(in ) :: ptr(*) + ! end subroutine CTA_Free + !end interface + + +end module cta_f90_mem + diff --git a/costa/native/cta_f90/generated/cta_f90_message.f90 b/costa/native/cta_f90/generated/cta_f90_message.f90 new file mode 100644 index 000000000..75eb035a2 --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_message.f90 @@ -0,0 +1,60 @@ +module cta_f90_message + + implicit none + + public + + ! \brief Write a message + ! + ! \param className I name of class that writes the message + ! \param method I name of the method that writes the message + ! \param message I message + ! \param type I type of message + ! -'M':message + ! -'I':Info + ! -'W':Warning + ! -'E':Error + ! -'F':Fatal error (will terminate application) + ! + interface CTA_F90_Message_Write + subroutine CTA_Message_Write( className, method, message, type ) + character(len=*) , intent(in ) :: className + character(len=*) , intent(in ) :: method + character(len=*) , intent(in ) :: message + character(len=*) , intent(in ) :: type + end subroutine CTA_Message_Write + end interface + + ! \brief Set an external writer for handling messages + ! + ! An external writer must comply to the following C interface: + ! + ! void my_writer(char *className, char *method, char *message, char type); + ! + ! \param externalWriter I External writer + ! + ! + ! \note Fortran writers are not yet supported + ! + interface CTA_F90_Message_SetExternalWriter + subroutine CTA_Message_SetExternalWriter( externalWriter ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: externalWriter + end subroutine CTA_Message_SetExternalWriter + end interface + + ! \brief Toggle message handler between quiet and normal mode. + ! in the quiet mode no messages are send (not even to external writers) + ! + ! \param setting I set message handler in quiet mode CTA_TRUE/CTA_FALSE + ! + ! + interface CTA_F90_Message_Quiet + subroutine CTA_Message_Quiet( setting ) + integer , intent(in ) :: setting + end subroutine CTA_Message_Quiet + end interface + + +end module cta_f90_message + diff --git a/costa/native/cta_f90/generated/cta_f90_metainfo.f90 b/costa/native/cta_f90/generated/cta_f90_metainfo.f90 new file mode 100644 index 000000000..199e86d3a --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_metainfo.f90 @@ -0,0 +1,60 @@ +module cta_f90_metainfo + + implicit none + + public + + ! \brief Create a new class (=implementation) of a COSTA observation description component. + ! + ! \param name I name of the new observation description class + ! \param h_func I COSTA function handles for functions that implement class, + ! missing functions must have value CTA_NULL + ! \param hobsdscrcl O receives handle of new observation description class + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Metainfo_DefineClass + subroutine CTA_Metainfo_DefineClass( name, h_func, hobsdscrcl, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + character(len=*) , intent(in ) :: name + integer(CTA_HANDLE_IKIND) , intent(in ) :: h_func(*) + integer(CTA_HANDLE_IKIND) , intent(out ) :: hobsdscrcl + integer , intent(out ) :: status + end subroutine CTA_Metainfo_DefineClass + end interface + + ! \brief Create a new observation description instance. + ! + ! \param hsobscl I class of new observation description + ! \param usrdat IO data of the stochastic observer for which + ! a descriptor is to be created + ! \param hobsdscr O receives handle of created observation description object + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Metainfo_Create + subroutine CTA_Metainfo_Create( hobsdscr, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(out ) :: hobsdscr + integer , intent(out ) :: status + end subroutine CTA_Metainfo_Create + end interface + + ! \brief Get properties/values that correspond to a given key. + ! + ! \param hobsdscr I handle of observation description + ! \param Key I key for which the value is asked + ! \param Properties IO COSTA-vector that is to receive the values + ! \param datatype I data type of elements in properties vector, must be the same as of queried properties + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Metainfo_SetUnit + subroutine CTA_Metainfo_SetUnit( hobsdscr, Key, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hobsdscr + character(len=*) , intent(in ) :: Key + integer , intent(out ) :: status + end subroutine CTA_Metainfo_SetUnit + end interface + + +end module cta_f90_metainfo + diff --git a/costa/native/cta_f90/generated/cta_f90_method.f90 b/costa/native/cta_f90/generated/cta_f90_method.f90 new file mode 100644 index 000000000..0217e2570 --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_method.f90 @@ -0,0 +1,71 @@ +module cta_f90_method + + implicit none + + public + + ! \brief Create a new class (=implementation) of a COSTA method. + ! + ! \param name I name of the new method class + ! \param h_func I COSTA function handles for functions that implement class, + ! missing functions must have value CTA_NULL + ! \param hmethcl O receives handle of new method class + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Meth_DefineClass + subroutine CTA_Meth_DefineClass( name, h_func, hmethcl, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + character(len=*) , intent(in ) :: name + integer(CTA_HANDLE_IKIND) , intent(in ) :: h_func(*) + integer(CTA_HANDLE_IKIND) , intent(out ) :: hmethcl + integer , intent(out ) :: status + end subroutine CTA_Meth_DefineClass + end interface + + ! \brief Create an instance of a method. + ! + ! \param hmethcl I method class of new object + ! \param userdata IO user data for creation (depends on class) + ! \param hmeth O receives handle of new method + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Meth_Create + subroutine CTA_Meth_Create( hmethcl, userdata, hmeth, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmethcl + integer(CTA_HANDLE_IKIND) , intent(inout) :: userdata + integer(CTA_HANDLE_IKIND) , intent(out ) :: hmeth + integer , intent(out ) :: status + end subroutine CTA_Meth_Create + end interface + + ! \brief Run method. + ! \param hmeth I handle of method + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Meth_Run + subroutine CTA_Meth_Run( hmeth, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmeth + integer , intent(out ) :: status + end subroutine CTA_Meth_Run + end interface + + ! \brief Free the method object. + ! + ! \Note hmeth=CTA_NULL is allowed + ! + ! \param hmeth IO handle of method, replaced by CTA_NULL on return. + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Meth_Free + subroutine CTA_Meth_Free( hmeth, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hmeth + integer , intent(out ) :: status + end subroutine CTA_Meth_Free + end interface + + +end module cta_f90_method + diff --git a/costa/native/cta_f90/generated/cta_f90_modbuild_par.f90 b/costa/native/cta_f90/generated/cta_f90_modbuild_par.f90 new file mode 100644 index 000000000..b4c54053b --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_modbuild_par.f90 @@ -0,0 +1,34 @@ +module cta_f90_modbuild_par + + implicit none + + public + + ! \brief Create the model class of the Par-Modelbuilder and initilizes MPI + ! + ! \note This is not a user function. It is called at initialization of the + ! COSTA environment. + ! + ! \param modelcls O receives handle of the SP-modelbuilder class + ! + interface CTA_F90_Modbuild_par_CreateClass + subroutine CTA_Modbuild_par_CreateClass( modelcls ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(out ) :: modelcls + end subroutine CTA_Modbuild_par_CreateClass + end interface + + ! \brief Stop the model class of the Par-Modelbuilder and finalize MPI + ! + ! \note This is not a user function. It is called at the finalization of the + ! COSTA environment. + ! + ! + interface CTA_F90_Modbuild_par_Finalize + subroutine CTA_Modbuild_par_Finalize( ) + end subroutine CTA_Modbuild_par_Finalize + end interface + + +end module cta_f90_modbuild_par + diff --git a/costa/native/cta_f90/generated/cta_f90_model.f90 b/costa/native/cta_f90/generated/cta_f90_model.f90 new file mode 100644 index 000000000..5aeaa8fde --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_model.f90 @@ -0,0 +1,618 @@ +module cta_f90_model + + implicit none + + public + + ! \brief Create a model instance + ! + ! \param hmodcl I model class of new instance + ! \param userdata IO user data needed for creation (depends on modelclass) + ! \param hmodel O receives handle of new model instance + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_Create + subroutine CTA_Model_Create( hmodcl, userdata, hmodel, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmodcl + integer(CTA_HANDLE_IKIND) , intent(inout) :: userdata + integer(CTA_HANDLE_IKIND) , intent(out ) :: hmodel + integer , intent(out ) :: status + end subroutine CTA_Model_Create + end interface + + ! \brief Compute model for given timespan + ! + ! \param hmodel IO handle of model instance + ! \param htime I timespan for which to compute + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_Compute + subroutine CTA_Model_Compute( hmodel, htime, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hmodel + integer(CTA_HANDLE_IKIND) , intent(in ) :: htime + integer , intent(out ) :: status + end subroutine CTA_Model_Compute + end interface + + ! \brief Add noise during during the given timespan at + ! the Compute + ! + ! \note Noise is added in the compute-method + ! \param hmodel IO handle of model instance + ! \param htime I timespan for which to compute adding noise + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_AddNoise + subroutine CTA_Model_AddNoise( hmodel, htime, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hmodel + integer(CTA_HANDLE_IKIND) , intent(in ) :: htime + integer , intent(out ) :: status + end subroutine CTA_Model_AddNoise + end interface + + ! \brief Set the internal state of the model. + ! + ! \note A copy of the state is set + ! + ! \param hmodel IO handle of model instance + ! \param hstate I handle of new state + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_SetState + subroutine CTA_Model_SetState( hmodel, hstate, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hmodel + integer(CTA_HANDLE_IKIND) , intent(in ) :: hstate + integer , intent(out ) :: status + end subroutine CTA_Model_SetState + end interface + + ! \brief Get a copy of the internal state. + ! + ! \note Optionally a tree-vector is created. In that case the caller of this + ! method is responsible for freeing that tree-vector. The input state must be compatible + ! (same size and or composition) as the models internal state. + ! \note If *hstate == CTA_NULL a new object is created, user is responsible for freeing this object. + ! + ! \param hmodel I handle of model instance + ! \param hstate IO receives state of the model, *hstate can be CTA_NULL on calling (see note) + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_GetState + subroutine CTA_Model_GetState( hmodel, hstate, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmodel + integer(CTA_HANDLE_IKIND) , intent(inout) :: hstate + integer , intent(out ) :: status + end subroutine CTA_Model_GetState + end interface + + ! \brief Perform axpy operation on the internal state. + ! + ! \note AXPY: y=alpha*x+y. y corresponds to the models + ! internal state and x can be a state vector or a model + ! + ! \param hmodel IO handle of model instance (y) + ! \param alpha I alpha + ! \param hx I handle of x (state or model) + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_AxpyState + subroutine CTA_Model_AxpyState( hmodel, alpha, hx, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hmodel + real(8) , intent(in ) :: alpha + integer(CTA_HANDLE_IKIND) , intent(in ) :: hx + integer , intent(out ) :: status + end subroutine CTA_Model_AxpyState + end interface + + ! \brief Get element-wise scaling for model state + ! + ! The values in the state-vector are compared on "importance" in various + ! algorithms like RRSQRT and COFFEE. The model state holds in general + ! various quantities like concentration, velicity, location etc in + ! arbitrary units. The scaling vector (that can be model state dependend) + ! makes it possible to meaningfull compare elements in the state-vector + ! for importance. Various methods are available like a transformation to + ! enery. + ! + ! The scaling vector represents a diagonal scaling matrix but is + ! respresented by a tree-vector. + ! + ! \note The elementwise scaling is returned in the form of a tree-vector + ! with same build-up as the tree-vector of the model state. The scaling vector + ! is created whenever hscale==CTA_NULL on input, the caller is + ! responsible for freeing this object. + ! + ! \param hmodel I handle of model instance + ! \param hscale IO receives state scaling vector for the model state, + ! hstate can be CTA_NULL on calling (see note) + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_GetStateScaling + subroutine CTA_Model_GetStateScaling( hmodel, hscale, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmodel + integer(CTA_HANDLE_IKIND) , intent(inout) :: hscale + integer , intent(out ) :: status + end subroutine CTA_Model_GetStateScaling + end interface + + ! \brief Set the models forcings. + ! + ! \note Set the forcings (constant) for the given timespan. + ! The model will fall back to its own forcings definition + ! outside the given timespan. + ! + ! \param hmodel IO handle of model instance + ! \param tspan I time span on which to set the forcing values + ! \param hforc I handle of vector with new forcings + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_SetForc + subroutine CTA_Model_SetForc( hmodel, tspan, hforc, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hmodel + integer(CTA_HANDLE_IKIND) , intent(in ) :: tspan + integer(CTA_HANDLE_IKIND) , intent(in ) :: hforc + integer , intent(out ) :: status + end subroutine CTA_Model_SetForc + end interface + + ! \brief Get a copy of the values of the models forcings + ! + ! \note Optionally a tree-vector is created in that case the caller of this + ! method is responsible for freeing that tree-vector. The input tree-vector + ! must be compatible (same size and or composition) as the models + ! internal tree-vector representing the forcings. + ! If the forcings of the model are not constant for the given timespan + ! the result is dependent on the model-implementation + ! \note If *hforc == CTA_NULL a new object is created, user is responsible for freeing this object. + ! + ! \param hmodel I handle of model instance + ! \param tspan I timespan for wich the given forcings are valid + ! \param hforc IO receives models forcings, *hforc can be CTA_NULL on calling (see note) + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_GetForc + subroutine CTA_Model_GetForc( hmodel, tspan, hforc, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmodel + integer(CTA_HANDLE_IKIND) , intent(in ) :: tspan + integer(CTA_HANDLE_IKIND) , intent(inout) :: hforc + integer , intent(out ) :: status + end subroutine CTA_Model_GetForc + end interface + + ! \brief Perform axpy operation on the models forcings. + ! + ! \note AXPY: y=alpha*x+y. y corresponds to the models + ! internal forcings. + ! The adjustment to the forcings (alpha*x) is only valid for the given + ! time span. Note that the model will use y(t)+x for the given time span + ! where y(t) denotes the default forcings of the model. + ! + ! \param hmodel IO handle of model instance (y) + ! \param tspan I time span for wich the given forcings are valid + ! \param alpha I scalar + ! \param hx I handle of forcings tree-vector x + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_AxpyForc + subroutine CTA_Model_AxpyForc( hmodel, tspan, alpha, hx, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hmodel + integer(CTA_HANDLE_IKIND) , intent(in ) :: tspan + real(8) , intent(in ) :: alpha + integer(CTA_HANDLE_IKIND) , intent(in ) :: hx + integer , intent(out ) :: status + end subroutine CTA_Model_AxpyForc + end interface + + ! \brief Set parameters of the model. + ! + ! \param hmodel IO handle of model instance + ! \param hparam I handle of parameters vector + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_SetParam + subroutine CTA_Model_SetParam( hmodel, hparam, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hmodel + integer(CTA_HANDLE_IKIND) , intent(in ) :: hparam + integer , intent(out ) :: status + end subroutine CTA_Model_SetParam + end interface + + ! \brief Get a copy of the parameters of the model. + ! + ! \note Optionally a tree-vector is created in that case the caller of this + ! method is responsible for freeing that tree-vector. The input tree-vector + ! must be compatible (same size and or composition) as the models + ! internal tree-vector representing the parameters. + ! \note If *hforc == CTA_NULL a new object is created, user is responsible for freeing this object. + ! + ! \param hmodel I handle of model instance + ! \param hparam IO receives model forcings, *hforc can equal CTA_NULL on calling (see note) + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_GetParam + subroutine CTA_Model_GetParam( hmodel, hparam, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmodel + integer(CTA_HANDLE_IKIND) , intent(inout) :: hparam + integer , intent(out ) :: status + end subroutine CTA_Model_GetParam + end interface + + ! \brief Perform axpy operation on the models parameters. + ! + ! \note AXPY: y=alpha*x+y where y corresponds to the models + ! internal parameters. + ! + ! \param hmodel IO handle of model instance (y) + ! \param alpha I alpha + ! \param hx I handle of treevector of parameters (x) + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_AxpyParam + subroutine CTA_Model_AxpyParam( hmodel, alpha, hx, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hmodel + real(8) , intent(in ) :: alpha + integer(CTA_HANDLE_IKIND) , intent(in ) :: hx + integer , intent(out ) :: status + end subroutine CTA_Model_AxpyParam + end interface + + ! \brief Return the timehorizon on the model. + ! The time horizon is the initial overal simulation span for which the mode is configured + ! + ! \param hmodel I handle of model instance + ! \param tHorizon I time horizon of model + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_GetTimeHorizon + subroutine CTA_Model_GetTimeHorizon( hmodel, tHorizon, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmodel + integer(CTA_HANDLE_IKIND) , intent(in ) :: tHorizon + integer , intent(out ) :: status + end subroutine CTA_Model_GetTimeHorizon + end interface + + ! \brief Return the current time of the model. + ! + ! \param hmodel I handle of model instance + ! \param tCurrent I time corresponding the the model state + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_GetCurrentTime + subroutine CTA_Model_GetCurrentTime( hmodel, tCurrent, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmodel + integer(CTA_HANDLE_IKIND) , intent(in ) :: tCurrent + integer , intent(out ) :: status + end subroutine CTA_Model_GetCurrentTime + end interface + + ! \brief Get covariance matrix of noise parameters. + ! + ! \note ONLY for Stochastic models. + ! The covariance matrix is represented by an array + ! of tree-vectors (columns of the matrix) + ! optionally a tree-vector is created in that case the caller of this + ! method is responsible for freeing that tree-vector. The input tree-vector + ! must be compatible (same size and or composition) as the models + ! internal tree-vector. + ! \note If hstmat[icol] == CTA_NULL a new object is created, user is responsible for freeing this object. + ! + ! \param hmodel I handle of model instance + ! \param hstmat O receives array of tree-vectors, *hstmat can equal CTA_NULL on calling (see note) + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_GetNoiseCovar + subroutine CTA_Model_GetNoiseCovar( hmodel, hstmat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmodel + integer(CTA_HANDLE_IKIND) , intent(out ) :: hstmat + integer , intent(out ) :: status + end subroutine CTA_Model_GetNoiseCovar + end interface + + ! \brief Get number of noise parameters: the number of columns of the noise covariance matrix. + ! + ! \note ONLY for Stochastic models. + ! + ! \param hmodel I handle of model instance + ! \param nnoise O receives number of noise parameters + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_GetNoiseCount + subroutine CTA_Model_GetNoiseCount( hmodel, nnoise, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmodel + integer , intent(out ) :: nnoise + integer , intent(out ) :: status + end subroutine CTA_Model_GetNoiseCount + end interface + + ! \brief Free model instance. + ! + ! \note ONLY for Stochastic models. + ! + ! \param hmodel IO handle of model instance, replaced by CTA_NULL on return + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_Free + subroutine CTA_Model_Free( hmodel, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hmodel + integer , intent(out ) :: status + end subroutine CTA_Model_Free + end interface + + ! \brief Announce to the model what observations will be requested. + ! + ! Before the compute method this method is used to announce what + ! obeservation will be requested after the CTA_Model_Compute using the + ! CTA_Model_GetObsvalues method. + ! + ! For some simulation models it is more efficient to do a single simulation + ! (a single CTA_Model_Compute call) for a particular simulation span then + ! simulating the same simulation span in a number of steps (multiple + ! CTA_Model_Compute calls). + ! + ! This method can be used to announce for what observations the model + ! must provide a prediction in advance. This method must be called prior + ! to the CTA_Compute method and makes it possible to perform simulations + ! over a longer time interval without the need to interupt the computations + ! in order to get the predictions at intermediate time instances. + ! + ! Notes on the behavior of the method: + ! - The observation description used in the first CTA_Model_GetObsValues + ! after the compute MUST be the same as the observation description + ! used in the announce. + ! - All observations that are announced MUST be in the timespan of the + ! following CTA_Model_Compute. + ! - The announced observations can only be retreved ONCE after the + ! CTA_Model_Compute. + ! - A CTA_Model_SetState or CTA_Model_AxpyState will reset the announced + ! CTA_Model_AnnounceObsValues administration (since stored predictions + ! might not be valid anymore) + ! + ! \param hmodel I handle of model instance + ! \param hdescr I observation description component + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_AnnounceObsValues + subroutine CTA_Model_AnnounceObsValues( hmodel, hdescr, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmodel + integer(CTA_HANDLE_IKIND) , intent(in ) :: hdescr + integer , intent(out ) :: status + end subroutine CTA_Model_AnnounceObsValues + end interface + + ! \brief Get (interpolate) the models internal state to the + ! observations described as specified in the observation + ! description component. + ! + ! \note The interface supports a the time instance for time-interpolation. + ! It depends on the model whether and how this is supported. + ! + ! \param hmodel I handle of model instance + ! \param htime I time instance (for checking and time-interpolation if + ! supported by model) + ! \param hdescr I observation description component + ! \param values O receives values of the models internal state corresponding to + ! observations as described in hdescr + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_GetObsValues + subroutine CTA_Model_GetObsValues( hmodel, htime, hdescr, values, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmodel + integer(CTA_HANDLE_IKIND) , intent(in ) :: htime + integer(CTA_HANDLE_IKIND) , intent(in ) :: hdescr + integer(CTA_HANDLE_IKIND) , intent(out ) :: values + integer , intent(out ) :: status + end subroutine CTA_Model_GetObsValues + end interface + + ! \brief Get for each observation a localization scaling vector + ! + ! \param hmodel I handle of model instance + ! \param hdescr I observation description for which we want localization scaling vectors + ! \param distance I characteristic distance + ! \param locVecs O costa vector of handles to treevectors (scaling vectors). The treevectors + ! are created when the indices are CTA_NULL on entry + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_GetObsLocalization + subroutine CTA_Model_GetObsLocalization( hmodel, hdescr, distance, locVecs, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmodel + integer(CTA_HANDLE_IKIND) , intent(in ) :: hdescr + real(8) , intent(in ) :: distance + integer(CTA_HANDLE_IKIND) , intent(out ) :: locVecs + integer , intent(out ) :: status + end subroutine CTA_Model_GetObsLocalization + end interface + + ! \brief Get a query for the stochastic observer in order to + ! filter out the observations that can actually be provided by the model. + ! + ! \param hmodel I handle of model instance + ! \param htime I time instance + ! \param hdescr I observation description component + ! \param sselect O receives a query to filter out the observations, must exist before calling + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_GetObsSelect + subroutine CTA_Model_GetObsSelect( hmodel, htime, hdescr, sselect, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmodel + integer(CTA_HANDLE_IKIND) , intent(in ) :: htime + integer(CTA_HANDLE_IKIND) , intent(in ) :: hdescr + integer(CTA_HANDLE_IKIND) , intent(out ) :: sselect + integer , intent(out ) :: status + end subroutine CTA_Model_GetObsSelect + end interface + + ! \brief Export the whole internal state of a model + ! This export function will export the whole state of the model such that + ! a so called "restart" start from this point yielding the same results. + ! There are no ruled on the format that is used to store the data. + ! Various extra otions are valid but a model will in most cases support an export + ! to a file and to a COSTA pack object. + ! + ! + ! \param hmodel I handle of model instance + ! \param hexport I target for export e.g. CTA_File or CTA_Pack + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_Export + subroutine CTA_Model_Export( hmodel, hexport, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmodel + integer(CTA_HANDLE_IKIND) , intent(in ) :: hexport + integer , intent(out ) :: status + end subroutine CTA_Model_Export + end interface + + ! \brief Import the whole internal state of a model + ! After the inport the models internal state is exactly the same as the point that + ! the export was created using CTA_Model_Export. + ! + ! + ! \param hmodel I handle of model instance + ! \param himport I handle with data created by CTA_MODEL_Export e.g. CTA_File or CTA_Pack + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_Import + subroutine CTA_Model_Import( hmodel, himport, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmodel + integer(CTA_HANDLE_IKIND) , intent(in ) :: himport + integer , intent(out ) :: status + end subroutine CTA_Model_Import + end interface + + ! \brief Get the number of domains for local analysis + ! + ! \param hmodel I handle of model instance + ! \param distance I characteristic distance + ! \param ndomains O number of domains + ! \param locVecs O costa vector of handles to treevectors (scaling vectors). The treevectors + ! are created when the indices are CTA_NULL on entry + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_GetNumDomains + subroutine CTA_Model_GetNumDomains( hmodel, distance, ndomains, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmodel + real(8) , intent(in ) :: distance + integer , intent(out ) :: ndomains + integer , intent(out ) :: status + end subroutine CTA_Model_GetNumDomains + end interface + + ! \brief Get selection of observations that are relevnet for assimilation in the given domain + ! + ! \param hmodel I handle of model instance + ! \param hdescr I observation description of all observations + ! \param distance I characteristic distance + ! \param idomain I domain number + ! \param selection O costa vector with the indices of the relevant observations (0 based) + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_GetObsSelector + subroutine CTA_Model_GetObsSelector( hmodel, hdescr, distance, idomain, selection, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmodel + integer(CTA_HANDLE_IKIND) , intent(in ) :: hdescr + real(8) , intent(in ) :: distance + integer , intent(in ) :: idomain + integer(CTA_HANDLE_IKIND) , intent(out ) :: selection + integer , intent(out ) :: status + end subroutine CTA_Model_GetObsSelector + end interface + + ! \brief Get for each observation a localization scaling vector for single domain + ! + ! \param hmodel I handle of model instance + ! \param hdescr I observation description for which we want localization scaling vectors + ! \param distance I characteristic distance + ! \param idomain I domain number + ! \param locVecs O costa vector of handles to treevectors (scaling vectors). The treevectors + ! are created when the indices are CTA_NULL on entry + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_GetObsLocalizationDomain + subroutine CTA_Model_GetObsLocalizationDomain( hmodel, hdescr, distance, idomain, locVecs, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmodel + integer(CTA_HANDLE_IKIND) , intent(in ) :: hdescr + real(8) , intent(in ) :: distance + integer , intent(in ) :: idomain + integer(CTA_HANDLE_IKIND) , intent(out ) :: locVecs + integer , intent(out ) :: status + end subroutine CTA_Model_GetObsLocalizationDomain + end interface + + ! \brief Get a copy of the internal state. + ! + ! \note Optionally a tree-vector is created. In that case the caller of this + ! method is responsible for freeing that tree-vector. The input state must be compatible + ! (same size and or composition) as the models internal state. + ! \note If *hstate == CTA_NULL a new object is created, user is responsible for freeing this object. + ! + ! \param hmodel I handle of model instance + ! \param idomain I domain number + ! \param hstate IO receives state of the model, *hstate can be CTA_NULL on calling (see note) + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_GetStateDomain + subroutine CTA_Model_GetStateDomain( hmodel, idomain, hstate, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hmodel + integer , intent(in ) :: idomain + integer(CTA_HANDLE_IKIND) , intent(inout) :: hstate + integer , intent(out ) :: status + end subroutine CTA_Model_GetStateDomain + end interface + + ! \brief Perform axpy operation on the internal state for a single domain + ! + ! \note AXPY: y=alpha*x+y. y corresponds to the models + ! internal state and x can be a state vector or a model + ! + ! \param hmodel IO handle of model instance (y) + ! \param alpha I alpha + ! \param hx I handle of x (state or model) + ! \param idomain I domain number + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_AxpyStateDomain + subroutine CTA_Model_AxpyStateDomain( hmodel, alpha, idomain, hx, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hmodel + real(8) , intent(in ) :: alpha + integer , intent(in ) :: idomain + integer(CTA_HANDLE_IKIND) , intent(in ) :: hx + integer , intent(out ) :: status + end subroutine CTA_Model_AxpyStateDomain + end interface + + +end module cta_f90_model + diff --git a/costa/native/cta_f90/generated/cta_f90_model_factory.f90 b/costa/native/cta_f90/generated/cta_f90_model_factory.f90 new file mode 100644 index 000000000..9c3603ad5 --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_model_factory.f90 @@ -0,0 +1,43 @@ +module cta_f90_model_factory + + implicit none + + public + + ! \brief Create a COSTA modell class from XML input file + ! (load from methods from dynamic load library) + ! + ! \param fName I XML-configuration file + ! \param modelClass O Class of new model Factory + ! \param status O Model class handle + ! + interface CTA_F90_ModelFactory_New + subroutine CTA_ModelFactory_New( fName, modelClass, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + character(len=*) , intent(in ) :: fName + integer(CTA_HANDLE_IKIND) , intent(out ) :: modelClass + integer , intent(out ) :: status + end subroutine CTA_ModelFactory_New + end interface + + ! \brief Define a new class (=implementation) of a COSTA model component + ! + ! \param name I name of the new model class + ! \param h_func I COSTA function handles for functions that implement class, + ! missing functions must have value CTA_NULL + ! \param hmodcl O receives handle of new model class + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_DefineClass + subroutine CTA_Model_DefineClass( name, h_func, hmodcl, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + character(len=*) , intent(in ) :: name + integer(CTA_HANDLE_IKIND) , intent(in ) :: h_func(*) + integer(CTA_HANDLE_IKIND) , intent(out ) :: hmodcl + integer , intent(out ) :: status + end subroutine CTA_Model_DefineClass + end interface + + +end module cta_f90_model_factory + diff --git a/costa/native/cta_f90/generated/cta_f90_model_utilities.f90 b/costa/native/cta_f90/generated/cta_f90_model_utilities.f90 new file mode 100644 index 000000000..49402d4f4 --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_model_utilities.f90 @@ -0,0 +1,42 @@ +module cta_f90_model_utilities + + implicit none + + public + + ! \brief Handles model configuration tree or name of input xml-file + ! + ! When a new instance of the a model component is created it needs some + ! input. The most convenient way is to provide the root to a COSTA-tree + ! with configuration information the name of a XML-configuration file + ! This routine will check whether the input handle is a COSTA-tree or + ! the name of a XML-configuration file. A COSTA-tree containing the content + ! of the XML-file is created when the input is the name of the XML-file. + ! + ! \note A COSTA-tree is created if the input is the name of an XML-file. + ! the handle of the input tree is returned otherwise. This means that + ! depending on the input, a tree is created. The routine will return whether + ! the returned tree is created by this routine. The caller is responsible + ! for freeing the tree when necessary. + ! + ! \param hinput I handle of tree (CTA_Tree) with model input or string + ! (CTA_Sring) with name of xml-input file + ! \param tinput O receives handle of tree (CTA_Tree) with model input + ! \param cleanup O receives flag (CTA_TRUE/CTA_FALSE) indicating whether tinput is + ! created and must be freed by the caller + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Model_Util_InputTree + subroutine CTA_Model_Util_InputTree( hinput, tinput, cleanup, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hinput + integer(CTA_HANDLE_IKIND) , intent(out ) :: tinput + integer , intent(out ) :: cleanup + integer , intent(out ) :: status + end subroutine CTA_Model_Util_InputTree + end interface + + +end module cta_f90_model_utilities + diff --git a/costa/native/cta_f90/generated/cta_f90_obsdescr.f90 b/costa/native/cta_f90/generated/cta_f90_obsdescr.f90 new file mode 100644 index 000000000..bc9cc7860 --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_obsdescr.f90 @@ -0,0 +1,191 @@ +module cta_f90_obsdescr + + implicit none + + public + + ! \brief Create a new class (=implementation) of a COSTA observation description component. + ! + ! \param name I name of the new observation description class + ! \param h_func I COSTA function handles for functions that implement class, + ! missing functions must have value CTA_NULL + ! \param hobsdscrcl O receives handle of new observation description class + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_ObsDescr_DefineClass + subroutine CTA_ObsDescr_DefineClass( name, h_func, hobsdscrcl, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + character(len=*) , intent(in ) :: name + integer(CTA_HANDLE_IKIND) , intent(in ) :: h_func(*) + integer(CTA_HANDLE_IKIND) , intent(out ) :: hobsdscrcl + integer , intent(out ) :: status + end subroutine CTA_ObsDescr_DefineClass + end interface + + ! \brief Create a new observation description instance. + ! + ! \param hsobscl I class of new observation description + ! \param usrdat IO data of the stochastic observer for which + ! a descriptor is to be created + ! \param hobsdscr O receives handle of created observation description + ! object + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_ObsDescr_Create + subroutine CTA_ObsDescr_Create( hsobscl, usrdat, hobsdscr, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hsobscl + integer(CTA_HANDLE_IKIND) , intent(inout) :: usrdat + integer(CTA_HANDLE_IKIND) , intent(out ) :: hobsdscr + integer , intent(out ) :: status + end subroutine CTA_ObsDescr_Create + end interface + + ! \brief Create a new observation description that is subset of existing observation description. + ! + ! \param hobsdescr I the observation description to create a subset + ! from + ! \param selection I selection criterion (subset of SQL) + ! \param reltab O Relation table specifying the relation between + ! the original and new observation description + ! component. Note no relation table is created when + ! reltab==CTA_NULL on entry + ! \param hobsdescrout O the new COSTA-stochastic observer, empty before + ! calling, caller responsible for freeing after use + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_ObsDescr_CreateSel + subroutine CTA_ObsDescr_CreateSel( hobsdescr, selection, reltab, hobsdescrout, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hobsdescr + integer(CTA_HANDLE_IKIND) , intent(in ) :: selection + integer(CTA_HANDLE_IKIND) , intent(out ) :: reltab + integer(CTA_HANDLE_IKIND) , intent(out ) :: hobsdescrout + integer , intent(out ) :: status + end subroutine CTA_ObsDescr_CreateSel + end interface + + ! \brief Create a new observation description that is subset of existing observation description. + ! All observations in the interval (t1,t2] (note t1 is not part + ! of the interval!) of the time span are selected. + ! + ! \param hobsdescr I the observation description to create a subset + ! from + ! \param timespan I time span over which selection has to be made + ! \param reltab O Relation table specifying the relation between + ! the original and new observation description + ! component. Note no relation table is created when + ! reltab==CTA_NULL on enty + ! \param hobsdescrout O the new COSTA-stochastic observer, empty before + ! calling, caller responsible for freeing after use + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_ObsDescr_CreateTimSel + subroutine CTA_ObsDescr_CreateTimSel( hobsdescr, timespan, reltab, hobsdescrout, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hobsdescr + integer(CTA_HANDLE_IKIND) , intent(in ) :: timespan + integer(CTA_HANDLE_IKIND) , intent(out ) :: reltab + integer(CTA_HANDLE_IKIND) , intent(out ) :: hobsdescrout + integer , intent(out ) :: status + end subroutine CTA_ObsDescr_CreateTimSel + end interface + + ! \brief Get properties/values that correspond to a given key. + ! + ! \param hobsdscr I handle of observation description + ! \param Key I key for which the value is asked + ! \param Properties IO COSTA-vector that is to receive the values + ! \param datatype I data type of elements in properties vector, must be the same as of queried properties + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_ObsDescr_Get_ValueProperties + subroutine CTA_ObsDescr_Get_ValueProperties( hobsdscr, Key, Properties, datatype, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hobsdscr + character(len=*) , intent(in ) :: Key + integer(CTA_HANDLE_IKIND) , intent(inout) :: Properties + integer , intent(in ) :: datatype + integer , intent(out ) :: status + end subroutine CTA_ObsDescr_Get_ValueProperties + end interface + + ! \brief Get all keys names. + ! + ! \param hobsdscr I handle of observation description + ! \param Keys O receives all keys (COSTA-string vector); must exist before calling and be large enough + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_ObsDescr_Get_PropertyKeys + subroutine CTA_ObsDescr_Get_PropertyKeys( hobsdscr, Keys, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hobsdscr + integer(CTA_HANDLE_IKIND) , intent(out ) :: Keys + integer , intent(out ) :: status + end subroutine CTA_ObsDescr_Get_PropertyKeys + end interface + + ! \brief Get number of properties/keys. + ! + ! \param hobsdscr I handle of observation description + ! \param nkeys O receives number of properties/keys + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_ObsDescr_Property_Count + subroutine CTA_ObsDescr_Property_Count( hobsdscr, nkeys, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hobsdscr + integer , intent(out ) :: nkeys + integer , intent(out ) :: status + end subroutine CTA_ObsDescr_Property_Count + end interface + + ! \brief Get number of observations. + ! + ! \param hobsdscr I handle of observation description + ! \param nobs O receives the number of observations + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_ObsDescr_Observation_Count + subroutine CTA_ObsDescr_Observation_Count( hobsdscr, nobs, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hobsdscr + integer , intent(out ) :: nobs + integer , intent(out ) :: status + end subroutine CTA_ObsDescr_Observation_Count + end interface + + ! \brief Export observation description. + ! + ! The default observation description CTA_DEFAULT_OBSDESC supports exporting to:\n + ! TODO + ! + ! \param hdescr I handle of observation description + ! \param usrdat IO export configuration/medium + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_ObsDescr_Export + subroutine CTA_ObsDescr_Export( hdescr, usrdat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hdescr + integer(CTA_HANDLE_IKIND) , intent(inout) :: usrdat + integer , intent(out ) :: status + end subroutine CTA_ObsDescr_Export + end interface + + ! \brief Free observation description object. + ! + ! \param hobsdscr IO handle of observation description, replaced by CTA_NULL on return. + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_ObsDescr_Free + subroutine CTA_ObsDescr_Free( hobsdscr, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hobsdscr + integer , intent(out ) :: status + end subroutine CTA_ObsDescr_Free + end interface + + +end module cta_f90_obsdescr + diff --git a/costa/native/cta_f90/generated/cta_f90_pack.f90 b/costa/native/cta_f90/generated/cta_f90_pack.f90 new file mode 100644 index 000000000..f357c1376 --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_pack.f90 @@ -0,0 +1,604 @@ +module cta_f90_pack + + implicit none + + public + + ! \brief Create a pack instance. + ! + ! \param initsize I the initial size >=0 of the buffer + ! \param hpack O receives handle of new pack object + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Pack_Create + subroutine CTA_Pack_Create( initsize, hpack, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer , intent(in ) :: initsize + integer(CTA_HANDLE_IKIND) , intent(out ) :: hpack + integer , intent(out ) :: status + end subroutine CTA_Pack_Create + end interface + + ! \brief Free a pack instance. + ! + ! \param hpack IO handle of pack object, replaced by CTA_NULL on return + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Pack_Free + subroutine CTA_Pack_Free( hpack, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + integer , intent(out ) :: status + end subroutine CTA_Pack_Free + end interface + + ! \brief Add data to pack object. + ! + ! \param hpack IO handle of pack object + ! \param data I data that must be packed + ! \param lendat I size of the data to be packed (chars) + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Pack_Add + module procedure CTA_Pack_Add_integer_1d + module procedure CTA_Pack_Add_integer_2d + module procedure CTA_Pack_Add_integer_3d + module procedure CTA_Pack_Add_integer_4d + module procedure CTA_Pack_Add_integer_5d + module procedure CTA_Pack_Add_integer_6d + module procedure CTA_Pack_Add_integer_7d + module procedure CTA_Pack_Add_real4_1d + module procedure CTA_Pack_Add_real4_2d + module procedure CTA_Pack_Add_real4_3d + module procedure CTA_Pack_Add_real4_4d + module procedure CTA_Pack_Add_real4_5d + module procedure CTA_Pack_Add_real4_6d + module procedure CTA_Pack_Add_real4_7d + module procedure CTA_Pack_Add_real8_1d + module procedure CTA_Pack_Add_real8_2d + module procedure CTA_Pack_Add_real8_3d + module procedure CTA_Pack_Add_real8_4d + module procedure CTA_Pack_Add_real8_5d + module procedure CTA_Pack_Add_real8_6d + module procedure CTA_Pack_Add_real8_7d + end interface + + ! \brief Unpack (get) data from pack object. + ! + ! \param hpack IO handle of pack object + ! \param data O buffer that receives data that is unpacked from pack-buffer (buffer length must be >= lendat) + ! \param lendat I size of the data to be unpacked (chars) + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Pack_Get + module procedure CTA_Pack_Get_integer_1d + module procedure CTA_Pack_Get_integer_2d + module procedure CTA_Pack_Get_integer_3d + module procedure CTA_Pack_Get_integer_4d + module procedure CTA_Pack_Get_integer_5d + module procedure CTA_Pack_Get_integer_6d + module procedure CTA_Pack_Get_integer_7d + module procedure CTA_Pack_Get_real4_1d + module procedure CTA_Pack_Get_real4_2d + module procedure CTA_Pack_Get_real4_3d + module procedure CTA_Pack_Get_real4_4d + module procedure CTA_Pack_Get_real4_5d + module procedure CTA_Pack_Get_real4_6d + module procedure CTA_Pack_Get_real4_7d + module procedure CTA_Pack_Get_real8_1d + module procedure CTA_Pack_Get_real8_2d + module procedure CTA_Pack_Get_real8_3d + module procedure CTA_Pack_Get_real8_4d + module procedure CTA_Pack_Get_real8_5d + module procedure CTA_Pack_Get_real8_6d + module procedure CTA_Pack_Get_real8_7d + end interface + + ! \brief Get length of packed data in pack-buffer. + ! + ! \param hpack I handle of pack object + ! + ! \param status O length packed data + ! + interface CTA_F90_Pack_GetLen + subroutine CTA_Pack_GetLen( hpack, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hpack + integer , intent(out ) :: status + end subroutine CTA_Pack_GetLen + end interface + + ! \brief Only update administration for added elements + ! + ! This function can be used to update the administration after the + ! pack-buffer is filled externally (e.g. using an mpi_recv) + ! + ! \param hpack I handle of pack object + ! \param lendat I number of added elements (chars) + ! + ! \param status O length packed data + ! + interface CTA_F90_Pack_AddCnt + subroutine CTA_Pack_AddCnt( hpack, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hpack + integer , intent(in ) :: lendat + integer , intent(out ) :: status + end subroutine CTA_Pack_AddCnt + end interface + + ! \brief Get the internal pack and unpack pointers + ! + ! This function can be used to save to pointers and + ! reset the state of the pack component after unpacking or adding + ! some data + ! + ! \param hpack I handle of pack object + ! \param ip1 O unpack pointer + ! \param ip2 O pack pointer + ! + ! \param status O length packed data + ! + interface CTA_F90_Pack_GetIndx + subroutine CTA_Pack_GetIndx( hpack, ip1, ip2, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hpack + integer , intent(out ) :: ip1 + integer , intent(out ) :: ip2 + integer , intent(out ) :: status + end subroutine CTA_Pack_GetIndx + end interface + + ! \brief Set the internal pack and unpack pointers + ! + ! This function can be used to restore the pointers and + ! reset the state of the pack component after unpacking or adding + ! some data + ! + ! \param hpack I handle of pack object + ! \param ip1 I unpack pointer. In order to reset all unpackin + ! set to CTA_PACK_RESET + ! \param ip2 I pack pointer. In order to reset the whole pack object + ! set to CTA_PACK_RESET + ! + ! \param status O length packed data + ! + interface CTA_F90_Pack_SetIndx + subroutine CTA_Pack_SetIndx( hpack, ip1, ip2, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hpack + integer , intent(in ) :: ip1 + integer , intent(in ) :: ip2 + integer , intent(out ) :: status + end subroutine CTA_Pack_SetIndx + end interface + + +contains + + subroutine CTA_Pack_Add_integer_1d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + integer , intent(in ) :: data(:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Add( hpack, data, lendat, status ) + end subroutine CTA_Pack_Add_integer_1d + + subroutine CTA_Pack_Add_integer_2d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + integer , intent(in ) :: data(:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Add( hpack, data, lendat, status ) + end subroutine CTA_Pack_Add_integer_2d + + subroutine CTA_Pack_Add_integer_3d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + integer , intent(in ) :: data(:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Add( hpack, data, lendat, status ) + end subroutine CTA_Pack_Add_integer_3d + + subroutine CTA_Pack_Add_integer_4d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + integer , intent(in ) :: data(:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Add( hpack, data, lendat, status ) + end subroutine CTA_Pack_Add_integer_4d + + subroutine CTA_Pack_Add_integer_5d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + integer , intent(in ) :: data(:,:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Add( hpack, data, lendat, status ) + end subroutine CTA_Pack_Add_integer_5d + + subroutine CTA_Pack_Add_integer_6d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + integer , intent(in ) :: data(:,:,:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Add( hpack, data, lendat, status ) + end subroutine CTA_Pack_Add_integer_6d + + subroutine CTA_Pack_Add_integer_7d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + integer , intent(in ) :: data(:,:,:,:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Add( hpack, data, lendat, status ) + end subroutine CTA_Pack_Add_integer_7d + + subroutine CTA_Pack_Add_real4_1d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(4) , intent(in ) :: data(:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Add( hpack, data, lendat, status ) + end subroutine CTA_Pack_Add_real4_1d + + subroutine CTA_Pack_Add_real4_2d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(4) , intent(in ) :: data(:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Add( hpack, data, lendat, status ) + end subroutine CTA_Pack_Add_real4_2d + + subroutine CTA_Pack_Add_real4_3d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(4) , intent(in ) :: data(:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Add( hpack, data, lendat, status ) + end subroutine CTA_Pack_Add_real4_3d + + subroutine CTA_Pack_Add_real4_4d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(4) , intent(in ) :: data(:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Add( hpack, data, lendat, status ) + end subroutine CTA_Pack_Add_real4_4d + + subroutine CTA_Pack_Add_real4_5d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(4) , intent(in ) :: data(:,:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Add( hpack, data, lendat, status ) + end subroutine CTA_Pack_Add_real4_5d + + subroutine CTA_Pack_Add_real4_6d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(4) , intent(in ) :: data(:,:,:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Add( hpack, data, lendat, status ) + end subroutine CTA_Pack_Add_real4_6d + + subroutine CTA_Pack_Add_real4_7d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(4) , intent(in ) :: data(:,:,:,:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Add( hpack, data, lendat, status ) + end subroutine CTA_Pack_Add_real4_7d + + subroutine CTA_Pack_Add_real8_1d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(8) , intent(in ) :: data(:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Add( hpack, data, lendat, status ) + end subroutine CTA_Pack_Add_real8_1d + + subroutine CTA_Pack_Add_real8_2d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(8) , intent(in ) :: data(:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Add( hpack, data, lendat, status ) + end subroutine CTA_Pack_Add_real8_2d + + subroutine CTA_Pack_Add_real8_3d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(8) , intent(in ) :: data(:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Add( hpack, data, lendat, status ) + end subroutine CTA_Pack_Add_real8_3d + + subroutine CTA_Pack_Add_real8_4d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(8) , intent(in ) :: data(:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Add( hpack, data, lendat, status ) + end subroutine CTA_Pack_Add_real8_4d + + subroutine CTA_Pack_Add_real8_5d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(8) , intent(in ) :: data(:,:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Add( hpack, data, lendat, status ) + end subroutine CTA_Pack_Add_real8_5d + + subroutine CTA_Pack_Add_real8_6d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(8) , intent(in ) :: data(:,:,:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Add( hpack, data, lendat, status ) + end subroutine CTA_Pack_Add_real8_6d + + subroutine CTA_Pack_Add_real8_7d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(8) , intent(in ) :: data(:,:,:,:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Add( hpack, data, lendat, status ) + end subroutine CTA_Pack_Add_real8_7d + + subroutine CTA_Pack_Get_integer_1d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + integer , intent(out ) :: data(:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Get( hpack, data, lendat, status ) + end subroutine CTA_Pack_Get_integer_1d + + subroutine CTA_Pack_Get_integer_2d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + integer , intent(out ) :: data(:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Get( hpack, data, lendat, status ) + end subroutine CTA_Pack_Get_integer_2d + + subroutine CTA_Pack_Get_integer_3d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + integer , intent(out ) :: data(:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Get( hpack, data, lendat, status ) + end subroutine CTA_Pack_Get_integer_3d + + subroutine CTA_Pack_Get_integer_4d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + integer , intent(out ) :: data(:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Get( hpack, data, lendat, status ) + end subroutine CTA_Pack_Get_integer_4d + + subroutine CTA_Pack_Get_integer_5d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + integer , intent(out ) :: data(:,:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Get( hpack, data, lendat, status ) + end subroutine CTA_Pack_Get_integer_5d + + subroutine CTA_Pack_Get_integer_6d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + integer , intent(out ) :: data(:,:,:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Get( hpack, data, lendat, status ) + end subroutine CTA_Pack_Get_integer_6d + + subroutine CTA_Pack_Get_integer_7d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + integer , intent(out ) :: data(:,:,:,:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Get( hpack, data, lendat, status ) + end subroutine CTA_Pack_Get_integer_7d + + subroutine CTA_Pack_Get_real4_1d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(4) , intent(out ) :: data(:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Get( hpack, data, lendat, status ) + end subroutine CTA_Pack_Get_real4_1d + + subroutine CTA_Pack_Get_real4_2d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(4) , intent(out ) :: data(:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Get( hpack, data, lendat, status ) + end subroutine CTA_Pack_Get_real4_2d + + subroutine CTA_Pack_Get_real4_3d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(4) , intent(out ) :: data(:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Get( hpack, data, lendat, status ) + end subroutine CTA_Pack_Get_real4_3d + + subroutine CTA_Pack_Get_real4_4d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(4) , intent(out ) :: data(:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Get( hpack, data, lendat, status ) + end subroutine CTA_Pack_Get_real4_4d + + subroutine CTA_Pack_Get_real4_5d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(4) , intent(out ) :: data(:,:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Get( hpack, data, lendat, status ) + end subroutine CTA_Pack_Get_real4_5d + + subroutine CTA_Pack_Get_real4_6d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(4) , intent(out ) :: data(:,:,:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Get( hpack, data, lendat, status ) + end subroutine CTA_Pack_Get_real4_6d + + subroutine CTA_Pack_Get_real4_7d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(4) , intent(out ) :: data(:,:,:,:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Get( hpack, data, lendat, status ) + end subroutine CTA_Pack_Get_real4_7d + + subroutine CTA_Pack_Get_real8_1d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(8) , intent(out ) :: data(:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Get( hpack, data, lendat, status ) + end subroutine CTA_Pack_Get_real8_1d + + subroutine CTA_Pack_Get_real8_2d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(8) , intent(out ) :: data(:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Get( hpack, data, lendat, status ) + end subroutine CTA_Pack_Get_real8_2d + + subroutine CTA_Pack_Get_real8_3d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(8) , intent(out ) :: data(:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Get( hpack, data, lendat, status ) + end subroutine CTA_Pack_Get_real8_3d + + subroutine CTA_Pack_Get_real8_4d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(8) , intent(out ) :: data(:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Get( hpack, data, lendat, status ) + end subroutine CTA_Pack_Get_real8_4d + + subroutine CTA_Pack_Get_real8_5d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(8) , intent(out ) :: data(:,:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Get( hpack, data, lendat, status ) + end subroutine CTA_Pack_Get_real8_5d + + subroutine CTA_Pack_Get_real8_6d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(8) , intent(out ) :: data(:,:,:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Get( hpack, data, lendat, status ) + end subroutine CTA_Pack_Get_real8_6d + + subroutine CTA_Pack_Get_real8_7d( hpack, data, lendat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: hpack + real(8) , intent(out ) :: data(:,:,:,:,:,:,:) + integer , intent(in ) :: lendat + integer , intent(out ) :: status + call CTA_Pack_Get( hpack, data, lendat, status ) + end subroutine CTA_Pack_Get_real8_7d + +end module cta_f90_pack + diff --git a/costa/native/cta_f90/generated/cta_f90_par.f90 b/costa/native/cta_f90/generated/cta_f90_par.f90 new file mode 100644 index 000000000..85d8ba9fb --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_par.f90 @@ -0,0 +1,62 @@ +module cta_f90_par + + implicit none + + public + + ! \brief Initialises parallel environment for a process that spawned + ! The executable is spawned using MPI_COMM_SPAWN or MPI_COMM_SPAWN_MULTIPLE + ! + ! It will set up the communication groups and optionally starts the parallel + ! model builder + ! + ! \param StartPar I CTA_TRUE/CTA_FALSE start parallel model builder + ! + ! Note when a worker process is part of a Master-Worker model and it does + ! not implement the COSTA model interface it should not start the parallel model builder + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Par_WorkerSpawn + subroutine CTA_Par_WorkerSpawn( StartPar, status ) + integer , intent(in ) :: StartPar + integer , intent(out ) :: status + end subroutine CTA_Par_WorkerSpawn + end interface + + ! \brief Initialises parallel environment and create process groups + ! + ! \param parConfig I configuration input from XML-file + ! \param StartPar I CTA_TRUE/CTA_FALSE start parallel model builder + ! + ! Note when a worker process is part of a Master-Worker model and it does + ! not implement the COSTA model interface it should not start the parallel model builder + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Par_CreateGroups + subroutine CTA_Par_CreateGroups( parConfig, StartPar, status ) + integer , intent(in ) :: parConfig + integer , intent(in ) :: StartPar + integer , intent(out ) :: status + end subroutine CTA_Par_CreateGroups + end interface + + ! \brief Get the global number of COSTA process group and index of this process + ! + ! \param itime O The index of this process in the group + ! \param iGroup O The group number of this process belongs to (1..nGroups) + ! + ! \note If this function is called by the COSTA master process or in a sequential run it will return 0 for iGroup + ! + ! + interface CTA_F90_Par_GetGroupInfo + subroutine CTA_Par_GetGroupInfo( iGroup, itime ) + integer , intent(out ) :: iGroup + integer , intent(out ) :: itime + end subroutine CTA_Par_GetGroupInfo + end interface + + +end module cta_f90_par + diff --git a/costa/native/cta_f90/generated/cta_f90_parameters.f90 b/costa/native/cta_f90/generated/cta_f90_parameters.f90 new file mode 100644 index 000000000..d247090f0 --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_parameters.f90 @@ -0,0 +1,25 @@ +module CTA_F90_Parameters + + implicit none + + + ! --- in/out ----------------------------------- + + public + + + ! --- const ------------------------------------ + + include 'cta_f90.inc' + + ! integer kind used for handles: + integer, parameter :: CTA_HANDLE_IKIND = 4 + + ! Do not use the 'sizeof' function! Not standard ... + !integer, parameter :: CTA_HANDLE_IKIND = sizeof(CTA_HANDLE) + + ! real kind used for time values: + integer, parameter :: CTA_TIME_RKIND = 8 + + +end module CTA_F90_Parameters diff --git a/costa/native/cta_f90/generated/cta_f90_reltable.f90 b/costa/native/cta_f90/generated/cta_f90_reltable.f90 new file mode 100644 index 000000000..9215143a2 --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_reltable.f90 @@ -0,0 +1,157 @@ +module cta_f90_reltable + + implicit none + + public + + ! \brief Create a relation table + ! + ! \param hreltable O created relation table + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_RelTable_Create + subroutine CTA_RelTable_Create( hreltable, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(out ) :: hreltable + integer , intent(out ) :: status + end subroutine CTA_RelTable_Create + end interface + + ! \brief Free a relation table object. + ! + ! \param hreltable IO relation table to be freed, + ! value is set to CTA_NULL on return. + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_RelTable_Free + subroutine CTA_RelTable_Free( hreltable, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hreltable + integer , intent(out ) :: status + end subroutine CTA_RelTable_Free + end interface + + ! \brief Copy elements according to relation table + ! + ! \note we currently only support copying of elements + ! between two vector instances. Other types of + ! COSTA object will be supported when needed + ! in later versions + ! + ! \param hreltable I handle of relation table + ! \param hfrom I Origin object to copy data from + ! \param hto I Target object to copy data to + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_RelTable_Apply + subroutine CTA_RelTable_Apply( hreltable, hfrom, hto, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hreltable + integer(CTA_HANDLE_IKIND) , intent(in ) :: hfrom + integer(CTA_HANDLE_IKIND) , intent(in ) :: hto + integer , intent(out ) :: status + end subroutine CTA_RelTable_Apply + end interface + + ! \brief Copy elements according to inverse of relation table + ! + ! \note we currently only support copying of elements + ! between two vector instances. Other types of + ! COSTA object will be supported when needed + ! in later versions + ! + ! \param hreltable I handle of relation table + ! \param hfrom I Origin object to copy data from + ! \param hto I Target object to copy data to + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_RelTable_ApplyInv + subroutine CTA_RelTable_ApplyInv( hreltable, hfrom, hto, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hreltable + integer(CTA_HANDLE_IKIND) , intent(in ) :: hfrom + integer(CTA_HANDLE_IKIND) , intent(in ) :: hto + integer , intent(out ) :: status + end subroutine CTA_RelTable_ApplyInv + end interface + + ! \brief Set a relation table + ! A Set a relation table that defines a selection of elements + ! + ! \param hreltable O relation table that is set + ! \param vselect I (integer) vector with indices of elements + ! from the target set that are selected. + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_RelTable_SetSelect + subroutine CTA_RelTable_SetSelect( hreltable, vselect, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(out ) :: hreltable + integer(CTA_HANDLE_IKIND) , intent(in ) :: vselect + integer , intent(out ) :: status + end subroutine CTA_RelTable_SetSelect + end interface + + ! \brief Get the number of elements that are copied when the table is applied + ! + ! + ! \param hreltable I relation table + ! \param nelt O number of elements that are copied + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_RelTable_Count + subroutine CTA_RelTable_Count( hreltable, nelt, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hreltable + integer , intent(out ) :: nelt + integer , intent(out ) :: status + end subroutine CTA_RelTable_Count + end interface + + ! \brief Set a relation table that is combination of two + ! relation tables. + ! + ! Set a relation table that is the combination of two exisiting relation + ! tables. It is possible to use the inverse of the relation tables when + ! needed + ! . + ! A usefull application of this method is to create a relation table that + ! defines a relation between a subset of elements from set1 and a subset of + ! the elements of set2. In order to set a relation table of this kind first + ! create two relation tables: + ! hrel1 elements from set 1 that have a relation with the elements from set 2, + ! hrel2 elements from set 2 that have a relation with the elements from set 1 + ! + ! The combined relation table of hrel1 and inverse(hrel2) is a relation + ! table that spcifies the relation of a subset of elements from set1 and a + ! subset of elements from set2. + ! + ! \param hreltable O relation table that is set + ! \param hrel1 I first relation table + ! \param inverse1 I use inverse of hrel1 (CTA_TRUE/CTA_FALSE) + ! \param hrel2 I first relation table + ! \param inverse2 I use inverse of hrel2 (CTA_TRUE/CTA_FALSE) + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_RelTable_SetTableCombine + subroutine CTA_RelTable_SetTableCombine( hreltable, hrel1, inverse1, hrel2, inverse2, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(out ) :: hreltable + integer(CTA_HANDLE_IKIND) , intent(in ) :: hrel1 + integer , intent(in ) :: inverse1 + integer(CTA_HANDLE_IKIND) , intent(in ) :: hrel2 + integer , intent(in ) :: inverse2 + integer , intent(out ) :: status + end subroutine CTA_RelTable_SetTableCombine + end interface + + +end module cta_f90_reltable + diff --git a/costa/native/cta_f90/generated/cta_f90_resultwriter.f90 b/costa/native/cta_f90/generated/cta_f90_resultwriter.f90 new file mode 100644 index 000000000..3c2ab0055 --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_resultwriter.f90 @@ -0,0 +1,91 @@ +module cta_f90_resultwriter + + implicit none + + public + + ! \brief Handle a string message send to the resultwriter + ! + ! \param idWriter I ID of this resultwriter (Counter of number of native result writers) + ! \param config I Name of XML configuration file containting the function pointers and additional information + ! \param workingDir I Full path to working directory + ! \param message I Message send to resultwriter + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Resultwriter_putmessage + subroutine CTA_Resultwriter_putmessage( idWriter, config, workingDir, message, status ) + integer , intent(in ) :: idWriter + character(len=*) , intent(in ) :: config(*) + character(len=*) , intent(in ) :: workingDir(*) + character(len=*) , intent(in ) :: message(*) + integer , intent(out ) :: status + end subroutine CTA_Resultwriter_putmessage + end interface + + ! \brief Handle a string message send to the resultwriter + ! + ! \param idWriter I ID of this resultwriter (Counter of number of native result writers) + ! \param config I Name of XML configuration file containting the function pointers and additional information + ! \param workingDir I Full path to working directory + ! \param id I Name of the variable/array send to the resultwriter + ! \param handle I Handle (Vector or TreeVector) of variable + ! \param outputLevel I Selected output level (see opendabridge for possible values) + ! \param context I Location from which the resultwriter was called + ! \param iteration I Iteration number from which the resultwriter was called + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Resultwriter_putvalue + subroutine CTA_Resultwriter_putvalue( idWriter, config, workingDir, id, handle, outputLevel, context, iteration, status ) + integer , intent(in ) :: idWriter + character(len=*) , intent(in ) :: config(*) + character(len=*) , intent(in ) :: workingDir(*) + character(len=*) , intent(in ) :: id(*) + integer , intent(in ) :: handle + integer , intent(in ) :: outputLevel + character(len=*) , intent(in ) :: context(*) + integer , intent(in ) :: iteration + integer , intent(out ) :: status + end subroutine CTA_Resultwriter_putvalue + end interface + + ! \brief Handle a string message send to the resultwriter + ! + ! \param idWriter I ID of this resultwriter (Counter of number of native result writers) + ! \param config I Name of XML configuration file containting the function pointers and additional information + ! \param workingDir I Full path to working directory + ! \param iteration I Iteration number from which the resultwriter was called + ! \param cost I Value of cost function + ! \param handle I Handle (Vector or TreeVector) of the current parameters + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Resultwriter_putiterationreport + subroutine CTA_Resultwriter_putiterationreport( idWriter, config, workingDir, iteration, cost, handle, status ) + integer , intent(in ) :: idWriter + character(len=*) , intent(in ) :: config(*) + character(len=*) , intent(in ) :: workingDir(*) + integer , intent(in ) :: iteration + real(8) , intent(in ) :: cost + integer , intent(in ) :: handle + integer , intent(out ) :: status + end subroutine CTA_Resultwriter_putiterationreport + end interface + + ! \brief Free a resultwriter (close output files etc). + ! + ! \param idWriter I ID of this resultwriter (Counter of number of native result writers) + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Resultwriter_free + subroutine CTA_Resultwriter_free( idWriter, status ) + integer , intent(in ) :: idWriter + integer , intent(out ) :: status + end subroutine CTA_Resultwriter_free + end interface + + +end module cta_f90_resultwriter + diff --git a/costa/native/cta_f90/generated/cta_f90_sobs.f90 b/costa/native/cta_f90/generated/cta_f90_sobs.f90 new file mode 100644 index 000000000..36f9efb39 --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_sobs.f90 @@ -0,0 +1,271 @@ +module cta_f90_sobs + + implicit none + + public + + ! \brief Create a new class (=implementation) of a COSTA stochastic observer component. + ! + ! \param name I name of the new stochastic observer class + ! \param h_func I COSTA function handles for functions that implement class, + ! missing functions must have value CTA_NULL + ! \param descrcl I class of the observation description that is created by stochastic observer + ! \param hstochobscl O handle of new stochastic observer class + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_SObs_DefineClass + subroutine CTA_SObs_DefineClass( name, h_func, descrcl, hstochobscl, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + character(len=*) , intent(in ) :: name + integer(CTA_HANDLE_IKIND) , intent(in ) :: h_func(*) + integer(CTA_HANDLE_IKIND) , intent(in ) :: descrcl + integer(CTA_HANDLE_IKIND) , intent(out ) :: hstochobscl + integer , intent(out ) :: status + end subroutine CTA_SObs_DefineClass + end interface + + ! \brief Create an instance of a stocastic observer + ! + ! \param hstochobscl I stochastic observer class of new stochastic observer + ! \param userdata IO userdata for creation (depends on class) + ! \param hstochobs O receives handle of new stochastic observer object + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_SObs_Create + subroutine CTA_SObs_Create( hstochobscl, userdata, hstochobs, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hstochobscl + integer(CTA_HANDLE_IKIND) , intent(inout) :: userdata + integer(CTA_HANDLE_IKIND) , intent(out ) :: hstochobs + integer , intent(out ) :: status + end subroutine CTA_SObs_Create + end interface + + ! \brief Create a new stochastic observer that is subset of existing stochastic observer. + ! + ! \param hsobsin I handle of the existing stochastic observer of + ! which a selection is to be made + ! \param userdata IO inputs necessary for making a selection (depends on user implementation) + ! \param hsobsout O receives handle of the new COSTA-stochastic observer, empty before calling, caller responsible for freeing after use + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_SObs_CreateSel + subroutine CTA_SObs_CreateSel( hsobsin, userdata, hsobsout, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hsobsin + integer(CTA_HANDLE_IKIND) , intent(inout) :: userdata + integer(CTA_HANDLE_IKIND) , intent(out ) :: hsobsout + integer , intent(out ) :: status + end subroutine CTA_SObs_CreateSel + end interface + + ! \brief Create a new stoch observer that is subset in time of existing stochastic observer. + ! + ! All observations in the closed interval [t1,t2] of the time span are selected. + ! + ! \param hsobsin I handle of the stochastic observer of + ! which a selection is to be made + ! \param timespan I time span over which selection has to be made + ! \param hsobsout O receives handle of the new COSTA-stochastic observer, empty before calling + ! \param status O error states: CTA_OK if successful + ! + interface CTA_F90_SObs_CreateTimSel + subroutine CTA_SObs_CreateTimSel( hsobsin, timespan, hsobsout, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hsobsin + integer(CTA_HANDLE_IKIND) , intent(in ) :: timespan + integer(CTA_HANDLE_IKIND) , intent(out ) :: hsobsout + integer , intent(out ) :: status + end subroutine CTA_SObs_CreateTimSel + end interface + + ! \brief Count the number of elements in stochastic observer. + ! \param hsobs I handle of the stochastic observer + ! \param nmeasr O receives number of measurements in this observer + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_SObs_Count + subroutine CTA_SObs_Count( hsobs, nmeasr, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hsobs + integer , intent(out ) :: nmeasr + integer , intent(out ) :: status + end subroutine CTA_SObs_Count + end interface + + ! \brief Get a vector with the measurements. + ! + ! \param hsobs I handle of the stochastic observer + ! \param hvec IO handle of vector that receives the measurements; must exist before calling + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_SObs_GetVal + subroutine CTA_SObs_GetVal( hsobs, hvec, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hsobs + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + integer , intent(out ) :: status + end subroutine CTA_SObs_GetVal + end interface + + ! \brief Count the times associated to the measurements. + ! + ! \param hsobs I handle of the stochastic observer + ! \param hvec IO handle to vector that receives the times; must exist before calling + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_SObs_GetTimes + subroutine CTA_SObs_GetTimes( hsobs, hvec, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hsobs + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + integer , intent(out ) :: status + end subroutine CTA_SObs_GetTimes + end interface + + ! \brief Draw random values (measurements) according to the probability density + ! function of the mesurements. + ! + ! \param hsobs I handle of the stochastic observer + ! \param hvec IO handle of vector that receives the draw (measurements); must exist before calling + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_SObs_GetRealisation + subroutine CTA_SObs_GetRealisation( hsobs, hvec, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hsobs + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + integer , intent(out ) :: status + end subroutine CTA_SObs_GetRealisation + end interface + + ! \brief Get expectation of the probability density function of the mesurements. + ! + ! \param hsobs I handle of the stochastic observer + ! \param hvec IO handle of vector that receives the expectation values; must exist before calling + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_SObs_GetExpectation + subroutine CTA_SObs_GetExpectation( hsobs, hvec, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hsobs + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + integer , intent(out ) :: status + end subroutine CTA_SObs_GetExpectation + end interface + + ! \brief Get the value of the probability density function of the mesurements at given location. + ! + ! \param hsobs I handle of the stochastic observer + ! \param hvecx I handle of vector with location for evaluating pdf + ! \param hvecy IO handle of vector that is to contain the pdf-value; must exist before calling + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_SObs_EvalPDF + subroutine CTA_SObs_EvalPDF( hsobs, hvecx, hvecy, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hsobs + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvecx + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvecy + integer , intent(out ) :: status + end subroutine CTA_SObs_EvalPDF + end interface + + ! \brief Get covariance matrix of probability density function of the measurements. + ! + ! \param hsobs I handle of the stochastic observer + ! \param hmat IO handle of matrix that receives the covariance matrix; must exist before calling + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_SObs_GetCovMat + subroutine CTA_SObs_GetCovMat( hsobs, hmat, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hsobs + integer(CTA_HANDLE_IKIND) , intent(inout) :: hmat + integer , intent(out ) :: status + end subroutine CTA_SObs_GetCovMat + end interface + + ! \brief Get variance of probability density function of the mesurements. + ! + ! \param hsobs I handle of the stochastic observer + ! \param hvec IO handle of vector that receives the variance; must exist before calling + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_SObs_GetVar + subroutine CTA_SObs_GetVar( hsobs, hvec, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hsobs + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + integer , intent(out ) :: status + end subroutine CTA_SObs_GetVar + end interface + + ! \brief Get standard deviation of probability density function of the measurements. + ! + ! \param hsobs I handle of the stochastic observer + ! \param hvec IO handle of vector that is to contain the standard deviation + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_SObs_GetStd + subroutine CTA_SObs_GetStd( hsobs, hvec, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hsobs + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + integer , intent(out ) :: status + end subroutine CTA_SObs_GetStd + end interface + + ! \brief Create the observation description corresponding to the stochastic observer. + ! + ! \note Caller is responsible for freeing the here created observation description + ! + ! \param hsobs I handle of the stochastic observer + ! \param hobsdescr O receives handle of newly created observation description class, empty before calling + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_SObs_GetDescription + subroutine CTA_SObs_GetDescription( hsobs, hobsdescr, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hsobs + integer(CTA_HANDLE_IKIND) , intent(out ) :: hobsdescr + integer , intent(out ) :: status + end subroutine CTA_SObs_GetDescription + end interface + + ! \brief Export the stochastic observer. + ! + ! \note Supported by CTA_DEFAULT_SOBS:\n + ! output to file (userdata must contain handle of COSTA file)\n + ! + ! \param hsobs I handle of the stochastic observer + ! \param userdata I configuration of output + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_SObs_Export + subroutine CTA_SObs_Export( hsobs, userdata, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hsobs + integer(CTA_HANDLE_IKIND) , intent(in ) :: userdata + integer , intent(out ) :: status + end subroutine CTA_SObs_Export + end interface + + ! \brief Free the stochastic observer + ! + ! \Note hsobs=CTA_NULL is allowed + ! + ! \param hsobs IO handle of the stochastic observer, replaced by CTA_NULL on return + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_SObs_Free + subroutine CTA_SObs_Free( hsobs, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hsobs + integer , intent(out ) :: status + end subroutine CTA_SObs_Free + end interface + + +end module cta_f90_sobs + diff --git a/costa/native/cta_f90/generated/cta_f90_string.f90 b/costa/native/cta_f90/generated/cta_f90_string.f90 new file mode 100644 index 000000000..3caf7a1be --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_string.f90 @@ -0,0 +1,229 @@ +module cta_f90_string + + implicit none + + public + + ! \brief Create a new COSTA string instance. + ! + ! \param hstring O handle of created string + ! \param status O CTA_OK if successful + ! + interface CTA_F90_String_Create + subroutine CTA_String_Create( hstring, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(out ) :: hstring + integer , intent(out ) :: status + end subroutine CTA_String_Create + end interface + + ! \brief Create a new COSTA string that is a copy of an existing one + ! + ! \param hto O receives the handle of the created string + ! \param hfrom I handle of string to copy + ! + ! \param status O CTA_OK if successful + ! + interface CTA_F90_String_Copy + subroutine CTA_String_Copy( hto, hfrom, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(out ) :: hto + integer(CTA_HANDLE_IKIND) , intent(in ) :: hfrom + integer , intent(out ) :: status + end subroutine CTA_String_Copy + end interface + + ! \brief Free the COSTA string instance. + ! + ! \note + ! + ! \param hstring IO handle of the string instance, replaced by CTA_NULL on return + ! \param status O CTA_OK if successful + ! + interface CTA_F90_String_Free + subroutine CTA_String_Free( hstring, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hstring + integer , intent(out ) :: status + end subroutine CTA_String_Free + end interface + + ! \brief Get the number of characters in string. + ! + ! \note The returned length is the number of characters excluding the + ! 0-character. + ! + ! \param hstring I handle of the string + ! \param len O receives the number of characters in string + ! \param status O CTA_OK if successful + ! + interface CTA_F90_String_GetLength + subroutine CTA_String_GetLength( hstring, len, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hstring + integer , intent(out ) :: len + integer , intent(out ) :: status + end subroutine CTA_String_GetLength + end interface + + ! \brief Set the string to new content. + ! + ! \param hstring IO handle of the string + ! \param str I new content + ! \param status O CTA_OK if successful + ! + interface CTA_F90_String_Set + subroutine CTA_String_Set( hstring, str, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hstring + character(len=*) , intent(in ) :: str + integer , intent(out ) :: status + end subroutine CTA_String_Set + end interface + + ! \brief Get a copy of the string. + ! + ! \note It is the responsibility of the caller making str large enough to + ! hold the string and trailing 0-character. + ! + ! \param hstring I handle of the string + ! \param str O buffer that receives a copy of the string including trailing 0-character + ! \param status O CTA_OK if successful + ! + interface CTA_F90_String_Get + subroutine CTA_String_Get( hstring, str, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hstring + character(len=*) , intent(out ) :: str + integer , intent(out ) :: status + end subroutine CTA_String_Get + end interface + + ! \brief Get the (scalar) value of a string + ! + ! \note It is the responsibility of the caller that parameter value is large enough to + ! hold the value as specified by the datatype. + ! + ! \param hstring I handle of the string + ! \param value O receives the value + ! \param datatype I data type of value + ! \param status O CTA_OK if successful + ! + !interface CTA_F90_String_GetValue + ! subroutine CTA_String_GetValue( hstring, value, datatype, status ) + ! use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + ! integer(CTA_HANDLE_IKIND) , intent(in ) :: hstring + ! void , intent(out ) :: value + ! integer , intent(in ) :: datatype + ! integer , intent(out ) :: status + ! end subroutine CTA_String_GetValue + !end interface + + ! \brief Create new string that is a concatination of existing strings. + ! + ! \param istring IO handle of the string (first string in concatination) + ! and whole concatinated string on return + ! \param xstring I handle of the second string (extension string) + ! \param status O CTA_OK if successful + ! + interface CTA_F90_String_Conc + subroutine CTA_String_Conc( istring, xstring, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: istring + integer(CTA_HANDLE_IKIND) , intent(in ) :: xstring + integer , intent(out ) :: status + end subroutine CTA_String_Conc + end interface + + ! \brief Imports string. + ! + ! Supports: pack objects (usrdata must be handle of pack object to import from) + ! + ! \param hstring IO handle of the string + ! \param usrdata I configuration of import + ! \param status O CTA_OK if successful + ! + ! \note Only CTA_Pack is currently supported fot usrdata + ! + interface CTA_F90_String_Import + subroutine CTA_String_Import( hstring, usrdata, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hstring + integer(CTA_HANDLE_IKIND) , intent(in ) :: usrdata + integer , intent(out ) :: status + end subroutine CTA_String_Import + end interface + + ! \brief Exports length of string and string itself. + ! + ! Supports: pack objects (usrdata must be handle of pack object to export to) + ! + ! \param hstring I handle of the string + ! \param usrdata IO configuration of export + ! \param status O CTA_OK if successful + ! + ! \note Only CTA_Pack is currently supported fot usrdata + ! + interface CTA_F90_String_Export + subroutine CTA_String_Export( hstring, usrdata, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hstring + integer(CTA_HANDLE_IKIND) , intent(inout) :: usrdata + integer , intent(out ) :: status + end subroutine CTA_String_Export + end interface + + ! \brief Check whether string is equal to COSTA string. + ! + ! + ! \param hstring I handle of the string + ! \param str0 I string to compare hsting with + ! \param status O CTA_TRUE/CTA_FALSE + ! + ! \note Only CTA_Pack is currently supported fot usrdata + ! + interface CTA_F90_String_Equals_Char + subroutine CTA_String_Equals_Char( hstring, str0, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hstring + character(len=*) , intent(in ) :: str0 + integer , intent(out ) :: status + end subroutine CTA_String_Equals_Char + end interface + + ! \brief Check whether two COSTA strings are equal. + ! + ! + ! \param hstring1 I handle of first string + ! \param hstring2 I handle of second string + ! \param status O CTA_TRUE/CTA_FALSE + ! + ! \note Only CTA_Pack is currently supported fot usrdata + ! + interface CTA_F90_Strings_Equal + subroutine CTA_Strings_Equal( hstring1, hstring2, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hstring1 + integer(CTA_HANDLE_IKIND) , intent(in ) :: hstring2 + integer , intent(out ) :: status + end subroutine CTA_Strings_Equal + end interface + + ! \brief Create a duplication of a COSTA string + ! + ! \param hfrom I handle of string to copy + ! \param hto O handle of created string + ! \param status O CTA_OK if successful + ! + interface CTA_F90_String_Duplicate + subroutine CTA_String_Duplicate( hfrom, hto, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hfrom + integer(CTA_HANDLE_IKIND) , intent(out ) :: hto + integer , intent(out ) :: status + end subroutine CTA_String_Duplicate + end interface + + +end module cta_f90_string + diff --git a/costa/native/cta_f90/generated/cta_f90_time.f90 b/costa/native/cta_f90/generated/cta_f90_time.f90 new file mode 100644 index 000000000..65a0b690e --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_time.f90 @@ -0,0 +1,249 @@ +module cta_f90_time + + implicit none + + public + + ! \brief Create a time object. + ! + ! \param htime O receives handle of newly created time object + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Time_Create + subroutine CTA_Time_Create( htime, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(out ) :: htime + integer , intent(out ) :: status + end subroutine CTA_Time_Create + end interface + + ! \brief Free a time object. + ! + ! \param htime IO handle of time object to be freed, replaced by CTA_NULL on return. + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Time_Free + subroutine CTA_Time_Free( htime, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: htime + integer , intent(out ) :: status + end subroutine CTA_Time_Free + end interface + + ! \brief Set the time span. + ! + ! \param htime IO time object of which to set time span + ! \param tstart I starting time + ! \param tend I ending time + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Time_SetSpan + subroutine CTA_Time_SetSpan( htime, tstart, tend, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_TIME_RKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: htime + real(CTA_TIME_RKIND) , intent(in ) :: tstart + real(CTA_TIME_RKIND) , intent(in ) :: tend + integer , intent(out ) :: status + end subroutine CTA_Time_SetSpan + end interface + + ! \brief Get the time span. + ! + ! \param htime I time object of which to get time span + ! \param tstart O receives the starting time + ! \param tend O receives ending time + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Time_GetSpan + subroutine CTA_Time_GetSpan( htime, tstart, tend, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_TIME_RKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: htime + real(CTA_TIME_RKIND) , intent(out ) :: tstart + real(CTA_TIME_RKIND) , intent(out ) :: tend + integer , intent(out ) :: status + end subroutine CTA_Time_GetSpan + end interface + + ! \brief Set the time step. + ! + ! \param htime IO time object of which to set time step + ! \param tstep I new time step + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Time_SetStep + subroutine CTA_Time_SetStep( htime, tstep, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_TIME_RKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: htime + real(CTA_TIME_RKIND) , intent(in ) :: tstep + integer , intent(out ) :: status + end subroutine CTA_Time_SetStep + end interface + + ! \brief Get time step. + ! + ! \param htime IO time object of which to get time step + ! \param tstep O receives time step + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Time_GetStep + subroutine CTA_Time_GetStep( htime, tstep, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_TIME_RKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: htime + real(CTA_TIME_RKIND) , intent(out ) :: tstep + integer , intent(out ) :: status + end subroutine CTA_Time_GetStep + end interface + + ! \brief Count number of timesteps in time + ! + ! \param htime I time object (see function description) + ! \param nsteps O number of timesteps + ! + ! \param status O error status: CTA_OK if successful + ! + ! \note number of steps is rounded to nearest integer + ! + interface CTA_F90_Time_CountSteps + subroutine CTA_Time_CountSteps( htime, nsteps, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: htime + integer , intent(out ) :: nsteps + integer , intent(out ) :: status + end subroutine CTA_Time_CountSteps + end interface + + ! \brief Get interval of i-th step + ! + ! \param htime I time object (see function description) + ! \param istep I interval of step + ! \param hstep O time step of model + ! + ! \param status O error status: CTA_OK if successful + ! + ! \note intervals are counted from 1 to nsteps + ! + interface CTA_F90_Time_GetTimeStep + subroutine CTA_Time_GetTimeStep( htime, istep, hstep, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: htime + integer , intent(in ) :: istep + integer(CTA_HANDLE_IKIND) , intent(out ) :: hstep + integer , intent(out ) :: status + end subroutine CTA_Time_GetTimeStep + end interface + + ! \brief Check whether htimesub is within time span of htime. + ! + ! \param htimesub I time object (see function description) + ! \param htime I time object (see function description) + ! \param inspan O receives TRUE if htimesub is within time span of htime or FALSE otherwise + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Time_InSpan + subroutine CTA_Time_InSpan( htimesub, htime, inspan, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: htimesub + integer(CTA_HANDLE_IKIND) , intent(in ) :: htime + integer , intent(out ) :: inspan + integer , intent(out ) :: status + end subroutine CTA_Time_InSpan + end interface + + ! \brief Check whether time step of time object equals t + ! + ! \param htime I time object + ! \param t I time step to compare + ! \param isstep O receives TRUE if t equals time step of time object or FALSE otherwise + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Time_IsStep + subroutine CTA_Time_IsStep( htime, t, isstep, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_TIME_RKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: htime + real(CTA_TIME_RKIND) , intent(in ) :: t + integer , intent(out ) :: isstep + integer , intent(out ) :: status + end subroutine CTA_Time_IsStep + end interface + + ! \brief Copy a time object. + ! + ! \param hfrom I time object to copy from + ! \param hto O handle of time object that receives copy, must exist before calling + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Time_Copy + subroutine CTA_Time_Copy( hfrom, hto, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hfrom + integer(CTA_HANDLE_IKIND) , intent(out ) :: hto + integer , intent(out ) :: status + end subroutine CTA_Time_Copy + end interface + + ! \brief Export a time object. + ! exports the whole internal state of the time object to given target + ! CTA_FILE will export the time component in a MATLAB/OCTAVE readable form + ! CTA_PACK will pack the content + ! + ! \param htime I time object to export + ! \param hexport I target for export (CTA_FILE or CTA_PACK) + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Time_Export + subroutine CTA_Time_Export( htime, hexport, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: htime + integer(CTA_HANDLE_IKIND) , intent(in ) :: hexport + integer , intent(out ) :: status + end subroutine CTA_Time_Export + end interface + + ! \brief Import a time object. + ! imports the whole internal state of the time object from given source + ! + ! \param htime I time object to import to + ! \param himport I source of import (CTA_PACK) + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Time_Import + subroutine CTA_Time_Import( htime, himport, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: htime + integer(CTA_HANDLE_IKIND) , intent(in ) :: himport + integer , intent(out ) :: status + end subroutine CTA_Time_Import + end interface + + ! \brief Returns whether time object describes an timespan or a single + ! instance. + ! + ! \param htime I time object to import to + ! \param isspan O time object is a time timespan (CTA_TRUE/CTA_FALSE) + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Time_IsSpan + subroutine CTA_Time_IsSpan( htime, isspan, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: htime + integer , intent(out ) :: isspan + integer , intent(out ) :: status + end subroutine CTA_Time_IsSpan + end interface + + +end module cta_f90_time + diff --git a/costa/native/cta_f90/generated/cta_f90_tree.f90 b/costa/native/cta_f90/generated/cta_f90_tree.f90 new file mode 100644 index 000000000..c79fae22e --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_tree.f90 @@ -0,0 +1,242 @@ +module cta_f90_tree + + implicit none + + public + + ! \brief Create a new COSTA tree instance + ! + ! \note + ! + ! \param htree O receives handle of created tree + ! \param status O CTA_OK if successful + ! + interface CTA_F90_Tree_Create + subroutine CTA_Tree_Create( htree, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(out ) :: htree + integer , intent(out ) :: status + end subroutine CTA_Tree_Create + end interface + + ! \brief Free the COSTA tree instance + ! + ! \note + ! + ! \param htree IO handle of the tree instance, replaced by CTA_NULL on return + ! \param status O CTA_OK if successful + ! + interface CTA_F90_Tree_Free + subroutine CTA_Tree_Free( htree, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: htree + integer , intent(out ) :: status + end subroutine CTA_Tree_Free + end interface + + ! \brief Add a COSTA handle to the COSTA tree + ! + ! \note + ! + ! \param htree IO handle of the tree object (parent) + ! \param name I name of the COSTA item + ! \param hitem I handle of the COSTA item to add (do not free the object after adding it to the tree) + ! \param status O CTA_OK if successful + ! + interface CTA_F90_Tree_AddHandle + subroutine CTA_Tree_AddHandle( htree, name, hitem, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: htree + character(len=*) , intent(in ) :: name + integer(CTA_HANDLE_IKIND) , intent(in ) :: hitem + integer , intent(out ) :: status + end subroutine CTA_Tree_AddHandle + end interface + + ! \brief Count the number of COSTA handles specified by the given path. + ! + ! \param htree I handle of the tree object + ! \param path I path of the item, separated by / or \\ + ! \param count O receives the number of items found + ! \param status O CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found + ! + interface CTA_F90_Tree_CountHandles + subroutine CTA_Tree_CountHandles( htree, path, count, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: htree + integer(CTA_HANDLE_IKIND) , intent(in ) :: path + integer , intent(out ) :: count + integer , intent(out ) :: status + end subroutine CTA_Tree_CountHandles + end interface + + ! \brief Count the number of COSTA handles specified by the given path. + ! + ! \param htree I handle of the tree object + ! \param path I path of the item, separated by / or \ + ! \param count O receives the number of items found + ! \param status O CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found + ! + interface CTA_F90_Tree_CountHandlesStr + subroutine CTA_Tree_CountHandlesStr( htree, path, count, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: htree + character(len=*) , intent(in ) :: path(*) + integer , intent(out ) :: count + integer , intent(out ) :: status + end subroutine CTA_Tree_CountHandlesStr + end interface + + ! \brief Get a COSTA handle from the COSTA tree (by path) + ! + ! \note In case of trees with default values, returns the default value. + ! \note The returned handle must not be freed. + ! + ! \param htree I handle of the tree object + ! \param path I path of the item, separated by / or \\ + ! \param hitem O receives the handle of the COSTA item, or CTA_NULL in case not found, do not free this handle. + ! \param status O CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found + ! + interface CTA_F90_Tree_GetHandle + subroutine CTA_Tree_GetHandle( htree, path, hitem, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: htree + integer(CTA_HANDLE_IKIND) , intent(in ) :: path + integer(CTA_HANDLE_IKIND) , intent(out ) :: hitem + integer , intent(out ) :: status + end subroutine CTA_Tree_GetHandle + end interface + + ! \brief Get the value of a COSTA handle from the COSTA tree (by path) + ! + ! \note In case of trees with default values, returns the default value. + ! + ! \param htree I handle of the tree object + ! \param path I COSTA string describing path of the item, separated by / or \ + ! \param value O receives the value of the COSTA item, or CTA_NULL in case of not found + ! \param datatype I data type of parameter value, must be the same as item in tree + ! \param status O CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found + ! + !interface CTA_F90_Tree_GetValue + ! subroutine CTA_Tree_GetValue( htree, path, value, datatype, status ) + ! use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + ! integer(CTA_HANDLE_IKIND) , intent(in ) :: htree + ! integer(CTA_HANDLE_IKIND) , intent(in ) :: path + ! void , intent(out ) :: value + ! integer , intent(in ) :: datatype + ! integer , intent(out ) :: status + ! end subroutine CTA_Tree_GetValue + !end interface + + ! \brief Get a COSTA handle from the COSTA tree (by path) + ! + ! \note In case of trees with default values, returns the default value. + ! \note The returned handle must not be freed. + ! + ! \param htree I handle of the tree object + ! \param str I C string describing path of the item, separated by / or \ + ! \param hitem O receives the handle of the COSTA item, or CTA_NULL in case of not found, do not free this handle + ! \param status O CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found + ! + interface CTA_F90_Tree_GetHandleStr + subroutine CTA_Tree_GetHandleStr( htree, str, hitem, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: htree + character(len=*) , intent(in ) :: str + integer(CTA_HANDLE_IKIND) , intent(out ) :: hitem + integer , intent(out ) :: status + end subroutine CTA_Tree_GetHandleStr + end interface + + ! \brief Get the value of a COSTA handle from the COSTA tree (by path) + ! + ! \note In case of trees with default values, returns the default value. + ! + ! \param htree I handle of the tree instance + ! \param str I C string describing path of the item, separated by / or \ + ! \param value O receives the value of the COSTA item, or CTA_NULL in case of not found + ! \param datatype I data type of the value specified + ! \param status O CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found + ! + !interface CTA_F90_Tree_GetValueStr + ! subroutine CTA_Tree_GetValueStr( htree, str, value, datatype, status ) + ! use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + ! integer(CTA_HANDLE_IKIND) , intent(in ) :: htree + ! character(len=*) , intent(in ) :: str + ! void , intent(out ) :: value + ! integer , intent(in ) :: datatype + ! integer , intent(out ) :: status + ! end subroutine CTA_Tree_GetValueStr + !end interface + + ! \brief Count the number of elements on the current level of the COSTA tree + ! + ! \param htree I handle of the tree level + ! \param count O receives the number of elements on the current tree level + ! \param status O CTA_OK if successful + ! + interface CTA_F90_Tree_CountItems + subroutine CTA_Tree_CountItems( htree, count, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: htree + integer , intent(out ) :: count + integer , intent(out ) :: status + end subroutine CTA_Tree_CountItems + end interface + + ! \brief Get a handle (by index) on the current level of the COSTA tree + ! + ! \param htree I handle of the tree level + ! \param index I index of the item to return, 1 <= index <= CTA_Tree_CountItems() + ! \param hitem O receives handle of the item at given index + ! \param status O CTA_OK if successful + ! + interface CTA_F90_Tree_GetItem + subroutine CTA_Tree_GetItem( htree, index, hitem, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: htree + integer , intent(in ) :: index + integer(CTA_HANDLE_IKIND) , intent(out ) :: hitem + integer , intent(out ) :: status + end subroutine CTA_Tree_GetItem + end interface + + ! \brief Get the value of a COSTA handle from the COSTA tree (by index) + ! + ! \note In case of trees with default values, returns the default value. + ! + ! \param htree I handle of the tree instance + ! \param index I index of the item + ! \param value O receives value of the COSTA item, or CTA_NULL in case of not found + ! \param datatype I data type of the value specified + ! \param status O CTA_OK if successful or CTA_ITEM_NOT_FOUND in case not found + ! + !interface CTA_F90_Tree_GetItemValue + ! subroutine CTA_Tree_GetItemValue( htree, index, value, datatype, status ) + ! use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + ! integer(CTA_HANDLE_IKIND) , intent(in ) :: htree + ! integer , intent(in ) :: index + ! void , intent(out ) :: value + ! integer , intent(in ) :: datatype + ! integer , intent(out ) :: status + ! end subroutine CTA_Tree_GetItemValue + !end interface + + ! \brief Print a COSTA tree to STDOUT + ! + ! \note + ! + ! \param htree I handle of the tree + ! \param status O CTA_OK if successful + ! + interface CTA_F90_Tree_Print + subroutine CTA_Tree_Print( htree, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: htree + integer , intent(out ) :: status + end subroutine CTA_Tree_Print + end interface + + +end module cta_f90_tree + diff --git a/costa/native/cta_f90/generated/cta_f90_treevector.f90 b/costa/native/cta_f90/generated/cta_f90_treevector.f90 new file mode 100644 index 000000000..d22ee5e85 --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_treevector.f90 @@ -0,0 +1,1146 @@ +module cta_f90_treevector + + implicit none + + public + + ! \brief Create a tree-vector. + ! + ! \param name I name of the tree-vector, this is a human readable + ! string used for (debug) output and not by the algorithms + ! itself + ! \param tag I tag of this tree-vector + ! \param treevec O new tree-vector + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_Create + subroutine CTA_TreeVector_Create( name, tag, treevec, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + character(len=*) , intent(in ) :: name + character(len=*) , intent(in ) :: tag + integer(CTA_HANDLE_IKIND) , intent(out ) :: treevec + integer , intent(out ) :: status + end subroutine CTA_TreeVector_Create + end interface + + ! \brief Duplicate a tree-vector. + ! + ! \note Duplication means that a new tree-vector is created that is identical to + ! the originating tree-vector. All data in the original tree-vector is also copied. + ! + ! \param treevec1 I handle of treevector to be duplicated + ! \param treevec2 O receives handle to duplicate + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_Duplicate + subroutine CTA_TreeVector_Duplicate( treevec1, treevec2, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec1 + integer(CTA_HANDLE_IKIND) , intent(out ) :: treevec2 + integer , intent(out ) :: status + end subroutine CTA_TreeVector_Duplicate + end interface + + ! \brief Define a tree-vector to be a concatination of other tree-vectors. + ! + ! \note The concatenation is done by reference (handle). The sub-tree-vectors that + ! are concatenated are not copied. + ! + ! \param treevec1 I tree-vector that will be concatenation of the sub-tree-vectors provided in parameter treevecs + ! \param treevecs I array of the sub-tree-vectors + ! \param ntreevecs I number of sub-tree-vectors in treevecs + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_Conc + subroutine CTA_TreeVector_Conc( treevec1, treevecs, ntreevecs, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec1 + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevecs(*) + integer , intent(in ) :: ntreevecs + integer , intent(out ) :: status + end subroutine CTA_TreeVector_Conc + end interface + + ! \brief Get the handle of a sub-tree-vectors using its tag. + ! + ! \note This is done by reference (handle). The handle of the + ! returned sub-tree-vector is not a copy + ! + ! \param treevec I Tree-vector + ! \param tag I tag of the requested sub-tree-vector + ! \param subtreevec O receives handle of the requested sub-tree-vectors, this is by reference, not a copy + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_GetSubTreeVec + subroutine CTA_TreeVector_GetSubTreeVec( treevec, tag, subtreevec, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + character(len=*) , intent(in ) :: tag + integer(CTA_HANDLE_IKIND) , intent(out ) :: subtreevec + integer , intent(out ) :: status + end subroutine CTA_TreeVector_GetSubTreeVec + end interface + + ! \brief Get the tag of a sub-tree-vector using its index (starting with 0). + ! + ! \param treevec I Tree-vector + ! \param index I index of the requested sub-tree-vector + ! \param tag O String of standard length containnig the tag + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_GetSubTreeVecId + subroutine CTA_TreeVector_GetSubTreeVecId( treevec, index, tag, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + integer , intent(in ) :: index + character(len=*) , intent(out ) :: tag(*) + integer , intent(out ) :: status + end subroutine CTA_TreeVector_GetSubTreeVecId + end interface + + ! \brief Get the handle of a first-layer sub-tree-vector using its index. + ! + ! \note The concatination is done by reference (handle). The handle of the + ! returned sub-tree-vector is not a copy + ! + ! \param treevec I Tree-vector + ! \param index I index of requested sub-tree-vector. Note that the first sub-tree-vector has index 1. + ! \param subtreevec O receives handle of the requested sub-tree-vector, this is by reference, not a copy + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_GetSubTreeVecIndex + subroutine CTA_TreeVector_GetSubTreeVecIndex( treevec, index, subtreevec, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + integer , intent(in ) :: index + integer(CTA_HANDLE_IKIND) , intent(out ) :: subtreevec + integer , intent(out ) :: status + end subroutine CTA_TreeVector_GetSubTreeVecIndex + end interface + + ! \brief Get number of sub-treevectors + ! + ! \note In case of a leaf 0 is returned + ! + ! \param treevec I Tree-vector + ! \param numSubTrees O Number of sub-treevectors + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_GetNumSubTree + subroutine CTA_TreeVector_GetNumSubTree( treevec, numSubTrees, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + integer , intent(out ) :: numSubTrees + integer , intent(out ) :: status + end subroutine CTA_TreeVector_GetNumSubTree + end interface + + ! \brief Get the tag of the tree-vector. + ! + ! Note tag should be large enough to hold the result + ! length of CTA_STRLEN_TAG is always save (no internal protection) + ! + ! \param treevec I Tree-vector + ! \param tag O receives the tag of the requested sub-tree-vector (see note) + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_GetTag + subroutine CTA_TreeVector_GetTag( treevec, tag, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + character(len=*) , intent(out ) :: tag + integer , intent(out ) :: status + end subroutine CTA_TreeVector_GetTag + end interface + + ! \brief Set the values of the tree-vector + ! + ! \note This operation is only possible when all data elements in the tree-vector + ! are of the same type and the size of the tree-vector corresponds to the + ! size of the input vector. + ! + ! \param treevec IO TreeVector + ! \param hvec I handle of the vector containing new values (see note) + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_SetVec + subroutine CTA_TreeVector_SetVec( treevec, hvec, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + integer , intent(out ) :: status + end subroutine CTA_TreeVector_SetVec + end interface + + ! \brief Get the values of the tree-vector. + ! + ! \note This operation is only possible when all data elements in the tree-vector + ! are of the same type and the size of the tree-vector corresponds to the + ! vector size. + ! + ! \param treevec I Tree-vector + ! \param hvec O Vector that is receiving the values; must exist before calling + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_GetVec + subroutine CTA_TreeVector_GetVec( treevec, hvec, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + integer(CTA_HANDLE_IKIND) , intent(out ) :: hvec + integer , intent(out ) :: status + end subroutine CTA_TreeVector_GetVec + end interface + + ! \brief Axpy operation between two tree-vectors. + ! + ! \note Axpy: y=alpha*x+y. Add alpha times tree-vector x to + ! this tree-vector (y). + ! + ! \param y IO Tree-vector (y) + ! \param alpha I scalar + ! \param x I Tree-vector (x) + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_Axpy + subroutine CTA_TreeVector_Axpy( y, alpha, x, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: y + real(8) , intent(in ) :: alpha + integer(CTA_HANDLE_IKIND) , intent(in ) :: x + integer , intent(out ) :: status + end subroutine CTA_TreeVector_Axpy + end interface + + ! \brief Compute dot product of two tree-vectors. + ! + ! \note dotprod = sum[all i] (treevec1_i * treevec2_i) + ! + ! \param treevec1 I first tree-vector + ! \param treevec2 I second tree-vector + ! \param dotprod O receives the dot product + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_Dot + subroutine CTA_TreeVector_Dot( treevec1, treevec2, dotprod, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec1 + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec2 + real(8) , intent(out ) :: dotprod + integer , intent(out ) :: status + end subroutine CTA_TreeVector_Dot + end interface + + ! \brief Compute the 2-norm of a tree-vector. + ! + ! \param treevec1 I Tree-vector + ! \param nrm2 O receives the 2-norm + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_Nrm2 + subroutine CTA_TreeVector_Nrm2( treevec1, nrm2, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec1 + real(8) , intent(out ) :: nrm2 + integer , intent(out ) :: status + end subroutine CTA_TreeVector_Nrm2 + end interface + + ! \brief Copy a tree-vector + ! + ! \note The two tree-vectors must be compatible: same structure and datatypes. + ! + ! \param treevec1 I sending tree-vector + ! \param treevec2 O receiving tree-vector + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_Copy + subroutine CTA_TreeVector_Copy( treevec1, treevec2, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec1 + integer(CTA_HANDLE_IKIND) , intent(out ) :: treevec2 + integer , intent(out ) :: status + end subroutine CTA_TreeVector_Copy + end interface + + ! \brief Set whole tree-vector equal to a constant value. + ! + ! \note This method can only be used if all elements of the tree-vector + ! have the same data type. + ! + ! \param treevec IO TreeVector + ! \param val I value to set + ! \param datatype I data type of val, must be same as data type of tree-vector + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_SetConstant + module procedure CTA_TreeVector_SetConstant_integer + module procedure CTA_TreeVector_SetConstant_real4 + module procedure CTA_TreeVector_SetConstant_real8 + end interface + + ! \brief Scale tree-vector. + ! + ! \param treevec IO handle of tree-vector + ! \param alpha I scalar + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_Scal + subroutine CTA_TreeVector_Scal( treevec, alpha, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + real(8) , intent(in ) :: alpha + integer , intent(out ) :: status + end subroutine CTA_TreeVector_Scal + end interface + + ! \brief Set all values of the tree-vector. + ! + ! \note This method can only be used if all elements of the tree-vector + ! are of the same data type. + ! + ! \param treevec IO Tree-vector + ! \param val I values to be set + ! \param nval I number of elements in val + ! \param datatype I data type of *val, must be the same as data type of elements in tree-vector + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_SetVals + module procedure CTA_TreeVector_SetVals_integer_1d + module procedure CTA_TreeVector_SetVals_integer_2d + module procedure CTA_TreeVector_SetVals_integer_3d + module procedure CTA_TreeVector_SetVals_integer_4d + module procedure CTA_TreeVector_SetVals_integer_5d + module procedure CTA_TreeVector_SetVals_integer_6d + module procedure CTA_TreeVector_SetVals_integer_7d + module procedure CTA_TreeVector_SetVals_real4_1d + module procedure CTA_TreeVector_SetVals_real4_2d + module procedure CTA_TreeVector_SetVals_real4_3d + module procedure CTA_TreeVector_SetVals_real4_4d + module procedure CTA_TreeVector_SetVals_real4_5d + module procedure CTA_TreeVector_SetVals_real4_6d + module procedure CTA_TreeVector_SetVals_real4_7d + module procedure CTA_TreeVector_SetVals_real8_1d + module procedure CTA_TreeVector_SetVals_real8_2d + module procedure CTA_TreeVector_SetVals_real8_3d + module procedure CTA_TreeVector_SetVals_real8_4d + module procedure CTA_TreeVector_SetVals_real8_5d + module procedure CTA_TreeVector_SetVals_real8_6d + module procedure CTA_TreeVector_SetVals_real8_7d + end interface + + ! \brief Get all values of the tree-vector. + ! + ! \note This method can only be used if all elements of the tree-vector + ! are of the same data type. + ! + ! \param treevec I Tree-vector + ! \param val O receives the values + ! \param nval I number of elements in val + ! \param datatype I data type of *val, must be the same as data type of elements in tree-vector + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_GetVals + module procedure CTA_TreeVector_GetVals_integer_1d + module procedure CTA_TreeVector_GetVals_integer_2d + module procedure CTA_TreeVector_GetVals_integer_3d + module procedure CTA_TreeVector_GetVals_integer_4d + module procedure CTA_TreeVector_GetVals_integer_5d + module procedure CTA_TreeVector_GetVals_integer_6d + module procedure CTA_TreeVector_GetVals_integer_7d + module procedure CTA_TreeVector_GetVals_real4_1d + module procedure CTA_TreeVector_GetVals_real4_2d + module procedure CTA_TreeVector_GetVals_real4_3d + module procedure CTA_TreeVector_GetVals_real4_4d + module procedure CTA_TreeVector_GetVals_real4_5d + module procedure CTA_TreeVector_GetVals_real4_6d + module procedure CTA_TreeVector_GetVals_real4_7d + module procedure CTA_TreeVector_GetVals_real8_1d + module procedure CTA_TreeVector_GetVals_real8_2d + module procedure CTA_TreeVector_GetVals_real8_3d + module procedure CTA_TreeVector_GetVals_real8_4d + module procedure CTA_TreeVector_GetVals_real8_5d + module procedure CTA_TreeVector_GetVals_real8_6d + module procedure CTA_TreeVector_GetVals_real8_7d + end interface + + ! \brief Set single value of the tree-vector. + ! + ! \param treevec IO Tree-Vector + ! \param i I index of value in tree-vector + ! \param val I value to be set + ! \param datatype I data type of *val, must be the same as data type of element in tree-vector + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_SetVal + module procedure CTA_TreeVector_SetVal_integer + module procedure CTA_TreeVector_SetVal_real4 + module procedure CTA_TreeVector_SetVal_real8 + end interface + + ! \brief Get single value of the tree-vector. + ! + ! \param treevec I Tree-vector + ! \param i I index in value in tree-vector + ! \param val O returned value + ! \param datatype I data type of *val, must be the same as data type of element in tree-vector + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_GetVal + module procedure CTA_TreeVector_GetVal_integer + module procedure CTA_TreeVector_GetVal_real4 + module procedure CTA_TreeVector_GetVal_real8 + end interface + + ! \brief Get size of tree-vector. + ! + ! \param treevec I Tree-vector + ! \param n O receives size of tree-vector + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_GetSize + subroutine CTA_TreeVector_GetSize( treevec, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + integer , intent(out ) :: n + integer , intent(out ) :: status + end subroutine CTA_TreeVector_GetSize + end interface + + ! \brief Export tree-vector. + ! + ! Can export tree-vector to file or pack object.\n + ! usrdata must contain a handle of the file or pack object to be used.\n + ! Dependency: CTA_Vector_Export() + ! + ! + ! \param treevec I Tree-vector + ! \param usrdata I export properties + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_Export + subroutine CTA_TreeVector_Export( treevec, usrdata, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + integer(CTA_HANDLE_IKIND) , intent(in ) :: usrdata + integer , intent(out ) :: status + end subroutine CTA_TreeVector_Export + end interface + + ! \brief Import Tree-vector. + ! + ! Can import tree-vector from file or pack object.\n + ! usrdata must contain a handle of the file or pack object to be used.\n + ! Dependency: CTA_Vector_Import() + ! + ! + ! \param treevec I Tree-vector + ! \param usrdata I import properties + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_Import + subroutine CTA_TreeVector_Import( treevec, usrdata, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + integer(CTA_HANDLE_IKIND) , intent(in ) :: usrdata + integer , intent(out ) :: status + end subroutine CTA_TreeVector_Import + end interface + + ! \brief Import Tree-vector as flat vector. + ! + ! Can import tree-vector from netcdf file.\n + ! usrdata must contain a handle of the file .\n + ! Dependency: CTA_Vector_VImport() + ! + ! + ! \param treevec I Tree-vector + ! \param usrdata I import properties + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_VImport + subroutine CTA_TreeVector_VImport( treevec, usrdata, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + integer(CTA_HANDLE_IKIND) , intent(in ) :: usrdata + integer , intent(out ) :: status + end subroutine CTA_TreeVector_VImport + end interface + + ! \brief Free Tree-vector. + ! + ! \param treevec I handle of tree-vector + ! \param recursive I also free all sub-tree-vectors, yes: CTA_TRUE or no: CTA_FALSE + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_Free + subroutine CTA_TreeVector_Free( treevec, recursive, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec(*) + integer , intent(in ) :: recursive + integer , intent(out ) :: status + end subroutine CTA_TreeVector_Free + end interface + + ! \brief Print tree-vector information. + ! + ! Gives following information:\n\n + ! Tree-vector information:\n + ! tag: [tag]\n + ! nsubtreevecs: [number of sub-tree-vectors]\n + ! + ! If nsubtreevecs > 0: recursively prints all sub-tree-vectors + ! Else prints:\n + ! leaf: yes\n + ! tree-vector size (leaf) + ! + ! \param treevec I tree-vector + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_Info + subroutine CTA_TreeVector_Info( treevec, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + integer , intent(out ) :: status + end subroutine CTA_TreeVector_Info + end interface + + ! \brief Perform the matrix multiplication C:=alpha*op(A)*op(B)+beta*C + ! where op(X)=X, X^T. However C and A are matrices of wich the columns are + ! tree-vectors + ! + ! \param sC IO array of tree-vector (matrix C) + ! \param nc I number of columns of C (dimension of sC) + ! \param transa I transpose flag CTA_TRUE/CTA_FALSE for matrix A (not supported) + ! \param transb I transpose flag CTA_TRUE/CTA_FALSE for matrix B + ! \param alpha I scalar + ! \param sA I handle of matrix A + ! \param na I number of columns of A (dimension of sA) + ! \param mB I handle of matrix B + ! \param beta I scalar + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_Gemm + subroutine CTA_TreeVector_Gemm( sC, nc, transa, transb, alpha, sA, na, mB, beta, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: sC + integer , intent(in ) :: nc + integer , intent(in ) :: transa + integer , intent(in ) :: transb + real(8) , intent(in ) :: alpha + integer(CTA_HANDLE_IKIND) , intent(in ) :: sA(*) + integer , intent(in ) :: na + integer(CTA_HANDLE_IKIND) , intent(in ) :: mB + real(8) , intent(in ) :: beta + integer , intent(out ) :: status + end subroutine CTA_TreeVector_Gemm + end interface + + ! \brief Perform given operation on all leafs of the treevector + ! + ! \param treevec1 I handle of first COSTA tree-vector + ! \param treevec2 I handle of second COSTA tree-vector + ! \param treevec I handle of a COSTA tree-vector + ! \param op I operation to perform on the leafs + ! \param arg I additional argument of operation + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_OpOnLeafs + subroutine CTA_TreeVector_OpOnLeafs( treevec1, treevec2, op, arg, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec1 + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec2 + integer(CTA_HANDLE_IKIND) , intent(in ) :: op + integer(CTA_HANDLE_IKIND) , intent(in ) :: arg + integer , intent(out ) :: status + end subroutine CTA_TreeVector_OpOnLeafs + end interface + + ! \brief Elementwise division of two vectors + ! \note y:=y./x + ! + ! \param y I handle of a COSTA tree-vector (y) + ! \param x I handle of a COSTA tree-vector (y) + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_ElmDiv + subroutine CTA_TreeVector_ElmDiv( y, x, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: y + integer(CTA_HANDLE_IKIND) , intent(in ) :: x + integer , intent(out ) :: status + end subroutine CTA_TreeVector_ElmDiv + end interface + + ! \brief Elementwise multiplication of two vectors + ! \note y:=y.*x + ! + ! \param y I handle of a COSTA tree-vector (y) + ! \param x I handle of a COSTA tree-vector (y) + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_ElmProd + subroutine CTA_TreeVector_ElmProd( y, x, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: y + integer(CTA_HANDLE_IKIND) , intent(in ) :: x + integer , intent(out ) :: status + end subroutine CTA_TreeVector_ElmProd + end interface + + ! \brief Elementwise sqare root + ! \note y:=sqrt(y) + ! + ! \param y I handle of a COSTA tree-vector (y) + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_ElmSqrt + subroutine CTA_TreeVector_ElmSqrt( y, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: y + integer , intent(out ) :: status + end subroutine CTA_TreeVector_ElmSqrt + end interface + + ! \brief Set nocompute flag of a sub-tree vector + ! + ! When this flag is set, the values of the sub-treevector will + ! be ignored in all basic vector operations (including asking the + ! total length of the tree-vector). This propertie is used for + ! additionally adding some meta information + ! + ! \note the nocompute flag is set at the level of the parent! + ! so the "isolated" sub-treevector can be used in basic vector + ! operations. + ! + ! \param x I handle of a COSTA tree-vector (y) + ! \param tag I tag of sub-treevector + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_TreeVector_SetSubTreeNocompute + subroutine CTA_TreeVector_SetSubTreeNocompute( x, tag, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: x + character(len=*) , intent(in ) :: tag + integer , intent(out ) :: status + end subroutine CTA_TreeVector_SetSubTreeNocompute + end interface + + ! \brief Increase the reference count of a treevector and all subtrevectors + ! + ! \param treevec I handle of a COSTA tree-vector + ! \param status O error status: CTA_OK if successful + ! + ! + interface CTA_F90_TreeVector_IncRefCount + subroutine CTA_TreeVector_IncRefCount( treevec, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + integer , intent(out ) :: status + end subroutine CTA_TreeVector_IncRefCount + end interface + + +contains + + subroutine CTA_TreeVector_SetConstant_integer( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + use CTA_F90_Parameters, only : CTA_OK + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + integer , intent(in ) :: val + integer , intent(out ) :: status + call CTA_TreeVector_SetConstant( treevec, val, CTA_INTEGER, status ) + end subroutine CTA_TreeVector_SetConstant_integer + + subroutine CTA_TreeVector_SetConstant_real4( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + use CTA_F90_Parameters, only : CTA_OK + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + real(4) , intent(in ) :: val + integer , intent(out ) :: status + call CTA_TreeVector_SetConstant( treevec, val, CTA_REAL, status ) + end subroutine CTA_TreeVector_SetConstant_real4 + + subroutine CTA_TreeVector_SetConstant_real8( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + use CTA_F90_Parameters, only : CTA_OK + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + real(8) , intent(in ) :: val + integer , intent(out ) :: status + call CTA_TreeVector_SetConstant( treevec, val, CTA_DOUBLE, status ) + end subroutine CTA_TreeVector_SetConstant_real8 + + subroutine CTA_TreeVector_SetVals_integer_1d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + integer , intent(in ) :: val(:) + integer , intent(out ) :: status + call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_INTEGER, status ) + end subroutine CTA_TreeVector_SetVals_integer_1d + + subroutine CTA_TreeVector_SetVals_integer_2d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + integer , intent(in ) :: val(:,:) + integer , intent(out ) :: status + call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_INTEGER, status ) + end subroutine CTA_TreeVector_SetVals_integer_2d + + subroutine CTA_TreeVector_SetVals_integer_3d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + integer , intent(in ) :: val(:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_INTEGER, status ) + end subroutine CTA_TreeVector_SetVals_integer_3d + + subroutine CTA_TreeVector_SetVals_integer_4d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + integer , intent(in ) :: val(:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_INTEGER, status ) + end subroutine CTA_TreeVector_SetVals_integer_4d + + subroutine CTA_TreeVector_SetVals_integer_5d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + integer , intent(in ) :: val(:,:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_INTEGER, status ) + end subroutine CTA_TreeVector_SetVals_integer_5d + + subroutine CTA_TreeVector_SetVals_integer_6d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + integer , intent(in ) :: val(:,:,:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_INTEGER, status ) + end subroutine CTA_TreeVector_SetVals_integer_6d + + subroutine CTA_TreeVector_SetVals_integer_7d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + integer , intent(in ) :: val(:,:,:,:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_INTEGER, status ) + end subroutine CTA_TreeVector_SetVals_integer_7d + + subroutine CTA_TreeVector_SetVals_real4_1d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + real(4) , intent(in ) :: val(:) + integer , intent(out ) :: status + call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_REAL, status ) + end subroutine CTA_TreeVector_SetVals_real4_1d + + subroutine CTA_TreeVector_SetVals_real4_2d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + real(4) , intent(in ) :: val(:,:) + integer , intent(out ) :: status + call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_REAL, status ) + end subroutine CTA_TreeVector_SetVals_real4_2d + + subroutine CTA_TreeVector_SetVals_real4_3d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + real(4) , intent(in ) :: val(:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_REAL, status ) + end subroutine CTA_TreeVector_SetVals_real4_3d + + subroutine CTA_TreeVector_SetVals_real4_4d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + real(4) , intent(in ) :: val(:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_REAL, status ) + end subroutine CTA_TreeVector_SetVals_real4_4d + + subroutine CTA_TreeVector_SetVals_real4_5d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + real(4) , intent(in ) :: val(:,:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_REAL, status ) + end subroutine CTA_TreeVector_SetVals_real4_5d + + subroutine CTA_TreeVector_SetVals_real4_6d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + real(4) , intent(in ) :: val(:,:,:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_REAL, status ) + end subroutine CTA_TreeVector_SetVals_real4_6d + + subroutine CTA_TreeVector_SetVals_real4_7d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + real(4) , intent(in ) :: val(:,:,:,:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_REAL, status ) + end subroutine CTA_TreeVector_SetVals_real4_7d + + subroutine CTA_TreeVector_SetVals_real8_1d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + real(8) , intent(in ) :: val(:) + integer , intent(out ) :: status + call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_DOUBLE, status ) + end subroutine CTA_TreeVector_SetVals_real8_1d + + subroutine CTA_TreeVector_SetVals_real8_2d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + real(8) , intent(in ) :: val(:,:) + integer , intent(out ) :: status + call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_DOUBLE, status ) + end subroutine CTA_TreeVector_SetVals_real8_2d + + subroutine CTA_TreeVector_SetVals_real8_3d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + real(8) , intent(in ) :: val(:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_DOUBLE, status ) + end subroutine CTA_TreeVector_SetVals_real8_3d + + subroutine CTA_TreeVector_SetVals_real8_4d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + real(8) , intent(in ) :: val(:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_DOUBLE, status ) + end subroutine CTA_TreeVector_SetVals_real8_4d + + subroutine CTA_TreeVector_SetVals_real8_5d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + real(8) , intent(in ) :: val(:,:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_DOUBLE, status ) + end subroutine CTA_TreeVector_SetVals_real8_5d + + subroutine CTA_TreeVector_SetVals_real8_6d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + real(8) , intent(in ) :: val(:,:,:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_DOUBLE, status ) + end subroutine CTA_TreeVector_SetVals_real8_6d + + subroutine CTA_TreeVector_SetVals_real8_7d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + real(8) , intent(in ) :: val(:,:,:,:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_DOUBLE, status ) + end subroutine CTA_TreeVector_SetVals_real8_7d + + subroutine CTA_TreeVector_GetVals_integer_1d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + integer , intent(out ) :: val(:) + integer , intent(out ) :: status + call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_INTEGER, status ) + end subroutine CTA_TreeVector_GetVals_integer_1d + + subroutine CTA_TreeVector_GetVals_integer_2d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + integer , intent(out ) :: val(:,:) + integer , intent(out ) :: status + call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_INTEGER, status ) + end subroutine CTA_TreeVector_GetVals_integer_2d + + subroutine CTA_TreeVector_GetVals_integer_3d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + integer , intent(out ) :: val(:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_INTEGER, status ) + end subroutine CTA_TreeVector_GetVals_integer_3d + + subroutine CTA_TreeVector_GetVals_integer_4d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + integer , intent(out ) :: val(:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_INTEGER, status ) + end subroutine CTA_TreeVector_GetVals_integer_4d + + subroutine CTA_TreeVector_GetVals_integer_5d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + integer , intent(out ) :: val(:,:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_INTEGER, status ) + end subroutine CTA_TreeVector_GetVals_integer_5d + + subroutine CTA_TreeVector_GetVals_integer_6d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + integer , intent(out ) :: val(:,:,:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_INTEGER, status ) + end subroutine CTA_TreeVector_GetVals_integer_6d + + subroutine CTA_TreeVector_GetVals_integer_7d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + integer , intent(out ) :: val(:,:,:,:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_INTEGER, status ) + end subroutine CTA_TreeVector_GetVals_integer_7d + + subroutine CTA_TreeVector_GetVals_real4_1d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + real(4) , intent(out ) :: val(:) + integer , intent(out ) :: status + call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_REAL, status ) + end subroutine CTA_TreeVector_GetVals_real4_1d + + subroutine CTA_TreeVector_GetVals_real4_2d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + real(4) , intent(out ) :: val(:,:) + integer , intent(out ) :: status + call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_REAL, status ) + end subroutine CTA_TreeVector_GetVals_real4_2d + + subroutine CTA_TreeVector_GetVals_real4_3d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + real(4) , intent(out ) :: val(:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_REAL, status ) + end subroutine CTA_TreeVector_GetVals_real4_3d + + subroutine CTA_TreeVector_GetVals_real4_4d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + real(4) , intent(out ) :: val(:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_REAL, status ) + end subroutine CTA_TreeVector_GetVals_real4_4d + + subroutine CTA_TreeVector_GetVals_real4_5d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + real(4) , intent(out ) :: val(:,:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_REAL, status ) + end subroutine CTA_TreeVector_GetVals_real4_5d + + subroutine CTA_TreeVector_GetVals_real4_6d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + real(4) , intent(out ) :: val(:,:,:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_REAL, status ) + end subroutine CTA_TreeVector_GetVals_real4_6d + + subroutine CTA_TreeVector_GetVals_real4_7d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + real(4) , intent(out ) :: val(:,:,:,:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_REAL, status ) + end subroutine CTA_TreeVector_GetVals_real4_7d + + subroutine CTA_TreeVector_GetVals_real8_1d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + real(8) , intent(out ) :: val(:) + integer , intent(out ) :: status + call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_DOUBLE, status ) + end subroutine CTA_TreeVector_GetVals_real8_1d + + subroutine CTA_TreeVector_GetVals_real8_2d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + real(8) , intent(out ) :: val(:,:) + integer , intent(out ) :: status + call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_DOUBLE, status ) + end subroutine CTA_TreeVector_GetVals_real8_2d + + subroutine CTA_TreeVector_GetVals_real8_3d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + real(8) , intent(out ) :: val(:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_DOUBLE, status ) + end subroutine CTA_TreeVector_GetVals_real8_3d + + subroutine CTA_TreeVector_GetVals_real8_4d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + real(8) , intent(out ) :: val(:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_DOUBLE, status ) + end subroutine CTA_TreeVector_GetVals_real8_4d + + subroutine CTA_TreeVector_GetVals_real8_5d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + real(8) , intent(out ) :: val(:,:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_DOUBLE, status ) + end subroutine CTA_TreeVector_GetVals_real8_5d + + subroutine CTA_TreeVector_GetVals_real8_6d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + real(8) , intent(out ) :: val(:,:,:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_DOUBLE, status ) + end subroutine CTA_TreeVector_GetVals_real8_6d + + subroutine CTA_TreeVector_GetVals_real8_7d( treevec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + real(8) , intent(out ) :: val(:,:,:,:,:,:,:) + integer , intent(out ) :: status + call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_DOUBLE, status ) + end subroutine CTA_TreeVector_GetVals_real8_7d + + subroutine CTA_TreeVector_SetVal_integer( treevec, i, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + use CTA_F90_Parameters, only : CTA_OK + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + integer , intent(in ) :: i + integer , intent(in ) :: val + integer , intent(out ) :: status + call CTA_TreeVector_SetVal( treevec, i, val, CTA_INTEGER, status ) + end subroutine CTA_TreeVector_SetVal_integer + + subroutine CTA_TreeVector_SetVal_real4( treevec, i, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + use CTA_F90_Parameters, only : CTA_OK + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + integer , intent(in ) :: i + real(4) , intent(in ) :: val + integer , intent(out ) :: status + call CTA_TreeVector_SetVal( treevec, i, val, CTA_REAL, status ) + end subroutine CTA_TreeVector_SetVal_real4 + + subroutine CTA_TreeVector_SetVal_real8( treevec, i, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + use CTA_F90_Parameters, only : CTA_OK + integer(CTA_HANDLE_IKIND) , intent(inout) :: treevec + integer , intent(in ) :: i + real(8) , intent(in ) :: val + integer , intent(out ) :: status + call CTA_TreeVector_SetVal( treevec, i, val, CTA_DOUBLE, status ) + end subroutine CTA_TreeVector_SetVal_real8 + + subroutine CTA_TreeVector_GetVal_integer( treevec, i, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + use CTA_F90_Parameters, only : CTA_OK + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + integer , intent(in ) :: i + integer , intent(out ) :: val + integer , intent(out ) :: status + call CTA_TreeVector_GetVal( treevec, i, val, CTA_INTEGER, status ) + end subroutine CTA_TreeVector_GetVal_integer + + subroutine CTA_TreeVector_GetVal_real4( treevec, i, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + use CTA_F90_Parameters, only : CTA_OK + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + integer , intent(in ) :: i + real(4) , intent(out ) :: val + integer , intent(out ) :: status + call CTA_TreeVector_GetVal( treevec, i, val, CTA_REAL, status ) + end subroutine CTA_TreeVector_GetVal_real4 + + subroutine CTA_TreeVector_GetVal_real8( treevec, i, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + use CTA_F90_Parameters, only : CTA_OK + integer(CTA_HANDLE_IKIND) , intent(in ) :: treevec + integer , intent(in ) :: i + real(8) , intent(out ) :: val + integer , intent(out ) :: status + call CTA_TreeVector_GetVal( treevec, i, val, CTA_DOUBLE, status ) + end subroutine CTA_TreeVector_GetVal_real8 + +end module cta_f90_treevector + diff --git a/costa/native/cta_f90/generated/cta_f90_util_methods.f90 b/costa/native/cta_f90/generated/cta_f90_util_methods.f90 new file mode 100644 index 000000000..ce421430c --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_util_methods.f90 @@ -0,0 +1,85 @@ +module cta_f90_util_methods + + implicit none + + public + + ! \brief print the predicted values and the observed values + ! + ! \param fgModel O Model of foreground run (with data + ! assimilation) or Vector with the predicted + ! values of the foreground run. + ! if set to CTA_NULL NaN will be printed as result + ! \param bgModel I Model of background run (without data + ! assimilation) or Vector with the predicted + ! values of the background run. + ! if set to CTA_NULL NaN will be printed as result + ! \param sObs I Stochastic observer + ! \param time I Corresponding time + ! \param file I Output file (note CTA_FILE_STDOUT prints to screen) + ! \param printHeader I Print header CTA_TRUE/CTA_FALSE + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Util_MethodsPrintObservations + subroutine CTA_Util_MethodsPrintObservations( fgModel, bgModel, sObs, time, file, printHeader, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(out ) :: fgModel + integer(CTA_HANDLE_IKIND) , intent(in ) :: bgModel + integer(CTA_HANDLE_IKIND) , intent(in ) :: sObs + integer(CTA_HANDLE_IKIND) , intent(in ) :: time + integer(CTA_HANDLE_IKIND) , intent(in ) :: file + integer , intent(in ) :: printHeader + integer , intent(out ) :: status + end subroutine CTA_Util_MethodsPrintObservations + end interface + + ! \brief Make an initial selection of the observations + ! + ! The selection of observations is based on the given simulation timespan + ! and the criterion provided by the model (CTA_Model_GetObsSelect) + ! + ! \note sObsSel is created and should be freed by the caller of this routine + ! + ! \param model I Model (for CTA_Model_GetObsSelect) + ! \param sObsAll I All observations + ! \param spanSim I Simulation timespan + ! \param sObsSel O Selection of observations + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Util_MethodsSelectObservations + subroutine CTA_Util_MethodsSelectObservations( model, sObsAll, spanSim, sObsSel, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: model + integer(CTA_HANDLE_IKIND) , intent(in ) :: sObsAll + integer(CTA_HANDLE_IKIND) , intent(in ) :: spanSim + integer(CTA_HANDLE_IKIND) , intent(out ) :: sObsSel + integer , intent(out ) :: status + end subroutine CTA_Util_MethodsSelectObservations + end interface + + ! \brief Create an output file for filter predictions at station + ! locations + ! + ! The routine CTA_Util_MethodsPrintObservations can be used for writing + ! the results. + ! \note the header is written by this call + ! + ! \param stationFile (I) Name of result file + ! \param fStationFile (O) Handle to result file + ! + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Util_MethodsOpenResultFile + subroutine CTA_Util_MethodsOpenResultFile( stationFile, fStationFile, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + character(len=*) , intent(in ) :: stationFile(*) + integer(CTA_HANDLE_IKIND) , intent(out ) :: fStationFile + integer , intent(out ) :: status + end subroutine CTA_Util_MethodsOpenResultFile + end interface + + +end module cta_f90_util_methods + diff --git a/costa/native/cta_f90/generated/cta_f90_util_sort.f90 b/costa/native/cta_f90/generated/cta_f90_util_sort.f90 new file mode 100644 index 000000000..3fca0c13e --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_util_sort.f90 @@ -0,0 +1,31 @@ +module cta_f90_util_sort + + implicit none + + public + + ! \brief Sort an integer array using the Quicksort algorithm. + ! An additional interger array is permutated in way as the unsorted array + ! + ! + ! \param list (input/output) array, dimension (N) + ! On entry, the array to be sorted. + ! On exit, list has been sorted into increasing order + ! + ! \param indx (input/output) array, dimension (N) + ! + ! \param n (input) INTEGER + ! The length of the array D. + ! + ! + interface CTA_F90_Util_IQSort2 + subroutine CTA_Util_IQSort2( list, indx, n ) + integer , intent(inout) :: list + integer , intent(inout) :: indx + integer , intent(in ) :: n + end subroutine CTA_Util_IQSort2 + end interface + + +end module cta_f90_util_sort + diff --git a/costa/native/cta_f90/generated/cta_f90_util_statistics.f90 b/costa/native/cta_f90/generated/cta_f90_util_statistics.f90 new file mode 100644 index 000000000..e2c7e5879 --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_util_statistics.f90 @@ -0,0 +1,44 @@ +module cta_f90_util_statistics + + implicit none + + public + + ! \brief Initialize the random generator. + ! \note only initialize the random generator once. + ! \param seed I some positive initial seed + ! + interface CTA_F90_rand_seed + subroutine CTA_rand_seed( seed ) + integer , intent(in ) :: seed + end subroutine CTA_rand_seed + end interface + + ! \brief Get an uniform random number from the interval [0 1]. + ! + ! \param x O receives the random number + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_rand_u + subroutine CTA_rand_u( x, status ) + real(8) , intent(out ) :: x + integer , intent(out ) :: status + end subroutine CTA_rand_u + end interface + + ! \brief Get a random number from a normal distribution whit mean 0 and + ! standard deviation 1. + ! + ! \param x O receives the random number + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_rand_n + subroutine CTA_rand_n( x, status ) + real(8) , intent(out ) :: x + integer , intent(out ) :: status + end subroutine CTA_rand_n + end interface + + +end module cta_f90_util_statistics + diff --git a/costa/native/cta_f90/generated/cta_f90_vector.f90 b/costa/native/cta_f90/generated/cta_f90_vector.f90 new file mode 100644 index 000000000..3ccf2899d --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_vector.f90 @@ -0,0 +1,1013 @@ +module cta_f90_vector + + implicit none + + public + + ! \brief Create a new class (=implementation) of a COSTA vector component. + ! + ! \param name I name of the new vector class + ! \param h_func I COSTA function handles for functions that implement class. + ! Missing functions must have value CTA_NULL + ! \param hveccl O receives handle of new vector class + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_DefineClass + subroutine CTA_Vector_DefineClass( name, h_func, hveccl, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + character(len=*) , intent(in ) :: name + integer(CTA_HANDLE_IKIND) , intent(in ) :: h_func(*) + integer(CTA_HANDLE_IKIND) , intent(out ) :: hveccl + integer , intent(out ) :: status + end subroutine CTA_Vector_DefineClass + end interface + + ! \brief Duplicate a vector object. + ! + ! \note Only size, data type and type (class) are duplicated, the content is not + ! copied. + ! + ! \param hvector1 I handle of vector to be duplicated + ! \param hvector2 O receives handle of new duplicate vector, empty before calling + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_Duplicate + subroutine CTA_Vector_Duplicate( hvector1, hvector2, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvector1 + integer(CTA_HANDLE_IKIND) , intent(out ) :: hvector2 + integer , intent(out ) :: status + end subroutine CTA_Vector_Duplicate + end interface + + ! \brief Create a new vector. + ! + ! \note + ! + ! \param hveccl I vector class of new vector + ! \param n I number of elements + ! \param datatype I data type of elements in vector + ! \param userdata IO user data for creation (depends on class) + ! \param hvector O receives handle of new vector + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_Create + subroutine CTA_Vector_Create( hveccl, n, datatype, userdata, hvector, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hveccl + integer , intent(in ) :: n + integer , intent(in ) :: datatype + integer(CTA_HANDLE_IKIND) , intent(inout) :: userdata + integer(CTA_HANDLE_IKIND) , intent(out ) :: hvector + integer , intent(out ) :: status + end subroutine CTA_Vector_Create + end interface + + ! \brief Get size of vector. + ! + ! \param hvec I handle of vector + ! \param n O receives number of elements + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_GetSize + subroutine CTA_Vector_GetSize( hvec, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + integer , intent(out ) :: n + integer , intent(out ) :: status + end subroutine CTA_Vector_GetSize + end interface + + ! \brief Get a copy of a single element in the vector. + ! + ! \param hvec I handle of vector + ! \param i I index of element + ! \param vals O receives copy of value in vector, must exist before calling + ! \param datatype I data type of *vals, must be the same as data type of elements in vector + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_GetVal + module procedure CTA_Vector_GetVal_integer + module procedure CTA_Vector_GetVal_real4 + module procedure CTA_Vector_GetVal_real8 + end interface + + ! \brief Get a copy of all elements in the vector. + ! + ! \param hvec I handle of vector + ! \param vals O receives copy of all elements in vector, must exist before calling + ! \param n I length of array vals + ! \param datatype I data type of *vals, must be the same as data type of elements in vector + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_GetVals + module procedure CTA_Vector_GetVals_integer_1d + module procedure CTA_Vector_GetVals_integer_2d + module procedure CTA_Vector_GetVals_integer_3d + module procedure CTA_Vector_GetVals_integer_4d + module procedure CTA_Vector_GetVals_integer_5d + module procedure CTA_Vector_GetVals_integer_6d + module procedure CTA_Vector_GetVals_integer_7d + module procedure CTA_Vector_GetVals_real4_1d + module procedure CTA_Vector_GetVals_real4_2d + module procedure CTA_Vector_GetVals_real4_3d + module procedure CTA_Vector_GetVals_real4_4d + module procedure CTA_Vector_GetVals_real4_5d + module procedure CTA_Vector_GetVals_real4_6d + module procedure CTA_Vector_GetVals_real4_7d + module procedure CTA_Vector_GetVals_real8_1d + module procedure CTA_Vector_GetVals_real8_2d + module procedure CTA_Vector_GetVals_real8_3d + module procedure CTA_Vector_GetVals_real8_4d + module procedure CTA_Vector_GetVals_real8_5d + module procedure CTA_Vector_GetVals_real8_6d + module procedure CTA_Vector_GetVals_real8_7d + end interface + + ! \brief Set a copy of an element in the vector. + ! + ! \note The value is copied in the vector. + ! + ! \param hvec IO handle of vector + ! \param i I index of element + ! \param val I new value that is copied into element at given index + ! \param datatype I data type of *val, must be the same as data type of elements in vector + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_SetVal + module procedure CTA_Vector_SetVal_integer + module procedure CTA_Vector_SetVal_real4 + module procedure CTA_Vector_SetVal_real8 + module procedure CTA_Vector_SetVal_char + end interface + + ! \brief Set all elementes in the vector. + ! + ! \note The values are copied in the vector. + ! + ! \param hvec IO handle of vector + ! \param vals I values that need to be copied to vector + ! \param n I number of elements in vals + ! \param datatype I data type of *vals, must be the same as data type of elements in vector + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_SetVals + module procedure CTA_Vector_SetVals_integer_1d + module procedure CTA_Vector_SetVals_integer_2d + module procedure CTA_Vector_SetVals_integer_3d + module procedure CTA_Vector_SetVals_integer_4d + module procedure CTA_Vector_SetVals_integer_5d + module procedure CTA_Vector_SetVals_integer_6d + module procedure CTA_Vector_SetVals_integer_7d + module procedure CTA_Vector_SetVals_real4_1d + module procedure CTA_Vector_SetVals_real4_2d + module procedure CTA_Vector_SetVals_real4_3d + module procedure CTA_Vector_SetVals_real4_4d + module procedure CTA_Vector_SetVals_real4_5d + module procedure CTA_Vector_SetVals_real4_6d + module procedure CTA_Vector_SetVals_real4_7d + module procedure CTA_Vector_SetVals_real8_1d + module procedure CTA_Vector_SetVals_real8_2d + module procedure CTA_Vector_SetVals_real8_3d + module procedure CTA_Vector_SetVals_real8_4d + module procedure CTA_Vector_SetVals_real8_5d + module procedure CTA_Vector_SetVals_real8_6d + module procedure CTA_Vector_SetVals_real8_7d + end interface + + ! \brief Scale a vector. + ! + ! \note scale: x=alpha*x + ! + ! \param hvec IO handle of vector + ! \param alpha I scalar + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_Scal + subroutine CTA_Vector_Scal( hvec, alpha, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + real(8) , intent(in ) :: alpha + integer , intent(out ) :: status + end subroutine CTA_Vector_Scal + end interface + + ! \brief Copy a vector + ! + ! \note This function copies the vector content (y=x), but does not make a new vector. + ! + ! \param hvec_x I handle of sending vector + ! \param hvec_y O handle of receiving vector + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_Copy + subroutine CTA_Vector_Copy( hvec_x, hvec_y, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec_x + integer(CTA_HANDLE_IKIND) , intent(out ) :: hvec_y + integer , intent(out ) :: status + end subroutine CTA_Vector_Copy + end interface + + ! \brief Operation axpy for two vectors x and y. + ! + ! \note axpy: y=alpha*x+y + ! + ! \param hvec_y IO handle of vector y + ! \param alpha I scalar + ! \param hvec_x I handle of vector x + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_Axpy + subroutine CTA_Vector_Axpy( hvec_y, alpha, hvec_x, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec_y + real(8) , intent(in ) :: alpha + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec_x + integer , intent(out ) :: status + end subroutine CTA_Vector_Axpy + end interface + + ! \brief Dot product operation of two vectors. + ! + ! \note dot: dotprod=x^t*y + ! + ! \param hvec_x I handle of vector x + ! \param hvec_y I handle of vector y + ! \param dotprod O receives dot product + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_Dot + subroutine CTA_Vector_Dot( hvec_x, hvec_y, dotprod, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec_x + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec_y + real(8) , intent(out ) :: dotprod + integer , intent(out ) :: status + end subroutine CTA_Vector_Dot + end interface + + ! \brief Compute 2-norm of vector. + ! + ! \note 2-norm: sqrt(x^t*x) + ! + ! \param hvec_x I handle of vector x + ! \param norm2 O receives 2-norm + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_Nrm2 + subroutine CTA_Vector_Nrm2( hvec_x, norm2, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec_x + real(8) , intent(out ) :: norm2 + integer , intent(out ) :: status + end subroutine CTA_Vector_Nrm2 + end interface + + ! \brief Find index of element in vector with largest absolute value + ! + ! \param hvec_x I handle of vector + ! \param iloc O receives index of largest absolute value + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_Amax + subroutine CTA_Vector_Amax( hvec_x, iloc, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec_x + integer , intent(out ) :: iloc + integer , intent(out ) :: status + end subroutine CTA_Vector_Amax + end interface + + ! \brief Find largest length of elements in vector. + ! + ! \note e.g. length of string in a vector of strings + ! + ! \param hvec_x I handle of vector + ! \param maxlen O receives largest length of elements in vector + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_GetMaxLen + subroutine CTA_Vector_GetMaxLen( hvec_x, maxlen, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec_x + integer , intent(out ) :: maxlen + integer , intent(out ) :: status + end subroutine CTA_Vector_GetMaxLen + end interface + + ! \brief Export a vector to file, stdout or pack object. + ! + ! \Note CTA_DEFAULT_VECTOR supports exporting to:\n + ! file (usrdata a handle of COSTA file)\n + ! pack object (usrdata a handle of COSTA pack object)\n + ! + ! \param hvec I handle of vector + ! \param usrdata I configuration of output + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_Export + subroutine CTA_Vector_Export( hvec, usrdata, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + integer(CTA_HANDLE_IKIND) , intent(in ) :: usrdata + integer , intent(out ) :: status + end subroutine CTA_Vector_Export + end interface + + ! \brief Import a vector. + ! + ! \note CTA_DEFAULT_VECTOR supports importing from pack object (usrdata[0] a pack handle). + ! \note Data type and size of vector can be changed due to this action + ! + ! \param hvec I handle of vector + ! \param usrdata I configuration of output + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_Import + subroutine CTA_Vector_Import( hvec, usrdata, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + integer(CTA_HANDLE_IKIND) , intent(in ) :: usrdata + integer , intent(out ) :: status + end subroutine CTA_Vector_Import + end interface + + ! \brief Print table, each column built up by a vector. + ! + ! \note CTA_DEFAULT_VECTOR TODO + ! + ! \param table I array of vector handles, these vectors form the table to be filled + ! \param ncolumns I number of columns in table + ! (number of vector handles in table) + ! \param vformats I handle of vector of string (size ncolumns) containing the + ! formats for printing each column (C-format) + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_Print_Table + subroutine CTA_Vector_Print_Table( table, ncolumns, vformats, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: table + integer , intent(in ) :: ncolumns + integer(CTA_HANDLE_IKIND) , intent(in ) :: vformats + integer , intent(out ) :: status + end subroutine CTA_Vector_Print_Table + end interface + + ! \brief Free the vector object. + ! + ! + ! \Note hvec_x=CTA_NULL is allowed + ! + ! \param hvec_x IO handle of vector, replaced by CTA_NULL on return + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_Free + subroutine CTA_Vector_Free( hvec_x, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec_x + integer , intent(out ) :: status + end subroutine CTA_Vector_Free + end interface + + ! \brief Get type of vector. + ! + ! \param hvec I handle of vector + ! \param datatype O receives data type of vector + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_GetDatatype + subroutine CTA_Vector_GetDatatype( hvec, datatype, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + integer , intent(out ) :: datatype + integer , intent(out ) :: status + end subroutine CTA_Vector_GetDatatype + end interface + + ! \brief Set whole vector to one single value. + ! + ! \Note hvec=CTA_NULL is allowed, function returns error code + ! + ! \param hvec IO handle of vector + ! \param val I value that must be set + ! \param datatype I data type of *val, must be the same as data type of elements in vector + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_SetConstant + module procedure CTA_Vector_SetConstant_integer + module procedure CTA_Vector_SetConstant_real4 + module procedure CTA_Vector_SetConstant_real8 + end interface + + ! \brief Element wise division + ! + ! \note y=x./y + ! + ! \param hvec_y IO handle of vector y + ! \param hvec_x I handle of vector x + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_ElmDiv + subroutine CTA_Vector_ElmDiv( hvec_y, hvec_x, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec_y + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec_x + integer , intent(out ) :: status + end subroutine CTA_Vector_ElmDiv + end interface + + ! \brief Element wise product + ! + ! \note y=x.y + ! + ! \param hvec_y IO handle of vector y + ! \param hvec_x I handle of vector x + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_ElmProd + subroutine CTA_Vector_ElmProd( hvec_y, hvec_x, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec_y + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec_x + integer , intent(out ) :: status + end subroutine CTA_Vector_ElmProd + end interface + + ! \brief Element wise square root + ! + ! \note y=y.^0.5 + ! + ! \param hvec_x IO handle of vector y + ! \param status O error status: CTA_OK if successful + ! + interface CTA_F90_Vector_ElmSqrt + subroutine CTA_Vector_ElmSqrt( hvec_x, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec_x + integer , intent(out ) :: status + end subroutine CTA_Vector_ElmSqrt + end interface + + +contains + + subroutine CTA_Vector_GetVal_integer( hvec, i, vals, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + use CTA_F90_Parameters, only : CTA_OK + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + integer , intent(in ) :: i + integer , intent(out ) :: vals + integer , intent(out ) :: status + call CTA_Vector_GetVal( hvec, i, vals, CTA_INTEGER, status ) + end subroutine CTA_Vector_GetVal_integer + + subroutine CTA_Vector_GetVal_real4( hvec, i, vals, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + use CTA_F90_Parameters, only : CTA_OK + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + integer , intent(in ) :: i + real(4) , intent(out ) :: vals + integer , intent(out ) :: status + call CTA_Vector_GetVal( hvec, i, vals, CTA_REAL, status ) + end subroutine CTA_Vector_GetVal_real4 + + subroutine CTA_Vector_GetVal_real8( hvec, i, vals, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + use CTA_F90_Parameters, only : CTA_OK + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + integer , intent(in ) :: i + real(8) , intent(out ) :: vals + integer , intent(out ) :: status + call CTA_Vector_GetVal( hvec, i, vals, CTA_DOUBLE, status ) + end subroutine CTA_Vector_GetVal_real8 + + subroutine CTA_F90_Vector_GetHandle( hvec, i, vals, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_HANDLE + use CTA_F90_Parameters, only : CTA_OK + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + integer , intent(in ) :: i + integer , intent(out ) :: vals + integer , intent(out ) :: status + call CTA_Vector_GetVal( hvec, i, vals, CTA_HANDLE, status ) + end subroutine CTA_F90_Vector_GetHandle + + subroutine CTA_Vector_GetVals_integer_1d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + integer , intent(out ) :: vals(:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_GetVals( hvec, vals, n, CTA_INTEGER, status ) + end subroutine CTA_Vector_GetVals_integer_1d + + subroutine CTA_Vector_GetVals_integer_2d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + integer , intent(out ) :: vals(:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_GetVals( hvec, vals, n, CTA_INTEGER, status ) + end subroutine CTA_Vector_GetVals_integer_2d + + subroutine CTA_Vector_GetVals_integer_3d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + integer , intent(out ) :: vals(:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_GetVals( hvec, vals, n, CTA_INTEGER, status ) + end subroutine CTA_Vector_GetVals_integer_3d + + subroutine CTA_Vector_GetVals_integer_4d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + integer , intent(out ) :: vals(:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_GetVals( hvec, vals, n, CTA_INTEGER, status ) + end subroutine CTA_Vector_GetVals_integer_4d + + subroutine CTA_Vector_GetVals_integer_5d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + integer , intent(out ) :: vals(:,:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_GetVals( hvec, vals, n, CTA_INTEGER, status ) + end subroutine CTA_Vector_GetVals_integer_5d + + subroutine CTA_Vector_GetVals_integer_6d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + integer , intent(out ) :: vals(:,:,:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_GetVals( hvec, vals, n, CTA_INTEGER, status ) + end subroutine CTA_Vector_GetVals_integer_6d + + subroutine CTA_Vector_GetVals_integer_7d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + integer , intent(out ) :: vals(:,:,:,:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_GetVals( hvec, vals, n, CTA_INTEGER, status ) + end subroutine CTA_Vector_GetVals_integer_7d + + subroutine CTA_Vector_GetVals_real4_1d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + real(4) , intent(out ) :: vals(:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_GetVals( hvec, vals, n, CTA_REAL, status ) + end subroutine CTA_Vector_GetVals_real4_1d + + subroutine CTA_Vector_GetVals_real4_2d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + real(4) , intent(out ) :: vals(:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_GetVals( hvec, vals, n, CTA_REAL, status ) + end subroutine CTA_Vector_GetVals_real4_2d + + subroutine CTA_Vector_GetVals_real4_3d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + real(4) , intent(out ) :: vals(:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_GetVals( hvec, vals, n, CTA_REAL, status ) + end subroutine CTA_Vector_GetVals_real4_3d + + subroutine CTA_Vector_GetVals_real4_4d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + real(4) , intent(out ) :: vals(:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_GetVals( hvec, vals, n, CTA_REAL, status ) + end subroutine CTA_Vector_GetVals_real4_4d + + subroutine CTA_Vector_GetVals_real4_5d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + real(4) , intent(out ) :: vals(:,:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_GetVals( hvec, vals, n, CTA_REAL, status ) + end subroutine CTA_Vector_GetVals_real4_5d + + subroutine CTA_Vector_GetVals_real4_6d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + real(4) , intent(out ) :: vals(:,:,:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_GetVals( hvec, vals, n, CTA_REAL, status ) + end subroutine CTA_Vector_GetVals_real4_6d + + subroutine CTA_Vector_GetVals_real4_7d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + real(4) , intent(out ) :: vals(:,:,:,:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_GetVals( hvec, vals, n, CTA_REAL, status ) + end subroutine CTA_Vector_GetVals_real4_7d + + subroutine CTA_Vector_GetVals_real8_1d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + real(8) , intent(out ) :: vals(:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_GetVals( hvec, vals, n, CTA_DOUBLE, status ) + end subroutine CTA_Vector_GetVals_real8_1d + + subroutine CTA_Vector_GetVals_real8_2d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + real(8) , intent(out ) :: vals(:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_GetVals( hvec, vals, n, CTA_DOUBLE, status ) + end subroutine CTA_Vector_GetVals_real8_2d + + subroutine CTA_Vector_GetVals_real8_3d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + real(8) , intent(out ) :: vals(:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_GetVals( hvec, vals, n, CTA_DOUBLE, status ) + end subroutine CTA_Vector_GetVals_real8_3d + + subroutine CTA_Vector_GetVals_real8_4d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + real(8) , intent(out ) :: vals(:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_GetVals( hvec, vals, n, CTA_DOUBLE, status ) + end subroutine CTA_Vector_GetVals_real8_4d + + subroutine CTA_Vector_GetVals_real8_5d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + real(8) , intent(out ) :: vals(:,:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_GetVals( hvec, vals, n, CTA_DOUBLE, status ) + end subroutine CTA_Vector_GetVals_real8_5d + + subroutine CTA_Vector_GetVals_real8_6d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + real(8) , intent(out ) :: vals(:,:,:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_GetVals( hvec, vals, n, CTA_DOUBLE, status ) + end subroutine CTA_Vector_GetVals_real8_6d + + subroutine CTA_Vector_GetVals_real8_7d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(in ) :: hvec + real(8) , intent(out ) :: vals(:,:,:,:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_GetVals( hvec, vals, n, CTA_DOUBLE, status ) + end subroutine CTA_Vector_GetVals_real8_7d + + subroutine CTA_Vector_SetVal_integer( hvec, i, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + use CTA_F90_Parameters, only : CTA_OK + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + integer , intent(in ) :: i + integer , intent(in ) :: val + integer , intent(out ) :: status + call CTA_Vector_SetVal( hvec, i, val, CTA_INTEGER, status ) + end subroutine CTA_Vector_SetVal_integer + + subroutine CTA_Vector_SetVal_real4( hvec, i, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + use CTA_F90_Parameters, only : CTA_OK + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + integer , intent(in ) :: i + real(4) , intent(in ) :: val + integer , intent(out ) :: status + call CTA_Vector_SetVal( hvec, i, val, CTA_REAL, status ) + end subroutine CTA_Vector_SetVal_real4 + + subroutine CTA_Vector_SetVal_real8( hvec, i, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + use CTA_F90_Parameters, only : CTA_OK + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + integer , intent(in ) :: i + real(8) , intent(in ) :: val + integer , intent(out ) :: status + call CTA_Vector_SetVal( hvec, i, val, CTA_DOUBLE, status ) + end subroutine CTA_Vector_SetVal_real8 + + subroutine CTA_Vector_SetVal_char( hvec, i, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_STRING + use CTA_F90_Parameters, only : CTA_OK + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + integer , intent(in ) :: i + character(len=*) , intent(in ) :: val + integer , intent(out ) :: status + integer(CTA_HANDLE_IKIND) :: sval + call CTA_String_Create( sval, status ) + if (status/=CTA_OK) return + call CTA_String_Set( sval, val, status ) + if (status/=CTA_OK) return + call CTA_Vector_SetVal( hvec, i, sval, CTA_STRING, status ) + if (status/=CTA_OK) return + call CTA_String_Free( sval, status ) + if (status/=CTA_OK) return + end subroutine CTA_Vector_SetVal_char + + subroutine CTA_F90_Vector_SetHandle( hvec, i, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_HANDLE + use CTA_F90_Parameters, only : CTA_OK + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + integer , intent(in ) :: i + integer , intent(in ) :: val + integer , intent(out ) :: status + call CTA_Vector_SetVal( hvec, i, val, CTA_HANDLE, status ) + end subroutine CTA_F90_Vector_SetHandle + + subroutine CTA_Vector_SetVals_integer_1d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + integer , intent(in ) :: vals(:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_SetVals( hvec, vals, n, CTA_INTEGER, status ) + end subroutine CTA_Vector_SetVals_integer_1d + + subroutine CTA_Vector_SetVals_integer_2d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + integer , intent(in ) :: vals(:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_SetVals( hvec, vals, n, CTA_INTEGER, status ) + end subroutine CTA_Vector_SetVals_integer_2d + + subroutine CTA_Vector_SetVals_integer_3d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + integer , intent(in ) :: vals(:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_SetVals( hvec, vals, n, CTA_INTEGER, status ) + end subroutine CTA_Vector_SetVals_integer_3d + + subroutine CTA_Vector_SetVals_integer_4d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + integer , intent(in ) :: vals(:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_SetVals( hvec, vals, n, CTA_INTEGER, status ) + end subroutine CTA_Vector_SetVals_integer_4d + + subroutine CTA_Vector_SetVals_integer_5d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + integer , intent(in ) :: vals(:,:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_SetVals( hvec, vals, n, CTA_INTEGER, status ) + end subroutine CTA_Vector_SetVals_integer_5d + + subroutine CTA_Vector_SetVals_integer_6d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + integer , intent(in ) :: vals(:,:,:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_SetVals( hvec, vals, n, CTA_INTEGER, status ) + end subroutine CTA_Vector_SetVals_integer_6d + + subroutine CTA_Vector_SetVals_integer_7d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + integer , intent(in ) :: vals(:,:,:,:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_SetVals( hvec, vals, n, CTA_INTEGER, status ) + end subroutine CTA_Vector_SetVals_integer_7d + + subroutine CTA_Vector_SetVals_real4_1d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + real(4) , intent(in ) :: vals(:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_SetVals( hvec, vals, n, CTA_REAL, status ) + end subroutine CTA_Vector_SetVals_real4_1d + + subroutine CTA_Vector_SetVals_real4_2d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + real(4) , intent(in ) :: vals(:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_SetVals( hvec, vals, n, CTA_REAL, status ) + end subroutine CTA_Vector_SetVals_real4_2d + + subroutine CTA_Vector_SetVals_real4_3d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + real(4) , intent(in ) :: vals(:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_SetVals( hvec, vals, n, CTA_REAL, status ) + end subroutine CTA_Vector_SetVals_real4_3d + + subroutine CTA_Vector_SetVals_real4_4d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + real(4) , intent(in ) :: vals(:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_SetVals( hvec, vals, n, CTA_REAL, status ) + end subroutine CTA_Vector_SetVals_real4_4d + + subroutine CTA_Vector_SetVals_real4_5d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + real(4) , intent(in ) :: vals(:,:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_SetVals( hvec, vals, n, CTA_REAL, status ) + end subroutine CTA_Vector_SetVals_real4_5d + + subroutine CTA_Vector_SetVals_real4_6d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + real(4) , intent(in ) :: vals(:,:,:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_SetVals( hvec, vals, n, CTA_REAL, status ) + end subroutine CTA_Vector_SetVals_real4_6d + + subroutine CTA_Vector_SetVals_real4_7d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + real(4) , intent(in ) :: vals(:,:,:,:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_SetVals( hvec, vals, n, CTA_REAL, status ) + end subroutine CTA_Vector_SetVals_real4_7d + + subroutine CTA_Vector_SetVals_real8_1d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + real(8) , intent(in ) :: vals(:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_SetVals( hvec, vals, n, CTA_DOUBLE, status ) + end subroutine CTA_Vector_SetVals_real8_1d + + subroutine CTA_Vector_SetVals_real8_2d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + real(8) , intent(in ) :: vals(:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_SetVals( hvec, vals, n, CTA_DOUBLE, status ) + end subroutine CTA_Vector_SetVals_real8_2d + + subroutine CTA_Vector_SetVals_real8_3d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + real(8) , intent(in ) :: vals(:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_SetVals( hvec, vals, n, CTA_DOUBLE, status ) + end subroutine CTA_Vector_SetVals_real8_3d + + subroutine CTA_Vector_SetVals_real8_4d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + real(8) , intent(in ) :: vals(:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_SetVals( hvec, vals, n, CTA_DOUBLE, status ) + end subroutine CTA_Vector_SetVals_real8_4d + + subroutine CTA_Vector_SetVals_real8_5d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + real(8) , intent(in ) :: vals(:,:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_SetVals( hvec, vals, n, CTA_DOUBLE, status ) + end subroutine CTA_Vector_SetVals_real8_5d + + subroutine CTA_Vector_SetVals_real8_6d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + real(8) , intent(in ) :: vals(:,:,:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_SetVals( hvec, vals, n, CTA_DOUBLE, status ) + end subroutine CTA_Vector_SetVals_real8_6d + + subroutine CTA_Vector_SetVals_real8_7d( hvec, vals, n, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + real(8) , intent(in ) :: vals(:,:,:,:,:,:,:) + integer , intent(in ) :: n + integer , intent(out ) :: status + call CTA_Vector_SetVals( hvec, vals, n, CTA_DOUBLE, status ) + end subroutine CTA_Vector_SetVals_real8_7d + + subroutine CTA_Vector_SetConstant_integer( hvec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_INTEGER + use CTA_F90_Parameters, only : CTA_OK + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + integer , intent(in ) :: val + integer , intent(out ) :: status + call CTA_Vector_SetConstant( hvec, val, CTA_INTEGER, status ) + end subroutine CTA_Vector_SetConstant_integer + + subroutine CTA_Vector_SetConstant_real4( hvec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_REAL + use CTA_F90_Parameters, only : CTA_OK + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + real(4) , intent(in ) :: val + integer , intent(out ) :: status + call CTA_Vector_SetConstant( hvec, val, CTA_REAL, status ) + end subroutine CTA_Vector_SetConstant_real4 + + subroutine CTA_Vector_SetConstant_real8( hvec, val, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + use CTA_F90_Parameters, only : CTA_DOUBLE + use CTA_F90_Parameters, only : CTA_OK + integer(CTA_HANDLE_IKIND) , intent(inout) :: hvec + real(8) , intent(in ) :: val + integer , intent(out ) :: status + call CTA_Vector_SetConstant( hvec, val, CTA_DOUBLE, status ) + end subroutine CTA_Vector_SetConstant_real8 + +end module cta_f90_vector + diff --git a/costa/native/cta_f90/generated/cta_f90_xml.f90 b/costa/native/cta_f90/generated/cta_f90_xml.f90 new file mode 100644 index 000000000..72de01add --- /dev/null +++ b/costa/native/cta_f90/generated/cta_f90_xml.f90 @@ -0,0 +1,39 @@ +module cta_f90_xml + + implicit none + + public + + ! \brief Read a COSTA XML file into a new tree. + ! + ! \param hfname I file name of XML file to read + ! \param hroot O handle of a new COSTA tree + ! \param status O CTA_OK if successful + ! + interface CTA_F90_XML_Read + subroutine CTA_XML_Read( hfname, hroot, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hfname + integer(CTA_HANDLE_IKIND) , intent(out ) :: hroot + integer , intent(out ) :: status + end subroutine CTA_XML_Read + end interface + + ! \brief Write a tree to a COSTA XML file + ! + ! \param hfname I file name of XML file to write + ! \param hroot I handle of a COSTA tree + ! \param status O CTA_OK if successful + ! + interface CTA_F90_XML_Write + subroutine CTA_XML_Write( hfname, hroot, status ) + use CTA_F90_Parameters, only : CTA_HANDLE_IKIND + integer(CTA_HANDLE_IKIND) , intent(in ) :: hfname + integer(CTA_HANDLE_IKIND) , intent(in ) :: hroot + integer , intent(out ) :: status + end subroutine CTA_XML_Write + end interface + + +end module cta_f90_xml + diff --git a/costa/native/cta_f90/include/cta_f77.inc b/costa/native/cta_f90/include/cta_f77.inc new file mode 100644 index 000000000..8f6764131 --- /dev/null +++ b/costa/native/cta_f90/include/cta_f77.inc @@ -0,0 +1,247 @@ +! +! COSTA: Problem solving environment for data assimilation +! Copyright (C) 2005 Nils van Velzen +! +! This library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! version 2.1 of the License, or (at your option) any later version. +! +! This library is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public +! License along with this library; if not, write to the Free Software +! Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +! + +! FORTRAN file unit for standard output + integer FCTA_STDOUT + parameter (FCTA_STDOUT = 6) + +! handle to a COSTA object + integer CTA_HANDLE + parameter (CTA_HANDLE=-1) +! A handle of a COSTA interface object + integer CTA_INTERFACE + parameter (CTA_INTERFACE=-2) +! A handle of a COSTA function object + integer CTA_FUNCTION + parameter (CTA_FUNCTION=-3) +! A handle of a COSTA vector object + integer CTA_VECTOR + parameter (CTA_VECTOR=-4) +! A handle of a COSTA Vector class + integer CTA_VECTORCLASS + parameter (CTA_VECTORCLASS=-5) +! A handle of a COSTA (sub) state + integer CTA_STATE + parameter (CTA_STATE=-6) +!/*! A handle of a COSTA Matrix class */ + integer CTA_MATRIXCLASS + parameter (CTA_MATRIXCLASS=-7) +!/*! A handle of a COSTA Matrix */ + integer CTA_MATRIX + parameter (CTA_MATRIX=-8) +!/*! A COSTA model class */ + integer CTA_MODELCLASS + parameter (CTA_MODELCLASS=-16) + + integer CTA_VOID + parameter (CTA_VOID =-100) + integer CTA_INTEGER + parameter (CTA_INTEGER =-101) + integer CTA_REAL + parameter (CTA_REAL =-102) + integer CTA_DOUBLE + parameter (CTA_DOUBLE =-103) + integer CTA_FSTRING + parameter (CTA_FSTRING =-104) + integer CTA_CSTRING + parameter (CTA_CSTRING =-105) + integer CTA_STRING + parameter (CTA_STRING =-28) + integer CTA_TIME + parameter (CTA_TIME = -15) + integer CTA_ARRAY + parameter (CTA_ARRAY = -30) + + integer CTA_1DINTEGER + parameter (CTA_1DINTEGER =-201) + integer CTA_1DREAL + parameter (CTA_1DREAL =-202) + integer CTA_1DDOUBLE + parameter (CTA_1DDOUBLE =-203) + integer CTA_1DFSTRING + parameter (CTA_1DFSTRING =-204) + integer CTA_1DCSTRING + parameter (CTA_1DCSTRING =-205) + +! Common errors + integer CTA_OK + parameter (CTA_OK=0) + integer CTA_ARRAY_TOO_SHORT + parameter (CTA_ARRAY_TOO_SHORT=10) + integer CTA_ILLEGAL_DATATYPE + parameter (CTA_ILLEGAL_DATATYPE=11) + integer CTA_DIMENSION_ERROR + parameter (CTA_DIMENSION_ERROR=12) + integer CTA_INCOMPATIBLE_VECTORS + parameter (CTA_INCOMPATIBLE_VECTORS=13) + integer CTA_CONCAT_NOT_POSSIBLE + parameter (CTA_CONCAT_NOT_POSSIBLE=14) + integer CTA_SETVAL_NOT_POSSIBLE + parameter (CTA_SETVAL_NOT_POSSIBLE=15) + integer CTA_ITEM_NOT_FOUND + parameter (CTA_ITEM_NOT_FOUND=16) + integer CTA_UNINITIALISED_SUBSTATES + parameter (CTA_UNINITIALISED_SUBSTATES=17) + integer CTA_STATES_NOT_COMPATIBLE + parameter (CTA_STATES_NOT_COMPATIBLE =18) + integer CTA_INCOMPATIBLE_MATRICES + parameter (CTA_INCOMPATIBLE_MATRICES =19) + integer CTA_NOT_IMPLEMENTED + parameter (CTA_NOT_IMPLEMENTED =20) + +! Common Constants + integer CTA_DEFAULT + parameter (CTA_DEFAULT=0) + integer CTA_NULL + parameter (CTA_NULL=0) + integer CTA_TRUE + parameter(CTA_TRUE=1) + integer CTA_FALSE + parameter(CTA_FALSE=0) + + integer CTA_STRLEN_NAME, CTA_STRLEN_TAG + parameter (CTA_STRLEN_NAME=80, CTA_STRLEN_TAG=80) + + integer CTA_ASSIMOBS, CTA_VALIDATEOBS, CTA_ALLOBS + parameter(CTA_ASSIMOBS=1, CTA_VALIDATEOBS=2, CTA_ALLOBS=3) + +! parameters from cta_pack.h + +! Reset pack/unpack pointer of pack object + integer, parameter ::CTA_PACK_RESET=-1 + +! parameters from cta_model.h + + integer, parameter ::FCTA_MODEL_CREATE_SIZE =1 + integer, parameter ::FCTA_MODEL_CREATE_INIT =2 + integer, parameter ::FCTA_MODEL_FREE =3 + integer, parameter ::FCTA_MODEL_COMPUTE =4 + integer, parameter ::FCTA_MODEL_SET_STATE =5 + integer, parameter ::FCTA_MODEL_GET_STATE =6 + integer, parameter ::FCTA_MODEL_AXPY_STATE =7 + integer, parameter ::FCTA_MODEL_AXPY_MODEL =8 + integer, parameter ::FCTA_MODEL_SET_FORC =9 + integer, parameter ::FCTA_MODEL_GET_FORC =10 + integer, parameter ::FCTA_MODEL_AXPY_FORC =11 + integer, parameter ::FCTA_MODEL_SET_PARAM =12 + integer, parameter ::FCTA_MODEL_GET_PARAM =13 + integer, parameter ::FCTA_MODEL_AXPY_PARAM =14 + integer, parameter ::FCTA_MODEL_GET_STATESCALING =15 + integer, parameter ::FCTA_MODEL_GET_TIMEHORIZON =16 + integer, parameter ::FCTA_MODEL_GET_CURRENTTIME =17 + + + integer, parameter ::FCTA_MODEL_GET_NOISE_COUNT =18 + integer, parameter ::FCTA_MODEL_GET_NOISE_COVAR =19 + integer, parameter ::FCTA_MODEL_GET_OBSVALUES =20 + integer, parameter ::FCTA_MODEL_GET_OBSSELECT =21 + integer, parameter ::FCTA_MODEL_ANNOUNCE_OBSVALUES =22 + integer, parameter ::FCTA_MODEL_ADD_NOISE =23 + integer, parameter ::FCTA_MODEL_EXPORT =24 + integer, parameter ::FCTA_MODEL_IMPORT =25 + integer, parameter ::FCTA_MODEL_ADJ_SET_FORC =26 + integer, parameter ::FCTA_MODEL_ADJ_COMPUTE =27 + integer, parameter ::FCTA_MODEL_ADJ_PREPARE =28 + integer, parameter ::FCTA_MODEL_NUMFUNC =28 + + integer, parameter ::FCTA_MODEL_GETNUMDOMAINS =30 + integer, parameter ::FCTA_MODEL_GETOBSSELECTOR =31 + integer, parameter ::FCTA_MODEL_GETOBSLOCALIZATIONDOMAIN =32 + integer, parameter ::FCTA_MODEL_GETSTATEDOMAIN =33 + integer, parameter ::FCTA_MODEL_AXPYSTATEDOMAIN =34 + + + + + +! Observation description parameters + integer, parameter ::FCTA_OBSDESCR_CREATE_SIZE = 1+1 + integer, parameter ::FCTA_OBSDESCR_CREATE_INIT = 2+1 + integer, parameter ::FCTA_OBSDESCR_FREE = 3+1 + integer, parameter ::FCTA_OBSDESCR_GET_PROPERTIES = 4+1 + integer, parameter ::FCTA_OBSDESCR_GET_KEYS = 5+1 + integer, parameter ::FCTA_OBSDESCR_COUNT_OBSERVATIONS = 6+1 + integer, parameter ::FCTA_OBSDESCR_COUNT_PROPERTIES = 7+1 + integer, parameter ::FCTA_OBSDESCR_EXPORT = 8+1 + integer, parameter ::FCTA_OBSDESCR_SELECTION = 9+1 + integer, parameter ::FCTA_OBSDESCR_NUMFUNC = 10+1 + + integer, parameter ::FCTA_SOBS_CREATE_SIZE = 1+1 + integer, parameter ::FCTA_SOBS_CREATE_INIT = 2+1 + integer, parameter ::FCTA_SOBS_FREE = 3+1 + integer, parameter ::FCTA_SOBS_CREATE_SELECTION = 4+1 + integer, parameter ::FCTA_SOBS_COUNT = 5+1 + integer, parameter ::FCTA_SOBS_GET_OBS_DESCRIPTION= 6+1 + integer, parameter ::FCTA_SOBS_GET_VALUES = 7+1 + integer, parameter ::FCTA_SOBS_GET_REALISATION = 8+1 + integer, parameter ::FCTA_SOBS_GET_EXPECTATION = 9+1 + integer, parameter ::FCTA_SOBS_EVALUATE_PDF = 10+1 + integer, parameter ::FCTA_SOBS_GET_COV_MATRIX = 11+1 + integer, parameter ::FCTA_SOBS_GET_VARIANCE = 12+1 + integer, parameter ::FCTA_SOBS_EXPORT = 13+1 + integer, parameter ::FCTA_SOBS_GET_TIMES = 14+1 + integer, parameter ::FCTA_SOBS_NUMFUNC = 15+1 + + +! Common block holding "constants" that are set at initialisation + +!NOT HAPPY BUT HAVE SOME PROBLEMS WITH LOADING OF SO-FILES UNDER LINUX +!WHERE WE HAVE 2 INSTANCES OF COMMON BLOCK JUST SET VALUES!!!! + + integer, parameter :: CTA_MODBUILD_PAR = 0 + integer, parameter :: CTA_FILE_STDOUT = 1 + integer, parameter :: CTA_DEFAULT_SOBS = 23 + integer, parameter :: CTA_COMBINE_SOBS = 45 + integer, parameter :: CTA_NETCDF_SOBS = 67 + integer, parameter :: CTA_MAORI_SOBS = -173 + integer, parameter :: CTA_OBSDESCR_TABLE = 77 + integer, parameter :: CTA_DEFAULT_VECTOR = 100 + integer, parameter :: CTA_DEFAULT_MATRIX = 116 + integer, parameter :: CTA_MODBUILD_SP = 147 + integer, parameter :: CTA_MODELCOMBINER = 0 + integer, parameter :: CTA_MODBUILD_B3B = 0 + integer, parameter :: CTA_MODBUILD_BB = 0 + integer, parameter :: CTA_OP_ROOT_RMS = 148 + integer, parameter :: CTA_OP_ROOT_AMAX = 149 + integer, parameter :: CTA_OP_ROOT_PRINTI = 150 + integer, parameter :: CTA_OP_ROOT_SSQ = 151 + +! external CTA_DEFAULT_VECTOR, CTA_DEFAULT_MATRIX, & +! & CTA_DEFAULT_SOBS, CTA_MODBUILD_SP, & +! & CTA_MODBUILD_PAR, CTA_OBSDESCR_TABLE, & +! & CTA_FILE_STDOUT, CTA_MODELCOMBINER, & +! & CTA_MODBUILD_B3B + + + +!!!DEC$ IF DEFINED(CTALIB) +!!!DEC$ ATTRIBUTES DLLEXPORT:: /ctaf77/ +!!!DEC$ ELSE +!!!DEC$ ATTRIBUTES DLLIMPORT:: /ctaf77/ +!!!DEC$ ENDIF +!! common /ctaf77/ CTA_DEFAULT_VECTOR, CTA_DEFAULT_MATRIX, & +!! & CTA_DEFAULT_SOBS, CTA_MODBUILD_SP, & +!! & CTA_MODBUILD_PAR, CTA_OBSDESCR_TABLE, & +!! & CTA_FILE_STDOUT, CTA_MODELCOMBINER, & +!! & CTA_MODBUILD_B3B + + + + + diff --git a/costa/native/cta_f90/include/cta_f90.inc b/costa/native/cta_f90/include/cta_f90.inc new file mode 100644 index 000000000..daa7d8726 --- /dev/null +++ b/costa/native/cta_f90/include/cta_f90.inc @@ -0,0 +1,19 @@ +! +! COSTA: Problem solving environment for data assimilation +! Copyright (C) 2005 Nils van Velzen +! +! This library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! version 2.1 of the License, or (at your option) any later version. +! +! This library is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public +! License along with this library; if not, write to the Free Software +! Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +! +include 'cta_f77.inc' diff --git a/costa/native/cta_f90/include/cta_f90_contains.inc b/costa/native/cta_f90/include/cta_f90_contains.inc new file mode 100644 index 000000000..5805d90f9 --- /dev/null +++ b/costa/native/cta_f90/include/cta_f90_contains.inc @@ -0,0 +1,42 @@ + ! return error message given error code + + function CTA_F90_StrError( ierr ) + + use CTA_F90_Parameters, only : CTA_OK + use CTA_F90_Parameters, only : CTA_ARRAY_TOO_SHORT + use CTA_F90_Parameters, only : CTA_ILLEGAL_DATATYPE + use CTA_F90_Parameters, only : CTA_DIMENSION_ERROR + use CTA_F90_Parameters, only : CTA_INCOMPATIBLE_VECTORS + use CTA_F90_Parameters, only : CTA_CONCAT_NOT_POSSIBLE + use CTA_F90_Parameters, only : CTA_SETVAL_NOT_POSSIBLE + use CTA_F90_Parameters, only : CTA_ITEM_NOT_FOUND + use CTA_F90_Parameters, only : CTA_UNINITIALISED_SUBSTATES + use CTA_F90_Parameters, only : CTA_STATES_NOT_COMPATIBLE + use CTA_F90_Parameters, only : CTA_INCOMPATIBLE_MATRICES + use CTA_F90_Parameters, only : CTA_NOT_IMPLEMENTED + + ! --- in/out --------------------------------- + + character(len=80) :: CTA_F90_StrError + integer, intent(in) :: ierr + + ! --- begin ---------------------------------- + + select case ( ierr ) + case ( CTA_OK ) ! ok, no message + case ( CTA_ARRAY_TOO_SHORT ) ; CTA_F90_StrError = 'CTA - ERROR - Array too short' + case ( CTA_ILLEGAL_DATATYPE ) ; CTA_F90_StrError = 'CTA - ERROR - Illegal datatype' + case ( CTA_DIMENSION_ERROR ) ; CTA_F90_StrError = 'CTA - ERROR - Dimension error' + case ( CTA_INCOMPATIBLE_VECTORS ) ; CTA_F90_StrError = 'CTA - ERROR - Incompatible vectors' + case ( CTA_CONCAT_NOT_POSSIBLE ) ; CTA_F90_StrError = 'CTA - ERROR - Concat not possible' + case ( CTA_SETVAL_NOT_POSSIBLE ) ; CTA_F90_StrError = 'CTA - ERROR - Setval not possible' + case ( CTA_ITEM_NOT_FOUND ) ; CTA_F90_StrError = 'CTA - ERROR - Item not found' + case ( CTA_UNINITIALISED_SUBSTATES ) ; CTA_F90_StrError = 'CTA - ERROR - Uninitialised substates' + case ( CTA_STATES_NOT_COMPATIBLE ) ; CTA_F90_StrError = 'CTA - ERROR - States not compatible' + case ( CTA_INCOMPATIBLE_MATRICES ) ; CTA_F90_StrError = 'CTA - ERROR - Incompatible matrices' + case ( CTA_NOT_IMPLEMENTED ) ; CTA_F90_StrError = 'CTA - ERROR - Not implemented' + case default + write (CTA_F90_StrError,'("CTA - ERROR - Unknown error code: ",i8)') ierr + end select + + end function CTA_F90_StrError diff --git a/costa/native/external/blas/caxpy.f b/costa/native/external/blas/caxpy.f new file mode 100644 index 000000000..7037c5a54 --- /dev/null +++ b/costa/native/external/blas/caxpy.f @@ -0,0 +1,34 @@ + subroutine caxpy(n,ca,cx,incx,cy,incy) +c +c constant times a vector plus a vector. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + complex cx(*),cy(*),ca + integer i,incx,incy,ix,iy,n +c + if(n.le.0)return + if (abs(real(ca)) + abs(aimag(ca)) .eq. 0.0 ) return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + cy(iy) = cy(iy) + ca*cx(ix) + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + cy(i) = cy(i) + ca*cx(i) + 30 continue + return + end diff --git a/costa/native/external/blas/ccopy.f b/costa/native/external/blas/ccopy.f new file mode 100644 index 000000000..61d5267e5 --- /dev/null +++ b/costa/native/external/blas/ccopy.f @@ -0,0 +1,33 @@ + subroutine ccopy(n,cx,incx,cy,incy) +c +c copies a vector, x, to a vector, y. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + complex cx(*),cy(*) + integer i,incx,incy,ix,iy,n +c + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + cy(iy) = cx(ix) + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + cy(i) = cx(i) + 30 continue + return + end diff --git a/costa/native/external/blas/cdotc.f b/costa/native/external/blas/cdotc.f new file mode 100644 index 000000000..1d5890596 --- /dev/null +++ b/costa/native/external/blas/cdotc.f @@ -0,0 +1,38 @@ + complex function cdotc(n,cx,incx,cy,incy) +c +c forms the dot product of two vectors, conjugating the first +c vector. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + complex cx(*),cy(*),ctemp + integer i,incx,incy,ix,iy,n +c + ctemp = (0.0,0.0) + cdotc = (0.0,0.0) + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + ctemp = ctemp + conjg(cx(ix))*cy(iy) + ix = ix + incx + iy = iy + incy + 10 continue + cdotc = ctemp + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + ctemp = ctemp + conjg(cx(i))*cy(i) + 30 continue + cdotc = ctemp + return + end diff --git a/costa/native/external/blas/cdotu.f b/costa/native/external/blas/cdotu.f new file mode 100644 index 000000000..d88cea45e --- /dev/null +++ b/costa/native/external/blas/cdotu.f @@ -0,0 +1,37 @@ + complex function cdotu(n,cx,incx,cy,incy) +c +c forms the dot product of two vectors. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + complex cx(*),cy(*),ctemp + integer i,incx,incy,ix,iy,n +c + ctemp = (0.0,0.0) + cdotu = (0.0,0.0) + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + ctemp = ctemp + cx(ix)*cy(iy) + ix = ix + incx + iy = iy + incy + 10 continue + cdotu = ctemp + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + ctemp = ctemp + cx(i)*cy(i) + 30 continue + cdotu = ctemp + return + end diff --git a/costa/native/external/blas/cgbmv.f b/costa/native/external/blas/cgbmv.f new file mode 100644 index 000000000..5b559c12d --- /dev/null +++ b/costa/native/external/blas/cgbmv.f @@ -0,0 +1,322 @@ + SUBROUTINE CGBMV ( TRANS, M, N, KL, KU, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + COMPLEX ALPHA, BETA + INTEGER INCX, INCY, KL, KU, LDA, M, N + CHARACTER*1 TRANS +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CGBMV performs one of the matrix-vector operations +* +* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, or +* +* y := alpha*conjg( A' )*x + beta*y, +* +* where alpha and beta are scalars, x and y are vectors and A is an +* m by n band matrix, with kl sub-diagonals and ku super-diagonals. +* +* Parameters +* ========== +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. +* +* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. +* +* TRANS = 'C' or 'c' y := alpha*conjg( A' )*x + beta*y. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* KL - INTEGER. +* On entry, KL specifies the number of sub-diagonals of the +* matrix A. KL must satisfy 0 .le. KL. +* Unchanged on exit. +* +* KU - INTEGER. +* On entry, KU specifies the number of super-diagonals of the +* matrix A. KU must satisfy 0 .le. KU. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry, the leading ( kl + ku + 1 ) by n part of the +* array A must contain the matrix of coefficients, supplied +* column by column, with the leading diagonal of the matrix in +* row ( ku + 1 ) of the array, the first super-diagonal +* starting at position 2 in row ku, the first sub-diagonal +* starting at position 1 in row ( ku + 2 ), and so on. +* Elements in the array A that do not correspond to elements +* in the band matrix (such as the top left ku by ku triangle) +* are not referenced. +* The following program segment will transfer a band matrix +* from conventional full matrix storage to band storage: +* +* DO 20, J = 1, N +* K = KU + 1 - J +* DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL ) +* A( K + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( kl + ku + 1 ). +* Unchanged on exit. +* +* X - COMPLEX array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - COMPLEX array of DIMENSION at least +* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +* Before entry, the incremented array Y must contain the +* vector y. On exit, Y is overwritten by the updated vector y. +* +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, IY, J, JX, JY, K, KUP1, KX, KY, + $ LENX, LENY + LOGICAL NOCONJ +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( KL.LT.0 )THEN + INFO = 4 + ELSE IF( KU.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( KL + KU + 1 ) )THEN + INFO = 8 + ELSE IF( INCX.EQ.0 )THEN + INFO = 10 + ELSE IF( INCY.EQ.0 )THEN + INFO = 13 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CGBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( LSAME( TRANS, 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the band part of A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, LENY + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, LENY + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, LENY + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, LENY + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + KUP1 = KU + 1 + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form y := alpha*A*x + y. +* + JX = KX + IF( INCY.EQ.1 )THEN + DO 60, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + K = KUP1 - J + DO 50, I = MAX( 1, J - KU ), MIN( M, J + KL ) + Y( I ) = Y( I ) + TEMP*A( K + I, J ) + 50 CONTINUE + END IF + JX = JX + INCX + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IY = KY + K = KUP1 - J + DO 70, I = MAX( 1, J - KU ), MIN( M, J + KL ) + Y( IY ) = Y( IY ) + TEMP*A( K + I, J ) + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + IF( J.GT.KU ) + $ KY = KY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y := alpha*A'*x + y or y := alpha*conjg( A' )*x + y. +* + JY = KY + IF( INCX.EQ.1 )THEN + DO 110, J = 1, N + TEMP = ZERO + K = KUP1 - J + IF( NOCONJ )THEN + DO 90, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + A( K + I, J )*X( I ) + 90 CONTINUE + ELSE + DO 100, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + CONJG( A( K + I, J ) )*X( I ) + 100 CONTINUE + END IF + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 110 CONTINUE + ELSE + DO 140, J = 1, N + TEMP = ZERO + IX = KX + K = KUP1 - J + IF( NOCONJ )THEN + DO 120, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + A( K + I, J )*X( IX ) + IX = IX + INCX + 120 CONTINUE + ELSE + DO 130, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + CONJG( A( K + I, J ) )*X( IX ) + IX = IX + INCX + 130 CONTINUE + END IF + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + IF( J.GT.KU ) + $ KX = KX + INCX + 140 CONTINUE + END IF + END IF +* + RETURN +* +* End of CGBMV . +* + END diff --git a/costa/native/external/blas/cgemm.f b/costa/native/external/blas/cgemm.f new file mode 100644 index 000000000..14ebdc072 --- /dev/null +++ b/costa/native/external/blas/cgemm.f @@ -0,0 +1,414 @@ + SUBROUTINE CGEMM ( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 TRANSA, TRANSB + INTEGER M, N, K, LDA, LDB, LDC + COMPLEX ALPHA, BETA +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* CGEMM performs one of the matrix-matrix operations +* +* C := alpha*op( A )*op( B ) + beta*C, +* +* where op( X ) is one of +* +* op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ), +* +* alpha and beta are scalars, and A, B and C are matrices, with op( A ) +* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. +* +* Parameters +* ========== +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n', op( A ) = A. +* +* TRANSA = 'T' or 't', op( A ) = A'. +* +* TRANSA = 'C' or 'c', op( A ) = conjg( A' ). +* +* Unchanged on exit. +* +* TRANSB - CHARACTER*1. +* On entry, TRANSB specifies the form of op( B ) to be used in +* the matrix multiplication as follows: +* +* TRANSB = 'N' or 'n', op( B ) = B. +* +* TRANSB = 'T' or 't', op( B ) = B'. +* +* TRANSB = 'C' or 'c', op( B ) = conjg( B' ). +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix +* op( A ) and of the matrix C. M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix +* op( B ) and the number of columns of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry, K specifies the number of columns of the matrix +* op( A ) and the number of rows of the matrix op( B ). K must +* be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is +* k when TRANSA = 'N' or 'n', and is m otherwise. +* Before entry with TRANSA = 'N' or 'n', the leading m by k +* part of the array A must contain the matrix A, otherwise +* the leading k by m part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANSA = 'N' or 'n' then +* LDA must be at least max( 1, m ), otherwise LDA must be at +* least max( 1, k ). +* Unchanged on exit. +* +* B - COMPLEX array of DIMENSION ( LDB, kb ), where kb is +* n when TRANSB = 'N' or 'n', and is k otherwise. +* Before entry with TRANSB = 'N' or 'n', the leading k by n +* part of the array B must contain the matrix B, otherwise +* the leading n by k part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANSB = 'N' or 'n' then +* LDB must be at least max( 1, k ), otherwise LDB must be at +* least max( 1, n ). +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then C need not be set on input. +* Unchanged on exit. +* +* C - COMPLEX array of DIMENSION ( LDC, n ). +* Before entry, the leading m by n part of the array C must +* contain the matrix C, except when beta is zero, in which +* case C need not be set on entry. +* On exit, the array C is overwritten by the m by n matrix +* ( alpha*op( A )*op( B ) + beta*C ). +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. Local Scalars .. + LOGICAL CONJA, CONJB, NOTA, NOTB + INTEGER I, INFO, J, L, NCOLA, NROWA, NROWB + COMPLEX TEMP +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Executable Statements .. +* +* Set NOTA and NOTB as true if A and B respectively are not +* conjugated or transposed, set CONJA and CONJB as true if A and +* B respectively are to be transposed but not conjugated and set +* NROWA, NCOLA and NROWB as the number of rows and columns of A +* and the number of rows of B respectively. +* + NOTA = LSAME( TRANSA, 'N' ) + NOTB = LSAME( TRANSB, 'N' ) + CONJA = LSAME( TRANSA, 'C' ) + CONJB = LSAME( TRANSB, 'C' ) + IF( NOTA )THEN + NROWA = M + NCOLA = K + ELSE + NROWA = K + NCOLA = M + END IF + IF( NOTB )THEN + NROWB = K + ELSE + NROWB = N + END IF +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.NOTA ).AND. + $ ( .NOT.CONJA ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.NOTB ).AND. + $ ( .NOT.CONJB ).AND. + $ ( .NOT.LSAME( TRANSB, 'T' ) ) )THEN + INFO = 2 + ELSE IF( M .LT.0 )THEN + INFO = 3 + ELSE IF( N .LT.0 )THEN + INFO = 4 + ELSE IF( K .LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 8 + ELSE IF( LDB.LT.MAX( 1, NROWB ) )THEN + INFO = 10 + ELSE IF( LDC.LT.MAX( 1, M ) )THEN + INFO = 13 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CGEMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, M + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF( NOTB )THEN + IF( NOTA )THEN +* +* Form C := alpha*A*B + beta*C. +* + DO 90, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 50, I = 1, M + C( I, J ) = ZERO + 50 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 60, I = 1, M + C( I, J ) = BETA*C( I, J ) + 60 CONTINUE + END IF + DO 80, L = 1, K + IF( B( L, J ).NE.ZERO )THEN + TEMP = ALPHA*B( L, J ) + DO 70, I = 1, M + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 70 CONTINUE + END IF + 80 CONTINUE + 90 CONTINUE + ELSE IF( CONJA )THEN +* +* Form C := alpha*conjg( A' )*B + beta*C. +* + DO 120, J = 1, N + DO 110, I = 1, M + TEMP = ZERO + DO 100, L = 1, K + TEMP = TEMP + CONJG( A( L, I ) )*B( L, J ) + 100 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 110 CONTINUE + 120 CONTINUE + ELSE +* +* Form C := alpha*A'*B + beta*C +* + DO 150, J = 1, N + DO 140, I = 1, M + TEMP = ZERO + DO 130, L = 1, K + TEMP = TEMP + A( L, I )*B( L, J ) + 130 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 140 CONTINUE + 150 CONTINUE + END IF + ELSE IF( NOTA )THEN + IF( CONJB )THEN +* +* Form C := alpha*A*conjg( B' ) + beta*C. +* + DO 200, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 160, I = 1, M + C( I, J ) = ZERO + 160 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 170, I = 1, M + C( I, J ) = BETA*C( I, J ) + 170 CONTINUE + END IF + DO 190, L = 1, K + IF( B( J, L ).NE.ZERO )THEN + TEMP = ALPHA*CONJG( B( J, L ) ) + DO 180, I = 1, M + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 180 CONTINUE + END IF + 190 CONTINUE + 200 CONTINUE + ELSE +* +* Form C := alpha*A*B' + beta*C +* + DO 250, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 210, I = 1, M + C( I, J ) = ZERO + 210 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 220, I = 1, M + C( I, J ) = BETA*C( I, J ) + 220 CONTINUE + END IF + DO 240, L = 1, K + IF( B( J, L ).NE.ZERO )THEN + TEMP = ALPHA*B( J, L ) + DO 230, I = 1, M + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 230 CONTINUE + END IF + 240 CONTINUE + 250 CONTINUE + END IF + ELSE IF( CONJA )THEN + IF( CONJB )THEN +* +* Form C := alpha*conjg( A' )*conjg( B' ) + beta*C. +* + DO 280, J = 1, N + DO 270, I = 1, M + TEMP = ZERO + DO 260, L = 1, K + TEMP = TEMP + CONJG( A( L, I ) )*CONJG( B( J, L ) ) + 260 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 270 CONTINUE + 280 CONTINUE + ELSE +* +* Form C := alpha*conjg( A' )*B' + beta*C +* + DO 310, J = 1, N + DO 300, I = 1, M + TEMP = ZERO + DO 290, L = 1, K + TEMP = TEMP + CONJG( A( L, I ) )*B( J, L ) + 290 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 300 CONTINUE + 310 CONTINUE + END IF + ELSE + IF( CONJB )THEN +* +* Form C := alpha*A'*conjg( B' ) + beta*C +* + DO 340, J = 1, N + DO 330, I = 1, M + TEMP = ZERO + DO 320, L = 1, K + TEMP = TEMP + A( L, I )*CONJG( B( J, L ) ) + 320 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 330 CONTINUE + 340 CONTINUE + ELSE +* +* Form C := alpha*A'*B' + beta*C +* + DO 370, J = 1, N + DO 360, I = 1, M + TEMP = ZERO + DO 350, L = 1, K + TEMP = TEMP + A( L, I )*B( J, L ) + 350 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 360 CONTINUE + 370 CONTINUE + END IF + END IF +* + RETURN +* +* End of CGEMM . +* + END diff --git a/costa/native/external/blas/cgemv.f b/costa/native/external/blas/cgemv.f new file mode 100644 index 000000000..04872d8d9 --- /dev/null +++ b/costa/native/external/blas/cgemv.f @@ -0,0 +1,281 @@ + SUBROUTINE CGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + COMPLEX ALPHA, BETA + INTEGER INCX, INCY, LDA, M, N + CHARACTER*1 TRANS +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CGEMV performs one of the matrix-vector operations +* +* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, or +* +* y := alpha*conjg( A' )*x + beta*y, +* +* where alpha and beta are scalars, x and y are vectors and A is an +* m by n matrix. +* +* Parameters +* ========== +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. +* +* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. +* +* TRANS = 'C' or 'c' y := alpha*conjg( A' )*x + beta*y. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* X - COMPLEX array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - COMPLEX array of DIMENSION at least +* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +* Before entry with BETA non-zero, the incremented array Y +* must contain the vector y. On exit, Y is overwritten by the +* updated vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY + LOGICAL NOCONJ +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CGEMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( LSAME( TRANS, 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, LENY + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, LENY + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, LENY + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, LENY + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form y := alpha*A*x + y. +* + JX = KX + IF( INCY.EQ.1 )THEN + DO 60, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + DO 50, I = 1, M + Y( I ) = Y( I ) + TEMP*A( I, J ) + 50 CONTINUE + END IF + JX = JX + INCX + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IY = KY + DO 70, I = 1, M + Y( IY ) = Y( IY ) + TEMP*A( I, J ) + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + ELSE +* +* Form y := alpha*A'*x + y or y := alpha*conjg( A' )*x + y. +* + JY = KY + IF( INCX.EQ.1 )THEN + DO 110, J = 1, N + TEMP = ZERO + IF( NOCONJ )THEN + DO 90, I = 1, M + TEMP = TEMP + A( I, J )*X( I ) + 90 CONTINUE + ELSE + DO 100, I = 1, M + TEMP = TEMP + CONJG( A( I, J ) )*X( I ) + 100 CONTINUE + END IF + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 110 CONTINUE + ELSE + DO 140, J = 1, N + TEMP = ZERO + IX = KX + IF( NOCONJ )THEN + DO 120, I = 1, M + TEMP = TEMP + A( I, J )*X( IX ) + IX = IX + INCX + 120 CONTINUE + ELSE + DO 130, I = 1, M + TEMP = TEMP + CONJG( A( I, J ) )*X( IX ) + IX = IX + INCX + 130 CONTINUE + END IF + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 140 CONTINUE + END IF + END IF +* + RETURN +* +* End of CGEMV . +* + END diff --git a/costa/native/external/blas/cgerc.f b/costa/native/external/blas/cgerc.f new file mode 100644 index 000000000..288e192d9 --- /dev/null +++ b/costa/native/external/blas/cgerc.f @@ -0,0 +1,157 @@ + SUBROUTINE CGERC ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA ) +* .. Scalar Arguments .. + COMPLEX ALPHA + INTEGER INCX, INCY, LDA, M, N +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CGERC performs the rank 1 operation +* +* A := alpha*x*conjg( y' ) + A, +* +* where alpha is a scalar, x is an m element vector, y is an n element +* vector and A is an m by n matrix. +* +* Parameters +* ========== +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( m - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the m +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. On exit, A is +* overwritten by the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, J, JY, KX +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( M.LT.0 )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CGERC ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( INCY.GT.0 )THEN + JY = 1 + ELSE + JY = 1 - ( N - 1 )*INCY + END IF + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*CONJG( Y( JY ) ) + DO 10, I = 1, M + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + END IF + JY = JY + INCY + 20 CONTINUE + ELSE + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( M - 1 )*INCX + END IF + DO 40, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*CONJG( Y( JY ) ) + IX = KX + DO 30, I = 1, M + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JY = JY + INCY + 40 CONTINUE + END IF +* + RETURN +* +* End of CGERC . +* + END diff --git a/costa/native/external/blas/cgeru.f b/costa/native/external/blas/cgeru.f new file mode 100644 index 000000000..8a9ac390e --- /dev/null +++ b/costa/native/external/blas/cgeru.f @@ -0,0 +1,157 @@ + SUBROUTINE CGERU ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA ) +* .. Scalar Arguments .. + COMPLEX ALPHA + INTEGER INCX, INCY, LDA, M, N +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CGERU performs the rank 1 operation +* +* A := alpha*x*y' + A, +* +* where alpha is a scalar, x is an m element vector, y is an n element +* vector and A is an m by n matrix. +* +* Parameters +* ========== +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( m - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the m +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. On exit, A is +* overwritten by the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, J, JY, KX +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( M.LT.0 )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CGERU ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( INCY.GT.0 )THEN + JY = 1 + ELSE + JY = 1 - ( N - 1 )*INCY + END IF + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*Y( JY ) + DO 10, I = 1, M + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + END IF + JY = JY + INCY + 20 CONTINUE + ELSE + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( M - 1 )*INCX + END IF + DO 40, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*Y( JY ) + IX = KX + DO 30, I = 1, M + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JY = JY + INCY + 40 CONTINUE + END IF +* + RETURN +* +* End of CGERU . +* + END diff --git a/costa/native/external/blas/chbmv.f b/costa/native/external/blas/chbmv.f new file mode 100644 index 000000000..2de4c46c8 --- /dev/null +++ b/costa/native/external/blas/chbmv.f @@ -0,0 +1,309 @@ + SUBROUTINE CHBMV ( UPLO, N, K, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + COMPLEX ALPHA, BETA + INTEGER INCX, INCY, K, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CHBMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n hermitian band matrix, with k super-diagonals. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the band matrix A is being supplied as +* follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* being supplied. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* being supplied. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry, K specifies the number of super-diagonals of the +* matrix A. K must satisfy 0 .le. K. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the hermitian matrix, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer the upper +* triangular part of a hermitian band matrix from conventional +* full matrix storage to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the hermitian matrix, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer the lower +* triangular part of a hermitian band matrix from conventional +* full matrix storage to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Note that the imaginary parts of the diagonal elements need +* not be set and are assumed to be zero. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - COMPLEX array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* Y - COMPLEX array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the +* vector y. On exit, Y is overwritten by the updated vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KPLUS1, KX, KY, L +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( K.LT.0 )THEN + INFO = 3 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CHBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of the array A +* are accessed sequentially with one pass through A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when upper triangle of A is stored. +* + KPLUS1 = K + 1 + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + L = KPLUS1 - J + DO 50, I = MAX( 1, J - K ), J - 1 + Y( I ) = Y( I ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + CONJG( A( L + I, J ) )*X( I ) + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*REAL( A( KPLUS1, J ) ) + $ + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + L = KPLUS1 - J + DO 70, I = MAX( 1, J - K ), J - 1 + Y( IY ) = Y( IY ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + CONJG( A( L + I, J ) )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*REAL( A( KPLUS1, J ) ) + $ + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + IF( J.GT.K )THEN + KX = KX + INCX + KY = KY + INCY + END IF + 80 CONTINUE + END IF + ELSE +* +* Form y when lower triangle of A is stored. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*REAL( A( 1, J ) ) + L = 1 - J + DO 90, I = J + 1, MIN( N, J + K ) + Y( I ) = Y( I ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + CONJG( A( L + I, J ) )*X( I ) + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*REAL( A( 1, J ) ) + L = 1 - J + IX = JX + IY = JY + DO 110, I = J + 1, MIN( N, J + K ) + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + CONJG( A( L + I, J ) )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHBMV . +* + END diff --git a/costa/native/external/blas/chemm.f b/costa/native/external/blas/chemm.f new file mode 100644 index 000000000..374d74d0b --- /dev/null +++ b/costa/native/external/blas/chemm.f @@ -0,0 +1,304 @@ + SUBROUTINE CHEMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO + INTEGER M, N, LDA, LDB, LDC + COMPLEX ALPHA, BETA +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* CHEMM performs one of the matrix-matrix operations +* +* C := alpha*A*B + beta*C, +* +* or +* +* C := alpha*B*A + beta*C, +* +* where alpha and beta are scalars, A is an hermitian matrix and B and +* C are m by n matrices. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether the hermitian matrix A +* appears on the left or right in the operation as follows: +* +* SIDE = 'L' or 'l' C := alpha*A*B + beta*C, +* +* SIDE = 'R' or 'r' C := alpha*B*A + beta*C, +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the hermitian matrix A is to be +* referenced as follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of the +* hermitian matrix is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of the +* hermitian matrix is to be referenced. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix C. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix C. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is +* m when SIDE = 'L' or 'l' and is n otherwise. +* Before entry with SIDE = 'L' or 'l', the m by m part of +* the array A must contain the hermitian matrix, such that +* when UPLO = 'U' or 'u', the leading m by m upper triangular +* part of the array A must contain the upper triangular part +* of the hermitian matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading m by m lower triangular part of the array A +* must contain the lower triangular part of the hermitian +* matrix and the strictly upper triangular part of A is not +* referenced. +* Before entry with SIDE = 'R' or 'r', the n by n part of +* the array A must contain the hermitian matrix, such that +* when UPLO = 'U' or 'u', the leading n by n upper triangular +* part of the array A must contain the upper triangular part +* of the hermitian matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading n by n lower triangular part of the array A +* must contain the lower triangular part of the hermitian +* matrix and the strictly upper triangular part of A is not +* referenced. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), otherwise LDA must be at +* least max( 1, n ). +* Unchanged on exit. +* +* B - COMPLEX array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then C need not be set on input. +* Unchanged on exit. +* +* C - COMPLEX array of DIMENSION ( LDC, n ). +* Before entry, the leading m by n part of the array C must +* contain the matrix C, except when beta is zero, in which +* case C need not be set on entry. +* On exit, the array C is overwritten by the m by n updated +* matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, REAL +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, K, NROWA + COMPLEX TEMP1, TEMP2 +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Executable Statements .. +* +* Set NROWA as the number of rows of A. +* + IF( LSAME( SIDE, 'L' ) )THEN + NROWA = M + ELSE + NROWA = N + END IF + UPPER = LSAME( UPLO, 'U' ) +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.LSAME( SIDE, 'L' ) ).AND. + $ ( .NOT.LSAME( SIDE, 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO, 'L' ) ) )THEN + INFO = 2 + ELSE IF( M .LT.0 )THEN + INFO = 3 + ELSE IF( N .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, M ) )THEN + INFO = 12 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CHEMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, M + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( SIDE, 'L' ) )THEN +* +* Form C := alpha*A*B + beta*C. +* + IF( UPPER )THEN + DO 70, J = 1, N + DO 60, I = 1, M + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 50, K = 1, I - 1 + C( K, J ) = C( K, J ) + TEMP1*A( K, I ) + TEMP2 = TEMP2 + + $ B( K, J )*CONJG( A( K, I ) ) + 50 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*REAL( A( I, I ) ) + + $ ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*REAL( A( I, I ) ) + + $ ALPHA*TEMP2 + END IF + 60 CONTINUE + 70 CONTINUE + ELSE + DO 100, J = 1, N + DO 90, I = M, 1, -1 + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 80, K = I + 1, M + C( K, J ) = C( K, J ) + TEMP1*A( K, I ) + TEMP2 = TEMP2 + + $ B( K, J )*CONJG( A( K, I ) ) + 80 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*REAL( A( I, I ) ) + + $ ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*REAL( A( I, I ) ) + + $ ALPHA*TEMP2 + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form C := alpha*B*A + beta*C. +* + DO 170, J = 1, N + TEMP1 = ALPHA*REAL( A( J, J ) ) + IF( BETA.EQ.ZERO )THEN + DO 110, I = 1, M + C( I, J ) = TEMP1*B( I, J ) + 110 CONTINUE + ELSE + DO 120, I = 1, M + C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J ) + 120 CONTINUE + END IF + DO 140, K = 1, J - 1 + IF( UPPER )THEN + TEMP1 = ALPHA*A( K, J ) + ELSE + TEMP1 = ALPHA*CONJG( A( J, K ) ) + END IF + DO 130, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 130 CONTINUE + 140 CONTINUE + DO 160, K = J + 1, N + IF( UPPER )THEN + TEMP1 = ALPHA*CONJG( A( J, K ) ) + ELSE + TEMP1 = ALPHA*A( K, J ) + END IF + DO 150, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 150 CONTINUE + 160 CONTINUE + 170 CONTINUE + END IF +* + RETURN +* +* End of CHEMM . +* + END diff --git a/costa/native/external/blas/chemv.f b/costa/native/external/blas/chemv.f new file mode 100644 index 000000000..34c6ba393 --- /dev/null +++ b/costa/native/external/blas/chemv.f @@ -0,0 +1,266 @@ + SUBROUTINE CHEMV ( UPLO, N, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + COMPLEX ALPHA, BETA + INTEGER INCX, INCY, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CHEMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n hermitian matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of A is not referenced. +* Note that the imaginary parts of the diagonal elements need +* not be set and are assumed to be zero. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. On exit, Y is overwritten by the updated +* vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 5 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + ELSE IF( INCY.EQ.0 )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CHEMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when A is stored in upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + DO 50, I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + CONJG( A( I, J ) )*X( I ) + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*REAL( A( J, J ) ) + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70, I = 1, J - 1 + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + CONJG( A( I, J ) )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*REAL( A( J, J ) ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y when A is stored in lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*REAL( A( J, J ) ) + DO 90, I = J + 1, N + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + CONJG( A( I, J ) )*X( I ) + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*REAL( A( J, J ) ) + IX = JX + IY = JY + DO 110, I = J + 1, N + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + CONJG( A( I, J ) )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHEMV . +* + END diff --git a/costa/native/external/blas/cher.f b/costa/native/external/blas/cher.f new file mode 100644 index 000000000..1934f93b0 --- /dev/null +++ b/costa/native/external/blas/cher.f @@ -0,0 +1,212 @@ + SUBROUTINE CHER ( UPLO, N, ALPHA, X, INCX, A, LDA ) +* .. Scalar Arguments .. + REAL ALPHA + INTEGER INCX, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CHER performs the hermitian rank 1 operation +* +* A := alpha*x*conjg( x' ) + A, +* +* where alpha is a real scalar, x is an n element vector and A is an +* n by n hermitian matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of A is not referenced. On exit, the +* upper triangular part of the array A is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of A is not referenced. On exit, the +* lower triangular part of the array A is overwritten by the +* lower triangular part of the updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, J, JX, KX +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CHER ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.REAL( ZERO ) ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when A is stored in upper triangle. +* + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*CONJG( X( J ) ) + DO 10, I = 1, J - 1 + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + A( J, J ) = REAL( A( J, J ) ) + REAL( X( J )*TEMP ) + ELSE + A( J, J ) = REAL( A( J, J ) ) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*CONJG( X( JX ) ) + IX = KX + DO 30, I = 1, J - 1 + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + A( J, J ) = REAL( A( J, J ) ) + REAL( X( JX )*TEMP ) + ELSE + A( J, J ) = REAL( A( J, J ) ) + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in lower triangle. +* + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*CONJG( X( J ) ) + A( J, J ) = REAL( A( J, J ) ) + REAL( TEMP*X( J ) ) + DO 50, I = J + 1, N + A( I, J ) = A( I, J ) + X( I )*TEMP + 50 CONTINUE + ELSE + A( J, J ) = REAL( A( J, J ) ) + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*CONJG( X( JX ) ) + A( J, J ) = REAL( A( J, J ) ) + REAL( TEMP*X( JX ) ) + IX = JX + DO 70, I = J + 1, N + IX = IX + INCX + A( I, J ) = A( I, J ) + X( IX )*TEMP + 70 CONTINUE + ELSE + A( J, J ) = REAL( A( J, J ) ) + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHER . +* + END diff --git a/costa/native/external/blas/cher2.f b/costa/native/external/blas/cher2.f new file mode 100644 index 000000000..d7660ad63 --- /dev/null +++ b/costa/native/external/blas/cher2.f @@ -0,0 +1,249 @@ + SUBROUTINE CHER2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, A, LDA ) +* .. Scalar Arguments .. + COMPLEX ALPHA + INTEGER INCX, INCY, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CHER2 performs the hermitian rank 2 operation +* +* A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, +* +* where alpha is a scalar, x and y are n element vectors and A is an n +* by n hermitian matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of A is not referenced. On exit, the +* upper triangular part of the array A is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of A is not referenced. On exit, the +* lower triangular part of the array A is overwritten by the +* lower triangular part of the updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CHER2 ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set up the start points in X and Y if the increments are not both +* unity. +* + IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF + JX = KX + JY = KY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when A is stored in the upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 20, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*CONJG( Y( J ) ) + TEMP2 = CONJG( ALPHA*X( J ) ) + DO 10, I = 1, J - 1 + A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2 + 10 CONTINUE + A( J, J ) = REAL( A( J, J ) ) + + $ REAL( X( J )*TEMP1 + Y( J )*TEMP2 ) + ELSE + A( J, J ) = REAL( A( J, J ) ) + END IF + 20 CONTINUE + ELSE + DO 40, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*CONJG( Y( JY ) ) + TEMP2 = CONJG( ALPHA*X( JX ) ) + IX = KX + IY = KY + DO 30, I = 1, J - 1 + A( I, J ) = A( I, J ) + X( IX )*TEMP1 + $ + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 30 CONTINUE + A( J, J ) = REAL( A( J, J ) ) + + $ REAL( X( JX )*TEMP1 + Y( JY )*TEMP2 ) + ELSE + A( J, J ) = REAL( A( J, J ) ) + END IF + JX = JX + INCX + JY = JY + INCY + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in the lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*CONJG( Y( J ) ) + TEMP2 = CONJG( ALPHA*X( J ) ) + A( J, J ) = REAL( A( J, J ) ) + + $ REAL( X( J )*TEMP1 + Y( J )*TEMP2 ) + DO 50, I = J + 1, N + A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2 + 50 CONTINUE + ELSE + A( J, J ) = REAL( A( J, J ) ) + END IF + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*CONJG( Y( JY ) ) + TEMP2 = CONJG( ALPHA*X( JX ) ) + A( J, J ) = REAL( A( J, J ) ) + + $ REAL( X( JX )*TEMP1 + Y( JY )*TEMP2 ) + IX = JX + IY = JY + DO 70, I = J + 1, N + IX = IX + INCX + IY = IY + INCY + A( I, J ) = A( I, J ) + X( IX )*TEMP1 + $ + Y( IY )*TEMP2 + 70 CONTINUE + ELSE + A( J, J ) = REAL( A( J, J ) ) + END IF + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHER2 . +* + END diff --git a/costa/native/external/blas/cher2k.f b/costa/native/external/blas/cher2k.f new file mode 100644 index 000000000..c9a00aede --- /dev/null +++ b/costa/native/external/blas/cher2k.f @@ -0,0 +1,371 @@ + SUBROUTINE CHER2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 UPLO, TRANS + INTEGER N, K, LDA, LDB, LDC + REAL BETA + COMPLEX ALPHA +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* CHER2K performs one of the hermitian rank 2k operations +* +* C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C, +* +* or +* +* C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C, +* +* where alpha and beta are scalars with beta real, C is an n by n +* hermitian matrix and A and B are n by k matrices in the first case +* and k by n matrices in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*conjg( B' ) + +* conjg( alpha )*B*conjg( A' ) + +* beta*C. +* +* TRANS = 'C' or 'c' C := alpha*conjg( A' )*B + +* conjg( alpha )*conjg( B' )*A + +* beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrices A and B, and on entry with +* TRANS = 'C' or 'c', K specifies the number of rows of the +* matrices A and B. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* B - COMPLEX array of DIMENSION ( LDB, kb ), where kb is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array B must contain the matrix B, otherwise +* the leading k by n part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDB must be at least max( 1, n ), otherwise LDB must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - COMPLEX array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1. +* Ed Anderson, Cray Research Inc. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, REAL +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + COMPLEX TEMP1, TEMP2 +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) )THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'C' ) ) )THEN + INFO = 2 + ELSE IF( N .LT.0 )THEN + INFO = 3 + ELSE IF( K .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, N ) )THEN + INFO = 12 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CHER2K', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( UPPER )THEN + IF( BETA.EQ.REAL( ZERO ) )THEN + DO 20, J = 1, N + DO 10, I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, J - 1 + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + C( J, J ) = BETA*REAL( C( J, J ) ) + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.REAL( ZERO ) )THEN + DO 60, J = 1, N + DO 50, I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80, J = 1, N + C( J, J ) = BETA*REAL( C( J, J ) ) + DO 70, I = J + 1, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + +* C. +* + IF( UPPER )THEN + DO 130, J = 1, N + IF( BETA.EQ.REAL( ZERO ) )THEN + DO 90, I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 100, I = 1, J - 1 + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + C( J, J ) = BETA*REAL( C( J, J ) ) + ELSE + C( J, J ) = REAL( C( J, J ) ) + END IF + DO 120, L = 1, K + IF( ( A( J, L ).NE.ZERO ).OR. + $ ( B( J, L ).NE.ZERO ) )THEN + TEMP1 = ALPHA*CONJG( B( J, L ) ) + TEMP2 = CONJG( ALPHA*A( J, L ) ) + DO 110, I = 1, J - 1 + C( I, J ) = C( I, J ) + A( I, L )*TEMP1 + + $ B( I, L )*TEMP2 + 110 CONTINUE + C( J, J ) = REAL( C( J, J ) ) + + $ REAL( A( J, L )*TEMP1 + + $ B( J, L )*TEMP2 ) + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180, J = 1, N + IF( BETA.EQ.REAL( ZERO ) )THEN + DO 140, I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 150, I = J + 1, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + C( J, J ) = BETA*REAL( C( J, J ) ) + ELSE + C( J, J ) = REAL( C( J, J ) ) + END IF + DO 170, L = 1, K + IF( ( A( J, L ).NE.ZERO ).OR. + $ ( B( J, L ).NE.ZERO ) )THEN + TEMP1 = ALPHA*CONJG( B( J, L ) ) + TEMP2 = CONJG( ALPHA*A( J, L ) ) + DO 160, I = J + 1, N + C( I, J ) = C( I, J ) + A( I, L )*TEMP1 + + $ B( I, L )*TEMP2 + 160 CONTINUE + C( J, J ) = REAL( C( J, J ) ) + + $ REAL( A( J, L )*TEMP1 + + $ B( J, L )*TEMP2 ) + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + +* C. +* + IF( UPPER )THEN + DO 210, J = 1, N + DO 200, I = 1, J + TEMP1 = ZERO + TEMP2 = ZERO + DO 190, L = 1, K + TEMP1 = TEMP1 + CONJG( A( L, I ) )*B( L, J ) + TEMP2 = TEMP2 + CONJG( B( L, I ) )*A( L, J ) + 190 CONTINUE + IF( I.EQ.J )THEN + IF( BETA.EQ.REAL( ZERO ) )THEN + C( J, J ) = REAL( ALPHA *TEMP1 + + $ CONJG( ALPHA )*TEMP2 ) + ELSE + C( J, J ) = BETA*REAL( C( J, J ) ) + + $ REAL( ALPHA *TEMP1 + + $ CONJG( ALPHA )*TEMP2 ) + END IF + ELSE + IF( BETA.EQ.REAL( ZERO ) )THEN + C( I, J ) = ALPHA*TEMP1 + CONJG( ALPHA )*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ ALPHA*TEMP1 + CONJG( ALPHA )*TEMP2 + END IF + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240, J = 1, N + DO 230, I = J, N + TEMP1 = ZERO + TEMP2 = ZERO + DO 220, L = 1, K + TEMP1 = TEMP1 + CONJG( A( L, I ) )*B( L, J ) + TEMP2 = TEMP2 + CONJG( B( L, I ) )*A( L, J ) + 220 CONTINUE + IF( I.EQ.J )THEN + IF( BETA.EQ.REAL( ZERO ) )THEN + C( J, J ) = REAL( ALPHA *TEMP1 + + $ CONJG( ALPHA )*TEMP2 ) + ELSE + C( J, J ) = BETA*REAL( C( J, J ) ) + + $ REAL( ALPHA *TEMP1 + + $ CONJG( ALPHA )*TEMP2 ) + END IF + ELSE + IF( BETA.EQ.REAL( ZERO ) )THEN + C( I, J ) = ALPHA*TEMP1 + CONJG( ALPHA )*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ ALPHA*TEMP1 + CONJG( ALPHA )*TEMP2 + END IF + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHER2K. +* + END diff --git a/costa/native/external/blas/cherk.f b/costa/native/external/blas/cherk.f new file mode 100644 index 000000000..f5cd99974 --- /dev/null +++ b/costa/native/external/blas/cherk.f @@ -0,0 +1,328 @@ + SUBROUTINE CHERK ( UPLO, TRANS, N, K, ALPHA, A, LDA, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 UPLO, TRANS + INTEGER N, K, LDA, LDC + REAL ALPHA, BETA +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* CHERK performs one of the hermitian rank k operations +* +* C := alpha*A*conjg( A' ) + beta*C, +* +* or +* +* C := alpha*conjg( A' )*A + beta*C, +* +* where alpha and beta are real scalars, C is an n by n hermitian +* matrix and A is an n by k matrix in the first case and a k by n +* matrix in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*conjg( A' ) + beta*C. +* +* TRANS = 'C' or 'c' C := alpha*conjg( A' )*A + beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrix A, and on entry with +* TRANS = 'C' or 'c', K specifies the number of rows of the +* matrix A. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - COMPLEX array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1. +* Ed Anderson, Cray Research Inc. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CMPLX, CONJG, MAX, REAL +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + REAL RTEMP + COMPLEX TEMP +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) )THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'C' ) ) )THEN + INFO = 2 + ELSE IF( N .LT.0 )THEN + INFO = 3 + ELSE IF( K .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDC.LT.MAX( 1, N ) )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CHERK ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( UPPER )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, J - 1 + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + C( J, J ) = BETA*REAL( C( J, J ) ) + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.ZERO )THEN + DO 60, J = 1, N + DO 50, I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80, J = 1, N + C( J, J ) = BETA*REAL( C( J, J ) ) + DO 70, I = J + 1, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form C := alpha*A*conjg( A' ) + beta*C. +* + IF( UPPER )THEN + DO 130, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 90, I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 100, I = 1, J - 1 + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + C( J, J ) = BETA*REAL( C( J, J ) ) + ELSE + C( J, J ) = REAL( C( J, J ) ) + END IF + DO 120, L = 1, K + IF( A( J, L ).NE.CMPLX( ZERO ) )THEN + TEMP = ALPHA*CONJG( A( J, L ) ) + DO 110, I = 1, J - 1 + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 110 CONTINUE + C( J, J ) = REAL( C( J, J ) ) + + $ REAL( TEMP*A( I, L ) ) + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 140, I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + C( J, J ) = BETA*REAL( C( J, J ) ) + DO 150, I = J + 1, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + ELSE + C( J, J ) = REAL( C( J, J ) ) + END IF + DO 170, L = 1, K + IF( A( J, L ).NE.CMPLX( ZERO ) )THEN + TEMP = ALPHA*CONJG( A( J, L ) ) + C( J, J ) = REAL( C( J, J ) ) + + $ REAL( TEMP*A( J, L ) ) + DO 160, I = J + 1, N + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*conjg( A' )*A + beta*C. +* + IF( UPPER )THEN + DO 220, J = 1, N + DO 200, I = 1, J - 1 + TEMP = ZERO + DO 190, L = 1, K + TEMP = TEMP + CONJG( A( L, I ) )*A( L, J ) + 190 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 200 CONTINUE + RTEMP = ZERO + DO 210, L = 1, K + RTEMP = RTEMP + CONJG( A( L, J ) )*A( L, J ) + 210 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( J, J ) = ALPHA*RTEMP + ELSE + C( J, J ) = ALPHA*RTEMP + BETA*REAL( C( J, J ) ) + END IF + 220 CONTINUE + ELSE + DO 260, J = 1, N + RTEMP = ZERO + DO 230, L = 1, K + RTEMP = RTEMP + CONJG( A( L, J ) )*A( L, J ) + 230 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( J, J ) = ALPHA*RTEMP + ELSE + C( J, J ) = ALPHA*RTEMP + BETA*REAL( C( J, J ) ) + END IF + DO 250, I = J + 1, N + TEMP = ZERO + DO 240, L = 1, K + TEMP = TEMP + CONJG( A( L, I ) )*A( L, J ) + 240 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 250 CONTINUE + 260 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHERK . +* + END diff --git a/costa/native/external/blas/chpmv.f b/costa/native/external/blas/chpmv.f new file mode 100644 index 000000000..f5a566e3e --- /dev/null +++ b/costa/native/external/blas/chpmv.f @@ -0,0 +1,270 @@ + SUBROUTINE CHPMV ( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY ) +* .. Scalar Arguments .. + COMPLEX ALPHA, BETA + INTEGER INCX, INCY, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX AP( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CHPMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n hermitian matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* AP - COMPLEX array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. +* Note that the imaginary parts of the diagonal elements need +* not be set and are assumed to be zero. +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. On exit, Y is overwritten by the updated +* vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 6 + ELSE IF( INCY.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CHPMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when AP contains the upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + K = KK + DO 50, I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + CONJG( AP( K ) )*X( I ) + K = K + 1 + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*REAL( AP( KK + J - 1 ) ) + $ + ALPHA*TEMP2 + KK = KK + J + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70, K = KK, KK + J - 2 + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + CONJG( AP( K ) )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*REAL( AP( KK + J - 1 ) ) + $ + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + J + 80 CONTINUE + END IF + ELSE +* +* Form y when AP contains the lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*REAL( AP( KK ) ) + K = KK + 1 + DO 90, I = J + 1, N + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + CONJG( AP( K ) )*X( I ) + K = K + 1 + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + KK = KK + ( N - J + 1 ) + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*REAL( AP( KK ) ) + IX = JX + IY = JY + DO 110, K = KK + 1, KK + N - J + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + CONJG( AP( K ) )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + ( N - J + 1 ) + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHPMV . +* + END diff --git a/costa/native/external/blas/chpr.f b/costa/native/external/blas/chpr.f new file mode 100644 index 000000000..bb41b231b --- /dev/null +++ b/costa/native/external/blas/chpr.f @@ -0,0 +1,217 @@ + SUBROUTINE CHPR ( UPLO, N, ALPHA, X, INCX, AP ) +* .. Scalar Arguments .. + REAL ALPHA + INTEGER INCX, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CHPR performs the hermitian rank 1 operation +* +* A := alpha*x*conjg( x' ) + A, +* +* where alpha is a real scalar, x is an n element vector and A is an +* n by n hermitian matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* AP - COMPLEX array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. On exit, the array +* AP is overwritten by the upper triangular part of the +* updated matrix. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. On exit, the array +* AP is overwritten by the lower triangular part of the +* updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CHPR ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.REAL( ZERO ) ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when upper triangle is stored in AP. +* + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*CONJG( X( J ) ) + K = KK + DO 10, I = 1, J - 1 + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 10 CONTINUE + AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) ) + $ + REAL( X( J )*TEMP ) + ELSE + AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) ) + END IF + KK = KK + J + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*CONJG( X( JX ) ) + IX = KX + DO 30, K = KK, KK + J - 2 + AP( K ) = AP( K ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) ) + $ + REAL( X( JX )*TEMP ) + ELSE + AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) ) + END IF + JX = JX + INCX + KK = KK + J + 40 CONTINUE + END IF + ELSE +* +* Form A when lower triangle is stored in AP. +* + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*CONJG( X( J ) ) + AP( KK ) = REAL( AP( KK ) ) + REAL( TEMP*X( J ) ) + K = KK + 1 + DO 50, I = J + 1, N + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 50 CONTINUE + ELSE + AP( KK ) = REAL( AP( KK ) ) + END IF + KK = KK + N - J + 1 + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*CONJG( X( JX ) ) + AP( KK ) = REAL( AP( KK ) ) + REAL( TEMP*X( JX ) ) + IX = JX + DO 70, K = KK + 1, KK + N - J + IX = IX + INCX + AP( K ) = AP( K ) + X( IX )*TEMP + 70 CONTINUE + ELSE + AP( KK ) = REAL( AP( KK ) ) + END IF + JX = JX + INCX + KK = KK + N - J + 1 + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHPR . +* + END diff --git a/costa/native/external/blas/chpr2.f b/costa/native/external/blas/chpr2.f new file mode 100644 index 000000000..474d99b96 --- /dev/null +++ b/costa/native/external/blas/chpr2.f @@ -0,0 +1,251 @@ + SUBROUTINE CHPR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, AP ) +* .. Scalar Arguments .. + COMPLEX ALPHA + INTEGER INCX, INCY, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX AP( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CHPR2 performs the hermitian rank 2 operation +* +* A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, +* +* where alpha is a scalar, x and y are n element vectors and A is an +* n by n hermitian matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* AP - COMPLEX array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. On exit, the array +* AP is overwritten by the upper triangular part of the +* updated matrix. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. On exit, the array +* AP is overwritten by the lower triangular part of the +* updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CHPR2 ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set up the start points in X and Y if the increments are not both +* unity. +* + IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF + JX = KX + JY = KY + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when upper triangle is stored in AP. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 20, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*CONJG( Y( J ) ) + TEMP2 = CONJG( ALPHA*X( J ) ) + K = KK + DO 10, I = 1, J - 1 + AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2 + K = K + 1 + 10 CONTINUE + AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) ) + + $ REAL( X( J )*TEMP1 + Y( J )*TEMP2 ) + ELSE + AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) ) + END IF + KK = KK + J + 20 CONTINUE + ELSE + DO 40, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*CONJG( Y( JY ) ) + TEMP2 = CONJG( ALPHA*X( JX ) ) + IX = KX + IY = KY + DO 30, K = KK, KK + J - 2 + AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 30 CONTINUE + AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) ) + + $ REAL( X( JX )*TEMP1 + + $ Y( JY )*TEMP2 ) + ELSE + AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) ) + END IF + JX = JX + INCX + JY = JY + INCY + KK = KK + J + 40 CONTINUE + END IF + ELSE +* +* Form A when lower triangle is stored in AP. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*CONJG( Y( J ) ) + TEMP2 = CONJG( ALPHA*X( J ) ) + AP( KK ) = REAL( AP( KK ) ) + + $ REAL( X( J )*TEMP1 + Y( J )*TEMP2 ) + K = KK + 1 + DO 50, I = J + 1, N + AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2 + K = K + 1 + 50 CONTINUE + ELSE + AP( KK ) = REAL( AP( KK ) ) + END IF + KK = KK + N - J + 1 + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*CONJG( Y( JY ) ) + TEMP2 = CONJG( ALPHA*X( JX ) ) + AP( KK ) = REAL( AP( KK ) ) + + $ REAL( X( JX )*TEMP1 + Y( JY )*TEMP2 ) + IX = JX + IY = JY + DO 70, K = KK + 1, KK + N - J + IX = IX + INCX + IY = IY + INCY + AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2 + 70 CONTINUE + ELSE + AP( KK ) = REAL( AP( KK ) ) + END IF + JX = JX + INCX + JY = JY + INCY + KK = KK + N - J + 1 + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHPR2 . +* + END diff --git a/costa/native/external/blas/crotg.f b/costa/native/external/blas/crotg.f new file mode 100644 index 000000000..659807950 --- /dev/null +++ b/costa/native/external/blas/crotg.f @@ -0,0 +1,20 @@ + subroutine crotg(ca,cb,c,s) + complex ca,cb,s + real c + real norm,scale + complex alpha + if (cabs(ca) .ne. 0.) go to 10 + c = 0. + s = (1.,0.) + ca = cb + go to 20 + 10 continue + scale = cabs(ca) + cabs(cb) + norm = scale * sqrt((cabs(ca/scale))**2 + (cabs(cb/scale))**2) + alpha = ca /cabs(ca) + c = cabs(ca) / norm + s = alpha * conjg(cb) / norm + ca = alpha * norm + 20 continue + return + end diff --git a/costa/native/external/blas/cscal.f b/costa/native/external/blas/cscal.f new file mode 100644 index 000000000..56eeebac8 --- /dev/null +++ b/costa/native/external/blas/cscal.f @@ -0,0 +1,28 @@ + subroutine cscal(n,ca,cx,incx) +c +c scales a vector by a constant. +c jack dongarra, linpack, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + complex ca,cx(*) + integer i,incx,n,nincx +c + if( n.le.0 .or. incx.le.0 )return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + nincx = n*incx + do 10 i = 1,nincx,incx + cx(i) = ca*cx(i) + 10 continue + return +c +c code for increment equal to 1 +c + 20 do 30 i = 1,n + cx(i) = ca*cx(i) + 30 continue + return + end diff --git a/costa/native/external/blas/csscal.f b/costa/native/external/blas/csscal.f new file mode 100644 index 000000000..edd7e5554 --- /dev/null +++ b/costa/native/external/blas/csscal.f @@ -0,0 +1,29 @@ + subroutine csscal(n,sa,cx,incx) +c +c scales a complex vector by a real constant. +c jack dongarra, linpack, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + complex cx(*) + real sa + integer i,incx,n,nincx +c + if( n.le.0 .or. incx.le.0 )return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + nincx = n*incx + do 10 i = 1,nincx,incx + cx(i) = cmplx(sa*real(cx(i)),sa*aimag(cx(i))) + 10 continue + return +c +c code for increment equal to 1 +c + 20 do 30 i = 1,n + cx(i) = cmplx(sa*real(cx(i)),sa*aimag(cx(i))) + 30 continue + return + end diff --git a/costa/native/external/blas/cswap.f b/costa/native/external/blas/cswap.f new file mode 100644 index 000000000..ede4495f6 --- /dev/null +++ b/costa/native/external/blas/cswap.f @@ -0,0 +1,36 @@ + subroutine cswap (n,cx,incx,cy,incy) +c +c interchanges two vectors. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + complex cx(*),cy(*),ctemp + integer i,incx,incy,ix,iy,n +c + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments not equal +c to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + ctemp = cx(ix) + cx(ix) = cy(iy) + cy(iy) = ctemp + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 + 20 do 30 i = 1,n + ctemp = cx(i) + cx(i) = cy(i) + cy(i) = ctemp + 30 continue + return + end diff --git a/costa/native/external/blas/csymm.f b/costa/native/external/blas/csymm.f new file mode 100644 index 000000000..f4eda9017 --- /dev/null +++ b/costa/native/external/blas/csymm.f @@ -0,0 +1,296 @@ + SUBROUTINE CSYMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO + INTEGER M, N, LDA, LDB, LDC + COMPLEX ALPHA, BETA +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* CSYMM performs one of the matrix-matrix operations +* +* C := alpha*A*B + beta*C, +* +* or +* +* C := alpha*B*A + beta*C, +* +* where alpha and beta are scalars, A is a symmetric matrix and B and +* C are m by n matrices. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether the symmetric matrix A +* appears on the left or right in the operation as follows: +* +* SIDE = 'L' or 'l' C := alpha*A*B + beta*C, +* +* SIDE = 'R' or 'r' C := alpha*B*A + beta*C, +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the symmetric matrix A is to be +* referenced as follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of the +* symmetric matrix is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of the +* symmetric matrix is to be referenced. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix C. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix C. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is +* m when SIDE = 'L' or 'l' and is n otherwise. +* Before entry with SIDE = 'L' or 'l', the m by m part of +* the array A must contain the symmetric matrix, such that +* when UPLO = 'U' or 'u', the leading m by m upper triangular +* part of the array A must contain the upper triangular part +* of the symmetric matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading m by m lower triangular part of the array A +* must contain the lower triangular part of the symmetric +* matrix and the strictly upper triangular part of A is not +* referenced. +* Before entry with SIDE = 'R' or 'r', the n by n part of +* the array A must contain the symmetric matrix, such that +* when UPLO = 'U' or 'u', the leading n by n upper triangular +* part of the array A must contain the upper triangular part +* of the symmetric matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading n by n lower triangular part of the array A +* must contain the lower triangular part of the symmetric +* matrix and the strictly upper triangular part of A is not +* referenced. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), otherwise LDA must be at +* least max( 1, n ). +* Unchanged on exit. +* +* B - COMPLEX array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then C need not be set on input. +* Unchanged on exit. +* +* C - COMPLEX array of DIMENSION ( LDC, n ). +* Before entry, the leading m by n part of the array C must +* contain the matrix C, except when beta is zero, in which +* case C need not be set on entry. +* On exit, the array C is overwritten by the m by n updated +* matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, K, NROWA + COMPLEX TEMP1, TEMP2 +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Executable Statements .. +* +* Set NROWA as the number of rows of A. +* + IF( LSAME( SIDE, 'L' ) )THEN + NROWA = M + ELSE + NROWA = N + END IF + UPPER = LSAME( UPLO, 'U' ) +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.LSAME( SIDE, 'L' ) ).AND. + $ ( .NOT.LSAME( SIDE, 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO, 'L' ) ) )THEN + INFO = 2 + ELSE IF( M .LT.0 )THEN + INFO = 3 + ELSE IF( N .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, M ) )THEN + INFO = 12 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CSYMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, M + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( SIDE, 'L' ) )THEN +* +* Form C := alpha*A*B + beta*C. +* + IF( UPPER )THEN + DO 70, J = 1, N + DO 60, I = 1, M + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 50, K = 1, I - 1 + C( K, J ) = C( K, J ) + TEMP1 *A( K, I ) + TEMP2 = TEMP2 + B( K, J )*A( K, I ) + 50 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*A( I, I ) + ALPHA*TEMP2 + END IF + 60 CONTINUE + 70 CONTINUE + ELSE + DO 100, J = 1, N + DO 90, I = M, 1, -1 + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 80, K = I + 1, M + C( K, J ) = C( K, J ) + TEMP1 *A( K, I ) + TEMP2 = TEMP2 + B( K, J )*A( K, I ) + 80 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*A( I, I ) + ALPHA*TEMP2 + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form C := alpha*B*A + beta*C. +* + DO 170, J = 1, N + TEMP1 = ALPHA*A( J, J ) + IF( BETA.EQ.ZERO )THEN + DO 110, I = 1, M + C( I, J ) = TEMP1*B( I, J ) + 110 CONTINUE + ELSE + DO 120, I = 1, M + C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J ) + 120 CONTINUE + END IF + DO 140, K = 1, J - 1 + IF( UPPER )THEN + TEMP1 = ALPHA*A( K, J ) + ELSE + TEMP1 = ALPHA*A( J, K ) + END IF + DO 130, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 130 CONTINUE + 140 CONTINUE + DO 160, K = J + 1, N + IF( UPPER )THEN + TEMP1 = ALPHA*A( J, K ) + ELSE + TEMP1 = ALPHA*A( K, J ) + END IF + DO 150, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 150 CONTINUE + 160 CONTINUE + 170 CONTINUE + END IF +* + RETURN +* +* End of CSYMM . +* + END diff --git a/costa/native/external/blas/csyr2k.f b/costa/native/external/blas/csyr2k.f new file mode 100644 index 000000000..eb3861077 --- /dev/null +++ b/costa/native/external/blas/csyr2k.f @@ -0,0 +1,324 @@ + SUBROUTINE CSYR2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 UPLO, TRANS + INTEGER N, K, LDA, LDB, LDC + COMPLEX ALPHA, BETA +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* CSYR2K performs one of the symmetric rank 2k operations +* +* C := alpha*A*B' + alpha*B*A' + beta*C, +* +* or +* +* C := alpha*A'*B + alpha*B'*A + beta*C, +* +* where alpha and beta are scalars, C is an n by n symmetric matrix +* and A and B are n by k matrices in the first case and k by n +* matrices in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + +* beta*C. +* +* TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + +* beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrices A and B, and on entry with +* TRANS = 'T' or 't', K specifies the number of rows of the +* matrices A and B. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* B - COMPLEX array of DIMENSION ( LDB, kb ), where kb is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array B must contain the matrix B, otherwise +* the leading k by n part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDB must be at least max( 1, n ), otherwise LDB must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - COMPLEX array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + COMPLEX TEMP1, TEMP2 +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) )THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'T' ) ) )THEN + INFO = 2 + ELSE IF( N .LT.0 )THEN + INFO = 3 + ELSE IF( K .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, N ) )THEN + INFO = 12 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CSYR2K', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( UPPER )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, J + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.ZERO )THEN + DO 60, J = 1, N + DO 50, I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80, J = 1, N + DO 70, I = J, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form C := alpha*A*B' + alpha*B*A' + C. +* + IF( UPPER )THEN + DO 130, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 90, I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 100, I = 1, J + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + END IF + DO 120, L = 1, K + IF( ( A( J, L ).NE.ZERO ).OR. + $ ( B( J, L ).NE.ZERO ) )THEN + TEMP1 = ALPHA*B( J, L ) + TEMP2 = ALPHA*A( J, L ) + DO 110, I = 1, J + C( I, J ) = C( I, J ) + A( I, L )*TEMP1 + + $ B( I, L )*TEMP2 + 110 CONTINUE + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 140, I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 150, I = J, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + END IF + DO 170, L = 1, K + IF( ( A( J, L ).NE.ZERO ).OR. + $ ( B( J, L ).NE.ZERO ) )THEN + TEMP1 = ALPHA*B( J, L ) + TEMP2 = ALPHA*A( J, L ) + DO 160, I = J, N + C( I, J ) = C( I, J ) + A( I, L )*TEMP1 + + $ B( I, L )*TEMP2 + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*A'*B + alpha*B'*A + C. +* + IF( UPPER )THEN + DO 210, J = 1, N + DO 200, I = 1, J + TEMP1 = ZERO + TEMP2 = ZERO + DO 190, L = 1, K + TEMP1 = TEMP1 + A( L, I )*B( L, J ) + TEMP2 = TEMP2 + B( L, I )*A( L, J ) + 190 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ ALPHA*TEMP1 + ALPHA*TEMP2 + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240, J = 1, N + DO 230, I = J, N + TEMP1 = ZERO + TEMP2 = ZERO + DO 220, L = 1, K + TEMP1 = TEMP1 + A( L, I )*B( L, J ) + TEMP2 = TEMP2 + B( L, I )*A( L, J ) + 220 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ ALPHA*TEMP1 + ALPHA*TEMP2 + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of CSYR2K. +* + END diff --git a/costa/native/external/blas/csyrk.f b/costa/native/external/blas/csyrk.f new file mode 100644 index 000000000..d76bce052 --- /dev/null +++ b/costa/native/external/blas/csyrk.f @@ -0,0 +1,293 @@ + SUBROUTINE CSYRK ( UPLO, TRANS, N, K, ALPHA, A, LDA, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 UPLO, TRANS + INTEGER N, K, LDA, LDC + COMPLEX ALPHA, BETA +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* CSYRK performs one of the symmetric rank k operations +* +* C := alpha*A*A' + beta*C, +* +* or +* +* C := alpha*A'*A + beta*C, +* +* where alpha and beta are scalars, C is an n by n symmetric matrix +* and A is an n by k matrix in the first case and a k by n matrix +* in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. +* +* TRANS = 'T' or 't' C := alpha*A'*A + beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrix A, and on entry with +* TRANS = 'T' or 't', K specifies the number of rows of the +* matrix A. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - COMPLEX . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - COMPLEX array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + COMPLEX TEMP +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) )THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'T' ) ) )THEN + INFO = 2 + ELSE IF( N .LT.0 )THEN + INFO = 3 + ELSE IF( K .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDC.LT.MAX( 1, N ) )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CSYRK ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( UPPER )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, J + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.ZERO )THEN + DO 60, J = 1, N + DO 50, I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80, J = 1, N + DO 70, I = J, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form C := alpha*A*A' + beta*C. +* + IF( UPPER )THEN + DO 130, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 90, I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 100, I = 1, J + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + END IF + DO 120, L = 1, K + IF( A( J, L ).NE.ZERO )THEN + TEMP = ALPHA*A( J, L ) + DO 110, I = 1, J + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 110 CONTINUE + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 140, I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 150, I = J, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + END IF + DO 170, L = 1, K + IF( A( J, L ).NE.ZERO )THEN + TEMP = ALPHA*A( J, L ) + DO 160, I = J, N + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*A'*A + beta*C. +* + IF( UPPER )THEN + DO 210, J = 1, N + DO 200, I = 1, J + TEMP = ZERO + DO 190, L = 1, K + TEMP = TEMP + A( L, I )*A( L, J ) + 190 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240, J = 1, N + DO 230, I = J, N + TEMP = ZERO + DO 220, L = 1, K + TEMP = TEMP + A( L, I )*A( L, J ) + 220 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of CSYRK . +* + END diff --git a/costa/native/external/blas/ctbmv.f b/costa/native/external/blas/ctbmv.f new file mode 100644 index 000000000..5b9ceb43f --- /dev/null +++ b/costa/native/external/blas/ctbmv.f @@ -0,0 +1,377 @@ + SUBROUTINE CTBMV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, K, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CTBMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, or x := conjg( A' )*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular band matrix, with ( k + 1 ) diagonals. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := conjg( A' )*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with UPLO = 'U' or 'u', K specifies the number of +* super-diagonals of the matrix A. +* On entry with UPLO = 'L' or 'l', K specifies the number of +* sub-diagonals of the matrix A. +* K must satisfy 0 .le. K. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer an upper +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer a lower +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Note that when DIAG = 'U' or 'u' the elements of the array A +* corresponding to the diagonal elements of the matrix are not +* referenced, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, J, JX, KPLUS1, KX, L + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( K.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 7 + ELSE IF( INCX.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CTBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + L = KPLUS1 - J + DO 10, I = MAX( 1, J - K ), J - 1 + X( I ) = X( I ) + TEMP*A( L + I, J ) + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( KPLUS1, J ) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + L = KPLUS1 - J + DO 30, I = MAX( 1, J - K ), J - 1 + X( IX ) = X( IX ) + TEMP*A( L + I, J ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( KPLUS1, J ) + END IF + JX = JX + INCX + IF( J.GT.K ) + $ KX = KX + INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + L = 1 - J + DO 50, I = MIN( N, J + K ), J + 1, -1 + X( I ) = X( I ) + TEMP*A( L + I, J ) + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( 1, J ) + END IF + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + L = 1 - J + DO 70, I = MIN( N, J + K ), J + 1, -1 + X( IX ) = X( IX ) + TEMP*A( L + I, J ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( 1, J ) + END IF + JX = JX - INCX + IF( ( N - J ).GE.K ) + $ KX = KX - INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x or x := conjg( A' )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 110, J = N, 1, -1 + TEMP = X( J ) + L = KPLUS1 - J + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( KPLUS1, J ) + DO 90, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + A( L + I, J )*X( I ) + 90 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( A( KPLUS1, J ) ) + DO 100, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + CONJG( A( L + I, J ) )*X( I ) + 100 CONTINUE + END IF + X( J ) = TEMP + 110 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 140, J = N, 1, -1 + TEMP = X( JX ) + KX = KX - INCX + IX = KX + L = KPLUS1 - J + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( KPLUS1, J ) + DO 120, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + A( L + I, J )*X( IX ) + IX = IX - INCX + 120 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( A( KPLUS1, J ) ) + DO 130, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + CONJG( A( L + I, J ) )*X( IX ) + IX = IX - INCX + 130 CONTINUE + END IF + X( JX ) = TEMP + JX = JX - INCX + 140 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 170, J = 1, N + TEMP = X( J ) + L = 1 - J + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( 1, J ) + DO 150, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + A( L + I, J )*X( I ) + 150 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( A( 1, J ) ) + DO 160, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + CONJG( A( L + I, J ) )*X( I ) + 160 CONTINUE + END IF + X( J ) = TEMP + 170 CONTINUE + ELSE + JX = KX + DO 200, J = 1, N + TEMP = X( JX ) + KX = KX + INCX + IX = KX + L = 1 - J + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( 1, J ) + DO 180, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + A( L + I, J )*X( IX ) + IX = IX + INCX + 180 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( A( 1, J ) ) + DO 190, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + CONJG( A( L + I, J ) )*X( IX ) + IX = IX + INCX + 190 CONTINUE + END IF + X( JX ) = TEMP + JX = JX + INCX + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CTBMV . +* + END diff --git a/costa/native/external/blas/ctbsv.f b/costa/native/external/blas/ctbsv.f new file mode 100644 index 000000000..abe4c77a3 --- /dev/null +++ b/costa/native/external/blas/ctbsv.f @@ -0,0 +1,381 @@ + SUBROUTINE CTBSV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, K, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CTBSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, or conjg( A' )*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular band matrix, with ( k + 1 ) +* diagonals. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' conjg( A' )*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with UPLO = 'U' or 'u', K specifies the number of +* super-diagonals of the matrix A. +* On entry with UPLO = 'L' or 'l', K specifies the number of +* sub-diagonals of the matrix A. +* K must satisfy 0 .le. K. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer an upper +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer a lower +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Note that when DIAG = 'U' or 'u' the elements of the array A +* corresponding to the diagonal elements of the matrix are not +* referenced, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, J, JX, KPLUS1, KX, L + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( K.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 7 + ELSE IF( INCX.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CTBSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed by sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + L = KPLUS1 - J + IF( NOUNIT ) + $ X( J ) = X( J )/A( KPLUS1, J ) + TEMP = X( J ) + DO 10, I = J - 1, MAX( 1, J - K ), -1 + X( I ) = X( I ) - TEMP*A( L + I, J ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 40, J = N, 1, -1 + KX = KX - INCX + IF( X( JX ).NE.ZERO )THEN + IX = KX + L = KPLUS1 - J + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( KPLUS1, J ) + TEMP = X( JX ) + DO 30, I = J - 1, MAX( 1, J - K ), -1 + X( IX ) = X( IX ) - TEMP*A( L + I, J ) + IX = IX - INCX + 30 CONTINUE + END IF + JX = JX - INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + L = 1 - J + IF( NOUNIT ) + $ X( J ) = X( J )/A( 1, J ) + TEMP = X( J ) + DO 50, I = J + 1, MIN( N, J + K ) + X( I ) = X( I ) - TEMP*A( L + I, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + KX = KX + INCX + IF( X( JX ).NE.ZERO )THEN + IX = KX + L = 1 - J + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( 1, J ) + TEMP = X( JX ) + DO 70, I = J + 1, MIN( N, J + K ) + X( IX ) = X( IX ) - TEMP*A( L + I, J ) + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x or x := inv( conjg( A') )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 110, J = 1, N + TEMP = X( J ) + L = KPLUS1 - J + IF( NOCONJ )THEN + DO 90, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - A( L + I, J )*X( I ) + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( KPLUS1, J ) + ELSE + DO 100, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - CONJG( A( L + I, J ) )*X( I ) + 100 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( A( KPLUS1, J ) ) + END IF + X( J ) = TEMP + 110 CONTINUE + ELSE + JX = KX + DO 140, J = 1, N + TEMP = X( JX ) + IX = KX + L = KPLUS1 - J + IF( NOCONJ )THEN + DO 120, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - A( L + I, J )*X( IX ) + IX = IX + INCX + 120 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( KPLUS1, J ) + ELSE + DO 130, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - CONJG( A( L + I, J ) )*X( IX ) + IX = IX + INCX + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( A( KPLUS1, J ) ) + END IF + X( JX ) = TEMP + JX = JX + INCX + IF( J.GT.K ) + $ KX = KX + INCX + 140 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 170, J = N, 1, -1 + TEMP = X( J ) + L = 1 - J + IF( NOCONJ )THEN + DO 150, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - A( L + I, J )*X( I ) + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( 1, J ) + ELSE + DO 160, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - CONJG( A( L + I, J ) )*X( I ) + 160 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( A( 1, J ) ) + END IF + X( J ) = TEMP + 170 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 200, J = N, 1, -1 + TEMP = X( JX ) + IX = KX + L = 1 - J + IF( NOCONJ )THEN + DO 180, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - A( L + I, J )*X( IX ) + IX = IX - INCX + 180 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( 1, J ) + ELSE + DO 190, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - CONJG( A( L + I, J ) )*X( IX ) + IX = IX - INCX + 190 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( A( 1, J ) ) + END IF + X( JX ) = TEMP + JX = JX - INCX + IF( ( N - J ).GE.K ) + $ KX = KX - INCX + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CTBSV . +* + END diff --git a/costa/native/external/blas/ctpmv.f b/costa/native/external/blas/ctpmv.f new file mode 100644 index 000000000..86cb35f79 --- /dev/null +++ b/costa/native/external/blas/ctpmv.f @@ -0,0 +1,338 @@ + SUBROUTINE CTPMV ( UPLO, TRANS, DIAG, N, AP, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CTPMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, or x := conjg( A' )*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := conjg( A' )*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* AP - COMPLEX array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) +* respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) +* respectively, and so on. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced, but are assumed to be unity. +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CTPMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of AP are +* accessed sequentially with one pass through AP. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x:= A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = 1 + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + K = KK + DO 10, I = 1, J - 1 + X( I ) = X( I ) + TEMP*AP( K ) + K = K + 1 + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*AP( KK + J - 1 ) + END IF + KK = KK + J + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 30, K = KK, KK + J - 2 + X( IX ) = X( IX ) + TEMP*AP( K ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*AP( KK + J - 1 ) + END IF + JX = JX + INCX + KK = KK + J + 40 CONTINUE + END IF + ELSE + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + K = KK + DO 50, I = N, J + 1, -1 + X( I ) = X( I ) + TEMP*AP( K ) + K = K - 1 + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*AP( KK - N + J ) + END IF + KK = KK - ( N - J + 1 ) + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 70, K = KK, KK - ( N - ( J + 1 ) ), -1 + X( IX ) = X( IX ) + TEMP*AP( K ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*AP( KK - N + J ) + END IF + JX = JX - INCX + KK = KK - ( N - J + 1 ) + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x or x := conjg( A' )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 110, J = N, 1, -1 + TEMP = X( J ) + K = KK - 1 + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 90, I = J - 1, 1, -1 + TEMP = TEMP + AP( K )*X( I ) + K = K - 1 + 90 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( AP( KK ) ) + DO 100, I = J - 1, 1, -1 + TEMP = TEMP + CONJG( AP( K ) )*X( I ) + K = K - 1 + 100 CONTINUE + END IF + X( J ) = TEMP + KK = KK - J + 110 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 140, J = N, 1, -1 + TEMP = X( JX ) + IX = JX + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 120, K = KK - 1, KK - J + 1, -1 + IX = IX - INCX + TEMP = TEMP + AP( K )*X( IX ) + 120 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( AP( KK ) ) + DO 130, K = KK - 1, KK - J + 1, -1 + IX = IX - INCX + TEMP = TEMP + CONJG( AP( K ) )*X( IX ) + 130 CONTINUE + END IF + X( JX ) = TEMP + JX = JX - INCX + KK = KK - J + 140 CONTINUE + END IF + ELSE + KK = 1 + IF( INCX.EQ.1 )THEN + DO 170, J = 1, N + TEMP = X( J ) + K = KK + 1 + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 150, I = J + 1, N + TEMP = TEMP + AP( K )*X( I ) + K = K + 1 + 150 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( AP( KK ) ) + DO 160, I = J + 1, N + TEMP = TEMP + CONJG( AP( K ) )*X( I ) + K = K + 1 + 160 CONTINUE + END IF + X( J ) = TEMP + KK = KK + ( N - J + 1 ) + 170 CONTINUE + ELSE + JX = KX + DO 200, J = 1, N + TEMP = X( JX ) + IX = JX + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 180, K = KK + 1, KK + N - J + IX = IX + INCX + TEMP = TEMP + AP( K )*X( IX ) + 180 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( AP( KK ) ) + DO 190, K = KK + 1, KK + N - J + IX = IX + INCX + TEMP = TEMP + CONJG( AP( K ) )*X( IX ) + 190 CONTINUE + END IF + X( JX ) = TEMP + JX = JX + INCX + KK = KK + ( N - J + 1 ) + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CTPMV . +* + END diff --git a/costa/native/external/blas/ctpsv.f b/costa/native/external/blas/ctpsv.f new file mode 100644 index 000000000..2973949db --- /dev/null +++ b/costa/native/external/blas/ctpsv.f @@ -0,0 +1,341 @@ + SUBROUTINE CTPSV ( UPLO, TRANS, DIAG, N, AP, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CTPSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, or conjg( A' )*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular matrix, supplied in packed form. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' conjg( A' )*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* AP - COMPLEX array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) +* respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) +* respectively, and so on. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced, but are assumed to be unity. +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CTPSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of AP are +* accessed sequentially with one pass through AP. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/AP( KK ) + TEMP = X( J ) + K = KK - 1 + DO 10, I = J - 1, 1, -1 + X( I ) = X( I ) - TEMP*AP( K ) + K = K - 1 + 10 CONTINUE + END IF + KK = KK - J + 20 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 40, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/AP( KK ) + TEMP = X( JX ) + IX = JX + DO 30, K = KK - 1, KK - J + 1, -1 + IX = IX - INCX + X( IX ) = X( IX ) - TEMP*AP( K ) + 30 CONTINUE + END IF + JX = JX - INCX + KK = KK - J + 40 CONTINUE + END IF + ELSE + KK = 1 + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/AP( KK ) + TEMP = X( J ) + K = KK + 1 + DO 50, I = J + 1, N + X( I ) = X( I ) - TEMP*AP( K ) + K = K + 1 + 50 CONTINUE + END IF + KK = KK + ( N - J + 1 ) + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/AP( KK ) + TEMP = X( JX ) + IX = JX + DO 70, K = KK + 1, KK + N - J + IX = IX + INCX + X( IX ) = X( IX ) - TEMP*AP( K ) + 70 CONTINUE + END IF + JX = JX + INCX + KK = KK + ( N - J + 1 ) + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x or x := inv( conjg( A' ) )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = 1 + IF( INCX.EQ.1 )THEN + DO 110, J = 1, N + TEMP = X( J ) + K = KK + IF( NOCONJ )THEN + DO 90, I = 1, J - 1 + TEMP = TEMP - AP( K )*X( I ) + K = K + 1 + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK + J - 1 ) + ELSE + DO 100, I = 1, J - 1 + TEMP = TEMP - CONJG( AP( K ) )*X( I ) + K = K + 1 + 100 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( AP( KK + J - 1 ) ) + END IF + X( J ) = TEMP + KK = KK + J + 110 CONTINUE + ELSE + JX = KX + DO 140, J = 1, N + TEMP = X( JX ) + IX = KX + IF( NOCONJ )THEN + DO 120, K = KK, KK + J - 2 + TEMP = TEMP - AP( K )*X( IX ) + IX = IX + INCX + 120 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK + J - 1 ) + ELSE + DO 130, K = KK, KK + J - 2 + TEMP = TEMP - CONJG( AP( K ) )*X( IX ) + IX = IX + INCX + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( AP( KK + J - 1 ) ) + END IF + X( JX ) = TEMP + JX = JX + INCX + KK = KK + J + 140 CONTINUE + END IF + ELSE + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 170, J = N, 1, -1 + TEMP = X( J ) + K = KK + IF( NOCONJ )THEN + DO 150, I = N, J + 1, -1 + TEMP = TEMP - AP( K )*X( I ) + K = K - 1 + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK - N + J ) + ELSE + DO 160, I = N, J + 1, -1 + TEMP = TEMP - CONJG( AP( K ) )*X( I ) + K = K - 1 + 160 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( AP( KK - N + J ) ) + END IF + X( J ) = TEMP + KK = KK - ( N - J + 1 ) + 170 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 200, J = N, 1, -1 + TEMP = X( JX ) + IX = KX + IF( NOCONJ )THEN + DO 180, K = KK, KK - ( N - ( J + 1 ) ), -1 + TEMP = TEMP - AP( K )*X( IX ) + IX = IX - INCX + 180 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK - N + J ) + ELSE + DO 190, K = KK, KK - ( N - ( J + 1 ) ), -1 + TEMP = TEMP - CONJG( AP( K ) )*X( IX ) + IX = IX - INCX + 190 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( AP( KK - N + J ) ) + END IF + X( JX ) = TEMP + JX = JX - INCX + KK = KK - ( N - J + 1 ) + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CTPSV . +* + END diff --git a/costa/native/external/blas/ctrmm.f b/costa/native/external/blas/ctrmm.f new file mode 100644 index 000000000..521fe6e51 --- /dev/null +++ b/costa/native/external/blas/ctrmm.f @@ -0,0 +1,392 @@ + SUBROUTINE CTRMM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, + $ B, LDB ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO, TRANSA, DIAG + INTEGER M, N, LDA, LDB + COMPLEX ALPHA +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CTRMM performs one of the matrix-matrix operations +* +* B := alpha*op( A )*B, or B := alpha*B*op( A ) +* +* where alpha is a scalar, B is an m by n matrix, A is a unit, or +* non-unit, upper or lower triangular matrix and op( A ) is one of +* +* op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether op( A ) multiplies B from +* the left or right as follows: +* +* SIDE = 'L' or 'l' B := alpha*op( A )*B. +* +* SIDE = 'R' or 'r' B := alpha*B*op( A ). +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix A is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n' op( A ) = A. +* +* TRANSA = 'T' or 't' op( A ) = A'. +* +* TRANSA = 'C' or 'c' op( A ) = conjg( A' ). +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit triangular +* as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of B. M must be at +* least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of B. N must be +* at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. When alpha is +* zero then A is not referenced and B need not be set before +* entry. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, k ), where k is m +* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. +* Before entry with UPLO = 'U' or 'u', the leading k by k +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading k by k +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' +* then LDA must be at least max( 1, n ). +* Unchanged on exit. +* +* B - COMPLEX array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B, and on exit is overwritten by the +* transformed matrix. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. Local Scalars .. + LOGICAL LSIDE, NOCONJ, NOUNIT, UPPER + INTEGER I, INFO, J, K, NROWA + COMPLEX TEMP +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LSIDE = LSAME( SIDE , 'L' ) + IF( LSIDE )THEN + NROWA = M + ELSE + NROWA = N + END IF + NOCONJ = LSAME( TRANSA, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) + UPPER = LSAME( UPLO , 'U' ) +* + INFO = 0 + IF( ( .NOT.LSIDE ).AND. + $ ( .NOT.LSAME( SIDE , 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 2 + ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'C' ) ) )THEN + INFO = 3 + ELSE IF( ( .NOT.LSAME( DIAG , 'U' ) ).AND. + $ ( .NOT.LSAME( DIAG , 'N' ) ) )THEN + INFO = 4 + ELSE IF( M .LT.0 )THEN + INFO = 5 + ELSE IF( N .LT.0 )THEN + INFO = 6 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CTRMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* Start the operations. +* + IF( LSIDE )THEN + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*A*B. +* + IF( UPPER )THEN + DO 50, J = 1, N + DO 40, K = 1, M + IF( B( K, J ).NE.ZERO )THEN + TEMP = ALPHA*B( K, J ) + DO 30, I = 1, K - 1 + B( I, J ) = B( I, J ) + TEMP*A( I, K ) + 30 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP*A( K, K ) + B( K, J ) = TEMP + END IF + 40 CONTINUE + 50 CONTINUE + ELSE + DO 80, J = 1, N + DO 70 K = M, 1, -1 + IF( B( K, J ).NE.ZERO )THEN + TEMP = ALPHA*B( K, J ) + B( K, J ) = TEMP + IF( NOUNIT ) + $ B( K, J ) = B( K, J )*A( K, K ) + DO 60, I = K + 1, M + B( I, J ) = B( I, J ) + TEMP*A( I, K ) + 60 CONTINUE + END IF + 70 CONTINUE + 80 CONTINUE + END IF + ELSE +* +* Form B := alpha*A'*B or B := alpha*conjg( A' )*B. +* + IF( UPPER )THEN + DO 120, J = 1, N + DO 110, I = M, 1, -1 + TEMP = B( I, J ) + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( I, I ) + DO 90, K = 1, I - 1 + TEMP = TEMP + A( K, I )*B( K, J ) + 90 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( A( I, I ) ) + DO 100, K = 1, I - 1 + TEMP = TEMP + CONJG( A( K, I ) )*B( K, J ) + 100 CONTINUE + END IF + B( I, J ) = ALPHA*TEMP + 110 CONTINUE + 120 CONTINUE + ELSE + DO 160, J = 1, N + DO 150, I = 1, M + TEMP = B( I, J ) + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( I, I ) + DO 130, K = I + 1, M + TEMP = TEMP + A( K, I )*B( K, J ) + 130 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( A( I, I ) ) + DO 140, K = I + 1, M + TEMP = TEMP + CONJG( A( K, I ) )*B( K, J ) + 140 CONTINUE + END IF + B( I, J ) = ALPHA*TEMP + 150 CONTINUE + 160 CONTINUE + END IF + END IF + ELSE + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*B*A. +* + IF( UPPER )THEN + DO 200, J = N, 1, -1 + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 170, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 170 CONTINUE + DO 190, K = 1, J - 1 + IF( A( K, J ).NE.ZERO )THEN + TEMP = ALPHA*A( K, J ) + DO 180, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 180 CONTINUE + END IF + 190 CONTINUE + 200 CONTINUE + ELSE + DO 240, J = 1, N + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 210, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 210 CONTINUE + DO 230, K = J + 1, N + IF( A( K, J ).NE.ZERO )THEN + TEMP = ALPHA*A( K, J ) + DO 220, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 220 CONTINUE + END IF + 230 CONTINUE + 240 CONTINUE + END IF + ELSE +* +* Form B := alpha*B*A' or B := alpha*B*conjg( A' ). +* + IF( UPPER )THEN + DO 280, K = 1, N + DO 260, J = 1, K - 1 + IF( A( J, K ).NE.ZERO )THEN + IF( NOCONJ )THEN + TEMP = ALPHA*A( J, K ) + ELSE + TEMP = ALPHA*CONJG( A( J, K ) ) + END IF + DO 250, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 250 CONTINUE + END IF + 260 CONTINUE + TEMP = ALPHA + IF( NOUNIT )THEN + IF( NOCONJ )THEN + TEMP = TEMP*A( K, K ) + ELSE + TEMP = TEMP*CONJG( A( K, K ) ) + END IF + END IF + IF( TEMP.NE.ONE )THEN + DO 270, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 270 CONTINUE + END IF + 280 CONTINUE + ELSE + DO 320, K = N, 1, -1 + DO 300, J = K + 1, N + IF( A( J, K ).NE.ZERO )THEN + IF( NOCONJ )THEN + TEMP = ALPHA*A( J, K ) + ELSE + TEMP = ALPHA*CONJG( A( J, K ) ) + END IF + DO 290, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 290 CONTINUE + END IF + 300 CONTINUE + TEMP = ALPHA + IF( NOUNIT )THEN + IF( NOCONJ )THEN + TEMP = TEMP*A( K, K ) + ELSE + TEMP = TEMP*CONJG( A( K, K ) ) + END IF + END IF + IF( TEMP.NE.ONE )THEN + DO 310, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 310 CONTINUE + END IF + 320 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CTRMM . +* + END diff --git a/costa/native/external/blas/ctrmv.f b/costa/native/external/blas/ctrmv.f new file mode 100644 index 000000000..cdf948961 --- /dev/null +++ b/costa/native/external/blas/ctrmv.f @@ -0,0 +1,321 @@ + SUBROUTINE CTRMV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CTRMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, or x := conjg( A' )*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := conjg( A' )*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, J, JX, KX + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CTRMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + DO 10, I = 1, J - 1 + X( I ) = X( I ) + TEMP*A( I, J ) + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( J, J ) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 30, I = 1, J - 1 + X( IX ) = X( IX ) + TEMP*A( I, J ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( J, J ) + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + DO 50, I = N, J + 1, -1 + X( I ) = X( I ) + TEMP*A( I, J ) + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( J, J ) + END IF + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 70, I = N, J + 1, -1 + X( IX ) = X( IX ) + TEMP*A( I, J ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( J, J ) + END IF + JX = JX - INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x or x := conjg( A' )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 110, J = N, 1, -1 + TEMP = X( J ) + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 90, I = J - 1, 1, -1 + TEMP = TEMP + A( I, J )*X( I ) + 90 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( A( J, J ) ) + DO 100, I = J - 1, 1, -1 + TEMP = TEMP + CONJG( A( I, J ) )*X( I ) + 100 CONTINUE + END IF + X( J ) = TEMP + 110 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 140, J = N, 1, -1 + TEMP = X( JX ) + IX = JX + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 120, I = J - 1, 1, -1 + IX = IX - INCX + TEMP = TEMP + A( I, J )*X( IX ) + 120 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( A( J, J ) ) + DO 130, I = J - 1, 1, -1 + IX = IX - INCX + TEMP = TEMP + CONJG( A( I, J ) )*X( IX ) + 130 CONTINUE + END IF + X( JX ) = TEMP + JX = JX - INCX + 140 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 170, J = 1, N + TEMP = X( J ) + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 150, I = J + 1, N + TEMP = TEMP + A( I, J )*X( I ) + 150 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( A( J, J ) ) + DO 160, I = J + 1, N + TEMP = TEMP + CONJG( A( I, J ) )*X( I ) + 160 CONTINUE + END IF + X( J ) = TEMP + 170 CONTINUE + ELSE + JX = KX + DO 200, J = 1, N + TEMP = X( JX ) + IX = JX + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 180, I = J + 1, N + IX = IX + INCX + TEMP = TEMP + A( I, J )*X( IX ) + 180 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*CONJG( A( J, J ) ) + DO 190, I = J + 1, N + IX = IX + INCX + TEMP = TEMP + CONJG( A( I, J ) )*X( IX ) + 190 CONTINUE + END IF + X( JX ) = TEMP + JX = JX + INCX + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CTRMV . +* + END diff --git a/costa/native/external/blas/ctrsm.f b/costa/native/external/blas/ctrsm.f new file mode 100644 index 000000000..f9b74dccd --- /dev/null +++ b/costa/native/external/blas/ctrsm.f @@ -0,0 +1,414 @@ + SUBROUTINE CTRSM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, + $ B, LDB ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO, TRANSA, DIAG + INTEGER M, N, LDA, LDB + COMPLEX ALPHA +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CTRSM solves one of the matrix equations +* +* op( A )*X = alpha*B, or X*op( A ) = alpha*B, +* +* where alpha is a scalar, X and B are m by n matrices, A is a unit, or +* non-unit, upper or lower triangular matrix and op( A ) is one of +* +* op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). +* +* The matrix X is overwritten on B. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether op( A ) appears on the left +* or right of X as follows: +* +* SIDE = 'L' or 'l' op( A )*X = alpha*B. +* +* SIDE = 'R' or 'r' X*op( A ) = alpha*B. +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix A is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n' op( A ) = A. +* +* TRANSA = 'T' or 't' op( A ) = A'. +* +* TRANSA = 'C' or 'c' op( A ) = conjg( A' ). +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit triangular +* as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of B. M must be at +* least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of B. N must be +* at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX . +* On entry, ALPHA specifies the scalar alpha. When alpha is +* zero then A is not referenced and B need not be set before +* entry. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, k ), where k is m +* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. +* Before entry with UPLO = 'U' or 'u', the leading k by k +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading k by k +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' +* then LDA must be at least max( 1, n ). +* Unchanged on exit. +* +* B - COMPLEX array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the right-hand side matrix B, and on exit is +* overwritten by the solution matrix X. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. Local Scalars .. + LOGICAL LSIDE, NOCONJ, NOUNIT, UPPER + INTEGER I, INFO, J, K, NROWA + COMPLEX TEMP +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LSIDE = LSAME( SIDE , 'L' ) + IF( LSIDE )THEN + NROWA = M + ELSE + NROWA = N + END IF + NOCONJ = LSAME( TRANSA, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) + UPPER = LSAME( UPLO , 'U' ) +* + INFO = 0 + IF( ( .NOT.LSIDE ).AND. + $ ( .NOT.LSAME( SIDE , 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 2 + ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'C' ) ) )THEN + INFO = 3 + ELSE IF( ( .NOT.LSAME( DIAG , 'U' ) ).AND. + $ ( .NOT.LSAME( DIAG , 'N' ) ) )THEN + INFO = 4 + ELSE IF( M .LT.0 )THEN + INFO = 5 + ELSE IF( N .LT.0 )THEN + INFO = 6 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CTRSM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* Start the operations. +* + IF( LSIDE )THEN + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*inv( A )*B. +* + IF( UPPER )THEN + DO 60, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 30, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 30 CONTINUE + END IF + DO 50, K = M, 1, -1 + IF( B( K, J ).NE.ZERO )THEN + IF( NOUNIT ) + $ B( K, J ) = B( K, J )/A( K, K ) + DO 40, I = 1, K - 1 + B( I, J ) = B( I, J ) - B( K, J )*A( I, K ) + 40 CONTINUE + END IF + 50 CONTINUE + 60 CONTINUE + ELSE + DO 100, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 70, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 70 CONTINUE + END IF + DO 90 K = 1, M + IF( B( K, J ).NE.ZERO )THEN + IF( NOUNIT ) + $ B( K, J ) = B( K, J )/A( K, K ) + DO 80, I = K + 1, M + B( I, J ) = B( I, J ) - B( K, J )*A( I, K ) + 80 CONTINUE + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form B := alpha*inv( A' )*B +* or B := alpha*inv( conjg( A' ) )*B. +* + IF( UPPER )THEN + DO 140, J = 1, N + DO 130, I = 1, M + TEMP = ALPHA*B( I, J ) + IF( NOCONJ )THEN + DO 110, K = 1, I - 1 + TEMP = TEMP - A( K, I )*B( K, J ) + 110 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( I, I ) + ELSE + DO 120, K = 1, I - 1 + TEMP = TEMP - CONJG( A( K, I ) )*B( K, J ) + 120 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( A( I, I ) ) + END IF + B( I, J ) = TEMP + 130 CONTINUE + 140 CONTINUE + ELSE + DO 180, J = 1, N + DO 170, I = M, 1, -1 + TEMP = ALPHA*B( I, J ) + IF( NOCONJ )THEN + DO 150, K = I + 1, M + TEMP = TEMP - A( K, I )*B( K, J ) + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( I, I ) + ELSE + DO 160, K = I + 1, M + TEMP = TEMP - CONJG( A( K, I ) )*B( K, J ) + 160 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( A( I, I ) ) + END IF + B( I, J ) = TEMP + 170 CONTINUE + 180 CONTINUE + END IF + END IF + ELSE + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*B*inv( A ). +* + IF( UPPER )THEN + DO 230, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 190, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 190 CONTINUE + END IF + DO 210, K = 1, J - 1 + IF( A( K, J ).NE.ZERO )THEN + DO 200, I = 1, M + B( I, J ) = B( I, J ) - A( K, J )*B( I, K ) + 200 CONTINUE + END IF + 210 CONTINUE + IF( NOUNIT )THEN + TEMP = ONE/A( J, J ) + DO 220, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 220 CONTINUE + END IF + 230 CONTINUE + ELSE + DO 280, J = N, 1, -1 + IF( ALPHA.NE.ONE )THEN + DO 240, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 240 CONTINUE + END IF + DO 260, K = J + 1, N + IF( A( K, J ).NE.ZERO )THEN + DO 250, I = 1, M + B( I, J ) = B( I, J ) - A( K, J )*B( I, K ) + 250 CONTINUE + END IF + 260 CONTINUE + IF( NOUNIT )THEN + TEMP = ONE/A( J, J ) + DO 270, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 270 CONTINUE + END IF + 280 CONTINUE + END IF + ELSE +* +* Form B := alpha*B*inv( A' ) +* or B := alpha*B*inv( conjg( A' ) ). +* + IF( UPPER )THEN + DO 330, K = N, 1, -1 + IF( NOUNIT )THEN + IF( NOCONJ )THEN + TEMP = ONE/A( K, K ) + ELSE + TEMP = ONE/CONJG( A( K, K ) ) + END IF + DO 290, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 290 CONTINUE + END IF + DO 310, J = 1, K - 1 + IF( A( J, K ).NE.ZERO )THEN + IF( NOCONJ )THEN + TEMP = A( J, K ) + ELSE + TEMP = CONJG( A( J, K ) ) + END IF + DO 300, I = 1, M + B( I, J ) = B( I, J ) - TEMP*B( I, K ) + 300 CONTINUE + END IF + 310 CONTINUE + IF( ALPHA.NE.ONE )THEN + DO 320, I = 1, M + B( I, K ) = ALPHA*B( I, K ) + 320 CONTINUE + END IF + 330 CONTINUE + ELSE + DO 380, K = 1, N + IF( NOUNIT )THEN + IF( NOCONJ )THEN + TEMP = ONE/A( K, K ) + ELSE + TEMP = ONE/CONJG( A( K, K ) ) + END IF + DO 340, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 340 CONTINUE + END IF + DO 360, J = K + 1, N + IF( A( J, K ).NE.ZERO )THEN + IF( NOCONJ )THEN + TEMP = A( J, K ) + ELSE + TEMP = CONJG( A( J, K ) ) + END IF + DO 350, I = 1, M + B( I, J ) = B( I, J ) - TEMP*B( I, K ) + 350 CONTINUE + END IF + 360 CONTINUE + IF( ALPHA.NE.ONE )THEN + DO 370, I = 1, M + B( I, K ) = ALPHA*B( I, K ) + 370 CONTINUE + END IF + 380 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CTRSM . +* + END diff --git a/costa/native/external/blas/ctrsv.f b/costa/native/external/blas/ctrsv.f new file mode 100644 index 000000000..a14b1abf0 --- /dev/null +++ b/costa/native/external/blas/ctrsv.f @@ -0,0 +1,324 @@ + SUBROUTINE CTRSV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CTRSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, or conjg( A' )*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular matrix. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' conjg( A' )*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* A - COMPLEX array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - COMPLEX array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. Local Scalars .. + COMPLEX TEMP + INTEGER I, INFO, IX, J, JX, KX + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CTRSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/A( J, J ) + TEMP = X( J ) + DO 10, I = J - 1, 1, -1 + X( I ) = X( I ) - TEMP*A( I, J ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 40, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( J, J ) + TEMP = X( JX ) + IX = JX + DO 30, I = J - 1, 1, -1 + IX = IX - INCX + X( IX ) = X( IX ) - TEMP*A( I, J ) + 30 CONTINUE + END IF + JX = JX - INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/A( J, J ) + TEMP = X( J ) + DO 50, I = J + 1, N + X( I ) = X( I ) - TEMP*A( I, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( J, J ) + TEMP = X( JX ) + IX = JX + DO 70, I = J + 1, N + IX = IX + INCX + X( IX ) = X( IX ) - TEMP*A( I, J ) + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x or x := inv( conjg( A' ) )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 110, J = 1, N + TEMP = X( J ) + IF( NOCONJ )THEN + DO 90, I = 1, J - 1 + TEMP = TEMP - A( I, J )*X( I ) + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + ELSE + DO 100, I = 1, J - 1 + TEMP = TEMP - CONJG( A( I, J ) )*X( I ) + 100 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( A( J, J ) ) + END IF + X( J ) = TEMP + 110 CONTINUE + ELSE + JX = KX + DO 140, J = 1, N + IX = KX + TEMP = X( JX ) + IF( NOCONJ )THEN + DO 120, I = 1, J - 1 + TEMP = TEMP - A( I, J )*X( IX ) + IX = IX + INCX + 120 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + ELSE + DO 130, I = 1, J - 1 + TEMP = TEMP - CONJG( A( I, J ) )*X( IX ) + IX = IX + INCX + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( A( J, J ) ) + END IF + X( JX ) = TEMP + JX = JX + INCX + 140 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 170, J = N, 1, -1 + TEMP = X( J ) + IF( NOCONJ )THEN + DO 150, I = N, J + 1, -1 + TEMP = TEMP - A( I, J )*X( I ) + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + ELSE + DO 160, I = N, J + 1, -1 + TEMP = TEMP - CONJG( A( I, J ) )*X( I ) + 160 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( A( J, J ) ) + END IF + X( J ) = TEMP + 170 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 200, J = N, 1, -1 + IX = KX + TEMP = X( JX ) + IF( NOCONJ )THEN + DO 180, I = N, J + 1, -1 + TEMP = TEMP - A( I, J )*X( IX ) + IX = IX - INCX + 180 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + ELSE + DO 190, I = N, J + 1, -1 + TEMP = TEMP - CONJG( A( I, J ) )*X( IX ) + IX = IX - INCX + 190 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/CONJG( A( J, J ) ) + END IF + X( JX ) = TEMP + JX = JX - INCX + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CTRSV . +* + END diff --git a/costa/native/external/blas/dasum.f b/costa/native/external/blas/dasum.f new file mode 100644 index 000000000..28b128a84 --- /dev/null +++ b/costa/native/external/blas/dasum.f @@ -0,0 +1,43 @@ + double precision function dasum(n,dx,incx) +c +c takes the sum of the absolute values. +c jack dongarra, linpack, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double precision dx(*),dtemp + integer i,incx,m,mp1,n,nincx +c + dasum = 0.0d0 + dtemp = 0.0d0 + if( n.le.0 .or. incx.le.0 )return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + nincx = n*incx + do 10 i = 1,nincx,incx + dtemp = dtemp + dabs(dx(i)) + 10 continue + dasum = dtemp + return +c +c code for increment equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,6) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + dtemp = dtemp + dabs(dx(i)) + 30 continue + if( n .lt. 6 ) go to 60 + 40 mp1 = m + 1 + do 50 i = mp1,n,6 + dtemp = dtemp + dabs(dx(i)) + dabs(dx(i + 1)) + dabs(dx(i + 2)) + * + dabs(dx(i + 3)) + dabs(dx(i + 4)) + dabs(dx(i + 5)) + 50 continue + 60 dasum = dtemp + return + end diff --git a/costa/native/external/blas/daxpy.f b/costa/native/external/blas/daxpy.f new file mode 100644 index 000000000..91daa3c64 --- /dev/null +++ b/costa/native/external/blas/daxpy.f @@ -0,0 +1,48 @@ + subroutine daxpy(n,da,dx,incx,dy,incy) +c +c constant times a vector plus a vector. +c uses unrolled loops for increments equal to one. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double precision dx(*),dy(*),da + integer i,incx,incy,ix,iy,m,mp1,n +c + if(n.le.0)return + if (da .eq. 0.0d0) return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + dy(iy) = dy(iy) + da*dx(ix) + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,4) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + dy(i) = dy(i) + da*dx(i) + 30 continue + if( n .lt. 4 ) return + 40 mp1 = m + 1 + do 50 i = mp1,n,4 + dy(i) = dy(i) + da*dx(i) + dy(i + 1) = dy(i + 1) + da*dx(i + 1) + dy(i + 2) = dy(i + 2) + da*dx(i + 2) + dy(i + 3) = dy(i + 3) + da*dx(i + 3) + 50 continue + return + end diff --git a/costa/native/external/blas/dcabs1.f b/costa/native/external/blas/dcabs1.f new file mode 100644 index 000000000..385ea5e1a --- /dev/null +++ b/costa/native/external/blas/dcabs1.f @@ -0,0 +1,8 @@ + double precision function dcabs1(z) + double complex z,zz + double precision t(2) + equivalence (zz,t(1)) + zz = z + dcabs1 = dabs(t(1)) + dabs(t(2)) + return + end diff --git a/costa/native/external/blas/dcopy.f b/costa/native/external/blas/dcopy.f new file mode 100644 index 000000000..e16892716 --- /dev/null +++ b/costa/native/external/blas/dcopy.f @@ -0,0 +1,50 @@ + subroutine dcopy(n,dx,incx,dy,incy) +c +c copies a vector, x, to a vector, y. +c uses unrolled loops for increments equal to one. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double precision dx(*),dy(*) + integer i,incx,incy,ix,iy,m,mp1,n +c + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + dy(iy) = dx(ix) + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,7) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + dy(i) = dx(i) + 30 continue + if( n .lt. 7 ) return + 40 mp1 = m + 1 + do 50 i = mp1,n,7 + dy(i) = dx(i) + dy(i + 1) = dx(i + 1) + dy(i + 2) = dx(i + 2) + dy(i + 3) = dx(i + 3) + dy(i + 4) = dx(i + 4) + dy(i + 5) = dx(i + 5) + dy(i + 6) = dx(i + 6) + 50 continue + return + end diff --git a/costa/native/external/blas/ddot.f b/costa/native/external/blas/ddot.f new file mode 100644 index 000000000..e04c7c25e --- /dev/null +++ b/costa/native/external/blas/ddot.f @@ -0,0 +1,49 @@ + double precision function ddot(n,dx,incx,dy,incy) +c +c forms the dot product of two vectors. +c uses unrolled loops for increments equal to one. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double precision dx(*),dy(*),dtemp + integer i,incx,incy,ix,iy,m,mp1,n +c + ddot = 0.0d0 + dtemp = 0.0d0 + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + dtemp = dtemp + dx(ix)*dy(iy) + ix = ix + incx + iy = iy + incy + 10 continue + ddot = dtemp + return +c +c code for both increments equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,5) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + dtemp = dtemp + dx(i)*dy(i) + 30 continue + if( n .lt. 5 ) go to 60 + 40 mp1 = m + 1 + do 50 i = mp1,n,5 + dtemp = dtemp + dx(i)*dy(i) + dx(i + 1)*dy(i + 1) + + * dx(i + 2)*dy(i + 2) + dx(i + 3)*dy(i + 3) + dx(i + 4)*dy(i + 4) + 50 continue + 60 ddot = dtemp + return + end diff --git a/costa/native/external/blas/dgbmv.f b/costa/native/external/blas/dgbmv.f new file mode 100644 index 000000000..e9c8f76fb --- /dev/null +++ b/costa/native/external/blas/dgbmv.f @@ -0,0 +1,300 @@ + SUBROUTINE DGBMV ( TRANS, M, N, KL, KU, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER INCX, INCY, KL, KU, LDA, M, N + CHARACTER*1 TRANS +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* DGBMV performs one of the matrix-vector operations +* +* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, +* +* where alpha and beta are scalars, x and y are vectors and A is an +* m by n band matrix, with kl sub-diagonals and ku super-diagonals. +* +* Parameters +* ========== +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. +* +* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. +* +* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* KL - INTEGER. +* On entry, KL specifies the number of sub-diagonals of the +* matrix A. KL must satisfy 0 .le. KL. +* Unchanged on exit. +* +* KU - INTEGER. +* On entry, KU specifies the number of super-diagonals of the +* matrix A. KU must satisfy 0 .le. KU. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry, the leading ( kl + ku + 1 ) by n part of the +* array A must contain the matrix of coefficients, supplied +* column by column, with the leading diagonal of the matrix in +* row ( ku + 1 ) of the array, the first super-diagonal +* starting at position 2 in row ku, the first sub-diagonal +* starting at position 1 in row ( ku + 2 ), and so on. +* Elements in the array A that do not correspond to elements +* in the band matrix (such as the top left ku by ku triangle) +* are not referenced. +* The following program segment will transfer a band matrix +* from conventional full matrix storage to band storage: +* +* DO 20, J = 1, N +* K = KU + 1 - J +* DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL ) +* A( K + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( kl + ku + 1 ). +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION. +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - DOUBLE PRECISION array of DIMENSION at least +* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +* Before entry, the incremented array Y must contain the +* vector y. On exit, Y is overwritten by the updated vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, IY, J, JX, JY, K, KUP1, KX, KY, + $ LENX, LENY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( KL.LT.0 )THEN + INFO = 4 + ELSE IF( KU.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( KL + KU + 1 ) )THEN + INFO = 8 + ELSE IF( INCX.EQ.0 )THEN + INFO = 10 + ELSE IF( INCY.EQ.0 )THEN + INFO = 13 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DGBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( LSAME( TRANS, 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the band part of A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, LENY + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, LENY + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, LENY + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, LENY + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + KUP1 = KU + 1 + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form y := alpha*A*x + y. +* + JX = KX + IF( INCY.EQ.1 )THEN + DO 60, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + K = KUP1 - J + DO 50, I = MAX( 1, J - KU ), MIN( M, J + KL ) + Y( I ) = Y( I ) + TEMP*A( K + I, J ) + 50 CONTINUE + END IF + JX = JX + INCX + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IY = KY + K = KUP1 - J + DO 70, I = MAX( 1, J - KU ), MIN( M, J + KL ) + Y( IY ) = Y( IY ) + TEMP*A( K + I, J ) + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + IF( J.GT.KU ) + $ KY = KY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y := alpha*A'*x + y. +* + JY = KY + IF( INCX.EQ.1 )THEN + DO 100, J = 1, N + TEMP = ZERO + K = KUP1 - J + DO 90, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + A( K + I, J )*X( I ) + 90 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 100 CONTINUE + ELSE + DO 120, J = 1, N + TEMP = ZERO + IX = KX + K = KUP1 - J + DO 110, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + A( K + I, J )*X( IX ) + IX = IX + INCX + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + IF( J.GT.KU ) + $ KX = KX + INCX + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of DGBMV . +* + END diff --git a/costa/native/external/blas/dgemm.f b/costa/native/external/blas/dgemm.f new file mode 100644 index 000000000..baabe4c52 --- /dev/null +++ b/costa/native/external/blas/dgemm.f @@ -0,0 +1,313 @@ + SUBROUTINE DGEMM ( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 TRANSA, TRANSB + INTEGER M, N, K, LDA, LDB, LDC + DOUBLE PRECISION ALPHA, BETA +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* DGEMM performs one of the matrix-matrix operations +* +* C := alpha*op( A )*op( B ) + beta*C, +* +* where op( X ) is one of +* +* op( X ) = X or op( X ) = X', +* +* alpha and beta are scalars, and A, B and C are matrices, with op( A ) +* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. +* +* Parameters +* ========== +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n', op( A ) = A. +* +* TRANSA = 'T' or 't', op( A ) = A'. +* +* TRANSA = 'C' or 'c', op( A ) = A'. +* +* Unchanged on exit. +* +* TRANSB - CHARACTER*1. +* On entry, TRANSB specifies the form of op( B ) to be used in +* the matrix multiplication as follows: +* +* TRANSB = 'N' or 'n', op( B ) = B. +* +* TRANSB = 'T' or 't', op( B ) = B'. +* +* TRANSB = 'C' or 'c', op( B ) = B'. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix +* op( A ) and of the matrix C. M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix +* op( B ) and the number of columns of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry, K specifies the number of columns of the matrix +* op( A ) and the number of rows of the matrix op( B ). K must +* be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is +* k when TRANSA = 'N' or 'n', and is m otherwise. +* Before entry with TRANSA = 'N' or 'n', the leading m by k +* part of the array A must contain the matrix A, otherwise +* the leading k by m part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANSA = 'N' or 'n' then +* LDA must be at least max( 1, m ), otherwise LDA must be at +* least max( 1, k ). +* Unchanged on exit. +* +* B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is +* n when TRANSB = 'N' or 'n', and is k otherwise. +* Before entry with TRANSB = 'N' or 'n', the leading k by n +* part of the array B must contain the matrix B, otherwise +* the leading n by k part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANSB = 'N' or 'n' then +* LDB must be at least max( 1, k ), otherwise LDB must be at +* least max( 1, n ). +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION. +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then C need not be set on input. +* Unchanged on exit. +* +* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). +* Before entry, the leading m by n part of the array C must +* contain the matrix C, except when beta is zero, in which +* case C need not be set on entry. +* On exit, the array C is overwritten by the m by n matrix +* ( alpha*op( A )*op( B ) + beta*C ). +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL NOTA, NOTB + INTEGER I, INFO, J, L, NCOLA, NROWA, NROWB + DOUBLE PRECISION TEMP +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Executable Statements .. +* +* Set NOTA and NOTB as true if A and B respectively are not +* transposed and set NROWA, NCOLA and NROWB as the number of rows +* and columns of A and the number of rows of B respectively. +* + NOTA = LSAME( TRANSA, 'N' ) + NOTB = LSAME( TRANSB, 'N' ) + IF( NOTA )THEN + NROWA = M + NCOLA = K + ELSE + NROWA = K + NCOLA = M + END IF + IF( NOTB )THEN + NROWB = K + ELSE + NROWB = N + END IF +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.NOTA ).AND. + $ ( .NOT.LSAME( TRANSA, 'C' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.NOTB ).AND. + $ ( .NOT.LSAME( TRANSB, 'C' ) ).AND. + $ ( .NOT.LSAME( TRANSB, 'T' ) ) )THEN + INFO = 2 + ELSE IF( M .LT.0 )THEN + INFO = 3 + ELSE IF( N .LT.0 )THEN + INFO = 4 + ELSE IF( K .LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 8 + ELSE IF( LDB.LT.MAX( 1, NROWB ) )THEN + INFO = 10 + ELSE IF( LDC.LT.MAX( 1, M ) )THEN + INFO = 13 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DGEMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And if alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, M + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF( NOTB )THEN + IF( NOTA )THEN +* +* Form C := alpha*A*B + beta*C. +* + DO 90, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 50, I = 1, M + C( I, J ) = ZERO + 50 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 60, I = 1, M + C( I, J ) = BETA*C( I, J ) + 60 CONTINUE + END IF + DO 80, L = 1, K + IF( B( L, J ).NE.ZERO )THEN + TEMP = ALPHA*B( L, J ) + DO 70, I = 1, M + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 70 CONTINUE + END IF + 80 CONTINUE + 90 CONTINUE + ELSE +* +* Form C := alpha*A'*B + beta*C +* + DO 120, J = 1, N + DO 110, I = 1, M + TEMP = ZERO + DO 100, L = 1, K + TEMP = TEMP + A( L, I )*B( L, J ) + 100 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 110 CONTINUE + 120 CONTINUE + END IF + ELSE + IF( NOTA )THEN +* +* Form C := alpha*A*B' + beta*C +* + DO 170, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 130, I = 1, M + C( I, J ) = ZERO + 130 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 140, I = 1, M + C( I, J ) = BETA*C( I, J ) + 140 CONTINUE + END IF + DO 160, L = 1, K + IF( B( J, L ).NE.ZERO )THEN + TEMP = ALPHA*B( J, L ) + DO 150, I = 1, M + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 150 CONTINUE + END IF + 160 CONTINUE + 170 CONTINUE + ELSE +* +* Form C := alpha*A'*B' + beta*C +* + DO 200, J = 1, N + DO 190, I = 1, M + TEMP = ZERO + DO 180, L = 1, K + TEMP = TEMP + A( L, I )*B( J, L ) + 180 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 190 CONTINUE + 200 CONTINUE + END IF + END IF +* + RETURN +* +* End of DGEMM . +* + END diff --git a/costa/native/external/blas/dgemv.f b/costa/native/external/blas/dgemv.f new file mode 100644 index 000000000..8ef80b3a5 --- /dev/null +++ b/costa/native/external/blas/dgemv.f @@ -0,0 +1,261 @@ + SUBROUTINE DGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER INCX, INCY, LDA, M, N + CHARACTER*1 TRANS +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* DGEMV performs one of the matrix-vector operations +* +* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, +* +* where alpha and beta are scalars, x and y are vectors and A is an +* m by n matrix. +* +* Parameters +* ========== +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. +* +* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. +* +* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION. +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - DOUBLE PRECISION array of DIMENSION at least +* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +* Before entry with BETA non-zero, the incremented array Y +* must contain the vector y. On exit, Y is overwritten by the +* updated vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DGEMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( LSAME( TRANS, 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, LENY + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, LENY + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, LENY + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, LENY + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form y := alpha*A*x + y. +* + JX = KX + IF( INCY.EQ.1 )THEN + DO 60, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + DO 50, I = 1, M + Y( I ) = Y( I ) + TEMP*A( I, J ) + 50 CONTINUE + END IF + JX = JX + INCX + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IY = KY + DO 70, I = 1, M + Y( IY ) = Y( IY ) + TEMP*A( I, J ) + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + ELSE +* +* Form y := alpha*A'*x + y. +* + JY = KY + IF( INCX.EQ.1 )THEN + DO 100, J = 1, N + TEMP = ZERO + DO 90, I = 1, M + TEMP = TEMP + A( I, J )*X( I ) + 90 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 100 CONTINUE + ELSE + DO 120, J = 1, N + TEMP = ZERO + IX = KX + DO 110, I = 1, M + TEMP = TEMP + A( I, J )*X( IX ) + IX = IX + INCX + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of DGEMV . +* + END diff --git a/costa/native/external/blas/dger.f b/costa/native/external/blas/dger.f new file mode 100644 index 000000000..d316000ab --- /dev/null +++ b/costa/native/external/blas/dger.f @@ -0,0 +1,157 @@ + SUBROUTINE DGER ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA + INTEGER INCX, INCY, LDA, M, N +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* DGER performs the rank 1 operation +* +* A := alpha*x*y' + A, +* +* where alpha is a scalar, x is an m element vector, y is an n element +* vector and A is an m by n matrix. +* +* Parameters +* ========== +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( m - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the m +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. On exit, A is +* overwritten by the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, J, JY, KX +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( M.LT.0 )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DGER ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( INCY.GT.0 )THEN + JY = 1 + ELSE + JY = 1 - ( N - 1 )*INCY + END IF + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*Y( JY ) + DO 10, I = 1, M + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + END IF + JY = JY + INCY + 20 CONTINUE + ELSE + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( M - 1 )*INCX + END IF + DO 40, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*Y( JY ) + IX = KX + DO 30, I = 1, M + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JY = JY + INCY + 40 CONTINUE + END IF +* + RETURN +* +* End of DGER . +* + END diff --git a/costa/native/external/blas/dnrm2.f b/costa/native/external/blas/dnrm2.f new file mode 100644 index 000000000..119d0477e --- /dev/null +++ b/costa/native/external/blas/dnrm2.f @@ -0,0 +1,60 @@ + DOUBLE PRECISION FUNCTION DNRM2 ( N, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N +* .. Array Arguments .. + DOUBLE PRECISION X( * ) +* .. +* +* DNRM2 returns the euclidean norm of a vector via the function +* name, so that +* +* DNRM2 := sqrt( x'*x ) +* +* +* +* -- This version written on 25-October-1982. +* Modified on 14-October-1993 to inline the call to DLASSQ. +* Sven Hammarling, Nag Ltd. +* +* +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. Local Scalars .. + INTEGER IX + DOUBLE PRECISION ABSXI, NORM, SCALE, SSQ +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. + IF( N.LT.1 .OR. INCX.LT.1 )THEN + NORM = ZERO + ELSE IF( N.EQ.1 )THEN + NORM = ABS( X( 1 ) ) + ELSE + SCALE = ZERO + SSQ = ONE +* The following loop is equivalent to this call to the LAPACK +* auxiliary routine: +* CALL DLASSQ( N, X, INCX, SCALE, SSQ ) +* + DO 10, IX = 1, 1 + ( N - 1 )*INCX, INCX + IF( X( IX ).NE.ZERO )THEN + ABSXI = ABS( X( IX ) ) + IF( SCALE.LT.ABSXI )THEN + SSQ = ONE + SSQ*( SCALE/ABSXI )**2 + SCALE = ABSXI + ELSE + SSQ = SSQ + ( ABSXI/SCALE )**2 + END IF + END IF + 10 CONTINUE + NORM = SCALE * SQRT( SSQ ) + END IF +* + DNRM2 = NORM + RETURN +* +* End of DNRM2. +* + END diff --git a/costa/native/external/blas/drot.f b/costa/native/external/blas/drot.f new file mode 100644 index 000000000..b9ea3bd91 --- /dev/null +++ b/costa/native/external/blas/drot.f @@ -0,0 +1,37 @@ + subroutine drot (n,dx,incx,dy,incy,c,s) +c +c applies a plane rotation. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double precision dx(*),dy(*),dtemp,c,s + integer i,incx,incy,ix,iy,n +c + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments not equal +c to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + dtemp = c*dx(ix) + s*dy(iy) + dy(iy) = c*dy(iy) - s*dx(ix) + dx(ix) = dtemp + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + dtemp = c*dx(i) + s*dy(i) + dy(i) = c*dy(i) - s*dx(i) + dx(i) = dtemp + 30 continue + return + end diff --git a/costa/native/external/blas/drotg.f b/costa/native/external/blas/drotg.f new file mode 100644 index 000000000..67838e2cb --- /dev/null +++ b/costa/native/external/blas/drotg.f @@ -0,0 +1,27 @@ + subroutine drotg(da,db,c,s) +c +c construct givens plane rotation. +c jack dongarra, linpack, 3/11/78. +c + double precision da,db,c,s,roe,scale,r,z +c + roe = db + if( dabs(da) .gt. dabs(db) ) roe = da + scale = dabs(da) + dabs(db) + if( scale .ne. 0.0d0 ) go to 10 + c = 1.0d0 + s = 0.0d0 + r = 0.0d0 + z = 0.0d0 + go to 20 + 10 r = scale*dsqrt((da/scale)**2 + (db/scale)**2) + r = dsign(1.0d0,roe)*r + c = da/r + s = db/r + z = 1.0d0 + if( dabs(da) .gt. dabs(db) ) z = s + if( dabs(db) .ge. dabs(da) .and. c .ne. 0.0d0 ) z = 1.0d0/c + 20 da = r + db = z + return + end diff --git a/costa/native/external/blas/dsbmv.f b/costa/native/external/blas/dsbmv.f new file mode 100644 index 000000000..272042af6 --- /dev/null +++ b/costa/native/external/blas/dsbmv.f @@ -0,0 +1,303 @@ + SUBROUTINE DSBMV ( UPLO, N, K, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER INCX, INCY, K, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* DSBMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n symmetric band matrix, with k super-diagonals. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the band matrix A is being supplied as +* follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* being supplied. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* being supplied. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry, K specifies the number of super-diagonals of the +* matrix A. K must satisfy 0 .le. K. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the symmetric matrix, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer the upper +* triangular part of a symmetric band matrix from conventional +* full matrix storage to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the symmetric matrix, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer the lower +* triangular part of a symmetric band matrix from conventional +* full matrix storage to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION. +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* Y - DOUBLE PRECISION array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the +* vector y. On exit, Y is overwritten by the updated vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KPLUS1, KX, KY, L +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( K.LT.0 )THEN + INFO = 3 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DSBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of the array A +* are accessed sequentially with one pass through A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when upper triangle of A is stored. +* + KPLUS1 = K + 1 + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + L = KPLUS1 - J + DO 50, I = MAX( 1, J - K ), J - 1 + Y( I ) = Y( I ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + A( L + I, J )*X( I ) + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*A( KPLUS1, J ) + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + L = KPLUS1 - J + DO 70, I = MAX( 1, J - K ), J - 1 + Y( IY ) = Y( IY ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + A( L + I, J )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*A( KPLUS1, J ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + IF( J.GT.K )THEN + KX = KX + INCX + KY = KY + INCY + END IF + 80 CONTINUE + END IF + ELSE +* +* Form y when lower triangle of A is stored. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*A( 1, J ) + L = 1 - J + DO 90, I = J + 1, MIN( N, J + K ) + Y( I ) = Y( I ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + A( L + I, J )*X( I ) + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*A( 1, J ) + L = 1 - J + IX = JX + IY = JY + DO 110, I = J + 1, MIN( N, J + K ) + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + A( L + I, J )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSBMV . +* + END diff --git a/costa/native/external/blas/dscal.f b/costa/native/external/blas/dscal.f new file mode 100644 index 000000000..e1467faf2 --- /dev/null +++ b/costa/native/external/blas/dscal.f @@ -0,0 +1,43 @@ + subroutine dscal(n,da,dx,incx) +c +c scales a vector by a constant. +c uses unrolled loops for increment equal to one. +c jack dongarra, linpack, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double precision da,dx(*) + integer i,incx,m,mp1,n,nincx +c + if( n.le.0 .or. incx.le.0 )return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + nincx = n*incx + do 10 i = 1,nincx,incx + dx(i) = da*dx(i) + 10 continue + return +c +c code for increment equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,5) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + dx(i) = da*dx(i) + 30 continue + if( n .lt. 5 ) return + 40 mp1 = m + 1 + do 50 i = mp1,n,5 + dx(i) = da*dx(i) + dx(i + 1) = da*dx(i + 1) + dx(i + 2) = da*dx(i + 2) + dx(i + 3) = da*dx(i + 3) + dx(i + 4) = da*dx(i + 4) + 50 continue + return + end diff --git a/costa/native/external/blas/dspmv.f b/costa/native/external/blas/dspmv.f new file mode 100644 index 000000000..3ace7bf26 --- /dev/null +++ b/costa/native/external/blas/dspmv.f @@ -0,0 +1,262 @@ + SUBROUTINE DSPMV ( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER INCX, INCY, N + CHARACTER*1 UPLO +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* DSPMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n symmetric matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* AP - DOUBLE PRECISION array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION. +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. On exit, Y is overwritten by the updated +* vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 6 + ELSE IF( INCY.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DSPMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when AP contains the upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + K = KK + DO 50, I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( I ) + K = K + 1 + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*AP( KK + J - 1 ) + ALPHA*TEMP2 + KK = KK + J + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70, K = KK, KK + J - 2 + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*AP( KK + J - 1 ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + J + 80 CONTINUE + END IF + ELSE +* +* Form y when AP contains the lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*AP( KK ) + K = KK + 1 + DO 90, I = J + 1, N + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( I ) + K = K + 1 + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + KK = KK + ( N - J + 1 ) + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*AP( KK ) + IX = JX + IY = JY + DO 110, K = KK + 1, KK + N - J + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + ( N - J + 1 ) + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSPMV . +* + END diff --git a/costa/native/external/blas/dspr.f b/costa/native/external/blas/dspr.f new file mode 100644 index 000000000..3da6889c9 --- /dev/null +++ b/costa/native/external/blas/dspr.f @@ -0,0 +1,198 @@ + SUBROUTINE DSPR ( UPLO, N, ALPHA, X, INCX, AP ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA + INTEGER INCX, N + CHARACTER*1 UPLO +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DSPR performs the symmetric rank 1 operation +* +* A := alpha*x*x' + A, +* +* where alpha is a real scalar, x is an n element vector and A is an +* n by n symmetric matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* AP - DOUBLE PRECISION array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. On exit, the array +* AP is overwritten by the upper triangular part of the +* updated matrix. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. On exit, the array +* AP is overwritten by the lower triangular part of the +* updated matrix. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DSPR ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when upper triangle is stored in AP. +* + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*X( J ) + K = KK + DO 10, I = 1, J + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 10 CONTINUE + END IF + KK = KK + J + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IX = KX + DO 30, K = KK, KK + J - 1 + AP( K ) = AP( K ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JX = JX + INCX + KK = KK + J + 40 CONTINUE + END IF + ELSE +* +* Form A when lower triangle is stored in AP. +* + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*X( J ) + K = KK + DO 50, I = J, N + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 50 CONTINUE + END IF + KK = KK + N - J + 1 + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IX = JX + DO 70, K = KK, KK + N - J + AP( K ) = AP( K ) + X( IX )*TEMP + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + KK = KK + N - J + 1 + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSPR . +* + END diff --git a/costa/native/external/blas/dspr2.f b/costa/native/external/blas/dspr2.f new file mode 100644 index 000000000..1cfce21b0 --- /dev/null +++ b/costa/native/external/blas/dspr2.f @@ -0,0 +1,229 @@ + SUBROUTINE DSPR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, AP ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA + INTEGER INCX, INCY, N + CHARACTER*1 UPLO +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* DSPR2 performs the symmetric rank 2 operation +* +* A := alpha*x*y' + alpha*y*x' + A, +* +* where alpha is a scalar, x and y are n element vectors and A is an +* n by n symmetric matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* AP - DOUBLE PRECISION array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. On exit, the array +* AP is overwritten by the upper triangular part of the +* updated matrix. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. On exit, the array +* AP is overwritten by the lower triangular part of the +* updated matrix. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DSPR2 ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set up the start points in X and Y if the increments are not both +* unity. +* + IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF + JX = KX + JY = KY + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when upper triangle is stored in AP. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 20, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( J ) + TEMP2 = ALPHA*X( J ) + K = KK + DO 10, I = 1, J + AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2 + K = K + 1 + 10 CONTINUE + END IF + KK = KK + J + 20 CONTINUE + ELSE + DO 40, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( JY ) + TEMP2 = ALPHA*X( JX ) + IX = KX + IY = KY + DO 30, K = KK, KK + J - 1 + AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 30 CONTINUE + END IF + JX = JX + INCX + JY = JY + INCY + KK = KK + J + 40 CONTINUE + END IF + ELSE +* +* Form A when lower triangle is stored in AP. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( J ) + TEMP2 = ALPHA*X( J ) + K = KK + DO 50, I = J, N + AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2 + K = K + 1 + 50 CONTINUE + END IF + KK = KK + N - J + 1 + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( JY ) + TEMP2 = ALPHA*X( JX ) + IX = JX + IY = JY + DO 70, K = KK, KK + N - J + AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + JY = JY + INCY + KK = KK + N - J + 1 + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSPR2 . +* + END diff --git a/costa/native/external/blas/dswap.f b/costa/native/external/blas/dswap.f new file mode 100644 index 000000000..7f7d1fbba --- /dev/null +++ b/costa/native/external/blas/dswap.f @@ -0,0 +1,56 @@ + subroutine dswap (n,dx,incx,dy,incy) +c +c interchanges two vectors. +c uses unrolled loops for increments equal one. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double precision dx(*),dy(*),dtemp + integer i,incx,incy,ix,iy,m,mp1,n +c + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments not equal +c to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + dtemp = dx(ix) + dx(ix) = dy(iy) + dy(iy) = dtemp + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,3) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + dtemp = dx(i) + dx(i) = dy(i) + dy(i) = dtemp + 30 continue + if( n .lt. 3 ) return + 40 mp1 = m + 1 + do 50 i = mp1,n,3 + dtemp = dx(i) + dx(i) = dy(i) + dy(i) = dtemp + dtemp = dx(i + 1) + dx(i + 1) = dy(i + 1) + dy(i + 1) = dtemp + dtemp = dx(i + 2) + dx(i + 2) = dy(i + 2) + dy(i + 2) = dtemp + 50 continue + return + end diff --git a/costa/native/external/blas/dsymm.f b/costa/native/external/blas/dsymm.f new file mode 100644 index 000000000..0f2514170 --- /dev/null +++ b/costa/native/external/blas/dsymm.f @@ -0,0 +1,294 @@ + SUBROUTINE DSYMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO + INTEGER M, N, LDA, LDB, LDC + DOUBLE PRECISION ALPHA, BETA +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* DSYMM performs one of the matrix-matrix operations +* +* C := alpha*A*B + beta*C, +* +* or +* +* C := alpha*B*A + beta*C, +* +* where alpha and beta are scalars, A is a symmetric matrix and B and +* C are m by n matrices. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether the symmetric matrix A +* appears on the left or right in the operation as follows: +* +* SIDE = 'L' or 'l' C := alpha*A*B + beta*C, +* +* SIDE = 'R' or 'r' C := alpha*B*A + beta*C, +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the symmetric matrix A is to be +* referenced as follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of the +* symmetric matrix is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of the +* symmetric matrix is to be referenced. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix C. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix C. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is +* m when SIDE = 'L' or 'l' and is n otherwise. +* Before entry with SIDE = 'L' or 'l', the m by m part of +* the array A must contain the symmetric matrix, such that +* when UPLO = 'U' or 'u', the leading m by m upper triangular +* part of the array A must contain the upper triangular part +* of the symmetric matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading m by m lower triangular part of the array A +* must contain the lower triangular part of the symmetric +* matrix and the strictly upper triangular part of A is not +* referenced. +* Before entry with SIDE = 'R' or 'r', the n by n part of +* the array A must contain the symmetric matrix, such that +* when UPLO = 'U' or 'u', the leading n by n upper triangular +* part of the array A must contain the upper triangular part +* of the symmetric matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading n by n lower triangular part of the array A +* must contain the lower triangular part of the symmetric +* matrix and the strictly upper triangular part of A is not +* referenced. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), otherwise LDA must be at +* least max( 1, n ). +* Unchanged on exit. +* +* B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION. +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then C need not be set on input. +* Unchanged on exit. +* +* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). +* Before entry, the leading m by n part of the array C must +* contain the matrix C, except when beta is zero, in which +* case C need not be set on entry. +* On exit, the array C is overwritten by the m by n updated +* matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, K, NROWA + DOUBLE PRECISION TEMP1, TEMP2 +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Executable Statements .. +* +* Set NROWA as the number of rows of A. +* + IF( LSAME( SIDE, 'L' ) )THEN + NROWA = M + ELSE + NROWA = N + END IF + UPPER = LSAME( UPLO, 'U' ) +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.LSAME( SIDE, 'L' ) ).AND. + $ ( .NOT.LSAME( SIDE, 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO, 'L' ) ) )THEN + INFO = 2 + ELSE IF( M .LT.0 )THEN + INFO = 3 + ELSE IF( N .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, M ) )THEN + INFO = 12 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DSYMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, M + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( SIDE, 'L' ) )THEN +* +* Form C := alpha*A*B + beta*C. +* + IF( UPPER )THEN + DO 70, J = 1, N + DO 60, I = 1, M + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 50, K = 1, I - 1 + C( K, J ) = C( K, J ) + TEMP1 *A( K, I ) + TEMP2 = TEMP2 + B( K, J )*A( K, I ) + 50 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*A( I, I ) + ALPHA*TEMP2 + END IF + 60 CONTINUE + 70 CONTINUE + ELSE + DO 100, J = 1, N + DO 90, I = M, 1, -1 + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 80, K = I + 1, M + C( K, J ) = C( K, J ) + TEMP1 *A( K, I ) + TEMP2 = TEMP2 + B( K, J )*A( K, I ) + 80 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*A( I, I ) + ALPHA*TEMP2 + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form C := alpha*B*A + beta*C. +* + DO 170, J = 1, N + TEMP1 = ALPHA*A( J, J ) + IF( BETA.EQ.ZERO )THEN + DO 110, I = 1, M + C( I, J ) = TEMP1*B( I, J ) + 110 CONTINUE + ELSE + DO 120, I = 1, M + C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J ) + 120 CONTINUE + END IF + DO 140, K = 1, J - 1 + IF( UPPER )THEN + TEMP1 = ALPHA*A( K, J ) + ELSE + TEMP1 = ALPHA*A( J, K ) + END IF + DO 130, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 130 CONTINUE + 140 CONTINUE + DO 160, K = J + 1, N + IF( UPPER )THEN + TEMP1 = ALPHA*A( J, K ) + ELSE + TEMP1 = ALPHA*A( K, J ) + END IF + DO 150, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 150 CONTINUE + 160 CONTINUE + 170 CONTINUE + END IF +* + RETURN +* +* End of DSYMM . +* + END diff --git a/costa/native/external/blas/dsymv.f b/costa/native/external/blas/dsymv.f new file mode 100644 index 000000000..7592d156b --- /dev/null +++ b/costa/native/external/blas/dsymv.f @@ -0,0 +1,262 @@ + SUBROUTINE DSYMV ( UPLO, N, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER INCX, INCY, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* DSYMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n symmetric matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of A is not referenced. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION. +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. On exit, Y is overwritten by the updated +* vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 5 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + ELSE IF( INCY.EQ.0 )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DSYMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when A is stored in upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + DO 50, I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( I ) + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*A( J, J ) + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70, I = 1, J - 1 + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y when A is stored in lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*A( J, J ) + DO 90, I = J + 1, N + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( I ) + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + IX = JX + IY = JY + DO 110, I = J + 1, N + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSYMV . +* + END diff --git a/costa/native/external/blas/dsyr.f b/costa/native/external/blas/dsyr.f new file mode 100644 index 000000000..873771967 --- /dev/null +++ b/costa/native/external/blas/dsyr.f @@ -0,0 +1,197 @@ + SUBROUTINE DSYR ( UPLO, N, ALPHA, X, INCX, A, LDA ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA + INTEGER INCX, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DSYR performs the symmetric rank 1 operation +* +* A := alpha*x*x' + A, +* +* where alpha is a real scalar, x is an n element vector and A is an +* n by n symmetric matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of A is not referenced. On exit, the +* upper triangular part of the array A is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of A is not referenced. On exit, the +* lower triangular part of the array A is overwritten by the +* lower triangular part of the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, J, JX, KX +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DSYR ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when A is stored in upper triangle. +* + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*X( J ) + DO 10, I = 1, J + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IX = KX + DO 30, I = 1, J + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in lower triangle. +* + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*X( J ) + DO 50, I = J, N + A( I, J ) = A( I, J ) + X( I )*TEMP + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IX = JX + DO 70, I = J, N + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSYR . +* + END diff --git a/costa/native/external/blas/dsyr2.f b/costa/native/external/blas/dsyr2.f new file mode 100644 index 000000000..918ad8a7d --- /dev/null +++ b/costa/native/external/blas/dsyr2.f @@ -0,0 +1,230 @@ + SUBROUTINE DSYR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, A, LDA ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA + INTEGER INCX, INCY, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* DSYR2 performs the symmetric rank 2 operation +* +* A := alpha*x*y' + alpha*y*x' + A, +* +* where alpha is a scalar, x and y are n element vectors and A is an n +* by n symmetric matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of A is not referenced. On exit, the +* upper triangular part of the array A is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of A is not referenced. On exit, the +* lower triangular part of the array A is overwritten by the +* lower triangular part of the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DSYR2 ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set up the start points in X and Y if the increments are not both +* unity. +* + IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF + JX = KX + JY = KY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when A is stored in the upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 20, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( J ) + TEMP2 = ALPHA*X( J ) + DO 10, I = 1, J + A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2 + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + DO 40, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( JY ) + TEMP2 = ALPHA*X( JX ) + IX = KX + IY = KY + DO 30, I = 1, J + A( I, J ) = A( I, J ) + X( IX )*TEMP1 + $ + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 30 CONTINUE + END IF + JX = JX + INCX + JY = JY + INCY + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in the lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( J ) + TEMP2 = ALPHA*X( J ) + DO 50, I = J, N + A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2 + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( JY ) + TEMP2 = ALPHA*X( JX ) + IX = JX + IY = JY + DO 70, I = J, N + A( I, J ) = A( I, J ) + X( IX )*TEMP1 + $ + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSYR2 . +* + END diff --git a/costa/native/external/blas/dsyr2k.f b/costa/native/external/blas/dsyr2k.f new file mode 100644 index 000000000..ac7d97de6 --- /dev/null +++ b/costa/native/external/blas/dsyr2k.f @@ -0,0 +1,327 @@ + SUBROUTINE DSYR2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 UPLO, TRANS + INTEGER N, K, LDA, LDB, LDC + DOUBLE PRECISION ALPHA, BETA +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* DSYR2K performs one of the symmetric rank 2k operations +* +* C := alpha*A*B' + alpha*B*A' + beta*C, +* +* or +* +* C := alpha*A'*B + alpha*B'*A + beta*C, +* +* where alpha and beta are scalars, C is an n by n symmetric matrix +* and A and B are n by k matrices in the first case and k by n +* matrices in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + +* beta*C. +* +* TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + +* beta*C. +* +* TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A + +* beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrices A and B, and on entry with +* TRANS = 'T' or 't' or 'C' or 'c', K specifies the number +* of rows of the matrices A and B. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array B must contain the matrix B, otherwise +* the leading k by n part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDB must be at least max( 1, n ), otherwise LDB must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION. +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + DOUBLE PRECISION TEMP1, TEMP2 +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) )THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'C' ) ) )THEN + INFO = 2 + ELSE IF( N .LT.0 )THEN + INFO = 3 + ELSE IF( K .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, N ) )THEN + INFO = 12 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DSYR2K', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( UPPER )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, J + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.ZERO )THEN + DO 60, J = 1, N + DO 50, I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80, J = 1, N + DO 70, I = J, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form C := alpha*A*B' + alpha*B*A' + C. +* + IF( UPPER )THEN + DO 130, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 90, I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 100, I = 1, J + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + END IF + DO 120, L = 1, K + IF( ( A( J, L ).NE.ZERO ).OR. + $ ( B( J, L ).NE.ZERO ) )THEN + TEMP1 = ALPHA*B( J, L ) + TEMP2 = ALPHA*A( J, L ) + DO 110, I = 1, J + C( I, J ) = C( I, J ) + + $ A( I, L )*TEMP1 + B( I, L )*TEMP2 + 110 CONTINUE + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 140, I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 150, I = J, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + END IF + DO 170, L = 1, K + IF( ( A( J, L ).NE.ZERO ).OR. + $ ( B( J, L ).NE.ZERO ) )THEN + TEMP1 = ALPHA*B( J, L ) + TEMP2 = ALPHA*A( J, L ) + DO 160, I = J, N + C( I, J ) = C( I, J ) + + $ A( I, L )*TEMP1 + B( I, L )*TEMP2 + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*A'*B + alpha*B'*A + C. +* + IF( UPPER )THEN + DO 210, J = 1, N + DO 200, I = 1, J + TEMP1 = ZERO + TEMP2 = ZERO + DO 190, L = 1, K + TEMP1 = TEMP1 + A( L, I )*B( L, J ) + TEMP2 = TEMP2 + B( L, I )*A( L, J ) + 190 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ ALPHA*TEMP1 + ALPHA*TEMP2 + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240, J = 1, N + DO 230, I = J, N + TEMP1 = ZERO + TEMP2 = ZERO + DO 220, L = 1, K + TEMP1 = TEMP1 + A( L, I )*B( L, J ) + TEMP2 = TEMP2 + B( L, I )*A( L, J ) + 220 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ ALPHA*TEMP1 + ALPHA*TEMP2 + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSYR2K. +* + END diff --git a/costa/native/external/blas/dsyrk.f b/costa/native/external/blas/dsyrk.f new file mode 100644 index 000000000..b618b2968 --- /dev/null +++ b/costa/native/external/blas/dsyrk.f @@ -0,0 +1,294 @@ + SUBROUTINE DSYRK ( UPLO, TRANS, N, K, ALPHA, A, LDA, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 UPLO, TRANS + INTEGER N, K, LDA, LDC + DOUBLE PRECISION ALPHA, BETA +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* DSYRK performs one of the symmetric rank k operations +* +* C := alpha*A*A' + beta*C, +* +* or +* +* C := alpha*A'*A + beta*C, +* +* where alpha and beta are scalars, C is an n by n symmetric matrix +* and A is an n by k matrix in the first case and a k by n matrix +* in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. +* +* TRANS = 'T' or 't' C := alpha*A'*A + beta*C. +* +* TRANS = 'C' or 'c' C := alpha*A'*A + beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrix A, and on entry with +* TRANS = 'T' or 't' or 'C' or 'c', K specifies the number +* of rows of the matrix A. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION. +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + DOUBLE PRECISION TEMP +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) )THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'C' ) ) )THEN + INFO = 2 + ELSE IF( N .LT.0 )THEN + INFO = 3 + ELSE IF( K .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDC.LT.MAX( 1, N ) )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DSYRK ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( UPPER )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, J + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.ZERO )THEN + DO 60, J = 1, N + DO 50, I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80, J = 1, N + DO 70, I = J, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form C := alpha*A*A' + beta*C. +* + IF( UPPER )THEN + DO 130, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 90, I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 100, I = 1, J + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + END IF + DO 120, L = 1, K + IF( A( J, L ).NE.ZERO )THEN + TEMP = ALPHA*A( J, L ) + DO 110, I = 1, J + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 110 CONTINUE + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 140, I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 150, I = J, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + END IF + DO 170, L = 1, K + IF( A( J, L ).NE.ZERO )THEN + TEMP = ALPHA*A( J, L ) + DO 160, I = J, N + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*A'*A + beta*C. +* + IF( UPPER )THEN + DO 210, J = 1, N + DO 200, I = 1, J + TEMP = ZERO + DO 190, L = 1, K + TEMP = TEMP + A( L, I )*A( L, J ) + 190 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240, J = 1, N + DO 230, I = J, N + TEMP = ZERO + DO 220, L = 1, K + TEMP = TEMP + A( L, I )*A( L, J ) + 220 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSYRK . +* + END diff --git a/costa/native/external/blas/dtbmv.f b/costa/native/external/blas/dtbmv.f new file mode 100644 index 000000000..1363db79c --- /dev/null +++ b/costa/native/external/blas/dtbmv.f @@ -0,0 +1,342 @@ + SUBROUTINE DTBMV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, K, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DTBMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular band matrix, with ( k + 1 ) diagonals. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := A'*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with UPLO = 'U' or 'u', K specifies the number of +* super-diagonals of the matrix A. +* On entry with UPLO = 'L' or 'l', K specifies the number of +* sub-diagonals of the matrix A. +* K must satisfy 0 .le. K. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer an upper +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer a lower +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Note that when DIAG = 'U' or 'u' the elements of the array A +* corresponding to the diagonal elements of the matrix are not +* referenced, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, J, JX, KPLUS1, KX, L + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( K.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 7 + ELSE IF( INCX.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DTBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + L = KPLUS1 - J + DO 10, I = MAX( 1, J - K ), J - 1 + X( I ) = X( I ) + TEMP*A( L + I, J ) + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( KPLUS1, J ) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + L = KPLUS1 - J + DO 30, I = MAX( 1, J - K ), J - 1 + X( IX ) = X( IX ) + TEMP*A( L + I, J ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( KPLUS1, J ) + END IF + JX = JX + INCX + IF( J.GT.K ) + $ KX = KX + INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + L = 1 - J + DO 50, I = MIN( N, J + K ), J + 1, -1 + X( I ) = X( I ) + TEMP*A( L + I, J ) + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( 1, J ) + END IF + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + L = 1 - J + DO 70, I = MIN( N, J + K ), J + 1, -1 + X( IX ) = X( IX ) + TEMP*A( L + I, J ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( 1, J ) + END IF + JX = JX - INCX + IF( ( N - J ).GE.K ) + $ KX = KX - INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 100, J = N, 1, -1 + TEMP = X( J ) + L = KPLUS1 - J + IF( NOUNIT ) + $ TEMP = TEMP*A( KPLUS1, J ) + DO 90, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + A( L + I, J )*X( I ) + 90 CONTINUE + X( J ) = TEMP + 100 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 120, J = N, 1, -1 + TEMP = X( JX ) + KX = KX - INCX + IX = KX + L = KPLUS1 - J + IF( NOUNIT ) + $ TEMP = TEMP*A( KPLUS1, J ) + DO 110, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + A( L + I, J )*X( IX ) + IX = IX - INCX + 110 CONTINUE + X( JX ) = TEMP + JX = JX - INCX + 120 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 140, J = 1, N + TEMP = X( J ) + L = 1 - J + IF( NOUNIT ) + $ TEMP = TEMP*A( 1, J ) + DO 130, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + A( L + I, J )*X( I ) + 130 CONTINUE + X( J ) = TEMP + 140 CONTINUE + ELSE + JX = KX + DO 160, J = 1, N + TEMP = X( JX ) + KX = KX + INCX + IX = KX + L = 1 - J + IF( NOUNIT ) + $ TEMP = TEMP*A( 1, J ) + DO 150, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + A( L + I, J )*X( IX ) + IX = IX + INCX + 150 CONTINUE + X( JX ) = TEMP + JX = JX + INCX + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DTBMV . +* + END diff --git a/costa/native/external/blas/dtbsv.f b/costa/native/external/blas/dtbsv.f new file mode 100644 index 000000000..d87ed82d5 --- /dev/null +++ b/costa/native/external/blas/dtbsv.f @@ -0,0 +1,346 @@ + SUBROUTINE DTBSV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, K, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DTBSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular band matrix, with ( k + 1 ) +* diagonals. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' A'*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with UPLO = 'U' or 'u', K specifies the number of +* super-diagonals of the matrix A. +* On entry with UPLO = 'L' or 'l', K specifies the number of +* sub-diagonals of the matrix A. +* K must satisfy 0 .le. K. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer an upper +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer a lower +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Note that when DIAG = 'U' or 'u' the elements of the array A +* corresponding to the diagonal elements of the matrix are not +* referenced, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, J, JX, KPLUS1, KX, L + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( K.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 7 + ELSE IF( INCX.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DTBSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed by sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + L = KPLUS1 - J + IF( NOUNIT ) + $ X( J ) = X( J )/A( KPLUS1, J ) + TEMP = X( J ) + DO 10, I = J - 1, MAX( 1, J - K ), -1 + X( I ) = X( I ) - TEMP*A( L + I, J ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 40, J = N, 1, -1 + KX = KX - INCX + IF( X( JX ).NE.ZERO )THEN + IX = KX + L = KPLUS1 - J + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( KPLUS1, J ) + TEMP = X( JX ) + DO 30, I = J - 1, MAX( 1, J - K ), -1 + X( IX ) = X( IX ) - TEMP*A( L + I, J ) + IX = IX - INCX + 30 CONTINUE + END IF + JX = JX - INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + L = 1 - J + IF( NOUNIT ) + $ X( J ) = X( J )/A( 1, J ) + TEMP = X( J ) + DO 50, I = J + 1, MIN( N, J + K ) + X( I ) = X( I ) - TEMP*A( L + I, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + KX = KX + INCX + IF( X( JX ).NE.ZERO )THEN + IX = KX + L = 1 - J + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( 1, J ) + TEMP = X( JX ) + DO 70, I = J + 1, MIN( N, J + K ) + X( IX ) = X( IX ) - TEMP*A( L + I, J ) + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A')*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 100, J = 1, N + TEMP = X( J ) + L = KPLUS1 - J + DO 90, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - A( L + I, J )*X( I ) + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( KPLUS1, J ) + X( J ) = TEMP + 100 CONTINUE + ELSE + JX = KX + DO 120, J = 1, N + TEMP = X( JX ) + IX = KX + L = KPLUS1 - J + DO 110, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - A( L + I, J )*X( IX ) + IX = IX + INCX + 110 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( KPLUS1, J ) + X( JX ) = TEMP + JX = JX + INCX + IF( J.GT.K ) + $ KX = KX + INCX + 120 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 140, J = N, 1, -1 + TEMP = X( J ) + L = 1 - J + DO 130, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - A( L + I, J )*X( I ) + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( 1, J ) + X( J ) = TEMP + 140 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 160, J = N, 1, -1 + TEMP = X( JX ) + IX = KX + L = 1 - J + DO 150, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - A( L + I, J )*X( IX ) + IX = IX - INCX + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( 1, J ) + X( JX ) = TEMP + JX = JX - INCX + IF( ( N - J ).GE.K ) + $ KX = KX - INCX + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DTBSV . +* + END diff --git a/costa/native/external/blas/dtpmv.f b/costa/native/external/blas/dtpmv.f new file mode 100644 index 000000000..ee11bc1b0 --- /dev/null +++ b/costa/native/external/blas/dtpmv.f @@ -0,0 +1,299 @@ + SUBROUTINE DTPMV ( UPLO, TRANS, DIAG, N, AP, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DTPMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := A'*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* AP - DOUBLE PRECISION array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) +* respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) +* respectively, and so on. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced, but are assumed to be unity. +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DTPMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of AP are +* accessed sequentially with one pass through AP. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x:= A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK =1 + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + K = KK + DO 10, I = 1, J - 1 + X( I ) = X( I ) + TEMP*AP( K ) + K = K + 1 + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*AP( KK + J - 1 ) + END IF + KK = KK + J + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 30, K = KK, KK + J - 2 + X( IX ) = X( IX ) + TEMP*AP( K ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*AP( KK + J - 1 ) + END IF + JX = JX + INCX + KK = KK + J + 40 CONTINUE + END IF + ELSE + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + K = KK + DO 50, I = N, J + 1, -1 + X( I ) = X( I ) + TEMP*AP( K ) + K = K - 1 + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*AP( KK - N + J ) + END IF + KK = KK - ( N - J + 1 ) + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 70, K = KK, KK - ( N - ( J + 1 ) ), -1 + X( IX ) = X( IX ) + TEMP*AP( K ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*AP( KK - N + J ) + END IF + JX = JX - INCX + KK = KK - ( N - J + 1 ) + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 100, J = N, 1, -1 + TEMP = X( J ) + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + K = KK - 1 + DO 90, I = J - 1, 1, -1 + TEMP = TEMP + AP( K )*X( I ) + K = K - 1 + 90 CONTINUE + X( J ) = TEMP + KK = KK - J + 100 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 120, J = N, 1, -1 + TEMP = X( JX ) + IX = JX + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 110, K = KK - 1, KK - J + 1, -1 + IX = IX - INCX + TEMP = TEMP + AP( K )*X( IX ) + 110 CONTINUE + X( JX ) = TEMP + JX = JX - INCX + KK = KK - J + 120 CONTINUE + END IF + ELSE + KK = 1 + IF( INCX.EQ.1 )THEN + DO 140, J = 1, N + TEMP = X( J ) + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + K = KK + 1 + DO 130, I = J + 1, N + TEMP = TEMP + AP( K )*X( I ) + K = K + 1 + 130 CONTINUE + X( J ) = TEMP + KK = KK + ( N - J + 1 ) + 140 CONTINUE + ELSE + JX = KX + DO 160, J = 1, N + TEMP = X( JX ) + IX = JX + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 150, K = KK + 1, KK + N - J + IX = IX + INCX + TEMP = TEMP + AP( K )*X( IX ) + 150 CONTINUE + X( JX ) = TEMP + JX = JX + INCX + KK = KK + ( N - J + 1 ) + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DTPMV . +* + END diff --git a/costa/native/external/blas/dtpsv.f b/costa/native/external/blas/dtpsv.f new file mode 100644 index 000000000..91930d9fb --- /dev/null +++ b/costa/native/external/blas/dtpsv.f @@ -0,0 +1,302 @@ + SUBROUTINE DTPSV ( UPLO, TRANS, DIAG, N, AP, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DTPSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular matrix, supplied in packed form. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' A'*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* AP - DOUBLE PRECISION array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) +* respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) +* respectively, and so on. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced, but are assumed to be unity. +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DTPSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of AP are +* accessed sequentially with one pass through AP. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/AP( KK ) + TEMP = X( J ) + K = KK - 1 + DO 10, I = J - 1, 1, -1 + X( I ) = X( I ) - TEMP*AP( K ) + K = K - 1 + 10 CONTINUE + END IF + KK = KK - J + 20 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 40, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/AP( KK ) + TEMP = X( JX ) + IX = JX + DO 30, K = KK - 1, KK - J + 1, -1 + IX = IX - INCX + X( IX ) = X( IX ) - TEMP*AP( K ) + 30 CONTINUE + END IF + JX = JX - INCX + KK = KK - J + 40 CONTINUE + END IF + ELSE + KK = 1 + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/AP( KK ) + TEMP = X( J ) + K = KK + 1 + DO 50, I = J + 1, N + X( I ) = X( I ) - TEMP*AP( K ) + K = K + 1 + 50 CONTINUE + END IF + KK = KK + ( N - J + 1 ) + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/AP( KK ) + TEMP = X( JX ) + IX = JX + DO 70, K = KK + 1, KK + N - J + IX = IX + INCX + X( IX ) = X( IX ) - TEMP*AP( K ) + 70 CONTINUE + END IF + JX = JX + INCX + KK = KK + ( N - J + 1 ) + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = 1 + IF( INCX.EQ.1 )THEN + DO 100, J = 1, N + TEMP = X( J ) + K = KK + DO 90, I = 1, J - 1 + TEMP = TEMP - AP( K )*X( I ) + K = K + 1 + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK + J - 1 ) + X( J ) = TEMP + KK = KK + J + 100 CONTINUE + ELSE + JX = KX + DO 120, J = 1, N + TEMP = X( JX ) + IX = KX + DO 110, K = KK, KK + J - 2 + TEMP = TEMP - AP( K )*X( IX ) + IX = IX + INCX + 110 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK + J - 1 ) + X( JX ) = TEMP + JX = JX + INCX + KK = KK + J + 120 CONTINUE + END IF + ELSE + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 140, J = N, 1, -1 + TEMP = X( J ) + K = KK + DO 130, I = N, J + 1, -1 + TEMP = TEMP - AP( K )*X( I ) + K = K - 1 + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK - N + J ) + X( J ) = TEMP + KK = KK - ( N - J + 1 ) + 140 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 160, J = N, 1, -1 + TEMP = X( JX ) + IX = KX + DO 150, K = KK, KK - ( N - ( J + 1 ) ), -1 + TEMP = TEMP - AP( K )*X( IX ) + IX = IX - INCX + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK - N + J ) + X( JX ) = TEMP + JX = JX - INCX + KK = KK - (N - J + 1 ) + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DTPSV . +* + END diff --git a/costa/native/external/blas/dtrmm.f b/costa/native/external/blas/dtrmm.f new file mode 100644 index 000000000..40c7740c9 --- /dev/null +++ b/costa/native/external/blas/dtrmm.f @@ -0,0 +1,355 @@ + SUBROUTINE DTRMM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, + $ B, LDB ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO, TRANSA, DIAG + INTEGER M, N, LDA, LDB + DOUBLE PRECISION ALPHA +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DTRMM performs one of the matrix-matrix operations +* +* B := alpha*op( A )*B, or B := alpha*B*op( A ), +* +* where alpha is a scalar, B is an m by n matrix, A is a unit, or +* non-unit, upper or lower triangular matrix and op( A ) is one of +* +* op( A ) = A or op( A ) = A'. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether op( A ) multiplies B from +* the left or right as follows: +* +* SIDE = 'L' or 'l' B := alpha*op( A )*B. +* +* SIDE = 'R' or 'r' B := alpha*B*op( A ). +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix A is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n' op( A ) = A. +* +* TRANSA = 'T' or 't' op( A ) = A'. +* +* TRANSA = 'C' or 'c' op( A ) = A'. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit triangular +* as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of B. M must be at +* least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of B. N must be +* at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. When alpha is +* zero then A is not referenced and B need not be set before +* entry. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m +* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. +* Before entry with UPLO = 'U' or 'u', the leading k by k +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading k by k +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' +* then LDA must be at least max( 1, n ). +* Unchanged on exit. +* +* B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B, and on exit is overwritten by the +* transformed matrix. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL LSIDE, NOUNIT, UPPER + INTEGER I, INFO, J, K, NROWA + DOUBLE PRECISION TEMP +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LSIDE = LSAME( SIDE , 'L' ) + IF( LSIDE )THEN + NROWA = M + ELSE + NROWA = N + END IF + NOUNIT = LSAME( DIAG , 'N' ) + UPPER = LSAME( UPLO , 'U' ) +* + INFO = 0 + IF( ( .NOT.LSIDE ).AND. + $ ( .NOT.LSAME( SIDE , 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 2 + ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'C' ) ) )THEN + INFO = 3 + ELSE IF( ( .NOT.LSAME( DIAG , 'U' ) ).AND. + $ ( .NOT.LSAME( DIAG , 'N' ) ) )THEN + INFO = 4 + ELSE IF( M .LT.0 )THEN + INFO = 5 + ELSE IF( N .LT.0 )THEN + INFO = 6 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DTRMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* Start the operations. +* + IF( LSIDE )THEN + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*A*B. +* + IF( UPPER )THEN + DO 50, J = 1, N + DO 40, K = 1, M + IF( B( K, J ).NE.ZERO )THEN + TEMP = ALPHA*B( K, J ) + DO 30, I = 1, K - 1 + B( I, J ) = B( I, J ) + TEMP*A( I, K ) + 30 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP*A( K, K ) + B( K, J ) = TEMP + END IF + 40 CONTINUE + 50 CONTINUE + ELSE + DO 80, J = 1, N + DO 70 K = M, 1, -1 + IF( B( K, J ).NE.ZERO )THEN + TEMP = ALPHA*B( K, J ) + B( K, J ) = TEMP + IF( NOUNIT ) + $ B( K, J ) = B( K, J )*A( K, K ) + DO 60, I = K + 1, M + B( I, J ) = B( I, J ) + TEMP*A( I, K ) + 60 CONTINUE + END IF + 70 CONTINUE + 80 CONTINUE + END IF + ELSE +* +* Form B := alpha*A'*B. +* + IF( UPPER )THEN + DO 110, J = 1, N + DO 100, I = M, 1, -1 + TEMP = B( I, J ) + IF( NOUNIT ) + $ TEMP = TEMP*A( I, I ) + DO 90, K = 1, I - 1 + TEMP = TEMP + A( K, I )*B( K, J ) + 90 CONTINUE + B( I, J ) = ALPHA*TEMP + 100 CONTINUE + 110 CONTINUE + ELSE + DO 140, J = 1, N + DO 130, I = 1, M + TEMP = B( I, J ) + IF( NOUNIT ) + $ TEMP = TEMP*A( I, I ) + DO 120, K = I + 1, M + TEMP = TEMP + A( K, I )*B( K, J ) + 120 CONTINUE + B( I, J ) = ALPHA*TEMP + 130 CONTINUE + 140 CONTINUE + END IF + END IF + ELSE + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*B*A. +* + IF( UPPER )THEN + DO 180, J = N, 1, -1 + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 150, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 150 CONTINUE + DO 170, K = 1, J - 1 + IF( A( K, J ).NE.ZERO )THEN + TEMP = ALPHA*A( K, J ) + DO 160, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + ELSE + DO 220, J = 1, N + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 190, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 190 CONTINUE + DO 210, K = J + 1, N + IF( A( K, J ).NE.ZERO )THEN + TEMP = ALPHA*A( K, J ) + DO 200, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 200 CONTINUE + END IF + 210 CONTINUE + 220 CONTINUE + END IF + ELSE +* +* Form B := alpha*B*A'. +* + IF( UPPER )THEN + DO 260, K = 1, N + DO 240, J = 1, K - 1 + IF( A( J, K ).NE.ZERO )THEN + TEMP = ALPHA*A( J, K ) + DO 230, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 230 CONTINUE + END IF + 240 CONTINUE + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( K, K ) + IF( TEMP.NE.ONE )THEN + DO 250, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 250 CONTINUE + END IF + 260 CONTINUE + ELSE + DO 300, K = N, 1, -1 + DO 280, J = K + 1, N + IF( A( J, K ).NE.ZERO )THEN + TEMP = ALPHA*A( J, K ) + DO 270, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 270 CONTINUE + END IF + 280 CONTINUE + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( K, K ) + IF( TEMP.NE.ONE )THEN + DO 290, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 290 CONTINUE + END IF + 300 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DTRMM . +* + END diff --git a/costa/native/external/blas/dtrmv.f b/costa/native/external/blas/dtrmv.f new file mode 100644 index 000000000..3d5c61b20 --- /dev/null +++ b/costa/native/external/blas/dtrmv.f @@ -0,0 +1,286 @@ + SUBROUTINE DTRMV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DTRMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := A'*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, J, JX, KX + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DTRMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + DO 10, I = 1, J - 1 + X( I ) = X( I ) + TEMP*A( I, J ) + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( J, J ) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 30, I = 1, J - 1 + X( IX ) = X( IX ) + TEMP*A( I, J ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( J, J ) + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + DO 50, I = N, J + 1, -1 + X( I ) = X( I ) + TEMP*A( I, J ) + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( J, J ) + END IF + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 70, I = N, J + 1, -1 + X( IX ) = X( IX ) + TEMP*A( I, J ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( J, J ) + END IF + JX = JX - INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 100, J = N, 1, -1 + TEMP = X( J ) + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 90, I = J - 1, 1, -1 + TEMP = TEMP + A( I, J )*X( I ) + 90 CONTINUE + X( J ) = TEMP + 100 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 120, J = N, 1, -1 + TEMP = X( JX ) + IX = JX + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 110, I = J - 1, 1, -1 + IX = IX - INCX + TEMP = TEMP + A( I, J )*X( IX ) + 110 CONTINUE + X( JX ) = TEMP + JX = JX - INCX + 120 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 140, J = 1, N + TEMP = X( J ) + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 130, I = J + 1, N + TEMP = TEMP + A( I, J )*X( I ) + 130 CONTINUE + X( J ) = TEMP + 140 CONTINUE + ELSE + JX = KX + DO 160, J = 1, N + TEMP = X( JX ) + IX = JX + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 150, I = J + 1, N + IX = IX + INCX + TEMP = TEMP + A( I, J )*X( IX ) + 150 CONTINUE + X( JX ) = TEMP + JX = JX + INCX + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DTRMV . +* + END diff --git a/costa/native/external/blas/dtrsm.f b/costa/native/external/blas/dtrsm.f new file mode 100644 index 000000000..e8425142b --- /dev/null +++ b/costa/native/external/blas/dtrsm.f @@ -0,0 +1,378 @@ + SUBROUTINE DTRSM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, + $ B, LDB ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO, TRANSA, DIAG + INTEGER M, N, LDA, LDB + DOUBLE PRECISION ALPHA +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DTRSM solves one of the matrix equations +* +* op( A )*X = alpha*B, or X*op( A ) = alpha*B, +* +* where alpha is a scalar, X and B are m by n matrices, A is a unit, or +* non-unit, upper or lower triangular matrix and op( A ) is one of +* +* op( A ) = A or op( A ) = A'. +* +* The matrix X is overwritten on B. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether op( A ) appears on the left +* or right of X as follows: +* +* SIDE = 'L' or 'l' op( A )*X = alpha*B. +* +* SIDE = 'R' or 'r' X*op( A ) = alpha*B. +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix A is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n' op( A ) = A. +* +* TRANSA = 'T' or 't' op( A ) = A'. +* +* TRANSA = 'C' or 'c' op( A ) = A'. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit triangular +* as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of B. M must be at +* least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of B. N must be +* at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. When alpha is +* zero then A is not referenced and B need not be set before +* entry. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m +* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. +* Before entry with UPLO = 'U' or 'u', the leading k by k +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading k by k +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' +* then LDA must be at least max( 1, n ). +* Unchanged on exit. +* +* B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the right-hand side matrix B, and on exit is +* overwritten by the solution matrix X. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL LSIDE, NOUNIT, UPPER + INTEGER I, INFO, J, K, NROWA + DOUBLE PRECISION TEMP +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LSIDE = LSAME( SIDE , 'L' ) + IF( LSIDE )THEN + NROWA = M + ELSE + NROWA = N + END IF + NOUNIT = LSAME( DIAG , 'N' ) + UPPER = LSAME( UPLO , 'U' ) +* + INFO = 0 + IF( ( .NOT.LSIDE ).AND. + $ ( .NOT.LSAME( SIDE , 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 2 + ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'C' ) ) )THEN + INFO = 3 + ELSE IF( ( .NOT.LSAME( DIAG , 'U' ) ).AND. + $ ( .NOT.LSAME( DIAG , 'N' ) ) )THEN + INFO = 4 + ELSE IF( M .LT.0 )THEN + INFO = 5 + ELSE IF( N .LT.0 )THEN + INFO = 6 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DTRSM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* Start the operations. +* + IF( LSIDE )THEN + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*inv( A )*B. +* + IF( UPPER )THEN + DO 60, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 30, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 30 CONTINUE + END IF + DO 50, K = M, 1, -1 + IF( B( K, J ).NE.ZERO )THEN + IF( NOUNIT ) + $ B( K, J ) = B( K, J )/A( K, K ) + DO 40, I = 1, K - 1 + B( I, J ) = B( I, J ) - B( K, J )*A( I, K ) + 40 CONTINUE + END IF + 50 CONTINUE + 60 CONTINUE + ELSE + DO 100, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 70, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 70 CONTINUE + END IF + DO 90 K = 1, M + IF( B( K, J ).NE.ZERO )THEN + IF( NOUNIT ) + $ B( K, J ) = B( K, J )/A( K, K ) + DO 80, I = K + 1, M + B( I, J ) = B( I, J ) - B( K, J )*A( I, K ) + 80 CONTINUE + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form B := alpha*inv( A' )*B. +* + IF( UPPER )THEN + DO 130, J = 1, N + DO 120, I = 1, M + TEMP = ALPHA*B( I, J ) + DO 110, K = 1, I - 1 + TEMP = TEMP - A( K, I )*B( K, J ) + 110 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( I, I ) + B( I, J ) = TEMP + 120 CONTINUE + 130 CONTINUE + ELSE + DO 160, J = 1, N + DO 150, I = M, 1, -1 + TEMP = ALPHA*B( I, J ) + DO 140, K = I + 1, M + TEMP = TEMP - A( K, I )*B( K, J ) + 140 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( I, I ) + B( I, J ) = TEMP + 150 CONTINUE + 160 CONTINUE + END IF + END IF + ELSE + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*B*inv( A ). +* + IF( UPPER )THEN + DO 210, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 170, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 170 CONTINUE + END IF + DO 190, K = 1, J - 1 + IF( A( K, J ).NE.ZERO )THEN + DO 180, I = 1, M + B( I, J ) = B( I, J ) - A( K, J )*B( I, K ) + 180 CONTINUE + END IF + 190 CONTINUE + IF( NOUNIT )THEN + TEMP = ONE/A( J, J ) + DO 200, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 200 CONTINUE + END IF + 210 CONTINUE + ELSE + DO 260, J = N, 1, -1 + IF( ALPHA.NE.ONE )THEN + DO 220, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 220 CONTINUE + END IF + DO 240, K = J + 1, N + IF( A( K, J ).NE.ZERO )THEN + DO 230, I = 1, M + B( I, J ) = B( I, J ) - A( K, J )*B( I, K ) + 230 CONTINUE + END IF + 240 CONTINUE + IF( NOUNIT )THEN + TEMP = ONE/A( J, J ) + DO 250, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 250 CONTINUE + END IF + 260 CONTINUE + END IF + ELSE +* +* Form B := alpha*B*inv( A' ). +* + IF( UPPER )THEN + DO 310, K = N, 1, -1 + IF( NOUNIT )THEN + TEMP = ONE/A( K, K ) + DO 270, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 270 CONTINUE + END IF + DO 290, J = 1, K - 1 + IF( A( J, K ).NE.ZERO )THEN + TEMP = A( J, K ) + DO 280, I = 1, M + B( I, J ) = B( I, J ) - TEMP*B( I, K ) + 280 CONTINUE + END IF + 290 CONTINUE + IF( ALPHA.NE.ONE )THEN + DO 300, I = 1, M + B( I, K ) = ALPHA*B( I, K ) + 300 CONTINUE + END IF + 310 CONTINUE + ELSE + DO 360, K = 1, N + IF( NOUNIT )THEN + TEMP = ONE/A( K, K ) + DO 320, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 320 CONTINUE + END IF + DO 340, J = K + 1, N + IF( A( J, K ).NE.ZERO )THEN + TEMP = A( J, K ) + DO 330, I = 1, M + B( I, J ) = B( I, J ) - TEMP*B( I, K ) + 330 CONTINUE + END IF + 340 CONTINUE + IF( ALPHA.NE.ONE )THEN + DO 350, I = 1, M + B( I, K ) = ALPHA*B( I, K ) + 350 CONTINUE + END IF + 360 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DTRSM . +* + END diff --git a/costa/native/external/blas/dtrsv.f b/costa/native/external/blas/dtrsv.f new file mode 100644 index 000000000..9c3e90a97 --- /dev/null +++ b/costa/native/external/blas/dtrsv.f @@ -0,0 +1,289 @@ + SUBROUTINE DTRSV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DTRSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular matrix. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' A'*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - DOUBLE PRECISION array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. Local Scalars .. + DOUBLE PRECISION TEMP + INTEGER I, INFO, IX, J, JX, KX + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DTRSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/A( J, J ) + TEMP = X( J ) + DO 10, I = J - 1, 1, -1 + X( I ) = X( I ) - TEMP*A( I, J ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 40, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( J, J ) + TEMP = X( JX ) + IX = JX + DO 30, I = J - 1, 1, -1 + IX = IX - INCX + X( IX ) = X( IX ) - TEMP*A( I, J ) + 30 CONTINUE + END IF + JX = JX - INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/A( J, J ) + TEMP = X( J ) + DO 50, I = J + 1, N + X( I ) = X( I ) - TEMP*A( I, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( J, J ) + TEMP = X( JX ) + IX = JX + DO 70, I = J + 1, N + IX = IX + INCX + X( IX ) = X( IX ) - TEMP*A( I, J ) + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 100, J = 1, N + TEMP = X( J ) + DO 90, I = 1, J - 1 + TEMP = TEMP - A( I, J )*X( I ) + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + X( J ) = TEMP + 100 CONTINUE + ELSE + JX = KX + DO 120, J = 1, N + TEMP = X( JX ) + IX = KX + DO 110, I = 1, J - 1 + TEMP = TEMP - A( I, J )*X( IX ) + IX = IX + INCX + 110 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + X( JX ) = TEMP + JX = JX + INCX + 120 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 140, J = N, 1, -1 + TEMP = X( J ) + DO 130, I = N, J + 1, -1 + TEMP = TEMP - A( I, J )*X( I ) + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + X( J ) = TEMP + 140 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 160, J = N, 1, -1 + TEMP = X( JX ) + IX = KX + DO 150, I = N, J + 1, -1 + TEMP = TEMP - A( I, J )*X( IX ) + IX = IX - INCX + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + X( JX ) = TEMP + JX = JX - INCX + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DTRSV . +* + END diff --git a/costa/native/external/blas/dzasum.f b/costa/native/external/blas/dzasum.f new file mode 100644 index 000000000..d21c1ffc9 --- /dev/null +++ b/costa/native/external/blas/dzasum.f @@ -0,0 +1,34 @@ + double precision function dzasum(n,zx,incx) +c +c takes the sum of the absolute values. +c jack dongarra, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double complex zx(*) + double precision stemp,dcabs1 + integer i,incx,ix,n +c + dzasum = 0.0d0 + stemp = 0.0d0 + if( n.le.0 .or. incx.le.0 )return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + ix = 1 + do 10 i = 1,n + stemp = stemp + dcabs1(zx(ix)) + ix = ix + incx + 10 continue + dzasum = stemp + return +c +c code for increment equal to 1 +c + 20 do 30 i = 1,n + stemp = stemp + dcabs1(zx(i)) + 30 continue + dzasum = stemp + return + end diff --git a/costa/native/external/blas/dznrm2.f b/costa/native/external/blas/dznrm2.f new file mode 100644 index 000000000..205ce3932 --- /dev/null +++ b/costa/native/external/blas/dznrm2.f @@ -0,0 +1,67 @@ + DOUBLE PRECISION FUNCTION DZNRM2( N, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N +* .. Array Arguments .. + COMPLEX*16 X( * ) +* .. +* +* DZNRM2 returns the euclidean norm of a vector via the function +* name, so that +* +* DZNRM2 := sqrt( conjg( x' )*x ) +* +* +* +* -- This version written on 25-October-1982. +* Modified on 14-October-1993 to inline the call to ZLASSQ. +* Sven Hammarling, Nag Ltd. +* +* +* .. Parameters .. + DOUBLE PRECISION ONE , ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. Local Scalars .. + INTEGER IX + DOUBLE PRECISION NORM, SCALE, SSQ, TEMP +* .. Intrinsic Functions .. + INTRINSIC ABS, DIMAG, DBLE, SQRT +* .. +* .. Executable Statements .. + IF( N.LT.1 .OR. INCX.LT.1 )THEN + NORM = ZERO + ELSE + SCALE = ZERO + SSQ = ONE +* The following loop is equivalent to this call to the LAPACK +* auxiliary routine: +* CALL ZLASSQ( N, X, INCX, SCALE, SSQ ) +* + DO 10, IX = 1, 1 + ( N - 1 )*INCX, INCX + IF( DBLE( X( IX ) ).NE.ZERO )THEN + TEMP = ABS( DBLE( X( IX ) ) ) + IF( SCALE.LT.TEMP )THEN + SSQ = ONE + SSQ*( SCALE/TEMP )**2 + SCALE = TEMP + ELSE + SSQ = SSQ + ( TEMP/SCALE )**2 + END IF + END IF + IF( DIMAG( X( IX ) ).NE.ZERO )THEN + TEMP = ABS( DIMAG( X( IX ) ) ) + IF( SCALE.LT.TEMP )THEN + SSQ = ONE + SSQ*( SCALE/TEMP )**2 + SCALE = TEMP + ELSE + SSQ = SSQ + ( TEMP/SCALE )**2 + END IF + END IF + 10 CONTINUE + NORM = SCALE * SQRT( SSQ ) + END IF +* + DZNRM2 = NORM + RETURN +* +* End of DZNRM2. +* + END diff --git a/costa/native/external/blas/icamax.f b/costa/native/external/blas/icamax.f new file mode 100644 index 000000000..b13d4904f --- /dev/null +++ b/costa/native/external/blas/icamax.f @@ -0,0 +1,43 @@ + integer function icamax(n,cx,incx) +c +c finds the index of element having max. absolute value. +c jack dongarra, linpack, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + complex cx(*) + real smax + integer i,incx,ix,n + complex zdum + real cabs1 + cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum)) +c + icamax = 0 + if( n.lt.1 .or. incx.le.0 ) return + icamax = 1 + if(n.eq.1)return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + ix = 1 + smax = cabs1(cx(1)) + ix = ix + incx + do 10 i = 2,n + if(cabs1(cx(ix)).le.smax) go to 5 + icamax = i + smax = cabs1(cx(ix)) + 5 ix = ix + incx + 10 continue + return +c +c code for increment equal to 1 +c + 20 smax = cabs1(cx(1)) + do 30 i = 2,n + if(cabs1(cx(i)).le.smax) go to 30 + icamax = i + smax = cabs1(cx(i)) + 30 continue + return + end diff --git a/costa/native/external/blas/idamax.f b/costa/native/external/blas/idamax.f new file mode 100644 index 000000000..59d80dc41 --- /dev/null +++ b/costa/native/external/blas/idamax.f @@ -0,0 +1,39 @@ + integer function idamax(n,dx,incx) +c +c finds the index of element having max. absolute value. +c jack dongarra, linpack, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double precision dx(*),dmax + integer i,incx,ix,n +c + idamax = 0 + if( n.lt.1 .or. incx.le.0 ) return + idamax = 1 + if(n.eq.1)return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + ix = 1 + dmax = dabs(dx(1)) + ix = ix + incx + do 10 i = 2,n + if(dabs(dx(ix)).le.dmax) go to 5 + idamax = i + dmax = dabs(dx(ix)) + 5 ix = ix + incx + 10 continue + return +c +c code for increment equal to 1 +c + 20 dmax = dabs(dx(1)) + do 30 i = 2,n + if(dabs(dx(i)).le.dmax) go to 30 + idamax = i + dmax = dabs(dx(i)) + 30 continue + return + end diff --git a/costa/native/external/blas/isamax.f b/costa/native/external/blas/isamax.f new file mode 100644 index 000000000..a649e0281 --- /dev/null +++ b/costa/native/external/blas/isamax.f @@ -0,0 +1,39 @@ + integer function isamax(n,sx,incx) +c +c finds the index of element having max. absolute value. +c jack dongarra, linpack, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + real sx(*),smax + integer i,incx,ix,n +c + isamax = 0 + if( n.lt.1 .or. incx.le.0 ) return + isamax = 1 + if(n.eq.1)return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + ix = 1 + smax = abs(sx(1)) + ix = ix + incx + do 10 i = 2,n + if(abs(sx(ix)).le.smax) go to 5 + isamax = i + smax = abs(sx(ix)) + 5 ix = ix + incx + 10 continue + return +c +c code for increment equal to 1 +c + 20 smax = abs(sx(1)) + do 30 i = 2,n + if(abs(sx(i)).le.smax) go to 30 + isamax = i + smax = abs(sx(i)) + 30 continue + return + end diff --git a/costa/native/external/blas/izamax.f b/costa/native/external/blas/izamax.f new file mode 100644 index 000000000..ec14f827d --- /dev/null +++ b/costa/native/external/blas/izamax.f @@ -0,0 +1,41 @@ + integer function izamax(n,zx,incx) +c +c finds the index of element having max. absolute value. +c jack dongarra, 1/15/85. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double complex zx(*) + double precision smax + integer i,incx,ix,n + double precision dcabs1 +c + izamax = 0 + if( n.lt.1 .or. incx.le.0 )return + izamax = 1 + if(n.eq.1)return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + ix = 1 + smax = dcabs1(zx(1)) + ix = ix + incx + do 10 i = 2,n + if(dcabs1(zx(ix)).le.smax) go to 5 + izamax = i + smax = dcabs1(zx(ix)) + 5 ix = ix + incx + 10 continue + return +c +c code for increment equal to 1 +c + 20 smax = dcabs1(zx(1)) + do 30 i = 2,n + if(dcabs1(zx(i)).le.smax) go to 30 + izamax = i + smax = dcabs1(zx(i)) + 30 continue + return + end diff --git a/costa/native/external/blas/lsame.f b/costa/native/external/blas/lsame.f new file mode 100644 index 000000000..bf25d86f2 --- /dev/null +++ b/costa/native/external/blas/lsame.f @@ -0,0 +1,87 @@ + LOGICAL FUNCTION LSAME( CA, CB ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER CA, CB +* .. +* +* Purpose +* ======= +* +* LSAME returns .TRUE. if CA is the same letter as CB regardless of +* case. +* +* Arguments +* ========= +* +* CA (input) CHARACTER*1 +* CB (input) CHARACTER*1 +* CA and CB specify the single characters to be compared. +* +* ===================================================================== +* +* .. Intrinsic Functions .. + INTRINSIC ICHAR +* .. +* .. Local Scalars .. + INTEGER INTA, INTB, ZCODE +* .. +* .. Executable Statements .. +* +* Test if the characters are equal +* + LSAME = CA.EQ.CB + IF( LSAME ) + $ RETURN +* +* Now test for equivalence if both characters are alphabetic. +* + ZCODE = ICHAR( 'Z' ) +* +* Use 'Z' rather than 'A' so that ASCII can be detected on Prime +* machines, on which ICHAR returns a value with bit 8 set. +* ICHAR('A') on Prime machines returns 193 which is the same as +* ICHAR('A') on an EBCDIC machine. +* + INTA = ICHAR( CA ) + INTB = ICHAR( CB ) +* + IF( ZCODE.EQ.90 .OR. ZCODE.EQ.122 ) THEN +* +* ASCII is assumed - ZCODE is the ASCII code of either lower or +* upper case 'Z'. +* + IF( INTA.GE.97 .AND. INTA.LE.122 ) INTA = INTA - 32 + IF( INTB.GE.97 .AND. INTB.LE.122 ) INTB = INTB - 32 +* + ELSE IF( ZCODE.EQ.233 .OR. ZCODE.EQ.169 ) THEN +* +* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or +* upper case 'Z'. +* + IF( INTA.GE.129 .AND. INTA.LE.137 .OR. + $ INTA.GE.145 .AND. INTA.LE.153 .OR. + $ INTA.GE.162 .AND. INTA.LE.169 ) INTA = INTA + 64 + IF( INTB.GE.129 .AND. INTB.LE.137 .OR. + $ INTB.GE.145 .AND. INTB.LE.153 .OR. + $ INTB.GE.162 .AND. INTB.LE.169 ) INTB = INTB + 64 +* + ELSE IF( ZCODE.EQ.218 .OR. ZCODE.EQ.250 ) THEN +* +* ASCII is assumed, on Prime machines - ZCODE is the ASCII code +* plus 128 of either lower or upper case 'Z'. +* + IF( INTA.GE.225 .AND. INTA.LE.250 ) INTA = INTA - 32 + IF( INTB.GE.225 .AND. INTB.LE.250 ) INTB = INTB - 32 + END IF + LSAME = INTA.EQ.INTB +* +* RETURN +* +* End of LSAME +* + END diff --git a/costa/native/external/blas/sasum.f b/costa/native/external/blas/sasum.f new file mode 100644 index 000000000..8697579d4 --- /dev/null +++ b/costa/native/external/blas/sasum.f @@ -0,0 +1,44 @@ + real function sasum(n,sx,incx) +c +c takes the sum of the absolute values. +c uses unrolled loops for increment equal to one. +c jack dongarra, linpack, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + real sx(*),stemp + integer i,incx,m,mp1,n,nincx +c + sasum = 0.0e0 + stemp = 0.0e0 + if( n.le.0 .or. incx.le.0 )return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + nincx = n*incx + do 10 i = 1,nincx,incx + stemp = stemp + abs(sx(i)) + 10 continue + sasum = stemp + return +c +c code for increment equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,6) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + stemp = stemp + abs(sx(i)) + 30 continue + if( n .lt. 6 ) go to 60 + 40 mp1 = m + 1 + do 50 i = mp1,n,6 + stemp = stemp + abs(sx(i)) + abs(sx(i + 1)) + abs(sx(i + 2)) + * + abs(sx(i + 3)) + abs(sx(i + 4)) + abs(sx(i + 5)) + 50 continue + 60 sasum = stemp + return + end diff --git a/costa/native/external/blas/saxpy.f b/costa/native/external/blas/saxpy.f new file mode 100644 index 000000000..4b1c70387 --- /dev/null +++ b/costa/native/external/blas/saxpy.f @@ -0,0 +1,48 @@ + subroutine saxpy(n,sa,sx,incx,sy,incy) +c +c constant times a vector plus a vector. +c uses unrolled loop for increments equal to one. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + real sx(*),sy(*),sa + integer i,incx,incy,ix,iy,m,mp1,n +c + if(n.le.0)return + if (sa .eq. 0.0) return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + sy(iy) = sy(iy) + sa*sx(ix) + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,4) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + sy(i) = sy(i) + sa*sx(i) + 30 continue + if( n .lt. 4 ) return + 40 mp1 = m + 1 + do 50 i = mp1,n,4 + sy(i) = sy(i) + sa*sx(i) + sy(i + 1) = sy(i + 1) + sa*sx(i + 1) + sy(i + 2) = sy(i + 2) + sa*sx(i + 2) + sy(i + 3) = sy(i + 3) + sa*sx(i + 3) + 50 continue + return + end diff --git a/costa/native/external/blas/scasum.f b/costa/native/external/blas/scasum.f new file mode 100644 index 000000000..b6c0ea077 --- /dev/null +++ b/costa/native/external/blas/scasum.f @@ -0,0 +1,34 @@ + real function scasum(n,cx,incx) +c +c takes the sum of the absolute values of a complex vector and +c returns a single precision result. +c jack dongarra, linpack, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + complex cx(*) + real stemp + integer i,incx,n,nincx +c + scasum = 0.0e0 + stemp = 0.0e0 + if( n.le.0 .or. incx.le.0 )return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + nincx = n*incx + do 10 i = 1,nincx,incx + stemp = stemp + abs(real(cx(i))) + abs(aimag(cx(i))) + 10 continue + scasum = stemp + return +c +c code for increment equal to 1 +c + 20 do 30 i = 1,n + stemp = stemp + abs(real(cx(i))) + abs(aimag(cx(i))) + 30 continue + scasum = stemp + return + end diff --git a/costa/native/external/blas/scnrm2.f b/costa/native/external/blas/scnrm2.f new file mode 100644 index 000000000..8bfe9ae7f --- /dev/null +++ b/costa/native/external/blas/scnrm2.f @@ -0,0 +1,67 @@ + REAL FUNCTION SCNRM2( N, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N +* .. Array Arguments .. + COMPLEX X( * ) +* .. +* +* SCNRM2 returns the euclidean norm of a vector via the function +* name, so that +* +* SCNRM2 := sqrt( conjg( x' )*x ) +* +* +* +* -- This version written on 25-October-1982. +* Modified on 14-October-1993 to inline the call to CLASSQ. +* Sven Hammarling, Nag Ltd. +* +* +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. Local Scalars .. + INTEGER IX + REAL NORM, SCALE, SSQ, TEMP +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, REAL, SQRT +* .. +* .. Executable Statements .. + IF( N.LT.1 .OR. INCX.LT.1 )THEN + NORM = ZERO + ELSE + SCALE = ZERO + SSQ = ONE +* The following loop is equivalent to this call to the LAPACK +* auxiliary routine: +* CALL CLASSQ( N, X, INCX, SCALE, SSQ ) +* + DO 10, IX = 1, 1 + ( N - 1 )*INCX, INCX + IF( REAL( X( IX ) ).NE.ZERO )THEN + TEMP = ABS( REAL( X( IX ) ) ) + IF( SCALE.LT.TEMP )THEN + SSQ = ONE + SSQ*( SCALE/TEMP )**2 + SCALE = TEMP + ELSE + SSQ = SSQ + ( TEMP/SCALE )**2 + END IF + END IF + IF( AIMAG( X( IX ) ).NE.ZERO )THEN + TEMP = ABS( AIMAG( X( IX ) ) ) + IF( SCALE.LT.TEMP )THEN + SSQ = ONE + SSQ*( SCALE/TEMP )**2 + SCALE = TEMP + ELSE + SSQ = SSQ + ( TEMP/SCALE )**2 + END IF + END IF + 10 CONTINUE + NORM = SCALE * SQRT( SSQ ) + END IF +* + SCNRM2 = NORM + RETURN +* +* End of SCNRM2. +* + END diff --git a/costa/native/external/blas/scopy.f b/costa/native/external/blas/scopy.f new file mode 100644 index 000000000..0202778ca --- /dev/null +++ b/costa/native/external/blas/scopy.f @@ -0,0 +1,50 @@ + subroutine scopy(n,sx,incx,sy,incy) +c +c copies a vector, x, to a vector, y. +c uses unrolled loops for increments equal to 1. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + real sx(*),sy(*) + integer i,incx,incy,ix,iy,m,mp1,n +c + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + sy(iy) = sx(ix) + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,7) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + sy(i) = sx(i) + 30 continue + if( n .lt. 7 ) return + 40 mp1 = m + 1 + do 50 i = mp1,n,7 + sy(i) = sx(i) + sy(i + 1) = sx(i + 1) + sy(i + 2) = sx(i + 2) + sy(i + 3) = sx(i + 3) + sy(i + 4) = sx(i + 4) + sy(i + 5) = sx(i + 5) + sy(i + 6) = sx(i + 6) + 50 continue + return + end diff --git a/costa/native/external/blas/sdot.f b/costa/native/external/blas/sdot.f new file mode 100644 index 000000000..416d90a59 --- /dev/null +++ b/costa/native/external/blas/sdot.f @@ -0,0 +1,49 @@ + real function sdot(n,sx,incx,sy,incy) +c +c forms the dot product of two vectors. +c uses unrolled loops for increments equal to one. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + real sx(*),sy(*),stemp + integer i,incx,incy,ix,iy,m,mp1,n +c + stemp = 0.0e0 + sdot = 0.0e0 + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + stemp = stemp + sx(ix)*sy(iy) + ix = ix + incx + iy = iy + incy + 10 continue + sdot = stemp + return +c +c code for both increments equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,5) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + stemp = stemp + sx(i)*sy(i) + 30 continue + if( n .lt. 5 ) go to 60 + 40 mp1 = m + 1 + do 50 i = mp1,n,5 + stemp = stemp + sx(i)*sy(i) + sx(i + 1)*sy(i + 1) + + * sx(i + 2)*sy(i + 2) + sx(i + 3)*sy(i + 3) + sx(i + 4)*sy(i + 4) + 50 continue + 60 sdot = stemp + return + end diff --git a/costa/native/external/blas/sgbmv.f b/costa/native/external/blas/sgbmv.f new file mode 100644 index 000000000..926abd735 --- /dev/null +++ b/costa/native/external/blas/sgbmv.f @@ -0,0 +1,300 @@ + SUBROUTINE SGBMV ( TRANS, M, N, KL, KU, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + REAL ALPHA, BETA + INTEGER INCX, INCY, KL, KU, LDA, M, N + CHARACTER*1 TRANS +* .. Array Arguments .. + REAL A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* SGBMV performs one of the matrix-vector operations +* +* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, +* +* where alpha and beta are scalars, x and y are vectors and A is an +* m by n band matrix, with kl sub-diagonals and ku super-diagonals. +* +* Parameters +* ========== +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. +* +* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. +* +* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* KL - INTEGER. +* On entry, KL specifies the number of sub-diagonals of the +* matrix A. KL must satisfy 0 .le. KL. +* Unchanged on exit. +* +* KU - INTEGER. +* On entry, KU specifies the number of super-diagonals of the +* matrix A. KU must satisfy 0 .le. KU. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry, the leading ( kl + ku + 1 ) by n part of the +* array A must contain the matrix of coefficients, supplied +* column by column, with the leading diagonal of the matrix in +* row ( ku + 1 ) of the array, the first super-diagonal +* starting at position 2 in row ku, the first sub-diagonal +* starting at position 1 in row ( ku + 2 ), and so on. +* Elements in the array A that do not correspond to elements +* in the band matrix (such as the top left ku by ku triangle) +* are not referenced. +* The following program segment will transfer a band matrix +* from conventional full matrix storage to band storage: +* +* DO 20, J = 1, N +* K = KU + 1 - J +* DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL ) +* A( K + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( kl + ku + 1 ). +* Unchanged on exit. +* +* X - REAL array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - REAL array of DIMENSION at least +* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +* Before entry, the incremented array Y must contain the +* vector y. On exit, Y is overwritten by the updated vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP + INTEGER I, INFO, IX, IY, J, JX, JY, K, KUP1, KX, KY, + $ LENX, LENY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( KL.LT.0 )THEN + INFO = 4 + ELSE IF( KU.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( KL + KU + 1 ) )THEN + INFO = 8 + ELSE IF( INCX.EQ.0 )THEN + INFO = 10 + ELSE IF( INCY.EQ.0 )THEN + INFO = 13 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SGBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( LSAME( TRANS, 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the band part of A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, LENY + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, LENY + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, LENY + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, LENY + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + KUP1 = KU + 1 + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form y := alpha*A*x + y. +* + JX = KX + IF( INCY.EQ.1 )THEN + DO 60, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + K = KUP1 - J + DO 50, I = MAX( 1, J - KU ), MIN( M, J + KL ) + Y( I ) = Y( I ) + TEMP*A( K + I, J ) + 50 CONTINUE + END IF + JX = JX + INCX + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IY = KY + K = KUP1 - J + DO 70, I = MAX( 1, J - KU ), MIN( M, J + KL ) + Y( IY ) = Y( IY ) + TEMP*A( K + I, J ) + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + IF( J.GT.KU ) + $ KY = KY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y := alpha*A'*x + y. +* + JY = KY + IF( INCX.EQ.1 )THEN + DO 100, J = 1, N + TEMP = ZERO + K = KUP1 - J + DO 90, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + A( K + I, J )*X( I ) + 90 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 100 CONTINUE + ELSE + DO 120, J = 1, N + TEMP = ZERO + IX = KX + K = KUP1 - J + DO 110, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + A( K + I, J )*X( IX ) + IX = IX + INCX + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + IF( J.GT.KU ) + $ KX = KX + INCX + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of SGBMV . +* + END diff --git a/costa/native/external/blas/sgemm.f b/costa/native/external/blas/sgemm.f new file mode 100644 index 000000000..8dc772974 --- /dev/null +++ b/costa/native/external/blas/sgemm.f @@ -0,0 +1,313 @@ + SUBROUTINE SGEMM ( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 TRANSA, TRANSB + INTEGER M, N, K, LDA, LDB, LDC + REAL ALPHA, BETA +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* SGEMM performs one of the matrix-matrix operations +* +* C := alpha*op( A )*op( B ) + beta*C, +* +* where op( X ) is one of +* +* op( X ) = X or op( X ) = X', +* +* alpha and beta are scalars, and A, B and C are matrices, with op( A ) +* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. +* +* Parameters +* ========== +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n', op( A ) = A. +* +* TRANSA = 'T' or 't', op( A ) = A'. +* +* TRANSA = 'C' or 'c', op( A ) = A'. +* +* Unchanged on exit. +* +* TRANSB - CHARACTER*1. +* On entry, TRANSB specifies the form of op( B ) to be used in +* the matrix multiplication as follows: +* +* TRANSB = 'N' or 'n', op( B ) = B. +* +* TRANSB = 'T' or 't', op( B ) = B'. +* +* TRANSB = 'C' or 'c', op( B ) = B'. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix +* op( A ) and of the matrix C. M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix +* op( B ) and the number of columns of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry, K specifies the number of columns of the matrix +* op( A ) and the number of rows of the matrix op( B ). K must +* be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, ka ), where ka is +* k when TRANSA = 'N' or 'n', and is m otherwise. +* Before entry with TRANSA = 'N' or 'n', the leading m by k +* part of the array A must contain the matrix A, otherwise +* the leading k by m part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANSA = 'N' or 'n' then +* LDA must be at least max( 1, m ), otherwise LDA must be at +* least max( 1, k ). +* Unchanged on exit. +* +* B - REAL array of DIMENSION ( LDB, kb ), where kb is +* n when TRANSB = 'N' or 'n', and is k otherwise. +* Before entry with TRANSB = 'N' or 'n', the leading k by n +* part of the array B must contain the matrix B, otherwise +* the leading n by k part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANSB = 'N' or 'n' then +* LDB must be at least max( 1, k ), otherwise LDB must be at +* least max( 1, n ). +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then C need not be set on input. +* Unchanged on exit. +* +* C - REAL array of DIMENSION ( LDC, n ). +* Before entry, the leading m by n part of the array C must +* contain the matrix C, except when beta is zero, in which +* case C need not be set on entry. +* On exit, the array C is overwritten by the m by n matrix +* ( alpha*op( A )*op( B ) + beta*C ). +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL NOTA, NOTB + INTEGER I, INFO, J, L, NCOLA, NROWA, NROWB + REAL TEMP +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Executable Statements .. +* +* Set NOTA and NOTB as true if A and B respectively are not +* transposed and set NROWA, NCOLA and NROWB as the number of rows +* and columns of A and the number of rows of B respectively. +* + NOTA = LSAME( TRANSA, 'N' ) + NOTB = LSAME( TRANSB, 'N' ) + IF( NOTA )THEN + NROWA = M + NCOLA = K + ELSE + NROWA = K + NCOLA = M + END IF + IF( NOTB )THEN + NROWB = K + ELSE + NROWB = N + END IF +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.NOTA ).AND. + $ ( .NOT.LSAME( TRANSA, 'C' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.NOTB ).AND. + $ ( .NOT.LSAME( TRANSB, 'C' ) ).AND. + $ ( .NOT.LSAME( TRANSB, 'T' ) ) )THEN + INFO = 2 + ELSE IF( M .LT.0 )THEN + INFO = 3 + ELSE IF( N .LT.0 )THEN + INFO = 4 + ELSE IF( K .LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 8 + ELSE IF( LDB.LT.MAX( 1, NROWB ) )THEN + INFO = 10 + ELSE IF( LDC.LT.MAX( 1, M ) )THEN + INFO = 13 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SGEMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And if alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, M + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF( NOTB )THEN + IF( NOTA )THEN +* +* Form C := alpha*A*B + beta*C. +* + DO 90, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 50, I = 1, M + C( I, J ) = ZERO + 50 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 60, I = 1, M + C( I, J ) = BETA*C( I, J ) + 60 CONTINUE + END IF + DO 80, L = 1, K + IF( B( L, J ).NE.ZERO )THEN + TEMP = ALPHA*B( L, J ) + DO 70, I = 1, M + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 70 CONTINUE + END IF + 80 CONTINUE + 90 CONTINUE + ELSE +* +* Form C := alpha*A'*B + beta*C +* + DO 120, J = 1, N + DO 110, I = 1, M + TEMP = ZERO + DO 100, L = 1, K + TEMP = TEMP + A( L, I )*B( L, J ) + 100 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 110 CONTINUE + 120 CONTINUE + END IF + ELSE + IF( NOTA )THEN +* +* Form C := alpha*A*B' + beta*C +* + DO 170, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 130, I = 1, M + C( I, J ) = ZERO + 130 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 140, I = 1, M + C( I, J ) = BETA*C( I, J ) + 140 CONTINUE + END IF + DO 160, L = 1, K + IF( B( J, L ).NE.ZERO )THEN + TEMP = ALPHA*B( J, L ) + DO 150, I = 1, M + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 150 CONTINUE + END IF + 160 CONTINUE + 170 CONTINUE + ELSE +* +* Form C := alpha*A'*B' + beta*C +* + DO 200, J = 1, N + DO 190, I = 1, M + TEMP = ZERO + DO 180, L = 1, K + TEMP = TEMP + A( L, I )*B( J, L ) + 180 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 190 CONTINUE + 200 CONTINUE + END IF + END IF +* + RETURN +* +* End of SGEMM . +* + END diff --git a/costa/native/external/blas/sgemv.f b/costa/native/external/blas/sgemv.f new file mode 100644 index 000000000..4b47f0470 --- /dev/null +++ b/costa/native/external/blas/sgemv.f @@ -0,0 +1,261 @@ + SUBROUTINE SGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + REAL ALPHA, BETA + INTEGER INCX, INCY, LDA, M, N + CHARACTER*1 TRANS +* .. Array Arguments .. + REAL A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* SGEMV performs one of the matrix-vector operations +* +* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, +* +* where alpha and beta are scalars, x and y are vectors and A is an +* m by n matrix. +* +* Parameters +* ========== +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. +* +* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. +* +* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* X - REAL array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - REAL array of DIMENSION at least +* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +* Before entry with BETA non-zero, the incremented array Y +* must contain the vector y. On exit, Y is overwritten by the +* updated vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SGEMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( LSAME( TRANS, 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, LENY + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, LENY + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, LENY + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, LENY + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form y := alpha*A*x + y. +* + JX = KX + IF( INCY.EQ.1 )THEN + DO 60, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + DO 50, I = 1, M + Y( I ) = Y( I ) + TEMP*A( I, J ) + 50 CONTINUE + END IF + JX = JX + INCX + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IY = KY + DO 70, I = 1, M + Y( IY ) = Y( IY ) + TEMP*A( I, J ) + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + ELSE +* +* Form y := alpha*A'*x + y. +* + JY = KY + IF( INCX.EQ.1 )THEN + DO 100, J = 1, N + TEMP = ZERO + DO 90, I = 1, M + TEMP = TEMP + A( I, J )*X( I ) + 90 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 100 CONTINUE + ELSE + DO 120, J = 1, N + TEMP = ZERO + IX = KX + DO 110, I = 1, M + TEMP = TEMP + A( I, J )*X( IX ) + IX = IX + INCX + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of SGEMV . +* + END diff --git a/costa/native/external/blas/sger.f b/costa/native/external/blas/sger.f new file mode 100644 index 000000000..f336b417d --- /dev/null +++ b/costa/native/external/blas/sger.f @@ -0,0 +1,157 @@ + SUBROUTINE SGER ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA ) +* .. Scalar Arguments .. + REAL ALPHA + INTEGER INCX, INCY, LDA, M, N +* .. Array Arguments .. + REAL A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* SGER performs the rank 1 operation +* +* A := alpha*x*y' + A, +* +* where alpha is a scalar, x is an m element vector, y is an n element +* vector and A is an m by n matrix. +* +* Parameters +* ========== +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( m - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the m +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. On exit, A is +* overwritten by the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP + INTEGER I, INFO, IX, J, JY, KX +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( M.LT.0 )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SGER ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( INCY.GT.0 )THEN + JY = 1 + ELSE + JY = 1 - ( N - 1 )*INCY + END IF + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*Y( JY ) + DO 10, I = 1, M + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + END IF + JY = JY + INCY + 20 CONTINUE + ELSE + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( M - 1 )*INCX + END IF + DO 40, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*Y( JY ) + IX = KX + DO 30, I = 1, M + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JY = JY + INCY + 40 CONTINUE + END IF +* + RETURN +* +* End of SGER . +* + END diff --git a/costa/native/external/blas/snrm2.f b/costa/native/external/blas/snrm2.f new file mode 100644 index 000000000..81dc0cda6 --- /dev/null +++ b/costa/native/external/blas/snrm2.f @@ -0,0 +1,60 @@ + REAL FUNCTION SNRM2 ( N, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N +* .. Array Arguments .. + REAL X( * ) +* .. +* +* SNRM2 returns the euclidean norm of a vector via the function +* name, so that +* +* SNRM2 := sqrt( x'*x ) +* +* +* +* -- This version written on 25-October-1982. +* Modified on 14-October-1993 to inline the call to SLASSQ. +* Sven Hammarling, Nag Ltd. +* +* +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. Local Scalars .. + INTEGER IX + REAL ABSXI, NORM, SCALE, SSQ +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. + IF( N.LT.1 .OR. INCX.LT.1 )THEN + NORM = ZERO + ELSE IF( N.EQ.1 )THEN + NORM = ABS( X( 1 ) ) + ELSE + SCALE = ZERO + SSQ = ONE +* The following loop is equivalent to this call to the LAPACK +* auxiliary routine: +* CALL SLASSQ( N, X, INCX, SCALE, SSQ ) +* + DO 10, IX = 1, 1 + ( N - 1 )*INCX, INCX + IF( X( IX ).NE.ZERO )THEN + ABSXI = ABS( X( IX ) ) + IF( SCALE.LT.ABSXI )THEN + SSQ = ONE + SSQ*( SCALE/ABSXI )**2 + SCALE = ABSXI + ELSE + SSQ = SSQ + ( ABSXI/SCALE )**2 + END IF + END IF + 10 CONTINUE + NORM = SCALE * SQRT( SSQ ) + END IF +* + SNRM2 = NORM + RETURN +* +* End of SNRM2. +* + END diff --git a/costa/native/external/blas/srot.f b/costa/native/external/blas/srot.f new file mode 100644 index 000000000..e0ee1e50c --- /dev/null +++ b/costa/native/external/blas/srot.f @@ -0,0 +1,37 @@ + subroutine srot (n,sx,incx,sy,incy,c,s) +c +c applies a plane rotation. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + real sx(*),sy(*),stemp,c,s + integer i,incx,incy,ix,iy,n +c + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments not equal +c to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + stemp = c*sx(ix) + s*sy(iy) + sy(iy) = c*sy(iy) - s*sx(ix) + sx(ix) = stemp + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + stemp = c*sx(i) + s*sy(i) + sy(i) = c*sy(i) - s*sx(i) + sx(i) = stemp + 30 continue + return + end diff --git a/costa/native/external/blas/srotg.f b/costa/native/external/blas/srotg.f new file mode 100644 index 000000000..84d1922af --- /dev/null +++ b/costa/native/external/blas/srotg.f @@ -0,0 +1,27 @@ + subroutine srotg(sa,sb,c,s) +c +c construct givens plane rotation. +c jack dongarra, linpack, 3/11/78. +c + real sa,sb,c,s,roe,scale,r,z +c + roe = sb + if( abs(sa) .gt. abs(sb) ) roe = sa + scale = abs(sa) + abs(sb) + if( scale .ne. 0.0 ) go to 10 + c = 1.0 + s = 0.0 + r = 0.0 + z = 0.0 + go to 20 + 10 r = scale*sqrt((sa/scale)**2 + (sb/scale)**2) + r = sign(1.0,roe)*r + c = sa/r + s = sb/r + z = 1.0 + if( abs(sa) .gt. abs(sb) ) z = s + if( abs(sb) .ge. abs(sa) .and. c .ne. 0.0 ) z = 1.0/c + 20 sa = r + sb = z + return + end diff --git a/costa/native/external/blas/ssbmv.f b/costa/native/external/blas/ssbmv.f new file mode 100644 index 000000000..7761be874 --- /dev/null +++ b/costa/native/external/blas/ssbmv.f @@ -0,0 +1,303 @@ + SUBROUTINE SSBMV ( UPLO, N, K, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + REAL ALPHA, BETA + INTEGER INCX, INCY, K, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + REAL A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* SSBMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n symmetric band matrix, with k super-diagonals. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the band matrix A is being supplied as +* follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* being supplied. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* being supplied. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry, K specifies the number of super-diagonals of the +* matrix A. K must satisfy 0 .le. K. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the symmetric matrix, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer the upper +* triangular part of a symmetric band matrix from conventional +* full matrix storage to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the symmetric matrix, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer the lower +* triangular part of a symmetric band matrix from conventional +* full matrix storage to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - REAL array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* Y - REAL array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the +* vector y. On exit, Y is overwritten by the updated vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KPLUS1, KX, KY, L +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( K.LT.0 )THEN + INFO = 3 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SSBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of the array A +* are accessed sequentially with one pass through A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when upper triangle of A is stored. +* + KPLUS1 = K + 1 + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + L = KPLUS1 - J + DO 50, I = MAX( 1, J - K ), J - 1 + Y( I ) = Y( I ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + A( L + I, J )*X( I ) + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*A( KPLUS1, J ) + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + L = KPLUS1 - J + DO 70, I = MAX( 1, J - K ), J - 1 + Y( IY ) = Y( IY ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + A( L + I, J )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*A( KPLUS1, J ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + IF( J.GT.K )THEN + KX = KX + INCX + KY = KY + INCY + END IF + 80 CONTINUE + END IF + ELSE +* +* Form y when lower triangle of A is stored. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*A( 1, J ) + L = 1 - J + DO 90, I = J + 1, MIN( N, J + K ) + Y( I ) = Y( I ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + A( L + I, J )*X( I ) + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*A( 1, J ) + L = 1 - J + IX = JX + IY = JY + DO 110, I = J + 1, MIN( N, J + K ) + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + A( L + I, J )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSBMV . +* + END diff --git a/costa/native/external/blas/sscal.f b/costa/native/external/blas/sscal.f new file mode 100644 index 000000000..ac7ee0010 --- /dev/null +++ b/costa/native/external/blas/sscal.f @@ -0,0 +1,43 @@ + subroutine sscal(n,sa,sx,incx) +c +c scales a vector by a constant. +c uses unrolled loops for increment equal to 1. +c jack dongarra, linpack, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + real sa,sx(*) + integer i,incx,m,mp1,n,nincx +c + if( n.le.0 .or. incx.le.0 )return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + nincx = n*incx + do 10 i = 1,nincx,incx + sx(i) = sa*sx(i) + 10 continue + return +c +c code for increment equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,5) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + sx(i) = sa*sx(i) + 30 continue + if( n .lt. 5 ) return + 40 mp1 = m + 1 + do 50 i = mp1,n,5 + sx(i) = sa*sx(i) + sx(i + 1) = sa*sx(i + 1) + sx(i + 2) = sa*sx(i + 2) + sx(i + 3) = sa*sx(i + 3) + sx(i + 4) = sa*sx(i + 4) + 50 continue + return + end diff --git a/costa/native/external/blas/sspmv.f b/costa/native/external/blas/sspmv.f new file mode 100644 index 000000000..f7bd25e11 --- /dev/null +++ b/costa/native/external/blas/sspmv.f @@ -0,0 +1,262 @@ + SUBROUTINE SSPMV ( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY ) +* .. Scalar Arguments .. + REAL ALPHA, BETA + INTEGER INCX, INCY, N + CHARACTER*1 UPLO +* .. Array Arguments .. + REAL AP( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* SSPMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n symmetric matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* AP - REAL array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. On exit, Y is overwritten by the updated +* vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 6 + ELSE IF( INCY.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SSPMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when AP contains the upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + K = KK + DO 50, I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( I ) + K = K + 1 + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*AP( KK + J - 1 ) + ALPHA*TEMP2 + KK = KK + J + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70, K = KK, KK + J - 2 + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*AP( KK + J - 1 ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + J + 80 CONTINUE + END IF + ELSE +* +* Form y when AP contains the lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*AP( KK ) + K = KK + 1 + DO 90, I = J + 1, N + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( I ) + K = K + 1 + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + KK = KK + ( N - J + 1 ) + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*AP( KK ) + IX = JX + IY = JY + DO 110, K = KK + 1, KK + N - J + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + ( N - J + 1 ) + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSPMV . +* + END diff --git a/costa/native/external/blas/sspr.f b/costa/native/external/blas/sspr.f new file mode 100644 index 000000000..e3865daba --- /dev/null +++ b/costa/native/external/blas/sspr.f @@ -0,0 +1,198 @@ + SUBROUTINE SSPR ( UPLO, N, ALPHA, X, INCX, AP ) +* .. Scalar Arguments .. + REAL ALPHA + INTEGER INCX, N + CHARACTER*1 UPLO +* .. Array Arguments .. + REAL AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* SSPR performs the symmetric rank 1 operation +* +* A := alpha*x*x' + A, +* +* where alpha is a real scalar, x is an n element vector and A is an +* n by n symmetric matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* AP - REAL array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. On exit, the array +* AP is overwritten by the upper triangular part of the +* updated matrix. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. On exit, the array +* AP is overwritten by the lower triangular part of the +* updated matrix. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SSPR ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when upper triangle is stored in AP. +* + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*X( J ) + K = KK + DO 10, I = 1, J + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 10 CONTINUE + END IF + KK = KK + J + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IX = KX + DO 30, K = KK, KK + J - 1 + AP( K ) = AP( K ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JX = JX + INCX + KK = KK + J + 40 CONTINUE + END IF + ELSE +* +* Form A when lower triangle is stored in AP. +* + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*X( J ) + K = KK + DO 50, I = J, N + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 50 CONTINUE + END IF + KK = KK + N - J + 1 + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IX = JX + DO 70, K = KK, KK + N - J + AP( K ) = AP( K ) + X( IX )*TEMP + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + KK = KK + N - J + 1 + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSPR . +* + END diff --git a/costa/native/external/blas/sspr2.f b/costa/native/external/blas/sspr2.f new file mode 100644 index 000000000..8be0098b1 --- /dev/null +++ b/costa/native/external/blas/sspr2.f @@ -0,0 +1,229 @@ + SUBROUTINE SSPR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, AP ) +* .. Scalar Arguments .. + REAL ALPHA + INTEGER INCX, INCY, N + CHARACTER*1 UPLO +* .. Array Arguments .. + REAL AP( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* SSPR2 performs the symmetric rank 2 operation +* +* A := alpha*x*y' + alpha*y*x' + A, +* +* where alpha is a scalar, x and y are n element vectors and A is an +* n by n symmetric matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* AP - REAL array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. On exit, the array +* AP is overwritten by the upper triangular part of the +* updated matrix. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. On exit, the array +* AP is overwritten by the lower triangular part of the +* updated matrix. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SSPR2 ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set up the start points in X and Y if the increments are not both +* unity. +* + IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF + JX = KX + JY = KY + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when upper triangle is stored in AP. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 20, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( J ) + TEMP2 = ALPHA*X( J ) + K = KK + DO 10, I = 1, J + AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2 + K = K + 1 + 10 CONTINUE + END IF + KK = KK + J + 20 CONTINUE + ELSE + DO 40, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( JY ) + TEMP2 = ALPHA*X( JX ) + IX = KX + IY = KY + DO 30, K = KK, KK + J - 1 + AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 30 CONTINUE + END IF + JX = JX + INCX + JY = JY + INCY + KK = KK + J + 40 CONTINUE + END IF + ELSE +* +* Form A when lower triangle is stored in AP. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( J ) + TEMP2 = ALPHA*X( J ) + K = KK + DO 50, I = J, N + AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2 + K = K + 1 + 50 CONTINUE + END IF + KK = KK + N - J + 1 + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( JY ) + TEMP2 = ALPHA*X( JX ) + IX = JX + IY = JY + DO 70, K = KK, KK + N - J + AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + JY = JY + INCY + KK = KK + N - J + 1 + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSPR2 . +* + END diff --git a/costa/native/external/blas/sswap.f b/costa/native/external/blas/sswap.f new file mode 100644 index 000000000..ef0722272 --- /dev/null +++ b/costa/native/external/blas/sswap.f @@ -0,0 +1,56 @@ + subroutine sswap (n,sx,incx,sy,incy) +c +c interchanges two vectors. +c uses unrolled loops for increments equal to 1. +c jack dongarra, linpack, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + real sx(*),sy(*),stemp + integer i,incx,incy,ix,iy,m,mp1,n +c + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments not equal +c to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + stemp = sx(ix) + sx(ix) = sy(iy) + sy(iy) = stemp + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c +c +c clean-up loop +c + 20 m = mod(n,3) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + stemp = sx(i) + sx(i) = sy(i) + sy(i) = stemp + 30 continue + if( n .lt. 3 ) return + 40 mp1 = m + 1 + do 50 i = mp1,n,3 + stemp = sx(i) + sx(i) = sy(i) + sy(i) = stemp + stemp = sx(i + 1) + sx(i + 1) = sy(i + 1) + sy(i + 1) = stemp + stemp = sx(i + 2) + sx(i + 2) = sy(i + 2) + sy(i + 2) = stemp + 50 continue + return + end diff --git a/costa/native/external/blas/ssymm.f b/costa/native/external/blas/ssymm.f new file mode 100644 index 000000000..861bef906 --- /dev/null +++ b/costa/native/external/blas/ssymm.f @@ -0,0 +1,294 @@ + SUBROUTINE SSYMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO + INTEGER M, N, LDA, LDB, LDC + REAL ALPHA, BETA +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* SSYMM performs one of the matrix-matrix operations +* +* C := alpha*A*B + beta*C, +* +* or +* +* C := alpha*B*A + beta*C, +* +* where alpha and beta are scalars, A is a symmetric matrix and B and +* C are m by n matrices. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether the symmetric matrix A +* appears on the left or right in the operation as follows: +* +* SIDE = 'L' or 'l' C := alpha*A*B + beta*C, +* +* SIDE = 'R' or 'r' C := alpha*B*A + beta*C, +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the symmetric matrix A is to be +* referenced as follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of the +* symmetric matrix is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of the +* symmetric matrix is to be referenced. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix C. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix C. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, ka ), where ka is +* m when SIDE = 'L' or 'l' and is n otherwise. +* Before entry with SIDE = 'L' or 'l', the m by m part of +* the array A must contain the symmetric matrix, such that +* when UPLO = 'U' or 'u', the leading m by m upper triangular +* part of the array A must contain the upper triangular part +* of the symmetric matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading m by m lower triangular part of the array A +* must contain the lower triangular part of the symmetric +* matrix and the strictly upper triangular part of A is not +* referenced. +* Before entry with SIDE = 'R' or 'r', the n by n part of +* the array A must contain the symmetric matrix, such that +* when UPLO = 'U' or 'u', the leading n by n upper triangular +* part of the array A must contain the upper triangular part +* of the symmetric matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading n by n lower triangular part of the array A +* must contain the lower triangular part of the symmetric +* matrix and the strictly upper triangular part of A is not +* referenced. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), otherwise LDA must be at +* least max( 1, n ). +* Unchanged on exit. +* +* B - REAL array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then C need not be set on input. +* Unchanged on exit. +* +* C - REAL array of DIMENSION ( LDC, n ). +* Before entry, the leading m by n part of the array C must +* contain the matrix C, except when beta is zero, in which +* case C need not be set on entry. +* On exit, the array C is overwritten by the m by n updated +* matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, K, NROWA + REAL TEMP1, TEMP2 +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Executable Statements .. +* +* Set NROWA as the number of rows of A. +* + IF( LSAME( SIDE, 'L' ) )THEN + NROWA = M + ELSE + NROWA = N + END IF + UPPER = LSAME( UPLO, 'U' ) +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.LSAME( SIDE, 'L' ) ).AND. + $ ( .NOT.LSAME( SIDE, 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO, 'L' ) ) )THEN + INFO = 2 + ELSE IF( M .LT.0 )THEN + INFO = 3 + ELSE IF( N .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, M ) )THEN + INFO = 12 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SSYMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, M + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( SIDE, 'L' ) )THEN +* +* Form C := alpha*A*B + beta*C. +* + IF( UPPER )THEN + DO 70, J = 1, N + DO 60, I = 1, M + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 50, K = 1, I - 1 + C( K, J ) = C( K, J ) + TEMP1 *A( K, I ) + TEMP2 = TEMP2 + B( K, J )*A( K, I ) + 50 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*A( I, I ) + ALPHA*TEMP2 + END IF + 60 CONTINUE + 70 CONTINUE + ELSE + DO 100, J = 1, N + DO 90, I = M, 1, -1 + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 80, K = I + 1, M + C( K, J ) = C( K, J ) + TEMP1 *A( K, I ) + TEMP2 = TEMP2 + B( K, J )*A( K, I ) + 80 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*A( I, I ) + ALPHA*TEMP2 + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form C := alpha*B*A + beta*C. +* + DO 170, J = 1, N + TEMP1 = ALPHA*A( J, J ) + IF( BETA.EQ.ZERO )THEN + DO 110, I = 1, M + C( I, J ) = TEMP1*B( I, J ) + 110 CONTINUE + ELSE + DO 120, I = 1, M + C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J ) + 120 CONTINUE + END IF + DO 140, K = 1, J - 1 + IF( UPPER )THEN + TEMP1 = ALPHA*A( K, J ) + ELSE + TEMP1 = ALPHA*A( J, K ) + END IF + DO 130, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 130 CONTINUE + 140 CONTINUE + DO 160, K = J + 1, N + IF( UPPER )THEN + TEMP1 = ALPHA*A( J, K ) + ELSE + TEMP1 = ALPHA*A( K, J ) + END IF + DO 150, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 150 CONTINUE + 160 CONTINUE + 170 CONTINUE + END IF +* + RETURN +* +* End of SSYMM . +* + END diff --git a/costa/native/external/blas/ssymv.f b/costa/native/external/blas/ssymv.f new file mode 100644 index 000000000..9819ba802 --- /dev/null +++ b/costa/native/external/blas/ssymv.f @@ -0,0 +1,262 @@ + SUBROUTINE SSYMV ( UPLO, N, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + REAL ALPHA, BETA + INTEGER INCX, INCY, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + REAL A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* SSYMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n symmetric matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of A is not referenced. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. On exit, Y is overwritten by the updated +* vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 5 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + ELSE IF( INCY.EQ.0 )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SSYMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when A is stored in upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + DO 50, I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( I ) + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*A( J, J ) + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70, I = 1, J - 1 + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y when A is stored in lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*A( J, J ) + DO 90, I = J + 1, N + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( I ) + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + IX = JX + IY = JY + DO 110, I = J + 1, N + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSYMV . +* + END diff --git a/costa/native/external/blas/ssyr.f b/costa/native/external/blas/ssyr.f new file mode 100644 index 000000000..964872884 --- /dev/null +++ b/costa/native/external/blas/ssyr.f @@ -0,0 +1,197 @@ + SUBROUTINE SSYR ( UPLO, N, ALPHA, X, INCX, A, LDA ) +* .. Scalar Arguments .. + REAL ALPHA + INTEGER INCX, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + REAL A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* SSYR performs the symmetric rank 1 operation +* +* A := alpha*x*x' + A, +* +* where alpha is a real scalar, x is an n element vector and A is an +* n by n symmetric matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of A is not referenced. On exit, the +* upper triangular part of the array A is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of A is not referenced. On exit, the +* lower triangular part of the array A is overwritten by the +* lower triangular part of the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP + INTEGER I, INFO, IX, J, JX, KX +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SSYR ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when A is stored in upper triangle. +* + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*X( J ) + DO 10, I = 1, J + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IX = KX + DO 30, I = 1, J + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in lower triangle. +* + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*X( J ) + DO 50, I = J, N + A( I, J ) = A( I, J ) + X( I )*TEMP + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IX = JX + DO 70, I = J, N + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSYR . +* + END diff --git a/costa/native/external/blas/ssyr2.f b/costa/native/external/blas/ssyr2.f new file mode 100644 index 000000000..ac20fc9d4 --- /dev/null +++ b/costa/native/external/blas/ssyr2.f @@ -0,0 +1,230 @@ + SUBROUTINE SSYR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, A, LDA ) +* .. Scalar Arguments .. + REAL ALPHA + INTEGER INCX, INCY, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + REAL A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* SSYR2 performs the symmetric rank 2 operation +* +* A := alpha*x*y' + alpha*y*x' + A, +* +* where alpha is a scalar, x and y are n element vectors and A is an n +* by n symmetric matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of A is not referenced. On exit, the +* upper triangular part of the array A is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of A is not referenced. On exit, the +* lower triangular part of the array A is overwritten by the +* lower triangular part of the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SSYR2 ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set up the start points in X and Y if the increments are not both +* unity. +* + IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF + JX = KX + JY = KY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when A is stored in the upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 20, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( J ) + TEMP2 = ALPHA*X( J ) + DO 10, I = 1, J + A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2 + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + DO 40, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( JY ) + TEMP2 = ALPHA*X( JX ) + IX = KX + IY = KY + DO 30, I = 1, J + A( I, J ) = A( I, J ) + X( IX )*TEMP1 + $ + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 30 CONTINUE + END IF + JX = JX + INCX + JY = JY + INCY + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in the lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( J ) + TEMP2 = ALPHA*X( J ) + DO 50, I = J, N + A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2 + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*Y( JY ) + TEMP2 = ALPHA*X( JX ) + IX = JX + IY = JY + DO 70, I = J, N + A( I, J ) = A( I, J ) + X( IX )*TEMP1 + $ + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSYR2 . +* + END diff --git a/costa/native/external/blas/ssyr2k.f b/costa/native/external/blas/ssyr2k.f new file mode 100644 index 000000000..d269164d3 --- /dev/null +++ b/costa/native/external/blas/ssyr2k.f @@ -0,0 +1,327 @@ + SUBROUTINE SSYR2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 UPLO, TRANS + INTEGER N, K, LDA, LDB, LDC + REAL ALPHA, BETA +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* SSYR2K performs one of the symmetric rank 2k operations +* +* C := alpha*A*B' + alpha*B*A' + beta*C, +* +* or +* +* C := alpha*A'*B + alpha*B'*A + beta*C, +* +* where alpha and beta are scalars, C is an n by n symmetric matrix +* and A and B are n by k matrices in the first case and k by n +* matrices in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + +* beta*C. +* +* TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + +* beta*C. +* +* TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A + +* beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrices A and B, and on entry with +* TRANS = 'T' or 't' or 'C' or 'c', K specifies the number +* of rows of the matrices A and B. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* B - REAL array of DIMENSION ( LDB, kb ), where kb is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array B must contain the matrix B, otherwise +* the leading k by n part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDB must be at least max( 1, n ), otherwise LDB must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - REAL array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + REAL TEMP1, TEMP2 +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) )THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'C' ) ) )THEN + INFO = 2 + ELSE IF( N .LT.0 )THEN + INFO = 3 + ELSE IF( K .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, N ) )THEN + INFO = 12 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SSYR2K', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( UPPER )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, J + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.ZERO )THEN + DO 60, J = 1, N + DO 50, I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80, J = 1, N + DO 70, I = J, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form C := alpha*A*B' + alpha*B*A' + C. +* + IF( UPPER )THEN + DO 130, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 90, I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 100, I = 1, J + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + END IF + DO 120, L = 1, K + IF( ( A( J, L ).NE.ZERO ).OR. + $ ( B( J, L ).NE.ZERO ) )THEN + TEMP1 = ALPHA*B( J, L ) + TEMP2 = ALPHA*A( J, L ) + DO 110, I = 1, J + C( I, J ) = C( I, J ) + + $ A( I, L )*TEMP1 + B( I, L )*TEMP2 + 110 CONTINUE + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 140, I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 150, I = J, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + END IF + DO 170, L = 1, K + IF( ( A( J, L ).NE.ZERO ).OR. + $ ( B( J, L ).NE.ZERO ) )THEN + TEMP1 = ALPHA*B( J, L ) + TEMP2 = ALPHA*A( J, L ) + DO 160, I = J, N + C( I, J ) = C( I, J ) + + $ A( I, L )*TEMP1 + B( I, L )*TEMP2 + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*A'*B + alpha*B'*A + C. +* + IF( UPPER )THEN + DO 210, J = 1, N + DO 200, I = 1, J + TEMP1 = ZERO + TEMP2 = ZERO + DO 190, L = 1, K + TEMP1 = TEMP1 + A( L, I )*B( L, J ) + TEMP2 = TEMP2 + B( L, I )*A( L, J ) + 190 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ ALPHA*TEMP1 + ALPHA*TEMP2 + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240, J = 1, N + DO 230, I = J, N + TEMP1 = ZERO + TEMP2 = ZERO + DO 220, L = 1, K + TEMP1 = TEMP1 + A( L, I )*B( L, J ) + TEMP2 = TEMP2 + B( L, I )*A( L, J ) + 220 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ ALPHA*TEMP1 + ALPHA*TEMP2 + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSYR2K. +* + END diff --git a/costa/native/external/blas/ssyrk.f b/costa/native/external/blas/ssyrk.f new file mode 100644 index 000000000..3c3b0ee88 --- /dev/null +++ b/costa/native/external/blas/ssyrk.f @@ -0,0 +1,294 @@ + SUBROUTINE SSYRK ( UPLO, TRANS, N, K, ALPHA, A, LDA, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 UPLO, TRANS + INTEGER N, K, LDA, LDC + REAL ALPHA, BETA +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* SSYRK performs one of the symmetric rank k operations +* +* C := alpha*A*A' + beta*C, +* +* or +* +* C := alpha*A'*A + beta*C, +* +* where alpha and beta are scalars, C is an n by n symmetric matrix +* and A is an n by k matrix in the first case and a k by n matrix +* in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. +* +* TRANS = 'T' or 't' C := alpha*A'*A + beta*C. +* +* TRANS = 'C' or 'c' C := alpha*A'*A + beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrix A, and on entry with +* TRANS = 'T' or 't' or 'C' or 'c', K specifies the number +* of rows of the matrix A. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - REAL . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - REAL array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + REAL TEMP +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) )THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'C' ) ) )THEN + INFO = 2 + ELSE IF( N .LT.0 )THEN + INFO = 3 + ELSE IF( K .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDC.LT.MAX( 1, N ) )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SSYRK ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( UPPER )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, J + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.ZERO )THEN + DO 60, J = 1, N + DO 50, I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80, J = 1, N + DO 70, I = J, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form C := alpha*A*A' + beta*C. +* + IF( UPPER )THEN + DO 130, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 90, I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 100, I = 1, J + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + END IF + DO 120, L = 1, K + IF( A( J, L ).NE.ZERO )THEN + TEMP = ALPHA*A( J, L ) + DO 110, I = 1, J + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 110 CONTINUE + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 140, I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 150, I = J, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + END IF + DO 170, L = 1, K + IF( A( J, L ).NE.ZERO )THEN + TEMP = ALPHA*A( J, L ) + DO 160, I = J, N + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*A'*A + beta*C. +* + IF( UPPER )THEN + DO 210, J = 1, N + DO 200, I = 1, J + TEMP = ZERO + DO 190, L = 1, K + TEMP = TEMP + A( L, I )*A( L, J ) + 190 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240, J = 1, N + DO 230, I = J, N + TEMP = ZERO + DO 220, L = 1, K + TEMP = TEMP + A( L, I )*A( L, J ) + 220 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSYRK . +* + END diff --git a/costa/native/external/blas/stbmv.f b/costa/native/external/blas/stbmv.f new file mode 100644 index 000000000..92a0a563b --- /dev/null +++ b/costa/native/external/blas/stbmv.f @@ -0,0 +1,342 @@ + SUBROUTINE STBMV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, K, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + REAL A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* STBMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular band matrix, with ( k + 1 ) diagonals. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := A'*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with UPLO = 'U' or 'u', K specifies the number of +* super-diagonals of the matrix A. +* On entry with UPLO = 'L' or 'l', K specifies the number of +* sub-diagonals of the matrix A. +* K must satisfy 0 .le. K. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer an upper +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer a lower +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Note that when DIAG = 'U' or 'u' the elements of the array A +* corresponding to the diagonal elements of the matrix are not +* referenced, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP + INTEGER I, INFO, IX, J, JX, KPLUS1, KX, L + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( K.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 7 + ELSE IF( INCX.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'STBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + L = KPLUS1 - J + DO 10, I = MAX( 1, J - K ), J - 1 + X( I ) = X( I ) + TEMP*A( L + I, J ) + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( KPLUS1, J ) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + L = KPLUS1 - J + DO 30, I = MAX( 1, J - K ), J - 1 + X( IX ) = X( IX ) + TEMP*A( L + I, J ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( KPLUS1, J ) + END IF + JX = JX + INCX + IF( J.GT.K ) + $ KX = KX + INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + L = 1 - J + DO 50, I = MIN( N, J + K ), J + 1, -1 + X( I ) = X( I ) + TEMP*A( L + I, J ) + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( 1, J ) + END IF + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + L = 1 - J + DO 70, I = MIN( N, J + K ), J + 1, -1 + X( IX ) = X( IX ) + TEMP*A( L + I, J ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( 1, J ) + END IF + JX = JX - INCX + IF( ( N - J ).GE.K ) + $ KX = KX - INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 100, J = N, 1, -1 + TEMP = X( J ) + L = KPLUS1 - J + IF( NOUNIT ) + $ TEMP = TEMP*A( KPLUS1, J ) + DO 90, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + A( L + I, J )*X( I ) + 90 CONTINUE + X( J ) = TEMP + 100 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 120, J = N, 1, -1 + TEMP = X( JX ) + KX = KX - INCX + IX = KX + L = KPLUS1 - J + IF( NOUNIT ) + $ TEMP = TEMP*A( KPLUS1, J ) + DO 110, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + A( L + I, J )*X( IX ) + IX = IX - INCX + 110 CONTINUE + X( JX ) = TEMP + JX = JX - INCX + 120 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 140, J = 1, N + TEMP = X( J ) + L = 1 - J + IF( NOUNIT ) + $ TEMP = TEMP*A( 1, J ) + DO 130, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + A( L + I, J )*X( I ) + 130 CONTINUE + X( J ) = TEMP + 140 CONTINUE + ELSE + JX = KX + DO 160, J = 1, N + TEMP = X( JX ) + KX = KX + INCX + IX = KX + L = 1 - J + IF( NOUNIT ) + $ TEMP = TEMP*A( 1, J ) + DO 150, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + A( L + I, J )*X( IX ) + IX = IX + INCX + 150 CONTINUE + X( JX ) = TEMP + JX = JX + INCX + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of STBMV . +* + END diff --git a/costa/native/external/blas/stbsv.f b/costa/native/external/blas/stbsv.f new file mode 100644 index 000000000..96ebe26c3 --- /dev/null +++ b/costa/native/external/blas/stbsv.f @@ -0,0 +1,346 @@ + SUBROUTINE STBSV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, K, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + REAL A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* STBSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular band matrix, with ( k + 1 ) +* diagonals. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' A'*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with UPLO = 'U' or 'u', K specifies the number of +* super-diagonals of the matrix A. +* On entry with UPLO = 'L' or 'l', K specifies the number of +* sub-diagonals of the matrix A. +* K must satisfy 0 .le. K. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer an upper +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer a lower +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Note that when DIAG = 'U' or 'u' the elements of the array A +* corresponding to the diagonal elements of the matrix are not +* referenced, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP + INTEGER I, INFO, IX, J, JX, KPLUS1, KX, L + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( K.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 7 + ELSE IF( INCX.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'STBSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed by sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + L = KPLUS1 - J + IF( NOUNIT ) + $ X( J ) = X( J )/A( KPLUS1, J ) + TEMP = X( J ) + DO 10, I = J - 1, MAX( 1, J - K ), -1 + X( I ) = X( I ) - TEMP*A( L + I, J ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 40, J = N, 1, -1 + KX = KX - INCX + IF( X( JX ).NE.ZERO )THEN + IX = KX + L = KPLUS1 - J + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( KPLUS1, J ) + TEMP = X( JX ) + DO 30, I = J - 1, MAX( 1, J - K ), -1 + X( IX ) = X( IX ) - TEMP*A( L + I, J ) + IX = IX - INCX + 30 CONTINUE + END IF + JX = JX - INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + L = 1 - J + IF( NOUNIT ) + $ X( J ) = X( J )/A( 1, J ) + TEMP = X( J ) + DO 50, I = J + 1, MIN( N, J + K ) + X( I ) = X( I ) - TEMP*A( L + I, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + KX = KX + INCX + IF( X( JX ).NE.ZERO )THEN + IX = KX + L = 1 - J + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( 1, J ) + TEMP = X( JX ) + DO 70, I = J + 1, MIN( N, J + K ) + X( IX ) = X( IX ) - TEMP*A( L + I, J ) + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A')*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 100, J = 1, N + TEMP = X( J ) + L = KPLUS1 - J + DO 90, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - A( L + I, J )*X( I ) + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( KPLUS1, J ) + X( J ) = TEMP + 100 CONTINUE + ELSE + JX = KX + DO 120, J = 1, N + TEMP = X( JX ) + IX = KX + L = KPLUS1 - J + DO 110, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - A( L + I, J )*X( IX ) + IX = IX + INCX + 110 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( KPLUS1, J ) + X( JX ) = TEMP + JX = JX + INCX + IF( J.GT.K ) + $ KX = KX + INCX + 120 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 140, J = N, 1, -1 + TEMP = X( J ) + L = 1 - J + DO 130, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - A( L + I, J )*X( I ) + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( 1, J ) + X( J ) = TEMP + 140 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 160, J = N, 1, -1 + TEMP = X( JX ) + IX = KX + L = 1 - J + DO 150, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - A( L + I, J )*X( IX ) + IX = IX - INCX + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( 1, J ) + X( JX ) = TEMP + JX = JX - INCX + IF( ( N - J ).GE.K ) + $ KX = KX - INCX + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of STBSV . +* + END diff --git a/costa/native/external/blas/stpmv.f b/costa/native/external/blas/stpmv.f new file mode 100644 index 000000000..c2acf8636 --- /dev/null +++ b/costa/native/external/blas/stpmv.f @@ -0,0 +1,299 @@ + SUBROUTINE STPMV ( UPLO, TRANS, DIAG, N, AP, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + REAL AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* STPMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := A'*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* AP - REAL array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) +* respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) +* respectively, and so on. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced, but are assumed to be unity. +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'STPMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of AP are +* accessed sequentially with one pass through AP. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x:= A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK =1 + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + K = KK + DO 10, I = 1, J - 1 + X( I ) = X( I ) + TEMP*AP( K ) + K = K + 1 + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*AP( KK + J - 1 ) + END IF + KK = KK + J + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 30, K = KK, KK + J - 2 + X( IX ) = X( IX ) + TEMP*AP( K ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*AP( KK + J - 1 ) + END IF + JX = JX + INCX + KK = KK + J + 40 CONTINUE + END IF + ELSE + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + K = KK + DO 50, I = N, J + 1, -1 + X( I ) = X( I ) + TEMP*AP( K ) + K = K - 1 + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*AP( KK - N + J ) + END IF + KK = KK - ( N - J + 1 ) + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 70, K = KK, KK - ( N - ( J + 1 ) ), -1 + X( IX ) = X( IX ) + TEMP*AP( K ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*AP( KK - N + J ) + END IF + JX = JX - INCX + KK = KK - ( N - J + 1 ) + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 100, J = N, 1, -1 + TEMP = X( J ) + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + K = KK - 1 + DO 90, I = J - 1, 1, -1 + TEMP = TEMP + AP( K )*X( I ) + K = K - 1 + 90 CONTINUE + X( J ) = TEMP + KK = KK - J + 100 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 120, J = N, 1, -1 + TEMP = X( JX ) + IX = JX + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 110, K = KK - 1, KK - J + 1, -1 + IX = IX - INCX + TEMP = TEMP + AP( K )*X( IX ) + 110 CONTINUE + X( JX ) = TEMP + JX = JX - INCX + KK = KK - J + 120 CONTINUE + END IF + ELSE + KK = 1 + IF( INCX.EQ.1 )THEN + DO 140, J = 1, N + TEMP = X( J ) + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + K = KK + 1 + DO 130, I = J + 1, N + TEMP = TEMP + AP( K )*X( I ) + K = K + 1 + 130 CONTINUE + X( J ) = TEMP + KK = KK + ( N - J + 1 ) + 140 CONTINUE + ELSE + JX = KX + DO 160, J = 1, N + TEMP = X( JX ) + IX = JX + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 150, K = KK + 1, KK + N - J + IX = IX + INCX + TEMP = TEMP + AP( K )*X( IX ) + 150 CONTINUE + X( JX ) = TEMP + JX = JX + INCX + KK = KK + ( N - J + 1 ) + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of STPMV . +* + END diff --git a/costa/native/external/blas/stpsv.f b/costa/native/external/blas/stpsv.f new file mode 100644 index 000000000..f22d6c65a --- /dev/null +++ b/costa/native/external/blas/stpsv.f @@ -0,0 +1,302 @@ + SUBROUTINE STPSV ( UPLO, TRANS, DIAG, N, AP, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + REAL AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* STPSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular matrix, supplied in packed form. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' A'*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* AP - REAL array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) +* respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) +* respectively, and so on. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced, but are assumed to be unity. +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'STPSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of AP are +* accessed sequentially with one pass through AP. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/AP( KK ) + TEMP = X( J ) + K = KK - 1 + DO 10, I = J - 1, 1, -1 + X( I ) = X( I ) - TEMP*AP( K ) + K = K - 1 + 10 CONTINUE + END IF + KK = KK - J + 20 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 40, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/AP( KK ) + TEMP = X( JX ) + IX = JX + DO 30, K = KK - 1, KK - J + 1, -1 + IX = IX - INCX + X( IX ) = X( IX ) - TEMP*AP( K ) + 30 CONTINUE + END IF + JX = JX - INCX + KK = KK - J + 40 CONTINUE + END IF + ELSE + KK = 1 + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/AP( KK ) + TEMP = X( J ) + K = KK + 1 + DO 50, I = J + 1, N + X( I ) = X( I ) - TEMP*AP( K ) + K = K + 1 + 50 CONTINUE + END IF + KK = KK + ( N - J + 1 ) + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/AP( KK ) + TEMP = X( JX ) + IX = JX + DO 70, K = KK + 1, KK + N - J + IX = IX + INCX + X( IX ) = X( IX ) - TEMP*AP( K ) + 70 CONTINUE + END IF + JX = JX + INCX + KK = KK + ( N - J + 1 ) + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = 1 + IF( INCX.EQ.1 )THEN + DO 100, J = 1, N + TEMP = X( J ) + K = KK + DO 90, I = 1, J - 1 + TEMP = TEMP - AP( K )*X( I ) + K = K + 1 + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK + J - 1 ) + X( J ) = TEMP + KK = KK + J + 100 CONTINUE + ELSE + JX = KX + DO 120, J = 1, N + TEMP = X( JX ) + IX = KX + DO 110, K = KK, KK + J - 2 + TEMP = TEMP - AP( K )*X( IX ) + IX = IX + INCX + 110 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK + J - 1 ) + X( JX ) = TEMP + JX = JX + INCX + KK = KK + J + 120 CONTINUE + END IF + ELSE + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 140, J = N, 1, -1 + TEMP = X( J ) + K = KK + DO 130, I = N, J + 1, -1 + TEMP = TEMP - AP( K )*X( I ) + K = K - 1 + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK - N + J ) + X( J ) = TEMP + KK = KK - ( N - J + 1 ) + 140 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 160, J = N, 1, -1 + TEMP = X( JX ) + IX = KX + DO 150, K = KK, KK - ( N - ( J + 1 ) ), -1 + TEMP = TEMP - AP( K )*X( IX ) + IX = IX - INCX + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK - N + J ) + X( JX ) = TEMP + JX = JX - INCX + KK = KK - (N - J + 1 ) + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of STPSV . +* + END diff --git a/costa/native/external/blas/strmm.f b/costa/native/external/blas/strmm.f new file mode 100644 index 000000000..b1556c634 --- /dev/null +++ b/costa/native/external/blas/strmm.f @@ -0,0 +1,355 @@ + SUBROUTINE STRMM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, + $ B, LDB ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO, TRANSA, DIAG + INTEGER M, N, LDA, LDB + REAL ALPHA +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* STRMM performs one of the matrix-matrix operations +* +* B := alpha*op( A )*B, or B := alpha*B*op( A ), +* +* where alpha is a scalar, B is an m by n matrix, A is a unit, or +* non-unit, upper or lower triangular matrix and op( A ) is one of +* +* op( A ) = A or op( A ) = A'. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether op( A ) multiplies B from +* the left or right as follows: +* +* SIDE = 'L' or 'l' B := alpha*op( A )*B. +* +* SIDE = 'R' or 'r' B := alpha*B*op( A ). +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix A is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n' op( A ) = A. +* +* TRANSA = 'T' or 't' op( A ) = A'. +* +* TRANSA = 'C' or 'c' op( A ) = A'. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit triangular +* as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of B. M must be at +* least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of B. N must be +* at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. When alpha is +* zero then A is not referenced and B need not be set before +* entry. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, k ), where k is m +* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. +* Before entry with UPLO = 'U' or 'u', the leading k by k +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading k by k +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' +* then LDA must be at least max( 1, n ). +* Unchanged on exit. +* +* B - REAL array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B, and on exit is overwritten by the +* transformed matrix. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL LSIDE, NOUNIT, UPPER + INTEGER I, INFO, J, K, NROWA + REAL TEMP +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LSIDE = LSAME( SIDE , 'L' ) + IF( LSIDE )THEN + NROWA = M + ELSE + NROWA = N + END IF + NOUNIT = LSAME( DIAG , 'N' ) + UPPER = LSAME( UPLO , 'U' ) +* + INFO = 0 + IF( ( .NOT.LSIDE ).AND. + $ ( .NOT.LSAME( SIDE , 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 2 + ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'C' ) ) )THEN + INFO = 3 + ELSE IF( ( .NOT.LSAME( DIAG , 'U' ) ).AND. + $ ( .NOT.LSAME( DIAG , 'N' ) ) )THEN + INFO = 4 + ELSE IF( M .LT.0 )THEN + INFO = 5 + ELSE IF( N .LT.0 )THEN + INFO = 6 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'STRMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* Start the operations. +* + IF( LSIDE )THEN + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*A*B. +* + IF( UPPER )THEN + DO 50, J = 1, N + DO 40, K = 1, M + IF( B( K, J ).NE.ZERO )THEN + TEMP = ALPHA*B( K, J ) + DO 30, I = 1, K - 1 + B( I, J ) = B( I, J ) + TEMP*A( I, K ) + 30 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP*A( K, K ) + B( K, J ) = TEMP + END IF + 40 CONTINUE + 50 CONTINUE + ELSE + DO 80, J = 1, N + DO 70 K = M, 1, -1 + IF( B( K, J ).NE.ZERO )THEN + TEMP = ALPHA*B( K, J ) + B( K, J ) = TEMP + IF( NOUNIT ) + $ B( K, J ) = B( K, J )*A( K, K ) + DO 60, I = K + 1, M + B( I, J ) = B( I, J ) + TEMP*A( I, K ) + 60 CONTINUE + END IF + 70 CONTINUE + 80 CONTINUE + END IF + ELSE +* +* Form B := alpha*A'*B. +* + IF( UPPER )THEN + DO 110, J = 1, N + DO 100, I = M, 1, -1 + TEMP = B( I, J ) + IF( NOUNIT ) + $ TEMP = TEMP*A( I, I ) + DO 90, K = 1, I - 1 + TEMP = TEMP + A( K, I )*B( K, J ) + 90 CONTINUE + B( I, J ) = ALPHA*TEMP + 100 CONTINUE + 110 CONTINUE + ELSE + DO 140, J = 1, N + DO 130, I = 1, M + TEMP = B( I, J ) + IF( NOUNIT ) + $ TEMP = TEMP*A( I, I ) + DO 120, K = I + 1, M + TEMP = TEMP + A( K, I )*B( K, J ) + 120 CONTINUE + B( I, J ) = ALPHA*TEMP + 130 CONTINUE + 140 CONTINUE + END IF + END IF + ELSE + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*B*A. +* + IF( UPPER )THEN + DO 180, J = N, 1, -1 + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 150, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 150 CONTINUE + DO 170, K = 1, J - 1 + IF( A( K, J ).NE.ZERO )THEN + TEMP = ALPHA*A( K, J ) + DO 160, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + ELSE + DO 220, J = 1, N + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 190, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 190 CONTINUE + DO 210, K = J + 1, N + IF( A( K, J ).NE.ZERO )THEN + TEMP = ALPHA*A( K, J ) + DO 200, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 200 CONTINUE + END IF + 210 CONTINUE + 220 CONTINUE + END IF + ELSE +* +* Form B := alpha*B*A'. +* + IF( UPPER )THEN + DO 260, K = 1, N + DO 240, J = 1, K - 1 + IF( A( J, K ).NE.ZERO )THEN + TEMP = ALPHA*A( J, K ) + DO 230, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 230 CONTINUE + END IF + 240 CONTINUE + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( K, K ) + IF( TEMP.NE.ONE )THEN + DO 250, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 250 CONTINUE + END IF + 260 CONTINUE + ELSE + DO 300, K = N, 1, -1 + DO 280, J = K + 1, N + IF( A( J, K ).NE.ZERO )THEN + TEMP = ALPHA*A( J, K ) + DO 270, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 270 CONTINUE + END IF + 280 CONTINUE + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( K, K ) + IF( TEMP.NE.ONE )THEN + DO 290, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 290 CONTINUE + END IF + 300 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of STRMM . +* + END diff --git a/costa/native/external/blas/strmv.f b/costa/native/external/blas/strmv.f new file mode 100644 index 000000000..f2f7d75dd --- /dev/null +++ b/costa/native/external/blas/strmv.f @@ -0,0 +1,286 @@ + SUBROUTINE STRMV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + REAL A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* STRMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := A'*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP + INTEGER I, INFO, IX, J, JX, KX + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'STRMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + DO 10, I = 1, J - 1 + X( I ) = X( I ) + TEMP*A( I, J ) + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( J, J ) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 30, I = 1, J - 1 + X( IX ) = X( IX ) + TEMP*A( I, J ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( J, J ) + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + DO 50, I = N, J + 1, -1 + X( I ) = X( I ) + TEMP*A( I, J ) + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( J, J ) + END IF + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 70, I = N, J + 1, -1 + X( IX ) = X( IX ) + TEMP*A( I, J ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( J, J ) + END IF + JX = JX - INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 100, J = N, 1, -1 + TEMP = X( J ) + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 90, I = J - 1, 1, -1 + TEMP = TEMP + A( I, J )*X( I ) + 90 CONTINUE + X( J ) = TEMP + 100 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 120, J = N, 1, -1 + TEMP = X( JX ) + IX = JX + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 110, I = J - 1, 1, -1 + IX = IX - INCX + TEMP = TEMP + A( I, J )*X( IX ) + 110 CONTINUE + X( JX ) = TEMP + JX = JX - INCX + 120 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 140, J = 1, N + TEMP = X( J ) + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 130, I = J + 1, N + TEMP = TEMP + A( I, J )*X( I ) + 130 CONTINUE + X( J ) = TEMP + 140 CONTINUE + ELSE + JX = KX + DO 160, J = 1, N + TEMP = X( JX ) + IX = JX + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 150, I = J + 1, N + IX = IX + INCX + TEMP = TEMP + A( I, J )*X( IX ) + 150 CONTINUE + X( JX ) = TEMP + JX = JX + INCX + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of STRMV . +* + END diff --git a/costa/native/external/blas/strsm.f b/costa/native/external/blas/strsm.f new file mode 100644 index 000000000..1c80a7aee --- /dev/null +++ b/costa/native/external/blas/strsm.f @@ -0,0 +1,378 @@ + SUBROUTINE STRSM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, + $ B, LDB ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO, TRANSA, DIAG + INTEGER M, N, LDA, LDB + REAL ALPHA +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* STRSM solves one of the matrix equations +* +* op( A )*X = alpha*B, or X*op( A ) = alpha*B, +* +* where alpha is a scalar, X and B are m by n matrices, A is a unit, or +* non-unit, upper or lower triangular matrix and op( A ) is one of +* +* op( A ) = A or op( A ) = A'. +* +* The matrix X is overwritten on B. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether op( A ) appears on the left +* or right of X as follows: +* +* SIDE = 'L' or 'l' op( A )*X = alpha*B. +* +* SIDE = 'R' or 'r' X*op( A ) = alpha*B. +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix A is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n' op( A ) = A. +* +* TRANSA = 'T' or 't' op( A ) = A'. +* +* TRANSA = 'C' or 'c' op( A ) = A'. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit triangular +* as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of B. M must be at +* least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of B. N must be +* at least zero. +* Unchanged on exit. +* +* ALPHA - REAL . +* On entry, ALPHA specifies the scalar alpha. When alpha is +* zero then A is not referenced and B need not be set before +* entry. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, k ), where k is m +* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. +* Before entry with UPLO = 'U' or 'u', the leading k by k +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading k by k +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' +* then LDA must be at least max( 1, n ). +* Unchanged on exit. +* +* B - REAL array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the right-hand side matrix B, and on exit is +* overwritten by the solution matrix X. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL LSIDE, NOUNIT, UPPER + INTEGER I, INFO, J, K, NROWA + REAL TEMP +* .. Parameters .. + REAL ONE , ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LSIDE = LSAME( SIDE , 'L' ) + IF( LSIDE )THEN + NROWA = M + ELSE + NROWA = N + END IF + NOUNIT = LSAME( DIAG , 'N' ) + UPPER = LSAME( UPLO , 'U' ) +* + INFO = 0 + IF( ( .NOT.LSIDE ).AND. + $ ( .NOT.LSAME( SIDE , 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 2 + ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'C' ) ) )THEN + INFO = 3 + ELSE IF( ( .NOT.LSAME( DIAG , 'U' ) ).AND. + $ ( .NOT.LSAME( DIAG , 'N' ) ) )THEN + INFO = 4 + ELSE IF( M .LT.0 )THEN + INFO = 5 + ELSE IF( N .LT.0 )THEN + INFO = 6 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'STRSM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* Start the operations. +* + IF( LSIDE )THEN + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*inv( A )*B. +* + IF( UPPER )THEN + DO 60, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 30, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 30 CONTINUE + END IF + DO 50, K = M, 1, -1 + IF( B( K, J ).NE.ZERO )THEN + IF( NOUNIT ) + $ B( K, J ) = B( K, J )/A( K, K ) + DO 40, I = 1, K - 1 + B( I, J ) = B( I, J ) - B( K, J )*A( I, K ) + 40 CONTINUE + END IF + 50 CONTINUE + 60 CONTINUE + ELSE + DO 100, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 70, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 70 CONTINUE + END IF + DO 90 K = 1, M + IF( B( K, J ).NE.ZERO )THEN + IF( NOUNIT ) + $ B( K, J ) = B( K, J )/A( K, K ) + DO 80, I = K + 1, M + B( I, J ) = B( I, J ) - B( K, J )*A( I, K ) + 80 CONTINUE + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form B := alpha*inv( A' )*B. +* + IF( UPPER )THEN + DO 130, J = 1, N + DO 120, I = 1, M + TEMP = ALPHA*B( I, J ) + DO 110, K = 1, I - 1 + TEMP = TEMP - A( K, I )*B( K, J ) + 110 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( I, I ) + B( I, J ) = TEMP + 120 CONTINUE + 130 CONTINUE + ELSE + DO 160, J = 1, N + DO 150, I = M, 1, -1 + TEMP = ALPHA*B( I, J ) + DO 140, K = I + 1, M + TEMP = TEMP - A( K, I )*B( K, J ) + 140 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( I, I ) + B( I, J ) = TEMP + 150 CONTINUE + 160 CONTINUE + END IF + END IF + ELSE + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*B*inv( A ). +* + IF( UPPER )THEN + DO 210, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 170, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 170 CONTINUE + END IF + DO 190, K = 1, J - 1 + IF( A( K, J ).NE.ZERO )THEN + DO 180, I = 1, M + B( I, J ) = B( I, J ) - A( K, J )*B( I, K ) + 180 CONTINUE + END IF + 190 CONTINUE + IF( NOUNIT )THEN + TEMP = ONE/A( J, J ) + DO 200, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 200 CONTINUE + END IF + 210 CONTINUE + ELSE + DO 260, J = N, 1, -1 + IF( ALPHA.NE.ONE )THEN + DO 220, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 220 CONTINUE + END IF + DO 240, K = J + 1, N + IF( A( K, J ).NE.ZERO )THEN + DO 230, I = 1, M + B( I, J ) = B( I, J ) - A( K, J )*B( I, K ) + 230 CONTINUE + END IF + 240 CONTINUE + IF( NOUNIT )THEN + TEMP = ONE/A( J, J ) + DO 250, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 250 CONTINUE + END IF + 260 CONTINUE + END IF + ELSE +* +* Form B := alpha*B*inv( A' ). +* + IF( UPPER )THEN + DO 310, K = N, 1, -1 + IF( NOUNIT )THEN + TEMP = ONE/A( K, K ) + DO 270, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 270 CONTINUE + END IF + DO 290, J = 1, K - 1 + IF( A( J, K ).NE.ZERO )THEN + TEMP = A( J, K ) + DO 280, I = 1, M + B( I, J ) = B( I, J ) - TEMP*B( I, K ) + 280 CONTINUE + END IF + 290 CONTINUE + IF( ALPHA.NE.ONE )THEN + DO 300, I = 1, M + B( I, K ) = ALPHA*B( I, K ) + 300 CONTINUE + END IF + 310 CONTINUE + ELSE + DO 360, K = 1, N + IF( NOUNIT )THEN + TEMP = ONE/A( K, K ) + DO 320, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 320 CONTINUE + END IF + DO 340, J = K + 1, N + IF( A( J, K ).NE.ZERO )THEN + TEMP = A( J, K ) + DO 330, I = 1, M + B( I, J ) = B( I, J ) - TEMP*B( I, K ) + 330 CONTINUE + END IF + 340 CONTINUE + IF( ALPHA.NE.ONE )THEN + DO 350, I = 1, M + B( I, K ) = ALPHA*B( I, K ) + 350 CONTINUE + END IF + 360 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of STRSM . +* + END diff --git a/costa/native/external/blas/strsv.f b/costa/native/external/blas/strsv.f new file mode 100644 index 000000000..6fb05aafe --- /dev/null +++ b/costa/native/external/blas/strsv.f @@ -0,0 +1,289 @@ + SUBROUTINE STRSV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + REAL A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* STRSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular matrix. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' A'*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* A - REAL array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - REAL array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. Local Scalars .. + REAL TEMP + INTEGER I, INFO, IX, J, JX, KX + LOGICAL NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'STRSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOUNIT = LSAME( DIAG, 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/A( J, J ) + TEMP = X( J ) + DO 10, I = J - 1, 1, -1 + X( I ) = X( I ) - TEMP*A( I, J ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 40, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( J, J ) + TEMP = X( JX ) + IX = JX + DO 30, I = J - 1, 1, -1 + IX = IX - INCX + X( IX ) = X( IX ) - TEMP*A( I, J ) + 30 CONTINUE + END IF + JX = JX - INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/A( J, J ) + TEMP = X( J ) + DO 50, I = J + 1, N + X( I ) = X( I ) - TEMP*A( I, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( J, J ) + TEMP = X( JX ) + IX = JX + DO 70, I = J + 1, N + IX = IX + INCX + X( IX ) = X( IX ) - TEMP*A( I, J ) + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 100, J = 1, N + TEMP = X( J ) + DO 90, I = 1, J - 1 + TEMP = TEMP - A( I, J )*X( I ) + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + X( J ) = TEMP + 100 CONTINUE + ELSE + JX = KX + DO 120, J = 1, N + TEMP = X( JX ) + IX = KX + DO 110, I = 1, J - 1 + TEMP = TEMP - A( I, J )*X( IX ) + IX = IX + INCX + 110 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + X( JX ) = TEMP + JX = JX + INCX + 120 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 140, J = N, 1, -1 + TEMP = X( J ) + DO 130, I = N, J + 1, -1 + TEMP = TEMP - A( I, J )*X( I ) + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + X( J ) = TEMP + 140 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 160, J = N, 1, -1 + TEMP = X( JX ) + IX = KX + DO 150, I = N, J + 1, -1 + TEMP = TEMP - A( I, J )*X( IX ) + IX = IX - INCX + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + X( JX ) = TEMP + JX = JX - INCX + 160 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of STRSV . +* + END diff --git a/costa/native/external/blas/xerbla.f b/costa/native/external/blas/xerbla.f new file mode 100644 index 000000000..18100082c --- /dev/null +++ b/costa/native/external/blas/xerbla.f @@ -0,0 +1,43 @@ + SUBROUTINE XERBLA( SRNAME, INFO ) +* +* -- LAPACK auxiliary routine (preliminary version) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER*6 SRNAME + INTEGER INFO +* .. +* +* Purpose +* ======= +* +* XERBLA is an error handler for the LAPACK routines. +* It is called by an LAPACK routine if an input parameter has an +* invalid value. A message is printed and execution stops. +* +* Installers may consider modifying the STOP statement in order to +* call system-specific exception-handling facilities. +* +* Arguments +* ========= +* +* SRNAME (input) CHARACTER*6 +* The name of the routine which called XERBLA. +* +* INFO (input) INTEGER +* The position of the invalid parameter in the parameter list +* of the calling routine. +* +* + WRITE( *, FMT = 9999 )SRNAME, INFO +* + STOP +* + 9999 FORMAT( ' ** On entry to ', A6, ' parameter number ', I2, ' had ', + $ 'an illegal value' ) +* +* End of XERBLA +* + END diff --git a/costa/native/external/blas/zaxpy.f b/costa/native/external/blas/zaxpy.f new file mode 100644 index 000000000..4fa3b1e43 --- /dev/null +++ b/costa/native/external/blas/zaxpy.f @@ -0,0 +1,34 @@ + subroutine zaxpy(n,za,zx,incx,zy,incy) +c +c constant times a vector plus a vector. +c jack dongarra, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double complex zx(*),zy(*),za + integer i,incx,incy,ix,iy,n + double precision dcabs1 + if(n.le.0)return + if (dcabs1(za) .eq. 0.0d0) return + if (incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + zy(iy) = zy(iy) + za*zx(ix) + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + zy(i) = zy(i) + za*zx(i) + 30 continue + return + end diff --git a/costa/native/external/blas/zcopy.f b/costa/native/external/blas/zcopy.f new file mode 100644 index 000000000..9ccfa880f --- /dev/null +++ b/costa/native/external/blas/zcopy.f @@ -0,0 +1,33 @@ + subroutine zcopy(n,zx,incx,zy,incy) +c +c copies a vector, x, to a vector, y. +c jack dongarra, linpack, 4/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double complex zx(*),zy(*) + integer i,incx,incy,ix,iy,n +c + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + zy(iy) = zx(ix) + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + zy(i) = zx(i) + 30 continue + return + end diff --git a/costa/native/external/blas/zdotc.f b/costa/native/external/blas/zdotc.f new file mode 100644 index 000000000..d6ac68537 --- /dev/null +++ b/costa/native/external/blas/zdotc.f @@ -0,0 +1,36 @@ + double complex function zdotc(n,zx,incx,zy,incy) +c +c forms the dot product of a vector. +c jack dongarra, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double complex zx(*),zy(*),ztemp + integer i,incx,incy,ix,iy,n + ztemp = (0.0d0,0.0d0) + zdotc = (0.0d0,0.0d0) + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + ztemp = ztemp + dconjg(zx(ix))*zy(iy) + ix = ix + incx + iy = iy + incy + 10 continue + zdotc = ztemp + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + ztemp = ztemp + dconjg(zx(i))*zy(i) + 30 continue + zdotc = ztemp + return + end diff --git a/costa/native/external/blas/zdotu.f b/costa/native/external/blas/zdotu.f new file mode 100644 index 000000000..329e98855 --- /dev/null +++ b/costa/native/external/blas/zdotu.f @@ -0,0 +1,36 @@ + double complex function zdotu(n,zx,incx,zy,incy) +c +c forms the dot product of two vectors. +c jack dongarra, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double complex zx(*),zy(*),ztemp + integer i,incx,incy,ix,iy,n + ztemp = (0.0d0,0.0d0) + zdotu = (0.0d0,0.0d0) + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + ztemp = ztemp + zx(ix)*zy(iy) + ix = ix + incx + iy = iy + incy + 10 continue + zdotu = ztemp + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + ztemp = ztemp + zx(i)*zy(i) + 30 continue + zdotu = ztemp + return + end diff --git a/costa/native/external/blas/zdscal.f b/costa/native/external/blas/zdscal.f new file mode 100644 index 000000000..8123424de --- /dev/null +++ b/costa/native/external/blas/zdscal.f @@ -0,0 +1,30 @@ + subroutine zdscal(n,da,zx,incx) +c +c scales a vector by a constant. +c jack dongarra, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double complex zx(*) + double precision da + integer i,incx,ix,n +c + if( n.le.0 .or. incx.le.0 )return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + ix = 1 + do 10 i = 1,n + zx(ix) = dcmplx(da,0.0d0)*zx(ix) + ix = ix + incx + 10 continue + return +c +c code for increment equal to 1 +c + 20 do 30 i = 1,n + zx(i) = dcmplx(da,0.0d0)*zx(i) + 30 continue + return + end diff --git a/costa/native/external/blas/zgbmv.f b/costa/native/external/blas/zgbmv.f new file mode 100644 index 000000000..91ce9a60b --- /dev/null +++ b/costa/native/external/blas/zgbmv.f @@ -0,0 +1,322 @@ + SUBROUTINE ZGBMV ( TRANS, M, N, KL, KU, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + COMPLEX*16 ALPHA, BETA + INTEGER INCX, INCY, KL, KU, LDA, M, N + CHARACTER*1 TRANS +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZGBMV performs one of the matrix-vector operations +* +* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, or +* +* y := alpha*conjg( A' )*x + beta*y, +* +* where alpha and beta are scalars, x and y are vectors and A is an +* m by n band matrix, with kl sub-diagonals and ku super-diagonals. +* +* Parameters +* ========== +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. +* +* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. +* +* TRANS = 'C' or 'c' y := alpha*conjg( A' )*x + beta*y. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* KL - INTEGER. +* On entry, KL specifies the number of sub-diagonals of the +* matrix A. KL must satisfy 0 .le. KL. +* Unchanged on exit. +* +* KU - INTEGER. +* On entry, KU specifies the number of super-diagonals of the +* matrix A. KU must satisfy 0 .le. KU. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry, the leading ( kl + ku + 1 ) by n part of the +* array A must contain the matrix of coefficients, supplied +* column by column, with the leading diagonal of the matrix in +* row ( ku + 1 ) of the array, the first super-diagonal +* starting at position 2 in row ku, the first sub-diagonal +* starting at position 1 in row ( ku + 2 ), and so on. +* Elements in the array A that do not correspond to elements +* in the band matrix (such as the top left ku by ku triangle) +* are not referenced. +* The following program segment will transfer a band matrix +* from conventional full matrix storage to band storage: +* +* DO 20, J = 1, N +* K = KU + 1 - J +* DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL ) +* A( K + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( kl + ku + 1 ). +* Unchanged on exit. +* +* X - COMPLEX*16 array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX*16 . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - COMPLEX*16 array of DIMENSION at least +* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +* Before entry, the incremented array Y must contain the +* vector y. On exit, Y is overwritten by the updated vector y. +* +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, IY, J, JX, JY, K, KUP1, KX, KY, + $ LENX, LENY + LOGICAL NOCONJ +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( KL.LT.0 )THEN + INFO = 4 + ELSE IF( KU.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( KL + KU + 1 ) )THEN + INFO = 8 + ELSE IF( INCX.EQ.0 )THEN + INFO = 10 + ELSE IF( INCY.EQ.0 )THEN + INFO = 13 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZGBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( LSAME( TRANS, 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the band part of A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, LENY + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, LENY + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, LENY + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, LENY + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + KUP1 = KU + 1 + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form y := alpha*A*x + y. +* + JX = KX + IF( INCY.EQ.1 )THEN + DO 60, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + K = KUP1 - J + DO 50, I = MAX( 1, J - KU ), MIN( M, J + KL ) + Y( I ) = Y( I ) + TEMP*A( K + I, J ) + 50 CONTINUE + END IF + JX = JX + INCX + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IY = KY + K = KUP1 - J + DO 70, I = MAX( 1, J - KU ), MIN( M, J + KL ) + Y( IY ) = Y( IY ) + TEMP*A( K + I, J ) + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + IF( J.GT.KU ) + $ KY = KY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y := alpha*A'*x + y or y := alpha*conjg( A' )*x + y. +* + JY = KY + IF( INCX.EQ.1 )THEN + DO 110, J = 1, N + TEMP = ZERO + K = KUP1 - J + IF( NOCONJ )THEN + DO 90, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + A( K + I, J )*X( I ) + 90 CONTINUE + ELSE + DO 100, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + DCONJG( A( K + I, J ) )*X( I ) + 100 CONTINUE + END IF + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 110 CONTINUE + ELSE + DO 140, J = 1, N + TEMP = ZERO + IX = KX + K = KUP1 - J + IF( NOCONJ )THEN + DO 120, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + A( K + I, J )*X( IX ) + IX = IX + INCX + 120 CONTINUE + ELSE + DO 130, I = MAX( 1, J - KU ), MIN( M, J + KL ) + TEMP = TEMP + DCONJG( A( K + I, J ) )*X( IX ) + IX = IX + INCX + 130 CONTINUE + END IF + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + IF( J.GT.KU ) + $ KX = KX + INCX + 140 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZGBMV . +* + END diff --git a/costa/native/external/blas/zgemm.f b/costa/native/external/blas/zgemm.f new file mode 100644 index 000000000..09cd151ee --- /dev/null +++ b/costa/native/external/blas/zgemm.f @@ -0,0 +1,415 @@ + SUBROUTINE ZGEMM ( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 TRANSA, TRANSB + INTEGER M, N, K, LDA, LDB, LDC + COMPLEX*16 ALPHA, BETA +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* ZGEMM performs one of the matrix-matrix operations +* +* C := alpha*op( A )*op( B ) + beta*C, +* +* where op( X ) is one of +* +* op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ), +* +* alpha and beta are scalars, and A, B and C are matrices, with op( A ) +* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. +* +* Parameters +* ========== +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n', op( A ) = A. +* +* TRANSA = 'T' or 't', op( A ) = A'. +* +* TRANSA = 'C' or 'c', op( A ) = conjg( A' ). +* +* Unchanged on exit. +* +* TRANSB - CHARACTER*1. +* On entry, TRANSB specifies the form of op( B ) to be used in +* the matrix multiplication as follows: +* +* TRANSB = 'N' or 'n', op( B ) = B. +* +* TRANSB = 'T' or 't', op( B ) = B'. +* +* TRANSB = 'C' or 'c', op( B ) = conjg( B' ). +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix +* op( A ) and of the matrix C. M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix +* op( B ) and the number of columns of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry, K specifies the number of columns of the matrix +* op( A ) and the number of rows of the matrix op( B ). K must +* be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +* k when TRANSA = 'N' or 'n', and is m otherwise. +* Before entry with TRANSA = 'N' or 'n', the leading m by k +* part of the array A must contain the matrix A, otherwise +* the leading k by m part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANSA = 'N' or 'n' then +* LDA must be at least max( 1, m ), otherwise LDA must be at +* least max( 1, k ). +* Unchanged on exit. +* +* B - COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is +* n when TRANSB = 'N' or 'n', and is k otherwise. +* Before entry with TRANSB = 'N' or 'n', the leading k by n +* part of the array B must contain the matrix B, otherwise +* the leading n by k part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANSB = 'N' or 'n' then +* LDB must be at least max( 1, k ), otherwise LDB must be at +* least max( 1, n ). +* Unchanged on exit. +* +* BETA - COMPLEX*16 . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then C need not be set on input. +* Unchanged on exit. +* +* C - COMPLEX*16 array of DIMENSION ( LDC, n ). +* Before entry, the leading m by n part of the array C must +* contain the matrix C, except when beta is zero, in which +* case C need not be set on entry. +* On exit, the array C is overwritten by the m by n matrix +* ( alpha*op( A )*op( B ) + beta*C ). +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. Local Scalars .. + LOGICAL CONJA, CONJB, NOTA, NOTB + INTEGER I, INFO, J, L, NCOLA, NROWA, NROWB + COMPLEX*16 TEMP +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Executable Statements .. +* +* Set NOTA and NOTB as true if A and B respectively are not +* conjugated or transposed, set CONJA and CONJB as true if A and +* B respectively are to be transposed but not conjugated and set +* NROWA, NCOLA and NROWB as the number of rows and columns of A +* and the number of rows of B respectively. +* + NOTA = LSAME( TRANSA, 'N' ) + NOTB = LSAME( TRANSB, 'N' ) + CONJA = LSAME( TRANSA, 'C' ) + CONJB = LSAME( TRANSB, 'C' ) + IF( NOTA )THEN + NROWA = M + NCOLA = K + ELSE + NROWA = K + NCOLA = M + END IF + IF( NOTB )THEN + NROWB = K + ELSE + NROWB = N + END IF +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.NOTA ).AND. + $ ( .NOT.CONJA ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.NOTB ).AND. + $ ( .NOT.CONJB ).AND. + $ ( .NOT.LSAME( TRANSB, 'T' ) ) )THEN + INFO = 2 + ELSE IF( M .LT.0 )THEN + INFO = 3 + ELSE IF( N .LT.0 )THEN + INFO = 4 + ELSE IF( K .LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 8 + ELSE IF( LDB.LT.MAX( 1, NROWB ) )THEN + INFO = 10 + ELSE IF( LDC.LT.MAX( 1, M ) )THEN + INFO = 13 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZGEMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, M + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF( NOTB )THEN + IF( NOTA )THEN +* +* Form C := alpha*A*B + beta*C. +* + DO 90, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 50, I = 1, M + C( I, J ) = ZERO + 50 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 60, I = 1, M + C( I, J ) = BETA*C( I, J ) + 60 CONTINUE + END IF + DO 80, L = 1, K + IF( B( L, J ).NE.ZERO )THEN + TEMP = ALPHA*B( L, J ) + DO 70, I = 1, M + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 70 CONTINUE + END IF + 80 CONTINUE + 90 CONTINUE + ELSE IF( CONJA )THEN +* +* Form C := alpha*conjg( A' )*B + beta*C. +* + DO 120, J = 1, N + DO 110, I = 1, M + TEMP = ZERO + DO 100, L = 1, K + TEMP = TEMP + DCONJG( A( L, I ) )*B( L, J ) + 100 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 110 CONTINUE + 120 CONTINUE + ELSE +* +* Form C := alpha*A'*B + beta*C +* + DO 150, J = 1, N + DO 140, I = 1, M + TEMP = ZERO + DO 130, L = 1, K + TEMP = TEMP + A( L, I )*B( L, J ) + 130 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 140 CONTINUE + 150 CONTINUE + END IF + ELSE IF( NOTA )THEN + IF( CONJB )THEN +* +* Form C := alpha*A*conjg( B' ) + beta*C. +* + DO 200, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 160, I = 1, M + C( I, J ) = ZERO + 160 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 170, I = 1, M + C( I, J ) = BETA*C( I, J ) + 170 CONTINUE + END IF + DO 190, L = 1, K + IF( B( J, L ).NE.ZERO )THEN + TEMP = ALPHA*DCONJG( B( J, L ) ) + DO 180, I = 1, M + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 180 CONTINUE + END IF + 190 CONTINUE + 200 CONTINUE + ELSE +* +* Form C := alpha*A*B' + beta*C +* + DO 250, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 210, I = 1, M + C( I, J ) = ZERO + 210 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 220, I = 1, M + C( I, J ) = BETA*C( I, J ) + 220 CONTINUE + END IF + DO 240, L = 1, K + IF( B( J, L ).NE.ZERO )THEN + TEMP = ALPHA*B( J, L ) + DO 230, I = 1, M + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 230 CONTINUE + END IF + 240 CONTINUE + 250 CONTINUE + END IF + ELSE IF( CONJA )THEN + IF( CONJB )THEN +* +* Form C := alpha*conjg( A' )*conjg( B' ) + beta*C. +* + DO 280, J = 1, N + DO 270, I = 1, M + TEMP = ZERO + DO 260, L = 1, K + TEMP = TEMP + + $ DCONJG( A( L, I ) )*DCONJG( B( J, L ) ) + 260 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 270 CONTINUE + 280 CONTINUE + ELSE +* +* Form C := alpha*conjg( A' )*B' + beta*C +* + DO 310, J = 1, N + DO 300, I = 1, M + TEMP = ZERO + DO 290, L = 1, K + TEMP = TEMP + DCONJG( A( L, I ) )*B( J, L ) + 290 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 300 CONTINUE + 310 CONTINUE + END IF + ELSE + IF( CONJB )THEN +* +* Form C := alpha*A'*conjg( B' ) + beta*C +* + DO 340, J = 1, N + DO 330, I = 1, M + TEMP = ZERO + DO 320, L = 1, K + TEMP = TEMP + A( L, I )*DCONJG( B( J, L ) ) + 320 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 330 CONTINUE + 340 CONTINUE + ELSE +* +* Form C := alpha*A'*B' + beta*C +* + DO 370, J = 1, N + DO 360, I = 1, M + TEMP = ZERO + DO 350, L = 1, K + TEMP = TEMP + A( L, I )*B( J, L ) + 350 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 360 CONTINUE + 370 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZGEMM . +* + END diff --git a/costa/native/external/blas/zgemv.f b/costa/native/external/blas/zgemv.f new file mode 100644 index 000000000..014a5e02b --- /dev/null +++ b/costa/native/external/blas/zgemv.f @@ -0,0 +1,281 @@ + SUBROUTINE ZGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + COMPLEX*16 ALPHA, BETA + INTEGER INCX, INCY, LDA, M, N + CHARACTER*1 TRANS +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZGEMV performs one of the matrix-vector operations +* +* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, or +* +* y := alpha*conjg( A' )*x + beta*y, +* +* where alpha and beta are scalars, x and y are vectors and A is an +* m by n matrix. +* +* Parameters +* ========== +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. +* +* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. +* +* TRANS = 'C' or 'c' y := alpha*conjg( A' )*x + beta*y. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* X - COMPLEX*16 array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX*16 . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - COMPLEX*16 array of DIMENSION at least +* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +* and at least +* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +* Before entry with BETA non-zero, the incremented array Y +* must contain the vector y. On exit, Y is overwritten by the +* updated vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY + LOGICAL NOCONJ +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZGEMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( LSAME( TRANS, 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, LENY + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, LENY + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, LENY + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, LENY + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form y := alpha*A*x + y. +* + JX = KX + IF( INCY.EQ.1 )THEN + DO 60, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + DO 50, I = 1, M + Y( I ) = Y( I ) + TEMP*A( I, J ) + 50 CONTINUE + END IF + JX = JX + INCX + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*X( JX ) + IY = KY + DO 70, I = 1, M + Y( IY ) = Y( IY ) + TEMP*A( I, J ) + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + ELSE +* +* Form y := alpha*A'*x + y or y := alpha*conjg( A' )*x + y. +* + JY = KY + IF( INCX.EQ.1 )THEN + DO 110, J = 1, N + TEMP = ZERO + IF( NOCONJ )THEN + DO 90, I = 1, M + TEMP = TEMP + A( I, J )*X( I ) + 90 CONTINUE + ELSE + DO 100, I = 1, M + TEMP = TEMP + DCONJG( A( I, J ) )*X( I ) + 100 CONTINUE + END IF + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 110 CONTINUE + ELSE + DO 140, J = 1, N + TEMP = ZERO + IX = KX + IF( NOCONJ )THEN + DO 120, I = 1, M + TEMP = TEMP + A( I, J )*X( IX ) + IX = IX + INCX + 120 CONTINUE + ELSE + DO 130, I = 1, M + TEMP = TEMP + DCONJG( A( I, J ) )*X( IX ) + IX = IX + INCX + 130 CONTINUE + END IF + Y( JY ) = Y( JY ) + ALPHA*TEMP + JY = JY + INCY + 140 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZGEMV . +* + END diff --git a/costa/native/external/blas/zgerc.f b/costa/native/external/blas/zgerc.f new file mode 100644 index 000000000..968c5b472 --- /dev/null +++ b/costa/native/external/blas/zgerc.f @@ -0,0 +1,157 @@ + SUBROUTINE ZGERC ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA ) +* .. Scalar Arguments .. + COMPLEX*16 ALPHA + INTEGER INCX, INCY, LDA, M, N +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZGERC performs the rank 1 operation +* +* A := alpha*x*conjg( y' ) + A, +* +* where alpha is a scalar, x is an m element vector, y is an n element +* vector and A is an m by n matrix. +* +* Parameters +* ========== +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( m - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the m +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. On exit, A is +* overwritten by the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, J, JY, KX +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( M.LT.0 )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZGERC ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( INCY.GT.0 )THEN + JY = 1 + ELSE + JY = 1 - ( N - 1 )*INCY + END IF + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*DCONJG( Y( JY ) ) + DO 10, I = 1, M + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + END IF + JY = JY + INCY + 20 CONTINUE + ELSE + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( M - 1 )*INCX + END IF + DO 40, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*DCONJG( Y( JY ) ) + IX = KX + DO 30, I = 1, M + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JY = JY + INCY + 40 CONTINUE + END IF +* + RETURN +* +* End of ZGERC . +* + END diff --git a/costa/native/external/blas/zgeru.f b/costa/native/external/blas/zgeru.f new file mode 100644 index 000000000..5283af641 --- /dev/null +++ b/costa/native/external/blas/zgeru.f @@ -0,0 +1,157 @@ + SUBROUTINE ZGERU ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA ) +* .. Scalar Arguments .. + COMPLEX*16 ALPHA + INTEGER INCX, INCY, LDA, M, N +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZGERU performs the rank 1 operation +* +* A := alpha*x*y' + A, +* +* where alpha is a scalar, x is an m element vector, y is an n element +* vector and A is an m by n matrix. +* +* Parameters +* ========== +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix A. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( m - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the m +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry, the leading m by n part of the array A must +* contain the matrix of coefficients. On exit, A is +* overwritten by the updated matrix. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, J, JY, KX +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( M.LT.0 )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZGERU ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( INCY.GT.0 )THEN + JY = 1 + ELSE + JY = 1 - ( N - 1 )*INCY + END IF + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*Y( JY ) + DO 10, I = 1, M + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + END IF + JY = JY + INCY + 20 CONTINUE + ELSE + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( M - 1 )*INCX + END IF + DO 40, J = 1, N + IF( Y( JY ).NE.ZERO )THEN + TEMP = ALPHA*Y( JY ) + IX = KX + DO 30, I = 1, M + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JY = JY + INCY + 40 CONTINUE + END IF +* + RETURN +* +* End of ZGERU . +* + END diff --git a/costa/native/external/blas/zhbmv.f b/costa/native/external/blas/zhbmv.f new file mode 100644 index 000000000..1c0449362 --- /dev/null +++ b/costa/native/external/blas/zhbmv.f @@ -0,0 +1,309 @@ + SUBROUTINE ZHBMV ( UPLO, N, K, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + COMPLEX*16 ALPHA, BETA + INTEGER INCX, INCY, K, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZHBMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n hermitian band matrix, with k super-diagonals. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the band matrix A is being supplied as +* follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* being supplied. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* being supplied. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry, K specifies the number of super-diagonals of the +* matrix A. K must satisfy 0 .le. K. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the hermitian matrix, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer the upper +* triangular part of a hermitian band matrix from conventional +* full matrix storage to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the hermitian matrix, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer the lower +* triangular part of a hermitian band matrix from conventional +* full matrix storage to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Note that the imaginary parts of the diagonal elements need +* not be set and are assumed to be zero. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - COMPLEX*16 array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the +* vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX*16 . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* Y - COMPLEX*16 array of DIMENSION at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the +* vector y. On exit, Y is overwritten by the updated vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KPLUS1, KX, KY, L +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, MIN, DBLE +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( K.LT.0 )THEN + INFO = 3 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZHBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of the array A +* are accessed sequentially with one pass through A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when upper triangle of A is stored. +* + KPLUS1 = K + 1 + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + L = KPLUS1 - J + DO 50, I = MAX( 1, J - K ), J - 1 + Y( I ) = Y( I ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + DCONJG( A( L + I, J ) )*X( I ) + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*DBLE( A( KPLUS1, J ) ) + $ + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + L = KPLUS1 - J + DO 70, I = MAX( 1, J - K ), J - 1 + Y( IY ) = Y( IY ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + DCONJG( A( L + I, J ) )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*DBLE( A( KPLUS1, J ) ) + $ + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + IF( J.GT.K )THEN + KX = KX + INCX + KY = KY + INCY + END IF + 80 CONTINUE + END IF + ELSE +* +* Form y when lower triangle of A is stored. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*DBLE( A( 1, J ) ) + L = 1 - J + DO 90, I = J + 1, MIN( N, J + K ) + Y( I ) = Y( I ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + DCONJG( A( L + I, J ) )*X( I ) + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*DBLE( A( 1, J ) ) + L = 1 - J + IX = JX + IY = JY + DO 110, I = J + 1, MIN( N, J + K ) + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*A( L + I, J ) + TEMP2 = TEMP2 + DCONJG( A( L + I, J ) )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHBMV . +* + END diff --git a/costa/native/external/blas/zhemm.f b/costa/native/external/blas/zhemm.f new file mode 100644 index 000000000..d3912c084 --- /dev/null +++ b/costa/native/external/blas/zhemm.f @@ -0,0 +1,304 @@ + SUBROUTINE ZHEMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO + INTEGER M, N, LDA, LDB, LDC + COMPLEX*16 ALPHA, BETA +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* ZHEMM performs one of the matrix-matrix operations +* +* C := alpha*A*B + beta*C, +* +* or +* +* C := alpha*B*A + beta*C, +* +* where alpha and beta are scalars, A is an hermitian matrix and B and +* C are m by n matrices. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether the hermitian matrix A +* appears on the left or right in the operation as follows: +* +* SIDE = 'L' or 'l' C := alpha*A*B + beta*C, +* +* SIDE = 'R' or 'r' C := alpha*B*A + beta*C, +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the hermitian matrix A is to be +* referenced as follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of the +* hermitian matrix is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of the +* hermitian matrix is to be referenced. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix C. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix C. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +* m when SIDE = 'L' or 'l' and is n otherwise. +* Before entry with SIDE = 'L' or 'l', the m by m part of +* the array A must contain the hermitian matrix, such that +* when UPLO = 'U' or 'u', the leading m by m upper triangular +* part of the array A must contain the upper triangular part +* of the hermitian matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading m by m lower triangular part of the array A +* must contain the lower triangular part of the hermitian +* matrix and the strictly upper triangular part of A is not +* referenced. +* Before entry with SIDE = 'R' or 'r', the n by n part of +* the array A must contain the hermitian matrix, such that +* when UPLO = 'U' or 'u', the leading n by n upper triangular +* part of the array A must contain the upper triangular part +* of the hermitian matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading n by n lower triangular part of the array A +* must contain the lower triangular part of the hermitian +* matrix and the strictly upper triangular part of A is not +* referenced. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), otherwise LDA must be at +* least max( 1, n ). +* Unchanged on exit. +* +* B - COMPLEX*16 array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* BETA - COMPLEX*16 . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then C need not be set on input. +* Unchanged on exit. +* +* C - COMPLEX*16 array of DIMENSION ( LDC, n ). +* Before entry, the leading m by n part of the array C must +* contain the matrix C, except when beta is zero, in which +* case C need not be set on entry. +* On exit, the array C is overwritten by the m by n updated +* matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, DBLE +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, K, NROWA + COMPLEX*16 TEMP1, TEMP2 +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Executable Statements .. +* +* Set NROWA as the number of rows of A. +* + IF( LSAME( SIDE, 'L' ) )THEN + NROWA = M + ELSE + NROWA = N + END IF + UPPER = LSAME( UPLO, 'U' ) +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.LSAME( SIDE, 'L' ) ).AND. + $ ( .NOT.LSAME( SIDE, 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO, 'L' ) ) )THEN + INFO = 2 + ELSE IF( M .LT.0 )THEN + INFO = 3 + ELSE IF( N .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, M ) )THEN + INFO = 12 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZHEMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, M + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( SIDE, 'L' ) )THEN +* +* Form C := alpha*A*B + beta*C. +* + IF( UPPER )THEN + DO 70, J = 1, N + DO 60, I = 1, M + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 50, K = 1, I - 1 + C( K, J ) = C( K, J ) + TEMP1*A( K, I ) + TEMP2 = TEMP2 + + $ B( K, J )*DCONJG( A( K, I ) ) + 50 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*DBLE( A( I, I ) ) + + $ ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*DBLE( A( I, I ) ) + + $ ALPHA*TEMP2 + END IF + 60 CONTINUE + 70 CONTINUE + ELSE + DO 100, J = 1, N + DO 90, I = M, 1, -1 + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 80, K = I + 1, M + C( K, J ) = C( K, J ) + TEMP1*A( K, I ) + TEMP2 = TEMP2 + + $ B( K, J )*DCONJG( A( K, I ) ) + 80 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*DBLE( A( I, I ) ) + + $ ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*DBLE( A( I, I ) ) + + $ ALPHA*TEMP2 + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form C := alpha*B*A + beta*C. +* + DO 170, J = 1, N + TEMP1 = ALPHA*DBLE( A( J, J ) ) + IF( BETA.EQ.ZERO )THEN + DO 110, I = 1, M + C( I, J ) = TEMP1*B( I, J ) + 110 CONTINUE + ELSE + DO 120, I = 1, M + C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J ) + 120 CONTINUE + END IF + DO 140, K = 1, J - 1 + IF( UPPER )THEN + TEMP1 = ALPHA*A( K, J ) + ELSE + TEMP1 = ALPHA*DCONJG( A( J, K ) ) + END IF + DO 130, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 130 CONTINUE + 140 CONTINUE + DO 160, K = J + 1, N + IF( UPPER )THEN + TEMP1 = ALPHA*DCONJG( A( J, K ) ) + ELSE + TEMP1 = ALPHA*A( K, J ) + END IF + DO 150, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 150 CONTINUE + 160 CONTINUE + 170 CONTINUE + END IF +* + RETURN +* +* End of ZHEMM . +* + END diff --git a/costa/native/external/blas/zhemv.f b/costa/native/external/blas/zhemv.f new file mode 100644 index 000000000..54aa7b900 --- /dev/null +++ b/costa/native/external/blas/zhemv.f @@ -0,0 +1,266 @@ + SUBROUTINE ZHEMV ( UPLO, N, ALPHA, A, LDA, X, INCX, + $ BETA, Y, INCY ) +* .. Scalar Arguments .. + COMPLEX*16 ALPHA, BETA + INTEGER INCX, INCY, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZHEMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n hermitian matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of A is not referenced. +* Note that the imaginary parts of the diagonal elements need +* not be set and are assumed to be zero. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX*16 . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. On exit, Y is overwritten by the updated +* vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, DBLE +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 5 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + ELSE IF( INCY.EQ.0 )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZHEMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when A is stored in upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + DO 50, I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + DCONJG( A( I, J ) )*X( I ) + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*DBLE( A( J, J ) ) + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70, I = 1, J - 1 + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + DCONJG( A( I, J ) )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*DBLE( A( J, J ) ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y when A is stored in lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*DBLE( A( J, J ) ) + DO 90, I = J + 1, N + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + DCONJG( A( I, J ) )*X( I ) + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*DBLE( A( J, J ) ) + IX = JX + IY = JY + DO 110, I = J + 1, N + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + DCONJG( A( I, J ) )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHEMV . +* + END diff --git a/costa/native/external/blas/zher.f b/costa/native/external/blas/zher.f new file mode 100644 index 000000000..fcf40a5eb --- /dev/null +++ b/costa/native/external/blas/zher.f @@ -0,0 +1,212 @@ + SUBROUTINE ZHER ( UPLO, N, ALPHA, X, INCX, A, LDA ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA + INTEGER INCX, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* ZHER performs the hermitian rank 1 operation +* +* A := alpha*x*conjg( x' ) + A, +* +* where alpha is a real scalar, x is an n element vector and A is an +* n by n hermitian matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of A is not referenced. On exit, the +* upper triangular part of the array A is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of A is not referenced. On exit, the +* lower triangular part of the array A is overwritten by the +* lower triangular part of the updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, J, JX, KX +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, DBLE +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZHER ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.DBLE( ZERO ) ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when A is stored in upper triangle. +* + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*DCONJG( X( J ) ) + DO 10, I = 1, J - 1 + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + A( J, J ) = DBLE( A( J, J ) ) + DBLE( X( J )*TEMP ) + ELSE + A( J, J ) = DBLE( A( J, J ) ) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*DCONJG( X( JX ) ) + IX = KX + DO 30, I = 1, J - 1 + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + A( J, J ) = DBLE( A( J, J ) ) + DBLE( X( JX )*TEMP ) + ELSE + A( J, J ) = DBLE( A( J, J ) ) + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in lower triangle. +* + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*DCONJG( X( J ) ) + A( J, J ) = DBLE( A( J, J ) ) + DBLE( TEMP*X( J ) ) + DO 50, I = J + 1, N + A( I, J ) = A( I, J ) + X( I )*TEMP + 50 CONTINUE + ELSE + A( J, J ) = DBLE( A( J, J ) ) + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*DCONJG( X( JX ) ) + A( J, J ) = DBLE( A( J, J ) ) + DBLE( TEMP*X( JX ) ) + IX = JX + DO 70, I = J + 1, N + IX = IX + INCX + A( I, J ) = A( I, J ) + X( IX )*TEMP + 70 CONTINUE + ELSE + A( J, J ) = DBLE( A( J, J ) ) + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHER . +* + END diff --git a/costa/native/external/blas/zher2.f b/costa/native/external/blas/zher2.f new file mode 100644 index 000000000..06acdff7b --- /dev/null +++ b/costa/native/external/blas/zher2.f @@ -0,0 +1,249 @@ + SUBROUTINE ZHER2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, A, LDA ) +* .. Scalar Arguments .. + COMPLEX*16 ALPHA + INTEGER INCX, INCY, LDA, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZHER2 performs the hermitian rank 2 operation +* +* A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, +* +* where alpha is a scalar, x and y are n element vectors and A is an n +* by n hermitian matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of A is not referenced. On exit, the +* upper triangular part of the array A is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of A is not referenced. On exit, the +* lower triangular part of the array A is overwritten by the +* lower triangular part of the updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, DBLE +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZHER2 ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set up the start points in X and Y if the increments are not both +* unity. +* + IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF + JX = KX + JY = KY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when A is stored in the upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 20, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*DCONJG( Y( J ) ) + TEMP2 = DCONJG( ALPHA*X( J ) ) + DO 10, I = 1, J - 1 + A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2 + 10 CONTINUE + A( J, J ) = DBLE( A( J, J ) ) + + $ DBLE( X( J )*TEMP1 + Y( J )*TEMP2 ) + ELSE + A( J, J ) = DBLE( A( J, J ) ) + END IF + 20 CONTINUE + ELSE + DO 40, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*DCONJG( Y( JY ) ) + TEMP2 = DCONJG( ALPHA*X( JX ) ) + IX = KX + IY = KY + DO 30, I = 1, J - 1 + A( I, J ) = A( I, J ) + X( IX )*TEMP1 + $ + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 30 CONTINUE + A( J, J ) = DBLE( A( J, J ) ) + + $ DBLE( X( JX )*TEMP1 + Y( JY )*TEMP2 ) + ELSE + A( J, J ) = DBLE( A( J, J ) ) + END IF + JX = JX + INCX + JY = JY + INCY + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in the lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*DCONJG( Y( J ) ) + TEMP2 = DCONJG( ALPHA*X( J ) ) + A( J, J ) = DBLE( A( J, J ) ) + + $ DBLE( X( J )*TEMP1 + Y( J )*TEMP2 ) + DO 50, I = J + 1, N + A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2 + 50 CONTINUE + ELSE + A( J, J ) = DBLE( A( J, J ) ) + END IF + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*DCONJG( Y( JY ) ) + TEMP2 = DCONJG( ALPHA*X( JX ) ) + A( J, J ) = DBLE( A( J, J ) ) + + $ DBLE( X( JX )*TEMP1 + Y( JY )*TEMP2 ) + IX = JX + IY = JY + DO 70, I = J + 1, N + IX = IX + INCX + IY = IY + INCY + A( I, J ) = A( I, J ) + X( IX )*TEMP1 + $ + Y( IY )*TEMP2 + 70 CONTINUE + ELSE + A( J, J ) = DBLE( A( J, J ) ) + END IF + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHER2 . +* + END diff --git a/costa/native/external/blas/zher2k.f b/costa/native/external/blas/zher2k.f new file mode 100644 index 000000000..408d75cf3 --- /dev/null +++ b/costa/native/external/blas/zher2k.f @@ -0,0 +1,372 @@ + SUBROUTINE ZHER2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB, BETA, + $ C, LDC ) +* .. Scalar Arguments .. + CHARACTER TRANS, UPLO + INTEGER K, LDA, LDB, LDC, N + DOUBLE PRECISION BETA + COMPLEX*16 ALPHA +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* ZHER2K performs one of the hermitian rank 2k operations +* +* C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C, +* +* or +* +* C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C, +* +* where alpha and beta are scalars with beta real, C is an n by n +* hermitian matrix and A and B are n by k matrices in the first case +* and k by n matrices in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*conjg( B' ) + +* conjg( alpha )*B*conjg( A' ) + +* beta*C. +* +* TRANS = 'C' or 'c' C := alpha*conjg( A' )*B + +* conjg( alpha )*conjg( B' )*A + +* beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrices A and B, and on entry with +* TRANS = 'C' or 'c', K specifies the number of rows of the +* matrices A and B. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* B - COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array B must contain the matrix B, otherwise +* the leading k by n part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDB must be at least max( 1, n ), otherwise LDB must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - COMPLEX*16 array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1. +* Ed Anderson, Cray Research Inc. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCONJG, MAX +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + COMPLEX*16 TEMP1, TEMP2 +* .. +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) ) THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ) .AND. ( .NOT.LSAME( UPLO, 'L' ) ) ) THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ) .AND. + $ ( .NOT.LSAME( TRANS, 'C' ) ) ) THEN + INFO = 2 + ELSE IF( N.LT.0 ) THEN + INFO = 3 + ELSE IF( K.LT.0 ) THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) ) THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, NROWA ) ) THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, N ) ) THEN + INFO = 12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHER2K', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ) .OR. ( ( ( ALPHA.EQ.ZERO ) .OR. ( K.EQ.0 ) ) .AND. + $ ( BETA.EQ.ONE ) ) )RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO ) THEN + IF( UPPER ) THEN + IF( BETA.EQ.DBLE( ZERO ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = 1, J - 1 + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + C( J, J ) = BETA*DBLE( C( J, J ) ) + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.DBLE( ZERO ) ) THEN + DO 60 J = 1, N + DO 50 I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1, N + C( J, J ) = BETA*DBLE( C( J, J ) ) + DO 70 I = J + 1, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) ) THEN +* +* Form C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + +* C. +* + IF( UPPER ) THEN + DO 130 J = 1, N + IF( BETA.EQ.DBLE( ZERO ) ) THEN + DO 90 I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE ) THEN + DO 100 I = 1, J - 1 + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + C( J, J ) = BETA*DBLE( C( J, J ) ) + ELSE + C( J, J ) = DBLE( C( J, J ) ) + END IF + DO 120 L = 1, K + IF( ( A( J, L ).NE.ZERO ) .OR. ( B( J, L ).NE.ZERO ) ) + $ THEN + TEMP1 = ALPHA*DCONJG( B( J, L ) ) + TEMP2 = DCONJG( ALPHA*A( J, L ) ) + DO 110 I = 1, J - 1 + C( I, J ) = C( I, J ) + A( I, L )*TEMP1 + + $ B( I, L )*TEMP2 + 110 CONTINUE + C( J, J ) = DBLE( C( J, J ) ) + + $ DBLE( A( J, L )*TEMP1+B( J, L )*TEMP2 ) + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180 J = 1, N + IF( BETA.EQ.DBLE( ZERO ) ) THEN + DO 140 I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE ) THEN + DO 150 I = J + 1, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + C( J, J ) = BETA*DBLE( C( J, J ) ) + ELSE + C( J, J ) = DBLE( C( J, J ) ) + END IF + DO 170 L = 1, K + IF( ( A( J, L ).NE.ZERO ) .OR. ( B( J, L ).NE.ZERO ) ) + $ THEN + TEMP1 = ALPHA*DCONJG( B( J, L ) ) + TEMP2 = DCONJG( ALPHA*A( J, L ) ) + DO 160 I = J + 1, N + C( I, J ) = C( I, J ) + A( I, L )*TEMP1 + + $ B( I, L )*TEMP2 + 160 CONTINUE + C( J, J ) = DBLE( C( J, J ) ) + + $ DBLE( A( J, L )*TEMP1+B( J, L )*TEMP2 ) + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + +* C. +* + IF( UPPER ) THEN + DO 210 J = 1, N + DO 200 I = 1, J + TEMP1 = ZERO + TEMP2 = ZERO + DO 190 L = 1, K + TEMP1 = TEMP1 + DCONJG( A( L, I ) )*B( L, J ) + TEMP2 = TEMP2 + DCONJG( B( L, I ) )*A( L, J ) + 190 CONTINUE + IF( I.EQ.J ) THEN + IF( BETA.EQ.DBLE( ZERO ) ) THEN + C( J, J ) = DBLE( ALPHA*TEMP1+DCONJG( ALPHA )* + $ TEMP2 ) + ELSE + C( J, J ) = BETA*DBLE( C( J, J ) ) + + $ DBLE( ALPHA*TEMP1+DCONJG( ALPHA )* + $ TEMP2 ) + END IF + ELSE + IF( BETA.EQ.DBLE( ZERO ) ) THEN + C( I, J ) = ALPHA*TEMP1 + DCONJG( ALPHA )*TEMP2 + ELSE + C( I, J ) = BETA*C( I, J ) + ALPHA*TEMP1 + + $ DCONJG( ALPHA )*TEMP2 + END IF + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240 J = 1, N + DO 230 I = J, N + TEMP1 = ZERO + TEMP2 = ZERO + DO 220 L = 1, K + TEMP1 = TEMP1 + DCONJG( A( L, I ) )*B( L, J ) + TEMP2 = TEMP2 + DCONJG( B( L, I ) )*A( L, J ) + 220 CONTINUE + IF( I.EQ.J ) THEN + IF( BETA.EQ.DBLE( ZERO ) ) THEN + C( J, J ) = DBLE( ALPHA*TEMP1+DCONJG( ALPHA )* + $ TEMP2 ) + ELSE + C( J, J ) = BETA*DBLE( C( J, J ) ) + + $ DBLE( ALPHA*TEMP1+DCONJG( ALPHA )* + $ TEMP2 ) + END IF + ELSE + IF( BETA.EQ.DBLE( ZERO ) ) THEN + C( I, J ) = ALPHA*TEMP1 + DCONJG( ALPHA )*TEMP2 + ELSE + C( I, J ) = BETA*C( I, J ) + ALPHA*TEMP1 + + $ DCONJG( ALPHA )*TEMP2 + END IF + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHER2K. +* + END diff --git a/costa/native/external/blas/zherk.f b/costa/native/external/blas/zherk.f new file mode 100644 index 000000000..cfbf71802 --- /dev/null +++ b/costa/native/external/blas/zherk.f @@ -0,0 +1,330 @@ + SUBROUTINE ZHERK( UPLO, TRANS, N, K, ALPHA, A, LDA, BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER TRANS, UPLO + INTEGER K, LDA, LDC, N + DOUBLE PRECISION ALPHA, BETA +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* ZHERK performs one of the hermitian rank k operations +* +* C := alpha*A*conjg( A' ) + beta*C, +* +* or +* +* C := alpha*conjg( A' )*A + beta*C, +* +* where alpha and beta are real scalars, C is an n by n hermitian +* matrix and A is an n by k matrix in the first case and a k by n +* matrix in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*conjg( A' ) + beta*C. +* +* TRANS = 'C' or 'c' C := alpha*conjg( A' )*A + beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrix A, and on entry with +* TRANS = 'C' or 'c', K specifies the number of rows of the +* matrix A. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - DOUBLE PRECISION. +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - COMPLEX*16 array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the hermitian matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the hermitian matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1. +* Ed Anderson, Cray Research Inc. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, DCONJG, MAX +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + DOUBLE PRECISION RTEMP + COMPLEX*16 TEMP +* .. +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) ) THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ) .AND. ( .NOT.LSAME( UPLO, 'L' ) ) ) THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ) .AND. + $ ( .NOT.LSAME( TRANS, 'C' ) ) ) THEN + INFO = 2 + ELSE IF( N.LT.0 ) THEN + INFO = 3 + ELSE IF( K.LT.0 ) THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) ) THEN + INFO = 7 + ELSE IF( LDC.LT.MAX( 1, N ) ) THEN + INFO = 10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHERK ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ) .OR. ( ( ( ALPHA.EQ.ZERO ) .OR. ( K.EQ.0 ) ) .AND. + $ ( BETA.EQ.ONE ) ) )RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO ) THEN + IF( UPPER ) THEN + IF( BETA.EQ.ZERO ) THEN + DO 20 J = 1, N + DO 10 I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = 1, J - 1 + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + C( J, J ) = BETA*DBLE( C( J, J ) ) + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.ZERO ) THEN + DO 60 J = 1, N + DO 50 I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1, N + C( J, J ) = BETA*DBLE( C( J, J ) ) + DO 70 I = J + 1, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) ) THEN +* +* Form C := alpha*A*conjg( A' ) + beta*C. +* + IF( UPPER ) THEN + DO 130 J = 1, N + IF( BETA.EQ.ZERO ) THEN + DO 90 I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE ) THEN + DO 100 I = 1, J - 1 + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + C( J, J ) = BETA*DBLE( C( J, J ) ) + ELSE + C( J, J ) = DBLE( C( J, J ) ) + END IF + DO 120 L = 1, K + IF( A( J, L ).NE.DCMPLX( ZERO ) ) THEN + TEMP = ALPHA*DCONJG( A( J, L ) ) + DO 110 I = 1, J - 1 + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 110 CONTINUE + C( J, J ) = DBLE( C( J, J ) ) + + $ DBLE( TEMP*A( I, L ) ) + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180 J = 1, N + IF( BETA.EQ.ZERO ) THEN + DO 140 I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE ) THEN + C( J, J ) = BETA*DBLE( C( J, J ) ) + DO 150 I = J + 1, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + ELSE + C( J, J ) = DBLE( C( J, J ) ) + END IF + DO 170 L = 1, K + IF( A( J, L ).NE.DCMPLX( ZERO ) ) THEN + TEMP = ALPHA*DCONJG( A( J, L ) ) + C( J, J ) = DBLE( C( J, J ) ) + + $ DBLE( TEMP*A( J, L ) ) + DO 160 I = J + 1, N + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*conjg( A' )*A + beta*C. +* + IF( UPPER ) THEN + DO 220 J = 1, N + DO 200 I = 1, J - 1 + TEMP = ZERO + DO 190 L = 1, K + TEMP = TEMP + DCONJG( A( L, I ) )*A( L, J ) + 190 CONTINUE + IF( BETA.EQ.ZERO ) THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 200 CONTINUE + RTEMP = ZERO + DO 210 L = 1, K + RTEMP = RTEMP + DCONJG( A( L, J ) )*A( L, J ) + 210 CONTINUE + IF( BETA.EQ.ZERO ) THEN + C( J, J ) = ALPHA*RTEMP + ELSE + C( J, J ) = ALPHA*RTEMP + BETA*DBLE( C( J, J ) ) + END IF + 220 CONTINUE + ELSE + DO 260 J = 1, N + RTEMP = ZERO + DO 230 L = 1, K + RTEMP = RTEMP + DCONJG( A( L, J ) )*A( L, J ) + 230 CONTINUE + IF( BETA.EQ.ZERO ) THEN + C( J, J ) = ALPHA*RTEMP + ELSE + C( J, J ) = ALPHA*RTEMP + BETA*DBLE( C( J, J ) ) + END IF + DO 250 I = J + 1, N + TEMP = ZERO + DO 240 L = 1, K + TEMP = TEMP + DCONJG( A( L, I ) )*A( L, J ) + 240 CONTINUE + IF( BETA.EQ.ZERO ) THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 250 CONTINUE + 260 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHERK . +* + END diff --git a/costa/native/external/blas/zhpmv.f b/costa/native/external/blas/zhpmv.f new file mode 100644 index 000000000..9cde9234c --- /dev/null +++ b/costa/native/external/blas/zhpmv.f @@ -0,0 +1,270 @@ + SUBROUTINE ZHPMV ( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY ) +* .. Scalar Arguments .. + COMPLEX*16 ALPHA, BETA + INTEGER INCX, INCY, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX*16 AP( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZHPMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n hermitian matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* AP - COMPLEX*16 array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. +* Note that the imaginary parts of the diagonal elements need +* not be set and are assumed to be zero. +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX*16 . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. On exit, Y is overwritten by the updated +* vector y. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, DBLE +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 6 + ELSE IF( INCY.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZHPMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE )THEN + IF( INCY.EQ.1 )THEN + IF( BETA.EQ.ZERO )THEN + DO 10, I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20, I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO )THEN + DO 30, I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40, I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form y when AP contains the upper triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + K = KK + DO 50, I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + DCONJG( AP( K ) )*X( I ) + K = K + 1 + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*DBLE( AP( KK + J - 1 ) ) + $ + ALPHA*TEMP2 + KK = KK + J + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70, K = KK, KK + J - 2 + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + DCONJG( AP( K ) )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*DBLE( AP( KK + J - 1 ) ) + $ + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + J + 80 CONTINUE + END IF + ELSE +* +* Form y when AP contains the lower triangle. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 100, J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*DBLE( AP( KK ) ) + K = KK + 1 + DO 90, I = J + 1, N + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + DCONJG( AP( K ) )*X( I ) + K = K + 1 + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + KK = KK + ( N - J + 1 ) + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120, J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*DBLE( AP( KK ) ) + IX = JX + IY = JY + DO 110, K = KK + 1, KK + N - J + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + DCONJG( AP( K ) )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + ( N - J + 1 ) + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHPMV . +* + END diff --git a/costa/native/external/blas/zhpr.f b/costa/native/external/blas/zhpr.f new file mode 100644 index 000000000..2e368de49 --- /dev/null +++ b/costa/native/external/blas/zhpr.f @@ -0,0 +1,217 @@ + SUBROUTINE ZHPR ( UPLO, N, ALPHA, X, INCX, AP ) +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA + INTEGER INCX, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX*16 AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* ZHPR performs the hermitian rank 1 operation +* +* A := alpha*x*conjg( x' ) + A, +* +* where alpha is a real scalar, x is an n element vector and A is an +* n by n hermitian matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - DOUBLE PRECISION. +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* AP - COMPLEX*16 array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. On exit, the array +* AP is overwritten by the upper triangular part of the +* updated matrix. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. On exit, the array +* AP is overwritten by the lower triangular part of the +* updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, DBLE +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZHPR ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.DBLE( ZERO ) ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when upper triangle is stored in AP. +* + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*DCONJG( X( J ) ) + K = KK + DO 10, I = 1, J - 1 + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 10 CONTINUE + AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) ) + $ + DBLE( X( J )*TEMP ) + ELSE + AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) ) + END IF + KK = KK + J + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*DCONJG( X( JX ) ) + IX = KX + DO 30, K = KK, KK + J - 2 + AP( K ) = AP( K ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) ) + $ + DBLE( X( JX )*TEMP ) + ELSE + AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) ) + END IF + JX = JX + INCX + KK = KK + J + 40 CONTINUE + END IF + ELSE +* +* Form A when lower triangle is stored in AP. +* + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = ALPHA*DCONJG( X( J ) ) + AP( KK ) = DBLE( AP( KK ) ) + DBLE( TEMP*X( J ) ) + K = KK + 1 + DO 50, I = J + 1, N + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 50 CONTINUE + ELSE + AP( KK ) = DBLE( AP( KK ) ) + END IF + KK = KK + N - J + 1 + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = ALPHA*DCONJG( X( JX ) ) + AP( KK ) = DBLE( AP( KK ) ) + DBLE( TEMP*X( JX ) ) + IX = JX + DO 70, K = KK + 1, KK + N - J + IX = IX + INCX + AP( K ) = AP( K ) + X( IX )*TEMP + 70 CONTINUE + ELSE + AP( KK ) = DBLE( AP( KK ) ) + END IF + JX = JX + INCX + KK = KK + N - J + 1 + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHPR . +* + END diff --git a/costa/native/external/blas/zhpr2.f b/costa/native/external/blas/zhpr2.f new file mode 100644 index 000000000..e10774b14 --- /dev/null +++ b/costa/native/external/blas/zhpr2.f @@ -0,0 +1,251 @@ + SUBROUTINE ZHPR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, AP ) +* .. Scalar Arguments .. + COMPLEX*16 ALPHA + INTEGER INCX, INCY, N + CHARACTER*1 UPLO +* .. Array Arguments .. + COMPLEX*16 AP( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZHPR2 performs the hermitian rank 2 operation +* +* A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, +* +* where alpha is a scalar, x and y are n element vectors and A is an +* n by n hermitian matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* Y - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. +* Unchanged on exit. +* +* INCY - INTEGER. +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* AP - COMPLEX*16 array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. On exit, the array +* AP is overwritten by the upper triangular part of the +* updated matrix. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the hermitian matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. On exit, the array +* AP is overwritten by the lower triangular part of the +* updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP1, TEMP2 + INTEGER I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, DBLE +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO, 'U' ).AND. + $ .NOT.LSAME( UPLO, 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( INCX.EQ.0 )THEN + INFO = 5 + ELSE IF( INCY.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZHPR2 ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set up the start points in X and Y if the increments are not both +* unity. +* + IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF + JX = KX + JY = KY + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* + KK = 1 + IF( LSAME( UPLO, 'U' ) )THEN +* +* Form A when upper triangle is stored in AP. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 20, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*DCONJG( Y( J ) ) + TEMP2 = DCONJG( ALPHA*X( J ) ) + K = KK + DO 10, I = 1, J - 1 + AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2 + K = K + 1 + 10 CONTINUE + AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) ) + + $ DBLE( X( J )*TEMP1 + Y( J )*TEMP2 ) + ELSE + AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) ) + END IF + KK = KK + J + 20 CONTINUE + ELSE + DO 40, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*DCONJG( Y( JY ) ) + TEMP2 = DCONJG( ALPHA*X( JX ) ) + IX = KX + IY = KY + DO 30, K = KK, KK + J - 2 + AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2 + IX = IX + INCX + IY = IY + INCY + 30 CONTINUE + AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) ) + + $ DBLE( X( JX )*TEMP1 + + $ Y( JY )*TEMP2 ) + ELSE + AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) ) + END IF + JX = JX + INCX + JY = JY + INCY + KK = KK + J + 40 CONTINUE + END IF + ELSE +* +* Form A when lower triangle is stored in AP. +* + IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN + DO 60, J = 1, N + IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN + TEMP1 = ALPHA*DCONJG( Y( J ) ) + TEMP2 = DCONJG( ALPHA*X( J ) ) + AP( KK ) = DBLE( AP( KK ) ) + + $ DBLE( X( J )*TEMP1 + Y( J )*TEMP2 ) + K = KK + 1 + DO 50, I = J + 1, N + AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2 + K = K + 1 + 50 CONTINUE + ELSE + AP( KK ) = DBLE( AP( KK ) ) + END IF + KK = KK + N - J + 1 + 60 CONTINUE + ELSE + DO 80, J = 1, N + IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN + TEMP1 = ALPHA*DCONJG( Y( JY ) ) + TEMP2 = DCONJG( ALPHA*X( JX ) ) + AP( KK ) = DBLE( AP( KK ) ) + + $ DBLE( X( JX )*TEMP1 + Y( JY )*TEMP2 ) + IX = JX + IY = JY + DO 70, K = KK + 1, KK + N - J + IX = IX + INCX + IY = IY + INCY + AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2 + 70 CONTINUE + ELSE + AP( KK ) = DBLE( AP( KK ) ) + END IF + JX = JX + INCX + JY = JY + INCY + KK = KK + N - J + 1 + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHPR2 . +* + END diff --git a/costa/native/external/blas/zrotg.f b/costa/native/external/blas/zrotg.f new file mode 100644 index 000000000..f6a4aa127 --- /dev/null +++ b/costa/native/external/blas/zrotg.f @@ -0,0 +1,21 @@ + subroutine zrotg(ca,cb,c,s) + double complex ca,cb,s + double precision c + double precision norm,scale + double complex alpha + if (cdabs(ca) .ne. 0.0d0) go to 10 + c = 0.0d0 + s = (1.0d0,0.0d0) + ca = cb + go to 20 + 10 continue + scale = cdabs(ca) + cdabs(cb) + norm = scale*dsqrt((cdabs(ca/dcmplx(scale,0.0d0)))**2 + + * (cdabs(cb/dcmplx(scale,0.0d0)))**2) + alpha = ca /cdabs(ca) + c = cdabs(ca) / norm + s = alpha * dconjg(cb) / norm + ca = alpha * norm + 20 continue + return + end diff --git a/costa/native/external/blas/zscal.f b/costa/native/external/blas/zscal.f new file mode 100644 index 000000000..6fa857639 --- /dev/null +++ b/costa/native/external/blas/zscal.f @@ -0,0 +1,29 @@ + subroutine zscal(n,za,zx,incx) +c +c scales a vector by a constant. +c jack dongarra, 3/11/78. +c modified 3/93 to return if incx .le. 0. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double complex za,zx(*) + integer i,incx,ix,n +c + if( n.le.0 .or. incx.le.0 )return + if(incx.eq.1)go to 20 +c +c code for increment not equal to 1 +c + ix = 1 + do 10 i = 1,n + zx(ix) = za*zx(ix) + ix = ix + incx + 10 continue + return +c +c code for increment equal to 1 +c + 20 do 30 i = 1,n + zx(i) = za*zx(i) + 30 continue + return + end diff --git a/costa/native/external/blas/zswap.f b/costa/native/external/blas/zswap.f new file mode 100644 index 000000000..f28a4e415 --- /dev/null +++ b/costa/native/external/blas/zswap.f @@ -0,0 +1,36 @@ + subroutine zswap (n,zx,incx,zy,incy) +c +c interchanges two vectors. +c jack dongarra, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double complex zx(*),zy(*),ztemp + integer i,incx,incy,ix,iy,n +c + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments not equal +c to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + ztemp = zx(ix) + zx(ix) = zy(iy) + zy(iy) = ztemp + ix = ix + incx + iy = iy + incy + 10 continue + return +c +c code for both increments equal to 1 + 20 do 30 i = 1,n + ztemp = zx(i) + zx(i) = zy(i) + zy(i) = ztemp + 30 continue + return + end diff --git a/costa/native/external/blas/zsymm.f b/costa/native/external/blas/zsymm.f new file mode 100644 index 000000000..20b7c08d8 --- /dev/null +++ b/costa/native/external/blas/zsymm.f @@ -0,0 +1,296 @@ + SUBROUTINE ZSYMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO + INTEGER M, N, LDA, LDB, LDC + COMPLEX*16 ALPHA, BETA +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* ZSYMM performs one of the matrix-matrix operations +* +* C := alpha*A*B + beta*C, +* +* or +* +* C := alpha*B*A + beta*C, +* +* where alpha and beta are scalars, A is a symmetric matrix and B and +* C are m by n matrices. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether the symmetric matrix A +* appears on the left or right in the operation as follows: +* +* SIDE = 'L' or 'l' C := alpha*A*B + beta*C, +* +* SIDE = 'R' or 'r' C := alpha*B*A + beta*C, +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the symmetric matrix A is to be +* referenced as follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of the +* symmetric matrix is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of the +* symmetric matrix is to be referenced. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of the matrix C. +* M must be at least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of the matrix C. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +* m when SIDE = 'L' or 'l' and is n otherwise. +* Before entry with SIDE = 'L' or 'l', the m by m part of +* the array A must contain the symmetric matrix, such that +* when UPLO = 'U' or 'u', the leading m by m upper triangular +* part of the array A must contain the upper triangular part +* of the symmetric matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading m by m lower triangular part of the array A +* must contain the lower triangular part of the symmetric +* matrix and the strictly upper triangular part of A is not +* referenced. +* Before entry with SIDE = 'R' or 'r', the n by n part of +* the array A must contain the symmetric matrix, such that +* when UPLO = 'U' or 'u', the leading n by n upper triangular +* part of the array A must contain the upper triangular part +* of the symmetric matrix and the strictly lower triangular +* part of A is not referenced, and when UPLO = 'L' or 'l', +* the leading n by n lower triangular part of the array A +* must contain the lower triangular part of the symmetric +* matrix and the strictly upper triangular part of A is not +* referenced. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), otherwise LDA must be at +* least max( 1, n ). +* Unchanged on exit. +* +* B - COMPLEX*16 array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* BETA - COMPLEX*16 . +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then C need not be set on input. +* Unchanged on exit. +* +* C - COMPLEX*16 array of DIMENSION ( LDC, n ). +* Before entry, the leading m by n part of the array C must +* contain the matrix C, except when beta is zero, in which +* case C need not be set on entry. +* On exit, the array C is overwritten by the m by n updated +* matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, K, NROWA + COMPLEX*16 TEMP1, TEMP2 +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Executable Statements .. +* +* Set NROWA as the number of rows of A. +* + IF( LSAME( SIDE, 'L' ) )THEN + NROWA = M + ELSE + NROWA = N + END IF + UPPER = LSAME( UPLO, 'U' ) +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.LSAME( SIDE, 'L' ) ).AND. + $ ( .NOT.LSAME( SIDE, 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO, 'L' ) ) )THEN + INFO = 2 + ELSE IF( M .LT.0 )THEN + INFO = 3 + ELSE IF( N .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, M ) )THEN + INFO = 12 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZSYMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, M + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( SIDE, 'L' ) )THEN +* +* Form C := alpha*A*B + beta*C. +* + IF( UPPER )THEN + DO 70, J = 1, N + DO 60, I = 1, M + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 50, K = 1, I - 1 + C( K, J ) = C( K, J ) + TEMP1 *A( K, I ) + TEMP2 = TEMP2 + B( K, J )*A( K, I ) + 50 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*A( I, I ) + ALPHA*TEMP2 + END IF + 60 CONTINUE + 70 CONTINUE + ELSE + DO 100, J = 1, N + DO 90, I = M, 1, -1 + TEMP1 = ALPHA*B( I, J ) + TEMP2 = ZERO + DO 80, K = I + 1, M + C( K, J ) = C( K, J ) + TEMP1 *A( K, I ) + TEMP2 = TEMP2 + B( K, J )*A( K, I ) + 80 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ TEMP1*A( I, I ) + ALPHA*TEMP2 + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form C := alpha*B*A + beta*C. +* + DO 170, J = 1, N + TEMP1 = ALPHA*A( J, J ) + IF( BETA.EQ.ZERO )THEN + DO 110, I = 1, M + C( I, J ) = TEMP1*B( I, J ) + 110 CONTINUE + ELSE + DO 120, I = 1, M + C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J ) + 120 CONTINUE + END IF + DO 140, K = 1, J - 1 + IF( UPPER )THEN + TEMP1 = ALPHA*A( K, J ) + ELSE + TEMP1 = ALPHA*A( J, K ) + END IF + DO 130, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 130 CONTINUE + 140 CONTINUE + DO 160, K = J + 1, N + IF( UPPER )THEN + TEMP1 = ALPHA*A( J, K ) + ELSE + TEMP1 = ALPHA*A( K, J ) + END IF + DO 150, I = 1, M + C( I, J ) = C( I, J ) + TEMP1*B( I, K ) + 150 CONTINUE + 160 CONTINUE + 170 CONTINUE + END IF +* + RETURN +* +* End of ZSYMM . +* + END diff --git a/costa/native/external/blas/zsyr2k.f b/costa/native/external/blas/zsyr2k.f new file mode 100644 index 000000000..aba2071ac --- /dev/null +++ b/costa/native/external/blas/zsyr2k.f @@ -0,0 +1,324 @@ + SUBROUTINE ZSYR2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 UPLO, TRANS + INTEGER N, K, LDA, LDB, LDC + COMPLEX*16 ALPHA, BETA +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* ZSYR2K performs one of the symmetric rank 2k operations +* +* C := alpha*A*B' + alpha*B*A' + beta*C, +* +* or +* +* C := alpha*A'*B + alpha*B'*A + beta*C, +* +* where alpha and beta are scalars, C is an n by n symmetric matrix +* and A and B are n by k matrices in the first case and k by n +* matrices in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + +* beta*C. +* +* TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + +* beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrices A and B, and on entry with +* TRANS = 'T' or 't', K specifies the number of rows of the +* matrices A and B. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* B - COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array B must contain the matrix B, otherwise +* the leading k by n part of the array B must contain the +* matrix B. +* Unchanged on exit. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDB must be at least max( 1, n ), otherwise LDB must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - COMPLEX*16 . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - COMPLEX*16 array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + COMPLEX*16 TEMP1, TEMP2 +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) )THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'T' ) ) )THEN + INFO = 2 + ELSE IF( N .LT.0 )THEN + INFO = 3 + ELSE IF( K .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDB.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDC.LT.MAX( 1, N ) )THEN + INFO = 12 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZSYR2K', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( UPPER )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, J + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.ZERO )THEN + DO 60, J = 1, N + DO 50, I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80, J = 1, N + DO 70, I = J, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form C := alpha*A*B' + alpha*B*A' + C. +* + IF( UPPER )THEN + DO 130, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 90, I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 100, I = 1, J + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + END IF + DO 120, L = 1, K + IF( ( A( J, L ).NE.ZERO ).OR. + $ ( B( J, L ).NE.ZERO ) )THEN + TEMP1 = ALPHA*B( J, L ) + TEMP2 = ALPHA*A( J, L ) + DO 110, I = 1, J + C( I, J ) = C( I, J ) + A( I, L )*TEMP1 + + $ B( I, L )*TEMP2 + 110 CONTINUE + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 140, I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 150, I = J, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + END IF + DO 170, L = 1, K + IF( ( A( J, L ).NE.ZERO ).OR. + $ ( B( J, L ).NE.ZERO ) )THEN + TEMP1 = ALPHA*B( J, L ) + TEMP2 = ALPHA*A( J, L ) + DO 160, I = J, N + C( I, J ) = C( I, J ) + A( I, L )*TEMP1 + + $ B( I, L )*TEMP2 + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*A'*B + alpha*B'*A + C. +* + IF( UPPER )THEN + DO 210, J = 1, N + DO 200, I = 1, J + TEMP1 = ZERO + TEMP2 = ZERO + DO 190, L = 1, K + TEMP1 = TEMP1 + A( L, I )*B( L, J ) + TEMP2 = TEMP2 + B( L, I )*A( L, J ) + 190 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ ALPHA*TEMP1 + ALPHA*TEMP2 + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240, J = 1, N + DO 230, I = J, N + TEMP1 = ZERO + TEMP2 = ZERO + DO 220, L = 1, K + TEMP1 = TEMP1 + A( L, I )*B( L, J ) + TEMP2 = TEMP2 + B( L, I )*A( L, J ) + 220 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2 + ELSE + C( I, J ) = BETA *C( I, J ) + + $ ALPHA*TEMP1 + ALPHA*TEMP2 + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZSYR2K. +* + END diff --git a/costa/native/external/blas/zsyrk.f b/costa/native/external/blas/zsyrk.f new file mode 100644 index 000000000..77e2c20aa --- /dev/null +++ b/costa/native/external/blas/zsyrk.f @@ -0,0 +1,293 @@ + SUBROUTINE ZSYRK ( UPLO, TRANS, N, K, ALPHA, A, LDA, + $ BETA, C, LDC ) +* .. Scalar Arguments .. + CHARACTER*1 UPLO, TRANS + INTEGER N, K, LDA, LDC + COMPLEX*16 ALPHA, BETA +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* ZSYRK performs one of the symmetric rank k operations +* +* C := alpha*A*A' + beta*C, +* +* or +* +* C := alpha*A'*A + beta*C, +* +* where alpha and beta are scalars, C is an n by n symmetric matrix +* and A is an n by k matrix in the first case and a k by n matrix +* in the second case. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array C is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of C +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of C +* is to be referenced. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. +* +* TRANS = 'T' or 't' C := alpha*A'*A + beta*C. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix C. N must be +* at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with TRANS = 'N' or 'n', K specifies the number +* of columns of the matrix A, and on entry with +* TRANS = 'T' or 't', K specifies the number of rows of the +* matrix A. K must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +* k when TRANS = 'N' or 'n', and is n otherwise. +* Before entry with TRANS = 'N' or 'n', the leading n by k +* part of the array A must contain the matrix A, otherwise +* the leading k by n part of the array A must contain the +* matrix A. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When TRANS = 'N' or 'n' +* then LDA must be at least max( 1, n ), otherwise LDA must +* be at least max( 1, k ). +* Unchanged on exit. +* +* BETA - COMPLEX*16 . +* On entry, BETA specifies the scalar beta. +* Unchanged on exit. +* +* C - COMPLEX*16 array of DIMENSION ( LDC, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array C must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of C is not referenced. On exit, the +* upper triangular part of the array C is overwritten by the +* upper triangular part of the updated matrix. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array C must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of C is not referenced. On exit, the +* lower triangular part of the array C is overwritten by the +* lower triangular part of the updated matrix. +* +* LDC - INTEGER. +* On entry, LDC specifies the first dimension of C as declared +* in the calling (sub) program. LDC must be at least +* max( 1, n ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, INFO, J, L, NROWA + COMPLEX*16 TEMP +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IF( LSAME( TRANS, 'N' ) )THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANS, 'T' ) ) )THEN + INFO = 2 + ELSE IF( N .LT.0 )THEN + INFO = 3 + ELSE IF( K .LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 7 + ELSE IF( LDC.LT.MAX( 1, N ) )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZSYRK ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR. + $ ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + IF( UPPER )THEN + IF( BETA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, J + C( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40, J = 1, N + DO 30, I = 1, J + C( I, J ) = BETA*C( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + IF( BETA.EQ.ZERO )THEN + DO 60, J = 1, N + DO 50, I = J, N + C( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80, J = 1, N + DO 70, I = J, N + C( I, J ) = BETA*C( I, J ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form C := alpha*A*A' + beta*C. +* + IF( UPPER )THEN + DO 130, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 90, I = 1, J + C( I, J ) = ZERO + 90 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 100, I = 1, J + C( I, J ) = BETA*C( I, J ) + 100 CONTINUE + END IF + DO 120, L = 1, K + IF( A( J, L ).NE.ZERO )THEN + TEMP = ALPHA*A( J, L ) + DO 110, I = 1, J + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 110 CONTINUE + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180, J = 1, N + IF( BETA.EQ.ZERO )THEN + DO 140, I = J, N + C( I, J ) = ZERO + 140 CONTINUE + ELSE IF( BETA.NE.ONE )THEN + DO 150, I = J, N + C( I, J ) = BETA*C( I, J ) + 150 CONTINUE + END IF + DO 170, L = 1, K + IF( A( J, L ).NE.ZERO )THEN + TEMP = ALPHA*A( J, L ) + DO 160, I = J, N + C( I, J ) = C( I, J ) + TEMP*A( I, L ) + 160 CONTINUE + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*A'*A + beta*C. +* + IF( UPPER )THEN + DO 210, J = 1, N + DO 200, I = 1, J + TEMP = ZERO + DO 190, L = 1, K + TEMP = TEMP + A( L, I )*A( L, J ) + 190 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240, J = 1, N + DO 230, I = J, N + TEMP = ZERO + DO 220, L = 1, K + TEMP = TEMP + A( L, I )*A( L, J ) + 220 CONTINUE + IF( BETA.EQ.ZERO )THEN + C( I, J ) = ALPHA*TEMP + ELSE + C( I, J ) = ALPHA*TEMP + BETA*C( I, J ) + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZSYRK . +* + END diff --git a/costa/native/external/blas/ztbmv.f b/costa/native/external/blas/ztbmv.f new file mode 100644 index 000000000..17944082c --- /dev/null +++ b/costa/native/external/blas/ztbmv.f @@ -0,0 +1,377 @@ + SUBROUTINE ZTBMV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, K, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* ZTBMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, or x := conjg( A' )*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular band matrix, with ( k + 1 ) diagonals. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := conjg( A' )*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with UPLO = 'U' or 'u', K specifies the number of +* super-diagonals of the matrix A. +* On entry with UPLO = 'L' or 'l', K specifies the number of +* sub-diagonals of the matrix A. +* K must satisfy 0 .le. K. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer an upper +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer a lower +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Note that when DIAG = 'U' or 'u' the elements of the array A +* corresponding to the diagonal elements of the matrix are not +* referenced, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, J, JX, KPLUS1, KX, L + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( K.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 7 + ELSE IF( INCX.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZTBMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + L = KPLUS1 - J + DO 10, I = MAX( 1, J - K ), J - 1 + X( I ) = X( I ) + TEMP*A( L + I, J ) + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( KPLUS1, J ) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + L = KPLUS1 - J + DO 30, I = MAX( 1, J - K ), J - 1 + X( IX ) = X( IX ) + TEMP*A( L + I, J ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( KPLUS1, J ) + END IF + JX = JX + INCX + IF( J.GT.K ) + $ KX = KX + INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + L = 1 - J + DO 50, I = MIN( N, J + K ), J + 1, -1 + X( I ) = X( I ) + TEMP*A( L + I, J ) + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( 1, J ) + END IF + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + L = 1 - J + DO 70, I = MIN( N, J + K ), J + 1, -1 + X( IX ) = X( IX ) + TEMP*A( L + I, J ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( 1, J ) + END IF + JX = JX - INCX + IF( ( N - J ).GE.K ) + $ KX = KX - INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x or x := conjg( A' )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 110, J = N, 1, -1 + TEMP = X( J ) + L = KPLUS1 - J + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( KPLUS1, J ) + DO 90, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + A( L + I, J )*X( I ) + 90 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( A( KPLUS1, J ) ) + DO 100, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + DCONJG( A( L + I, J ) )*X( I ) + 100 CONTINUE + END IF + X( J ) = TEMP + 110 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 140, J = N, 1, -1 + TEMP = X( JX ) + KX = KX - INCX + IX = KX + L = KPLUS1 - J + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( KPLUS1, J ) + DO 120, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + A( L + I, J )*X( IX ) + IX = IX - INCX + 120 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( A( KPLUS1, J ) ) + DO 130, I = J - 1, MAX( 1, J - K ), -1 + TEMP = TEMP + DCONJG( A( L + I, J ) )*X( IX ) + IX = IX - INCX + 130 CONTINUE + END IF + X( JX ) = TEMP + JX = JX - INCX + 140 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 170, J = 1, N + TEMP = X( J ) + L = 1 - J + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( 1, J ) + DO 150, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + A( L + I, J )*X( I ) + 150 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( A( 1, J ) ) + DO 160, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + DCONJG( A( L + I, J ) )*X( I ) + 160 CONTINUE + END IF + X( J ) = TEMP + 170 CONTINUE + ELSE + JX = KX + DO 200, J = 1, N + TEMP = X( JX ) + KX = KX + INCX + IX = KX + L = 1 - J + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( 1, J ) + DO 180, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + A( L + I, J )*X( IX ) + IX = IX + INCX + 180 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( A( 1, J ) ) + DO 190, I = J + 1, MIN( N, J + K ) + TEMP = TEMP + DCONJG( A( L + I, J ) )*X( IX ) + IX = IX + INCX + 190 CONTINUE + END IF + X( JX ) = TEMP + JX = JX + INCX + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of ZTBMV . +* + END diff --git a/costa/native/external/blas/ztbsv.f b/costa/native/external/blas/ztbsv.f new file mode 100644 index 000000000..f3ded8193 --- /dev/null +++ b/costa/native/external/blas/ztbsv.f @@ -0,0 +1,381 @@ + SUBROUTINE ZTBSV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, K, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* ZTBSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, or conjg( A' )*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular band matrix, with ( k + 1 ) +* diagonals. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' conjg( A' )*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* K - INTEGER. +* On entry with UPLO = 'U' or 'u', K specifies the number of +* super-diagonals of the matrix A. +* On entry with UPLO = 'L' or 'l', K specifies the number of +* sub-diagonals of the matrix A. +* K must satisfy 0 .le. K. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) +* by n part of the array A must contain the upper triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row +* ( k + 1 ) of the array, the first super-diagonal starting at +* position 2 in row k, and so on. The top left k by k triangle +* of the array A is not referenced. +* The following program segment will transfer an upper +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = K + 1 - J +* DO 10, I = MAX( 1, J - K ), J +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) +* by n part of the array A must contain the lower triangular +* band part of the matrix of coefficients, supplied column by +* column, with the leading diagonal of the matrix in row 1 of +* the array, the first sub-diagonal starting at position 1 in +* row 2, and so on. The bottom right k by k triangle of the +* array A is not referenced. +* The following program segment will transfer a lower +* triangular band matrix from conventional full matrix storage +* to band storage: +* +* DO 20, J = 1, N +* M = 1 - J +* DO 10, I = J, MIN( N, J + K ) +* A( M + I, J ) = matrix( I, J ) +* 10 CONTINUE +* 20 CONTINUE +* +* Note that when DIAG = 'U' or 'u' the elements of the array A +* corresponding to the diagonal elements of the matrix are not +* referenced, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* ( k + 1 ). +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, J, JX, KPLUS1, KX, L + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( K.LT.0 )THEN + INFO = 5 + ELSE IF( LDA.LT.( K + 1 ) )THEN + INFO = 7 + ELSE IF( INCX.EQ.0 )THEN + INFO = 9 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZTBSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed by sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + L = KPLUS1 - J + IF( NOUNIT ) + $ X( J ) = X( J )/A( KPLUS1, J ) + TEMP = X( J ) + DO 10, I = J - 1, MAX( 1, J - K ), -1 + X( I ) = X( I ) - TEMP*A( L + I, J ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 40, J = N, 1, -1 + KX = KX - INCX + IF( X( JX ).NE.ZERO )THEN + IX = KX + L = KPLUS1 - J + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( KPLUS1, J ) + TEMP = X( JX ) + DO 30, I = J - 1, MAX( 1, J - K ), -1 + X( IX ) = X( IX ) - TEMP*A( L + I, J ) + IX = IX - INCX + 30 CONTINUE + END IF + JX = JX - INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + L = 1 - J + IF( NOUNIT ) + $ X( J ) = X( J )/A( 1, J ) + TEMP = X( J ) + DO 50, I = J + 1, MIN( N, J + K ) + X( I ) = X( I ) - TEMP*A( L + I, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + KX = KX + INCX + IF( X( JX ).NE.ZERO )THEN + IX = KX + L = 1 - J + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( 1, J ) + TEMP = X( JX ) + DO 70, I = J + 1, MIN( N, J + K ) + X( IX ) = X( IX ) - TEMP*A( L + I, J ) + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x or x := inv( conjg( A') )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KPLUS1 = K + 1 + IF( INCX.EQ.1 )THEN + DO 110, J = 1, N + TEMP = X( J ) + L = KPLUS1 - J + IF( NOCONJ )THEN + DO 90, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - A( L + I, J )*X( I ) + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( KPLUS1, J ) + ELSE + DO 100, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - DCONJG( A( L + I, J ) )*X( I ) + 100 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( A( KPLUS1, J ) ) + END IF + X( J ) = TEMP + 110 CONTINUE + ELSE + JX = KX + DO 140, J = 1, N + TEMP = X( JX ) + IX = KX + L = KPLUS1 - J + IF( NOCONJ )THEN + DO 120, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - A( L + I, J )*X( IX ) + IX = IX + INCX + 120 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( KPLUS1, J ) + ELSE + DO 130, I = MAX( 1, J - K ), J - 1 + TEMP = TEMP - DCONJG( A( L + I, J ) )*X( IX ) + IX = IX + INCX + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( A( KPLUS1, J ) ) + END IF + X( JX ) = TEMP + JX = JX + INCX + IF( J.GT.K ) + $ KX = KX + INCX + 140 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 170, J = N, 1, -1 + TEMP = X( J ) + L = 1 - J + IF( NOCONJ )THEN + DO 150, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - A( L + I, J )*X( I ) + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( 1, J ) + ELSE + DO 160, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - DCONJG( A( L + I, J ) )*X( I ) + 160 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( A( 1, J ) ) + END IF + X( J ) = TEMP + 170 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 200, J = N, 1, -1 + TEMP = X( JX ) + IX = KX + L = 1 - J + IF( NOCONJ )THEN + DO 180, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - A( L + I, J )*X( IX ) + IX = IX - INCX + 180 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( 1, J ) + ELSE + DO 190, I = MIN( N, J + K ), J + 1, -1 + TEMP = TEMP - DCONJG( A( L + I, J ) )*X( IX ) + IX = IX - INCX + 190 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( A( 1, J ) ) + END IF + X( JX ) = TEMP + JX = JX - INCX + IF( ( N - J ).GE.K ) + $ KX = KX - INCX + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of ZTBSV . +* + END diff --git a/costa/native/external/blas/ztpmv.f b/costa/native/external/blas/ztpmv.f new file mode 100644 index 000000000..4fad3a8bd --- /dev/null +++ b/costa/native/external/blas/ztpmv.f @@ -0,0 +1,338 @@ + SUBROUTINE ZTPMV ( UPLO, TRANS, DIAG, N, AP, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX*16 AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* ZTPMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, or x := conjg( A' )*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular matrix, supplied in packed form. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := conjg( A' )*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* AP - COMPLEX*16 array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) +* respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) +* respectively, and so on. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced, but are assumed to be unity. +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZTPMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of AP are +* accessed sequentially with one pass through AP. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x:= A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = 1 + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + K = KK + DO 10, I = 1, J - 1 + X( I ) = X( I ) + TEMP*AP( K ) + K = K + 1 + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*AP( KK + J - 1 ) + END IF + KK = KK + J + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 30, K = KK, KK + J - 2 + X( IX ) = X( IX ) + TEMP*AP( K ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*AP( KK + J - 1 ) + END IF + JX = JX + INCX + KK = KK + J + 40 CONTINUE + END IF + ELSE + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + K = KK + DO 50, I = N, J + 1, -1 + X( I ) = X( I ) + TEMP*AP( K ) + K = K - 1 + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*AP( KK - N + J ) + END IF + KK = KK - ( N - J + 1 ) + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 70, K = KK, KK - ( N - ( J + 1 ) ), -1 + X( IX ) = X( IX ) + TEMP*AP( K ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*AP( KK - N + J ) + END IF + JX = JX - INCX + KK = KK - ( N - J + 1 ) + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x or x := conjg( A' )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 110, J = N, 1, -1 + TEMP = X( J ) + K = KK - 1 + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 90, I = J - 1, 1, -1 + TEMP = TEMP + AP( K )*X( I ) + K = K - 1 + 90 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( AP( KK ) ) + DO 100, I = J - 1, 1, -1 + TEMP = TEMP + DCONJG( AP( K ) )*X( I ) + K = K - 1 + 100 CONTINUE + END IF + X( J ) = TEMP + KK = KK - J + 110 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 140, J = N, 1, -1 + TEMP = X( JX ) + IX = JX + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 120, K = KK - 1, KK - J + 1, -1 + IX = IX - INCX + TEMP = TEMP + AP( K )*X( IX ) + 120 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( AP( KK ) ) + DO 130, K = KK - 1, KK - J + 1, -1 + IX = IX - INCX + TEMP = TEMP + DCONJG( AP( K ) )*X( IX ) + 130 CONTINUE + END IF + X( JX ) = TEMP + JX = JX - INCX + KK = KK - J + 140 CONTINUE + END IF + ELSE + KK = 1 + IF( INCX.EQ.1 )THEN + DO 170, J = 1, N + TEMP = X( J ) + K = KK + 1 + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 150, I = J + 1, N + TEMP = TEMP + AP( K )*X( I ) + K = K + 1 + 150 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( AP( KK ) ) + DO 160, I = J + 1, N + TEMP = TEMP + DCONJG( AP( K ) )*X( I ) + K = K + 1 + 160 CONTINUE + END IF + X( J ) = TEMP + KK = KK + ( N - J + 1 ) + 170 CONTINUE + ELSE + JX = KX + DO 200, J = 1, N + TEMP = X( JX ) + IX = JX + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*AP( KK ) + DO 180, K = KK + 1, KK + N - J + IX = IX + INCX + TEMP = TEMP + AP( K )*X( IX ) + 180 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( AP( KK ) ) + DO 190, K = KK + 1, KK + N - J + IX = IX + INCX + TEMP = TEMP + DCONJG( AP( K ) )*X( IX ) + 190 CONTINUE + END IF + X( JX ) = TEMP + JX = JX + INCX + KK = KK + ( N - J + 1 ) + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of ZTPMV . +* + END diff --git a/costa/native/external/blas/ztpsv.f b/costa/native/external/blas/ztpsv.f new file mode 100644 index 000000000..8649f4748 --- /dev/null +++ b/costa/native/external/blas/ztpsv.f @@ -0,0 +1,341 @@ + SUBROUTINE ZTPSV ( UPLO, TRANS, DIAG, N, AP, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX*16 AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* ZTPSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, or conjg( A' )*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular matrix, supplied in packed form. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' conjg( A' )*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* AP - COMPLEX*16 array of DIMENSION at least +* ( ( n*( n + 1 ) )/2 ). +* Before entry with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) +* respectively, and so on. +* Before entry with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular matrix packed sequentially, +* column by column, so that AP( 1 ) contains a( 1, 1 ), +* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) +* respectively, and so on. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced, but are assumed to be unity. +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, J, JX, K, KK, KX + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZTPSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of AP are +* accessed sequentially with one pass through AP. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/AP( KK ) + TEMP = X( J ) + K = KK - 1 + DO 10, I = J - 1, 1, -1 + X( I ) = X( I ) - TEMP*AP( K ) + K = K - 1 + 10 CONTINUE + END IF + KK = KK - J + 20 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 40, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/AP( KK ) + TEMP = X( JX ) + IX = JX + DO 30, K = KK - 1, KK - J + 1, -1 + IX = IX - INCX + X( IX ) = X( IX ) - TEMP*AP( K ) + 30 CONTINUE + END IF + JX = JX - INCX + KK = KK - J + 40 CONTINUE + END IF + ELSE + KK = 1 + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/AP( KK ) + TEMP = X( J ) + K = KK + 1 + DO 50, I = J + 1, N + X( I ) = X( I ) - TEMP*AP( K ) + K = K + 1 + 50 CONTINUE + END IF + KK = KK + ( N - J + 1 ) + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/AP( KK ) + TEMP = X( JX ) + IX = JX + DO 70, K = KK + 1, KK + N - J + IX = IX + INCX + X( IX ) = X( IX ) - TEMP*AP( K ) + 70 CONTINUE + END IF + JX = JX + INCX + KK = KK + ( N - J + 1 ) + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x or x := inv( conjg( A' ) )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + KK = 1 + IF( INCX.EQ.1 )THEN + DO 110, J = 1, N + TEMP = X( J ) + K = KK + IF( NOCONJ )THEN + DO 90, I = 1, J - 1 + TEMP = TEMP - AP( K )*X( I ) + K = K + 1 + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK + J - 1 ) + ELSE + DO 100, I = 1, J - 1 + TEMP = TEMP - DCONJG( AP( K ) )*X( I ) + K = K + 1 + 100 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( AP( KK + J - 1 ) ) + END IF + X( J ) = TEMP + KK = KK + J + 110 CONTINUE + ELSE + JX = KX + DO 140, J = 1, N + TEMP = X( JX ) + IX = KX + IF( NOCONJ )THEN + DO 120, K = KK, KK + J - 2 + TEMP = TEMP - AP( K )*X( IX ) + IX = IX + INCX + 120 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK + J - 1 ) + ELSE + DO 130, K = KK, KK + J - 2 + TEMP = TEMP - DCONJG( AP( K ) )*X( IX ) + IX = IX + INCX + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( AP( KK + J - 1 ) ) + END IF + X( JX ) = TEMP + JX = JX + INCX + KK = KK + J + 140 CONTINUE + END IF + ELSE + KK = ( N*( N + 1 ) )/2 + IF( INCX.EQ.1 )THEN + DO 170, J = N, 1, -1 + TEMP = X( J ) + K = KK + IF( NOCONJ )THEN + DO 150, I = N, J + 1, -1 + TEMP = TEMP - AP( K )*X( I ) + K = K - 1 + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK - N + J ) + ELSE + DO 160, I = N, J + 1, -1 + TEMP = TEMP - DCONJG( AP( K ) )*X( I ) + K = K - 1 + 160 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( AP( KK - N + J ) ) + END IF + X( J ) = TEMP + KK = KK - ( N - J + 1 ) + 170 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 200, J = N, 1, -1 + TEMP = X( JX ) + IX = KX + IF( NOCONJ )THEN + DO 180, K = KK, KK - ( N - ( J + 1 ) ), -1 + TEMP = TEMP - AP( K )*X( IX ) + IX = IX - INCX + 180 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/AP( KK - N + J ) + ELSE + DO 190, K = KK, KK - ( N - ( J + 1 ) ), -1 + TEMP = TEMP - DCONJG( AP( K ) )*X( IX ) + IX = IX - INCX + 190 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( AP( KK - N + J ) ) + END IF + X( JX ) = TEMP + JX = JX - INCX + KK = KK - ( N - J + 1 ) + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of ZTPSV . +* + END diff --git a/costa/native/external/blas/ztrmm.f b/costa/native/external/blas/ztrmm.f new file mode 100644 index 000000000..30910d1da --- /dev/null +++ b/costa/native/external/blas/ztrmm.f @@ -0,0 +1,392 @@ + SUBROUTINE ZTRMM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, + $ B, LDB ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO, TRANSA, DIAG + INTEGER M, N, LDA, LDB + COMPLEX*16 ALPHA +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZTRMM performs one of the matrix-matrix operations +* +* B := alpha*op( A )*B, or B := alpha*B*op( A ) +* +* where alpha is a scalar, B is an m by n matrix, A is a unit, or +* non-unit, upper or lower triangular matrix and op( A ) is one of +* +* op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether op( A ) multiplies B from +* the left or right as follows: +* +* SIDE = 'L' or 'l' B := alpha*op( A )*B. +* +* SIDE = 'R' or 'r' B := alpha*B*op( A ). +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix A is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n' op( A ) = A. +* +* TRANSA = 'T' or 't' op( A ) = A'. +* +* TRANSA = 'C' or 'c' op( A ) = conjg( A' ). +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit triangular +* as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of B. M must be at +* least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of B. N must be +* at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. When alpha is +* zero then A is not referenced and B need not be set before +* entry. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, k ), where k is m +* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. +* Before entry with UPLO = 'U' or 'u', the leading k by k +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading k by k +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' +* then LDA must be at least max( 1, n ). +* Unchanged on exit. +* +* B - COMPLEX*16 array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the matrix B, and on exit is overwritten by the +* transformed matrix. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. Local Scalars .. + LOGICAL LSIDE, NOCONJ, NOUNIT, UPPER + INTEGER I, INFO, J, K, NROWA + COMPLEX*16 TEMP +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LSIDE = LSAME( SIDE , 'L' ) + IF( LSIDE )THEN + NROWA = M + ELSE + NROWA = N + END IF + NOCONJ = LSAME( TRANSA, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) + UPPER = LSAME( UPLO , 'U' ) +* + INFO = 0 + IF( ( .NOT.LSIDE ).AND. + $ ( .NOT.LSAME( SIDE , 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 2 + ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'C' ) ) )THEN + INFO = 3 + ELSE IF( ( .NOT.LSAME( DIAG , 'U' ) ).AND. + $ ( .NOT.LSAME( DIAG , 'N' ) ) )THEN + INFO = 4 + ELSE IF( M .LT.0 )THEN + INFO = 5 + ELSE IF( N .LT.0 )THEN + INFO = 6 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZTRMM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* Start the operations. +* + IF( LSIDE )THEN + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*A*B. +* + IF( UPPER )THEN + DO 50, J = 1, N + DO 40, K = 1, M + IF( B( K, J ).NE.ZERO )THEN + TEMP = ALPHA*B( K, J ) + DO 30, I = 1, K - 1 + B( I, J ) = B( I, J ) + TEMP*A( I, K ) + 30 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP*A( K, K ) + B( K, J ) = TEMP + END IF + 40 CONTINUE + 50 CONTINUE + ELSE + DO 80, J = 1, N + DO 70 K = M, 1, -1 + IF( B( K, J ).NE.ZERO )THEN + TEMP = ALPHA*B( K, J ) + B( K, J ) = TEMP + IF( NOUNIT ) + $ B( K, J ) = B( K, J )*A( K, K ) + DO 60, I = K + 1, M + B( I, J ) = B( I, J ) + TEMP*A( I, K ) + 60 CONTINUE + END IF + 70 CONTINUE + 80 CONTINUE + END IF + ELSE +* +* Form B := alpha*A'*B or B := alpha*conjg( A' )*B. +* + IF( UPPER )THEN + DO 120, J = 1, N + DO 110, I = M, 1, -1 + TEMP = B( I, J ) + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( I, I ) + DO 90, K = 1, I - 1 + TEMP = TEMP + A( K, I )*B( K, J ) + 90 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( A( I, I ) ) + DO 100, K = 1, I - 1 + TEMP = TEMP + DCONJG( A( K, I ) )*B( K, J ) + 100 CONTINUE + END IF + B( I, J ) = ALPHA*TEMP + 110 CONTINUE + 120 CONTINUE + ELSE + DO 160, J = 1, N + DO 150, I = 1, M + TEMP = B( I, J ) + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( I, I ) + DO 130, K = I + 1, M + TEMP = TEMP + A( K, I )*B( K, J ) + 130 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( A( I, I ) ) + DO 140, K = I + 1, M + TEMP = TEMP + DCONJG( A( K, I ) )*B( K, J ) + 140 CONTINUE + END IF + B( I, J ) = ALPHA*TEMP + 150 CONTINUE + 160 CONTINUE + END IF + END IF + ELSE + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*B*A. +* + IF( UPPER )THEN + DO 200, J = N, 1, -1 + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 170, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 170 CONTINUE + DO 190, K = 1, J - 1 + IF( A( K, J ).NE.ZERO )THEN + TEMP = ALPHA*A( K, J ) + DO 180, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 180 CONTINUE + END IF + 190 CONTINUE + 200 CONTINUE + ELSE + DO 240, J = 1, N + TEMP = ALPHA + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 210, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 210 CONTINUE + DO 230, K = J + 1, N + IF( A( K, J ).NE.ZERO )THEN + TEMP = ALPHA*A( K, J ) + DO 220, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 220 CONTINUE + END IF + 230 CONTINUE + 240 CONTINUE + END IF + ELSE +* +* Form B := alpha*B*A' or B := alpha*B*conjg( A' ). +* + IF( UPPER )THEN + DO 280, K = 1, N + DO 260, J = 1, K - 1 + IF( A( J, K ).NE.ZERO )THEN + IF( NOCONJ )THEN + TEMP = ALPHA*A( J, K ) + ELSE + TEMP = ALPHA*DCONJG( A( J, K ) ) + END IF + DO 250, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 250 CONTINUE + END IF + 260 CONTINUE + TEMP = ALPHA + IF( NOUNIT )THEN + IF( NOCONJ )THEN + TEMP = TEMP*A( K, K ) + ELSE + TEMP = TEMP*DCONJG( A( K, K ) ) + END IF + END IF + IF( TEMP.NE.ONE )THEN + DO 270, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 270 CONTINUE + END IF + 280 CONTINUE + ELSE + DO 320, K = N, 1, -1 + DO 300, J = K + 1, N + IF( A( J, K ).NE.ZERO )THEN + IF( NOCONJ )THEN + TEMP = ALPHA*A( J, K ) + ELSE + TEMP = ALPHA*DCONJG( A( J, K ) ) + END IF + DO 290, I = 1, M + B( I, J ) = B( I, J ) + TEMP*B( I, K ) + 290 CONTINUE + END IF + 300 CONTINUE + TEMP = ALPHA + IF( NOUNIT )THEN + IF( NOCONJ )THEN + TEMP = TEMP*A( K, K ) + ELSE + TEMP = TEMP*DCONJG( A( K, K ) ) + END IF + END IF + IF( TEMP.NE.ONE )THEN + DO 310, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 310 CONTINUE + END IF + 320 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of ZTRMM . +* + END diff --git a/costa/native/external/blas/ztrmv.f b/costa/native/external/blas/ztrmv.f new file mode 100644 index 000000000..677e212b5 --- /dev/null +++ b/costa/native/external/blas/ztrmv.f @@ -0,0 +1,321 @@ + SUBROUTINE ZTRMV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* ZTRMV performs one of the matrix-vector operations +* +* x := A*x, or x := A'*x, or x := conjg( A' )*x, +* +* where x is an n element vector and A is an n by n unit, or non-unit, +* upper or lower triangular matrix. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the operation to be performed as +* follows: +* +* TRANS = 'N' or 'n' x := A*x. +* +* TRANS = 'T' or 't' x := A'*x. +* +* TRANS = 'C' or 'c' x := conjg( A' )*x. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element vector x. On exit, X is overwritten with the +* tranformed vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, J, JX, KX + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZTRMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := A*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 20, J = 1, N + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + DO 10, I = 1, J - 1 + X( I ) = X( I ) + TEMP*A( I, J ) + 10 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( J, J ) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40, J = 1, N + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 30, I = 1, J - 1 + X( IX ) = X( IX ) + TEMP*A( I, J ) + IX = IX + INCX + 30 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( J, J ) + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + TEMP = X( J ) + DO 50, I = N, J + 1, -1 + X( I ) = X( I ) + TEMP*A( I, J ) + 50 CONTINUE + IF( NOUNIT ) + $ X( J ) = X( J )*A( J, J ) + END IF + 60 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 80, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + TEMP = X( JX ) + IX = KX + DO 70, I = N, J + 1, -1 + X( IX ) = X( IX ) + TEMP*A( I, J ) + IX = IX - INCX + 70 CONTINUE + IF( NOUNIT ) + $ X( JX ) = X( JX )*A( J, J ) + END IF + JX = JX - INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A'*x or x := conjg( A' )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 110, J = N, 1, -1 + TEMP = X( J ) + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 90, I = J - 1, 1, -1 + TEMP = TEMP + A( I, J )*X( I ) + 90 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( A( J, J ) ) + DO 100, I = J - 1, 1, -1 + TEMP = TEMP + DCONJG( A( I, J ) )*X( I ) + 100 CONTINUE + END IF + X( J ) = TEMP + 110 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 140, J = N, 1, -1 + TEMP = X( JX ) + IX = JX + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 120, I = J - 1, 1, -1 + IX = IX - INCX + TEMP = TEMP + A( I, J )*X( IX ) + 120 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( A( J, J ) ) + DO 130, I = J - 1, 1, -1 + IX = IX - INCX + TEMP = TEMP + DCONJG( A( I, J ) )*X( IX ) + 130 CONTINUE + END IF + X( JX ) = TEMP + JX = JX - INCX + 140 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 170, J = 1, N + TEMP = X( J ) + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 150, I = J + 1, N + TEMP = TEMP + A( I, J )*X( I ) + 150 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( A( J, J ) ) + DO 160, I = J + 1, N + TEMP = TEMP + DCONJG( A( I, J ) )*X( I ) + 160 CONTINUE + END IF + X( J ) = TEMP + 170 CONTINUE + ELSE + JX = KX + DO 200, J = 1, N + TEMP = X( JX ) + IX = JX + IF( NOCONJ )THEN + IF( NOUNIT ) + $ TEMP = TEMP*A( J, J ) + DO 180, I = J + 1, N + IX = IX + INCX + TEMP = TEMP + A( I, J )*X( IX ) + 180 CONTINUE + ELSE + IF( NOUNIT ) + $ TEMP = TEMP*DCONJG( A( J, J ) ) + DO 190, I = J + 1, N + IX = IX + INCX + TEMP = TEMP + DCONJG( A( I, J ) )*X( IX ) + 190 CONTINUE + END IF + X( JX ) = TEMP + JX = JX + INCX + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of ZTRMV . +* + END diff --git a/costa/native/external/blas/ztrsm.f b/costa/native/external/blas/ztrsm.f new file mode 100644 index 000000000..e414ec666 --- /dev/null +++ b/costa/native/external/blas/ztrsm.f @@ -0,0 +1,414 @@ + SUBROUTINE ZTRSM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, + $ B, LDB ) +* .. Scalar Arguments .. + CHARACTER*1 SIDE, UPLO, TRANSA, DIAG + INTEGER M, N, LDA, LDB + COMPLEX*16 ALPHA +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZTRSM solves one of the matrix equations +* +* op( A )*X = alpha*B, or X*op( A ) = alpha*B, +* +* where alpha is a scalar, X and B are m by n matrices, A is a unit, or +* non-unit, upper or lower triangular matrix and op( A ) is one of +* +* op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). +* +* The matrix X is overwritten on B. +* +* Parameters +* ========== +* +* SIDE - CHARACTER*1. +* On entry, SIDE specifies whether op( A ) appears on the left +* or right of X as follows: +* +* SIDE = 'L' or 'l' op( A )*X = alpha*B. +* +* SIDE = 'R' or 'r' X*op( A ) = alpha*B. +* +* Unchanged on exit. +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix A is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANSA - CHARACTER*1. +* On entry, TRANSA specifies the form of op( A ) to be used in +* the matrix multiplication as follows: +* +* TRANSA = 'N' or 'n' op( A ) = A. +* +* TRANSA = 'T' or 't' op( A ) = A'. +* +* TRANSA = 'C' or 'c' op( A ) = conjg( A' ). +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit triangular +* as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* M - INTEGER. +* On entry, M specifies the number of rows of B. M must be at +* least zero. +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the number of columns of B. N must be +* at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 . +* On entry, ALPHA specifies the scalar alpha. When alpha is +* zero then A is not referenced and B need not be set before +* entry. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, k ), where k is m +* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. +* Before entry with UPLO = 'U' or 'u', the leading k by k +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading k by k +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. When SIDE = 'L' or 'l' then +* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' +* then LDA must be at least max( 1, n ). +* Unchanged on exit. +* +* B - COMPLEX*16 array of DIMENSION ( LDB, n ). +* Before entry, the leading m by n part of the array B must +* contain the right-hand side matrix B, and on exit is +* overwritten by the solution matrix X. +* +* LDB - INTEGER. +* On entry, LDB specifies the first dimension of B as declared +* in the calling (sub) program. LDB must be at least +* max( 1, m ). +* Unchanged on exit. +* +* +* Level 3 Blas routine. +* +* -- Written on 8-February-1989. +* Jack Dongarra, Argonne National Laboratory. +* Iain Duff, AERE Harwell. +* Jeremy Du Croz, Numerical Algorithms Group Ltd. +* Sven Hammarling, Numerical Algorithms Group Ltd. +* +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. Local Scalars .. + LOGICAL LSIDE, NOCONJ, NOUNIT, UPPER + INTEGER I, INFO, J, K, NROWA + COMPLEX*16 TEMP +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LSIDE = LSAME( SIDE , 'L' ) + IF( LSIDE )THEN + NROWA = M + ELSE + NROWA = N + END IF + NOCONJ = LSAME( TRANSA, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) + UPPER = LSAME( UPLO , 'U' ) +* + INFO = 0 + IF( ( .NOT.LSIDE ).AND. + $ ( .NOT.LSAME( SIDE , 'R' ) ) )THEN + INFO = 1 + ELSE IF( ( .NOT.UPPER ).AND. + $ ( .NOT.LSAME( UPLO , 'L' ) ) )THEN + INFO = 2 + ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'T' ) ).AND. + $ ( .NOT.LSAME( TRANSA, 'C' ) ) )THEN + INFO = 3 + ELSE IF( ( .NOT.LSAME( DIAG , 'U' ) ).AND. + $ ( .NOT.LSAME( DIAG , 'N' ) ) )THEN + INFO = 4 + ELSE IF( M .LT.0 )THEN + INFO = 5 + ELSE IF( N .LT.0 )THEN + INFO = 6 + ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN + INFO = 9 + ELSE IF( LDB.LT.MAX( 1, M ) )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZTRSM ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* And when alpha.eq.zero. +* + IF( ALPHA.EQ.ZERO )THEN + DO 20, J = 1, N + DO 10, I = 1, M + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* Start the operations. +* + IF( LSIDE )THEN + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*inv( A )*B. +* + IF( UPPER )THEN + DO 60, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 30, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 30 CONTINUE + END IF + DO 50, K = M, 1, -1 + IF( B( K, J ).NE.ZERO )THEN + IF( NOUNIT ) + $ B( K, J ) = B( K, J )/A( K, K ) + DO 40, I = 1, K - 1 + B( I, J ) = B( I, J ) - B( K, J )*A( I, K ) + 40 CONTINUE + END IF + 50 CONTINUE + 60 CONTINUE + ELSE + DO 100, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 70, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 70 CONTINUE + END IF + DO 90 K = 1, M + IF( B( K, J ).NE.ZERO )THEN + IF( NOUNIT ) + $ B( K, J ) = B( K, J )/A( K, K ) + DO 80, I = K + 1, M + B( I, J ) = B( I, J ) - B( K, J )*A( I, K ) + 80 CONTINUE + END IF + 90 CONTINUE + 100 CONTINUE + END IF + ELSE +* +* Form B := alpha*inv( A' )*B +* or B := alpha*inv( conjg( A' ) )*B. +* + IF( UPPER )THEN + DO 140, J = 1, N + DO 130, I = 1, M + TEMP = ALPHA*B( I, J ) + IF( NOCONJ )THEN + DO 110, K = 1, I - 1 + TEMP = TEMP - A( K, I )*B( K, J ) + 110 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( I, I ) + ELSE + DO 120, K = 1, I - 1 + TEMP = TEMP - DCONJG( A( K, I ) )*B( K, J ) + 120 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( A( I, I ) ) + END IF + B( I, J ) = TEMP + 130 CONTINUE + 140 CONTINUE + ELSE + DO 180, J = 1, N + DO 170, I = M, 1, -1 + TEMP = ALPHA*B( I, J ) + IF( NOCONJ )THEN + DO 150, K = I + 1, M + TEMP = TEMP - A( K, I )*B( K, J ) + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( I, I ) + ELSE + DO 160, K = I + 1, M + TEMP = TEMP - DCONJG( A( K, I ) )*B( K, J ) + 160 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( A( I, I ) ) + END IF + B( I, J ) = TEMP + 170 CONTINUE + 180 CONTINUE + END IF + END IF + ELSE + IF( LSAME( TRANSA, 'N' ) )THEN +* +* Form B := alpha*B*inv( A ). +* + IF( UPPER )THEN + DO 230, J = 1, N + IF( ALPHA.NE.ONE )THEN + DO 190, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 190 CONTINUE + END IF + DO 210, K = 1, J - 1 + IF( A( K, J ).NE.ZERO )THEN + DO 200, I = 1, M + B( I, J ) = B( I, J ) - A( K, J )*B( I, K ) + 200 CONTINUE + END IF + 210 CONTINUE + IF( NOUNIT )THEN + TEMP = ONE/A( J, J ) + DO 220, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 220 CONTINUE + END IF + 230 CONTINUE + ELSE + DO 280, J = N, 1, -1 + IF( ALPHA.NE.ONE )THEN + DO 240, I = 1, M + B( I, J ) = ALPHA*B( I, J ) + 240 CONTINUE + END IF + DO 260, K = J + 1, N + IF( A( K, J ).NE.ZERO )THEN + DO 250, I = 1, M + B( I, J ) = B( I, J ) - A( K, J )*B( I, K ) + 250 CONTINUE + END IF + 260 CONTINUE + IF( NOUNIT )THEN + TEMP = ONE/A( J, J ) + DO 270, I = 1, M + B( I, J ) = TEMP*B( I, J ) + 270 CONTINUE + END IF + 280 CONTINUE + END IF + ELSE +* +* Form B := alpha*B*inv( A' ) +* or B := alpha*B*inv( conjg( A' ) ). +* + IF( UPPER )THEN + DO 330, K = N, 1, -1 + IF( NOUNIT )THEN + IF( NOCONJ )THEN + TEMP = ONE/A( K, K ) + ELSE + TEMP = ONE/DCONJG( A( K, K ) ) + END IF + DO 290, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 290 CONTINUE + END IF + DO 310, J = 1, K - 1 + IF( A( J, K ).NE.ZERO )THEN + IF( NOCONJ )THEN + TEMP = A( J, K ) + ELSE + TEMP = DCONJG( A( J, K ) ) + END IF + DO 300, I = 1, M + B( I, J ) = B( I, J ) - TEMP*B( I, K ) + 300 CONTINUE + END IF + 310 CONTINUE + IF( ALPHA.NE.ONE )THEN + DO 320, I = 1, M + B( I, K ) = ALPHA*B( I, K ) + 320 CONTINUE + END IF + 330 CONTINUE + ELSE + DO 380, K = 1, N + IF( NOUNIT )THEN + IF( NOCONJ )THEN + TEMP = ONE/A( K, K ) + ELSE + TEMP = ONE/DCONJG( A( K, K ) ) + END IF + DO 340, I = 1, M + B( I, K ) = TEMP*B( I, K ) + 340 CONTINUE + END IF + DO 360, J = K + 1, N + IF( A( J, K ).NE.ZERO )THEN + IF( NOCONJ )THEN + TEMP = A( J, K ) + ELSE + TEMP = DCONJG( A( J, K ) ) + END IF + DO 350, I = 1, M + B( I, J ) = B( I, J ) - TEMP*B( I, K ) + 350 CONTINUE + END IF + 360 CONTINUE + IF( ALPHA.NE.ONE )THEN + DO 370, I = 1, M + B( I, K ) = ALPHA*B( I, K ) + 370 CONTINUE + END IF + 380 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of ZTRSM . +* + END diff --git a/costa/native/external/blas/ztrsv.f b/costa/native/external/blas/ztrsv.f new file mode 100644 index 000000000..d0a57c447 --- /dev/null +++ b/costa/native/external/blas/ztrsv.f @@ -0,0 +1,324 @@ + SUBROUTINE ZTRSV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) +* .. Scalar Arguments .. + INTEGER INCX, LDA, N + CHARACTER*1 DIAG, TRANS, UPLO +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* ZTRSV solves one of the systems of equations +* +* A*x = b, or A'*x = b, or conjg( A' )*x = b, +* +* where b and x are n element vectors and A is an n by n unit, or +* non-unit, upper or lower triangular matrix. +* +* No test for singularity or near-singularity is included in this +* routine. Such tests must be performed before calling this routine. +* +* Parameters +* ========== +* +* UPLO - CHARACTER*1. +* On entry, UPLO specifies whether the matrix is an upper or +* lower triangular matrix as follows: +* +* UPLO = 'U' or 'u' A is an upper triangular matrix. +* +* UPLO = 'L' or 'l' A is a lower triangular matrix. +* +* Unchanged on exit. +* +* TRANS - CHARACTER*1. +* On entry, TRANS specifies the equations to be solved as +* follows: +* +* TRANS = 'N' or 'n' A*x = b. +* +* TRANS = 'T' or 't' A'*x = b. +* +* TRANS = 'C' or 'c' conjg( A' )*x = b. +* +* Unchanged on exit. +* +* DIAG - CHARACTER*1. +* On entry, DIAG specifies whether or not A is unit +* triangular as follows: +* +* DIAG = 'U' or 'u' A is assumed to be unit triangular. +* +* DIAG = 'N' or 'n' A is not assumed to be unit +* triangular. +* +* Unchanged on exit. +* +* N - INTEGER. +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* A - COMPLEX*16 array of DIMENSION ( LDA, n ). +* Before entry with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular matrix and the strictly lower triangular part of +* A is not referenced. +* Before entry with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular matrix and the strictly upper triangular part of +* A is not referenced. +* Note that when DIAG = 'U' or 'u', the diagonal elements of +* A are not referenced either, but are assumed to be unity. +* Unchanged on exit. +* +* LDA - INTEGER. +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, n ). +* Unchanged on exit. +* +* X - COMPLEX*16 array of dimension at least +* ( 1 + ( n - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the n +* element right-hand side vector b. On exit, X is overwritten +* with the solution vector x. +* +* INCX - INTEGER. +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* +* Level 2 Blas routine. +* +* -- Written on 22-October-1986. +* Jack Dongarra, Argonne National Lab. +* Jeremy Du Croz, Nag Central Office. +* Sven Hammarling, Nag Central Office. +* Richard Hanson, Sandia National Labs. +* +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I, INFO, IX, J, JX, KX + LOGICAL NOCONJ, NOUNIT +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.LSAME( UPLO , 'U' ).AND. + $ .NOT.LSAME( UPLO , 'L' ) )THEN + INFO = 1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND. + $ .NOT.LSAME( TRANS, 'T' ).AND. + $ .NOT.LSAME( TRANS, 'C' ) )THEN + INFO = 2 + ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND. + $ .NOT.LSAME( DIAG , 'N' ) )THEN + INFO = 3 + ELSE IF( N.LT.0 )THEN + INFO = 4 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZTRSV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* + NOCONJ = LSAME( TRANS, 'T' ) + NOUNIT = LSAME( DIAG , 'N' ) +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF( INCX.LE.0 )THEN + KX = 1 - ( N - 1 )*INCX + ELSE IF( INCX.NE.1 )THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF( LSAME( TRANS, 'N' ) )THEN +* +* Form x := inv( A )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 20, J = N, 1, -1 + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/A( J, J ) + TEMP = X( J ) + DO 10, I = J - 1, 1, -1 + X( I ) = X( I ) - TEMP*A( I, J ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + JX = KX + ( N - 1 )*INCX + DO 40, J = N, 1, -1 + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( J, J ) + TEMP = X( JX ) + IX = JX + DO 30, I = J - 1, 1, -1 + IX = IX - INCX + X( IX ) = X( IX ) - TEMP*A( I, J ) + 30 CONTINUE + END IF + JX = JX - INCX + 40 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 60, J = 1, N + IF( X( J ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( J ) = X( J )/A( J, J ) + TEMP = X( J ) + DO 50, I = J + 1, N + X( I ) = X( I ) - TEMP*A( I, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80, J = 1, N + IF( X( JX ).NE.ZERO )THEN + IF( NOUNIT ) + $ X( JX ) = X( JX )/A( J, J ) + TEMP = X( JX ) + IX = JX + DO 70, I = J + 1, N + IX = IX + INCX + X( IX ) = X( IX ) - TEMP*A( I, J ) + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := inv( A' )*x or x := inv( conjg( A' ) )*x. +* + IF( LSAME( UPLO, 'U' ) )THEN + IF( INCX.EQ.1 )THEN + DO 110, J = 1, N + TEMP = X( J ) + IF( NOCONJ )THEN + DO 90, I = 1, J - 1 + TEMP = TEMP - A( I, J )*X( I ) + 90 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + ELSE + DO 100, I = 1, J - 1 + TEMP = TEMP - DCONJG( A( I, J ) )*X( I ) + 100 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( A( J, J ) ) + END IF + X( J ) = TEMP + 110 CONTINUE + ELSE + JX = KX + DO 140, J = 1, N + IX = KX + TEMP = X( JX ) + IF( NOCONJ )THEN + DO 120, I = 1, J - 1 + TEMP = TEMP - A( I, J )*X( IX ) + IX = IX + INCX + 120 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + ELSE + DO 130, I = 1, J - 1 + TEMP = TEMP - DCONJG( A( I, J ) )*X( IX ) + IX = IX + INCX + 130 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( A( J, J ) ) + END IF + X( JX ) = TEMP + JX = JX + INCX + 140 CONTINUE + END IF + ELSE + IF( INCX.EQ.1 )THEN + DO 170, J = N, 1, -1 + TEMP = X( J ) + IF( NOCONJ )THEN + DO 150, I = N, J + 1, -1 + TEMP = TEMP - A( I, J )*X( I ) + 150 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + ELSE + DO 160, I = N, J + 1, -1 + TEMP = TEMP - DCONJG( A( I, J ) )*X( I ) + 160 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( A( J, J ) ) + END IF + X( J ) = TEMP + 170 CONTINUE + ELSE + KX = KX + ( N - 1 )*INCX + JX = KX + DO 200, J = N, 1, -1 + IX = KX + TEMP = X( JX ) + IF( NOCONJ )THEN + DO 180, I = N, J + 1, -1 + TEMP = TEMP - A( I, J )*X( IX ) + IX = IX - INCX + 180 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/A( J, J ) + ELSE + DO 190, I = N, J + 1, -1 + TEMP = TEMP - DCONJG( A( I, J ) )*X( IX ) + IX = IX - INCX + 190 CONTINUE + IF( NOUNIT ) + $ TEMP = TEMP/DCONJG( A( J, J ) ) + END IF + X( JX ) = TEMP + JX = JX - INCX + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of ZTRSV . +* + END diff --git a/costa/native/external/lapack/cbdsqr.f b/costa/native/external/lapack/cbdsqr.f new file mode 100644 index 000000000..6be44f18c --- /dev/null +++ b/costa/native/external/lapack/cbdsqr.f @@ -0,0 +1,733 @@ + SUBROUTINE CBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, + $ LDU, C, LDC, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU +* .. +* .. Array Arguments .. + REAL D( * ), E( * ), RWORK( * ) + COMPLEX C( LDC, * ), U( LDU, * ), VT( LDVT, * ) +* .. +* +* Purpose +* ======= +* +* CBDSQR computes the singular value decomposition (SVD) of a real +* N-by-N (upper or lower) bidiagonal matrix B: B = Q * S * P' (P' +* denotes the transpose of P), where S is a diagonal matrix with +* non-negative diagonal elements (the singular values of B), and Q +* and P are orthogonal matrices. +* +* The routine computes S, and optionally computes U * Q, P' * VT, +* or Q' * C, for given complex input matrices U, VT, and C. +* +* See "Computing Small Singular Values of Bidiagonal Matrices With +* Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, +* LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, +* no. 5, pp. 873-912, Sept 1990) and +* "Accurate singular values and differential qd algorithms," by +* B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics +* Department, University of California at Berkeley, July 1992 +* for a detailed description of the algorithm. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': B is upper bidiagonal; +* = 'L': B is lower bidiagonal. +* +* N (input) INTEGER +* The order of the matrix B. N >= 0. +* +* NCVT (input) INTEGER +* The number of columns of the matrix VT. NCVT >= 0. +* +* NRU (input) INTEGER +* The number of rows of the matrix U. NRU >= 0. +* +* NCC (input) INTEGER +* The number of columns of the matrix C. NCC >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the bidiagonal matrix B. +* On exit, if INFO=0, the singular values of B in decreasing +* order. +* +* E (input/output) REAL array, dimension (N) +* On entry, the elements of E contain the +* offdiagonal elements of of the bidiagonal matrix whose SVD +* is desired. On normal exit (INFO = 0), E is destroyed. +* If the algorithm does not converge (INFO > 0), D and E +* will contain the diagonal and superdiagonal elements of a +* bidiagonal matrix orthogonally equivalent to the one given +* as input. E(N) is used for workspace. +* +* VT (input/output) COMPLEX array, dimension (LDVT, NCVT) +* On entry, an N-by-NCVT matrix VT. +* On exit, VT is overwritten by P' * VT. +* VT is not referenced if NCVT = 0. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. +* LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. +* +* U (input/output) COMPLEX array, dimension (LDU, N) +* On entry, an NRU-by-N matrix U. +* On exit, U is overwritten by U * Q. +* U is not referenced if NRU = 0. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= max(1,NRU). +* +* C (input/output) COMPLEX array, dimension (LDC, NCC) +* On entry, an N-by-NCC matrix C. +* On exit, C is overwritten by Q' * C. +* C is not referenced if NCC = 0. +* +* LDC (input) INTEGER +* The leading dimension of the array C. +* LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. +* +* RWORK (workspace) REAL array, dimension (4*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: If INFO = -i, the i-th argument had an illegal value +* > 0: the algorithm did not converge; D and E contain the +* elements of a bidiagonal matrix which is orthogonally +* similar to the input matrix B; if INFO = i, i +* elements of E have not converged to zero. +* +* Internal Parameters +* =================== +* +* TOLMUL REAL, default = max(10,min(100,EPS**(-1/8))) +* TOLMUL controls the convergence criterion of the QR loop. +* If it is positive, TOLMUL*EPS is the desired relative +* precision in the computed singular values. +* If it is negative, abs(TOLMUL*EPS*sigma_max) is the +* desired absolute accuracy in the computed singular +* values (corresponds to relative accuracy +* abs(TOLMUL*EPS) in the largest singular value. +* abs(TOLMUL) should be between 1 and 1/EPS, and preferably +* between 10 (for fast convergence) and .1/EPS +* (for there to be some accuracy in the results). +* Default is to lose at either one eighth or 2 of the +* available decimal digits in each computed singular value +* (whichever is smaller). +* +* MAXITR INTEGER, default = 6 +* MAXITR controls the maximum number of passes of the +* algorithm through its inner loop. The algorithms stops +* (and so fails to converge) if the number of passes +* through the inner loop exceeds MAXITR*N**2. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + REAL NEGONE + PARAMETER ( NEGONE = -1.0E0 ) + REAL HNDRTH + PARAMETER ( HNDRTH = 0.01E0 ) + REAL TEN + PARAMETER ( TEN = 10.0E0 ) + REAL HNDRD + PARAMETER ( HNDRD = 100.0E0 ) + REAL MEIGTH + PARAMETER ( MEIGTH = -0.125E0 ) + INTEGER MAXITR + PARAMETER ( MAXITR = 6 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, ROTATE + INTEGER I, IDIR, ISUB, ITER, J, LL, LLL, M, MAXIT, NM1, + $ NM12, NM13, OLDLL, OLDM + REAL ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU, + $ OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL, + $ SINR, SLL, SMAX, SMIN, SMINL, SMINLO, SMINOA, + $ SN, THRESH, TOL, TOLMUL, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLASR, CSROT, CSSCAL, CSWAP, SLARTG, SLAS2, + $ SLASQ1, SLASV2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LOWER ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NCVT.LT.0 ) THEN + INFO = -3 + ELSE IF( NRU.LT.0 ) THEN + INFO = -4 + ELSE IF( NCC.LT.0 ) THEN + INFO = -5 + ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR. + $ ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN + INFO = -9 + ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN + INFO = -11 + ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR. + $ ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CBDSQR', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN + IF( N.EQ.1 ) + $ GO TO 160 +* +* ROTATE is true if any singular vectors desired, false otherwise +* + ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 ) +* +* If no singular vectors desired, use qd algorithm +* + IF( .NOT.ROTATE ) THEN + CALL SLASQ1( N, D, E, RWORK, INFO ) + RETURN + END IF +* + NM1 = N - 1 + NM12 = NM1 + NM1 + NM13 = NM12 + NM1 + IDIR = 0 +* +* Get machine constants +* + EPS = SLAMCH( 'Epsilon' ) + UNFL = SLAMCH( 'Safe minimum' ) +* +* If matrix lower bidiagonal, rotate to be upper bidiagonal +* by applying Givens rotations on the left +* + IF( LOWER ) THEN + DO 10 I = 1, N - 1 + CALL SLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + RWORK( I ) = CS + RWORK( NM1+I ) = SN + 10 CONTINUE +* +* Update singular vectors if desired +* + IF( NRU.GT.0 ) + $ CALL CLASR( 'R', 'V', 'F', NRU, N, RWORK( 1 ), RWORK( N ), + $ U, LDU ) + IF( NCC.GT.0 ) + $ CALL CLASR( 'L', 'V', 'F', N, NCC, RWORK( 1 ), RWORK( N ), + $ C, LDC ) + END IF +* +* Compute singular values to relative accuracy TOL +* (By setting TOL to be negative, algorithm will compute +* singular values to absolute accuracy ABS(TOL)*norm(input matrix)) +* + TOLMUL = MAX( TEN, MIN( HNDRD, EPS**MEIGTH ) ) + TOL = TOLMUL*EPS +* +* Compute approximate maximum, minimum singular values +* + SMAX = ZERO + DO 20 I = 1, N + SMAX = MAX( SMAX, ABS( D( I ) ) ) + 20 CONTINUE + DO 30 I = 1, N - 1 + SMAX = MAX( SMAX, ABS( E( I ) ) ) + 30 CONTINUE + SMINL = ZERO + IF( TOL.GE.ZERO ) THEN +* +* Relative accuracy desired +* + SMINOA = ABS( D( 1 ) ) + IF( SMINOA.EQ.ZERO ) + $ GO TO 50 + MU = SMINOA + DO 40 I = 2, N + MU = ABS( D( I ) )*( MU / ( MU+ABS( E( I-1 ) ) ) ) + SMINOA = MIN( SMINOA, MU ) + IF( SMINOA.EQ.ZERO ) + $ GO TO 50 + 40 CONTINUE + 50 CONTINUE + SMINOA = SMINOA / SQRT( REAL( N ) ) + THRESH = MAX( TOL*SMINOA, MAXITR*N*N*UNFL ) + ELSE +* +* Absolute accuracy desired +* + THRESH = MAX( ABS( TOL )*SMAX, MAXITR*N*N*UNFL ) + END IF +* +* Prepare for main iteration loop for the singular values +* (MAXIT is the maximum number of passes through the inner +* loop permitted before nonconvergence signalled.) +* + MAXIT = MAXITR*N*N + ITER = 0 + OLDLL = -1 + OLDM = -1 +* +* M points to last element of unconverged part of matrix +* + M = N +* +* Begin main iteration loop +* + 60 CONTINUE +* +* Check for convergence or exceeding iteration count +* + IF( M.LE.1 ) + $ GO TO 160 + IF( ITER.GT.MAXIT ) + $ GO TO 200 +* +* Find diagonal block of matrix to work on +* + IF( TOL.LT.ZERO .AND. ABS( D( M ) ).LE.THRESH ) + $ D( M ) = ZERO + SMAX = ABS( D( M ) ) + SMIN = SMAX + DO 70 LLL = 1, M - 1 + LL = M - LLL + ABSS = ABS( D( LL ) ) + ABSE = ABS( E( LL ) ) + IF( TOL.LT.ZERO .AND. ABSS.LE.THRESH ) + $ D( LL ) = ZERO + IF( ABSE.LE.THRESH ) + $ GO TO 80 + SMIN = MIN( SMIN, ABSS ) + SMAX = MAX( SMAX, ABSS, ABSE ) + 70 CONTINUE + LL = 0 + GO TO 90 + 80 CONTINUE + E( LL ) = ZERO +* +* Matrix splits since E(LL) = 0 +* + IF( LL.EQ.M-1 ) THEN +* +* Convergence of bottom singular value, return to top of loop +* + M = M - 1 + GO TO 60 + END IF + 90 CONTINUE + LL = LL + 1 +* +* E(LL) through E(M-1) are nonzero, E(LL-1) is zero +* + IF( LL.EQ.M-1 ) THEN +* +* 2 by 2 block, handle separately +* + CALL SLASV2( D( M-1 ), E( M-1 ), D( M ), SIGMN, SIGMX, SINR, + $ COSR, SINL, COSL ) + D( M-1 ) = SIGMX + E( M-1 ) = ZERO + D( M ) = SIGMN +* +* Compute singular vectors, if desired +* + IF( NCVT.GT.0 ) + $ CALL CSROT( NCVT, VT( M-1, 1 ), LDVT, VT( M, 1 ), LDVT, + $ COSR, SINR ) + IF( NRU.GT.0 ) + $ CALL CSROT( NRU, U( 1, M-1 ), 1, U( 1, M ), 1, COSL, SINL ) + IF( NCC.GT.0 ) + $ CALL CSROT( NCC, C( M-1, 1 ), LDC, C( M, 1 ), LDC, COSL, + $ SINL ) + M = M - 2 + GO TO 60 + END IF +* +* If working on new submatrix, choose shift direction +* (from larger end diagonal element towards smaller) +* + IF( LL.GT.OLDM .OR. M.LT.OLDLL ) THEN + IF( ABS( D( LL ) ).GE.ABS( D( M ) ) ) THEN +* +* Chase bulge from top (big end) to bottom (small end) +* + IDIR = 1 + ELSE +* +* Chase bulge from bottom (big end) to top (small end) +* + IDIR = 2 + END IF + END IF +* +* Apply convergence tests +* + IF( IDIR.EQ.1 ) THEN +* +* Run convergence test in forward direction +* First apply standard test to bottom of matrix +* + IF( ABS( E( M-1 ) ).LE.ABS( TOL )*ABS( D( M ) ) .OR. + $ ( TOL.LT.ZERO .AND. ABS( E( M-1 ) ).LE.THRESH ) ) THEN + E( M-1 ) = ZERO + GO TO 60 + END IF +* + IF( TOL.GE.ZERO ) THEN +* +* If relative accuracy desired, +* apply convergence criterion forward +* + MU = ABS( D( LL ) ) + SMINL = MU + DO 100 LLL = LL, M - 1 + IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN + E( LLL ) = ZERO + GO TO 60 + END IF + SMINLO = SMINL + MU = ABS( D( LLL+1 ) )*( MU / ( MU+ABS( E( LLL ) ) ) ) + SMINL = MIN( SMINL, MU ) + 100 CONTINUE + END IF +* + ELSE +* +* Run convergence test in backward direction +* First apply standard test to top of matrix +* + IF( ABS( E( LL ) ).LE.ABS( TOL )*ABS( D( LL ) ) .OR. + $ ( TOL.LT.ZERO .AND. ABS( E( LL ) ).LE.THRESH ) ) THEN + E( LL ) = ZERO + GO TO 60 + END IF +* + IF( TOL.GE.ZERO ) THEN +* +* If relative accuracy desired, +* apply convergence criterion backward +* + MU = ABS( D( M ) ) + SMINL = MU + DO 110 LLL = M - 1, LL, -1 + IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN + E( LLL ) = ZERO + GO TO 60 + END IF + SMINLO = SMINL + MU = ABS( D( LLL ) )*( MU / ( MU+ABS( E( LLL ) ) ) ) + SMINL = MIN( SMINL, MU ) + 110 CONTINUE + END IF + END IF + OLDLL = LL + OLDM = M +* +* Compute shift. First, test if shifting would ruin relative +* accuracy, and if so set the shift to zero. +* + IF( TOL.GE.ZERO .AND. N*TOL*( SMINL / SMAX ).LE. + $ MAX( EPS, HNDRTH*TOL ) ) THEN +* +* Use a zero shift to avoid loss of relative accuracy +* + SHIFT = ZERO + ELSE +* +* Compute the shift from 2-by-2 block at end of matrix +* + IF( IDIR.EQ.1 ) THEN + SLL = ABS( D( LL ) ) + CALL SLAS2( D( M-1 ), E( M-1 ), D( M ), SHIFT, R ) + ELSE + SLL = ABS( D( M ) ) + CALL SLAS2( D( LL ), E( LL ), D( LL+1 ), SHIFT, R ) + END IF +* +* Test if shift negligible, and if so set to zero +* + IF( SLL.GT.ZERO ) THEN + IF( ( SHIFT / SLL )**2.LT.EPS ) + $ SHIFT = ZERO + END IF + END IF +* +* Increment iteration count +* + ITER = ITER + M - LL +* +* If SHIFT = 0, do simplified QR iteration +* + IF( SHIFT.EQ.ZERO ) THEN + IF( IDIR.EQ.1 ) THEN +* +* Chase bulge from top to bottom +* Save cosines and sines for later singular vector updates +* + CS = ONE + OLDCS = ONE + DO 120 I = LL, M - 1 + CALL SLARTG( D( I )*CS, E( I ), CS, SN, R ) + IF( I.GT.LL ) + $ E( I-1 ) = OLDSN*R + CALL SLARTG( OLDCS*R, D( I+1 )*SN, OLDCS, OLDSN, D( I ) ) + RWORK( I-LL+1 ) = CS + RWORK( I-LL+1+NM1 ) = SN + RWORK( I-LL+1+NM12 ) = OLDCS + RWORK( I-LL+1+NM13 ) = OLDSN + 120 CONTINUE + H = D( M )*CS + D( M ) = H*OLDCS + E( M-1 ) = H*OLDSN +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL CLASR( 'L', 'V', 'F', M-LL+1, NCVT, RWORK( 1 ), + $ RWORK( N ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL CLASR( 'R', 'V', 'F', NRU, M-LL+1, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL CLASR( 'L', 'V', 'F', M-LL+1, NCC, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( M-1 ) ).LE.THRESH ) + $ E( M-1 ) = ZERO +* + ELSE +* +* Chase bulge from bottom to top +* Save cosines and sines for later singular vector updates +* + CS = ONE + OLDCS = ONE + DO 130 I = M, LL + 1, -1 + CALL SLARTG( D( I )*CS, E( I-1 ), CS, SN, R ) + IF( I.LT.M ) + $ E( I ) = OLDSN*R + CALL SLARTG( OLDCS*R, D( I-1 )*SN, OLDCS, OLDSN, D( I ) ) + RWORK( I-LL ) = CS + RWORK( I-LL+NM1 ) = -SN + RWORK( I-LL+NM12 ) = OLDCS + RWORK( I-LL+NM13 ) = -OLDSN + 130 CONTINUE + H = D( LL )*CS + D( LL ) = H*OLDCS + E( LL ) = H*OLDSN +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL CLASR( 'L', 'V', 'B', M-LL+1, NCVT, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL CLASR( 'R', 'V', 'B', NRU, M-LL+1, RWORK( 1 ), + $ RWORK( N ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL CLASR( 'L', 'V', 'B', M-LL+1, NCC, RWORK( 1 ), + $ RWORK( N ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( LL ) ).LE.THRESH ) + $ E( LL ) = ZERO + END IF + ELSE +* +* Use nonzero shift +* + IF( IDIR.EQ.1 ) THEN +* +* Chase bulge from top to bottom +* Save cosines and sines for later singular vector updates +* + F = ( ABS( D( LL ) )-SHIFT )* + $ ( SIGN( ONE, D( LL ) )+SHIFT / D( LL ) ) + G = E( LL ) + DO 140 I = LL, M - 1 + CALL SLARTG( F, G, COSR, SINR, R ) + IF( I.GT.LL ) + $ E( I-1 ) = R + F = COSR*D( I ) + SINR*E( I ) + E( I ) = COSR*E( I ) - SINR*D( I ) + G = SINR*D( I+1 ) + D( I+1 ) = COSR*D( I+1 ) + CALL SLARTG( F, G, COSL, SINL, R ) + D( I ) = R + F = COSL*E( I ) + SINL*D( I+1 ) + D( I+1 ) = COSL*D( I+1 ) - SINL*E( I ) + IF( I.LT.M-1 ) THEN + G = SINL*E( I+1 ) + E( I+1 ) = COSL*E( I+1 ) + END IF + RWORK( I-LL+1 ) = COSR + RWORK( I-LL+1+NM1 ) = SINR + RWORK( I-LL+1+NM12 ) = COSL + RWORK( I-LL+1+NM13 ) = SINL + 140 CONTINUE + E( M-1 ) = F +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL CLASR( 'L', 'V', 'F', M-LL+1, NCVT, RWORK( 1 ), + $ RWORK( N ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL CLASR( 'R', 'V', 'F', NRU, M-LL+1, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL CLASR( 'L', 'V', 'F', M-LL+1, NCC, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( M-1 ) ).LE.THRESH ) + $ E( M-1 ) = ZERO +* + ELSE +* +* Chase bulge from bottom to top +* Save cosines and sines for later singular vector updates +* + F = ( ABS( D( M ) )-SHIFT )*( SIGN( ONE, D( M ) )+SHIFT / + $ D( M ) ) + G = E( M-1 ) + DO 150 I = M, LL + 1, -1 + CALL SLARTG( F, G, COSR, SINR, R ) + IF( I.LT.M ) + $ E( I ) = R + F = COSR*D( I ) + SINR*E( I-1 ) + E( I-1 ) = COSR*E( I-1 ) - SINR*D( I ) + G = SINR*D( I-1 ) + D( I-1 ) = COSR*D( I-1 ) + CALL SLARTG( F, G, COSL, SINL, R ) + D( I ) = R + F = COSL*E( I-1 ) + SINL*D( I-1 ) + D( I-1 ) = COSL*D( I-1 ) - SINL*E( I-1 ) + IF( I.GT.LL+1 ) THEN + G = SINL*E( I-2 ) + E( I-2 ) = COSL*E( I-2 ) + END IF + RWORK( I-LL ) = COSR + RWORK( I-LL+NM1 ) = -SINR + RWORK( I-LL+NM12 ) = COSL + RWORK( I-LL+NM13 ) = -SINL + 150 CONTINUE + E( LL ) = F +* +* Test convergence +* + IF( ABS( E( LL ) ).LE.THRESH ) + $ E( LL ) = ZERO +* +* Update singular vectors if desired +* + IF( NCVT.GT.0 ) + $ CALL CLASR( 'L', 'V', 'B', M-LL+1, NCVT, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL CLASR( 'R', 'V', 'B', NRU, M-LL+1, RWORK( 1 ), + $ RWORK( N ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL CLASR( 'L', 'V', 'B', M-LL+1, NCC, RWORK( 1 ), + $ RWORK( N ), C( LL, 1 ), LDC ) + END IF + END IF +* +* QR iteration finished, go back and check convergence +* + GO TO 60 +* +* All singular values converged, so make them positive +* + 160 CONTINUE + DO 170 I = 1, N + IF( D( I ).LT.ZERO ) THEN + D( I ) = -D( I ) +* +* Change sign of singular vectors, if desired +* + IF( NCVT.GT.0 ) + $ CALL CSSCAL( NCVT, NEGONE, VT( I, 1 ), LDVT ) + END IF + 170 CONTINUE +* +* Sort the singular values into decreasing order (insertion sort on +* singular values, but only one transposition per singular vector) +* + DO 190 I = 1, N - 1 +* +* Scan for smallest D(I) +* + ISUB = 1 + SMIN = D( 1 ) + DO 180 J = 2, N + 1 - I + IF( D( J ).LE.SMIN ) THEN + ISUB = J + SMIN = D( J ) + END IF + 180 CONTINUE + IF( ISUB.NE.N+1-I ) THEN +* +* Swap singular values and vectors +* + D( ISUB ) = D( N+1-I ) + D( N+1-I ) = SMIN + IF( NCVT.GT.0 ) + $ CALL CSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( N+1-I, 1 ), + $ LDVT ) + IF( NRU.GT.0 ) + $ CALL CSWAP( NRU, U( 1, ISUB ), 1, U( 1, N+1-I ), 1 ) + IF( NCC.GT.0 ) + $ CALL CSWAP( NCC, C( ISUB, 1 ), LDC, C( N+1-I, 1 ), LDC ) + END IF + 190 CONTINUE + GO TO 220 +* +* Maximum number of iterations exceeded, failure to converge +* + 200 CONTINUE + INFO = 0 + DO 210 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 210 CONTINUE + 220 CONTINUE + RETURN +* +* End of CBDSQR +* + END diff --git a/costa/native/external/lapack/cgbbrd.f b/costa/native/external/lapack/cgbbrd.f new file mode 100644 index 000000000..6546788a1 --- /dev/null +++ b/costa/native/external/lapack/cgbbrd.f @@ -0,0 +1,466 @@ + SUBROUTINE CGBBRD( VECT, M, N, NCC, KL, KU, AB, LDAB, D, E, Q, + $ LDQ, PT, LDPT, C, LDC, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER VECT + INTEGER INFO, KL, KU, LDAB, LDC, LDPT, LDQ, M, N, NCC +* .. +* .. Array Arguments .. + REAL D( * ), E( * ), RWORK( * ) + COMPLEX AB( LDAB, * ), C( LDC, * ), PT( LDPT, * ), + $ Q( LDQ, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGBBRD reduces a complex general m-by-n band matrix A to real upper +* bidiagonal form B by a unitary transformation: Q' * A * P = B. +* +* The routine computes B, and optionally forms Q or P', or computes +* Q'*C for a given matrix C. +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* Specifies whether or not the matrices Q and P' are to be +* formed. +* = 'N': do not form Q or P'; +* = 'Q': form Q only; +* = 'P': form P' only; +* = 'B': form both. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NCC (input) INTEGER +* The number of columns of the matrix C. NCC >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals of the matrix A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals of the matrix A. KU >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB,N) +* On entry, the m-by-n band matrix A, stored in rows 1 to +* KL+KU+1. The j-th column of A is stored in the j-th column of +* the array AB as follows: +* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). +* On exit, A is overwritten by values generated during the +* reduction. +* +* LDAB (input) INTEGER +* The leading dimension of the array A. LDAB >= KL+KU+1. +* +* D (output) REAL array, dimension (min(M,N)) +* The diagonal elements of the bidiagonal matrix B. +* +* E (output) REAL array, dimension (min(M,N)-1) +* The superdiagonal elements of the bidiagonal matrix B. +* +* Q (output) COMPLEX array, dimension (LDQ,M) +* If VECT = 'Q' or 'B', the m-by-m unitary matrix Q. +* If VECT = 'N' or 'P', the array Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. +* LDQ >= max(1,M) if VECT = 'Q' or 'B'; LDQ >= 1 otherwise. +* +* PT (output) COMPLEX array, dimension (LDPT,N) +* If VECT = 'P' or 'B', the n-by-n unitary matrix P'. +* If VECT = 'N' or 'Q', the array PT is not referenced. +* +* LDPT (input) INTEGER +* The leading dimension of the array PT. +* LDPT >= max(1,N) if VECT = 'P' or 'B'; LDPT >= 1 otherwise. +* +* C (input/output) COMPLEX array, dimension (LDC,NCC) +* On entry, an m-by-ncc matrix C. +* On exit, C is overwritten by Q'*C. +* C is not referenced if NCC = 0. +* +* LDC (input) INTEGER +* The leading dimension of the array C. +* LDC >= max(1,M) if NCC > 0; LDC >= 1 if NCC = 0. +* +* WORK (workspace) COMPLEX array, dimension (max(M,N)) +* +* RWORK (workspace) REAL array, dimension (max(M,N)) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL WANTB, WANTC, WANTPT, WANTQ + INTEGER I, INCA, J, J1, J2, KB, KB1, KK, KLM, KLU1, + $ KUN, L, MINMN, ML, ML0, MU, MU0, NR, NRT + REAL ABST, RC + COMPLEX RA, RB, RS, T +* .. +* .. External Subroutines .. + EXTERNAL CLARGV, CLARTG, CLARTV, CLASET, CROT, CSCAL, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTB = LSAME( VECT, 'B' ) + WANTQ = LSAME( VECT, 'Q' ) .OR. WANTB + WANTPT = LSAME( VECT, 'P' ) .OR. WANTB + WANTC = NCC.GT.0 + KLU1 = KL + KU + 1 + INFO = 0 + IF( .NOT.WANTQ .AND. .NOT.WANTPT .AND. .NOT.LSAME( VECT, 'N' ) ) + $ THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NCC.LT.0 ) THEN + INFO = -4 + ELSE IF( KL.LT.0 ) THEN + INFO = -5 + ELSE IF( KU.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KLU1 ) THEN + INFO = -8 + ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. LDQ.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDPT.LT.1 .OR. WANTPT .AND. LDPT.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDC.LT.1 .OR. WANTC .AND. LDC.LT.MAX( 1, M ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBBRD', -INFO ) + RETURN + END IF +* +* Initialize Q and P' to the unit matrix, if needed +* + IF( WANTQ ) + $ CALL CLASET( 'Full', M, M, CZERO, CONE, Q, LDQ ) + IF( WANTPT ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, PT, LDPT ) +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + MINMN = MIN( M, N ) +* + IF( KL+KU.GT.1 ) THEN +* +* Reduce to upper bidiagonal form if KU > 0; if KU = 0, reduce +* first to lower bidiagonal form and then transform to upper +* bidiagonal +* + IF( KU.GT.0 ) THEN + ML0 = 1 + MU0 = 2 + ELSE + ML0 = 2 + MU0 = 1 + END IF +* +* Wherever possible, plane rotations are generated and applied in +* vector operations of length NR over the index set J1:J2:KLU1. +* +* The complex sines of the plane rotations are stored in WORK, +* and the real cosines in RWORK. +* + KLM = MIN( M-1, KL ) + KUN = MIN( N-1, KU ) + KB = KLM + KUN + KB1 = KB + 1 + INCA = KB1*LDAB + NR = 0 + J1 = KLM + 2 + J2 = 1 - KUN +* + DO 90 I = 1, MINMN +* +* Reduce i-th column and i-th row of matrix to bidiagonal form +* + ML = KLM + 1 + MU = KUN + 1 + DO 80 KK = 1, KB + J1 = J1 + KB + J2 = J2 + KB +* +* generate plane rotations to annihilate nonzero elements +* which have been created below the band +* + IF( NR.GT.0 ) + $ CALL CLARGV( NR, AB( KLU1, J1-KLM-1 ), INCA, + $ WORK( J1 ), KB1, RWORK( J1 ), KB1 ) +* +* apply plane rotations from the left +* + DO 10 L = 1, KB + IF( J2-KLM+L-1.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( KLU1-L, J1-KLM+L-1 ), INCA, + $ AB( KLU1-L+1, J1-KLM+L-1 ), INCA, + $ RWORK( J1 ), WORK( J1 ), KB1 ) + 10 CONTINUE +* + IF( ML.GT.ML0 ) THEN + IF( ML.LE.M-I+1 ) THEN +* +* generate plane rotation to annihilate a(i+ml-1,i) +* within the band, and apply rotation from the left +* + CALL CLARTG( AB( KU+ML-1, I ), AB( KU+ML, I ), + $ RWORK( I+ML-1 ), WORK( I+ML-1 ), RA ) + AB( KU+ML-1, I ) = RA + IF( I.LT.N ) + $ CALL CROT( MIN( KU+ML-2, N-I ), + $ AB( KU+ML-2, I+1 ), LDAB-1, + $ AB( KU+ML-1, I+1 ), LDAB-1, + $ RWORK( I+ML-1 ), WORK( I+ML-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KB1 + END IF +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + DO 20 J = J1, J2, KB1 + CALL CROT( M, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ RWORK( J ), CONJG( WORK( J ) ) ) + 20 CONTINUE + END IF +* + IF( WANTC ) THEN +* +* apply plane rotations to C +* + DO 30 J = J1, J2, KB1 + CALL CROT( NCC, C( J-1, 1 ), LDC, C( J, 1 ), LDC, + $ RWORK( J ), WORK( J ) ) + 30 CONTINUE + END IF +* + IF( J2+KUN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KB1 + END IF +* + DO 40 J = J1, J2, KB1 +* +* create nonzero element a(j-1,j+ku) above the band +* and store it in WORK(n+1:2*n) +* + WORK( J+KUN ) = WORK( J )*AB( 1, J+KUN ) + AB( 1, J+KUN ) = RWORK( J )*AB( 1, J+KUN ) + 40 CONTINUE +* +* generate plane rotations to annihilate nonzero elements +* which have been generated above the band +* + IF( NR.GT.0 ) + $ CALL CLARGV( NR, AB( 1, J1+KUN-1 ), INCA, + $ WORK( J1+KUN ), KB1, RWORK( J1+KUN ), + $ KB1 ) +* +* apply plane rotations from the right +* + DO 50 L = 1, KB + IF( J2+L-1.GT.M ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( L+1, J1+KUN-1 ), INCA, + $ AB( L, J1+KUN ), INCA, + $ RWORK( J1+KUN ), WORK( J1+KUN ), KB1 ) + 50 CONTINUE +* + IF( ML.EQ.ML0 .AND. MU.GT.MU0 ) THEN + IF( MU.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i,i+mu-1) +* within the band, and apply rotation from the right +* + CALL CLARTG( AB( KU-MU+3, I+MU-2 ), + $ AB( KU-MU+2, I+MU-1 ), + $ RWORK( I+MU-1 ), WORK( I+MU-1 ), RA ) + AB( KU-MU+3, I+MU-2 ) = RA + CALL CROT( MIN( KL+MU-2, M-I ), + $ AB( KU-MU+4, I+MU-2 ), 1, + $ AB( KU-MU+3, I+MU-1 ), 1, + $ RWORK( I+MU-1 ), WORK( I+MU-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KB1 + END IF +* + IF( WANTPT ) THEN +* +* accumulate product of plane rotations in P' +* + DO 60 J = J1, J2, KB1 + CALL CROT( N, PT( J+KUN-1, 1 ), LDPT, + $ PT( J+KUN, 1 ), LDPT, RWORK( J+KUN ), + $ CONJG( WORK( J+KUN ) ) ) + 60 CONTINUE + END IF +* + IF( J2+KB.GT.M ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KB1 + END IF +* + DO 70 J = J1, J2, KB1 +* +* create nonzero element a(j+kl+ku,j+ku-1) below the +* band and store it in WORK(1:n) +* + WORK( J+KB ) = WORK( J+KUN )*AB( KLU1, J+KUN ) + AB( KLU1, J+KUN ) = RWORK( J+KUN )*AB( KLU1, J+KUN ) + 70 CONTINUE +* + IF( ML.GT.ML0 ) THEN + ML = ML - 1 + ELSE + MU = MU - 1 + END IF + 80 CONTINUE + 90 CONTINUE + END IF +* + IF( KU.EQ.0 .AND. KL.GT.0 ) THEN +* +* A has been reduced to complex lower bidiagonal form +* +* Transform lower bidiagonal form to upper bidiagonal by applying +* plane rotations from the left, overwriting superdiagonal +* elements on subdiagonal elements +* + DO 100 I = 1, MIN( M-1, N ) + CALL CLARTG( AB( 1, I ), AB( 2, I ), RC, RS, RA ) + AB( 1, I ) = RA + IF( I.LT.N ) THEN + AB( 2, I ) = RS*AB( 1, I+1 ) + AB( 1, I+1 ) = RC*AB( 1, I+1 ) + END IF + IF( WANTQ ) + $ CALL CROT( M, Q( 1, I ), 1, Q( 1, I+1 ), 1, RC, + $ CONJG( RS ) ) + IF( WANTC ) + $ CALL CROT( NCC, C( I, 1 ), LDC, C( I+1, 1 ), LDC, RC, + $ RS ) + 100 CONTINUE + ELSE +* +* A has been reduced to complex upper bidiagonal form or is +* diagonal +* + IF( KU.GT.0 .AND. M.LT.N ) THEN +* +* Annihilate a(m,m+1) by applying plane rotations from the +* right +* + RB = AB( KU, M+1 ) + DO 110 I = M, 1, -1 + CALL CLARTG( AB( KU+1, I ), RB, RC, RS, RA ) + AB( KU+1, I ) = RA + IF( I.GT.1 ) THEN + RB = -CONJG( RS )*AB( KU, I ) + AB( KU, I ) = RC*AB( KU, I ) + END IF + IF( WANTPT ) + $ CALL CROT( N, PT( I, 1 ), LDPT, PT( M+1, 1 ), LDPT, + $ RC, CONJG( RS ) ) + 110 CONTINUE + END IF + END IF +* +* Make diagonal and superdiagonal elements real, storing them in D +* and E +* + T = AB( KU+1, 1 ) + DO 120 I = 1, MINMN + ABST = ABS( T ) + D( I ) = ABST + IF( ABST.NE.ZERO ) THEN + T = T / ABST + ELSE + T = CONE + END IF + IF( WANTQ ) + $ CALL CSCAL( M, T, Q( 1, I ), 1 ) + IF( WANTC ) + $ CALL CSCAL( NCC, CONJG( T ), C( I, 1 ), LDC ) + IF( I.LT.MINMN ) THEN + IF( KU.EQ.0 .AND. KL.EQ.0 ) THEN + E( I ) = ZERO + T = AB( 1, I+1 ) + ELSE + IF( KU.EQ.0 ) THEN + T = AB( 2, I )*CONJG( T ) + ELSE + T = AB( KU, I+1 )*CONJG( T ) + END IF + ABST = ABS( T ) + E( I ) = ABST + IF( ABST.NE.ZERO ) THEN + T = T / ABST + ELSE + T = CONE + END IF + IF( WANTPT ) + $ CALL CSCAL( N, T, PT( I+1, 1 ), LDPT ) + T = AB( KU+1, I+1 )*CONJG( T ) + END IF + END IF + 120 CONTINUE + RETURN +* +* End of CGBBRD +* + END diff --git a/costa/native/external/lapack/cgbcon.f b/costa/native/external/lapack/cgbcon.f new file mode 100644 index 000000000..4cf580f61 --- /dev/null +++ b/costa/native/external/lapack/cgbcon.f @@ -0,0 +1,230 @@ + SUBROUTINE CGBCON( NORM, N, KL, KU, AB, LDAB, IPIV, ANORM, RCOND, + $ WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, KL, KU, LDAB, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL RWORK( * ) + COMPLEX AB( LDAB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGBCON estimates the reciprocal of the condition number of a complex +* general band matrix A, in either the 1-norm or the infinity-norm, +* using the LU factorization computed by CGBTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input) COMPLEX array, dimension (LDAB,N) +* Details of the LU factorization of the band matrix A, as +* computed by CGBTRF. U is stored as an upper triangular band +* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +* the multipliers used during the factorization are stored in +* rows KL+KU+2 to 2*KL+KU+1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= N, row i of the matrix was +* interchanged with row IPIV(i). +* +* ANORM (input) REAL +* If NORM = '1' or 'O', the 1-norm of the original matrix A. +* If NORM = 'I', the infinity-norm of the original matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LNOTI, ONENRM + CHARACTER NORMIN + INTEGER IX, J, JP, KASE, KASE1, KD, LM + REAL AINVNM, SCALE, SMLNUM + COMPLEX T, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH + COMPLEX CDOTC + EXTERNAL LSAME, ICAMAX, SLAMCH, CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CLACON, CLATBS, CSRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN + INFO = -6 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KD = KL + KU + 1 + LNOTI = KL.GT.0 + KASE = 0 + 10 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + IF( LNOTI ) THEN + DO 20 J = 1, N - 1 + LM = MIN( KL, N-J ) + JP = IPIV( J ) + T = WORK( JP ) + IF( JP.NE.J ) THEN + WORK( JP ) = WORK( J ) + WORK( J ) = T + END IF + CALL CAXPY( LM, -T, AB( KD+1, J ), 1, WORK( J+1 ), 1 ) + 20 CONTINUE + END IF +* +* Multiply by inv(U). +* + CALL CLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ KL+KU, AB, LDAB, WORK, SCALE, RWORK, INFO ) + ELSE +* +* Multiply by inv(U'). +* + CALL CLATBS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, KL+KU, AB, LDAB, WORK, SCALE, RWORK, + $ INFO ) +* +* Multiply by inv(L'). +* + IF( LNOTI ) THEN + DO 30 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + WORK( J ) = WORK( J ) - CDOTC( LM, AB( KD+1, J ), 1, + $ WORK( J+1 ), 1 ) + JP = IPIV( J ) + IF( JP.NE.J ) THEN + T = WORK( JP ) + WORK( JP ) = WORK( J ) + WORK( J ) = T + END IF + 30 CONTINUE + END IF + END IF +* +* Divide X by 1/SCALE if doing so will not cause overflow. +* + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = ICAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 40 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 40 CONTINUE + RETURN +* +* End of CGBCON +* + END diff --git a/costa/native/external/lapack/cgbequ.f b/costa/native/external/lapack/cgbequ.f new file mode 100644 index 000000000..bb4de99fd --- /dev/null +++ b/costa/native/external/lapack/cgbequ.f @@ -0,0 +1,248 @@ + SUBROUTINE CGBEQU( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N + REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + REAL C( * ), R( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* CGBEQU computes row and column scalings intended to equilibrate an +* M-by-N band matrix A and reduce its condition number. R returns the +* row scale factors and C the column scale factors, chosen to try to +* make the largest element in each row and column of the matrix B with +* elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. +* +* R(i) and C(j) are restricted to be between SMLNUM = smallest safe +* number and BIGNUM = largest safe number. Use of these scaling +* factors is not guaranteed to reduce the condition number of A but +* works well in practice. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input) COMPLEX array, dimension (LDAB,N) +* The band matrix A, stored in rows 1 to KL+KU+1. The j-th +* column of A is stored in the j-th column of the array AB as +* follows: +* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KL+KU+1. +* +* R (output) REAL array, dimension (M) +* If INFO = 0, or INFO > M, R contains the row scale factors +* for A. +* +* C (output) REAL array, dimension (N) +* If INFO = 0, C contains the column scale factors for A. +* +* ROWCND (output) REAL +* If INFO = 0 or INFO > M, ROWCND contains the ratio of the +* smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +* AMAX is neither too large nor too small, it is not worth +* scaling by R. +* +* COLCND (output) REAL +* If INFO = 0, COLCND contains the ratio of the smallest +* C(i) to the largest C(i). If COLCND >= 0.1, it is not +* worth scaling by C. +* +* AMAX (output) REAL +* Absolute value of largest matrix element. If AMAX is very +* close to overflow or very close to underflow, the matrix +* should be scaled. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= M: the i-th row of A is exactly zero +* > M: the (i-M)-th column of A is exactly zero +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, KD + REAL BIGNUM, RCMAX, RCMIN, SMLNUM + COMPLEX ZDUM +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + KD = KU + 1 + DO 30 J = 1, N + DO 20 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + R( I ) = MAX( R( I ), CABS1( AB( KD+I-J, J ) ) ) + 20 CONTINUE + 30 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)) +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + KD = KU + 1 + DO 90 J = 1, N + DO 80 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + C( J ) = MAX( C( J ), CABS1( AB( KD+I-J, J ) )*R( I ) ) + 80 CONTINUE + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)) +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of CGBEQU +* + END diff --git a/costa/native/external/lapack/cgbrfs.f b/costa/native/external/lapack/cgbrfs.f new file mode 100644 index 000000000..b2137a348 --- /dev/null +++ b/costa/native/external/lapack/cgbrfs.f @@ -0,0 +1,361 @@ + SUBROUTINE CGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, + $ IPIV, B, LDB, X, LDX, FERR, BERR, WORK, RWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CGBRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is banded, and provides +* error bounds and backward error estimates for the solution. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AB (input) COMPLEX array, dimension (LDAB,N) +* The original band matrix A, stored in rows 1 to KL+KU+1. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KL+KU+1. +* +* AFB (input) COMPLEX array, dimension (LDAFB,N) +* Details of the LU factorization of the band matrix A, as +* computed by CGBTRF. U is stored as an upper triangular band +* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +* the multipliers used during the factorization are stored in +* rows KL+KU+2 to 2*KL+KU+1. +* +* LDAFB (input) INTEGER +* The leading dimension of the array AFB. LDAFB >= 2*KL*KU+1. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from CGBTRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* B (input) COMPLEX array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) COMPLEX array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by CGBTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANSN, TRANST + INTEGER COUNT, I, J, K, KASE, KK, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGBMV, CGBTRS, CLACON, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, MIN, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -7 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -9 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = MIN( KL+KU+2, N+1 ) + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL CCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL CGBMV( TRANS, N, N, KL, KU, -CONE, AB, LDAB, X( 1, J ), 1, + $ CONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(op(A))*abs(X) + abs(B). +* + IF( NOTRAN ) THEN + DO 50 K = 1, N + KK = KU + 1 - K + XK = CABS1( X( K, J ) ) + DO 40 I = MAX( 1, K-KU ), MIN( N, K+KL ) + RWORK( I ) = RWORK( I ) + CABS1( AB( KK+I, K ) )*XK + 40 CONTINUE + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + KK = KU + 1 - K + DO 60 I = MAX( 1, K-KU ), MIN( N, K+KL ) + S = S + CABS1( AB( KK+I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL CGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV, WORK, N, + $ INFO ) + CALL CAXPY( N, CONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL CGBTRS( TRANST, N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL CGBTRS( TRANSN, N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of CGBRFS +* + END diff --git a/costa/native/external/lapack/cgbsv.f b/costa/native/external/lapack/cgbsv.f new file mode 100644 index 000000000..ad5630ed5 --- /dev/null +++ b/costa/native/external/lapack/cgbsv.f @@ -0,0 +1,143 @@ + SUBROUTINE CGBSV( N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AB( LDAB, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CGBSV computes the solution to a complex system of linear equations +* A * X = B, where A is a band matrix of order N with KL subdiagonals +* and KU superdiagonals, and X and B are N-by-NRHS matrices. +* +* The LU decomposition with partial pivoting and row interchanges is +* used to factor A as A = L * U, where L is a product of permutation +* and unit lower triangular matrices with KL subdiagonals, and U is +* upper triangular with KL+KU superdiagonals. The factored form of A +* is then used to solve the system of equations A * X = B. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows KL+1 to +* 2*KL+KU+1; rows 1 to KL of the array need not be set. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(KL+KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+KL) +* On exit, details of the factorization: U is stored as an +* upper triangular band matrix with KL+KU superdiagonals in +* rows 1 to KL+KU+1, and the multipliers used during the +* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +* See below for further details. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (output) INTEGER array, dimension (N) +* The pivot indices that define the permutation matrix P; +* row i of the matrix was interchanged with row IPIV(i). +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and the solution has not been computed. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* M = N = 6, KL = 2, KU = 1: +* +* On entry: On exit: +* +* * * * + + + * * * u14 u25 u36 +* * * + + + + * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +* a31 a42 a53 a64 * * m31 m42 m53 m64 * * +* +* Array elements marked * are not used by the routine; elements marked +* + need not be set on entry, but are required by the routine to store +* elements of U because of fill-in resulting from the row interchanges. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL CGBTRF, CGBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( KL.LT.0 ) THEN + INFO = -2 + ELSE IF( KU.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBSV ', -INFO ) + RETURN + END IF +* +* Compute the LU factorization of the band matrix A. +* + CALL CGBTRF( N, N, KL, KU, AB, LDAB, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CGBTRS( 'No transpose', N, KL, KU, NRHS, AB, LDAB, IPIV, + $ B, LDB, INFO ) + END IF + RETURN +* +* End of CGBSV +* + END diff --git a/costa/native/external/lapack/cgbsvx.f b/costa/native/external/lapack/cgbsvx.f new file mode 100644 index 000000000..78289c60d --- /dev/null +++ b/costa/native/external/lapack/cgbsvx.f @@ -0,0 +1,518 @@ + SUBROUTINE CGBSVX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, + $ LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, + $ RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), C( * ), FERR( * ), R( * ), + $ RWORK( * ) + COMPLEX AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CGBSVX uses the LU factorization to compute the solution to a complex +* system of linear equations A * X = B, A**T * X = B, or A**H * X = B, +* where A is a band matrix of order N with KL subdiagonals and KU +* superdiagonals, and X and B are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed by this subroutine: +* +* 1. If FACT = 'E', real scaling factors are computed to equilibrate +* the system: +* TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +* TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +* TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +* Whether or not the system will be equilibrated depends on the +* scaling of the matrix A, but if equilibration is used, A is +* overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +* or diag(C)*B (if TRANS = 'T' or 'C'). +* +* 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the +* matrix A (after equilibration if FACT = 'E') as +* A = L * U, +* where L is a product of permutation and unit lower triangular +* matrices with KL subdiagonals, and U is upper triangular with +* KL+KU superdiagonals. +* +* 3. If some U(i,i)=0, so that U is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 4. The system of equations is solved for X using the factored form +* of A. +* +* 5. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* 6. If equilibration was used, the matrix X is premultiplied by +* diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +* that it solves the original system before equilibration. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of the matrix A is +* supplied on entry, and if not, whether the matrix A should be +* equilibrated before it is factored. +* = 'F': On entry, AFB and IPIV contain the factored form of +* A. If EQUED is not 'N', the matrix A has been +* equilibrated with scaling factors given by R and C. +* AB, AFB, and IPIV are not modified. +* = 'N': The matrix A will be copied to AFB and factored. +* = 'E': The matrix A will be equilibrated if necessary, then +* copied to AFB and factored. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations. +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +* +* If FACT = 'F' and EQUED is not 'N', then A must have been +* equilibrated by the scaling factors in R and/or C. AB is not +* modified if FACT = 'F' or 'N', or if FACT = 'E' and +* EQUED = 'N' on exit. +* +* On exit, if EQUED .ne. 'N', A is scaled as follows: +* EQUED = 'R': A := diag(R) * A +* EQUED = 'C': A := A * diag(C) +* EQUED = 'B': A := diag(R) * A * diag(C). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KL+KU+1. +* +* AFB (input or output) COMPLEX array, dimension (LDAFB,N) +* If FACT = 'F', then AFB is an input argument and on entry +* contains details of the LU factorization of the band matrix +* A, as computed by CGBTRF. U is stored as an upper triangular +* band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, +* and the multipliers used during the factorization are stored +* in rows KL+KU+2 to 2*KL+KU+1. If EQUED .ne. 'N', then AFB is +* the factored form of the equilibrated matrix A. +* +* If FACT = 'N', then AFB is an output argument and on exit +* returns details of the LU factorization of A. +* +* If FACT = 'E', then AFB is an output argument and on exit +* returns details of the LU factorization of the equilibrated +* matrix A (see the description of AB for the form of the +* equilibrated matrix). +* +* LDAFB (input) INTEGER +* The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains the pivot indices from the factorization A = L*U +* as computed by CGBTRF; row i of the matrix was interchanged +* with row IPIV(i). +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains the pivot indices from the factorization A = L*U +* of the original matrix A. +* +* If FACT = 'E', then IPIV is an output argument and on exit +* contains the pivot indices from the factorization A = L*U +* of the equilibrated matrix A. +* +* EQUED (input or output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration (always true if FACT = 'N'). +* = 'R': Row equilibration, i.e., A has been premultiplied by +* diag(R). +* = 'C': Column equilibration, i.e., A has been postmultiplied +* by diag(C). +* = 'B': Both row and column equilibration, i.e., A has been +* replaced by diag(R) * A * diag(C). +* EQUED is an input argument if FACT = 'F'; otherwise, it is an +* output argument. +* +* R (input or output) REAL array, dimension (N) +* The row scale factors for A. If EQUED = 'R' or 'B', A is +* multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +* is not accessed. R is an input argument if FACT = 'F'; +* otherwise, R is an output argument. If FACT = 'F' and +* EQUED = 'R' or 'B', each element of R must be positive. +* +* C (input or output) REAL array, dimension (N) +* The column scale factors for A. If EQUED = 'C' or 'B', A is +* multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +* is not accessed. C is an input argument if FACT = 'F'; +* otherwise, C is an output argument. If FACT = 'F' and +* EQUED = 'C' or 'B', each element of C must be positive. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, +* if EQUED = 'N', B is not modified; +* if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +* diag(R)*B; +* if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +* overwritten by diag(C)*B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) COMPLEX array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X +* to the original system of equations. Note that A and B are +* modified on exit if EQUED .ne. 'N', and the solution to the +* equilibrated system is inv(diag(C))*X if TRANS = 'N' and +* EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' +* and EQUED = 'R' or 'B'. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) REAL +* The estimate of the reciprocal condition number of the matrix +* A after equilibration (if done). If RCOND is less than the +* machine precision (in particular, if RCOND = 0), the matrix +* is singular to working precision. This condition is +* indicated by a return code of INFO > 0. +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace/output) REAL array, dimension (N) +* On exit, RWORK(1) contains the reciprocal pivot growth +* factor norm(A)/norm(U). The "max absolute element" norm is +* used. If RWORK(1) is much less than 1, then the stability +* of the LU factorization of the (equilibrated) matrix A +* could be poor. This also means that the solution X, condition +* estimator RCOND, and forward error bound FERR could be +* unreliable. If factorization fails with 0 0: if INFO = i, and i is +* <= N: U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, so the solution and error bounds +* could not be computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + CHARACTER NORM + INTEGER I, INFEQU, J, J1, J2 + REAL AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN, + $ ROWCND, RPVGRW, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANGB, CLANTB, SLAMCH + EXTERNAL LSAME, CLANGB, CLANTB, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGBCON, CGBEQU, CGBRFS, CGBTRF, CGBTRS, + $ CLACPY, CLAQGB, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KL.LT.0 ) THEN + INFO = -4 + ELSE IF( KU.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -8 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -10 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -12 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -13 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -14 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -16 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -18 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL CGBEQU( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL CLAQGB( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = R( I )*B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( COLEQU ) THEN + DO 60 J = 1, NRHS + DO 50 I = 1, N + B( I, J ) = C( I )*B( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of the band matrix A. +* + DO 70 J = 1, N + J1 = MAX( J-KU, 1 ) + J2 = MIN( J+KL, N ) + CALL CCOPY( J2-J1+1, AB( KU+1-J+J1, J ), 1, + $ AFB( KL+KU+1-J+J1, J ), 1 ) + 70 CONTINUE +* + CALL CGBTRF( N, N, KL, KU, AFB, LDAFB, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) THEN +* +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + ANORM = ZERO + DO 90 J = 1, INFO + DO 80 I = MAX( KU+2-J, 1 ), + $ MIN( N+KU+1-J, KL+KU+1 ) + ANORM = MAX( ANORM, ABS( AB( I, J ) ) ) + 80 CONTINUE + 90 CONTINUE + RPVGRW = CLANTB( 'M', 'U', 'N', INFO, + $ MIN( INFO-1, KL+KU ), AFB( MAX( 1, + $ KL+KU+2-INFO ), 1 ), LDAFB, RWORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = ANORM / RPVGRW + END IF + RWORK( 1 ) = RPVGRW + RCOND = ZERO + END IF + RETURN + END IF + END IF +* +* Compute the norm of the matrix A and the +* reciprocal pivot growth factor RPVGRW. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = CLANGB( NORM, N, KL, KU, AB, LDAB, RWORK ) + RPVGRW = CLANTB( 'M', 'U', 'N', N, KL+KU, AFB, LDAFB, RWORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = CLANGB( 'M', N, KL, KU, AB, LDAB, RWORK ) / RPVGRW + END IF +* +* Compute the reciprocal of the condition number of A. +* + CALL CGBCON( NORM, N, KL, KU, AFB, LDAFB, IPIV, ANORM, RCOND, + $ WORK, RWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution matrix X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CGBTRS( TRANS, N, KL, KU, NRHS, AFB, LDAFB, IPIV, X, LDX, + $ INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL CGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, IPIV, + $ B, LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( NOTRAN ) THEN + IF( COLEQU ) THEN + DO 110 J = 1, NRHS + DO 100 I = 1, N + X( I, J ) = C( I )*X( I, J ) + 100 CONTINUE + 110 CONTINUE + DO 120 J = 1, NRHS + FERR( J ) = FERR( J ) / COLCND + 120 CONTINUE + END IF + ELSE IF( ROWEQU ) THEN + DO 140 J = 1, NRHS + DO 130 I = 1, N + X( I, J ) = R( I )*X( I, J ) + 130 CONTINUE + 140 CONTINUE + DO 150 J = 1, NRHS + FERR( J ) = FERR( J ) / ROWCND + 150 CONTINUE + END IF +* + RWORK( 1 ) = RPVGRW + RETURN +* +* End of CGBSVX +* + END diff --git a/costa/native/external/lapack/cgbtf2.f b/costa/native/external/lapack/cgbtf2.f new file mode 100644 index 000000000..8a8b5ca95 --- /dev/null +++ b/costa/native/external/lapack/cgbtf2.f @@ -0,0 +1,203 @@ + SUBROUTINE CGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* CGBTF2 computes an LU factorization of a complex m-by-n band matrix +* A using partial pivoting with row interchanges. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows KL+1 to +* 2*KL+KU+1; rows 1 to KL of the array need not be set. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +* +* On exit, details of the factorization: U is stored as an +* upper triangular band matrix with KL+KU superdiagonals in +* rows 1 to KL+KU+1, and the multipliers used during the +* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +* See below for further details. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* M = N = 6, KL = 2, KU = 1: +* +* On entry: On exit: +* +* * * * + + + * * * u14 u25 u36 +* * * + + + + * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +* a31 a42 a53 a64 * * m31 m42 m53 m64 * * +* +* Array elements marked * are not used by the routine; elements marked +* + need not be set on entry, but are required by the routine to store +* elements of U, because of fill-in resulting from the row +* interchanges. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, JP, JU, KM, KV +* .. +* .. External Functions .. + INTEGER ICAMAX + EXTERNAL ICAMAX +* .. +* .. External Subroutines .. + EXTERNAL CGERU, CSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* KV is the number of superdiagonals in the factor U, allowing for +* fill-in. +* + KV = KU + KL +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KV+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Gaussian elimination with partial pivoting +* +* Set fill-in elements in columns KU+2 to KV to zero. +* + DO 20 J = KU + 2, MIN( KV, N ) + DO 10 I = KV - J + 2, KL + AB( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* JU is the index of the last column affected by the current stage +* of the factorization. +* + JU = 1 +* + DO 40 J = 1, MIN( M, N ) +* +* Set fill-in elements in column J+KV to zero. +* + IF( J+KV.LE.N ) THEN + DO 30 I = 1, KL + AB( I, J+KV ) = ZERO + 30 CONTINUE + END IF +* +* Find pivot and test for singularity. KM is the number of +* subdiagonal elements in the current column. +* + KM = MIN( KL, M-J ) + JP = ICAMAX( KM+1, AB( KV+1, J ), 1 ) + IPIV( J ) = JP + J - 1 + IF( AB( KV+JP, J ).NE.ZERO ) THEN + JU = MAX( JU, MIN( J+KU+JP-1, N ) ) +* +* Apply interchange to columns J to JU. +* + IF( JP.NE.1 ) + $ CALL CSWAP( JU-J+1, AB( KV+JP, J ), LDAB-1, + $ AB( KV+1, J ), LDAB-1 ) + IF( KM.GT.0 ) THEN +* +* Compute multipliers. +* + CALL CSCAL( KM, ONE / AB( KV+1, J ), AB( KV+2, J ), 1 ) +* +* Update trailing submatrix within the band. +* + IF( JU.GT.J ) + $ CALL CGERU( KM, JU-J, -ONE, AB( KV+2, J ), 1, + $ AB( KV, J+1 ), LDAB-1, AB( KV+1, J+1 ), + $ LDAB-1 ) + END IF + ELSE +* +* If pivot is zero, set INFO to the index of the pivot +* unless a zero pivot has already been found. +* + IF( INFO.EQ.0 ) + $ INFO = J + END IF + 40 CONTINUE + RETURN +* +* End of CGBTF2 +* + END diff --git a/costa/native/external/lapack/cgbtrf.f b/costa/native/external/lapack/cgbtrf.f new file mode 100644 index 000000000..bf61bb148 --- /dev/null +++ b/costa/native/external/lapack/cgbtrf.f @@ -0,0 +1,443 @@ + SUBROUTINE CGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* CGBTRF computes an LU factorization of a complex m-by-n band matrix A +* using partial pivoting with row interchanges. +* +* This is the blocked version of the algorithm, calling Level 3 BLAS. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows KL+1 to +* 2*KL+KU+1; rows 1 to KL of the array need not be set. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +* +* On exit, details of the factorization: U is stored as an +* upper triangular band matrix with KL+KU superdiagonals in +* rows 1 to KL+KU+1, and the multipliers used during the +* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +* See below for further details. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* M = N = 6, KL = 2, KU = 1: +* +* On entry: On exit: +* +* * * * + + + * * * u14 u25 u36 +* * * + + + + * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +* a31 a42 a53 a64 * * m31 m42 m53 m64 * * +* +* Array elements marked * are not used by the routine; elements marked +* + need not be set on entry, but are required by the routine to store +* elements of U because of fill-in resulting from the row interchanges. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) + INTEGER NBMAX, LDWORK + PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 ) +* .. +* .. Local Scalars .. + INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP, + $ JU, K2, KM, KV, NB, NW + COMPLEX TEMP +* .. +* .. Local Arrays .. + COMPLEX WORK13( LDWORK, NBMAX ), + $ WORK31( LDWORK, NBMAX ) +* .. +* .. External Functions .. + INTEGER ICAMAX, ILAENV + EXTERNAL ICAMAX, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGBTF2, CGEMM, CGERU, CLASWP, CSCAL, + $ CSWAP, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* KV is the number of superdiagonals in the factor U, allowing for +* fill-in +* + KV = KU + KL +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KV+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment +* + NB = ILAENV( 1, 'CGBTRF', ' ', M, N, KL, KU ) +* +* The block size must not exceed the limit set by the size of the +* local arrays WORK13 and WORK31. +* + NB = MIN( NB, NBMAX ) +* + IF( NB.LE.1 .OR. NB.GT.KL ) THEN +* +* Use unblocked code +* + CALL CGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) + ELSE +* +* Use blocked code +* +* Zero the superdiagonal elements of the work array WORK13 +* + DO 20 J = 1, NB + DO 10 I = 1, J - 1 + WORK13( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Zero the subdiagonal elements of the work array WORK31 +* + DO 40 J = 1, NB + DO 30 I = J + 1, NB + WORK31( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Gaussian elimination with partial pivoting +* +* Set fill-in elements in columns KU+2 to KV to zero +* + DO 60 J = KU + 2, MIN( KV, N ) + DO 50 I = KV - J + 2, KL + AB( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE +* +* JU is the index of the last column affected by the current +* stage of the factorization +* + JU = 1 +* + DO 180 J = 1, MIN( M, N ), NB + JB = MIN( NB, MIN( M, N )-J+1 ) +* +* The active part of the matrix is partitioned +* +* A11 A12 A13 +* A21 A22 A23 +* A31 A32 A33 +* +* Here A11, A21 and A31 denote the current block of JB columns +* which is about to be factorized. The number of rows in the +* partitioning are JB, I2, I3 respectively, and the numbers +* of columns are JB, J2, J3. The superdiagonal elements of A13 +* and the subdiagonal elements of A31 lie outside the band. +* + I2 = MIN( KL-JB, M-J-JB+1 ) + I3 = MIN( JB, M-J-KL+1 ) +* +* J2 and J3 are computed after JU has been updated. +* +* Factorize the current block of JB columns +* + DO 80 JJ = J, J + JB - 1 +* +* Set fill-in elements in column JJ+KV to zero +* + IF( JJ+KV.LE.N ) THEN + DO 70 I = 1, KL + AB( I, JJ+KV ) = ZERO + 70 CONTINUE + END IF +* +* Find pivot and test for singularity. KM is the number of +* subdiagonal elements in the current column. +* + KM = MIN( KL, M-JJ ) + JP = ICAMAX( KM+1, AB( KV+1, JJ ), 1 ) + IPIV( JJ ) = JP + JJ - J + IF( AB( KV+JP, JJ ).NE.ZERO ) THEN + JU = MAX( JU, MIN( JJ+KU+JP-1, N ) ) + IF( JP.NE.1 ) THEN +* +* Apply interchange to columns J to J+JB-1 +* + IF( JP+JJ-1.LT.J+KL ) THEN +* + CALL CSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1, + $ AB( KV+JP+JJ-J, J ), LDAB-1 ) + ELSE +* +* The interchange affects columns J to JJ-1 of A31 +* which are stored in the work array WORK31 +* + CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) + CALL CSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1, + $ AB( KV+JP, JJ ), LDAB-1 ) + END IF + END IF +* +* Compute multipliers +* + CALL CSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ), + $ 1 ) +* +* Update trailing submatrix within the band and within +* the current block. JM is the index of the last column +* which needs to be updated. +* + JM = MIN( JU, J+JB-1 ) + IF( JM.GT.JJ ) + $ CALL CGERU( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1, + $ AB( KV, JJ+1 ), LDAB-1, + $ AB( KV+1, JJ+1 ), LDAB-1 ) + ELSE +* +* If pivot is zero, set INFO to the index of the pivot +* unless a zero pivot has already been found. +* + IF( INFO.EQ.0 ) + $ INFO = JJ + END IF +* +* Copy current column of A31 into the work array WORK31 +* + NW = MIN( JJ-J+1, I3 ) + IF( NW.GT.0 ) + $ CALL CCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1, + $ WORK31( 1, JJ-J+1 ), 1 ) + 80 CONTINUE + IF( J+JB.LE.N ) THEN +* +* Apply the row interchanges to the other blocks. +* + J2 = MIN( JU-J+1, KV ) - JB + J3 = MAX( 0, JU-J-KV+1 ) +* +* Use CLASWP to apply the row interchanges to A12, A22, and +* A32. +* + CALL CLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB, + $ IPIV( J ), 1 ) +* +* Adjust the pivot indices. +* + DO 90 I = J, J + JB - 1 + IPIV( I ) = IPIV( I ) + J - 1 + 90 CONTINUE +* +* Apply the row interchanges to A13, A23, and A33 +* columnwise. +* + K2 = J - 1 + JB + J2 + DO 110 I = 1, J3 + JJ = K2 + I + DO 100 II = J + I - 1, J + JB - 1 + IP = IPIV( II ) + IF( IP.NE.II ) THEN + TEMP = AB( KV+1+II-JJ, JJ ) + AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ ) + AB( KV+1+IP-JJ, JJ ) = TEMP + END IF + 100 CONTINUE + 110 CONTINUE +* +* Update the relevant part of the trailing submatrix +* + IF( J2.GT.0 ) THEN +* +* Update A12 +* + CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, J2, ONE, AB( KV+1, J ), LDAB-1, + $ AB( KV+1-JB, J+JB ), LDAB-1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A22 +* + CALL CGEMM( 'No transpose', 'No transpose', I2, J2, + $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, + $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, + $ AB( KV+1, J+JB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Update A32 +* + CALL CGEMM( 'No transpose', 'No transpose', I3, J2, + $ JB, -ONE, WORK31, LDWORK, + $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, + $ AB( KV+KL+1-JB, J+JB ), LDAB-1 ) + END IF + END IF +* + IF( J3.GT.0 ) THEN +* +* Copy the lower triangle of A13 into the work array +* WORK13 +* + DO 130 JJ = 1, J3 + DO 120 II = JJ, JB + WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 ) + 120 CONTINUE + 130 CONTINUE +* +* Update A13 in the work array +* + CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, J3, ONE, AB( KV+1, J ), LDAB-1, + $ WORK13, LDWORK ) +* + IF( I2.GT.0 ) THEN +* +* Update A23 +* + CALL CGEMM( 'No transpose', 'No transpose', I2, J3, + $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, + $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ), + $ LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Update A33 +* + CALL CGEMM( 'No transpose', 'No transpose', I3, J3, + $ JB, -ONE, WORK31, LDWORK, WORK13, + $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 ) + END IF +* +* Copy the lower triangle of A13 back into place +* + DO 150 JJ = 1, J3 + DO 140 II = JJ, JB + AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ ) + 140 CONTINUE + 150 CONTINUE + END IF + ELSE +* +* Adjust the pivot indices. +* + DO 160 I = J, J + JB - 1 + IPIV( I ) = IPIV( I ) + J - 1 + 160 CONTINUE + END IF +* +* Partially undo the interchanges in the current block to +* restore the upper triangular form of A31 and copy the upper +* triangle of A31 back into place +* + DO 170 JJ = J + JB - 1, J, -1 + JP = IPIV( JJ ) - JJ + 1 + IF( JP.NE.1 ) THEN +* +* Apply interchange to columns J to JJ-1 +* + IF( JP+JJ-1.LT.J+KL ) THEN +* +* The interchange does not affect A31 +* + CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ AB( KV+JP+JJ-J, J ), LDAB-1 ) + ELSE +* +* The interchange does affect A31 +* + CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) + END IF + END IF +* +* Copy the current column of A31 back into place +* + NW = MIN( I3, JJ-J+1 ) + IF( NW.GT.0 ) + $ CALL CCOPY( NW, WORK31( 1, JJ-J+1 ), 1, + $ AB( KV+KL+1-JJ+J, JJ ), 1 ) + 170 CONTINUE + 180 CONTINUE + END IF +* + RETURN +* +* End of CGBTRF +* + END diff --git a/costa/native/external/lapack/cgbtrs.f b/costa/native/external/lapack/cgbtrs.f new file mode 100644 index 000000000..b4b55b86b --- /dev/null +++ b/costa/native/external/lapack/cgbtrs.f @@ -0,0 +1,215 @@ + SUBROUTINE CGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AB( LDAB, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CGBTRS solves a system of linear equations +* A * X = B, A**T * X = B, or A**H * X = B +* with a general band matrix A using the LU factorization computed +* by CGBTRF. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations. +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input) COMPLEX array, dimension (LDAB,N) +* Details of the LU factorization of the band matrix A, as +* computed by CGBTRF. U is stored as an upper triangular band +* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +* the multipliers used during the factorization are stored in +* rows KL+KU+2 to 2*KL+KU+1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= N, row i of the matrix was +* interchanged with row IPIV(i). +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LNOTI, NOTRAN + INTEGER I, J, KD, L, LM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CGERU, CLACGV, CSWAP, CTBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.( 2*KL+KU+1 ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + KD = KU + KL + 1 + LNOTI = KL.GT.0 +* + IF( NOTRAN ) THEN +* +* Solve A*X = B. +* +* Solve L*X = B, overwriting B with X. +* +* L is represented as a product of permutations and unit lower +* triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1), +* where each transformation L(i) is a rank-one modification of +* the identity matrix. +* + IF( LNOTI ) THEN + DO 10 J = 1, N - 1 + LM = MIN( KL, N-J ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL CSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + CALL CGERU( LM, NRHS, -ONE, AB( KD+1, J ), 1, B( J, 1 ), + $ LDB, B( J+1, 1 ), LDB ) + 10 CONTINUE + END IF +* + DO 20 I = 1, NRHS +* +* Solve U*X = B, overwriting B with X. +* + CALL CTBSV( 'Upper', 'No transpose', 'Non-unit', N, KL+KU, + $ AB, LDAB, B( 1, I ), 1 ) + 20 CONTINUE +* + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Solve A**T * X = B. +* + DO 30 I = 1, NRHS +* +* Solve U**T * X = B, overwriting B with X. +* + CALL CTBSV( 'Upper', 'Transpose', 'Non-unit', N, KL+KU, AB, + $ LDAB, B( 1, I ), 1 ) + 30 CONTINUE +* +* Solve L**T * X = B, overwriting B with X. +* + IF( LNOTI ) THEN + DO 40 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + CALL CGEMV( 'Transpose', LM, NRHS, -ONE, B( J+1, 1 ), + $ LDB, AB( KD+1, J ), 1, ONE, B( J, 1 ), LDB ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL CSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + 40 CONTINUE + END IF +* + ELSE +* +* Solve A**H * X = B. +* + DO 50 I = 1, NRHS +* +* Solve U**H * X = B, overwriting B with X. +* + CALL CTBSV( 'Upper', 'Conjugate transpose', 'Non-unit', N, + $ KL+KU, AB, LDAB, B( 1, I ), 1 ) + 50 CONTINUE +* +* Solve L**H * X = B, overwriting B with X. +* + IF( LNOTI ) THEN + DO 60 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + CALL CLACGV( NRHS, B( J, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', LM, NRHS, -ONE, + $ B( J+1, 1 ), LDB, AB( KD+1, J ), 1, ONE, + $ B( J, 1 ), LDB ) + CALL CLACGV( NRHS, B( J, 1 ), LDB ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL CSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + 60 CONTINUE + END IF + END IF + RETURN +* +* End of CGBTRS +* + END diff --git a/costa/native/external/lapack/cgebak.f b/costa/native/external/lapack/cgebak.f new file mode 100644 index 000000000..4c81e0534 --- /dev/null +++ b/costa/native/external/lapack/cgebak.f @@ -0,0 +1,190 @@ + SUBROUTINE CGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOB, SIDE + INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. + REAL SCALE( * ) + COMPLEX V( LDV, * ) +* .. +* +* Purpose +* ======= +* +* CGEBAK forms the right or left eigenvectors of a complex general +* matrix by backward transformation on the computed eigenvectors of the +* balanced matrix output by CGEBAL. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the type of backward transformation required: +* = 'N', do nothing, return immediately; +* = 'P', do backward transformation for permutation only; +* = 'S', do backward transformation for scaling only; +* = 'B', do backward transformations for both permutation and +* scaling. +* JOB must be the same as the argument JOB supplied to CGEBAL. +* +* SIDE (input) CHARACTER*1 +* = 'R': V contains right eigenvectors; +* = 'L': V contains left eigenvectors. +* +* N (input) INTEGER +* The number of rows of the matrix V. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* The integers ILO and IHI determined by CGEBAL. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* SCALE (input) REAL array, dimension (N) +* Details of the permutation and scaling factors, as returned +* by CGEBAL. +* +* M (input) INTEGER +* The number of columns of the matrix V. M >= 0. +* +* V (input/output) COMPLEX array, dimension (LDV,M) +* On entry, the matrix of right or left eigenvectors to be +* transformed, as returned by CHSEIN or CTREVC. +* On exit, V is overwritten by the transformed eigenvectors. +* +* LDV (input) INTEGER +* The leading dimension of the array V. LDV >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFTV, RIGHTV + INTEGER I, II, K + REAL S +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode and Test the input parameters +* + RIGHTV = LSAME( SIDE, 'R' ) + LEFTV = LSAME( SIDE, 'L' ) +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -7 + ELSE IF( LDV.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEBAK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( M.EQ.0 ) + $ RETURN + IF( LSAME( JOB, 'N' ) ) + $ RETURN +* + IF( ILO.EQ.IHI ) + $ GO TO 30 +* +* Backward balance +* + IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN +* + IF( RIGHTV ) THEN + DO 10 I = ILO, IHI + S = SCALE( I ) + CALL CSSCAL( M, S, V( I, 1 ), LDV ) + 10 CONTINUE + END IF +* + IF( LEFTV ) THEN + DO 20 I = ILO, IHI + S = ONE / SCALE( I ) + CALL CSSCAL( M, S, V( I, 1 ), LDV ) + 20 CONTINUE + END IF +* + END IF +* +* Backward permutation +* +* For I = ILO-1 step -1 until 1, +* IHI+1 step 1 until N do -- +* + 30 CONTINUE + IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN + IF( RIGHTV ) THEN + DO 40 II = 1, N + I = II + IF( I.GE.ILO .AND. I.LE.IHI ) + $ GO TO 40 + IF( I.LT.ILO ) + $ I = ILO - II + K = SCALE( I ) + IF( K.EQ.I ) + $ GO TO 40 + CALL CSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 40 CONTINUE + END IF +* + IF( LEFTV ) THEN + DO 50 II = 1, N + I = II + IF( I.GE.ILO .AND. I.LE.IHI ) + $ GO TO 50 + IF( I.LT.ILO ) + $ I = ILO - II + K = SCALE( I ) + IF( K.EQ.I ) + $ GO TO 50 + CALL CSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 50 CONTINUE + END IF + END IF +* + RETURN +* +* End of CGEBAK +* + END diff --git a/costa/native/external/lapack/cgebal.f b/costa/native/external/lapack/cgebal.f new file mode 100644 index 000000000..b7e7663bf --- /dev/null +++ b/costa/native/external/lapack/cgebal.f @@ -0,0 +1,331 @@ + SUBROUTINE CGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. + REAL SCALE( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CGEBAL balances a general complex matrix A. This involves, first, +* permuting A by a similarity transformation to isolate eigenvalues +* in the first 1 to ILO-1 and last IHI+1 to N elements on the +* diagonal; and second, applying a diagonal similarity transformation +* to rows and columns ILO to IHI to make the rows and columns as +* close in norm as possible. Both steps are optional. +* +* Balancing may reduce the 1-norm of the matrix, and improve the +* accuracy of the computed eigenvalues and/or eigenvectors. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the operations to be performed on A: +* = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 +* for i = 1,...,N; +* = 'P': permute only; +* = 'S': scale only; +* = 'B': both permute and scale. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the input matrix A. +* On exit, A is overwritten by the balanced matrix. +* If JOB = 'N', A is not referenced. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* ILO (output) INTEGER +* IHI (output) INTEGER +* ILO and IHI are set to integers such that on exit +* A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. +* If JOB = 'N' or 'S', ILO = 1 and IHI = N. +* +* SCALE (output) REAL array, dimension (N) +* Details of the permutations and scaling factors applied to +* A. If P(j) is the index of the row and column interchanged +* with row and column j and D(j) is the scaling factor +* applied to row and column j, then +* SCALE(j) = P(j) for j = 1,...,ILO-1 +* = D(j) for j = ILO,...,IHI +* = P(j) for j = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The permutations consist of row and column interchanges which put +* the matrix in the form +* +* ( T1 X Y ) +* P A P = ( 0 B Z ) +* ( 0 0 T2 ) +* +* where T1 and T2 are upper triangular matrices whose eigenvalues lie +* along the diagonal. The column indices ILO and IHI mark the starting +* and ending columns of the submatrix B. Balancing consists of applying +* a diagonal similarity transformation inv(D) * B * D to make the +* 1-norms of each row of B and its corresponding column nearly equal. +* The output matrix is +* +* ( T1 X*D Y ) +* ( 0 inv(D)*B*D inv(D)*Z ). +* ( 0 0 T2 ) +* +* Information about the permutations P and the diagonal matrix D is +* returned in the vector SCALE. +* +* This subroutine is based on the EISPACK routine CBAL. +* +* Modified by Tzu-Yi Chen, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL SCLFAC + PARAMETER ( SCLFAC = 0.8E+1 ) + REAL FACTOR + PARAMETER ( FACTOR = 0.95E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOCONV + INTEGER I, ICA, IEXC, IRA, J, K, L, M + REAL C, CA, F, G, R, RA, S, SFMAX1, SFMAX2, SFMIN1, + $ SFMIN2 + COMPLEX CDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH + EXTERNAL LSAME, ICAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEBAL', -INFO ) + RETURN + END IF +* + K = 1 + L = N +* + IF( N.EQ.0 ) + $ GO TO 210 +* + IF( LSAME( JOB, 'N' ) ) THEN + DO 10 I = 1, N + SCALE( I ) = ONE + 10 CONTINUE + GO TO 210 + END IF +* + IF( LSAME( JOB, 'S' ) ) + $ GO TO 120 +* +* Permutation to isolate eigenvalues if possible +* + GO TO 50 +* +* Row and column exchange. +* + 20 CONTINUE + SCALE( M ) = J + IF( J.EQ.M ) + $ GO TO 30 +* + CALL CSWAP( L, A( 1, J ), 1, A( 1, M ), 1 ) + CALL CSWAP( N-K+1, A( J, K ), LDA, A( M, K ), LDA ) +* + 30 CONTINUE + GO TO ( 40, 80 )IEXC +* +* Search for rows isolating an eigenvalue and push them down. +* + 40 CONTINUE + IF( L.EQ.1 ) + $ GO TO 210 + L = L - 1 +* + 50 CONTINUE + DO 70 J = L, 1, -1 +* + DO 60 I = 1, L + IF( I.EQ.J ) + $ GO TO 60 + IF( REAL( A( J, I ) ).NE.ZERO .OR. AIMAG( A( J, I ) ).NE. + $ ZERO )GO TO 70 + 60 CONTINUE +* + M = L + IEXC = 1 + GO TO 20 + 70 CONTINUE +* + GO TO 90 +* +* Search for columns isolating an eigenvalue and push them left. +* + 80 CONTINUE + K = K + 1 +* + 90 CONTINUE + DO 110 J = K, L +* + DO 100 I = K, L + IF( I.EQ.J ) + $ GO TO 100 + IF( REAL( A( I, J ) ).NE.ZERO .OR. AIMAG( A( I, J ) ).NE. + $ ZERO )GO TO 110 + 100 CONTINUE +* + M = K + IEXC = 2 + GO TO 20 + 110 CONTINUE +* + 120 CONTINUE + DO 130 I = K, L + SCALE( I ) = ONE + 130 CONTINUE +* + IF( LSAME( JOB, 'P' ) ) + $ GO TO 210 +* +* Balance the submatrix in rows K to L. +* +* Iterative loop for norm reduction +* + SFMIN1 = SLAMCH( 'S' ) / SLAMCH( 'P' ) + SFMAX1 = ONE / SFMIN1 + SFMIN2 = SFMIN1*SCLFAC + SFMAX2 = ONE / SFMIN2 + 140 CONTINUE + NOCONV = .FALSE. +* + DO 200 I = K, L + C = ZERO + R = ZERO +* + DO 150 J = K, L + IF( J.EQ.I ) + $ GO TO 150 + C = C + CABS1( A( J, I ) ) + R = R + CABS1( A( I, J ) ) + 150 CONTINUE + ICA = ICAMAX( L, A( 1, I ), 1 ) + CA = ABS( A( ICA, I ) ) + IRA = ICAMAX( N-K+1, A( I, K ), LDA ) + RA = ABS( A( I, IRA+K-1 ) ) +* +* Guard against zero C or R due to underflow. +* + IF( C.EQ.ZERO .OR. R.EQ.ZERO ) + $ GO TO 200 + G = R / SCLFAC + F = ONE + S = C + R + 160 CONTINUE + IF( C.GE.G .OR. MAX( F, C, CA ).GE.SFMAX2 .OR. + $ MIN( R, G, RA ).LE.SFMIN2 )GO TO 170 + F = F*SCLFAC + C = C*SCLFAC + CA = CA*SCLFAC + R = R / SCLFAC + G = G / SCLFAC + RA = RA / SCLFAC + GO TO 160 +* + 170 CONTINUE + G = C / SCLFAC + 180 CONTINUE + IF( G.LT.R .OR. MAX( R, RA ).GE.SFMAX2 .OR. + $ MIN( F, C, G, CA ).LE.SFMIN2 )GO TO 190 + F = F / SCLFAC + C = C / SCLFAC + G = G / SCLFAC + CA = CA / SCLFAC + R = R*SCLFAC + RA = RA*SCLFAC + GO TO 180 +* +* Now balance. +* + 190 CONTINUE + IF( ( C+R ).GE.FACTOR*S ) + $ GO TO 200 + IF( F.LT.ONE .AND. SCALE( I ).LT.ONE ) THEN + IF( F*SCALE( I ).LE.SFMIN1 ) + $ GO TO 200 + END IF + IF( F.GT.ONE .AND. SCALE( I ).GT.ONE ) THEN + IF( SCALE( I ).GE.SFMAX1 / F ) + $ GO TO 200 + END IF + G = ONE / F + SCALE( I ) = SCALE( I )*F + NOCONV = .TRUE. +* + CALL CSSCAL( N-K+1, G, A( I, K ), LDA ) + CALL CSSCAL( L, F, A( 1, I ), 1 ) +* + 200 CONTINUE +* + IF( NOCONV ) + $ GO TO 140 +* + 210 CONTINUE + ILO = K + IHI = L +* + RETURN +* +* End of CGEBAL +* + END diff --git a/costa/native/external/lapack/cgebd2.f b/costa/native/external/lapack/cgebd2.f new file mode 100644 index 000000000..fdec366ac --- /dev/null +++ b/costa/native/external/lapack/cgebd2.f @@ -0,0 +1,249 @@ + SUBROUTINE CGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + COMPLEX A( LDA, * ), TAUP( * ), TAUQ( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGEBD2 reduces a complex general m by n matrix A to upper or lower +* real bidiagonal form B by a unitary transformation: Q' * A * P = B. +* +* If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows in the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns in the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the m by n general matrix to be reduced. +* On exit, +* if m >= n, the diagonal and the first superdiagonal are +* overwritten with the upper bidiagonal matrix B; the +* elements below the diagonal, with the array TAUQ, represent +* the unitary matrix Q as a product of elementary +* reflectors, and the elements above the first superdiagonal, +* with the array TAUP, represent the unitary matrix P as +* a product of elementary reflectors; +* if m < n, the diagonal and the first subdiagonal are +* overwritten with the lower bidiagonal matrix B; the +* elements below the first subdiagonal, with the array TAUQ, +* represent the unitary matrix Q as a product of +* elementary reflectors, and the elements above the diagonal, +* with the array TAUP, represent the unitary matrix P as +* a product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* D (output) REAL array, dimension (min(M,N)) +* The diagonal elements of the bidiagonal matrix B: +* D(i) = A(i,i). +* +* E (output) REAL array, dimension (min(M,N)-1) +* The off-diagonal elements of the bidiagonal matrix B: +* if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; +* if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. +* +* TAUQ (output) COMPLEX array dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix Q. See Further Details. +* +* TAUP (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix P. See Further Details. +* +* WORK (workspace) COMPLEX array, dimension (max(M,N)) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrices Q and P are represented as products of elementary +* reflectors: +* +* If m >= n, +* +* Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are complex scalars, and v and u are complex +* vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in +* A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in +* A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* If m < n, +* +* Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are complex scalars, v and u are complex vectors; +* v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); +* u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); +* tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* The contents of A on exit are illustrated by the following examples: +* +* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +* +* ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +* ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +* ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +* ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +* ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +* ( v1 v2 v3 v4 v5 ) +* +* where d and e denote diagonal and off-diagonal elements of B, vi +* denotes an element of the vector defining H(i), and ui an element of +* the vector defining G(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLARF, CLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.LT.0 ) THEN + CALL XERBLA( 'CGEBD2', -INFO ) + RETURN + END IF +* + IF( M.GE.N ) THEN +* +* Reduce to upper bidiagonal form +* + DO 10 I = 1, N +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + ALPHA = A( I, I ) + CALL CLARFG( M-I+1, ALPHA, A( MIN( I+1, M ), I ), 1, + $ TAUQ( I ) ) + D( I ) = ALPHA + A( I, I ) = ONE +* +* Apply H(i)' to A(i:m,i+1:n) from the left +* + CALL CLARF( 'Left', M-I+1, N-I, A( I, I ), 1, + $ CONJG( TAUQ( I ) ), A( I, I+1 ), LDA, WORK ) + A( I, I ) = D( I ) +* + IF( I.LT.N ) THEN +* +* Generate elementary reflector G(i) to annihilate +* A(i,i+2:n) +* + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + ALPHA = A( I, I+1 ) + CALL CLARFG( N-I, ALPHA, A( I, MIN( I+2, N ) ), + $ LDA, TAUP( I ) ) + E( I ) = ALPHA + A( I, I+1 ) = ONE +* +* Apply G(i) to A(i+1:m,i+1:n) from the right +* + CALL CLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA, + $ TAUP( I ), A( I+1, I+1 ), LDA, WORK ) + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + A( I, I+1 ) = E( I ) + ELSE + TAUP( I ) = ZERO + END IF + 10 CONTINUE + ELSE +* +* Reduce to lower bidiagonal form +* + DO 20 I = 1, M +* +* Generate elementary reflector G(i) to annihilate A(i,i+1:n) +* + CALL CLACGV( N-I+1, A( I, I ), LDA ) + ALPHA = A( I, I ) + CALL CLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA, + $ TAUP( I ) ) + D( I ) = ALPHA + A( I, I ) = ONE +* +* Apply G(i) to A(i+1:m,i:n) from the right +* + CALL CLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAUP( I ), + $ A( MIN( I+1, M ), I ), LDA, WORK ) + CALL CLACGV( N-I+1, A( I, I ), LDA ) + A( I, I ) = D( I ) +* + IF( I.LT.M ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:m,i) +* + ALPHA = A( I+1, I ) + CALL CLARFG( M-I, ALPHA, A( MIN( I+2, M ), I ), 1, + $ TAUQ( I ) ) + E( I ) = ALPHA + A( I+1, I ) = ONE +* +* Apply H(i)' to A(i+1:m,i+1:n) from the left +* + CALL CLARF( 'Left', M-I, N-I, A( I+1, I ), 1, + $ CONJG( TAUQ( I ) ), A( I+1, I+1 ), LDA, + $ WORK ) + A( I+1, I ) = E( I ) + ELSE + TAUQ( I ) = ZERO + END IF + 20 CONTINUE + END IF + RETURN +* +* End of CGEBD2 +* + END diff --git a/costa/native/external/lapack/cgebrd.f b/costa/native/external/lapack/cgebrd.f new file mode 100644 index 000000000..058da28c6 --- /dev/null +++ b/costa/native/external/lapack/cgebrd.f @@ -0,0 +1,270 @@ + SUBROUTINE CGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + COMPLEX A( LDA, * ), TAUP( * ), TAUQ( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGEBRD reduces a general complex M-by-N matrix A to upper or lower +* bidiagonal form B by a unitary transformation: Q**H * A * P = B. +* +* If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows in the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns in the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N general matrix to be reduced. +* On exit, +* if m >= n, the diagonal and the first superdiagonal are +* overwritten with the upper bidiagonal matrix B; the +* elements below the diagonal, with the array TAUQ, represent +* the unitary matrix Q as a product of elementary +* reflectors, and the elements above the first superdiagonal, +* with the array TAUP, represent the unitary matrix P as +* a product of elementary reflectors; +* if m < n, the diagonal and the first subdiagonal are +* overwritten with the lower bidiagonal matrix B; the +* elements below the first subdiagonal, with the array TAUQ, +* represent the unitary matrix Q as a product of +* elementary reflectors, and the elements above the diagonal, +* with the array TAUP, represent the unitary matrix P as +* a product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* D (output) REAL array, dimension (min(M,N)) +* The diagonal elements of the bidiagonal matrix B: +* D(i) = A(i,i). +* +* E (output) REAL array, dimension (min(M,N)-1) +* The off-diagonal elements of the bidiagonal matrix B: +* if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; +* if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. +* +* TAUQ (output) COMPLEX array dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix Q. See Further Details. +* +* TAUP (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix P. See Further Details. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,M,N). +* For optimum performance LWORK >= (M+N)*NB, where NB +* is the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrices Q and P are represented as products of elementary +* reflectors: +* +* If m >= n, +* +* Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are complex scalars, and v and u are complex +* vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in +* A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in +* A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* If m < n, +* +* Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are complex scalars, and v and u are complex +* vectors; v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in +* A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in +* A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* The contents of A on exit are illustrated by the following examples: +* +* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +* +* ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +* ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +* ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +* ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +* ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +* ( v1 v2 v3 v4 v5 ) +* +* where d and e denote diagonal and off-diagonal elements of B, vi +* denotes an element of the vector defining H(i), and ui an element of +* the vector defining G(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB, + $ NBMIN, NX + REAL WS +* .. +* .. External Subroutines .. + EXTERNAL CGEBD2, CGEMM, CLABRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB = MAX( 1, ILAENV( 1, 'CGEBRD', ' ', M, N, -1, -1 ) ) + LWKOPT = ( M+N )*NB + WORK( 1 ) = REAL( LWKOPT ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M, N ) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + IF( INFO.LT.0 ) THEN + CALL XERBLA( 'CGEBRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + MINMN = MIN( M, N ) + IF( MINMN.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + WS = MAX( M, N ) + LDWRKX = M + LDWRKY = N +* + IF( NB.GT.1 .AND. NB.LT.MINMN ) THEN +* +* Set the crossover point NX. +* + NX = MAX( NB, ILAENV( 3, 'CGEBRD', ' ', M, N, -1, -1 ) ) +* +* Determine when to switch from blocked to unblocked code. +* + IF( NX.LT.MINMN ) THEN + WS = ( M+N )*NB + IF( LWORK.LT.WS ) THEN +* +* Not enough work space for the optimal NB, consider using +* a smaller block size. +* + NBMIN = ILAENV( 2, 'CGEBRD', ' ', M, N, -1, -1 ) + IF( LWORK.GE.( M+N )*NBMIN ) THEN + NB = LWORK / ( M+N ) + ELSE + NB = 1 + NX = MINMN + END IF + END IF + END IF + ELSE + NX = MINMN + END IF +* + DO 30 I = 1, MINMN - NX, NB +* +* Reduce rows and columns i:i+ib-1 to bidiagonal form and return +* the matrices X and Y which are needed to update the unreduced +* part of the matrix +* + CALL CLABRD( M-I+1, N-I+1, NB, A( I, I ), LDA, D( I ), E( I ), + $ TAUQ( I ), TAUP( I ), WORK, LDWRKX, + $ WORK( LDWRKX*NB+1 ), LDWRKY ) +* +* Update the trailing submatrix A(i+ib:m,i+ib:n), using +* an update of the form A := A - V*Y' - X*U' +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', M-I-NB+1, + $ N-I-NB+1, NB, -ONE, A( I+NB, I ), LDA, + $ WORK( LDWRKX*NB+NB+1 ), LDWRKY, ONE, + $ A( I+NB, I+NB ), LDA ) + CALL CGEMM( 'No transpose', 'No transpose', M-I-NB+1, N-I-NB+1, + $ NB, -ONE, WORK( NB+1 ), LDWRKX, A( I, I+NB ), LDA, + $ ONE, A( I+NB, I+NB ), LDA ) +* +* Copy diagonal and off-diagonal elements of B back into A +* + IF( M.GE.N ) THEN + DO 10 J = I, I + NB - 1 + A( J, J ) = D( J ) + A( J, J+1 ) = E( J ) + 10 CONTINUE + ELSE + DO 20 J = I, I + NB - 1 + A( J, J ) = D( J ) + A( J+1, J ) = E( J ) + 20 CONTINUE + END IF + 30 CONTINUE +* +* Use unblocked code to reduce the remainder of the matrix +* + CALL CGEBD2( M-I+1, N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAUQ( I ), TAUP( I ), WORK, IINFO ) + WORK( 1 ) = WS + RETURN +* +* End of CGEBRD +* + END diff --git a/costa/native/external/lapack/cgecon.f b/costa/native/external/lapack/cgecon.f new file mode 100644 index 000000000..ff7133ffc --- /dev/null +++ b/costa/native/external/lapack/cgecon.f @@ -0,0 +1,189 @@ + SUBROUTINE CGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, RWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGECON estimates the reciprocal of the condition number of a general +* complex matrix A, in either the 1-norm or the infinity-norm, using +* the LU factorization computed by CGETRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The factors L and U from the factorization A = P*L*U +* as computed by CGETRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* ANORM (input) REAL +* If NORM = '1' or 'O', the 1-norm of the original matrix A. +* If NORM = 'I', the infinity-norm of the original matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ONENRM + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + REAL AINVNM, SCALE, SL, SMLNUM, SU + COMPLEX ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH + EXTERNAL LSAME, ICAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACON, CLATRS, CSRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGECON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + CALL CLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, N, A, + $ LDA, WORK, SL, RWORK, INFO ) +* +* Multiply by inv(U). +* + CALL CLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SU, RWORK( N+1 ), INFO ) + ELSE +* +* Multiply by inv(U'). +* + CALL CLATRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, A, LDA, WORK, SU, RWORK( N+1 ), + $ INFO ) +* +* Multiply by inv(L'). +* + CALL CLATRS( 'Lower', 'Conjugate transpose', 'Unit', NORMIN, + $ N, A, LDA, WORK, SL, RWORK, INFO ) + END IF +* +* Divide X by 1/(SL*SU) if doing so will not cause overflow. +* + SCALE = SL*SU + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = ICAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of CGECON +* + END diff --git a/costa/native/external/lapack/cgeequ.f b/costa/native/external/lapack/cgeequ.f new file mode 100644 index 000000000..5d59b6cd6 --- /dev/null +++ b/costa/native/external/lapack/cgeequ.f @@ -0,0 +1,234 @@ + SUBROUTINE CGEEQU( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N + REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + REAL C( * ), R( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CGEEQU computes row and column scalings intended to equilibrate an +* M-by-N matrix A and reduce its condition number. R returns the row +* scale factors and C the column scale factors, chosen to try to make +* the largest element in each row and column of the matrix B with +* elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. +* +* R(i) and C(j) are restricted to be between SMLNUM = smallest safe +* number and BIGNUM = largest safe number. Use of these scaling +* factors is not guaranteed to reduce the condition number of A but +* works well in practice. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The M-by-N matrix whose equilibration factors are +* to be computed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* R (output) REAL array, dimension (M) +* If INFO = 0 or INFO > M, R contains the row scale factors +* for A. +* +* C (output) REAL array, dimension (N) +* If INFO = 0, C contains the column scale factors for A. +* +* ROWCND (output) REAL +* If INFO = 0 or INFO > M, ROWCND contains the ratio of the +* smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +* AMAX is neither too large nor too small, it is not worth +* scaling by R. +* +* COLCND (output) REAL +* If INFO = 0, COLCND contains the ratio of the smallest +* C(i) to the largest C(i). If COLCND >= 0.1, it is not +* worth scaling by C. +* +* AMAX (output) REAL +* Absolute value of largest matrix element. If AMAX is very +* close to overflow or very close to underflow, the matrix +* should be scaled. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= M: the i-th row of A is exactly zero +* > M: the (i-M)-th column of A is exactly zero +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL BIGNUM, RCMAX, RCMIN, SMLNUM + COMPLEX ZDUM +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + DO 30 J = 1, N + DO 20 I = 1, M + R( I ) = MAX( R( I ), CABS1( A( I, J ) ) ) + 20 CONTINUE + 30 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)) +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + DO 90 J = 1, N + DO 80 I = 1, M + C( J ) = MAX( C( J ), CABS1( A( I, J ) )*R( I ) ) + 80 CONTINUE + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)) +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of CGEEQU +* + END diff --git a/costa/native/external/lapack/cgees.f b/costa/native/external/lapack/cgees.f new file mode 100644 index 000000000..a081854bd --- /dev/null +++ b/costa/native/external/lapack/cgees.f @@ -0,0 +1,322 @@ + SUBROUTINE CGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, W, VS, + $ LDVS, WORK, LWORK, RWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVS, SORT + INTEGER INFO, LDA, LDVS, LWORK, N, SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), VS( LDVS, * ), W( * ), WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELECT + EXTERNAL SELECT +* .. +* +* Purpose +* ======= +* +* CGEES computes for an N-by-N complex nonsymmetric matrix A, the +* eigenvalues, the Schur form T, and, optionally, the matrix of Schur +* vectors Z. This gives the Schur factorization A = Z*T*(Z**H). +* +* Optionally, it also orders the eigenvalues on the diagonal of the +* Schur form so that selected eigenvalues are at the top left. +* The leading columns of Z then form an orthonormal basis for the +* invariant subspace corresponding to the selected eigenvalues. + +* A complex matrix is in Schur form if it is upper triangular. +* +* Arguments +* ========= +* +* JOBVS (input) CHARACTER*1 +* = 'N': Schur vectors are not computed; +* = 'V': Schur vectors are computed. +* +* SORT (input) CHARACTER*1 +* Specifies whether or not to order the eigenvalues on the +* diagonal of the Schur form. +* = 'N': Eigenvalues are not ordered: +* = 'S': Eigenvalues are ordered (see SELECT). +* +* SELECT (input) LOGICAL FUNCTION of one COMPLEX argument +* SELECT must be declared EXTERNAL in the calling subroutine. +* If SORT = 'S', SELECT is used to select eigenvalues to order +* to the top left of the Schur form. +* IF SORT = 'N', SELECT is not referenced. +* The eigenvalue W(j) is selected if SELECT(W(j)) is true. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the N-by-N matrix A. +* On exit, A has been overwritten by its Schur form T. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* SDIM (output) INTEGER +* If SORT = 'N', SDIM = 0. +* If SORT = 'S', SDIM = number of eigenvalues for which +* SELECT is true. +* +* W (output) COMPLEX array, dimension (N) +* W contains the computed eigenvalues, in the same order that +* they appear on the diagonal of the output Schur form T. +* +* VS (output) COMPLEX array, dimension (LDVS,N) +* If JOBVS = 'V', VS contains the unitary matrix Z of Schur +* vectors. +* If JOBVS = 'N', VS is not referenced. +* +* LDVS (input) INTEGER +* The leading dimension of the array VS. LDVS >= 1; if +* JOBVS = 'V', LDVS >= N. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,2*N). +* For good performance, LWORK must generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) REAL array, dimension (N) +* +* BWORK (workspace) LOGICAL array, dimension (N) +* Not referenced if SORT = 'N'. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, and i is +* <= N: the QR algorithm failed to compute all the +* eigenvalues; elements 1:ILO-1 and i+1:N of W +* contain those eigenvalues which have converged; +* if JOBVS = 'V', VS contains the matrix which +* reduces A to its partially converged Schur form. +* = N+1: the eigenvalues could not be reordered because +* some eigenvalues were too close to separate (the +* problem is very ill-conditioned); +* = N+2: after reordering, roundoff changed values of +* some complex eigenvalues so that leading +* eigenvalues in the Schur form no longer satisfy +* SELECT = .TRUE.. This could also be caused by +* underflow due to scaling. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTST, WANTVS + INTEGER HSWORK, I, IBAL, ICOND, IERR, IEVAL, IHI, ILO, + $ ITAU, IWRK, K, MAXB, MAXWRK, MINWRK + REAL ANRM, BIGNUM, CSCALE, EPS, S, SEP, SMLNUM +* .. +* .. Local Arrays .. + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEBAK, CGEBAL, CGEHRD, CHSEQR, CLACPY, + $ CLASCL, CTRSEN, CUNGHR, SLABAD, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVS = LSAME( JOBVS, 'V' ) + WANTST = LSAME( SORT, 'S' ) + IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN + INFO = -10 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to real +* workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by CHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MAXWRK = N + N*ILAENV( 1, 'CGEHRD', ' ', N, 1, N, 0 ) + MINWRK = MAX( 1, 2*N ) + IF( .NOT.WANTVS ) THEN + MAXB = MAX( ILAENV( 8, 'CHSEQR', 'SN', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'CHSEQR', 'SN', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, HSWORK, 1 ) + ELSE + MAXWRK = MAX( MAXWRK, N+( N-1 )* + $ ILAENV( 1, 'CUNGHR', ' ', N, 1, N, -1 ) ) + MAXB = MAX( ILAENV( 8, 'CHSEQR', 'EN', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'CHSEQR', 'EN', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, HSWORK, 1 ) + END IF + WORK( 1 ) = MAXWRK + END IF + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEES ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL CLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (CWorkspace: none) +* (RWorkspace: need N) +* + IBAL = 1 + CALL CGEBAL( 'P', N, A, LDA, ILO, IHI, RWORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + ITAU = 1 + IWRK = N + ITAU + CALL CGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVS ) THEN +* +* Copy Householder vectors to VS +* + CALL CLACPY( 'L', N, N, A, LDA, VS, LDVS ) +* +* Generate unitary matrix in VS +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL CUNGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) + END IF +* + SDIM = 0 +* +* Perform QR iteration, accumulating Schur vectors in VS if desired +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL CHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, W, VS, LDVS, + $ WORK( IWRK ), LWORK-IWRK+1, IEVAL ) + IF( IEVAL.GT.0 ) + $ INFO = IEVAL +* +* Sort eigenvalues if desired +* + IF( WANTST .AND. INFO.EQ.0 ) THEN + IF( SCALEA ) + $ CALL CLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, W, N, IERR ) + DO 10 I = 1, N + BWORK( I ) = SELECT( W( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues and transform Schur vectors +* (CWorkspace: none) +* (RWorkspace: none) +* + CALL CTRSEN( 'N', JOBVS, BWORK, N, A, LDA, VS, LDVS, W, SDIM, + $ S, SEP, WORK( IWRK ), LWORK-IWRK+1, ICOND ) + END IF +* + IF( WANTVS ) THEN +* +* Undo balancing +* (CWorkspace: none) +* (RWorkspace: need N) +* + CALL CGEBAK( 'P', 'R', N, ILO, IHI, RWORK( IBAL ), N, VS, LDVS, + $ IERR ) + END IF +* + IF( SCALEA ) THEN +* +* Undo scaling for the Schur form of A +* + CALL CLASCL( 'U', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR ) + CALL CCOPY( N, A, LDA+1, W, 1 ) + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of CGEES +* + END diff --git a/costa/native/external/lapack/cgeesx.f b/costa/native/external/lapack/cgeesx.f new file mode 100644 index 000000000..a096e9f59 --- /dev/null +++ b/costa/native/external/lapack/cgeesx.f @@ -0,0 +1,371 @@ + SUBROUTINE CGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM, W, + $ VS, LDVS, RCONDE, RCONDV, WORK, LWORK, RWORK, + $ BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVS, SENSE, SORT + INTEGER INFO, LDA, LDVS, LWORK, N, SDIM + REAL RCONDE, RCONDV +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), VS( LDVS, * ), W( * ), WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELECT + EXTERNAL SELECT +* .. +* +* Purpose +* ======= +* +* CGEESX computes for an N-by-N complex nonsymmetric matrix A, the +* eigenvalues, the Schur form T, and, optionally, the matrix of Schur +* vectors Z. This gives the Schur factorization A = Z*T*(Z**H). +* +* Optionally, it also orders the eigenvalues on the diagonal of the +* Schur form so that selected eigenvalues are at the top left; +* computes a reciprocal condition number for the average of the +* selected eigenvalues (RCONDE); and computes a reciprocal condition +* number for the right invariant subspace corresponding to the +* selected eigenvalues (RCONDV). The leading columns of Z form an +* orthonormal basis for this invariant subspace. +* +* For further explanation of the reciprocal condition numbers RCONDE +* and RCONDV, see Section 4.10 of the LAPACK Users' Guide (where +* these quantities are called s and sep respectively). +* +* A complex matrix is in Schur form if it is upper triangular. +* +* Arguments +* ========= +* +* JOBVS (input) CHARACTER*1 +* = 'N': Schur vectors are not computed; +* = 'V': Schur vectors are computed. +* +* SORT (input) CHARACTER*1 +* Specifies whether or not to order the eigenvalues on the +* diagonal of the Schur form. +* = 'N': Eigenvalues are not ordered; +* = 'S': Eigenvalues are ordered (see SELECT). +* +* SELECT (input) LOGICAL FUNCTION of one COMPLEX argument +* SELECT must be declared EXTERNAL in the calling subroutine. +* If SORT = 'S', SELECT is used to select eigenvalues to order +* to the top left of the Schur form. +* If SORT = 'N', SELECT is not referenced. +* An eigenvalue W(j) is selected if SELECT(W(j)) is true. +* +* SENSE (input) CHARACTER*1 +* Determines which reciprocal condition numbers are computed. +* = 'N': None are computed; +* = 'E': Computed for average of selected eigenvalues only; +* = 'V': Computed for selected right invariant subspace only; +* = 'B': Computed for both. +* If SENSE = 'E', 'V' or 'B', SORT must equal 'S'. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA, N) +* On entry, the N-by-N matrix A. +* On exit, A is overwritten by its Schur form T. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* SDIM (output) INTEGER +* If SORT = 'N', SDIM = 0. +* If SORT = 'S', SDIM = number of eigenvalues for which +* SELECT is true. +* +* W (output) COMPLEX array, dimension (N) +* W contains the computed eigenvalues, in the same order +* that they appear on the diagonal of the output Schur form T. +* +* VS (output) COMPLEX array, dimension (LDVS,N) +* If JOBVS = 'V', VS contains the unitary matrix Z of Schur +* vectors. +* If JOBVS = 'N', VS is not referenced. +* +* LDVS (input) INTEGER +* The leading dimension of the array VS. LDVS >= 1, and if +* JOBVS = 'V', LDVS >= N. +* +* RCONDE (output) REAL +* If SENSE = 'E' or 'B', RCONDE contains the reciprocal +* condition number for the average of the selected eigenvalues. +* Not referenced if SENSE = 'N' or 'V'. +* +* RCONDV (output) REAL +* If SENSE = 'V' or 'B', RCONDV contains the reciprocal +* condition number for the selected right invariant subspace. +* Not referenced if SENSE = 'N' or 'E'. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,2*N). +* Also, if SENSE = 'E' or 'V' or 'B', LWORK >= 2*SDIM*(N-SDIM), +* where SDIM is the number of selected eigenvalues computed by +* this routine. Note that 2*SDIM*(N-SDIM) <= N*N/2. +* For good performance, LWORK must generally be larger. +* +* RWORK (workspace) REAL array, dimension (N) +* +* BWORK (workspace) LOGICAL array, dimension (N) +* Not referenced if SORT = 'N'. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, and i is +* <= N: the QR algorithm failed to compute all the +* eigenvalues; elements 1:ILO-1 and i+1:N of W +* contain those eigenvalues which have converged; if +* JOBVS = 'V', VS contains the transformation which +* reduces A to its partially converged Schur form. +* = N+1: the eigenvalues could not be reordered because some +* eigenvalues were too close to separate (the problem +* is very ill-conditioned); +* = N+2: after reordering, roundoff changed values of some +* complex eigenvalues so that leading eigenvalues in +* the Schur form no longer satisfy SELECT=.TRUE. This +* could also be caused by underflow due to scaling. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL SCALEA, WANTSB, WANTSE, WANTSN, WANTST, + $ WANTSV, WANTVS + INTEGER HSWORK, I, IBAL, ICOND, IERR, IEVAL, IHI, ILO, + $ ITAU, IWRK, K, MAXB, MAXWRK, MINWRK + REAL ANRM, BIGNUM, CSCALE, EPS, SMLNUM +* .. +* .. Local Arrays .. + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEBAK, CGEBAL, CGEHRD, CHSEQR, CLACPY, + $ CLASCL, CTRSEN, CUNGHR, SLABAD, SLASCL, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + WANTVS = LSAME( JOBVS, 'V' ) + WANTST = LSAME( SORT, 'S' ) + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) + IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR. + $ ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of real workspace needed at that point in the +* code, as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to real +* workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by CHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case. +* If SENSE = 'E', 'V' or 'B', then the amount of workspace needed +* depends on SDIM, which is computed by the routine CTRSEN later +* in the code.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 ) ) THEN + MAXWRK = N + N*ILAENV( 1, 'CGEHRD', ' ', N, 1, N, 0 ) + MINWRK = MAX( 1, 2*N ) + IF( .NOT.WANTVS ) THEN + MAXB = MAX( ILAENV( 8, 'CHSEQR', 'SN', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'CHSEQR', 'SN', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, HSWORK, 1 ) + ELSE + MAXWRK = MAX( MAXWRK, N+( N-1 )* + $ ILAENV( 1, 'CUNGHR', ' ', N, 1, N, -1 ) ) + MAXB = MAX( ILAENV( 8, 'CHSEQR', 'SV', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'CHSEQR', 'SV', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, HSWORK, 1 ) + END IF + WORK( 1 ) = MAXWRK + END IF + IF( LWORK.LT.MINWRK ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEESX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL CLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* +* Permute the matrix to make it more nearly triangular +* (CWorkspace: none) +* (RWorkspace: need N) +* + IBAL = 1 + CALL CGEBAL( 'P', N, A, LDA, ILO, IHI, RWORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + ITAU = 1 + IWRK = N + ITAU + CALL CGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVS ) THEN +* +* Copy Householder vectors to VS +* + CALL CLACPY( 'L', N, N, A, LDA, VS, LDVS ) +* +* Generate unitary matrix in VS +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL CUNGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) + END IF +* + SDIM = 0 +* +* Perform QR iteration, accumulating Schur vectors in VS if desired +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL CHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, W, VS, LDVS, + $ WORK( IWRK ), LWORK-IWRK+1, IEVAL ) + IF( IEVAL.GT.0 ) + $ INFO = IEVAL +* +* Sort eigenvalues if desired +* + IF( WANTST .AND. INFO.EQ.0 ) THEN + IF( SCALEA ) + $ CALL CLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, W, N, IERR ) + DO 10 I = 1, N + BWORK( I ) = SELECT( W( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues, transform Schur vectors, and compute +* reciprocal condition numbers +* (CWorkspace: if SENSE is not 'N', need 2*SDIM*(N-SDIM) +* otherwise, need none ) +* (RWorkspace: none) +* + CALL CTRSEN( SENSE, JOBVS, BWORK, N, A, LDA, VS, LDVS, W, SDIM, + $ RCONDE, RCONDV, WORK( IWRK ), LWORK-IWRK+1, + $ ICOND ) + IF( .NOT.WANTSN ) + $ MAXWRK = MAX( MAXWRK, 2*SDIM*( N-SDIM ) ) + IF( ICOND.EQ.-14 ) THEN +* +* Not enough complex workspace +* + INFO = -15 + END IF + END IF +* + IF( WANTVS ) THEN +* +* Undo balancing +* (CWorkspace: none) +* (RWorkspace: need N) +* + CALL CGEBAK( 'P', 'R', N, ILO, IHI, RWORK( IBAL ), N, VS, LDVS, + $ IERR ) + END IF +* + IF( SCALEA ) THEN +* +* Undo scaling for the Schur form of A +* + CALL CLASCL( 'U', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR ) + CALL CCOPY( N, A, LDA+1, W, 1 ) + IF( ( WANTSV .OR. WANTSB ) .AND. INFO.EQ.0 ) THEN + DUM( 1 ) = RCONDV + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR ) + RCONDV = DUM( 1 ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of CGEESX +* + END diff --git a/costa/native/external/lapack/cgeev.f b/costa/native/external/lapack/cgeev.f new file mode 100644 index 000000000..77221d74a --- /dev/null +++ b/costa/native/external/lapack/cgeev.f @@ -0,0 +1,391 @@ + SUBROUTINE CGEEV( JOBVL, JOBVR, N, A, LDA, W, VL, LDVL, VR, LDVR, + $ WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), + $ W( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGEEV computes for an N-by-N complex nonsymmetric matrix A, the +* eigenvalues and, optionally, the left and/or right eigenvectors. +* +* The right eigenvector v(j) of A satisfies +* A * v(j) = lambda(j) * v(j) +* where lambda(j) is its eigenvalue. +* The left eigenvector u(j) of A satisfies +* u(j)**H * A = lambda(j) * u(j)**H +* where u(j)**H denotes the conjugate transpose of u(j). +* +* The computed eigenvectors are normalized to have Euclidean norm +* equal to 1 and largest component real. +* +* Arguments +* ========= +* +* JOBVL (input) CHARACTER*1 +* = 'N': left eigenvectors of A are not computed; +* = 'V': left eigenvectors of are computed. +* +* JOBVR (input) CHARACTER*1 +* = 'N': right eigenvectors of A are not computed; +* = 'V': right eigenvectors of A are computed. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the N-by-N matrix A. +* On exit, A has been overwritten. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* W (output) COMPLEX array, dimension (N) +* W contains the computed eigenvalues. +* +* VL (output) COMPLEX array, dimension (LDVL,N) +* If JOBVL = 'V', the left eigenvectors u(j) are stored one +* after another in the columns of VL, in the same order +* as their eigenvalues. +* If JOBVL = 'N', VL is not referenced. +* u(j) = VL(:,j), the j-th column of VL. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. LDVL >= 1; if +* JOBVL = 'V', LDVL >= N. +* +* VR (output) COMPLEX array, dimension (LDVR,N) +* If JOBVR = 'V', the right eigenvectors v(j) are stored one +* after another in the columns of VR, in the same order +* as their eigenvalues. +* If JOBVR = 'N', VR is not referenced. +* v(j) = VR(:,j), the j-th column of VR. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. LDVR >= 1; if +* JOBVR = 'V', LDVR >= N. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,2*N). +* For good performance, LWORK must generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) REAL array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the QR algorithm failed to compute all the +* eigenvalues, and no eigenvectors have been computed; +* elements and i+1:N of W contain eigenvalues which have +* converged. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTVL, WANTVR + CHARACTER SIDE + INTEGER HSWORK, I, IBAL, IERR, IHI, ILO, IRWORK, ITAU, + $ IWRK, K, MAXB, MAXWRK, MINWRK, NOUT + REAL ANRM, BIGNUM, CSCALE, EPS, SCL, SMLNUM + COMPLEX TMP +* .. +* .. Local Arrays .. + LOGICAL SELECT( 1 ) + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CGEBAK, CGEBAL, CGEHRD, CHSEQR, CLACPY, CLASCL, + $ CSCAL, CSSCAL, CTREVC, CUNGHR, SLABAD, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV, ISAMAX + REAL CLANGE, SCNRM2, SLAMCH + EXTERNAL LSAME, ILAENV, ISAMAX, CLANGE, SCNRM2, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG, CMPLX, CONJG, MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVL = LSAME( JOBVL, 'V' ) + WANTVR = LSAME( JOBVR, 'V' ) + IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN + INFO = -10 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to real +* workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by CHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MAXWRK = N + N*ILAENV( 1, 'CGEHRD', ' ', N, 1, N, 0 ) + IF( ( .NOT.WANTVL ) .AND. ( .NOT.WANTVR ) ) THEN + MINWRK = MAX( 1, 2*N ) + MAXB = MAX( ILAENV( 8, 'CHSEQR', 'EN', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'CHSEQR', 'EN', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, HSWORK ) + ELSE + MINWRK = MAX( 1, 2*N ) + MAXWRK = MAX( MAXWRK, N+( N-1 )* + $ ILAENV( 1, 'CUNGHR', ' ', N, 1, N, -1 ) ) + MAXB = MAX( ILAENV( 8, 'CHSEQR', 'SV', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'CHSEQR', 'SV', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, HSWORK, 2*N ) + END IF + WORK( 1 ) = MAXWRK + END IF + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL CLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Balance the matrix +* (CWorkspace: none) +* (RWorkspace: need N) +* + IBAL = 1 + CALL CGEBAL( 'B', N, A, LDA, ILO, IHI, RWORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + ITAU = 1 + IWRK = ITAU + N + CALL CGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVL ) THEN +* +* Want left eigenvectors +* Copy Householder vectors to VL +* + SIDE = 'L' + CALL CLACPY( 'L', N, N, A, LDA, VL, LDVL ) +* +* Generate unitary matrix in VL +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL CUNGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VL +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL CHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VL, LDVL, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + IF( WANTVR ) THEN +* +* Want left and right eigenvectors +* Copy Schur vectors to VR +* + SIDE = 'B' + CALL CLACPY( 'F', N, N, VL, LDVL, VR, LDVR ) + END IF +* + ELSE IF( WANTVR ) THEN +* +* Want right eigenvectors +* Copy Householder vectors to VR +* + SIDE = 'R' + CALL CLACPY( 'L', N, N, A, LDA, VR, LDVR ) +* +* Generate unitary matrix in VR +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL CUNGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VR +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL CHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + ELSE +* +* Compute eigenvalues only +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL CHSEQR( 'E', 'N', N, ILO, IHI, A, LDA, W, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) + END IF +* +* If INFO > 0 from CHSEQR, then quit +* + IF( INFO.GT.0 ) + $ GO TO 50 +* + IF( WANTVL .OR. WANTVR ) THEN +* +* Compute left and/or right eigenvectors +* (CWorkspace: need 2*N) +* (RWorkspace: need 2*N) +* + IRWORK = IBAL + N + CALL CTREVC( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ N, NOUT, WORK( IWRK ), RWORK( IRWORK ), IERR ) + END IF +* + IF( WANTVL ) THEN +* +* Undo balancing of left eigenvectors +* (CWorkspace: none) +* (RWorkspace: need N) +* + CALL CGEBAK( 'B', 'L', N, ILO, IHI, RWORK( IBAL ), N, VL, LDVL, + $ IERR ) +* +* Normalize left eigenvectors and make largest component real +* + DO 20 I = 1, N + SCL = ONE / SCNRM2( N, VL( 1, I ), 1 ) + CALL CSSCAL( N, SCL, VL( 1, I ), 1 ) + DO 10 K = 1, N + RWORK( IRWORK+K-1 ) = REAL( VL( K, I ) )**2 + + $ AIMAG( VL( K, I ) )**2 + 10 CONTINUE + K = ISAMAX( N, RWORK( IRWORK ), 1 ) + TMP = CONJG( VL( K, I ) ) / SQRT( RWORK( IRWORK+K-1 ) ) + CALL CSCAL( N, TMP, VL( 1, I ), 1 ) + VL( K, I ) = CMPLX( REAL( VL( K, I ) ), ZERO ) + 20 CONTINUE + END IF +* + IF( WANTVR ) THEN +* +* Undo balancing of right eigenvectors +* (CWorkspace: none) +* (RWorkspace: need N) +* + CALL CGEBAK( 'B', 'R', N, ILO, IHI, RWORK( IBAL ), N, VR, LDVR, + $ IERR ) +* +* Normalize right eigenvectors and make largest component real +* + DO 40 I = 1, N + SCL = ONE / SCNRM2( N, VR( 1, I ), 1 ) + CALL CSSCAL( N, SCL, VR( 1, I ), 1 ) + DO 30 K = 1, N + RWORK( IRWORK+K-1 ) = REAL( VR( K, I ) )**2 + + $ AIMAG( VR( K, I ) )**2 + 30 CONTINUE + K = ISAMAX( N, RWORK( IRWORK ), 1 ) + TMP = CONJG( VR( K, I ) ) / SQRT( RWORK( IRWORK+K-1 ) ) + CALL CSCAL( N, TMP, VR( 1, I ), 1 ) + VR( K, I ) = CMPLX( REAL( VR( K, I ) ), ZERO ) + 40 CONTINUE + END IF +* +* Undo scaling if necessary +* + 50 CONTINUE + IF( SCALEA ) THEN + CALL CLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, W( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + IF( INFO.GT.0 ) THEN + CALL CLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, W, N, IERR ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of CGEEV +* + END diff --git a/costa/native/external/lapack/cgeevx.f b/costa/native/external/lapack/cgeevx.f new file mode 100644 index 000000000..4e2e3da64 --- /dev/null +++ b/costa/native/external/lapack/cgeevx.f @@ -0,0 +1,520 @@ + SUBROUTINE CGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, W, VL, + $ LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, RCONDE, + $ RCONDV, WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER BALANC, JOBVL, JOBVR, SENSE + INTEGER IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N + REAL ABNRM +* .. +* .. Array Arguments .. + REAL RCONDE( * ), RCONDV( * ), RWORK( * ), + $ SCALE( * ) + COMPLEX A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), + $ W( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGEEVX computes for an N-by-N complex nonsymmetric matrix A, the +* eigenvalues and, optionally, the left and/or right eigenvectors. +* +* Optionally also, it computes a balancing transformation to improve +* the conditioning of the eigenvalues and eigenvectors (ILO, IHI, +* SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues +* (RCONDE), and reciprocal condition numbers for the right +* eigenvectors (RCONDV). +* +* The right eigenvector v(j) of A satisfies +* A * v(j) = lambda(j) * v(j) +* where lambda(j) is its eigenvalue. +* The left eigenvector u(j) of A satisfies +* u(j)**H * A = lambda(j) * u(j)**H +* where u(j)**H denotes the conjugate transpose of u(j). +* +* The computed eigenvectors are normalized to have Euclidean norm +* equal to 1 and largest component real. +* +* Balancing a matrix means permuting the rows and columns to make it +* more nearly upper triangular, and applying a diagonal similarity +* transformation D * A * D**(-1), where D is a diagonal matrix, to +* make its rows and columns closer in norm and the condition numbers +* of its eigenvalues and eigenvectors smaller. The computed +* reciprocal condition numbers correspond to the balanced matrix. +* Permuting rows and columns will not change the condition numbers +* (in exact arithmetic) but diagonal scaling will. For further +* explanation of balancing, see section 4.10.2 of the LAPACK +* Users' Guide. +* +* Arguments +* ========= +* +* BALANC (input) CHARACTER*1 +* Indicates how the input matrix should be diagonally scaled +* and/or permuted to improve the conditioning of its +* eigenvalues. +* = 'N': Do not diagonally scale or permute; +* = 'P': Perform permutations to make the matrix more nearly +* upper triangular. Do not diagonally scale; +* = 'S': Diagonally scale the matrix, ie. replace A by +* D*A*D**(-1), where D is a diagonal matrix chosen +* to make the rows and columns of A more equal in +* norm. Do not permute; +* = 'B': Both diagonally scale and permute A. +* +* Computed reciprocal condition numbers will be for the matrix +* after balancing and/or permuting. Permuting does not change +* condition numbers (in exact arithmetic), but balancing does. +* +* JOBVL (input) CHARACTER*1 +* = 'N': left eigenvectors of A are not computed; +* = 'V': left eigenvectors of A are computed. +* If SENSE = 'E' or 'B', JOBVL must = 'V'. +* +* JOBVR (input) CHARACTER*1 +* = 'N': right eigenvectors of A are not computed; +* = 'V': right eigenvectors of A are computed. +* If SENSE = 'E' or 'B', JOBVR must = 'V'. +* +* SENSE (input) CHARACTER*1 +* Determines which reciprocal condition numbers are computed. +* = 'N': None are computed; +* = 'E': Computed for eigenvalues only; +* = 'V': Computed for right eigenvectors only; +* = 'B': Computed for eigenvalues and right eigenvectors. +* +* If SENSE = 'E' or 'B', both left and right eigenvectors +* must also be computed (JOBVL = 'V' and JOBVR = 'V'). +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the N-by-N matrix A. +* On exit, A has been overwritten. If JOBVL = 'V' or +* JOBVR = 'V', A contains the Schur form of the balanced +* version of the matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* W (output) COMPLEX array, dimension (N) +* W contains the computed eigenvalues. +* +* VL (output) COMPLEX array, dimension (LDVL,N) +* If JOBVL = 'V', the left eigenvectors u(j) are stored one +* after another in the columns of VL, in the same order +* as their eigenvalues. +* If JOBVL = 'N', VL is not referenced. +* u(j) = VL(:,j), the j-th column of VL. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. LDVL >= 1; if +* JOBVL = 'V', LDVL >= N. +* +* VR (output) COMPLEX array, dimension (LDVR,N) +* If JOBVR = 'V', the right eigenvectors v(j) are stored one +* after another in the columns of VR, in the same order +* as their eigenvalues. +* If JOBVR = 'N', VR is not referenced. +* v(j) = VR(:,j), the j-th column of VR. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. LDVR >= 1; if +* JOBVR = 'V', LDVR >= N. +* +* ILO,IHI (output) INTEGER +* ILO and IHI are integer values determined when A was +* balanced. The balanced A(i,j) = 0 if I > J and +* J = 1,...,ILO-1 or I = IHI+1,...,N. +* +* SCALE (output) REAL array, dimension (N) +* Details of the permutations and scaling factors applied +* when balancing A. If P(j) is the index of the row and column +* interchanged with row and column j, and D(j) is the scaling +* factor applied to row and column j, then +* SCALE(J) = P(J), for J = 1,...,ILO-1 +* = D(J), for J = ILO,...,IHI +* = P(J) for J = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* ABNRM (output) REAL +* The one-norm of the balanced matrix (the maximum +* of the sum of absolute values of elements of any column). +* +* RCONDE (output) REAL array, dimension (N) +* RCONDE(j) is the reciprocal condition number of the j-th +* eigenvalue. +* +* RCONDV (output) REAL array, dimension (N) +* RCONDV(j) is the reciprocal condition number of the j-th +* right eigenvector. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. If SENSE = 'N' or 'E', +* LWORK >= max(1,2*N), and if SENSE = 'V' or 'B', +* LWORK >= N*N+2*N. +* For good performance, LWORK must generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) REAL array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the QR algorithm failed to compute all the +* eigenvalues, and no eigenvectors or condition numbers +* have been computed; elements 1:ILO-1 and i+1:N of W +* contain eigenvalues which have converged. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTVL, WANTVR, WNTSNB, WNTSNE, + $ WNTSNN, WNTSNV + CHARACTER JOB, SIDE + INTEGER HSWORK, I, ICOND, IERR, ITAU, IWRK, K, MAXB, + $ MAXWRK, MINWRK, NOUT + REAL ANRM, BIGNUM, CSCALE, EPS, SCL, SMLNUM + COMPLEX TMP +* .. +* .. Local Arrays .. + LOGICAL SELECT( 1 ) + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CGEBAK, CGEBAL, CGEHRD, CHSEQR, CLACPY, CLASCL, + $ CSCAL, CSSCAL, CTREVC, CTRSNA, CUNGHR, SLABAD, + $ SLASCL, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV, ISAMAX + REAL CLANGE, SCNRM2, SLAMCH + EXTERNAL LSAME, ILAENV, ISAMAX, CLANGE, SCNRM2, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG, CMPLX, CONJG, MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVL = LSAME( JOBVL, 'V' ) + WANTVR = LSAME( JOBVR, 'V' ) + WNTSNN = LSAME( SENSE, 'N' ) + WNTSNE = LSAME( SENSE, 'E' ) + WNTSNV = LSAME( SENSE, 'V' ) + WNTSNB = LSAME( SENSE, 'B' ) + IF( .NOT.( LSAME( BALANC, 'N' ) .OR. LSAME( BALANC, 'S' ) .OR. + $ LSAME( BALANC, 'P' ) .OR. LSAME( BALANC, 'B' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( .NOT.( WNTSNN .OR. WNTSNE .OR. WNTSNB .OR. WNTSNV ) .OR. + $ ( ( WNTSNE .OR. WNTSNB ) .AND. .NOT.( WANTVL .AND. + $ WANTVR ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN + INFO = -10 + ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN + INFO = -12 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to real +* workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by CHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MAXWRK = N + N*ILAENV( 1, 'CGEHRD', ' ', N, 1, N, 0 ) + IF( ( .NOT.WANTVL ) .AND. ( .NOT.WANTVR ) ) THEN + MINWRK = MAX( 1, 2*N ) + IF( .NOT.( WNTSNN .OR. WNTSNE ) ) + $ MINWRK = MAX( MINWRK, N*N+2*N ) + MAXB = MAX( ILAENV( 8, 'CHSEQR', 'SN', N, 1, N, -1 ), 2 ) + IF( WNTSNN ) THEN + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'CHSEQR', 'EN', N, + $ 1, N, -1 ) ) ) + ELSE + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'CHSEQR', 'SN', N, + $ 1, N, -1 ) ) ) + END IF + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, 1, HSWORK ) + IF( .NOT.( WNTSNN .OR. WNTSNE ) ) + $ MAXWRK = MAX( MAXWRK, N*N+2*N ) + ELSE + MINWRK = MAX( 1, 2*N ) + IF( .NOT.( WNTSNN .OR. WNTSNE ) ) + $ MINWRK = MAX( MINWRK, N*N+2*N ) + MAXB = MAX( ILAENV( 8, 'CHSEQR', 'SN', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'CHSEQR', 'EN', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, 1, HSWORK ) + MAXWRK = MAX( MAXWRK, N+( N-1 )* + $ ILAENV( 1, 'CUNGHR', ' ', N, 1, N, -1 ) ) + IF( .NOT.( WNTSNN .OR. WNTSNE ) ) + $ MAXWRK = MAX( MAXWRK, N*N+2*N ) + MAXWRK = MAX( MAXWRK, 2*N, 1 ) + END IF + WORK( 1 ) = MAXWRK + END IF + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ICOND = 0 + ANRM = CLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL CLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Balance the matrix and compute ABNRM +* + CALL CGEBAL( BALANC, N, A, LDA, ILO, IHI, SCALE, IERR ) + ABNRM = CLANGE( '1', N, N, A, LDA, DUM ) + IF( SCALEA ) THEN + DUM( 1 ) = ABNRM + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR ) + ABNRM = DUM( 1 ) + END IF +* +* Reduce to upper Hessenberg form +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + ITAU = 1 + IWRK = ITAU + N + CALL CGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVL ) THEN +* +* Want left eigenvectors +* Copy Householder vectors to VL +* + SIDE = 'L' + CALL CLACPY( 'L', N, N, A, LDA, VL, LDVL ) +* +* Generate unitary matrix in VL +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL CUNGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VL +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL CHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VL, LDVL, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + IF( WANTVR ) THEN +* +* Want left and right eigenvectors +* Copy Schur vectors to VR +* + SIDE = 'B' + CALL CLACPY( 'F', N, N, VL, LDVL, VR, LDVR ) + END IF +* + ELSE IF( WANTVR ) THEN +* +* Want right eigenvectors +* Copy Householder vectors to VR +* + SIDE = 'R' + CALL CLACPY( 'L', N, N, A, LDA, VR, LDVR ) +* +* Generate unitary matrix in VR +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL CUNGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VR +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL CHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + ELSE +* +* Compute eigenvalues only +* If condition numbers desired, compute Schur form +* + IF( WNTSNN ) THEN + JOB = 'E' + ELSE + JOB = 'S' + END IF +* +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL CHSEQR( JOB, 'N', N, ILO, IHI, A, LDA, W, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) + END IF +* +* If INFO > 0 from CHSEQR, then quit +* + IF( INFO.GT.0 ) + $ GO TO 50 +* + IF( WANTVL .OR. WANTVR ) THEN +* +* Compute left and/or right eigenvectors +* (CWorkspace: need 2*N) +* (RWorkspace: need N) +* + CALL CTREVC( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ N, NOUT, WORK( IWRK ), RWORK, IERR ) + END IF +* +* Compute condition numbers if desired +* (CWorkspace: need N*N+2*N unless SENSE = 'E') +* (RWorkspace: need 2*N unless SENSE = 'E') +* + IF( .NOT.WNTSNN ) THEN + CALL CTRSNA( SENSE, 'A', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ RCONDE, RCONDV, N, NOUT, WORK( IWRK ), N, RWORK, + $ ICOND ) + END IF +* + IF( WANTVL ) THEN +* +* Undo balancing of left eigenvectors +* + CALL CGEBAK( BALANC, 'L', N, ILO, IHI, SCALE, N, VL, LDVL, + $ IERR ) +* +* Normalize left eigenvectors and make largest component real +* + DO 20 I = 1, N + SCL = ONE / SCNRM2( N, VL( 1, I ), 1 ) + CALL CSSCAL( N, SCL, VL( 1, I ), 1 ) + DO 10 K = 1, N + RWORK( K ) = REAL( VL( K, I ) )**2 + + $ AIMAG( VL( K, I ) )**2 + 10 CONTINUE + K = ISAMAX( N, RWORK, 1 ) + TMP = CONJG( VL( K, I ) ) / SQRT( RWORK( K ) ) + CALL CSCAL( N, TMP, VL( 1, I ), 1 ) + VL( K, I ) = CMPLX( REAL( VL( K, I ) ), ZERO ) + 20 CONTINUE + END IF +* + IF( WANTVR ) THEN +* +* Undo balancing of right eigenvectors +* + CALL CGEBAK( BALANC, 'R', N, ILO, IHI, SCALE, N, VR, LDVR, + $ IERR ) +* +* Normalize right eigenvectors and make largest component real +* + DO 40 I = 1, N + SCL = ONE / SCNRM2( N, VR( 1, I ), 1 ) + CALL CSSCAL( N, SCL, VR( 1, I ), 1 ) + DO 30 K = 1, N + RWORK( K ) = REAL( VR( K, I ) )**2 + + $ AIMAG( VR( K, I ) )**2 + 30 CONTINUE + K = ISAMAX( N, RWORK, 1 ) + TMP = CONJG( VR( K, I ) ) / SQRT( RWORK( K ) ) + CALL CSCAL( N, TMP, VR( 1, I ), 1 ) + VR( K, I ) = CMPLX( REAL( VR( K, I ) ), ZERO ) + 40 CONTINUE + END IF +* +* Undo scaling if necessary +* + 50 CONTINUE + IF( SCALEA ) THEN + CALL CLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, W( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + IF( INFO.EQ.0 ) THEN + IF( ( WNTSNV .OR. WNTSNB ) .AND. ICOND.EQ.0 ) + $ CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, RCONDV, N, + $ IERR ) + ELSE + CALL CLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, W, N, IERR ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of CGEEVX +* + END diff --git a/costa/native/external/lapack/cgegs.f b/costa/native/external/lapack/cgegs.f new file mode 100644 index 000000000..f41a49b6b --- /dev/null +++ b/costa/native/external/lapack/cgegs.f @@ -0,0 +1,442 @@ + SUBROUTINE CGEGS( JOBVSL, JOBVSR, N, A, LDA, B, LDB, ALPHA, BETA, + $ VSL, LDVSL, VSR, LDVSR, WORK, LWORK, RWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine CGGES. +* +* CGEGS computes for a pair of N-by-N complex nonsymmetric matrices A, +* B: the generalized eigenvalues (alpha, beta), the complex Schur +* form (A, B), and optionally left and/or right Schur vectors +* (VSL and VSR). +* +* (If only the generalized eigenvalues are needed, use the driver CGEGV +* instead.) +* +* A generalized eigenvalue for a pair of matrices (A,B) is, roughly +* speaking, a scalar w or a ratio alpha/beta = w, such that A - w*B +* is singular. It is usually represented as the pair (alpha,beta), +* as there is a reasonable interpretation for beta=0, and even for +* both being zero. A good beginning reference is the book, "Matrix +* Computations", by G. Golub & C. van Loan (Johns Hopkins U. Press) +* +* The (generalized) Schur form of a pair of matrices is the result of +* multiplying both matrices on the left by one unitary matrix and +* both on the right by another unitary matrix, these two unitary +* matrices being chosen so as to bring the pair of matrices into +* upper triangular form with the diagonal elements of B being +* non-negative real numbers (this is also called complex Schur form.) +* +* The left and right Schur vectors are the columns of VSL and VSR, +* respectively, where VSL and VSR are the unitary matrices +* which reduce A and B to Schur form: +* +* Schur form of (A,B) = ( (VSL)**H A (VSR), (VSL)**H B (VSR) ) +* +* Arguments +* ========= +* +* JOBVSL (input) CHARACTER*1 +* = 'N': do not compute the left Schur vectors; +* = 'V': compute the left Schur vectors. +* +* JOBVSR (input) CHARACTER*1 +* = 'N': do not compute the right Schur vectors; +* = 'V': compute the right Schur vectors. +* +* N (input) INTEGER +* The order of the matrices A, B, VSL, and VSR. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA, N) +* On entry, the first of the pair of matrices whose generalized +* eigenvalues and (optionally) Schur vectors are to be +* computed. +* On exit, the generalized Schur form of A. +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) COMPLEX array, dimension (LDB, N) +* On entry, the second of the pair of matrices whose +* generalized eigenvalues and (optionally) Schur vectors are +* to be computed. +* On exit, the generalized Schur form of B. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* ALPHA (output) COMPLEX array, dimension (N) +* BETA (output) COMPLEX array, dimension (N) +* On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the +* generalized eigenvalues. ALPHA(j), j=1,...,N and BETA(j), +* j=1,...,N are the diagonals of the complex Schur form (A,B) +* output by CGEGS. The BETA(j) will be non-negative real. +* +* Note: the quotients ALPHA(j)/BETA(j) may easily over- or +* underflow, and BETA(j) may even be zero. Thus, the user +* should avoid naively computing the ratio alpha/beta. +* However, ALPHA will be always less than and usually +* comparable with norm(A) in magnitude, and BETA always less +* than and usually comparable with norm(B). +* +* VSL (output) COMPLEX array, dimension (LDVSL,N) +* If JOBVSL = 'V', VSL will contain the left Schur vectors. +* (See "Purpose", above.) +* Not referenced if JOBVSL = 'N'. +* +* LDVSL (input) INTEGER +* The leading dimension of the matrix VSL. LDVSL >= 1, and +* if JOBVSL = 'V', LDVSL >= N. +* +* VSR (output) COMPLEX array, dimension (LDVSR,N) +* If JOBVSR = 'V', VSR will contain the right Schur vectors. +* (See "Purpose", above.) +* Not referenced if JOBVSR = 'N'. +* +* LDVSR (input) INTEGER +* The leading dimension of the matrix VSR. LDVSR >= 1, and +* if JOBVSR = 'V', LDVSR >= N. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,2*N). +* For good performance, LWORK must generally be larger. +* To compute the optimal value of LWORK, call ILAENV to get +* blocksizes (for CGEQRF, CUNMQR, and CUNGQR.) Then compute: +* NB -- MAX of the blocksizes for CGEQRF, CUNMQR, and CUNGQR; +* the optimal LWORK is N*(NB+1). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) REAL array, dimension (3*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* =1,...,N: +* The QZ iteration failed. (A,B) are not in Schur +* form, but ALPHA(j) and BETA(j) should be correct for +* j=INFO+1,...,N. +* > N: errors that usually indicate LAPACK problems: +* =N+1: error return from CGGBAL +* =N+2: error return from CGEQRF +* =N+3: error return from CUNMQR +* =N+4: error return from CUNGQR +* =N+5: error return from CGGHRD +* =N+6: error return from CHGEQZ (other than failed +* iteration) +* =N+7: error return from CGGBAK (computing VSL) +* =N+8: error return from CGGBAK (computing VSR) +* =N+9: error return from CLASCL (various places) +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILVSL, ILVSR, LQUERY + INTEGER ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT, + $ ILO, IRIGHT, IROWS, IRWORK, ITAU, IWORK, + $ LOPT, LWKMIN, LWKOPT, NB, NB1, NB2, NB3 + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SAFMIN, SMLNUM +* .. +* .. External Subroutines .. + EXTERNAL CGEQRF, CGGBAK, CGGBAL, CGGHRD, CHGEQZ, CLACPY, + $ CLASCL, CLASET, CUNGQR, CUNMQR, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL ILAENV, LSAME, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* +* Test the input arguments +* + LWKMIN = MAX( 2*N, 1 ) + LWKOPT = LWKMIN + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -11 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF +* + IF( INFO.EQ.0 ) THEN + NB1 = ILAENV( 1, 'CGEQRF', ' ', N, N, -1, -1 ) + NB2 = ILAENV( 1, 'CUNMQR', ' ', N, N, N, -1 ) + NB3 = ILAENV( 1, 'CUNGQR', ' ', N, N, N, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LOPT = N*(NB+1) + WORK( 1 ) = LOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEGS ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'E' )*SLAMCH( 'B' ) + SAFMIN = SLAMCH( 'S' ) + SMLNUM = N*SAFMIN / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF +* + IF( ILASCL ) THEN + CALL CLASCL( 'G', -1, -1, ANRM, ANRMTO, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = CLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF +* + IF( ILBSCL ) THEN + CALL CLASCL( 'G', -1, -1, BNRM, BNRMTO, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* +* Permute the matrix to make it more nearly triangular +* + ILEFT = 1 + IRIGHT = N + 1 + IRWORK = IRIGHT + N + IWORK = 1 + CALL CGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWORK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 1 + GO TO 10 + END IF +* +* Reduce B to triangular form, and initialize VSL and/or VSR +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = IWORK + IWORK = ITAU + IROWS + CALL CGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 2 + GO TO 10 + END IF +* + CALL CUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ), + $ LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 3 + GO TO 10 + END IF +* + IF( ILVSL ) THEN + CALL CLASET( 'Full', N, N, CZERO, CONE, VSL, LDVSL ) + CALL CLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + CALL CUNGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK, + $ IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 4 + GO TO 10 + END IF + END IF +* + IF( ILVSR ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* + CALL CGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 5 + GO TO 10 + END IF +* +* Perform QZ algorithm, computing Schur vectors if desired +* + IWORK = ITAU + CALL CHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK( IWORK ), + $ LWORK+1-IWORK, RWORK( IRWORK ), IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN + INFO = IINFO + ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN + INFO = IINFO - N + ELSE + INFO = N + 6 + END IF + GO TO 10 + END IF +* +* Apply permutation to VSL and VSR +* + IF( ILVSL ) THEN + CALL CGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSL, LDVSL, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 7 + GO TO 10 + END IF + END IF + IF( ILVSR ) THEN + CALL CGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSR, LDVSR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 8 + GO TO 10 + END IF + END IF +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL CLASCL( 'U', -1, -1, ANRMTO, ANRM, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + CALL CLASCL( 'G', -1, -1, ANRMTO, ANRM, N, 1, ALPHA, N, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* + IF( ILBSCL ) THEN + CALL CLASCL( 'U', -1, -1, BNRMTO, BNRM, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + CALL CLASCL( 'G', -1, -1, BNRMTO, BNRM, N, 1, BETA, N, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* + 10 CONTINUE + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CGEGS +* + END diff --git a/costa/native/external/lapack/cgegv.f b/costa/native/external/lapack/cgegv.f new file mode 100644 index 000000000..ca4525437 --- /dev/null +++ b/costa/native/external/lapack/cgegv.f @@ -0,0 +1,591 @@ + SUBROUTINE CGEGV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, + $ VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine CGGEV. +* +* CGEGV computes for a pair of N-by-N complex nonsymmetric matrices A +* and B, the generalized eigenvalues (alpha, beta), and optionally, +* the left and/or right generalized eigenvectors (VL and VR). +* +* A generalized eigenvalue for a pair of matrices (A,B) is, roughly +* speaking, a scalar w or a ratio alpha/beta = w, such that A - w*B +* is singular. It is usually represented as the pair (alpha,beta), +* as there is a reasonable interpretation for beta=0, and even for +* both being zero. A good beginning reference is the book, "Matrix +* Computations", by G. Golub & C. van Loan (Johns Hopkins U. Press) +* +* A right generalized eigenvector corresponding to a generalized +* eigenvalue w for a pair of matrices (A,B) is a vector r such +* that (A - w B) r = 0 . A left generalized eigenvector is a vector +* l such that l**H * (A - w B) = 0, where l**H is the +* conjugate-transpose of l. +* +* Note: this routine performs "full balancing" on A and B -- see +* "Further Details", below. +* +* Arguments +* ========= +* +* JOBVL (input) CHARACTER*1 +* = 'N': do not compute the left generalized eigenvectors; +* = 'V': compute the left generalized eigenvectors. +* +* JOBVR (input) CHARACTER*1 +* = 'N': do not compute the right generalized eigenvectors; +* = 'V': compute the right generalized eigenvectors. +* +* N (input) INTEGER +* The order of the matrices A, B, VL, and VR. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA, N) +* On entry, the first of the pair of matrices whose +* generalized eigenvalues and (optionally) generalized +* eigenvectors are to be computed. +* On exit, the contents will have been destroyed. (For a +* description of the contents of A on exit, see "Further +* Details", below.) +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) COMPLEX array, dimension (LDB, N) +* On entry, the second of the pair of matrices whose +* generalized eigenvalues and (optionally) generalized +* eigenvectors are to be computed. +* On exit, the contents will have been destroyed. (For a +* description of the contents of B on exit, see "Further +* Details", below.) +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* ALPHA (output) COMPLEX array, dimension (N) +* BETA (output) COMPLEX array, dimension (N) +* On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the +* generalized eigenvalues. +* +* Note: the quotients ALPHA(j)/BETA(j) may easily over- or +* underflow, and BETA(j) may even be zero. Thus, the user +* should avoid naively computing the ratio alpha/beta. +* However, ALPHA will be always less than and usually +* comparable with norm(A) in magnitude, and BETA always less +* than and usually comparable with norm(B). +* +* VL (output) COMPLEX array, dimension (LDVL,N) +* If JOBVL = 'V', the left generalized eigenvectors. (See +* "Purpose", above.) +* Each eigenvector will be scaled so the largest component +* will have abs(real part) + abs(imag. part) = 1, *except* +* that for eigenvalues with alpha=beta=0, a zero vector will +* be returned as the corresponding eigenvector. +* Not referenced if JOBVL = 'N'. +* +* LDVL (input) INTEGER +* The leading dimension of the matrix VL. LDVL >= 1, and +* if JOBVL = 'V', LDVL >= N. +* +* VR (output) COMPLEX array, dimension (LDVR,N) +* If JOBVR = 'V', the right generalized eigenvectors. (See +* "Purpose", above.) +* Each eigenvector will be scaled so the largest component +* will have abs(real part) + abs(imag. part) = 1, *except* +* that for eigenvalues with alpha=beta=0, a zero vector will +* be returned as the corresponding eigenvector. +* Not referenced if JOBVR = 'N'. +* +* LDVR (input) INTEGER +* The leading dimension of the matrix VR. LDVR >= 1, and +* if JOBVR = 'V', LDVR >= N. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,2*N). +* For good performance, LWORK must generally be larger. +* To compute the optimal value of LWORK, call ILAENV to get +* blocksizes (for CGEQRF, CUNMQR, and CUNGQR.) Then compute: +* NB -- MAX of the blocksizes for CGEQRF, CUNMQR, and CUNGQR; +* The optimal LWORK is MAX( 2*N, N*(NB+1) ). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace/output) REAL array, dimension (8*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* =1,...,N: +* The QZ iteration failed. No eigenvectors have been +* calculated, but ALPHA(j) and BETA(j) should be +* correct for j=INFO+1,...,N. +* > N: errors that usually indicate LAPACK problems: +* =N+1: error return from CGGBAL +* =N+2: error return from CGEQRF +* =N+3: error return from CUNMQR +* =N+4: error return from CUNGQR +* =N+5: error return from CGGHRD +* =N+6: error return from CHGEQZ (other than failed +* iteration) +* =N+7: error return from CTGEVC +* =N+8: error return from CGGBAK (computing VL) +* =N+9: error return from CGGBAK (computing VR) +* =N+10: error return from CLASCL (various calls) +* +* Further Details +* =============== +* +* Balancing +* --------- +* +* This driver calls CGGBAL to both permute and scale rows and columns +* of A and B. The permutations PL and PR are chosen so that PL*A*PR +* and PL*B*R will be upper triangular except for the diagonal blocks +* A(i:j,i:j) and B(i:j,i:j), with i and j as close together as +* possible. The diagonal scaling matrices DL and DR are chosen so +* that the pair DL*PL*A*PR*DR, DL*PL*B*PR*DR have elements close to +* one (except for the elements that start out zero.) +* +* After the eigenvalues and eigenvectors of the balanced matrices +* have been computed, CGGBAK transforms the eigenvectors back to what +* they would have been (in perfect arithmetic) if they had not been +* balanced. +* +* Contents of A and B on Exit +* -------- -- - --- - -- ---- +* +* If any eigenvectors are computed (either JOBVL='V' or JOBVR='V' or +* both), then on exit the arrays A and B will contain the complex Schur +* form[*] of the "balanced" versions of A and B. If no eigenvectors +* are computed, then only the diagonal blocks will be correct. +* +* [*] In other words, upper triangular form. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ILIMIT, ILV, ILVL, ILVR, LQUERY + CHARACTER CHTEMP + INTEGER ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT, ILO, + $ IN, IRIGHT, IROWS, IRWORK, ITAU, IWORK, JC, JR, + $ LOPT, LWKMIN, LWKOPT, NB, NB1, NB2, NB3 + REAL ABSAI, ABSAR, ABSB, ANRM, ANRM1, ANRM2, BNRM, + $ BNRM1, BNRM2, EPS, SAFMAX, SAFMIN, SALFAI, + $ SALFAR, SBETA, SCALE, TEMP + COMPLEX X +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CGEQRF, CGGBAK, CGGBAL, CGGHRD, CHGEQZ, CLACPY, + $ CLASCL, CLASET, CTGEVC, CUNGQR, CUNMQR, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL ILAENV, LSAME, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, INT, MAX, REAL +* .. +* .. Statement Functions .. + REAL ABS1 +* .. +* .. Statement Function definitions .. + ABS1( X ) = ABS( REAL( X ) ) + ABS( AIMAG( X ) ) +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* +* Test the input arguments +* + LWKMIN = MAX( 2*N, 1 ) + LWKOPT = LWKMIN + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -11 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF +* + IF( INFO.EQ.0 ) THEN + NB1 = ILAENV( 1, 'CGEQRF', ' ', N, N, -1, -1 ) + NB2 = ILAENV( 1, 'CUNMQR', ' ', N, N, N, -1 ) + NB3 = ILAENV( 1, 'CUNGQR', ' ', N, N, N, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LOPT = MAX( 2*N, N*(NB+1) ) + WORK( 1 ) = LOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEGV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'E' )*SLAMCH( 'B' ) + SAFMIN = SLAMCH( 'S' ) + SAFMIN = SAFMIN + SAFMIN + SAFMAX = ONE / SAFMIN +* +* Scale A +* + ANRM = CLANGE( 'M', N, N, A, LDA, RWORK ) + ANRM1 = ANRM + ANRM2 = ONE + IF( ANRM.LT.ONE ) THEN + IF( SAFMAX*ANRM.LT.ONE ) THEN + ANRM1 = SAFMIN + ANRM2 = SAFMAX*ANRM + END IF + END IF +* + IF( ANRM.GT.ZERO ) THEN + CALL CLASCL( 'G', -1, -1, ANRM, ONE, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 10 + RETURN + END IF + END IF +* +* Scale B +* + BNRM = CLANGE( 'M', N, N, B, LDB, RWORK ) + BNRM1 = BNRM + BNRM2 = ONE + IF( BNRM.LT.ONE ) THEN + IF( SAFMAX*BNRM.LT.ONE ) THEN + BNRM1 = SAFMIN + BNRM2 = SAFMAX*BNRM + END IF + END IF +* + IF( BNRM.GT.ZERO ) THEN + CALL CLASCL( 'G', -1, -1, BNRM, ONE, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 10 + RETURN + END IF + END IF +* +* Permute the matrix to make it more nearly triangular +* Also "balance" the matrix. +* + ILEFT = 1 + IRIGHT = N + 1 + IRWORK = IRIGHT + N + CALL CGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWORK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 1 + GO TO 80 + END IF +* +* Reduce B to triangular form, and initialize VL and/or VR +* + IROWS = IHI + 1 - ILO + IF( ILV ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = 1 + IWORK = ITAU + IROWS + CALL CGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 2 + GO TO 80 + END IF +* + CALL CUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ), + $ LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 3 + GO TO 80 + END IF +* + IF( ILVL ) THEN + CALL CLASET( 'Full', N, N, CZERO, CONE, VL, LDVL ) + CALL CLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + CALL CUNGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK, + $ IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 4 + GO TO 80 + END IF + END IF +* + IF( ILVR ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* + IF( ILV ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL CGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IINFO ) + ELSE + CALL CGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IINFO ) + END IF + IF( IINFO.NE.0 ) THEN + INFO = N + 5 + GO TO 80 + END IF +* +* Perform QZ algorithm +* + IWORK = ITAU + IF( ILV ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF + CALL CHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK( IWORK ), + $ LWORK+1-IWORK, RWORK( IRWORK ), IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN + INFO = IINFO + ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN + INFO = IINFO - N + ELSE + INFO = N + 6 + END IF + GO TO 80 + END IF +* + IF( ILV ) THEN +* +* Compute Eigenvectors +* + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF +* + CALL CTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, N, IN, WORK( IWORK ), RWORK( IRWORK ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 7 + GO TO 80 + END IF +* +* Undo balancing on VL and VR, rescale +* + IF( ILVL ) THEN + CALL CGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VL, LDVL, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 8 + GO TO 80 + END IF + DO 30 JC = 1, N + TEMP = ZERO + DO 10 JR = 1, N + TEMP = MAX( TEMP, ABS1( VL( JR, JC ) ) ) + 10 CONTINUE + IF( TEMP.LT.SAFMIN ) + $ GO TO 30 + TEMP = ONE / TEMP + DO 20 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 20 CONTINUE + 30 CONTINUE + END IF + IF( ILVR ) THEN + CALL CGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VR, LDVR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + GO TO 80 + END IF + DO 60 JC = 1, N + TEMP = ZERO + DO 40 JR = 1, N + TEMP = MAX( TEMP, ABS1( VR( JR, JC ) ) ) + 40 CONTINUE + IF( TEMP.LT.SAFMIN ) + $ GO TO 60 + TEMP = ONE / TEMP + DO 50 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 50 CONTINUE + 60 CONTINUE + END IF +* +* End of eigenvector calculation +* + END IF +* +* Undo scaling in alpha, beta +* +* Note: this does not give the alpha and beta for the unscaled +* problem. +* +* Un-scaling is limited to avoid underflow in alpha and beta +* if they are significant. +* + DO 70 JC = 1, N + ABSAR = ABS( REAL( ALPHA( JC ) ) ) + ABSAI = ABS( AIMAG( ALPHA( JC ) ) ) + ABSB = ABS( REAL( BETA( JC ) ) ) + SALFAR = ANRM*REAL( ALPHA( JC ) ) + SALFAI = ANRM*AIMAG( ALPHA( JC ) ) + SBETA = BNRM*REAL( BETA( JC ) ) + ILIMIT = .FALSE. + SCALE = ONE +* +* Check for significant underflow in imaginary part of ALPHA +* + IF( ABS( SALFAI ).LT.SAFMIN .AND. ABSAI.GE. + $ MAX( SAFMIN, EPS*ABSAR, EPS*ABSB ) ) THEN + ILIMIT = .TRUE. + SCALE = ( SAFMIN / ANRM1 ) / MAX( SAFMIN, ANRM2*ABSAI ) + END IF +* +* Check for significant underflow in real part of ALPHA +* + IF( ABS( SALFAR ).LT.SAFMIN .AND. ABSAR.GE. + $ MAX( SAFMIN, EPS*ABSAI, EPS*ABSB ) ) THEN + ILIMIT = .TRUE. + SCALE = MAX( SCALE, ( SAFMIN / ANRM1 ) / + $ MAX( SAFMIN, ANRM2*ABSAR ) ) + END IF +* +* Check for significant underflow in BETA +* + IF( ABS( SBETA ).LT.SAFMIN .AND. ABSB.GE. + $ MAX( SAFMIN, EPS*ABSAR, EPS*ABSAI ) ) THEN + ILIMIT = .TRUE. + SCALE = MAX( SCALE, ( SAFMIN / BNRM1 ) / + $ MAX( SAFMIN, BNRM2*ABSB ) ) + END IF +* +* Check for possible overflow when limiting scaling +* + IF( ILIMIT ) THEN + TEMP = ( SCALE*SAFMIN )*MAX( ABS( SALFAR ), ABS( SALFAI ), + $ ABS( SBETA ) ) + IF( TEMP.GT.ONE ) + $ SCALE = SCALE / TEMP + IF( SCALE.LT.ONE ) + $ ILIMIT = .FALSE. + END IF +* +* Recompute un-scaled ALPHA, BETA if necessary. +* + IF( ILIMIT ) THEN + SALFAR = ( SCALE*REAL( ALPHA( JC ) ) )*ANRM + SALFAI = ( SCALE*AIMAG( ALPHA( JC ) ) )*ANRM + SBETA = ( SCALE*BETA( JC ) )*BNRM + END IF + ALPHA( JC ) = CMPLX( SALFAR, SALFAI ) + BETA( JC ) = SBETA + 70 CONTINUE +* + 80 CONTINUE + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CGEGV +* + END diff --git a/costa/native/external/lapack/cgehd2.f b/costa/native/external/lapack/cgehd2.f new file mode 100644 index 000000000..ab206ff6c --- /dev/null +++ b/costa/native/external/lapack/cgehd2.f @@ -0,0 +1,149 @@ + SUBROUTINE CGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGEHD2 reduces a complex general matrix A to upper Hessenberg form H +* by a unitary similarity transformation: Q' * A * Q = H . +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that A is already upper triangular in rows +* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +* set by a previous call to CGEBAL; otherwise they should be +* set to 1 and N respectively. See Further Details. +* 1 <= ILO <= IHI <= max(1,N). +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the n by n general matrix to be reduced. +* On exit, the upper triangle and the first subdiagonal of A +* are overwritten with the upper Hessenberg matrix H, and the +* elements below the first subdiagonal, with the array TAU, +* represent the unitary matrix Q as a product of elementary +* reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (output) COMPLEX array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of (ihi-ilo) elementary +* reflectors +* +* Q = H(ilo) H(ilo+1) . . . H(ihi-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on +* exit in A(i+2:ihi,i), and tau in TAU(i). +* +* The contents of A are illustrated by the following example, with +* n = 7, ilo = 2 and ihi = 6: +* +* on entry, on exit, +* +* ( a a a a a a a ) ( a a h h h h a ) +* ( a a a a a a ) ( a h h h h a ) +* ( a a a a a a ) ( h h h h h h ) +* ( a a a a a a ) ( v2 h h h h h ) +* ( a a a a a a ) ( v2 v3 h h h h ) +* ( a a a a a a ) ( v2 v3 v4 h h h ) +* ( a ) ( a ) +* +* where a denotes an element of the original matrix A, h denotes a +* modified element of the upper Hessenberg matrix H, and vi denotes an +* element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLARF, CLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEHD2', -INFO ) + RETURN + END IF +* + DO 10 I = ILO, IHI - 1 +* +* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) +* + ALPHA = A( I+1, I ) + CALL CLARFG( IHI-I, ALPHA, A( MIN( I+2, N ), I ), 1, TAU( I ) ) + A( I+1, I ) = ONE +* +* Apply H(i) to A(1:ihi,i+1:ihi) from the right +* + CALL CLARF( 'Right', IHI, IHI-I, A( I+1, I ), 1, TAU( I ), + $ A( 1, I+1 ), LDA, WORK ) +* +* Apply H(i)' to A(i+1:ihi,i+1:n) from the left +* + CALL CLARF( 'Left', IHI-I, N-I, A( I+1, I ), 1, + $ CONJG( TAU( I ) ), A( I+1, I+1 ), LDA, WORK ) +* + A( I+1, I ) = ALPHA + 10 CONTINUE +* + RETURN +* +* End of CGEHD2 +* + END diff --git a/costa/native/external/lapack/cgehrd.f b/costa/native/external/lapack/cgehrd.f new file mode 100644 index 000000000..de63da494 --- /dev/null +++ b/costa/native/external/lapack/cgehrd.f @@ -0,0 +1,254 @@ + SUBROUTINE CGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGEHRD reduces a complex general matrix A to upper Hessenberg form H +* by a unitary similarity transformation: Q' * A * Q = H . +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that A is already upper triangular in rows +* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +* set by a previous call to CGEBAL; otherwise they should be +* set to 1 and N respectively. See Further Details. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the N-by-N general matrix to be reduced. +* On exit, the upper triangle and the first subdiagonal of A +* are overwritten with the upper Hessenberg matrix H, and the +* elements below the first subdiagonal, with the array TAU, +* represent the unitary matrix Q as a product of elementary +* reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (output) COMPLEX array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to +* zero. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of (ihi-ilo) elementary +* reflectors +* +* Q = H(ilo) H(ilo+1) . . . H(ihi-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on +* exit in A(i+2:ihi,i), and tau in TAU(i). +* +* The contents of A are illustrated by the following example, with +* n = 7, ilo = 2 and ihi = 6: +* +* on entry, on exit, +* +* ( a a a a a a a ) ( a a h h h h a ) +* ( a a a a a a ) ( a h h h h a ) +* ( a a a a a a ) ( h h h h h h ) +* ( a a a a a a ) ( v2 h h h h h ) +* ( a a a a a a ) ( v2 v3 h h h h ) +* ( a a a a a a ) ( v2 v3 v4 h h h ) +* ( a ) ( a ) +* +* where a denotes an element of the original matrix A, h denotes a +* modified element of the upper Hessenberg matrix H, and vi denotes an +* element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, LDWORK, LWKOPT, NB, NBMIN, + $ NH, NX + COMPLEX EI +* .. +* .. Local Arrays .. + COMPLEX T( LDT, NBMAX ) +* .. +* .. External Subroutines .. + EXTERNAL CGEHD2, CGEMM, CLAHRD, CLARFB, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB = MIN( NBMAX, ILAENV( 1, 'CGEHRD', ' ', N, ILO, IHI, -1 ) ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEHRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero +* + DO 10 I = 1, ILO - 1 + TAU( I ) = ZERO + 10 CONTINUE + DO 20 I = MAX( 1, IHI ), N - 1 + TAU( I ) = ZERO + 20 CONTINUE +* +* Quick return if possible +* + NH = IHI - ILO + 1 + IF( NH.LE.1 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + IWS = 1 + IF( NB.GT.1 .AND. NB.LT.NH ) THEN +* +* Determine when to cross over from blocked to unblocked code +* (last block is always handled by unblocked code). +* + NX = MAX( NB, ILAENV( 3, 'CGEHRD', ' ', N, ILO, IHI, -1 ) ) + IF( NX.LT.NH ) THEN +* +* Determine if workspace is large enough for blocked code. +* + IWS = N*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code. +* + NBMIN = MAX( 2, ILAENV( 2, 'CGEHRD', ' ', N, ILO, IHI, + $ -1 ) ) + IF( LWORK.GE.N*NBMIN ) THEN + NB = LWORK / N + ELSE + NB = 1 + END IF + END IF + END IF + END IF + LDWORK = N +* + IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN +* +* Use unblocked code below +* + I = ILO +* + ELSE +* +* Use blocked code +* + DO 30 I = ILO, IHI - 1 - NX, NB + IB = MIN( NB, IHI-I ) +* +* Reduce columns i:i+ib-1 to Hessenberg form, returning the +* matrices V and T of the block reflector H = I - V*T*V' +* which performs the reduction, and also the matrix Y = A*V*T +* + CALL CLAHRD( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT, + $ WORK, LDWORK ) +* +* Apply the block reflector H to A(1:ihi,i+ib:ihi) from the +* right, computing A := A - Y * V'. V(i+ib,ib-1) must be set +* to 1. +* + EI = A( I+IB, I+IB-1 ) + A( I+IB, I+IB-1 ) = ONE + CALL CGEMM( 'No transpose', 'Conjugate transpose', IHI, + $ IHI-I-IB+1, IB, -ONE, WORK, LDWORK, + $ A( I+IB, I ), LDA, ONE, A( 1, I+IB ), LDA ) + A( I+IB, I+IB-1 ) = EI +* +* Apply the block reflector H to A(i+1:ihi,i+ib:n) from the +* left +* + CALL CLARFB( 'Left', 'Conjugate transpose', 'Forward', + $ 'Columnwise', IHI-I, N-I-IB+1, IB, A( I+1, I ), + $ LDA, T, LDT, A( I+1, I+IB ), LDA, WORK, + $ LDWORK ) + 30 CONTINUE + END IF +* +* Use unblocked code to reduce the rest of the matrix +* + CALL CGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO ) + WORK( 1 ) = IWS +* + RETURN +* +* End of CGEHRD +* + END diff --git a/costa/native/external/lapack/cgelq2.f b/costa/native/external/lapack/cgelq2.f new file mode 100644 index 000000000..449ed20fe --- /dev/null +++ b/costa/native/external/lapack/cgelq2.f @@ -0,0 +1,124 @@ + SUBROUTINE CGELQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGELQ2 computes an LQ factorization of a complex m by n matrix A: +* A = L * Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the m by n matrix A. +* On exit, the elements on and below the diagonal of the array +* contain the m by min(m,n) lower trapezoidal matrix L (L is +* lower triangular if m <= n); the elements above the diagonal, +* with the array TAU, represent the unitary matrix Q as a +* product of elementary reflectors (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) COMPLEX array, dimension (M) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(k)' . . . H(2)' H(1)', where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in +* A(i,i+1:n), and tau in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLARF, CLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELQ2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i,i+1:n) +* + CALL CLACGV( N-I+1, A( I, I ), LDA ) + ALPHA = A( I, I ) + CALL CLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA, + $ TAU( I ) ) + IF( I.LT.M ) THEN +* +* Apply H(i) to A(i+1:m,i:n) from the right +* + A( I, I ) = ONE + CALL CLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ), + $ A( I+1, I ), LDA, WORK ) + END IF + A( I, I ) = ALPHA + CALL CLACGV( N-I+1, A( I, I ), LDA ) + 10 CONTINUE + RETURN +* +* End of CGELQ2 +* + END diff --git a/costa/native/external/lapack/cgelqf.f b/costa/native/external/lapack/cgelqf.f new file mode 100644 index 000000000..b5484c718 --- /dev/null +++ b/costa/native/external/lapack/cgelqf.f @@ -0,0 +1,196 @@ + SUBROUTINE CGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGELQF computes an LQ factorization of a complex M-by-N matrix A: +* A = L * Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the elements on and below the diagonal of the array +* contain the m-by-min(m,n) lower trapezoidal matrix L (L is +* lower triangular if m <= n); the elements above the diagonal, +* with the array TAU, represent the unitary matrix Q as a +* product of elementary reflectors (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(k)' . . . H(2)' H(1)', where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in +* A(i,i+1:n), and tau in TAU(i). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CGELQ2, CLARFB, CLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CGELQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CGELQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the LQ factorization of the current block +* A(i:i+ib-1,i:n) +* + CALL CGELQ2( IB, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.M ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL CLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i+ib:m,i:n) from the right +* + CALL CLARFB( 'Right', 'No transpose', 'Forward', + $ 'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ), + $ LDA, WORK, LDWORK, A( I+IB, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) + $ CALL CGELQ2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of CGELQF +* + END diff --git a/costa/native/external/lapack/cgels.f b/costa/native/external/lapack/cgels.f new file mode 100644 index 000000000..fd3de112d --- /dev/null +++ b/costa/native/external/lapack/cgels.f @@ -0,0 +1,405 @@ + SUBROUTINE CGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGELS solves overdetermined or underdetermined complex linear systems +* involving an M-by-N matrix A, or its conjugate-transpose, using a QR +* or LQ factorization of A. It is assumed that A has full rank. +* +* The following options are provided: +* +* 1. If TRANS = 'N' and m >= n: find the least squares solution of +* an overdetermined system, i.e., solve the least squares problem +* minimize || B - A*X ||. +* +* 2. If TRANS = 'N' and m < n: find the minimum norm solution of +* an underdetermined system A * X = B. +* +* 3. If TRANS = 'C' and m >= n: find the minimum norm solution of +* an undetermined system A**H * X = B. +* +* 4. If TRANS = 'C' and m < n: find the least squares solution of +* an overdetermined system, i.e., solve the least squares problem +* minimize || B - A**H * X ||. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution +* matrix X. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER +* = 'N': the linear system involves A; +* = 'C': the linear system involves A**H. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of +* columns of the matrices B and X. NRHS >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* if M >= N, A is overwritten by details of its QR +* factorization as returned by CGEQRF; +* if M < N, A is overwritten by details of its LQ +* factorization as returned by CGELQF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the matrix B of right hand side vectors, stored +* columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +* if TRANS = 'C'. +* On exit, B is overwritten by the solution vectors, stored +* columnwise: +* if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +* squares solution vectors; the residual sum of squares for the +* solution in each column is given by the sum of squares of +* elements N+1 to M in that column; +* if TRANS = 'N' and m < n, rows 1 to N of B contain the +* minimum norm solution vectors; +* if TRANS = 'C' and m >= n, rows 1 to M of B contain the +* minimum norm solution vectors; +* if TRANS = 'C' and m < n, rows 1 to M of B contain the +* least squares solution vectors; the residual sum of squares +* for the solution in each column is given by the sum of +* squares of elements M+1 to N in that column. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= MAX(1,M,N). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* LWORK >= max( 1, MN + max( MN, NRHS ) ). +* For optimal performance, +* LWORK >= max( 1, MN + max( MN, NRHS )*NB ). +* where MN = min(M,N) and NB is the optimum block size. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TPSD + INTEGER BROW, I, IASCL, IBSCL, J, MN, NB, SCLLEN, WSIZE + REAL ANRM, BIGNUM, BNRM, SMLNUM +* .. +* .. Local Arrays .. + REAL RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CGELQF, CGEQRF, CLASCL, CLASET, CTRSM, CUNMLQ, + $ CUNMQR, SLABAD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'C' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, MN+MAX( MN, NRHS ) ) .AND. + $ .NOT.LQUERY ) THEN + INFO = -10 + END IF +* +* Figure out optimal block size +* + IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN +* + TPSD = .TRUE. + IF( LSAME( TRANS, 'N' ) ) + $ TPSD = .FALSE. +* + IF( M.GE.N ) THEN + NB = ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + IF( TPSD ) THEN + NB = MAX( NB, ILAENV( 1, 'CUNMQR', 'LN', M, NRHS, N, + $ -1 ) ) + ELSE + NB = MAX( NB, ILAENV( 1, 'CUNMQR', 'LC', M, NRHS, N, + $ -1 ) ) + END IF + ELSE + NB = ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + IF( TPSD ) THEN + NB = MAX( NB, ILAENV( 1, 'CUNMLQ', 'LC', N, NRHS, M, + $ -1 ) ) + ELSE + NB = MAX( NB, ILAENV( 1, 'CUNMLQ', 'LN', N, NRHS, M, + $ -1 ) ) + END IF + END IF +* + WSIZE = MAX( 1, MN + MAX( MN, NRHS )*NB ) + WORK( 1 ) = REAL( WSIZE ) +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELS ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL CLASET( 'Full', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + GO TO 50 + END IF +* + BROW = M + IF( TPSD ) + $ BROW = N + BNRM = CLANGE( 'M', BROW, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF( M.GE.N ) THEN +* +* compute QR factorization of A +* + CALL CGEQRF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least N, optimally N*NB +* + IF( .NOT.TPSD ) THEN +* +* Least-Squares Problem min || A * X - B || +* +* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) +* + CALL CUNMQR( 'Left', 'Conjugate transpose', M, NRHS, N, A, + $ LDA, WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* +* B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL CTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, CONE, A, LDA, B, LDB ) +* + SCLLEN = N +* + ELSE +* +* Overdetermined system of equations A' * X = B +* +* B(1:N,1:NRHS) := inv(R') * B(1:N,1:NRHS) +* + CALL CTRSM( 'Left', 'Upper', 'Conjugate transpose', + $ 'Non-unit', N, NRHS, CONE, A, LDA, B, LDB ) +* +* B(N+1:M,1:NRHS) = ZERO +* + DO 20 J = 1, NRHS + DO 10 I = N + 1, M + B( I, J ) = CZERO + 10 CONTINUE + 20 CONTINUE +* +* B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) +* + CALL CUNMQR( 'Left', 'No transpose', M, NRHS, N, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* + SCLLEN = M +* + END IF +* + ELSE +* +* Compute LQ factorization of A +* + CALL CGELQF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least M, optimally M*NB. +* + IF( .NOT.TPSD ) THEN +* +* underdetermined system of equations A * X = B +* +* B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', M, + $ NRHS, CONE, A, LDA, B, LDB ) +* +* B(M+1:N,1:NRHS) = 0 +* + DO 40 J = 1, NRHS + DO 30 I = M + 1, N + B( I, J ) = CZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Q(1:N,:)' * B(1:M,1:NRHS) +* + CALL CUNMLQ( 'Left', 'Conjugate transpose', N, NRHS, M, A, + $ LDA, WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* + SCLLEN = N +* + ELSE +* +* overdetermined system min || A' * X - B || +* +* B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) +* + CALL CUNMLQ( 'Left', 'No transpose', N, NRHS, M, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* +* B(1:M,1:NRHS) := inv(L') * B(1:M,1:NRHS) +* + CALL CTRSM( 'Left', 'Lower', 'Conjugate transpose', + $ 'Non-unit', M, NRHS, CONE, A, LDA, B, LDB ) +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + 50 CONTINUE + WORK( 1 ) = REAL( WSIZE ) +* + RETURN +* +* End of CGELS +* + END diff --git a/costa/native/external/lapack/cgelsd.f b/costa/native/external/lapack/cgelsd.f new file mode 100644 index 000000000..e4adf2101 --- /dev/null +++ b/costa/native/external/lapack/cgelsd.f @@ -0,0 +1,547 @@ + SUBROUTINE CGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, + $ WORK, LWORK, RWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ), S( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGELSD computes the minimum-norm solution to a real linear least +* squares problem: +* minimize 2-norm(| b - A*x |) +* using the singular value decomposition (SVD) of A. A is an M-by-N +* matrix which may be rank-deficient. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution +* matrix X. +* +* The problem is solved in three steps: +* (1) Reduce the coefficient matrix A to bidiagonal form with +* Householder tranformations, reducing the original problem +* into a "bidiagonal least squares problem" (BLS) +* (2) Solve the BLS using a divide and conquer approach. +* (3) Apply back all the Householder tranformations to solve +* the original least squares problem. +* +* The effective rank of A is determined by treating as zero those +* singular values which are less than RCOND times the largest singular +* value. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A has been destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the M-by-NRHS right hand side matrix B. +* On exit, B is overwritten by the N-by-NRHS solution matrix X. +* If m >= n and RANK = n, the residual sum-of-squares for +* the solution in the i-th column is given by the sum of +* squares of elements n+1:m in that column. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M,N). +* +* S (output) REAL array, dimension (min(M,N)) +* The singular values of A in decreasing order. +* The condition number of A in the 2-norm = S(1)/S(min(m,n)). +* +* RCOND (input) REAL +* RCOND is used to determine the effective rank of A. +* Singular values S(i) <= RCOND*S(1) are treated as zero. +* If RCOND < 0, machine precision is used instead. +* +* RANK (output) INTEGER +* The effective rank of A, i.e., the number of singular values +* which are greater than RCOND*S(1). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK must be at least 1. +* The exact minimum amount of workspace needed depends on M, +* N and NRHS. As long as LWORK is at least +* 2 * N + N * NRHS +* if M is greater than or equal to N or +* 2 * M + M * NRHS +* if M is less than N, the code will execute correctly. +* For good performance, LWORK should generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* +* RWORK (workspace) REAL array, dimension at least +* 10*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + +* (SMLSIZ+1)**2 +* if M is greater than or equal to N or +* 10*M + 2*M*SMLSIZ + 8*M*NLVL + 3*SMLSIZ*NRHS + +* (SMLSIZ+1)**2 +* if M is less than N, the code will execute correctly. +* SMLSIZ is returned by ILAENV and is equal to the maximum +* size of the subproblems at the bottom of the computation +* tree (usually about 25), and +* NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) +* +* IWORK (workspace) INTEGER array, dimension (LIWORK) +* LIWORK >= 3 * MINMN * NLVL + 11 * MINMN, +* where MINMN = MIN( M,N ). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: the algorithm for computing the SVD failed to converge; +* if INFO = i, i off-diagonal elements of an intermediate +* bidiagonal form did not converge to zero. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER IASCL, IBSCL, IE, IL, ITAU, ITAUP, ITAUQ, + $ LDWORK, MAXMN, MAXWRK, MINMN, MINWRK, MM, + $ MNTHR, NRWORK, NWORK, SMLSIZ + REAL ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM +* .. +* .. External Subroutines .. + EXTERNAL CGEBRD, CGELQF, CGEQRF, CLACPY, + $ CLALSD, CLASCL, CLASET, CUNMBR, + $ CUNMLQ, CUNMQR, SLABAD, SLASCL, + $ SLASET, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL CLANGE, SLAMCH, ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC CMPLX, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + MNTHR = ILAENV( 6, 'CGELSD', ' ', M, N, NRHS, -1 ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN + INFO = -7 + END IF +* + SMLSIZ = ILAENV( 9, 'CGELSD', ' ', 0, 0, 0, 0 ) +* +* Compute workspace. +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + MINWRK = 1 + IF( INFO.EQ.0 ) THEN + MAXWRK = 0 + MM = M + IF( M.GE.N .AND. M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns. +* + MM = N + MAXWRK = MAX( MAXWRK, N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, + $ -1 ) ) + MAXWRK = MAX( MAXWRK, NRHS*ILAENV( 1, 'CUNMQR', 'LC', M, + $ NRHS, N, -1 ) ) + END IF + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined. +* + MAXWRK = MAX( MAXWRK, 2*N+( MM+N )* + $ ILAENV( 1, 'CGEBRD', ' ', MM, N, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N+NRHS* + $ ILAENV( 1, 'CUNMBR', 'QLC', MM, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N+( N-1 )* + $ ILAENV( 1, 'CUNMBR', 'PLN', N, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N+N*NRHS ) + MINWRK = MAX( 2*N+MM, 2*N+N*NRHS ) + END IF + IF( N.GT.M ) THEN + IF( N.GE.MNTHR ) THEN +* +* Path 2a - underdetermined, with many more columns +* than rows. +* + MAXWRK = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + MAXWRK = MAX( MAXWRK, M*M+4*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M+4*M+NRHS* + $ ILAENV( 1, 'CUNMBR', 'QLC', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M+4*M+( M-1 )* + $ ILAENV( 1, 'CUNMLQ', 'LC', N, NRHS, M, -1 ) ) + IF( NRHS.GT.1 ) THEN + MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS ) + ELSE + MAXWRK = MAX( MAXWRK, M*M+2*M ) + END IF + MAXWRK = MAX( MAXWRK, M*M+4*M+M*NRHS ) + ELSE +* +* Path 2 - underdetermined. +* + MAXWRK = 2*M + ( N+M )*ILAENV( 1, 'CGEBRD', ' ', M, N, + $ -1, -1 ) + MAXWRK = MAX( MAXWRK, 2*M+NRHS* + $ ILAENV( 1, 'CUNMBR', 'QLC', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'CUNMBR', 'PLN', N, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M+M*NRHS ) + END IF + MINWRK = MAX( 2*M+N, 2*M+M*NRHS ) + END IF + MINWRK = MIN( MINWRK, MAXWRK ) + WORK( 1 ) = CMPLX( MAXWRK, 0 ) + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELSD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + GO TO 10 + END IF +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters. +* + EPS = SLAMCH( 'P' ) + SFMIN = SLAMCH( 'S' ) + SMLNUM = SFMIN / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A if max entry outside range [SMLNUM,BIGNUM]. +* + ANRM = CLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM. +* + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + CALL SLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 ) + RANK = 0 + GO TO 10 + END IF +* +* Scale B if max entry outside range [SMLNUM,BIGNUM]. +* + BNRM = CLANGE( 'M', M, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM. +* + CALL CLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM. +* + CALL CLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* If M < N make sure B(M+1:N,:) = 0 +* + IF( M.LT.N ) + $ CALL CLASET( 'F', N-M, NRHS, CZERO, CZERO, B( M+1, 1 ), LDB ) +* +* Overdetermined case. +* + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined. +* + MM = M + IF( M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns +* + MM = N + ITAU = 1 + NWORK = ITAU + N +* +* Compute A=Q*R. +* (RWorkspace: need N) +* (CWorkspace: need N, prefer N*NB) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Multiply B by transpose(Q). +* (RWorkspace: need N) +* (CWorkspace: need NRHS, prefer NRHS*NB) +* + CALL CUNMQR( 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Zero out below R. +* + IF( N.GT.1 ) THEN + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), + $ LDA ) + END IF + END IF +* + ITAUQ = 1 + ITAUP = ITAUQ + N + NWORK = ITAUP + N + IE = 1 + NRWORK = IE + N +* +* Bidiagonalize R in A. +* (RWorkspace: need N) +* (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB) +* + CALL CGEBRD( MM, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of R. +* (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB) +* + CALL CUNMBR( 'Q', 'L', 'C', MM, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL CLALSD( 'U', SMLSIZ, N, NRHS, S, RWORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), RWORK( NRWORK ), + $ IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of R. +* + CALL CUNMBR( 'P', 'L', 'N', N, NRHS, N, A, LDA, WORK( ITAUP ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+ + $ MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN +* +* Path 2a - underdetermined, with many more columns than rows +* and sufficient workspace for an efficient algorithm. +* + LDWORK = M + IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ), + $ M*LDA+M+M*NRHS ) )LDWORK = LDA + ITAU = 1 + NWORK = M + 1 +* +* Compute A=L*Q. +* (CWorkspace: need 2*M, prefer M+M*NB) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) + IL = NWORK +* +* Copy L to WORK(IL), zeroing out above its diagonal. +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, WORK( IL+LDWORK ), + $ LDWORK ) + ITAUQ = IL + LDWORK*M + ITAUP = ITAUQ + M + NWORK = ITAUP + M + IE = 1 + NRWORK = IE + M +* +* Bidiagonalize L in WORK(IL). +* (RWorkspace: need M) +* (CWorkspace: need M*M+4*M, prefer M*M+4*M+2*M*NB) +* + CALL CGEBRD( M, M, WORK( IL ), LDWORK, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of L. +* (CWorkspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) +* + CALL CUNMBR( 'Q', 'L', 'C', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUQ ), B, LDB, WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL CLALSD( 'U', SMLSIZ, M, NRHS, S, RWORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), RWORK( NRWORK ), + $ IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of L. +* + CALL CUNMBR( 'P', 'L', 'N', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUP ), B, LDB, WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Zero out below first M rows of B. +* + CALL CLASET( 'F', N-M, NRHS, CZERO, CZERO, B( M+1, 1 ), LDB ) + NWORK = ITAU + M +* +* Multiply transpose(Q) by B. +* (CWorkspace: need NRHS, prefer NRHS*NB) +* + CALL CUNMLQ( 'L', 'C', N, NRHS, M, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + ELSE +* +* Path 2 - remaining underdetermined cases. +* + ITAUQ = 1 + ITAUP = ITAUQ + M + NWORK = ITAUP + M + IE = 1 + NRWORK = IE + M +* +* Bidiagonalize A. +* (RWorkspace: need M) +* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors. +* (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB) +* + CALL CUNMBR( 'Q', 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL CLALSD( 'L', SMLSIZ, M, NRHS, S, RWORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), RWORK( NRWORK ), + $ IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of A. +* + CALL CUNMBR( 'P', 'L', 'N', N, NRHS, M, A, LDA, WORK( ITAUP ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + END IF +* +* Undo scaling. +* + IF( IASCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 10 CONTINUE + WORK( 1 ) = CMPLX( MAXWRK, 0 ) + RETURN +* +* End of CGELSD +* + END diff --git a/costa/native/external/lapack/cgelss.f b/costa/native/external/lapack/cgelss.f new file mode 100644 index 000000000..c80bb89aa --- /dev/null +++ b/costa/native/external/lapack/cgelss.f @@ -0,0 +1,639 @@ + SUBROUTINE CGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, + $ WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + REAL RCOND +* .. +* .. Array Arguments .. + REAL RWORK( * ), S( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGELSS computes the minimum norm solution to a complex linear +* least squares problem: +* +* Minimize 2-norm(| b - A*x |). +* +* using the singular value decomposition (SVD) of A. A is an M-by-N +* matrix which may be rank-deficient. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix +* X. +* +* The effective rank of A is determined by treating as zero those +* singular values which are less than RCOND times the largest singular +* value. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the first min(m,n) rows of A are overwritten with +* its right singular vectors, stored rowwise. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the M-by-NRHS right hand side matrix B. +* On exit, B is overwritten by the N-by-NRHS solution matrix X. +* If m >= n and RANK = n, the residual sum-of-squares for +* the solution in the i-th column is given by the sum of +* squares of elements n+1:m in that column. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M,N). +* +* S (output) REAL array, dimension (min(M,N)) +* The singular values of A in decreasing order. +* The condition number of A in the 2-norm = S(1)/S(min(m,n)). +* +* RCOND (input) REAL +* RCOND is used to determine the effective rank of A. +* Singular values S(i) <= RCOND*S(1) are treated as zero. +* If RCOND < 0, machine precision is used instead. +* +* RANK (output) INTEGER +* The effective rank of A, i.e., the number of singular values +* which are greater than RCOND*S(1). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1, and also: +* LWORK >= 2*min(M,N) + max(M,N,NRHS) +* For good performance, LWORK should generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) REAL array, dimension (5*min(M,N)) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: the algorithm for computing the SVD failed to converge; +* if INFO = i, i off-diagonal elements of an intermediate +* bidiagonal form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER BL, CHUNK, I, IASCL, IBSCL, IE, IL, IRWORK, + $ ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN, + $ MAXWRK, MINMN, MINWRK, MM, MNTHR + REAL ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR +* .. +* .. Local Arrays .. + COMPLEX VDUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CBDSQR, CCOPY, CGEBRD, CGELQF, CGEMM, CGEMV, + $ CGEQRF, CLACPY, CLASCL, CLASET, CSRSCL, CUNGBR, + $ CUNMBR, CUNMLQ, CUNMQR, SLABAD, SLASCL, SLASET, + $ XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL ILAENV, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + MNTHR = ILAENV( 6, 'CGELSS', ' ', M, N, NRHS, -1 ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace refers +* to real workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MAXWRK = 0 + MM = M + IF( M.GE.N .AND. M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns +* +* Space needed for CBDSQR is BDSPAC = 5*N +* + MM = N + MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'CGEQRF', ' ', M, N, + $ -1, -1 ) ) + MAXWRK = MAX( MAXWRK, N+NRHS* + $ ILAENV( 1, 'CUNMQR', 'LC', M, NRHS, N, -1 ) ) + END IF + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined +* +* Space needed for CBDSQR is BDSPC = 7*N+12 +* + MAXWRK = MAX( MAXWRK, 2*N+( MM+N )* + $ ILAENV( 1, 'CGEBRD', ' ', MM, N, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N+NRHS* + $ ILAENV( 1, 'CUNMBR', 'QLC', MM, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N+( N-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, N*NRHS ) + MINWRK = 2*N + MAX( NRHS, M ) + END IF + IF( N.GT.M ) THEN + MINWRK = 2*M + MAX( NRHS, N ) + IF( N.GE.MNTHR ) THEN +* +* Path 2a - underdetermined, with many more columns +* than rows +* +* Space needed for CBDSQR is BDSPAC = 5*M +* + MAXWRK = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + MAXWRK = MAX( MAXWRK, 3*M+M*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*M+M*M+NRHS* + $ ILAENV( 1, 'CUNMBR', 'QLC', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*M+M*M+( M-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) + IF( NRHS.GT.1 ) THEN + MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS ) + ELSE + MAXWRK = MAX( MAXWRK, M*M+2*M ) + END IF + MAXWRK = MAX( MAXWRK, M+NRHS* + $ ILAENV( 1, 'CUNMLQ', 'LC', N, NRHS, M, -1 ) ) + ELSE +* +* Path 2 - underdetermined +* +* Space needed for CBDSQR is BDSPAC = 5*M +* + MAXWRK = 2*M + ( N+M )*ILAENV( 1, 'CGEBRD', ' ', M, N, + $ -1, -1 ) + MAXWRK = MAX( MAXWRK, 2*M+NRHS* + $ ILAENV( 1, 'CUNMBR', 'QLC', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'P', M, N, M, -1 ) ) + MAXWRK = MAX( MAXWRK, N*NRHS ) + END IF + END IF + MINWRK = MAX( MINWRK, 1 ) + MAXWRK = MAX( MINWRK, MAXWRK ) + WORK( 1 ) = MAXWRK + END IF +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) + $ INFO = -12 + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELSS', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + EPS = SLAMCH( 'P' ) + SFMIN = SLAMCH( 'S' ) + SMLNUM = SFMIN / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + CALL SLASET( 'F', MINMN, 1, ZERO, ZERO, S, MINMN ) + RANK = 0 + GO TO 70 + END IF +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = CLANGE( 'M', M, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Overdetermined case +* + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined +* + MM = M + IF( M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns +* + MM = N + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Multiply B by transpose(Q) +* (CWorkspace: need N+NRHS, prefer N+NRHS*NB) +* (RWorkspace: none) +* + CALL CUNMQR( 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Zero out below R +* + IF( N.GT.1 ) + $ CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), + $ LDA ) + END IF +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in A +* (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( MM, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of R +* (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB) +* (RWorkspace: none) +* + CALL CUNMBR( 'Q', 'L', 'C', MM, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors of R in A +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: none) +* + CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration +* multiply B by transpose of left singular vectors +* compute right singular vectors in A +* (CWorkspace: none) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, 0, NRHS, S, RWORK( IE ), A, LDA, VDUM, + $ 1, B, LDB, RWORK( IRWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 10 I = 1, N + IF( S( I ).GT.THR ) THEN + CALL CSRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL CLASET( 'F', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB ) + END IF + 10 CONTINUE +* +* Multiply B by right singular vectors +* (CWorkspace: need N, prefer N*NRHS) +* (RWorkspace: none) +* + IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN + CALL CGEMM( 'C', 'N', N, NRHS, N, CONE, A, LDA, B, LDB, + $ CZERO, WORK, LDB ) + CALL CLACPY( 'G', N, NRHS, WORK, LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = LWORK / N + DO 20 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL CGEMM( 'C', 'N', N, BL, N, CONE, A, LDA, B( 1, I ), + $ LDB, CZERO, WORK, N ) + CALL CLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB ) + 20 CONTINUE + ELSE + CALL CGEMV( 'C', N, N, CONE, A, LDA, B, 1, CZERO, WORK, 1 ) + CALL CCOPY( N, WORK, 1, B, 1 ) + END IF +* + ELSE IF( N.GE.MNTHR .AND. LWORK.GE.3*M+M*M+MAX( M, NRHS, N-2*M ) ) + $ THEN +* +* Underdetermined case, M much less than N +* +* Path 2a - underdetermined, with many more columns than rows +* and sufficient workspace for an efficient algorithm +* + LDWORK = M + IF( LWORK.GE.3*M+M*LDA+MAX( M, NRHS, N-2*M ) ) + $ LDWORK = LDA + ITAU = 1 + IWORK = M + 1 +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: none) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) + IL = IWORK +* +* Copy L to WORK(IL), zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, WORK( IL+LDWORK ), + $ LDWORK ) + IE = 1 + ITAUQ = IL + LDWORK*M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL) +* (CWorkspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IL ), LDWORK, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of L +* (CWorkspace: need M*M+3*M+NRHS, prefer M*M+3*M+NRHS*NB) +* (RWorkspace: none) +* + CALL CUNMBR( 'Q', 'L', 'C', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUQ ), B, LDB, WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors of R in WORK(IL) +* (CWorkspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) +* (RWorkspace: none) +* + CALL CUNGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right singular +* vectors of L in WORK(IL) and multiplying B by transpose of +* left singular vectors +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, 0, NRHS, S, RWORK( IE ), WORK( IL ), + $ LDWORK, A, LDA, B, LDB, RWORK( IRWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 30 I = 1, M + IF( S( I ).GT.THR ) THEN + CALL CSRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL CLASET( 'F', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB ) + END IF + 30 CONTINUE + IWORK = IL + M*LDWORK +* +* Multiply B by right singular vectors of L in WORK(IL) +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NRHS) +* (RWorkspace: none) +* + IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN + CALL CGEMM( 'C', 'N', M, NRHS, M, CONE, WORK( IL ), LDWORK, + $ B, LDB, CZERO, WORK( IWORK ), LDB ) + CALL CLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = ( LWORK-IWORK+1 ) / M + DO 40 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL CGEMM( 'C', 'N', M, BL, M, CONE, WORK( IL ), LDWORK, + $ B( 1, I ), LDB, CZERO, WORK( IWORK ), N ) + CALL CLACPY( 'G', M, BL, WORK( IWORK ), N, B( 1, I ), + $ LDB ) + 40 CONTINUE + ELSE + CALL CGEMV( 'C', M, M, CONE, WORK( IL ), LDWORK, B( 1, 1 ), + $ 1, CZERO, WORK( IWORK ), 1 ) + CALL CCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 ) + END IF +* +* Zero out below first M rows of B +* + CALL CLASET( 'F', N-M, NRHS, CZERO, CZERO, B( M+1, 1 ), LDB ) + IWORK = ITAU + M +* +* Multiply transpose(Q) by B +* (CWorkspace: need M+NRHS, prefer M+NHRS*NB) +* (RWorkspace: none) +* + CALL CUNMLQ( 'L', 'C', N, NRHS, M, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* + ELSE +* +* Path 2 - remaining underdetermined cases +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (CWorkspace: need 3*M, prefer 2*M+(M+N)*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors +* (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB) +* (RWorkspace: none) +* + CALL CUNMBR( 'Q', 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors in A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: none) +* + CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, +* computing right singular vectors of A in A and +* multiplying B by transpose of left singular vectors +* (CWorkspace: none) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'L', M, N, 0, NRHS, S, RWORK( IE ), A, LDA, VDUM, + $ 1, B, LDB, RWORK( IRWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 50 I = 1, M + IF( S( I ).GT.THR ) THEN + CALL CSRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL CLASET( 'F', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB ) + END IF + 50 CONTINUE +* +* Multiply B by right singular vectors of A +* (CWorkspace: need N, prefer N*NRHS) +* (RWorkspace: none) +* + IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN + CALL CGEMM( 'C', 'N', N, NRHS, M, CONE, A, LDA, B, LDB, + $ CZERO, WORK, LDB ) + CALL CLACPY( 'G', N, NRHS, WORK, LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = LWORK / N + DO 60 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL CGEMM( 'C', 'N', N, BL, M, CONE, A, LDA, B( 1, I ), + $ LDB, CZERO, WORK, N ) + CALL CLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB ) + 60 CONTINUE + ELSE + CALL CGEMV( 'C', M, N, CONE, A, LDA, B, 1, CZERO, WORK, 1 ) + CALL CCOPY( N, WORK, 1, B, 1 ) + END IF + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF + 70 CONTINUE + WORK( 1 ) = MAXWRK + RETURN +* +* End of CGELSS +* + END diff --git a/costa/native/external/lapack/cgelsx.f b/costa/native/external/lapack/cgelsx.f new file mode 100644 index 000000000..748954179 --- /dev/null +++ b/costa/native/external/lapack/cgelsx.f @@ -0,0 +1,358 @@ + SUBROUTINE CGELSX( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, + $ WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, M, N, NRHS, RANK + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine CGELSY. +* +* CGELSX computes the minimum-norm solution to a complex linear least +* squares problem: +* minimize || A * X - B || +* using a complete orthogonal factorization of A. A is an M-by-N +* matrix which may be rank-deficient. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution +* matrix X. +* +* The routine first computes a QR factorization with column pivoting: +* A * P = Q * [ R11 R12 ] +* [ 0 R22 ] +* with R11 defined as the largest leading submatrix whose estimated +* condition number is less than 1/RCOND. The order of R11, RANK, +* is the effective rank of A. +* +* Then, R22 is considered to be negligible, and R12 is annihilated +* by unitary transformations from the right, arriving at the +* complete orthogonal factorization: +* A * P = Q * [ T11 0 ] * Z +* [ 0 0 ] +* The minimum-norm solution is then +* X = P * Z' [ inv(T11)*Q1'*B ] +* [ 0 ] +* where Q1 consists of the first RANK columns of Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of +* columns of matrices B and X. NRHS >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A has been overwritten by details of its +* complete orthogonal factorization. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the M-by-NRHS right hand side matrix B. +* On exit, the N-by-NRHS solution matrix X. +* If m >= n and RANK = n, the residual sum-of-squares for +* the solution in the i-th column is given by the sum of +* squares of elements N+1:M in that column. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M,N). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(i) .ne. 0, the i-th column of A is an +* initial column, otherwise it is a free column. Before +* the QR factorization of A, all initial columns are +* permuted to the leading positions; only the remaining +* free columns are moved as a result of column pivoting +* during the factorization. +* On exit, if JPVT(i) = k, then the i-th column of A*P +* was the k-th column of A. +* +* RCOND (input) REAL +* RCOND is used to determine the effective rank of A, which +* is defined as the order of the largest leading triangular +* submatrix R11 in the QR factorization with pivoting of A, +* whose estimated condition number < 1/RCOND. +* +* RANK (output) INTEGER +* The effective rank of A, i.e., the order of the submatrix +* R11. This is the same as the order of the submatrix T11 +* in the complete orthogonal factorization of A. +* +* WORK (workspace) COMPLEX array, dimension +* (min(M,N) + max( N, 2*min(M,N)+NRHS )), +* +* RWORK (workspace) REAL array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IMAX, IMIN + PARAMETER ( IMAX = 1, IMIN = 2 ) + REAL ZERO, ONE, DONE, NTDONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, DONE = ZERO, + $ NTDONE = ONE ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, IASCL, IBSCL, ISMAX, ISMIN, J, K, MN + REAL ANRM, BIGNUM, BNRM, SMAX, SMAXPR, SMIN, SMINPR, + $ SMLNUM + COMPLEX C1, C2, S1, S2, T1, T2 +* .. +* .. External Subroutines .. + EXTERNAL CGEQPF, CLAIC1, CLASCL, CLASET, CLATZM, CTRSM, + $ CTZRQF, CUNM2R, SLABAD, XERBLA +* .. +* .. External Functions .. + REAL CLANGE, SLAMCH + EXTERNAL CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, MIN +* .. +* .. Executable Statements .. +* + MN = MIN( M, N ) + ISMIN = MN + 1 + ISMAX = 2*MN + 1 +* +* Test the input arguments. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -7 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELSX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max elements outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + RANK = 0 + GO TO 100 + END IF +* + BNRM = CLANGE( 'M', M, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Compute QR factorization with column pivoting of A: +* A * P = Q * R +* + CALL CGEQPF( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ), RWORK, + $ INFO ) +* +* complex workspace MN+N. Real workspace 2*N. Details of Householder +* rotations stored in WORK(1:MN). +* +* Determine RANK using incremental condition estimation +* + WORK( ISMIN ) = CONE + WORK( ISMAX ) = CONE + SMAX = ABS( A( 1, 1 ) ) + SMIN = SMAX + IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN + RANK = 0 + CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + GO TO 100 + ELSE + RANK = 1 + END IF +* + 10 CONTINUE + IF( RANK.LT.MN ) THEN + I = RANK + 1 + CALL CLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ), + $ A( I, I ), SMINPR, S1, C1 ) + CALL CLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ), + $ A( I, I ), SMAXPR, S2, C2 ) +* + IF( SMAXPR*RCOND.LE.SMINPR ) THEN + DO 20 I = 1, RANK + WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 ) + WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 ) + 20 CONTINUE + WORK( ISMIN+RANK ) = C1 + WORK( ISMAX+RANK ) = C2 + SMIN = SMINPR + SMAX = SMAXPR + RANK = RANK + 1 + GO TO 10 + END IF + END IF +* +* Logically partition R = [ R11 R12 ] +* [ 0 R22 ] +* where R11 = R(1:RANK,1:RANK) +* +* [R11,R12] = [ T11, 0 ] * Y +* + IF( RANK.LT.N ) + $ CALL CTZRQF( RANK, N, A, LDA, WORK( MN+1 ), INFO ) +* +* Details of Householder rotations stored in WORK(MN+1:2*MN) +* +* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) +* + CALL CUNM2R( 'Left', 'Conjugate transpose', M, NRHS, MN, A, LDA, + $ WORK( 1 ), B, LDB, WORK( 2*MN+1 ), INFO ) +* +* workspace NRHS +* +* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) +* + CALL CTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK, + $ NRHS, CONE, A, LDA, B, LDB ) +* + DO 40 I = RANK + 1, N + DO 30 J = 1, NRHS + B( I, J ) = CZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS) +* + IF( RANK.LT.N ) THEN + DO 50 I = 1, RANK + CALL CLATZM( 'Left', N-RANK+1, NRHS, A( I, RANK+1 ), LDA, + $ CONJG( WORK( MN+I ) ), B( I, 1 ), + $ B( RANK+1, 1 ), LDB, WORK( 2*MN+1 ) ) + 50 CONTINUE + END IF +* +* workspace NRHS +* +* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) +* + DO 90 J = 1, NRHS + DO 60 I = 1, N + WORK( 2*MN+I ) = NTDONE + 60 CONTINUE + DO 80 I = 1, N + IF( WORK( 2*MN+I ).EQ.NTDONE ) THEN + IF( JPVT( I ).NE.I ) THEN + K = I + T1 = B( K, J ) + T2 = B( JPVT( K ), J ) + 70 CONTINUE + B( JPVT( K ), J ) = T1 + WORK( 2*MN+K ) = DONE + T1 = T2 + K = JPVT( K ) + T2 = B( JPVT( K ), J ) + IF( JPVT( K ).NE.I ) + $ GO TO 70 + B( I, J ) = T1 + WORK( 2*MN+K ) = DONE + END IF + END IF + 80 CONTINUE + 90 CONTINUE +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL CLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL CLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 100 CONTINUE +* + RETURN +* +* End of CGELSX +* + END diff --git a/costa/native/external/lapack/cgelsy.f b/costa/native/external/lapack/cgelsy.f new file mode 100644 index 000000000..a4b96de7b --- /dev/null +++ b/costa/native/external/lapack/cgelsy.f @@ -0,0 +1,386 @@ + SUBROUTINE CGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, + $ WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGELSY computes the minimum-norm solution to a complex linear least +* squares problem: +* minimize || A * X - B || +* using a complete orthogonal factorization of A. A is an M-by-N +* matrix which may be rank-deficient. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution +* matrix X. +* +* The routine first computes a QR factorization with column pivoting: +* A * P = Q * [ R11 R12 ] +* [ 0 R22 ] +* with R11 defined as the largest leading submatrix whose estimated +* condition number is less than 1/RCOND. The order of R11, RANK, +* is the effective rank of A. +* +* Then, R22 is considered to be negligible, and R12 is annihilated +* by unitary transformations from the right, arriving at the +* complete orthogonal factorization: +* A * P = Q * [ T11 0 ] * Z +* [ 0 0 ] +* The minimum-norm solution is then +* X = P * Z' [ inv(T11)*Q1'*B ] +* [ 0 ] +* where Q1 consists of the first RANK columns of Q. +* +* This routine is basically identical to the original xGELSX except +* three differences: +* o The permutation of matrix B (the right hand side) is faster and +* more simple. +* o The call to the subroutine xGEQPF has been substituted by the +* the call to the subroutine xGEQP3. This subroutine is a Blas-3 +* version of the QR factorization with column pivoting. +* o Matrix B (the right hand side) is updated with Blas-3. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of +* columns of matrices B and X. NRHS >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A has been overwritten by details of its +* complete orthogonal factorization. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the M-by-NRHS right hand side matrix B. +* On exit, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M,N). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +* to the front of AP, otherwise column i is a free column. +* On exit, if JPVT(i) = k, then the i-th column of A*P +* was the k-th column of A. +* +* RCOND (input) REAL +* RCOND is used to determine the effective rank of A, which +* is defined as the order of the largest leading triangular +* submatrix R11 in the QR factorization with pivoting of A, +* whose estimated condition number < 1/RCOND. +* +* RANK (output) INTEGER +* The effective rank of A, i.e., the order of the submatrix +* R11. This is the same as the order of the submatrix T11 +* in the complete orthogonal factorization of A. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* The unblocked strategy requires that: +* LWORK >= MN + MAX( 2*MN, N+1, MN+NRHS ) +* where MN = min(M,N). +* The block algorithm requires that: +* LWORK >= MN + MAX( 2*MN, NB*(N+1), MN+MN*NB, MN+NB*NRHS ) +* where NB is an upper bound on the blocksize returned +* by ILAENV for the routines CGEQP3, CTZRZF, CTZRQF, CUNMQR, +* and CUNMRZ. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) REAL array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IMAX, IMIN + PARAMETER ( IMAX = 1, IMIN = 2 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IASCL, IBSCL, ISMAX, ISMIN, J, LWKOPT, MN, + $ NB, NB1, NB2, NB3, NB4 + REAL ANRM, BIGNUM, BNRM, SMAX, SMAXPR, SMIN, SMINPR, + $ SMLNUM, WSIZE + COMPLEX C1, C2, S1, S2 +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEQP3, CLAIC1, CLASCL, CLASET, CTRSM, + $ CTZRZF, CUNMQR, CUNMRZ, SLABAD, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL CLANGE, ILAENV, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, CMPLX +* .. +* .. Executable Statements .. +* + MN = MIN( M, N ) + ISMIN = MN + 1 + ISMAX = 2*MN + 1 +* +* Test the input arguments. +* + INFO = 0 + NB1 = ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'CGERQF', ' ', M, N, -1, -1 ) + NB3 = ILAENV( 1, 'CUNMQR', ' ', M, N, NRHS, -1 ) + NB4 = ILAENV( 1, 'CUNMRQ', ' ', M, N, NRHS, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKOPT = MAX( 1, MN+2*N+NB*(N+1), 2*MN+NB*NRHS ) + WORK( 1 ) = CMPLX( LWKOPT ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.( MN+MAX( 2*MN, N+1, MN+NRHS ) ) .AND. + $ .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELSY', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max entries outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + RANK = 0 + GO TO 70 + END IF +* + BNRM = CLANGE( 'M', M, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Compute QR factorization with column pivoting of A: +* A * P = Q * R +* + CALL CGEQP3( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ), + $ LWORK-MN, RWORK, INFO ) + WSIZE = MN + REAL( WORK( MN+1 ) ) +* +* complex workspace: MN+NB*(N+1). real workspace 2*N. +* Details of Householder rotations stored in WORK(1:MN). +* +* Determine RANK using incremental condition estimation +* + WORK( ISMIN ) = CONE + WORK( ISMAX ) = CONE + SMAX = ABS( A( 1, 1 ) ) + SMIN = SMAX + IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN + RANK = 0 + CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + GO TO 70 + ELSE + RANK = 1 + END IF +* + 10 CONTINUE + IF( RANK.LT.MN ) THEN + I = RANK + 1 + CALL CLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ), + $ A( I, I ), SMINPR, S1, C1 ) + CALL CLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ), + $ A( I, I ), SMAXPR, S2, C2 ) +* + IF( SMAXPR*RCOND.LE.SMINPR ) THEN + DO 20 I = 1, RANK + WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 ) + WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 ) + 20 CONTINUE + WORK( ISMIN+RANK ) = C1 + WORK( ISMAX+RANK ) = C2 + SMIN = SMINPR + SMAX = SMAXPR + RANK = RANK + 1 + GO TO 10 + END IF + END IF +* +* complex workspace: 3*MN. +* +* Logically partition R = [ R11 R12 ] +* [ 0 R22 ] +* where R11 = R(1:RANK,1:RANK) +* +* [R11,R12] = [ T11, 0 ] * Y +* + IF( RANK.LT.N ) + $ CALL CTZRZF( RANK, N, A, LDA, WORK( MN+1 ), WORK( 2*MN+1 ), + $ LWORK-2*MN, INFO ) +* +* complex workspace: 2*MN. +* Details of Householder rotations stored in WORK(MN+1:2*MN) +* +* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) +* + CALL CUNMQR( 'Left', 'Conjugate transpose', M, NRHS, MN, A, LDA, + $ WORK( 1 ), B, LDB, WORK( 2*MN+1 ), LWORK-2*MN, INFO ) + WSIZE = MAX( WSIZE, 2*MN+REAL( WORK( 2*MN+1 ) ) ) +* +* complex workspace: 2*MN+NB*NRHS. +* +* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) +* + CALL CTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK, + $ NRHS, CONE, A, LDA, B, LDB ) +* + DO 40 J = 1, NRHS + DO 30 I = RANK + 1, N + B( I, J ) = CZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS) +* + IF( RANK.LT.N ) THEN + CALL CUNMRZ( 'Left', 'Conjugate transpose', N, NRHS, RANK, + $ N-RANK, A, LDA, WORK( MN+1 ), B, LDB, + $ WORK( 2*MN+1 ), LWORK-2*MN, INFO ) + END IF +* +* complex workspace: 2*MN+NRHS. +* +* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) +* + DO 60 J = 1, NRHS + DO 50 I = 1, N + WORK( JPVT( I ) ) = B( I, J ) + 50 CONTINUE + CALL CCOPY( N, WORK( 1 ), 1, B( 1, J ), 1 ) + 60 CONTINUE +* +* complex workspace: N. +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL CLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL CLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 70 CONTINUE + WORK( 1 ) = CMPLX( LWKOPT ) +* + RETURN +* +* End of CGELSY +* + END diff --git a/costa/native/external/lapack/cgeql2.f b/costa/native/external/lapack/cgeql2.f new file mode 100644 index 000000000..56121e1f6 --- /dev/null +++ b/costa/native/external/lapack/cgeql2.f @@ -0,0 +1,122 @@ + SUBROUTINE CGEQL2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGEQL2 computes a QL factorization of a complex m by n matrix A: +* A = Q * L. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the m by n matrix A. +* On exit, if m >= n, the lower triangle of the subarray +* A(m-n+1:m,1:n) contains the n by n lower triangular matrix L; +* if m <= n, the elements on and below the (n-m)-th +* superdiagonal contain the m by n lower trapezoidal matrix L; +* the remaining elements, with the array TAU, represent the +* unitary matrix Q as a product of elementary reflectors +* (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(k) . . . H(2) H(1), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in +* A(1:m-k+i-1,n-k+i), and tau in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLARF, CLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQL2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = K, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* A(1:m-k+i-1,n-k+i) +* + ALPHA = A( M-K+I, N-K+I ) + CALL CLARFG( M-K+I, ALPHA, A( 1, N-K+I ), 1, TAU( I ) ) +* +* Apply H(i)' to A(1:m-k+i,1:n-k+i-1) from the left +* + A( M-K+I, N-K+I ) = ONE + CALL CLARF( 'Left', M-K+I, N-K+I-1, A( 1, N-K+I ), 1, + $ CONJG( TAU( I ) ), A, LDA, WORK ) + A( M-K+I, N-K+I ) = ALPHA + 10 CONTINUE + RETURN +* +* End of CGEQL2 +* + END diff --git a/costa/native/external/lapack/cgeqlf.f b/costa/native/external/lapack/cgeqlf.f new file mode 100644 index 000000000..88c05d26e --- /dev/null +++ b/costa/native/external/lapack/cgeqlf.f @@ -0,0 +1,205 @@ + SUBROUTINE CGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGEQLF computes a QL factorization of a complex M-by-N matrix A: +* A = Q * L. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, +* if m >= n, the lower triangle of the subarray +* A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L; +* if m <= n, the elements on and below the (n-m)-th +* superdiagonal contain the M-by-N lower trapezoidal matrix L; +* the remaining elements, with the array TAU, represent the +* unitary matrix Q as a product of elementary reflectors +* (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(k) . . . H(2) H(1), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in +* A(1:m-k+i-1,n-k+i), and tau in TAU(i). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT, + $ MU, NB, NBMIN, NU, NX +* .. +* .. External Subroutines .. + EXTERNAL CGEQL2, CLARFB, CLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'CGEQLF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQLF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CGEQLF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CGEQLF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially. +* The last kk columns are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* + DO 10 I = K - KK + KI + 1, K - KK + 1, -NB + IB = MIN( K-I+1, NB ) +* +* Compute the QL factorization of the current block +* A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1) +* + CALL CGEQL2( M-K+I+IB-1, IB, A( 1, N-K+I ), LDA, TAU( I ), + $ WORK, IINFO ) + IF( N-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL CLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H' to A(1:m-k+i+ib-1,1:n-k+i-1) from the left +* + CALL CLARFB( 'Left', 'Conjugate transpose', 'Backward', + $ 'Columnwise', M-K+I+IB-1, N-K+I-1, IB, + $ A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + MU = M - K + I + NB - 1 + NU = N - K + I + NB - 1 + ELSE + MU = M + NU = N + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 .AND. NU.GT.0 ) + $ CALL CGEQL2( MU, NU, A, LDA, TAU, WORK, IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of CGEQLF +* + END diff --git a/costa/native/external/lapack/cgeqp3.f b/costa/native/external/lapack/cgeqp3.f new file mode 100644 index 000000000..44c43e6b1 --- /dev/null +++ b/costa/native/external/lapack/cgeqp3.f @@ -0,0 +1,285 @@ + SUBROUTINE CGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, RWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGEQP3 computes a QR factorization with column pivoting of a +* matrix A: A*P = Q*R using Level 3 BLAS. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the upper triangle of the array contains the +* min(M,N)-by-N upper trapezoidal matrix R; the elements below +* the diagonal, together with the array TAU, represent the +* unitary matrix Q as a product of min(M,N) elementary +* reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(J).ne.0, the J-th column of A is permuted +* to the front of A*P (a leading column); if JPVT(J)=0, +* the J-th column of A is a free column. +* On exit, if JPVT(J)=K, then the J-th column of A*P was the +* the K-th column of A. +* +* TAU (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO=0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= N+1. +* For optimal performance LWORK >= ( N+1 )*NB, where NB +* is the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) REAL array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real/complex scalar, and v is a real/complex vector +* with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in +* A(i+1:m,i), and tau in TAU(i). +* +* Based on contributions by +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* X. Sun, Computer Science Dept., Duke University, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER INB, INBMIN, IXOVER + PARAMETER ( INB = 1, INBMIN = 2, IXOVER = 3 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER FJB, IWS, J, JB, LWKOPT, MINMN, MINWS, NA, NB, + $ NBMIN, NFXD, NX, SM, SMINMN, SN, TOPBMN +* .. +* .. External Subroutines .. + EXTERNAL CGEQRF, CLAQP2, CLAQPS, CSWAP, CUNMQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + REAL SCNRM2 + EXTERNAL ILAENV, SCNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* + IWS = N + 1 + MINMN = MIN( M, N ) +* +* Test input arguments +* ==================== +* + INFO = 0 + NB = ILAENV( INB, 'CGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = ( N+1 )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( ( LWORK.LT.IWS ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQP3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( MINMN.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Move initial columns up front. +* + NFXD = 1 + DO 10 J = 1, N + IF( JPVT( J ).NE.0 ) THEN + IF( J.NE.NFXD ) THEN + CALL CSWAP( M, A( 1, J ), 1, A( 1, NFXD ), 1 ) + JPVT( J ) = JPVT( NFXD ) + JPVT( NFXD ) = J + ELSE + JPVT( J ) = J + END IF + NFXD = NFXD + 1 + ELSE + JPVT( J ) = J + END IF + 10 CONTINUE + NFXD = NFXD - 1 +* +* Factorize fixed columns +* ======================= +* +* Compute the QR factorization of fixed columns and update +* remaining columns. +* + IF( NFXD.GT.0 ) THEN + NA = MIN( M, NFXD ) +*CC CALL CGEQR2( M, NA, A, LDA, TAU, WORK, INFO ) + CALL CGEQRF( M, NA, A, LDA, TAU, WORK, LWORK, INFO ) + IWS = MAX( IWS, INT( WORK( 1 ) ) ) + IF( NA.LT.N ) THEN +*CC CALL CUNM2R( 'Left', 'Conjugate Transpose', M, N-NA, +*CC $ NA, A, LDA, TAU, A( 1, NA+1 ), LDA, WORK, +*CC $ INFO ) + CALL CUNMQR( 'Left', 'Conjugate Transpose', M, N-NA, NA, A, + $ LDA, TAU, A( 1, NA+1 ), LDA, WORK, LWORK, + $ INFO ) + IWS = MAX( IWS, INT( WORK( 1 ) ) ) + END IF + END IF +* +* Factorize free columns +* ====================== +* + IF( NFXD.LT.MINMN ) THEN +* + SM = M - NFXD + SN = N - NFXD + SMINMN = MINMN - NFXD +* +* Determine the block size. +* + NB = ILAENV( INB, 'CGEQRF', ' ', SM, SN, -1, -1 ) + NBMIN = 2 + NX = 0 +* + IF( ( NB.GT.1 ) .AND. ( NB.LT.SMINMN ) ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( IXOVER, 'CGEQRF', ' ', SM, SN, -1, + $ -1 ) ) +* +* + IF( NX.LT.SMINMN ) THEN +* +* Determine if workspace is large enough for blocked code. +* + MINWS = ( SN+1 )*NB + IWS = MAX( IWS, MINWS ) + IF( LWORK.LT.MINWS ) THEN +* +* Not enough workspace to use optimal NB: Reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / ( SN+1 ) + NBMIN = MAX( 2, ILAENV( INBMIN, 'CGEQRF', ' ', SM, SN, + $ -1, -1 ) ) +* +* + END IF + END IF + END IF +* +* Initialize partial column norms. The first N elements of work +* store the exact column norms. +* + DO 20 J = NFXD + 1, N + RWORK( J ) = SCNRM2( SM, A( NFXD+1, J ), 1 ) + RWORK( N+J ) = RWORK( J ) + 20 CONTINUE +* + IF( ( NB.GE.NBMIN ) .AND. ( NB.LT.SMINMN ) .AND. + $ ( NX.LT.SMINMN ) ) THEN +* +* Use blocked code initially. +* + J = NFXD + 1 +* +* Compute factorization: while loop. +* +* + TOPBMN = MINMN - NX + 30 CONTINUE + IF( J.LE.TOPBMN ) THEN + JB = MIN( NB, TOPBMN-J+1 ) +* +* Factorize JB columns among columns J:N. +* + CALL CLAQPS( M, N-J+1, J-1, JB, FJB, A( 1, J ), LDA, + $ JPVT( J ), TAU( J ), RWORK( J ), + $ RWORK( N+J ), WORK( 1 ), WORK( JB+1 ), + $ N-J+1 ) +* + J = J + FJB + GO TO 30 + END IF + ELSE + J = NFXD + 1 + END IF +* +* Use unblocked code to factor the last or only block. +* +* + IF( J.LE.MINMN ) + $ CALL CLAQP2( M, N-J+1, J-1, A( 1, J ), LDA, JPVT( J ), + $ TAU( J ), RWORK( J ), RWORK( N+J ), WORK( 1 ) ) +* + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of CGEQP3 +* + END diff --git a/costa/native/external/lapack/cgeqpf.f b/costa/native/external/lapack/cgeqpf.f new file mode 100644 index 000000000..5836e5503 --- /dev/null +++ b/costa/native/external/lapack/cgeqpf.f @@ -0,0 +1,225 @@ + SUBROUTINE CGEQPF( M, N, A, LDA, JPVT, TAU, WORK, RWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine CGEQP3. +* +* CGEQPF computes a QR factorization with column pivoting of a +* complex M-by-N matrix A: A*P = Q*R. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0 +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the upper triangle of the array contains the +* min(M,N)-by-N upper triangular matrix R; the elements +* below the diagonal, together with the array TAU, +* represent the unitary matrix Q as a product of +* min(m,n) elementary reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +* to the front of A*P (a leading column); if JPVT(i) = 0, +* the i-th column of A is a free column. +* On exit, if JPVT(i) = k, then the i-th column of A*P +* was the k-th column of A. +* +* TAU (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* RWORK (workspace) REAL array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(n) +* +* Each H(i) has the form +* +* H = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i). +* +* The matrix P is represented in jpvt as follows: If +* jpvt(j) = i +* then the jth column of P is the ith canonical unit vector. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, ITEMP, J, MA, MN, PVT + REAL TEMP, TEMP2 + COMPLEX AII +* .. +* .. External Subroutines .. + EXTERNAL CGEQR2, CLARF, CLARFG, CSWAP, CUNM2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CMPLX, CONJG, MAX, MIN, SQRT +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SCNRM2 + EXTERNAL ISAMAX, SCNRM2 +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQPF', -INFO ) + RETURN + END IF +* + MN = MIN( M, N ) +* +* Move initial columns up front +* + ITEMP = 1 + DO 10 I = 1, N + IF( JPVT( I ).NE.0 ) THEN + IF( I.NE.ITEMP ) THEN + CALL CSWAP( M, A( 1, I ), 1, A( 1, ITEMP ), 1 ) + JPVT( I ) = JPVT( ITEMP ) + JPVT( ITEMP ) = I + ELSE + JPVT( I ) = I + END IF + ITEMP = ITEMP + 1 + ELSE + JPVT( I ) = I + END IF + 10 CONTINUE + ITEMP = ITEMP - 1 +* +* Compute the QR factorization and update remaining columns +* + IF( ITEMP.GT.0 ) THEN + MA = MIN( ITEMP, M ) + CALL CGEQR2( M, MA, A, LDA, TAU, WORK, INFO ) + IF( MA.LT.N ) THEN + CALL CUNM2R( 'Left', 'Conjugate transpose', M, N-MA, MA, A, + $ LDA, TAU, A( 1, MA+1 ), LDA, WORK, INFO ) + END IF + END IF +* + IF( ITEMP.LT.MN ) THEN +* +* Initialize partial column norms. The first n elements of +* work store the exact column norms. +* + DO 20 I = ITEMP + 1, N + RWORK( I ) = SCNRM2( M-ITEMP, A( ITEMP+1, I ), 1 ) + RWORK( N+I ) = RWORK( I ) + 20 CONTINUE +* +* Compute factorization +* + DO 40 I = ITEMP + 1, MN +* +* Determine ith pivot column and swap if necessary +* + PVT = ( I-1 ) + ISAMAX( N-I+1, RWORK( I ), 1 ) +* + IF( PVT.NE.I ) THEN + CALL CSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( I ) + JPVT( I ) = ITEMP + RWORK( PVT ) = RWORK( I ) + RWORK( N+PVT ) = RWORK( N+I ) + END IF +* +* Generate elementary reflector H(i) +* + AII = A( I, I ) + CALL CLARFG( M-I+1, AII, A( MIN( I+1, M ), I ), 1, + $ TAU( I ) ) + A( I, I ) = AII +* + IF( I.LT.N ) THEN +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + AII = A( I, I ) + A( I, I ) = CMPLX( ONE ) + CALL CLARF( 'Left', M-I+1, N-I, A( I, I ), 1, + $ CONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK ) + A( I, I ) = AII + END IF +* +* Update partial column norms +* + DO 30 J = I + 1, N + IF( RWORK( J ).NE.ZERO ) THEN + TEMP = ONE - ( ABS( A( I, J ) ) / RWORK( J ) )**2 + TEMP = MAX( TEMP, ZERO ) + TEMP2 = ONE + 0.05*TEMP*( RWORK( J ) / RWORK( N+J ) ) + $ **2 + IF( TEMP2.EQ.ONE ) THEN + IF( M-I.GT.0 ) THEN + RWORK( J ) = SCNRM2( M-I, A( I+1, J ), 1 ) + RWORK( N+J ) = RWORK( J ) + ELSE + RWORK( J ) = ZERO + RWORK( N+J ) = ZERO + END IF + ELSE + RWORK( J ) = RWORK( J )*SQRT( TEMP ) + END IF + END IF + 30 CONTINUE +* + 40 CONTINUE + END IF + RETURN +* +* End of CGEQPF +* + END diff --git a/costa/native/external/lapack/cgeqr2.f b/costa/native/external/lapack/cgeqr2.f new file mode 100644 index 000000000..b06c32e49 --- /dev/null +++ b/costa/native/external/lapack/cgeqr2.f @@ -0,0 +1,122 @@ + SUBROUTINE CGEQR2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGEQR2 computes a QR factorization of a complex m by n matrix A: +* A = Q * R. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the m by n matrix A. +* On exit, the elements on and above the diagonal of the array +* contain the min(m,n) by n upper trapezoidal matrix R (R is +* upper triangular if m >= n); the elements below the diagonal, +* with the array TAU, represent the unitary matrix Q as a +* product of elementary reflectors (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +* and tau in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLARF, CLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQR2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + CALL CLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAU( I ) ) + IF( I.LT.N ) THEN +* +* Apply H(i)' to A(i:m,i+1:n) from the left +* + ALPHA = A( I, I ) + A( I, I ) = ONE + CALL CLARF( 'Left', M-I+1, N-I, A( I, I ), 1, + $ CONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK ) + A( I, I ) = ALPHA + END IF + 10 CONTINUE + RETURN +* +* End of CGEQR2 +* + END diff --git a/costa/native/external/lapack/cgeqrf.f b/costa/native/external/lapack/cgeqrf.f new file mode 100644 index 000000000..eaf2e80df --- /dev/null +++ b/costa/native/external/lapack/cgeqrf.f @@ -0,0 +1,197 @@ + SUBROUTINE CGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGEQRF computes a QR factorization of a complex M-by-N matrix A: +* A = Q * R. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the elements on and above the diagonal of the array +* contain the min(M,N)-by-N upper trapezoidal matrix R (R is +* upper triangular if m >= n); the elements below the diagonal, +* with the array TAU, represent the unitary matrix Q as a +* product of min(m,n) elementary reflectors (see Further +* Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +* and tau in TAU(i). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CGEQR2, CLARFB, CLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CGEQRF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CGEQRF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the QR factorization of the current block +* A(i:m,i:i+ib-1) +* + CALL CGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL CLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H' to A(i:m,i+ib:n) from the left +* + CALL CLARFB( 'Left', 'Conjugate transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) + $ CALL CGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of CGEQRF +* + END diff --git a/costa/native/external/lapack/cgerfs.f b/costa/native/external/lapack/cgerfs.f new file mode 100644 index 000000000..6ccdff204 --- /dev/null +++ b/costa/native/external/lapack/cgerfs.f @@ -0,0 +1,341 @@ + SUBROUTINE CGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, + $ X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CGERFS improves the computed solution to a system of linear +* equations and provides error bounds and backward error estimates for +* the solution. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The original N-by-N matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input) COMPLEX array, dimension (LDAF,N) +* The factors L and U from the factorization A = P*L*U +* as computed by CGETRF. +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from CGETRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* B (input) COMPLEX array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) COMPLEX array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by CGETRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANSN, TRANST + INTEGER COUNT, I, J, K, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGEMV, CGETRS, CLACON, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGERFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL CCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL CGEMV( TRANS, N, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK, + $ 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(op(A))*abs(X) + abs(B). +* + IF( NOTRAN ) THEN + DO 50 K = 1, N + XK = CABS1( X( K, J ) ) + DO 40 I = 1, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + 40 CONTINUE + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + DO 60 I = 1, N + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL CGETRS( TRANS, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + CALL CAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL CGETRS( TRANST, N, 1, AF, LDAF, IPIV, WORK, N, + $ INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL CGETRS( TRANSN, N, 1, AF, LDAF, IPIV, WORK, N, + $ INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of CGERFS +* + END diff --git a/costa/native/external/lapack/cgerq2.f b/costa/native/external/lapack/cgerq2.f new file mode 100644 index 000000000..024d633f4 --- /dev/null +++ b/costa/native/external/lapack/cgerq2.f @@ -0,0 +1,125 @@ + SUBROUTINE CGERQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGERQ2 computes an RQ factorization of a complex m by n matrix A: +* A = R * Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the m by n matrix A. +* On exit, if m <= n, the upper triangle of the subarray +* A(1:m,n-m+1:n) contains the m by m upper triangular matrix R; +* if m >= n, the elements on and above the (m-n)-th subdiagonal +* contain the m by n upper trapezoidal matrix R; the remaining +* elements, with the array TAU, represent the unitary matrix +* Q as a product of elementary reflectors (see Further +* Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) COMPLEX array, dimension (M) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1)' H(2)' . . . H(k)', where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on +* exit in A(m-k+i,1:n-k+i-1), and tau in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLARF, CLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGERQ2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = K, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* A(m-k+i,1:n-k+i-1) +* + CALL CLACGV( N-K+I, A( M-K+I, 1 ), LDA ) + ALPHA = A( M-K+I, N-K+I ) + CALL CLARFG( N-K+I, ALPHA, A( M-K+I, 1 ), LDA, + $ TAU( I ) ) +* +* Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right +* + A( M-K+I, N-K+I ) = ONE + CALL CLARF( 'Right', M-K+I-1, N-K+I, A( M-K+I, 1 ), LDA, + $ TAU( I ), A, LDA, WORK ) + A( M-K+I, N-K+I ) = ALPHA + CALL CLACGV( N-K+I-1, A( M-K+I, 1 ), LDA ) + 10 CONTINUE + RETURN +* +* End of CGERQ2 +* + END diff --git a/costa/native/external/lapack/cgerqf.f b/costa/native/external/lapack/cgerqf.f new file mode 100644 index 000000000..2091a8201 --- /dev/null +++ b/costa/native/external/lapack/cgerqf.f @@ -0,0 +1,205 @@ + SUBROUTINE CGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGERQF computes an RQ factorization of a complex M-by-N matrix A: +* A = R * Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, +* if m <= n, the upper triangle of the subarray +* A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R; +* if m >= n, the elements on and above the (m-n)-th subdiagonal +* contain the M-by-N upper trapezoidal matrix R; +* the remaining elements, with the array TAU, represent the +* unitary matrix Q as a product of min(m,n) elementary +* reflectors (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1)' H(2)' . . . H(k)', where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on +* exit in A(m-k+i,1:n-k+i-1), and tau in TAU(i). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT, + $ MU, NB, NBMIN, NU, NX +* .. +* .. External Subroutines .. + EXTERNAL CGERQ2, CLARFB, CLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'CGERQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGERQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CGERQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CGERQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially. +* The last kk rows are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* + DO 10 I = K - KK + KI + 1, K - KK + 1, -NB + IB = MIN( K-I+1, NB ) +* +* Compute the RQ factorization of the current block +* A(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1) +* + CALL CGERQ2( IB, N-K+I+IB-1, A( M-K+I, 1 ), LDA, TAU( I ), + $ WORK, IINFO ) + IF( M-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL CLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, + $ A( M-K+I, 1 ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right +* + CALL CLARFB( 'Right', 'No transpose', 'Backward', + $ 'Rowwise', M-K+I-1, N-K+I+IB-1, IB, + $ A( M-K+I, 1 ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + MU = M - K + I + NB - 1 + NU = N - K + I + NB - 1 + ELSE + MU = M + NU = N + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 .AND. NU.GT.0 ) + $ CALL CGERQ2( MU, NU, A, LDA, TAU, WORK, IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of CGERQF +* + END diff --git a/costa/native/external/lapack/cgesc2.f b/costa/native/external/lapack/cgesc2.f new file mode 100644 index 000000000..236494f56 --- /dev/null +++ b/costa/native/external/lapack/cgesc2.f @@ -0,0 +1,134 @@ + SUBROUTINE CGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER LDA, N + REAL SCALE +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + COMPLEX A( LDA, * ), RHS( * ) +* .. +* +* Purpose +* ======= +* +* CGESC2 solves a system of linear equations +* +* A * X = scale* RHS +* +* with a general N-by-N matrix A using the LU factorization with +* complete pivoting computed by CGETC2. +* +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of columns of the matrix A. +* +* A (input) COMPLEX array, dimension (LDA, N) +* On entry, the LU part of the factorization of the n-by-n +* matrix A computed by CGETC2: A = P * L * U * Q +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1, N). +* +* RHS (input/output) COMPLEX array, dimension N. +* On entry, the right hand side vector b. +* On exit, the solution vector X. +* +* IPIV (iput) INTEGER array, dimension (N). +* The pivot indices; for 1 <= i <= N, row i of the +* matrix has been interchanged with row IPIV(i). +* +* JPIV (iput) INTEGER array, dimension (N). +* The pivot indices; for 1 <= j <= N, column j of the +* matrix has been interchanged with column JPIV(j). +* +* SCALE (output) REAL +* On exit, SCALE contains the scale factor. SCALE is chosen +* 0 <= SCALE <= 1 to prevent owerflow in the solution. +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL BIGNUM, EPS, SMLNUM + COMPLEX TEMP +* .. +* .. External Subroutines .. + EXTERNAL CLASWP, CSCAL, SLABAD +* .. +* .. External Functions .. + INTEGER ICAMAX + REAL SLAMCH + EXTERNAL ICAMAX, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CMPLX, REAL +* .. +* .. Executable Statements .. +* +* Set constant to control overflow +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Apply permutations IPIV to RHS +* + CALL CLASWP( 1, RHS, LDA, 1, N-1, IPIV, 1 ) +* +* Solve for L part +* + DO 20 I = 1, N - 1 + DO 10 J = I + 1, N + RHS( J ) = RHS( J ) - A( J, I )*RHS( I ) + 10 CONTINUE + 20 CONTINUE +* +* Solve for U part +* + SCALE = ONE +* +* Check for scaling +* + I = ICAMAX( N, RHS, 1 ) + IF( TWO*SMLNUM*ABS( RHS( I ) ).GT.ABS( A( N, N ) ) ) THEN + TEMP = CMPLX( ONE / TWO, ZERO ) / ABS( RHS( I ) ) + CALL CSCAL( N, TEMP, RHS( 1 ), 1 ) + SCALE = SCALE*REAL( TEMP ) + END IF + DO 40 I = N, 1, -1 + TEMP = CMPLX( ONE, ZERO ) / A( I, I ) + RHS( I ) = RHS( I )*TEMP + DO 30 J = I + 1, N + RHS( I ) = RHS( I ) - RHS( J )*( A( I, J )*TEMP ) + 30 CONTINUE + 40 CONTINUE +* +* Apply permutations JPIV to the solution (RHS) +* + CALL CLASWP( 1, RHS, LDA, 1, N-1, JPIV, -1 ) + RETURN +* +* End of CGESC2 +* + END diff --git a/costa/native/external/lapack/cgesdd.f b/costa/native/external/lapack/cgesdd.f new file mode 100644 index 000000000..d9445dc94 --- /dev/null +++ b/costa/native/external/lapack/cgesdd.f @@ -0,0 +1,1950 @@ + SUBROUTINE CGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, + $ LWORK, RWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ + INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ), S( * ) + COMPLEX A( LDA, * ), U( LDU, * ), VT( LDVT, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGESDD computes the singular value decomposition (SVD) of a complex +* M-by-N matrix A, optionally computing the left and/or right singular +* vectors, by using divide-and-conquer method. The SVD is written +* +* A = U * SIGMA * conjugate-transpose(V) +* +* where SIGMA is an M-by-N matrix which is zero except for its +* min(m,n) diagonal elements, U is an M-by-M unitary matrix, and +* V is an N-by-N unitary matrix. The diagonal elements of SIGMA +* are the singular values of A; they are real and non-negative, and +* are returned in descending order. The first min(m,n) columns of +* U and V are the left and right singular vectors of A. +* +* Note that the routine returns VT = V**H, not V. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* Specifies options for computing all or part of the matrix U: +* = 'A': all M columns of U and all N rows of V**H are +* returned in the arrays U and VT; +* = 'S': the first min(M,N) columns of U and the first +* min(M,N) rows of V**H are returned in the arrays U +* and VT; +* = 'O': If M >= N, the first N columns of U are overwritten +* on the array A and all rows of V**H are returned in +* the array VT; +* otherwise, all columns of U are returned in the +* array U and the first M rows of V**H are overwritten +* in the array VT; +* = 'N': no columns of U or rows of V**H are computed. +* +* M (input) INTEGER +* The number of rows of the input matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the input matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, +* if JOBZ = 'O', A is overwritten with the first N columns +* of U (the left singular vectors, stored +* columnwise) if M >= N; +* A is overwritten with the first M rows +* of V**H (the right singular vectors, stored +* rowwise) otherwise. +* if JOBZ .ne. 'O', the contents of A are destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* S (output) REAL array, dimension (min(M,N)) +* The singular values of A, sorted so that S(i) >= S(i+1). +* +* U (output) COMPLEX array, dimension (LDU,UCOL) +* UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; +* UCOL = min(M,N) if JOBZ = 'S'. +* If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M +* unitary matrix U; +* if JOBZ = 'S', U contains the first min(M,N) columns of U +* (the left singular vectors, stored columnwise); +* if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= 1; if +* JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. +* +* VT (output) COMPLEX array, dimension (LDVT,N) +* If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the +* N-by-N unitary matrix V**H; +* if JOBZ = 'S', VT contains the first min(M,N) rows of +* V**H (the right singular vectors, stored rowwise); +* if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. LDVT >= 1; if +* JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; +* if JOBZ = 'S', LDVT >= min(M,N). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1. +* if JOBZ = 'N', LWORK >= 2*min(M,N)+max(M,N). +* if JOBZ = 'O', +* LWORK >= 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N). +* if JOBZ = 'S' or 'A', +* LWORK >= min(M,N)*min(M,N)+2*min(M,N)+max(M,N). +* For good performance, LWORK should generally be larger. +* If LWORK < 0 but other input arguments are legal, WORK(1) +* returns the optimal LWORK. +* +* RWORK (workspace) REAL array, dimension (LRWORK) +* If JOBZ = 'N', LRWORK >= 7*min(M,N). +* Otherwise, LRWORK >= 5*min(M,N)*min(M,N) + 5*min(M,N) +* +* IWORK (workspace) INTEGER array, dimension (8*min(M,N)) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: The updating process of SBDSDC did not converge. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS + INTEGER BLK, CHUNK, I, IE, IERR, IL, IR, IRU, IRVT, + $ ISCL, ITAU, ITAUP, ITAUQ, IU, IVT, LDWKVT, + $ LDWRKL, LDWRKR, LDWRKU, MAXWRK, MINMN, MINWRK, + $ MNTHR1, MNTHR2, NRWORK, NWORK, WRKBL + REAL ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CGEBRD, CGELQF, CGEMM, CGEQRF, CLACP2, CLACPY, + $ CLACRM, CLARCM, CLASCL, CLASET, CUNGBR, CUNGLQ, + $ CUNGQR, CUNMBR, SBDSDC, SLASCL, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL CLANGE, ILAENV, LSAME, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + MNTHR1 = INT( MINMN*17.0E0 / 9.0E0 ) + MNTHR2 = INT( MINMN*5.0E0 / 3.0E0 ) + WNTQA = LSAME( JOBZ, 'A' ) + WNTQS = LSAME( JOBZ, 'S' ) + WNTQAS = WNTQA .OR. WNTQS + WNTQO = LSAME( JOBZ, 'O' ) + WNTQN = LSAME( JOBZ, 'N' ) + MINWRK = 1 + MAXWRK = 1 + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.( WNTQA .OR. WNTQS .OR. WNTQO .OR. WNTQN ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDU.LT.1 .OR. ( WNTQAS .AND. LDU.LT.M ) .OR. + $ ( WNTQO .AND. M.LT.N .AND. LDU.LT.M ) ) THEN + INFO = -8 + ELSE IF( LDVT.LT.1 .OR. ( WNTQA .AND. LDVT.LT.N ) .OR. + $ ( WNTQS .AND. LDVT.LT.MINMN ) .OR. + $ ( WNTQO .AND. M.GE.N .AND. LDVT.LT.N ) ) THEN + INFO = -10 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to +* real workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 .AND. M.GT.0 .AND. N.GT.0 ) THEN + IF( M.GE.N ) THEN +* +* There is no complex work space needed for bidiagonal SVD +* The real work space needed for bidiagonal SVD is BDSPAC, +* BDSPAC = 3*N*N + 4*N +* + IF( M.GE.MNTHR1 ) THEN + IF( WNTQN ) THEN +* +* Path 1 (M much larger than N, JOBZ='N') +* + WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, + $ -1 ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) + MAXWRK = WRKBL + MINWRK = 3*N + ELSE IF( WNTQO ) THEN +* +* Path 2 (M much larger than N, JOBZ='O') +* + WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'CUNMBR', 'QLN', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'CUNMBR', 'PRC', N, N, N, -1 ) ) + MAXWRK = M*N + N*N + WRKBL + MINWRK = 2*N*N + 3*N + ELSE IF( WNTQS ) THEN +* +* Path 3 (M much larger than N, JOBZ='S') +* + WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'CUNMBR', 'QLN', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'CUNMBR', 'PRC', N, N, N, -1 ) ) + MAXWRK = N*N + WRKBL + MINWRK = N*N + 3*N + ELSE IF( WNTQA ) THEN +* +* Path 4 (M much larger than N, JOBZ='A') +* + WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'CUNGQR', ' ', M, + $ M, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'CUNMBR', 'QLN', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'CUNMBR', 'PRC', N, N, N, -1 ) ) + MAXWRK = N*N + WRKBL + MINWRK = N*N + 2*N + M + END IF + ELSE IF( M.GE.MNTHR2 ) THEN +* +* Path 5 (M much larger than N, but not as much as MNTHR1) +* + MAXWRK = 2*N + ( M+N )*ILAENV( 1, 'CGEBRD', ' ', M, N, + $ -1, -1 ) + MINWRK = 2*N + M + IF( WNTQO ) THEN + MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'Q', M, N, N, -1 ) ) + MAXWRK = MAXWRK + M*N + MINWRK = MINWRK + N*N + ELSE IF( WNTQS ) THEN + MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'Q', M, N, N, -1 ) ) + ELSE IF( WNTQA ) THEN + MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N+M* + $ ILAENV( 1, 'CUNGBR', 'Q', M, M, N, -1 ) ) + END IF + ELSE +* +* Path 6 (M at least N, but not much larger) +* + MAXWRK = 2*N + ( M+N )*ILAENV( 1, 'CGEBRD', ' ', M, N, + $ -1, -1 ) + MINWRK = 2*N + M + IF( WNTQO ) THEN + MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'CUNMBR', 'PRC', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'CUNMBR', 'QLN', M, N, N, -1 ) ) + MAXWRK = MAXWRK + M*N + MINWRK = MINWRK + N*N + ELSE IF( WNTQS ) THEN + MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'CUNMBR', 'PRC', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'CUNMBR', 'QLN', M, N, N, -1 ) ) + ELSE IF( WNTQA ) THEN + MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'PRC', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N+M* + $ ILAENV( 1, 'CUNGBR', 'QLN', M, M, N, -1 ) ) + END IF + END IF + ELSE +* +* There is no complex work space needed for bidiagonal SVD +* The real work space needed for bidiagonal SVD is BDSPAC, +* BDSPAC = 3*M*M + 4*M +* + IF( N.GE.MNTHR1 ) THEN + IF( WNTQN ) THEN +* +* Path 1t (N much larger than M, JOBZ='N') +* + MAXWRK = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, + $ -1 ) + MAXWRK = MAX( MAXWRK, 2*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + MINWRK = 3*M + ELSE IF( WNTQO ) THEN +* +* Path 2t (N much larger than M, JOBZ='O') +* + WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'CUNMBR', 'PRC', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'CUNMBR', 'QLN', M, M, M, -1 ) ) + MAXWRK = M*N + M*M + WRKBL + MINWRK = 2*M*M + 3*M + ELSE IF( WNTQS ) THEN +* +* Path 3t (N much larger than M, JOBZ='S') +* + WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'CUNMBR', 'PRC', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'CUNMBR', 'QLN', M, M, M, -1 ) ) + MAXWRK = M*M + WRKBL + MINWRK = M*M + 3*M + ELSE IF( WNTQA ) THEN +* +* Path 4t (N much larger than M, JOBZ='A') +* + WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'CUNGLQ', ' ', N, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'CUNMBR', 'PRC', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'CUNMBR', 'QLN', M, M, M, -1 ) ) + MAXWRK = M*M + WRKBL + MINWRK = M*M + 2*M + N + END IF + ELSE IF( N.GE.MNTHR2 ) THEN +* +* Path 5t (N much larger than M, but not as much as MNTHR1) +* + MAXWRK = 2*M + ( M+N )*ILAENV( 1, 'CGEBRD', ' ', M, N, + $ -1, -1 ) + MINWRK = 2*M + N + IF( WNTQO ) THEN + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'P', M, N, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'Q', M, M, N, -1 ) ) + MAXWRK = MAXWRK + M*N + MINWRK = MINWRK + M*M + ELSE IF( WNTQS ) THEN + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'P', M, N, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'Q', M, M, N, -1 ) ) + ELSE IF( WNTQA ) THEN + MAXWRK = MAX( MAXWRK, 2*M+N* + $ ILAENV( 1, 'CUNGBR', 'P', N, N, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'Q', M, M, N, -1 ) ) + END IF + ELSE +* +* Path 6t (N greater than M, but not much larger) +* + MAXWRK = 2*M + ( M+N )*ILAENV( 1, 'CGEBRD', ' ', M, N, + $ -1, -1 ) + MINWRK = 2*M + N + IF( WNTQO ) THEN + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'CUNMBR', 'PRC', M, N, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'CUNMBR', 'QLN', M, M, N, -1 ) ) + MAXWRK = MAXWRK + M*N + MINWRK = MINWRK + M*M + ELSE IF( WNTQS ) THEN + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'PRC', M, N, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'QLN', M, M, N, -1 ) ) + ELSE IF( WNTQA ) THEN + MAXWRK = MAX( MAXWRK, 2*M+N* + $ ILAENV( 1, 'CUNGBR', 'PRC', N, N, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'QLN', M, M, N, -1 ) ) + END IF + END IF + END IF + MAXWRK = MAX( MAXWRK, MINWRK ) + WORK( 1 ) = MAXWRK + END IF +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGESDD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + IF( LWORK.GE.1 ) + $ WORK( 1 ) = ONE + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce using the QR +* decomposition (if sufficient workspace available) +* + IF( M.GE.MNTHR1 ) THEN +* + IF( WNTQN ) THEN +* +* Path 1 (M much larger than N, JOBZ='N') +* No singular vectors to be computed +* + ITAU = 1 + NWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: need 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Zero out below R +* + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), + $ LDA ) + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + NRWORK = IE + N +* +* Perform bidiagonal SVD, compute singular values only +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL SBDSDC( 'U', 'N', N, S, RWORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) +* + ELSE IF( WNTQO ) THEN +* +* Path 2 (M much larger than N, JOBZ='O') +* N left singular vectors to be overwritten on A and +* N right singular vectors to be computed in VT +* + IU = 1 +* +* WORK(IU) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + IF( LWORK.GE.M*N+N*N+3*N ) THEN +* +* WORK(IR) is M by N +* + LDWRKR = M + ELSE + LDWRKR = ( LWORK-N*N-3*N ) / N + END IF + ITAU = IR + LDWRKR*N + NWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer M*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy R to WORK( IR ), zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, WORK( IR+1 ), + $ LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer M*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of R in WORK(IRU) and computing right singular vectors +* of R in WORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRU = IE + N + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix WORK(IU) +* Overwrite WORK(IU) by the left singular vectors of R +* (CWorkspace: need 2*N*N+3*N, prefer M*N+N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CLACP2( 'F', N, N, RWORK( IRU ), N, WORK( IU ), + $ LDWRKU ) + CALL CUNMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IU ), LDWRKU, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by the right singular vectors of R +* (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL CUNMBR( 'P', 'R', 'C', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in WORK(IR) and copying to A +* (CWorkspace: need 2*N*N, prefer N*N+M*N) +* (RWorkspace: 0) +* + DO 10 I = 1, M, LDWRKR + CHUNK = MIN( M-I+1, LDWRKR ) + CALL CGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ), + $ LDA, WORK( IU ), LDWRKU, CZERO, + $ WORK( IR ), LDWRKR ) + CALL CLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR, + $ A( I, 1 ), LDA ) + 10 CONTINUE +* + ELSE IF( WNTQS ) THEN +* +* Path 3 (M much larger than N, JOBZ='S') +* N left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IR = 1 +* +* WORK(IR) is N by N +* + LDWRKR = N + ITAU = IR + LDWRKR*N + NWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, WORK( IR+1 ), + $ LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRU = IE + N + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of R +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CLACP2( 'F', N, N, RWORK( IRU ), N, U, LDU ) + CALL CUNMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of R +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL CUNMBR( 'P', 'R', 'C', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in U +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CLACPY( 'F', N, N, U, LDU, WORK( IR ), LDWRKR ) + CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA, WORK( IR ), + $ LDWRKR, CZERO, U, LDU ) +* + ELSE IF( WNTQA ) THEN +* +* Path 4 (M much larger than N, JOBZ='A') +* M left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IU = 1 +* +* WORK(IU) is N by N +* + LDWRKU = N + ITAU = IU + LDWRKU*N + NWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need N+M, prefer N+M*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Produce R in A, zeroing out below it +* + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), + $ LDA ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IRU = IE + N + IRVT = IRU + N*N + NRWORK = IRVT + N*N +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix WORK(IU) +* Overwrite WORK(IU) by left singular vectors of R +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CLACP2( 'F', N, N, RWORK( IRU ), N, WORK( IU ), + $ LDWRKU ) + CALL CUNMBR( 'Q', 'L', 'N', N, N, N, A, LDA, + $ WORK( ITAUQ ), WORK( IU ), LDWRKU, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of R +* (CWorkspace: need 3*N, prefer 2*N+N*NB) +* (RWorkspace: 0) +* + CALL CLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL CUNMBR( 'P', 'R', 'C', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU, WORK( IU ), + $ LDWRKU, CZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL CLACPY( 'F', M, N, A, LDA, U, LDU ) +* + END IF +* + ELSE IF( M.GE.MNTHR2 ) THEN +* +* MNTHR2 <= M < MNTHR1 +* +* Path 5 (M much larger than N, but not as much as MNTHR1) +* Reduce to bidiagonal form without QR decomposition, use +* CUNGBR and matrix multiplication to compute singular vectors +* + IE = 1 + NRWORK = IE + N + ITAUQ = 1 + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize A +* (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IF( WNTQN ) THEN +* +* Compute singular values only +* (Cworkspace: 0) +* (Rworkspace: need BDSPAC) +* + CALL SBDSDC( 'U', 'N', N, S, RWORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN + IU = NWORK + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N +* +* Copy A to VT, generate P**H +* (Cworkspace: need 2*N, prefer N+N*NB) +* (Rworkspace: 0) +* + CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Generate Q in A +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* + IF( LWORK.GE.M*N+3*N ) THEN +* +* WORK( IU ) is M by N +* + LDWRKU = M + ELSE +* +* WORK(IU) is LDWRKU by N +* + LDWRKU = ( LWORK-3*N ) / N + END IF + NWORK = IU + LDWRKU*N +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply real matrix RWORK(IRVT) by P**H in VT, +* storing the result in WORK(IU), copying to VT +* (Cworkspace: need 0) +* (Rworkspace: need 3*N*N) +* + CALL CLARCM( N, N, RWORK( IRVT ), N, VT, LDVT, + $ WORK( IU ), LDWRKU, RWORK( NRWORK ) ) + CALL CLACPY( 'F', N, N, WORK( IU ), LDWRKU, VT, LDVT ) +* +* Multiply Q in A by real matrix RWORK(IRU), storing the +* result in WORK(IU), copying to A +* (CWorkspace: need N*N, prefer M*N) +* (Rworkspace: need 3*N*N, prefer N*N+2*M*N) +* + NRWORK = IRVT + DO 20 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL CLACRM( CHUNK, N, A( I, 1 ), LDA, RWORK( IRU ), + $ N, WORK( IU ), LDWRKU, RWORK( NRWORK ) ) + CALL CLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 20 CONTINUE +* + ELSE IF( WNTQS ) THEN +* +* Copy A to VT, generate P**H +* (Cworkspace: need 2*N, prefer N+N*NB) +* (Rworkspace: 0) +* + CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy A to U, generate Q +* (Cworkspace: need 2*N, prefer N+N*NB) +* (Rworkspace: 0) +* + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) + CALL CUNGBR( 'Q', M, N, N, U, LDU, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply real matrix RWORK(IRVT) by P**H in VT, +* storing the result in A, copying to VT +* (Cworkspace: need 0) +* (Rworkspace: need 3*N*N) +* + CALL CLARCM( N, N, RWORK( IRVT ), N, VT, LDVT, A, LDA, + $ RWORK( NRWORK ) ) + CALL CLACPY( 'F', N, N, A, LDA, VT, LDVT ) +* +* Multiply Q in U by real matrix RWORK(IRU), storing the +* result in A, copying to U +* (CWorkspace: need 0) +* (Rworkspace: need N*N+2*M*N) +* + NRWORK = IRVT + CALL CLACRM( M, N, U, LDU, RWORK( IRU ), N, A, LDA, + $ RWORK( NRWORK ) ) + CALL CLACPY( 'F', M, N, A, LDA, U, LDU ) + ELSE +* +* Copy A to VT, generate P**H +* (Cworkspace: need 2*N, prefer N+N*NB) +* (Rworkspace: 0) +* + CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy A to U, generate Q +* (Cworkspace: need 2*N, prefer N+N*NB) +* (Rworkspace: 0) +* + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) + CALL CUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply real matrix RWORK(IRVT) by P**H in VT, +* storing the result in A, copying to VT +* (Cworkspace: need 0) +* (Rworkspace: need 3*N*N) +* + CALL CLARCM( N, N, RWORK( IRVT ), N, VT, LDVT, A, LDA, + $ RWORK( NRWORK ) ) + CALL CLACPY( 'F', N, N, A, LDA, VT, LDVT ) +* +* Multiply Q in U by real matrix RWORK(IRU), storing the +* result in A, copying to U +* (CWorkspace: 0) +* (Rworkspace: need 3*N*N) +* + NRWORK = IRVT + CALL CLACRM( M, N, U, LDU, RWORK( IRU ), N, A, LDA, + $ RWORK( NRWORK ) ) + CALL CLACPY( 'F', M, N, A, LDA, U, LDU ) + END IF +* + ELSE +* +* M .LT. MNTHR2 +* +* Path 6 (M at least N, but not much larger) +* Reduce to bidiagonal form without QR decomposition +* Use CUNMBR to compute singular vectors +* + IE = 1 + NRWORK = IE + N + ITAUQ = 1 + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize A +* (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IF( WNTQN ) THEN +* +* Compute singular values only +* (Cworkspace: 0) +* (Rworkspace: need BDSPAC) +* + CALL SBDSDC( 'U', 'N', N, S, RWORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN + IU = NWORK + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N + IF( LWORK.GE.M*N+3*N ) THEN +* +* WORK( IU ) is M by N +* + LDWRKU = M + ELSE +* +* WORK( IU ) is LDWRKU by N +* + LDWRKU = ( LWORK-3*N ) / N + END IF + NWORK = IU + LDWRKU*N +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of A +* (Cworkspace: need 2*N, prefer N+N*NB) +* (Rworkspace: need 0) +* + CALL CLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL CUNMBR( 'P', 'R', 'C', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* + IF( LWORK.GE.M*N+3*N ) THEN +* +* Copy real matrix RWORK(IRU) to complex matrix WORK(IU) +* Overwrite WORK(IU) by left singular vectors of A, copying +* to A +* (Cworkspace: need M*N+2*N, prefer M*N+N+N*NB) +* (Rworkspace: need 0) +* + CALL CLASET( 'F', M, N, CZERO, CZERO, WORK( IU ), + $ LDWRKU ) + CALL CLACP2( 'F', N, N, RWORK( IRU ), N, WORK( IU ), + $ LDWRKU ) + CALL CUNMBR( 'Q', 'L', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), WORK( IU ), LDWRKU, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + CALL CLACPY( 'F', M, N, WORK( IU ), LDWRKU, A, LDA ) + ELSE +* +* Generate Q in A +* (Cworkspace: need 2*N, prefer N+N*NB) +* (Rworkspace: need 0) +* + CALL CUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by real matrix RWORK(IRU), storing the +* result in WORK(IU), copying to A +* (CWorkspace: need N*N, prefer M*N) +* (Rworkspace: need 3*N*N, prefer N*N+2*M*N) +* + NRWORK = IRVT + DO 30 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL CLACRM( CHUNK, N, A( I, 1 ), LDA, + $ RWORK( IRU ), N, WORK( IU ), LDWRKU, + $ RWORK( NRWORK ) ) + CALL CLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 30 CONTINUE + END IF +* + ELSE IF( WNTQS ) THEN +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of A +* (CWorkspace: need 3*N, prefer 2*N+N*NB) +* (RWorkspace: 0) +* + CALL CLASET( 'F', M, N, CZERO, CZERO, U, LDU ) + CALL CLACP2( 'F', N, N, RWORK( IRU ), N, U, LDU ) + CALL CUNMBR( 'Q', 'L', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of A +* (CWorkspace: need 3*N, prefer 2*N+N*NB) +* (RWorkspace: 0) +* + CALL CLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL CUNMBR( 'P', 'R', 'C', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + ELSE +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Set the right corner of U to identity matrix +* + CALL CLASET( 'F', M, M, CZERO, CZERO, U, LDU ) + CALL CLASET( 'F', M-N, M-N, CZERO, CONE, U( N+1, N+1 ), + $ LDU ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of A +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL CLACP2( 'F', N, N, RWORK( IRU ), N, U, LDU ) + CALL CUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of A +* (CWorkspace: need 3*N, prefer 2*N+N*NB) +* (RWorkspace: 0) +* + CALL CLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL CUNMBR( 'P', 'R', 'C', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + END IF +* + END IF +* + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce using the LQ decomposition +* (if sufficient workspace available) +* + IF( N.GE.MNTHR1 ) THEN +* + IF( WNTQN ) THEN +* +* Path 1t (N much larger than M, JOBZ='N') +* No singular vectors to be computed +* + ITAU = 1 + NWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Zero out above L +* + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ), + $ LDA ) + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + NRWORK = IE + M +* +* Perform bidiagonal SVD, compute singular values only +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL SBDSDC( 'U', 'N', M, S, RWORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) +* + ELSE IF( WNTQO ) THEN +* +* Path 2t (N much larger than M, JOBZ='O') +* M right singular vectors to be overwritten on A and +* M left singular vectors to be computed in U +* + IVT = 1 + LDWKVT = M +* +* WORK(IVT) is M by M +* + IL = IVT + LDWKVT*M + IF( LWORK.GE.M*N+M*M+3*M ) THEN +* +* WORK(IL) M by N +* + LDWRKL = M + CHUNK = N + ELSE +* +* WORK(IL) is M by CHUNK +* + LDWRKL = M + CHUNK = ( LWORK-M*M-3*M ) / M + END IF + ITAU = IL + LDWRKL*CHUNK + NWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy L to WORK(IL), zeroing about above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IL+LDWRKL ), LDWRKL ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IL ), LDWRKL, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRU = IE + M + IRVT = IRU + M*M + NRWORK = IRVT + M*M + CALL SBDSDC( 'U', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix WORK(IU) +* Overwrite WORK(IU) by the left singular vectors of L +* (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL CUNMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) +* Overwrite WORK(IVT) by the right singular vectors of L +* (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CLACP2( 'F', M, M, RWORK( IRVT ), M, WORK( IVT ), + $ LDWKVT ) + CALL CUNMBR( 'P', 'R', 'C', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUP ), WORK( IVT ), LDWKVT, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply right singular vectors of L in WORK(IL) by Q +* in A, storing result in WORK(IL) and copying to A +* (CWorkspace: need 2*M*M, prefer M*M+M*N)) +* (RWorkspace: 0) +* + DO 40 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL CGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IVT ), M, + $ A( 1, I ), LDA, CZERO, WORK( IL ), + $ LDWRKL ) + CALL CLACPY( 'F', M, BLK, WORK( IL ), LDWRKL, + $ A( 1, I ), LDA ) + 40 CONTINUE +* + ELSE IF( WNTQS ) THEN +* +* Path 3t (N much larger than M, JOBZ='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IL = 1 +* +* WORK(IL) is M by M +* + LDWRKL = M + ITAU = IL + LDWRKL*M + NWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy L to WORK(IL), zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IL+LDWRKL ), LDWRKL ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IL ), LDWRKL, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRU = IE + M + IRVT = IRU + M*M + NRWORK = IRVT + M*M + CALL SBDSDC( 'U', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of L +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL CLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL CUNMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by left singular vectors of L +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL CLACP2( 'F', M, M, RWORK( IRVT ), M, VT, LDVT ) + CALL CUNMBR( 'P', 'R', 'C', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy VT to WORK(IL), multiply right singular vectors of L +* in WORK(IL) by Q in A, storing result in VT +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CLACPY( 'F', M, M, VT, LDVT, WORK( IL ), LDWRKL ) + CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IL ), LDWRKL, + $ A, LDA, CZERO, VT, LDVT ) +* + ELSE IF( WNTQA ) THEN +* +* Path 9t (N much larger than M, JOBZ='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IVT = 1 +* +* WORK(IVT) is M by M +* + LDWKVT = M + ITAU = IVT + LDWKVT*M + NWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need M+N, prefer M+N*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Produce L in A, zeroing out above it +* + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ), + $ LDA ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in A +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRU = IE + M + IRVT = IRU + M*M + NRWORK = IRVT + M*M + CALL SBDSDC( 'U', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of L +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL CUNMBR( 'Q', 'L', 'N', M, M, M, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) +* Overwrite WORK(IVT) by right singular vectors of L +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL CLACP2( 'F', M, M, RWORK( IRVT ), M, WORK( IVT ), + $ LDWKVT ) + CALL CUNMBR( 'P', 'R', 'C', M, M, M, A, LDA, + $ WORK( ITAUP ), WORK( IVT ), LDWKVT, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply right singular vectors of L in WORK(IVT) by +* Q in VT, storing result in A +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IVT ), LDWKVT, + $ VT, LDVT, CZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + END IF +* + ELSE IF( N.GE.MNTHR2 ) THEN +* +* MNTHR2 <= N < MNTHR1 +* +* Path 5t (N much larger than M, but not as much as MNTHR1) +* Reduce to bidiagonal form without QR decomposition, use +* CUNGBR and matrix multiplication to compute singular vectors +* +* + IE = 1 + NRWORK = IE + M + ITAUQ = 1 + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize A +* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) +* (RWorkspace: M) +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) +* + IF( WNTQN ) THEN +* +* Compute singular values only +* (Cworkspace: 0) +* (Rworkspace: need BDSPAC) +* + CALL SBDSDC( 'L', 'N', M, S, RWORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M + IVT = NWORK +* +* Copy A to U, generate Q +* (Cworkspace: need 2*M, prefer M+M*NB) +* (Rworkspace: 0) +* + CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL CUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Generate P**H in A +* (Cworkspace: need 2*M, prefer M+M*NB) +* (Rworkspace: 0) +* + CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* + LDWKVT = M + IF( LWORK.GE.M*N+3*M ) THEN +* +* WORK( IVT ) is M by N +* + NWORK = IVT + LDWKVT*N + CHUNK = N + ELSE +* +* WORK( IVT ) is M by CHUNK +* + CHUNK = ( LWORK-3*M ) / M + NWORK = IVT + LDWKVT*CHUNK + END IF +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + CALL SBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply Q in U by real matrix RWORK(IRVT) +* storing the result in WORK(IVT), copying to U +* (Cworkspace: need 0) +* (Rworkspace: need 2*M*M) +* + CALL CLACRM( M, M, U, LDU, RWORK( IRU ), M, WORK( IVT ), + $ LDWKVT, RWORK( NRWORK ) ) + CALL CLACPY( 'F', M, M, WORK( IVT ), LDWKVT, U, LDU ) +* +* Multiply RWORK(IRVT) by P**H in A, storing the +* result in WORK(IVT), copying to A +* (CWorkspace: need M*M, prefer M*N) +* (Rworkspace: need 2*M*M, prefer 2*M*N) +* + NRWORK = IRU + DO 50 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL CLARCM( M, BLK, RWORK( IRVT ), M, A( 1, I ), LDA, + $ WORK( IVT ), LDWKVT, RWORK( NRWORK ) ) + CALL CLACPY( 'F', M, BLK, WORK( IVT ), LDWKVT, + $ A( 1, I ), LDA ) + 50 CONTINUE + ELSE IF( WNTQS ) THEN +* +* Copy A to U, generate Q +* (Cworkspace: need 2*M, prefer M+M*NB) +* (Rworkspace: 0) +* + CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL CUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy A to VT, generate P**H +* (Cworkspace: need 2*M, prefer M+M*NB) +* (Rworkspace: 0) +* + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) + CALL CUNGBR( 'P', M, N, M, VT, LDVT, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M + CALL SBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply Q in U by real matrix RWORK(IRU), storing the +* result in A, copying to U +* (CWorkspace: need 0) +* (Rworkspace: need 3*M*M) +* + CALL CLACRM( M, M, U, LDU, RWORK( IRU ), M, A, LDA, + $ RWORK( NRWORK ) ) + CALL CLACPY( 'F', M, M, A, LDA, U, LDU ) +* +* Multiply real matrix RWORK(IRVT) by P**H in VT, +* storing the result in A, copying to VT +* (Cworkspace: need 0) +* (Rworkspace: need M*M+2*M*N) +* + NRWORK = IRU + CALL CLARCM( M, N, RWORK( IRVT ), M, VT, LDVT, A, LDA, + $ RWORK( NRWORK ) ) + CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT ) + ELSE +* +* Copy A to U, generate Q +* (Cworkspace: need 2*M, prefer M+M*NB) +* (Rworkspace: 0) +* + CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL CUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy A to VT, generate P**H +* (Cworkspace: need 2*M, prefer M+M*NB) +* (Rworkspace: 0) +* + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) + CALL CUNGBR( 'P', N, N, M, VT, LDVT, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M + CALL SBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply Q in U by real matrix RWORK(IRU), storing the +* result in A, copying to U +* (CWorkspace: need 0) +* (Rworkspace: need 3*M*M) +* + CALL CLACRM( M, M, U, LDU, RWORK( IRU ), M, A, LDA, + $ RWORK( NRWORK ) ) + CALL CLACPY( 'F', M, M, A, LDA, U, LDU ) +* +* Multiply real matrix RWORK(IRVT) by P**H in VT, +* storing the result in A, copying to VT +* (Cworkspace: need 0) +* (Rworkspace: need M*M+2*M*N) +* + CALL CLARCM( M, N, RWORK( IRVT ), M, VT, LDVT, A, LDA, + $ RWORK( NRWORK ) ) + CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT ) + END IF +* + ELSE +* +* N .LT. MNTHR2 +* +* Path 6t (N greater than M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* Use CUNMBR to compute singular vectors +* + IE = 1 + NRWORK = IE + M + ITAUQ = 1 + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize A +* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) +* (RWorkspace: M) +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IF( WNTQN ) THEN +* +* Compute singular values only +* (Cworkspace: 0) +* (Rworkspace: need BDSPAC) +* + CALL SBDSDC( 'L', 'N', M, S, RWORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN + LDWKVT = M + IVT = NWORK + IF( LWORK.GE.M*N+3*M ) THEN +* +* WORK( IVT ) is M by N +* + CALL CLASET( 'F', M, N, CZERO, CZERO, WORK( IVT ), + $ LDWKVT ) + NWORK = IVT + LDWKVT*N + ELSE +* +* WORK( IVT ) is M by CHUNK +* + CHUNK = ( LWORK-3*M ) / M + NWORK = IVT + LDWKVT*CHUNK + END IF +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M + CALL SBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of A +* (Cworkspace: need 2*M, prefer M+M*NB) +* (Rworkspace: need 0) +* + CALL CLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL CUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* + IF( LWORK.GE.M*N+3*M ) THEN +* +* Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) +* Overwrite WORK(IVT) by right singular vectors of A, +* copying to A +* (Cworkspace: need M*N+2*M, prefer M*N+M+M*NB) +* (Rworkspace: need 0) +* + CALL CLACP2( 'F', M, M, RWORK( IRVT ), M, WORK( IVT ), + $ LDWKVT ) + CALL CUNMBR( 'P', 'R', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), WORK( IVT ), LDWKVT, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + CALL CLACPY( 'F', M, N, WORK( IVT ), LDWKVT, A, LDA ) + ELSE +* +* Generate P**H in A +* (Cworkspace: need 2*M, prefer M+M*NB) +* (Rworkspace: need 0) +* + CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by real matrix RWORK(IRU), storing the +* result in WORK(IU), copying to A +* (CWorkspace: need M*M, prefer M*N) +* (Rworkspace: need 3*M*M, prefer M*M+2*M*N) +* + NRWORK = IRU + DO 60 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL CLARCM( M, BLK, RWORK( IRVT ), M, A( 1, I ), + $ LDA, WORK( IVT ), LDWKVT, + $ RWORK( NRWORK ) ) + CALL CLACPY( 'F', M, BLK, WORK( IVT ), LDWKVT, + $ A( 1, I ), LDA ) + 60 CONTINUE + END IF + ELSE IF( WNTQS ) THEN +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M + CALL SBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: M*M) +* + CALL CLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL CUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: M*M) +* + CALL CLASET( 'F', M, N, CZERO, CZERO, VT, LDVT ) + CALL CLACP2( 'F', M, M, RWORK( IRVT ), M, VT, LDVT ) + CALL CUNMBR( 'P', 'R', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + ELSE +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M +* + CALL SBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: M*M) +* + CALL CLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL CUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Set the right corner of VT to identity matrix +* + CALL CLASET( 'F', N-M, N-M, CZERO, CONE, VT( M+1, M+1 ), + $ LDVT ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of A +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: M*M) +* + CALL CLASET( 'F', N, N, CZERO, CZERO, VT, LDVT ) + CALL CLACP2( 'F', M, M, RWORK( IRVT ), M, VT, LDVT ) + CALL CUNMBR( 'P', 'R', 'C', N, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + END IF +* + END IF +* + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( ANRM.LT.SMLNUM ) + $ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of CGESDD +* + END diff --git a/costa/native/external/lapack/cgesv.f b/costa/native/external/lapack/cgesv.f new file mode 100644 index 000000000..0ed85cd42 --- /dev/null +++ b/costa/native/external/lapack/cgesv.f @@ -0,0 +1,108 @@ + SUBROUTINE CGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CGESV computes the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +* +* The LU decomposition with partial pivoting and row interchanges is +* used to factor A as +* A = P * L * U, +* where P is a permutation matrix, L is unit lower triangular, and U is +* upper triangular. The factored form of A is then used to solve the +* system of equations A * X = B. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the N-by-N coefficient matrix A. +* On exit, the factors L and U from the factorization +* A = P*L*U; the unit diagonal elements of L are not stored. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* The pivot indices that define the permutation matrix P; +* row i of the matrix was interchanged with row IPIV(i). +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS matrix of right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, so the solution could not be computed. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL CGETRF, CGETRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGESV ', -INFO ) + RETURN + END IF +* +* Compute the LU factorization of A. +* + CALL CGETRF( N, N, A, LDA, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, B, LDB, + $ INFO ) + END IF + RETURN +* +* End of CGESV +* + END diff --git a/costa/native/external/lapack/cgesvd.f b/costa/native/external/lapack/cgesvd.f new file mode 100644 index 000000000..137ed7ed7 --- /dev/null +++ b/costa/native/external/lapack/cgesvd.f @@ -0,0 +1,3618 @@ + SUBROUTINE CGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, + $ WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBU, JOBVT + INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. + REAL RWORK( * ), S( * ) + COMPLEX A( LDA, * ), U( LDU, * ), VT( LDVT, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGESVD computes the singular value decomposition (SVD) of a complex +* M-by-N matrix A, optionally computing the left and/or right singular +* vectors. The SVD is written +* +* A = U * SIGMA * conjugate-transpose(V) +* +* where SIGMA is an M-by-N matrix which is zero except for its +* min(m,n) diagonal elements, U is an M-by-M unitary matrix, and +* V is an N-by-N unitary matrix. The diagonal elements of SIGMA +* are the singular values of A; they are real and non-negative, and +* are returned in descending order. The first min(m,n) columns of +* U and V are the left and right singular vectors of A. +* +* Note that the routine returns V**H, not V. +* +* Arguments +* ========= +* +* JOBU (input) CHARACTER*1 +* Specifies options for computing all or part of the matrix U: +* = 'A': all M columns of U are returned in array U: +* = 'S': the first min(m,n) columns of U (the left singular +* vectors) are returned in the array U; +* = 'O': the first min(m,n) columns of U (the left singular +* vectors) are overwritten on the array A; +* = 'N': no columns of U (no left singular vectors) are +* computed. +* +* JOBVT (input) CHARACTER*1 +* Specifies options for computing all or part of the matrix +* V**H: +* = 'A': all N rows of V**H are returned in the array VT; +* = 'S': the first min(m,n) rows of V**H (the right singular +* vectors) are returned in the array VT; +* = 'O': the first min(m,n) rows of V**H (the right singular +* vectors) are overwritten on the array A; +* = 'N': no rows of V**H (no right singular vectors) are +* computed. +* +* JOBVT and JOBU cannot both be 'O'. +* +* M (input) INTEGER +* The number of rows of the input matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the input matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, +* if JOBU = 'O', A is overwritten with the first min(m,n) +* columns of U (the left singular vectors, +* stored columnwise); +* if JOBVT = 'O', A is overwritten with the first min(m,n) +* rows of V**H (the right singular vectors, +* stored rowwise); +* if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A +* are destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* S (output) REAL array, dimension (min(M,N)) +* The singular values of A, sorted so that S(i) >= S(i+1). +* +* U (output) COMPLEX array, dimension (LDU,UCOL) +* (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. +* If JOBU = 'A', U contains the M-by-M unitary matrix U; +* if JOBU = 'S', U contains the first min(m,n) columns of U +* (the left singular vectors, stored columnwise); +* if JOBU = 'N' or 'O', U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= 1; if +* JOBU = 'S' or 'A', LDU >= M. +* +* VT (output) COMPLEX array, dimension (LDVT,N) +* If JOBVT = 'A', VT contains the N-by-N unitary matrix +* V**H; +* if JOBVT = 'S', VT contains the first min(m,n) rows of +* V**H (the right singular vectors, stored rowwise); +* if JOBVT = 'N' or 'O', VT is not referenced. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. LDVT >= 1; if +* JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1. +* LWORK >= 2*MIN(M,N)+MAX(M,N). +* For good performance, LWORK should generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) REAL array, dimension (5*min(M,N)) +* On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the +* unconverged superdiagonal elements of an upper bidiagonal +* matrix B whose diagonal is in S (not necessarily sorted). +* B satisfies A = U * B * VT, so it has the same singular +* values as A, and singular vectors related by U and VT. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if CBDSQR did not converge, INFO specifies how many +* superdiagonals of an intermediate bidiagonal form B +* did not converge to zero. See the description of RWORK +* above for details. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS, + $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS + INTEGER BLK, CHUNK, I, IE, IERR, IR, IRWORK, ISCL, + $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU, + $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU, + $ NRVT, WRKBL + REAL ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + REAL DUM( 1 ) + COMPLEX CDUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CBDSQR, CGEBRD, CGELQF, CGEMM, CGEQRF, CLACPY, + $ CLASCL, CLASET, CUNGBR, CUNGLQ, CUNGQR, CUNMBR, + $ SLASCL, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 ) + WNTUA = LSAME( JOBU, 'A' ) + WNTUS = LSAME( JOBU, 'S' ) + WNTUAS = WNTUA .OR. WNTUS + WNTUO = LSAME( JOBU, 'O' ) + WNTUN = LSAME( JOBU, 'N' ) + WNTVA = LSAME( JOBVT, 'A' ) + WNTVS = LSAME( JOBVT, 'S' ) + WNTVAS = WNTVA .OR. WNTVS + WNTVO = LSAME( JOBVT, 'O' ) + WNTVN = LSAME( JOBVT, 'N' ) + MINWRK = 1 + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR. + $ ( WNTVO .AND. WNTUO ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN + INFO = -9 + ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR. + $ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to +* real workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) .AND. M.GT.0 .AND. + $ N.GT.0 ) THEN + IF( M.GE.N ) THEN +* +* Space needed for CBDSQR is BDSPAC = 5*N +* + IF( M.GE.MNTHR ) THEN + IF( WNTUN ) THEN +* +* Path 1 (M much larger than N, JOBU='N') +* + MAXWRK = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, + $ -1 ) + MAXWRK = MAX( MAXWRK, 2*N+2*N* + $ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) + IF( WNTVO .OR. WNTVAS ) + $ MAXWRK = MAX( MAXWRK, 2*N+( N-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) + MINWRK = 3*N + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTUO .AND. WNTVN ) THEN +* +* Path 2 (M much larger than N, JOBU='O', JOBVT='N') +* + WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) + MAXWRK = MAX( N*N+WRKBL, N*N+M*N ) + MINWRK = 2*N + M + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTUO .AND. WNTVAS ) THEN +* +* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or +* 'A') +* + WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+( N-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = MAX( N*N+WRKBL, N*N+M*N ) + MINWRK = 2*N + M + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTUS .AND. WNTVN ) THEN +* +* Path 4 (M much larger than N, JOBU='S', JOBVT='N') +* + WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) + MAXWRK = N*N + WRKBL + MINWRK = 2*N + M + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTUS .AND. WNTVO ) THEN +* +* Path 5 (M much larger than N, JOBU='S', JOBVT='O') +* + WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+( N-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = 2*N*N + WRKBL + MINWRK = 2*N + M + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTUS .AND. WNTVAS ) THEN +* +* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or +* 'A') +* + WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+( N-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = N*N + WRKBL + MINWRK = 2*N + M + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTUA .AND. WNTVN ) THEN +* +* Path 7 (M much larger than N, JOBU='A', JOBVT='N') +* + WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'CUNGQR', ' ', M, + $ M, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) + MAXWRK = N*N + WRKBL + MINWRK = 2*N + M + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTUA .AND. WNTVO ) THEN +* +* Path 8 (M much larger than N, JOBU='A', JOBVT='O') +* + WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'CUNGQR', ' ', M, + $ M, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+( N-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = 2*N*N + WRKBL + MINWRK = 2*N + M + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTUA .AND. WNTVAS ) THEN +* +* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or +* 'A') +* + WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'CUNGQR', ' ', M, + $ M, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+( N-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = N*N + WRKBL + MINWRK = 2*N + M + MAXWRK = MAX( MINWRK, MAXWRK ) + END IF + ELSE +* +* Path 10 (M at least N, but not much larger) +* + MAXWRK = 2*N + ( M+N )*ILAENV( 1, 'CGEBRD', ' ', M, N, + $ -1, -1 ) + IF( WNTUS .OR. WNTUO ) + $ MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'CUNGBR', 'Q', M, N, N, -1 ) ) + IF( WNTUA ) + $ MAXWRK = MAX( MAXWRK, 2*N+M* + $ ILAENV( 1, 'CUNGBR', 'Q', M, M, N, -1 ) ) + IF( .NOT.WNTVN ) + $ MAXWRK = MAX( MAXWRK, 2*N+( N-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) ) + MINWRK = 2*N + M + MAXWRK = MAX( MINWRK, MAXWRK ) + END IF + ELSE +* +* Space needed for CBDSQR is BDSPAC = 5*M +* + IF( N.GE.MNTHR ) THEN + IF( WNTVN ) THEN +* +* Path 1t(N much larger than M, JOBVT='N') +* + MAXWRK = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, + $ -1 ) + MAXWRK = MAX( MAXWRK, 2*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + IF( WNTUO .OR. WNTUAS ) + $ MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) ) + MINWRK = 3*M + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTVO .AND. WNTUN ) THEN +* +* Path 2t(N much larger than M, JOBU='N', JOBVT='O') +* + WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) + MAXWRK = MAX( M*M+WRKBL, M*M+M*N ) + MINWRK = 2*M + N + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTVO .AND. WNTUAS ) THEN +* +* Path 3t(N much larger than M, JOBU='S' or 'A', +* JOBVT='O') +* + WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) ) + MAXWRK = MAX( M*M+WRKBL, M*M+M*N ) + MINWRK = 2*M + N + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTVS .AND. WNTUN ) THEN +* +* Path 4t(N much larger than M, JOBU='N', JOBVT='S') +* + WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) + MAXWRK = M*M + WRKBL + MINWRK = 2*M + N + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTVS .AND. WNTUO ) THEN +* +* Path 5t(N much larger than M, JOBU='O', JOBVT='S') +* + WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) ) + MAXWRK = 2*M*M + WRKBL + MINWRK = 2*M + N + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTVS .AND. WNTUAS ) THEN +* +* Path 6t(N much larger than M, JOBU='S' or 'A', +* JOBVT='S') +* + WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) ) + MAXWRK = M*M + WRKBL + MINWRK = 2*M + N + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTVA .AND. WNTUN ) THEN +* +* Path 7t(N much larger than M, JOBU='N', JOBVT='A') +* + WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'CUNGLQ', ' ', N, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) + MAXWRK = M*M + WRKBL + MINWRK = 2*M + N + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTVA .AND. WNTUO ) THEN +* +* Path 8t(N much larger than M, JOBU='O', JOBVT='A') +* + WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'CUNGLQ', ' ', N, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) ) + MAXWRK = 2*M*M + WRKBL + MINWRK = 2*M + N + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTVA .AND. WNTUAS ) THEN +* +* Path 9t(N much larger than M, JOBU='S' or 'A', +* JOBVT='A') +* + WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'CUNGLQ', ' ', N, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) ) + MAXWRK = M*M + WRKBL + MINWRK = 2*M + N + MAXWRK = MAX( MINWRK, MAXWRK ) + END IF + ELSE +* +* Path 10t(N greater than M, but not much larger) +* + MAXWRK = 2*M + ( M+N )*ILAENV( 1, 'CGEBRD', ' ', M, N, + $ -1, -1 ) + IF( WNTVS .OR. WNTVO ) + $ MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'CUNGBR', 'P', M, N, M, -1 ) ) + IF( WNTVA ) + $ MAXWRK = MAX( MAXWRK, 2*M+N* + $ ILAENV( 1, 'CUNGBR', 'P', N, N, M, -1 ) ) + IF( .NOT.WNTUN ) + $ MAXWRK = MAX( MAXWRK, 2*M+( M-1 )* + $ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) ) + MINWRK = 2*M + N + MAXWRK = MAX( MINWRK, MAXWRK ) + END IF + END IF + WORK( 1 ) = MAXWRK + END IF +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGESVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + IF( LWORK.GE.1 ) + $ WORK( 1 ) = ONE + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce using the QR +* decomposition (if sufficient workspace available) +* + IF( M.GE.MNTHR ) THEN +* + IF( WNTUN ) THEN +* +* Path 1 (M much larger than N, JOBU='N') +* No left singular vectors to be computed +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: need 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Zero out below R +* + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), + $ LDA ) + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + NCVT = 0 + IF( WNTVO .OR. WNTVAS ) THEN +* +* If right singular vectors desired, generate P'. +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + NCVT = N + END IF + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in A if desired +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, NCVT, 0, 0, S, RWORK( IE ), A, LDA, + $ CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO ) +* +* If right singular vectors desired in VT, copy them there +* + IF( WNTVAS ) + $ CALL CLACPY( 'F', N, N, A, LDA, VT, LDVT ) +* + ELSE IF( WNTUO .AND. WNTVN ) THEN +* +* Path 2 (M much larger than N, JOBU='O', JOBVT='N') +* N left singular vectors to be overwritten on A and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN +* +* WORK(IU) is LDA by N, WORK(IR) is LDA by N +* + LDWRKU = LDA + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN +* +* WORK(IU) is LDA by N, WORK(IR) is N by N +* + LDWRKU = LDA + LDWRKR = N + ELSE +* +* WORK(IU) is LDWRKU by N, WORK(IR) is N by N +* + LDWRKU = ( LWORK-N*N ) / N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IR) and zero out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing R +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: need 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, 1, + $ WORK( IR ), LDWRKR, CDUM, 1, + $ RWORK( IRWORK ), INFO ) + IU = ITAUQ +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in WORK(IU) and copying to A +* (CWorkspace: need N*N+N, prefer N*N+M*N) +* (RWorkspace: 0) +* + DO 10 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL CGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ), + $ LDA, WORK( IR ), LDWRKR, CZERO, + $ WORK( IU ), LDWRKU ) + CALL CLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 10 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize A +* (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) +* (RWorkspace: N) +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing A +* (CWorkspace: need 3*N, prefer 2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, 1, + $ A, LDA, CDUM, 1, RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUO .AND. WNTVAS ) THEN +* +* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') +* N left singular vectors to be overwritten on A and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + LDWRKR = N + ELSE +* +* WORK(IU) is LDWRKU by N and WORK(IR) is N by N +* + LDWRKU = ( LWORK-N*N ) / N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, VT( 2, 1 ), + $ LDVT ) +* +* Generate Q in A +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT, copying result to WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR ) +* +* Generate left vectors bidiagonalizing R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in VT +* (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) and computing right +* singular vectors of R in VT +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT, + $ LDVT, WORK( IR ), LDWRKR, CDUM, 1, + $ RWORK( IRWORK ), INFO ) + IU = ITAUQ +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in WORK(IU) and copying to A +* (CWorkspace: need N*N+N, prefer N*N+M*N) +* (RWorkspace: 0) +* + DO 20 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL CGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ), + $ LDA, WORK( IR ), LDWRKR, CZERO, + $ WORK( IU ), LDWRKU ) + CALL CLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 20 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, VT( 2, 1 ), + $ LDVT ) +* +* Generate Q in A +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: N) +* + CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in A by left vectors bidiagonalizing R +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), A, LDA, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in VT +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUS ) THEN +* + IF( WNTVN ) THEN +* +* Path 4 (M much larger than N, JOBU='S', JOBVT='N') +* N left singular vectors to be computed in U and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IR) is LDA by N +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is N by N +* + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, + $ 1, WORK( IR ), LDWRKR, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in U +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA, + $ WORK( IR ), LDWRKR, CZERO, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ A( 2, 1 ), LDA ) +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left vectors bidiagonalizing R +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, + $ 1, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVO ) THEN +* +* Path 5 (M much larger than N, JOBU='S', JOBVT='O') +* N left singular vectors to be computed in U and +* N right singular vectors to be overwritten on A +* + IF( LWORK.GE.2*N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = N + ELSE +* +* WORK(IU) is N by N and WORK(IR) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IU+1 ), LDWRKU ) +* +* Generate Q in A +* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to +* WORK(IR) +* (CWorkspace: need 2*N*N+3*N, +* prefer 2*N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need 2*N*N+3*N-1, +* prefer 2*N*N+2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in WORK(IR) +* (CWorkspace: need 2*N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, WORK( IU ), + $ LDWRKU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in U +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA, + $ WORK( IU ), LDWRKU, CZERO, U, LDU ) +* +* Copy right singular vectors of R to A +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ A( 2, 1 ), LDA ) +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left vectors bidiagonalizing R +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in A +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A, + $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVAS ) THEN +* +* Path 6 (M much larger than N, JOBU='S', JOBVT='S' +* or 'A') +* N left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is N by N +* + LDWRKU = N + END IF + ITAU = IU + LDWRKU*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IU+1 ), LDWRKU ) +* +* Generate Q in A +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to VT +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, + $ LDVT ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (CWorkspace: need N*N+3*N-1, +* prefer N*N+2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in VT +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT, + $ LDVT, WORK( IU ), LDWRKU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in U +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA, + $ WORK( IU ), LDWRKU, CZERO, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ VT( 2, 1 ), LDVT ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in VT +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + END IF +* + ELSE IF( WNTUA ) THEN +* + IF( WNTVN ) THEN +* +* Path 7 (M much larger than N, JOBU='A', JOBVT='N') +* M left singular vectors to be computed in U and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IR) is LDA by N +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is N by N +* + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in U +* (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, + $ 1, WORK( IR ), LDWRKR, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IR), storing result in A +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU, + $ WORK( IR ), LDWRKR, CZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL CLACPY( 'F', M, N, A, LDA, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need N+M, prefer N+M*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ A( 2, 1 ), LDA ) +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in A +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, + $ 1, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVO ) THEN +* +* Path 8 (M much larger than N, JOBU='A', JOBVT='O') +* M left singular vectors to be computed in U and +* N right singular vectors to be overwritten on A +* + IF( LWORK.GE.2*N*N+MAX( N+M, 3*N ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = N + ELSE +* +* WORK(IU) is N by N and WORK(IR) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IU+1 ), LDWRKU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to +* WORK(IR) +* (CWorkspace: need 2*N*N+3*N, +* prefer 2*N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need 2*N*N+3*N-1, +* prefer 2*N*N+2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in WORK(IR) +* (CWorkspace: need 2*N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, WORK( IU ), + $ LDWRKU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU, + $ WORK( IU ), LDWRKU, CZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL CLACPY( 'F', M, N, A, LDA, U, LDU ) +* +* Copy right singular vectors of R from WORK(IR) to A +* + CALL CLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need N+M, prefer N+M*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ A( 2, 1 ), LDA ) +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in A +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in A +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A, + $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVAS ) THEN +* +* Path 9 (M much larger than N, JOBU='A', JOBVT='S' +* or 'A') +* M left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is N by N +* + LDWRKU = N + END IF + ITAU = IU + LDWRKU*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IU+1 ), LDWRKU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to VT +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, + $ LDVT ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (CWorkspace: need N*N+3*N-1, +* prefer N*N+2*N+(N-1)*NB) +* (RWorkspace: need 0) +* + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in VT +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT, + $ LDVT, WORK( IU ), LDWRKU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU, + $ WORK( IU ), LDWRKU, CZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL CLACPY( 'F', M, N, A, LDA, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need N+M, prefer N+M*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R from A to VT, zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ VT( 2, 1 ), LDVT ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in VT +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* M .LT. MNTHR +* +* Path 10 (M at least N, but not much larger) +* Reduce to bidiagonal form without QR decomposition +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize A +* (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUAS ) THEN +* +* If left singular vectors desired in U, copy result to U +* and generate left bidiagonalizing vectors in U +* (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB) +* (RWorkspace: 0) +* + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) + IF( WNTUS ) + $ NCU = N + IF( WNTUA ) + $ NCU = M + CALL CUNGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVAS ) THEN +* +* If right singular vectors desired in VT, copy result to +* VT and generate right bidiagonalizing vectors in VT +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTUO ) THEN +* +* If left singular vectors desired in A, generate left +* bidiagonalizing vectors in A +* (CWorkspace: need 3*N, prefer 2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVO ) THEN +* +* If right singular vectors desired in A, generate right +* bidiagonalizing vectors in A +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IRWORK = IE + N + IF( WNTUAS .OR. WNTUO ) + $ NRU = M + IF( WNTUN ) + $ NRU = 0 + IF( WNTVAS .OR. WNTVO ) + $ NCVT = N + IF( WNTVN ) + $ NCVT = 0 + IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), A, + $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + ELSE +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in A and computing right singular +* vectors in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + END IF +* + END IF +* + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce using the LQ decomposition (if +* sufficient workspace available) +* + IF( N.GE.MNTHR ) THEN +* + IF( WNTVN ) THEN +* +* Path 1t(N much larger than M, JOBVT='N') +* No right singular vectors to be computed +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Zero out above L +* + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ), + $ LDA ) + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUO .OR. WNTUAS ) THEN +* +* If left singular vectors desired, generate Q +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IRWORK = IE + M + NRU = 0 + IF( WNTUO .OR. WNTUAS ) + $ NRU = M +* +* Perform bidiagonal QR iteration, computing left singular +* vectors of A in A if desired +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, 0, NRU, 0, S, RWORK( IE ), CDUM, 1, + $ A, LDA, CDUM, 1, RWORK( IRWORK ), INFO ) +* +* If left singular vectors desired in U, copy them there +* + IF( WNTUAS ) + $ CALL CLACPY( 'F', M, M, A, LDA, U, LDU ) +* + ELSE IF( WNTVO .AND. WNTUN ) THEN +* +* Path 2t(N much larger than M, JOBU='N', JOBVT='O') +* M right singular vectors to be overwritten on A and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is M by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = M + ELSE +* +* WORK(IU) is M by CHUNK and WORK(IR) is M by M +* + LDWRKU = M + CHUNK = ( LWORK-M*M ) / M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IR) and zero out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing L +* (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) + IU = ITAUQ +* +* Multiply right singular vectors of L in WORK(IR) by Q +* in A, storing result in WORK(IU) and copying to A +* (CWorkspace: need M*M+M, prefer M*M+M*N) +* (RWorkspace: 0) +* + DO 30 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL CGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ), + $ LDWRKR, A( 1, I ), LDA, CZERO, + $ WORK( IU ), LDWRKU ) + CALL CLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, + $ A( 1, I ), LDA ) + 30 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'L', M, N, 0, 0, S, RWORK( IE ), A, LDA, + $ CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTVO .AND. WNTUAS ) THEN +* +* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') +* M right singular vectors to be overwritten on A and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is M by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = M + ELSE +* +* WORK(IU) is M by CHUNK and WORK(IR) is M by M +* + LDWRKU = M + CHUNK = ( LWORK-M*M ) / M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing about above it +* + CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ), + $ LDU ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U, copying result to WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR ) +* +* Generate right vectors bidiagonalizing L in WORK(IR) +* (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing L in U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U, and computing right +* singular vectors of L in WORK(IR) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) + IU = ITAUQ +* +* Multiply right singular vectors of L in WORK(IR) by Q +* in A, storing result in WORK(IU) and copying to A +* (CWorkspace: need M*M+M, prefer M*M+M*N)) +* (RWorkspace: 0) +* + DO 40 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL CGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ), + $ LDWRKR, A( 1, I ), LDA, CZERO, + $ WORK( IU ), LDWRKU ) + CALL CLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, + $ A( 1, I ), LDA ) + 40 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ), + $ LDU ) +* +* Generate Q in A +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in A +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU, + $ WORK( ITAUP ), A, LDA, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing L in U +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), A, LDA, + $ U, LDU, CDUM, 1, RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTVS ) THEN +* + IF( WNTUN ) THEN +* +* Path 4t(N much larger than M, JOBU='N', JOBVT='S') +* M right singular vectors to be computed in VT and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IR) is LDA by M +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is M by M +* + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IR), zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing L in +* WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IR) by +* Q in A, storing result in VT +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ), + $ LDWRKR, A, LDA, CZERO, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy result to VT +* + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ A( 1, 2 ), LDA ) +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT, + $ LDVT, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUO ) THEN +* +* Path 5t(N much larger than M, JOBU='O', JOBVT='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be overwritten on A +* + IF( LWORK.GE.2*M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is LDA by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is M by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = M + ELSE +* +* WORK(IU) is M by M and WORK(IR) is M by M +* + LDWRKU = M + IR = IU + LDWRKU*M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out below it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) +* +* Generate Q in A +* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to +* WORK(IR) +* (CWorkspace: need 2*M*M+3*M, +* prefer 2*M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need 2*M*M+3*M-1, +* prefer 2*M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in WORK(IR) and computing +* right singular vectors of L in WORK(IU) +* (CWorkspace: need 2*M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IU ), LDWRKU, WORK( IR ), + $ LDWRKR, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in A, storing result in VT +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), + $ LDWRKU, A, LDA, CZERO, VT, LDVT ) +* +* Copy left singular vectors of L to A +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ A( 1, 2 ), LDA ) +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors of L in A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUAS ) THEN +* +* Path 6t(N much larger than M, JOBU='S' or 'A', +* JOBVT='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is LDA by M +* + LDWRKU = M + END IF + ITAU = IU + LDWRKU*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, + $ LDU ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need M*M+3*M-1, +* prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U and computing right +* singular vectors of L in WORK(IU) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IU ), LDWRKU, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in A, storing result in VT +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), + $ LDWRKU, A, LDA, CZERO, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ U( 1, 2 ), LDU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in U by Q +* in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + END IF +* + ELSE IF( WNTVA ) THEN +* + IF( WNTUN ) THEN +* +* Path 7t(N much larger than M, JOBU='N', JOBVT='A') +* N right singular vectors to be computed in VT and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IR) is LDA by M +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is M by M +* + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Copy L to WORK(IR), zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in VT +* (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need M*M+3*M-1, +* prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IR) by +* Q in VT, storing result in A +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ), + $ LDWRKR, VT, LDVT, CZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need M+N, prefer M+N*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ A( 1, 2 ), LDA ) +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in A by Q +* in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT, + $ LDVT, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUO ) THEN +* +* Path 8t(N much larger than M, JOBU='O', JOBVT='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be overwritten on A +* + IF( LWORK.GE.2*M*M+MAX( N+M, 3*M ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is LDA by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is M by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = M + ELSE +* +* WORK(IU) is M by M and WORK(IR) is M by M +* + LDWRKU = M + IR = IU + LDWRKU*M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to +* WORK(IR) +* (CWorkspace: need 2*M*M+3*M, +* prefer 2*M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need 2*M*M+3*M-1, +* prefer 2*M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in WORK(IR) and computing +* right singular vectors of L in WORK(IU) +* (CWorkspace: need 2*M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IU ), LDWRKU, WORK( IR ), + $ LDWRKR, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in VT, storing result in A +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), + $ LDWRKU, VT, LDVT, CZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* +* Copy left singular vectors of A from WORK(IR) to A +* + CALL CLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need M+N, prefer M+N*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ A( 1, 2 ), LDA ) +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in A by Q +* in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUAS ) THEN +* +* Path 9t(N much larger than M, JOBU='S' or 'A', +* JOBVT='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IU) is LDA by M +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is M by M +* + LDWRKU = M + END IF + ITAU = IU + LDWRKU*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, + $ LDU ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U and computing right +* singular vectors of L in WORK(IU) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IU ), LDWRKU, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in VT, storing result in A +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), + $ LDWRKU, VT, LDVT, CZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need M+N, prefer M+N*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ U( 1, 2 ), LDU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in U by Q +* in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* N .LT. MNTHR +* +* Path 10t(N greater than M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) +* (RWorkspace: M) +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUAS ) THEN +* +* If left singular vectors desired in U, copy result to U +* and generate left bidiagonalizing vectors in U +* (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL CUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVAS ) THEN +* +* If right singular vectors desired in VT, copy result to +* VT and generate right bidiagonalizing vectors in VT +* (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB) +* (RWorkspace: 0) +* + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) + IF( WNTVA ) + $ NRVT = N + IF( WNTVS ) + $ NRVT = M + CALL CUNGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTUO ) THEN +* +* If left singular vectors desired in A, generate left +* bidiagonalizing vectors in A +* (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVO ) THEN +* +* If right singular vectors desired in A, generate right +* bidiagonalizing vectors in A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IRWORK = IE + M + IF( WNTUAS .OR. WNTUO ) + $ NRU = M + IF( WNTUN ) + $ NRU = 0 + IF( WNTVAS .OR. WNTVO ) + $ NCVT = N + IF( WNTVN ) + $ NCVT = 0 + IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), A, + $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + ELSE +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in A and computing right singular +* vectors in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + END IF +* + END IF +* + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM ) + $ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, + $ RWORK( IE ), MINMN, IERR ) + IF( ANRM.LT.SMLNUM ) + $ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM ) + $ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, + $ RWORK( IE ), MINMN, IERR ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of CGESVD +* + END diff --git a/costa/native/external/lapack/cgesvx.f b/costa/native/external/lapack/cgesvx.f new file mode 100644 index 000000000..04f20659e --- /dev/null +++ b/costa/native/external/lapack/cgesvx.f @@ -0,0 +1,484 @@ + SUBROUTINE CGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), C( * ), FERR( * ), R( * ), + $ RWORK( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CGESVX uses the LU factorization to compute the solution to a complex +* system of linear equations +* A * X = B, +* where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'E', real scaling factors are computed to equilibrate +* the system: +* TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +* TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +* TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +* Whether or not the system will be equilibrated depends on the +* scaling of the matrix A, but if equilibration is used, A is +* overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +* or diag(C)*B (if TRANS = 'T' or 'C'). +* +* 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the +* matrix A (after equilibration if FACT = 'E') as +* A = P * L * U, +* where P is a permutation matrix, L is a unit lower triangular +* matrix, and U is upper triangular. +* +* 3. If some U(i,i)=0, so that U is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 4. The system of equations is solved for X using the factored form +* of A. +* +* 5. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* 6. If equilibration was used, the matrix X is premultiplied by +* diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +* that it solves the original system before equilibration. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of the matrix A is +* supplied on entry, and if not, whether the matrix A should be +* equilibrated before it is factored. +* = 'F': On entry, AF and IPIV contain the factored form of A. +* If EQUED is not 'N', the matrix A has been +* equilibrated with scaling factors given by R and C. +* A, AF, and IPIV are not modified. +* = 'N': The matrix A will be copied to AF and factored. +* = 'E': The matrix A will be equilibrated if necessary, then +* copied to AF and factored. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the N-by-N matrix A. If FACT = 'F' and EQUED is +* not 'N', then A must have been equilibrated by the scaling +* factors in R and/or C. A is not modified if FACT = 'F' or +* 'N', or if FACT = 'E' and EQUED = 'N' on exit. +* +* On exit, if EQUED .ne. 'N', A is scaled as follows: +* EQUED = 'R': A := diag(R) * A +* EQUED = 'C': A := A * diag(C) +* EQUED = 'B': A := diag(R) * A * diag(C). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input or output) COMPLEX array, dimension (LDAF,N) +* If FACT = 'F', then AF is an input argument and on entry +* contains the factors L and U from the factorization +* A = P*L*U as computed by CGETRF. If EQUED .ne. 'N', then +* AF is the factored form of the equilibrated matrix A. +* +* If FACT = 'N', then AF is an output argument and on exit +* returns the factors L and U from the factorization A = P*L*U +* of the original matrix A. +* +* If FACT = 'E', then AF is an output argument and on exit +* returns the factors L and U from the factorization A = P*L*U +* of the equilibrated matrix A (see the description of A for +* the form of the equilibrated matrix). +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains the pivot indices from the factorization A = P*L*U +* as computed by CGETRF; row i of the matrix was interchanged +* with row IPIV(i). +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains the pivot indices from the factorization A = P*L*U +* of the original matrix A. +* +* If FACT = 'E', then IPIV is an output argument and on exit +* contains the pivot indices from the factorization A = P*L*U +* of the equilibrated matrix A. +* +* EQUED (input or output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration (always true if FACT = 'N'). +* = 'R': Row equilibration, i.e., A has been premultiplied by +* diag(R). +* = 'C': Column equilibration, i.e., A has been postmultiplied +* by diag(C). +* = 'B': Both row and column equilibration, i.e., A has been +* replaced by diag(R) * A * diag(C). +* EQUED is an input argument if FACT = 'F'; otherwise, it is an +* output argument. +* +* R (input or output) REAL array, dimension (N) +* The row scale factors for A. If EQUED = 'R' or 'B', A is +* multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +* is not accessed. R is an input argument if FACT = 'F'; +* otherwise, R is an output argument. If FACT = 'F' and +* EQUED = 'R' or 'B', each element of R must be positive. +* +* C (input or output) REAL array, dimension (N) +* The column scale factors for A. If EQUED = 'C' or 'B', A is +* multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +* is not accessed. C is an input argument if FACT = 'F'; +* otherwise, C is an output argument. If FACT = 'F' and +* EQUED = 'C' or 'B', each element of C must be positive. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, +* if EQUED = 'N', B is not modified; +* if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +* diag(R)*B; +* if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +* overwritten by diag(C)*B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) COMPLEX array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X +* to the original system of equations. Note that A and B are +* modified on exit if EQUED .ne. 'N', and the solution to the +* equilibrated system is inv(diag(C))*X if TRANS = 'N' and +* EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' +* and EQUED = 'R' or 'B'. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) REAL +* The estimate of the reciprocal condition number of the matrix +* A after equilibration (if done). If RCOND is less than the +* machine precision (in particular, if RCOND = 0), the matrix +* is singular to working precision. This condition is +* indicated by a return code of INFO > 0. +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace/output) REAL array, dimension (2*N) +* On exit, RWORK(1) contains the reciprocal pivot growth +* factor norm(A)/norm(U). The "max absolute element" norm is +* used. If RWORK(1) is much less than 1, then the stability +* of the LU factorization of the (equilibrated) matrix A +* could be poor. This also means that the solution X, condition +* estimator RCOND, and forward error bound FERR could be +* unreliable. If factorization fails with 0 0: if INFO = i, and i is +* <= N: U(i,i) is exactly zero. The factorization has +* been completed, but the factor U is exactly +* singular, so the solution and error bounds +* could not be computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + CHARACTER NORM + INTEGER I, INFEQU, J + REAL AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN, + $ ROWCND, RPVGRW, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANGE, CLANTR, SLAMCH + EXTERNAL LSAME, CLANGE, CLANTR, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CGECON, CGEEQU, CGERFS, CGETRF, CGETRS, CLACPY, + $ CLAQGE, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -12 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGESVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL CGEEQU( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL CLAQGE( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = R( I )*B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( COLEQU ) THEN + DO 60 J = 1, NRHS + DO 50 I = 1, N + B( I, J ) = C( I )*B( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of A. +* + CALL CLACPY( 'Full', N, N, A, LDA, AF, LDAF ) + CALL CGETRF( N, N, AF, LDAF, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) THEN +* +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + RPVGRW = CLANTR( 'M', 'U', 'N', INFO, INFO, AF, LDAF, + $ RWORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = CLANGE( 'M', N, INFO, A, LDA, RWORK ) / + $ RPVGRW + END IF + RWORK( 1 ) = RPVGRW + RCOND = ZERO + END IF + RETURN + END IF + END IF +* +* Compute the norm of the matrix A and the +* reciprocal pivot growth factor RPVGRW. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = CLANGE( NORM, N, N, A, LDA, RWORK ) + RPVGRW = CLANTR( 'M', 'U', 'N', N, N, AF, LDAF, RWORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = CLANGE( 'M', N, N, A, LDA, RWORK ) / RPVGRW + END IF +* +* Compute the reciprocal of the condition number of A. +* + CALL CGECON( NORM, N, AF, LDAF, ANORM, RCOND, WORK, RWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution matrix X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CGETRS( TRANS, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL CGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( NOTRAN ) THEN + IF( COLEQU ) THEN + DO 80 J = 1, NRHS + DO 70 I = 1, N + X( I, J ) = C( I )*X( I, J ) + 70 CONTINUE + 80 CONTINUE + DO 90 J = 1, NRHS + FERR( J ) = FERR( J ) / COLCND + 90 CONTINUE + END IF + ELSE IF( ROWEQU ) THEN + DO 110 J = 1, NRHS + DO 100 I = 1, N + X( I, J ) = R( I )*X( I, J ) + 100 CONTINUE + 110 CONTINUE + DO 120 J = 1, NRHS + FERR( J ) = FERR( J ) / ROWCND + 120 CONTINUE + END IF +* + RWORK( 1 ) = RPVGRW + RETURN +* +* End of CGESVX +* + END diff --git a/costa/native/external/lapack/cgetc2.f b/costa/native/external/lapack/cgetc2.f new file mode 100644 index 000000000..1ab4b9d05 --- /dev/null +++ b/costa/native/external/lapack/cgetc2.f @@ -0,0 +1,146 @@ + SUBROUTINE CGETC2( N, A, LDA, IPIV, JPIV, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CGETC2 computes an LU factorization, using complete pivoting, of the +* n-by-n matrix A. The factorization has the form A = P * L * U * Q, +* where P and Q are permutation matrices, L is lower triangular with +* unit diagonal elements and U is upper triangular. +* +* This is a level 1 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA, N) +* On entry, the n-by-n matrix to be factored. +* On exit, the factors L and U from the factorization +* A = P*L*U*Q; the unit diagonal elements of L are not stored. +* If U(k, k) appears to be less than SMIN, U(k, k) is given the +* value of SMIN, giving a nonsingular perturbed system. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1, N). +* +* IPIV (output) INTEGER array, dimension (N). +* The pivot indices; for 1 <= i <= N, row i of the +* matrix has been interchanged with row IPIV(i). +* +* JPIV (output) INTEGER array, dimension (N). +* The pivot indices; for 1 <= j <= N, column j of the +* matrix has been interchanged with column JPIV(j). +* +* INFO (output) INTEGER +* = 0: successful exit +* > 0: if INFO = k, U(k, k) is likely to produce overflow if +* one tries to solve for x in Ax = b. So U is perturbed +* to avoid the overflow. +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IP, IPV, J, JP, JPV + REAL BIGNUM, EPS, SMIN, SMLNUM, XMAX +* .. +* .. External Subroutines .. + EXTERNAL CGERU, CSWAP, SLABAD +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CMPLX, MAX +* .. +* .. Executable Statements .. +* +* Set constants to control overflow +* + INFO = 0 + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Factorize A using complete pivoting. +* Set pivots less than SMIN to SMIN +* + DO 40 I = 1, N - 1 +* +* Find max element in matrix A +* + XMAX = ZERO + DO 20 IP = I, N + DO 10 JP = I, N + IF( ABS( A( IP, JP ) ).GE.XMAX ) THEN + XMAX = ABS( A( IP, JP ) ) + IPV = IP + JPV = JP + END IF + 10 CONTINUE + 20 CONTINUE + IF( I.EQ.1 ) + $ SMIN = MAX( EPS*XMAX, SMLNUM ) +* +* Swap rows +* + IF( IPV.NE.I ) + $ CALL CSWAP( N, A( IPV, 1 ), LDA, A( I, 1 ), LDA ) + IPIV( I ) = IPV +* +* Swap columns +* + IF( JPV.NE.I ) + $ CALL CSWAP( N, A( 1, JPV ), 1, A( 1, I ), 1 ) + JPIV( I ) = JPV +* +* Check for singularity +* + IF( ABS( A( I, I ) ).LT.SMIN ) THEN + INFO = I + A( I, I ) = CMPLX( SMIN, ZERO ) + END IF + DO 30 J = I + 1, N + A( J, I ) = A( J, I ) / A( I, I ) + 30 CONTINUE + CALL CGERU( N-I, N-I, -CMPLX( ONE ), A( I+1, I ), 1, + $ A( I, I+1 ), LDA, A( I+1, I+1 ), LDA ) + 40 CONTINUE +* + IF( ABS( A( N, N ) ).LT.SMIN ) THEN + INFO = N + A( N, N ) = CMPLX( SMIN, ZERO ) + END IF + RETURN +* +* End of CGETC2 +* + END diff --git a/costa/native/external/lapack/cgetf2.f b/costa/native/external/lapack/cgetf2.f new file mode 100644 index 000000000..9abcaba47 --- /dev/null +++ b/costa/native/external/lapack/cgetf2.f @@ -0,0 +1,136 @@ + SUBROUTINE CGETF2( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CGETF2 computes an LU factorization of a general m-by-n matrix A +* using partial pivoting with row interchanges. +* +* The factorization has the form +* A = P * L * U +* where P is a permutation matrix, L is lower triangular with unit +* diagonal elements (lower trapezoidal if m > n), and U is upper +* triangular (upper trapezoidal if m < n). +* +* This is the right-looking Level 2 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the m by n matrix to be factored. +* On exit, the factors L and U from the factorization +* A = P*L*U; the unit diagonal elements of L are not stored. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, U(k,k) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER J, JP +* .. +* .. External Functions .. + INTEGER ICAMAX + EXTERNAL ICAMAX +* .. +* .. External Subroutines .. + EXTERNAL CGERU, CSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGETF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + DO 10 J = 1, MIN( M, N ) +* +* Find pivot and test for singularity. +* + JP = J - 1 + ICAMAX( M-J+1, A( J, J ), 1 ) + IPIV( J ) = JP + IF( A( JP, J ).NE.ZERO ) THEN +* +* Apply the interchange to columns 1:N. +* + IF( JP.NE.J ) + $ CALL CSWAP( N, A( J, 1 ), LDA, A( JP, 1 ), LDA ) +* +* Compute elements J+1:M of J-th column. +* + IF( J.LT.M ) + $ CALL CSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 ) +* + ELSE IF( INFO.EQ.0 ) THEN +* + INFO = J + END IF +* + IF( J.LT.MIN( M, N ) ) THEN +* +* Update trailing submatrix. +* + CALL CGERU( M-J, N-J, -ONE, A( J+1, J ), 1, A( J, J+1 ), + $ LDA, A( J+1, J+1 ), LDA ) + END IF + 10 CONTINUE + RETURN +* +* End of CGETF2 +* + END diff --git a/costa/native/external/lapack/cgetrf.f b/costa/native/external/lapack/cgetrf.f new file mode 100644 index 000000000..4d288f8c3 --- /dev/null +++ b/costa/native/external/lapack/cgetrf.f @@ -0,0 +1,160 @@ + SUBROUTINE CGETRF( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CGETRF computes an LU factorization of a general M-by-N matrix A +* using partial pivoting with row interchanges. +* +* The factorization has the form +* A = P * L * U +* where P is a permutation matrix, L is lower triangular with unit +* diagonal elements (lower trapezoidal if m > n), and U is upper +* triangular (upper trapezoidal if m < n). +* +* This is the right-looking Level 3 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix to be factored. +* On exit, the factors L and U from the factorization +* A = P*L*U; the unit diagonal elements of L are not stored. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO, J, JB, NB +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CGETF2, CLASWP, CTRSM, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'CGETRF', ' ', M, N, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN +* +* Use unblocked code. +* + CALL CGETF2( M, N, A, LDA, IPIV, INFO ) + ELSE +* +* Use blocked code. +* + DO 20 J = 1, MIN( M, N ), NB + JB = MIN( MIN( M, N )-J+1, NB ) +* +* Factor diagonal and subdiagonal blocks and test for exact +* singularity. +* + CALL CGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO ) +* +* Adjust INFO and the pivot indices. +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + J - 1 + DO 10 I = J, MIN( M, J+JB-1 ) + IPIV( I ) = J - 1 + IPIV( I ) + 10 CONTINUE +* +* Apply interchanges to columns 1:J-1. +* + CALL CLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 ) +* + IF( J+JB.LE.N ) THEN +* +* Apply interchanges to columns J+JB:N. +* + CALL CLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1, + $ IPIV, 1 ) +* +* Compute block row of U. +* + CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB, + $ N-J-JB+1, ONE, A( J, J ), LDA, A( J, J+JB ), + $ LDA ) + IF( J+JB.LE.M ) THEN +* +* Update trailing submatrix. +* + CALL CGEMM( 'No transpose', 'No transpose', M-J-JB+1, + $ N-J-JB+1, JB, -ONE, A( J+JB, J ), LDA, + $ A( J, J+JB ), LDA, ONE, A( J+JB, J+JB ), + $ LDA ) + END IF + END IF + 20 CONTINUE + END IF + RETURN +* +* End of CGETRF +* + END diff --git a/costa/native/external/lapack/cgetri.f b/costa/native/external/lapack/cgetri.f new file mode 100644 index 000000000..6840f531c --- /dev/null +++ b/costa/native/external/lapack/cgetri.f @@ -0,0 +1,194 @@ + SUBROUTINE CGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGETRI computes the inverse of a matrix using the LU factorization +* computed by CGETRF. +* +* This method inverts U and then computes inv(A) by solving the system +* inv(A)*L = inv(U) for inv(A). +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the factors L and U from the factorization +* A = P*L*U as computed by CGETRF. +* On exit, if INFO = 0, the inverse of the original matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from CGETRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO=0, then WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimal performance LWORK >= N*NB, where NB is +* the optimal blocksize returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is +* singular and its inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB, + $ NBMIN, NN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CGEMV, CSWAP, CTRSM, CTRTRI, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NB = ILAENV( 1, 'CGETRI', ' ', N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGETRI', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form inv(U). If INFO > 0 from CTRTRI, then U is singular, +* and the inverse is not computed. +* + CALL CTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = MAX( LDWORK*NB, 1 ) + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CGETRI', ' ', N, -1, -1, -1 ) ) + END IF + ELSE + IWS = N + END IF +* +* Solve the equation inv(A)*L = inv(U) for inv(A). +* + IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + DO 20 J = N, 1, -1 +* +* Copy current column of L to WORK and replace with zeros. +* + DO 10 I = J + 1, N + WORK( I ) = A( I, J ) + A( I, J ) = ZERO + 10 CONTINUE +* +* Compute current column of inv(A). +* + IF( J.LT.N ) + $ CALL CGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ), + $ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 ) + 20 CONTINUE + ELSE +* +* Use blocked code. +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 50 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) +* +* Copy current block column of L to WORK and replace with +* zeros. +* + DO 40 JJ = J, J + JB - 1 + DO 30 I = JJ + 1, N + WORK( I+( JJ-J )*LDWORK ) = A( I, JJ ) + A( I, JJ ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Compute current block column of inv(A). +* + IF( J+JB.LE.N ) + $ CALL CGEMM( 'No transpose', 'No transpose', N, JB, + $ N-J-JB+1, -ONE, A( 1, J+JB ), LDA, + $ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA ) + CALL CTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, + $ ONE, WORK( J ), LDWORK, A( 1, J ), LDA ) + 50 CONTINUE + END IF +* +* Apply column interchanges. +* + DO 60 J = N - 1, 1, -1 + JP = IPIV( J ) + IF( JP.NE.J ) + $ CALL CSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 ) + 60 CONTINUE +* + WORK( 1 ) = IWS + RETURN +* +* End of CGETRI +* + END diff --git a/costa/native/external/lapack/cgetrs.f b/costa/native/external/lapack/cgetrs.f new file mode 100644 index 000000000..d8a78f845 --- /dev/null +++ b/costa/native/external/lapack/cgetrs.f @@ -0,0 +1,150 @@ + SUBROUTINE CGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CGETRS solves a system of linear equations +* A * X = B, A**T * X = B, or A**H * X = B +* with a general N-by-N matrix A using the LU factorization computed +* by CGETRF. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The factors L and U from the factorization A = P*L*U +* as computed by CGETRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from CGETRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLASWP, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGETRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( NOTRAN ) THEN +* +* Solve A * X = B. +* +* Apply row interchanges to the right hand sides. +* + CALL CLASWP( NRHS, B, LDB, 1, N, IPIV, 1 ) +* +* Solve L*X = B, overwriting B with X. +* + CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) +* +* Solve U*X = B, overwriting B with X. +* + CALL CTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) + ELSE +* +* Solve A**T * X = B or A**H * X = B. +* +* Solve U'*X = B, overwriting B with X. +* + CALL CTRSM( 'Left', 'Upper', TRANS, 'Non-unit', N, NRHS, ONE, + $ A, LDA, B, LDB ) +* +* Solve L'*X = B, overwriting B with X. +* + CALL CTRSM( 'Left', 'Lower', TRANS, 'Unit', N, NRHS, ONE, A, + $ LDA, B, LDB ) +* +* Apply row interchanges to the solution vectors. +* + CALL CLASWP( NRHS, B, LDB, 1, N, IPIV, -1 ) + END IF +* + RETURN +* +* End of CGETRS +* + END diff --git a/costa/native/external/lapack/cggbak.f b/costa/native/external/lapack/cggbak.f new file mode 100644 index 000000000..0ae1d2994 --- /dev/null +++ b/costa/native/external/lapack/cggbak.f @@ -0,0 +1,216 @@ + SUBROUTINE CGGBAK( JOB, SIDE, N, ILO, IHI, LSCALE, RSCALE, M, V, + $ LDV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOB, SIDE + INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. + REAL LSCALE( * ), RSCALE( * ) + COMPLEX V( LDV, * ) +* .. +* +* Purpose +* ======= +* +* CGGBAK forms the right or left eigenvectors of a complex generalized +* eigenvalue problem A*x = lambda*B*x, by backward transformation on +* the computed eigenvectors of the balanced pair of matrices output by +* CGGBAL. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the type of backward transformation required: +* = 'N': do nothing, return immediately; +* = 'P': do backward transformation for permutation only; +* = 'S': do backward transformation for scaling only; +* = 'B': do backward transformations for both permutation and +* scaling. +* JOB must be the same as the argument JOB supplied to CGGBAL. +* +* SIDE (input) CHARACTER*1 +* = 'R': V contains right eigenvectors; +* = 'L': V contains left eigenvectors. +* +* N (input) INTEGER +* The number of rows of the matrix V. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* The integers ILO and IHI determined by CGGBAL. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* LSCALE (input) REAL array, dimension (N) +* Details of the permutations and/or scaling factors applied +* to the left side of A and B, as returned by CGGBAL. +* +* RSCALE (input) REAL array, dimension (N) +* Details of the permutations and/or scaling factors applied +* to the right side of A and B, as returned by CGGBAL. +* +* M (input) INTEGER +* The number of columns of the matrix V. M >= 0. +* +* V (input/output) COMPLEX array, dimension (LDV,M) +* On entry, the matrix of right or left eigenvectors to be +* transformed, as returned by CTGEVC. +* On exit, V is overwritten by the transformed eigenvectors. +* +* LDV (input) INTEGER +* The leading dimension of the matrix V. LDV >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* See R.C. Ward, Balancing the generalized eigenvalue problem, +* SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFTV, RIGHTV + INTEGER I, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + RIGHTV = LSAME( SIDE, 'R' ) + LEFTV = LSAME( SIDE, 'L' ) +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 ) THEN + INFO = -4 + ELSE IF( IHI.LT.ILO .OR. IHI.GT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -6 + ELSE IF( LDV.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGBAK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( M.EQ.0 ) + $ RETURN + IF( LSAME( JOB, 'N' ) ) + $ RETURN +* + IF( ILO.EQ.IHI ) + $ GO TO 30 +* +* Backward balance +* + IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN +* +* Backward transformation on right eigenvectors +* + IF( RIGHTV ) THEN + DO 10 I = ILO, IHI + CALL CSSCAL( M, RSCALE( I ), V( I, 1 ), LDV ) + 10 CONTINUE + END IF +* +* Backward transformation on left eigenvectors +* + IF( LEFTV ) THEN + DO 20 I = ILO, IHI + CALL CSSCAL( M, LSCALE( I ), V( I, 1 ), LDV ) + 20 CONTINUE + END IF + END IF +* +* Backward permutation +* + 30 CONTINUE + IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN +* +* Backward permutation on right eigenvectors +* + IF( RIGHTV ) THEN + IF( ILO.EQ.1 ) + $ GO TO 50 + DO 40 I = ILO - 1, 1, -1 + K = RSCALE( I ) + IF( K.EQ.I ) + $ GO TO 40 + CALL CSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 40 CONTINUE +* + 50 CONTINUE + IF( IHI.EQ.N ) + $ GO TO 70 + DO 60 I = IHI + 1, N + K = RSCALE( I ) + IF( K.EQ.I ) + $ GO TO 60 + CALL CSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 60 CONTINUE + END IF +* +* Backward permutation on left eigenvectors +* + 70 CONTINUE + IF( LEFTV ) THEN + IF( ILO.EQ.1 ) + $ GO TO 90 + DO 80 I = ILO - 1, 1, -1 + K = LSCALE( I ) + IF( K.EQ.I ) + $ GO TO 80 + CALL CSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 80 CONTINUE +* + 90 CONTINUE + IF( IHI.EQ.N ) + $ GO TO 110 + DO 100 I = IHI + 1, N + K = LSCALE( I ) + IF( K.EQ.I ) + $ GO TO 100 + CALL CSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 100 CONTINUE + END IF + END IF +* + 110 CONTINUE +* + RETURN +* +* End of CGGBAK +* + END diff --git a/costa/native/external/lapack/cggbal.f b/costa/native/external/lapack/cggbal.f new file mode 100644 index 000000000..cc258d485 --- /dev/null +++ b/costa/native/external/lapack/cggbal.f @@ -0,0 +1,474 @@ + SUBROUTINE CGGBAL( JOB, N, A, LDA, B, LDB, ILO, IHI, LSCALE, + $ RSCALE, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER IHI, ILO, INFO, LDA, LDB, N +* .. +* .. Array Arguments .. + REAL LSCALE( * ), RSCALE( * ), WORK( * ) + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CGGBAL balances a pair of general complex matrices (A,B). This +* involves, first, permuting A and B by similarity transformations to +* isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N +* elements on the diagonal; and second, applying a diagonal similarity +* transformation to rows and columns ILO to IHI to make the rows +* and columns as close in norm as possible. Both steps are optional. +* +* Balancing may reduce the 1-norm of the matrices, and improve the +* accuracy of the computed eigenvalues and/or eigenvectors in the +* generalized eigenvalue problem A*x = lambda*B*x. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the operations to be performed on A and B: +* = 'N': none: simply set ILO = 1, IHI = N, LSCALE(I) = 1.0 +* and RSCALE(I) = 1.0 for i=1,...,N; +* = 'P': permute only; +* = 'S': scale only; +* = 'B': both permute and scale. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the input matrix A. +* On exit, A is overwritten by the balanced matrix. +* If JOB = 'N', A is not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX array, dimension (LDB,N) +* On entry, the input matrix B. +* On exit, B is overwritten by the balanced matrix. +* If JOB = 'N', B is not referenced. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* ILO (output) INTEGER +* IHI (output) INTEGER +* ILO and IHI are set to integers such that on exit +* A(i,j) = 0 and B(i,j) = 0 if i > j and +* j = 1,...,ILO-1 or i = IHI+1,...,N. +* If JOB = 'N' or 'S', ILO = 1 and IHI = N. +* +* LSCALE (output) REAL array, dimension (N) +* Details of the permutations and scaling factors applied +* to the left side of A and B. If P(j) is the index of the +* row interchanged with row j, and D(j) is the scaling factor +* applied to row j, then +* LSCALE(j) = P(j) for J = 1,...,ILO-1 +* = D(j) for J = ILO,...,IHI +* = P(j) for J = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* RSCALE (output) REAL array, dimension (N) +* Details of the permutations and scaling factors applied +* to the right side of A and B. If P(j) is the index of the +* column interchanged with column j, and D(j) is the scaling +* factor applied to column j, then +* RSCALE(j) = P(j) for J = 1,...,ILO-1 +* = D(j) for J = ILO,...,IHI +* = P(j) for J = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* WORK (workspace) REAL array, dimension (6*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* See R.C. WARD, Balancing the generalized eigenvalue problem, +* SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 ) + REAL THREE, SCLFAC + PARAMETER ( THREE = 3.0E+0, SCLFAC = 1.0E+1 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, ICAB, IFLOW, IP1, IR, IRAB, IT, J, JC, JP1, + $ K, KOUNT, L, LCAB, LM1, LRAB, LSFMAX, LSFMIN, + $ M, NR, NRP2 + REAL ALPHA, BASL, BETA, CAB, CMAX, COEF, COEF2, + $ COEF5, COR, EW, EWC, GAMMA, PGAMMA, RAB, SFMAX, + $ SFMIN, SUM, T, TA, TB, TC + COMPLEX CDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SDOT, SLAMCH + EXTERNAL LSAME, ICAMAX, SDOT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL, CSWAP, SAXPY, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, INT, LOG10, MAX, MIN, REAL, SIGN +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGBAL', -INFO ) + RETURN + END IF +* + K = 1 + L = N +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( JOB, 'N' ) ) THEN + ILO = 1 + IHI = N + DO 10 I = 1, N + LSCALE( I ) = ONE + RSCALE( I ) = ONE + 10 CONTINUE + RETURN + END IF +* + IF( K.EQ.L ) THEN + ILO = 1 + IHI = 1 + LSCALE( 1 ) = ONE + RSCALE( 1 ) = ONE + RETURN + END IF +* + IF( LSAME( JOB, 'S' ) ) + $ GO TO 190 +* + GO TO 30 +* +* Permute the matrices A and B to isolate the eigenvalues. +* +* Find row with one nonzero in columns 1 through L +* + 20 CONTINUE + L = LM1 + IF( L.NE.1 ) + $ GO TO 30 +* + RSCALE( 1 ) = 1 + LSCALE( 1 ) = 1 + GO TO 190 +* + 30 CONTINUE + LM1 = L - 1 + DO 80 I = L, 1, -1 + DO 40 J = 1, LM1 + JP1 = J + 1 + IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO ) + $ GO TO 50 + 40 CONTINUE + J = L + GO TO 70 +* + 50 CONTINUE + DO 60 J = JP1, L + IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO ) + $ GO TO 80 + 60 CONTINUE + J = JP1 - 1 +* + 70 CONTINUE + M = L + IFLOW = 1 + GO TO 160 + 80 CONTINUE + GO TO 100 +* +* Find column with one nonzero in rows K through N +* + 90 CONTINUE + K = K + 1 +* + 100 CONTINUE + DO 150 J = K, L + DO 110 I = K, LM1 + IP1 = I + 1 + IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO ) + $ GO TO 120 + 110 CONTINUE + I = L + GO TO 140 + 120 CONTINUE + DO 130 I = IP1, L + IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO ) + $ GO TO 150 + 130 CONTINUE + I = IP1 - 1 + 140 CONTINUE + M = K + IFLOW = 2 + GO TO 160 + 150 CONTINUE + GO TO 190 +* +* Permute rows M and I +* + 160 CONTINUE + LSCALE( M ) = I + IF( I.EQ.M ) + $ GO TO 170 + CALL CSWAP( N-K+1, A( I, K ), LDA, A( M, K ), LDA ) + CALL CSWAP( N-K+1, B( I, K ), LDB, B( M, K ), LDB ) +* +* Permute columns M and J +* + 170 CONTINUE + RSCALE( M ) = J + IF( J.EQ.M ) + $ GO TO 180 + CALL CSWAP( L, A( 1, J ), 1, A( 1, M ), 1 ) + CALL CSWAP( L, B( 1, J ), 1, B( 1, M ), 1 ) +* + 180 CONTINUE + GO TO ( 20, 90 )IFLOW +* + 190 CONTINUE + ILO = K + IHI = L +* + IF( ILO.EQ.IHI ) + $ RETURN +* + IF( LSAME( JOB, 'P' ) ) + $ RETURN +* +* Balance the submatrix in rows ILO to IHI. +* + NR = IHI - ILO + 1 + DO 200 I = ILO, IHI + RSCALE( I ) = ZERO + LSCALE( I ) = ZERO +* + WORK( I ) = ZERO + WORK( I+N ) = ZERO + WORK( I+2*N ) = ZERO + WORK( I+3*N ) = ZERO + WORK( I+4*N ) = ZERO + WORK( I+5*N ) = ZERO + 200 CONTINUE +* +* Compute right side vector in resulting linear equations +* + BASL = LOG10( SCLFAC ) + DO 240 I = ILO, IHI + DO 230 J = ILO, IHI + IF( A( I, J ).EQ.CZERO ) THEN + TA = ZERO + GO TO 210 + END IF + TA = LOG10( CABS1( A( I, J ) ) ) / BASL +* + 210 CONTINUE + IF( B( I, J ).EQ.CZERO ) THEN + TB = ZERO + GO TO 220 + END IF + TB = LOG10( CABS1( B( I, J ) ) ) / BASL +* + 220 CONTINUE + WORK( I+4*N ) = WORK( I+4*N ) - TA - TB + WORK( J+5*N ) = WORK( J+5*N ) - TA - TB + 230 CONTINUE + 240 CONTINUE +* + COEF = ONE / REAL( 2*NR ) + COEF2 = COEF*COEF + COEF5 = HALF*COEF2 + NRP2 = NR + 2 + BETA = ZERO + IT = 1 +* +* Start generalized conjugate gradient iteration +* + 250 CONTINUE +* + GAMMA = SDOT( NR, WORK( ILO+4*N ), 1, WORK( ILO+4*N ), 1 ) + + $ SDOT( NR, WORK( ILO+5*N ), 1, WORK( ILO+5*N ), 1 ) +* + EW = ZERO + EWC = ZERO + DO 260 I = ILO, IHI + EW = EW + WORK( I+4*N ) + EWC = EWC + WORK( I+5*N ) + 260 CONTINUE +* + GAMMA = COEF*GAMMA - COEF2*( EW**2+EWC**2 ) - COEF5*( EW-EWC )**2 + IF( GAMMA.EQ.ZERO ) + $ GO TO 350 + IF( IT.NE.1 ) + $ BETA = GAMMA / PGAMMA + T = COEF5*( EWC-THREE*EW ) + TC = COEF5*( EW-THREE*EWC ) +* + CALL SSCAL( NR, BETA, WORK( ILO ), 1 ) + CALL SSCAL( NR, BETA, WORK( ILO+N ), 1 ) +* + CALL SAXPY( NR, COEF, WORK( ILO+4*N ), 1, WORK( ILO+N ), 1 ) + CALL SAXPY( NR, COEF, WORK( ILO+5*N ), 1, WORK( ILO ), 1 ) +* + DO 270 I = ILO, IHI + WORK( I ) = WORK( I ) + TC + WORK( I+N ) = WORK( I+N ) + T + 270 CONTINUE +* +* Apply matrix to vector +* + DO 300 I = ILO, IHI + KOUNT = 0 + SUM = ZERO + DO 290 J = ILO, IHI + IF( A( I, J ).EQ.CZERO ) + $ GO TO 280 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( J ) + 280 CONTINUE + IF( B( I, J ).EQ.CZERO ) + $ GO TO 290 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( J ) + 290 CONTINUE + WORK( I+2*N ) = REAL( KOUNT )*WORK( I+N ) + SUM + 300 CONTINUE +* + DO 330 J = ILO, IHI + KOUNT = 0 + SUM = ZERO + DO 320 I = ILO, IHI + IF( A( I, J ).EQ.CZERO ) + $ GO TO 310 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( I+N ) + 310 CONTINUE + IF( B( I, J ).EQ.CZERO ) + $ GO TO 320 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( I+N ) + 320 CONTINUE + WORK( J+3*N ) = REAL( KOUNT )*WORK( J ) + SUM + 330 CONTINUE +* + SUM = SDOT( NR, WORK( ILO+N ), 1, WORK( ILO+2*N ), 1 ) + + $ SDOT( NR, WORK( ILO ), 1, WORK( ILO+3*N ), 1 ) + ALPHA = GAMMA / SUM +* +* Determine correction to current iteration +* + CMAX = ZERO + DO 340 I = ILO, IHI + COR = ALPHA*WORK( I+N ) + IF( ABS( COR ).GT.CMAX ) + $ CMAX = ABS( COR ) + LSCALE( I ) = LSCALE( I ) + COR + COR = ALPHA*WORK( I ) + IF( ABS( COR ).GT.CMAX ) + $ CMAX = ABS( COR ) + RSCALE( I ) = RSCALE( I ) + COR + 340 CONTINUE + IF( CMAX.LT.HALF ) + $ GO TO 350 +* + CALL SAXPY( NR, -ALPHA, WORK( ILO+2*N ), 1, WORK( ILO+4*N ), 1 ) + CALL SAXPY( NR, -ALPHA, WORK( ILO+3*N ), 1, WORK( ILO+5*N ), 1 ) +* + PGAMMA = GAMMA + IT = IT + 1 + IF( IT.LE.NRP2 ) + $ GO TO 250 +* +* End generalized conjugate gradient iteration +* + 350 CONTINUE + SFMIN = SLAMCH( 'S' ) + SFMAX = ONE / SFMIN + LSFMIN = INT( LOG10( SFMIN ) / BASL+ONE ) + LSFMAX = INT( LOG10( SFMAX ) / BASL ) + DO 360 I = ILO, IHI + IRAB = ICAMAX( N-ILO+1, A( I, ILO ), LDA ) + RAB = ABS( A( I, IRAB+ILO-1 ) ) + IRAB = ICAMAX( N-ILO+1, B( I, ILO ), LDA ) + RAB = MAX( RAB, ABS( B( I, IRAB+ILO-1 ) ) ) + LRAB = INT( LOG10( RAB+SFMIN ) / BASL+ONE ) + IR = LSCALE( I ) + SIGN( HALF, LSCALE( I ) ) + IR = MIN( MAX( IR, LSFMIN ), LSFMAX, LSFMAX-LRAB ) + LSCALE( I ) = SCLFAC**IR + ICAB = ICAMAX( IHI, A( 1, I ), 1 ) + CAB = ABS( A( ICAB, I ) ) + ICAB = ICAMAX( IHI, B( 1, I ), 1 ) + CAB = MAX( CAB, ABS( B( ICAB, I ) ) ) + LCAB = INT( LOG10( CAB+SFMIN ) / BASL+ONE ) + JC = RSCALE( I ) + SIGN( HALF, RSCALE( I ) ) + JC = MIN( MAX( JC, LSFMIN ), LSFMAX, LSFMAX-LCAB ) + RSCALE( I ) = SCLFAC**JC + 360 CONTINUE +* +* Row scaling of matrices A and B +* + DO 370 I = ILO, IHI + CALL CSSCAL( N-ILO+1, LSCALE( I ), A( I, ILO ), LDA ) + CALL CSSCAL( N-ILO+1, LSCALE( I ), B( I, ILO ), LDB ) + 370 CONTINUE +* +* Column scaling of matrices A and B +* + DO 380 J = ILO, IHI + CALL CSSCAL( IHI, RSCALE( J ), A( 1, J ), 1 ) + CALL CSSCAL( IHI, RSCALE( J ), B( 1, J ), 1 ) + 380 CONTINUE +* + RETURN +* +* End of CGGBAL +* + END diff --git a/costa/native/external/lapack/cgges.f b/costa/native/external/lapack/cgges.f new file mode 100644 index 000000000..9511c13de --- /dev/null +++ b/costa/native/external/lapack/cgges.f @@ -0,0 +1,476 @@ + SUBROUTINE CGGES( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, LDB, + $ SDIM, ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK, + $ LWORK, RWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR, SORT + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), + $ WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELCTG + EXTERNAL SELCTG +* .. +* +* Purpose +* ======= +* +* CGGES computes for a pair of N-by-N complex nonsymmetric matrices +* (A,B), the generalized eigenvalues, the generalized complex Schur +* form (S, T), and optionally left and/or right Schur vectors (VSL +* and VSR). This gives the generalized Schur factorization +* +* (A,B) = ( (VSL)*S*(VSR)**H, (VSL)*T*(VSR)**H ) +* +* where (VSR)**H is the conjugate-transpose of VSR. +* +* Optionally, it also orders the eigenvalues so that a selected cluster +* of eigenvalues appears in the leading diagonal blocks of the upper +* triangular matrix S and the upper triangular matrix T. The leading +* columns of VSL and VSR then form an unitary basis for the +* corresponding left and right eigenspaces (deflating subspaces). +* +* (If only the generalized eigenvalues are needed, use the driver +* CGGEV instead, which is faster.) +* +* A generalized eigenvalue for a pair of matrices (A,B) is a scalar w +* or a ratio alpha/beta = w, such that A - w*B is singular. It is +* usually represented as the pair (alpha,beta), as there is a +* reasonable interpretation for beta=0, and even for both being zero. +* +* A pair of matrices (S,T) is in generalized complex Schur form if S +* and T are upper triangular and, in addition, the diagonal elements +* of T are non-negative real numbers. +* +* Arguments +* ========= +* +* JOBVSL (input) CHARACTER*1 +* = 'N': do not compute the left Schur vectors; +* = 'V': compute the left Schur vectors. +* +* JOBVSR (input) CHARACTER*1 +* = 'N': do not compute the right Schur vectors; +* = 'V': compute the right Schur vectors. +* +* SORT (input) CHARACTER*1 +* Specifies whether or not to order the eigenvalues on the +* diagonal of the generalized Schur form. +* = 'N': Eigenvalues are not ordered; +* = 'S': Eigenvalues are ordered (see SELCTG). +* +* SELCTG (input) LOGICAL FUNCTION of two COMPLEX arguments +* SELCTG must be declared EXTERNAL in the calling subroutine. +* If SORT = 'N', SELCTG is not referenced. +* If SORT = 'S', SELCTG is used to select eigenvalues to sort +* to the top left of the Schur form. +* An eigenvalue ALPHA(j)/BETA(j) is selected if +* SELCTG(ALPHA(j),BETA(j)) is true. +* +* Note that a selected complex eigenvalue may no longer satisfy +* SELCTG(ALPHA(j),BETA(j)) = .TRUE. after ordering, since +* ordering may change the value of complex eigenvalues +* (especially if the eigenvalue is ill-conditioned), in this +* case INFO is set to N+2 (See INFO below). +* +* N (input) INTEGER +* The order of the matrices A, B, VSL, and VSR. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA, N) +* On entry, the first of the pair of matrices. +* On exit, A has been overwritten by its generalized Schur +* form S. +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) COMPLEX array, dimension (LDB, N) +* On entry, the second of the pair of matrices. +* On exit, B has been overwritten by its generalized Schur +* form T. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* SDIM (output) INTEGER +* If SORT = 'N', SDIM = 0. +* If SORT = 'S', SDIM = number of eigenvalues (after sorting) +* for which SELCTG is true. +* +* ALPHA (output) COMPLEX array, dimension (N) +* BETA (output) COMPLEX array, dimension (N) +* On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the +* generalized eigenvalues. ALPHA(j), j=1,...,N and BETA(j), +* j=1,...,N are the diagonals of the complex Schur form (A,B) +* output by CGGES. The BETA(j) will be non-negative real. +* +* Note: the quotients ALPHA(j)/BETA(j) may easily over- or +* underflow, and BETA(j) may even be zero. Thus, the user +* should avoid naively computing the ratio alpha/beta. +* However, ALPHA will be always less than and usually +* comparable with norm(A) in magnitude, and BETA always less +* than and usually comparable with norm(B). +* +* VSL (output) COMPLEX array, dimension (LDVSL,N) +* If JOBVSL = 'V', VSL will contain the left Schur vectors. +* Not referenced if JOBVSL = 'N'. +* +* LDVSL (input) INTEGER +* The leading dimension of the matrix VSL. LDVSL >= 1, and +* if JOBVSL = 'V', LDVSL >= N. +* +* VSR (output) COMPLEX array, dimension (LDVSR,N) +* If JOBVSR = 'V', VSR will contain the right Schur vectors. +* Not referenced if JOBVSR = 'N'. +* +* LDVSR (input) INTEGER +* The leading dimension of the matrix VSR. LDVSR >= 1, and +* if JOBVSR = 'V', LDVSR >= N. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,2*N). +* For good performance, LWORK must generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) REAL array, dimension (8*N) +* +* BWORK (workspace) LOGICAL array, dimension (N) +* Not referenced if SORT = 'N'. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* =1,...,N: +* The QZ iteration failed. (A,B) are not in Schur +* form, but ALPHA(j) and BETA(j) should be correct for +* j=INFO+1,...,N. +* > N: =N+1: other than QZ iteration failed in CHGEQZ +* =N+2: after reordering, roundoff changed values of +* some complex eigenvalues so that leading +* eigenvalues in the Generalized Schur form no +* longer satisfy SELCTG=.TRUE. This could also +* be caused due to scaling. +* =N+3: reordering falied in CTGSEN. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL, + $ LQUERY, WANTST + INTEGER I, ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, + $ ILO, IRIGHT, IROWS, IRWRK, ITAU, IWRK, LWKMIN, + $ LWKOPT + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PVSL, + $ PVSR, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + REAL DIF( 2 ) +* .. +* .. External Subroutines .. + EXTERNAL CGEQRF, CGGBAK, CGGBAL, CGGHRD, CHGEQZ, CLACPY, + $ CLASCL, CLASET, CTGSEN, CUNGQR, CUNMQR, SLABAD, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* + WANTST = LSAME( SORT, 'S' ) +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -14 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -16 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + LWKMIN = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + LWKMIN = MAX( 1, 2*N ) + LWKOPT = N + N*ILAENV( 1, 'CGEQRF', ' ', N, 1, N, 0 ) + IF( ILVSL ) THEN + LWKOPT = MAX( LWKOPT, N+N*ILAENV( 1, 'CUNGQR', ' ', N, 1, N, + $ -1 ) ) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) + $ INFO = -18 +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGES ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + WORK( 1 ) = LWKOPT + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF +* + IF( ILASCL ) + $ CALL CLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = CLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF +* + IF( ILBSCL ) + $ CALL CLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (Real Workspace: need 6*N) +* + ILEFT = 1 + IRIGHT = N + 1 + IRWRK = IRIGHT + N + CALL CGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Complex Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = 1 + IWRK = ITAU + IROWS + CALL CGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* (Complex Workspace: need N, prefer N*NB) +* + CALL CUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VSL +* (Complex Workspace: need N, prefer N*NB) +* + IF( ILVSL ) THEN + CALL CLASET( 'Full', N, N, CZERO, CONE, VSL, LDVSL ) + CALL CLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + CALL CUNGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VSR +* + IF( ILVSR ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + CALL CGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IERR ) +* + SDIM = 0 +* +* Perform QZ algorithm, computing Schur vectors if desired +* (Complex Workspace: need N) +* (Real Workspace: need N) +* + IWRK = ITAU + CALL CHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK( IWRK ), + $ LWORK+1-IWRK, RWORK( IRWRK ), IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 30 + END IF +* +* Sort eigenvalues ALPHA/BETA if desired +* (Workspace: none needed) +* + IF( WANTST ) THEN +* +* Undo scaling on eigenvalues before selecting +* + IF( ILASCL ) + $ CALL CLASCL( 'G', 0, 0, ANRM, ANRMTO, N, 1, ALPHA, N, IERR ) + IF( ILBSCL ) + $ CALL CLASCL( 'G', 0, 0, BNRM, BNRMTO, N, 1, BETA, N, IERR ) +* +* Select eigenvalues +* + DO 10 I = 1, N + BWORK( I ) = SELCTG( ALPHA( I ), BETA( I ) ) + 10 CONTINUE +* + CALL CTGSEN( 0, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, ALPHA, + $ BETA, VSL, LDVSL, VSR, LDVSR, SDIM, PVSL, PVSR, + $ DIF, WORK( IWRK ), LWORK-IWRK+1, IDUM, 1, IERR ) + IF( IERR.EQ.1 ) + $ INFO = N + 3 +* + END IF +* +* Apply back-permutation to VSL and VSR +* (Workspace: none needed) +* + IF( ILVSL ) + $ CALL CGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSL, LDVSL, IERR ) + IF( ILVSR ) + $ CALL CGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSR, LDVSR, IERR ) +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL CLASCL( 'U', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR ) + CALL CLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL CLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR ) + CALL CLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + IF( WANTST ) THEN +* +* Check if reordering is correct +* + LASTSL = .TRUE. + SDIM = 0 + DO 20 I = 1, N + CURSL = SELCTG( ALPHA( I ), BETA( I ) ) + IF( CURSL ) + $ SDIM = SDIM + 1 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + LASTSL = CURSL + 20 CONTINUE +* + END IF +* + 30 CONTINUE +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CGGES +* + END diff --git a/costa/native/external/lapack/cggesx.f b/costa/native/external/lapack/cggesx.f new file mode 100644 index 000000000..2585ca34c --- /dev/null +++ b/costa/native/external/lapack/cggesx.f @@ -0,0 +1,545 @@ + SUBROUTINE CGGESX( JOBVSL, JOBVSR, SORT, SELCTG, SENSE, N, A, LDA, + $ B, LDB, SDIM, ALPHA, BETA, VSL, LDVSL, VSR, + $ LDVSR, RCONDE, RCONDV, WORK, LWORK, RWORK, + $ IWORK, LIWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR, SENSE, SORT + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LIWORK, LWORK, N, + $ SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + INTEGER IWORK( * ) + REAL RCONDE( 2 ), RCONDV( 2 ), RWORK( * ) + COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), + $ WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELCTG + EXTERNAL SELCTG +* .. +* +* Purpose +* ======= +* +* CGGESX computes for a pair of N-by-N complex nonsymmetric matrices +* (A,B), the generalized eigenvalues, the complex Schur form (S,T), +* and, optionally, the left and/or right matrices of Schur vectors (VSL +* and VSR). This gives the generalized Schur factorization +* +* (A,B) = ( (VSL) S (VSR)**H, (VSL) T (VSR)**H ) +* +* where (VSR)**H is the conjugate-transpose of VSR. +* +* Optionally, it also orders the eigenvalues so that a selected cluster +* of eigenvalues appears in the leading diagonal blocks of the upper +* triangular matrix S and the upper triangular matrix T; computes +* a reciprocal condition number for the average of the selected +* eigenvalues (RCONDE); and computes a reciprocal condition number for +* the right and left deflating subspaces corresponding to the selected +* eigenvalues (RCONDV). The leading columns of VSL and VSR then form +* an orthonormal basis for the corresponding left and right eigenspaces +* (deflating subspaces). +* +* A generalized eigenvalue for a pair of matrices (A,B) is a scalar w +* or a ratio alpha/beta = w, such that A - w*B is singular. It is +* usually represented as the pair (alpha,beta), as there is a +* reasonable interpretation for beta=0 or for both being zero. +* +* A pair of matrices (S,T) is in generalized complex Schur form if T is +* upper triangular with non-negative diagonal and S is upper +* triangular. +* +* Arguments +* ========= +* +* JOBVSL (input) CHARACTER*1 +* = 'N': do not compute the left Schur vectors; +* = 'V': compute the left Schur vectors. +* +* JOBVSR (input) CHARACTER*1 +* = 'N': do not compute the right Schur vectors; +* = 'V': compute the right Schur vectors. +* +* SORT (input) CHARACTER*1 +* Specifies whether or not to order the eigenvalues on the +* diagonal of the generalized Schur form. +* = 'N': Eigenvalues are not ordered; +* = 'S': Eigenvalues are ordered (see SELCTG). +* +* SELCTG (input) LOGICAL FUNCTION of two COMPLEX arguments +* SELCTG must be declared EXTERNAL in the calling subroutine. +* If SORT = 'N', SELCTG is not referenced. +* If SORT = 'S', SELCTG is used to select eigenvalues to sort +* to the top left of the Schur form. +* Note that a selected complex eigenvalue may no longer satisfy +* SELCTG(ALPHA(j),BETA(j)) = .TRUE. after ordering, since +* ordering may change the value of complex eigenvalues +* (especially if the eigenvalue is ill-conditioned), in this +* case INFO is set to N+3 see INFO below). +* +* SENSE (input) CHARACTER +* Determines which reciprocal condition numbers are computed. +* = 'N' : None are computed; +* = 'E' : Computed for average of selected eigenvalues only; +* = 'V' : Computed for selected deflating subspaces only; +* = 'B' : Computed for both. +* If SENSE = 'E', 'V', or 'B', SORT must equal 'S'. +* +* N (input) INTEGER +* The order of the matrices A, B, VSL, and VSR. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA, N) +* On entry, the first of the pair of matrices. +* On exit, A has been overwritten by its generalized Schur +* form S. +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) COMPLEX array, dimension (LDB, N) +* On entry, the second of the pair of matrices. +* On exit, B has been overwritten by its generalized Schur +* form T. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* SDIM (output) INTEGER +* If SORT = 'N', SDIM = 0. +* If SORT = 'S', SDIM = number of eigenvalues (after sorting) +* for which SELCTG is true. +* +* ALPHA (output) COMPLEX array, dimension (N) +* BETA (output) COMPLEX array, dimension (N) +* On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the +* generalized eigenvalues. ALPHA(j) and BETA(j),j=1,...,N are +* the diagonals of the complex Schur form (S,T). BETA(j) will +* be non-negative real. +* +* Note: the quotients ALPHA(j)/BETA(j) may easily over- or +* underflow, and BETA(j) may even be zero. Thus, the user +* should avoid naively computing the ratio alpha/beta. +* However, ALPHA will be always less than and usually +* comparable with norm(A) in magnitude, and BETA always less +* than and usually comparable with norm(B). +* +* VSL (output) COMPLEX array, dimension (LDVSL,N) +* If JOBVSL = 'V', VSL will contain the left Schur vectors. +* Not referenced if JOBVSL = 'N'. +* +* LDVSL (input) INTEGER +* The leading dimension of the matrix VSL. LDVSL >=1, and +* if JOBVSL = 'V', LDVSL >= N. +* +* VSR (output) COMPLEX array, dimension (LDVSR,N) +* If JOBVSR = 'V', VSR will contain the right Schur vectors. +* Not referenced if JOBVSR = 'N'. +* +* LDVSR (input) INTEGER +* The leading dimension of the matrix VSR. LDVSR >= 1, and +* if JOBVSR = 'V', LDVSR >= N. +* +* RCONDE (output) REAL array, dimension ( 2 ) +* If SENSE = 'E' or 'B', RCONDE(1) and RCONDE(2) contain the +* reciprocal condition numbers for the average of the selected +* eigenvalues. +* Not referenced if SENSE = 'N' or 'V'. +* +* RCONDV (output) REAL array, dimension ( 2 ) +* If SENSE = 'V' or 'B', RCONDV(1) and RCONDV(2) contain the +* reciprocal condition number for the selected deflating +* subspaces. +* Not referenced if SENSE = 'N' or 'E'. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 2*N. +* If SENSE = 'E', 'V', or 'B', +* LWORK >= MAX(2*N, 2*SDIM*(N-SDIM)). +* +* RWORK (workspace) REAL array, dimension ( 8*N ) +* Real workspace. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* Not referenced if SENSE = 'N'. +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array WORK. LIWORK >= N+2. +* +* BWORK (workspace) LOGICAL array, dimension (N) +* Not referenced if SORT = 'N'. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* = 1,...,N: +* The QZ iteration failed. (A,B) are not in Schur +* form, but ALPHA(j) and BETA(j) should be correct for +* j=INFO+1,...,N. +* > N: =N+1: other than QZ iteration failed in CHGEQZ +* =N+2: after reordering, roundoff changed values of +* some complex eigenvalues so that leading +* eigenvalues in the Generalized Schur form no +* longer satisfy SELCTG=.TRUE. This could also +* be caused due to scaling. +* =N+3: reordering failed in CTGSEN. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL, + $ WANTSB, WANTSE, WANTSN, WANTST, WANTSV + INTEGER I, ICOLS, IERR, IHI, IJOB, IJOBVL, IJOBVR, + $ ILEFT, ILO, IRIGHT, IROWS, IRWRK, ITAU, IWRK, + $ LIWMIN, MAXWRK, MINWRK + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PL, + $ PR, SMLNUM +* .. +* .. Local Arrays .. + REAL DIF( 2 ) +* .. +* .. External Subroutines .. + EXTERNAL CGEQRF, CGGBAK, CGGBAL, CGGHRD, CHGEQZ, CLACPY, + $ CLASCL, CLASET, CTGSEN, CUNGQR, CUNMQR, SLABAD, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* + WANTST = LSAME( SORT, 'S' ) + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) + IF( WANTSN ) THEN + IJOB = 0 + IWORK( 1 ) = 1 + ELSE IF( WANTSE ) THEN + IJOB = 1 + ELSE IF( WANTSV ) THEN + IJOB = 2 + ELSE IF( WANTSB ) THEN + IJOB = 4 + END IF +* +* Test the input arguments +* + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR. + $ ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -15 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -17 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. LWORK.GE.1 ) THEN + MINWRK = MAX( 1, 2*N ) + MAXWRK = N + N*ILAENV( 1, 'CGEQRF', ' ', N, 1, N, 0 ) + IF( ILVSL ) THEN + MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'CUNGQR', ' ', N, 1, N, + $ -1 ) ) + END IF + WORK( 1 ) = MAXWRK + END IF + IF( .NOT.WANTSN ) THEN + LIWMIN = N+2 + ELSE + LIWMIN = 1 + END IF + IWORK( 1 ) = LIWMIN +* + IF( INFO.EQ.0 .AND. LWORK.LT.MINWRK ) THEN + INFO = -21 + ELSE IF( INFO.EQ.0 .AND. IJOB.GE.1 ) THEN + IF( LIWORK.LT.LIWMIN ) + $ INFO = -24 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGESX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL CLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = CLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL CLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (Real Workspace: need 6*N) +* + ILEFT = 1 + IRIGHT = N + 1 + IRWRK = IRIGHT + N + CALL CGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Complex Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = 1 + IWRK = ITAU + IROWS + CALL CGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the unitary transformation to matrix A +* (Complex Workspace: need N, prefer N*NB) +* + CALL CUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VSL +* (Complex Workspace: need N, prefer N*NB) +* + IF( ILVSL ) THEN + CALL CLASET( 'Full', N, N, CZERO, CONE, VSL, LDVSL ) + CALL CLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + CALL CUNGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VSR +* + IF( ILVSR ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + CALL CGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IERR ) +* + SDIM = 0 +* +* Perform QZ algorithm, computing Schur vectors if desired +* (Complex Workspace: need N) +* (Real Workspace: need N) +* + IWRK = ITAU + CALL CHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK( IWRK ), + $ LWORK+1-IWRK, RWORK( IRWRK ), IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 40 + END IF +* +* Sort eigenvalues ALPHA/BETA and compute the reciprocal of +* condition number(s) +* + IF( WANTST ) THEN +* +* Undo scaling on eigenvalues before SELCTGing +* + IF( ILASCL ) + $ CALL CLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) + IF( ILBSCL ) + $ CALL CLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* +* Select eigenvalues +* + DO 10 I = 1, N + BWORK( I ) = SELCTG( ALPHA( I ), BETA( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues, transform Generalized Schur vectors, and +* compute reciprocal condition numbers +* (Complex Workspace: If IJOB >= 1, need MAX(1, 2*SDIM*(N-SDIM)) +* otherwise, need 1 ) +* + CALL CTGSEN( IJOB, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, + $ ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, SDIM, PL, PR, + $ DIF, WORK( IWRK ), LWORK-IWRK+1, IWORK, LIWORK, + $ IERR ) +* + IF( IJOB.GE.1 ) + $ MAXWRK = MAX( MAXWRK, 2*SDIM*( N-SDIM ) ) + IF( IERR.EQ.-21 ) THEN +* +* not enough complex workspace +* + INFO = -21 + ELSE + RCONDE( 1 ) = PL + RCONDE( 2 ) = PL + RCONDV( 1 ) = DIF( 1 ) + RCONDV( 2 ) = DIF( 2 ) + IF( IERR.EQ.1 ) + $ INFO = N + 3 + END IF +* + END IF +* +* Apply permutation to VSL and VSR +* (Workspace: none needed) +* + IF( ILVSL ) + $ CALL CGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSL, LDVSL, IERR ) +* + IF( ILVSR ) + $ CALL CGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSR, LDVSR, IERR ) +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL CLASCL( 'U', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR ) + CALL CLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL CLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR ) + CALL CLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + 20 CONTINUE +* + IF( WANTST ) THEN +* +* Check if reordering is correct +* + LASTSL = .TRUE. + SDIM = 0 + DO 30 I = 1, N + CURSL = SELCTG( ALPHA( I ), BETA( I ) ) + IF( CURSL ) + $ SDIM = SDIM + 1 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + LASTSL = CURSL + 30 CONTINUE +* + END IF +* + 40 CONTINUE +* + WORK( 1 ) = MAXWRK + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of CGGESX +* + END diff --git a/costa/native/external/lapack/cggev.f b/costa/native/external/lapack/cggev.f new file mode 100644 index 000000000..74315981a --- /dev/null +++ b/costa/native/external/lapack/cggev.f @@ -0,0 +1,449 @@ + SUBROUTINE CGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, + $ VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGGEV computes for a pair of N-by-N complex nonsymmetric matrices +* (A,B), the generalized eigenvalues, and optionally, the left and/or +* right generalized eigenvectors. +* +* A generalized eigenvalue for a pair of matrices (A,B) is a scalar +* lambda or a ratio alpha/beta = lambda, such that A - lambda*B is +* singular. It is usually represented as the pair (alpha,beta), as +* there is a reasonable interpretation for beta=0, and even for both +* being zero. +* +* The right generalized eigenvector v(j) corresponding to the +* generalized eigenvalue lambda(j) of (A,B) satisfies +* +* A * v(j) = lambda(j) * B * v(j). +* +* The left generalized eigenvector u(j) corresponding to the +* generalized eigenvalues lambda(j) of (A,B) satisfies +* +* u(j)**H * A = lambda(j) * u(j)**H * B +* +* where u(j)**H is the conjugate-transpose of u(j). +* +* Arguments +* ========= +* +* JOBVL (input) CHARACTER*1 +* = 'N': do not compute the left generalized eigenvectors; +* = 'V': compute the left generalized eigenvectors. +* +* JOBVR (input) CHARACTER*1 +* = 'N': do not compute the right generalized eigenvectors; +* = 'V': compute the right generalized eigenvectors. +* +* N (input) INTEGER +* The order of the matrices A, B, VL, and VR. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA, N) +* On entry, the matrix A in the pair (A,B). +* On exit, A has been overwritten. +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) COMPLEX array, dimension (LDB, N) +* On entry, the matrix B in the pair (A,B). +* On exit, B has been overwritten. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* ALPHA (output) COMPLEX array, dimension (N) +* BETA (output) COMPLEX array, dimension (N) +* On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the +* generalized eigenvalues. +* +* Note: the quotients ALPHA(j)/BETA(j) may easily over- or +* underflow, and BETA(j) may even be zero. Thus, the user +* should avoid naively computing the ratio alpha/beta. +* However, ALPHA will be always less than and usually +* comparable with norm(A) in magnitude, and BETA always less +* than and usually comparable with norm(B). +* +* VL (output) COMPLEX array, dimension (LDVL,N) +* If JOBVL = 'V', the left generalized eigenvectors u(j) are +* stored one after another in the columns of VL, in the same +* order as their eigenvalues. +* Each eigenvector will be scaled so the largest component +* will have abs(real part) + abs(imag. part) = 1. +* Not referenced if JOBVL = 'N'. +* +* LDVL (input) INTEGER +* The leading dimension of the matrix VL. LDVL >= 1, and +* if JOBVL = 'V', LDVL >= N. +* +* VR (output) COMPLEX array, dimension (LDVR,N) +* If JOBVR = 'V', the right generalized eigenvectors v(j) are +* stored one after another in the columns of VR, in the same +* order as their eigenvalues. +* Each eigenvector will be scaled so the largest component +* will have abs(real part) + abs(imag. part) = 1. +* Not referenced if JOBVR = 'N'. +* +* LDVR (input) INTEGER +* The leading dimension of the matrix VR. LDVR >= 1, and +* if JOBVR = 'V', LDVR >= N. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,2*N). +* For good performance, LWORK must generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace/output) REAL array, dimension (8*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* =1,...,N: +* The QZ iteration failed. No eigenvectors have been +* calculated, but ALPHA(j) and BETA(j) should be +* correct for j=INFO+1,...,N. +* > N: =N+1: other then QZ iteration failed in SHGEQZ, +* =N+2: error return from STGEVC. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY + CHARACTER CHTEMP + INTEGER ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, ILO, + $ IN, IRIGHT, IROWS, IRWRK, ITAU, IWRK, JC, JR, + $ LWKMIN, LWKOPT + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SMLNUM, TEMP + COMPLEX X +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CGEQRF, CGGBAK, CGGBAL, CGGHRD, CHGEQZ, CLACPY, + $ CLASCL, CLASET, CTGEVC, CUNGQR, CUNMQR, SLABAD, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL, SQRT +* .. +* .. Statement Functions .. + REAL ABS1 +* .. +* .. Statement Function definitions .. + ABS1( X ) = ABS( REAL( X ) ) + ABS( AIMAG( X ) ) +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -11 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -13 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. The workspace is +* computed assuming ILO = 1 and IHI = N, the worst case.) +* + LWKMIN = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + LWKOPT = N + N*ILAENV( 1, 'CGEQRF', ' ', N, 1, N, 0 ) + LWKMIN = MAX( 1, 2*N ) + WORK( 1 ) = LWKOPT + END IF +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) + $ INFO = -15 +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + WORK( 1 ) = LWKOPT + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'E' )*SLAMCH( 'B' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL CLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = CLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL CLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrices A, B to isolate eigenvalues if possible +* (Real Workspace: need 6*N) +* + ILEFT = 1 + IRIGHT = N + 1 + IRWRK = IRIGHT + N + CALL CGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Complex Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + IF( ILV ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = 1 + IWRK = ITAU + IROWS + CALL CGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* (Complex Workspace: need N, prefer N*NB) +* + CALL CUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VL +* (Complex Workspace: need N, prefer N*NB) +* + IF( ILVL ) THEN + CALL CLASET( 'Full', N, N, CZERO, CONE, VL, LDVL ) + CALL CLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + CALL CUNGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VR +* + IF( ILVR ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* + IF( ILV ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL CGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IERR ) + ELSE + CALL CGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR ) + END IF +* +* Perform QZ algorithm (Compute eigenvalues, and optionally, the +* Schur form and Schur vectors) +* (Complex Workspace: need N) +* (Real Workspace: need N) +* + IWRK = ITAU + IF( ILV ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF + CALL CHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK( IWRK ), + $ LWORK+1-IWRK, RWORK( IRWRK ), IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 70 + END IF +* +* Compute Eigenvectors +* (Real Workspace: need 2*N) +* (Complex Workspace: need 2*N) +* + IF( ILV ) THEN + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF +* + CALL CTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, N, IN, WORK( IWRK ), RWORK( IRWRK ), + $ IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 70 + END IF +* +* Undo balancing on VL and VR and normalization +* (Workspace: none needed) +* + IF( ILVL ) THEN + CALL CGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VL, LDVL, IERR ) + DO 30 JC = 1, N + TEMP = ZERO + DO 10 JR = 1, N + TEMP = MAX( TEMP, ABS1( VL( JR, JC ) ) ) + 10 CONTINUE + IF( TEMP.LT.SMLNUM ) + $ GO TO 30 + TEMP = ONE / TEMP + DO 20 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 20 CONTINUE + 30 CONTINUE + END IF + IF( ILVR ) THEN + CALL CGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VR, LDVR, IERR ) + DO 60 JC = 1, N + TEMP = ZERO + DO 40 JR = 1, N + TEMP = MAX( TEMP, ABS1( VR( JR, JC ) ) ) + 40 CONTINUE + IF( TEMP.LT.SMLNUM ) + $ GO TO 60 + TEMP = ONE / TEMP + DO 50 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 50 CONTINUE + 60 CONTINUE + END IF + END IF +* +* Undo scaling if necessary +* + IF( ILASCL ) + $ CALL CLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) +* + IF( ILBSCL ) + $ CALL CLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* + 70 CONTINUE + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CGGEV +* + END diff --git a/costa/native/external/lapack/cggevx.f b/costa/native/external/lapack/cggevx.f new file mode 100644 index 000000000..d9fdc2b0f --- /dev/null +++ b/costa/native/external/lapack/cggevx.f @@ -0,0 +1,640 @@ + SUBROUTINE CGGEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, B, LDB, + $ ALPHA, BETA, VL, LDVL, VR, LDVR, ILO, IHI, + $ LSCALE, RSCALE, ABNRM, BBNRM, RCONDE, RCONDV, + $ WORK, LWORK, RWORK, IWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER BALANC, JOBVL, JOBVR, SENSE + INTEGER IHI, ILO, INFO, LDA, LDB, LDVL, LDVR, LWORK, N + REAL ABNRM, BBNRM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + INTEGER IWORK( * ) + REAL LSCALE( * ), RCONDE( * ), RCONDV( * ), + $ RSCALE( * ), RWORK( * ) + COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGGEVX computes for a pair of N-by-N complex nonsymmetric matrices +* (A,B) the generalized eigenvalues, and optionally, the left and/or +* right generalized eigenvectors. +* +* Optionally, it also computes a balancing transformation to improve +* the conditioning of the eigenvalues and eigenvectors (ILO, IHI, +* LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for +* the eigenvalues (RCONDE), and reciprocal condition numbers for the +* right eigenvectors (RCONDV). +* +* A generalized eigenvalue for a pair of matrices (A,B) is a scalar +* lambda or a ratio alpha/beta = lambda, such that A - lambda*B is +* singular. It is usually represented as the pair (alpha,beta), as +* there is a reasonable interpretation for beta=0, and even for both +* being zero. +* +* The right eigenvector v(j) corresponding to the eigenvalue lambda(j) +* of (A,B) satisfies +* A * v(j) = lambda(j) * B * v(j) . +* The left eigenvector u(j) corresponding to the eigenvalue lambda(j) +* of (A,B) satisfies +* u(j)**H * A = lambda(j) * u(j)**H * B. +* where u(j)**H is the conjugate-transpose of u(j). +* +* +* Arguments +* ========= +* +* BALANC (input) CHARACTER*1 +* Specifies the balance option to be performed: +* = 'N': do not diagonally scale or permute; +* = 'P': permute only; +* = 'S': scale only; +* = 'B': both permute and scale. +* Computed reciprocal condition numbers will be for the +* matrices after permuting and/or balancing. Permuting does +* not change condition numbers (in exact arithmetic), but +* balancing does. +* +* JOBVL (input) CHARACTER*1 +* = 'N': do not compute the left generalized eigenvectors; +* = 'V': compute the left generalized eigenvectors. +* +* JOBVR (input) CHARACTER*1 +* = 'N': do not compute the right generalized eigenvectors; +* = 'V': compute the right generalized eigenvectors. +* +* SENSE (input) CHARACTER*1 +* Determines which reciprocal condition numbers are computed. +* = 'N': none are computed; +* = 'E': computed for eigenvalues only; +* = 'V': computed for eigenvectors only; +* = 'B': computed for eigenvalues and eigenvectors. +* +* N (input) INTEGER +* The order of the matrices A, B, VL, and VR. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA, N) +* On entry, the matrix A in the pair (A,B). +* On exit, A has been overwritten. If JOBVL='V' or JOBVR='V' +* or both, then A contains the first part of the complex Schur +* form of the "balanced" versions of the input A and B. +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) COMPLEX array, dimension (LDB, N) +* On entry, the matrix B in the pair (A,B). +* On exit, B has been overwritten. If JOBVL='V' or JOBVR='V' +* or both, then B contains the second part of the complex +* Schur form of the "balanced" versions of the input A and B. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* ALPHA (output) COMPLEX array, dimension (N) +* BETA (output) COMPLEX array, dimension (N) +* On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the generalized +* eigenvalues. +* +* Note: the quotient ALPHA(j)/BETA(j) ) may easily over- or +* underflow, and BETA(j) may even be zero. Thus, the user +* should avoid naively computing the ratio ALPHA/BETA. +* However, ALPHA will be always less than and usually +* comparable with norm(A) in magnitude, and BETA always less +* than and usually comparable with norm(B). +* +* VL (output) COMPLEX array, dimension (LDVL,N) +* If JOBVL = 'V', the left generalized eigenvectors u(j) are +* stored one after another in the columns of VL, in the same +* order as their eigenvalues. +* Each eigenvector will be scaled so the largest component +* will have abs(real part) + abs(imag. part) = 1. +* Not referenced if JOBVL = 'N'. +* +* LDVL (input) INTEGER +* The leading dimension of the matrix VL. LDVL >= 1, and +* if JOBVL = 'V', LDVL >= N. +* +* VR (output) COMPLEX array, dimension (LDVR,N) +* If JOBVR = 'V', the right generalized eigenvectors v(j) are +* stored one after another in the columns of VR, in the same +* order as their eigenvalues. +* Each eigenvector will be scaled so the largest component +* will have abs(real part) + abs(imag. part) = 1. +* Not referenced if JOBVR = 'N'. +* +* LDVR (input) INTEGER +* The leading dimension of the matrix VR. LDVR >= 1, and +* if JOBVR = 'V', LDVR >= N. +* +* ILO,IHI (output) INTEGER +* ILO and IHI are integer values such that on exit +* A(i,j) = 0 and B(i,j) = 0 if i > j and +* j = 1,...,ILO-1 or i = IHI+1,...,N. +* If BALANC = 'N' or 'S', ILO = 1 and IHI = N. +* +* LSCALE (output) REAL array, dimension (N) +* Details of the permutations and scaling factors applied +* to the left side of A and B. If PL(j) is the index of the +* row interchanged with row j, and DL(j) is the scaling +* factor applied to row j, then +* LSCALE(j) = PL(j) for j = 1,...,ILO-1 +* = DL(j) for j = ILO,...,IHI +* = PL(j) for j = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* RSCALE (output) REAL array, dimension (N) +* Details of the permutations and scaling factors applied +* to the right side of A and B. If PR(j) is the index of the +* column interchanged with column j, and DR(j) is the scaling +* factor applied to column j, then +* RSCALE(j) = PR(j) for j = 1,...,ILO-1 +* = DR(j) for j = ILO,...,IHI +* = PR(j) for j = IHI+1,...,N +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* ABNRM (output) REAL +* The one-norm of the balanced matrix A. +* +* BBNRM (output) REAL +* The one-norm of the balanced matrix B. +* +* RCONDE (output) REAL array, dimension (N) +* If SENSE = 'E' or 'B', the reciprocal condition numbers of +* the selected eigenvalues, stored in consecutive elements of +* the array. +* If SENSE = 'V', RCONDE is not referenced. +* +* RCONDV (output) REAL array, dimension (N) +* If JOB = 'V' or 'B', the estimated reciprocal condition +* numbers of the selected eigenvectors, stored in consecutive +* elements of the array. If the eigenvalues cannot be reordered +* to compute RCONDV(j), RCONDV(j) is set to 0; this can only +* occur when the true value would be very small anyway. +* If SENSE = 'E', RCONDV is not referenced. +* Not referenced if JOB = 'E'. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,2*N). +* If SENSE = 'N' or 'E', LWORK >= 2*N. +* If SENSE = 'V' or 'B', LWORK >= 2*N*N+2*N. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) REAL array, dimension (6*N) +* Real workspace. +* +* IWORK (workspace) INTEGER array, dimension (N+2) +* If SENSE = 'E', IWORK is not referenced. +* +* BWORK (workspace) LOGICAL array, dimension (N) +* If SENSE = 'N', BWORK is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* = 1,...,N: +* The QZ iteration failed. No eigenvectors have been +* calculated, but ALPHA(j) and BETA(j) should be correct +* for j=INFO+1,...,N. +* > N: =N+1: other than QZ iteration failed in CHGEQZ. +* =N+2: error return from CTGEVC. +* +* Further Details +* =============== +* +* Balancing a matrix pair (A,B) includes, first, permuting rows and +* columns to isolate eigenvalues, second, applying diagonal similarity +* transformation to the rows and columns to make the rows and columns +* as close in norm as possible. The computed reciprocal condition +* numbers correspond to the balanced matrix. Permuting rows and columns +* will not change the condition numbers (in exact arithmetic) but +* diagonal scaling will. For further explanation of balancing, see +* section 4.11.1.2 of LAPACK Users' Guide. +* +* An approximate error bound on the chordal distance between the i-th +* computed generalized eigenvalue w and the corresponding exact +* eigenvalue lambda is +* +* chord(w, lambda) <= EPS * norm(ABNRM, BBNRM) / RCONDE(I) +* +* An approximate error bound for the angle between the i-th computed +* eigenvector VL(i) or VR(i) is given by +* +* EPS * norm(ABNRM, BBNRM) / DIF(i). +* +* For further explanation of the reciprocal condition numbers RCONDE +* and RCONDV, see section 4.11 of LAPACK User's Guide. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY, + $ WANTSB, WANTSE, WANTSN, WANTSV + CHARACTER CHTEMP + INTEGER I, ICOLS, IERR, IJOBVL, IJOBVR, IN, IROWS, + $ ITAU, IWRK, IWRK1, J, JC, JR, M, MAXWRK, MINWRK + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SMLNUM, TEMP + COMPLEX X +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CGEQRF, CGGBAK, CGGBAL, CGGHRD, CHGEQZ, CLACPY, + $ CLASCL, CLASET, CTGEVC, CTGSNA, CUNGQR, CUNMQR, + $ SLABAD, SLASCL, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL, SQRT +* .. +* .. Statement Functions .. + REAL ABS1 +* .. +* .. Statement Function definitions .. + ABS1( X ) = ABS( REAL( X ) ) + ABS( AIMAG( X ) ) +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( BALANC, 'N' ) .OR. LSAME( BALANC, + $ 'S' ) .OR. LSAME( BALANC, 'P' ) .OR. LSAME( BALANC, 'B' ) ) ) + $ THEN + INFO = -1 + ELSE IF( IJOBVL.LE.0 ) THEN + INFO = -2 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -3 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSB .OR. WANTSV ) ) + $ THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -13 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -15 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. The workspace is +* computed assuming ILO = 1 and IHI = N, the worst case.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MAXWRK = N + N*ILAENV( 1, 'CGEQRF', ' ', N, 1, N, 0 ) + IF( WANTSE ) THEN + MINWRK = MAX( 1, 2*N ) + ELSE IF( WANTSV .OR. WANTSB ) THEN + MINWRK = 2*N*N + 2*N + MAXWRK = MAX( MAXWRK, 2*N*N+2*N ) + END IF + WORK( 1 ) = MAXWRK + END IF +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -25 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL CLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = CLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL CLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute and/or balance the matrix pair (A,B) +* (Real Workspace: need 6*N) +* + CALL CGGBAL( BALANC, N, A, LDA, B, LDB, ILO, IHI, LSCALE, RSCALE, + $ RWORK, IERR ) +* +* Compute ABNRM and BBNRM +* + ABNRM = CLANGE( '1', N, N, A, LDA, RWORK( 1 ) ) + IF( ILASCL ) THEN + RWORK( 1 ) = ABNRM + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, 1, 1, RWORK( 1 ), 1, + $ IERR ) + ABNRM = RWORK( 1 ) + END IF +* + BBNRM = CLANGE( '1', N, N, B, LDB, RWORK( 1 ) ) + IF( ILBSCL ) THEN + RWORK( 1 ) = BBNRM + CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, 1, 1, RWORK( 1 ), 1, + $ IERR ) + BBNRM = RWORK( 1 ) + END IF +* +* Reduce B to triangular form (QR decomposition of B) +* (Complex Workspace: need N, prefer N*NB ) +* + IROWS = IHI + 1 - ILO + IF( ILV .OR. .NOT.WANTSN ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = 1 + IWRK = ITAU + IROWS + CALL CGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the unitary transformation to A +* (Complex Workspace: need N, prefer N*NB) +* + CALL CUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VL and/or VR +* (Workspace: need N, prefer N*NB) +* + IF( ILVL ) THEN + CALL CLASET( 'Full', N, N, CZERO, CONE, VL, LDVL ) + CALL CLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + CALL CUNGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* + IF( ILVR ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + IF( ILV .OR. .NOT.WANTSN ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL CGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IERR ) + ELSE + CALL CGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR ) + END IF +* +* Perform QZ algorithm (Compute eigenvalues, and optionally, the +* Schur forms and Schur vectors) +* (Complex Workspace: need N) +* (Real Workspace: need N) +* + IWRK = ITAU + IF( ILV .OR. .NOT.WANTSN ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF +* + CALL CHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK( IWRK ), + $ LWORK+1-IWRK, RWORK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 90 + END IF +* +* Compute Eigenvectors and estimate condition numbers if desired +* CTGEVC: (Complex Workspace: need 2*N ) +* (Real Workspace: need 2*N ) +* CTGSNA: (Complex Workspace: need 2*N*N if SENSE='V' or 'B') +* (Integer Workspace: need N+2 ) +* + IF( ILV .OR. .NOT.WANTSN ) THEN + IF( ILV ) THEN + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF +* + CALL CTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, N, IN, WORK( IWRK ), RWORK, + $ IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 90 + END IF + END IF +* + IF( .NOT.WANTSN ) THEN +* +* compute eigenvectors (STGEVC) and estimate condition +* numbers (STGSNA). Note that the definition of the condition +* number is not invariant under transformation (u,v) to +* (Q*u, Z*v), where (u,v) are eigenvectors of the generalized +* Schur form (S,T), Q and Z are orthogonal matrices. In order +* to avoid using extra 2*N*N workspace, we have to +* re-calculate eigenvectors and estimate the condition numbers +* one at a time. +* + DO 20 I = 1, N +* + DO 10 J = 1, N + BWORK( J ) = .FALSE. + 10 CONTINUE + BWORK( I ) = .TRUE. +* + IWRK = N + 1 + IWRK1 = IWRK + N +* + IF( WANTSE .OR. WANTSB ) THEN + CALL CTGEVC( 'B', 'S', BWORK, N, A, LDA, B, LDB, + $ WORK( 1 ), N, WORK( IWRK ), N, 1, M, + $ WORK( IWRK1 ), RWORK, IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 90 + END IF + END IF +* + CALL CTGSNA( SENSE, 'S', BWORK, N, A, LDA, B, LDB, + $ WORK( 1 ), N, WORK( IWRK ), N, RCONDE( I ), + $ RCONDV( I ), 1, M, WORK( IWRK1 ), + $ LWORK-IWRK1+1, IWORK, IERR ) +* + 20 CONTINUE + END IF + END IF +* +* Undo balancing on VL and VR and normalization +* (Workspace: none needed) +* + IF( ILVL ) THEN + CALL CGGBAK( BALANC, 'L', N, ILO, IHI, LSCALE, RSCALE, N, VL, + $ LDVL, IERR ) +* + DO 50 JC = 1, N + TEMP = ZERO + DO 30 JR = 1, N + TEMP = MAX( TEMP, ABS1( VL( JR, JC ) ) ) + 30 CONTINUE + IF( TEMP.LT.SMLNUM ) + $ GO TO 50 + TEMP = ONE / TEMP + DO 40 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 40 CONTINUE + 50 CONTINUE + END IF +* + IF( ILVR ) THEN + CALL CGGBAK( BALANC, 'R', N, ILO, IHI, LSCALE, RSCALE, N, VR, + $ LDVR, IERR ) + DO 80 JC = 1, N + TEMP = ZERO + DO 60 JR = 1, N + TEMP = MAX( TEMP, ABS1( VR( JR, JC ) ) ) + 60 CONTINUE + IF( TEMP.LT.SMLNUM ) + $ GO TO 80 + TEMP = ONE / TEMP + DO 70 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 70 CONTINUE + 80 CONTINUE + END IF +* +* Undo scaling if necessary +* + IF( ILASCL ) + $ CALL CLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) +* + IF( ILBSCL ) + $ CALL CLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* + 90 CONTINUE + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of CGGEVX +* + END diff --git a/costa/native/external/lapack/cggglm.f b/costa/native/external/lapack/cggglm.f new file mode 100644 index 000000000..e81b8f932 --- /dev/null +++ b/costa/native/external/lapack/cggglm.f @@ -0,0 +1,213 @@ + SUBROUTINE CGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), D( * ), WORK( * ), + $ X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CGGGLM solves a general Gauss-Markov linear model (GLM) problem: +* +* minimize || y ||_2 subject to d = A*x + B*y +* x +* +* where A is an N-by-M matrix, B is an N-by-P matrix, and d is a +* given N-vector. It is assumed that M <= N <= M+P, and +* +* rank(A) = M and rank( A B ) = N. +* +* Under these assumptions, the constrained equation is always +* consistent, and there is a unique solution x and a minimal 2-norm +* solution y, which is obtained using a generalized QR factorization +* of A and B. +* +* In particular, if matrix B is square nonsingular, then the problem +* GLM is equivalent to the following weighted linear least squares +* problem +* +* minimize || inv(B)*(d-A*x) ||_2 +* x +* +* where inv(B) denotes the inverse of B. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of rows of the matrices A and B. N >= 0. +* +* M (input) INTEGER +* The number of columns of the matrix A. 0 <= M <= N. +* +* P (input) INTEGER +* The number of columns of the matrix B. P >= N-M. +* +* A (input/output) COMPLEX array, dimension (LDA,M) +* On entry, the N-by-M matrix A. +* On exit, A is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX array, dimension (LDB,P) +* On entry, the N-by-P matrix B. +* On exit, B is destroyed. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* D (input/output) COMPLEX array, dimension (N) +* On entry, D is the left hand side of the GLM equation. +* On exit, D is destroyed. +* +* X (output) COMPLEX array, dimension (M) +* Y (output) COMPLEX array, dimension (P) +* On exit, X and Y are the solutions of the GLM problem. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N+M+P). +* For optimum performance, LWORK >= M+min(N,P)+max(N,P)*NB, +* where NB is an upper bound for the optimal blocksizes for +* CGEQRF, CGERQF, CUNMQR and CUNMRQ. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* =================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, LOPT, LWKOPT, NB, NB1, NB2, NB3, NB4, NP +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMV, CGGQRF, CTRSV, CUNMQR, CUNMRQ, + $ XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NP = MIN( N, P ) + NB1 = ILAENV( 1, 'CGEQRF', ' ', N, M, -1, -1 ) + NB2 = ILAENV( 1, 'CGERQF', ' ', N, M, -1, -1 ) + NB3 = ILAENV( 1, 'CUNMQR', ' ', N, M, P, -1 ) + NB4 = ILAENV( 1, 'CUNMRQ', ' ', N, M, P, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKOPT = M + NP + MAX( N, P )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 .OR. M.GT.N ) THEN + INFO = -2 + ELSE IF( P.LT.0 .OR. P.LT.N-M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.MAX( 1, N+M+P ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGGLM', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the GQR factorization of matrices A and B: +* +* Q'*A = ( R11 ) M, Q'*B*Z' = ( T11 T12 ) M +* ( 0 ) N-M ( 0 T22 ) N-M +* M M+P-N N-M +* +* where R11 and T22 are upper triangular, and Q and Z are +* unitary. +* + CALL CGGQRF( N, M, P, A, LDA, WORK, B, LDB, WORK( M+1 ), + $ WORK( M+NP+1 ), LWORK-M-NP, INFO ) + LOPT = WORK( M+NP+1 ) +* +* Update left-hand-side vector d = Q'*d = ( d1 ) M +* ( d2 ) N-M +* + CALL CUNMQR( 'Left', 'Conjugate transpose', N, 1, M, A, LDA, WORK, + $ D, MAX( 1, N ), WORK( M+NP+1 ), LWORK-M-NP, INFO ) + LOPT = MAX( LOPT, INT( WORK( M+NP+1 ) ) ) +* +* Solve T22*y2 = d2 for y2 +* + CALL CTRSV( 'Upper', 'No transpose', 'Non unit', N-M, + $ B( M+1, M+P-N+1 ), LDB, D( M+1 ), 1 ) + CALL CCOPY( N-M, D( M+1 ), 1, Y( M+P-N+1 ), 1 ) +* +* Set y1 = 0 +* + DO 10 I = 1, M + P - N + Y( I ) = CZERO + 10 CONTINUE +* +* Update d1 = d1 - T12*y2 +* + CALL CGEMV( 'No transpose', M, N-M, -CONE, B( 1, M+P-N+1 ), LDB, + $ Y( M+P-N+1 ), 1, CONE, D, 1 ) +* +* Solve triangular system: R11*x = d1 +* + CALL CTRSV( 'Upper', 'No Transpose', 'Non unit', M, A, LDA, D, 1 ) +* +* Copy D to X +* + CALL CCOPY( M, D, 1, X, 1 ) +* +* Backward transformation y = Z'*y +* + CALL CUNMRQ( 'Left', 'Conjugate transpose', P, 1, NP, + $ B( MAX( 1, N-P+1 ), 1 ), LDB, WORK( M+1 ), Y, + $ MAX( 1, P ), WORK( M+NP+1 ), LWORK-M-NP, INFO ) + WORK( 1 ) = M + NP + MAX( LOPT, INT( WORK( M+NP+1 ) ) ) +* + RETURN +* +* End of CGGGLM +* + END diff --git a/costa/native/external/lapack/cgghrd.f b/costa/native/external/lapack/cgghrd.f new file mode 100644 index 000000000..c296c145d --- /dev/null +++ b/costa/native/external/lapack/cgghrd.f @@ -0,0 +1,256 @@ + SUBROUTINE CGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, + $ LDQ, Z, LDZ, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, COMPZ + INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CGGHRD reduces a pair of complex matrices (A,B) to generalized upper +* Hessenberg form using unitary transformations, where A is a +* general matrix and B is upper triangular: Q' * A * Z = H and +* Q' * B * Z = T, where H is upper Hessenberg, T is upper triangular, +* and Q and Z are unitary, and ' means conjugate transpose. +* +* The unitary matrices Q and Z are determined as products of Givens +* rotations. They may either be formed explicitly, or they may be +* postmultiplied into input matrices Q1 and Z1, so that +* +* Q1 * A * Z1' = (Q1*Q) * H * (Z1*Z)' +* Q1 * B * Z1' = (Q1*Q) * T * (Z1*Z)' +* +* Arguments +* ========= +* +* COMPQ (input) CHARACTER*1 +* = 'N': do not compute Q; +* = 'I': Q is initialized to the unit matrix, and the +* unitary matrix Q is returned; +* = 'V': Q must contain a unitary matrix Q1 on entry, +* and the product Q1*Q is returned. +* +* COMPZ (input) CHARACTER*1 +* = 'N': do not compute Q; +* = 'I': Q is initialized to the unit matrix, and the +* unitary matrix Q is returned; +* = 'V': Q must contain a unitary matrix Q1 on entry, +* and the product Q1*Q is returned. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that A is already upper triangular in rows and +* columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally set +* by a previous call to CGGBAL; otherwise they should be set +* to 1 and N respectively. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* A (input/output) COMPLEX array, dimension (LDA, N) +* On entry, the N-by-N general matrix to be reduced. +* On exit, the upper triangle and the first subdiagonal of A +* are overwritten with the upper Hessenberg matrix H, and the +* rest is set to zero. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX array, dimension (LDB, N) +* On entry, the N-by-N upper triangular matrix B. +* On exit, the upper triangular matrix T = Q' B Z. The +* elements below the diagonal are set to zero. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* Q (input/output) COMPLEX array, dimension (LDQ, N) +* If COMPQ='N': Q is not referenced. +* If COMPQ='I': on entry, Q need not be set, and on exit it +* contains the unitary matrix Q, where Q' +* is the product of the Givens transformations +* which are applied to A and B on the left. +* If COMPQ='V': on entry, Q must contain a unitary matrix +* Q1, and on exit this is overwritten by Q1*Q. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. +* LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. +* +* Z (input/output) COMPLEX array, dimension (LDZ, N) +* If COMPZ='N': Z is not referenced. +* If COMPZ='I': on entry, Z need not be set, and on exit it +* contains the unitary matrix Z, which is +* the product of the Givens transformations +* which are applied to A and B on the right. +* If COMPZ='V': on entry, Z must contain a unitary matrix +* Z1, and on exit this is overwritten by Z1*Z. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. +* LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* This routine reduces A to Hessenberg and B to triangular form by +* an unblocked reduction, as described in _Matrix_Computations_, +* by Golub and van Loan (Johns Hopkins Press). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CONE, CZERO + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ), + $ CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ILQ, ILZ + INTEGER ICOMPQ, ICOMPZ, JCOL, JROW + REAL C + COMPLEX CTEMP, S +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLARTG, CLASET, CROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Decode COMPQ +* + IF( LSAME( COMPQ, 'N' ) ) THEN + ILQ = .FALSE. + ICOMPQ = 1 + ELSE IF( LSAME( COMPQ, 'V' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 2 + ELSE IF( LSAME( COMPQ, 'I' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 3 + ELSE + ICOMPQ = 0 + END IF +* +* Decode COMPZ +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ILZ = .FALSE. + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 2 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 3 + ELSE + ICOMPZ = 0 + END IF +* +* Test the input parameters. +* + INFO = 0 + IF( ICOMPQ.LE.0 ) THEN + INFO = -1 + ELSE IF( ICOMPZ.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 ) THEN + INFO = -4 + ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( ( ILQ .AND. LDQ.LT.N ) .OR. LDQ.LT.1 ) THEN + INFO = -11 + ELSE IF( ( ILZ .AND. LDZ.LT.N ) .OR. LDZ.LT.1 ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGHRD', -INFO ) + RETURN + END IF +* +* Initialize Q and Z if desired. +* + IF( ICOMPQ.EQ.3 ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, Q, LDQ ) + IF( ICOMPZ.EQ.3 ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, Z, LDZ ) +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* +* Zero out lower triangle of B +* + DO 20 JCOL = 1, N - 1 + DO 10 JROW = JCOL + 1, N + B( JROW, JCOL ) = CZERO + 10 CONTINUE + 20 CONTINUE +* +* Reduce A and B +* + DO 40 JCOL = ILO, IHI - 2 +* + DO 30 JROW = IHI, JCOL + 2, -1 +* +* Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL) +* + CTEMP = A( JROW-1, JCOL ) + CALL CLARTG( CTEMP, A( JROW, JCOL ), C, S, + $ A( JROW-1, JCOL ) ) + A( JROW, JCOL ) = CZERO + CALL CROT( N-JCOL, A( JROW-1, JCOL+1 ), LDA, + $ A( JROW, JCOL+1 ), LDA, C, S ) + CALL CROT( N+2-JROW, B( JROW-1, JROW-1 ), LDB, + $ B( JROW, JROW-1 ), LDB, C, S ) + IF( ILQ ) + $ CALL CROT( N, Q( 1, JROW-1 ), 1, Q( 1, JROW ), 1, C, + $ CONJG( S ) ) +* +* Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1) +* + CTEMP = B( JROW, JROW ) + CALL CLARTG( CTEMP, B( JROW, JROW-1 ), C, S, + $ B( JROW, JROW ) ) + B( JROW, JROW-1 ) = CZERO + CALL CROT( IHI, A( 1, JROW ), 1, A( 1, JROW-1 ), 1, C, S ) + CALL CROT( JROW-1, B( 1, JROW ), 1, B( 1, JROW-1 ), 1, C, + $ S ) + IF( ILZ ) + $ CALL CROT( N, Z( 1, JROW ), 1, Z( 1, JROW-1 ), 1, C, S ) + 30 CONTINUE + 40 CONTINUE +* + RETURN +* +* End of CGGHRD +* + END diff --git a/costa/native/external/lapack/cgglse.f b/costa/native/external/lapack/cgglse.f new file mode 100644 index 000000000..da55eae03 --- /dev/null +++ b/costa/native/external/lapack/cgglse.f @@ -0,0 +1,218 @@ + SUBROUTINE CGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), C( * ), D( * ), + $ WORK( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CGGLSE solves the linear equality-constrained least squares (LSE) +* problem: +* +* minimize || c - A*x ||_2 subject to B*x = d +* +* where A is an M-by-N matrix, B is a P-by-N matrix, c is a given +* M-vector, and d is a given P-vector. It is assumed that +* P <= N <= M+P, and +* +* rank(B) = P and rank( ( A ) ) = N. +* ( ( B ) ) +* +* These conditions ensure that the LSE problem has a unique solution, +* which is obtained using a GRQ factorization of the matrices B and A. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrices A and B. N >= 0. +* +* P (input) INTEGER +* The number of rows of the matrix B. 0 <= P <= N <= M+P. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) COMPLEX array, dimension (LDB,N) +* On entry, the P-by-N matrix B. +* On exit, B is destroyed. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,P). +* +* C (input/output) COMPLEX array, dimension (M) +* On entry, C contains the right hand side vector for the +* least squares part of the LSE problem. +* On exit, the residual sum of squares for the solution +* is given by the sum of squares of elements N-P+1 to M of +* vector C. +* +* D (input/output) COMPLEX array, dimension (P) +* On entry, D contains the right hand side vector for the +* constrained equation. +* On exit, D is destroyed. +* +* X (output) COMPLEX array, dimension (N) +* On exit, X is the solution of the LSE problem. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M+N+P). +* For optimum performance LWORK >= P+min(M,N)+max(M,N)*NB, +* where NB is an upper bound for the optimal blocksizes for +* CGEQRF, CGERQF, CUNMQR and CUNMRQ. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKOPT, MN, NB, NB1, NB2, NB3, NB4, NR +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGEMV, CGGRQF, CTRMV, CTRSV, + $ CUNMQR, CUNMRQ, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + MN = MIN( M, N ) + NB1 = ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'CGERQF', ' ', M, N, -1, -1 ) + NB3 = ILAENV( 1, 'CUNMQR', ' ', M, N, P, -1 ) + NB4 = ILAENV( 1, 'CUNMRQ', ' ', M, N, P, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKOPT = P + MN + MAX( M, N )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( P.LT.0 .OR. P.GT.N .OR. P.LT.N-M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.MAX( 1, M+N+P ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGLSE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the GRQ factorization of matrices B and A: +* +* B*Q' = ( 0 T12 ) P Z'*A*Q' = ( R11 R12 ) N-P +* N-P P ( 0 R22 ) M+P-N +* N-P P +* +* where T12 and R11 are upper triangular, and Q and Z are +* unitary. +* + CALL CGGRQF( P, M, N, B, LDB, WORK, A, LDA, WORK( P+1 ), + $ WORK( P+MN+1 ), LWORK-P-MN, INFO ) + LOPT = WORK( P+MN+1 ) +* +* Update c = Z'*c = ( c1 ) N-P +* ( c2 ) M+P-N +* + CALL CUNMQR( 'Left', 'Conjugate Transpose', M, 1, MN, A, LDA, + $ WORK( P+1 ), C, MAX( 1, M ), WORK( P+MN+1 ), + $ LWORK-P-MN, INFO ) + LOPT = MAX( LOPT, INT( WORK( P+MN+1 ) ) ) +* +* Solve T12*x2 = d for x2 +* + CALL CTRSV( 'Upper', 'No transpose', 'Non unit', P, B( 1, N-P+1 ), + $ LDB, D, 1 ) +* +* Update c1 +* + CALL CGEMV( 'No transpose', N-P, P, -CONE, A( 1, N-P+1 ), LDA, D, + $ 1, CONE, C, 1 ) +* +* Sovle R11*x1 = c1 for x1 +* + CALL CTRSV( 'Upper', 'No transpose', 'Non unit', N-P, A, LDA, C, + $ 1 ) +* +* Put the solutions in X +* + CALL CCOPY( N-P, C, 1, X, 1 ) + CALL CCOPY( P, D, 1, X( N-P+1 ), 1 ) +* +* Compute the residual vector: +* + IF( M.LT.N ) THEN + NR = M + P - N + CALL CGEMV( 'No transpose', NR, N-M, -CONE, A( N-P+1, M+1 ), + $ LDA, D( NR+1 ), 1, CONE, C( N-P+1 ), 1 ) + ELSE + NR = P + END IF + CALL CTRMV( 'Upper', 'No transpose', 'Non unit', NR, + $ A( N-P+1, N-P+1 ), LDA, D, 1 ) + CALL CAXPY( NR, -CONE, D, 1, C( N-P+1 ), 1 ) +* +* Backward transformation x = Q'*x +* + CALL CUNMRQ( 'Left', 'Conjugate Transpose', N, 1, P, B, LDB, + $ WORK( 1 ), X, N, WORK( P+MN+1 ), LWORK-P-MN, INFO ) + WORK( 1 ) = P + MN + MAX( LOPT, INT( WORK( P+MN+1 ) ) ) +* + RETURN +* +* End of CGGLSE +* + END diff --git a/costa/native/external/lapack/cggqrf.f b/costa/native/external/lapack/cggqrf.f new file mode 100644 index 000000000..ee63759f7 --- /dev/null +++ b/costa/native/external/lapack/cggqrf.f @@ -0,0 +1,212 @@ + SUBROUTINE CGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGGQRF computes a generalized QR factorization of an N-by-M matrix A +* and an N-by-P matrix B: +* +* A = Q*R, B = Q*T*Z, +* +* where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix, +* and R and T assume one of the forms: +* +* if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, +* ( 0 ) N-M N M-N +* M +* +* where R11 is upper triangular, and +* +* if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, +* P-N N ( T21 ) P +* P +* +* where T12 or T21 is upper triangular. +* +* In particular, if B is square and nonsingular, the GQR factorization +* of A and B implicitly gives the QR factorization of inv(B)*A: +* +* inv(B)*A = Z'*(inv(T)*R) +* +* where inv(B) denotes the inverse of the matrix B, and Z' denotes the +* conjugate transpose of matrix Z. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of rows of the matrices A and B. N >= 0. +* +* M (input) INTEGER +* The number of columns of the matrix A. M >= 0. +* +* P (input) INTEGER +* The number of columns of the matrix B. P >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,M) +* On entry, the N-by-M matrix A. +* On exit, the elements on and above the diagonal of the array +* contain the min(N,M)-by-M upper trapezoidal matrix R (R is +* upper triangular if N >= M); the elements below the diagonal, +* with the array TAUA, represent the unitary matrix Q as a +* product of min(N,M) elementary reflectors (see Further +* Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAUA (output) COMPLEX array, dimension (min(N,M)) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix Q (see Further Details). +* +* B (input/output) COMPLEX array, dimension (LDB,P) +* On entry, the N-by-P matrix B. +* On exit, if N <= P, the upper triangle of the subarray +* B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; +* if N > P, the elements on and above the (N-P)-th subdiagonal +* contain the N-by-P upper trapezoidal matrix T; the remaining +* elements, with the array TAUB, represent the unitary +* matrix Z as a product of elementary reflectors (see Further +* Details). +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* TAUB (output) COMPLEX array, dimension (min(N,P)) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix Z (see Further Details). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N,M,P). +* For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3), +* where NB1 is the optimal blocksize for the QR factorization +* of an N-by-M matrix, NB2 is the optimal blocksize for the +* RQ factorization of an N-by-P matrix, and NB3 is the optimal +* blocksize for a call of CUNMQR. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(n,m). +* +* Each H(i) has the form +* +* H(i) = I - taua * v * v' +* +* where taua is a complex scalar, and v is a complex vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), +* and taua in TAUA(i). +* To form Q explicitly, use LAPACK subroutine CUNGQR. +* To use Q to update another matrix, use LAPACK subroutine CUNMQR. +* +* The matrix Z is represented as a product of elementary reflectors +* +* Z = H(1) H(2) . . . H(k), where k = min(n,p). +* +* Each H(i) has the form +* +* H(i) = I - taub * v * v' +* +* where taub is a complex scalar, and v is a complex vector with +* v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in +* B(n-k+i,1:p-k+i-1), and taub in TAUB(i). +* To form Z explicitly, use LAPACK subroutine CUNGRQ. +* To use Z to update another matrix, use LAPACK subroutine CUNMRQ. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKOPT, NB, NB1, NB2, NB3 +* .. +* .. External Subroutines .. + EXTERNAL CGEQRF, CGERQF, CUNMQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB1 = ILAENV( 1, 'CGEQRF', ' ', N, M, -1, -1 ) + NB2 = ILAENV( 1, 'CGERQF', ' ', N, P, -1, -1 ) + NB3 = ILAENV( 1, 'CUNMQR', ' ', N, M, P, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LWKOPT = MAX( N, M, P)*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( P.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, N, M, P ) .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* QR factorization of N-by-M matrix A: A = Q*R +* + CALL CGEQRF( N, M, A, LDA, TAUA, WORK, LWORK, INFO ) + LOPT = WORK( 1 ) +* +* Update B := Q'*B. +* + CALL CUNMQR( 'Left', 'Conjugate Transpose', N, P, MIN( N, M ), A, + $ LDA, TAUA, B, LDB, WORK, LWORK, INFO ) + LOPT = MAX( LOPT, INT( WORK( 1 ) ) ) +* +* RQ factorization of N-by-P matrix B: B = T*Z. +* + CALL CGERQF( N, P, B, LDB, TAUB, WORK, LWORK, INFO ) + WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) ) +* + RETURN +* +* End of CGGQRF +* + END diff --git a/costa/native/external/lapack/cggrqf.f b/costa/native/external/lapack/cggrqf.f new file mode 100644 index 000000000..455c9a454 --- /dev/null +++ b/costa/native/external/lapack/cggrqf.f @@ -0,0 +1,212 @@ + SUBROUTINE CGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGGRQF computes a generalized RQ factorization of an M-by-N matrix A +* and a P-by-N matrix B: +* +* A = R*Q, B = Z*T*Q, +* +* where Q is an N-by-N unitary matrix, Z is a P-by-P unitary +* matrix, and R and T assume one of the forms: +* +* if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, +* N-M M ( R21 ) N +* N +* +* where R12 or R21 is upper triangular, and +* +* if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, +* ( 0 ) P-N P N-P +* N +* +* where T11 is upper triangular. +* +* In particular, if B is square and nonsingular, the GRQ factorization +* of A and B implicitly gives the RQ factorization of A*inv(B): +* +* A*inv(B) = (R*inv(T))*Z' +* +* where inv(B) denotes the inverse of the matrix B, and Z' denotes the +* conjugate transpose of the matrix Z. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* P (input) INTEGER +* The number of rows of the matrix B. P >= 0. +* +* N (input) INTEGER +* The number of columns of the matrices A and B. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, if M <= N, the upper triangle of the subarray +* A(1:M,N-M+1:N) contains the M-by-M upper triangular matrix R; +* if M > N, the elements on and above the (M-N)-th subdiagonal +* contain the M-by-N upper trapezoidal matrix R; the remaining +* elements, with the array TAUA, represent the unitary +* matrix Q as a product of elementary reflectors (see Further +* Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAUA (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix Q (see Further Details). +* +* B (input/output) COMPLEX array, dimension (LDB,N) +* On entry, the P-by-N matrix B. +* On exit, the elements on and above the diagonal of the array +* contain the min(P,N)-by-N upper trapezoidal matrix T (T is +* upper triangular if P >= N); the elements below the diagonal, +* with the array TAUB, represent the unitary matrix Z as a +* product of elementary reflectors (see Further Details). +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,P). +* +* TAUB (output) COMPLEX array, dimension (min(P,N)) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix Z (see Further Details). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N,M,P). +* For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3), +* where NB1 is the optimal blocksize for the RQ factorization +* of an M-by-N matrix, NB2 is the optimal blocksize for the +* QR factorization of a P-by-N matrix, and NB3 is the optimal +* blocksize for a call of CUNMRQ. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO=-i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - taua * v * v' +* +* where taua is a complex scalar, and v is a complex vector with +* v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in +* A(m-k+i,1:n-k+i-1), and taua in TAUA(i). +* To form Q explicitly, use LAPACK subroutine CUNGRQ. +* To use Q to update another matrix, use LAPACK subroutine CUNMRQ. +* +* The matrix Z is represented as a product of elementary reflectors +* +* Z = H(1) H(2) . . . H(k), where k = min(p,n). +* +* Each H(i) has the form +* +* H(i) = I - taub * v * v' +* +* where taub is a complex scalar, and v is a complex vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in B(i+1:p,i), +* and taub in TAUB(i). +* To form Z explicitly, use LAPACK subroutine CUNGQR. +* To use Z to update another matrix, use LAPACK subroutine CUNMQR. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKOPT, NB, NB1, NB2, NB3 +* .. +* .. External Subroutines .. + EXTERNAL CGEQRF, CGERQF, CUNMRQ, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB1 = ILAENV( 1, 'CGERQF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'CGEQRF', ' ', P, N, -1, -1 ) + NB3 = ILAENV( 1, 'CUNMRQ', ' ', M, N, P, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LWKOPT = MAX( N, M, P)*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( P.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, M, P, N ) .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGRQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* RQ factorization of M-by-N matrix A: A = R*Q +* + CALL CGERQF( M, N, A, LDA, TAUA, WORK, LWORK, INFO ) + LOPT = WORK( 1 ) +* +* Update B := B*Q' +* + CALL CUNMRQ( 'Right', 'Conjugate Transpose', P, N, MIN( M, N ), + $ A( MAX( 1, M-N+1 ), 1 ), LDA, TAUA, B, LDB, WORK, + $ LWORK, INFO ) + LOPT = MAX( LOPT, INT( WORK( 1 ) ) ) +* +* QR factorization of P-by-N matrix B: B = Z*T +* + CALL CGEQRF( P, N, B, LDB, TAUB, WORK, LWORK, INFO ) + WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) ) +* + RETURN +* +* End of CGGRQF +* + END diff --git a/costa/native/external/lapack/cggsvd.f b/costa/native/external/lapack/cggsvd.f new file mode 100644 index 000000000..7bee097c2 --- /dev/null +++ b/costa/native/external/lapack/cggsvd.f @@ -0,0 +1,334 @@ + SUBROUTINE CGGSVD( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, + $ LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, + $ RWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL ALPHA( * ), BETA( * ), RWORK( * ) + COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGGSVD computes the generalized singular value decomposition (GSVD) +* of an M-by-N complex matrix A and P-by-N complex matrix B: +* +* U'*A*Q = D1*( 0 R ), V'*B*Q = D2*( 0 R ) +* +* where U, V and Q are unitary matrices, and Z' means the conjugate +* transpose of Z. Let K+L = the effective numerical rank of the +* matrix (A',B')', then R is a (K+L)-by-(K+L) nonsingular upper +* triangular matrix, D1 and D2 are M-by-(K+L) and P-by-(K+L) "diagonal" +* matrices and of the following structures, respectively: +* +* If M-K-L >= 0, +* +* K L +* D1 = K ( I 0 ) +* L ( 0 C ) +* M-K-L ( 0 0 ) +* +* K L +* D2 = L ( 0 S ) +* P-L ( 0 0 ) +* +* N-K-L K L +* ( 0 R ) = K ( 0 R11 R12 ) +* L ( 0 0 R22 ) +* where +* +* C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), +* S = diag( BETA(K+1), ... , BETA(K+L) ), +* C**2 + S**2 = I. +* +* R is stored in A(1:K+L,N-K-L+1:N) on exit. +* +* If M-K-L < 0, +* +* K M-K K+L-M +* D1 = K ( I 0 0 ) +* M-K ( 0 C 0 ) +* +* K M-K K+L-M +* D2 = M-K ( 0 S 0 ) +* K+L-M ( 0 0 I ) +* P-L ( 0 0 0 ) +* +* N-K-L K M-K K+L-M +* ( 0 R ) = K ( 0 R11 R12 R13 ) +* M-K ( 0 0 R22 R23 ) +* K+L-M ( 0 0 0 R33 ) +* +* where +* +* C = diag( ALPHA(K+1), ... , ALPHA(M) ), +* S = diag( BETA(K+1), ... , BETA(M) ), +* C**2 + S**2 = I. +* +* (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored +* ( 0 R22 R23 ) +* in B(M-K+1:L,N+M-K-L+1:N) on exit. +* +* The routine computes C, S, R, and optionally the unitary +* transformation matrices U, V and Q. +* +* In particular, if B is an N-by-N nonsingular matrix, then the GSVD of +* A and B implicitly gives the SVD of A*inv(B): +* A*inv(B) = U*(D1*inv(D2))*V'. +* If ( A',B')' has orthnormal columns, then the GSVD of A and B is also +* equal to the CS decomposition of A and B. Furthermore, the GSVD can +* be used to derive the solution of the eigenvalue problem: +* A'*A x = lambda* B'*B x. +* In some literature, the GSVD of A and B is presented in the form +* U'*A*X = ( 0 D1 ), V'*B*X = ( 0 D2 ) +* where U and V are orthogonal and X is nonsingular, and D1 and D2 are +* ``diagonal''. The former GSVD form can be converted to the latter +* form by taking the nonsingular matrix X as +* +* X = Q*( I 0 ) +* ( 0 inv(R) ) +* +* Arguments +* ========= +* +* JOBU (input) CHARACTER*1 +* = 'U': Unitary matrix U is computed; +* = 'N': U is not computed. +* +* JOBV (input) CHARACTER*1 +* = 'V': Unitary matrix V is computed; +* = 'N': V is not computed. +* +* JOBQ (input) CHARACTER*1 +* = 'Q': Unitary matrix Q is computed; +* = 'N': Q is not computed. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrices A and B. N >= 0. +* +* P (input) INTEGER +* The number of rows of the matrix B. P >= 0. +* +* K (output) INTEGER +* L (output) INTEGER +* On exit, K and L specify the dimension of the subblocks +* described in Purpose. +* K + L = effective numerical rank of (A',B')'. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A contains the triangular matrix R, or part of R. +* See Purpose for details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) COMPLEX array, dimension (LDB,N) +* On entry, the P-by-N matrix B. +* On exit, B contains part of the triangular matrix R if +* M-K-L < 0. See Purpose for details. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,P). +* +* ALPHA (output) REAL array, dimension (N) +* BETA (output) REAL array, dimension (N) +* On exit, ALPHA and BETA contain the generalized singular +* value pairs of A and B; +* ALPHA(1:K) = 1, +* BETA(1:K) = 0, +* and if M-K-L >= 0, +* ALPHA(K+1:K+L) = C, +* BETA(K+1:K+L) = S, +* or if M-K-L < 0, +* ALPHA(K+1:M)= C, ALPHA(M+1:K+L)= 0 +* BETA(K+1:M) = S, BETA(M+1:K+L) = 1 +* and +* ALPHA(K+L+1:N) = 0 +* BETA(K+L+1:N) = 0 +* +* U (output) COMPLEX array, dimension (LDU,M) +* If JOBU = 'U', U contains the M-by-M unitary matrix U. +* If JOBU = 'N', U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= max(1,M) if +* JOBU = 'U'; LDU >= 1 otherwise. +* +* V (output) COMPLEX array, dimension (LDV,P) +* If JOBV = 'V', V contains the P-by-P unitary matrix V. +* If JOBV = 'N', V is not referenced. +* +* LDV (input) INTEGER +* The leading dimension of the array V. LDV >= max(1,P) if +* JOBV = 'V'; LDV >= 1 otherwise. +* +* Q (output) COMPLEX array, dimension (LDQ,N) +* If JOBQ = 'Q', Q contains the N-by-N unitary matrix Q. +* If JOBQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N) if +* JOBQ = 'Q'; LDQ >= 1 otherwise. +* +* WORK (workspace) COMPLEX array, dimension (max(3*N,M,P)+N) +* +* RWORK (workspace) REAL array, dimension (2*N) +* +* IWORK (workspace/output) INTEGER array, dimension (N) +* On exit, IWORK stores the sorting information. More +* precisely, the following loop will sort ALPHA +* for I = K+1, min(M,K+L) +* swap ALPHA(I) and ALPHA(IWORK(I)) +* endfor +* such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N). +* +* INFO (output)INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, the Jacobi-type procedure failed to +* converge. For further details, see subroutine CTGSJA. +* +* Internal Parameters +* =================== +* +* TOLA REAL +* TOLB REAL +* TOLA and TOLB are the thresholds to determine the effective +* rank of (A',B')'. Generally, they are set to +* TOLA = MAX(M,N)*norm(A)*MACHEPS, +* TOLB = MAX(P,N)*norm(B)*MACHEPS. +* The size of TOLA and TOLB may affect the size of backward +* errors of the decomposition. +* +* Further Details +* =============== +* +* 2-96 Based on modifications by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL WANTQ, WANTU, WANTV + INTEGER I, IBND, ISUB, J, NCYCLE + REAL ANORM, BNORM, SMAX, TEMP, TOLA, TOLB, ULP, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANGE, SLAMCH + EXTERNAL LSAME, CLANGE, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CGGSVP, CTGSJA, SCOPY, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( P.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -12 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGSVD', -INFO ) + RETURN + END IF +* +* Compute the Frobenius norm of matrices A and B +* + ANORM = CLANGE( '1', M, N, A, LDA, RWORK ) + BNORM = CLANGE( '1', P, N, B, LDB, RWORK ) +* +* Get machine precision and set up threshold for determining +* the effective numerical rank of the matrices A and B. +* + ULP = SLAMCH( 'Precision' ) + UNFL = SLAMCH( 'Safe Minimum' ) + TOLA = MAX( M, N )*MAX( ANORM, UNFL )*ULP + TOLB = MAX( P, N )*MAX( BNORM, UNFL )*ULP +* + CALL CGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, TOLA, + $ TOLB, K, L, U, LDU, V, LDV, Q, LDQ, IWORK, RWORK, + $ WORK, WORK( N+1 ), INFO ) +* +* Compute the GSVD of two upper "triangular" matrices +* + CALL CTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, LDB, + $ TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, + $ WORK, NCYCLE, INFO ) +* +* Sort the singular values and store the pivot indices in IWORK +* Copy ALPHA to RWORK, then sort ALPHA in RWORK +* + CALL SCOPY( N, ALPHA, 1, RWORK, 1 ) + IBND = MIN( L, M-K ) + DO 20 I = 1, IBND +* +* Scan for largest ALPHA(K+I) +* + ISUB = I + SMAX = RWORK( K+I ) + DO 10 J = I + 1, IBND + TEMP = RWORK( K+J ) + IF( TEMP.GT.SMAX ) THEN + ISUB = J + SMAX = TEMP + END IF + 10 CONTINUE + IF( ISUB.NE.I ) THEN + RWORK( K+ISUB ) = RWORK( K+I ) + RWORK( K+I ) = SMAX + IWORK( K+I ) = K + ISUB + ELSE + IWORK( K+I ) = K + I + END IF + 20 CONTINUE +* + RETURN +* +* End of CGGSVD +* + END diff --git a/costa/native/external/lapack/cggsvp.f b/costa/native/external/lapack/cggsvp.f new file mode 100644 index 000000000..fe0ed0bbd --- /dev/null +++ b/costa/native/external/lapack/cggsvp.f @@ -0,0 +1,403 @@ + SUBROUTINE CGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, + $ TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, + $ IWORK, RWORK, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P + REAL TOLA, TOLB +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGGSVP computes unitary matrices U, V and Q such that +* +* N-K-L K L +* U'*A*Q = K ( 0 A12 A13 ) if M-K-L >= 0; +* L ( 0 0 A23 ) +* M-K-L ( 0 0 0 ) +* +* N-K-L K L +* = K ( 0 A12 A13 ) if M-K-L < 0; +* M-K ( 0 0 A23 ) +* +* N-K-L K L +* V'*B*Q = L ( 0 0 B13 ) +* P-L ( 0 0 0 ) +* +* where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular +* upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, +* otherwise A23 is (M-K)-by-L upper trapezoidal. K+L = the effective +* numerical rank of the (M+P)-by-N matrix (A',B')'. Z' denotes the +* conjugate transpose of Z. +* +* This decomposition is the preprocessing step for computing the +* Generalized Singular Value Decomposition (GSVD), see subroutine +* CGGSVD. +* +* Arguments +* ========= +* +* JOBU (input) CHARACTER*1 +* = 'U': Unitary matrix U is computed; +* = 'N': U is not computed. +* +* JOBV (input) CHARACTER*1 +* = 'V': Unitary matrix V is computed; +* = 'N': V is not computed. +* +* JOBQ (input) CHARACTER*1 +* = 'Q': Unitary matrix Q is computed; +* = 'N': Q is not computed. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* P (input) INTEGER +* The number of rows of the matrix B. P >= 0. +* +* N (input) INTEGER +* The number of columns of the matrices A and B. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A contains the triangular (or trapezoidal) matrix +* described in the Purpose section. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) COMPLEX array, dimension (LDB,N) +* On entry, the P-by-N matrix B. +* On exit, B contains the triangular matrix described in +* the Purpose section. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,P). +* +* TOLA (input) REAL +* TOLB (input) REAL +* TOLA and TOLB are the thresholds to determine the effective +* numerical rank of matrix B and a subblock of A. Generally, +* they are set to +* TOLA = MAX(M,N)*norm(A)*MACHEPS, +* TOLB = MAX(P,N)*norm(B)*MACHEPS. +* The size of TOLA and TOLB may affect the size of backward +* errors of the decomposition. +* +* K (output) INTEGER +* L (output) INTEGER +* On exit, K and L specify the dimension of the subblocks +* described in Purpose section. +* K + L = effective numerical rank of (A',B')'. +* +* U (output) COMPLEX array, dimension (LDU,M) +* If JOBU = 'U', U contains the unitary matrix U. +* If JOBU = 'N', U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= max(1,M) if +* JOBU = 'U'; LDU >= 1 otherwise. +* +* V (output) COMPLEX array, dimension (LDV,M) +* If JOBV = 'V', V contains the unitary matrix V. +* If JOBV = 'N', V is not referenced. +* +* LDV (input) INTEGER +* The leading dimension of the array V. LDV >= max(1,P) if +* JOBV = 'V'; LDV >= 1 otherwise. +* +* Q (output) COMPLEX array, dimension (LDQ,N) +* If JOBQ = 'Q', Q contains the unitary matrix Q. +* If JOBQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N) if +* JOBQ = 'Q'; LDQ >= 1 otherwise. +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* RWORK (workspace) REAL array, dimension (2*N) +* +* TAU (workspace) COMPLEX array, dimension (N) +* +* WORK (workspace) COMPLEX array, dimension (max(3*N,M,P)) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The subroutine uses LAPACK subroutine CGEQPF for the QR factorization +* with column pivoting to detect the effective numerical rank of the +* a matrix. It may be replaced by a better rank determination strategy. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL FORWRD, WANTQ, WANTU, WANTV + INTEGER I, J + COMPLEX T +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEQPF, CGEQR2, CGERQ2, CLACPY, CLAPMT, CLASET, + $ CUNG2R, CUNM2R, CUNMR2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( T ) = ABS( REAL( T ) ) + ABS( AIMAG( T ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) + FORWRD = .TRUE. +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( P.LT.0 ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -10 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGSVP', -INFO ) + RETURN + END IF +* +* QR with column pivoting of B: B*P = V*( S11 S12 ) +* ( 0 0 ) +* + DO 10 I = 1, N + IWORK( I ) = 0 + 10 CONTINUE + CALL CGEQPF( P, N, B, LDB, IWORK, TAU, WORK, RWORK, INFO ) +* +* Update A := A*P +* + CALL CLAPMT( FORWRD, M, N, A, LDA, IWORK ) +* +* Determine the effective rank of matrix B. +* + L = 0 + DO 20 I = 1, MIN( P, N ) + IF( CABS1( B( I, I ) ).GT.TOLB ) + $ L = L + 1 + 20 CONTINUE +* + IF( WANTV ) THEN +* +* Copy the details of V, and form V. +* + CALL CLASET( 'Full', P, P, CZERO, CZERO, V, LDV ) + IF( P.GT.1 ) + $ CALL CLACPY( 'Lower', P-1, N, B( 2, 1 ), LDB, V( 2, 1 ), + $ LDV ) + CALL CUNG2R( P, P, MIN( P, N ), V, LDV, TAU, WORK, INFO ) + END IF +* +* Clean up B +* + DO 40 J = 1, L - 1 + DO 30 I = J + 1, L + B( I, J ) = CZERO + 30 CONTINUE + 40 CONTINUE + IF( P.GT.L ) + $ CALL CLASET( 'Full', P-L, N, CZERO, CZERO, B( L+1, 1 ), LDB ) +* + IF( WANTQ ) THEN +* +* Set Q = I and Update Q := Q*P +* + CALL CLASET( 'Full', N, N, CZERO, CONE, Q, LDQ ) + CALL CLAPMT( FORWRD, N, N, Q, LDQ, IWORK ) + END IF +* + IF( P.GE.L .AND. N.NE.L ) THEN +* +* RQ factorization of ( S11 S12 ) = ( 0 S12 )*Z +* + CALL CGERQ2( L, N, B, LDB, TAU, WORK, INFO ) +* +* Update A := A*Z' +* + CALL CUNMR2( 'Right', 'Conjugate transpose', M, N, L, B, LDB, + $ TAU, A, LDA, WORK, INFO ) + IF( WANTQ ) THEN +* +* Update Q := Q*Z' +* + CALL CUNMR2( 'Right', 'Conjugate transpose', N, N, L, B, + $ LDB, TAU, Q, LDQ, WORK, INFO ) + END IF +* +* Clean up B +* + CALL CLASET( 'Full', L, N-L, CZERO, CZERO, B, LDB ) + DO 60 J = N - L + 1, N + DO 50 I = J - N + L + 1, L + B( I, J ) = CZERO + 50 CONTINUE + 60 CONTINUE +* + END IF +* +* Let N-L L +* A = ( A11 A12 ) M, +* +* then the following does the complete QR decomposition of A11: +* +* A11 = U*( 0 T12 )*P1' +* ( 0 0 ) +* + DO 70 I = 1, N - L + IWORK( I ) = 0 + 70 CONTINUE + CALL CGEQPF( M, N-L, A, LDA, IWORK, TAU, WORK, RWORK, INFO ) +* +* Determine the effective rank of A11 +* + K = 0 + DO 80 I = 1, MIN( M, N-L ) + IF( CABS1( A( I, I ) ).GT.TOLA ) + $ K = K + 1 + 80 CONTINUE +* +* Update A12 := U'*A12, where A12 = A( 1:M, N-L+1:N ) +* + CALL CUNM2R( 'Left', 'Conjugate transpose', M, L, MIN( M, N-L ), + $ A, LDA, TAU, A( 1, N-L+1 ), LDA, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Copy the details of U, and form U +* + CALL CLASET( 'Full', M, M, CZERO, CZERO, U, LDU ) + IF( M.GT.1 ) + $ CALL CLACPY( 'Lower', M-1, N-L, A( 2, 1 ), LDA, U( 2, 1 ), + $ LDU ) + CALL CUNG2R( M, M, MIN( M, N-L ), U, LDU, TAU, WORK, INFO ) + END IF +* + IF( WANTQ ) THEN +* +* Update Q( 1:N, 1:N-L ) = Q( 1:N, 1:N-L )*P1 +* + CALL CLAPMT( FORWRD, N, N-L, Q, LDQ, IWORK ) + END IF +* +* Clean up A: set the strictly lower triangular part of +* A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0. +* + DO 100 J = 1, K - 1 + DO 90 I = J + 1, K + A( I, J ) = CZERO + 90 CONTINUE + 100 CONTINUE + IF( M.GT.K ) + $ CALL CLASET( 'Full', M-K, N-L, CZERO, CZERO, A( K+1, 1 ), LDA ) +* + IF( N-L.GT.K ) THEN +* +* RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1 +* + CALL CGERQ2( K, N-L, A, LDA, TAU, WORK, INFO ) +* + IF( WANTQ ) THEN +* +* Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1' +* + CALL CUNMR2( 'Right', 'Conjugate transpose', N, N-L, K, A, + $ LDA, TAU, Q, LDQ, WORK, INFO ) + END IF +* +* Clean up A +* + CALL CLASET( 'Full', K, N-L-K, CZERO, CZERO, A, LDA ) + DO 120 J = N - L - K + 1, N - L + DO 110 I = J - N + L + K + 1, K + A( I, J ) = CZERO + 110 CONTINUE + 120 CONTINUE +* + END IF +* + IF( M.GT.K ) THEN +* +* QR factorization of A( K+1:M,N-L+1:N ) +* + CALL CGEQR2( M-K, L, A( K+1, N-L+1 ), LDA, TAU, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Update U(:,K+1:M) := U(:,K+1:M)*U1 +* + CALL CUNM2R( 'Right', 'No transpose', M, M-K, MIN( M-K, L ), + $ A( K+1, N-L+1 ), LDA, TAU, U( 1, K+1 ), LDU, + $ WORK, INFO ) + END IF +* +* Clean up +* + DO 140 J = N - L + 1, N + DO 130 I = J - N + K + L + 1, M + A( I, J ) = CZERO + 130 CONTINUE + 140 CONTINUE +* + END IF +* + RETURN +* +* End of CGGSVP +* + END diff --git a/costa/native/external/lapack/cgtcon.f b/costa/native/external/lapack/cgtcon.f new file mode 100644 index 000000000..08d0bd833 --- /dev/null +++ b/costa/native/external/lapack/cgtcon.f @@ -0,0 +1,167 @@ + SUBROUTINE CGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX D( * ), DL( * ), DU( * ), DU2( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CGTCON estimates the reciprocal of the condition number of a complex +* tridiagonal matrix A using the LU factorization as computed by +* CGTTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* DL (input) COMPLEX array, dimension (N-1) +* The (n-1) multipliers that define the matrix L from the +* LU factorization of A as computed by CGTTRF. +* +* D (input) COMPLEX array, dimension (N) +* The n diagonal elements of the upper triangular matrix U from +* the LU factorization of A. +* +* DU (input) COMPLEX array, dimension (N-1) +* The (n-1) elements of the first superdiagonal of U. +* +* DU2 (input) COMPLEX array, dimension (N-2) +* The (n-2) elements of the second superdiagonal of U. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* ANORM (input) REAL +* If NORM = '1' or 'O', the 1-norm of the original matrix A. +* If NORM = 'I', the infinity-norm of the original matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ONENRM + INTEGER I, KASE, KASE1 + REAL AINVNM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGTTRS, CLACON, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CMPLX +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGTCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* +* Check that D(1:N) is non-zero. +* + DO 10 I = 1, N + IF( D( I ).EQ.CMPLX( ZERO ) ) + $ RETURN + 10 CONTINUE +* + AINVNM = ZERO + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 20 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(U)*inv(L). +* + CALL CGTTRS( 'No transpose', N, 1, DL, D, DU, DU2, IPIV, + $ WORK, N, INFO ) + ELSE +* +* Multiply by inv(L')*inv(U'). +* + CALL CGTTRS( 'Conjugate transpose', N, 1, DL, D, DU, DU2, + $ IPIV, WORK, N, INFO ) + END IF + GO TO 20 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of CGTCON +* + END diff --git a/costa/native/external/lapack/cgtrfs.f b/costa/native/external/lapack/cgtrfs.f new file mode 100644 index 000000000..e98545602 --- /dev/null +++ b/costa/native/external/lapack/cgtrfs.f @@ -0,0 +1,369 @@ + SUBROUTINE CGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, + $ IPIV, B, LDB, X, LDX, FERR, BERR, WORK, RWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX B( LDB, * ), D( * ), DF( * ), DL( * ), + $ DLF( * ), DU( * ), DU2( * ), DUF( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CGTRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is tridiagonal, and provides +* error bounds and backward error estimates for the solution. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input) COMPLEX array, dimension (N-1) +* The (n-1) subdiagonal elements of A. +* +* D (input) COMPLEX array, dimension (N) +* The diagonal elements of A. +* +* DU (input) COMPLEX array, dimension (N-1) +* The (n-1) superdiagonal elements of A. +* +* DLF (input) COMPLEX array, dimension (N-1) +* The (n-1) multipliers that define the matrix L from the +* LU factorization of A as computed by CGTTRF. +* +* DF (input) COMPLEX array, dimension (N) +* The n diagonal elements of the upper triangular matrix U from +* the LU factorization of A. +* +* DUF (input) COMPLEX array, dimension (N-1) +* The (n-1) elements of the first superdiagonal of U. +* +* DU2 (input) COMPLEX array, dimension (N-2) +* The (n-2) elements of the second superdiagonal of U. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* B (input) COMPLEX array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) COMPLEX array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by CGTTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANSN, TRANST + INTEGER COUNT, I, J, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN + COMPLEX ZDUM +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGTTRS, CLACON, CLAGTM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, MAX, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGTRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = 4 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 110 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL CCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL CLAGTM( TRANS, N, 1, -ONE, DL, D, DU, X( 1, J ), LDX, ONE, + $ WORK, N ) +* +* Compute abs(op(A))*abs(x) + abs(b) for use in the backward +* error bound. +* + IF( NOTRAN ) THEN + IF( N.EQ.1 ) THEN + RWORK( 1 ) = CABS1( B( 1, J ) ) + + $ CABS1( D( 1 ) )*CABS1( X( 1, J ) ) + ELSE + RWORK( 1 ) = CABS1( B( 1, J ) ) + + $ CABS1( D( 1 ) )*CABS1( X( 1, J ) ) + + $ CABS1( DU( 1 ) )*CABS1( X( 2, J ) ) + DO 30 I = 2, N - 1 + RWORK( I ) = CABS1( B( I, J ) ) + + $ CABS1( DL( I-1 ) )*CABS1( X( I-1, J ) ) + + $ CABS1( D( I ) )*CABS1( X( I, J ) ) + + $ CABS1( DU( I ) )*CABS1( X( I+1, J ) ) + 30 CONTINUE + RWORK( N ) = CABS1( B( N, J ) ) + + $ CABS1( DL( N-1 ) )*CABS1( X( N-1, J ) ) + + $ CABS1( D( N ) )*CABS1( X( N, J ) ) + END IF + ELSE + IF( N.EQ.1 ) THEN + RWORK( 1 ) = CABS1( B( 1, J ) ) + + $ CABS1( D( 1 ) )*CABS1( X( 1, J ) ) + ELSE + RWORK( 1 ) = CABS1( B( 1, J ) ) + + $ CABS1( D( 1 ) )*CABS1( X( 1, J ) ) + + $ CABS1( DL( 1 ) )*CABS1( X( 2, J ) ) + DO 40 I = 2, N - 1 + RWORK( I ) = CABS1( B( I, J ) ) + + $ CABS1( DU( I-1 ) )*CABS1( X( I-1, J ) ) + + $ CABS1( D( I ) )*CABS1( X( I, J ) ) + + $ CABS1( DL( I ) )*CABS1( X( I+1, J ) ) + 40 CONTINUE + RWORK( N ) = CABS1( B( N, J ) ) + + $ CABS1( DU( N-1 ) )*CABS1( X( N-1, J ) ) + + $ CABS1( D( N ) )*CABS1( X( N, J ) ) + END IF + END IF +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + S = ZERO + DO 50 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 50 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL CGTTRS( TRANS, N, 1, DLF, DF, DUF, DU2, IPIV, WORK, N, + $ INFO ) + CALL CAXPY( N, CMPLX( ONE ), WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 60 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 60 CONTINUE +* + KASE = 0 + 70 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL CGTTRS( TRANST, N, 1, DLF, DF, DUF, DU2, IPIV, WORK, + $ N, INFO ) + DO 80 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 80 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 90 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 90 CONTINUE + CALL CGTTRS( TRANSN, N, 1, DLF, DF, DUF, DU2, IPIV, WORK, + $ N, INFO ) + END IF + GO TO 70 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 100 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 100 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 110 CONTINUE +* + RETURN +* +* End of CGTRFS +* + END diff --git a/costa/native/external/lapack/cgtsv.f b/costa/native/external/lapack/cgtsv.f new file mode 100644 index 000000000..7e2fd85d4 --- /dev/null +++ b/costa/native/external/lapack/cgtsv.f @@ -0,0 +1,174 @@ + SUBROUTINE CGTSV( N, NRHS, DL, D, DU, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX B( LDB, * ), D( * ), DL( * ), DU( * ) +* .. +* +* Purpose +* ======= +* +* CGTSV solves the equation +* +* A*X = B, +* +* where A is an N-by-N tridiagonal matrix, by Gaussian elimination with +* partial pivoting. +* +* Note that the equation A'*X = B may be solved by interchanging the +* order of the arguments DU and DL. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input/output) COMPLEX array, dimension (N-1) +* On entry, DL must contain the (n-1) subdiagonal elements of +* A. +* On exit, DL is overwritten by the (n-2) elements of the +* second superdiagonal of the upper triangular matrix U from +* the LU factorization of A, in DL(1), ..., DL(n-2). +* +* D (input/output) COMPLEX array, dimension (N) +* On entry, D must contain the diagonal elements of A. +* On exit, D is overwritten by the n diagonal elements of U. +* +* DU (input/output) COMPLEX array, dimension (N-1) +* On entry, DU must contain the (n-1) superdiagonal elements +* of A. +* On exit, DU is overwritten by the (n-1) elements of the first +* superdiagonal of U. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero, and the solution +* has not been computed. The factorization has not been +* completed unless i = N. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER J, K + COMPLEX MULT, TEMP, ZDUM +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGTSV ', -INFO ) + RETURN + END IF +* + IF( N.EQ.0 ) + $ RETURN +* + DO 30 K = 1, N - 1 + IF( DL( K ).EQ.ZERO ) THEN +* +* Subdiagonal is zero, no elimination is required. +* + IF( D( K ).EQ.ZERO ) THEN +* +* Diagonal is zero: set INFO = K and return; a unique +* solution can not be found. +* + INFO = K + RETURN + END IF + ELSE IF( CABS1( D( K ) ).GE.CABS1( DL( K ) ) ) THEN +* +* No row interchange required +* + MULT = DL( K ) / D( K ) + D( K+1 ) = D( K+1 ) - MULT*DU( K ) + DO 10 J = 1, NRHS + B( K+1, J ) = B( K+1, J ) - MULT*B( K, J ) + 10 CONTINUE + IF( K.LT.( N-1 ) ) + $ DL( K ) = ZERO + ELSE +* +* Interchange rows K and K+1 +* + MULT = D( K ) / DL( K ) + D( K ) = DL( K ) + TEMP = D( K+1 ) + D( K+1 ) = DU( K ) - MULT*TEMP + IF( K.LT.( N-1 ) ) THEN + DL( K ) = DU( K+1 ) + DU( K+1 ) = -MULT*DL( K ) + END IF + DU( K ) = TEMP + DO 20 J = 1, NRHS + TEMP = B( K, J ) + B( K, J ) = B( K+1, J ) + B( K+1, J ) = TEMP - MULT*B( K+1, J ) + 20 CONTINUE + END IF + 30 CONTINUE + IF( D( N ).EQ.ZERO ) THEN + INFO = N + RETURN + END IF +* +* Back solve with the matrix U from the factorization. +* + DO 50 J = 1, NRHS + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / D( N-1 ) + DO 40 K = N - 2, 1, -1 + B( K, J ) = ( B( K, J )-DU( K )*B( K+1, J )-DL( K )* + $ B( K+2, J ) ) / D( K ) + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of CGTSV +* + END diff --git a/costa/native/external/lapack/cgtsvx.f b/costa/native/external/lapack/cgtsvx.f new file mode 100644 index 000000000..2bef165de --- /dev/null +++ b/costa/native/external/lapack/cgtsvx.f @@ -0,0 +1,294 @@ + SUBROUTINE CGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, + $ DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER FACT, TRANS + INTEGER INFO, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX B( LDB, * ), D( * ), DF( * ), DL( * ), + $ DLF( * ), DU( * ), DU2( * ), DUF( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CGTSVX uses the LU factorization to compute the solution to a complex +* system of linear equations A * X = B, A**T * X = B, or A**H * X = B, +* where A is a tridiagonal matrix of order N and X and B are N-by-NRHS +* matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'N', the LU decomposition is used to factor the matrix A +* as A = L * U, where L is a product of permutation and unit lower +* bidiagonal matrices and U is upper triangular with nonzeros in +* only the main diagonal and first two superdiagonals. +* +* 2. If some U(i,i)=0, so that U is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 3. The system of equations is solved for X using the factored form +* of A. +* +* 4. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of A has been +* supplied on entry. +* = 'F': DLF, DF, DUF, DU2, and IPIV contain the factored form +* of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV will not +* be modified. +* = 'N': The matrix will be copied to DLF, DF, and DUF +* and factored. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input) COMPLEX array, dimension (N-1) +* The (n-1) subdiagonal elements of A. +* +* D (input) COMPLEX array, dimension (N) +* The n diagonal elements of A. +* +* DU (input) COMPLEX array, dimension (N-1) +* The (n-1) superdiagonal elements of A. +* +* DLF (input or output) COMPLEX array, dimension (N-1) +* If FACT = 'F', then DLF is an input argument and on entry +* contains the (n-1) multipliers that define the matrix L from +* the LU factorization of A as computed by CGTTRF. +* +* If FACT = 'N', then DLF is an output argument and on exit +* contains the (n-1) multipliers that define the matrix L from +* the LU factorization of A. +* +* DF (input or output) COMPLEX array, dimension (N) +* If FACT = 'F', then DF is an input argument and on entry +* contains the n diagonal elements of the upper triangular +* matrix U from the LU factorization of A. +* +* If FACT = 'N', then DF is an output argument and on exit +* contains the n diagonal elements of the upper triangular +* matrix U from the LU factorization of A. +* +* DUF (input or output) COMPLEX array, dimension (N-1) +* If FACT = 'F', then DUF is an input argument and on entry +* contains the (n-1) elements of the first superdiagonal of U. +* +* If FACT = 'N', then DUF is an output argument and on exit +* contains the (n-1) elements of the first superdiagonal of U. +* +* DU2 (input or output) COMPLEX array, dimension (N-2) +* If FACT = 'F', then DU2 is an input argument and on entry +* contains the (n-2) elements of the second superdiagonal of +* U. +* +* If FACT = 'N', then DU2 is an output argument and on exit +* contains the (n-2) elements of the second superdiagonal of +* U. +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains the pivot indices from the LU factorization of A as +* computed by CGTTRF. +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains the pivot indices from the LU factorization of A; +* row i of the matrix was interchanged with row IPIV(i). +* IPIV(i) will always be either i or i+1; IPIV(i) = i indicates +* a row interchange was not required. +* +* B (input) COMPLEX array, dimension (LDB,NRHS) +* The N-by-NRHS right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) COMPLEX array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) REAL +* The estimate of the reciprocal condition number of the matrix +* A. If RCOND is less than the machine precision (in +* particular, if RCOND = 0), the matrix is singular to working +* precision. This condition is indicated by a return code of +* INFO > 0. +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: U(i,i) is exactly zero. The factorization +* has not been completed unless i = N, but the +* factor U is exactly singular, so the solution +* and error bounds could not be computed. +* RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT, NOTRAN + CHARACTER NORM + REAL ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANGT, SLAMCH + EXTERNAL LSAME, CLANGT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGTCON, CGTRFS, CGTTRF, CGTTRS, CLACPY, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGTSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the LU factorization of A. +* + CALL CCOPY( N, D, 1, DF, 1 ) + IF( N.GT.1 ) THEN + CALL CCOPY( N-1, DL, 1, DLF, 1 ) + CALL CCOPY( N-1, DU, 1, DUF, 1 ) + END IF + CALL CGTTRF( N, DLF, DF, DUF, DU2, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = CLANGT( NORM, N, DL, D, DU ) +* +* Compute the reciprocal of the condition number of A. +* + CALL CGTCON( NORM, N, DLF, DF, DUF, DU2, IPIV, ANORM, RCOND, WORK, + $ INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution vectors X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CGTTRS( TRANS, N, NRHS, DLF, DF, DUF, DU2, IPIV, X, LDX, + $ INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL CGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, IPIV, + $ B, LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* + RETURN +* +* End of CGTSVX +* + END diff --git a/costa/native/external/lapack/cgttrf.f b/costa/native/external/lapack/cgttrf.f new file mode 100644 index 000000000..16753f107 --- /dev/null +++ b/costa/native/external/lapack/cgttrf.f @@ -0,0 +1,175 @@ + SUBROUTINE CGTTRF( N, DL, D, DU, DU2, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* Purpose +* ======= +* +* CGTTRF computes an LU factorization of a complex tridiagonal matrix A +* using elimination with partial pivoting and row interchanges. +* +* The factorization has the form +* A = L * U +* where L is a product of permutation and unit lower bidiagonal +* matrices and U is upper triangular with nonzeros in only the main +* diagonal and first two superdiagonals. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. +* +* DL (input/output) COMPLEX array, dimension (N-1) +* On entry, DL must contain the (n-1) sub-diagonal elements of +* A. +* +* On exit, DL is overwritten by the (n-1) multipliers that +* define the matrix L from the LU factorization of A. +* +* D (input/output) COMPLEX array, dimension (N) +* On entry, D must contain the diagonal elements of A. +* +* On exit, D is overwritten by the n diagonal elements of the +* upper triangular matrix U from the LU factorization of A. +* +* DU (input/output) COMPLEX array, dimension (N-1) +* On entry, DU must contain the (n-1) super-diagonal elements +* of A. +* +* On exit, DU is overwritten by the (n-1) elements of the first +* super-diagonal of U. +* +* DU2 (output) COMPLEX array, dimension (N-2) +* On exit, DU2 is overwritten by the (n-2) elements of the +* second super-diagonal of U. +* +* IPIV (output) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, U(k,k) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX FACT, TEMP, ZDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'CGTTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Initialize IPIV(i) = i and DU2(i) = 0 +* + DO 10 I = 1, N + IPIV( I ) = I + 10 CONTINUE + DO 20 I = 1, N - 2 + DU2( I ) = ZERO + 20 CONTINUE +* + DO 30 I = 1, N - 2 + IF( CABS1( D( I ) ).GE.CABS1( DL( I ) ) ) THEN +* +* No row interchange required, eliminate DL(I) +* + IF( CABS1( D( I ) ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + DL( I ) = FACT + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + END IF + ELSE +* +* Interchange rows I and I+1, eliminate DL(I) +* + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + DL( I ) = FACT + TEMP = DU( I ) + DU( I ) = D( I+1 ) + D( I+1 ) = TEMP - FACT*D( I+1 ) + DU2( I ) = DU( I+1 ) + DU( I+1 ) = -FACT*DU( I+1 ) + IPIV( I ) = I + 1 + END IF + 30 CONTINUE + IF( N.GT.1 ) THEN + I = N - 1 + IF( CABS1( D( I ) ).GE.CABS1( DL( I ) ) ) THEN + IF( CABS1( D( I ) ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + DL( I ) = FACT + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + END IF + ELSE + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + DL( I ) = FACT + TEMP = DU( I ) + DU( I ) = D( I+1 ) + D( I+1 ) = TEMP - FACT*D( I+1 ) + IPIV( I ) = I + 1 + END IF + END IF +* +* Check for a zero on the diagonal of U. +* + DO 40 I = 1, N + IF( CABS1( D( I ) ).EQ.ZERO ) THEN + INFO = I + GO TO 50 + END IF + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of CGTTRF +* + END diff --git a/costa/native/external/lapack/cgttrs.f b/costa/native/external/lapack/cgttrs.f new file mode 100644 index 000000000..fdf507112 --- /dev/null +++ b/costa/native/external/lapack/cgttrs.f @@ -0,0 +1,143 @@ + SUBROUTINE CGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* Purpose +* ======= +* +* CGTTRS solves one of the systems of equations +* A * X = B, A**T * X = B, or A**H * X = B, +* with a tridiagonal matrix A using the LU factorization computed +* by CGTTRF. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER +* Specifies the form of the system of equations. +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The order of the matrix A. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input) COMPLEX array, dimension (N-1) +* The (n-1) multipliers that define the matrix L from the +* LU factorization of A. +* +* D (input) COMPLEX array, dimension (N) +* The n diagonal elements of the upper triangular matrix U from +* the LU factorization of A. +* +* DU (input) COMPLEX array, dimension (N-1) +* The (n-1) elements of the first super-diagonal of U. +* +* DU2 (input) COMPLEX array, dimension (N-2) +* The (n-2) elements of the second super-diagonal of U. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the matrix of right hand side vectors B. +* On exit, B is overwritten by the solution vectors X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL NOTRAN + INTEGER ITRANS, J, JB, NB +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CGTTS2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOTRAN = ( TRANS.EQ.'N' .OR. TRANS.EQ.'n' ) + IF( .NOT.NOTRAN .AND. .NOT.( TRANS.EQ.'T' .OR. TRANS.EQ. + $ 't' ) .AND. .NOT.( TRANS.EQ.'C' .OR. TRANS.EQ.'c' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGTTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Decode TRANS +* + IF( NOTRAN ) THEN + ITRANS = 0 + ELSE IF( TRANS.EQ.'T' .OR. TRANS.EQ.'t' ) THEN + ITRANS = 1 + ELSE + ITRANS = 2 + END IF +* +* Determine the number of right-hand sides to solve at a time. +* + IF( NRHS.EQ.1 ) THEN + NB = 1 + ELSE + NB = MAX( 1, ILAENV( 1, 'CGTTRS', TRANS, N, NRHS, -1, -1 ) ) + END IF +* + IF( NB.GE.NRHS ) THEN + CALL CGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) + ELSE + DO 10 J = 1, NRHS, NB + JB = MIN( NRHS-J+1, NB ) + CALL CGTTS2( ITRANS, N, JB, DL, D, DU, DU2, IPIV, B( 1, J ), + $ LDB ) + 10 CONTINUE + END IF +* +* End of CGTTRS +* + END diff --git a/costa/native/external/lapack/cgtts2.f b/costa/native/external/lapack/cgtts2.f new file mode 100644 index 000000000..7a9188ce9 --- /dev/null +++ b/costa/native/external/lapack/cgtts2.f @@ -0,0 +1,272 @@ + SUBROUTINE CGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER ITRANS, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* Purpose +* ======= +* +* CGTTS2 solves one of the systems of equations +* A * X = B, A**T * X = B, or A**H * X = B, +* with a tridiagonal matrix A using the LU factorization computed +* by CGTTRF. +* +* Arguments +* ========= +* +* ITRANS (input) INTEGER +* Specifies the form of the system of equations. +* = 0: A * X = B (No transpose) +* = 1: A**T * X = B (Transpose) +* = 2: A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The order of the matrix A. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input) COMPLEX array, dimension (N-1) +* The (n-1) multipliers that define the matrix L from the +* LU factorization of A. +* +* D (input) COMPLEX array, dimension (N) +* The n diagonal elements of the upper triangular matrix U from +* the LU factorization of A. +* +* DU (input) COMPLEX array, dimension (N-1) +* The (n-1) elements of the first super-diagonal of U. +* +* DU2 (input) COMPLEX array, dimension (N-2) +* The (n-2) elements of the second super-diagonal of U. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the matrix of right hand side vectors B. +* On exit, B is overwritten by the solution vectors X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J + COMPLEX TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( ITRANS.EQ.0 ) THEN +* +* Solve A*X = B using the LU factorization of A, +* overwriting each right hand side vector with its solution. +* + IF( NRHS.LE.1 ) THEN + J = 1 + 10 CONTINUE +* +* Solve L*x = b. +* + DO 20 I = 1, N - 1 + IF( IPIV( I ).EQ.I ) THEN + B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J ) + ELSE + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - DL( I )*B( I, J ) + END IF + 20 CONTINUE +* +* Solve U*x = b. +* + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 30 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )* + $ B( I+2, J ) ) / D( I ) + 30 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 10 + END IF + ELSE + DO 60 J = 1, NRHS +* +* Solve L*x = b. +* + DO 40 I = 1, N - 1 + IF( IPIV( I ).EQ.I ) THEN + B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J ) + ELSE + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - DL( I )*B( I, J ) + END IF + 40 CONTINUE +* +* Solve U*x = b. +* + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 50 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )* + $ B( I+2, J ) ) / D( I ) + 50 CONTINUE + 60 CONTINUE + END IF + ELSE IF( ITRANS.EQ.1 ) THEN +* +* Solve A**T * X = B. +* + IF( NRHS.LE.1 ) THEN + J = 1 + 70 CONTINUE +* +* Solve U**T * x = b. +* + B( 1, J ) = B( 1, J ) / D( 1 ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) + DO 80 I = 3, N + B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )-DU2( I-2 )* + $ B( I-2, J ) ) / D( I ) + 80 CONTINUE +* +* Solve L**T * x = b. +* + DO 90 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - DL( I )*B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - DL( I )*TEMP + B( I, J ) = TEMP + END IF + 90 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 70 + END IF + ELSE + DO 120 J = 1, NRHS +* +* Solve U**T * x = b. +* + B( 1, J ) = B( 1, J ) / D( 1 ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) + DO 100 I = 3, N + B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )- + $ DU2( I-2 )*B( I-2, J ) ) / D( I ) + 100 CONTINUE +* +* Solve L**T * x = b. +* + DO 110 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - DL( I )*B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - DL( I )*TEMP + B( I, J ) = TEMP + END IF + 110 CONTINUE + 120 CONTINUE + END IF + ELSE +* +* Solve A**H * X = B. +* + IF( NRHS.LE.1 ) THEN + J = 1 + 130 CONTINUE +* +* Solve U**H * x = b. +* + B( 1, J ) = B( 1, J ) / CONJG( D( 1 ) ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-CONJG( DU( 1 ) )*B( 1, J ) ) / + $ CONJG( D( 2 ) ) + DO 140 I = 3, N + B( I, J ) = ( B( I, J )-CONJG( DU( I-1 ) )*B( I-1, J )- + $ CONJG( DU2( I-2 ) )*B( I-2, J ) ) / + $ CONJG( D( I ) ) + 140 CONTINUE +* +* Solve L**H * x = b. +* + DO 150 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - CONJG( DL( I ) )*B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - CONJG( DL( I ) )*TEMP + B( I, J ) = TEMP + END IF + 150 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 130 + END IF + ELSE + DO 180 J = 1, NRHS +* +* Solve U**H * x = b. +* + B( 1, J ) = B( 1, J ) / CONJG( D( 1 ) ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-CONJG( DU( 1 ) )*B( 1, J ) ) / + $ CONJG( D( 2 ) ) + DO 160 I = 3, N + B( I, J ) = ( B( I, J )-CONJG( DU( I-1 ) )* + $ B( I-1, J )-CONJG( DU2( I-2 ) )* + $ B( I-2, J ) ) / CONJG( D( I ) ) + 160 CONTINUE +* +* Solve L**H * x = b. +* + DO 170 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - CONJG( DL( I ) )* + $ B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - CONJG( DL( I ) )*TEMP + B( I, J ) = TEMP + END IF + 170 CONTINUE + 180 CONTINUE + END IF + END IF +* +* End of CGTTS2 +* + END diff --git a/costa/native/external/lapack/chbev.f b/costa/native/external/lapack/chbev.f new file mode 100644 index 000000000..2375cc35c --- /dev/null +++ b/costa/native/external/lapack/chbev.f @@ -0,0 +1,209 @@ + SUBROUTINE CHBEV( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, + $ RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, N +* .. +* .. Array Arguments .. + REAL RWORK( * ), W( * ) + COMPLEX AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CHBEV computes all the eigenvalues and, optionally, eigenvectors of +* a complex Hermitian band matrix A. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, AB is overwritten by values generated during the +* reduction to tridiagonal form. If UPLO = 'U', the first +* superdiagonal and the diagonal of the tridiagonal matrix T +* are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +* the diagonal and first subdiagonal of T are returned in the +* first two rows of AB. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD + 1. +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) COMPLEX array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +* eigenvectors of the matrix A, with the i-th column of Z +* holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* RWORK (workspace) REAL array, dimension (max(1,3*N-2)) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, WANTZ + INTEGER IINFO, IMAX, INDE, INDRWK, ISCALE + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHB, SLAMCH + EXTERNAL LSAME, CLANHB, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CHBTRD, CLASCL, CSTEQR, SSCAL, SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHBEV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( LOWER ) THEN + W( 1 ) = AB( 1, 1 ) + ELSE + W( 1 ) = AB( KD+1, 1 ) + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = CLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL CLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL CLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call CHBTRD to reduce Hermitian band matrix to tridiagonal form. +* + INDE = 1 + CALL CHBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, RWORK( INDE ), Z, + $ LDZ, WORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call CSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + INDRWK = INDE + N + CALL CSTEQR( JOBZ, N, W, RWORK( INDE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + RETURN +* +* End of CHBEV +* + END diff --git a/costa/native/external/lapack/chbevd.f b/costa/native/external/lapack/chbevd.f new file mode 100644 index 000000000..9573958eb --- /dev/null +++ b/costa/native/external/lapack/chbevd.f @@ -0,0 +1,298 @@ + SUBROUTINE CHBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, + $ LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CHBEVD computes all the eigenvalues and, optionally, eigenvectors of +* a complex Hermitian band matrix A. If eigenvectors are desired, it +* uses a divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, AB is overwritten by values generated during the +* reduction to tridiagonal form. If UPLO = 'U', the first +* superdiagonal and the diagonal of the tridiagonal matrix T +* are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +* the diagonal and first subdiagonal of T are returned in the +* first two rows of AB. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD + 1. +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) COMPLEX array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +* eigenvectors of the matrix A, with the i-th column of Z +* holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If N <= 1, LWORK must be at least 1. +* If JOBZ = 'N' and N > 1, LWORK must be at least N. +* If JOBZ = 'V' and N > 1, LWORK must be at least 2*N**2. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace/output) REAL array, +* dimension (LRWORK) +* On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +* +* LRWORK (input) INTEGER +* The dimension of array RWORK. +* If N <= 1, LRWORK must be at least 1. +* If JOBZ = 'N' and N > 1, LRWORK must be at least N. +* If JOBZ = 'V' and N > 1, LRWORK must be at least +* 1 + 5*N + 2*N**2. +* +* If LRWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the RWORK array, +* returns this value as the first entry of the RWORK array, and +* no error message related to LRWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of array IWORK. +* If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +* If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N . +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDWK2, INDWRK, ISCALE, + $ LIWMIN, LLRWK, LLWK2, LRWMIN, LWMIN + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHB, SLAMCH + EXTERNAL LSAME, CLANHB, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CHBTRD, CLACPY, CLASCL, CSTEDC, SSCAL, + $ SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LWMIN = 1 + LRWMIN = 1 + LIWMIN = 1 + ELSE + IF( WANTZ ) THEN + LWMIN = 2*N**2 + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + LRWMIN = N + LIWMIN = 1 + END IF + END IF + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHBEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AB( 1, 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = CLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL CLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL CLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call CHBTRD to reduce Hermitian band matrix to tridiagonal form. +* + INDE = 1 + INDWRK = INDE + N + INDWK2 = 1 + N*N + LLWK2 = LWORK - INDWK2 + 1 + LLRWK = LRWORK - INDWRK + 1 + CALL CHBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, RWORK( INDE ), Z, + $ LDZ, WORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call CSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL CSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, WORK( INDWK2 ), + $ LLWK2, RWORK( INDWRK ), LLRWK, IWORK, LIWORK, + $ INFO ) + CALL CGEMM( 'N', 'N', N, N, N, CONE, Z, LDZ, WORK, N, CZERO, + $ WORK( INDWK2 ), N ) + CALL CLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of CHBEVD +* + END diff --git a/costa/native/external/lapack/chbevx.f b/costa/native/external/lapack/chbevx.f new file mode 100644 index 000000000..02869d2d2 --- /dev/null +++ b/costa/native/external/lapack/chbevx.f @@ -0,0 +1,417 @@ + SUBROUTINE CHBEVX( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, LDQ, VL, + $ VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, RWORK, + $ IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX AB( LDAB, * ), Q( LDQ, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CHBEVX computes selected eigenvalues and, optionally, eigenvectors +* of a complex Hermitian band matrix A. Eigenvalues and eigenvectors +* can be selected by specifying either a range of values or a range of +* indices for the desired eigenvalues. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found; +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found; +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, AB is overwritten by values generated during the +* reduction to tridiagonal form. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD + 1. +* +* Q (output) COMPLEX array, dimension (LDQ, N) +* If JOBZ = 'V', the N-by-N unitary matrix used in the +* reduction to tridiagonal form. +* If JOBZ = 'N', the array Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. If JOBZ = 'V', then +* LDQ >= max(1,N). +* +* VL (input) REAL +* VU (input) REAL +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) REAL +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing AB to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*SLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*SLAMCH('S'). +* +* See "Computing Small Singular Values of Bidiagonal Matrices +* with Guaranteed High Relative Accuracy," by Demmel and +* Kahan, LAPACK Working Note #3. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) REAL array, dimension (N) +* The first M elements contain the selected eigenvalues in +* ascending order. +* +* Z (output) COMPLEX array, dimension (LDZ, max(1,M)) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If an eigenvector fails to converge, then that column of Z +* contains the latest approximation to the eigenvector, and the +* index of the eigenvector is returned in IFAIL. +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* RWORK (workspace) REAL array, dimension (7*N) +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, then i eigenvectors failed to converge. +* Their indices are stored in array IFAIL. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWK, INDRWK, INDWRK, ISCALE, ITMP1, + $ J, JJ, NSPLIT + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU + COMPLEX CTMP1 +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHB, SLAMCH + EXTERNAL LSAME, CLANHB, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMV, CHBTRD, CLACPY, CLASCL, CSTEIN, + $ CSTEQR, CSWAP, SCOPY, SSCAL, SSTEBZ, SSTERF, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LOWER = LSAME( UPLO, 'L' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( WANTZ .AND. LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -11 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -13 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -18 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHBEVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + M = 1 + IF( LOWER ) THEN + CTMP1 = AB( 1, 1 ) + ELSE + CTMP1 = AB( KD+1, 1 ) + END IF + TMP1 = REAL( CTMP1 ) + IF( VALEIG ) THEN + IF( .NOT.( VL.LT.TMP1 .AND. VU.GE.TMP1 ) ) + $ M = 0 + END IF + IF( M.EQ.1 ) THEN + W( 1 ) = CTMP1 + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF ( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + ENDIF + ANRM = CLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL CLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL CLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call CHBTRD to reduce Hermitian band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N + INDWRK = 1 + CALL CHBTRD( JOBZ, UPLO, N, KD, AB, LDAB, RWORK( INDD ), + $ RWORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call SSTERF or CSTEQR. If this fails for some +* eigenvalue, then try SSTEBZ. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ ( ABSTOL.LE.ZERO ) ) THEN + CALL SCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL SSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL CLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL CSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL CSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by CSTEIN. +* + DO 20 J = 1, M + CALL CCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL CGEMV( 'N', N, N, CONE, Q, LDQ, WORK, 1, CZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* + RETURN +* +* End of CHBEVX +* + END diff --git a/costa/native/external/lapack/chbgst.f b/costa/native/external/lapack/chbgst.f new file mode 100644 index 000000000..3bec2ad70 --- /dev/null +++ b/costa/native/external/lapack/chbgst.f @@ -0,0 +1,1377 @@ + SUBROUTINE CHBGST( VECT, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, X, + $ LDX, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO, VECT + INTEGER INFO, KA, KB, LDAB, LDBB, LDX, N +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX AB( LDAB, * ), BB( LDBB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CHBGST reduces a complex Hermitian-definite banded generalized +* eigenproblem A*x = lambda*B*x to standard form C*y = lambda*y, +* such that C has the same bandwidth as A. +* +* B must have been previously factorized as S**H*S by CPBSTF, using a +* split Cholesky factorization. A is overwritten by C = X**H*A*X, where +* X = S**(-1)*Q and Q is a unitary matrix chosen to preserve the +* bandwidth of A. +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* = 'N': do not form the transformation matrix X; +* = 'V': form X. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* KA (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KA >= 0. +* +* KB (input) INTEGER +* The number of superdiagonals of the matrix B if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KA >= KB >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first ka+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +* +* On exit, the transformed matrix X**H*A*X, stored in the same +* format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KA+1. +* +* BB (input) COMPLEX array, dimension (LDBB,N) +* The banded factor S from the split Cholesky factorization of +* B, as returned by CPBSTF, stored in the first kb+1 rows of +* the array. +* +* LDBB (input) INTEGER +* The leading dimension of the array BB. LDBB >= KB+1. +* +* X (output) COMPLEX array, dimension (LDX,N) +* If VECT = 'V', the n-by-n matrix X. +* If VECT = 'N', the array X is not referenced. +* +* LDX (input) INTEGER +* The leading dimension of the array X. +* LDX >= max(1,N) if VECT = 'V'; LDX >= 1 otherwise. +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + REAL ONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ), ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPDATE, UPPER, WANTX + INTEGER I, I0, I1, I2, INCA, J, J1, J1T, J2, J2T, K, + $ KA1, KB1, KBT, L, M, NR, NRT, NX + REAL BII + COMPLEX RA, RA1, T +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGERC, CGERU, CLACGV, CLAR2V, CLARGV, CLARTG, + $ CLARTV, CLASET, CROT, CSSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTX = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + KA1 = KA + 1 + KB1 = KB + 1 + INFO = 0 + IF( .NOT.WANTX .AND. .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDX.LT.1 .OR. WANTX .AND. LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHBGST', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + INCA = LDAB*KA1 +* +* Initialize X to the unit matrix, if needed +* + IF( WANTX ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, X, LDX ) +* +* Set M to the splitting point m. It must be the same value as is +* used in CPBSTF. The chosen value allows the arrays WORK and RWORK +* to be of dimension (N). +* + M = ( N+KB ) / 2 +* +* The routine works in two phases, corresponding to the two halves +* of the split Cholesky factorization of B as S**H*S where +* +* S = ( U ) +* ( M L ) +* +* with U upper triangular of order m, and L lower triangular of +* order n-m. S has the same bandwidth as B. +* +* S is treated as a product of elementary matrices: +* +* S = S(m)*S(m-1)*...*S(2)*S(1)*S(m+1)*S(m+2)*...*S(n-1)*S(n) +* +* where S(i) is determined by the i-th row of S. +* +* In phase 1, the index i takes the values n, n-1, ... , m+1; +* in phase 2, it takes the values 1, 2, ... , m. +* +* For each value of i, the current matrix A is updated by forming +* inv(S(i))**H*A*inv(S(i)). This creates a triangular bulge outside +* the band of A. The bulge is then pushed down toward the bottom of +* A in phase 1, and up toward the top of A in phase 2, by applying +* plane rotations. +* +* There are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1 +* of them are linearly independent, so annihilating a bulge requires +* only 2*kb-1 plane rotations. The rotations are divided into a 1st +* set of kb-1 rotations, and a 2nd set of kb rotations. +* +* Wherever possible, rotations are generated and applied in vector +* operations of length NR between the indices J1 and J2 (sometimes +* replaced by modified values NRT, J1T or J2T). +* +* The real cosines and complex sines of the rotations are stored in +* the arrays RWORK and WORK, those of the 1st set in elements +* 2:m-kb-1, and those of the 2nd set in elements m-kb+1:n. +* +* The bulges are not formed explicitly; nonzero elements outside the +* band are created only when they are required for generating new +* rotations; they are stored in the array WORK, in positions where +* they are later overwritten by the sines of the rotations which +* annihilate them. +* +* **************************** Phase 1 ***************************** +* +* The logical structure of this phase is: +* +* UPDATE = .TRUE. +* DO I = N, M + 1, -1 +* use S(i) to update A and create a new bulge +* apply rotations to push all bulges KA positions downward +* END DO +* UPDATE = .FALSE. +* DO I = M + KA + 1, N - 1 +* apply rotations to push all bulges KA positions downward +* END DO +* +* To avoid duplicating code, the two loops are merged. +* + UPDATE = .TRUE. + I = N + 1 + 10 CONTINUE + IF( UPDATE ) THEN + I = I - 1 + KBT = MIN( KB, I-1 ) + I0 = I - 1 + I1 = MIN( N, I+KA ) + I2 = I - KBT + KA1 + IF( I.LT.M+1 ) THEN + UPDATE = .FALSE. + I = I + 1 + I0 = M + IF( KA.EQ.0 ) + $ GO TO 480 + GO TO 10 + END IF + ELSE + I = I + KA + IF( I.GT.N-1 ) + $ GO TO 480 + END IF +* + IF( UPPER ) THEN +* +* Transform A, working with the upper triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**H * A * inv(S(i)) +* + BII = REAL( BB( KB1, I ) ) + AB( KA1, I ) = ( REAL( AB( KA1, I ) ) / BII ) / BII + DO 20 J = I + 1, I1 + AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII + 20 CONTINUE + DO 30 J = MAX( 1, I-KA ), I - 1 + AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII + 30 CONTINUE + DO 60 K = I - KBT, I - 1 + DO 40 J = I - KBT, K + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ BB( J-I+KB1, I )* + $ CONJG( AB( K-I+KA1, I ) ) - + $ CONJG( BB( K-I+KB1, I ) )* + $ AB( J-I+KA1, I ) + + $ REAL( AB( KA1, I ) )* + $ BB( J-I+KB1, I )* + $ CONJG( BB( K-I+KB1, I ) ) + 40 CONTINUE + DO 50 J = MAX( 1, I-KA ), I - KBT - 1 + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ CONJG( BB( K-I+KB1, I ) )* + $ AB( J-I+KA1, I ) + 50 CONTINUE + 60 CONTINUE + DO 80 J = I, I1 + DO 70 K = MAX( J-KA, I-KBT ), I - 1 + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ BB( K-I+KB1, I )*AB( I-J+KA1, J ) + 70 CONTINUE + 80 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL CSSCAL( N-M, ONE / BII, X( M+1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL CGERC( N-M, KBT, -CONE, X( M+1, I ), 1, + $ BB( KB1-KBT, I ), 1, X( M+1, I-KBT ), + $ LDX ) + END IF +* +* store a(i,i1) in RA1 for use in next loop over K +* + RA1 = AB( I-I1+KA1, I1 ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions down toward the bottom of the +* band +* + DO 130 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN +* +* generate rotation to annihilate a(i,i-k+ka+1) +* + CALL CLARTG( AB( K+1, I-K+KA ), RA1, + $ RWORK( I-K+KA-M ), WORK( I-K+KA-M ), RA ) +* +* create nonzero element a(i-k,i-k+ka+1) outside the +* band and store it in WORK(i-k) +* + T = -BB( KB1-K, I )*RA1 + WORK( I-K ) = RWORK( I-K+KA-M )*T - + $ CONJG( WORK( I-K+KA-M ) )* + $ AB( 1, I-K+KA ) + AB( 1, I-K+KA ) = WORK( I-K+KA-M )*T + + $ RWORK( I-K+KA-M )*AB( 1, I-K+KA ) + RA1 = RA + END IF + END IF + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MAX( J2, I+2*KA-K+1 ) + ELSE + J2T = J2 + END IF + NRT = ( N-J2T+KA ) / KA1 + DO 90 J = J2T, J1, KA1 +* +* create nonzero element a(j-ka,j+1) outside the band +* and store it in WORK(j-m) +* + WORK( J-M ) = WORK( J-M )*AB( 1, J+1 ) + AB( 1, J+1 ) = RWORK( J-M )*AB( 1, J+1 ) + 90 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL CLARGV( NRT, AB( 1, J2T ), INCA, WORK( J2T-M ), KA1, + $ RWORK( J2T-M ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the right +* + DO 100 L = 1, KA - 1 + CALL CLARTV( NR, AB( KA1-L, J2 ), INCA, + $ AB( KA-L, J2+1 ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 100 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL CLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ), + $ AB( KA, J2+1 ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) +* + CALL CLACGV( NR, WORK( J2-M ), KA1 ) + END IF +* +* start applying rotations in 1st set from the left +* + DO 110 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 110 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 120 J = J2, J1, KA1 + CALL CROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ RWORK( J-M ), CONJG( WORK( J-M ) ) ) + 120 CONTINUE + END IF + 130 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.LE.N .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i-kbt,i-kbt+ka+1) outside the +* band and store it in WORK(i-kbt) +* + WORK( I-KBT ) = -BB( KB1-KBT, I )*RA1 + END IF + END IF +* + DO 170 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1 + ELSE + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + END IF +* +* finish applying rotations in 2nd set from the left +* + DO 140 L = KB - K, 1, -1 + NRT = ( N-J2+KA+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( L, J2-L+1 ), INCA, + $ AB( L+1, J2-L+1 ), INCA, RWORK( J2-KA ), + $ WORK( J2-KA ), KA1 ) + 140 CONTINUE + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + DO 150 J = J1, J2, -KA1 + WORK( J ) = WORK( J-KA ) + RWORK( J ) = RWORK( J-KA ) + 150 CONTINUE + DO 160 J = J2, J1, KA1 +* +* create nonzero element a(j-ka,j+1) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( 1, J+1 ) + AB( 1, J+1 ) = RWORK( J )*AB( 1, J+1 ) + 160 CONTINUE + IF( UPDATE ) THEN + IF( I-K.LT.N-KA .AND. K.LE.KBT ) + $ WORK( I-K+KA ) = WORK( I-K ) + END IF + 170 CONTINUE +* + DO 210 K = KB, 1, -1 + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL CLARGV( NR, AB( 1, J2 ), INCA, WORK( J2 ), KA1, + $ RWORK( J2 ), KA1 ) +* +* apply rotations in 2nd set from the right +* + DO 180 L = 1, KA - 1 + CALL CLARTV( NR, AB( KA1-L, J2 ), INCA, + $ AB( KA-L, J2+1 ), INCA, RWORK( J2 ), + $ WORK( J2 ), KA1 ) + 180 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL CLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ), + $ AB( KA, J2+1 ), INCA, RWORK( J2 ), + $ WORK( J2 ), KA1 ) +* + CALL CLACGV( NR, WORK( J2 ), KA1 ) + END IF +* +* start applying rotations in 2nd set from the left +* + DO 190 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, RWORK( J2 ), + $ WORK( J2 ), KA1 ) + 190 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 200 J = J2, J1, KA1 + CALL CROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ RWORK( J ), CONJG( WORK( J ) ) ) + 200 CONTINUE + END IF + 210 CONTINUE +* + DO 230 K = 1, KB - 1 + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 +* +* finish applying rotations in 1st set from the left +* + DO 220 L = KB - K, 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 220 CONTINUE + 230 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 240 J = N - 1, I2 + KA, -1 + RWORK( J-M ) = RWORK( J-KA-M ) + WORK( J-M ) = WORK( J-KA-M ) + 240 CONTINUE + END IF +* + ELSE +* +* Transform A, working with the lower triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**H * A * inv(S(i)) +* + BII = REAL( BB( 1, I ) ) + AB( 1, I ) = ( REAL( AB( 1, I ) ) / BII ) / BII + DO 250 J = I + 1, I1 + AB( J-I+1, I ) = AB( J-I+1, I ) / BII + 250 CONTINUE + DO 260 J = MAX( 1, I-KA ), I - 1 + AB( I-J+1, J ) = AB( I-J+1, J ) / BII + 260 CONTINUE + DO 290 K = I - KBT, I - 1 + DO 270 J = I - KBT, K + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ BB( I-J+1, J )*CONJG( AB( I-K+1, + $ K ) ) - CONJG( BB( I-K+1, K ) )* + $ AB( I-J+1, J ) + REAL( AB( 1, I ) )* + $ BB( I-J+1, J )*CONJG( BB( I-K+1, + $ K ) ) + 270 CONTINUE + DO 280 J = MAX( 1, I-KA ), I - KBT - 1 + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ CONJG( BB( I-K+1, K ) )* + $ AB( I-J+1, J ) + 280 CONTINUE + 290 CONTINUE + DO 310 J = I, I1 + DO 300 K = MAX( J-KA, I-KBT ), I - 1 + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ BB( I-K+1, K )*AB( J-I+1, I ) + 300 CONTINUE + 310 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL CSSCAL( N-M, ONE / BII, X( M+1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL CGERU( N-M, KBT, -CONE, X( M+1, I ), 1, + $ BB( KBT+1, I-KBT ), LDBB-1, + $ X( M+1, I-KBT ), LDX ) + END IF +* +* store a(i1,i) in RA1 for use in next loop over K +* + RA1 = AB( I1-I+1, I ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions down toward the bottom of the +* band +* + DO 360 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN +* +* generate rotation to annihilate a(i-k+ka+1,i) +* + CALL CLARTG( AB( KA1-K, I ), RA1, RWORK( I-K+KA-M ), + $ WORK( I-K+KA-M ), RA ) +* +* create nonzero element a(i-k+ka+1,i-k) outside the +* band and store it in WORK(i-k) +* + T = -BB( K+1, I-K )*RA1 + WORK( I-K ) = RWORK( I-K+KA-M )*T - + $ CONJG( WORK( I-K+KA-M ) )*AB( KA1, I-K ) + AB( KA1, I-K ) = WORK( I-K+KA-M )*T + + $ RWORK( I-K+KA-M )*AB( KA1, I-K ) + RA1 = RA + END IF + END IF + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MAX( J2, I+2*KA-K+1 ) + ELSE + J2T = J2 + END IF + NRT = ( N-J2T+KA ) / KA1 + DO 320 J = J2T, J1, KA1 +* +* create nonzero element a(j+1,j-ka) outside the band +* and store it in WORK(j-m) +* + WORK( J-M ) = WORK( J-M )*AB( KA1, J-KA+1 ) + AB( KA1, J-KA+1 ) = RWORK( J-M )*AB( KA1, J-KA+1 ) + 320 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL CLARGV( NRT, AB( KA1, J2T-KA ), INCA, WORK( J2T-M ), + $ KA1, RWORK( J2T-M ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the left +* + DO 330 L = 1, KA - 1 + CALL CLARTV( NR, AB( L+1, J2-L ), INCA, + $ AB( L+2, J2-L ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 330 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL CLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ), + $ INCA, RWORK( J2-M ), WORK( J2-M ), KA1 ) +* + CALL CLACGV( NR, WORK( J2-M ), KA1 ) + END IF +* +* start applying rotations in 1st set from the right +* + DO 340 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 340 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 350 J = J2, J1, KA1 + CALL CROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ RWORK( J-M ), WORK( J-M ) ) + 350 CONTINUE + END IF + 360 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.LE.N .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i-kbt+ka+1,i-kbt) outside the +* band and store it in WORK(i-kbt) +* + WORK( I-KBT ) = -BB( KBT+1, I-KBT )*RA1 + END IF + END IF +* + DO 400 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1 + ELSE + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + END IF +* +* finish applying rotations in 2nd set from the right +* + DO 370 L = KB - K, 1, -1 + NRT = ( N-J2+KA+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( KA1-L+1, J2-KA ), INCA, + $ AB( KA1-L, J2-KA+1 ), INCA, + $ RWORK( J2-KA ), WORK( J2-KA ), KA1 ) + 370 CONTINUE + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + DO 380 J = J1, J2, -KA1 + WORK( J ) = WORK( J-KA ) + RWORK( J ) = RWORK( J-KA ) + 380 CONTINUE + DO 390 J = J2, J1, KA1 +* +* create nonzero element a(j+1,j-ka) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( KA1, J-KA+1 ) + AB( KA1, J-KA+1 ) = RWORK( J )*AB( KA1, J-KA+1 ) + 390 CONTINUE + IF( UPDATE ) THEN + IF( I-K.LT.N-KA .AND. K.LE.KBT ) + $ WORK( I-K+KA ) = WORK( I-K ) + END IF + 400 CONTINUE +* + DO 440 K = KB, 1, -1 + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL CLARGV( NR, AB( KA1, J2-KA ), INCA, WORK( J2 ), KA1, + $ RWORK( J2 ), KA1 ) +* +* apply rotations in 2nd set from the left +* + DO 410 L = 1, KA - 1 + CALL CLARTV( NR, AB( L+1, J2-L ), INCA, + $ AB( L+2, J2-L ), INCA, RWORK( J2 ), + $ WORK( J2 ), KA1 ) + 410 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL CLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ), + $ INCA, RWORK( J2 ), WORK( J2 ), KA1 ) +* + CALL CLACGV( NR, WORK( J2 ), KA1 ) + END IF +* +* start applying rotations in 2nd set from the right +* + DO 420 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, RWORK( J2 ), + $ WORK( J2 ), KA1 ) + 420 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 430 J = J2, J1, KA1 + CALL CROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ RWORK( J ), WORK( J ) ) + 430 CONTINUE + END IF + 440 CONTINUE +* + DO 460 K = 1, KB - 1 + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 +* +* finish applying rotations in 1st set from the right +* + DO 450 L = KB - K, 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 450 CONTINUE + 460 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 470 J = N - 1, I2 + KA, -1 + RWORK( J-M ) = RWORK( J-KA-M ) + WORK( J-M ) = WORK( J-KA-M ) + 470 CONTINUE + END IF +* + END IF +* + GO TO 10 +* + 480 CONTINUE +* +* **************************** Phase 2 ***************************** +* +* The logical structure of this phase is: +* +* UPDATE = .TRUE. +* DO I = 1, M +* use S(i) to update A and create a new bulge +* apply rotations to push all bulges KA positions upward +* END DO +* UPDATE = .FALSE. +* DO I = M - KA - 1, 2, -1 +* apply rotations to push all bulges KA positions upward +* END DO +* +* To avoid duplicating code, the two loops are merged. +* + UPDATE = .TRUE. + I = 0 + 490 CONTINUE + IF( UPDATE ) THEN + I = I + 1 + KBT = MIN( KB, M-I ) + I0 = I + 1 + I1 = MAX( 1, I-KA ) + I2 = I + KBT - KA1 + IF( I.GT.M ) THEN + UPDATE = .FALSE. + I = I - 1 + I0 = M + 1 + IF( KA.EQ.0 ) + $ RETURN + GO TO 490 + END IF + ELSE + I = I - KA + IF( I.LT.2 ) + $ RETURN + END IF +* + IF( I.LT.M-KBT ) THEN + NX = M + ELSE + NX = N + END IF +* + IF( UPPER ) THEN +* +* Transform A, working with the upper triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**H * A * inv(S(i)) +* + BII = REAL( BB( KB1, I ) ) + AB( KA1, I ) = ( REAL( AB( KA1, I ) ) / BII ) / BII + DO 500 J = I1, I - 1 + AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII + 500 CONTINUE + DO 510 J = I + 1, MIN( N, I+KA ) + AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII + 510 CONTINUE + DO 540 K = I + 1, I + KBT + DO 520 J = K, I + KBT + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ BB( I-J+KB1, J )* + $ CONJG( AB( I-K+KA1, K ) ) - + $ CONJG( BB( I-K+KB1, K ) )* + $ AB( I-J+KA1, J ) + + $ REAL( AB( KA1, I ) )* + $ BB( I-J+KB1, J )* + $ CONJG( BB( I-K+KB1, K ) ) + 520 CONTINUE + DO 530 J = I + KBT + 1, MIN( N, I+KA ) + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ CONJG( BB( I-K+KB1, K ) )* + $ AB( I-J+KA1, J ) + 530 CONTINUE + 540 CONTINUE + DO 560 J = I1, I + DO 550 K = I + 1, MIN( J+KA, I+KBT ) + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ BB( I-K+KB1, K )*AB( J-I+KA1, I ) + 550 CONTINUE + 560 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL CSSCAL( NX, ONE / BII, X( 1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL CGERU( NX, KBT, -CONE, X( 1, I ), 1, + $ BB( KB, I+1 ), LDBB-1, X( 1, I+1 ), LDX ) + END IF +* +* store a(i1,i) in RA1 for use in next loop over K +* + RA1 = AB( I1-I+KA1, I ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions up toward the top of the band +* + DO 610 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN +* +* generate rotation to annihilate a(i+k-ka-1,i) +* + CALL CLARTG( AB( K+1, I ), RA1, RWORK( I+K-KA ), + $ WORK( I+K-KA ), RA ) +* +* create nonzero element a(i+k-ka-1,i+k) outside the +* band and store it in WORK(m-kb+i+k) +* + T = -BB( KB1-K, I+K )*RA1 + WORK( M-KB+I+K ) = RWORK( I+K-KA )*T - + $ CONJG( WORK( I+K-KA ) )* + $ AB( 1, I+K ) + AB( 1, I+K ) = WORK( I+K-KA )*T + + $ RWORK( I+K-KA )*AB( 1, I+K ) + RA1 = RA + END IF + END IF + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MIN( J2, I-2*KA+K-1 ) + ELSE + J2T = J2 + END IF + NRT = ( J2T+KA-1 ) / KA1 + DO 570 J = J1, J2T, KA1 +* +* create nonzero element a(j-1,j+ka) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( 1, J+KA-1 ) + AB( 1, J+KA-1 ) = RWORK( J )*AB( 1, J+KA-1 ) + 570 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL CLARGV( NRT, AB( 1, J1+KA ), INCA, WORK( J1 ), KA1, + $ RWORK( J1 ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the left +* + DO 580 L = 1, KA - 1 + CALL CLARTV( NR, AB( KA1-L, J1+L ), INCA, + $ AB( KA-L, J1+L ), INCA, RWORK( J1 ), + $ WORK( J1 ), KA1 ) + 580 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL CLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ), + $ AB( KA, J1 ), INCA, RWORK( J1 ), WORK( J1 ), + $ KA1 ) +* + CALL CLACGV( NR, WORK( J1 ), KA1 ) + END IF +* +* start applying rotations in 1st set from the right +* + DO 590 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, RWORK( J1T ), + $ WORK( J1T ), KA1 ) + 590 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 600 J = J1, J2, KA1 + CALL CROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ RWORK( J ), WORK( J ) ) + 600 CONTINUE + END IF + 610 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.GT.0 .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i+kbt-ka-1,i+kbt) outside the +* band and store it in WORK(m-kb+i+kbt) +* + WORK( M-KB+I+KBT ) = -BB( KB1-KBT, I+KBT )*RA1 + END IF + END IF +* + DO 650 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1 + ELSE + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + END IF +* +* finish applying rotations in 2nd set from the right +* + DO 620 L = KB - K, 1, -1 + NRT = ( J2+KA+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( L, J1T+KA ), INCA, + $ AB( L+1, J1T+KA-1 ), INCA, + $ RWORK( M-KB+J1T+KA ), + $ WORK( M-KB+J1T+KA ), KA1 ) + 620 CONTINUE + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + DO 630 J = J1, J2, KA1 + WORK( M-KB+J ) = WORK( M-KB+J+KA ) + RWORK( M-KB+J ) = RWORK( M-KB+J+KA ) + 630 CONTINUE + DO 640 J = J1, J2, KA1 +* +* create nonzero element a(j-1,j+ka) outside the band +* and store it in WORK(m-kb+j) +* + WORK( M-KB+J ) = WORK( M-KB+J )*AB( 1, J+KA-1 ) + AB( 1, J+KA-1 ) = RWORK( M-KB+J )*AB( 1, J+KA-1 ) + 640 CONTINUE + IF( UPDATE ) THEN + IF( I+K.GT.KA1 .AND. K.LE.KBT ) + $ WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K ) + END IF + 650 CONTINUE +* + DO 690 K = KB, 1, -1 + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL CLARGV( NR, AB( 1, J1+KA ), INCA, WORK( M-KB+J1 ), + $ KA1, RWORK( M-KB+J1 ), KA1 ) +* +* apply rotations in 2nd set from the left +* + DO 660 L = 1, KA - 1 + CALL CLARTV( NR, AB( KA1-L, J1+L ), INCA, + $ AB( KA-L, J1+L ), INCA, RWORK( M-KB+J1 ), + $ WORK( M-KB+J1 ), KA1 ) + 660 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL CLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ), + $ AB( KA, J1 ), INCA, RWORK( M-KB+J1 ), + $ WORK( M-KB+J1 ), KA1 ) +* + CALL CLACGV( NR, WORK( M-KB+J1 ), KA1 ) + END IF +* +* start applying rotations in 2nd set from the right +* + DO 670 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, + $ RWORK( M-KB+J1T ), WORK( M-KB+J1T ), + $ KA1 ) + 670 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 680 J = J1, J2, KA1 + CALL CROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ RWORK( M-KB+J ), WORK( M-KB+J ) ) + 680 CONTINUE + END IF + 690 CONTINUE +* + DO 710 K = 1, KB - 1 + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 +* +* finish applying rotations in 1st set from the right +* + DO 700 L = KB - K, 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, RWORK( J1T ), + $ WORK( J1T ), KA1 ) + 700 CONTINUE + 710 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 720 J = 2, I2 - KA + RWORK( J ) = RWORK( J+KA ) + WORK( J ) = WORK( J+KA ) + 720 CONTINUE + END IF +* + ELSE +* +* Transform A, working with the lower triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**H * A * inv(S(i)) +* + BII = REAL( BB( 1, I ) ) + AB( 1, I ) = ( REAL( AB( 1, I ) ) / BII ) / BII + DO 730 J = I1, I - 1 + AB( I-J+1, J ) = AB( I-J+1, J ) / BII + 730 CONTINUE + DO 740 J = I + 1, MIN( N, I+KA ) + AB( J-I+1, I ) = AB( J-I+1, I ) / BII + 740 CONTINUE + DO 770 K = I + 1, I + KBT + DO 750 J = K, I + KBT + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ BB( J-I+1, I )*CONJG( AB( K-I+1, + $ I ) ) - CONJG( BB( K-I+1, I ) )* + $ AB( J-I+1, I ) + REAL( AB( 1, I ) )* + $ BB( J-I+1, I )*CONJG( BB( K-I+1, + $ I ) ) + 750 CONTINUE + DO 760 J = I + KBT + 1, MIN( N, I+KA ) + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ CONJG( BB( K-I+1, I ) )* + $ AB( J-I+1, I ) + 760 CONTINUE + 770 CONTINUE + DO 790 J = I1, I + DO 780 K = I + 1, MIN( J+KA, I+KBT ) + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ BB( K-I+1, I )*AB( I-J+1, J ) + 780 CONTINUE + 790 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL CSSCAL( NX, ONE / BII, X( 1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL CGERC( NX, KBT, -CONE, X( 1, I ), 1, BB( 2, I ), + $ 1, X( 1, I+1 ), LDX ) + END IF +* +* store a(i,i1) in RA1 for use in next loop over K +* + RA1 = AB( I-I1+1, I1 ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions up toward the top of the band +* + DO 840 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN +* +* generate rotation to annihilate a(i,i+k-ka-1) +* + CALL CLARTG( AB( KA1-K, I+K-KA ), RA1, + $ RWORK( I+K-KA ), WORK( I+K-KA ), RA ) +* +* create nonzero element a(i+k,i+k-ka-1) outside the +* band and store it in WORK(m-kb+i+k) +* + T = -BB( K+1, I )*RA1 + WORK( M-KB+I+K ) = RWORK( I+K-KA )*T - + $ CONJG( WORK( I+K-KA ) )* + $ AB( KA1, I+K-KA ) + AB( KA1, I+K-KA ) = WORK( I+K-KA )*T + + $ RWORK( I+K-KA )*AB( KA1, I+K-KA ) + RA1 = RA + END IF + END IF + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MIN( J2, I-2*KA+K-1 ) + ELSE + J2T = J2 + END IF + NRT = ( J2T+KA-1 ) / KA1 + DO 800 J = J1, J2T, KA1 +* +* create nonzero element a(j+ka,j-1) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( KA1, J-1 ) + AB( KA1, J-1 ) = RWORK( J )*AB( KA1, J-1 ) + 800 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL CLARGV( NRT, AB( KA1, J1 ), INCA, WORK( J1 ), KA1, + $ RWORK( J1 ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the right +* + DO 810 L = 1, KA - 1 + CALL CLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ), + $ INCA, RWORK( J1 ), WORK( J1 ), KA1 ) + 810 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL CLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ), + $ AB( 2, J1-1 ), INCA, RWORK( J1 ), + $ WORK( J1 ), KA1 ) +* + CALL CLACGV( NR, WORK( J1 ), KA1 ) + END IF +* +* start applying rotations in 1st set from the left +* + DO 820 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ RWORK( J1T ), WORK( J1T ), KA1 ) + 820 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 830 J = J1, J2, KA1 + CALL CROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ RWORK( J ), CONJG( WORK( J ) ) ) + 830 CONTINUE + END IF + 840 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.GT.0 .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i+kbt,i+kbt-ka-1) outside the +* band and store it in WORK(m-kb+i+kbt) +* + WORK( M-KB+I+KBT ) = -BB( KBT+1, I )*RA1 + END IF + END IF +* + DO 880 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1 + ELSE + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + END IF +* +* finish applying rotations in 2nd set from the left +* + DO 850 L = KB - K, 1, -1 + NRT = ( J2+KA+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( KA1-L+1, J1T+L-1 ), INCA, + $ AB( KA1-L, J1T+L-1 ), INCA, + $ RWORK( M-KB+J1T+KA ), + $ WORK( M-KB+J1T+KA ), KA1 ) + 850 CONTINUE + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + DO 860 J = J1, J2, KA1 + WORK( M-KB+J ) = WORK( M-KB+J+KA ) + RWORK( M-KB+J ) = RWORK( M-KB+J+KA ) + 860 CONTINUE + DO 870 J = J1, J2, KA1 +* +* create nonzero element a(j+ka,j-1) outside the band +* and store it in WORK(m-kb+j) +* + WORK( M-KB+J ) = WORK( M-KB+J )*AB( KA1, J-1 ) + AB( KA1, J-1 ) = RWORK( M-KB+J )*AB( KA1, J-1 ) + 870 CONTINUE + IF( UPDATE ) THEN + IF( I+K.GT.KA1 .AND. K.LE.KBT ) + $ WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K ) + END IF + 880 CONTINUE +* + DO 920 K = KB, 1, -1 + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL CLARGV( NR, AB( KA1, J1 ), INCA, WORK( M-KB+J1 ), + $ KA1, RWORK( M-KB+J1 ), KA1 ) +* +* apply rotations in 2nd set from the right +* + DO 890 L = 1, KA - 1 + CALL CLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ), + $ INCA, RWORK( M-KB+J1 ), WORK( M-KB+J1 ), + $ KA1 ) + 890 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL CLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ), + $ AB( 2, J1-1 ), INCA, RWORK( M-KB+J1 ), + $ WORK( M-KB+J1 ), KA1 ) +* + CALL CLACGV( NR, WORK( M-KB+J1 ), KA1 ) + END IF +* +* start applying rotations in 2nd set from the left +* + DO 900 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ RWORK( M-KB+J1T ), WORK( M-KB+J1T ), + $ KA1 ) + 900 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 910 J = J1, J2, KA1 + CALL CROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ RWORK( M-KB+J ), CONJG( WORK( M-KB+J ) ) ) + 910 CONTINUE + END IF + 920 CONTINUE +* + DO 940 K = 1, KB - 1 + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 +* +* finish applying rotations in 1st set from the left +* + DO 930 L = KB - K, 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ RWORK( J1T ), WORK( J1T ), KA1 ) + 930 CONTINUE + 940 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 950 J = 2, I2 - KA + RWORK( J ) = RWORK( J+KA ) + WORK( J ) = WORK( J+KA ) + 950 CONTINUE + END IF +* + END IF +* + GO TO 490 +* +* End of CHBGST +* + END diff --git a/costa/native/external/lapack/chbgv.f b/costa/native/external/lapack/chbgv.f new file mode 100644 index 000000000..4172aa683 --- /dev/null +++ b/costa/native/external/lapack/chbgv.f @@ -0,0 +1,192 @@ + SUBROUTINE CHBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, + $ LDZ, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, N +* .. +* .. Array Arguments .. + REAL RWORK( * ), W( * ) + COMPLEX AB( LDAB, * ), BB( LDBB, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CHBGV computes all the eigenvalues, and optionally, the eigenvectors +* of a complex generalized Hermitian-definite banded eigenproblem, of +* the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian +* and banded, and B is also positive definite. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* KA (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KA >= 0. +* +* KB (input) INTEGER +* The number of superdiagonals of the matrix B if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KB >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first ka+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +* +* On exit, the contents of AB are destroyed. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KA+1. +* +* BB (input/output) COMPLEX array, dimension (LDBB, N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix B, stored in the first kb+1 rows of the array. The +* j-th column of B is stored in the j-th column of the array BB +* as follows: +* if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +* if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +* +* On exit, the factor S from the split Cholesky factorization +* B = S**H*S, as returned by CPBSTF. +* +* LDBB (input) INTEGER +* The leading dimension of the array BB. LDBB >= KB+1. +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) COMPLEX array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +* eigenvectors, with the i-th column of Z holding the +* eigenvector associated with W(i). The eigenvectors are +* normalized so that Z**H*B*Z = I. +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= N. +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* RWORK (workspace) REAL array, dimension (3*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is: +* <= N: the algorithm failed to converge: +* i off-diagonal elements of an intermediate +* tridiagonal form did not converge to zero; +* > N: if INFO = N + i, for 1 <= i <= N, then CPBSTF +* returned INFO = i: B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, WANTZ + CHARACTER VECT + INTEGER IINFO, INDE, INDWRK +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHBGST, CHBTRD, CPBSTF, CSTEQR, SSTERF, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHBGV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a split Cholesky factorization of B. +* + CALL CPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + INDE = 1 + INDWRK = INDE + N + CALL CHBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ, + $ WORK, RWORK( INDWRK ), IINFO ) +* +* Reduce to tridiagonal form. +* + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL CHBTRD( VECT, UPLO, N, KA, AB, LDAB, W, RWORK( INDE ), Z, + $ LDZ, WORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call CSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL CSTEQR( JOBZ, N, W, RWORK( INDE ), Z, LDZ, + $ RWORK( INDWRK ), INFO ) + END IF + RETURN +* +* End of CHBGV +* + END diff --git a/costa/native/external/lapack/chbgvd.f b/costa/native/external/lapack/chbgvd.f new file mode 100644 index 000000000..db4917efc --- /dev/null +++ b/costa/native/external/lapack/chbgvd.f @@ -0,0 +1,295 @@ + SUBROUTINE CHBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, + $ Z, LDZ, WORK, LWORK, RWORK, LRWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, LIWORK, LRWORK, + $ LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX AB( LDAB, * ), BB( LDBB, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CHBGVD computes all the eigenvalues, and optionally, the eigenvectors +* of a complex generalized Hermitian-definite banded eigenproblem, of +* the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian +* and banded, and B is also positive definite. If eigenvectors are +* desired, it uses a divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* KA (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KA >= 0. +* +* KB (input) INTEGER +* The number of superdiagonals of the matrix B if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KB >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first ka+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +* +* On exit, the contents of AB are destroyed. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KA+1. +* +* BB (input/output) COMPLEX array, dimension (LDBB, N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix B, stored in the first kb+1 rows of the array. The +* j-th column of B is stored in the j-th column of the array BB +* as follows: +* if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +* if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +* +* On exit, the factor S from the split Cholesky factorization +* B = S**H*S, as returned by CPBSTF. +* +* LDBB (input) INTEGER +* The leading dimension of the array BB. LDBB >= KB+1. +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) COMPLEX array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +* eigenvectors, with the i-th column of Z holding the +* eigenvector associated with W(i). The eigenvectors are +* normalized so that Z**H*B*Z = I. +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= N. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO=0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If N <= 1, LWORK >= 1. +* If JOBZ = 'N' and N > 1, LWORK >= N. +* If JOBZ = 'V' and N > 1, LWORK >= 2*N**2. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace/output) REAL array, dimension (LRWORK) +* On exit, if INFO=0, RWORK(1) returns the optimal LRWORK. +* +* LRWORK (input) INTEGER +* The dimension of array RWORK. +* If N <= 1, LRWORK >= 1. +* If JOBZ = 'N' and N > 1, LRWORK >= N. +* If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2. +* +* If LRWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the RWORK array, +* returns this value as the first entry of the RWORK array, and +* no error message related to LRWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO=0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of array IWORK. +* If JOBZ = 'N' or N <= 1, LIWORK >= 1. +* If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is: +* <= N: the algorithm failed to converge: +* i off-diagonal elements of an intermediate +* tridiagonal form did not converge to zero; +* > N: if INFO = N + i, for 1 <= i <= N, then CPBSTF +* returned INFO = i: B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CONE, CZERO + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ), + $ CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER VECT + INTEGER IINFO, INDE, INDWK2, INDWRK, LIWMIN, LLRWK, + $ LLWK2, LRWMIN, LWMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CHBGST, CHBTRD, CLACPY, CPBSTF, CSTEDC, + $ SSTERF, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LWMIN = 1 + LRWMIN = 1 + LIWMIN = 1 + ELSE + IF( WANTZ ) THEN + LWMIN = 2*N**2 + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + LRWMIN = N + LIWMIN = 1 + END IF + END IF + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -12 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -16 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHBGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a split Cholesky factorization of B. +* + CALL CPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + INDE = 1 + INDWRK = INDE + N + INDWK2 = 1 + N*N + LLWK2 = LWORK - INDWK2 + 2 + LLRWK = LRWORK - INDWRK + 2 + CALL CHBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ, + $ WORK, RWORK( INDWRK ), IINFO ) +* +* Reduce Hermitian band matrix to tridiagonal form. +* + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL CHBTRD( VECT, UPLO, N, KA, AB, LDAB, W, RWORK( INDE ), Z, + $ LDZ, WORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call CSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL CSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, WORK( INDWK2 ), + $ LLWK2, RWORK( INDWRK ), LLRWK, IWORK, LIWORK, + $ INFO ) + CALL CGEMM( 'N', 'N', N, N, N, CONE, Z, LDZ, WORK, N, CZERO, + $ WORK( INDWK2 ), N ) + CALL CLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of CHBGVD +* + END diff --git a/costa/native/external/lapack/chbgvx.f b/costa/native/external/lapack/chbgvx.f new file mode 100644 index 000000000..32b5f7381 --- /dev/null +++ b/costa/native/external/lapack/chbgvx.f @@ -0,0 +1,374 @@ + SUBROUTINE CHBGVX( JOBZ, RANGE, UPLO, N, KA, KB, AB, LDAB, BB, + $ LDBB, Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, + $ LDZ, WORK, RWORK, IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KA, KB, LDAB, LDBB, LDQ, LDZ, M, + $ N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX AB( LDAB, * ), BB( LDBB, * ), Q( LDQ, * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CHBGVX computes all the eigenvalues, and optionally, the eigenvectors +* of a complex generalized Hermitian-definite banded eigenproblem, of +* the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian +* and banded, and B is also positive definite. Eigenvalues and +* eigenvectors can be selected by specifying either all eigenvalues, +* a range of values or a range of indices for the desired eigenvalues. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found; +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found; +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* KA (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KA >= 0. +* +* KB (input) INTEGER +* The number of superdiagonals of the matrix B if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KB >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first ka+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +* +* On exit, the contents of AB are destroyed. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KA+1. +* +* BB (input/output) COMPLEX array, dimension (LDBB, N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix B, stored in the first kb+1 rows of the array. The +* j-th column of B is stored in the j-th column of the array BB +* as follows: +* if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +* if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +* +* On exit, the factor S from the split Cholesky factorization +* B = S**H*S, as returned by CPBSTF. +* +* LDBB (input) INTEGER +* The leading dimension of the array BB. LDBB >= KB+1. +* +* Q (output) COMPLEX array, dimension (LDQ, N) +* If JOBZ = 'V', the n-by-n matrix used in the reduction of +* A*x = (lambda)*B*x to standard form, i.e. C*x = (lambda)*x, +* and consequently C to tridiagonal form. +* If JOBZ = 'N', the array Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. If JOBZ = 'N', +* LDQ >= 1. If JOBZ = 'V', LDQ >= max(1,N). +* +* VL (input) REAL +* VU (input) REAL +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) REAL +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing AP to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*SLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*SLAMCH('S'). +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) COMPLEX array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +* eigenvectors, with the i-th column of Z holding the +* eigenvector associated with W(i). The eigenvectors are +* normalized so that Z**H*B*Z = I. +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= N. +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* RWORK (workspace) REAL array, dimension (7*N) +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is: +* <= N: then i eigenvectors failed to converge. Their +* indices are stored in array IFAIL. +* > N: if INFO = N + i, for 1 <= i <= N, then CPBSTF +* returned INFO = i: B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, UPPER, VALEIG, WANTZ + CHARACTER ORDER, VECT + INTEGER I, IINFO, INDD, INDE, INDEE, INDIBL, INDISP, + $ INDIWK, INDRWK, INDWRK, ITMP1, J, JJ, NSPLIT + REAL TMP1 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMV, CHBGST, CHBTRD, CLACPY, CPBSTF, + $ CSTEIN, CSTEQR, CSWAP, SCOPY, SSTEBZ, SSTERF, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KA.LT.0 ) THEN + INFO = -5 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -8 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -10 + ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN + INFO = -12 + ELSE IF( INDEIG .AND. IL.LT.1 ) THEN + INFO = -13 + ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN + INFO = -14 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -19 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHBGVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a split Cholesky factorization of B. +* + CALL CPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + CALL CHBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Q, LDQ, + $ WORK, RWORK, IINFO ) +* +* Solve the standard eigenvalue problem. +* Reduce Hermitian band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N + INDWRK = 1 + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL CHBTRD( VECT, UPLO, N, KA, AB, LDAB, RWORK( INDD ), + $ RWORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call SSTERF or CSTEQR. If this fails for some +* eigenvalue, then try SSTEBZ. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ ( ABSTOL.LE.ZERO ) ) THEN + CALL SCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL CLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL CSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, +* call CSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL CSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by CSTEIN. +* + DO 20 J = 1, M + CALL CCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL CGEMV( 'N', N, N, CONE, Q, LDQ, WORK, 1, CZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* + 30 CONTINUE +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* + RETURN +* +* End of CHBGVX +* + END diff --git a/costa/native/external/lapack/chbtrd.f b/costa/native/external/lapack/chbtrd.f new file mode 100644 index 000000000..a87ff602c --- /dev/null +++ b/costa/native/external/lapack/chbtrd.f @@ -0,0 +1,589 @@ + SUBROUTINE CHBTRD( VECT, UPLO, N, KD, AB, LDAB, D, E, Q, LDQ, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO, VECT + INTEGER INFO, KD, LDAB, LDQ, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + COMPLEX AB( LDAB, * ), Q( LDQ, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CHBTRD reduces a complex Hermitian band matrix A to real symmetric +* tridiagonal form T by a unitary similarity transformation: +* Q**H * A * Q = T. +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* = 'N': do not form Q; +* = 'V': form Q; +* = 'U': update a matrix X, by forming X*Q. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* On exit, the diagonal elements of AB are overwritten by the +* diagonal elements of the tridiagonal matrix T; if KD > 0, the +* elements on the first superdiagonal (if UPLO = 'U') or the +* first subdiagonal (if UPLO = 'L') are overwritten by the +* off-diagonal elements of T; the rest of AB is overwritten by +* values generated during the reduction. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* D (output) REAL array, dimension (N) +* The diagonal elements of the tridiagonal matrix T. +* +* E (output) REAL array, dimension (N-1) +* The off-diagonal elements of the tridiagonal matrix T: +* E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. +* +* Q (input/output) COMPLEX array, dimension (LDQ,N) +* On entry, if VECT = 'U', then Q must contain an N-by-N +* matrix X; if VECT = 'N' or 'V', then Q need not be set. +* +* On exit: +* if VECT = 'V', Q contains the N-by-N unitary matrix Q; +* if VECT = 'U', Q contains the product X*Q; +* if VECT = 'N', the array Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. +* LDQ >= 1, and LDQ >= N if VECT = 'V' or 'U'. +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Modified by Linda Kaufman, Bell Labs. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL INITQ, UPPER, WANTQ + INTEGER I, I2, IBL, INCA, INCX, IQAEND, IQB, IQEND, J, + $ J1, J1END, J1INC, J2, JEND, JIN, JINC, K, KD1, + $ KDM1, KDN, L, LAST, LEND, NQ, NR, NRT + REAL ABST + COMPLEX T, TEMP +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLAR2V, CLARGV, CLARTG, CLARTV, CLASET, + $ CROT, CSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, MIN, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INITQ = LSAME( VECT, 'V' ) + WANTQ = INITQ .OR. LSAME( VECT, 'U' ) + UPPER = LSAME( UPLO, 'U' ) + KD1 = KD + 1 + KDM1 = KD - 1 + INCX = LDAB - 1 + IQEND = 1 +* + INFO = 0 + IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD1 ) THEN + INFO = -6 + ELSE IF( LDQ.LT.MAX( 1, N ) .AND. WANTQ ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHBTRD', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Initialize Q to the unit matrix, if needed +* + IF( INITQ ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, Q, LDQ ) +* +* Wherever possible, plane rotations are generated and applied in +* vector operations of length NR over the index set J1:J2:KD1. +* +* The real cosines and complex sines of the plane rotations are +* stored in the arrays D and WORK. +* + INCA = KD1*LDAB + KDN = MIN( N-1, KD ) + IF( UPPER ) THEN +* + IF( KD.GT.1 ) THEN +* +* Reduce to complex Hermitian tridiagonal form, working with +* the upper triangle +* + NR = 0 + J1 = KDN + 2 + J2 = 1 +* + AB( KD1, 1 ) = REAL( AB( KD1, 1 ) ) + DO 90 I = 1, N - 2 +* +* Reduce i-th row of matrix to tridiagonal form +* + DO 80 K = KDN + 1, 2, -1 + J1 = J1 + KDN + J2 = J2 + KDN +* + IF( NR.GT.0 ) THEN +* +* generate plane rotations to annihilate nonzero +* elements which have been created outside the band +* + CALL CLARGV( NR, AB( 1, J1-1 ), INCA, WORK( J1 ), + $ KD1, D( J1 ), KD1 ) +* +* apply rotations from the right +* +* +* Dependent on the the number of diagonals either +* CLARTV or CROT is used +* + IF( NR.GE.2*KD-1 ) THEN + DO 10 L = 1, KD - 1 + CALL CLARTV( NR, AB( L+1, J1-1 ), INCA, + $ AB( L, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) + 10 CONTINUE +* + ELSE + JEND = J1 + ( NR-1 )*KD1 + DO 20 JINC = J1, JEND, KD1 + CALL CROT( KDM1, AB( 2, JINC-1 ), 1, + $ AB( 1, JINC ), 1, D( JINC ), + $ WORK( JINC ) ) + 20 CONTINUE + END IF + END IF +* +* + IF( K.GT.2 ) THEN + IF( K.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i,i+k-1) +* within the band +* + CALL CLARTG( AB( KD-K+3, I+K-2 ), + $ AB( KD-K+2, I+K-1 ), D( I+K-1 ), + $ WORK( I+K-1 ), TEMP ) + AB( KD-K+3, I+K-2 ) = TEMP +* +* apply rotation from the right +* + CALL CROT( K-3, AB( KD-K+4, I+K-2 ), 1, + $ AB( KD-K+3, I+K-1 ), 1, D( I+K-1 ), + $ WORK( I+K-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KDN - 1 + END IF +* +* apply plane rotations from both sides to diagonal +* blocks +* + IF( NR.GT.0 ) + $ CALL CLAR2V( NR, AB( KD1, J1-1 ), AB( KD1, J1 ), + $ AB( KD, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) +* +* apply plane rotations from the left +* + CALL CLACGV( NR, WORK( J1 ), KD1 ) + IF( NR.GT.0 ) THEN + IF( 2*KD-1.LT.NR ) THEN +* +* Dependent on the the number of diagonals either +* CLARTV or CROT is used +* + DO 30 L = 1, KD - 1 + IF( J2+L.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( KD-L, J1+L ), INCA, + $ AB( KD-L+1, J1+L ), INCA, + $ D( J1 ), WORK( J1 ), KD1 ) + 30 CONTINUE + ELSE + J1END = J1 + KD1*( NR-2 ) + IF( J1END.GE.J1 ) THEN + DO 40 JIN = J1, J1END, KD1 + CALL CROT( KD-1, AB( KD-1, JIN+1 ), INCX, + $ AB( KD, JIN+1 ), INCX, + $ D( JIN ), WORK( JIN ) ) + 40 CONTINUE + END IF + LEND = MIN( KDM1, N-J2 ) + LAST = J1END + KD1 + IF( LEND.GT.0 ) + $ CALL CROT( LEND, AB( KD-1, LAST+1 ), INCX, + $ AB( KD, LAST+1 ), INCX, D( LAST ), + $ WORK( LAST ) ) + END IF + END IF +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + IF( INITQ ) THEN +* +* take advantage of the fact that Q was +* initially the Identity matrix +* + IQEND = MAX( IQEND, J2 ) + I2 = MAX( 0, K-3 ) + IQAEND = 1 + I*KD + IF( K.EQ.2 ) + $ IQAEND = IQAEND + KD + IQAEND = MIN( IQAEND, IQEND ) + DO 50 J = J1, J2, KD1 + IBL = I - I2 / KDM1 + I2 = I2 + 1 + IQB = MAX( 1, J-IBL ) + NQ = 1 + IQAEND - IQB + IQAEND = MIN( IQAEND+KD, IQEND ) + CALL CROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ), + $ 1, D( J ), CONJG( WORK( J ) ) ) + 50 CONTINUE + ELSE +* + DO 60 J = J1, J2, KD1 + CALL CROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ D( J ), CONJG( WORK( J ) ) ) + 60 CONTINUE + END IF +* + END IF +* + IF( J2+KDN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KDN - 1 + END IF +* + DO 70 J = J1, J2, KD1 +* +* create nonzero element a(j-1,j+kd) outside the band +* and store it in WORK +* + WORK( J+KD ) = WORK( J )*AB( 1, J+KD ) + AB( 1, J+KD ) = D( J )*AB( 1, J+KD ) + 70 CONTINUE + 80 CONTINUE + 90 CONTINUE + END IF +* + IF( KD.GT.0 ) THEN +* +* make off-diagonal elements real and copy them to E +* + DO 100 I = 1, N - 1 + T = AB( KD, I+1 ) + ABST = ABS( T ) + AB( KD, I+1 ) = ABST + E( I ) = ABST + IF( ABST.NE.ZERO ) THEN + T = T / ABST + ELSE + T = CONE + END IF + IF( I.LT.N-1 ) + $ AB( KD, I+2 ) = AB( KD, I+2 )*T + IF( WANTQ ) THEN + CALL CSCAL( N, CONJG( T ), Q( 1, I+1 ), 1 ) + END IF + 100 CONTINUE + ELSE +* +* set E to zero if original matrix was diagonal +* + DO 110 I = 1, N - 1 + E( I ) = ZERO + 110 CONTINUE + END IF +* +* copy diagonal elements to D +* + DO 120 I = 1, N + D( I ) = AB( KD1, I ) + 120 CONTINUE +* + ELSE +* + IF( KD.GT.1 ) THEN +* +* Reduce to complex Hermitian tridiagonal form, working with +* the lower triangle +* + NR = 0 + J1 = KDN + 2 + J2 = 1 +* + AB( 1, 1 ) = REAL( AB( 1, 1 ) ) + DO 210 I = 1, N - 2 +* +* Reduce i-th column of matrix to tridiagonal form +* + DO 200 K = KDN + 1, 2, -1 + J1 = J1 + KDN + J2 = J2 + KDN +* + IF( NR.GT.0 ) THEN +* +* generate plane rotations to annihilate nonzero +* elements which have been created outside the band +* + CALL CLARGV( NR, AB( KD1, J1-KD1 ), INCA, + $ WORK( J1 ), KD1, D( J1 ), KD1 ) +* +* apply plane rotations from one side +* +* +* Dependent on the the number of diagonals either +* CLARTV or CROT is used +* + IF( NR.GT.2*KD-1 ) THEN + DO 130 L = 1, KD - 1 + CALL CLARTV( NR, AB( KD1-L, J1-KD1+L ), INCA, + $ AB( KD1-L+1, J1-KD1+L ), INCA, + $ D( J1 ), WORK( J1 ), KD1 ) + 130 CONTINUE + ELSE + JEND = J1 + KD1*( NR-1 ) + DO 140 JINC = J1, JEND, KD1 + CALL CROT( KDM1, AB( KD, JINC-KD ), INCX, + $ AB( KD1, JINC-KD ), INCX, + $ D( JINC ), WORK( JINC ) ) + 140 CONTINUE + END IF +* + END IF +* + IF( K.GT.2 ) THEN + IF( K.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i+k-1,i) +* within the band +* + CALL CLARTG( AB( K-1, I ), AB( K, I ), + $ D( I+K-1 ), WORK( I+K-1 ), TEMP ) + AB( K-1, I ) = TEMP +* +* apply rotation from the left +* + CALL CROT( K-3, AB( K-2, I+1 ), LDAB-1, + $ AB( K-1, I+1 ), LDAB-1, D( I+K-1 ), + $ WORK( I+K-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KDN - 1 + END IF +* +* apply plane rotations from both sides to diagonal +* blocks +* + IF( NR.GT.0 ) + $ CALL CLAR2V( NR, AB( 1, J1-1 ), AB( 1, J1 ), + $ AB( 2, J1-1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) +* +* apply plane rotations from the right +* +* +* Dependent on the the number of diagonals either +* CLARTV or CROT is used +* + CALL CLACGV( NR, WORK( J1 ), KD1 ) + IF( NR.GT.0 ) THEN + IF( NR.GT.2*KD-1 ) THEN + DO 150 L = 1, KD - 1 + IF( J2+L.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( L+2, J1-1 ), INCA, + $ AB( L+1, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) + 150 CONTINUE + ELSE + J1END = J1 + KD1*( NR-2 ) + IF( J1END.GE.J1 ) THEN + DO 160 J1INC = J1, J1END, KD1 + CALL CROT( KDM1, AB( 3, J1INC-1 ), 1, + $ AB( 2, J1INC ), 1, D( J1INC ), + $ WORK( J1INC ) ) + 160 CONTINUE + END IF + LEND = MIN( KDM1, N-J2 ) + LAST = J1END + KD1 + IF( LEND.GT.0 ) + $ CALL CROT( LEND, AB( 3, LAST-1 ), 1, + $ AB( 2, LAST ), 1, D( LAST ), + $ WORK( LAST ) ) + END IF + END IF +* +* +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + IF( INITQ ) THEN +* +* take advantage of the fact that Q was +* initially the Identity matrix +* + IQEND = MAX( IQEND, J2 ) + I2 = MAX( 0, K-3 ) + IQAEND = 1 + I*KD + IF( K.EQ.2 ) + $ IQAEND = IQAEND + KD + IQAEND = MIN( IQAEND, IQEND ) + DO 170 J = J1, J2, KD1 + IBL = I - I2 / KDM1 + I2 = I2 + 1 + IQB = MAX( 1, J-IBL ) + NQ = 1 + IQAEND - IQB + IQAEND = MIN( IQAEND+KD, IQEND ) + CALL CROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ), + $ 1, D( J ), WORK( J ) ) + 170 CONTINUE + ELSE +* + DO 180 J = J1, J2, KD1 + CALL CROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ D( J ), WORK( J ) ) + 180 CONTINUE + END IF + END IF +* + IF( J2+KDN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KDN - 1 + END IF +* + DO 190 J = J1, J2, KD1 +* +* create nonzero element a(j+kd,j-1) outside the +* band and store it in WORK +* + WORK( J+KD ) = WORK( J )*AB( KD1, J ) + AB( KD1, J ) = D( J )*AB( KD1, J ) + 190 CONTINUE + 200 CONTINUE + 210 CONTINUE + END IF +* + IF( KD.GT.0 ) THEN +* +* make off-diagonal elements real and copy them to E +* + DO 220 I = 1, N - 1 + T = AB( 2, I ) + ABST = ABS( T ) + AB( 2, I ) = ABST + E( I ) = ABST + IF( ABST.NE.ZERO ) THEN + T = T / ABST + ELSE + T = CONE + END IF + IF( I.LT.N-1 ) + $ AB( 2, I+1 ) = AB( 2, I+1 )*T + IF( WANTQ ) THEN + CALL CSCAL( N, T, Q( 1, I+1 ), 1 ) + END IF + 220 CONTINUE + ELSE +* +* set E to zero if original matrix was diagonal +* + DO 230 I = 1, N - 1 + E( I ) = ZERO + 230 CONTINUE + END IF +* +* copy diagonal elements to D +* + DO 240 I = 1, N + D( I ) = AB( 1, I ) + 240 CONTINUE + END IF +* + RETURN +* +* End of CHBTRD +* + END diff --git a/costa/native/external/lapack/checon.f b/costa/native/external/lapack/checon.f new file mode 100644 index 000000000..9f3016f1b --- /dev/null +++ b/costa/native/external/lapack/checon.f @@ -0,0 +1,159 @@ + SUBROUTINE CHECON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CHECON estimates the reciprocal of the condition number of a complex +* Hermitian matrix A using the factorization A = U*D*U**H or +* A = L*D*L**H computed by CHETRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**H; +* = 'L': Lower triangular, form is A = L*D*L**H. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by CHETRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by CHETRF. +* +* ANORM (input) REAL +* The 1-norm of the original matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + REAL AINVNM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHETRS, CLACON, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHECON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L') or inv(U*D*U'). +* + CALL CHETRS( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of CHECON +* + END diff --git a/costa/native/external/lapack/cheev.f b/costa/native/external/lapack/cheev.f new file mode 100644 index 000000000..3309689e4 --- /dev/null +++ b/costa/native/external/lapack/cheev.f @@ -0,0 +1,220 @@ + SUBROUTINE CHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CHEEV computes all eigenvalues and, optionally, eigenvectors of a +* complex Hermitian matrix A. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA, N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* On exit, if JOBZ = 'V', then if INFO = 0, A contains the +* orthonormal eigenvectors of the matrix A. +* If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +* or the upper triangle (if UPLO='U') of A, including the +* diagonal, is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,2*N-1). +* For optimal efficiency, LWORK >= (NB+1)*N, +* where NB is the blocksize for CHETRD returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) REAL array, dimension (max(1, 3*N-2)) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE, + $ LLWORK, LOPT, LWKOPT, NB + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANHE, SLAMCH + EXTERNAL ILAENV, LSAME, CLANHE, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CHETRD, CLASCL, CSTEQR, CUNGTR, SSCAL, SSTERF, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( 1, ( NB+1 )*N ) + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + WORK( 1 ) = 3 + IF( WANTZ ) + $ A( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = CLANHE( 'M', UPLO, N, A, LDA, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL CLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call CHETRD to reduce Hermitian matrix to tridiagonal form. +* + INDE = 1 + INDTAU = 1 + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + CALL CHETRD( UPLO, N, A, LDA, W, RWORK( INDE ), WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + LOPT = N + WORK( INDWRK ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* CUNGTR to generate the unitary matrix, then call CSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL CUNGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ), + $ LLWORK, IINFO ) + INDWRK = INDE + N + CALL CSTEQR( JOBZ, N, W, RWORK( INDE ), A, LDA, + $ RWORK( INDWRK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal complex workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CHEEV +* + END diff --git a/costa/native/external/lapack/cheevd.f b/costa/native/external/lapack/cheevd.f new file mode 100644 index 000000000..4fef57448 --- /dev/null +++ b/costa/native/external/lapack/cheevd.f @@ -0,0 +1,296 @@ + SUBROUTINE CHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, + $ LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CHEEVD computes all eigenvalues and, optionally, eigenvectors of a +* complex Hermitian matrix A. If eigenvectors are desired, it uses a +* divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA, N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* On exit, if JOBZ = 'V', then if INFO = 0, A contains the +* orthonormal eigenvectors of the matrix A. +* If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +* or the upper triangle (if UPLO='U') of A, including the +* diagonal, is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. +* If N <= 1, LWORK must be at least 1. +* If JOBZ = 'N' and N > 1, LWORK must be at least N + 1. +* If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace/output) REAL array, +* dimension (LRWORK) +* On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +* +* LRWORK (input) INTEGER +* The dimension of the array RWORK. +* If N <= 1, LRWORK must be at least 1. +* If JOBZ = 'N' and N > 1, LRWORK must be at least N. +* If JOBZ = 'V' and N > 1, LRWORK must be at least +* 1 + 5*N + 2*N**2. +* +* If LRWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the RWORK array, +* returns this value as the first entry of the RWORK array, and +* no error message related to LRWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If N <= 1, LIWORK must be at least 1. +* If JOBZ = 'N' and N > 1, LIWORK must be at least 1. +* If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDRWK, INDTAU, INDWK2, + $ INDWRK, ISCALE, LIOPT, LIWMIN, LLRWK, LLWORK, + $ LLWRK2, LOPT, LROPT, LRWMIN, LWMIN + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHE, SLAMCH + EXTERNAL LSAME, CLANHE, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CHETRD, CLACPY, CLASCL, CSTEDC, CUNMTR, SSCAL, + $ SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LWMIN = 1 + LRWMIN = 1 + LIWMIN = 1 + LOPT = LWMIN + LROPT = LRWMIN + LIOPT = LIWMIN + ELSE + IF( WANTZ ) THEN + LWMIN = 2*N + N*N + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + 1 + LRWMIN = N + LIWMIN = 1 + END IF + LOPT = LWMIN + LROPT = LRWMIN + LIOPT = LIWMIN + END IF + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LOPT + RWORK( 1 ) = LROPT + IWORK( 1 ) = LIOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + IF( WANTZ ) + $ A( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = CLANHE( 'M', UPLO, N, A, LDA, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL CLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call CHETRD to reduce Hermitian matrix to tridiagonal form. +* + INDE = 1 + INDTAU = 1 + INDWRK = INDTAU + N + INDRWK = INDE + N + INDWK2 = INDWRK + N*N + LLWORK = LWORK - INDWRK + 1 + LLWRK2 = LWORK - INDWK2 + 1 + LLRWK = LRWORK - INDRWK + 1 + CALL CHETRD( UPLO, N, A, LDA, W, RWORK( INDE ), WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + LOPT = MAX( REAL( LOPT ), REAL( N )+REAL( WORK( INDWRK ) ) ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* CSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the +* tridiagonal matrix, then call CUNMTR to multiply it to the +* Householder transformations represented as Householder vectors in +* A. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL CSTEDC( 'I', N, W, RWORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, RWORK( INDRWK ), LLRWK, + $ IWORK, LIWORK, INFO ) + CALL CUNMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ), + $ WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO ) + CALL CLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA ) + LOPT = MAX( LOPT, N+N**2+INT( WORK( INDWK2 ) ) ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + WORK( 1 ) = LOPT + RWORK( 1 ) = LROPT + IWORK( 1 ) = LIOPT +* + RETURN +* +* End of CHEEVD +* + END diff --git a/costa/native/external/lapack/cheevr.f b/costa/native/external/lapack/cheevr.f new file mode 100644 index 000000000..80475f624 --- /dev/null +++ b/costa/native/external/lapack/cheevr.f @@ -0,0 +1,522 @@ + SUBROUTINE CHEEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, + $ RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 20, 2000 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LRWORK, LWORK, + $ M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CHEEVR computes selected eigenvalues and, optionally, eigenvectors +* of a complex Hermitian matrix T. Eigenvalues and eigenvectors can +* be selected by specifying either a range of values or a range of +* indices for the desired eigenvalues. +* +* Whenever possible, CHEEVR calls CSTEGR to compute the +* eigenspectrum using Relatively Robust Representations. CSTEGR +* computes eigenvalues by the dqds algorithm, while orthogonal +* eigenvectors are computed from various "good" L D L^T representations +* (also known as Relatively Robust Representations). Gram-Schmidt +* orthogonalization is avoided as far as possible. More specifically, +* the various steps of the algorithm are as follows. For the i-th +* unreduced block of T, +* (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T +* is a relatively robust representation, +* (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high +* relative accuracy by the dqds algorithm, +* (c) If there is a cluster of close eigenvalues, "choose" sigma_i +* close to the cluster, and go to step (a), +* (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, +* compute the corresponding eigenvector by forming a +* rank-revealing twisted factorization. +* The desired accuracy of the output can be specified by the input +* parameter ABSTOL. +* +* For more details, see "A new O(n^2) algorithm for the symmetric +* tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, +* Computer Science Division Technical Report No. UCB//CSD-97-971, +* UC Berkeley, May 1997. +* +* +* Note 1 : CHEEVR calls CSTEGR when the full spectrum is requested +* on machines which conform to the ieee-754 floating point standard. +* CHEEVR calls SSTEBZ and CSTEIN on non-ieee machines and +* when partial spectrum requests are made. +* +* Normal execution of CSTEGR may create NaNs and infinities and +* hence may abort due to a floating point exception in environments +* which do not handle NaNs and infinities in the ieee standard default +* manner. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +********** For RANGE = 'V' or 'I' and IU - IL < N - 1, SSTEBZ and +********** CSTEIN are called +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA, N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* On exit, the lower triangle (if UPLO='L') or the upper +* triangle (if UPLO='U') of A, including the diagonal, is +* destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* VL (input) REAL +* VU (input) REAL +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) REAL +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing A to tridiagonal form. +* +* See "Computing Small Singular Values of Bidiagonal Matrices +* with Guaranteed High Relative Accuracy," by Demmel and +* Kahan, LAPACK Working Note #3. +* +* If high relative accuracy is important, set ABSTOL to +* SLAMCH( 'Safe minimum' ). Doing so will guarantee that +* eigenvalues are computed to high relative accuracy when +* possible in future releases. The current code does not +* make any guarantees about high relative accuracy, but +* furutre releases will. See J. Barlow and J. Demmel, +* "Computing Accurate Eigensystems of Scaled Diagonally +* Dominant Matrices", LAPACK Working Note #7, for a discussion +* of which matrices define their eigenvalues to high relative +* accuracy. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) REAL array, dimension (N) +* The first M elements contain the selected eigenvalues in +* ascending order. +* +* Z (output) COMPLEX array, dimension (LDZ, max(1,M)) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* ISUPPZ (output) INTEGER ARRAY, dimension ( 2*max(1,M) ) +* The support of the eigenvectors in Z, i.e., the indices +* indicating the nonzero elements in Z. The i-th eigenvector +* is nonzero only in elements ISUPPZ( 2*i-1 ) through +* ISUPPZ( 2*i ). +********** Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,2*N). +* For optimal efficiency, LWORK >= (NB+1)*N, +* where NB is the max of the blocksize for CHETRD and for +* CUNMTR as returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace/output) REAL array, dimension (LRWORK) +* On exit, if INFO = 0, RWORK(1) returns the optimal +* (and minimal) LRWORK. +* +* LRWORK (input) INTEGER +* The length of the array RWORK. LRWORK >= max(1,24*N). +* +* If LRWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the RWORK array, returns +* this value as the first entry of the RWORK array, and no error +* message related to LRWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal +* (and minimal) LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. LIWORK >= max(1,10*N). +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: Internal error +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* Ken Stanley, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IEEEOK, IINFO, IMAX, INDIBL, INDIFL, INDISP, + $ INDIWO, INDRD, INDRDD, INDRE, INDREE, INDRWK, + $ INDTAU, INDWK, INDWKN, ISCALE, ITMP1, J, JJ, + $ LIWMIN, LLWORK, LLWRKN, LRWMIN, LWKOPT, LWMIN, + $ NB, NSPLIT + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANSY, SLAMCH + EXTERNAL LSAME, ILAENV, CLANSY, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CHETRD, CSSCAL, CSTEGR, CSTEIN, CSWAP, CUNMTR, + $ SCOPY, SSCAL, SSTEBZ, SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IEEEOK = ILAENV( 10, 'CHEEVR', 'N', 1, 2, 3, 4 ) +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LRWORK.EQ.-1 ) .OR. + $ ( LIWORK.EQ.-1 ) ) +* + LRWMIN = MAX( 1, 24*N ) + LIWMIN = MAX( 1, 10*N ) + LWMIN = MAX( 1, 2*N ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -22 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 ) + NB = MAX( NB, ILAENV( 1, 'CUNMTR', UPLO, N, -1, -1, -1 ) ) + LWKOPT = MAX( ( NB+1 )*N, LWMIN ) + WORK( 1 ) = LWKOPT + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEEVR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + WORK( 1 ) = 7 + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = REAL( A( 1, 1 ) ) + ELSE + IF( VL.LT.REAL( A( 1, 1 ) ) .AND. VU.GE.REAL( A( 1, 1 ) ) ) + $ THEN + M = 1 + W( 1 ) = REAL( A( 1, 1 ) ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + VLL = VL + VUU = VU + ANRM = CLANSY( 'M', UPLO, N, A, LDA, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL CSSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL CSSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call CHETRD to reduce Hermitian matrix to tridiagonal form. +* + INDTAU = 1 + INDWK = INDTAU + N +* + INDRE = 1 + INDRD = INDRE + N + INDREE = INDRD + N + INDRDD = INDREE + N + INDRWK = INDRDD + N + LLWORK = LWORK - INDWK + 1 + CALL CHETRD( UPLO, N, A, LDA, RWORK( INDRD ), RWORK( INDRE ), + $ WORK( INDTAU ), WORK( INDWK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired +* then call SSTERF or CSTEGR and CUNMTR. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ IEEEOK.EQ.1 ) THEN + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N, RWORK( INDRD ), 1, W, 1 ) + CALL SCOPY( N-1, RWORK( INDRE ), 1, RWORK( INDREE ), 1 ) + CALL SSTERF( N, W, RWORK( INDREE ), INFO ) + ELSE + CALL SCOPY( N-1, RWORK( INDRE ), 1, RWORK( INDREE ), 1 ) + CALL SCOPY( N, RWORK( INDRD ), 1, RWORK( INDRDD ), 1 ) +* + CALL CSTEGR( JOBZ, 'A', N, RWORK( INDRDD ), + $ RWORK( INDREE ), VL, VU, IL, IU, ABSTOL, M, W, + $ Z, LDZ, ISUPPZ, RWORK( INDRWK ), LWORK, IWORK, + $ LIWORK, INFO ) +* +* +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by CSTEIN. +* + IF( WANTZ .AND. INFO.EQ.0 ) THEN + INDWKN = INDWK + LLWRKN = LWORK - INDWKN + 1 + CALL CUNMTR( 'L', UPLO, 'N', N, M, A, LDA, + $ WORK( INDTAU ), Z, LDZ, WORK( INDWKN ), + $ LLWRKN, IINFO ) + END IF + END IF +* +* + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN. +* Also call SSTEBZ and CSTEIN if CSTEGR fails. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIFL = 1 + INDIBL = INDIFL + N + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDRD ), RWORK( INDRE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL CSTEIN( N, RWORK( INDRD ), RWORK( INDRE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWO ), IWORK( INDIFL ), + $ INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by CSTEIN. +* + INDWKN = INDWK + LLWRKN = LWORK - INDWKN + 1 + CALL CUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWKOPT + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of CHEEVR +* + END diff --git a/costa/native/external/lapack/cheevx.f b/costa/native/external/lapack/cheevx.f new file mode 100644 index 000000000..f95133620 --- /dev/null +++ b/costa/native/external/lapack/cheevx.f @@ -0,0 +1,426 @@ + SUBROUTINE CHEEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, WORK, LWORK, RWORK, + $ IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CHEEVX computes selected eigenvalues and, optionally, eigenvectors +* of a complex Hermitian matrix A. Eigenvalues and eigenvectors can +* be selected by specifying either a range of values or a range of +* indices for the desired eigenvalues. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA, N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* On exit, the lower triangle (if UPLO='L') or the upper +* triangle (if UPLO='U') of A, including the diagonal, is +* destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* VL (input) REAL +* VU (input) REAL +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) REAL +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing A to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*SLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*SLAMCH('S'). +* +* See "Computing Small Singular Values of Bidiagonal Matrices +* with Guaranteed High Relative Accuracy," by Demmel and +* Kahan, LAPACK Working Note #3. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) REAL array, dimension (N) +* On normal exit, the first M elements contain the selected +* eigenvalues in ascending order. +* +* Z (output) COMPLEX array, dimension (LDZ, max(1,M)) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If an eigenvector fails to converge, then that column of Z +* contains the latest approximation to the eigenvector, and the +* index of the eigenvector is returned in IFAIL. +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,2*N-1). +* For optimal efficiency, LWORK >= (NB+1)*N, +* where NB is the max of the blocksize for CHETRD and for +* CUNMTR as returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) REAL array, dimension (7*N) +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, then i eigenvectors failed to converge. +* Their indices are stored in array IFAIL. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWK, INDRWK, INDTAU, INDWRK, ISCALE, + $ ITMP1, J, JJ, LLWORK, LOPT, LWKOPT, NB, NSPLIT + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANHE, SLAMCH + EXTERNAL LSAME, ILAENV, CLANHE, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CHETRD, CLACPY, CSSCAL, CSTEIN, CSTEQR, CSWAP, + $ CUNGTR, CUNMTR, SCOPY, SSCAL, SSTEBZ, SSTERF, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + ELSE IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -17 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 ) + NB = MAX( NB, ILAENV( 1, 'CUNMTR', UPLO, N, -1, -1, -1 ) ) + LWKOPT = ( NB+1 )*N + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + WORK( 1 ) = 1 + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + ELSE IF( VALEIG ) THEN + IF( VL.LT.REAL( A( 1, 1 ) ) .AND. VU.GE.REAL( A( 1, 1 ) ) ) + $ THEN + M = 1 + W( 1 ) = A( 1, 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + VLL = VL + VUU = VU + ANRM = CLANHE( 'M', UPLO, N, A, LDA, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL CSSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL CSSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call CHETRD to reduce Hermitian matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N + INDTAU = 1 + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + CALL CHETRD( UPLO, N, A, LDA, RWORK( INDD ), RWORK( INDE ), + $ WORK( INDTAU ), WORK( INDWRK ), LLWORK, IINFO ) + LOPT = N + WORK( INDWRK ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal to +* zero, then call SSTERF or CUNGTR and CSTEQR. If this fails for +* some eigenvalue, then try SSTEBZ. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ ( ABSTOL.LE.ZERO ) ) THEN + CALL SCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL SSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL CLACPY( 'A', N, N, A, LDA, Z, LDZ ) + CALL CUNGTR( UPLO, N, Z, LDZ, WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL CSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + IFAIL( I ) = 0 + 30 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 40 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL CSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by CSTEIN. +* + CALL CUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWRK ), LLWORK, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 40 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 60 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 50 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 60 CONTINUE + END IF +* +* Set WORK(1) to optimal complex workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CHEEVX +* + END diff --git a/costa/native/external/lapack/chegs2.f b/costa/native/external/lapack/chegs2.f new file mode 100644 index 000000000..4e887a330 --- /dev/null +++ b/costa/native/external/lapack/chegs2.f @@ -0,0 +1,225 @@ + SUBROUTINE CHEGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CHEGS2 reduces a complex Hermitian-definite generalized +* eigenproblem to standard form. +* +* If ITYPE = 1, the problem is A*x = lambda*B*x, +* and A is overwritten by inv(U')*A*inv(U) or inv(L)*A*inv(L') +* +* If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +* B*A*x = lambda*x, and A is overwritten by U*A*U` or L'*A*L. +* +* B must have been previously factorized as U'*U or L*L' by CPOTRF. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* = 1: compute inv(U')*A*inv(U) or inv(L)*A*inv(L'); +* = 2 or 3: compute U*A*U' or L'*A*L. +* +* UPLO (input) CHARACTER +* Specifies whether the upper or lower triangular part of the +* Hermitian matrix A is stored, and how B has been factorized. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* n by n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n by n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the transformed matrix, stored in the +* same format as A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input) COMPLEX array, dimension (LDB,N) +* The triangular factor from the Cholesky factorization of B, +* as returned by CPOTRF. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, HALF + PARAMETER ( ONE = 1.0E+0, HALF = 0.5E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K + REAL AKK, BKK + COMPLEX CT +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CHER2, CLACGV, CSSCAL, CTRMV, CTRSV, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEGS2', -INFO ) + RETURN + END IF +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U')*A*inv(U) +* + DO 10 K = 1, N +* +* Update the upper triangle of A(k:n,k:n) +* + AKK = A( K, K ) + BKK = B( K, K ) + AKK = AKK / BKK**2 + A( K, K ) = AKK + IF( K.LT.N ) THEN + CALL CSSCAL( N-K, ONE / BKK, A( K, K+1 ), LDA ) + CT = -HALF*AKK + CALL CLACGV( N-K, A( K, K+1 ), LDA ) + CALL CLACGV( N-K, B( K, K+1 ), LDB ) + CALL CAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ), + $ LDA ) + CALL CHER2( UPLO, N-K, -CONE, A( K, K+1 ), LDA, + $ B( K, K+1 ), LDB, A( K+1, K+1 ), LDA ) + CALL CAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ), + $ LDA ) + CALL CLACGV( N-K, B( K, K+1 ), LDB ) + CALL CTRSV( UPLO, 'Conjugate transpose', 'Non-unit', + $ N-K, B( K+1, K+1 ), LDB, A( K, K+1 ), + $ LDA ) + CALL CLACGV( N-K, A( K, K+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L') +* + DO 20 K = 1, N +* +* Update the lower triangle of A(k:n,k:n) +* + AKK = A( K, K ) + BKK = B( K, K ) + AKK = AKK / BKK**2 + A( K, K ) = AKK + IF( K.LT.N ) THEN + CALL CSSCAL( N-K, ONE / BKK, A( K+1, K ), 1 ) + CT = -HALF*AKK + CALL CAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 ) + CALL CHER2( UPLO, N-K, -CONE, A( K+1, K ), 1, + $ B( K+1, K ), 1, A( K+1, K+1 ), LDA ) + CALL CAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 ) + CALL CTRSV( UPLO, 'No transpose', 'Non-unit', N-K, + $ B( K+1, K+1 ), LDB, A( K+1, K ), 1 ) + END IF + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U' +* + DO 30 K = 1, N +* +* Update the upper triangle of A(1:k,1:k) +* + AKK = A( K, K ) + BKK = B( K, K ) + CALL CTRMV( UPLO, 'No transpose', 'Non-unit', K-1, B, + $ LDB, A( 1, K ), 1 ) + CT = HALF*AKK + CALL CAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 ) + CALL CHER2( UPLO, K-1, CONE, A( 1, K ), 1, B( 1, K ), 1, + $ A, LDA ) + CALL CAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 ) + CALL CSSCAL( K-1, BKK, A( 1, K ), 1 ) + A( K, K ) = AKK*BKK**2 + 30 CONTINUE + ELSE +* +* Compute L'*A*L +* + DO 40 K = 1, N +* +* Update the lower triangle of A(1:k,1:k) +* + AKK = A( K, K ) + BKK = B( K, K ) + CALL CLACGV( K-1, A( K, 1 ), LDA ) + CALL CTRMV( UPLO, 'Conjugate transpose', 'Non-unit', K-1, + $ B, LDB, A( K, 1 ), LDA ) + CT = HALF*AKK + CALL CLACGV( K-1, B( K, 1 ), LDB ) + CALL CAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA ) + CALL CHER2( UPLO, K-1, CONE, A( K, 1 ), LDA, B( K, 1 ), + $ LDB, A, LDA ) + CALL CAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA ) + CALL CLACGV( K-1, B( K, 1 ), LDB ) + CALL CSSCAL( K-1, BKK, A( K, 1 ), LDA ) + CALL CLACGV( K-1, A( K, 1 ), LDA ) + A( K, K ) = AKK*BKK**2 + 40 CONTINUE + END IF + END IF + RETURN +* +* End of CHEGS2 +* + END diff --git a/costa/native/external/lapack/chegst.f b/costa/native/external/lapack/chegst.f new file mode 100644 index 000000000..0296054a1 --- /dev/null +++ b/costa/native/external/lapack/chegst.f @@ -0,0 +1,260 @@ + SUBROUTINE CHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CHEGST reduces a complex Hermitian-definite generalized +* eigenproblem to standard form. +* +* If ITYPE = 1, the problem is A*x = lambda*B*x, +* and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) +* +* If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +* B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. +* +* B must have been previously factorized as U**H*U or L*L**H by CPOTRF. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H); +* = 2 or 3: compute U*A*U**H or L**H*A*L. +* +* UPLO (input) CHARACTER +* = 'U': Upper triangle of A is stored and B is factored as +* U**H*U; +* = 'L': Lower triangle of A is stored and B is factored as +* L*L**H. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the transformed matrix, stored in the +* same format as A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input) COMPLEX array, dimension (LDB,N) +* The triangular factor from the Cholesky factorization of B, +* as returned by CPOTRF. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + COMPLEX CONE, HALF + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ), + $ HALF = ( 0.5E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K, KB, NB +* .. +* .. External Subroutines .. + EXTERNAL CHEGS2, CHEMM, CHER2K, CTRMM, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEGST', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'CHEGST', UPLO, N, -1, -1, -1 ) +* + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL CHEGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + ELSE +* +* Use blocked code +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U')*A*inv(U) +* + DO 10 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the upper triangle of A(k:n,k:n) +* + CALL CHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + IF( K+KB.LE.N ) THEN + CALL CTRSM( 'Left', UPLO, 'Conjugate transpose', + $ 'Non-unit', KB, N-K-KB+1, CONE, + $ B( K, K ), LDB, A( K, K+KB ), LDA ) + CALL CHEMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, + $ A( K, K ), LDA, B( K, K+KB ), LDB, + $ CONE, A( K, K+KB ), LDA ) + CALL CHER2K( UPLO, 'Conjugate transpose', N-K-KB+1, + $ KB, -CONE, A( K, K+KB ), LDA, + $ B( K, K+KB ), LDB, ONE, + $ A( K+KB, K+KB ), LDA ) + CALL CHEMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, + $ A( K, K ), LDA, B( K, K+KB ), LDB, + $ CONE, A( K, K+KB ), LDA ) + CALL CTRSM( 'Right', UPLO, 'No transpose', + $ 'Non-unit', KB, N-K-KB+1, CONE, + $ B( K+KB, K+KB ), LDB, A( K, K+KB ), + $ LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L') +* + DO 20 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the lower triangle of A(k:n,k:n) +* + CALL CHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + IF( K+KB.LE.N ) THEN + CALL CTRSM( 'Right', UPLO, 'Conjugate transpose', + $ 'Non-unit', N-K-KB+1, KB, CONE, + $ B( K, K ), LDB, A( K+KB, K ), LDA ) + CALL CHEMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, + $ A( K, K ), LDA, B( K+KB, K ), LDB, + $ CONE, A( K+KB, K ), LDA ) + CALL CHER2K( UPLO, 'No transpose', N-K-KB+1, KB, + $ -CONE, A( K+KB, K ), LDA, + $ B( K+KB, K ), LDB, ONE, + $ A( K+KB, K+KB ), LDA ) + CALL CHEMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, + $ A( K, K ), LDA, B( K+KB, K ), LDB, + $ CONE, A( K+KB, K ), LDA ) + CALL CTRSM( 'Left', UPLO, 'No transpose', + $ 'Non-unit', N-K-KB+1, KB, CONE, + $ B( K+KB, K+KB ), LDB, A( K+KB, K ), + $ LDA ) + END IF + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U' +* + DO 30 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) +* + CALL CTRMM( 'Left', UPLO, 'No transpose', 'Non-unit', + $ K-1, KB, CONE, B, LDB, A( 1, K ), LDA ) + CALL CHEMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ), + $ LDA, B( 1, K ), LDB, CONE, A( 1, K ), + $ LDA ) + CALL CHER2K( UPLO, 'No transpose', K-1, KB, CONE, + $ A( 1, K ), LDA, B( 1, K ), LDB, ONE, A, + $ LDA ) + CALL CHEMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ), + $ LDA, B( 1, K ), LDB, CONE, A( 1, K ), + $ LDA ) + CALL CTRMM( 'Right', UPLO, 'Conjugate transpose', + $ 'Non-unit', K-1, KB, CONE, B( K, K ), LDB, + $ A( 1, K ), LDA ) + CALL CHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + 30 CONTINUE + ELSE +* +* Compute L'*A*L +* + DO 40 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) +* + CALL CTRMM( 'Right', UPLO, 'No transpose', 'Non-unit', + $ KB, K-1, CONE, B, LDB, A( K, 1 ), LDA ) + CALL CHEMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ), + $ LDA, B( K, 1 ), LDB, CONE, A( K, 1 ), + $ LDA ) + CALL CHER2K( UPLO, 'Conjugate transpose', K-1, KB, + $ CONE, A( K, 1 ), LDA, B( K, 1 ), LDB, + $ ONE, A, LDA ) + CALL CHEMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ), + $ LDA, B( K, 1 ), LDB, CONE, A( K, 1 ), + $ LDA ) + CALL CTRMM( 'Left', UPLO, 'Conjugate transpose', + $ 'Non-unit', KB, K-1, CONE, B( K, K ), LDB, + $ A( K, 1 ), LDA ) + CALL CHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + 40 CONTINUE + END IF + END IF + END IF + RETURN +* +* End of CHEGST +* + END diff --git a/costa/native/external/lapack/chegv.f b/costa/native/external/lapack/chegv.f new file mode 100644 index 000000000..2aa038b50 --- /dev/null +++ b/costa/native/external/lapack/chegv.f @@ -0,0 +1,229 @@ + SUBROUTINE CHEGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, + $ LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CHEGV computes all the eigenvalues, and optionally, the eigenvectors +* of a complex generalized Hermitian-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +* Here A and B are assumed to be Hermitian and B is also +* positive definite. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA, N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* +* On exit, if JOBZ = 'V', then if INFO = 0, A contains the +* matrix Z of eigenvectors. The eigenvectors are normalized +* as follows: +* if ITYPE = 1 or 2, Z**H*B*Z = I; +* if ITYPE = 3, Z**H*inv(B)*Z = I. +* If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +* or the lower triangle (if UPLO='L') of A, including the +* diagonal, is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX array, dimension (LDB, N) +* On entry, the Hermitian positive definite matrix B. +* If UPLO = 'U', the leading N-by-N upper triangular part of B +* contains the upper triangular part of the matrix B. +* If UPLO = 'L', the leading N-by-N lower triangular part of B +* contains the lower triangular part of the matrix B. +* +* On exit, if INFO <= N, the part of B containing the matrix is +* overwritten by the triangular factor U or L from the Cholesky +* factorization B = U**H*U or B = L*L**H. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,2*N-1). +* For optimal efficiency, LWORK >= (NB+1)*N, +* where NB is the blocksize for CHETRD returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) REAL array, dimension (max(1, 3*N-2)) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: CPOTRF or CHEEV returned an error code: +* <= N: if INFO = i, CHEEV failed to converge; +* i off-diagonal elements of an intermediate +* tridiagonal form did not converge to zero; +* > N: if INFO = N + i, for 1 <= i <= N, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER LWKOPT, NB, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHEEV, CHEGST, CPOTRF, CTRMM, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ. -1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 ) + LWKOPT = ( NB+1 )*N + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEGV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL CPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL CHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL CHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + CALL CTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + CALL CTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CHEGV +* + END diff --git a/costa/native/external/lapack/chegvd.f b/costa/native/external/lapack/chegvd.f new file mode 100644 index 000000000..05ceab081 --- /dev/null +++ b/costa/native/external/lapack/chegvd.f @@ -0,0 +1,297 @@ + SUBROUTINE CHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, + $ LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CHEGVD computes all the eigenvalues, and optionally, the eigenvectors +* of a complex generalized Hermitian-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +* B are assumed to be Hermitian and B is also positive definite. +* If eigenvectors are desired, it uses a divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA, N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* +* On exit, if JOBZ = 'V', then if INFO = 0, A contains the +* matrix Z of eigenvectors. The eigenvectors are normalized +* as follows: +* if ITYPE = 1 or 2, Z**H*B*Z = I; +* if ITYPE = 3, Z**H*inv(B)*Z = I. +* If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +* or the lower triangle (if UPLO='L') of A, including the +* diagonal, is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX array, dimension (LDB, N) +* On entry, the Hermitian matrix B. If UPLO = 'U', the +* leading N-by-N upper triangular part of B contains the +* upper triangular part of the matrix B. If UPLO = 'L', +* the leading N-by-N lower triangular part of B contains +* the lower triangular part of the matrix B. +* +* On exit, if INFO <= N, the part of B containing the matrix is +* overwritten by the triangular factor U or L from the Cholesky +* factorization B = U**H*U or B = L*L**H. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. +* If N <= 1, LWORK >= 1. +* If JOBZ = 'N' and N > 1, LWORK >= N + 1. +* If JOBZ = 'V' and N > 1, LWORK >= 2*N + N**2. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace/output) REAL array, dimension (LRWORK) +* On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +* +* LRWORK (input) INTEGER +* The dimension of the array RWORK. +* If N <= 1, LRWORK >= 1. +* If JOBZ = 'N' and N > 1, LRWORK >= N. +* If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2. +* +* If LRWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the RWORK array, +* returns this value as the first entry of the RWORK array, and +* no error message related to LRWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If N <= 1, LIWORK >= 1. +* If JOBZ = 'N' and N > 1, LIWORK >= 1. +* If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: CPOTRF or CHEEVD returned an error code: +* <= N: if INFO = i, CHEEVD failed to converge; +* i off-diagonal elements of an intermediate +* tridiagonal form did not converge to zero; +* > N: if INFO = N + i, for 1 <= i <= N, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER LIOPT, LIWMIN, LOPT, LROPT, LRWMIN, LWMIN, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHEEVD, CHEGST, CPOTRF, CTRMM, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LWMIN = 1 + LRWMIN = 1 + LIWMIN = 1 + LOPT = LWMIN + LROPT = LRWMIN + LIOPT = LIWMIN + ELSE + IF( WANTZ ) THEN + LWMIN = 2*N + N*N + LRWMIN = 1 + 5*N + 2*N*N + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + 1 + LRWMIN = N + LIWMIN = 1 + END IF + LOPT = LWMIN + LROPT = LRWMIN + LIOPT = LIWMIN + END IF + IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LOPT + RWORK( 1 ) = LROPT + IWORK( 1 ) = LIOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL CPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL CHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL CHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, LRWORK, + $ IWORK, LIWORK, INFO ) + LOPT = MAX( REAL( LOPT ), REAL( WORK( 1 ) ) ) + LROPT = MAX( REAL( LROPT ), REAL( RWORK( 1 ) ) ) + LIOPT = MAX( REAL( LIOPT ), REAL( IWORK( 1 ) ) ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + CALL CTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, CONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + CALL CTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, CONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LOPT + RWORK( 1 ) = LROPT + IWORK( 1 ) = LIOPT +* + RETURN +* +* End of CHEGVD +* + END diff --git a/costa/native/external/lapack/chegvx.f b/costa/native/external/lapack/chegvx.f new file mode 100644 index 000000000..39020a510 --- /dev/null +++ b/costa/native/external/lapack/chegvx.f @@ -0,0 +1,329 @@ + SUBROUTINE CHEGVX( ITYPE, JOBZ, RANGE, UPLO, N, A, LDA, B, LDB, + $ VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, + $ LWORK, RWORK, IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, ITYPE, IU, LDA, LDB, LDZ, LWORK, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CHEGVX computes selected eigenvalues, and optionally, eigenvectors +* of a complex generalized Hermitian-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +* B are assumed to be Hermitian and B is also positive definite. +* Eigenvalues and eigenvectors can be selected by specifying either a +* range of values or a range of indices for the desired eigenvalues. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +** +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA, N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* +* On exit, the lower triangle (if UPLO='L') or the upper +* triangle (if UPLO='U') of A, including the diagonal, is +* destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX array, dimension (LDB, N) +* On entry, the Hermitian matrix B. If UPLO = 'U', the +* leading N-by-N upper triangular part of B contains the +* upper triangular part of the matrix B. If UPLO = 'L', +* the leading N-by-N lower triangular part of B contains +* the lower triangular part of the matrix B. +* +* On exit, if INFO <= N, the part of B containing the matrix is +* overwritten by the triangular factor U or L from the Cholesky +* factorization B = U**H*U or B = L*L**H. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* VL (input) REAL +* VU (input) REAL +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) REAL +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing A to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*SLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*SLAMCH('S'). +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) REAL array, dimension (N) +* The first M elements contain the selected +* eigenvalues in ascending order. +* +* Z (output) COMPLEX array, dimension (LDZ, max(1,M)) +* If JOBZ = 'N', then Z is not referenced. +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* The eigenvectors are normalized as follows: +* if ITYPE = 1 or 2, Z**T*B*Z = I; +* if ITYPE = 3, Z**T*inv(B)*Z = I. +* +* If an eigenvector fails to converge, then that column of Z +* contains the latest approximation to the eigenvector, and the +* index of the eigenvector is returned in IFAIL. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,2*N-1). +* For optimal efficiency, LWORK >= (NB+1)*N, +* where NB is the blocksize for CHETRD returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) REAL array, dimension (7*N) +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: CPOTRF or CHEEVX returned an error code: +* <= N: if INFO = i, CHEEVX failed to converge; +* i eigenvectors failed to converge. Their indices +* are stored in array IFAIL. +* > N: if INFO = N + i, for 1 <= i <= N, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ + CHARACTER TRANS + INTEGER LOPT, LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHEEVX, CHEGST, CPOTRF, CTRMM, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -3 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( VALEIG .AND. N.GT.0 ) THEN + IF( VU.LE.VL ) INFO = -11 + ELSE IF( INDEIG .AND. IL.LT.1 ) THEN + INFO = -12 + ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN + INFO = -13 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -18 + ELSE IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 ) + LWKOPT = ( NB+1 )*N + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEGVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Form a Cholesky factorization of B. +* + CALL CPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL CHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL CHEEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, ABSTOL, + $ M, W, Z, LDZ, WORK, LWORK, RWORK, IWORK, IFAIL, + $ INFO ) + LOPT = WORK( 1 ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + IF( INFO.GT.0 ) + $ M = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + CALL CTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, M, CONE, B, + $ LDB, Z, LDZ ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + CALL CTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, M, CONE, B, + $ LDB, Z, LDZ ) + END IF + END IF +* +* Set WORK(1) to optimal complex workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CHEGVX +* + END diff --git a/costa/native/external/lapack/cherfs.f b/costa/native/external/lapack/cherfs.f new file mode 100644 index 000000000..669e2ad8b --- /dev/null +++ b/costa/native/external/lapack/cherfs.f @@ -0,0 +1,339 @@ + SUBROUTINE CHERFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, + $ X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CHERFS improves the computed solution to a system of linear +* equations when the coefficient matrix is Hermitian indefinite, and +* provides error bounds and backward error estimates for the solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The Hermitian matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input) COMPLEX array, dimension (LDAF,N) +* The factored form of the matrix A. AF contains the block +* diagonal matrix D and the multipliers used to obtain the +* factor U or L from the factorization A = U*D*U**H or +* A = L*D*L**H as computed by CHETRF. +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by CHETRF. +* +* B (input) COMPLEX array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) COMPLEX array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by CHETRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CHEMV, CHETRS, CLACON, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHERFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL CCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL CHEMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 40 CONTINUE + RWORK( K ) = RWORK( K ) + ABS( REAL( A( K, K ) ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + ABS( REAL( A( K, K ) ) )*XK + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL CHETRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + CALL CAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL CHETRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL CHETRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of CHERFS +* + END diff --git a/costa/native/external/lapack/chesv.f b/costa/native/external/lapack/chesv.f new file mode 100644 index 000000000..a53b36c77 --- /dev/null +++ b/costa/native/external/lapack/chesv.f @@ -0,0 +1,171 @@ + SUBROUTINE CHESV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CHESV computes the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS +* matrices. +* +* The diagonal pivoting method is used to factor A as +* A = U * D * U**H, if UPLO = 'U', or +* A = L * D * L**H, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is Hermitian and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then +* used to solve the system of equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the block diagonal matrix D and the +* multipliers used to obtain the factor U or L from the +* factorization A = U*D*U**H or A = L*D*L**H as computed by +* CHETRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D, as +* determined by CHETRF. If IPIV(k) > 0, then rows and columns +* k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +* diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +* then rows and columns k-1 and -IPIV(k) were interchanged and +* D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +* IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +* -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +* diagonal block. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of WORK. LWORK >= 1, and for best performance +* LWORK >= N*NB, where NB is the optimal blocksize for +* CHETRF. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, so the solution could not be computed. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHETRF, CHETRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'CHETRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHESV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL CHETRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CHETRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CHESV +* + END diff --git a/costa/native/external/lapack/chesvx.f b/costa/native/external/lapack/chesvx.f new file mode 100644 index 000000000..6537575f7 --- /dev/null +++ b/costa/native/external/lapack/chesvx.f @@ -0,0 +1,299 @@ + SUBROUTINE CHESVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, + $ LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK, + $ RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CHESVX uses the diagonal pivoting factorization to compute the +* solution to a complex system of linear equations A * X = B, +* where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS +* matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'N', the diagonal pivoting method is used to factor A. +* The form of the factorization is +* A = U * D * U**H, if UPLO = 'U', or +* A = L * D * L**H, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is Hermitian and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. +* +* 2. If some D(i,i)=0, so that D is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 3. The system of equations is solved for X using the factored form +* of A. +* +* 4. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of A has been +* supplied on entry. +* = 'F': On entry, AF and IPIV contain the factored form +* of A. A, AF and IPIV will not be modified. +* = 'N': The matrix A will be copied to AF and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The Hermitian matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input or output) COMPLEX array, dimension (LDAF,N) +* If FACT = 'F', then AF is an input argument and on entry +* contains the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**H or A = L*D*L**H as computed by CHETRF. +* +* If FACT = 'N', then AF is an output argument and on exit +* returns the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**H or A = L*D*L**H. +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains details of the interchanges and the block structure +* of D, as determined by CHETRF. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains details of the interchanges and the block structure +* of D, as determined by CHETRF. +* +* B (input) COMPLEX array, dimension (LDB,NRHS) +* The N-by-NRHS right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) COMPLEX array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) REAL +* The estimate of the reciprocal condition number of the matrix +* A. If RCOND is less than the machine precision (in +* particular, if RCOND = 0), the matrix is singular to working +* precision. This condition is indicated by a return code of +* INFO > 0. +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of WORK. LWORK >= 2*N, and for best performance +* LWORK >= N*NB, where NB is the optimal blocksize for +* CHETRF. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: D(i,i) is exactly zero. The factorization +* has been completed but the factor D is exactly +* singular, so the solution and error bounds could +* not be computed. RCOND = 0 is returned. +* = N+1: D is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOFACT + INTEGER LWKOPT, NB + REAL ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANHE, SLAMCH + EXTERNAL ILAENV, LSAME, CLANHE, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CHECON, CHERFS, CHETRF, CHETRS, CLACPY, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'CHETRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHESVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL CLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL CHETRF( UPLO, N, AF, LDAF, IPIV, WORK, LWORK, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = CLANHE( 'I', UPLO, N, A, LDA, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL CHECON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution vectors X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CHETRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL CHERFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CHESVX +* + END diff --git a/costa/native/external/lapack/chetd2.f b/costa/native/external/lapack/chetd2.f new file mode 100644 index 000000000..449a29882 --- /dev/null +++ b/costa/native/external/lapack/chetd2.f @@ -0,0 +1,259 @@ + SUBROUTINE CHETD2( UPLO, N, A, LDA, D, E, TAU, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + COMPLEX A( LDA, * ), TAU( * ) +* .. +* +* Purpose +* ======= +* +* CHETD2 reduces a complex Hermitian matrix A to real symmetric +* tridiagonal form T by a unitary similarity transformation: +* Q' * A * Q = T. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* Hermitian matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* On exit, if UPLO = 'U', the diagonal and first superdiagonal +* of A are overwritten by the corresponding elements of the +* tridiagonal matrix T, and the elements above the first +* superdiagonal, with the array TAU, represent the unitary +* matrix Q as a product of elementary reflectors; if UPLO +* = 'L', the diagonal and first subdiagonal of A are over- +* written by the corresponding elements of the tridiagonal +* matrix T, and the elements below the first subdiagonal, with +* the array TAU, represent the unitary matrix Q as a product +* of elementary reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* D (output) REAL array, dimension (N) +* The diagonal elements of the tridiagonal matrix T: +* D(i) = A(i,i). +* +* E (output) REAL array, dimension (N-1) +* The off-diagonal elements of the tridiagonal matrix T: +* E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +* +* TAU (output) COMPLEX array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* If UPLO = 'U', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(n-1) . . . H(2) H(1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +* A(1:i-1,i+1), and tau in TAU(i). +* +* If UPLO = 'L', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(1) H(2) . . . H(n-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +* and tau in TAU(i). +* +* The contents of A on exit are illustrated by the following examples +* with n = 5: +* +* if UPLO = 'U': if UPLO = 'L': +* +* ( d e v2 v3 v4 ) ( d ) +* ( d e v3 v4 ) ( e d ) +* ( d e v4 ) ( v1 e d ) +* ( d e ) ( v1 v2 e d ) +* ( d ) ( v1 v2 v3 e d ) +* +* where d and e denote diagonal and off-diagonal elements of T, and vi +* denotes an element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO, HALF + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ), + $ HALF = ( 0.5E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + COMPLEX ALPHA, TAUI +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CHEMV, CHER2, CLARFG, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETD2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A +* + A( N, N ) = REAL( A( N, N ) ) + DO 10 I = N - 1, 1, -1 +* +* Generate elementary reflector H(i) = I - tau * v * v' +* to annihilate A(1:i-1,i+1) +* + ALPHA = A( I, I+1 ) + CALL CLARFG( I, ALPHA, A( 1, I+1 ), 1, TAUI ) + E( I ) = ALPHA +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(1:i,1:i) +* + A( I, I+1 ) = ONE +* +* Compute x := tau * A * v storing x in TAU(1:i) +* + CALL CHEMV( UPLO, I, TAUI, A, LDA, A( 1, I+1 ), 1, ZERO, + $ TAU, 1 ) +* +* Compute w := x - 1/2 * tau * (x'*v) * v +* + ALPHA = -HALF*TAUI*CDOTC( I, TAU, 1, A( 1, I+1 ), 1 ) + CALL CAXPY( I, ALPHA, A( 1, I+1 ), 1, TAU, 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w' - w * v' +* + CALL CHER2( UPLO, I, -ONE, A( 1, I+1 ), 1, TAU, 1, A, + $ LDA ) +* + ELSE + A( I, I ) = REAL( A( I, I ) ) + END IF + A( I, I+1 ) = E( I ) + D( I+1 ) = A( I+1, I+1 ) + TAU( I ) = TAUI + 10 CONTINUE + D( 1 ) = A( 1, 1 ) + ELSE +* +* Reduce the lower triangle of A +* + A( 1, 1 ) = REAL( A( 1, 1 ) ) + DO 20 I = 1, N - 1 +* +* Generate elementary reflector H(i) = I - tau * v * v' +* to annihilate A(i+2:n,i) +* + ALPHA = A( I+1, I ) + CALL CLARFG( N-I, ALPHA, A( MIN( I+2, N ), I ), 1, TAUI ) + E( I ) = ALPHA +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(i+1:n,i+1:n) +* + A( I+1, I ) = ONE +* +* Compute x := tau * A * v storing y in TAU(i:n-1) +* + CALL CHEMV( UPLO, N-I, TAUI, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, TAU( I ), 1 ) +* +* Compute w := x - 1/2 * tau * (x'*v) * v +* + ALPHA = -HALF*TAUI*CDOTC( N-I, TAU( I ), 1, A( I+1, I ), + $ 1 ) + CALL CAXPY( N-I, ALPHA, A( I+1, I ), 1, TAU( I ), 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w' - w * v' +* + CALL CHER2( UPLO, N-I, -ONE, A( I+1, I ), 1, TAU( I ), 1, + $ A( I+1, I+1 ), LDA ) +* + ELSE + A( I+1, I+1 ) = REAL( A( I+1, I+1 ) ) + END IF + A( I+1, I ) = E( I ) + D( I ) = A( I, I ) + TAU( I ) = TAUI + 20 CONTINUE + D( N ) = A( N, N ) + END IF +* + RETURN +* +* End of CHETD2 +* + END diff --git a/costa/native/external/lapack/chetf2.f b/costa/native/external/lapack/chetf2.f new file mode 100644 index 000000000..faa0a0385 --- /dev/null +++ b/costa/native/external/lapack/chetf2.f @@ -0,0 +1,544 @@ + SUBROUTINE CHETF2( UPLO, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CHETF2 computes the factorization of a complex Hermitian matrix A +* using the Bunch-Kaufman diagonal pivoting method: +* +* A = U*D*U' or A = L*D*L' +* +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, U' is the conjugate transpose of U, and D is +* Hermitian and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* Hermitian matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L (see below for further details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, D(k,k) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, and division by zero will occur if it +* is used to solve a system of equations. +* +* Further Details +* =============== +* +* 1-96 - Based on modifications by +* J. Lewis, Boeing Computer Services Company +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* If UPLO = 'U', then A = U*D*U', where +* U = P(n)*U(n)* ... *P(k)U(k)* ..., +* i.e., U is a product of terms P(k)*U(k), where k decreases from n to +* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I v 0 ) k-s +* U(k) = ( 0 I 0 ) s +* ( 0 0 I ) n-k +* k-s s n-k +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +* and A(k,k), and v overwrites A(1:k-2,k-1:k). +* +* If UPLO = 'L', then A = L*D*L', where +* L = P(1)*L(1)* ... *P(k)*L(k)* ..., +* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I 0 0 ) k-1 +* L(k) = ( 0 I 0 ) s +* ( 0 v I ) n-k-s+1 +* k-1 s n-k-s+1 +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KK, KP, KSTEP + REAL ABSAKK, ALPHA, COLMAX, D, D11, D22, R1, ROWMAX, + $ TT + COMPLEX D12, D21, T, WK, WKM1, WKP1, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAPY2 + EXTERNAL LSAME, ICAMAX, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL CHER, CSSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, REAL, SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETF2', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U' using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 90 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( REAL( A( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = ICAMAX( K-1, A( 1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = REAL( A( K, K ) ) + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = IMAX + ICAMAX( K-IMAX, A( IMAX, IMAX+1 ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + IF( IMAX.GT.1 ) THEN + JMAX = ICAMAX( IMAX-1, A( 1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( REAL( A( IMAX, IMAX ) ) ).GE.ALPHA*ROWMAX ) + $ THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL CSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) + DO 20 J = KP + 1, KK - 1 + T = CONJG( A( J, KK ) ) + A( J, KK ) = CONJG( A( KP, J ) ) + A( KP, J ) = T + 20 CONTINUE + A( KP, KK ) = CONJG( A( KP, KK ) ) + R1 = REAL( A( KK, KK ) ) + A( KK, KK ) = REAL( A( KP, KP ) ) + A( KP, KP ) = R1 + IF( KSTEP.EQ.2 ) THEN + A( K, K ) = REAL( A( K, K ) ) + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + ELSE + A( K, K ) = REAL( A( K, K ) ) + IF( KSTEP.EQ.2 ) + $ A( K-1, K-1 ) = REAL( A( K-1, K-1 ) ) + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)' +* + R1 = ONE / REAL( A( K, K ) ) + CALL CHER( UPLO, K-1, -R1, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL CSSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )' +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )' +* + IF( K.GT.2 ) THEN +* + D = SLAPY2( REAL( A( K-1, K ) ), + $ AIMAG( A( K-1, K ) ) ) + D22 = REAL( A( K-1, K-1 ) ) / D + D11 = REAL( A( K, K ) ) / D + TT = ONE / ( D11*D22-ONE ) + D12 = A( K-1, K ) / D + D = TT / D +* + DO 40 J = K - 2, 1, -1 + WKM1 = D*( D11*A( J, K-1 )-CONJG( D12 )*A( J, K ) ) + WK = D*( D22*A( J, K )-D12*A( J, K-1 ) ) + DO 30 I = J, 1, -1 + A( I, J ) = A( I, J ) - A( I, K )*CONJG( WK ) - + $ A( I, K-1 )*CONJG( WKM1 ) + 30 CONTINUE + A( J, K ) = WK + A( J, K-1 ) = WKM1 + A( J, J ) = CMPLX( REAL( A( J, J ) ), 0.0E+0 ) + 40 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L' using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 50 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 90 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( REAL( A( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + ICAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = REAL( A( K, K ) ) + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = K - 1 + ICAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + ICAMAX( N-IMAX, A( IMAX+1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( REAL( A( IMAX, IMAX ) ) ).GE.ALPHA*ROWMAX ) + $ THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) + DO 60 J = KK + 1, KP - 1 + T = CONJG( A( J, KK ) ) + A( J, KK ) = CONJG( A( KP, J ) ) + A( KP, J ) = T + 60 CONTINUE + A( KP, KK ) = CONJG( A( KP, KK ) ) + R1 = REAL( A( KK, KK ) ) + A( KK, KK ) = REAL( A( KP, KP ) ) + A( KP, KP ) = R1 + IF( KSTEP.EQ.2 ) THEN + A( K, K ) = REAL( A( K, K ) ) + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + ELSE + A( K, K ) = REAL( A( K, K ) ) + IF( KSTEP.EQ.2 ) + $ A( K+1, K+1 ) = REAL( A( K+1, K+1 ) ) + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)' +* + R1 = ONE / REAL( A( K, K ) ) + CALL CHER( UPLO, N-K, -R1, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column K +* + CALL CSSCAL( N-K, R1, A( K+1, K ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k) +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )' +* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )' +* +* where L(k) and L(k+1) are the k-th and (k+1)-th +* columns of L +* + D = SLAPY2( REAL( A( K+1, K ) ), + $ AIMAG( A( K+1, K ) ) ) + D11 = REAL( A( K+1, K+1 ) ) / D + D22 = REAL( A( K, K ) ) / D + TT = ONE / ( D11*D22-ONE ) + D21 = A( K+1, K ) / D + D = TT / D +* + DO 80 J = K + 2, N + WK = D*( D11*A( J, K )-D21*A( J, K+1 ) ) + WKP1 = D*( D22*A( J, K+1 )-CONJG( D21 )*A( J, K ) ) + DO 70 I = J, N + A( I, J ) = A( I, J ) - A( I, K )*CONJG( WK ) - + $ A( I, K+1 )*CONJG( WKP1 ) + 70 CONTINUE + A( J, K ) = WK + A( J, K+1 ) = WKP1 + A( J, J ) = CMPLX( REAL( A( J, J ) ), 0.0E+0 ) + 80 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 50 +* + END IF +* + 90 CONTINUE + RETURN +* +* End of CHETF2 +* + END diff --git a/costa/native/external/lapack/chetrd.f b/costa/native/external/lapack/chetrd.f new file mode 100644 index 000000000..6a453a076 --- /dev/null +++ b/costa/native/external/lapack/chetrd.f @@ -0,0 +1,297 @@ + SUBROUTINE CHETRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CHETRD reduces a complex Hermitian matrix A to real symmetric +* tridiagonal form T by a unitary similarity transformation: +* Q**H * A * Q = T. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* On exit, if UPLO = 'U', the diagonal and first superdiagonal +* of A are overwritten by the corresponding elements of the +* tridiagonal matrix T, and the elements above the first +* superdiagonal, with the array TAU, represent the unitary +* matrix Q as a product of elementary reflectors; if UPLO +* = 'L', the diagonal and first subdiagonal of A are over- +* written by the corresponding elements of the tridiagonal +* matrix T, and the elements below the first subdiagonal, with +* the array TAU, represent the unitary matrix Q as a product +* of elementary reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* D (output) REAL array, dimension (N) +* The diagonal elements of the tridiagonal matrix T: +* D(i) = A(i,i). +* +* E (output) REAL array, dimension (N-1) +* The off-diagonal elements of the tridiagonal matrix T: +* E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +* +* TAU (output) COMPLEX array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1. +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* If UPLO = 'U', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(n-1) . . . H(2) H(1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +* A(1:i-1,i+1), and tau in TAU(i). +* +* If UPLO = 'L', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(1) H(2) . . . H(n-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +* and tau in TAU(i). +* +* The contents of A on exit are illustrated by the following examples +* with n = 5: +* +* if UPLO = 'U': if UPLO = 'L': +* +* ( d e v2 v3 v4 ) ( d ) +* ( d e v3 v4 ) ( e d ) +* ( d e v4 ) ( v1 e d ) +* ( d e ) ( v1 v2 e d ) +* ( d ) ( v1 v2 v3 e d ) +* +* where d and e denote diagonal and off-diagonal elements of T, and vi +* denotes an element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, IWS, J, KK, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CHER2K, CHETD2, CLATRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. +* + NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NX = N + IWS = 1 + IF( NB.GT.1 .AND. NB.LT.N ) THEN +* +* Determine when to cross over from blocked to unblocked code +* (last block is always handled by unblocked code). +* + NX = MAX( NB, ILAENV( 3, 'CHETRD', UPLO, N, -1, -1, -1 ) ) + IF( NX.LT.N ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code by setting NX = N. +* + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = ILAENV( 2, 'CHETRD', UPLO, N, -1, -1, -1 ) + IF( NB.LT.NBMIN ) + $ NX = N + END IF + ELSE + NX = N + END IF + ELSE + NB = 1 + END IF +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A. +* Columns 1:kk are handled by the unblocked method. +* + KK = N - ( ( N-NX+NB-1 ) / NB )*NB + DO 20 I = N - NB + 1, KK + 1, -NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL CLATRD( UPLO, I+NB-1, NB, A, LDA, E, TAU, WORK, + $ LDWORK ) +* +* Update the unreduced submatrix A(1:i-1,1:i-1), using an +* update of the form: A := A - V*W' - W*V' +* + CALL CHER2K( UPLO, 'No transpose', I-1, NB, -CONE, + $ A( 1, I ), LDA, WORK, LDWORK, ONE, A, LDA ) +* +* Copy superdiagonal elements back into A, and diagonal +* elements into D +* + DO 10 J = I, I + NB - 1 + A( J-1, J ) = E( J-1 ) + D( J ) = A( J, J ) + 10 CONTINUE + 20 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL CHETD2( UPLO, KK, A, LDA, D, E, TAU, IINFO ) + ELSE +* +* Reduce the lower triangle of A +* + DO 40 I = 1, N - NX, NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL CLATRD( UPLO, N-I+1, NB, A( I, I ), LDA, E( I ), + $ TAU( I ), WORK, LDWORK ) +* +* Update the unreduced submatrix A(i+nb:n,i+nb:n), using +* an update of the form: A := A - V*W' - W*V' +* + CALL CHER2K( UPLO, 'No transpose', N-I-NB+1, NB, -CONE, + $ A( I+NB, I ), LDA, WORK( NB+1 ), LDWORK, ONE, + $ A( I+NB, I+NB ), LDA ) +* +* Copy subdiagonal elements back into A, and diagonal +* elements into D +* + DO 30 J = I, I + NB - 1 + A( J+1, J ) = E( J ) + D( J ) = A( J, J ) + 30 CONTINUE + 40 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL CHETD2( UPLO, N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAU( I ), IINFO ) + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of CHETRD +* + END diff --git a/costa/native/external/lapack/chetrf.f b/costa/native/external/lapack/chetrf.f new file mode 100644 index 000000000..21c74f081 --- /dev/null +++ b/costa/native/external/lapack/chetrf.f @@ -0,0 +1,282 @@ + SUBROUTINE CHETRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CHETRF computes the factorization of a complex Hermitian matrix A +* using the Bunch-Kaufman diagonal pivoting method. The form of the +* factorization is +* +* A = U*D*U**H or A = L*D*L**H +* +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is Hermitian and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. +* +* This is the blocked version of the algorithm, calling Level 3 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L (see below for further details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of WORK. LWORK >=1. For best performance +* LWORK >= N*NB, where NB is the block size returned by ILAENV. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, and division by zero will occur if it +* is used to solve a system of equations. +* +* Further Details +* =============== +* +* If UPLO = 'U', then A = U*D*U', where +* U = P(n)*U(n)* ... *P(k)U(k)* ..., +* i.e., U is a product of terms P(k)*U(k), where k decreases from n to +* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I v 0 ) k-s +* U(k) = ( 0 I 0 ) s +* ( 0 0 I ) n-k +* k-s s n-k +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +* and A(k,k), and v overwrites A(1:k-2,k-1:k). +* +* If UPLO = 'L', then A = L*D*L', where +* L = P(1)*L(1)* ... *P(k)*L(k)* ..., +* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I 0 0 ) k-1 +* L(k) = ( 0 I 0 ) s +* ( 0 v I ) n-k-s+1 +* k-1 s n-k-s+1 +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CHETF2, CLAHEF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'CHETRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'CHETRF', UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U' using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by CLAHEF; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 40 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL CLAHEF( UPLO, K, NB, KB, A, LDA, IPIV, WORK, N, IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL CHETF2( UPLO, K, A, LDA, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L' using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by CLAHEF; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL CLAHEF( UPLO, N-K+1, NB, KB, A( K, K ), LDA, IPIV( K ), + $ WORK, N, IINFO ) + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL CHETF2( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), IINFO ) + KB = N - K + 1 + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO 30 J = K, K + KB - 1 + IF( IPIV( J ).GT.0 ) THEN + IPIV( J ) = IPIV( J ) + K - 1 + ELSE + IPIV( J ) = IPIV( J ) - K + 1 + END IF + 30 CONTINUE +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* + END IF +* + 40 CONTINUE + WORK( 1 ) = LWKOPT + RETURN +* +* End of CHETRF +* + END diff --git a/costa/native/external/lapack/chetri.f b/costa/native/external/lapack/chetri.f new file mode 100644 index 000000000..e72cd3730 --- /dev/null +++ b/costa/native/external/lapack/chetri.f @@ -0,0 +1,328 @@ + SUBROUTINE CHETRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CHETRI computes the inverse of a complex Hermitian indefinite matrix +* A using the factorization A = U*D*U**H or A = L*D*L**H computed by +* CHETRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**H; +* = 'L': Lower triangular, form is A = L*D*L**H. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the block diagonal matrix D and the multipliers +* used to obtain the factor U or L as computed by CHETRF. +* +* On exit, if INFO = 0, the (Hermitian) inverse of the original +* matrix. If UPLO = 'U', the upper triangular part of the +* inverse is formed and the part of A below the diagonal is not +* referenced; if UPLO = 'L' the lower triangular part of the +* inverse is formed and the part of A above the diagonal is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by CHETRF. +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +* inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + COMPLEX CONE, ZERO + PARAMETER ( ONE = 1.0E+0, CONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP, KSTEP + REAL AK, AKP1, D, T + COMPLEX AKKP1, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CHEMV, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / REAL( A( K, K ) ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL CCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL CHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - REAL( CDOTC( K-1, WORK, 1, A( 1, + $ K ), 1 ) ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K+1 ) ) + AK = REAL( A( K, K ) ) / T + AKP1 = REAL( A( K+1, K+1 ) ) / T + AKKP1 = A( K, K+1 ) / T + D = T*( AK*AKP1-ONE ) + A( K, K ) = AKP1 / D + A( K+1, K+1 ) = AK / D + A( K, K+1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL CCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL CHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - REAL( CDOTC( K-1, WORK, 1, A( 1, + $ K ), 1 ) ) + A( K, K+1 ) = A( K, K+1 ) - + $ CDOTC( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 ) + CALL CCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 ) + CALL CHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K+1 ), 1 ) + A( K+1, K+1 ) = A( K+1, K+1 ) - + $ REAL( CDOTC( K-1, WORK, 1, A( 1, K+1 ), + $ 1 ) ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + CALL CSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + DO 40 J = KP + 1, K - 1 + TEMP = CONJG( A( J, K ) ) + A( J, K ) = CONJG( A( KP, J ) ) + A( KP, J ) = TEMP + 40 CONTINUE + A( KP, K ) = CONJG( A( KP, K ) ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K+1 ) + A( K, K+1 ) = A( KP, K+1 ) + A( KP, K+1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + GO TO 30 + 50 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 60 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / REAL( A( K, K ) ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL CCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL CHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK, + $ 1, ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - REAL( CDOTC( N-K, WORK, 1, + $ A( K+1, K ), 1 ) ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K-1 ) ) + AK = REAL( A( K-1, K-1 ) ) / T + AKP1 = REAL( A( K, K ) ) / T + AKKP1 = A( K, K-1 ) / T + D = T*( AK*AKP1-ONE ) + A( K-1, K-1 ) = AKP1 / D + A( K, K ) = AK / D + A( K, K-1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL CCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL CHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK, + $ 1, ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - REAL( CDOTC( N-K, WORK, 1, + $ A( K+1, K ), 1 ) ) + A( K, K-1 ) = A( K, K-1 ) - + $ CDOTC( N-K, A( K+1, K ), 1, A( K+1, K-1 ), + $ 1 ) + CALL CCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 ) + CALL CHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK, + $ 1, ZERO, A( K+1, K-1 ), 1 ) + A( K-1, K-1 ) = A( K-1, K-1 ) - + $ REAL( CDOTC( N-K, WORK, 1, A( K+1, K-1 ), + $ 1 ) ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + DO 70 J = K + 1, KP - 1 + TEMP = CONJG( A( J, K ) ) + A( J, K ) = CONJG( A( KP, J ) ) + A( KP, J ) = TEMP + 70 CONTINUE + A( KP, K ) = CONJG( A( KP, K ) ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K-1 ) + A( K, K-1 ) = A( KP, K-1 ) + A( KP, K-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + GO TO 60 + 80 CONTINUE + END IF +* + RETURN +* +* End of CHETRI +* + END diff --git a/costa/native/external/lapack/chetrs.f b/costa/native/external/lapack/chetrs.f new file mode 100644 index 000000000..73a61895d --- /dev/null +++ b/costa/native/external/lapack/chetrs.f @@ -0,0 +1,394 @@ + SUBROUTINE CHETRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CHETRS solves a system of linear equations A*X = B with a complex +* Hermitian matrix A using the factorization A = U*D*U**H or +* A = L*D*L**H computed by CHETRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**H; +* = 'L': Lower triangular, form is A = L*D*L**H. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by CHETRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by CHETRF. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP + REAL S + COMPLEX AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CGERU, CLACGV, CSSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, REAL +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U'. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL CGERU( K-1, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + S = REAL( ONE ) / REAL( A( K, K ) ) + CALL CSSCAL( NRHS, S, B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL CSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL CGERU( K-2, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL CGERU( K-2, NRHS, -ONE, A( 1, K-1 ), 1, B( K-1, 1 ), + $ LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K-1, K ) + AKM1 = A( K-1, K-1 ) / AKM1K + AK = A( K, K ) / CONJG( AKM1K ) + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / CONJG( AKM1K ) + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U'*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U'(K)), where U(K) is the transformation +* stored in column K of A. +* + IF( K.GT.1 ) THEN + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + END IF +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U'(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.GT.1 ) THEN + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL CLACGV( NRHS, B( K, 1 ), LDB ) +* + CALL CLACGV( NRHS, B( K+1, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K+1 ), 1, ONE, B( K+1, 1 ), LDB ) + CALL CLACGV( NRHS, B( K+1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L'. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL CGERU( N-K, NRHS, -ONE, A( K+1, K ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + S = REAL( ONE ) / REAL( A( K, K ) ) + CALL CSSCAL( NRHS, S, B( K, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL CSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL CGERU( N-K-1, NRHS, -ONE, A( K+2, K ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL CGERU( N-K-1, NRHS, -ONE, A( K+2, K+1 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K+1, K ) + AKM1 = A( K, K ) / CONJG( AKM1K ) + AK = A( K+1, K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / CONJG( AKM1K ) + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L'*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L'(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) THEN + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, A( K+1, K ), 1, ONE, + $ B( K, 1 ), LDB ) + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + END IF +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L'(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, A( K+1, K ), 1, ONE, + $ B( K, 1 ), LDB ) + CALL CLACGV( NRHS, B( K, 1 ), LDB ) +* + CALL CLACGV( NRHS, B( K-1, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, A( K+1, K-1 ), 1, ONE, + $ B( K-1, 1 ), LDB ) + CALL CLACGV( NRHS, B( K-1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of CHETRS +* + END diff --git a/costa/native/external/lapack/chgeqz.f b/costa/native/external/lapack/chgeqz.f new file mode 100644 index 000000000..5a9b4af83 --- /dev/null +++ b/costa/native/external/lapack/chgeqz.f @@ -0,0 +1,734 @@ + SUBROUTINE CHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, Q, LDQ, Z, LDZ, WORK, LWORK, + $ RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, COMPZ, JOB + INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, LWORK, N +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), Q( LDQ, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CHGEQZ implements a single-shift version of the QZ +* method for finding the generalized eigenvalues w(i)=ALPHA(i)/BETA(i) +* of the equation +* +* det( A - w(i) B ) = 0 +* +* If JOB='S', then the pair (A,B) is simultaneously +* reduced to Schur form (i.e., A and B are both upper triangular) by +* applying one unitary tranformation (usually called Q) on the left and +* another (usually called Z) on the right. The diagonal elements of +* A are then ALPHA(1),...,ALPHA(N), and of B are BETA(1),...,BETA(N). +* +* If JOB='S' and COMPQ and COMPZ are 'V' or 'I', then the unitary +* transformations used to reduce (A,B) are accumulated into the arrays +* Q and Z s.t.: +* +* Q(in) A(in) Z(in)* = Q(out) A(out) Z(out)* +* Q(in) B(in) Z(in)* = Q(out) B(out) Z(out)* +* +* Ref: C.B. Moler & G.W. Stewart, "An Algorithm for Generalized Matrix +* Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), +* pp. 241--256. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* = 'E': compute only ALPHA and BETA. A and B will not +* necessarily be put into generalized Schur form. +* = 'S': put A and B into generalized Schur form, as well +* as computing ALPHA and BETA. +* +* COMPQ (input) CHARACTER*1 +* = 'N': do not modify Q. +* = 'V': multiply the array Q on the right by the conjugate +* transpose of the unitary tranformation that is +* applied to the left side of A and B to reduce them +* to Schur form. +* = 'I': like COMPQ='V', except that Q will be initialized to +* the identity first. +* +* COMPZ (input) CHARACTER*1 +* = 'N': do not modify Z. +* = 'V': multiply the array Z on the right by the unitary +* tranformation that is applied to the right side of +* A and B to reduce them to Schur form. +* = 'I': like COMPZ='V', except that Z will be initialized to +* the identity first. +* +* N (input) INTEGER +* The order of the matrices A, B, Q, and Z. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that A is already upper triangular in rows and +* columns 1:ILO-1 and IHI+1:N. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* A (input/output) COMPLEX array, dimension (LDA, N) +* On entry, the N-by-N upper Hessenberg matrix A. Elements +* below the subdiagonal must be zero. +* If JOB='S', then on exit A and B will have been +* simultaneously reduced to upper triangular form. +* If JOB='E', then on exit A will have been destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max( 1, N ). +* +* B (input/output) COMPLEX array, dimension (LDB, N) +* On entry, the N-by-N upper triangular matrix B. Elements +* below the diagonal must be zero. +* If JOB='S', then on exit A and B will have been +* simultaneously reduced to upper triangular form. +* If JOB='E', then on exit B will have been destroyed. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max( 1, N ). +* +* ALPHA (output) COMPLEX array, dimension (N) +* The diagonal elements of A when the pair (A,B) has been +* reduced to Schur form. ALPHA(i)/BETA(i) i=1,...,N +* are the generalized eigenvalues. +* +* BETA (output) COMPLEX array, dimension (N) +* The diagonal elements of B when the pair (A,B) has been +* reduced to Schur form. ALPHA(i)/BETA(i) i=1,...,N +* are the generalized eigenvalues. A and B are normalized +* so that BETA(1),...,BETA(N) are non-negative real numbers. +* +* Q (input/output) COMPLEX array, dimension (LDQ, N) +* If COMPQ='N', then Q will not be referenced. +* If COMPQ='V' or 'I', then the conjugate transpose of the +* unitary transformations which are applied to A and B on +* the left will be applied to the array Q on the right. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= 1. +* If COMPQ='V' or 'I', then LDQ >= N. +* +* Z (input/output) COMPLEX array, dimension (LDZ, N) +* If COMPZ='N', then Z will not be referenced. +* If COMPZ='V' or 'I', then the unitary transformations which +* are applied to A and B on the right will be applied to the +* array Z on the right. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1. +* If COMPZ='V' or 'I', then LDZ >= N. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO >= 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* = 1,...,N: the QZ iteration did not converge. (A,B) is not +* in Schur form, but ALPHA(i) and BETA(i), +* i=INFO+1,...,N should be correct. +* = N+1,...,2*N: the shift calculation failed. (A,B) is not +* in Schur form, but ALPHA(i) and BETA(i), +* i=INFO-N+1,...,N should be correct. +* > 2*N: various "impossible" errors. +* +* Further Details +* =============== +* +* We assume that complex ABS works as long as its value is less than +* overflow. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL HALF + PARAMETER ( HALF = 0.5E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ILAZR2, ILAZRO, ILQ, ILSCHR, ILZ, LQUERY + INTEGER ICOMPQ, ICOMPZ, IFIRST, IFRSTM, IITER, ILAST, + $ ILASTM, IN, ISCHUR, ISTART, J, JC, JCH, JITER, + $ JR, MAXIT + REAL ABSB, ANORM, ASCALE, ATOL, BNORM, BSCALE, BTOL, + $ C, SAFMIN, TEMP, TEMP2, TEMPR, ULP + COMPLEX ABI22, AD11, AD12, AD21, AD22, CTEMP, CTEMP2, + $ CTEMP3, ESHIFT, RTDISC, S, SHIFT, SIGNBC, T, + $ U12, X +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHS, SLAMCH + EXTERNAL LSAME, CLANHS, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLARTG, CLASET, CROT, CSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, MIN, REAL, SQRT +* .. +* .. Statement Functions .. + REAL ABS1 +* .. +* .. Statement Function definitions .. + ABS1( X ) = ABS( REAL( X ) ) + ABS( AIMAG( X ) ) +* .. +* .. Executable Statements .. +* +* Decode JOB, COMPQ, COMPZ +* + IF( LSAME( JOB, 'E' ) ) THEN + ILSCHR = .FALSE. + ISCHUR = 1 + ELSE IF( LSAME( JOB, 'S' ) ) THEN + ILSCHR = .TRUE. + ISCHUR = 2 + ELSE + ISCHUR = 0 + END IF +* + IF( LSAME( COMPQ, 'N' ) ) THEN + ILQ = .FALSE. + ICOMPQ = 1 + ELSE IF( LSAME( COMPQ, 'V' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 2 + ELSE IF( LSAME( COMPQ, 'I' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 3 + ELSE + ICOMPQ = 0 + END IF +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ILZ = .FALSE. + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 2 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 3 + ELSE + ICOMPZ = 0 + END IF +* +* Check Argument Values +* + INFO = 0 + WORK( 1 ) = MAX( 1, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( ISCHUR.EQ.0 ) THEN + INFO = -1 + ELSE IF( ICOMPQ.EQ.0 ) THEN + INFO = -2 + ELSE IF( ICOMPZ.EQ.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( ILO.LT.1 ) THEN + INFO = -5 + ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN + INFO = -6 + ELSE IF( LDA.LT.N ) THEN + INFO = -8 + ELSE IF( LDB.LT.N ) THEN + INFO = -10 + ELSE IF( LDQ.LT.1 .OR. ( ILQ .AND. LDQ.LT.N ) ) THEN + INFO = -14 + ELSE IF( LDZ.LT.1 .OR. ( ILZ .AND. LDZ.LT.N ) ) THEN + INFO = -16 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHGEQZ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* +c WORK( 1 ) = CMPLX( 1 ) + IF( N.LE.0 ) THEN + WORK( 1 ) = CMPLX( 1 ) + RETURN + END IF +* +* Initialize Q and Z +* + IF( ICOMPQ.EQ.3 ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, Q, LDQ ) + IF( ICOMPZ.EQ.3 ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, Z, LDZ ) +* +* Machine Constants +* + IN = IHI + 1 - ILO + SAFMIN = SLAMCH( 'S' ) + ULP = SLAMCH( 'E' )*SLAMCH( 'B' ) + ANORM = CLANHS( 'F', IN, A( ILO, ILO ), LDA, RWORK ) + BNORM = CLANHS( 'F', IN, B( ILO, ILO ), LDB, RWORK ) + ATOL = MAX( SAFMIN, ULP*ANORM ) + BTOL = MAX( SAFMIN, ULP*BNORM ) + ASCALE = ONE / MAX( SAFMIN, ANORM ) + BSCALE = ONE / MAX( SAFMIN, BNORM ) +* +* +* Set Eigenvalues IHI+1:N +* + DO 10 J = IHI + 1, N + ABSB = ABS( B( J, J ) ) + IF( ABSB.GT.SAFMIN ) THEN + SIGNBC = CONJG( B( J, J ) / ABSB ) + B( J, J ) = ABSB + IF( ILSCHR ) THEN + CALL CSCAL( J-1, SIGNBC, B( 1, J ), 1 ) + CALL CSCAL( J, SIGNBC, A( 1, J ), 1 ) + ELSE + A( J, J ) = A( J, J )*SIGNBC + END IF + IF( ILZ ) + $ CALL CSCAL( N, SIGNBC, Z( 1, J ), 1 ) + ELSE + B( J, J ) = CZERO + END IF + ALPHA( J ) = A( J, J ) + BETA( J ) = B( J, J ) + 10 CONTINUE +* +* If IHI < ILO, skip QZ steps +* + IF( IHI.LT.ILO ) + $ GO TO 190 +* +* MAIN QZ ITERATION LOOP +* +* Initialize dynamic indices +* +* Eigenvalues ILAST+1:N have been found. +* Column operations modify rows IFRSTM:whatever +* Row operations modify columns whatever:ILASTM +* +* If only eigenvalues are being computed, then +* IFRSTM is the row of the last splitting row above row ILAST; +* this is always at least ILO. +* IITER counts iterations since the last eigenvalue was found, +* to tell when to use an extraordinary shift. +* MAXIT is the maximum number of QZ sweeps allowed. +* + ILAST = IHI + IF( ILSCHR ) THEN + IFRSTM = 1 + ILASTM = N + ELSE + IFRSTM = ILO + ILASTM = IHI + END IF + IITER = 0 + ESHIFT = CZERO + MAXIT = 30*( IHI-ILO+1 ) +* + DO 170 JITER = 1, MAXIT +* +* Check for too many iterations. +* + IF( JITER.GT.MAXIT ) + $ GO TO 180 +* +* Split the matrix if possible. +* +* Two tests: +* 1: A(j,j-1)=0 or j=ILO +* 2: B(j,j)=0 +* +* Special case: j=ILAST +* + IF( ILAST.EQ.ILO ) THEN + GO TO 60 + ELSE + IF( ABS1( A( ILAST, ILAST-1 ) ).LE.ATOL ) THEN + A( ILAST, ILAST-1 ) = CZERO + GO TO 60 + END IF + END IF +* + IF( ABS( B( ILAST, ILAST ) ).LE.BTOL ) THEN + B( ILAST, ILAST ) = CZERO + GO TO 50 + END IF +* +* General case: j= 0. +* +* AP (input) COMPLEX array, dimension (N*(N+1)/2) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by CHPTRF, stored as a +* packed triangular matrix. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by CHPTRF. +* +* ANORM (input) REAL +* The 1-norm of the original matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IP, KASE + REAL AINVNM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHPTRS, CLACON, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + IP = N*( N+1 ) / 2 + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP - I + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + IP = 1 + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP + N - I + 1 + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L') or inv(U*D*U'). +* + CALL CHPTRS( UPLO, N, 1, AP, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of CHPCON +* + END diff --git a/costa/native/external/lapack/chpev.f b/costa/native/external/lapack/chpev.f new file mode 100644 index 000000000..f118d7bc2 --- /dev/null +++ b/costa/native/external/lapack/chpev.f @@ -0,0 +1,197 @@ + SUBROUTINE CHPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, RWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + REAL RWORK( * ), W( * ) + COMPLEX AP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CHPEV computes all the eigenvalues and, optionally, eigenvectors of a +* complex Hermitian matrix in packed storage. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, AP is overwritten by values generated during the +* reduction to tridiagonal form. If UPLO = 'U', the diagonal +* and first superdiagonal of the tridiagonal matrix T overwrite +* the corresponding elements of A, and if UPLO = 'L', the +* diagonal and first subdiagonal of T overwrite the +* corresponding elements of A. +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) COMPLEX array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +* eigenvectors of the matrix A, with the i-th column of Z +* holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) COMPLEX array, dimension (max(1, 2*N-1)) +* +* RWORK (workspace) REAL array, dimension (max(1, 3*N-2)) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL WANTZ + INTEGER IINFO, IMAX, INDE, INDRWK, INDTAU, INDWRK, + $ ISCALE + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHP, SLAMCH + EXTERNAL LSAME, CLANHP, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CHPTRD, CSSCAL, CSTEQR, CUPGTR, SSCAL, SSTERF, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -7 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPEV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AP( 1 ) + RWORK( 1 ) = 1 + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = CLANHP( 'M', UPLO, N, AP, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL CSSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + END IF +* +* Call CHPTRD to reduce Hermitian packed matrix to tridiagonal form. +* + INDE = 1 + INDTAU = 1 + CALL CHPTRD( UPLO, N, AP, W, RWORK( INDE ), WORK( INDTAU ), + $ IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* CUPGTR to generate the orthogonal matrix, then call CSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + INDWRK = INDTAU + N + CALL CUPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + INDRWK = INDE + N + CALL CSTEQR( JOBZ, N, W, RWORK( INDE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + RETURN +* +* End of CHPEV +* + END diff --git a/costa/native/external/lapack/chpevd.f b/costa/native/external/lapack/chpevd.f new file mode 100644 index 000000000..6783119d6 --- /dev/null +++ b/costa/native/external/lapack/chpevd.f @@ -0,0 +1,280 @@ + SUBROUTINE CHPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, + $ RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX AP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CHPEVD computes all the eigenvalues and, optionally, eigenvectors of +* a complex Hermitian matrix A in packed storage. If eigenvectors are +* desired, it uses a divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, AP is overwritten by values generated during the +* reduction to tridiagonal form. If UPLO = 'U', the diagonal +* and first superdiagonal of the tridiagonal matrix T overwrite +* the corresponding elements of A, and if UPLO = 'L', the +* diagonal and first subdiagonal of T overwrite the +* corresponding elements of A. +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) COMPLEX array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +* eigenvectors of the matrix A, with the i-th column of Z +* holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of array WORK. +* If N <= 1, LWORK must be at least 1. +* If JOBZ = 'N' and N > 1, LWORK must be at least N. +* If JOBZ = 'V' and N > 1, LWORK must be at least 2*N. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace/output) REAL array, +* dimension (LRWORK) +* On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +* +* LRWORK (input) INTEGER +* The dimension of array RWORK. +* If N <= 1, LRWORK must be at least 1. +* If JOBZ = 'N' and N > 1, LRWORK must be at least N. +* If JOBZ = 'V' and N > 1, LRWORK must be at least +* 1 + 5*N + 2*N**2. +* +* If LRWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the RWORK array, +* returns this value as the first entry of the RWORK array, and +* no error message related to LRWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of array IWORK. +* If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +* If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDRWK, INDTAU, INDWRK, + $ ISCALE, LIWMIN, LLRWK, LLWRK, LRWMIN, LWMIN + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHP, SLAMCH + EXTERNAL LSAME, CLANHP, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CHPTRD, CSSCAL, CSTEDC, CUPMTR, SSCAL, SSTERF, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LWMIN = 1 + LIWMIN = 1 + LRWMIN = 1 + ELSE + IF( WANTZ ) THEN + LWMIN = 2*N + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + LRWMIN = N + LIWMIN = 1 + END IF + END IF + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -9 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPEVD', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AP( 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = CLANHP( 'M', UPLO, N, AP, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL CSSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + END IF +* +* Call CHPTRD to reduce Hermitian packed matrix to tridiagonal form. +* + INDE = 1 + INDTAU = 1 + INDRWK = INDE + N + INDWRK = INDTAU + N + LLWRK = LWORK - INDWRK + 1 + LLRWK = LRWORK - INDRWK + 1 + CALL CHPTRD( UPLO, N, AP, W, RWORK( INDE ), WORK( INDTAU ), + $ IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* CUPGTR to generate the orthogonal matrix, then call CSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL CSTEDC( 'I', N, W, RWORK( INDE ), Z, LDZ, WORK( INDWRK ), + $ LLWRK, RWORK( INDRWK ), LLRWK, IWORK, LIWORK, + $ INFO ) + CALL CUPMTR( 'L', UPLO, 'N', N, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of CHPEVD +* + END diff --git a/costa/native/external/lapack/chpevx.f b/costa/native/external/lapack/chpevx.f new file mode 100644 index 000000000..3e7694904 --- /dev/null +++ b/costa/native/external/lapack/chpevx.f @@ -0,0 +1,384 @@ + SUBROUTINE CHPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, WORK, RWORK, IWORK, + $ IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDZ, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX AP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CHPEVX computes selected eigenvalues and, optionally, eigenvectors +* of a complex Hermitian matrix A in packed storage. +* Eigenvalues/vectors can be selected by specifying either a range of +* values or a range of indices for the desired eigenvalues. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found; +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found; +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, AP is overwritten by values generated during the +* reduction to tridiagonal form. If UPLO = 'U', the diagonal +* and first superdiagonal of the tridiagonal matrix T overwrite +* the corresponding elements of A, and if UPLO = 'L', the +* diagonal and first subdiagonal of T overwrite the +* corresponding elements of A. +* +* VL (input) REAL +* VU (input) REAL +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) REAL +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing AP to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*SLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*SLAMCH('S'). +* +* See "Computing Small Singular Values of Bidiagonal Matrices +* with Guaranteed High Relative Accuracy," by Demmel and +* Kahan, LAPACK Working Note #3. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the selected eigenvalues in ascending order. +* +* Z (output) COMPLEX array, dimension (LDZ, max(1,M)) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If an eigenvector fails to converge, then that column of Z +* contains the latest approximation to the eigenvector, and +* the index of the eigenvector is returned in IFAIL. +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (7*N) +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, then i eigenvectors failed to converge. +* Their indices are stored in array IFAIL. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWK, INDRWK, INDTAU, INDWRK, ISCALE, + $ ITMP1, J, JJ, NSPLIT + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHP, SLAMCH + EXTERNAL LSAME, CLANHP, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CHPTRD, CSSCAL, CSTEIN, CSTEQR, CSWAP, CUPGTR, + $ CUPMTR, SCOPY, SSCAL, SSTEBZ, SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) ) + $ THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -7 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -9 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -14 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPEVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = AP( 1 ) + ELSE + IF( VL.LT.REAL( AP( 1 ) ) .AND. VU.GE.REAL( AP( 1 ) ) ) THEN + M = 1 + W( 1 ) = AP( 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF ( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + ENDIF + ANRM = CLANHP( 'M', UPLO, N, AP, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL CSSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call CHPTRD to reduce Hermitian packed matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N + INDTAU = 1 + INDWRK = INDTAU + N + CALL CHPTRD( UPLO, N, AP, RWORK( INDD ), RWORK( INDE ), + $ WORK( INDTAU ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call SSTERF or CUPGTR and CSTEQR. If this fails +* for some eigenvalue, then try SSTEBZ. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ ( ABSTOL.LE.ZERO ) ) THEN + CALL SCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL SSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL CUPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL CSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 20 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL CSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by CSTEIN. +* + INDWRK = INDTAU + N + CALL CUPMTR( 'L', UPLO, 'N', N, M, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 20 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 40 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 30 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 30 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 40 CONTINUE + END IF +* + RETURN +* +* End of CHPEVX +* + END diff --git a/costa/native/external/lapack/chpgst.f b/costa/native/external/lapack/chpgst.f new file mode 100644 index 000000000..64a158ae5 --- /dev/null +++ b/costa/native/external/lapack/chpgst.f @@ -0,0 +1,216 @@ + SUBROUTINE CHPGST( ITYPE, UPLO, N, AP, BP, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, N +* .. +* .. Array Arguments .. + COMPLEX AP( * ), BP( * ) +* .. +* +* Purpose +* ======= +* +* CHPGST reduces a complex Hermitian-definite generalized +* eigenproblem to standard form, using packed storage. +* +* If ITYPE = 1, the problem is A*x = lambda*B*x, +* and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) +* +* If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +* B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. +* +* B must have been previously factorized as U**H*U or L*L**H by CPPTRF. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H); +* = 2 or 3: compute U*A*U**H or L**H*A*L. +* +* UPLO (input) CHARACTER +* = 'U': Upper triangle of A is stored and B is factored as +* U**H*U; +* = 'L': Lower triangle of A is stored and B is factored as +* L*L**H. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* AP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, if INFO = 0, the transformed matrix, stored in the +* same format as A. +* +* BP (input) COMPLEX array, dimension (N*(N+1)/2) +* The triangular factor from the Cholesky factorization of B, +* stored in the same format as A, as returned by CPPTRF. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, HALF + PARAMETER ( ONE = 1.0E+0, HALF = 0.5E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, J1, J1J1, JJ, K, K1, K1K1, KK + REAL AJJ, AKK, BJJ, BKK + COMPLEX CT +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CHPMV, CHPR2, CSSCAL, CTPMV, CTPSV, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPGST', -INFO ) + RETURN + END IF +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U')*A*inv(U) +* +* J1 and JJ are the indices of A(1,j) and A(j,j) +* + JJ = 0 + DO 10 J = 1, N + J1 = JJ + 1 + JJ = JJ + J +* +* Compute the j-th column of the upper triangle of A +* + AP( JJ ) = REAL( AP( JJ ) ) + BJJ = BP( JJ ) + CALL CTPSV( UPLO, 'Conjugate transpose', 'Non-unit', J, + $ BP, AP( J1 ), 1 ) + CALL CHPMV( UPLO, J-1, -CONE, AP, BP( J1 ), 1, CONE, + $ AP( J1 ), 1 ) + CALL CSSCAL( J-1, ONE / BJJ, AP( J1 ), 1 ) + AP( JJ ) = ( AP( JJ )-CDOTC( J-1, AP( J1 ), 1, BP( J1 ), + $ 1 ) ) / BJJ + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L') +* +* KK and K1K1 are the indices of A(k,k) and A(k+1,k+1) +* + KK = 1 + DO 20 K = 1, N + K1K1 = KK + N - K + 1 +* +* Update the lower triangle of A(k:n,k:n) +* + AKK = AP( KK ) + BKK = BP( KK ) + AKK = AKK / BKK**2 + AP( KK ) = AKK + IF( K.LT.N ) THEN + CALL CSSCAL( N-K, ONE / BKK, AP( KK+1 ), 1 ) + CT = -HALF*AKK + CALL CAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 ) + CALL CHPR2( UPLO, N-K, -CONE, AP( KK+1 ), 1, + $ BP( KK+1 ), 1, AP( K1K1 ) ) + CALL CAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 ) + CALL CTPSV( UPLO, 'No transpose', 'Non-unit', N-K, + $ BP( K1K1 ), AP( KK+1 ), 1 ) + END IF + KK = K1K1 + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U' +* +* K1 and KK are the indices of A(1,k) and A(k,k) +* + KK = 0 + DO 30 K = 1, N + K1 = KK + 1 + KK = KK + K +* +* Update the upper triangle of A(1:k,1:k) +* + AKK = AP( KK ) + BKK = BP( KK ) + CALL CTPMV( UPLO, 'No transpose', 'Non-unit', K-1, BP, + $ AP( K1 ), 1 ) + CT = HALF*AKK + CALL CAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 ) + CALL CHPR2( UPLO, K-1, CONE, AP( K1 ), 1, BP( K1 ), 1, + $ AP ) + CALL CAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 ) + CALL CSSCAL( K-1, BKK, AP( K1 ), 1 ) + AP( KK ) = AKK*BKK**2 + 30 CONTINUE + ELSE +* +* Compute L'*A*L +* +* JJ and J1J1 are the indices of A(j,j) and A(j+1,j+1) +* + JJ = 1 + DO 40 J = 1, N + J1J1 = JJ + N - J + 1 +* +* Compute the j-th column of the lower triangle of A +* + AJJ = AP( JJ ) + BJJ = BP( JJ ) + AP( JJ ) = AJJ*BJJ + CDOTC( N-J, AP( JJ+1 ), 1, + $ BP( JJ+1 ), 1 ) + CALL CSSCAL( N-J, BJJ, AP( JJ+1 ), 1 ) + CALL CHPMV( UPLO, N-J, CONE, AP( J1J1 ), BP( JJ+1 ), 1, + $ CONE, AP( JJ+1 ), 1 ) + CALL CTPMV( UPLO, 'Conjugate transpose', 'Non-unit', + $ N-J+1, BP( JJ ), AP( JJ ), 1 ) + JJ = J1J1 + 40 CONTINUE + END IF + END IF + RETURN +* +* End of CHPGST +* + END diff --git a/costa/native/external/lapack/chpgv.f b/costa/native/external/lapack/chpgv.f new file mode 100644 index 000000000..b3de68ef8 --- /dev/null +++ b/costa/native/external/lapack/chpgv.f @@ -0,0 +1,197 @@ + SUBROUTINE CHPGV( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, + $ RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDZ, N +* .. +* .. Array Arguments .. + REAL RWORK( * ), W( * ) + COMPLEX AP( * ), BP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CHPGV computes all the eigenvalues and, optionally, the eigenvectors +* of a complex generalized Hermitian-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +* Here A and B are assumed to be Hermitian, stored in packed format, +* and B is also positive definite. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* AP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the contents of AP are destroyed. +* +* BP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* B, packed columnwise in a linear array. The j-th column of B +* is stored in the array BP as follows: +* if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +* if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +* +* On exit, the triangular factor U or L from the Cholesky +* factorization B = U**H*U or B = L*L**H, in the same storage +* format as B. +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) COMPLEX array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +* eigenvectors. The eigenvectors are normalized as follows: +* if ITYPE = 1 or 2, Z**H*B*Z = I; +* if ITYPE = 3, Z**H*inv(B)*Z = I. +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) COMPLEX array, dimension (max(1, 2*N-1)) +* +* RWORK (workspace) REAL array, dimension (max(1, 3*N-2)) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: CPPTRF or CHPEV returned an error code: +* <= N: if INFO = i, CHPEV failed to converge; +* i off-diagonal elements of an intermediate +* tridiagonal form did not convergeto zero; +* > N: if INFO = N + i, for 1 <= i <= n, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, WANTZ + CHARACTER TRANS + INTEGER J, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHPEV, CHPGST, CPPTRF, CTPMV, CTPSV, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPGV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL CPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL CHPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL CHPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, RWORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + DO 10 J = 1, NEIG + CALL CTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, NEIG + CALL CTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF + RETURN +* +* End of CHPGV +* + END diff --git a/costa/native/external/lapack/chpgvd.f b/costa/native/external/lapack/chpgvd.f new file mode 100644 index 000000000..a2dcad50a --- /dev/null +++ b/costa/native/external/lapack/chpgvd.f @@ -0,0 +1,291 @@ + SUBROUTINE CHPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, + $ LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX AP( * ), BP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CHPGVD computes all the eigenvalues and, optionally, the eigenvectors +* of a complex generalized Hermitian-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +* B are assumed to be Hermitian, stored in packed format, and B is also +* positive definite. +* If eigenvectors are desired, it uses a divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* AP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the contents of AP are destroyed. +* +* BP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* B, packed columnwise in a linear array. The j-th column of B +* is stored in the array BP as follows: +* if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +* if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +* +* On exit, the triangular factor U or L from the Cholesky +* factorization B = U**H*U or B = L*L**H, in the same storage +* format as B. +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) COMPLEX array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +* eigenvectors. The eigenvectors are normalized as follows: +* if ITYPE = 1 or 2, Z**H*B*Z = I; +* if ITYPE = 3, Z**H*inv(B)*Z = I. +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of array WORK. +* If N <= 1, LWORK >= 1. +* If JOBZ = 'N' and N > 1, LWORK >= N. +* If JOBZ = 'V' and N > 1, LWORK >= 2*N. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) REAL array, dimension (LRWORK) +* On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +* +* LRWORK (input) INTEGER +* The dimension of array RWORK. +* If N <= 1, LRWORK >= 1. +* If JOBZ = 'N' and N > 1, LRWORK >= N. +* If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2. +* +* If LRWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the RWORK array, +* returns this value as the first entry of the RWORK array, and +* no error message related to LRWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of array IWORK. +* If JOBZ = 'N' or N <= 1, LIWORK >= 1. +* If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: CPPTRF or CHPEVD returned an error code: +* <= N: if INFO = i, CHPEVD failed to converge; +* i off-diagonal elements of an intermediate +* tridiagonal form did not convergeto zero; +* > N: if INFO = N + i, for 1 <= i <= n, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER J, LIWMIN, LRWMIN, LWMIN, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHPEVD, CHPGST, CPPTRF, CTPMV, CTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LWMIN = 1 + LIWMIN = 1 + LRWMIN = 1 + ELSE + IF( WANTZ ) THEN + LWMIN = 2*N + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + LRWMIN = N + LIWMIN = 1 + END IF + END IF + IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL CPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL CHPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL CHPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, RWORK, + $ LRWORK, IWORK, LIWORK, INFO ) + LWMIN = MAX( REAL( LWMIN ), REAL( WORK( 1 ) ) ) + LRWMIN = MAX( REAL( LRWMIN ), REAL( RWORK( 1 ) ) ) + LIWMIN = MAX( REAL( LIWMIN ), REAL( IWORK( 1 ) ) ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + DO 10 J = 1, NEIG + CALL CTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, NEIG + CALL CTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of CHPGVD +* + END diff --git a/costa/native/external/lapack/chpgvx.f b/costa/native/external/lapack/chpgvx.f new file mode 100644 index 000000000..8b0b97f9b --- /dev/null +++ b/costa/native/external/lapack/chpgvx.f @@ -0,0 +1,284 @@ + SUBROUTINE CHPGVX( ITYPE, JOBZ, RANGE, UPLO, N, AP, BP, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, WORK, RWORK, + $ IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, ITYPE, IU, LDZ, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX AP( * ), BP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CHPGVX computes selected eigenvalues and, optionally, eigenvectors +* of a complex generalized Hermitian-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +* B are assumed to be Hermitian, stored in packed format, and B is also +* positive definite. Eigenvalues and eigenvectors can be selected by +* specifying either a range of values or a range of indices for the +* desired eigenvalues. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found; +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found; +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* AP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the contents of AP are destroyed. +* +* BP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* B, packed columnwise in a linear array. The j-th column of B +* is stored in the array BP as follows: +* if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +* if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +* +* On exit, the triangular factor U or L from the Cholesky +* factorization B = U**H*U or B = L*L**H, in the same storage +* format as B. +* +* VL (input) REAL +* VU (input) REAL +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) REAL +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing AP to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*SLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*SLAMCH('S'). +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) REAL array, dimension (N) +* On normal exit, the first M elements contain the selected +* eigenvalues in ascending order. +* +* Z (output) COMPLEX array, dimension (LDZ, N) +* If JOBZ = 'N', then Z is not referenced. +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* The eigenvectors are normalized as follows: +* if ITYPE = 1 or 2, Z**H*B*Z = I; +* if ITYPE = 3, Z**H*inv(B)*Z = I. +* +* If an eigenvector fails to converge, then that column of Z +* contains the latest approximation to the eigenvector, and the +* index of the eigenvector is returned in IFAIL. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (7*N) +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: CPPTRF or CHPEVX returned an error code: +* <= N: if INFO = i, CHPEVX failed to converge; +* i eigenvectors failed to converge. Their indices +* are stored in array IFAIL. +* > N: if INFO = N + i, for 1 <= i <= n, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, UPPER, VALEIG, WANTZ + CHARACTER TRANS + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHPEVX, CHPGST, CPPTRF, CTPMV, CTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -3 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN + INFO = -9 + ELSE IF( INDEIG .AND. IL.LT.1 ) THEN + INFO = -10 + ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN + INFO = -11 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPGVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL CPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL CHPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL CHPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, ABSTOL, M, + $ W, Z, LDZ, WORK, RWORK, IWORK, IFAIL, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + IF( INFO.GT.0 ) + $ M = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + DO 10 J = 1, M + CALL CTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, M + CALL CTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHPGVX +* + END diff --git a/costa/native/external/lapack/chprfs.f b/costa/native/external/lapack/chprfs.f new file mode 100644 index 000000000..c24e6a8cd --- /dev/null +++ b/costa/native/external/lapack/chprfs.f @@ -0,0 +1,337 @@ + SUBROUTINE CHPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, + $ FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CHPRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is Hermitian indefinite +* and packed, and provides error bounds and backward error estimates +* for the solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input) COMPLEX array, dimension (N*(N+1)/2) +* The upper or lower triangle of the Hermitian matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* AFP (input) COMPLEX array, dimension (N*(N+1)/2) +* The factored form of the matrix A. AFP contains the block +* diagonal matrix D and the multipliers used to obtain the +* factor U or L from the factorization A = U*D*U**H or +* A = L*D*L**H as computed by CHPTRF, stored as a packed +* triangular matrix. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by CHPTRF. +* +* B (input) COMPLEX array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) COMPLEX array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by CHPTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IK, J, K, KASE, KK, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CHPMV, CHPTRS, CLACON, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL CCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL CHPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + KK = 1 + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + IK = KK + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 40 CONTINUE + RWORK( K ) = RWORK( K ) + ABS( REAL( AP( KK+K-1 ) ) )* + $ XK + S + KK = KK + K + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + ABS( REAL( AP( KK ) ) )*XK + IK = KK + 1 + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + KK = KK + ( N-K+1 ) + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL CHPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + CALL CAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL CHPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL CHPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of CHPRFS +* + END diff --git a/costa/native/external/lapack/chpsv.f b/costa/native/external/lapack/chpsv.f new file mode 100644 index 000000000..6b2cd907d --- /dev/null +++ b/costa/native/external/lapack/chpsv.f @@ -0,0 +1,149 @@ + SUBROUTINE CHPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CHPSV computes the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N Hermitian matrix stored in packed format and X +* and B are N-by-NRHS matrices. +* +* The diagonal pivoting method is used to factor A as +* A = U * D * U**H, if UPLO = 'U', or +* A = L * D * L**H, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, D is Hermitian and block diagonal with 1-by-1 +* and 2-by-2 diagonal blocks. The factored form of A is then used to +* solve the system of equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**H or A = L*D*L**H as computed by CHPTRF, stored as +* a packed triangular matrix in the same storage format as A. +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D, as +* determined by CHPTRF. If IPIV(k) > 0, then rows and columns +* k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +* diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +* then rows and columns k-1 and -IPIV(k) were interchanged and +* D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +* IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +* -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +* diagonal block. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, so the solution could not be +* computed. +* +* Further Details +* =============== +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the Hermitian matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = conjg(aji)) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHPTRF, CHPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPSV ', -INFO ) + RETURN + END IF +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL CHPTRF( UPLO, N, AP, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CHPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* + END IF + RETURN +* +* End of CHPSV +* + END diff --git a/costa/native/external/lapack/chpsvx.f b/costa/native/external/lapack/chpsvx.f new file mode 100644 index 000000000..6694cfa96 --- /dev/null +++ b/costa/native/external/lapack/chpsvx.f @@ -0,0 +1,279 @@ + SUBROUTINE CHPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, + $ LDX, RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CHPSVX uses the diagonal pivoting factorization A = U*D*U**H or +* A = L*D*L**H to compute the solution to a complex system of linear +* equations A * X = B, where A is an N-by-N Hermitian matrix stored +* in packed format and X and B are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'N', the diagonal pivoting method is used to factor A as +* A = U * D * U**H, if UPLO = 'U', or +* A = L * D * L**H, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices and D is Hermitian and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. +* +* 2. If some D(i,i)=0, so that D is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 3. The system of equations is solved for X using the factored form +* of A. +* +* 4. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of A has been +* supplied on entry. +* = 'F': On entry, AFP and IPIV contain the factored form of +* A. AFP and IPIV will not be modified. +* = 'N': The matrix A will be copied to AFP and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input) COMPLEX array, dimension (N*(N+1)/2) +* The upper or lower triangle of the Hermitian matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* +* AFP (input or output) COMPLEX array, dimension (N*(N+1)/2) +* If FACT = 'F', then AFP is an input argument and on entry +* contains the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**H or A = L*D*L**H as computed by CHPTRF, stored as +* a packed triangular matrix in the same storage format as A. +* +* If FACT = 'N', then AFP is an output argument and on exit +* contains the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**H or A = L*D*L**H as computed by CHPTRF, stored as +* a packed triangular matrix in the same storage format as A. +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains details of the interchanges and the block structure +* of D, as determined by CHPTRF. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains details of the interchanges and the block structure +* of D, as determined by CHPTRF. +* +* B (input) COMPLEX array, dimension (LDB,NRHS) +* The N-by-NRHS right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) COMPLEX array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) REAL +* The estimate of the reciprocal condition number of the matrix +* A. If RCOND is less than the machine precision (in +* particular, if RCOND = 0), the matrix is singular to working +* precision. This condition is indicated by a return code of +* INFO > 0. +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: D(i,i) is exactly zero. The factorization +* has been completed but the factor D is exactly +* singular, so the solution and error bounds could +* not be computed. RCOND = 0 is returned. +* = N+1: D is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* Further Details +* =============== +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the Hermitian matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = conjg(aji)) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT + REAL ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHP, SLAMCH + EXTERNAL LSAME, CLANHP, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CHPCON, CHPRFS, CHPTRF, CHPTRS, CLACPY, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL CCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 ) + CALL CHPTRF( UPLO, N, AFP, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = CLANHP( 'I', UPLO, N, AP, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL CHPCON( UPLO, N, AFP, IPIV, ANORM, RCOND, WORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution vectors X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CHPTRS( UPLO, N, NRHS, AFP, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL CHPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, FERR, + $ BERR, WORK, RWORK, INFO ) +* + RETURN +* +* End of CHPSVX +* + END diff --git a/costa/native/external/lapack/chptrd.f b/costa/native/external/lapack/chptrd.f new file mode 100644 index 000000000..1a37238c8 --- /dev/null +++ b/costa/native/external/lapack/chptrd.f @@ -0,0 +1,238 @@ + SUBROUTINE CHPTRD( UPLO, N, AP, D, E, TAU, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + COMPLEX AP( * ), TAU( * ) +* .. +* +* Purpose +* ======= +* +* CHPTRD reduces a complex Hermitian matrix A stored in packed form to +* real symmetric tridiagonal form T by a unitary similarity +* transformation: Q**H * A * Q = T. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* On exit, if UPLO = 'U', the diagonal and first superdiagonal +* of A are overwritten by the corresponding elements of the +* tridiagonal matrix T, and the elements above the first +* superdiagonal, with the array TAU, represent the unitary +* matrix Q as a product of elementary reflectors; if UPLO +* = 'L', the diagonal and first subdiagonal of A are over- +* written by the corresponding elements of the tridiagonal +* matrix T, and the elements below the first subdiagonal, with +* the array TAU, represent the unitary matrix Q as a product +* of elementary reflectors. See Further Details. +* +* D (output) REAL array, dimension (N) +* The diagonal elements of the tridiagonal matrix T: +* D(i) = A(i,i). +* +* E (output) REAL array, dimension (N-1) +* The off-diagonal elements of the tridiagonal matrix T: +* E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +* +* TAU (output) COMPLEX array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* If UPLO = 'U', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(n-1) . . . H(2) H(1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in AP, +* overwriting A(1:i-1,i+1), and tau is stored in TAU(i). +* +* If UPLO = 'L', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(1) H(2) . . . H(n-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in AP, +* overwriting A(i+2:n,i), and tau is stored in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO, HALF + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ), + $ HALF = ( 0.5E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, I1, I1I1, II + COMPLEX ALPHA, TAUI +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CHPMV, CHPR2, CLARFG, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPTRD', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A. +* I1 is the index in AP of A(1,I+1). +* + I1 = N*( N-1 ) / 2 + 1 + AP( I1+N-1 ) = REAL( AP( I1+N-1 ) ) + DO 10 I = N - 1, 1, -1 +* +* Generate elementary reflector H(i) = I - tau * v * v' +* to annihilate A(1:i-1,i+1) +* + ALPHA = AP( I1+I-1 ) + CALL CLARFG( I, ALPHA, AP( I1 ), 1, TAUI ) + E( I ) = ALPHA +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(1:i,1:i) +* + AP( I1+I-1 ) = ONE +* +* Compute y := tau * A * v storing y in TAU(1:i) +* + CALL CHPMV( UPLO, I, TAUI, AP, AP( I1 ), 1, ZERO, TAU, + $ 1 ) +* +* Compute w := y - 1/2 * tau * (y'*v) * v +* + ALPHA = -HALF*TAUI*CDOTC( I, TAU, 1, AP( I1 ), 1 ) + CALL CAXPY( I, ALPHA, AP( I1 ), 1, TAU, 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w' - w * v' +* + CALL CHPR2( UPLO, I, -ONE, AP( I1 ), 1, TAU, 1, AP ) +* + END IF + AP( I1+I-1 ) = E( I ) + D( I+1 ) = AP( I1+I ) + TAU( I ) = TAUI + I1 = I1 - I + 10 CONTINUE + D( 1 ) = AP( 1 ) + ELSE +* +* Reduce the lower triangle of A. II is the index in AP of +* A(i,i) and I1I1 is the index of A(i+1,i+1). +* + II = 1 + AP( 1 ) = REAL( AP( 1 ) ) + DO 20 I = 1, N - 1 + I1I1 = II + N - I + 1 +* +* Generate elementary reflector H(i) = I - tau * v * v' +* to annihilate A(i+2:n,i) +* + ALPHA = AP( II+1 ) + CALL CLARFG( N-I, ALPHA, AP( II+2 ), 1, TAUI ) + E( I ) = ALPHA +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(i+1:n,i+1:n) +* + AP( II+1 ) = ONE +* +* Compute y := tau * A * v storing y in TAU(i:n-1) +* + CALL CHPMV( UPLO, N-I, TAUI, AP( I1I1 ), AP( II+1 ), 1, + $ ZERO, TAU( I ), 1 ) +* +* Compute w := y - 1/2 * tau * (y'*v) * v +* + ALPHA = -HALF*TAUI*CDOTC( N-I, TAU( I ), 1, AP( II+1 ), + $ 1 ) + CALL CAXPY( N-I, ALPHA, AP( II+1 ), 1, TAU( I ), 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w' - w * v' +* + CALL CHPR2( UPLO, N-I, -ONE, AP( II+1 ), 1, TAU( I ), 1, + $ AP( I1I1 ) ) +* + END IF + AP( II+1 ) = E( I ) + D( I ) = AP( II ) + TAU( I ) = TAUI + II = I1I1 + 20 CONTINUE + D( N ) = AP( II ) + END IF +* + RETURN +* +* End of CHPTRD +* + END diff --git a/costa/native/external/lapack/chptrf.f b/costa/native/external/lapack/chptrf.f new file mode 100644 index 000000000..632c23935 --- /dev/null +++ b/costa/native/external/lapack/chptrf.f @@ -0,0 +1,581 @@ + SUBROUTINE CHPTRF( UPLO, N, AP, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AP( * ) +* .. +* +* Purpose +* ======= +* +* CHPTRF computes the factorization of a complex Hermitian packed +* matrix A using the Bunch-Kaufman diagonal pivoting method: +* +* A = U*D*U**H or A = L*D*L**H +* +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is Hermitian and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L, stored as a packed triangular +* matrix overwriting A (see below for further details). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, and division by zero will occur if it +* is used to solve a system of equations. +* +* Further Details +* =============== +* +* 5-96 - Based on modifications by J. Lewis, Boeing Computer Services +* Company +* +* If UPLO = 'U', then A = U*D*U', where +* U = P(n)*U(n)* ... *P(k)U(k)* ..., +* i.e., U is a product of terms P(k)*U(k), where k decreases from n to +* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I v 0 ) k-s +* U(k) = ( 0 I 0 ) s +* ( 0 0 I ) n-k +* k-s s n-k +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +* and A(k,k), and v overwrites A(1:k-2,k-1:k). +* +* If UPLO = 'L', then A = L*D*L', where +* L = P(1)*L(1)* ... *P(k)*L(k)* ..., +* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I 0 0 ) k-1 +* L(k) = ( 0 I 0 ) s +* ( 0 v I ) n-k-s+1 +* k-1 s n-k-s+1 +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KC, KK, KNC, KP, KPC, + $ KSTEP, KX, NPP + REAL ABSAKK, ALPHA, COLMAX, D, D11, D22, R1, ROWMAX, + $ TT + COMPLEX D12, D21, T, WK, WKM1, WKP1, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAPY2 + EXTERNAL LSAME, ICAMAX, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL CHPR, CSSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, REAL, SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPTRF', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U' using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + KC = ( N-1 )*N / 2 + 1 + 10 CONTINUE + KNC = KC +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( REAL( AP( KC+K-1 ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = ICAMAX( K-1, AP( KC ), 1 ) + COLMAX = CABS1( AP( KC+IMAX-1 ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + AP( KC+K-1 ) = REAL( AP( KC+K-1 ) ) + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + ROWMAX = ZERO + JMAX = IMAX + KX = IMAX*( IMAX+1 ) / 2 + IMAX + DO 20 J = IMAX + 1, K + IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = CABS1( AP( KX ) ) + JMAX = J + END IF + KX = KX + J + 20 CONTINUE + KPC = ( IMAX-1 )*IMAX / 2 + 1 + IF( IMAX.GT.1 ) THEN + JMAX = ICAMAX( IMAX-1, AP( KPC ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( REAL( AP( KPC+IMAX-1 ) ) ).GE.ALPHA* + $ ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL CSWAP( KP-1, AP( KNC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 30 J = KP + 1, KK - 1 + KX = KX + J - 1 + T = CONJG( AP( KNC+J-1 ) ) + AP( KNC+J-1 ) = CONJG( AP( KX ) ) + AP( KX ) = T + 30 CONTINUE + AP( KX+KK-1 ) = CONJG( AP( KX+KK-1 ) ) + R1 = REAL( AP( KNC+KK-1 ) ) + AP( KNC+KK-1 ) = REAL( AP( KPC+KP-1 ) ) + AP( KPC+KP-1 ) = R1 + IF( KSTEP.EQ.2 ) THEN + AP( KC+K-1 ) = REAL( AP( KC+K-1 ) ) + T = AP( KC+K-2 ) + AP( KC+K-2 ) = AP( KC+KP-1 ) + AP( KC+KP-1 ) = T + END IF + ELSE + AP( KC+K-1 ) = REAL( AP( KC+K-1 ) ) + IF( KSTEP.EQ.2 ) + $ AP( KC-1 ) = REAL( AP( KC-1 ) ) + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)' +* + R1 = ONE / REAL( AP( KC+K-1 ) ) + CALL CHPR( UPLO, K-1, -R1, AP( KC ), 1, AP ) +* +* Store U(k) in column k +* + CALL CSSCAL( K-1, R1, AP( KC ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )' +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )' +* + IF( K.GT.2 ) THEN +* + D = SLAPY2( REAL( AP( K-1+( K-1 )*K / 2 ) ), + $ AIMAG( AP( K-1+( K-1 )*K / 2 ) ) ) + D22 = REAL( AP( K-1+( K-2 )*( K-1 ) / 2 ) ) / D + D11 = REAL( AP( K+( K-1 )*K / 2 ) ) / D + TT = ONE / ( D11*D22-ONE ) + D12 = AP( K-1+( K-1 )*K / 2 ) / D + D = TT / D +* + DO 50 J = K - 2, 1, -1 + WKM1 = D*( D11*AP( J+( K-2 )*( K-1 ) / 2 )- + $ CONJG( D12 )*AP( J+( K-1 )*K / 2 ) ) + WK = D*( D22*AP( J+( K-1 )*K / 2 )-D12* + $ AP( J+( K-2 )*( K-1 ) / 2 ) ) + DO 40 I = J, 1, -1 + AP( I+( J-1 )*J / 2 ) = AP( I+( J-1 )*J / 2 ) - + $ AP( I+( K-1 )*K / 2 )*CONJG( WK ) - + $ AP( I+( K-2 )*( K-1 ) / 2 )*CONJG( WKM1 ) + 40 CONTINUE + AP( J+( K-1 )*K / 2 ) = WK + AP( J+( K-2 )*( K-1 ) / 2 ) = WKM1 + AP( J+( J-1 )*J / 2 ) = CMPLX( REAL( AP( J+( J-1 )* + $ J / 2 ) ), 0.0E+0 ) + 50 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + KC = KNC - K + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L' using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + KC = 1 + NPP = N*( N+1 ) / 2 + 60 CONTINUE + KNC = KC +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( REAL( AP( KC ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + ICAMAX( N-K, AP( KC+1 ), 1 ) + COLMAX = CABS1( AP( KC+IMAX-K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + AP( KC ) = REAL( AP( KC ) ) + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + ROWMAX = ZERO + KX = KC + IMAX - K + DO 70 J = K, IMAX - 1 + IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = CABS1( AP( KX ) ) + JMAX = J + END IF + KX = KX + N - J + 70 CONTINUE + KPC = NPP - ( N-IMAX+1 )*( N-IMAX+2 ) / 2 + 1 + IF( IMAX.LT.N ) THEN + JMAX = IMAX + ICAMAX( N-IMAX, AP( KPC+1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( REAL( AP( KPC ) ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC + N - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, AP( KNC+KP-KK+1 ), 1, AP( KPC+1 ), + $ 1 ) + KX = KNC + KP - KK + DO 80 J = KK + 1, KP - 1 + KX = KX + N - J + 1 + T = CONJG( AP( KNC+J-KK ) ) + AP( KNC+J-KK ) = CONJG( AP( KX ) ) + AP( KX ) = T + 80 CONTINUE + AP( KNC+KP-KK ) = CONJG( AP( KNC+KP-KK ) ) + R1 = REAL( AP( KNC ) ) + AP( KNC ) = REAL( AP( KPC ) ) + AP( KPC ) = R1 + IF( KSTEP.EQ.2 ) THEN + AP( KC ) = REAL( AP( KC ) ) + T = AP( KC+1 ) + AP( KC+1 ) = AP( KC+KP-K ) + AP( KC+KP-K ) = T + END IF + ELSE + AP( KC ) = REAL( AP( KC ) ) + IF( KSTEP.EQ.2 ) + $ AP( KNC ) = REAL( AP( KNC ) ) + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)' +* + R1 = ONE / REAL( AP( KC ) ) + CALL CHPR( UPLO, N-K, -R1, AP( KC+1 ), 1, + $ AP( KC+N-K+1 ) ) +* +* Store L(k) in column K +* + CALL CSSCAL( N-K, R1, AP( KC+1 ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k): columns K and K+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )' +* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )' +* +* where L(k) and L(k+1) are the k-th and (k+1)-th +* columns of L +* + D = SLAPY2( REAL( AP( K+1+( K-1 )*( 2*N-K ) / 2 ) ), + $ AIMAG( AP( K+1+( K-1 )*( 2*N-K ) / 2 ) ) ) + D11 = REAL( AP( K+1+K*( 2*N-K-1 ) / 2 ) ) / D + D22 = REAL( AP( K+( K-1 )*( 2*N-K ) / 2 ) ) / D + TT = ONE / ( D11*D22-ONE ) + D21 = AP( K+1+( K-1 )*( 2*N-K ) / 2 ) / D + D = TT / D +* + DO 100 J = K + 2, N + WK = D*( D11*AP( J+( K-1 )*( 2*N-K ) / 2 )-D21* + $ AP( J+K*( 2*N-K-1 ) / 2 ) ) + WKP1 = D*( D22*AP( J+K*( 2*N-K-1 ) / 2 )- + $ CONJG( D21 )*AP( J+( K-1 )*( 2*N-K ) / 2 ) ) + DO 90 I = J, N + AP( I+( J-1 )*( 2*N-J ) / 2 ) = AP( I+( J-1 )* + $ ( 2*N-J ) / 2 ) - AP( I+( K-1 )*( 2*N-K ) / + $ 2 )*CONJG( WK ) - AP( I+K*( 2*N-K-1 ) / 2 )* + $ CONJG( WKP1 ) + 90 CONTINUE + AP( J+( K-1 )*( 2*N-K ) / 2 ) = WK + AP( J+K*( 2*N-K-1 ) / 2 ) = WKP1 + AP( J+( J-1 )*( 2*N-J ) / 2 ) + $ = CMPLX( REAL( AP( J+( J-1 )*( 2*N-J ) / 2 ) ), + $ 0.0E+0 ) + 100 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + KC = KNC + N - K + 2 + GO TO 60 +* + END IF +* + 110 CONTINUE + RETURN +* +* End of CHPTRF +* + END diff --git a/costa/native/external/lapack/chptri.f b/costa/native/external/lapack/chptri.f new file mode 100644 index 000000000..323fb2cfe --- /dev/null +++ b/costa/native/external/lapack/chptri.f @@ -0,0 +1,344 @@ + SUBROUTINE CHPTRI( UPLO, N, AP, IPIV, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CHPTRI computes the inverse of a complex Hermitian indefinite matrix +* A in packed storage using the factorization A = U*D*U**H or +* A = L*D*L**H computed by CHPTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**H; +* = 'L': Lower triangular, form is A = L*D*L**H. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the block diagonal matrix D and the multipliers +* used to obtain the factor U or L as computed by CHPTRF, +* stored as a packed triangular matrix. +* +* On exit, if INFO = 0, the (Hermitian) inverse of the original +* matrix, stored as a packed triangular matrix. The j-th column +* of inv(A) is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j; +* if UPLO = 'L', +* AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by CHPTRF. +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +* inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + COMPLEX CONE, ZERO + PARAMETER ( ONE = 1.0E+0, CONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KCNEXT, KP, KPC, KSTEP, KX, NPP + REAL AK, AKP1, D, T + COMPLEX AKKP1, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CHPMV, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + KP = N*( N+1 ) / 2 + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP - INFO + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + KP = 1 + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP + N - INFO + 1 + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + KCNEXT = KC + K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC+K-1 ) = ONE / REAL( AP( KC+K-1 ) ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL CCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL CHPMV( UPLO, K-1, -CONE, AP, WORK, 1, ZERO, + $ AP( KC ), 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ REAL( CDOTC( K-1, WORK, 1, AP( KC ), 1 ) ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( AP( KCNEXT+K-1 ) ) + AK = REAL( AP( KC+K-1 ) ) / T + AKP1 = REAL( AP( KCNEXT+K ) ) / T + AKKP1 = AP( KCNEXT+K-1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KC+K-1 ) = AKP1 / D + AP( KCNEXT+K ) = AK / D + AP( KCNEXT+K-1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL CCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL CHPMV( UPLO, K-1, -CONE, AP, WORK, 1, ZERO, + $ AP( KC ), 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ REAL( CDOTC( K-1, WORK, 1, AP( KC ), 1 ) ) + AP( KCNEXT+K-1 ) = AP( KCNEXT+K-1 ) - + $ CDOTC( K-1, AP( KC ), 1, AP( KCNEXT ), + $ 1 ) + CALL CCOPY( K-1, AP( KCNEXT ), 1, WORK, 1 ) + CALL CHPMV( UPLO, K-1, -CONE, AP, WORK, 1, ZERO, + $ AP( KCNEXT ), 1 ) + AP( KCNEXT+K ) = AP( KCNEXT+K ) - + $ REAL( CDOTC( K-1, WORK, 1, AP( KCNEXT ), + $ 1 ) ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT + K + 1 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + KPC = ( KP-1 )*KP / 2 + 1 + CALL CSWAP( KP-1, AP( KC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 40 J = KP + 1, K - 1 + KX = KX + J - 1 + TEMP = CONJG( AP( KC+J-1 ) ) + AP( KC+J-1 ) = CONJG( AP( KX ) ) + AP( KX ) = TEMP + 40 CONTINUE + AP( KC+KP-1 ) = CONJG( AP( KC+KP-1 ) ) + TEMP = AP( KC+K-1 ) + AP( KC+K-1 ) = AP( KPC+KP-1 ) + AP( KPC+KP-1 ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC+K+K-1 ) + AP( KC+K+K-1 ) = AP( KC+K+KP-1 ) + AP( KC+K+KP-1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + KC = KCNEXT + GO TO 30 + 50 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + NPP = N*( N+1 ) / 2 + K = N + KC = NPP + 60 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 80 +* + KCNEXT = KC - ( N-K+2 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC ) = ONE / REAL( AP( KC ) ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL CCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL CHPMV( UPLO, N-K, -CONE, AP( KC+N-K+1 ), WORK, 1, + $ ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - REAL( CDOTC( N-K, WORK, 1, + $ AP( KC+1 ), 1 ) ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( AP( KCNEXT+1 ) ) + AK = REAL( AP( KCNEXT ) ) / T + AKP1 = REAL( AP( KC ) ) / T + AKKP1 = AP( KCNEXT+1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KCNEXT ) = AKP1 / D + AP( KC ) = AK / D + AP( KCNEXT+1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL CCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL CHPMV( UPLO, N-K, -CONE, AP( KC+( N-K+1 ) ), WORK, + $ 1, ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - REAL( CDOTC( N-K, WORK, 1, + $ AP( KC+1 ), 1 ) ) + AP( KCNEXT+1 ) = AP( KCNEXT+1 ) - + $ CDOTC( N-K, AP( KC+1 ), 1, + $ AP( KCNEXT+2 ), 1 ) + CALL CCOPY( N-K, AP( KCNEXT+2 ), 1, WORK, 1 ) + CALL CHPMV( UPLO, N-K, -CONE, AP( KC+( N-K+1 ) ), WORK, + $ 1, ZERO, AP( KCNEXT+2 ), 1 ) + AP( KCNEXT ) = AP( KCNEXT ) - + $ REAL( CDOTC( N-K, WORK, 1, AP( KCNEXT+2 ), + $ 1 ) ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT - ( N-K+3 ) + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + KPC = NPP - ( N-KP+1 )*( N-KP+2 ) / 2 + 1 + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, AP( KC+KP-K+1 ), 1, AP( KPC+1 ), 1 ) + KX = KC + KP - K + DO 70 J = K + 1, KP - 1 + KX = KX + N - J + 1 + TEMP = CONJG( AP( KC+J-K ) ) + AP( KC+J-K ) = CONJG( AP( KX ) ) + AP( KX ) = TEMP + 70 CONTINUE + AP( KC+KP-K ) = CONJG( AP( KC+KP-K ) ) + TEMP = AP( KC ) + AP( KC ) = AP( KPC ) + AP( KPC ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC-N+K-1 ) + AP( KC-N+K-1 ) = AP( KC-N+KP-1 ) + AP( KC-N+KP-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + KC = KCNEXT + GO TO 60 + 80 CONTINUE + END IF +* + RETURN +* +* End of CHPTRI +* + END diff --git a/costa/native/external/lapack/chptrs.f b/costa/native/external/lapack/chptrs.f new file mode 100644 index 000000000..885551cdf --- /dev/null +++ b/costa/native/external/lapack/chptrs.f @@ -0,0 +1,402 @@ + SUBROUTINE CHPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CHPTRS solves a system of linear equations A*X = B with a complex +* Hermitian matrix A stored in packed format using the factorization +* A = U*D*U**H or A = L*D*L**H computed by CHPTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**H; +* = 'L': Lower triangular, form is A = L*D*L**H. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input) COMPLEX array, dimension (N*(N+1)/2) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by CHPTRF, stored as a +* packed triangular matrix. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by CHPTRF. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KP + REAL S + COMPLEX AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CGERU, CLACGV, CSSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, REAL +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U'. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + KC = KC - K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL CGERU( K-1, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + S = REAL( ONE ) / REAL( AP( KC+K-1 ) ) + CALL CSSCAL( NRHS, S, B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL CSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL CGERU( K-2, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL CGERU( K-2, NRHS, -ONE, AP( KC-( K-1 ) ), 1, + $ B( K-1, 1 ), LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+K-2 ) + AKM1 = AP( KC-1 ) / AKM1K + AK = AP( KC+K-1 ) / CONJG( AKM1K ) + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / CONJG( AKM1K ) + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + KC = KC - K + 1 + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U'*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U'(K)), where U(K) is the transformation +* stored in column K of A. +* + IF( K.GT.1 ) THEN + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, AP( KC ), 1, ONE, B( K, 1 ), LDB ) + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + END IF +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + K + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U'(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.GT.1 ) THEN + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, AP( KC ), 1, ONE, B( K, 1 ), LDB ) + CALL CLACGV( NRHS, B( K, 1 ), LDB ) +* + CALL CLACGV( NRHS, B( K+1, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, AP( KC+K ), 1, ONE, B( K+1, 1 ), LDB ) + CALL CLACGV( NRHS, B( K+1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + 2*K + 1 + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L'. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL CGERU( N-K, NRHS, -ONE, AP( KC+1 ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + S = REAL( ONE ) / REAL( AP( KC ) ) + CALL CSSCAL( NRHS, S, B( K, 1 ), LDB ) + KC = KC + N - K + 1 + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL CSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL CGERU( N-K-1, NRHS, -ONE, AP( KC+2 ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL CGERU( N-K-1, NRHS, -ONE, AP( KC+N-K+2 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+1 ) + AKM1 = AP( KC ) / CONJG( AKM1K ) + AK = AP( KC+N-K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / CONJG( AKM1K ) + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + KC = KC + 2*( N-K ) + 1 + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L'*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + KC = KC - ( N-K+1 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L'(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) THEN + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, AP( KC+1 ), 1, ONE, + $ B( K, 1 ), LDB ) + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + END IF +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L'(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, AP( KC+1 ), 1, ONE, + $ B( K, 1 ), LDB ) + CALL CLACGV( NRHS, B( K, 1 ), LDB ) +* + CALL CLACGV( NRHS, B( K-1, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, AP( KC-( N-K ) ), 1, ONE, + $ B( K-1, 1 ), LDB ) + CALL CLACGV( NRHS, B( K-1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC - ( N-K+2 ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of CHPTRS +* + END diff --git a/costa/native/external/lapack/chsein.f b/costa/native/external/lapack/chsein.f new file mode 100644 index 000000000..7434efeb4 --- /dev/null +++ b/costa/native/external/lapack/chsein.f @@ -0,0 +1,351 @@ + SUBROUTINE CHSEIN( SIDE, EIGSRC, INITV, SELECT, N, H, LDH, W, VL, + $ LDVL, VR, LDVR, MM, M, WORK, RWORK, IFAILL, + $ IFAILR, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER EIGSRC, INITV, SIDE + INTEGER INFO, LDH, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IFAILL( * ), IFAILR( * ) + REAL RWORK( * ) + COMPLEX H( LDH, * ), VL( LDVL, * ), VR( LDVR, * ), + $ W( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CHSEIN uses inverse iteration to find specified right and/or left +* eigenvectors of a complex upper Hessenberg matrix H. +* +* The right eigenvector x and the left eigenvector y of the matrix H +* corresponding to an eigenvalue w are defined by: +* +* H * x = w * x, y**h * H = w * y**h +* +* where y**h denotes the conjugate transpose of the vector y. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'R': compute right eigenvectors only; +* = 'L': compute left eigenvectors only; +* = 'B': compute both right and left eigenvectors. +* +* EIGSRC (input) CHARACTER*1 +* Specifies the source of eigenvalues supplied in W: +* = 'Q': the eigenvalues were found using CHSEQR; thus, if +* H has zero subdiagonal elements, and so is +* block-triangular, then the j-th eigenvalue can be +* assumed to be an eigenvalue of the block containing +* the j-th row/column. This property allows CHSEIN to +* perform inverse iteration on just one diagonal block. +* = 'N': no assumptions are made on the correspondence +* between eigenvalues and diagonal blocks. In this +* case, CHSEIN must always perform inverse iteration +* using the whole matrix H. +* +* INITV (input) CHARACTER*1 +* = 'N': no initial vectors are supplied; +* = 'U': user-supplied initial vectors are stored in the arrays +* VL and/or VR. +* +* SELECT (input) LOGICAL array, dimension (N) +* Specifies the eigenvectors to be computed. To select the +* eigenvector corresponding to the eigenvalue W(j), +* SELECT(j) must be set to .TRUE.. +* +* N (input) INTEGER +* The order of the matrix H. N >= 0. +* +* H (input) COMPLEX array, dimension (LDH,N) +* The upper Hessenberg matrix H. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH >= max(1,N). +* +* W (input/output) COMPLEX array, dimension (N) +* On entry, the eigenvalues of H. +* On exit, the real parts of W may have been altered since +* close eigenvalues are perturbed slightly in searching for +* independent eigenvectors. +* +* VL (input/output) COMPLEX array, dimension (LDVL,MM) +* On entry, if INITV = 'U' and SIDE = 'L' or 'B', VL must +* contain starting vectors for the inverse iteration for the +* left eigenvectors; the starting vector for each eigenvector +* must be in the same column in which the eigenvector will be +* stored. +* On exit, if SIDE = 'L' or 'B', the left eigenvectors +* specified by SELECT will be stored consecutively in the +* columns of VL, in the same order as their eigenvalues. +* If SIDE = 'R', VL is not referenced. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. +* LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise. +* +* VR (input/output) COMPLEX array, dimension (LDVR,MM) +* On entry, if INITV = 'U' and SIDE = 'R' or 'B', VR must +* contain starting vectors for the inverse iteration for the +* right eigenvectors; the starting vector for each eigenvector +* must be in the same column in which the eigenvector will be +* stored. +* On exit, if SIDE = 'R' or 'B', the right eigenvectors +* specified by SELECT will be stored consecutively in the +* columns of VR, in the same order as their eigenvalues. +* If SIDE = 'L', VR is not referenced. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. +* LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise. +* +* MM (input) INTEGER +* The number of columns in the arrays VL and/or VR. MM >= M. +* +* M (output) INTEGER +* The number of columns in the arrays VL and/or VR required to +* store the eigenvectors (= the number of .TRUE. elements in +* SELECT). +* +* WORK (workspace) COMPLEX array, dimension (N*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* IFAILL (output) INTEGER array, dimension (MM) +* If SIDE = 'L' or 'B', IFAILL(i) = j > 0 if the left +* eigenvector in the i-th column of VL (corresponding to the +* eigenvalue w(j)) failed to converge; IFAILL(i) = 0 if the +* eigenvector converged satisfactorily. +* If SIDE = 'R', IFAILL is not referenced. +* +* IFAILR (output) INTEGER array, dimension (MM) +* If SIDE = 'R' or 'B', IFAILR(i) = j > 0 if the right +* eigenvector in the i-th column of VR (corresponding to the +* eigenvalue w(j)) failed to converge; IFAILR(i) = 0 if the +* eigenvector converged satisfactorily. +* If SIDE = 'L', IFAILR is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, i is the number of eigenvectors which +* failed to converge; see IFAILL and IFAILR for further +* details. +* +* Further Details +* =============== +* +* Each eigenvector is normalized so that the element of largest +* magnitude has magnitude 1; here the magnitude of a complex number +* (x,y) is taken to be |x|+|y|. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) + REAL RZERO + PARAMETER ( RZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL BOTHV, FROMQR, LEFTV, NOINIT, RIGHTV + INTEGER I, IINFO, K, KL, KLN, KR, KS, LDWORK + REAL EPS3, HNORM, SMLNUM, ULP, UNFL + COMPLEX CDUM, WK +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHS, SLAMCH + EXTERNAL LSAME, CLANHS, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLAEIN, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters. +* + BOTHV = LSAME( SIDE, 'B' ) + RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV + LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV +* + FROMQR = LSAME( EIGSRC, 'Q' ) +* + NOINIT = LSAME( INITV, 'N' ) +* +* Set M to the number of columns required to store the selected +* eigenvectors. +* + M = 0 + DO 10 K = 1, N + IF( SELECT( K ) ) + $ M = M + 1 + 10 CONTINUE +* + INFO = 0 + IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -1 + ELSE IF( .NOT.FROMQR .AND. .NOT.LSAME( EIGSRC, 'N' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOINIT .AND. .NOT.LSAME( INITV, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDH.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN + INFO = -10 + ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN + INFO = -12 + ELSE IF( MM.LT.M ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHSEIN', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* Set machine-dependent constants. +* + UNFL = SLAMCH( 'Safe minimum' ) + ULP = SLAMCH( 'Precision' ) + SMLNUM = UNFL*( N / ULP ) +* + LDWORK = N +* + KL = 1 + KLN = 0 + IF( FROMQR ) THEN + KR = 0 + ELSE + KR = N + END IF + KS = 1 +* + DO 100 K = 1, N + IF( SELECT( K ) ) THEN +* +* Compute eigenvector(s) corresponding to W(K). +* + IF( FROMQR ) THEN +* +* If affiliation of eigenvalues is known, check whether +* the matrix splits. +* +* Determine KL and KR such that 1 <= KL <= K <= KR <= N +* and H(KL,KL-1) and H(KR+1,KR) are zero (or KL = 1 or +* KR = N). +* +* Then inverse iteration can be performed with the +* submatrix H(KL:N,KL:N) for a left eigenvector, and with +* the submatrix H(1:KR,1:KR) for a right eigenvector. +* + DO 20 I = K, KL + 1, -1 + IF( H( I, I-1 ).EQ.ZERO ) + $ GO TO 30 + 20 CONTINUE + 30 CONTINUE + KL = I + IF( K.GT.KR ) THEN + DO 40 I = K, N - 1 + IF( H( I+1, I ).EQ.ZERO ) + $ GO TO 50 + 40 CONTINUE + 50 CONTINUE + KR = I + END IF + END IF +* + IF( KL.NE.KLN ) THEN + KLN = KL +* +* Compute infinity-norm of submatrix H(KL:KR,KL:KR) if it +* has not ben computed before. +* + HNORM = CLANHS( 'I', KR-KL+1, H( KL, KL ), LDH, RWORK ) + IF( HNORM.GT.RZERO ) THEN + EPS3 = HNORM*ULP + ELSE + EPS3 = SMLNUM + END IF + END IF +* +* Perturb eigenvalue if it is close to any previous +* selected eigenvalues affiliated to the submatrix +* H(KL:KR,KL:KR). Close roots are modified by EPS3. +* + WK = W( K ) + 60 CONTINUE + DO 70 I = K - 1, KL, -1 + IF( SELECT( I ) .AND. CABS1( W( I )-WK ).LT.EPS3 ) THEN + WK = WK + EPS3 + GO TO 60 + END IF + 70 CONTINUE + W( K ) = WK +* + IF( LEFTV ) THEN +* +* Compute left eigenvector. +* + CALL CLAEIN( .FALSE., NOINIT, N-KL+1, H( KL, KL ), LDH, + $ WK, VL( KL, KS ), WORK, LDWORK, RWORK, EPS3, + $ SMLNUM, IINFO ) + IF( IINFO.GT.0 ) THEN + INFO = INFO + 1 + IFAILL( KS ) = K + ELSE + IFAILL( KS ) = 0 + END IF + DO 80 I = 1, KL - 1 + VL( I, KS ) = ZERO + 80 CONTINUE + END IF + IF( RIGHTV ) THEN +* +* Compute right eigenvector. +* + CALL CLAEIN( .TRUE., NOINIT, KR, H, LDH, WK, VR( 1, KS ), + $ WORK, LDWORK, RWORK, EPS3, SMLNUM, IINFO ) + IF( IINFO.GT.0 ) THEN + INFO = INFO + 1 + IFAILR( KS ) = K + ELSE + IFAILR( KS ) = 0 + END IF + DO 90 I = KR + 1, N + VR( I, KS ) = ZERO + 90 CONTINUE + END IF + KS = KS + 1 + END IF + 100 CONTINUE +* + RETURN +* +* End of CHSEIN +* + END diff --git a/costa/native/external/lapack/chseqr.f b/costa/native/external/lapack/chseqr.f new file mode 100644 index 000000000..d26ff16e1 --- /dev/null +++ b/costa/native/external/lapack/chseqr.f @@ -0,0 +1,474 @@ + SUBROUTINE CHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, W, Z, LDZ, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER COMPZ, JOB + INTEGER IHI, ILO, INFO, LDH, LDZ, LWORK, N +* .. +* .. Array Arguments .. + COMPLEX H( LDH, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CHSEQR computes the eigenvalues of a complex upper Hessenberg +* matrix H, and, optionally, the matrices T and Z from the Schur +* decomposition H = Z T Z**H, where T is an upper triangular matrix +* (the Schur form), and Z is the unitary matrix of Schur vectors. +* +* Optionally Z may be postmultiplied into an input unitary matrix Q, +* so that this routine can give the Schur factorization of a matrix A +* which has been reduced to the Hessenberg form H by the unitary +* matrix Q: A = Q*H*Q**H = (QZ)*T*(QZ)**H. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* = 'E': compute eigenvalues only; +* = 'S': compute eigenvalues and the Schur form T. +* +* COMPZ (input) CHARACTER*1 +* = 'N': no Schur vectors are computed; +* = 'I': Z is initialized to the unit matrix and the matrix Z +* of Schur vectors of H is returned; +* = 'V': Z must contain an unitary matrix Q on entry, and +* the product Q*Z is returned. +* +* N (input) INTEGER +* The order of the matrix H. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that H is already upper triangular in rows +* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +* set by a previous call to CGEBAL, and then passed to CGEHRD +* when the matrix output by CGEBAL is reduced to Hessenberg +* form. Otherwise ILO and IHI should be set to 1 and N +* respectively. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* H (input/output) COMPLEX array, dimension (LDH,N) +* On entry, the upper Hessenberg matrix H. +* On exit, if JOB = 'S', H contains the upper triangular matrix +* T from the Schur decomposition (the Schur form). If +* JOB = 'E', the contents of H are unspecified on exit. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH >= max(1,N). +* +* W (output) COMPLEX array, dimension (N) +* The computed eigenvalues. If JOB = 'S', the eigenvalues are +* stored in the same order as on the diagonal of the Schur form +* returned in H, with W(i) = H(i,i). +* +* Z (input/output) COMPLEX array, dimension (LDZ,N) +* If COMPZ = 'N': Z is not referenced. +* If COMPZ = 'I': on entry, Z need not be set, and on exit, Z +* contains the unitary matrix Z of the Schur vectors of H. +* If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, +* which is assumed to be equal to the unit matrix except for +* the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. +* Normally Q is the unitary matrix generated by CUNGHR after +* the call to CGEHRD which formed the Hessenberg matrix H. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. +* LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, CHSEQR failed to compute all the +* eigenvalues in a total of 30*(IHI-ILO+1) iterations; +* elements 1:ilo-1 and i+1:n of W contain those +* eigenvalues which have been successfully computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) + REAL RZERO, RONE, CONST + PARAMETER ( RZERO = 0.0E+0, RONE = 1.0E+0, + $ CONST = 1.5E+0 ) + INTEGER NSMAX, LDS + PARAMETER ( NSMAX = 15, LDS = NSMAX ) +* .. +* .. Local Scalars .. + LOGICAL INITZ, LQUERY, WANTT, WANTZ + INTEGER I, I1, I2, IERR, II, ITEMP, ITN, ITS, J, K, L, + $ MAXB, NH, NR, NS, NV + REAL OVFL, RTEMP, SMLNUM, TST1, ULP, UNFL + COMPLEX CDUM, TAU, TEMP +* .. +* .. Local Arrays .. + REAL RWORK( 1 ) + COMPLEX S( LDS, NSMAX ), V( NSMAX+1 ), VV( NSMAX+1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX, ILAENV + REAL CLANHS, SLAMCH, SLAPY2 + EXTERNAL LSAME, ICAMAX, ILAENV, CLANHS, SLAMCH, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMV, CLACPY, CLAHQR, CLARFG, CLARFX, + $ CLASET, CSCAL, CSSCAL, SLABAD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CONJG, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTT = LSAME( JOB, 'S' ) + INITZ = LSAME( COMPZ, 'I' ) + WANTZ = INITZ .OR. LSAME( COMPZ, 'V' ) +* + INFO = 0 + WORK( 1 ) = MAX( 1, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( JOB, 'E' ) .AND. .NOT.WANTT ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPZ, 'N' ) .AND. .NOT.WANTZ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -5 + ELSE IF( LDH.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDZ.LT.1 .OR. WANTZ .AND. LDZ.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHSEQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Initialize Z, if necessary +* + IF( INITZ ) + $ CALL CLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* +* Store the eigenvalues isolated by CGEBAL. +* + DO 10 I = 1, ILO - 1 + W( I ) = H( I, I ) + 10 CONTINUE + DO 20 I = IHI + 1, N + W( I ) = H( I, I ) + 20 CONTINUE +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN + IF( ILO.EQ.IHI ) THEN + W( ILO ) = H( ILO, ILO ) + RETURN + END IF +* +* Set rows and columns ILO to IHI to zero below the first +* subdiagonal. +* + DO 40 J = ILO, IHI - 2 + DO 30 I = J + 2, N + H( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + NH = IHI - ILO + 1 +* +* I1 and I2 are the indices of the first row and last column of H +* to which transformations must be applied. If eigenvalues only are +* being computed, I1 and I2 are re-set inside the main loop. +* + IF( WANTT ) THEN + I1 = 1 + I2 = N + ELSE + I1 = ILO + I2 = IHI + END IF +* +* Ensure that the subdiagonal elements are real. +* + DO 50 I = ILO + 1, IHI + TEMP = H( I, I-1 ) + IF( AIMAG( TEMP ).NE.RZERO ) THEN + RTEMP = SLAPY2( REAL( TEMP ), AIMAG( TEMP ) ) + H( I, I-1 ) = RTEMP + TEMP = TEMP / RTEMP + IF( I2.GT.I ) + $ CALL CSCAL( I2-I, CONJG( TEMP ), H( I, I+1 ), LDH ) + CALL CSCAL( I-I1, TEMP, H( I1, I ), 1 ) + IF( I.LT.IHI ) + $ H( I+1, I ) = TEMP*H( I+1, I ) + IF( WANTZ ) + $ CALL CSCAL( NH, TEMP, Z( ILO, I ), 1 ) + END IF + 50 CONTINUE +* +* Determine the order of the multi-shift QR algorithm to be used. +* + NS = ILAENV( 4, 'CHSEQR', JOB // COMPZ, N, ILO, IHI, -1 ) + MAXB = ILAENV( 8, 'CHSEQR', JOB // COMPZ, N, ILO, IHI, -1 ) + IF( NS.LE.1 .OR. NS.GT.NH .OR. MAXB.GE.NH ) THEN +* +* Use the standard double-shift algorithm +* + CALL CLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILO, IHI, Z, + $ LDZ, INFO ) + RETURN + END IF + MAXB = MAX( 2, MAXB ) + NS = MIN( NS, MAXB, NSMAX ) +* +* Now 1 < NS <= MAXB < NH. +* +* Set machine-dependent constants for the stopping criterion. +* If norm(H) <= sqrt(OVFL), overflow should not occur. +* + UNFL = SLAMCH( 'Safe minimum' ) + OVFL = RONE / UNFL + CALL SLABAD( UNFL, OVFL ) + ULP = SLAMCH( 'Precision' ) + SMLNUM = UNFL*( NH / ULP ) +* +* ITN is the total number of multiple-shift QR iterations allowed. +* + ITN = 30*NH +* +* The main loop begins here. I is the loop index and decreases from +* IHI to ILO in steps of at most MAXB. Each iteration of the loop +* works with the active submatrix in rows and columns L to I. +* Eigenvalues I+1 to IHI have already converged. Either L = ILO, or +* H(L,L-1) is negligible so that the matrix splits. +* + I = IHI + 60 CONTINUE + IF( I.LT.ILO ) + $ GO TO 180 +* +* Perform multiple-shift QR iterations on rows and columns ILO to I +* until a submatrix of order at most MAXB splits off at the bottom +* because a subdiagonal element has become negligible. +* + L = ILO + DO 160 ITS = 0, ITN +* +* Look for a single small subdiagonal element. +* + DO 70 K = I, L + 1, -1 + TST1 = CABS1( H( K-1, K-1 ) ) + CABS1( H( K, K ) ) + IF( TST1.EQ.RZERO ) + $ TST1 = CLANHS( '1', I-L+1, H( L, L ), LDH, RWORK ) + IF( ABS( REAL( H( K, K-1 ) ) ).LE.MAX( ULP*TST1, SMLNUM ) ) + $ GO TO 80 + 70 CONTINUE + 80 CONTINUE + L = K + IF( L.GT.ILO ) THEN +* +* H(L,L-1) is negligible. +* + H( L, L-1 ) = ZERO + END IF +* +* Exit from loop if a submatrix of order <= MAXB has split off. +* + IF( L.GE.I-MAXB+1 ) + $ GO TO 170 +* +* Now the active submatrix is in rows and columns L to I. If +* eigenvalues only are being computed, only the active submatrix +* need be transformed. +* + IF( .NOT.WANTT ) THEN + I1 = L + I2 = I + END IF +* + IF( ITS.EQ.20 .OR. ITS.EQ.30 ) THEN +* +* Exceptional shifts. +* + DO 90 II = I - NS + 1, I + W( II ) = CONST*( ABS( REAL( H( II, II-1 ) ) )+ + $ ABS( REAL( H( II, II ) ) ) ) + 90 CONTINUE + ELSE +* +* Use eigenvalues of trailing submatrix of order NS as shifts. +* + CALL CLACPY( 'Full', NS, NS, H( I-NS+1, I-NS+1 ), LDH, S, + $ LDS ) + CALL CLAHQR( .FALSE., .FALSE., NS, 1, NS, S, LDS, + $ W( I-NS+1 ), 1, NS, Z, LDZ, IERR ) + IF( IERR.GT.0 ) THEN +* +* If CLAHQR failed to compute all NS eigenvalues, use the +* unconverged diagonal elements as the remaining shifts. +* + DO 100 II = 1, IERR + W( I-NS+II ) = S( II, II ) + 100 CONTINUE + END IF + END IF +* +* Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) +* where G is the Hessenberg submatrix H(L:I,L:I) and w is +* the vector of shifts (stored in W). The result is +* stored in the local array V. +* + V( 1 ) = ONE + DO 110 II = 2, NS + 1 + V( II ) = ZERO + 110 CONTINUE + NV = 1 + DO 130 J = I - NS + 1, I + CALL CCOPY( NV+1, V, 1, VV, 1 ) + CALL CGEMV( 'No transpose', NV+1, NV, ONE, H( L, L ), LDH, + $ VV, 1, -W( J ), V, 1 ) + NV = NV + 1 +* +* Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, +* reset it to the unit vector. +* + ITEMP = ICAMAX( NV, V, 1 ) + RTEMP = CABS1( V( ITEMP ) ) + IF( RTEMP.EQ.RZERO ) THEN + V( 1 ) = ONE + DO 120 II = 2, NV + V( II ) = ZERO + 120 CONTINUE + ELSE + RTEMP = MAX( RTEMP, SMLNUM ) + CALL CSSCAL( NV, RONE / RTEMP, V, 1 ) + END IF + 130 CONTINUE +* +* Multiple-shift QR step +* + DO 150 K = L, I - 1 +* +* The first iteration of this loop determines a reflection G +* from the vector V and applies it from left and right to H, +* thus creating a nonzero bulge below the subdiagonal. +* +* Each subsequent iteration determines a reflection G to +* restore the Hessenberg form in the (K-1)th column, and thus +* chases the bulge one step toward the bottom of the active +* submatrix. NR is the order of G. +* + NR = MIN( NS+1, I-K+1 ) + IF( K.GT.L ) + $ CALL CCOPY( NR, H( K, K-1 ), 1, V, 1 ) + CALL CLARFG( NR, V( 1 ), V( 2 ), 1, TAU ) + IF( K.GT.L ) THEN + H( K, K-1 ) = V( 1 ) + DO 140 II = K + 1, I + H( II, K-1 ) = ZERO + 140 CONTINUE + END IF + V( 1 ) = ONE +* +* Apply G' from the left to transform the rows of the matrix +* in columns K to I2. +* + CALL CLARFX( 'Left', NR, I2-K+1, V, CONJG( TAU ), H( K, K ), + $ LDH, WORK ) +* +* Apply G from the right to transform the columns of the +* matrix in rows I1 to min(K+NR,I). +* + CALL CLARFX( 'Right', MIN( K+NR, I )-I1+1, NR, V, TAU, + $ H( I1, K ), LDH, WORK ) +* + IF( WANTZ ) THEN +* +* Accumulate transformations in the matrix Z +* + CALL CLARFX( 'Right', NH, NR, V, TAU, Z( ILO, K ), LDZ, + $ WORK ) + END IF + 150 CONTINUE +* +* Ensure that H(I,I-1) is real. +* + TEMP = H( I, I-1 ) + IF( AIMAG( TEMP ).NE.RZERO ) THEN + RTEMP = SLAPY2( REAL( TEMP ), AIMAG( TEMP ) ) + H( I, I-1 ) = RTEMP + TEMP = TEMP / RTEMP + IF( I2.GT.I ) + $ CALL CSCAL( I2-I, CONJG( TEMP ), H( I, I+1 ), LDH ) + CALL CSCAL( I-I1, TEMP, H( I1, I ), 1 ) + IF( WANTZ ) THEN + CALL CSCAL( NH, TEMP, Z( ILO, I ), 1 ) + END IF + END IF +* + 160 CONTINUE +* +* Failure to converge in remaining number of iterations +* + INFO = I + RETURN +* + 170 CONTINUE +* +* A submatrix of order <= MAXB in rows and columns L to I has split +* off. Use the double-shift QR algorithm to handle it. +* + CALL CLAHQR( WANTT, WANTZ, N, L, I, H, LDH, W, ILO, IHI, Z, LDZ, + $ INFO ) + IF( INFO.GT.0 ) + $ RETURN +* +* Decrement number of remaining iterations, and return to start of +* the main loop with a new value of I. +* + ITN = ITN - ITS + I = L - 1 + GO TO 60 +* + 180 CONTINUE + WORK( 1 ) = MAX( 1, N ) + RETURN +* +* End of CHSEQR +* + END diff --git a/costa/native/external/lapack/clabrd.f b/costa/native/external/lapack/clabrd.f new file mode 100644 index 000000000..aba02056c --- /dev/null +++ b/costa/native/external/lapack/clabrd.f @@ -0,0 +1,329 @@ + SUBROUTINE CLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y, + $ LDY ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER LDA, LDX, LDY, M, N, NB +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + COMPLEX A( LDA, * ), TAUP( * ), TAUQ( * ), X( LDX, * ), + $ Y( LDY, * ) +* .. +* +* Purpose +* ======= +* +* CLABRD reduces the first NB rows and columns of a complex general +* m by n matrix A to upper or lower real bidiagonal form by a unitary +* transformation Q' * A * P, and returns the matrices X and Y which +* are needed to apply the transformation to the unreduced part of A. +* +* If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower +* bidiagonal form. +* +* This is an auxiliary routine called by CGEBRD +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows in the matrix A. +* +* N (input) INTEGER +* The number of columns in the matrix A. +* +* NB (input) INTEGER +* The number of leading rows and columns of A to be reduced. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the m by n general matrix to be reduced. +* On exit, the first NB rows and columns of the matrix are +* overwritten; the rest of the array is unchanged. +* If m >= n, elements on and below the diagonal in the first NB +* columns, with the array TAUQ, represent the unitary +* matrix Q as a product of elementary reflectors; and +* elements above the diagonal in the first NB rows, with the +* array TAUP, represent the unitary matrix P as a product +* of elementary reflectors. +* If m < n, elements below the diagonal in the first NB +* columns, with the array TAUQ, represent the unitary +* matrix Q as a product of elementary reflectors, and +* elements on and above the diagonal in the first NB rows, +* with the array TAUP, represent the unitary matrix P as +* a product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* D (output) REAL array, dimension (NB) +* The diagonal elements of the first NB rows and columns of +* the reduced matrix. D(i) = A(i,i). +* +* E (output) REAL array, dimension (NB) +* The off-diagonal elements of the first NB rows and columns of +* the reduced matrix. +* +* TAUQ (output) COMPLEX array dimension (NB) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix Q. See Further Details. +* +* TAUP (output) COMPLEX array, dimension (NB) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix P. See Further Details. +* +* X (output) COMPLEX array, dimension (LDX,NB) +* The m-by-nb matrix X required to update the unreduced part +* of A. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,M). +* +* Y (output) COMPLEX array, dimension (LDY,NB) +* The n-by-nb matrix Y required to update the unreduced part +* of A. +* +* LDY (output) INTEGER +* The leading dimension of the array Y. LDY >= max(1,N). +* +* Further Details +* =============== +* +* The matrices Q and P are represented as products of elementary +* reflectors: +* +* Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are complex scalars, and v and u are complex +* vectors. +* +* If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in +* A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in +* A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in +* A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in +* A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* The elements of the vectors v and u together form the m-by-nb matrix +* V and the nb-by-n matrix U' which are needed, with X and Y, to apply +* the transformation to the unreduced part of the matrix, using a block +* update of the form: A := A - V*Y' - X*U'. +* +* The contents of A on exit are illustrated by the following examples +* with nb = 2: +* +* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +* +* ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) +* ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) +* ( v1 v2 a a a ) ( v1 1 a a a a ) +* ( v1 v2 a a a ) ( v1 v2 a a a a ) +* ( v1 v2 a a a ) ( v1 v2 a a a a ) +* ( v1 v2 a a a ) +* +* where a denotes an element of the original matrix which is unchanged, +* vi denotes an element of the vector defining H(i), and ui an element +* of the vector defining G(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CLACGV, CLARFG, CSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( M.GE.N ) THEN +* +* Reduce to upper bidiagonal form +* + DO 10 I = 1, NB +* +* Update A(i:m,i) +* + CALL CLACGV( I-1, Y( I, 1 ), LDY ) + CALL CGEMV( 'No transpose', M-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, Y( I, 1 ), LDY, ONE, A( I, I ), 1 ) + CALL CLACGV( I-1, Y( I, 1 ), LDY ) + CALL CGEMV( 'No transpose', M-I+1, I-1, -ONE, X( I, 1 ), + $ LDX, A( 1, I ), 1, ONE, A( I, I ), 1 ) +* +* Generate reflection Q(i) to annihilate A(i+1:m,i) +* + ALPHA = A( I, I ) + CALL CLARFG( M-I+1, ALPHA, A( MIN( I+1, M ), I ), 1, + $ TAUQ( I ) ) + D( I ) = ALPHA + IF( I.LT.N ) THEN + A( I, I ) = ONE +* +* Compute Y(i+1:n,i) +* + CALL CGEMV( 'Conjugate transpose', M-I+1, N-I, ONE, + $ A( I, I+1 ), LDA, A( I, I ), 1, ZERO, + $ Y( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', M-I+1, I-1, ONE, + $ A( I, 1 ), LDA, A( I, I ), 1, ZERO, + $ Y( 1, I ), 1 ) + CALL CGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ), + $ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', M-I+1, I-1, ONE, + $ X( I, 1 ), LDX, A( I, I ), 1, ZERO, + $ Y( 1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', I-1, N-I, -ONE, + $ A( 1, I+1 ), LDA, Y( 1, I ), 1, ONE, + $ Y( I+1, I ), 1 ) + CALL CSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 ) +* +* Update A(i,i+1:n) +* + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + CALL CLACGV( I, A( I, 1 ), LDA ) + CALL CGEMV( 'No transpose', N-I, I, -ONE, Y( I+1, 1 ), + $ LDY, A( I, 1 ), LDA, ONE, A( I, I+1 ), LDA ) + CALL CLACGV( I, A( I, 1 ), LDA ) + CALL CLACGV( I-1, X( I, 1 ), LDX ) + CALL CGEMV( 'Conjugate transpose', I-1, N-I, -ONE, + $ A( 1, I+1 ), LDA, X( I, 1 ), LDX, ONE, + $ A( I, I+1 ), LDA ) + CALL CLACGV( I-1, X( I, 1 ), LDX ) +* +* Generate reflection P(i) to annihilate A(i,i+2:n) +* + ALPHA = A( I, I+1 ) + CALL CLARFG( N-I, ALPHA, A( I, MIN( I+2, N ) ), + $ LDA, TAUP( I ) ) + E( I ) = ALPHA + A( I, I+1 ) = ONE +* +* Compute X(i+1:m,i) +* + CALL CGEMV( 'No transpose', M-I, N-I, ONE, A( I+1, I+1 ), + $ LDA, A( I, I+1 ), LDA, ZERO, X( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', N-I, I, ONE, + $ Y( I+1, 1 ), LDY, A( I, I+1 ), LDA, ZERO, + $ X( 1, I ), 1 ) + CALL CGEMV( 'No transpose', M-I, I, -ONE, A( I+1, 1 ), + $ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL CGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 ) + CALL CGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ), + $ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL CSCAL( M-I, TAUP( I ), X( I+1, I ), 1 ) + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Reduce to lower bidiagonal form +* + DO 20 I = 1, NB +* +* Update A(i,i:n) +* + CALL CLACGV( N-I+1, A( I, I ), LDA ) + CALL CLACGV( I-1, A( I, 1 ), LDA ) + CALL CGEMV( 'No transpose', N-I+1, I-1, -ONE, Y( I, 1 ), + $ LDY, A( I, 1 ), LDA, ONE, A( I, I ), LDA ) + CALL CLACGV( I-1, A( I, 1 ), LDA ) + CALL CLACGV( I-1, X( I, 1 ), LDX ) + CALL CGEMV( 'Conjugate transpose', I-1, N-I+1, -ONE, + $ A( 1, I ), LDA, X( I, 1 ), LDX, ONE, A( I, I ), + $ LDA ) + CALL CLACGV( I-1, X( I, 1 ), LDX ) +* +* Generate reflection P(i) to annihilate A(i,i+1:n) +* + ALPHA = A( I, I ) + CALL CLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA, + $ TAUP( I ) ) + D( I ) = ALPHA + IF( I.LT.M ) THEN + A( I, I ) = ONE +* +* Compute X(i+1:m,i) +* + CALL CGEMV( 'No transpose', M-I, N-I+1, ONE, A( I+1, I ), + $ LDA, A( I, I ), LDA, ZERO, X( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', N-I+1, I-1, ONE, + $ Y( I, 1 ), LDY, A( I, I ), LDA, ZERO, + $ X( 1, I ), 1 ) + CALL CGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL CGEMV( 'No transpose', I-1, N-I+1, ONE, A( 1, I ), + $ LDA, A( I, I ), LDA, ZERO, X( 1, I ), 1 ) + CALL CGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ), + $ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL CSCAL( M-I, TAUP( I ), X( I+1, I ), 1 ) + CALL CLACGV( N-I+1, A( I, I ), LDA ) +* +* Update A(i+1:m,i) +* + CALL CLACGV( I-1, Y( I, 1 ), LDY ) + CALL CGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, Y( I, 1 ), LDY, ONE, A( I+1, I ), 1 ) + CALL CLACGV( I-1, Y( I, 1 ), LDY ) + CALL CGEMV( 'No transpose', M-I, I, -ONE, X( I+1, 1 ), + $ LDX, A( 1, I ), 1, ONE, A( I+1, I ), 1 ) +* +* Generate reflection Q(i) to annihilate A(i+2:m,i) +* + ALPHA = A( I+1, I ) + CALL CLARFG( M-I, ALPHA, A( MIN( I+2, M ), I ), 1, + $ TAUQ( I ) ) + E( I ) = ALPHA + A( I+1, I ) = ONE +* +* Compute Y(i+1:n,i) +* + CALL CGEMV( 'Conjugate transpose', M-I, N-I, ONE, + $ A( I+1, I+1 ), LDA, A( I+1, I ), 1, ZERO, + $ Y( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', M-I, I-1, ONE, + $ A( I+1, 1 ), LDA, A( I+1, I ), 1, ZERO, + $ Y( 1, I ), 1 ) + CALL CGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ), + $ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', M-I, I, ONE, + $ X( I+1, 1 ), LDX, A( I+1, I ), 1, ZERO, + $ Y( 1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', I, N-I, -ONE, + $ A( 1, I+1 ), LDA, Y( 1, I ), 1, ONE, + $ Y( I+1, I ), 1 ) + CALL CSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 ) + ELSE + CALL CLACGV( N-I+1, A( I, I ), LDA ) + END IF + 20 CONTINUE + END IF + RETURN +* +* End of CLABRD +* + END diff --git a/costa/native/external/lapack/clacgv.f b/costa/native/external/lapack/clacgv.f new file mode 100644 index 000000000..a1c40dfc2 --- /dev/null +++ b/costa/native/external/lapack/clacgv.f @@ -0,0 +1,61 @@ + SUBROUTINE CLACGV( N, X, INCX ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + INTEGER INCX, N +* .. +* .. Array Arguments .. + COMPLEX X( * ) +* .. +* +* Purpose +* ======= +* +* CLACGV conjugates a complex vector of length N. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The length of the vector X. N >= 0. +* +* X (input/output) COMPLEX array, dimension +* (1+(N-1)*abs(INCX)) +* On entry, the vector of length N to be conjugated. +* On exit, X is overwritten with conjg(X). +* +* INCX (input) INTEGER +* The spacing between successive elements of X. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IOFF +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* + IF( INCX.EQ.1 ) THEN + DO 10 I = 1, N + X( I ) = CONJG( X( I ) ) + 10 CONTINUE + ELSE + IOFF = 1 + IF( INCX.LT.0 ) + $ IOFF = 1 - ( N-1 )*INCX + DO 20 I = 1, N + X( IOFF ) = CONJG( X( IOFF ) ) + IOFF = IOFF + INCX + 20 CONTINUE + END IF + RETURN +* +* End of CLACGV +* + END diff --git a/costa/native/external/lapack/clacon.f b/costa/native/external/lapack/clacon.f new file mode 100644 index 000000000..034930abe --- /dev/null +++ b/costa/native/external/lapack/clacon.f @@ -0,0 +1,211 @@ + SUBROUTINE CLACON( N, V, X, EST, KASE ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER KASE, N + REAL EST +* .. +* .. Array Arguments .. + COMPLEX V( N ), X( N ) +* .. +* +* Purpose +* ======= +* +* CLACON estimates the 1-norm of a square, complex matrix A. +* Reverse communication is used for evaluating matrix-vector products. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. N >= 1. +* +* V (workspace) COMPLEX array, dimension (N) +* On the final return, V = A*W, where EST = norm(V)/norm(W) +* (W is not returned). +* +* X (input/output) COMPLEX array, dimension (N) +* On an intermediate return, X should be overwritten by +* A * X, if KASE=1, +* A' * X, if KASE=2, +* where A' is the conjugate transpose of A, and CLACON must be +* re-called with all the other parameters unchanged. +* +* EST (output) REAL +* An estimate (a lower bound) for norm(A). +* +* KASE (input/output) INTEGER +* On the initial call to CLACON, KASE should be 0. +* On an intermediate return, KASE will be 1 or 2, indicating +* whether X should be overwritten by A * X or A' * X. +* On the final return from CLACON, KASE will again be 0. +* +* Further Details +* ======= ======= +* +* Contributed by Nick Higham, University of Manchester. +* Originally named CONEST, dated March 16, 1988. +* +* Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of +* a real or complex matrix, with applications to condition estimation", +* ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. +* +* Last modified: April, 1999 +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ONE, TWO + PARAMETER ( ONE = 1.0E0, TWO = 2.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, ITER, J, JLAST, JUMP + REAL ABSXI, ALTSGN, ESTOLD, SAFMIN, TEMP +* .. +* .. External Functions .. + INTEGER ICMAX1 + REAL SCSUM1, SLAMCH + EXTERNAL ICMAX1, SCSUM1, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, REAL +* .. +* .. Save statement .. + SAVE +* .. +* .. Executable Statements .. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + IF( KASE.EQ.0 ) THEN + DO 10 I = 1, N + X( I ) = CMPLX( ONE / REAL( N ) ) + 10 CONTINUE + KASE = 1 + JUMP = 1 + RETURN + END IF +* + GO TO ( 20, 40, 70, 90, 120 )JUMP +* +* ................ ENTRY (JUMP = 1) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. +* + 20 CONTINUE + IF( N.EQ.1 ) THEN + V( 1 ) = X( 1 ) + EST = ABS( V( 1 ) ) +* ... QUIT + GO TO 130 + END IF + EST = SCSUM1( N, X, 1 ) +* + DO 30 I = 1, N + ABSXI = ABS( X( I ) ) + IF( ABSXI.GT.SAFMIN ) THEN + X( I ) = CMPLX( REAL( X( I ) ) / ABSXI, + $ AIMAG( X( I ) ) / ABSXI ) + ELSE + X( I ) = CONE + END IF + 30 CONTINUE + KASE = 2 + JUMP = 2 + RETURN +* +* ................ ENTRY (JUMP = 2) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY CTRANS(A)*X. +* + 40 CONTINUE + J = ICMAX1( N, X, 1 ) + ITER = 2 +* +* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. +* + 50 CONTINUE + DO 60 I = 1, N + X( I ) = CZERO + 60 CONTINUE + X( J ) = CONE + KASE = 1 + JUMP = 3 + RETURN +* +* ................ ENTRY (JUMP = 3) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 70 CONTINUE + CALL CCOPY( N, X, 1, V, 1 ) + ESTOLD = EST + EST = SCSUM1( N, V, 1 ) +* +* TEST FOR CYCLING. + IF( EST.LE.ESTOLD ) + $ GO TO 100 +* + DO 80 I = 1, N + ABSXI = ABS( X( I ) ) + IF( ABSXI.GT.SAFMIN ) THEN + X( I ) = CMPLX( REAL( X( I ) ) / ABSXI, + $ AIMAG( X( I ) ) / ABSXI ) + ELSE + X( I ) = CONE + END IF + 80 CONTINUE + KASE = 2 + JUMP = 4 + RETURN +* +* ................ ENTRY (JUMP = 4) +* X HAS BEEN OVERWRITTEN BY CTRANS(A)*X. +* + 90 CONTINUE + JLAST = J + J = ICMAX1( N, X, 1 ) + IF( ( ABS( X( JLAST ) ).NE.ABS( X( J ) ) ) .AND. + $ ( ITER.LT.ITMAX ) ) THEN + ITER = ITER + 1 + GO TO 50 + END IF +* +* ITERATION COMPLETE. FINAL STAGE. +* + 100 CONTINUE + ALTSGN = ONE + DO 110 I = 1, N + X( I ) = CMPLX( ALTSGN*( ONE+REAL( I-1 ) / REAL( N-1 ) ) ) + ALTSGN = -ALTSGN + 110 CONTINUE + KASE = 1 + JUMP = 5 + RETURN +* +* ................ ENTRY (JUMP = 5) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 120 CONTINUE + TEMP = TWO*( SCSUM1( N, X, 1 ) / REAL( 3*N ) ) + IF( TEMP.GT.EST ) THEN + CALL CCOPY( N, X, 1, V, 1 ) + EST = TEMP + END IF +* + 130 CONTINUE + KASE = 0 + RETURN +* +* End of CLACON +* + END diff --git a/costa/native/external/lapack/clacp2.f b/costa/native/external/lapack/clacp2.f new file mode 100644 index 000000000..ca7805ae6 --- /dev/null +++ b/costa/native/external/lapack/clacp2.f @@ -0,0 +1,92 @@ + SUBROUTINE CLACP2( UPLO, M, N, A, LDA, B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDB, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) + COMPLEX B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CLACP2 copies all or part of a real two-dimensional matrix A to a +* complex matrix B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies the part of the matrix A to be copied to B. +* = 'U': Upper triangular part +* = 'L': Lower triangular part +* Otherwise: All of the matrix A +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The m by n matrix A. If UPLO = 'U', only the upper trapezium +* is accessed; if UPLO = 'L', only the lower trapezium is +* accessed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (output) COMPLEX array, dimension (LDB,N) +* On exit, B = A in the locations specified by UPLO. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( J, M ) + B( I, J ) = A( I, J ) + 10 CONTINUE + 20 CONTINUE +* + ELSE IF( LSAME( UPLO, 'L' ) ) THEN + DO 40 J = 1, N + DO 30 I = J, M + B( I, J ) = A( I, J ) + 30 CONTINUE + 40 CONTINUE +* + ELSE + DO 60 J = 1, N + DO 50 I = 1, M + B( I, J ) = A( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + RETURN +* +* End of CLACP2 +* + END diff --git a/costa/native/external/lapack/clacpy.f b/costa/native/external/lapack/clacpy.f new file mode 100644 index 000000000..ca983e1c4 --- /dev/null +++ b/costa/native/external/lapack/clacpy.f @@ -0,0 +1,91 @@ + SUBROUTINE CLACPY( UPLO, M, N, A, LDA, B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDB, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CLACPY copies all or part of a two-dimensional matrix A to another +* matrix B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies the part of the matrix A to be copied to B. +* = 'U': Upper triangular part +* = 'L': Lower triangular part +* Otherwise: All of the matrix A +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The m by n matrix A. If UPLO = 'U', only the upper trapezium +* is accessed; if UPLO = 'L', only the lower trapezium is +* accessed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (output) COMPLEX array, dimension (LDB,N) +* On exit, B = A in the locations specified by UPLO. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( J, M ) + B( I, J ) = A( I, J ) + 10 CONTINUE + 20 CONTINUE +* + ELSE IF( LSAME( UPLO, 'L' ) ) THEN + DO 40 J = 1, N + DO 30 I = J, M + B( I, J ) = A( I, J ) + 30 CONTINUE + 40 CONTINUE +* + ELSE + DO 60 J = 1, N + DO 50 I = 1, M + B( I, J ) = A( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + RETURN +* +* End of CLACPY +* + END diff --git a/costa/native/external/lapack/clacrm.f b/costa/native/external/lapack/clacrm.f new file mode 100644 index 000000000..15647c1f9 --- /dev/null +++ b/costa/native/external/lapack/clacrm.f @@ -0,0 +1,111 @@ + SUBROUTINE CLACRM( M, N, A, LDA, B, LDB, C, LDC, RWORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER LDA, LDB, LDC, M, N +* .. +* .. Array Arguments .. + REAL B( LDB, * ), RWORK( * ) + COMPLEX A( LDA, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* CLACRM performs a very simple matrix-matrix multiplication: +* C := A * B, +* where A is M by N and complex; B is N by N and real; +* C is M by N and complex. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A and of the matrix C. +* M >= 0. +* +* N (input) INTEGER +* The number of columns and rows of the matrix B and +* the number of columns of the matrix C. +* N >= 0. +* +* A (input) COMPLEX array, dimension (LDA, N) +* A contains the M by N matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >=max(1,M). +* +* B (input) REAL array, dimension (LDB, N) +* B contains the N by N matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >=max(1,N). +* +* C (input) COMPLEX array, dimension (LDC, N) +* C contains the M by N matrix C. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >=max(1,N). +* +* RWORK (workspace) REAL array, dimension (2*M*N) +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E0, ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG, CMPLX, REAL +* .. +* .. External Subroutines .. + EXTERNAL SGEMM +* .. +* .. Executable Statements .. +* +* Quick return if possible. +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN +* + DO 20 J = 1, N + DO 10 I = 1, M + RWORK( ( J-1 )*M+I ) = REAL( A( I, J ) ) + 10 CONTINUE + 20 CONTINUE +* + L = M*N + 1 + CALL SGEMM( 'N', 'N', M, N, N, ONE, RWORK, M, B, LDB, ZERO, + $ RWORK( L ), M ) + DO 40 J = 1, N + DO 30 I = 1, M + C( I, J ) = RWORK( L+( J-1 )*M+I-1 ) + 30 CONTINUE + 40 CONTINUE +* + DO 60 J = 1, N + DO 50 I = 1, M + RWORK( ( J-1 )*M+I ) = AIMAG( A( I, J ) ) + 50 CONTINUE + 60 CONTINUE + CALL SGEMM( 'N', 'N', M, N, N, ONE, RWORK, M, B, LDB, ZERO, + $ RWORK( L ), M ) + DO 80 J = 1, N + DO 70 I = 1, M + C( I, J ) = CMPLX( REAL( C( I, J ) ), + $ RWORK( L+( J-1 )*M+I-1 ) ) + 70 CONTINUE + 80 CONTINUE +* + RETURN +* +* End of CLACRM +* + END diff --git a/costa/native/external/lapack/clacrt.f b/costa/native/external/lapack/clacrt.f new file mode 100644 index 000000000..5211de83a --- /dev/null +++ b/costa/native/external/lapack/clacrt.f @@ -0,0 +1,91 @@ + SUBROUTINE CLACRT( N, CX, INCX, CY, INCY, C, S ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + INTEGER INCX, INCY, N + COMPLEX C, S +* .. +* .. Array Arguments .. + COMPLEX CX( * ), CY( * ) +* .. +* +* Purpose +* ======= +* +* CLACRT performs the operation +* +* ( c s )( x ) ==> ( x ) +* ( -s c )( y ) ( y ) +* +* where c and s are complex and the vectors x and y are complex. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of elements in the vectors CX and CY. +* +* CX (input/output) COMPLEX array, dimension (N) +* On input, the vector x. +* On output, CX is overwritten with c*x + s*y. +* +* INCX (input) INTEGER +* The increment between successive values of CX. INCX <> 0. +* +* CY (input/output) COMPLEX array, dimension (N) +* On input, the vector y. +* On output, CY is overwritten with -s*x + c*y. +* +* INCY (input) INTEGER +* The increment between successive values of CY. INCY <> 0. +* +* C (input) COMPLEX +* S (input) COMPLEX +* C and S define the matrix +* [ C S ]. +* [ -S C ] +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IX, IY + COMPLEX CTEMP +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) + $ RETURN + IF( INCX.EQ.1 .AND. INCY.EQ.1 ) + $ GO TO 20 +* +* Code for unequal increments or equal increments not equal to 1 +* + IX = 1 + IY = 1 + IF( INCX.LT.0 ) + $ IX = ( -N+1 )*INCX + 1 + IF( INCY.LT.0 ) + $ IY = ( -N+1 )*INCY + 1 + DO 10 I = 1, N + CTEMP = C*CX( IX ) + S*CY( IY ) + CY( IY ) = C*CY( IY ) - S*CX( IX ) + CX( IX ) = CTEMP + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + RETURN +* +* Code for both increments equal to 1 +* + 20 CONTINUE + DO 30 I = 1, N + CTEMP = C*CX( I ) + S*CY( I ) + CY( I ) = C*CY( I ) - S*CX( I ) + CX( I ) = CTEMP + 30 CONTINUE + RETURN + END diff --git a/costa/native/external/lapack/cladiv.f b/costa/native/external/lapack/cladiv.f new file mode 100644 index 000000000..64d22f83f --- /dev/null +++ b/costa/native/external/lapack/cladiv.f @@ -0,0 +1,47 @@ + COMPLEX FUNCTION CLADIV( X, Y ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + COMPLEX X, Y +* .. +* +* Purpose +* ======= +* +* CLADIV := X / Y, where X and Y are complex. The computation of X / Y +* will not overflow on an intermediary step unless the results +* overflows. +* +* Arguments +* ========= +* +* X (input) COMPLEX +* Y (input) COMPLEX +* The complex scalars X and Y. +* +* ===================================================================== +* +* .. Local Scalars .. + REAL ZI, ZR +* .. +* .. External Subroutines .. + EXTERNAL SLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG, CMPLX, REAL +* .. +* .. Executable Statements .. +* + CALL SLADIV( REAL( X ), AIMAG( X ), REAL( Y ), AIMAG( Y ), ZR, + $ ZI ) + CLADIV = CMPLX( ZR, ZI ) +* + RETURN +* +* End of CLADIV +* + END diff --git a/costa/native/external/lapack/claed0.f b/costa/native/external/lapack/claed0.f new file mode 100644 index 000000000..6695249cc --- /dev/null +++ b/costa/native/external/lapack/claed0.f @@ -0,0 +1,289 @@ + SUBROUTINE CLAED0( QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS, RWORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDQ, LDQS, N, QSIZ +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL D( * ), E( * ), RWORK( * ) + COMPLEX Q( LDQ, * ), QSTORE( LDQS, * ) +* .. +* +* Purpose +* ======= +* +* Using the divide and conquer method, CLAED0 computes all eigenvalues +* of a symmetric tridiagonal matrix which is one diagonal block of +* those from reducing a dense or band Hermitian matrix and +* corresponding eigenvectors of the dense or band matrix. +* +* Arguments +* ========= +* +* QSIZ (input) INTEGER +* The dimension of the unitary matrix used to reduce +* the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. +* +* N (input) INTEGER +* The dimension of the symmetric tridiagonal matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the diagonal elements of the tridiagonal matrix. +* On exit, the eigenvalues in ascending order. +* +* E (input/output) REAL array, dimension (N-1) +* On entry, the off-diagonal elements of the tridiagonal matrix. +* On exit, E has been destroyed. +* +* Q (input/output) COMPLEX array, dimension (LDQ,N) +* On entry, Q must contain an QSIZ x N matrix whose columns +* unitarily orthonormal. It is a part of the unitary matrix +* that reduces the full dense Hermitian matrix to a +* (reducible) symmetric tridiagonal matrix. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* IWORK (workspace) INTEGER array, +* the dimension of IWORK must be at least +* 6 + 6*N + 5*N*lg N +* ( lg( N ) = smallest integer k +* such that 2^k >= N ) +* +* RWORK (workspace) REAL array, +* dimension (1 + 3*N + 2*N*lg N + 3*N**2) +* ( lg( N ) = smallest integer k +* such that 2^k >= N ) +* +* QSTORE (workspace) COMPLEX array, dimension (LDQS, N) +* Used to store parts of +* the eigenvector matrix when the updating matrix multiplies +* take place. +* +* LDQS (input) INTEGER +* The leading dimension of the array QSTORE. +* LDQS >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: The algorithm failed to compute an eigenvalue while +* working on the submatrix lying in rows and columns +* INFO/(N+1) through mod(INFO,N+1). +* +* ===================================================================== +* +* Warning: N could be as big as QSIZ! +* +* .. Parameters .. + REAL TWO + PARAMETER ( TWO = 2.E+0 ) +* .. +* .. Local Scalars .. + INTEGER CURLVL, CURPRB, CURR, I, IGIVCL, IGIVNM, + $ IGIVPT, INDXQ, IPERM, IPRMPT, IQ, IQPTR, IWREM, + $ J, K, LGN, LL, MATSIZ, MSD2, SMLSIZ, SMM1, + $ SPM1, SPM2, SUBMAT, SUBPBS, TLVLS + REAL TEMP +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLACRM, CLAED7, SCOPY, SSTEQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* +* IF( ICOMPQ .LT. 0 .OR. ICOMPQ .GT. 2 ) THEN +* INFO = -1 +* ELSE IF( ( ICOMPQ .EQ. 1 ) .AND. ( QSIZ .LT. MAX( 0, N ) ) ) +* $ THEN + IF( QSIZ.LT.MAX( 0, N ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDQS.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLAED0', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + SMLSIZ = ILAENV( 9, 'CLAED0', ' ', 0, 0, 0, 0 ) +* +* Determine the size and placement of the submatrices, and save in +* the leading elements of IWORK. +* + IWORK( 1 ) = N + SUBPBS = 1 + TLVLS = 0 + 10 CONTINUE + IF( IWORK( SUBPBS ).GT.SMLSIZ ) THEN + DO 20 J = SUBPBS, 1, -1 + IWORK( 2*J ) = ( IWORK( J )+1 ) / 2 + IWORK( 2*J-1 ) = IWORK( J ) / 2 + 20 CONTINUE + TLVLS = TLVLS + 1 + SUBPBS = 2*SUBPBS + GO TO 10 + END IF + DO 30 J = 2, SUBPBS + IWORK( J ) = IWORK( J ) + IWORK( J-1 ) + 30 CONTINUE +* +* Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 +* using rank-1 modifications (cuts). +* + SPM1 = SUBPBS - 1 + DO 40 I = 1, SPM1 + SUBMAT = IWORK( I ) + 1 + SMM1 = SUBMAT - 1 + D( SMM1 ) = D( SMM1 ) - ABS( E( SMM1 ) ) + D( SUBMAT ) = D( SUBMAT ) - ABS( E( SMM1 ) ) + 40 CONTINUE +* + INDXQ = 4*N + 3 +* +* Set up workspaces for eigenvalues only/accumulate new vectors +* routine +* + TEMP = LOG( REAL( N ) ) / LOG( TWO ) + LGN = INT( TEMP ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IPRMPT = INDXQ + N + 1 + IPERM = IPRMPT + N*LGN + IQPTR = IPERM + N*LGN + IGIVPT = IQPTR + N + 2 + IGIVCL = IGIVPT + N*LGN +* + IGIVNM = 1 + IQ = IGIVNM + 2*N*LGN + IWREM = IQ + N**2 + 1 +* Initialize pointers + DO 50 I = 0, SUBPBS + IWORK( IPRMPT+I ) = 1 + IWORK( IGIVPT+I ) = 1 + 50 CONTINUE + IWORK( IQPTR ) = 1 +* +* Solve each submatrix eigenproblem at the bottom of the divide and +* conquer tree. +* + CURR = 0 + DO 70 I = 0, SPM1 + IF( I.EQ.0 ) THEN + SUBMAT = 1 + MATSIZ = IWORK( 1 ) + ELSE + SUBMAT = IWORK( I ) + 1 + MATSIZ = IWORK( I+1 ) - IWORK( I ) + END IF + LL = IQ - 1 + IWORK( IQPTR+CURR ) + CALL SSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ), + $ RWORK( LL ), MATSIZ, RWORK, INFO ) + CALL CLACRM( QSIZ, MATSIZ, Q( 1, SUBMAT ), LDQ, RWORK( LL ), + $ MATSIZ, QSTORE( 1, SUBMAT ), LDQS, + $ RWORK( IWREM ) ) + IWORK( IQPTR+CURR+1 ) = IWORK( IQPTR+CURR ) + MATSIZ**2 + CURR = CURR + 1 + IF( INFO.GT.0 ) THEN + INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1 + RETURN + END IF + K = 1 + DO 60 J = SUBMAT, IWORK( I+1 ) + IWORK( INDXQ+J ) = K + K = K + 1 + 60 CONTINUE + 70 CONTINUE +* +* Successively merge eigensystems of adjacent submatrices +* into eigensystem for the corresponding larger matrix. +* +* while ( SUBPBS > 1 ) +* + CURLVL = 1 + 80 CONTINUE + IF( SUBPBS.GT.1 ) THEN + SPM2 = SUBPBS - 2 + DO 90 I = 0, SPM2, 2 + IF( I.EQ.0 ) THEN + SUBMAT = 1 + MATSIZ = IWORK( 2 ) + MSD2 = IWORK( 1 ) + CURPRB = 0 + ELSE + SUBMAT = IWORK( I ) + 1 + MATSIZ = IWORK( I+2 ) - IWORK( I ) + MSD2 = MATSIZ / 2 + CURPRB = CURPRB + 1 + END IF +* +* Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) +* into an eigensystem of size MATSIZ. CLAED7 handles the case +* when the eigenvectors of a full or band Hermitian matrix (which +* was reduced to tridiagonal form) are desired. +* +* I am free to use Q as a valuable working space until Loop 150. +* + CALL CLAED7( MATSIZ, MSD2, QSIZ, TLVLS, CURLVL, CURPRB, + $ D( SUBMAT ), QSTORE( 1, SUBMAT ), LDQS, + $ E( SUBMAT+MSD2-1 ), IWORK( INDXQ+SUBMAT ), + $ RWORK( IQ ), IWORK( IQPTR ), IWORK( IPRMPT ), + $ IWORK( IPERM ), IWORK( IGIVPT ), + $ IWORK( IGIVCL ), RWORK( IGIVNM ), + $ Q( 1, SUBMAT ), RWORK( IWREM ), + $ IWORK( SUBPBS+1 ), INFO ) + IF( INFO.GT.0 ) THEN + INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1 + RETURN + END IF + IWORK( I / 2+1 ) = IWORK( I+2 ) + 90 CONTINUE + SUBPBS = SUBPBS / 2 + CURLVL = CURLVL + 1 + GO TO 80 + END IF +* +* end while +* +* Re-merge the eigenvalues/vectors which were deflated at the final +* merge step. +* + DO 100 I = 1, N + J = IWORK( INDXQ+I ) + RWORK( I ) = D( J ) + CALL CCOPY( QSIZ, QSTORE( 1, J ), 1, Q( 1, I ), 1 ) + 100 CONTINUE + CALL SCOPY( N, RWORK, 1, D, 1 ) +* + RETURN +* +* End of CLAED0 +* + END diff --git a/costa/native/external/lapack/claed7.f b/costa/native/external/lapack/claed7.f new file mode 100644 index 000000000..83af549ed --- /dev/null +++ b/costa/native/external/lapack/claed7.f @@ -0,0 +1,267 @@ + SUBROUTINE CLAED7( N, CUTPNT, QSIZ, TLVLS, CURLVL, CURPBM, D, Q, + $ LDQ, RHO, INDXQ, QSTORE, QPTR, PRMPTR, PERM, + $ GIVPTR, GIVCOL, GIVNUM, WORK, RWORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER CURLVL, CURPBM, CUTPNT, INFO, LDQ, N, QSIZ, + $ TLVLS + REAL RHO +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), GIVPTR( * ), INDXQ( * ), + $ IWORK( * ), PERM( * ), PRMPTR( * ), QPTR( * ) + REAL D( * ), GIVNUM( 2, * ), QSTORE( * ), RWORK( * ) + COMPLEX Q( LDQ, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CLAED7 computes the updated eigensystem of a diagonal +* matrix after modification by a rank-one symmetric matrix. This +* routine is used only for the eigenproblem which requires all +* eigenvalues and optionally eigenvectors of a dense or banded +* Hermitian matrix that has been reduced to tridiagonal form. +* +* T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) +* +* where Z = Q'u, u is a vector of length N with ones in the +* CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. +* +* The eigenvectors of the original matrix are stored in Q, and the +* eigenvalues are in D. The algorithm consists of three stages: +* +* The first stage consists of deflating the size of the problem +* when there are multiple eigenvalues or if there is a zero in +* the Z vector. For each such occurence the dimension of the +* secular equation problem is reduced by one. This stage is +* performed by the routine SLAED2. +* +* The second stage consists of calculating the updated +* eigenvalues. This is done by finding the roots of the secular +* equation via the routine SLAED4 (as called by SLAED3). +* This routine also calculates the eigenvectors of the current +* problem. +* +* The final stage consists of computing the updated eigenvectors +* directly using the updated eigenvalues. The eigenvectors for +* the current problem are multiplied with the eigenvectors from +* the overall problem. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The dimension of the symmetric tridiagonal matrix. N >= 0. +* +* CUTPNT (input) INTEGER +* Contains the location of the last eigenvalue in the leading +* sub-matrix. min(1,N) <= CUTPNT <= N. +* +* QSIZ (input) INTEGER +* The dimension of the unitary matrix used to reduce +* the full matrix to tridiagonal form. QSIZ >= N. +* +* TLVLS (input) INTEGER +* The total number of merging levels in the overall divide and +* conquer tree. +* +* CURLVL (input) INTEGER +* The current level in the overall merge routine, +* 0 <= curlvl <= tlvls. +* +* CURPBM (input) INTEGER +* The current problem in the current level in the overall +* merge routine (counting from upper left to lower right). +* +* D (input/output) REAL array, dimension (N) +* On entry, the eigenvalues of the rank-1-perturbed matrix. +* On exit, the eigenvalues of the repaired matrix. +* +* Q (input/output) COMPLEX array, dimension (LDQ,N) +* On entry, the eigenvectors of the rank-1-perturbed matrix. +* On exit, the eigenvectors of the repaired tridiagonal matrix. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* RHO (input) REAL +* Contains the subdiagonal element used to create the rank-1 +* modification. +* +* INDXQ (output) INTEGER array, dimension (N) +* This contains the permutation which will reintegrate the +* subproblem just solved back into sorted order, +* ie. D( INDXQ( I = 1, N ) ) will be in ascending order. +* +* IWORK (workspace) INTEGER array, dimension (4*N) +* +* RWORK (workspace) REAL array, +* dimension (3*N+2*QSIZ*N) +* +* WORK (workspace) COMPLEX array, dimension (QSIZ*N) +* +* QSTORE (input/output) REAL array, dimension (N**2+1) +* Stores eigenvectors of submatrices encountered during +* divide and conquer, packed together. QPTR points to +* beginning of the submatrices. +* +* QPTR (input/output) INTEGER array, dimension (N+2) +* List of indices pointing to beginning of submatrices stored +* in QSTORE. The submatrices are numbered starting at the +* bottom left of the divide and conquer tree, from left to +* right and bottom to top. +* +* PRMPTR (input) INTEGER array, dimension (N lg N) +* Contains a list of pointers which indicate where in PERM a +* level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) +* indicates the size of the permutation and also the size of +* the full, non-deflated problem. +* +* PERM (input) INTEGER array, dimension (N lg N) +* Contains the permutations (from deflation and sorting) to be +* applied to each eigenblock. +* +* GIVPTR (input) INTEGER array, dimension (N lg N) +* Contains a list of pointers which indicate where in GIVCOL a +* level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) +* indicates the number of Givens rotations. +* +* GIVCOL (input) INTEGER array, dimension (2, N lg N) +* Each pair of numbers indicates a pair of columns to take place +* in a Givens rotation. +* +* GIVNUM (input) REAL array, dimension (2, N lg N) +* Each number indicates the S value to be used in the +* corresponding Givens rotation. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an eigenvalue did not converge +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER COLTYP, CURR, I, IDLMDA, IND1, IND2, INDX, + $ INDXC, INDXP, IQ, IW, IZ, K, N1, N2, PTR +* .. +* .. External Subroutines .. + EXTERNAL CLACRM, CLAED8, SLAED9, SLAEDA, SLAMRG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* +* IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN +* INFO = -1 +* ELSE IF( N.LT.0 ) THEN + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( MIN( 1, N ).GT.CUTPNT .OR. N.LT.CUTPNT ) THEN + INFO = -2 + ELSE IF( QSIZ.LT.N ) THEN + INFO = -3 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLAED7', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* The following values are for bookkeeping purposes only. They are +* integer pointers which indicate the portion of the workspace +* used by a particular array in SLAED2 and SLAED3. +* + IZ = 1 + IDLMDA = IZ + N + IW = IDLMDA + N + IQ = IW + N +* + INDX = 1 + INDXC = INDX + N + COLTYP = INDXC + N + INDXP = COLTYP + N +* +* Form the z-vector which consists of the last row of Q_1 and the +* first row of Q_2. +* + PTR = 1 + 2**TLVLS + DO 10 I = 1, CURLVL - 1 + PTR = PTR + 2**( TLVLS-I ) + 10 CONTINUE + CURR = PTR + CURPBM + CALL SLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR, + $ GIVCOL, GIVNUM, QSTORE, QPTR, RWORK( IZ ), + $ RWORK( IZ+N ), INFO ) +* +* When solving the final problem, we no longer need the stored data, +* so we will overwrite the data from this level onto the previously +* used storage space. +* + IF( CURLVL.EQ.TLVLS ) THEN + QPTR( CURR ) = 1 + PRMPTR( CURR ) = 1 + GIVPTR( CURR ) = 1 + END IF +* +* Sort and Deflate eigenvalues. +* + CALL CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, RWORK( IZ ), + $ RWORK( IDLMDA ), WORK, QSIZ, RWORK( IW ), + $ IWORK( INDXP ), IWORK( INDX ), INDXQ, + $ PERM( PRMPTR( CURR ) ), GIVPTR( CURR+1 ), + $ GIVCOL( 1, GIVPTR( CURR ) ), + $ GIVNUM( 1, GIVPTR( CURR ) ), INFO ) + PRMPTR( CURR+1 ) = PRMPTR( CURR ) + N + GIVPTR( CURR+1 ) = GIVPTR( CURR+1 ) + GIVPTR( CURR ) +* +* Solve Secular Equation. +* + IF( K.NE.0 ) THEN + CALL SLAED9( K, 1, K, N, D, RWORK( IQ ), K, RHO, + $ RWORK( IDLMDA ), RWORK( IW ), + $ QSTORE( QPTR( CURR ) ), K, INFO ) + CALL CLACRM( QSIZ, K, WORK, QSIZ, QSTORE( QPTR( CURR ) ), K, Q, + $ LDQ, RWORK( IQ ) ) + QPTR( CURR+1 ) = QPTR( CURR ) + K**2 + IF( INFO.NE.0 ) THEN + RETURN + END IF +* +* Prepare the INDXQ sorting premutation. +* + N1 = K + N2 = N - K + IND1 = 1 + IND2 = N + CALL SLAMRG( N1, N2, D, 1, -1, INDXQ ) + ELSE + QPTR( CURR+1 ) = QPTR( CURR ) + DO 20 I = 1, N + INDXQ( I ) = I + 20 CONTINUE + END IF +* + RETURN +* +* End of CLAED7 +* + END diff --git a/costa/native/external/lapack/claed8.f b/costa/native/external/lapack/claed8.f new file mode 100644 index 000000000..0b3c222dd --- /dev/null +++ b/costa/native/external/lapack/claed8.f @@ -0,0 +1,364 @@ + SUBROUTINE CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA, + $ Q2, LDQ2, W, INDXP, INDX, INDXQ, PERM, GIVPTR, + $ GIVCOL, GIVNUM, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER CUTPNT, GIVPTR, INFO, K, LDQ, LDQ2, N, QSIZ + REAL RHO +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), INDX( * ), INDXP( * ), + $ INDXQ( * ), PERM( * ) + REAL D( * ), DLAMDA( * ), GIVNUM( 2, * ), W( * ), + $ Z( * ) + COMPLEX Q( LDQ, * ), Q2( LDQ2, * ) +* .. +* +* Purpose +* ======= +* +* CLAED8 merges the two sets of eigenvalues together into a single +* sorted set. Then it tries to deflate the size of the problem. +* There are two ways in which deflation can occur: when two or more +* eigenvalues are close together or if there is a tiny element in the +* Z vector. For each such occurrence the order of the related secular +* equation problem is reduced by one. +* +* Arguments +* ========= +* +* K (output) INTEGER +* Contains the number of non-deflated eigenvalues. +* This is the order of the related secular equation. +* +* N (input) INTEGER +* The dimension of the symmetric tridiagonal matrix. N >= 0. +* +* QSIZ (input) INTEGER +* The dimension of the unitary matrix used to reduce +* the dense or band matrix to tridiagonal form. +* QSIZ >= N if ICOMPQ = 1. +* +* Q (input/output) COMPLEX array, dimension (LDQ,N) +* On entry, Q contains the eigenvectors of the partially solved +* system which has been previously updated in matrix +* multiplies with other partially solved eigensystems. +* On exit, Q contains the trailing (N-K) updated eigenvectors +* (those which were deflated) in its last N-K columns. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max( 1, N ). +* +* D (input/output) REAL array, dimension (N) +* On entry, D contains the eigenvalues of the two submatrices to +* be combined. On exit, D contains the trailing (N-K) updated +* eigenvalues (those which were deflated) sorted into increasing +* order. +* +* RHO (input/output) REAL +* Contains the off diagonal element associated with the rank-1 +* cut which originally split the two submatrices which are now +* being recombined. RHO is modified during the computation to +* the value required by SLAED3. +* +* CUTPNT (input) INTEGER +* Contains the location of the last eigenvalue in the leading +* sub-matrix. MIN(1,N) <= CUTPNT <= N. +* +* Z (input) REAL array, dimension (N) +* On input this vector contains the updating vector (the last +* row of the first sub-eigenvector matrix and the first row of +* the second sub-eigenvector matrix). The contents of Z are +* destroyed during the updating process. +* +* DLAMDA (output) REAL array, dimension (N) +* Contains a copy of the first K eigenvalues which will be used +* by SLAED3 to form the secular equation. +* +* Q2 (output) COMPLEX array, dimension (LDQ2,N) +* If ICOMPQ = 0, Q2 is not referenced. Otherwise, +* Contains a copy of the first K eigenvectors which will be used +* by SLAED7 in a matrix multiply (SGEMM) to update the new +* eigenvectors. +* +* LDQ2 (input) INTEGER +* The leading dimension of the array Q2. LDQ2 >= max( 1, N ). +* +* W (output) REAL array, dimension (N) +* This will hold the first k values of the final +* deflation-altered z-vector and will be passed to SLAED3. +* +* INDXP (workspace) INTEGER array, dimension (N) +* This will contain the permutation used to place deflated +* values of D at the end of the array. On output INDXP(1:K) +* points to the nondeflated D-values and INDXP(K+1:N) +* points to the deflated eigenvalues. +* +* INDX (workspace) INTEGER array, dimension (N) +* This will contain the permutation used to sort the contents of +* D into ascending order. +* +* INDXQ (input) INTEGER array, dimension (N) +* This contains the permutation which separately sorts the two +* sub-problems in D into ascending order. Note that elements in +* the second half of this permutation must first have CUTPNT +* added to their values in order to be accurate. +* +* PERM (output) INTEGER array, dimension (N) +* Contains the permutations (from deflation and sorting) to be +* applied to each eigenblock. +* +* GIVPTR (output) INTEGER +* Contains the number of Givens rotations which took place in +* this subproblem. +* +* GIVCOL (output) INTEGER array, dimension (2, N) +* Each pair of numbers indicates a pair of columns to take place +* in a Givens rotation. +* +* GIVNUM (output) REAL array, dimension (2, N) +* Each number indicates the S value to be used in the +* corresponding Givens rotation. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + REAL MONE, ZERO, ONE, TWO, EIGHT + PARAMETER ( MONE = -1.0E0, ZERO = 0.0E0, ONE = 1.0E0, + $ TWO = 2.0E0, EIGHT = 8.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, IMAX, J, JLAM, JMAX, JP, K2, N1, N1P1, N2 + REAL C, EPS, S, T, TAU, TOL +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH, SLAPY2 + EXTERNAL ISAMAX, SLAMCH, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLACPY, CSROT, SCOPY, SLAMRG, SSCAL, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( QSIZ.LT.N ) THEN + INFO = -3 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( CUTPNT.LT.MIN( 1, N ) .OR. CUTPNT.GT.N ) THEN + INFO = -8 + ELSE IF( LDQ2.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLAED8', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + N1 = CUTPNT + N2 = N - N1 + N1P1 = N1 + 1 +* + IF( RHO.LT.ZERO ) THEN + CALL SSCAL( N2, MONE, Z( N1P1 ), 1 ) + END IF +* +* Normalize z so that norm(z) = 1 +* + T = ONE / SQRT( TWO ) + DO 10 J = 1, N + INDX( J ) = J + 10 CONTINUE + CALL SSCAL( N, T, Z, 1 ) + RHO = ABS( TWO*RHO ) +* +* Sort the eigenvalues into increasing order +* + DO 20 I = CUTPNT + 1, N + INDXQ( I ) = INDXQ( I ) + CUTPNT + 20 CONTINUE + DO 30 I = 1, N + DLAMDA( I ) = D( INDXQ( I ) ) + W( I ) = Z( INDXQ( I ) ) + 30 CONTINUE + I = 1 + J = CUTPNT + 1 + CALL SLAMRG( N1, N2, DLAMDA, 1, 1, INDX ) + DO 40 I = 1, N + D( I ) = DLAMDA( INDX( I ) ) + Z( I ) = W( INDX( I ) ) + 40 CONTINUE +* +* Calculate the allowable deflation tolerance +* + IMAX = ISAMAX( N, Z, 1 ) + JMAX = ISAMAX( N, D, 1 ) + EPS = SLAMCH( 'Epsilon' ) + TOL = EIGHT*EPS*ABS( D( JMAX ) ) +* +* If the rank-1 modifier is small enough, no more needs to be done +* -- except to reorganize Q so that its columns correspond with the +* elements in D. +* + IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN + K = 0 + DO 50 J = 1, N + PERM( J ) = INDXQ( INDX( J ) ) + CALL CCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 ) + 50 CONTINUE + CALL CLACPY( 'A', QSIZ, N, Q2( 1, 1 ), LDQ2, Q( 1, 1 ), LDQ ) + RETURN + END IF +* +* If there are multiple eigenvalues then the problem deflates. Here +* the number of equal eigenvalues are found. As each equal +* eigenvalue is found, an elementary reflector is computed to rotate +* the corresponding eigensubspace so that the corresponding +* components of Z are zero in this new basis. +* + K = 0 + GIVPTR = 0 + K2 = N + 1 + DO 60 J = 1, N + IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + INDXP( K2 ) = J + IF( J.EQ.N ) + $ GO TO 100 + ELSE + JLAM = J + GO TO 70 + END IF + 60 CONTINUE + 70 CONTINUE + J = J + 1 + IF( J.GT.N ) + $ GO TO 90 + IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + INDXP( K2 ) = J + ELSE +* +* Check if eigenvalues are close enough to allow deflation. +* + S = Z( JLAM ) + C = Z( J ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = SLAPY2( C, S ) + T = D( J ) - D( JLAM ) + C = C / TAU + S = -S / TAU + IF( ABS( T*C*S ).LE.TOL ) THEN +* +* Deflation is possible. +* + Z( J ) = TAU + Z( JLAM ) = ZERO +* +* Record the appropriate Givens rotation +* + GIVPTR = GIVPTR + 1 + GIVCOL( 1, GIVPTR ) = INDXQ( INDX( JLAM ) ) + GIVCOL( 2, GIVPTR ) = INDXQ( INDX( J ) ) + GIVNUM( 1, GIVPTR ) = C + GIVNUM( 2, GIVPTR ) = S + CALL CSROT( QSIZ, Q( 1, INDXQ( INDX( JLAM ) ) ), 1, + $ Q( 1, INDXQ( INDX( J ) ) ), 1, C, S ) + T = D( JLAM )*C*C + D( J )*S*S + D( J ) = D( JLAM )*S*S + D( J )*C*C + D( JLAM ) = T + K2 = K2 - 1 + I = 1 + 80 CONTINUE + IF( K2+I.LE.N ) THEN + IF( D( JLAM ).LT.D( INDXP( K2+I ) ) ) THEN + INDXP( K2+I-1 ) = INDXP( K2+I ) + INDXP( K2+I ) = JLAM + I = I + 1 + GO TO 80 + ELSE + INDXP( K2+I-1 ) = JLAM + END IF + ELSE + INDXP( K2+I-1 ) = JLAM + END IF + JLAM = J + ELSE + K = K + 1 + W( K ) = Z( JLAM ) + DLAMDA( K ) = D( JLAM ) + INDXP( K ) = JLAM + JLAM = J + END IF + END IF + GO TO 70 + 90 CONTINUE +* +* Record the last eigenvalue. +* + K = K + 1 + W( K ) = Z( JLAM ) + DLAMDA( K ) = D( JLAM ) + INDXP( K ) = JLAM +* + 100 CONTINUE +* +* Sort the eigenvalues and corresponding eigenvectors into DLAMDA +* and Q2 respectively. The eigenvalues/vectors which were not +* deflated go into the first K slots of DLAMDA and Q2 respectively, +* while those which were deflated go into the last N - K slots. +* + DO 110 J = 1, N + JP = INDXP( J ) + DLAMDA( J ) = D( JP ) + PERM( J ) = INDXQ( INDX( JP ) ) + CALL CCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 ) + 110 CONTINUE +* +* The deflated eigenvalues and their corresponding vectors go back +* into the last N - K slots of D and Q respectively. +* + IF( K.LT.N ) THEN + CALL SCOPY( N-K, DLAMDA( K+1 ), 1, D( K+1 ), 1 ) + CALL CLACPY( 'A', QSIZ, N-K, Q2( 1, K+1 ), LDQ2, Q( 1, K+1 ), + $ LDQ ) + END IF +* + RETURN +* +* End of CLAED8 +* + END diff --git a/costa/native/external/lapack/claein.f b/costa/native/external/lapack/claein.f new file mode 100644 index 000000000..85e6c7e72 --- /dev/null +++ b/costa/native/external/lapack/claein.f @@ -0,0 +1,264 @@ + SUBROUTINE CLAEIN( RIGHTV, NOINIT, N, H, LDH, W, V, B, LDB, RWORK, + $ EPS3, SMLNUM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + LOGICAL NOINIT, RIGHTV + INTEGER INFO, LDB, LDH, N + REAL EPS3, SMLNUM + COMPLEX W +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX B( LDB, * ), H( LDH, * ), V( * ) +* .. +* +* Purpose +* ======= +* +* CLAEIN uses inverse iteration to find a right or left eigenvector +* corresponding to the eigenvalue W of a complex upper Hessenberg +* matrix H. +* +* Arguments +* ========= +* +* RIGHTV (input) LOGICAL +* = .TRUE. : compute right eigenvector; +* = .FALSE.: compute left eigenvector. +* +* NOINIT (input) LOGICAL +* = .TRUE. : no initial vector supplied in V +* = .FALSE.: initial vector supplied in V. +* +* N (input) INTEGER +* The order of the matrix H. N >= 0. +* +* H (input) COMPLEX array, dimension (LDH,N) +* The upper Hessenberg matrix H. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH >= max(1,N). +* +* W (input) COMPLEX +* The eigenvalue of H whose corresponding right or left +* eigenvector is to be computed. +* +* V (input/output) COMPLEX array, dimension (N) +* On entry, if NOINIT = .FALSE., V must contain a starting +* vector for inverse iteration; otherwise V need not be set. +* On exit, V contains the computed eigenvector, normalized so +* that the component of largest magnitude has magnitude 1; here +* the magnitude of a complex number (x,y) is taken to be +* |x| + |y|. +* +* B (workspace) COMPLEX array, dimension (LDB,N) +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* RWORK (workspace) REAL array, dimension (N) +* +* EPS3 (input) REAL +* A small machine-dependent value which is used to perturb +* close eigenvalues, and to replace zero pivots. +* +* SMLNUM (input) REAL +* A machine-dependent value close to the underflow threshold. +* +* INFO (output) INTEGER +* = 0: successful exit +* = 1: inverse iteration did not converge; V is set to the +* last iterate. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, TENTH + PARAMETER ( ONE = 1.0E+0, TENTH = 1.0E-1 ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + CHARACTER NORMIN, TRANS + INTEGER I, IERR, ITS, J + REAL GROWTO, NRMSML, ROOTN, RTEMP, SCALE, VNORM + COMPLEX CDUM, EI, EJ, TEMP, X +* .. +* .. External Functions .. + INTEGER ICAMAX + REAL SCASUM, SCNRM2 + COMPLEX CLADIV + EXTERNAL ICAMAX, SCASUM, SCNRM2, CLADIV +* .. +* .. External Subroutines .. + EXTERNAL CLATRS, CSSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL, SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* GROWTO is the threshold used in the acceptance test for an +* eigenvector. +* + ROOTN = SQRT( REAL( N ) ) + GROWTO = TENTH / ROOTN + NRMSML = MAX( ONE, EPS3*ROOTN )*SMLNUM +* +* Form B = H - W*I (except that the subdiagonal elements are not +* stored). +* + DO 20 J = 1, N + DO 10 I = 1, J - 1 + B( I, J ) = H( I, J ) + 10 CONTINUE + B( J, J ) = H( J, J ) - W + 20 CONTINUE +* + IF( NOINIT ) THEN +* +* Initialize V. +* + DO 30 I = 1, N + V( I ) = EPS3 + 30 CONTINUE + ELSE +* +* Scale supplied initial vector. +* + VNORM = SCNRM2( N, V, 1 ) + CALL CSSCAL( N, ( EPS3*ROOTN ) / MAX( VNORM, NRMSML ), V, 1 ) + END IF +* + IF( RIGHTV ) THEN +* +* LU decomposition with partial pivoting of B, replacing zero +* pivots by EPS3. +* + DO 60 I = 1, N - 1 + EI = H( I+1, I ) + IF( CABS1( B( I, I ) ).LT.CABS1( EI ) ) THEN +* +* Interchange rows and eliminate. +* + X = CLADIV( B( I, I ), EI ) + B( I, I ) = EI + DO 40 J = I + 1, N + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - X*TEMP + B( I, J ) = TEMP + 40 CONTINUE + ELSE +* +* Eliminate without interchange. +* + IF( B( I, I ).EQ.ZERO ) + $ B( I, I ) = EPS3 + X = CLADIV( EI, B( I, I ) ) + IF( X.NE.ZERO ) THEN + DO 50 J = I + 1, N + B( I+1, J ) = B( I+1, J ) - X*B( I, J ) + 50 CONTINUE + END IF + END IF + 60 CONTINUE + IF( B( N, N ).EQ.ZERO ) + $ B( N, N ) = EPS3 +* + TRANS = 'N' +* + ELSE +* +* UL decomposition with partial pivoting of B, replacing zero +* pivots by EPS3. +* + DO 90 J = N, 2, -1 + EJ = H( J, J-1 ) + IF( CABS1( B( J, J ) ).LT.CABS1( EJ ) ) THEN +* +* Interchange columns and eliminate. +* + X = CLADIV( B( J, J ), EJ ) + B( J, J ) = EJ + DO 70 I = 1, J - 1 + TEMP = B( I, J-1 ) + B( I, J-1 ) = B( I, J ) - X*TEMP + B( I, J ) = TEMP + 70 CONTINUE + ELSE +* +* Eliminate without interchange. +* + IF( B( J, J ).EQ.ZERO ) + $ B( J, J ) = EPS3 + X = CLADIV( EJ, B( J, J ) ) + IF( X.NE.ZERO ) THEN + DO 80 I = 1, J - 1 + B( I, J-1 ) = B( I, J-1 ) - X*B( I, J ) + 80 CONTINUE + END IF + END IF + 90 CONTINUE + IF( B( 1, 1 ).EQ.ZERO ) + $ B( 1, 1 ) = EPS3 +* + TRANS = 'C' +* + END IF +* + NORMIN = 'N' + DO 110 ITS = 1, N +* +* Solve U*x = scale*v for a right eigenvector +* or U'*x = scale*v for a left eigenvector, +* overwriting x on v. +* + CALL CLATRS( 'Upper', TRANS, 'Nonunit', NORMIN, N, B, LDB, V, + $ SCALE, RWORK, IERR ) + NORMIN = 'Y' +* +* Test for sufficient growth in the norm of v. +* + VNORM = SCASUM( N, V, 1 ) + IF( VNORM.GE.GROWTO*SCALE ) + $ GO TO 120 +* +* Choose new orthogonal starting vector and try again. +* + RTEMP = EPS3 / ( ROOTN+ONE ) + V( 1 ) = EPS3 + DO 100 I = 2, N + V( I ) = RTEMP + 100 CONTINUE + V( N-ITS+1 ) = V( N-ITS+1 ) - EPS3*ROOTN + 110 CONTINUE +* +* Failure to find eigenvector in N iterations. +* + INFO = 1 +* + 120 CONTINUE +* +* Normalize eigenvector. +* + I = ICAMAX( N, V, 1 ) + CALL CSSCAL( N, ONE / CABS1( V( I ) ), V, 1 ) +* + RETURN +* +* End of CLAEIN +* + END diff --git a/costa/native/external/lapack/claesy.f b/costa/native/external/lapack/claesy.f new file mode 100644 index 000000000..15dc18685 --- /dev/null +++ b/costa/native/external/lapack/claesy.f @@ -0,0 +1,153 @@ + SUBROUTINE CLAESY( A, B, C, RT1, RT2, EVSCAL, CS1, SN1 ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + COMPLEX A, B, C, CS1, EVSCAL, RT1, RT2, SN1 +* .. +* +* Purpose +* ======= +* +* CLAESY computes the eigendecomposition of a 2-by-2 symmetric matrix +* ( ( A, B );( B, C ) ) +* provided the norm of the matrix of eigenvectors is larger than +* some threshold value. +* +* RT1 is the eigenvalue of larger absolute value, and RT2 of +* smaller absolute value. If the eigenvectors are computed, then +* on return ( CS1, SN1 ) is the unit eigenvector for RT1, hence +* +* [ CS1 SN1 ] . [ A B ] . [ CS1 -SN1 ] = [ RT1 0 ] +* [ -SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ] +* +* Arguments +* ========= +* +* A (input) COMPLEX +* The ( 1, 1 ) element of input matrix. +* +* B (input) COMPLEX +* The ( 1, 2 ) element of input matrix. The ( 2, 1 ) element +* is also given by B, since the 2-by-2 matrix is symmetric. +* +* C (input) COMPLEX +* The ( 2, 2 ) element of input matrix. +* +* RT1 (output) COMPLEX +* The eigenvalue of larger modulus. +* +* RT2 (output) COMPLEX +* The eigenvalue of smaller modulus. +* +* EVSCAL (output) COMPLEX +* The complex value by which the eigenvector matrix was scaled +* to make it orthonormal. If EVSCAL is zero, the eigenvectors +* were not computed. This means one of two things: the 2-by-2 +* matrix could not be diagonalized, or the norm of the matrix +* of eigenvectors before scaling was larger than the threshold +* value THRESH (set below). +* +* CS1 (output) COMPLEX +* SN1 (output) COMPLEX +* If EVSCAL .NE. 0, ( CS1, SN1 ) is the unit right eigenvector +* for RT1. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E0, 0.0E0 ) ) + REAL HALF + PARAMETER ( HALF = 0.5E0 ) + REAL THRESH + PARAMETER ( THRESH = 0.1E0 ) +* .. +* .. Local Scalars .. + REAL BABS, EVNORM, TABS, Z + COMPLEX S, T, TMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* +* Special case: The matrix is actually diagonal. +* To avoid divide by zero later, we treat this case separately. +* + IF( ABS( B ).EQ.ZERO ) THEN + RT1 = A + RT2 = C + IF( ABS( RT1 ).LT.ABS( RT2 ) ) THEN + TMP = RT1 + RT1 = RT2 + RT2 = TMP + CS1 = ZERO + SN1 = ONE + ELSE + CS1 = ONE + SN1 = ZERO + END IF + ELSE +* +* Compute the eigenvalues and eigenvectors. +* The characteristic equation is +* lambda **2 - (A+C) lambda + (A*C - B*B) +* and we solve it using the quadratic formula. +* + S = ( A+C )*HALF + T = ( A-C )*HALF +* +* Take the square root carefully to avoid over/under flow. +* + BABS = ABS( B ) + TABS = ABS( T ) + Z = MAX( BABS, TABS ) + IF( Z.GT.ZERO ) + $ T = Z*SQRT( ( T / Z )**2+( B / Z )**2 ) +* +* Compute the two eigenvalues. RT1 and RT2 are exchanged +* if necessary so that RT1 will have the greater magnitude. +* + RT1 = S + T + RT2 = S - T + IF( ABS( RT1 ).LT.ABS( RT2 ) ) THEN + TMP = RT1 + RT1 = RT2 + RT2 = TMP + END IF +* +* Choose CS1 = 1 and SN1 to satisfy the first equation, then +* scale the components of this eigenvector so that the matrix +* of eigenvectors X satisfies X * X' = I . (No scaling is +* done if the norm of the eigenvalue matrix is less than THRESH.) +* + SN1 = ( RT1-A ) / B + TABS = ABS( SN1 ) + IF( TABS.GT.ONE ) THEN + T = TABS*SQRT( ( ONE / TABS )**2+( SN1 / TABS )**2 ) + ELSE + T = SQRT( CONE+SN1*SN1 ) + END IF + EVNORM = ABS( T ) + IF( EVNORM.GE.THRESH ) THEN + EVSCAL = CONE / T + CS1 = EVSCAL + SN1 = SN1*EVSCAL + ELSE + EVSCAL = ZERO + END IF + END IF + RETURN +* +* End of CLAESY +* + END diff --git a/costa/native/external/lapack/claev2.f b/costa/native/external/lapack/claev2.f new file mode 100644 index 000000000..a97529270 --- /dev/null +++ b/costa/native/external/lapack/claev2.f @@ -0,0 +1,96 @@ + SUBROUTINE CLAEV2( A, B, C, RT1, RT2, CS1, SN1 ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + REAL CS1, RT1, RT2 + COMPLEX A, B, C, SN1 +* .. +* +* Purpose +* ======= +* +* CLAEV2 computes the eigendecomposition of a 2-by-2 Hermitian matrix +* [ A B ] +* [ CONJG(B) C ]. +* On return, RT1 is the eigenvalue of larger absolute value, RT2 is the +* eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right +* eigenvector for RT1, giving the decomposition +* +* [ CS1 CONJG(SN1) ] [ A B ] [ CS1 -CONJG(SN1) ] = [ RT1 0 ] +* [-SN1 CS1 ] [ CONJG(B) C ] [ SN1 CS1 ] [ 0 RT2 ]. +* +* Arguments +* ========= +* +* A (input) COMPLEX +* The (1,1) element of the 2-by-2 matrix. +* +* B (input) COMPLEX +* The (1,2) element and the conjugate of the (2,1) element of +* the 2-by-2 matrix. +* +* C (input) COMPLEX +* The (2,2) element of the 2-by-2 matrix. +* +* RT1 (output) REAL +* The eigenvalue of larger absolute value. +* +* RT2 (output) REAL +* The eigenvalue of smaller absolute value. +* +* CS1 (output) REAL +* SN1 (output) COMPLEX +* The vector (CS1, SN1) is a unit right eigenvector for RT1. +* +* Further Details +* =============== +* +* RT1 is accurate to a few ulps barring over/underflow. +* +* RT2 may be inaccurate if there is massive cancellation in the +* determinant A*C-B*B; higher precision or correctly rounded or +* correctly truncated arithmetic would be needed to compute RT2 +* accurately in all cases. +* +* CS1 and SN1 are accurate to a few ulps barring over/underflow. +* +* Overflow is possible only if RT1 is within a factor of 5 of overflow. +* Underflow is harmless if the input data is 0 or exceeds +* underflow_threshold / macheps. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + REAL T + COMPLEX W +* .. +* .. External Subroutines .. + EXTERNAL SLAEV2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, REAL +* .. +* .. Executable Statements .. +* + IF( ABS( B ).EQ.ZERO ) THEN + W = ONE + ELSE + W = CONJG( B ) / ABS( B ) + END IF + CALL SLAEV2( REAL( A ), ABS( B ), REAL( C ), RT1, RT2, CS1, T ) + SN1 = W*T + RETURN +* +* End of CLAEV2 +* + END diff --git a/costa/native/external/lapack/clags2.f b/costa/native/external/lapack/clags2.f new file mode 100644 index 000000000..6d01a6811 --- /dev/null +++ b/costa/native/external/lapack/clags2.f @@ -0,0 +1,305 @@ + SUBROUTINE CLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, CSV, + $ SNV, CSQ, SNQ ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + LOGICAL UPPER + REAL A1, A3, B1, B3, CSQ, CSU, CSV + COMPLEX A2, B2, SNQ, SNU, SNV +* .. +* +* Purpose +* ======= +* +* CLAGS2 computes 2-by-2 unitary matrices U, V and Q, such +* that if ( UPPER ) then +* +* U'*A*Q = U'*( A1 A2 )*Q = ( x 0 ) +* ( 0 A3 ) ( x x ) +* and +* V'*B*Q = V'*( B1 B2 )*Q = ( x 0 ) +* ( 0 B3 ) ( x x ) +* +* or if ( .NOT.UPPER ) then +* +* U'*A*Q = U'*( A1 0 )*Q = ( x x ) +* ( A2 A3 ) ( 0 x ) +* and +* V'*B*Q = V'*( B1 0 )*Q = ( x x ) +* ( B2 B3 ) ( 0 x ) +* where +* +* U = ( CSU SNU ), V = ( CSV SNV ), +* ( -CONJG(SNU) CSU ) ( -CONJG(SNV) CSV ) +* +* Q = ( CSQ SNQ ) +* ( -CONJG(SNQ) CSQ ) +* +* Z' denotes the conjugate transpose of Z. +* +* The rows of the transformed A and B are parallel. Moreover, if the +* input 2-by-2 matrix A is not zero, then the transformed (1,1) entry +* of A is not zero. If the input matrices A and B are both not zero, +* then the transformed (2,2) element of B is not zero, except when the +* first rows of input A and B are parallel and the second rows are +* zero. +* +* Arguments +* ========= +* +* UPPER (input) LOGICAL +* = .TRUE.: the input matrices A and B are upper triangular. +* = .FALSE.: the input matrices A and B are lower triangular. +* +* A1 (input) REAL +* A2 (input) COMPLEX +* A3 (input) REAL +* On entry, A1, A2 and A3 are elements of the input 2-by-2 +* upper (lower) triangular matrix A. +* +* B1 (input) REAL +* B2 (input) COMPLEX +* B3 (input) REAL +* On entry, B1, B2 and B3 are elements of the input 2-by-2 +* upper (lower) triangular matrix B. +* +* CSU (output) REAL +* SNU (output) COMPLEX +* The desired unitary matrix U. +* +* CSV (output) REAL +* SNV (output) COMPLEX +* The desired unitary matrix V. +* +* CSQ (output) REAL +* SNQ (output) COMPLEX +* The desired unitary matrix Q. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + REAL A, AUA11, AUA12, AUA21, AUA22, AVB11, AVB12, + $ AVB21, AVB22, CSL, CSR, D, FB, FC, S1, S2, SNL, + $ SNR, UA11R, UA22R, VB11R, VB22R + COMPLEX B, C, D1, R, T, UA11, UA12, UA21, UA22, VB11, + $ VB12, VB21, VB22 +* .. +* .. External Subroutines .. + EXTERNAL CLARTG, SLASV2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, REAL +* .. +* .. Statement Functions .. + REAL ABS1 +* .. +* .. Statement Function definitions .. + ABS1( T ) = ABS( REAL( T ) ) + ABS( AIMAG( T ) ) +* .. +* .. Executable Statements .. +* + IF( UPPER ) THEN +* +* Input matrices A and B are upper triangular matrices +* +* Form matrix C = A*adj(B) = ( a b ) +* ( 0 d ) +* + A = A1*B3 + D = A3*B1 + B = A2*B1 - A1*B2 + FB = ABS( B ) +* +* Transform complex 2-by-2 matrix C to real matrix by unitary +* diagonal matrix diag(1,D1). +* + D1 = ONE + IF( FB.NE.ZERO ) + $ D1 = B / FB +* +* The SVD of real 2 by 2 triangular C +* +* ( CSL -SNL )*( A B )*( CSR SNR ) = ( R 0 ) +* ( SNL CSL ) ( 0 D ) ( -SNR CSR ) ( 0 T ) +* + CALL SLASV2( A, FB, D, S1, S2, SNR, CSR, SNL, CSL ) +* + IF( ABS( CSL ).GE.ABS( SNL ) .OR. ABS( CSR ).GE.ABS( SNR ) ) + $ THEN +* +* Compute the (1,1) and (1,2) elements of U'*A and V'*B, +* and (1,2) element of |U|'*|A| and |V|'*|B|. +* + UA11R = CSL*A1 + UA12 = CSL*A2 + D1*SNL*A3 +* + VB11R = CSR*B1 + VB12 = CSR*B2 + D1*SNR*B3 +* + AUA12 = ABS( CSL )*ABS1( A2 ) + ABS( SNL )*ABS( A3 ) + AVB12 = ABS( CSR )*ABS1( B2 ) + ABS( SNR )*ABS( B3 ) +* +* zero (1,2) elements of U'*A and V'*B +* + IF( ( ABS( UA11R )+ABS1( UA12 ) ).EQ.ZERO ) THEN + CALL CLARTG( -CMPLX( VB11R ), CONJG( VB12 ), CSQ, SNQ, + $ R ) + ELSE IF( ( ABS( VB11R )+ABS1( VB12 ) ).EQ.ZERO ) THEN + CALL CLARTG( -CMPLX( UA11R ), CONJG( UA12 ), CSQ, SNQ, + $ R ) + ELSE IF( AUA12 / ( ABS( UA11R )+ABS1( UA12 ) ).LE.AVB12 / + $ ( ABS( VB11R )+ABS1( VB12 ) ) ) THEN + CALL CLARTG( -CMPLX( UA11R ), CONJG( UA12 ), CSQ, SNQ, + $ R ) + ELSE + CALL CLARTG( -CMPLX( VB11R ), CONJG( VB12 ), CSQ, SNQ, + $ R ) + END IF +* + CSU = CSL + SNU = -D1*SNL + CSV = CSR + SNV = -D1*SNR +* + ELSE +* +* Compute the (2,1) and (2,2) elements of U'*A and V'*B, +* and (2,2) element of |U|'*|A| and |V|'*|B|. +* + UA21 = -CONJG( D1 )*SNL*A1 + UA22 = -CONJG( D1 )*SNL*A2 + CSL*A3 +* + VB21 = -CONJG( D1 )*SNR*B1 + VB22 = -CONJG( D1 )*SNR*B2 + CSR*B3 +* + AUA22 = ABS( SNL )*ABS1( A2 ) + ABS( CSL )*ABS( A3 ) + AVB22 = ABS( SNR )*ABS1( B2 ) + ABS( CSR )*ABS( B3 ) +* +* zero (2,2) elements of U'*A and V'*B, and then swap. +* + IF( ( ABS1( UA21 )+ABS1( UA22 ) ).EQ.ZERO ) THEN + CALL CLARTG( -CONJG( VB21 ), CONJG( VB22 ), CSQ, SNQ, R ) + ELSE IF( ( ABS1( VB21 )+ABS( VB22 ) ).EQ.ZERO ) THEN + CALL CLARTG( -CONJG( UA21 ), CONJG( UA22 ), CSQ, SNQ, R ) + ELSE IF( AUA22 / ( ABS1( UA21 )+ABS1( UA22 ) ).LE.AVB22 / + $ ( ABS1( VB21 )+ABS1( VB22 ) ) ) THEN + CALL CLARTG( -CONJG( UA21 ), CONJG( UA22 ), CSQ, SNQ, R ) + ELSE + CALL CLARTG( -CONJG( VB21 ), CONJG( VB22 ), CSQ, SNQ, R ) + END IF +* + CSU = SNL + SNU = D1*CSL + CSV = SNR + SNV = D1*CSR +* + END IF +* + ELSE +* +* Input matrices A and B are lower triangular matrices +* +* Form matrix C = A*adj(B) = ( a 0 ) +* ( c d ) +* + A = A1*B3 + D = A3*B1 + C = A2*B3 - A3*B2 + FC = ABS( C ) +* +* Transform complex 2-by-2 matrix C to real matrix by unitary +* diagonal matrix diag(d1,1). +* + D1 = ONE + IF( FC.NE.ZERO ) + $ D1 = C / FC +* +* The SVD of real 2 by 2 triangular C +* +* ( CSL -SNL )*( A 0 )*( CSR SNR ) = ( R 0 ) +* ( SNL CSL ) ( C D ) ( -SNR CSR ) ( 0 T ) +* + CALL SLASV2( A, FC, D, S1, S2, SNR, CSR, SNL, CSL ) +* + IF( ABS( CSR ).GE.ABS( SNR ) .OR. ABS( CSL ).GE.ABS( SNL ) ) + $ THEN +* +* Compute the (2,1) and (2,2) elements of U'*A and V'*B, +* and (2,1) element of |U|'*|A| and |V|'*|B|. +* + UA21 = -D1*SNR*A1 + CSR*A2 + UA22R = CSR*A3 +* + VB21 = -D1*SNL*B1 + CSL*B2 + VB22R = CSL*B3 +* + AUA21 = ABS( SNR )*ABS( A1 ) + ABS( CSR )*ABS1( A2 ) + AVB21 = ABS( SNL )*ABS( B1 ) + ABS( CSL )*ABS1( B2 ) +* +* zero (2,1) elements of U'*A and V'*B. +* + IF( ( ABS1( UA21 )+ABS( UA22R ) ).EQ.ZERO ) THEN + CALL CLARTG( CMPLX( VB22R ), VB21, CSQ, SNQ, R ) + ELSE IF( ( ABS1( VB21 )+ABS( VB22R ) ).EQ.ZERO ) THEN + CALL CLARTG( CMPLX( UA22R ), UA21, CSQ, SNQ, R ) + ELSE IF( AUA21 / ( ABS1( UA21 )+ABS( UA22R ) ).LE.AVB21 / + $ ( ABS1( VB21 )+ABS( VB22R ) ) ) THEN + CALL CLARTG( CMPLX( UA22R ), UA21, CSQ, SNQ, R ) + ELSE + CALL CLARTG( CMPLX( VB22R ), VB21, CSQ, SNQ, R ) + END IF +* + CSU = CSR + SNU = -CONJG( D1 )*SNR + CSV = CSL + SNV = -CONJG( D1 )*SNL +* + ELSE +* +* Compute the (1,1) and (1,2) elements of U'*A and V'*B, +* and (1,1) element of |U|'*|A| and |V|'*|B|. +* + UA11 = CSR*A1 + CONJG( D1 )*SNR*A2 + UA12 = CONJG( D1 )*SNR*A3 +* + VB11 = CSL*B1 + CONJG( D1 )*SNL*B2 + VB12 = CONJG( D1 )*SNL*B3 +* + AUA11 = ABS( CSR )*ABS( A1 ) + ABS( SNR )*ABS1( A2 ) + AVB11 = ABS( CSL )*ABS( B1 ) + ABS( SNL )*ABS1( B2 ) +* +* zero (1,1) elements of U'*A and V'*B, and then swap. +* + IF( ( ABS1( UA11 )+ABS1( UA12 ) ).EQ.ZERO ) THEN + CALL CLARTG( VB12, VB11, CSQ, SNQ, R ) + ELSE IF( ( ABS1( VB11 )+ABS1( VB12 ) ).EQ.ZERO ) THEN + CALL CLARTG( UA12, UA11, CSQ, SNQ, R ) + ELSE IF( AUA11 / ( ABS1( UA11 )+ABS1( UA12 ) ).LE.AVB11 / + $ ( ABS1( VB11 )+ABS1( VB12 ) ) ) THEN + CALL CLARTG( UA12, UA11, CSQ, SNQ, R ) + ELSE + CALL CLARTG( VB12, VB11, CSQ, SNQ, R ) + END IF +* + CSU = SNR + SNU = CONJG( D1 )*CSR + CSV = SNL + SNV = CONJG( D1 )*CSL +* + END IF +* + END IF +* + RETURN +* +* End of CLAGS2 +* + END diff --git a/costa/native/external/lapack/clagtm.f b/costa/native/external/lapack/clagtm.f new file mode 100644 index 000000000..7145c0625 --- /dev/null +++ b/costa/native/external/lapack/clagtm.f @@ -0,0 +1,234 @@ + SUBROUTINE CLAGTM( TRANS, N, NRHS, ALPHA, DL, D, DU, X, LDX, BETA, + $ B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER LDB, LDX, N, NRHS + REAL ALPHA, BETA +* .. +* .. Array Arguments .. + COMPLEX B( LDB, * ), D( * ), DL( * ), DU( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CLAGTM performs a matrix-vector product of the form +* +* B := alpha * A * X + beta * B +* +* where A is a tridiagonal matrix of order N, B and X are N by NRHS +* matrices, and alpha and beta are real scalars, each of which may be +* 0., 1., or -1. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER +* Specifies the operation applied to A. +* = 'N': No transpose, B := alpha * A * X + beta * B +* = 'T': Transpose, B := alpha * A**T * X + beta * B +* = 'C': Conjugate transpose, B := alpha * A**H * X + beta * B +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices X and B. +* +* ALPHA (input) REAL +* The scalar alpha. ALPHA must be 0., 1., or -1.; otherwise, +* it is assumed to be 0. +* +* DL (input) COMPLEX array, dimension (N-1) +* The (n-1) sub-diagonal elements of T. +* +* D (input) COMPLEX array, dimension (N) +* The diagonal elements of T. +* +* DU (input) COMPLEX array, dimension (N-1) +* The (n-1) super-diagonal elements of T. +* +* X (input) COMPLEX array, dimension (LDX,NRHS) +* The N by NRHS matrix X. +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(N,1). +* +* BETA (input) REAL +* The scalar beta. BETA must be 0., 1., or -1.; otherwise, +* it is assumed to be 1. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the N by NRHS matrix B. +* On exit, B is overwritten by the matrix expression +* B := alpha * A * X + beta * B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(N,1). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) + $ RETURN +* +* Multiply B by BETA if BETA.NE.1. +* + IF( BETA.EQ.ZERO ) THEN + DO 20 J = 1, NRHS + DO 10 I = 1, N + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE IF( BETA.EQ.-ONE ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = -B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF +* + IF( ALPHA.EQ.ONE ) THEN + IF( LSAME( TRANS, 'N' ) ) THEN +* +* Compute B := B + A*X +* + DO 60 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + + $ DU( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) + DL( N-1 )*X( N-1, J ) + + $ D( N )*X( N, J ) + DO 50 I = 2, N - 1 + B( I, J ) = B( I, J ) + DL( I-1 )*X( I-1, J ) + + $ D( I )*X( I, J ) + DU( I )*X( I+1, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Compute B := B + A**T * X +* + DO 80 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + + $ DL( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) + DU( N-1 )*X( N-1, J ) + + $ D( N )*X( N, J ) + DO 70 I = 2, N - 1 + B( I, J ) = B( I, J ) + DU( I-1 )*X( I-1, J ) + + $ D( I )*X( I, J ) + DL( I )*X( I+1, J ) + 70 CONTINUE + END IF + 80 CONTINUE + ELSE IF( LSAME( TRANS, 'C' ) ) THEN +* +* Compute B := B + A**H * X +* + DO 100 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) + CONJG( D( 1 ) )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) + CONJG( D( 1 ) )*X( 1, J ) + + $ CONJG( DL( 1 ) )*X( 2, J ) + B( N, J ) = B( N, J ) + CONJG( DU( N-1 ) )* + $ X( N-1, J ) + CONJG( D( N ) )*X( N, J ) + DO 90 I = 2, N - 1 + B( I, J ) = B( I, J ) + CONJG( DU( I-1 ) )* + $ X( I-1, J ) + CONJG( D( I ) )* + $ X( I, J ) + CONJG( DL( I ) )* + $ X( I+1, J ) + 90 CONTINUE + END IF + 100 CONTINUE + END IF + ELSE IF( ALPHA.EQ.-ONE ) THEN + IF( LSAME( TRANS, 'N' ) ) THEN +* +* Compute B := B - A*X +* + DO 120 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) - + $ DU( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) - DL( N-1 )*X( N-1, J ) - + $ D( N )*X( N, J ) + DO 110 I = 2, N - 1 + B( I, J ) = B( I, J ) - DL( I-1 )*X( I-1, J ) - + $ D( I )*X( I, J ) - DU( I )*X( I+1, J ) + 110 CONTINUE + END IF + 120 CONTINUE + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Compute B := B - A'*X +* + DO 140 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) - + $ DL( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) - DU( N-1 )*X( N-1, J ) - + $ D( N )*X( N, J ) + DO 130 I = 2, N - 1 + B( I, J ) = B( I, J ) - DU( I-1 )*X( I-1, J ) - + $ D( I )*X( I, J ) - DL( I )*X( I+1, J ) + 130 CONTINUE + END IF + 140 CONTINUE + ELSE IF( LSAME( TRANS, 'C' ) ) THEN +* +* Compute B := B - A'*X +* + DO 160 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) - CONJG( D( 1 ) )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) - CONJG( D( 1 ) )*X( 1, J ) - + $ CONJG( DL( 1 ) )*X( 2, J ) + B( N, J ) = B( N, J ) - CONJG( DU( N-1 ) )* + $ X( N-1, J ) - CONJG( D( N ) )*X( N, J ) + DO 150 I = 2, N - 1 + B( I, J ) = B( I, J ) - CONJG( DU( I-1 ) )* + $ X( I-1, J ) - CONJG( D( I ) )* + $ X( I, J ) - CONJG( DL( I ) )* + $ X( I+1, J ) + 150 CONTINUE + END IF + 160 CONTINUE + END IF + END IF + RETURN +* +* End of CLAGTM +* + END diff --git a/costa/native/external/lapack/clahef.f b/costa/native/external/lapack/clahef.f new file mode 100644 index 000000000..d5988bd4e --- /dev/null +++ b/costa/native/external/lapack/clahef.f @@ -0,0 +1,648 @@ + SUBROUTINE CLAHEF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), W( LDW, * ) +* .. +* +* Purpose +* ======= +* +* CLAHEF computes a partial factorization of a complex Hermitian +* matrix A using the Bunch-Kaufman diagonal pivoting method. The +* partial factorization has the form: +* +* A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +* ( 0 U22 ) ( 0 D ) ( U12' U22' ) +* +* A = ( L11 0 ) ( D 0 ) ( L11' L21' ) if UPLO = 'L' +* ( L21 I ) ( 0 A22 ) ( 0 I ) +* +* where the order of D is at most NB. The actual order is returned in +* the argument KB, and is either NB or NB-1, or N if N <= NB. +* Note that U' denotes the conjugate transpose of U. +* +* CLAHEF is an auxiliary routine called by CHETRF. It uses blocked code +* (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or +* A22 (if UPLO = 'L'). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* Hermitian matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NB (input) INTEGER +* The maximum number of columns of the matrix A that should be +* factored. NB should be at least 2 to allow for 2-by-2 pivot +* blocks. +* +* KB (output) INTEGER +* The number of columns of A that were actually factored. +* KB is either NB-1 or NB, or N if N <= NB. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* On exit, A contains details of the partial factorization. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If UPLO = 'U', only the last KB elements of IPIV are set; +* if UPLO = 'L', only the first KB elements are set. +* +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* W (workspace) COMPLEX array, dimension (LDW,NB) +* +* LDW (input) INTEGER +* The leading dimension of the array W. LDW >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* > 0: if INFO = k, D(k,k) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER IMAX, J, JB, JJ, JMAX, JP, K, KK, KKW, KP, + $ KSTEP, KW + REAL ABSAKK, ALPHA, COLMAX, R1, ROWMAX, T + COMPLEX D11, D21, D22, Z +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + EXTERNAL LSAME, ICAMAX +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMM, CGEMV, CLACGV, CSSCAL, CSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CONJG, MAX, MIN, REAL, SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( REAL( Z ) ) + ABS( AIMAG( Z ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 (note that conjg(W) is actually stored) +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* +* KW is the column of W which corresponds to column K of A +* + K = N + 10 CONTINUE + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* +* Copy column K of A to column KW of W and update it +* + CALL CCOPY( K-1, A( 1, K ), 1, W( 1, KW ), 1 ) + W( K, KW ) = REAL( A( K, K ) ) + IF( K.LT.N ) THEN + CALL CGEMV( 'No transpose', K, N-K, -CONE, A( 1, K+1 ), LDA, + $ W( K, KW+1 ), LDW, CONE, W( 1, KW ), 1 ) + W( K, KW ) = REAL( W( K, KW ) ) + END IF +* + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( REAL( W( K, KW ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = ICAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = CABS1( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = REAL( A( K, K ) ) + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* Copy column IMAX to column KW-1 of W and update it +* + CALL CCOPY( IMAX-1, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 ) + W( IMAX, KW-1 ) = REAL( A( IMAX, IMAX ) ) + CALL CCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) + CALL CLACGV( K-IMAX, W( IMAX+1, KW-1 ), 1 ) + IF( K.LT.N ) THEN + CALL CGEMV( 'No transpose', K, N-K, -CONE, + $ A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW, + $ CONE, W( 1, KW-1 ), 1 ) + W( IMAX, KW-1 ) = REAL( W( IMAX, KW-1 ) ) + END IF +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = IMAX + ICAMAX( K-IMAX, W( IMAX+1, KW-1 ), 1 ) + ROWMAX = CABS1( W( JMAX, KW-1 ) ) + IF( IMAX.GT.1 ) THEN + JMAX = ICAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, KW-1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( REAL( W( IMAX, KW-1 ) ) ).GE.ALPHA*ROWMAX ) + $ THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW +* + CALL CCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + KKW = NB + KK - N +* +* Updated column KP is already stored in column KKW of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, KP ) = REAL( A( KK, KK ) ) + CALL CCOPY( KK-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + CALL CLACGV( KK-1-KP, A( KP, KP+1 ), LDA ) + CALL CCOPY( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last KK columns of A and W +* + IF( KK.LT.N ) + $ CALL CSWAP( N-KK, A( KK, KK+1 ), LDA, A( KP, KK+1 ), + $ LDA ) + CALL CSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column KW of W now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Store U(k) in column k of A +* + CALL CCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + R1 = ONE / REAL( A( K, K ) ) + CALL CSSCAL( K-1, R1, A( 1, K ), 1 ) +* +* Conjugate W(k) +* + CALL CLACGV( K-1, W( 1, KW ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns KW and KW-1 of W now +* hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* + IF( K.GT.2 ) THEN +* +* Store U(k) and U(k-1) in columns k and k-1 of A +* + D21 = W( K-1, KW ) + D11 = W( K, KW ) / CONJG( D21 ) + D22 = W( K-1, KW-1 ) / D21 + T = ONE / ( REAL( D11*D22 )-ONE ) + D21 = T / D21 + DO 20 J = 1, K - 2 + A( J, K-1 ) = D21*( D11*W( J, KW-1 )-W( J, KW ) ) + A( J, K ) = CONJG( D21 )* + $ ( D22*W( J, KW )-W( J, KW-1 ) ) + 20 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = W( K-1, KW ) + A( K, K ) = W( K, KW ) +* +* Conjugate W(k) and W(k-1) +* + CALL CLACGV( K-1, W( 1, KW ), 1 ) + CALL CLACGV( K-2, W( 1, KW-1 ), 1 ) + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12' = A11 - U12*W' +* +* computing blocks of NB columns at a time (note that conjg(W) is +* actually stored) +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + A( JJ, JJ ) = REAL( A( JJ, JJ ) ) + CALL CGEMV( 'No transpose', JJ-J+1, N-K, -CONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, CONE, + $ A( J, JJ ), 1 ) + A( JJ, JJ ) = REAL( A( JJ, JJ ) ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + CALL CGEMM( 'No transpose', 'Transpose', J-1, JB, N-K, + $ -CONE, A( 1, K+1 ), LDA, W( J, KW+1 ), LDW, + $ CONE, A( 1, J ), LDA ) + 50 CONTINUE +* +* Put U12 in standard form by partially undoing the interchanges +* in columns k+1:n +* + J = K + 1 + 60 CONTINUE + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP + J = J + 1 + END IF + J = J + 1 + IF( JP.NE.JJ .AND. J.LE.N ) + $ CALL CSWAP( N-J+1, A( JP, J ), LDA, A( JJ, J ), LDA ) + IF( J.LE.N ) + $ GO TO 60 +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 (note that conjg(W) is actually stored) +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* +* Copy column K of A to column K of W and update it +* + W( K, K ) = REAL( A( K, K ) ) + IF( K.LT.N ) + $ CALL CCOPY( N-K, A( K+1, K ), 1, W( K+1, K ), 1 ) + CALL CGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), LDA, + $ W( K, 1 ), LDW, CONE, W( K, K ), 1 ) + W( K, K ) = REAL( W( K, K ) ) +* + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( REAL( W( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + ICAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = CABS1( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = REAL( A( K, K ) ) + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* Copy column IMAX to column K+1 of W and update it +* + CALL CCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1 ) + CALL CLACGV( IMAX-K, W( K, K+1 ), 1 ) + W( IMAX, K+1 ) = REAL( A( IMAX, IMAX ) ) + IF( IMAX.LT.N ) + $ CALL CCOPY( N-IMAX, A( IMAX+1, IMAX ), 1, + $ W( IMAX+1, K+1 ), 1 ) + CALL CGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), + $ LDA, W( IMAX, 1 ), LDW, CONE, W( K, K+1 ), + $ 1 ) + W( IMAX, K+1 ) = REAL( W( IMAX, K+1 ) ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = K - 1 + ICAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = CABS1( W( JMAX, K+1 ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + ICAMAX( N-IMAX, W( IMAX+1, K+1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, K+1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( REAL( W( IMAX, K+1 ) ) ).GE.ALPHA*ROWMAX ) + $ THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K +* + CALL CCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 +* +* Updated column KP is already stored in column KK of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, KP ) = REAL( A( KK, KK ) ) + CALL CCOPY( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + CALL CLACGV( KP-KK-1, A( KP, KK+1 ), LDA ) + IF( KP.LT.N ) + $ CALL CCOPY( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) +* +* Interchange rows KK and KP in first KK columns of A and W +* + CALL CSWAP( KK-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL CSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* +* Store L(k) in column k of A +* + CALL CCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN + R1 = ONE / REAL( A( K, K ) ) + CALL CSSCAL( N-K, R1, A( K+1, K ), 1 ) +* +* Conjugate W(k) +* + CALL CLACGV( N-K, W( K+1, K ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Store L(k) and L(k+1) in columns k and k+1 of A +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / CONJG( D21 ) + T = ONE / ( REAL( D11*D22 )-ONE ) + D21 = T / D21 + DO 80 J = K + 2, N + A( J, K ) = CONJG( D21 )* + $ ( D11*W( J, K )-W( J, K+1 ) ) + A( J, K+1 ) = D21*( D22*W( J, K+1 )-W( J, K ) ) + 80 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = W( K+1, K ) + A( K+1, K+1 ) = W( K+1, K+1 ) +* +* Conjugate W(k) and W(k+1) +* + CALL CLACGV( N-K, W( K+1, K ), 1 ) + CALL CLACGV( N-K-1, W( K+2, K+1 ), 1 ) + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21' = A22 - L21*W' +* +* computing blocks of NB columns at a time (note that conjg(W) is +* actually stored) +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + A( JJ, JJ ) = REAL( A( JJ, JJ ) ) + CALL CGEMV( 'No transpose', J+JB-JJ, K-1, -CONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, CONE, + $ A( JJ, JJ ), 1 ) + A( JJ, JJ ) = REAL( A( JJ, JJ ) ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL CGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -CONE, A( J+JB, 1 ), LDA, W( J, 1 ), + $ LDW, CONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Put L21 in standard form by partially undoing the interchanges +* in columns 1:k-1 +* + J = K - 1 + 120 CONTINUE + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP + J = J - 1 + END IF + J = J - 1 + IF( JP.NE.JJ .AND. J.GE.1 ) + $ CALL CSWAP( J, A( JP, 1 ), LDA, A( JJ, 1 ), LDA ) + IF( J.GE.1 ) + $ GO TO 120 +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF + RETURN +* +* End of CLAHEF +* + END diff --git a/costa/native/external/lapack/clahqr.f b/costa/native/external/lapack/clahqr.f new file mode 100644 index 000000000..d21dec643 --- /dev/null +++ b/costa/native/external/lapack/clahqr.f @@ -0,0 +1,383 @@ + SUBROUTINE CLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, + $ IHIZ, Z, LDZ, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + LOGICAL WANTT, WANTZ + INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, N +* .. +* .. Array Arguments .. + COMPLEX H( LDH, * ), W( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CLAHQR is an auxiliary routine called by CHSEQR to update the +* eigenvalues and Schur decomposition already computed by CHSEQR, by +* dealing with the Hessenberg submatrix in rows and columns ILO to IHI. +* +* Arguments +* ========= +* +* WANTT (input) LOGICAL +* = .TRUE. : the full Schur form T is required; +* = .FALSE.: only eigenvalues are required. +* +* WANTZ (input) LOGICAL +* = .TRUE. : the matrix of Schur vectors Z is required; +* = .FALSE.: Schur vectors are not required. +* +* N (input) INTEGER +* The order of the matrix H. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that H is already upper triangular in rows and +* columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1). +* CLAHQR works primarily with the Hessenberg submatrix in rows +* and columns ILO to IHI, but applies transformations to all of +* H if WANTT is .TRUE.. +* 1 <= ILO <= max(1,IHI); IHI <= N. +* +* H (input/output) COMPLEX array, dimension (LDH,N) +* On entry, the upper Hessenberg matrix H. +* On exit, if WANTT is .TRUE., H is upper triangular in rows +* and columns ILO:IHI, with any 2-by-2 diagonal blocks in +* standard form. If WANTT is .FALSE., the contents of H are +* unspecified on exit. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH >= max(1,N). +* +* W (output) COMPLEX array, dimension (N) +* The computed eigenvalues ILO to IHI are stored in the +* corresponding elements of W. If WANTT is .TRUE., the +* eigenvalues are stored in the same order as on the diagonal +* of the Schur form returned in H, with W(i) = H(i,i). +* +* ILOZ (input) INTEGER +* IHIZ (input) INTEGER +* Specify the rows of Z to which transformations must be +* applied if WANTZ is .TRUE.. +* 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. +* +* Z (input/output) COMPLEX array, dimension (LDZ,N) +* If WANTZ is .TRUE., on entry Z must contain the current +* matrix Z of transformations accumulated by CHSEQR, and on +* exit Z has been updated; transformations are applied only to +* the submatrix Z(ILOZ:IHIZ,ILO:IHI). +* If WANTZ is .FALSE., Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* > 0: if INFO = i, CLAHQR failed to compute all the +* eigenvalues ILO to IHI in a total of 30*(IHI-ILO+1) +* iterations; elements i+1:ihi of W contain those +* eigenvalues which have been successfully computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) + REAL RZERO, HALF + PARAMETER ( RZERO = 0.0E+0, HALF = 0.5E+0 ) + REAL DAT1 + PARAMETER ( DAT1 = 0.75E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, I2, ITN, ITS, J, K, L, M, NH, NZ + REAL H10, H21, RTEMP, S, SMLNUM, T2, TST1, ULP + COMPLEX CDUM, H11, H11S, H22, SUM, T, T1, TEMP, U, V2, + $ X, Y +* .. +* .. Local Arrays .. + REAL RWORK( 1 ) + COMPLEX V( 2 ) +* .. +* .. External Functions .. + REAL CLANHS, SLAMCH + COMPLEX CLADIV + EXTERNAL CLANHS, SLAMCH, CLADIV +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLARFG, CSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CONJG, MAX, MIN, REAL, SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( ILO.EQ.IHI ) THEN + W( ILO ) = H( ILO, ILO ) + RETURN + END IF +* + NH = IHI - ILO + 1 + NZ = IHIZ - ILOZ + 1 +* +* Set machine-dependent constants for the stopping criterion. +* If norm(H) <= sqrt(OVFL), overflow should not occur. +* + ULP = SLAMCH( 'Precision' ) + SMLNUM = SLAMCH( 'Safe minimum' ) / ULP +* +* I1 and I2 are the indices of the first row and last column of H +* to which transformations must be applied. If eigenvalues only are +* being computed, I1 and I2 are set inside the main loop. +* + IF( WANTT ) THEN + I1 = 1 + I2 = N + END IF +* +* ITN is the total number of QR iterations allowed. +* + ITN = 30*NH +* +* The main loop begins here. I is the loop index and decreases from +* IHI to ILO in steps of 1. Each iteration of the loop works +* with the active submatrix in rows and columns L to I. +* Eigenvalues I+1 to IHI have already converged. Either L = ILO, or +* H(L,L-1) is negligible so that the matrix splits. +* + I = IHI + 10 CONTINUE + IF( I.LT.ILO ) + $ GO TO 130 +* +* Perform QR iterations on rows and columns ILO to I until a +* submatrix of order 1 splits off at the bottom because a +* subdiagonal element has become negligible. +* + L = ILO + DO 110 ITS = 0, ITN +* +* Look for a single small subdiagonal element. +* + DO 20 K = I, L + 1, -1 + TST1 = CABS1( H( K-1, K-1 ) ) + CABS1( H( K, K ) ) + IF( TST1.EQ.RZERO ) + $ TST1 = CLANHS( '1', I-L+1, H( L, L ), LDH, RWORK ) + IF( ABS( REAL( H( K, K-1 ) ) ).LE.MAX( ULP*TST1, SMLNUM ) ) + $ GO TO 30 + 20 CONTINUE + 30 CONTINUE + L = K + IF( L.GT.ILO ) THEN +* +* H(L,L-1) is negligible +* + H( L, L-1 ) = ZERO + END IF +* +* Exit from loop if a submatrix of order 1 has split off. +* + IF( L.GE.I ) + $ GO TO 120 +* +* Now the active submatrix is in rows and columns L to I. If +* eigenvalues only are being computed, only the active submatrix +* need be transformed. +* + IF( .NOT.WANTT ) THEN + I1 = L + I2 = I + END IF +* + IF( ITS.EQ.10 .OR. ITS.EQ.20 ) THEN +* +* Exceptional shift. +* + S = DAT1*ABS( REAL( H( I, I-1 ) ) ) + T = S + H( I, I ) + ELSE +* +* Wilkinson's shift. +* + T = H( I, I ) + U = H( I-1, I )*REAL( H( I, I-1 ) ) + IF( U.NE.ZERO ) THEN + X = HALF*( H( I-1, I-1 )-T ) + Y = SQRT( X*X+U ) + IF( REAL( X )*REAL( Y )+AIMAG( X )*AIMAG( Y ).LT.RZERO ) + $ Y = -Y + T = T - CLADIV( U, ( X+Y ) ) + END IF + END IF +* +* Look for two consecutive small subdiagonal elements. +* + DO 40 M = I - 1, L + 1, -1 +* +* Determine the effect of starting the single-shift QR +* iteration at row M, and see if this would make H(M,M-1) +* negligible. +* + H11 = H( M, M ) + H22 = H( M+1, M+1 ) + H11S = H11 - T + H21 = H( M+1, M ) + S = CABS1( H11S ) + ABS( H21 ) + H11S = H11S / S + H21 = H21 / S + V( 1 ) = H11S + V( 2 ) = H21 + H10 = H( M, M-1 ) + TST1 = CABS1( H11S )*( CABS1( H11 )+CABS1( H22 ) ) + IF( ABS( H10*H21 ).LE.ULP*TST1 ) + $ GO TO 50 + 40 CONTINUE + H11 = H( L, L ) + H22 = H( L+1, L+1 ) + H11S = H11 - T + H21 = H( L+1, L ) + S = CABS1( H11S ) + ABS( H21 ) + H11S = H11S / S + H21 = H21 / S + V( 1 ) = H11S + V( 2 ) = H21 + 50 CONTINUE +* +* Single-shift QR step +* + DO 100 K = M, I - 1 +* +* The first iteration of this loop determines a reflection G +* from the vector V and applies it from left and right to H, +* thus creating a nonzero bulge below the subdiagonal. +* +* Each subsequent iteration determines a reflection G to +* restore the Hessenberg form in the (K-1)th column, and thus +* chases the bulge one step toward the bottom of the active +* submatrix. +* +* V(2) is always real before the call to CLARFG, and hence +* after the call T2 ( = T1*V(2) ) is also real. +* + IF( K.GT.M ) + $ CALL CCOPY( 2, H( K, K-1 ), 1, V, 1 ) + CALL CLARFG( 2, V( 1 ), V( 2 ), 1, T1 ) + IF( K.GT.M ) THEN + H( K, K-1 ) = V( 1 ) + H( K+1, K-1 ) = ZERO + END IF + V2 = V( 2 ) + T2 = REAL( T1*V2 ) +* +* Apply G from the left to transform the rows of the matrix +* in columns K to I2. +* + DO 60 J = K, I2 + SUM = CONJG( T1 )*H( K, J ) + T2*H( K+1, J ) + H( K, J ) = H( K, J ) - SUM + H( K+1, J ) = H( K+1, J ) - SUM*V2 + 60 CONTINUE +* +* Apply G from the right to transform the columns of the +* matrix in rows I1 to min(K+2,I). +* + DO 70 J = I1, MIN( K+2, I ) + SUM = T1*H( J, K ) + T2*H( J, K+1 ) + H( J, K ) = H( J, K ) - SUM + H( J, K+1 ) = H( J, K+1 ) - SUM*CONJG( V2 ) + 70 CONTINUE +* + IF( WANTZ ) THEN +* +* Accumulate transformations in the matrix Z +* + DO 80 J = ILOZ, IHIZ + SUM = T1*Z( J, K ) + T2*Z( J, K+1 ) + Z( J, K ) = Z( J, K ) - SUM + Z( J, K+1 ) = Z( J, K+1 ) - SUM*CONJG( V2 ) + 80 CONTINUE + END IF +* + IF( K.EQ.M .AND. M.GT.L ) THEN +* +* If the QR step was started at row M > L because two +* consecutive small subdiagonals were found, then extra +* scaling must be performed to ensure that H(M,M-1) remains +* real. +* + TEMP = ONE - T1 + TEMP = TEMP / ABS( TEMP ) + H( M+1, M ) = H( M+1, M )*CONJG( TEMP ) + IF( M+2.LE.I ) + $ H( M+2, M+1 ) = H( M+2, M+1 )*TEMP + DO 90 J = M, I + IF( J.NE.M+1 ) THEN + IF( I2.GT.J ) + $ CALL CSCAL( I2-J, TEMP, H( J, J+1 ), LDH ) + CALL CSCAL( J-I1, CONJG( TEMP ), H( I1, J ), 1 ) + IF( WANTZ ) THEN + CALL CSCAL( NZ, CONJG( TEMP ), Z( ILOZ, J ), 1 ) + END IF + END IF + 90 CONTINUE + END IF + 100 CONTINUE +* +* Ensure that H(I,I-1) is real. +* + TEMP = H( I, I-1 ) + IF( AIMAG( TEMP ).NE.RZERO ) THEN + RTEMP = ABS( TEMP ) + H( I, I-1 ) = RTEMP + TEMP = TEMP / RTEMP + IF( I2.GT.I ) + $ CALL CSCAL( I2-I, CONJG( TEMP ), H( I, I+1 ), LDH ) + CALL CSCAL( I-I1, TEMP, H( I1, I ), 1 ) + IF( WANTZ ) THEN + CALL CSCAL( NZ, TEMP, Z( ILOZ, I ), 1 ) + END IF + END IF +* + 110 CONTINUE +* +* Failure to converge in remaining number of iterations +* + INFO = I + RETURN +* + 120 CONTINUE +* +* H(I,I-1) is negligible: one eigenvalue has converged. +* + W( I ) = H( I, I ) +* +* Decrement number of remaining iterations, and return to start of +* the main loop with new value of I. +* + ITN = ITN - ITS + I = L - 1 + GO TO 10 +* + 130 CONTINUE + RETURN +* +* End of CLAHQR +* + END diff --git a/costa/native/external/lapack/clahrd.f b/costa/native/external/lapack/clahrd.f new file mode 100644 index 000000000..2d21b6968 --- /dev/null +++ b/costa/native/external/lapack/clahrd.f @@ -0,0 +1,212 @@ + SUBROUTINE CLAHRD( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), T( LDT, NB ), TAU( NB ), + $ Y( LDY, NB ) +* .. +* +* Purpose +* ======= +* +* CLAHRD reduces the first NB columns of a complex general n-by-(n-k+1) +* matrix A so that elements below the k-th subdiagonal are zero. The +* reduction is performed by a unitary similarity transformation +* Q' * A * Q. The routine returns the matrices V and T which determine +* Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. +* +* This is an auxiliary routine called by CGEHRD. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. +* +* K (input) INTEGER +* The offset for the reduction. Elements below the k-th +* subdiagonal in the first NB columns are reduced to zero. +* +* NB (input) INTEGER +* The number of columns to be reduced. +* +* A (input/output) COMPLEX array, dimension (LDA,N-K+1) +* On entry, the n-by-(n-k+1) general matrix A. +* On exit, the elements on and above the k-th subdiagonal in +* the first NB columns are overwritten with the corresponding +* elements of the reduced matrix; the elements below the k-th +* subdiagonal, with the array TAU, represent the matrix Q as a +* product of elementary reflectors. The other columns of A are +* unchanged. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (output) COMPLEX array, dimension (NB) +* The scalar factors of the elementary reflectors. See Further +* Details. +* +* T (output) COMPLEX array, dimension (LDT,NB) +* The upper triangular matrix T. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= NB. +* +* Y (output) COMPLEX array, dimension (LDY,NB) +* The n-by-nb matrix Y. +* +* LDY (input) INTEGER +* The leading dimension of the array Y. LDY >= max(1,N). +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of nb elementary reflectors +* +* Q = H(1) H(2) . . . H(nb). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in +* A(i+k+1:n,i), and tau in TAU(i). +* +* The elements of the vectors v together form the (n-k+1)-by-nb matrix +* V which is needed, with T and Y, to apply the transformation to the +* unreduced part of the matrix, using an update of the form: +* A := (I - V*T*V') * (A - Y*V'). +* +* The contents of A on exit are illustrated by the following example +* with n = 7, k = 3 and nb = 2: +* +* ( a h a a a ) +* ( a h a a a ) +* ( a h a a a ) +* ( h h a a a ) +* ( v1 h a a a ) +* ( v1 v2 a a a ) +* ( v1 v2 a a a ) +* +* where a denotes an element of the original matrix A, h denotes a +* modified element of the upper Hessenberg matrix H, and vi denotes an +* element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX EI +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGEMV, CLACGV, CLARFG, CSCAL, + $ CTRMV +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, NB + IF( I.GT.1 ) THEN +* +* Update A(1:n,i) +* +* Compute i-th column of A - Y * V' +* + CALL CLACGV( I-1, A( K+I-1, 1 ), LDA ) + CALL CGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, + $ A( K+I-1, 1 ), LDA, ONE, A( 1, I ), 1 ) + CALL CLACGV( I-1, A( K+I-1, 1 ), LDA ) +* +* Apply I - V * T' * V' to this column (call it b) from the +* left, using the last column of T as workspace +* +* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) +* ( V2 ) ( b2 ) +* +* where V1 is unit lower triangular +* +* w := V1' * b1 +* + CALL CCOPY( I-1, A( K+1, I ), 1, T( 1, NB ), 1 ) + CALL CTRMV( 'Lower', 'Conjugate transpose', 'Unit', I-1, + $ A( K+1, 1 ), LDA, T( 1, NB ), 1 ) +* +* w := w + V2'*b2 +* + CALL CGEMV( 'Conjugate transpose', N-K-I+1, I-1, ONE, + $ A( K+I, 1 ), LDA, A( K+I, I ), 1, ONE, + $ T( 1, NB ), 1 ) +* +* w := T'*w +* + CALL CTRMV( 'Upper', 'Conjugate transpose', 'Non-unit', I-1, + $ T, LDT, T( 1, NB ), 1 ) +* +* b2 := b2 - V2*w +* + CALL CGEMV( 'No transpose', N-K-I+1, I-1, -ONE, A( K+I, 1 ), + $ LDA, T( 1, NB ), 1, ONE, A( K+I, I ), 1 ) +* +* b1 := b1 - V1*w +* + CALL CTRMV( 'Lower', 'No transpose', 'Unit', I-1, + $ A( K+1, 1 ), LDA, T( 1, NB ), 1 ) + CALL CAXPY( I-1, -ONE, T( 1, NB ), 1, A( K+1, I ), 1 ) +* + A( K+I-1, I-1 ) = EI + END IF +* +* Generate the elementary reflector H(i) to annihilate +* A(k+i+1:n,i) +* + EI = A( K+I, I ) + CALL CLARFG( N-K-I+1, EI, A( MIN( K+I+1, N ), I ), 1, + $ TAU( I ) ) + A( K+I, I ) = ONE +* +* Compute Y(1:n,i) +* + CALL CGEMV( 'No transpose', N, N-K-I+1, ONE, A( 1, I+1 ), LDA, + $ A( K+I, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', N-K-I+1, I-1, ONE, + $ A( K+I, 1 ), LDA, A( K+I, I ), 1, ZERO, T( 1, I ), + $ 1 ) + CALL CGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, T( 1, I ), 1, + $ ONE, Y( 1, I ), 1 ) + CALL CSCAL( N, TAU( I ), Y( 1, I ), 1 ) +* +* Compute T(1:i,i) +* + CALL CSCAL( I-1, -TAU( I ), T( 1, I ), 1 ) + CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, LDT, + $ T( 1, I ), 1 ) + T( I, I ) = TAU( I ) +* + 10 CONTINUE + A( K+NB, NB ) = EI +* + RETURN +* +* End of CLAHRD +* + END diff --git a/costa/native/external/lapack/claic1.f b/costa/native/external/lapack/claic1.f new file mode 100644 index 000000000..2dc74add6 --- /dev/null +++ b/costa/native/external/lapack/claic1.f @@ -0,0 +1,296 @@ + SUBROUTINE CLAIC1( JOB, J, X, SEST, W, GAMMA, SESTPR, S, C ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + INTEGER J, JOB + REAL SEST, SESTPR + COMPLEX C, GAMMA, S +* .. +* .. Array Arguments .. + COMPLEX W( J ), X( J ) +* .. +* +* Purpose +* ======= +* +* CLAIC1 applies one step of incremental condition estimation in +* its simplest version: +* +* Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j +* lower triangular matrix L, such that +* twonorm(L*x) = sest +* Then CLAIC1 computes sestpr, s, c such that +* the vector +* [ s*x ] +* xhat = [ c ] +* is an approximate singular vector of +* [ L 0 ] +* Lhat = [ w' gamma ] +* in the sense that +* twonorm(Lhat*xhat) = sestpr. +* +* Depending on JOB, an estimate for the largest or smallest singular +* value is computed. +* +* Note that [s c]' and sestpr**2 is an eigenpair of the system +* +* diag(sest*sest, 0) + [alpha gamma] * [ conjg(alpha) ] +* [ conjg(gamma) ] +* +* where alpha = conjg(x)'*w. +* +* Arguments +* ========= +* +* JOB (input) INTEGER +* = 1: an estimate for the largest singular value is computed. +* = 2: an estimate for the smallest singular value is computed. +* +* J (input) INTEGER +* Length of X and W +* +* X (input) COMPLEX array, dimension (J) +* The j-vector x. +* +* SEST (input) REAL +* Estimated singular value of j by j matrix L +* +* W (input) COMPLEX array, dimension (J) +* The j-vector w. +* +* GAMMA (input) COMPLEX +* The diagonal element gamma. +* +* SESTPR (output) REAL +* Estimated singular value of (j+1) by (j+1) matrix Lhat. +* +* S (output) COMPLEX +* Sine needed in forming xhat. +* +* C (output) COMPLEX +* Cosine needed in forming xhat. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 ) + REAL HALF, FOUR + PARAMETER ( HALF = 0.5E0, FOUR = 4.0E0 ) +* .. +* .. Local Scalars .. + REAL ABSALP, ABSEST, ABSGAM, B, EPS, NORMA, S1, S2, + $ SCL, T, TEST, TMP, ZETA1, ZETA2 + COMPLEX ALPHA, COSINE, SINE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, SQRT +* .. +* .. External Functions .. + REAL SLAMCH + COMPLEX CDOTC + EXTERNAL SLAMCH, CDOTC +* .. +* .. Executable Statements .. +* + EPS = SLAMCH( 'Epsilon' ) + ALPHA = CDOTC( J, X, 1, W, 1 ) +* + ABSALP = ABS( ALPHA ) + ABSGAM = ABS( GAMMA ) + ABSEST = ABS( SEST ) +* + IF( JOB.EQ.1 ) THEN +* +* Estimating largest singular value +* +* special cases +* + IF( SEST.EQ.ZERO ) THEN + S1 = MAX( ABSGAM, ABSALP ) + IF( S1.EQ.ZERO ) THEN + S = ZERO + C = ONE + SESTPR = ZERO + ELSE + S = ALPHA / S1 + C = GAMMA / S1 + TMP = SQRT( S*CONJG( S )+C*CONJG( C ) ) + S = S / TMP + C = C / TMP + SESTPR = S1*TMP + END IF + RETURN + ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN + S = ONE + C = ZERO + TMP = MAX( ABSEST, ABSALP ) + S1 = ABSEST / TMP + S2 = ABSALP / TMP + SESTPR = TMP*SQRT( S1*S1+S2*S2 ) + RETURN + ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN + S1 = ABSGAM + S2 = ABSEST + IF( S1.LE.S2 ) THEN + S = ONE + C = ZERO + SESTPR = S2 + ELSE + S = ZERO + C = ONE + SESTPR = S1 + END IF + RETURN + ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN + S1 = ABSGAM + S2 = ABSALP + IF( S1.LE.S2 ) THEN + TMP = S1 / S2 + SCL = SQRT( ONE+TMP*TMP ) + SESTPR = S2*SCL + S = ( ALPHA / S2 ) / SCL + C = ( GAMMA / S2 ) / SCL + ELSE + TMP = S2 / S1 + SCL = SQRT( ONE+TMP*TMP ) + SESTPR = S1*SCL + S = ( ALPHA / S1 ) / SCL + C = ( GAMMA / S1 ) / SCL + END IF + RETURN + ELSE +* +* normal case +* + ZETA1 = ABSALP / ABSEST + ZETA2 = ABSGAM / ABSEST +* + B = ( ONE-ZETA1*ZETA1-ZETA2*ZETA2 )*HALF + C = ZETA1*ZETA1 + IF( B.GT.ZERO ) THEN + T = C / ( B+SQRT( B*B+C ) ) + ELSE + T = SQRT( B*B+C ) - B + END IF +* + SINE = -( ALPHA / ABSEST ) / T + COSINE = -( GAMMA / ABSEST ) / ( ONE+T ) + TMP = SQRT( SINE*CONJG( SINE )+COSINE*CONJG( COSINE ) ) + S = SINE / TMP + C = COSINE / TMP + SESTPR = SQRT( T+ONE )*ABSEST + RETURN + END IF +* + ELSE IF( JOB.EQ.2 ) THEN +* +* Estimating smallest singular value +* +* special cases +* + IF( SEST.EQ.ZERO ) THEN + SESTPR = ZERO + IF( MAX( ABSGAM, ABSALP ).EQ.ZERO ) THEN + SINE = ONE + COSINE = ZERO + ELSE + SINE = -CONJG( GAMMA ) + COSINE = CONJG( ALPHA ) + END IF + S1 = MAX( ABS( SINE ), ABS( COSINE ) ) + S = SINE / S1 + C = COSINE / S1 + TMP = SQRT( S*CONJG( S )+C*CONJG( C ) ) + S = S / TMP + C = C / TMP + RETURN + ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN + S = ZERO + C = ONE + SESTPR = ABSGAM + RETURN + ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN + S1 = ABSGAM + S2 = ABSEST + IF( S1.LE.S2 ) THEN + S = ZERO + C = ONE + SESTPR = S1 + ELSE + S = ONE + C = ZERO + SESTPR = S2 + END IF + RETURN + ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN + S1 = ABSGAM + S2 = ABSALP + IF( S1.LE.S2 ) THEN + TMP = S1 / S2 + SCL = SQRT( ONE+TMP*TMP ) + SESTPR = ABSEST*( TMP / SCL ) + S = -( CONJG( GAMMA ) / S2 ) / SCL + C = ( CONJG( ALPHA ) / S2 ) / SCL + ELSE + TMP = S2 / S1 + SCL = SQRT( ONE+TMP*TMP ) + SESTPR = ABSEST / SCL + S = -( CONJG( GAMMA ) / S1 ) / SCL + C = ( CONJG( ALPHA ) / S1 ) / SCL + END IF + RETURN + ELSE +* +* normal case +* + ZETA1 = ABSALP / ABSEST + ZETA2 = ABSGAM / ABSEST +* + NORMA = MAX( ONE+ZETA1*ZETA1+ZETA1*ZETA2, + $ ZETA1*ZETA2+ZETA2*ZETA2 ) +* +* See if root is closer to zero or to ONE +* + TEST = ONE + TWO*( ZETA1-ZETA2 )*( ZETA1+ZETA2 ) + IF( TEST.GE.ZERO ) THEN +* +* root is close to zero, compute directly +* + B = ( ZETA1*ZETA1+ZETA2*ZETA2+ONE )*HALF + C = ZETA2*ZETA2 + T = C / ( B+SQRT( ABS( B*B-C ) ) ) + SINE = ( ALPHA / ABSEST ) / ( ONE-T ) + COSINE = -( GAMMA / ABSEST ) / T + SESTPR = SQRT( T+FOUR*EPS*EPS*NORMA )*ABSEST + ELSE +* +* root is closer to ONE, shift by that amount +* + B = ( ZETA2*ZETA2+ZETA1*ZETA1-ONE )*HALF + C = ZETA1*ZETA1 + IF( B.GE.ZERO ) THEN + T = -C / ( B+SQRT( B*B+C ) ) + ELSE + T = B - SQRT( B*B+C ) + END IF + SINE = -( ALPHA / ABSEST ) / T + COSINE = -( GAMMA / ABSEST ) / ( ONE+T ) + SESTPR = SQRT( ONE+T+FOUR*EPS*EPS*NORMA )*ABSEST + END IF + TMP = SQRT( SINE*CONJG( SINE )+COSINE*CONJG( COSINE ) ) + S = SINE / TMP + C = COSINE / TMP + RETURN +* + END IF + END IF + RETURN +* +* End of CLAIC1 +* + END diff --git a/costa/native/external/lapack/clals0.f b/costa/native/external/lapack/clals0.f new file mode 100644 index 000000000..9434b8206 --- /dev/null +++ b/costa/native/external/lapack/clals0.f @@ -0,0 +1,434 @@ + SUBROUTINE CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX, + $ PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, + $ POLES, DIFL, DIFR, Z, K, C, S, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* December 1, 1999 +* +* .. Scalar Arguments .. + INTEGER GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL, + $ LDGNUM, NL, NR, NRHS, SQRE + REAL C, S +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), PERM( * ) + REAL DIFL( * ), DIFR( LDGNUM, * ), + $ GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ), + $ RWORK( * ), Z( * ) + COMPLEX B( LDB, * ), BX( LDBX, * ) +* .. +* +* Purpose +* ======= +* +* CLALS0 applies back the multiplying factors of either the left or the +* right singular vector matrix of a diagonal matrix appended by a row +* to the right hand side matrix B in solving the least squares problem +* using the divide-and-conquer SVD approach. +* +* For the left singular vector matrix, three types of orthogonal +* matrices are involved: +* +* (1L) Givens rotations: the number of such rotations is GIVPTR; the +* pairs of columns/rows they were applied to are stored in GIVCOL; +* and the C- and S-values of these rotations are stored in GIVNUM. +* +* (2L) Permutation. The (NL+1)-st row of B is to be moved to the first +* row, and for J=2:N, PERM(J)-th row of B is to be moved to the +* J-th row. +* +* (3L) The left singular vector matrix of the remaining matrix. +* +* For the right singular vector matrix, four types of orthogonal +* matrices are involved: +* +* (1R) The right singular vector matrix of the remaining matrix. +* +* (2R) If SQRE = 1, one extra Givens rotation to generate the right +* null space. +* +* (3R) The inverse transformation of (2L). +* +* (4R) The inverse transformation of (1L). +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether singular vectors are to be computed in +* factored form: +* = 0: Left singular vector matrix. +* = 1: Right singular vector matrix. +* +* NL (input) INTEGER +* The row dimension of the upper block. NL >= 1. +* +* NR (input) INTEGER +* The row dimension of the lower block. NR >= 1. +* +* SQRE (input) INTEGER +* = 0: the lower block is an NR-by-NR square matrix. +* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +* +* The bidiagonal matrix has row dimension N = NL + NR + 1, +* and column dimension M = N + SQRE. +* +* NRHS (input) INTEGER +* The number of columns of B and BX. NRHS must be at least 1. +* +* B (input/output) COMPLEX array, dimension ( LDB, NRHS ) +* On input, B contains the right hand sides of the least +* squares problem in rows 1 through M. On output, B contains +* the solution X in rows 1 through N. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB must be at least +* max(1,MAX( M, N ) ). +* +* BX (workspace) COMPLEX array, dimension ( LDBX, NRHS ) +* +* LDBX (input) INTEGER +* The leading dimension of BX. +* +* PERM (input) INTEGER array, dimension ( N ) +* The permutations (from deflation and sorting) applied +* to the two blocks. +* +* GIVPTR (input) INTEGER +* The number of Givens rotations which took place in this +* subproblem. +* +* GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) +* Each pair of numbers indicates a pair of rows/columns +* involved in a Givens rotation. +* +* LDGCOL (input) INTEGER +* The leading dimension of GIVCOL, must be at least N. +* +* GIVNUM (input) REAL array, dimension ( LDGNUM, 2 ) +* Each number indicates the C or S value used in the +* corresponding Givens rotation. +* +* LDGNUM (input) INTEGER +* The leading dimension of arrays DIFR, POLES and +* GIVNUM, must be at least K. +* +* POLES (input) REAL array, dimension ( LDGNUM, 2 ) +* On entry, POLES(1:K, 1) contains the new singular +* values obtained from solving the secular equation, and +* POLES(1:K, 2) is an array containing the poles in the secular +* equation. +* +* DIFL (input) REAL array, dimension ( K ). +* On entry, DIFL(I) is the distance between I-th updated +* (undeflated) singular value and the I-th (undeflated) old +* singular value. +* +* DIFR (input) REAL array, dimension ( LDGNUM, 2 ). +* On entry, DIFR(I, 1) contains the distances between I-th +* updated (undeflated) singular value and the I+1-th +* (undeflated) old singular value. And DIFR(I, 2) is the +* normalizing factor for the I-th right singular vector. +* +* Z (input) REAL array, dimension ( K ) +* Contain the components of the deflation-adjusted updating row +* vector. +* +* K (input) INTEGER +* Contains the dimension of the non-deflated matrix, +* This is the order of the related secular equation. 1 <= K <=N. +* +* C (input) REAL +* C contains garbage if SQRE =0 and the C-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* S (input) REAL +* S contains garbage if SQRE =0 and the S-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* RWORK (workspace) REAL array, dimension +* ( K*(1+NRHS) + 2*NRHS ) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO, NEGONE + PARAMETER ( ONE = 1.0E0, ZERO = 0.0E0, NEGONE = -1.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, JCOL, JROW, M, N, NLP1 + REAL DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, TEMP +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLACPY, CLASCL, CSROT, CSSCAL, SGEMV, + $ XERBLA +* .. +* .. External Functions .. + REAL SLAMC3, SNRM2 + EXTERNAL SLAMC3, SNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG, CMPLX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( NL.LT.1 ) THEN + INFO = -2 + ELSE IF( NR.LT.1 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + END IF +* + N = NL + NR + 1 +* + IF( NRHS.LT.1 ) THEN + INFO = -5 + ELSE IF( LDB.LT.N ) THEN + INFO = -7 + ELSE IF( LDBX.LT.N ) THEN + INFO = -9 + ELSE IF( GIVPTR.LT.0 ) THEN + INFO = -11 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -13 + ELSE IF( LDGNUM.LT.N ) THEN + INFO = -15 + ELSE IF( K.LT.1 ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLALS0', -INFO ) + RETURN + END IF +* + M = N + SQRE + NLP1 = NL + 1 +* + IF( ICOMPQ.EQ.0 ) THEN +* +* Apply back orthogonal transformations from the left. +* +* Step (1L): apply back the Givens rotations performed. +* + DO 10 I = 1, GIVPTR + CALL CSROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB, + $ B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ), + $ GIVNUM( I, 1 ) ) + 10 CONTINUE +* +* Step (2L): permute rows of B. +* + CALL CCOPY( NRHS, B( NLP1, 1 ), LDB, BX( 1, 1 ), LDBX ) + DO 20 I = 2, N + CALL CCOPY( NRHS, B( PERM( I ), 1 ), LDB, BX( I, 1 ), LDBX ) + 20 CONTINUE +* +* Step (3L): apply the inverse of the left singular vector +* matrix to BX. +* + IF( K.EQ.1 ) THEN + CALL CCOPY( NRHS, BX, LDBX, B, LDB ) + IF( Z( 1 ).LT.ZERO ) THEN + CALL CSSCAL( NRHS, NEGONE, B, LDB ) + END IF + ELSE + DO 100 J = 1, K + DIFLJ = DIFL( J ) + DJ = POLES( J, 1 ) + DSIGJ = -POLES( J, 2 ) + IF( J.LT.K ) THEN + DIFRJ = -DIFR( J, 1 ) + DSIGJP = -POLES( J+1, 2 ) + END IF + IF( ( Z( J ).EQ.ZERO ) .OR. ( POLES( J, 2 ).EQ.ZERO ) ) + $ THEN + RWORK( J ) = ZERO + ELSE + RWORK( J ) = -POLES( J, 2 )*Z( J ) / DIFLJ / + $ ( POLES( J, 2 )+DJ ) + END IF + DO 30 I = 1, J - 1 + IF( ( Z( I ).EQ.ZERO ) .OR. + $ ( POLES( I, 2 ).EQ.ZERO ) ) THEN + RWORK( I ) = ZERO + ELSE + RWORK( I ) = POLES( I, 2 )*Z( I ) / + $ ( SLAMC3( POLES( I, 2 ), DSIGJ )- + $ DIFLJ ) / ( POLES( I, 2 )+DJ ) + END IF + 30 CONTINUE + DO 40 I = J + 1, K + IF( ( Z( I ).EQ.ZERO ) .OR. + $ ( POLES( I, 2 ).EQ.ZERO ) ) THEN + RWORK( I ) = ZERO + ELSE + RWORK( I ) = POLES( I, 2 )*Z( I ) / + $ ( SLAMC3( POLES( I, 2 ), DSIGJP )+ + $ DIFRJ ) / ( POLES( I, 2 )+DJ ) + END IF + 40 CONTINUE + RWORK( 1 ) = NEGONE + TEMP = SNRM2( K, RWORK, 1 ) +* +* Since B and BX are complex, the following call to SGEMV +* is performed in two steps (real and imaginary parts). +* +* CALL SGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO, +* $ B( J, 1 ), LDB ) +* + I = K + NRHS*2 + DO 60 JCOL = 1, NRHS + DO 50 JROW = 1, K + I = I + 1 + RWORK( I ) = REAL( BX( JROW, JCOL ) ) + 50 CONTINUE + 60 CONTINUE + CALL SGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K, + $ RWORK( 1 ), 1, ZERO, RWORK( 1+K ), 1 ) + I = K + NRHS*2 + DO 80 JCOL = 1, NRHS + DO 70 JROW = 1, K + I = I + 1 + RWORK( I ) = AIMAG( BX( JROW, JCOL ) ) + 70 CONTINUE + 80 CONTINUE + CALL SGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K, + $ RWORK( 1 ), 1, ZERO, RWORK( 1+K+NRHS ), 1 ) + DO 90 JCOL = 1, NRHS + B( J, JCOL ) = CMPLX( RWORK( JCOL+K ), + $ RWORK( JCOL+K+NRHS ) ) + 90 CONTINUE + CALL CLASCL( 'G', 0, 0, TEMP, ONE, 1, NRHS, B( J, 1 ), + $ LDB, INFO ) + 100 CONTINUE + END IF +* +* Move the deflated rows of BX to B also. +* + IF( K.LT.MAX( M, N ) ) + $ CALL CLACPY( 'A', N-K, NRHS, BX( K+1, 1 ), LDBX, + $ B( K+1, 1 ), LDB ) + ELSE +* +* Apply back the right orthogonal transformations. +* +* Step (1R): apply back the new right singular vector matrix +* to B. +* + IF( K.EQ.1 ) THEN + CALL CCOPY( NRHS, B, LDB, BX, LDBX ) + ELSE + DO 180 J = 1, K + DSIGJ = POLES( J, 2 ) + IF( Z( J ).EQ.ZERO ) THEN + RWORK( J ) = ZERO + ELSE + RWORK( J ) = -Z( J ) / DIFL( J ) / + $ ( DSIGJ+POLES( J, 1 ) ) / DIFR( J, 2 ) + END IF + DO 110 I = 1, J - 1 + IF( Z( J ).EQ.ZERO ) THEN + RWORK( I ) = ZERO + ELSE + RWORK( I ) = Z( J ) / ( SLAMC3( DSIGJ, -POLES( I+1, + $ 2 ) )-DIFR( I, 1 ) ) / + $ ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 ) + END IF + 110 CONTINUE + DO 120 I = J + 1, K + IF( Z( J ).EQ.ZERO ) THEN + RWORK( I ) = ZERO + ELSE + RWORK( I ) = Z( J ) / ( SLAMC3( DSIGJ, -POLES( I, + $ 2 ) )-DIFL( I ) ) / + $ ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 ) + END IF + 120 CONTINUE +* +* Since B and BX are complex, the following call to SGEMV +* is performed in two steps (real and imaginary parts). +* +* CALL SGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO, +* $ BX( J, 1 ), LDBX ) +* + I = K + NRHS*2 + DO 140 JCOL = 1, NRHS + DO 130 JROW = 1, K + I = I + 1 + RWORK( I ) = REAL( B( JROW, JCOL ) ) + 130 CONTINUE + 140 CONTINUE + CALL SGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K, + $ RWORK( 1 ), 1, ZERO, RWORK( 1+K ), 1 ) + I = K + NRHS*2 + DO 160 JCOL = 1, NRHS + DO 150 JROW = 1, K + I = I + 1 + RWORK( I ) = AIMAG( B( JROW, JCOL ) ) + 150 CONTINUE + 160 CONTINUE + CALL SGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K, + $ RWORK( 1 ), 1, ZERO, RWORK( 1+K+NRHS ), 1 ) + DO 170 JCOL = 1, NRHS + BX( J, JCOL ) = CMPLX( RWORK( JCOL+K ), + $ RWORK( JCOL+K+NRHS ) ) + 170 CONTINUE + 180 CONTINUE + END IF +* +* Step (2R): if SQRE = 1, apply back the rotation that is +* related to the right null space of the subproblem. +* + IF( SQRE.EQ.1 ) THEN + CALL CCOPY( NRHS, B( M, 1 ), LDB, BX( M, 1 ), LDBX ) + CALL CSROT( NRHS, BX( 1, 1 ), LDBX, BX( M, 1 ), LDBX, C, S ) + END IF + IF( K.LT.MAX( M, N ) ) + $ CALL CLACPY( 'A', N-K, NRHS, B( K+1, 1 ), LDB, + $ BX( K+1, 1 ), LDBX ) +* +* Step (3R): permute rows of B. +* + CALL CCOPY( NRHS, BX( 1, 1 ), LDBX, B( NLP1, 1 ), LDB ) + IF( SQRE.EQ.1 ) THEN + CALL CCOPY( NRHS, BX( M, 1 ), LDBX, B( M, 1 ), LDB ) + END IF + DO 190 I = 2, N + CALL CCOPY( NRHS, BX( I, 1 ), LDBX, B( PERM( I ), 1 ), LDB ) + 190 CONTINUE +* +* Step (4R): apply back the Givens rotations performed. +* + DO 200 I = GIVPTR, 1, -1 + CALL CSROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB, + $ B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ), + $ -GIVNUM( I, 1 ) ) + 200 CONTINUE + END IF +* + RETURN +* +* End of CLALS0 +* + END diff --git a/costa/native/external/lapack/clalsa.f b/costa/native/external/lapack/clalsa.f new file mode 100644 index 000000000..496572ade --- /dev/null +++ b/costa/native/external/lapack/clalsa.f @@ -0,0 +1,504 @@ + SUBROUTINE CLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U, + $ LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR, + $ GIVCOL, LDGCOL, PERM, GIVNUM, C, S, RWORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS, + $ SMLSIZ +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ), + $ K( * ), PERM( LDGCOL, * ) + REAL C( * ), DIFL( LDU, * ), DIFR( LDU, * ), + $ GIVNUM( LDU, * ), POLES( LDU, * ), RWORK( * ), + $ S( * ), U( LDU, * ), VT( LDU, * ), Z( LDU, * ) + COMPLEX B( LDB, * ), BX( LDBX, * ) +* .. +* +* Purpose +* ======= +* +* CLALSA is an itermediate step in solving the least squares problem +* by computing the SVD of the coefficient matrix in compact form (The +* singular vectors are computed as products of simple orthorgonal +* matrices.). +* +* If ICOMPQ = 0, CLALSA applies the inverse of the left singular vector +* matrix of an upper bidiagonal matrix to the right hand side; and if +* ICOMPQ = 1, CLALSA applies the right singular vector matrix to the +* right hand side. The singular vector matrices were generated in +* compact form by CLALSA. +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether the left or the right singular vector +* matrix is involved. +* = 0: Left singular vector matrix +* = 1: Right singular vector matrix +* +* SMLSIZ (input) INTEGER +* The maximum size of the subproblems at the bottom of the +* computation tree. +* +* N (input) INTEGER +* The row and column dimensions of the upper bidiagonal matrix. +* +* NRHS (input) INTEGER +* The number of columns of B and BX. NRHS must be at least 1. +* +* B (input) COMPLEX array, dimension ( LDB, NRHS ) +* On input, B contains the right hand sides of the least +* squares problem in rows 1 through M. On output, B contains +* the solution X in rows 1 through N. +* +* LDB (input) INTEGER +* The leading dimension of B in the calling subprogram. +* LDB must be at least max(1,MAX( M, N ) ). +* +* BX (output) COMPLEX array, dimension ( LDBX, NRHS ) +* On exit, the result of applying the left or right singular +* vector matrix to B. +* +* LDBX (input) INTEGER +* The leading dimension of BX. +* +* U (input) REAL array, dimension ( LDU, SMLSIZ ). +* On entry, U contains the left singular vector matrices of all +* subproblems at the bottom level. +* +* LDU (input) INTEGER, LDU = > N. +* The leading dimension of arrays U, VT, DIFL, DIFR, +* POLES, GIVNUM, and Z. +* +* VT (input) REAL array, dimension ( LDU, SMLSIZ+1 ). +* On entry, VT' contains the right singular vector matrices of +* all subproblems at the bottom level. +* +* K (input) INTEGER array, dimension ( N ). +* +* DIFL (input) REAL array, dimension ( LDU, NLVL ). +* where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. +* +* DIFR (input) REAL array, dimension ( LDU, 2 * NLVL ). +* On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record +* distances between singular values on the I-th level and +* singular values on the (I -1)-th level, and DIFR(*, 2 * I) +* record the normalizing factors of the right singular vectors +* matrices of subproblems on I-th level. +* +* Z (input) REAL array, dimension ( LDU, NLVL ). +* On entry, Z(1, I) contains the components of the deflation- +* adjusted updating row vector for subproblems on the I-th +* level. +* +* POLES (input) REAL array, dimension ( LDU, 2 * NLVL ). +* On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old +* singular values involved in the secular equations on the I-th +* level. +* +* GIVPTR (input) INTEGER array, dimension ( N ). +* On entry, GIVPTR( I ) records the number of Givens +* rotations performed on the I-th problem on the computation +* tree. +* +* GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). +* On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the +* locations of Givens rotations performed on the I-th level on +* the computation tree. +* +* LDGCOL (input) INTEGER, LDGCOL = > N. +* The leading dimension of arrays GIVCOL and PERM. +* +* PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). +* On entry, PERM(*, I) records permutations done on the I-th +* level of the computation tree. +* +* GIVNUM (input) REAL array, dimension ( LDU, 2 * NLVL ). +* On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- +* values of Givens rotations performed on the I-th level on the +* computation tree. +* +* C (input) REAL array, dimension ( N ). +* On entry, if the I-th subproblem is not square, +* C( I ) contains the C-value of a Givens rotation related to +* the right null space of the I-th subproblem. +* +* S (input) REAL array, dimension ( N ). +* On entry, if the I-th subproblem is not square, +* S( I ) contains the S-value of a Givens rotation related to +* the right null space of the I-th subproblem. +* +* RWORK (workspace) REAL array, dimension at least +* max ( N, (SMLSZ+1)*NRHS*3 ). +* +* IWORK (workspace) INTEGER array. +* The dimension must be at least 3 * N +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, IC, IM1, INODE, J, JCOL, JIMAG, JREAL, + $ JROW, LF, LL, LVL, LVL2, ND, NDB1, NDIML, + $ NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, SQRE +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLALS0, SGEMM, SLASDT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG, CMPLX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( SMLSIZ.LT.3 ) THEN + INFO = -2 + ELSE IF( N.LT.SMLSIZ ) THEN + INFO = -3 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -4 + ELSE IF( LDB.LT.N ) THEN + INFO = -6 + ELSE IF( LDBX.LT.N ) THEN + INFO = -8 + ELSE IF( LDU.LT.N ) THEN + INFO = -10 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -19 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLALSA', -INFO ) + RETURN + END IF +* +* Book-keeping and setting up the computation tree. +* + INODE = 1 + NDIML = INODE + N + NDIMR = NDIML + N +* + CALL SLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ), + $ IWORK( NDIMR ), SMLSIZ ) +* +* The following code applies back the left singular vector factors. +* For applying back the right singular vector factors, go to 170. +* + IF( ICOMPQ.EQ.1 ) THEN + GO TO 170 + END IF +* +* The nodes on the bottom level of the tree were solved +* by SLASDQ. The corresponding left and right singular vector +* matrices are in explicit form. First apply back the left +* singular vector matrices. +* + NDB1 = ( ND+1 ) / 2 + DO 130 I = NDB1, ND +* +* IC : center row of each node +* NL : number of rows of left subproblem +* NR : number of rows of right subproblem +* NLF: starting row of the left subproblem +* NRF: starting row of the right subproblem +* + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NR = IWORK( NDIMR+I1 ) + NLF = IC - NL + NRF = IC + 1 +* +* Since B and BX are complex, the following call to SGEMM +* is performed in two steps (real and imaginary parts). +* +* CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, +* $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) +* + J = NL*NRHS*2 + DO 20 JCOL = 1, NRHS + DO 10 JROW = NLF, NLF + NL - 1 + J = J + 1 + RWORK( J ) = REAL( B( JROW, JCOL ) ) + 10 CONTINUE + 20 CONTINUE + CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, + $ RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1 ), NL ) + J = NL*NRHS*2 + DO 40 JCOL = 1, NRHS + DO 30 JROW = NLF, NLF + NL - 1 + J = J + 1 + RWORK( J ) = AIMAG( B( JROW, JCOL ) ) + 30 CONTINUE + 40 CONTINUE + CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, + $ RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1+NL*NRHS ), + $ NL ) + JREAL = 0 + JIMAG = NL*NRHS + DO 60 JCOL = 1, NRHS + DO 50 JROW = NLF, NLF + NL - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 50 CONTINUE + 60 CONTINUE +* +* Since B and BX are complex, the following call to SGEMM +* is performed in two steps (real and imaginary parts). +* +* CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, +* $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) +* + J = NR*NRHS*2 + DO 80 JCOL = 1, NRHS + DO 70 JROW = NRF, NRF + NR - 1 + J = J + 1 + RWORK( J ) = REAL( B( JROW, JCOL ) ) + 70 CONTINUE + 80 CONTINUE + CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, + $ RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1 ), NR ) + J = NR*NRHS*2 + DO 100 JCOL = 1, NRHS + DO 90 JROW = NRF, NRF + NR - 1 + J = J + 1 + RWORK( J ) = AIMAG( B( JROW, JCOL ) ) + 90 CONTINUE + 100 CONTINUE + CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, + $ RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1+NR*NRHS ), + $ NR ) + JREAL = 0 + JIMAG = NR*NRHS + DO 120 JCOL = 1, NRHS + DO 110 JROW = NRF, NRF + NR - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 110 CONTINUE + 120 CONTINUE +* + 130 CONTINUE +* +* Next copy the rows of B that correspond to unchanged rows +* in the bidiagonal matrix to BX. +* + DO 140 I = 1, ND + IC = IWORK( INODE+I-1 ) + CALL CCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX ) + 140 CONTINUE +* +* Finally go through the left singular vector matrices of all +* the other subproblems bottom-up on the tree. +* + J = 2**NLVL + SQRE = 0 +* + DO 160 LVL = NLVL, 1, -1 + LVL2 = 2*LVL - 1 +* +* find the first node LF and last node LL on +* the current level LVL +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 150 I = LF, LL + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + J = J - 1 + CALL CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX, + $ B( NLF, 1 ), LDB, PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ), + $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ), + $ Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK, + $ INFO ) + 150 CONTINUE + 160 CONTINUE + GO TO 330 +* +* ICOMPQ = 1: applying back the right singular vector factors. +* + 170 CONTINUE +* +* First now go through the right singular vector matrices of all +* the tree nodes top-down. +* + J = 0 + DO 190 LVL = 1, NLVL + LVL2 = 2*LVL - 1 +* +* Find the first node LF and last node LL on +* the current level LVL. +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 180 I = LL, LF, -1 + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + IF( I.EQ.LL ) THEN + SQRE = 0 + ELSE + SQRE = 1 + END IF + J = J + 1 + CALL CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB, + $ BX( NLF, 1 ), LDBX, PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ), + $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ), + $ Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK, + $ INFO ) + 180 CONTINUE + 190 CONTINUE +* +* The nodes on the bottom level of the tree were solved +* by SLASDQ. The corresponding right singular vector +* matrices are in explicit form. Apply them back. +* + NDB1 = ( ND+1 ) / 2 + DO 320 I = NDB1, ND + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NR = IWORK( NDIMR+I1 ) + NLP1 = NL + 1 + IF( I.EQ.ND ) THEN + NRP1 = NR + ELSE + NRP1 = NR + 1 + END IF + NLF = IC - NL + NRF = IC + 1 +* +* Since B and BX are complex, the following call to SGEMM is +* performed in two steps (real and imaginary parts). +* +* CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, +* $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) +* + J = NLP1*NRHS*2 + DO 210 JCOL = 1, NRHS + DO 200 JROW = NLF, NLF + NLP1 - 1 + J = J + 1 + RWORK( J ) = REAL( B( JROW, JCOL ) ) + 200 CONTINUE + 210 CONTINUE + CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, + $ RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO, RWORK( 1 ), + $ NLP1 ) + J = NLP1*NRHS*2 + DO 230 JCOL = 1, NRHS + DO 220 JROW = NLF, NLF + NLP1 - 1 + J = J + 1 + RWORK( J ) = AIMAG( B( JROW, JCOL ) ) + 220 CONTINUE + 230 CONTINUE + CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, + $ RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO, + $ RWORK( 1+NLP1*NRHS ), NLP1 ) + JREAL = 0 + JIMAG = NLP1*NRHS + DO 250 JCOL = 1, NRHS + DO 240 JROW = NLF, NLF + NLP1 - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 240 CONTINUE + 250 CONTINUE +* +* Since B and BX are complex, the following call to SGEMM is +* performed in two steps (real and imaginary parts). +* +* CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, +* $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) +* + J = NRP1*NRHS*2 + DO 270 JCOL = 1, NRHS + DO 260 JROW = NRF, NRF + NRP1 - 1 + J = J + 1 + RWORK( J ) = REAL( B( JROW, JCOL ) ) + 260 CONTINUE + 270 CONTINUE + CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, + $ RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO, RWORK( 1 ), + $ NRP1 ) + J = NRP1*NRHS*2 + DO 290 JCOL = 1, NRHS + DO 280 JROW = NRF, NRF + NRP1 - 1 + J = J + 1 + RWORK( J ) = AIMAG( B( JROW, JCOL ) ) + 280 CONTINUE + 290 CONTINUE + CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, + $ RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO, + $ RWORK( 1+NRP1*NRHS ), NRP1 ) + JREAL = 0 + JIMAG = NRP1*NRHS + DO 310 JCOL = 1, NRHS + DO 300 JROW = NRF, NRF + NRP1 - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 300 CONTINUE + 310 CONTINUE +* + 320 CONTINUE +* + 330 CONTINUE +* + RETURN +* +* End of CLALSA +* + END diff --git a/costa/native/external/lapack/clalsd.f b/costa/native/external/lapack/clalsd.f new file mode 100644 index 000000000..21896d84c --- /dev/null +++ b/costa/native/external/lapack/clalsd.f @@ -0,0 +1,597 @@ + SUBROUTINE CLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND, + $ RANK, WORK, RWORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS, RANK, SMLSIZ + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL D( * ), E( * ), RWORK( * ) + COMPLEX B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CLALSD uses the singular value decomposition of A to solve the least +* squares problem of finding X to minimize the Euclidean norm of each +* column of A*X-B, where A is N-by-N upper bidiagonal, and X and B +* are N-by-NRHS. The solution X overwrites B. +* +* The singular values of A smaller than RCOND times the largest +* singular value are treated as zero in solving the least squares +* problem; in this case a minimum norm solution is returned. +* The actual singular values are returned in D in ascending order. +* +* This code makes very mild assumptions about floating point +* arithmetic. It will work on machines with a guard digit in +* add/subtract, or on those binary machines without guard digits +* which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. +* It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': D and E define an upper bidiagonal matrix. +* = 'L': D and E define a lower bidiagonal matrix. +* +* SMLSIZ (input) INTEGER +* The maximum size of the subproblems at the bottom of the +* computation tree. +* +* N (input) INTEGER +* The dimension of the bidiagonal matrix. N >= 0. +* +* NRHS (input) INTEGER +* The number of columns of B. NRHS must be at least 1. +* +* D (input/output) REAL array, dimension (N) +* On entry D contains the main diagonal of the bidiagonal +* matrix. On exit, if INFO = 0, D contains its singular values. +* +* E (input) REAL array, dimension (N-1) +* Contains the super-diagonal entries of the bidiagonal matrix. +* On exit, E has been destroyed. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On input, B contains the right hand sides of the least +* squares problem. On output, B contains the solution X. +* +* LDB (input) INTEGER +* The leading dimension of B in the calling subprogram. +* LDB must be at least max(1,N). +* +* RCOND (input) REAL +* The singular values of A less than or equal to RCOND times +* the largest singular value are treated as zero in solving +* the least squares problem. If RCOND is negative, +* machine precision is used instead. +* For example, if diag(S)*X=B were the least squares problem, +* where diag(S) is a diagonal matrix of singular values, the +* solution would be X(i) = B(i) / S(i) if S(i) is greater than +* RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to +* RCOND*max(S). +* +* RANK (output) INTEGER +* The number of singular values of A greater than RCOND times +* the largest singular value. +* +* WORK (workspace) COMPLEX array, dimension at least +* (N * NRHS). +* +* RWORK (workspace) REAL array, dimension at least +* (9*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + (SMLSIZ+1)**2), +* where +* NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) +* +* IWORK (workspace) INTEGER array, dimension at least +* (3*N*NLVL + 11*N). +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: The algorithm failed to compute an singular value while +* working on the submatrix lying in rows and columns +* INFO/(N+1) through MOD(INFO,N+1). +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + INTEGER BX, BXST, C, DIFL, DIFR, GIVCOL, GIVNUM, + $ GIVPTR, I, ICMPQ1, ICMPQ2, IRWB, IRWIB, IRWRB, + $ IRWU, IRWVT, IRWWRK, IWK, J, JCOL, JIMAG, + $ JREAL, JROW, K, NLVL, NM1, NRWORK, NSIZE, NSUB, + $ PERM, POLES, S, SIZEI, SMLSZP, SQRE, ST, ST1, + $ U, VT, Z + REAL CS, EPS, ORGNRM, R, SN, TOL +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH, SLANST + EXTERNAL ISAMAX, SLAMCH, SLANST +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLACPY, CLALSA, CLASCL, CLASET, CSROT, + $ SGEMM, SLARTG, SLASCL, SLASDA, SLASDQ, SLASET, + $ SLASRT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, INT, LOG, REAL, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -4 + ELSE IF( ( LDB.LT.1 ) .OR. ( LDB.LT.N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLALSD', -INFO ) + RETURN + END IF +* + EPS = SLAMCH( 'Epsilon' ) +* +* Set up the tolerance. +* + IF( ( RCOND.LE.ZERO ) .OR. ( RCOND.GE.ONE ) ) THEN + RCOND = EPS + END IF +* + RANK = 0 +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + IF( D( 1 ).EQ.ZERO ) THEN + CALL CLASET( 'A', 1, NRHS, CZERO, CZERO, B, LDB ) + ELSE + RANK = 1 + CALL CLASCL( 'G', 0, 0, D( 1 ), ONE, 1, NRHS, B, LDB, INFO ) + D( 1 ) = ABS( D( 1 ) ) + END IF + RETURN + END IF +* +* Rotate the matrix if it is lower bidiagonal. +* + IF( UPLO.EQ.'L' ) THEN + DO 10 I = 1, N - 1 + CALL SLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( NRHS.EQ.1 ) THEN + CALL CSROT( 1, B( I, 1 ), 1, B( I+1, 1 ), 1, CS, SN ) + ELSE + RWORK( I*2-1 ) = CS + RWORK( I*2 ) = SN + END IF + 10 CONTINUE + IF( NRHS.GT.1 ) THEN + DO 30 I = 1, NRHS + DO 20 J = 1, N - 1 + CS = RWORK( J*2-1 ) + SN = RWORK( J*2 ) + CALL CSROT( 1, B( J, I ), 1, B( J+1, I ), 1, CS, SN ) + 20 CONTINUE + 30 CONTINUE + END IF + END IF +* +* Scale. +* + NM1 = N - 1 + ORGNRM = SLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) THEN + CALL CLASET( 'A', N, NRHS, CZERO, CZERO, B, LDB ) + RETURN + END IF +* + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO ) + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, INFO ) +* +* If N is smaller than the minimum divide size SMLSIZ, then solve +* the problem with another solver. +* + IF( N.LE.SMLSIZ ) THEN + IRWU = 1 + IRWVT = IRWU + N*N + IRWWRK = IRWVT + N*N + IRWRB = IRWWRK + IRWIB = IRWRB + N*NRHS + IRWB = IRWIB + N*NRHS + CALL SLASET( 'A', N, N, ZERO, ONE, RWORK( IRWU ), N ) + CALL SLASET( 'A', N, N, ZERO, ONE, RWORK( IRWVT ), N ) + CALL SLASDQ( 'U', 0, N, N, N, 0, D, E, RWORK( IRWVT ), N, + $ RWORK( IRWU ), N, RWORK( IRWWRK ), 1, + $ RWORK( IRWWRK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF +* +* In the real version, B is passed to SLASDQ and multiplied +* internally by Q'. Here B is complex and that product is +* computed below in two steps (real and imaginary parts). +* + J = IRWB - 1 + DO 50 JCOL = 1, NRHS + DO 40 JROW = 1, N + J = J + 1 + RWORK( J ) = REAL( B( JROW, JCOL ) ) + 40 CONTINUE + 50 CONTINUE + CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWU ), N, + $ RWORK( IRWB ), N, ZERO, RWORK( IRWRB ), N ) + J = IRWB - 1 + DO 70 JCOL = 1, NRHS + DO 60 JROW = 1, N + J = J + 1 + RWORK( J ) = AIMAG( B( JROW, JCOL ) ) + 60 CONTINUE + 70 CONTINUE + CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWU ), N, + $ RWORK( IRWB ), N, ZERO, RWORK( IRWIB ), N ) + JREAL = IRWRB - 1 + JIMAG = IRWIB - 1 + DO 90 JCOL = 1, NRHS + DO 80 JROW = 1, N + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + B( JROW, JCOL ) = CMPLX( RWORK( JREAL ), RWORK( JIMAG ) ) + 80 CONTINUE + 90 CONTINUE +* + TOL = RCOND*ABS( D( ISAMAX( N, D, 1 ) ) ) + DO 100 I = 1, N + IF( D( I ).LE.TOL ) THEN + CALL CLASET( 'A', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB ) + ELSE + CALL CLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, B( I, 1 ), + $ LDB, INFO ) + RANK = RANK + 1 + END IF + 100 CONTINUE +* +* Since B is complex, the following call to SGEMM is performed +* in two steps (real and imaginary parts). That is for V * B +* (in the real version of the code V' is stored in WORK). +* +* CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO, +* $ WORK( NWORK ), N ) +* + J = IRWB - 1 + DO 120 JCOL = 1, NRHS + DO 110 JROW = 1, N + J = J + 1 + RWORK( J ) = REAL( B( JROW, JCOL ) ) + 110 CONTINUE + 120 CONTINUE + CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWVT ), N, + $ RWORK( IRWB ), N, ZERO, RWORK( IRWRB ), N ) + J = IRWB - 1 + DO 140 JCOL = 1, NRHS + DO 130 JROW = 1, N + J = J + 1 + RWORK( J ) = AIMAG( B( JROW, JCOL ) ) + 130 CONTINUE + 140 CONTINUE + CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWVT ), N, + $ RWORK( IRWB ), N, ZERO, RWORK( IRWIB ), N ) + JREAL = IRWRB - 1 + JIMAG = IRWIB - 1 + DO 160 JCOL = 1, NRHS + DO 150 JROW = 1, N + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + B( JROW, JCOL ) = CMPLX( RWORK( JREAL ), RWORK( JIMAG ) ) + 150 CONTINUE + 160 CONTINUE +* +* Unscale. +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) + CALL SLASRT( 'D', N, D, INFO ) + CALL CLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO ) +* + RETURN + END IF +* +* Book-keeping and setting up some constants. +* + NLVL = INT( LOG( REAL( N ) / REAL( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1 +* + SMLSZP = SMLSIZ + 1 +* + U = 1 + VT = 1 + SMLSIZ*N + DIFL = VT + SMLSZP*N + DIFR = DIFL + NLVL*N + Z = DIFR + NLVL*N*2 + C = Z + NLVL*N + S = C + N + POLES = S + N + GIVNUM = POLES + 2*NLVL*N + NRWORK = GIVNUM + 2*NLVL*N + BX = 1 +* + IRWRB = NRWORK + IRWIB = IRWRB + SMLSIZ*NRHS + IRWB = IRWIB + SMLSIZ*NRHS +* + SIZEI = 1 + N + K = SIZEI + N + GIVPTR = K + N + PERM = GIVPTR + N + GIVCOL = PERM + NLVL*N + IWK = GIVCOL + NLVL*N*2 +* + ST = 1 + SQRE = 0 + ICMPQ1 = 1 + ICMPQ2 = 0 + NSUB = 0 +* + DO 170 I = 1, N + IF( ABS( D( I ) ).LT.EPS ) THEN + D( I ) = SIGN( EPS, D( I ) ) + END IF + 170 CONTINUE +* + DO 240 I = 1, NM1 + IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN + NSUB = NSUB + 1 + IWORK( NSUB ) = ST +* +* Subproblem found. First determine its size and then +* apply divide and conquer on it. +* + IF( I.LT.NM1 ) THEN +* +* A subproblem with E(I) small for I < NM1. +* + NSIZE = I - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + ELSE IF( ABS( E( I ) ).GE.EPS ) THEN +* +* A subproblem with E(NM1) not too small but I = NM1. +* + NSIZE = N - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + ELSE +* +* A subproblem with E(NM1) small. This implies an +* 1-by-1 subproblem at D(N), which is not solved +* explicitly. +* + NSIZE = I - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + NSUB = NSUB + 1 + IWORK( NSUB ) = N + IWORK( SIZEI+NSUB-1 ) = 1 + CALL CCOPY( NRHS, B( N, 1 ), LDB, WORK( BX+NM1 ), N ) + END IF + ST1 = ST - 1 + IF( NSIZE.EQ.1 ) THEN +* +* This is a 1-by-1 subproblem and is not solved +* explicitly. +* + CALL CCOPY( NRHS, B( ST, 1 ), LDB, WORK( BX+ST1 ), N ) + ELSE IF( NSIZE.LE.SMLSIZ ) THEN +* +* This is a small subproblem and is solved by SLASDQ. +* + CALL SLASET( 'A', NSIZE, NSIZE, ZERO, ONE, + $ RWORK( VT+ST1 ), N ) + CALL SLASET( 'A', NSIZE, NSIZE, ZERO, ONE, + $ RWORK( U+ST1 ), N ) + CALL SLASDQ( 'U', 0, NSIZE, NSIZE, NSIZE, 0, D( ST ), + $ E( ST ), RWORK( VT+ST1 ), N, RWORK( U+ST1 ), + $ N, RWORK( NRWORK ), 1, RWORK( NRWORK ), + $ INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF +* +* In the real version, B is passed to SLASDQ and multiplied +* internally by Q'. Here B is complex and that product is +* computed below in two steps (real and imaginary parts). +* + J = IRWB - 1 + DO 190 JCOL = 1, NRHS + DO 180 JROW = ST, ST + NSIZE - 1 + J = J + 1 + RWORK( J ) = REAL( B( JROW, JCOL ) ) + 180 CONTINUE + 190 CONTINUE + CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( U+ST1 ), N, RWORK( IRWB ), NSIZE, + $ ZERO, RWORK( IRWRB ), NSIZE ) + J = IRWB - 1 + DO 210 JCOL = 1, NRHS + DO 200 JROW = ST, ST + NSIZE - 1 + J = J + 1 + RWORK( J ) = AIMAG( B( JROW, JCOL ) ) + 200 CONTINUE + 210 CONTINUE + CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( U+ST1 ), N, RWORK( IRWB ), NSIZE, + $ ZERO, RWORK( IRWIB ), NSIZE ) + JREAL = IRWRB - 1 + JIMAG = IRWIB - 1 + DO 230 JCOL = 1, NRHS + DO 220 JROW = ST, ST + NSIZE - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + B( JROW, JCOL ) = CMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 220 CONTINUE + 230 CONTINUE +* + CALL CLACPY( 'A', NSIZE, NRHS, B( ST, 1 ), LDB, + $ WORK( BX+ST1 ), N ) + ELSE +* +* A large problem. Solve it using divide and conquer. +* + CALL SLASDA( ICMPQ1, SMLSIZ, NSIZE, SQRE, D( ST ), + $ E( ST ), RWORK( U+ST1 ), N, RWORK( VT+ST1 ), + $ IWORK( K+ST1 ), RWORK( DIFL+ST1 ), + $ RWORK( DIFR+ST1 ), RWORK( Z+ST1 ), + $ RWORK( POLES+ST1 ), IWORK( GIVPTR+ST1 ), + $ IWORK( GIVCOL+ST1 ), N, IWORK( PERM+ST1 ), + $ RWORK( GIVNUM+ST1 ), RWORK( C+ST1 ), + $ RWORK( S+ST1 ), RWORK( NRWORK ), + $ IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + BXST = BX + ST1 + CALL CLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, B( ST, 1 ), + $ LDB, WORK( BXST ), N, RWORK( U+ST1 ), N, + $ RWORK( VT+ST1 ), IWORK( K+ST1 ), + $ RWORK( DIFL+ST1 ), RWORK( DIFR+ST1 ), + $ RWORK( Z+ST1 ), RWORK( POLES+ST1 ), + $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N, + $ IWORK( PERM+ST1 ), RWORK( GIVNUM+ST1 ), + $ RWORK( C+ST1 ), RWORK( S+ST1 ), + $ RWORK( NRWORK ), IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + END IF + ST = I + 1 + END IF + 240 CONTINUE +* +* Apply the singular values and treat the tiny ones as zero. +* + TOL = RCOND*ABS( D( ISAMAX( N, D, 1 ) ) ) +* + DO 250 I = 1, N +* +* Some of the elements in D can be negative because 1-by-1 +* subproblems were not solved explicitly. +* + IF( ABS( D( I ) ).LE.TOL ) THEN + CALL CLASET( 'A', 1, NRHS, CZERO, CZERO, WORK( BX+I-1 ), N ) + ELSE + RANK = RANK + 1 + CALL CLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, + $ WORK( BX+I-1 ), N, INFO ) + END IF + D( I ) = ABS( D( I ) ) + 250 CONTINUE +* +* Now apply back the right singular vectors. +* + ICMPQ2 = 1 + DO 320 I = 1, NSUB + ST = IWORK( I ) + ST1 = ST - 1 + NSIZE = IWORK( SIZEI+I-1 ) + BXST = BX + ST1 + IF( NSIZE.EQ.1 ) THEN + CALL CCOPY( NRHS, WORK( BXST ), N, B( ST, 1 ), LDB ) + ELSE IF( NSIZE.LE.SMLSIZ ) THEN +* +* Since B and BX are complex, the following call to SGEMM +* is performed in two steps (real and imaginary parts). +* +* CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, +* $ RWORK( VT+ST1 ), N, RWORK( BXST ), N, ZERO, +* $ B( ST, 1 ), LDB ) +* + J = BXST - N - 1 + JREAL = IRWB - 1 + DO 270 JCOL = 1, NRHS + J = J + N + DO 260 JROW = 1, NSIZE + JREAL = JREAL + 1 + RWORK( JREAL ) = REAL( WORK( J+JROW ) ) + 260 CONTINUE + 270 CONTINUE + CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( VT+ST1 ), N, RWORK( IRWB ), NSIZE, ZERO, + $ RWORK( IRWRB ), NSIZE ) + J = BXST - N - 1 + JIMAG = IRWB - 1 + DO 290 JCOL = 1, NRHS + J = J + N + DO 280 JROW = 1, NSIZE + JIMAG = JIMAG + 1 + RWORK( JIMAG ) = AIMAG( WORK( J+JROW ) ) + 280 CONTINUE + 290 CONTINUE + CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( VT+ST1 ), N, RWORK( IRWB ), NSIZE, ZERO, + $ RWORK( IRWIB ), NSIZE ) + JREAL = IRWRB - 1 + JIMAG = IRWIB - 1 + DO 310 JCOL = 1, NRHS + DO 300 JROW = ST, ST + NSIZE - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + B( JROW, JCOL ) = CMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 300 CONTINUE + 310 CONTINUE + ELSE + CALL CLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, WORK( BXST ), N, + $ B( ST, 1 ), LDB, RWORK( U+ST1 ), N, + $ RWORK( VT+ST1 ), IWORK( K+ST1 ), + $ RWORK( DIFL+ST1 ), RWORK( DIFR+ST1 ), + $ RWORK( Z+ST1 ), RWORK( POLES+ST1 ), + $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N, + $ IWORK( PERM+ST1 ), RWORK( GIVNUM+ST1 ), + $ RWORK( C+ST1 ), RWORK( S+ST1 ), + $ RWORK( NRWORK ), IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + END IF + 320 CONTINUE +* +* Unscale and sort the singular values. +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) + CALL SLASRT( 'D', N, D, INFO ) + CALL CLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO ) +* + RETURN +* +* End of CLALSD +* + END diff --git a/costa/native/external/lapack/clangb.f b/costa/native/external/lapack/clangb.f new file mode 100644 index 000000000..3288d307a --- /dev/null +++ b/costa/native/external/lapack/clangb.f @@ -0,0 +1,155 @@ + REAL FUNCTION CLANGB( NORM, N, KL, KU, AB, LDAB, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER KL, KU, LDAB, N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* CLANGB returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of an +* n by n band matrix A, with kl sub-diagonals and ku super-diagonals. +* +* Description +* =========== +* +* CLANGB returns the value +* +* CLANGB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in CLANGB as described +* above. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, CLANGB is +* set to zero. +* +* KL (input) INTEGER +* The number of sub-diagonals of the matrix A. KL >= 0. +* +* KU (input) INTEGER +* The number of super-diagonals of the matrix A. KU >= 0. +* +* AB (input) COMPLEX array, dimension (LDAB,N) +* The band matrix A, stored in rows 1 to KL+KU+1. The j-th +* column of A is stored in the j-th column of the array AB as +* follows: +* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KL+KU+1. +* +* WORK (workspace) REAL array, dimension (LWORK), +* where LWORK >= N when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, K, L + REAL SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 30 CONTINUE + VALUE = MAX( VALUE, SUM ) + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, N + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + K = KU + 1 - J + DO 60 I = MAX( 1, J-KU ), MIN( N, J+KL ) + WORK( I ) = WORK( I ) + ABS( AB( K+I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + L = MAX( 1, J-KU ) + K = KU + 1 - J + L + CALL CLASSQ( MIN( N, J+KL )-L+1, AB( K, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANGB = VALUE + RETURN +* +* End of CLANGB +* + END diff --git a/costa/native/external/lapack/clange.f b/costa/native/external/lapack/clange.f new file mode 100644 index 000000000..99eb75cd6 --- /dev/null +++ b/costa/native/external/lapack/clange.f @@ -0,0 +1,146 @@ + REAL FUNCTION CLANGE( NORM, M, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLANGE returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* complex matrix A. +* +* Description +* =========== +* +* CLANGE returns the value +* +* CLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in CLANGE as described +* above. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. When M = 0, +* CLANGE is set to zero. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. When N = 0, +* CLANGE is set to zero. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The m by n matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(M,1). +* +* WORK (workspace) REAL array, dimension (LWORK), +* where LWORK >= M when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( MIN( M, N ).EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = 1, M + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = 1, M + SUM = SUM + ABS( A( I, J ) ) + 30 CONTINUE + VALUE = MAX( VALUE, SUM ) + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, M + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + DO 60 I = 1, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, M + VALUE = MAX( VALUE, WORK( I ) ) + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + CALL CLASSQ( M, A( 1, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANGE = VALUE + RETURN +* +* End of CLANGE +* + END diff --git a/costa/native/external/lapack/clangt.f b/costa/native/external/lapack/clangt.f new file mode 100644 index 000000000..b147fdda1 --- /dev/null +++ b/costa/native/external/lapack/clangt.f @@ -0,0 +1,142 @@ + REAL FUNCTION CLANGT( NORM, N, DL, D, DU ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER N +* .. +* .. Array Arguments .. + COMPLEX D( * ), DL( * ), DU( * ) +* .. +* +* Purpose +* ======= +* +* CLANGT returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* complex tridiagonal matrix A. +* +* Description +* =========== +* +* CLANGT returns the value +* +* CLANGT = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in CLANGT as described +* above. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, CLANGT is +* set to zero. +* +* DL (input) COMPLEX array, dimension (N-1) +* The (n-1) sub-diagonal elements of A. +* +* D (input) COMPLEX array, dimension (N) +* The diagonal elements of A. +* +* DU (input) COMPLEX array, dimension (N-1) +* The (n-1) super-diagonal elements of A. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL ANORM, SCALE, SUM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + ANORM = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + ANORM = ABS( D( N ) ) + DO 10 I = 1, N - 1 + ANORM = MAX( ANORM, ABS( DL( I ) ) ) + ANORM = MAX( ANORM, ABS( D( I ) ) ) + ANORM = MAX( ANORM, ABS( DU( I ) ) ) + 10 CONTINUE + ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN +* +* Find norm1(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = MAX( ABS( D( 1 ) )+ABS( DL( 1 ) ), + $ ABS( D( N ) )+ABS( DU( N-1 ) ) ) + DO 20 I = 2, N - 1 + ANORM = MAX( ANORM, ABS( D( I ) )+ABS( DL( I ) )+ + $ ABS( DU( I-1 ) ) ) + 20 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = MAX( ABS( D( 1 ) )+ABS( DU( 1 ) ), + $ ABS( D( N ) )+ABS( DL( N-1 ) ) ) + DO 30 I = 2, N - 1 + ANORM = MAX( ANORM, ABS( D( I ) )+ABS( DU( I ) )+ + $ ABS( DL( I-1 ) ) ) + 30 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + CALL CLASSQ( N, D, 1, SCALE, SUM ) + IF( N.GT.1 ) THEN + CALL CLASSQ( N-1, DL, 1, SCALE, SUM ) + CALL CLASSQ( N-1, DU, 1, SCALE, SUM ) + END IF + ANORM = SCALE*SQRT( SUM ) + END IF +* + CLANGT = ANORM + RETURN +* +* End of CLANGT +* + END diff --git a/costa/native/external/lapack/clanhb.f b/costa/native/external/lapack/clanhb.f new file mode 100644 index 000000000..48946c606 --- /dev/null +++ b/costa/native/external/lapack/clanhb.f @@ -0,0 +1,202 @@ + REAL FUNCTION CLANHB( NORM, UPLO, N, K, AB, LDAB, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER K, LDAB, N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* CLANHB returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of an +* n by n hermitian band matrix A, with k super-diagonals. +* +* Description +* =========== +* +* CLANHB returns the value +* +* CLANHB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in CLANHB as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* band matrix A is supplied. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, CLANHB is +* set to zero. +* +* K (input) INTEGER +* The number of super-diagonals or sub-diagonals of the +* band matrix A. K >= 0. +* +* AB (input) COMPLEX array, dimension (LDAB,N) +* The upper or lower triangle of the hermitian band matrix A, +* stored in the first K+1 rows of AB. The j-th column of A is +* stored in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). +* Note that the imaginary parts of the diagonal elements need +* not be set and are assumed to be zero. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= K+1. +* +* WORK (workspace) REAL array, dimension (LWORK), +* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +* WORK is not referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L + REAL ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = MAX( K+2-J, 1 ), K + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 10 CONTINUE + VALUE = MAX( VALUE, ABS( REAL( AB( K+1, J ) ) ) ) + 20 CONTINUE + ELSE + DO 40 J = 1, N + VALUE = MAX( VALUE, ABS( REAL( AB( 1, J ) ) ) ) + DO 30 I = 2, MIN( N+1-J, K+1 ) + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is hermitian). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + L = K + 1 - J + DO 50 I = MAX( 1, J-K ), J - 1 + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( REAL( AB( K+1, J ) ) ) + 60 CONTINUE + DO 70 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( REAL( AB( 1, J ) ) ) + L = 1 - J + DO 90 I = J + 1, MIN( N, J+K ) + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + VALUE = MAX( VALUE, SUM ) + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( K.GT.0 ) THEN + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL CLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ), + $ 1, SCALE, SUM ) + 110 CONTINUE + L = K + 1 + ELSE + DO 120 J = 1, N - 1 + CALL CLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, + $ SUM ) + 120 CONTINUE + L = 1 + END IF + SUM = 2*SUM + ELSE + L = 1 + END IF + DO 130 J = 1, N + IF( REAL( AB( L, J ) ).NE.ZERO ) THEN + ABSA = ABS( REAL( AB( L, J ) ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + 130 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANHB = VALUE + RETURN +* +* End of CLANHB +* + END diff --git a/costa/native/external/lapack/clanhe.f b/costa/native/external/lapack/clanhe.f new file mode 100644 index 000000000..49de6e746 --- /dev/null +++ b/costa/native/external/lapack/clanhe.f @@ -0,0 +1,188 @@ + REAL FUNCTION CLANHE( NORM, UPLO, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER LDA, N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLANHE returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* complex hermitian matrix A. +* +* Description +* =========== +* +* CLANHE returns the value +* +* CLANHE = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in CLANHE as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* hermitian matrix A is to be referenced. +* = 'U': Upper triangular part of A is referenced +* = 'L': Lower triangular part of A is referenced +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, CLANHE is +* set to zero. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The hermitian matrix A. If UPLO = 'U', the leading n by n +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading n by n lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. Note that the imaginary parts of the diagonal +* elements need not be set and are assumed to be zero. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(N,1). +* +* WORK (workspace) REAL array, dimension (LWORK), +* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +* WORK is not referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, REAL, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, J - 1 + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + VALUE = MAX( VALUE, ABS( REAL( A( J, J ) ) ) ) + 20 CONTINUE + ELSE + DO 40 J = 1, N + VALUE = MAX( VALUE, ABS( REAL( A( J, J ) ) ) ) + DO 30 I = J + 1, N + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is hermitian). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( REAL( A( J, J ) ) ) + 60 CONTINUE + DO 70 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( REAL( A( J, J ) ) ) + DO 90 I = J + 1, N + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + VALUE = MAX( VALUE, SUM ) + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL CLASSQ( J-1, A( 1, J ), 1, SCALE, SUM ) + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL CLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM ) + 120 CONTINUE + END IF + SUM = 2*SUM + DO 130 I = 1, N + IF( REAL( A( I, I ) ).NE.ZERO ) THEN + ABSA = ABS( REAL( A( I, I ) ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + 130 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANHE = VALUE + RETURN +* +* End of CLANHE +* + END diff --git a/costa/native/external/lapack/clanhp.f b/costa/native/external/lapack/clanhp.f new file mode 100644 index 000000000..666821d14 --- /dev/null +++ b/costa/native/external/lapack/clanhp.f @@ -0,0 +1,202 @@ + REAL FUNCTION CLANHP( NORM, UPLO, N, AP, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX AP( * ) +* .. +* +* Purpose +* ======= +* +* CLANHP returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* complex hermitian matrix A, supplied in packed form. +* +* Description +* =========== +* +* CLANHP returns the value +* +* CLANHP = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in CLANHP as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* hermitian matrix A is supplied. +* = 'U': Upper triangular part of A is supplied +* = 'L': Lower triangular part of A is supplied +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, CLANHP is +* set to zero. +* +* AP (input) COMPLEX array, dimension (N*(N+1)/2) +* The upper or lower triangle of the hermitian matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* Note that the imaginary parts of the diagonal elements need +* not be set and are assumed to be zero. +* +* WORK (workspace) REAL array, dimension (LWORK), +* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +* WORK is not referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, K + REAL ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, REAL, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + K = 0 + DO 20 J = 1, N + DO 10 I = K + 1, K + J - 1 + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 10 CONTINUE + K = K + J + VALUE = MAX( VALUE, ABS( REAL( AP( K ) ) ) ) + 20 CONTINUE + ELSE + K = 1 + DO 40 J = 1, N + VALUE = MAX( VALUE, ABS( REAL( AP( K ) ) ) ) + DO 30 I = K + 1, K + N - J + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 30 CONTINUE + K = K + N - J + 1 + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is hermitian). +* + VALUE = ZERO + K = 1 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 50 CONTINUE + WORK( J ) = SUM + ABS( REAL( AP( K ) ) ) + K = K + 1 + 60 CONTINUE + DO 70 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( REAL( AP( K ) ) ) + K = K + 1 + DO 90 I = J + 1, N + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 90 CONTINUE + VALUE = MAX( VALUE, SUM ) + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + K = 2 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL CLASSQ( J-1, AP( K ), 1, SCALE, SUM ) + K = K + J + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL CLASSQ( N-J, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 120 CONTINUE + END IF + SUM = 2*SUM + K = 1 + DO 130 I = 1, N + IF( REAL( AP( K ) ).NE.ZERO ) THEN + ABSA = ABS( REAL( AP( K ) ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + IF( LSAME( UPLO, 'U' ) ) THEN + K = K + I + 1 + ELSE + K = K + N - I + 1 + END IF + 130 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANHP = VALUE + RETURN +* +* End of CLANHP +* + END diff --git a/costa/native/external/lapack/clanhs.f b/costa/native/external/lapack/clanhs.f new file mode 100644 index 000000000..98d1062e7 --- /dev/null +++ b/costa/native/external/lapack/clanhs.f @@ -0,0 +1,143 @@ + REAL FUNCTION CLANHS( NORM, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER LDA, N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLANHS returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* Hessenberg matrix A. +* +* Description +* =========== +* +* CLANHS returns the value +* +* CLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in CLANHS as described +* above. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, CLANHS is +* set to zero. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The n by n upper Hessenberg matrix A; the part of A below the +* first sub-diagonal is not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(N,1). +* +* WORK (workspace) REAL array, dimension (LWORK), +* where LWORK >= N when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = 1, MIN( N, J+1 ) + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = 1, MIN( N, J+1 ) + SUM = SUM + ABS( A( I, J ) ) + 30 CONTINUE + VALUE = MAX( VALUE, SUM ) + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, N + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + DO 60 I = 1, MIN( N, J+1 ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + CALL CLASSQ( MIN( N, J+1 ), A( 1, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANHS = VALUE + RETURN +* +* End of CLANHS +* + END diff --git a/costa/native/external/lapack/clanht.f b/costa/native/external/lapack/clanht.f new file mode 100644 index 000000000..16b4ac910 --- /dev/null +++ b/costa/native/external/lapack/clanht.f @@ -0,0 +1,126 @@ + REAL FUNCTION CLANHT( NORM, N, D, E ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER N +* .. +* .. Array Arguments .. + REAL D( * ) + COMPLEX E( * ) +* .. +* +* Purpose +* ======= +* +* CLANHT returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* complex Hermitian tridiagonal matrix A. +* +* Description +* =========== +* +* CLANHT returns the value +* +* CLANHT = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in CLANHT as described +* above. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, CLANHT is +* set to zero. +* +* D (input) REAL array, dimension (N) +* The diagonal elements of A. +* +* E (input) COMPLEX array, dimension (N-1) +* The (n-1) sub-diagonal or super-diagonal elements of A. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL ANORM, SCALE, SUM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ, SLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + ANORM = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + ANORM = ABS( D( N ) ) + DO 10 I = 1, N - 1 + ANORM = MAX( ANORM, ABS( D( I ) ) ) + ANORM = MAX( ANORM, ABS( E( I ) ) ) + 10 CONTINUE + ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' .OR. + $ LSAME( NORM, 'I' ) ) THEN +* +* Find norm1(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = MAX( ABS( D( 1 ) )+ABS( E( 1 ) ), + $ ABS( E( N-1 ) )+ABS( D( N ) ) ) + DO 20 I = 2, N - 1 + ANORM = MAX( ANORM, ABS( D( I ) )+ABS( E( I ) )+ + $ ABS( E( I-1 ) ) ) + 20 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( N.GT.1 ) THEN + CALL CLASSQ( N-1, E, 1, SCALE, SUM ) + SUM = 2*SUM + END IF + CALL SLASSQ( N, D, 1, SCALE, SUM ) + ANORM = SCALE*SQRT( SUM ) + END IF +* + CLANHT = ANORM + RETURN +* +* End of CLANHT +* + END diff --git a/costa/native/external/lapack/clansb.f b/costa/native/external/lapack/clansb.f new file mode 100644 index 000000000..5b6fd116d --- /dev/null +++ b/costa/native/external/lapack/clansb.f @@ -0,0 +1,188 @@ + REAL FUNCTION CLANSB( NORM, UPLO, N, K, AB, LDAB, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER K, LDAB, N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* CLANSB returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of an +* n by n symmetric band matrix A, with k super-diagonals. +* +* Description +* =========== +* +* CLANSB returns the value +* +* CLANSB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in CLANSB as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* band matrix A is supplied. +* = 'U': Upper triangular part is supplied +* = 'L': Lower triangular part is supplied +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, CLANSB is +* set to zero. +* +* K (input) INTEGER +* The number of super-diagonals or sub-diagonals of the +* band matrix A. K >= 0. +* +* AB (input) COMPLEX array, dimension (LDAB,N) +* The upper or lower triangle of the symmetric band matrix A, +* stored in the first K+1 rows of AB. The j-th column of A is +* stored in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= K+1. +* +* WORK (workspace) REAL array, dimension (LWORK), +* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +* WORK is not referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L + REAL ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = MAX( K+2-J, 1 ), K + 1 + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = 1, MIN( N+1-J, K+1 ) + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + L = K + 1 - J + DO 50 I = MAX( 1, J-K ), J - 1 + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( AB( K+1, J ) ) + 60 CONTINUE + DO 70 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( AB( 1, J ) ) + L = 1 - J + DO 90 I = J + 1, MIN( N, J+K ) + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + VALUE = MAX( VALUE, SUM ) + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( K.GT.0 ) THEN + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL CLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ), + $ 1, SCALE, SUM ) + 110 CONTINUE + L = K + 1 + ELSE + DO 120 J = 1, N - 1 + CALL CLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, + $ SUM ) + 120 CONTINUE + L = 1 + END IF + SUM = 2*SUM + ELSE + L = 1 + END IF + CALL CLASSQ( N, AB( L, 1 ), LDAB, SCALE, SUM ) + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANSB = VALUE + RETURN +* +* End of CLANSB +* + END diff --git a/costa/native/external/lapack/clansp.f b/costa/native/external/lapack/clansp.f new file mode 100644 index 000000000..b57471cf0 --- /dev/null +++ b/costa/native/external/lapack/clansp.f @@ -0,0 +1,207 @@ + REAL FUNCTION CLANSP( NORM, UPLO, N, AP, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX AP( * ) +* .. +* +* Purpose +* ======= +* +* CLANSP returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* complex symmetric matrix A, supplied in packed form. +* +* Description +* =========== +* +* CLANSP returns the value +* +* CLANSP = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in CLANSP as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is supplied. +* = 'U': Upper triangular part of A is supplied +* = 'L': Lower triangular part of A is supplied +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, CLANSP is +* set to zero. +* +* AP (input) COMPLEX array, dimension (N*(N+1)/2) +* The upper or lower triangle of the symmetric matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* WORK (workspace) REAL array, dimension (LWORK), +* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +* WORK is not referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, K + REAL ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + K = 1 + DO 20 J = 1, N + DO 10 I = K, K + J - 1 + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 10 CONTINUE + K = K + J + 20 CONTINUE + ELSE + K = 1 + DO 40 J = 1, N + DO 30 I = K, K + N - J + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 30 CONTINUE + K = K + N - J + 1 + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + K = 1 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 50 CONTINUE + WORK( J ) = SUM + ABS( AP( K ) ) + K = K + 1 + 60 CONTINUE + DO 70 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( AP( K ) ) + K = K + 1 + DO 90 I = J + 1, N + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 90 CONTINUE + VALUE = MAX( VALUE, SUM ) + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + K = 2 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL CLASSQ( J-1, AP( K ), 1, SCALE, SUM ) + K = K + J + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL CLASSQ( N-J, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 120 CONTINUE + END IF + SUM = 2*SUM + K = 1 + DO 130 I = 1, N + IF( REAL( AP( K ) ).NE.ZERO ) THEN + ABSA = ABS( REAL( AP( K ) ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + IF( AIMAG( AP( K ) ).NE.ZERO ) THEN + ABSA = ABS( AIMAG( AP( K ) ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + IF( LSAME( UPLO, 'U' ) ) THEN + K = K + I + 1 + ELSE + K = K + N - I + 1 + END IF + 130 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANSP = VALUE + RETURN +* +* End of CLANSP +* + END diff --git a/costa/native/external/lapack/clansy.f b/costa/native/external/lapack/clansy.f new file mode 100644 index 000000000..eb0523cb5 --- /dev/null +++ b/costa/native/external/lapack/clansy.f @@ -0,0 +1,175 @@ + REAL FUNCTION CLANSY( NORM, UPLO, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER LDA, N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLANSY returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* complex symmetric matrix A. +* +* Description +* =========== +* +* CLANSY returns the value +* +* CLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in CLANSY as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is to be referenced. +* = 'U': Upper triangular part of A is referenced +* = 'L': Lower triangular part of A is referenced +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, CLANSY is +* set to zero. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The symmetric matrix A. If UPLO = 'U', the leading n by n +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading n by n lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(N,1). +* +* WORK (workspace) REAL array, dimension (LWORK), +* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +* WORK is not referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, J + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = J, N + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( A( J, J ) ) + 60 CONTINUE + DO 70 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( A( J, J ) ) + DO 90 I = J + 1, N + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + VALUE = MAX( VALUE, SUM ) + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL CLASSQ( J-1, A( 1, J ), 1, SCALE, SUM ) + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL CLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM ) + 120 CONTINUE + END IF + SUM = 2*SUM + CALL CLASSQ( N, A, LDA+1, SCALE, SUM ) + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANSY = VALUE + RETURN +* +* End of CLANSY +* + END diff --git a/costa/native/external/lapack/clantb.f b/costa/native/external/lapack/clantb.f new file mode 100644 index 000000000..883634ee0 --- /dev/null +++ b/costa/native/external/lapack/clantb.f @@ -0,0 +1,286 @@ + REAL FUNCTION CLANTB( NORM, UPLO, DIAG, N, K, AB, + $ LDAB, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER K, LDAB, N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* CLANTB returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of an +* n by n triangular band matrix A, with ( k + 1 ) diagonals. +* +* Description +* =========== +* +* CLANTB returns the value +* +* CLANTB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in CLANTB as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, CLANTB is +* set to zero. +* +* K (input) INTEGER +* The number of super-diagonals of the matrix A if UPLO = 'U', +* or the number of sub-diagonals of the matrix A if UPLO = 'L'. +* K >= 0. +* +* AB (input) COMPLEX array, dimension (LDAB,N) +* The upper or lower triangular band matrix A, stored in the +* first k+1 rows of AB. The j-th column of A is stored +* in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). +* Note that when DIAG = 'U', the elements of the array AB +* corresponding to the diagonal elements of the matrix A are +* not referenced, but are assumed to be one. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= K+1. +* +* WORK (workspace) REAL array, dimension (LWORK), +* where LWORK >= N when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J, L + REAL SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = MAX( K+2-J, 1 ), K + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = 2, MIN( N+1-J, K+1 ) + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = MAX( K+2-J, 1 ), K + 1 + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = 1, MIN( N+1-J, K+1 ) + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 90 I = MAX( K+2-J, 1 ), K + SUM = SUM + ABS( AB( I, J ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = MAX( K+2-J, 1 ), K + 1 + SUM = SUM + ABS( AB( I, J ) ) + 100 CONTINUE + END IF + VALUE = MAX( VALUE, SUM ) + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = 2, MIN( N+1-J, K+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = 1, MIN( N+1-J, K+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 130 CONTINUE + END IF + VALUE = MAX( VALUE, SUM ) + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, N + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + L = K + 1 - J + DO 160 I = MAX( 1, J-K ), J - 1 + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 160 CONTINUE + 170 CONTINUE + ELSE + DO 180 I = 1, N + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + L = K + 1 - J + DO 190 I = MAX( 1, J-K ), J + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 230 J = 1, N + L = 1 - J + DO 220 I = J + 1, MIN( N, J+K ) + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 220 CONTINUE + 230 CONTINUE + ELSE + DO 240 I = 1, N + WORK( I ) = ZERO + 240 CONTINUE + DO 260 J = 1, N + L = 1 - J + DO 250 I = J, MIN( N, J+K ) + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 250 CONTINUE + 260 CONTINUE + END IF + END IF + DO 270 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 270 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + IF( K.GT.0 ) THEN + DO 280 J = 2, N + CALL CLASSQ( MIN( J-1, K ), + $ AB( MAX( K+2-J, 1 ), J ), 1, SCALE, + $ SUM ) + 280 CONTINUE + END IF + ELSE + SCALE = ZERO + SUM = ONE + DO 290 J = 1, N + CALL CLASSQ( MIN( J, K+1 ), AB( MAX( K+2-J, 1 ), J ), + $ 1, SCALE, SUM ) + 290 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + IF( K.GT.0 ) THEN + DO 300 J = 1, N - 1 + CALL CLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, + $ SUM ) + 300 CONTINUE + END IF + ELSE + SCALE = ZERO + SUM = ONE + DO 310 J = 1, N + CALL CLASSQ( MIN( N-J+1, K+1 ), AB( 1, J ), 1, SCALE, + $ SUM ) + 310 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANTB = VALUE + RETURN +* +* End of CLANTB +* + END diff --git a/costa/native/external/lapack/clantp.f b/costa/native/external/lapack/clantp.f new file mode 100644 index 000000000..8f27a3619 --- /dev/null +++ b/costa/native/external/lapack/clantp.f @@ -0,0 +1,287 @@ + REAL FUNCTION CLANTP( NORM, UPLO, DIAG, N, AP, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX AP( * ) +* .. +* +* Purpose +* ======= +* +* CLANTP returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* triangular matrix A, supplied in packed form. +* +* Description +* =========== +* +* CLANTP returns the value +* +* CLANTP = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in CLANTP as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, CLANTP is +* set to zero. +* +* AP (input) COMPLEX array, dimension (N*(N+1)/2) +* The upper or lower triangular matrix A, packed columnwise in +* a linear array. The j-th column of A is stored in the array +* AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* Note that when DIAG = 'U', the elements of the array AP +* corresponding to the diagonal elements of the matrix A are +* not referenced, but are assumed to be one. +* +* WORK (workspace) REAL array, dimension (LWORK), +* where LWORK >= N when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J, K + REAL SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + K = 1 + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = K, K + J - 2 + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 10 CONTINUE + K = K + J + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = K + 1, K + N - J + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 30 CONTINUE + K = K + N - J + 1 + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = K, K + J - 1 + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 50 CONTINUE + K = K + J + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = K, K + N - J + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 70 CONTINUE + K = K + N - J + 1 + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + K = 1 + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 90 I = K, K + J - 2 + SUM = SUM + ABS( AP( I ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = K, K + J - 1 + SUM = SUM + ABS( AP( I ) ) + 100 CONTINUE + END IF + K = K + J + VALUE = MAX( VALUE, SUM ) + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = K + 1, K + N - J + SUM = SUM + ABS( AP( I ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = K, K + N - J + SUM = SUM + ABS( AP( I ) ) + 130 CONTINUE + END IF + K = K + N - J + 1 + VALUE = MAX( VALUE, SUM ) + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + K = 1 + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, N + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + DO 160 I = 1, J - 1 + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 160 CONTINUE + K = K + 1 + 170 CONTINUE + ELSE + DO 180 I = 1, N + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + DO 190 I = 1, J + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 230 J = 1, N + K = K + 1 + DO 220 I = J + 1, N + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 220 CONTINUE + 230 CONTINUE + ELSE + DO 240 I = 1, N + WORK( I ) = ZERO + 240 CONTINUE + DO 260 J = 1, N + DO 250 I = J, N + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 250 CONTINUE + 260 CONTINUE + END IF + END IF + VALUE = ZERO + DO 270 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 270 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + K = 2 + DO 280 J = 2, N + CALL CLASSQ( J-1, AP( K ), 1, SCALE, SUM ) + K = K + J + 280 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + K = 1 + DO 290 J = 1, N + CALL CLASSQ( J, AP( K ), 1, SCALE, SUM ) + K = K + J + 290 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + K = 2 + DO 300 J = 1, N - 1 + CALL CLASSQ( N-J, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 300 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + K = 1 + DO 310 J = 1, N + CALL CLASSQ( N-J+1, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 310 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANTP = VALUE + RETURN +* +* End of CLANTP +* + END diff --git a/costa/native/external/lapack/clantr.f b/costa/native/external/lapack/clantr.f new file mode 100644 index 000000000..4cd8b5268 --- /dev/null +++ b/costa/native/external/lapack/clantr.f @@ -0,0 +1,278 @@ + REAL FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLANTR returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* trapezoidal or triangular matrix A. +* +* Description +* =========== +* +* CLANTR returns the value +* +* CLANTR = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in CLANTR as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower trapezoidal. +* = 'U': Upper trapezoidal +* = 'L': Lower trapezoidal +* Note that A is triangular instead of trapezoidal if M = N. +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A has unit diagonal. +* = 'N': Non-unit diagonal +* = 'U': Unit diagonal +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0, and if +* UPLO = 'U', M <= N. When M = 0, CLANTR is set to zero. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0, and if +* UPLO = 'L', N <= M. When N = 0, CLANTR is set to zero. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The trapezoidal matrix A (A is triangular if M = N). +* If UPLO = 'U', the leading m by n upper trapezoidal part of +* the array A contains the upper trapezoidal matrix, and the +* strictly lower triangular part of A is not referenced. +* If UPLO = 'L', the leading m by n lower trapezoidal part of +* the array A contains the lower trapezoidal matrix, and the +* strictly upper triangular part of A is not referenced. Note +* that when DIAG = 'U', the diagonal elements of A are not +* referenced and are assumed to be one. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(M,1). +* +* WORK (workspace) REAL array, dimension (LWORK), +* where LWORK >= M when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J + REAL SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( MIN( M, N ).EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( M, J-1 ) + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = J + 1, M + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = 1, MIN( M, J ) + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = J, M + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( ( UDIAG ) .AND. ( J.LE.M ) ) THEN + SUM = ONE + DO 90 I = 1, J - 1 + SUM = SUM + ABS( A( I, J ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = 1, MIN( M, J ) + SUM = SUM + ABS( A( I, J ) ) + 100 CONTINUE + END IF + VALUE = MAX( VALUE, SUM ) + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = J + 1, M + SUM = SUM + ABS( A( I, J ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = J, M + SUM = SUM + ABS( A( I, J ) ) + 130 CONTINUE + END IF + VALUE = MAX( VALUE, SUM ) + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, M + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + DO 160 I = 1, MIN( M, J-1 ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 160 CONTINUE + 170 CONTINUE + ELSE + DO 180 I = 1, M + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + DO 190 I = 1, MIN( M, J ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 220 I = N + 1, M + WORK( I ) = ZERO + 220 CONTINUE + DO 240 J = 1, N + DO 230 I = J + 1, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 230 CONTINUE + 240 CONTINUE + ELSE + DO 250 I = 1, M + WORK( I ) = ZERO + 250 CONTINUE + DO 270 J = 1, N + DO 260 I = J, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 260 CONTINUE + 270 CONTINUE + END IF + END IF + VALUE = ZERO + DO 280 I = 1, M + VALUE = MAX( VALUE, WORK( I ) ) + 280 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = MIN( M, N ) + DO 290 J = 2, N + CALL CLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM ) + 290 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + DO 300 J = 1, N + CALL CLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM ) + 300 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = MIN( M, N ) + DO 310 J = 1, N + CALL CLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE, + $ SUM ) + 310 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + DO 320 J = 1, N + CALL CLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM ) + 320 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANTR = VALUE + RETURN +* +* End of CLANTR +* + END diff --git a/costa/native/external/lapack/clapll.f b/costa/native/external/lapack/clapll.f new file mode 100644 index 000000000..faf36ae46 --- /dev/null +++ b/costa/native/external/lapack/clapll.f @@ -0,0 +1,104 @@ + SUBROUTINE CLAPLL( N, X, INCX, Y, INCY, SSMIN ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INCX, INCY, N + REAL SSMIN +* .. +* .. Array Arguments .. + COMPLEX X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* Given two column vectors X and Y, let +* +* A = ( X Y ). +* +* The subroutine first computes the QR factorization of A = Q*R, +* and then computes the SVD of the 2-by-2 upper triangular matrix R. +* The smaller singular value of R is returned in SSMIN, which is used +* as the measurement of the linear dependency of the vectors X and Y. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The length of the vectors X and Y. +* +* X (input/output) COMPLEX array, dimension (1+(N-1)*INCX) +* On entry, X contains the N-vector X. +* On exit, X is overwritten. +* +* INCX (input) INTEGER +* The increment between successive elements of X. INCX > 0. +* +* Y (input/output) COMPLEX array, dimension (1+(N-1)*INCY) +* On entry, Y contains the N-vector Y. +* On exit, Y is overwritten. +* +* INCY (input) INTEGER +* The increment between successive elements of Y. INCY > 0. +* +* SSMIN (output) REAL +* The smallest singular value of the N-by-2 matrix A = ( X Y ). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + REAL SSMAX + COMPLEX A11, A12, A22, C, TAU +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG +* .. +* .. External Functions .. + COMPLEX CDOTC + EXTERNAL CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CLARFG, SLAS2 +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + SSMIN = ZERO + RETURN + END IF +* +* Compute the QR factorization of the N-by-2 matrix ( X Y ) +* + CALL CLARFG( N, X( 1 ), X( 1+INCX ), INCX, TAU ) + A11 = X( 1 ) + X( 1 ) = CONE +* + C = -CONJG( TAU )*CDOTC( N, X, INCX, Y, INCY ) + CALL CAXPY( N, C, X, INCX, Y, INCY ) +* + CALL CLARFG( N-1, Y( 1+INCY ), Y( 1+2*INCY ), INCY, TAU ) +* + A12 = Y( 1 ) + A22 = Y( 1+INCY ) +* +* Compute the SVD of 2-by-2 Upper triangular matrix. +* + CALL SLAS2( ABS( A11 ), ABS( A12 ), ABS( A22 ), SSMIN, SSMAX ) +* + RETURN +* +* End of CLAPLL +* + END diff --git a/costa/native/external/lapack/clapmt.f b/costa/native/external/lapack/clapmt.f new file mode 100644 index 000000000..58d43ce44 --- /dev/null +++ b/costa/native/external/lapack/clapmt.f @@ -0,0 +1,135 @@ + SUBROUTINE CLAPMT( FORWRD, M, N, X, LDX, K ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + LOGICAL FORWRD + INTEGER LDX, M, N +* .. +* .. Array Arguments .. + INTEGER K( * ) + COMPLEX X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CLAPMT rearranges the columns of the M by N matrix X as specified +* by the permutation K(1),K(2),...,K(N) of the integers 1,...,N. +* If FORWRD = .TRUE., forward permutation: +* +* X(*,K(J)) is moved X(*,J) for J = 1,2,...,N. +* +* If FORWRD = .FALSE., backward permutation: +* +* X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N. +* +* Arguments +* ========= +* +* FORWRD (input) LOGICAL +* = .TRUE., forward permutation +* = .FALSE., backward permutation +* +* M (input) INTEGER +* The number of rows of the matrix X. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix X. N >= 0. +* +* X (input/output) COMPLEX array, dimension (LDX,N) +* On entry, the M by N matrix X. +* On exit, X contains the permuted matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X, LDX >= MAX(1,M). +* +* K (input) INTEGER array, dimension (N) +* On entry, K contains the permutation vector. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, II, J, IN + COMPLEX TEMP +* .. +* .. Executable Statements .. +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, N + K( I ) = -K( I ) + 10 CONTINUE +* + IF( FORWRD ) THEN +* +* Forward permutation +* + DO 60 I = 1, N +* + IF( K( I ).GT.0 ) + $ GO TO 40 +* + J = I + K( J ) = -K( J ) + IN = K( J ) +* + 20 CONTINUE + IF( K( IN ).GT.0 ) + $ GO TO 40 +* + DO 30 II = 1, M + TEMP = X( II, J ) + X( II, J ) = X( II, IN ) + X( II, IN ) = TEMP + 30 CONTINUE +* + K( IN ) = -K( IN ) + J = IN + IN = K( IN ) + GO TO 20 +* + 40 CONTINUE +* + 60 CONTINUE +* + ELSE +* +* Backward permutation +* + DO 110 I = 1, N +* + IF( K( I ).GT.0 ) + $ GO TO 100 +* + K( I ) = -K( I ) + J = K( I ) + 80 CONTINUE + IF( J.EQ.I ) + $ GO TO 100 +* + DO 90 II = 1, M + TEMP = X( II, I ) + X( II, I ) = X( II, J ) + X( II, J ) = TEMP + 90 CONTINUE +* + K( J ) = -K( J ) + J = K( J ) + GO TO 80 +* + 100 CONTINUE + + 110 CONTINUE +* + END IF +* + RETURN +* +* End of CLAPMT +* + END diff --git a/costa/native/external/lapack/claqgb.f b/costa/native/external/lapack/claqgb.f new file mode 100644 index 000000000..9580d6292 --- /dev/null +++ b/costa/native/external/lapack/claqgb.f @@ -0,0 +1,170 @@ + SUBROUTINE CLAQGB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER EQUED + INTEGER KL, KU, LDAB, M, N + REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + REAL C( * ), R( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* CLAQGB equilibrates a general M by N band matrix A with KL +* subdiagonals and KU superdiagonals using the row and scaling factors +* in the vectors R and C. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +* +* On exit, the equilibrated matrix, in the same storage format +* as A. See EQUED for the form of the equilibrated matrix. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDA >= KL+KU+1. +* +* R (output) REAL array, dimension (M) +* The row scale factors for A. +* +* C (output) REAL array, dimension (N) +* The column scale factors for A. +* +* ROWCND (output) REAL +* Ratio of the smallest R(i) to the largest R(i). +* +* COLCND (output) REAL +* Ratio of the smallest C(i) to the largest C(i). +* +* AMAX (input) REAL +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration +* = 'R': Row equilibration, i.e., A has been premultiplied by +* diag(R). +* = 'C': Column equilibration, i.e., A has been postmultiplied +* by diag(C). +* = 'B': Both row and column equilibration, i.e., A has been +* replaced by diag(R) * A * diag(C). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if row or column scaling +* should be done based on the ratio of the row or column scaling +* factors. If ROWCND < THRESH, row scaling is done, and if +* COLCND < THRESH, column scaling is done. +* +* LARGE and SMALL are threshold values used to decide if row scaling +* should be done based on the absolute size of the largest matrix +* element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) + $ THEN +* +* No row scaling +* + IF( COLCND.GE.THRESH ) THEN +* +* No column scaling +* + EQUED = 'N' + ELSE +* +* Column scaling +* + DO 20 J = 1, N + CJ = C( J ) + DO 10 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = CJ*AB( KU+1+I-J, J ) + 10 CONTINUE + 20 CONTINUE + EQUED = 'C' + END IF + ELSE IF( COLCND.GE.THRESH ) THEN +* +* Row scaling, no column scaling +* + DO 40 J = 1, N + DO 30 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = R( I )*AB( KU+1+I-J, J ) + 30 CONTINUE + 40 CONTINUE + EQUED = 'R' + ELSE +* +* Row and column scaling +* + DO 60 J = 1, N + CJ = C( J ) + DO 50 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = CJ*R( I )*AB( KU+1+I-J, J ) + 50 CONTINUE + 60 CONTINUE + EQUED = 'B' + END IF +* + RETURN +* +* End of CLAQGB +* + END diff --git a/costa/native/external/lapack/claqge.f b/costa/native/external/lapack/claqge.f new file mode 100644 index 000000000..750e9a192 --- /dev/null +++ b/costa/native/external/lapack/claqge.f @@ -0,0 +1,156 @@ + SUBROUTINE CLAQGE( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER EQUED + INTEGER LDA, M, N + REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + REAL C( * ), R( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLAQGE equilibrates a general M by N matrix A using the row and +* scaling factors in the vectors R and C. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M by N matrix A. +* On exit, the equilibrated matrix. See EQUED for the form of +* the equilibrated matrix. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(M,1). +* +* R (input) REAL array, dimension (M) +* The row scale factors for A. +* +* C (input) REAL array, dimension (N) +* The column scale factors for A. +* +* ROWCND (input) REAL +* Ratio of the smallest R(i) to the largest R(i). +* +* COLCND (input) REAL +* Ratio of the smallest C(i) to the largest C(i). +* +* AMAX (input) REAL +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration +* = 'R': Row equilibration, i.e., A has been premultiplied by +* diag(R). +* = 'C': Column equilibration, i.e., A has been postmultiplied +* by diag(C). +* = 'B': Both row and column equilibration, i.e., A has been +* replaced by diag(R) * A * diag(C). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if row or column scaling +* should be done based on the ratio of the row or column scaling +* factors. If ROWCND < THRESH, row scaling is done, and if +* COLCND < THRESH, column scaling is done. +* +* LARGE and SMALL are threshold values used to decide if row scaling +* should be done based on the absolute size of the largest matrix +* element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) + $ THEN +* +* No row scaling +* + IF( COLCND.GE.THRESH ) THEN +* +* No column scaling +* + EQUED = 'N' + ELSE +* +* Column scaling +* + DO 20 J = 1, N + CJ = C( J ) + DO 10 I = 1, M + A( I, J ) = CJ*A( I, J ) + 10 CONTINUE + 20 CONTINUE + EQUED = 'C' + END IF + ELSE IF( COLCND.GE.THRESH ) THEN +* +* Row scaling, no column scaling +* + DO 40 J = 1, N + DO 30 I = 1, M + A( I, J ) = R( I )*A( I, J ) + 30 CONTINUE + 40 CONTINUE + EQUED = 'R' + ELSE +* +* Row and column scaling +* + DO 60 J = 1, N + CJ = C( J ) + DO 50 I = 1, M + A( I, J ) = CJ*R( I )*A( I, J ) + 50 CONTINUE + 60 CONTINUE + EQUED = 'B' + END IF +* + RETURN +* +* End of CLAQGE +* + END diff --git a/costa/native/external/lapack/claqhb.f b/costa/native/external/lapack/claqhb.f new file mode 100644 index 000000000..eb43e2b5e --- /dev/null +++ b/costa/native/external/lapack/claqhb.f @@ -0,0 +1,152 @@ + SUBROUTINE CLAQHB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER KD, LDAB, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL S( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* CLAQHB equilibrates a symmetric band matrix A using the scaling +* factors in the vector S. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of super-diagonals of the matrix A if UPLO = 'U', +* or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U'*U or A = L*L' of the band +* matrix A, in the same storage format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* S (output) REAL array, dimension (N) +* The scale factors for A. +* +* SCOND (input) REAL +* Ratio of the smallest S(i) to the largest S(i). +* +* AMAX (input) REAL +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies whether or not equilibration was done. +* = 'N': No equilibration. +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if scaling should be done +* based on the ratio of the scaling factors. If SCOND < THRESH, +* scaling is done. +* +* LARGE and SMALL are threshold values used to decide if scaling should +* be done based on the absolute size of the largest matrix element. +* If AMAX > LARGE or AMAX < SMALL, scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored in band format. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = MAX( 1, J-KD ), J - 1 + AB( KD+1+I-J, J ) = CJ*S( I )*AB( KD+1+I-J, J ) + 10 CONTINUE + AB( KD+1, J ) = CJ*CJ*REAL( AB( KD+1, J ) ) + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + AB( 1, J ) = CJ*CJ*REAL( AB( 1, J ) ) + DO 30 I = J + 1, MIN( N, J+KD ) + AB( 1+I-J, J ) = CJ*S( I )*AB( 1+I-J, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of CLAQHB +* + END diff --git a/costa/native/external/lapack/claqhe.f b/costa/native/external/lapack/claqhe.f new file mode 100644 index 000000000..3234b8dc4 --- /dev/null +++ b/costa/native/external/lapack/claqhe.f @@ -0,0 +1,148 @@ + SUBROUTINE CLAQHE( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER LDA, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL S( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLAQHE equilibrates a Hermitian matrix A using the scaling factors +* in the vector S. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* Hermitian matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* n by n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n by n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if EQUED = 'Y', the equilibrated matrix: +* diag(S) * A * diag(S). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(N,1). +* +* S (input) REAL array, dimension (N) +* The scale factors for A. +* +* SCOND (input) REAL +* Ratio of the smallest S(i) to the largest S(i). +* +* AMAX (input) REAL +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies whether or not equilibration was done. +* = 'N': No equilibration. +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if scaling should be done +* based on the ratio of the scaling factors. If SCOND < THRESH, +* scaling is done. +* +* LARGE and SMALL are threshold values used to decide if scaling should +* be done based on the absolute size of the largest matrix element. +* If AMAX > LARGE or AMAX < SMALL, scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J - 1 + A( I, J ) = CJ*S( I )*A( I, J ) + 10 CONTINUE + A( J, J ) = CJ*CJ*REAL( A( J, J ) ) + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + A( J, J ) = CJ*CJ*REAL( A( J, J ) ) + DO 30 I = J + 1, N + A( I, J ) = CJ*S( I )*A( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of CLAQHE +* + END diff --git a/costa/native/external/lapack/claqhp.f b/costa/native/external/lapack/claqhp.f new file mode 100644 index 000000000..dab8e8dae --- /dev/null +++ b/costa/native/external/lapack/claqhp.f @@ -0,0 +1,147 @@ + SUBROUTINE CLAQHP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL S( * ) + COMPLEX AP( * ) +* .. +* +* Purpose +* ======= +* +* CLAQHP equilibrates a Hermitian matrix A using the scaling factors +* in the vector S. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* Hermitian matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the equilibrated matrix: diag(S) * A * diag(S), in +* the same storage format as A. +* +* S (input) REAL array, dimension (N) +* The scale factors for A. +* +* SCOND (input) REAL +* Ratio of the smallest S(i) to the largest S(i). +* +* AMAX (input) REAL +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies whether or not equilibration was done. +* = 'N': No equilibration. +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if scaling should be done +* based on the ratio of the scaling factors. If SCOND < THRESH, +* scaling is done. +* +* LARGE and SMALL are threshold values used to decide if scaling should +* be done based on the absolute size of the largest matrix element. +* If AMAX > LARGE or AMAX < SMALL, scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, JC + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + JC = 1 + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J - 1 + AP( JC+I-1 ) = CJ*S( I )*AP( JC+I-1 ) + 10 CONTINUE + AP( JC+J-1 ) = CJ*CJ*REAL( AP( JC+J-1 ) ) + JC = JC + J + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + JC = 1 + DO 40 J = 1, N + CJ = S( J ) + AP( JC ) = CJ*CJ*REAL( AP( JC ) ) + DO 30 I = J + 1, N + AP( JC+I-J ) = CJ*S( I )*AP( JC+I-J ) + 30 CONTINUE + JC = JC + N - J + 1 + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of CLAQHP +* + END diff --git a/costa/native/external/lapack/claqp2.f b/costa/native/external/lapack/claqp2.f new file mode 100644 index 000000000..28bbe6414 --- /dev/null +++ b/costa/native/external/lapack/claqp2.f @@ -0,0 +1,170 @@ + SUBROUTINE CLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER LDA, M, N, OFFSET +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL VN1( * ), VN2( * ) + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CLAQP2 computes a QR factorization with column pivoting of +* the block A(OFFSET+1:M,1:N). +* The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* OFFSET (input) INTEGER +* The number of rows of the matrix A that must be pivoted +* but no factorized. OFFSET >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the upper triangle of block A(OFFSET+1:M,1:N) is +* the triangular factor obtained; the elements in block +* A(OFFSET+1:M,1:N) below the diagonal, together with the +* array TAU, represent the orthogonal matrix Q as a product of +* elementary reflectors. Block A(1:OFFSET,1:N) has been +* accordingly pivoted, but no factorized. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +* to the front of A*P (a leading column); if JPVT(i) = 0, +* the i-th column of A is a free column. +* On exit, if JPVT(i) = k, then the i-th column of A*P +* was the k-th column of A. +* +* TAU (output) COMPLEX array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors. +* +* VN1 (input/output) REAL array, dimension (N) +* The vector with the partial column norms. +* +* VN2 (input/output) REAL array, dimension (N) +* The vector with the exact column norms. +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* Further Details +* =============== +* +* Based on contributions by +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* X. Sun, Computer Science Dept., Duke University, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + COMPLEX CONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, ITEMP, J, MN, OFFPI, PVT + REAL TEMP, TEMP2 + COMPLEX AII +* .. +* .. External Subroutines .. + EXTERNAL CLARF, CLARFG, CSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, MIN, SQRT +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SCNRM2 + EXTERNAL ISAMAX, SCNRM2 +* .. +* .. Executable Statements .. +* + MN = MIN( M-OFFSET, N ) +* +* Compute factorization. +* + DO 20 I = 1, MN +* + OFFPI = OFFSET + I +* +* Determine ith pivot column and swap if necessary. +* + PVT = ( I-1 ) + ISAMAX( N-I+1, VN1( I ), 1 ) +* + IF( PVT.NE.I ) THEN + CALL CSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( I ) + JPVT( I ) = ITEMP + VN1( PVT ) = VN1( I ) + VN2( PVT ) = VN2( I ) + END IF +* +* Generate elementary reflector H(i). +* + IF( OFFPI.LT.M ) THEN + CALL CLARFG( M-OFFPI+1, A( OFFPI, I ), A( OFFPI+1, I ), 1, + $ TAU( I ) ) + ELSE + CALL CLARFG( 1, A( M, I ), A( M, I ), 1, TAU( I ) ) + END IF +* + IF( I.LT.N ) THEN +* +* Apply H(i)' to A(offset+i:m,i+1:n) from the left. +* + AII = A( OFFPI, I ) + A( OFFPI, I ) = CONE + CALL CLARF( 'Left', M-OFFPI+1, N-I, A( OFFPI, I ), 1, + $ CONJG( TAU( I ) ), A( OFFPI, I+1 ), LDA, + $ WORK( 1 ) ) + A( OFFPI, I ) = AII + END IF +* +* Update partial column norms. +* + DO 10 J = I + 1, N + IF( VN1( J ).NE.ZERO ) THEN + TEMP = ONE - ( ABS( A( OFFPI, J ) ) / VN1( J ) )**2 + TEMP = MAX( TEMP, ZERO ) + TEMP2 = ONE + 0.05*TEMP*( VN1( J ) / VN2( J ) )**2 + IF( TEMP2.EQ.ONE ) THEN + IF( OFFPI.LT.M ) THEN + VN1( J ) = SCNRM2( M-OFFPI, A( OFFPI+1, J ), 1 ) + VN2( J ) = VN1( J ) + ELSE + VN1( J ) = ZERO + VN2( J ) = ZERO + END IF + ELSE + VN1( J ) = VN1( J )*SQRT( TEMP ) + END IF + END IF + 10 CONTINUE +* + 20 CONTINUE +* + RETURN +* +* End of CLAQP2 +* + END diff --git a/costa/native/external/lapack/claqps.f b/costa/native/external/lapack/claqps.f new file mode 100644 index 000000000..ebfc09ea8 --- /dev/null +++ b/costa/native/external/lapack/claqps.f @@ -0,0 +1,260 @@ + SUBROUTINE CLAQPS( M, N, OFFSET, NB, KB, A, LDA, JPVT, TAU, VN1, + $ VN2, AUXV, F, LDF ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER KB, LDA, LDF, M, N, NB, OFFSET +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL VN1( * ), VN2( * ) + COMPLEX A( LDA, * ), AUXV( * ), F( LDF, * ), TAU( * ) +* .. +* +* Purpose +* ======= +* +* CLAQPS computes a step of QR factorization with column pivoting +* of a complex M-by-N matrix A by using Blas-3. It tries to factorize +* NB columns from A starting from the row OFFSET+1, and updates all +* of the matrix with Blas-3 xGEMM. +* +* In some cases, due to catastrophic cancellations, it cannot +* factorize NB columns. Hence, the actual number of factorized +* columns is returned in KB. +* +* Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0 +* +* OFFSET (input) INTEGER +* The number of rows of A that have been factorized in +* previous steps. +* +* NB (input) INTEGER +* The number of columns to factorize. +* +* KB (output) INTEGER +* The number of columns actually factorized. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, block A(OFFSET+1:M,1:KB) is the triangular +* factor obtained and block A(1:OFFSET,1:N) has been +* accordingly pivoted, but no factorized. +* The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has +* been updated. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* JPVT(I) = K <==> Column K of the full matrix A has been +* permuted into position I in AP. +* +* TAU (output) COMPLEX array, dimension (KB) +* The scalar factors of the elementary reflectors. +* +* VN1 (input/output) REAL array, dimension (N) +* The vector with the partial column norms. +* +* VN2 (input/output) REAL array, dimension (N) +* The vector with the exact column norms. +* +* AUXV (input/output) COMPLEX array, dimension (NB) +* Auxiliar vector. +* +* F (input/output) COMPLEX array, dimension (LDF,NB) +* Matrix F' = L*Y'*A. +* +* LDF (input) INTEGER +* The leading dimension of the array F. LDF >= max(1,N). +* +* Further Details +* =============== +* +* Based on contributions by +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* X. Sun, Computer Science Dept., Duke University, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + COMPLEX CZERO, CONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, + $ CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER ITEMP, J, K, LASTRK, LSTICC, PVT, RK + REAL TEMP, TEMP2 + COMPLEX AKK +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CGEMV, CLARFG, CSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, MIN, NINT, REAL, SQRT +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SCNRM2 + EXTERNAL ISAMAX, SCNRM2 +* .. +* .. Executable Statements .. +* + LASTRK = MIN( M, N+OFFSET ) + LSTICC = 0 + K = 0 +* +* Beginning of while loop. +* + 10 CONTINUE + IF( ( K.LT.NB ) .AND. ( LSTICC.EQ.0 ) ) THEN + K = K + 1 + RK = OFFSET + K +* +* Determine ith pivot column and swap if necessary +* + PVT = ( K-1 ) + ISAMAX( N-K+1, VN1( K ), 1 ) + IF( PVT.NE.K ) THEN + CALL CSWAP( M, A( 1, PVT ), 1, A( 1, K ), 1 ) + CALL CSWAP( K-1, F( PVT, 1 ), LDF, F( K, 1 ), LDF ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( K ) + JPVT( K ) = ITEMP + VN1( PVT ) = VN1( K ) + VN2( PVT ) = VN2( K ) + END IF +* +* Apply previous Householder reflectors to column K: +* A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)'. +* + IF( K.GT.1 ) THEN +*CC CALL CGEMM( 'No transpose', 'Conjugate transpose', +*CC $ M-RK+1, 1, K-1, -CONE, A( RK, 1 ), LDA, +*CC $ F( K, 1 ), LDF, CONE, A( RK, K ), LDA ) + DO 20 J = 1, K - 1 + F( K, J ) = CONJG( F( K, J ) ) + 20 CONTINUE + CALL CGEMV( 'No transpose', M-RK+1, K-1, -CONE, A( RK, 1 ), + $ LDA, F( K, 1 ), LDF, CONE, A( RK, K ), 1 ) + DO 30 J = 1, K - 1 + F( K, J ) = CONJG( F( K, J ) ) + 30 CONTINUE + END IF +* +* Generate elementary reflector H(k). +* + IF( RK.LT.M ) THEN + CALL CLARFG( M-RK+1, A( RK, K ), A( RK+1, K ), 1, TAU( K ) ) + ELSE + CALL CLARFG( 1, A( RK, K ), A( RK, K ), 1, TAU( K ) ) + END IF +* + AKK = A( RK, K ) + A( RK, K ) = CONE +* +* Compute Kth column of F: +* +* Compute F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)'*A(RK:M,K). +* + IF( K.LT.N ) THEN + CALL CGEMV( 'Conjugate transpose', M-RK+1, N-K, TAU( K ), + $ A( RK, K+1 ), LDA, A( RK, K ), 1, CZERO, + $ F( K+1, K ), 1 ) + END IF +* +* Padding F(1:K,K) with zeros. +* + DO 40 J = 1, K + F( J, K ) = CZERO + 40 CONTINUE +* +* Incremental updating of F: +* F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)' +* *A(RK:M,K). +* + IF( K.GT.1 ) THEN + CALL CGEMV( 'Conjugate transpose', M-RK+1, K-1, -TAU( K ), + $ A( RK, 1 ), LDA, A( RK, K ), 1, CZERO, + $ AUXV( 1 ), 1 ) +* + CALL CGEMV( 'No transpose', N, K-1, CONE, F( 1, 1 ), LDF, + $ AUXV( 1 ), 1, CONE, F( 1, K ), 1 ) + END IF +* +* Update the current row of A: +* A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)'. +* + IF( K.LT.N ) THEN + CALL CGEMM( 'No transpose', 'Conjugate transpose', 1, N-K, + $ K, -CONE, A( RK, 1 ), LDA, F( K+1, 1 ), LDF, + $ CONE, A( RK, K+1 ), LDA ) + END IF +* +* Update partial column norms. +* + IF( RK.LT.LASTRK ) THEN + DO 50 J = K + 1, N + IF( VN1( J ).NE.ZERO ) THEN + TEMP = ABS( A( RK, J ) ) / VN1( J ) + TEMP = MAX( ZERO, ( ONE+TEMP )*( ONE-TEMP ) ) + TEMP2 = ONE + 0.05*TEMP*( VN1( J ) / VN2( J ) )**2 + IF( TEMP2.EQ.ONE ) THEN + VN2( J ) = REAL( LSTICC ) + LSTICC = J + ELSE + VN1( J ) = VN1( J )*SQRT( TEMP ) + END IF + END IF + 50 CONTINUE + END IF +* + A( RK, K ) = AKK +* +* End of while loop. +* + GO TO 10 + END IF + KB = K + RK = OFFSET + KB +* +* Apply the block reflector to the rest of the matrix: +* A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) - +* A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)'. +* + IF( KB.LT.MIN( N, M-OFFSET ) ) THEN + CALL CGEMM( 'No transpose', 'Conjugate transpose', M-RK, N-KB, + $ KB, -CONE, A( RK+1, 1 ), LDA, F( KB+1, 1 ), LDF, + $ CONE, A( RK+1, KB+1 ), LDA ) + END IF +* +* Recomputation of difficult columns. +* + 60 CONTINUE + IF( LSTICC.GT.0 ) THEN + ITEMP = NINT( VN2( LSTICC ) ) + VN1( LSTICC ) = SCNRM2( M-RK, A( RK+1, LSTICC ), 1 ) + VN2( LSTICC ) = VN1( LSTICC ) + LSTICC = ITEMP + GO TO 60 + END IF +* + RETURN +* +* End of CLAQPS +* + END diff --git a/costa/native/external/lapack/claqsb.f b/costa/native/external/lapack/claqsb.f new file mode 100644 index 000000000..8522262dc --- /dev/null +++ b/costa/native/external/lapack/claqsb.f @@ -0,0 +1,150 @@ + SUBROUTINE CLAQSB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER KD, LDAB, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL S( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* CLAQSB equilibrates a symmetric band matrix A using the scaling +* factors in the vector S. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of super-diagonals of the matrix A if UPLO = 'U', +* or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U'*U or A = L*L' of the band +* matrix A, in the same storage format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* S (output) REAL array, dimension (N) +* The scale factors for A. +* +* SCOND (input) REAL +* Ratio of the smallest S(i) to the largest S(i). +* +* AMAX (input) REAL +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies whether or not equilibration was done. +* = 'N': No equilibration. +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if scaling should be done +* based on the ratio of the scaling factors. If SCOND < THRESH, +* scaling is done. +* +* LARGE and SMALL are threshold values used to decide if scaling should +* be done based on the absolute size of the largest matrix element. +* If AMAX > LARGE or AMAX < SMALL, scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored in band format. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = MAX( 1, J-KD ), J + AB( KD+1+I-J, J ) = CJ*S( I )*AB( KD+1+I-J, J ) + 10 CONTINUE + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, MIN( N, J+KD ) + AB( 1+I-J, J ) = CJ*S( I )*AB( 1+I-J, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of CLAQSB +* + END diff --git a/costa/native/external/lapack/claqsp.f b/costa/native/external/lapack/claqsp.f new file mode 100644 index 000000000..591d0ef44 --- /dev/null +++ b/costa/native/external/lapack/claqsp.f @@ -0,0 +1,142 @@ + SUBROUTINE CLAQSP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL S( * ) + COMPLEX AP( * ) +* .. +* +* Purpose +* ======= +* +* CLAQSP equilibrates a symmetric matrix A using the scaling factors +* in the vector S. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the equilibrated matrix: diag(S) * A * diag(S), in +* the same storage format as A. +* +* S (input) REAL array, dimension (N) +* The scale factors for A. +* +* SCOND (input) REAL +* Ratio of the smallest S(i) to the largest S(i). +* +* AMAX (input) REAL +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies whether or not equilibration was done. +* = 'N': No equilibration. +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if scaling should be done +* based on the ratio of the scaling factors. If SCOND < THRESH, +* scaling is done. +* +* LARGE and SMALL are threshold values used to decide if scaling should +* be done based on the absolute size of the largest matrix element. +* If AMAX > LARGE or AMAX < SMALL, scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, JC + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + JC = 1 + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J + AP( JC+I-1 ) = CJ*S( I )*AP( JC+I-1 ) + 10 CONTINUE + JC = JC + J + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + JC = 1 + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, N + AP( JC+I-J ) = CJ*S( I )*AP( JC+I-J ) + 30 CONTINUE + JC = JC + N - J + 1 + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of CLAQSP +* + END diff --git a/costa/native/external/lapack/claqsy.f b/costa/native/external/lapack/claqsy.f new file mode 100644 index 000000000..e920873c5 --- /dev/null +++ b/costa/native/external/lapack/claqsy.f @@ -0,0 +1,143 @@ + SUBROUTINE CLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER LDA, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL S( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLAQSY equilibrates a symmetric matrix A using the scaling factors +* in the vector S. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* n by n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n by n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if EQUED = 'Y', the equilibrated matrix: +* diag(S) * A * diag(S). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(N,1). +* +* S (input) REAL array, dimension (N) +* The scale factors for A. +* +* SCOND (input) REAL +* Ratio of the smallest S(i) to the largest S(i). +* +* AMAX (input) REAL +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies whether or not equilibration was done. +* = 'N': No equilibration. +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if scaling should be done +* based on the ratio of the scaling factors. If SCOND < THRESH, +* scaling is done. +* +* LARGE and SMALL are threshold values used to decide if scaling should +* be done based on the absolute size of the largest matrix element. +* If AMAX > LARGE or AMAX < SMALL, scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J + A( I, J ) = CJ*S( I )*A( I, J ) + 10 CONTINUE + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, N + A( I, J ) = CJ*S( I )*A( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of CLAQSY +* + END diff --git a/costa/native/external/lapack/clar1v.f b/costa/native/external/lapack/clar1v.f new file mode 100644 index 000000000..558a4dade --- /dev/null +++ b/costa/native/external/lapack/clar1v.f @@ -0,0 +1,328 @@ + SUBROUTINE CLAR1V( N, B1, BN, SIGMA, D, L, LD, LLD, GERSCH, Z, + $ ZTZ, MINGMA, R, ISUPPZ, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER B1, BN, N, R + REAL MINGMA, SIGMA, ZTZ +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ) + REAL D( * ), GERSCH( * ), L( * ), LD( * ), LLD( * ), + $ WORK( * ) + COMPLEX Z( * ) +* .. +* +* Purpose +* ======= +* +* CLAR1V computes the (scaled) r-th column of the inverse of +* the sumbmatrix in rows B1 through BN of the tridiagonal matrix +* L D L^T - sigma I. The following steps accomplish this computation : +* (a) Stationary qd transform, L D L^T - sigma I = L(+) D(+) L(+)^T, +* (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T, +* (c) Computation of the diagonal elements of the inverse of +* L D L^T - sigma I by combining the above transforms, and choosing +* r as the index where the diagonal of the inverse is (one of the) +* largest in magnitude. +* (d) Computation of the (scaled) r-th column of the inverse using the +* twisted factorization obtained by combining the top part of the +* the stationary and the bottom part of the progressive transform. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix L D L^T. +* +* B1 (input) INTEGER +* First index of the submatrix of L D L^T. +* +* BN (input) INTEGER +* Last index of the submatrix of L D L^T. +* +* SIGMA (input) REAL +* The shift. Initially, when R = 0, SIGMA should be a good +* approximation to an eigenvalue of L D L^T. +* +* L (input) REAL array, dimension (N-1) +* The (n-1) subdiagonal elements of the unit bidiagonal matrix +* L, in elements 1 to N-1. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the diagonal matrix D. +* +* LD (input) REAL array, dimension (N-1) +* The n-1 elements L(i)*D(i). +* +* LLD (input) REAL array, dimension (N-1) +* The n-1 elements L(i)*L(i)*D(i). +* +* GERSCH (input) REAL array, dimension (2*N) +* The n Gerschgorin intervals. These are used to restrict +* the initial search for R, when R is input as 0. +* +* Z (output) COMPLEX array, dimension (N) +* The (scaled) r-th column of the inverse. Z(R) is returned +* to be 1. +* +* ZTZ (output) REAL +* The square of the norm of Z. +* +* MINGMA (output) REAL +* The reciprocal of the largest (in magnitude) diagonal +* element of the inverse of L D L^T - sigma I. +* +* R (input/output) INTEGER +* Initially, R should be input to be 0 and is then output as +* the index where the diagonal element of the inverse is +* largest in magnitude. In later iterations, this same value +* of R should be input. +* +* ISUPPZ (output) INTEGER array, dimension (2) +* The support of the vector in Z, i.e., the vector Z is +* nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ). +* +* WORK (workspace) REAL array, dimension (4*N) +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* Ken Stanley, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER BLKSIZ + PARAMETER ( BLKSIZ = 32 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL SAWNAN + INTEGER FROM, I, INDP, INDS, INDUMN, J, R1, R2, TO + REAL DMINUS, DPLUS, EPS, S, TMP +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL +* .. +* .. Executable Statements .. +* + EPS = SLAMCH( 'Precision' ) + IF( R.EQ.0 ) THEN +* +* Eliminate the top and bottom indices from the possible values +* of R where the desired eigenvector is largest in magnitude. +* + R1 = B1 + DO 10 I = B1, BN + IF( SIGMA.GE.GERSCH( 2*I-1 ) .OR. SIGMA.LE.GERSCH( 2*I ) ) + $ THEN + R1 = I + GO TO 20 + END IF + 10 CONTINUE + 20 CONTINUE + R2 = BN + DO 30 I = BN, B1, -1 + IF( SIGMA.GE.GERSCH( 2*I-1 ) .OR. SIGMA.LE.GERSCH( 2*I ) ) + $ THEN + R2 = I + GO TO 40 + END IF + 30 CONTINUE + 40 CONTINUE + ELSE + R1 = R + R2 = R + END IF +* + INDUMN = N + INDS = 2*N + 1 + INDP = 3*N + 1 + SAWNAN = .FALSE. +* +* Compute the stationary transform (using the differential form) +* untill the index R2 +* + IF( B1.EQ.1 ) THEN + WORK( INDS ) = ZERO + ELSE + WORK( INDS ) = LLD( B1-1 ) + END IF + S = WORK( INDS ) - SIGMA + DO 50 I = B1, R2 - 1 + DPLUS = D( I ) + S + WORK( I ) = LD( I ) / DPLUS + WORK( INDS+I ) = S*WORK( I )*L( I ) + S = WORK( INDS+I ) - SIGMA + 50 CONTINUE +* + IF( .NOT.( S.GT.ZERO .OR. S.LT.ONE ) ) THEN +* +* Run a slower version of the above loop if a NaN is detected +* + SAWNAN = .TRUE. + J = B1 + 1 + 60 CONTINUE + IF( WORK( INDS+J ).GT.ZERO .OR. WORK( INDS+J ).LT.ONE ) THEN + J = J + 1 + GO TO 60 + END IF + WORK( INDS+J ) = LLD( J ) + S = WORK( INDS+J ) - SIGMA + DO 70 I = J + 1, R2 - 1 + DPLUS = D( I ) + S + WORK( I ) = LD( I ) / DPLUS + IF( WORK( I ).EQ.ZERO ) THEN + WORK( INDS+I ) = LLD( I ) + ELSE + WORK( INDS+I ) = S*WORK( I )*L( I ) + END IF + S = WORK( INDS+I ) - SIGMA + 70 CONTINUE + END IF + WORK( INDP+BN-1 ) = D( BN ) - SIGMA + DO 80 I = BN - 1, R1, -1 + DMINUS = LLD( I ) + WORK( INDP+I ) + TMP = D( I ) / DMINUS + WORK( INDUMN+I ) = L( I )*TMP + WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - SIGMA + 80 CONTINUE + TMP = WORK( INDP+R1-1 ) + IF( .NOT.( TMP.GT.ZERO .OR. TMP.LT.ONE ) ) THEN +* +* Run a slower version of the above loop if a NaN is detected +* + SAWNAN = .TRUE. + J = BN - 3 + 90 CONTINUE + IF( WORK( INDP+J ).GT.ZERO .OR. WORK( INDP+J ).LT.ONE ) THEN + J = J - 1 + GO TO 90 + END IF + WORK( INDP+J ) = D( J+1 ) - SIGMA + DO 100 I = J, R1, -1 + DMINUS = LLD( I ) + WORK( INDP+I ) + TMP = D( I ) / DMINUS + WORK( INDUMN+I ) = L( I )*TMP + IF( TMP.EQ.ZERO ) THEN + WORK( INDP+I-1 ) = D( I ) - SIGMA + ELSE + WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - SIGMA + END IF + 100 CONTINUE + END IF +* +* Find the index (from R1 to R2) of the largest (in magnitude) +* diagonal element of the inverse +* + MINGMA = WORK( INDS+R1-1 ) + WORK( INDP+R1-1 ) + IF( MINGMA.EQ.ZERO ) + $ MINGMA = EPS*WORK( INDS+R1-1 ) + R = R1 + DO 110 I = R1, R2 - 1 + TMP = WORK( INDS+I ) + WORK( INDP+I ) + IF( TMP.EQ.ZERO ) + $ TMP = EPS*WORK( INDS+I ) + IF( ABS( TMP ).LT.ABS( MINGMA ) ) THEN + MINGMA = TMP + R = I + 1 + END IF + 110 CONTINUE +* +* Compute the (scaled) r-th column of the inverse +* + ISUPPZ( 1 ) = B1 + ISUPPZ( 2 ) = BN + Z( R ) = CONE + ZTZ = ONE + IF( .NOT.SAWNAN ) THEN + FROM = R - 1 + TO = MAX( R-BLKSIZ, B1 ) + 120 CONTINUE + IF( FROM.GE.B1 ) THEN + DO 130 I = FROM, TO, -1 + Z( I ) = -( WORK( I )*Z( I+1 ) ) + ZTZ = ZTZ + REAL( Z( I )*Z( I ) ) + 130 CONTINUE + IF( ABS( Z( TO ) ).LE.EPS .AND. ABS( Z( TO+1 ) ).LE.EPS ) + $ THEN + ISUPPZ( 1 ) = TO + 2 + ELSE + FROM = TO - 1 + TO = MAX( TO-BLKSIZ, B1 ) + GO TO 120 + END IF + END IF + FROM = R + 1 + TO = MIN( R+BLKSIZ, BN ) + 140 CONTINUE + IF( FROM.LE.BN ) THEN + DO 150 I = FROM, TO + Z( I ) = -( WORK( INDUMN+I-1 )*Z( I-1 ) ) + ZTZ = ZTZ + REAL( Z( I )*Z( I ) ) + 150 CONTINUE + IF( ABS( Z( TO ) ).LE.EPS .AND. ABS( Z( TO-1 ) ).LE.EPS ) + $ THEN + ISUPPZ( 2 ) = TO - 2 + ELSE + FROM = TO + 1 + TO = MIN( TO+BLKSIZ, BN ) + GO TO 140 + END IF + END IF + ELSE + DO 160 I = R - 1, B1, -1 + IF( Z( I+1 ).EQ.ZERO ) THEN + Z( I ) = -( LD( I+1 ) / LD( I ) )*Z( I+2 ) + ELSE IF( ABS( Z( I+1 ) ).LE.EPS .AND. ABS( Z( I+2 ) ).LE. + $ EPS ) THEN + ISUPPZ( 1 ) = I + 3 + GO TO 170 + ELSE + Z( I ) = -( WORK( I )*Z( I+1 ) ) + END IF + ZTZ = ZTZ + REAL( Z( I )*Z( I ) ) + 160 CONTINUE + 170 CONTINUE + DO 180 I = R, BN - 1 + IF( Z( I ).EQ.ZERO ) THEN + Z( I+1 ) = -( LD( I-1 ) / LD( I ) )*Z( I-1 ) + ELSE IF( ABS( Z( I ) ).LE.EPS .AND. ABS( Z( I-1 ) ).LE.EPS ) + $ THEN + ISUPPZ( 2 ) = I - 2 + GO TO 190 + ELSE + Z( I+1 ) = -( WORK( INDUMN+I )*Z( I ) ) + END IF + ZTZ = ZTZ + REAL( Z( I+1 )*Z( I+1 ) ) + 180 CONTINUE + 190 CONTINUE + END IF + DO 200 I = B1, ISUPPZ( 1 ) - 3 + Z( I ) = ZERO + 200 CONTINUE + DO 210 I = ISUPPZ( 2 ) + 3, BN + Z( I ) = ZERO + 210 CONTINUE +* + RETURN +* +* End of CLAR1V +* + END diff --git a/costa/native/external/lapack/clar2v.f b/costa/native/external/lapack/clar2v.f new file mode 100644 index 000000000..e883e37ce --- /dev/null +++ b/costa/native/external/lapack/clar2v.f @@ -0,0 +1,98 @@ + SUBROUTINE CLAR2V( N, X, Y, Z, INCX, C, S, INCC ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, N +* .. +* .. Array Arguments .. + REAL C( * ) + COMPLEX S( * ), X( * ), Y( * ), Z( * ) +* .. +* +* Purpose +* ======= +* +* CLAR2V applies a vector of complex plane rotations with real cosines +* from both sides to a sequence of 2-by-2 complex Hermitian matrices, +* defined by the elements of the vectors x, y and z. For i = 1,2,...,n +* +* ( x(i) z(i) ) := +* ( conjg(z(i)) y(i) ) +* +* ( c(i) conjg(s(i)) ) ( x(i) z(i) ) ( c(i) -conjg(s(i)) ) +* ( -s(i) c(i) ) ( conjg(z(i)) y(i) ) ( s(i) c(i) ) +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of plane rotations to be applied. +* +* X (input/output) COMPLEX array, dimension (1+(N-1)*INCX) +* The vector x; the elements of x are assumed to be real. +* +* Y (input/output) COMPLEX array, dimension (1+(N-1)*INCX) +* The vector y; the elements of y are assumed to be real. +* +* Z (input/output) COMPLEX array, dimension (1+(N-1)*INCX) +* The vector z. +* +* INCX (input) INTEGER +* The increment between elements of X, Y and Z. INCX > 0. +* +* C (input) REAL array, dimension (1+(N-1)*INCC) +* The cosines of the plane rotations. +* +* S (input) COMPLEX array, dimension (1+(N-1)*INCC) +* The sines of the plane rotations. +* +* INCC (input) INTEGER +* The increment between elements of C and S. INCC > 0. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IC, IX + REAL CI, SII, SIR, T1I, T1R, T5, T6, XI, YI, ZII, + $ ZIR + COMPLEX SI, T2, T3, T4, ZI +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG, CMPLX, CONJG, REAL +* .. +* .. Executable Statements .. +* + IX = 1 + IC = 1 + DO 10 I = 1, N + XI = REAL( X( IX ) ) + YI = REAL( Y( IX ) ) + ZI = Z( IX ) + ZIR = REAL( ZI ) + ZII = AIMAG( ZI ) + CI = C( IC ) + SI = S( IC ) + SIR = REAL( SI ) + SII = AIMAG( SI ) + T1R = SIR*ZIR - SII*ZII + T1I = SIR*ZII + SII*ZIR + T2 = CI*ZI + T3 = T2 - CONJG( SI )*XI + T4 = CONJG( T2 ) + SI*YI + T5 = CI*XI + T1R + T6 = CI*YI - T1R + X( IX ) = CI*T5 + ( SIR*REAL( T4 )+SII*AIMAG( T4 ) ) + Y( IX ) = CI*T6 - ( SIR*REAL( T3 )-SII*AIMAG( T3 ) ) + Z( IX ) = CI*T3 + CONJG( SI )*CMPLX( T6, T1I ) + IX = IX + INCX + IC = IC + INCC + 10 CONTINUE + RETURN +* +* End of CLAR2V +* + END diff --git a/costa/native/external/lapack/clarcm.f b/costa/native/external/lapack/clarcm.f new file mode 100644 index 000000000..d8b197856 --- /dev/null +++ b/costa/native/external/lapack/clarcm.f @@ -0,0 +1,111 @@ + SUBROUTINE CLARCM( M, N, A, LDA, B, LDB, C, LDC, RWORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER LDA, LDB, LDC, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), RWORK( * ) + COMPLEX B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* CLARCM performs a very simple matrix-matrix multiplication: +* C := A * B, +* where A is M by M and real; B is M by N and complex; +* C is M by N and complex. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A and of the matrix C. +* M >= 0. +* +* N (input) INTEGER +* The number of columns and rows of the matrix B and +* the number of columns of the matrix C. +* N >= 0. +* +* A (input) REAL array, dimension (LDA, M) +* A contains the M by M matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >=max(1,M). +* +* B (input) REAL array, dimension (LDB, N) +* B contains the M by N matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >=max(1,M). +* +* C (input) COMPLEX array, dimension (LDC, N) +* C contains the M by N matrix C. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >=max(1,M). +* +* RWORK (workspace) REAL array, dimension (2*M*N) +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E0, ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG, CMPLX, REAL +* .. +* .. External Subroutines .. + EXTERNAL SGEMM +* .. +* .. Executable Statements .. +* +* Quick return if possible. +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN +* + DO 20 J = 1, N + DO 10 I = 1, M + RWORK( ( J-1 )*M+I ) = REAL( B( I, J ) ) + 10 CONTINUE + 20 CONTINUE +* + L = M*N + 1 + CALL SGEMM( 'N', 'N', M, N, M, ONE, A, LDA, RWORK, M, ZERO, + $ RWORK( L ), M ) + DO 40 J = 1, N + DO 30 I = 1, M + C( I, J ) = RWORK( L+( J-1 )*M+I-1 ) + 30 CONTINUE + 40 CONTINUE +* + DO 60 J = 1, N + DO 50 I = 1, M + RWORK( ( J-1 )*M+I ) = AIMAG( B( I, J ) ) + 50 CONTINUE + 60 CONTINUE + CALL SGEMM( 'N', 'N', M, N, M, ONE, A, LDA, RWORK, M, ZERO, + $ RWORK( L ), M ) + DO 80 J = 1, N + DO 70 I = 1, M + C( I, J ) = CMPLX( REAL( C( I, J ) ), + $ RWORK( L+( J-1 )*M+I-1 ) ) + 70 CONTINUE + 80 CONTINUE +* + RETURN +* +* End of CLARCM +* + END diff --git a/costa/native/external/lapack/clarf.f b/costa/native/external/lapack/clarf.f new file mode 100644 index 000000000..560d00867 --- /dev/null +++ b/costa/native/external/lapack/clarf.f @@ -0,0 +1,121 @@ + SUBROUTINE CLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + COMPLEX TAU +* .. +* .. Array Arguments .. + COMPLEX C( LDC, * ), V( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CLARF applies a complex elementary reflector H to a complex M-by-N +* matrix C, from either the left or the right. H is represented in the +* form +* +* H = I - tau * v * v' +* +* where tau is a complex scalar and v is a complex vector. +* +* If tau = 0, then H is taken to be the unit matrix. +* +* To apply H' (the conjugate transpose of H), supply conjg(tau) instead +* tau. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': form H * C +* = 'R': form C * H +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* V (input) COMPLEX array, dimension +* (1 + (M-1)*abs(INCV)) if SIDE = 'L' +* or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +* The vector v in the representation of H. V is not used if +* TAU = 0. +* +* INCV (input) INTEGER +* The increment between elements of v. INCV <> 0. +* +* TAU (input) COMPLEX +* The value tau in the representation of H. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by the matrix H * C if SIDE = 'L', +* or C * H if SIDE = 'R'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX array, dimension +* (N) if SIDE = 'L' +* or (M) if SIDE = 'R' +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CGERC +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C +* + IF( TAU.NE.ZERO ) THEN +* +* w := C' * v +* + CALL CGEMV( 'Conjugate transpose', M, N, ONE, C, LDC, V, + $ INCV, ZERO, WORK, 1 ) +* +* C := C - v * w' +* + CALL CGERC( M, N, -TAU, V, INCV, WORK, 1, C, LDC ) + END IF + ELSE +* +* Form C * H +* + IF( TAU.NE.ZERO ) THEN +* +* w := C * v +* + CALL CGEMV( 'No transpose', M, N, ONE, C, LDC, V, INCV, + $ ZERO, WORK, 1 ) +* +* C := C - w * v' +* + CALL CGERC( M, N, -TAU, WORK, 1, V, INCV, C, LDC ) + END IF + END IF + RETURN +* +* End of CLARF +* + END diff --git a/costa/native/external/lapack/clarfb.f b/costa/native/external/lapack/clarfb.f new file mode 100644 index 000000000..0f219a17c --- /dev/null +++ b/costa/native/external/lapack/clarfb.f @@ -0,0 +1,609 @@ + SUBROUTINE CLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, + $ T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* Purpose +* ======= +* +* CLARFB applies a complex block reflector H or its transpose H' to a +* complex M-by-N matrix C, from either the left or the right. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply H or H' from the Left +* = 'R': apply H or H' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply H (No transpose) +* = 'C': apply H' (Conjugate transpose) +* +* DIRECT (input) CHARACTER*1 +* Indicates how H is formed from a product of elementary +* reflectors +* = 'F': H = H(1) H(2) . . . H(k) (Forward) +* = 'B': H = H(k) . . . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Indicates how the vectors which define the elementary +* reflectors are stored: +* = 'C': Columnwise +* = 'R': Rowwise +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* K (input) INTEGER +* The order of the matrix T (= the number of elementary +* reflectors whose product defines the block reflector). +* +* V (input) COMPLEX array, dimension +* (LDV,K) if STOREV = 'C' +* (LDV,M) if STOREV = 'R' and SIDE = 'L' +* (LDV,N) if STOREV = 'R' and SIDE = 'R' +* The matrix V. See further details. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); +* if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); +* if STOREV = 'R', LDV >= K. +* +* T (input) COMPLEX array, dimension (LDT,K) +* The triangular K-by-K matrix T in the representation of the +* block reflector. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by H*C or H'*C or C*H or C*H'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX array, dimension (LDWORK,K) +* +* LDWORK (input) INTEGER +* The leading dimension of the array WORK. +* If SIDE = 'L', LDWORK >= max(1,N); +* if SIDE = 'R', LDWORK >= max(1,M). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMM, CLACGV, CTRMM +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( STOREV, 'C' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 ) (first K rows) +* ( V2 ) +* where V1 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) +* +* W := C1' +* + DO 10 J = 1, K + CALL CCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL CLACGV( N, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W := W * V1 +* + CALL CTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N, + $ K, ONE, V, LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C2'*V2 +* + CALL CGEMM( 'Conjugate transpose', 'No transpose', N, + $ K, M-K, ONE, C( K+1, 1 ), LDC, + $ V( K+1, 1 ), LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL CTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W' +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2 * W' +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', + $ M-K, N, K, -ONE, V( K+1, 1 ), LDV, WORK, + $ LDWORK, ONE, C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1' +* + CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', N, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W' +* + DO 30 J = 1, K + DO 20 I = 1, N + C( J, I ) = C( J, I ) - CONJG( WORK( I, J ) ) + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C1 +* + DO 40 J = 1, K + CALL CCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W := W * V1 +* + CALL CTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M, + $ K, ONE, V, LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C2 * V2 +* + CALL CGEMM( 'No transpose', 'No transpose', M, K, N-K, + $ ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL CTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V' +* + IF( N.GT.K ) THEN +* +* C2 := C2 - W * V2' +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', M, + $ N-K, K, -ONE, WORK, LDWORK, V( K+1, 1 ), + $ LDV, ONE, C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1' +* + CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', M, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 60 J = 1, K + DO 50 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + ELSE +* +* Let V = ( V1 ) +* ( V2 ) (last K rows) +* where V2 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) +* +* W := C2' +* + DO 70 J = 1, K + CALL CCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL CLACGV( N, WORK( 1, J ), 1 ) + 70 CONTINUE +* +* W := W * V2 +* + CALL CTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N, + $ K, ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1'*V1 +* + CALL CGEMM( 'Conjugate transpose', 'No transpose', N, + $ K, M-K, ONE, C, LDC, V, LDV, ONE, WORK, + $ LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL CTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W' +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1 * W' +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', + $ M-K, N, K, -ONE, V, LDV, WORK, LDWORK, + $ ONE, C, LDC ) + END IF +* +* W := W * V2' +* + CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', N, K, ONE, V( M-K+1, 1 ), LDV, WORK, + $ LDWORK ) +* +* C2 := C2 - W' +* + DO 90 J = 1, K + DO 80 I = 1, N + C( M-K+J, I ) = C( M-K+J, I ) - + $ CONJG( WORK( I, J ) ) + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C2 +* + DO 100 J = 1, K + CALL CCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 100 CONTINUE +* +* W := W * V2 +* + CALL CTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M, + $ K, ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1 +* + CALL CGEMM( 'No transpose', 'No transpose', M, K, N-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL CTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V' +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1' +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', M, + $ N-K, K, -ONE, WORK, LDWORK, V, LDV, ONE, + $ C, LDC ) + END IF +* +* W := W * V2' +* + CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', M, K, ONE, V( N-K+1, 1 ), LDV, WORK, + $ LDWORK ) +* +* C2 := C2 - W +* + DO 120 J = 1, K + DO 110 I = 1, M + C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J ) + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* + ELSE IF( LSAME( STOREV, 'R' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 V2 ) (V1: first K columns) +* where V1 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) +* +* W := C1' +* + DO 130 J = 1, K + CALL CCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL CLACGV( N, WORK( 1, J ), 1 ) + 130 CONTINUE +* +* W := W * V1' +* + CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', N, K, ONE, V, LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C2'*V2' +* + CALL CGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', N, K, M-K, ONE, + $ C( K+1, 1 ), LDC, V( 1, K+1 ), LDV, ONE, + $ WORK, LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL CTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V' * W' +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2' * W' +* + CALL CGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', M-K, N, K, -ONE, + $ V( 1, K+1 ), LDV, WORK, LDWORK, ONE, + $ C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL CTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N, + $ K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W' +* + DO 150 J = 1, K + DO 140 I = 1, N + C( J, I ) = C( J, I ) - CONJG( WORK( I, J ) ) + 140 CONTINUE + 150 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) +* +* W := C1 +* + DO 160 J = 1, K + CALL CCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 160 CONTINUE +* +* W := W * V1' +* + CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', M, K, ONE, V, LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C2 * V2' +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', M, + $ K, N-K, ONE, C( 1, K+1 ), LDC, + $ V( 1, K+1 ), LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL CTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( N.GT.K ) THEN +* +* C2 := C2 - W * V2 +* + CALL CGEMM( 'No transpose', 'No transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V( 1, K+1 ), LDV, ONE, + $ C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL CTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M, + $ K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 180 J = 1, K + DO 170 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 170 CONTINUE + 180 CONTINUE +* + END IF +* + ELSE +* +* Let V = ( V1 V2 ) (V2: last K columns) +* where V2 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) +* +* W := C2' +* + DO 190 J = 1, K + CALL CCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL CLACGV( N, WORK( 1, J ), 1 ) + 190 CONTINUE +* +* W := W * V2' +* + CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', N, K, ONE, V( 1, M-K+1 ), LDV, WORK, + $ LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1'*V1' +* + CALL CGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', N, K, M-K, ONE, C, + $ LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL CTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V' * W' +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1' * W' +* + CALL CGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', M-K, N, K, -ONE, V, + $ LDV, WORK, LDWORK, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL CTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N, + $ K, ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK ) +* +* C2 := C2 - W' +* + DO 210 J = 1, K + DO 200 I = 1, N + C( M-K+J, I ) = C( M-K+J, I ) - + $ CONJG( WORK( I, J ) ) + 200 CONTINUE + 210 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) +* +* W := C2 +* + DO 220 J = 1, K + CALL CCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 220 CONTINUE +* +* W := W * V2' +* + CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', M, K, ONE, V( 1, N-K+1 ), LDV, WORK, + $ LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1' +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', M, + $ K, N-K, ONE, C, LDC, V, LDV, ONE, WORK, + $ LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL CTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1 +* + CALL CGEMM( 'No transpose', 'No transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL CTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M, + $ K, ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 240 J = 1, K + DO 230 I = 1, M + C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J ) + 230 CONTINUE + 240 CONTINUE +* + END IF +* + END IF + END IF +* + RETURN +* +* End of CLARFB +* + END diff --git a/costa/native/external/lapack/clarfg.f b/costa/native/external/lapack/clarfg.f new file mode 100644 index 000000000..5b2568f2a --- /dev/null +++ b/costa/native/external/lapack/clarfg.f @@ -0,0 +1,146 @@ + SUBROUTINE CLARFG( N, ALPHA, X, INCX, TAU ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INCX, N + COMPLEX ALPHA, TAU +* .. +* .. Array Arguments .. + COMPLEX X( * ) +* .. +* +* Purpose +* ======= +* +* CLARFG generates a complex elementary reflector H of order n, such +* that +* +* H' * ( alpha ) = ( beta ), H' * H = I. +* ( x ) ( 0 ) +* +* where alpha and beta are scalars, with beta real, and x is an +* (n-1)-element complex vector. H is represented in the form +* +* H = I - tau * ( 1 ) * ( 1 v' ) , +* ( v ) +* +* where tau is a complex scalar and v is a complex (n-1)-element +* vector. Note that H is not hermitian. +* +* If the elements of x are all zero and alpha is real, then tau = 0 +* and H is taken to be the unit matrix. +* +* Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1 . +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the elementary reflector. +* +* ALPHA (input/output) COMPLEX +* On entry, the value alpha. +* On exit, it is overwritten with the value beta. +* +* X (input/output) COMPLEX array, dimension +* (1+(N-2)*abs(INCX)) +* On entry, the vector x. +* On exit, it is overwritten with the vector v. +* +* INCX (input) INTEGER +* The increment between elements of X. INCX > 0. +* +* TAU (output) COMPLEX +* The value tau. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER J, KNT + REAL ALPHI, ALPHR, BETA, RSAFMN, SAFMIN, XNORM +* .. +* .. External Functions .. + REAL SCNRM2, SLAMCH, SLAPY3 + COMPLEX CLADIV + EXTERNAL SCNRM2, SLAMCH, SLAPY3, CLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, REAL, SIGN +* .. +* .. External Subroutines .. + EXTERNAL CSCAL, CSSCAL +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + TAU = ZERO + RETURN + END IF +* + XNORM = SCNRM2( N-1, X, INCX ) + ALPHR = REAL( ALPHA ) + ALPHI = AIMAG( ALPHA ) +* + IF( XNORM.EQ.ZERO .AND. ALPHI.EQ.ZERO ) THEN +* +* H = I +* + TAU = ZERO + ELSE +* +* general case +* + BETA = -SIGN( SLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) + SAFMIN = SLAMCH( 'S' ) / SLAMCH( 'E' ) + RSAFMN = ONE / SAFMIN +* + IF( ABS( BETA ).LT.SAFMIN ) THEN +* +* XNORM, BETA may be inaccurate; scale X and recompute them +* + KNT = 0 + 10 CONTINUE + KNT = KNT + 1 + CALL CSSCAL( N-1, RSAFMN, X, INCX ) + BETA = BETA*RSAFMN + ALPHI = ALPHI*RSAFMN + ALPHR = ALPHR*RSAFMN + IF( ABS( BETA ).LT.SAFMIN ) + $ GO TO 10 +* +* New BETA is at most 1, at least SAFMIN +* + XNORM = SCNRM2( N-1, X, INCX ) + ALPHA = CMPLX( ALPHR, ALPHI ) + BETA = -SIGN( SLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) + TAU = CMPLX( ( BETA-ALPHR ) / BETA, -ALPHI / BETA ) + ALPHA = CLADIV( CMPLX( ONE ), ALPHA-BETA ) + CALL CSCAL( N-1, ALPHA, X, INCX ) +* +* If ALPHA is subnormal, it may lose relative accuracy +* + ALPHA = BETA + DO 20 J = 1, KNT + ALPHA = ALPHA*SAFMIN + 20 CONTINUE + ELSE + TAU = CMPLX( ( BETA-ALPHR ) / BETA, -ALPHI / BETA ) + ALPHA = CLADIV( CMPLX( ONE ), ALPHA-BETA ) + CALL CSCAL( N-1, ALPHA, X, INCX ) + ALPHA = BETA + END IF + END IF +* + RETURN +* +* End of CLARFG +* + END diff --git a/costa/native/external/lapack/clarft.f b/costa/native/external/lapack/clarft.f new file mode 100644 index 000000000..ff8483b17 --- /dev/null +++ b/costa/native/external/lapack/clarft.f @@ -0,0 +1,225 @@ + SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + COMPLEX T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* Purpose +* ======= +* +* CLARFT forms the triangular factor T of a complex block reflector H +* of order n, which is defined as a product of k elementary reflectors. +* +* If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +* +* If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +* +* If STOREV = 'C', the vector which defines the elementary reflector +* H(i) is stored in the i-th column of the array V, and +* +* H = I - V * T * V' +* +* If STOREV = 'R', the vector which defines the elementary reflector +* H(i) is stored in the i-th row of the array V, and +* +* H = I - V' * T * V +* +* Arguments +* ========= +* +* DIRECT (input) CHARACTER*1 +* Specifies the order in which the elementary reflectors are +* multiplied to form the block reflector: +* = 'F': H = H(1) H(2) . . . H(k) (Forward) +* = 'B': H = H(k) . . . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Specifies how the vectors which define the elementary +* reflectors are stored (see also Further Details): +* = 'C': columnwise +* = 'R': rowwise +* +* N (input) INTEGER +* The order of the block reflector H. N >= 0. +* +* K (input) INTEGER +* The order of the triangular factor T (= the number of +* elementary reflectors). K >= 1. +* +* V (input/output) COMPLEX array, dimension +* (LDV,K) if STOREV = 'C' +* (LDV,N) if STOREV = 'R' +* The matrix V. See further details. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i). +* +* T (output) COMPLEX array, dimension (LDT,K) +* The k by k triangular factor T of the block reflector. +* If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +* lower triangular. The rest of the array is not used. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* Further Details +* =============== +* +* The shape of the matrix V and the storage of the vectors which define +* the H(i) is best illustrated by the following example with n = 5 and +* k = 3. The elements equal to 1 are not stored; the corresponding +* array elements are modified but restored on exit. The rest of the +* array is not used. +* +* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +* +* V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +* ( v1 1 ) ( 1 v2 v2 v2 ) +* ( v1 v2 1 ) ( 1 v3 v3 ) +* ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* +* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +* +* V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +* ( v1 v2 v3 ) ( v2 v2 v2 1 ) +* ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +* ( 1 v3 ) +* ( 1 ) +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J + COMPLEX VII +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CLACGV, CTRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 20 I = 1, K + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO 10 J = 1, I + T( J, I ) = ZERO + 10 CONTINUE + ELSE +* +* general case +* + VII = V( I, I ) + V( I, I ) = ONE + IF( LSAME( STOREV, 'C' ) ) THEN +* +* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) +* + CALL CGEMV( 'Conjugate transpose', N-I+1, I-1, + $ -TAU( I ), V( I, 1 ), LDV, V( I, I ), 1, + $ ZERO, T( 1, I ), 1 ) + ELSE +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' +* + IF( I.LT.N ) + $ CALL CLACGV( N-I, V( I, I+1 ), LDV ) + CALL CGEMV( 'No transpose', I-1, N-I+1, -TAU( I ), + $ V( 1, I ), LDV, V( I, I ), LDV, ZERO, + $ T( 1, I ), 1 ) + IF( I.LT.N ) + $ CALL CLACGV( N-I, V( I, I+1 ), LDV ) + END IF + V( I, I ) = VII +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + END IF + 20 CONTINUE + ELSE + DO 40 I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO 30 J = I, K + T( J, I ) = ZERO + 30 CONTINUE + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN + VII = V( N-K+I, I ) + V( N-K+I, I ) = ONE +* +* T(i+1:k,i) := +* - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) +* + CALL CGEMV( 'Conjugate transpose', N-K+I, K-I, + $ -TAU( I ), V( 1, I+1 ), LDV, V( 1, I ), + $ 1, ZERO, T( I+1, I ), 1 ) + V( N-K+I, I ) = VII + ELSE + VII = V( I, N-K+I ) + V( I, N-K+I ) = ONE +* +* T(i+1:k,i) := +* - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' +* + CALL CLACGV( N-K+I-1, V( I, 1 ), LDV ) + CALL CGEMV( 'No transpose', K-I, N-K+I, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO, + $ T( I+1, I ), 1 ) + CALL CLACGV( N-K+I-1, V( I, 1 ), LDV ) + V( I, N-K+I ) = VII + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL CTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + END IF + T( I, I ) = TAU( I ) + END IF + 40 CONTINUE + END IF + RETURN +* +* End of CLARFT +* + END diff --git a/costa/native/external/lapack/clarfx.f b/costa/native/external/lapack/clarfx.f new file mode 100644 index 000000000..c2dc07986 --- /dev/null +++ b/costa/native/external/lapack/clarfx.f @@ -0,0 +1,641 @@ + SUBROUTINE CLARFX( SIDE, M, N, V, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER LDC, M, N + COMPLEX TAU +* .. +* .. Array Arguments .. + COMPLEX C( LDC, * ), V( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CLARFX applies a complex elementary reflector H to a complex m by n +* matrix C, from either the left or the right. H is represented in the +* form +* +* H = I - tau * v * v' +* +* where tau is a complex scalar and v is a complex vector. +* +* If tau = 0, then H is taken to be the unit matrix +* +* This version uses inline code if H has order < 11. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': form H * C +* = 'R': form C * H +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* V (input) COMPLEX array, dimension (M) if SIDE = 'L' +* or (N) if SIDE = 'R' +* The vector v in the representation of H. +* +* TAU (input) COMPLEX +* The value tau in the representation of H. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the m by n matrix C. +* On exit, C is overwritten by the matrix H * C if SIDE = 'L', +* or C * H if SIDE = 'R'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDA >= max(1,M). +* +* WORK (workspace) COMPLEX array, dimension (N) if SIDE = 'L' +* or (M) if SIDE = 'R' +* WORK is not referenced if H has order < 11. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER J + COMPLEX SUM, T1, T10, T2, T3, T4, T5, T6, T7, T8, T9, + $ V1, V10, V2, V3, V4, V5, V6, V7, V8, V9 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CGERC +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* + IF( TAU.EQ.ZERO ) + $ RETURN + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C, where H has order m. +* + GO TO ( 10, 30, 50, 70, 90, 110, 130, 150, + $ 170, 190 )M +* +* Code for general M +* +* w := C'*v +* + CALL CGEMV( 'Conjugate transpose', M, N, ONE, C, LDC, V, 1, + $ ZERO, WORK, 1 ) +* +* C := C - tau * v * w' +* + CALL CGERC( M, N, -TAU, V, 1, WORK, 1, C, LDC ) + GO TO 410 + 10 CONTINUE +* +* Special code for 1 x 1 Householder +* + T1 = ONE - TAU*V( 1 )*CONJG( V( 1 ) ) + DO 20 J = 1, N + C( 1, J ) = T1*C( 1, J ) + 20 CONTINUE + GO TO 410 + 30 CONTINUE +* +* Special code for 2 x 2 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + DO 40 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + 40 CONTINUE + GO TO 410 + 50 CONTINUE +* +* Special code for 3 x 3 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + DO 60 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + 60 CONTINUE + GO TO 410 + 70 CONTINUE +* +* Special code for 4 x 4 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + V4 = CONJG( V( 4 ) ) + T4 = TAU*CONJG( V4 ) + DO 80 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + 80 CONTINUE + GO TO 410 + 90 CONTINUE +* +* Special code for 5 x 5 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + V4 = CONJG( V( 4 ) ) + T4 = TAU*CONJG( V4 ) + V5 = CONJG( V( 5 ) ) + T5 = TAU*CONJG( V5 ) + DO 100 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + 100 CONTINUE + GO TO 410 + 110 CONTINUE +* +* Special code for 6 x 6 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + V4 = CONJG( V( 4 ) ) + T4 = TAU*CONJG( V4 ) + V5 = CONJG( V( 5 ) ) + T5 = TAU*CONJG( V5 ) + V6 = CONJG( V( 6 ) ) + T6 = TAU*CONJG( V6 ) + DO 120 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + 120 CONTINUE + GO TO 410 + 130 CONTINUE +* +* Special code for 7 x 7 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + V4 = CONJG( V( 4 ) ) + T4 = TAU*CONJG( V4 ) + V5 = CONJG( V( 5 ) ) + T5 = TAU*CONJG( V5 ) + V6 = CONJG( V( 6 ) ) + T6 = TAU*CONJG( V6 ) + V7 = CONJG( V( 7 ) ) + T7 = TAU*CONJG( V7 ) + DO 140 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + 140 CONTINUE + GO TO 410 + 150 CONTINUE +* +* Special code for 8 x 8 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + V4 = CONJG( V( 4 ) ) + T4 = TAU*CONJG( V4 ) + V5 = CONJG( V( 5 ) ) + T5 = TAU*CONJG( V5 ) + V6 = CONJG( V( 6 ) ) + T6 = TAU*CONJG( V6 ) + V7 = CONJG( V( 7 ) ) + T7 = TAU*CONJG( V7 ) + V8 = CONJG( V( 8 ) ) + T8 = TAU*CONJG( V8 ) + DO 160 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + 160 CONTINUE + GO TO 410 + 170 CONTINUE +* +* Special code for 9 x 9 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + V4 = CONJG( V( 4 ) ) + T4 = TAU*CONJG( V4 ) + V5 = CONJG( V( 5 ) ) + T5 = TAU*CONJG( V5 ) + V6 = CONJG( V( 6 ) ) + T6 = TAU*CONJG( V6 ) + V7 = CONJG( V( 7 ) ) + T7 = TAU*CONJG( V7 ) + V8 = CONJG( V( 8 ) ) + T8 = TAU*CONJG( V8 ) + V9 = CONJG( V( 9 ) ) + T9 = TAU*CONJG( V9 ) + DO 180 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + C( 9, J ) = C( 9, J ) - SUM*T9 + 180 CONTINUE + GO TO 410 + 190 CONTINUE +* +* Special code for 10 x 10 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + V4 = CONJG( V( 4 ) ) + T4 = TAU*CONJG( V4 ) + V5 = CONJG( V( 5 ) ) + T5 = TAU*CONJG( V5 ) + V6 = CONJG( V( 6 ) ) + T6 = TAU*CONJG( V6 ) + V7 = CONJG( V( 7 ) ) + T7 = TAU*CONJG( V7 ) + V8 = CONJG( V( 8 ) ) + T8 = TAU*CONJG( V8 ) + V9 = CONJG( V( 9 ) ) + T9 = TAU*CONJG( V9 ) + V10 = CONJG( V( 10 ) ) + T10 = TAU*CONJG( V10 ) + DO 200 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) + + $ V10*C( 10, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + C( 9, J ) = C( 9, J ) - SUM*T9 + C( 10, J ) = C( 10, J ) - SUM*T10 + 200 CONTINUE + GO TO 410 + ELSE +* +* Form C * H, where H has order n. +* + GO TO ( 210, 230, 250, 270, 290, 310, 330, 350, + $ 370, 390 )N +* +* Code for general N +* +* w := C * v +* + CALL CGEMV( 'No transpose', M, N, ONE, C, LDC, V, 1, ZERO, + $ WORK, 1 ) +* +* C := C - tau * w * v' +* + CALL CGERC( M, N, -TAU, WORK, 1, V, 1, C, LDC ) + GO TO 410 + 210 CONTINUE +* +* Special code for 1 x 1 Householder +* + T1 = ONE - TAU*V( 1 )*CONJG( V( 1 ) ) + DO 220 J = 1, M + C( J, 1 ) = T1*C( J, 1 ) + 220 CONTINUE + GO TO 410 + 230 CONTINUE +* +* Special code for 2 x 2 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + DO 240 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + 240 CONTINUE + GO TO 410 + 250 CONTINUE +* +* Special code for 3 x 3 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + DO 260 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + 260 CONTINUE + GO TO 410 + 270 CONTINUE +* +* Special code for 4 x 4 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*CONJG( V4 ) + DO 280 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + 280 CONTINUE + GO TO 410 + 290 CONTINUE +* +* Special code for 5 x 5 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*CONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*CONJG( V5 ) + DO 300 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + 300 CONTINUE + GO TO 410 + 310 CONTINUE +* +* Special code for 6 x 6 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*CONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*CONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*CONJG( V6 ) + DO 320 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + 320 CONTINUE + GO TO 410 + 330 CONTINUE +* +* Special code for 7 x 7 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*CONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*CONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*CONJG( V6 ) + V7 = V( 7 ) + T7 = TAU*CONJG( V7 ) + DO 340 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + 340 CONTINUE + GO TO 410 + 350 CONTINUE +* +* Special code for 8 x 8 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*CONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*CONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*CONJG( V6 ) + V7 = V( 7 ) + T7 = TAU*CONJG( V7 ) + V8 = V( 8 ) + T8 = TAU*CONJG( V8 ) + DO 360 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + 360 CONTINUE + GO TO 410 + 370 CONTINUE +* +* Special code for 9 x 9 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*CONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*CONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*CONJG( V6 ) + V7 = V( 7 ) + T7 = TAU*CONJG( V7 ) + V8 = V( 8 ) + T8 = TAU*CONJG( V8 ) + V9 = V( 9 ) + T9 = TAU*CONJG( V9 ) + DO 380 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + C( J, 9 ) = C( J, 9 ) - SUM*T9 + 380 CONTINUE + GO TO 410 + 390 CONTINUE +* +* Special code for 10 x 10 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*CONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*CONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*CONJG( V6 ) + V7 = V( 7 ) + T7 = TAU*CONJG( V7 ) + V8 = V( 8 ) + T8 = TAU*CONJG( V8 ) + V9 = V( 9 ) + T9 = TAU*CONJG( V9 ) + V10 = V( 10 ) + T10 = TAU*CONJG( V10 ) + DO 400 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) + + $ V10*C( J, 10 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + C( J, 9 ) = C( J, 9 ) - SUM*T9 + C( J, 10 ) = C( J, 10 ) - SUM*T10 + 400 CONTINUE + GO TO 410 + END IF + 410 RETURN +* +* End of CLARFX +* + END diff --git a/costa/native/external/lapack/clargv.f b/costa/native/external/lapack/clargv.f new file mode 100644 index 000000000..478e6e328 --- /dev/null +++ b/costa/native/external/lapack/clargv.f @@ -0,0 +1,225 @@ + SUBROUTINE CLARGV( N, X, INCX, Y, INCY, C, INCC ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. + REAL C( * ) + COMPLEX X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CLARGV generates a vector of complex plane rotations with real +* cosines, determined by elements of the complex vectors x and y. +* For i = 1,2,...,n +* +* ( c(i) s(i) ) ( x(i) ) = ( r(i) ) +* ( -conjg(s(i)) c(i) ) ( y(i) ) = ( 0 ) +* +* where c(i)**2 + ABS(s(i))**2 = 1 +* +* The following conventions are used (these are the same as in CLARTG, +* but differ from the BLAS1 routine CROTG): +* If y(i)=0, then c(i)=1 and s(i)=0. +* If x(i)=0, then c(i)=0 and s(i) is chosen so that r(i) is real. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of plane rotations to be generated. +* +* X (input/output) COMPLEX array, dimension (1+(N-1)*INCX) +* On entry, the vector x. +* On exit, x(i) is overwritten by r(i), for i = 1,...,n. +* +* INCX (input) INTEGER +* The increment between elements of X. INCX > 0. +* +* Y (input/output) COMPLEX array, dimension (1+(N-1)*INCY) +* On entry, the vector y. +* On exit, the sines of the plane rotations. +* +* INCY (input) INTEGER +* The increment between elements of Y. INCY > 0. +* +* C (output) REAL array, dimension (1+(N-1)*INCC) +* The cosines of the plane rotations. +* +* INCC (input) INTEGER +* The increment between elements of C. INCC > 0. +* +* Further Details +* ======= ======= +* +* 6-6-96 - Modified with a new algorithm by W. Kahan and J. Demmel +* +* ===================================================================== +* +* .. Parameters .. + REAL TWO, ONE, ZERO + PARAMETER ( TWO = 2.0E+0, ONE = 1.0E+0, ZERO = 0.0E+0 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL FIRST + INTEGER COUNT, I, IC, IX, IY, J + REAL CS, D, DI, DR, EPS, F2, F2S, G2, G2S, SAFMIN, + $ SAFMN2, SAFMX2, SCALE + COMPLEX F, FF, FS, G, GS, R, SN +* .. +* .. External Functions .. + REAL SLAMCH, SLAPY2 + EXTERNAL SLAMCH, SLAPY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, INT, LOG, MAX, REAL, + $ SQRT +* .. +* .. Statement Functions .. + REAL ABS1, ABSSQ +* .. +* .. Save statement .. + SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / +* .. +* .. Statement Function definitions .. + ABS1( FF ) = MAX( ABS( REAL( FF ) ), ABS( AIMAG( FF ) ) ) + ABSSQ( FF ) = REAL( FF )**2 + AIMAG( FF )**2 +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + FIRST = .FALSE. + SAFMIN = SLAMCH( 'S' ) + EPS = SLAMCH( 'E' ) + SAFMN2 = SLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) / + $ LOG( SLAMCH( 'B' ) ) / TWO ) + SAFMX2 = ONE / SAFMN2 + END IF + IX = 1 + IY = 1 + IC = 1 + DO 60 I = 1, N + F = X( IX ) + G = Y( IY ) +* +* Use identical algorithm as in CLARTG +* + SCALE = MAX( ABS1( F ), ABS1( G ) ) + FS = F + GS = G + COUNT = 0 + IF( SCALE.GE.SAFMX2 ) THEN + 10 CONTINUE + COUNT = COUNT + 1 + FS = FS*SAFMN2 + GS = GS*SAFMN2 + SCALE = SCALE*SAFMN2 + IF( SCALE.GE.SAFMX2 ) + $ GO TO 10 + ELSE IF( SCALE.LE.SAFMN2 ) THEN + IF( G.EQ.CZERO ) THEN + CS = ONE + SN = CZERO + R = F + GO TO 50 + END IF + 20 CONTINUE + COUNT = COUNT - 1 + FS = FS*SAFMX2 + GS = GS*SAFMX2 + SCALE = SCALE*SAFMX2 + IF( SCALE.LE.SAFMN2 ) + $ GO TO 20 + END IF + F2 = ABSSQ( FS ) + G2 = ABSSQ( GS ) + IF( F2.LE.MAX( G2, ONE )*SAFMIN ) THEN +* +* This is a rare case: F is very small. +* + IF( F.EQ.CZERO ) THEN + CS = ZERO + R = SLAPY2( REAL( G ), AIMAG( G ) ) +* Do complex/real division explicitly with two real +* divisions + D = SLAPY2( REAL( GS ), AIMAG( GS ) ) + SN = CMPLX( REAL( GS ) / D, -AIMAG( GS ) / D ) + GO TO 50 + END IF + F2S = SLAPY2( REAL( FS ), AIMAG( FS ) ) +* G2 and G2S are accurate +* G2 is at least SAFMIN, and G2S is at least SAFMN2 + G2S = SQRT( G2 ) +* Error in CS from underflow in F2S is at most +* UNFL / SAFMN2 .lt. sqrt(UNFL*EPS) .lt. EPS +* If MAX(G2,ONE)=G2, then F2 .lt. G2*SAFMIN, +* and so CS .lt. sqrt(SAFMIN) +* If MAX(G2,ONE)=ONE, then F2 .lt. SAFMIN +* and so CS .lt. sqrt(SAFMIN)/SAFMN2 = sqrt(EPS) +* Therefore, CS = F2S/G2S / sqrt( 1 + (F2S/G2S)**2 ) = F2S/G2S + CS = F2S / G2S +* Make sure abs(FF) = 1 +* Do complex/real division explicitly with 2 real divisions + IF( ABS1( F ).GT.ONE ) THEN + D = SLAPY2( REAL( F ), AIMAG( F ) ) + FF = CMPLX( REAL( F ) / D, AIMAG( F ) / D ) + ELSE + DR = SAFMX2*REAL( F ) + DI = SAFMX2*AIMAG( F ) + D = SLAPY2( DR, DI ) + FF = CMPLX( DR / D, DI / D ) + END IF + SN = FF*CMPLX( REAL( GS ) / G2S, -AIMAG( GS ) / G2S ) + R = CS*F + SN*G + ELSE +* +* This is the most common case. +* Neither F2 nor F2/G2 are less than SAFMIN +* F2S cannot overflow, and it is accurate +* + F2S = SQRT( ONE+G2 / F2 ) +* Do the F2S(real)*FS(complex) multiply with two real +* multiplies + R = CMPLX( F2S*REAL( FS ), F2S*AIMAG( FS ) ) + CS = ONE / F2S + D = F2 + G2 +* Do complex/real division explicitly with two real divisions + SN = CMPLX( REAL( R ) / D, AIMAG( R ) / D ) + SN = SN*CONJG( GS ) + IF( COUNT.NE.0 ) THEN + IF( COUNT.GT.0 ) THEN + DO 30 J = 1, COUNT + R = R*SAFMX2 + 30 CONTINUE + ELSE + DO 40 J = 1, -COUNT + R = R*SAFMN2 + 40 CONTINUE + END IF + END IF + END IF + 50 CONTINUE + C( IC ) = CS + Y( IY ) = SN + X( IX ) = R + IC = IC + INCC + IY = IY + INCY + IX = IX + INCX + 60 CONTINUE + RETURN +* +* End of CLARGV +* + END diff --git a/costa/native/external/lapack/clarnv.f b/costa/native/external/lapack/clarnv.f new file mode 100644 index 000000000..4ea0a86a4 --- /dev/null +++ b/costa/native/external/lapack/clarnv.f @@ -0,0 +1,131 @@ + SUBROUTINE CLARNV( IDIST, ISEED, N, X ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER IDIST, N +* .. +* .. Array Arguments .. + INTEGER ISEED( 4 ) + COMPLEX X( * ) +* .. +* +* Purpose +* ======= +* +* CLARNV returns a vector of n random complex numbers from a uniform or +* normal distribution. +* +* Arguments +* ========= +* +* IDIST (input) INTEGER +* Specifies the distribution of the random numbers: +* = 1: real and imaginary parts each uniform (0,1) +* = 2: real and imaginary parts each uniform (-1,1) +* = 3: real and imaginary parts each normal (0,1) +* = 4: uniformly distributed on the disc abs(z) < 1 +* = 5: uniformly distributed on the circle abs(z) = 1 +* +* ISEED (input/output) INTEGER array, dimension (4) +* On entry, the seed of the random number generator; the array +* elements must be between 0 and 4095, and ISEED(4) must be +* odd. +* On exit, the seed is updated. +* +* N (input) INTEGER +* The number of random numbers to be generated. +* +* X (output) COMPLEX array, dimension (N) +* The generated random numbers. +* +* Further Details +* =============== +* +* This routine calls the auxiliary routine SLARUV to generate random +* real numbers from a uniform (0,1) distribution, in batches of up to +* 128 using vectorisable code. The Box-Muller method is used to +* transform numbers from a uniform to a normal distribution. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) + INTEGER LV + PARAMETER ( LV = 128 ) + REAL TWOPI + PARAMETER ( TWOPI = 6.2831853071795864769252867663E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IL, IV +* .. +* .. Local Arrays .. + REAL U( LV ) +* .. +* .. Intrinsic Functions .. + INTRINSIC CMPLX, EXP, LOG, MIN, SQRT +* .. +* .. External Subroutines .. + EXTERNAL SLARUV +* .. +* .. Executable Statements .. +* + DO 60 IV = 1, N, LV / 2 + IL = MIN( LV / 2, N-IV+1 ) +* +* Call SLARUV to generate 2*IL real numbers from a uniform (0,1) +* distribution (2*IL <= LV) +* + CALL SLARUV( ISEED, 2*IL, U ) +* + IF( IDIST.EQ.1 ) THEN +* +* Copy generated numbers +* + DO 10 I = 1, IL + X( IV+I-1 ) = CMPLX( U( 2*I-1 ), U( 2*I ) ) + 10 CONTINUE + ELSE IF( IDIST.EQ.2 ) THEN +* +* Convert generated numbers to uniform (-1,1) distribution +* + DO 20 I = 1, IL + X( IV+I-1 ) = CMPLX( TWO*U( 2*I-1 )-ONE, + $ TWO*U( 2*I )-ONE ) + 20 CONTINUE + ELSE IF( IDIST.EQ.3 ) THEN +* +* Convert generated numbers to normal (0,1) distribution +* + DO 30 I = 1, IL + X( IV+I-1 ) = SQRT( -TWO*LOG( U( 2*I-1 ) ) )* + $ EXP( CMPLX( ZERO, TWOPI*U( 2*I ) ) ) + 30 CONTINUE + ELSE IF( IDIST.EQ.4 ) THEN +* +* Convert generated numbers to complex numbers uniformly +* distributed on the unit disk +* + DO 40 I = 1, IL + X( IV+I-1 ) = SQRT( U( 2*I-1 ) )* + $ EXP( CMPLX( ZERO, TWOPI*U( 2*I ) ) ) + 40 CONTINUE + ELSE IF( IDIST.EQ.5 ) THEN +* +* Convert generated numbers to complex numbers uniformly +* distributed on the unit circle +* + DO 50 I = 1, IL + X( IV+I-1 ) = EXP( CMPLX( ZERO, TWOPI*U( 2*I ) ) ) + 50 CONTINUE + END IF + 60 CONTINUE + RETURN +* +* End of CLARNV +* + END diff --git a/costa/native/external/lapack/clarrv.f b/costa/native/external/lapack/clarrv.f new file mode 100644 index 000000000..77001bc04 --- /dev/null +++ b/costa/native/external/lapack/clarrv.f @@ -0,0 +1,433 @@ + SUBROUTINE CLARRV( N, D, L, ISPLIT, M, W, IBLOCK, GERSCH, TOL, Z, + $ LDZ, ISUPPZ, WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDZ, M, N + REAL TOL +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), ISPLIT( * ), ISUPPZ( * ), + $ IWORK( * ) + REAL D( * ), GERSCH( * ), L( * ), W( * ), WORK( * ) + COMPLEX Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CLARRV computes the eigenvectors of the tridiagonal matrix +* T = L D L^T given L, D and the eigenvalues of L D L^T. +* The input eigenvalues should have high relative accuracy with +* respect to the entries of L and D. The desired accuracy of the +* output can be specified by the input parameter TOL. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the diagonal matrix D. +* On exit, D may be overwritten. +* +* L (input/output) REAL array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the unit +* bidiagonal matrix L in elements 1 to N-1 of L. L(N) need +* not be set. On exit, L is overwritten. +* +* ISPLIT (input) INTEGER array, dimension (N) +* The splitting points, at which T breaks up into submatrices. +* The first submatrix consists of rows/columns 1 to +* ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 +* through ISPLIT( 2 ), etc. +* +* TOL (input) REAL +* The absolute error tolerance for the +* eigenvalues/eigenvectors. +* Errors in the input eigenvalues must be bounded by TOL. +* The eigenvectors output have residual norms +* bounded by TOL, and the dot products between different +* eigenvectors are bounded by TOL. TOL must be at least +* N*EPS*|T|, where EPS is the machine precision and |T| is +* the 1-norm of the tridiagonal matrix. +* +* M (input) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (input) REAL array, dimension (N) +* The first M elements of W contain the eigenvalues for +* which eigenvectors are to be computed. The eigenvalues +* should be grouped by split-off block and ordered from +* smallest to largest within the block ( The output array +* W from SLARRE is expected here ). +* Errors in W must be bounded by TOL (see above). +* +* IBLOCK (input) INTEGER array, dimension (N) +* The submatrix indices associated with the corresponding +* eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to +* the first submatrix from the top, =2 if W(i) belongs to +* the second submatrix, etc. +* +* Z (output) COMPLEX array, dimension (LDZ, max(1,M) ) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix T +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* ISUPPZ (output) INTEGER ARRAY, dimension ( 2*max(1,M) ) +* The support of the eigenvectors in Z, i.e., the indices +* indicating the nonzero elements in Z. The i-th eigenvector +* is nonzero only in elements ISUPPZ( 2*i-1 ) through +* ISUPPZ( 2*i ). +* +* WORK (workspace) REAL array, dimension (13*N) +* +* IWORK (workspace) INTEGER array, dimension (6*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = 1, internal error in SLARRB +* if INFO = 2, internal error in CSTEIN +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* Ken Stanley, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MGSSIZ + PARAMETER ( MGSSIZ = 20 ) + REAL ZERO, ONE, FOUR + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, FOUR = 4.0E0 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL MGSCLS + INTEGER I, IBEGIN, IEND, IINDC1, IINDC2, IINDR, IINDWK, + $ IINFO, IM, IN, INDERR, INDGAP, INDIN1, INDIN2, + $ INDLD, INDLLD, INDWRK, ITER, ITMP1, ITMP2, J, + $ JBLK, K, KTOT, LSBDPT, MAXITR, NCLUS, NDEPTH, + $ NDONE, NEWCLS, NEWFRS, NEWFTT, NEWLST, NEWSIZ, + $ NSPLIT, OLDCLS, OLDFST, OLDIEN, OLDLST, OLDNCL, + $ P, Q, TEMP( 1 ) + REAL EPS, GAP, LAMBDA, MGSTOL, MINGMA, MINRGP, + $ NRMINV, RELGAP, RELTOL, RESID, RQCORR, SIGMA, + $ TMP1, ZTZ +* .. +* .. External Functions .. + REAL SCNRM2, SLAMCH + COMPLEX CDOTU + EXTERNAL CDOTU, SCNRM2, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CLAR1V, CLASET, CSTEIN, SCOPY, SLARRB +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CMPLX, MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INDERR = N + 1 + INDLD = 2*N + INDLLD = 3*N + INDGAP = 4*N + INDIN1 = 5*N + 1 + INDIN2 = 6*N + 1 + INDWRK = 7*N + 1 +* + IINDR = N + IINDC1 = 2*N + IINDC2 = 3*N + IINDWK = 4*N + 1 +* + EPS = SLAMCH( 'Precision' ) +* + DO 10 I = 1, 2*N + IWORK( I ) = 0 + 10 CONTINUE + DO 20 I = 1, M + WORK( INDERR+I-1 ) = EPS*ABS( W( I ) ) + 20 CONTINUE + CALL CLASET( 'Full', N, N, CZERO, CZERO, Z, LDZ ) + MGSTOL = 5.0E0*EPS +* + NSPLIT = IBLOCK( M ) + IBEGIN = 1 + DO 170 JBLK = 1, NSPLIT + IEND = ISPLIT( JBLK ) +* +* Find the eigenvectors of the submatrix indexed IBEGIN +* through IEND. +* + IF( IBEGIN.EQ.IEND ) THEN + Z( IBEGIN, IBEGIN ) = ONE + ISUPPZ( 2*IBEGIN-1 ) = IBEGIN + ISUPPZ( 2*IBEGIN ) = IBEGIN + IBEGIN = IEND + 1 + GO TO 170 + END IF + OLDIEN = IBEGIN - 1 + IN = IEND - OLDIEN + RELTOL = MIN( 1.0E-2, ONE / REAL( IN ) ) + IM = IN + CALL SCOPY( IM, W( IBEGIN ), 1, WORK, 1 ) + DO 30 I = 1, IN - 1 + WORK( INDGAP+I ) = WORK( I+1 ) - WORK( I ) + 30 CONTINUE + WORK( INDGAP+IN ) = MAX( ABS( WORK( IN ) ), EPS ) + NDONE = 0 +* + NDEPTH = 0 + LSBDPT = 1 + NCLUS = 1 + IWORK( IINDC1+1 ) = 1 + IWORK( IINDC1+2 ) = IN +* +* While( NDONE.LT.IM ) do +* + 40 CONTINUE + IF( NDONE.LT.IM ) THEN + OLDNCL = NCLUS + NCLUS = 0 + LSBDPT = 1 - LSBDPT + DO 150 I = 1, OLDNCL + IF( LSBDPT.EQ.0 ) THEN + OLDCLS = IINDC1 + NEWCLS = IINDC2 + ELSE + OLDCLS = IINDC2 + NEWCLS = IINDC1 + END IF +* +* If NDEPTH > 1, retrieve the relatively robust +* representation (RRR) and perform limited bisection +* (if necessary) to get approximate eigenvalues. +* + J = OLDCLS + 2*I + OLDFST = IWORK( J-1 ) + OLDLST = IWORK( J ) + IF( NDEPTH.GT.0 ) THEN + J = OLDIEN + OLDFST + DO 45 K = 1, IN + D( IBEGIN+K-1 ) = REAL( Z( IBEGIN+K-1, + $ OLDIEN+OLDFST ) ) + L( IBEGIN+K-1 ) = REAL( Z( IBEGIN+K-1, + $ OLDIEN+OLDFST+1 ) ) + 45 CONTINUE + SIGMA = L( IEND ) + END IF + K = IBEGIN + DO 50 J = 1, IN - 1 + WORK( INDLD+J ) = D( K )*L( K ) + WORK( INDLLD+J ) = WORK( INDLD+J )*L( K ) + K = K + 1 + 50 CONTINUE + IF( NDEPTH.GT.0 ) THEN + CALL SLARRB( IN, D( IBEGIN ), L( IBEGIN ), + $ WORK( INDLD+1 ), WORK( INDLLD+1 ), + $ OLDFST, OLDLST, SIGMA, RELTOL, WORK, + $ WORK( INDGAP+1 ), WORK( INDERR ), + $ WORK( INDWRK ), IWORK( IINDWK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 1 + RETURN + END IF + END IF +* +* Classify eigenvalues of the current representation (RRR) +* as (i) isolated, (ii) loosely clustered or (iii) tightly +* clustered +* + NEWFRS = OLDFST + DO 140 J = OLDFST, OLDLST + IF( J.EQ.OLDLST .OR. WORK( INDGAP+J ).GE.RELTOL* + $ ABS( WORK( J ) ) ) THEN + NEWLST = J + ELSE +* +* continue (to the next loop) +* + RELGAP = WORK( INDGAP+J ) / ABS( WORK( J ) ) + IF( J.EQ.NEWFRS ) THEN + MINRGP = RELGAP + ELSE + MINRGP = MIN( MINRGP, RELGAP ) + END IF + GO TO 140 + END IF + NEWSIZ = NEWLST - NEWFRS + 1 + MAXITR = 10 + NEWFTT = OLDIEN + NEWFRS + IF( NEWSIZ.GT.1 ) THEN + MGSCLS = NEWSIZ.LE.MGSSIZ .AND. MINRGP.GE.MGSTOL + IF( .NOT.MGSCLS ) THEN + DO 55 K = 1, IN + WORK( INDIN1+K-1 ) = REAL( Z( IBEGIN+K-1, + $ NEWFTT ) ) + WORK( INDIN2+K-1 ) = REAL( Z( IBEGIN+K-1, + $ NEWFTT+1 ) ) + 55 CONTINUE + CALL SLARRF( IN, D( IBEGIN ), L( IBEGIN ), + $ WORK( INDLD+1 ), WORK( INDLLD+1 ), + $ NEWFRS, NEWLST, WORK, + $ WORK( INDIN1 ), WORK( INDIN2 ), + $ WORK( INDWRK ), IWORK( IINDWK ), + $ INFO ) + IF( INFO.EQ.0 ) THEN + NCLUS = NCLUS + 1 + K = NEWCLS + 2*NCLUS + IWORK( K-1 ) = NEWFRS + IWORK( K ) = NEWLST + ELSE + INFO = 0 + IF( MINRGP.GE.MGSTOL ) THEN + MGSCLS = .TRUE. + ELSE +* +* Call CSTEIN to process this tight cluster. +* This happens only if MINRGP <= MGSTOL +* and SLARRF returns INFO = 1. The latter +* means that a new RRR to "break" the +* cluster could not be found. +* + WORK( INDWRK ) = D( IBEGIN ) + DO 60 K = 1, IN - 1 + WORK( INDWRK+K ) = D( IBEGIN+K ) + + $ WORK( INDLLD+K ) + 60 CONTINUE + DO 70 K = 1, NEWSIZ + IWORK( IINDWK+K-1 ) = 1 + 70 CONTINUE + DO 80 K = NEWFRS, NEWLST + ISUPPZ( 2*( IBEGIN+K )-3 ) = 1 + ISUPPZ( 2*( IBEGIN+K )-2 ) = IN + 80 CONTINUE + TEMP( 1 ) = IN + CALL CSTEIN( IN, WORK( INDWRK ), + $ WORK( INDLD+1 ), NEWSIZ, + $ WORK( NEWFRS ), + $ IWORK( IINDWK ), TEMP( 1 ), + $ Z( IBEGIN, NEWFTT ), LDZ, + $ WORK( INDWRK+IN ), + $ IWORK( IINDWK+IN ), + $ IWORK( IINDWK+2*IN ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 2 + RETURN + END IF + NDONE = NDONE + NEWSIZ + END IF + END IF + END IF + ELSE + MGSCLS = .FALSE. + END IF + IF( NEWSIZ.EQ.1 .OR. MGSCLS ) THEN + KTOT = NEWFTT + DO 100 K = NEWFRS, NEWLST + ITER = 0 + 90 CONTINUE + LAMBDA = WORK( K ) + CALL CLAR1V( IN, 1, IN, LAMBDA, D( IBEGIN ), + $ L( IBEGIN ), WORK( INDLD+1 ), + $ WORK( INDLLD+1 ), + $ GERSCH( 2*OLDIEN+1 ), + $ Z( IBEGIN, KTOT ), ZTZ, MINGMA, + $ IWORK( IINDR+KTOT ), + $ ISUPPZ( 2*KTOT-1 ), + $ WORK( INDWRK ) ) + TMP1 = ONE / ZTZ + NRMINV = SQRT( TMP1 ) + RESID = ABS( MINGMA )*NRMINV + RQCORR = MINGMA*TMP1 + IF( K.EQ.IN ) THEN + GAP = WORK( INDGAP+K-1 ) + ELSE IF( K.EQ.1 ) THEN + GAP = WORK( INDGAP+K ) + ELSE + GAP = MIN( WORK( INDGAP+K-1 ), + $ WORK( INDGAP+K ) ) + END IF + ITER = ITER + 1 + IF( RESID.GT.TOL*GAP .AND. ABS( RQCORR ).GT. + $ FOUR*EPS*ABS( LAMBDA ) ) THEN + WORK( K ) = LAMBDA + RQCORR + IF( ITER.LT.MAXITR ) THEN + GO TO 90 + END IF + END IF + IWORK( KTOT ) = 1 + IF( NEWSIZ.EQ.1 ) + $ NDONE = NDONE + 1 + CALL CSSCAL( IN, NRMINV, Z( IBEGIN, KTOT ), 1 ) + KTOT = KTOT + 1 + 100 CONTINUE + IF( NEWSIZ.GT.1 ) THEN + ITMP1 = ISUPPZ( 2*NEWFTT-1 ) + ITMP2 = ISUPPZ( 2*NEWFTT ) + KTOT = OLDIEN + NEWLST + DO 120 P = NEWFTT + 1, KTOT + DO 110 Q = NEWFTT, P - 1 + TMP1 = -CDOTU( IN, Z( IBEGIN, P ), 1, + $ Z( IBEGIN, Q ), 1 ) + CALL CAXPY( IN, CMPLX( TMP1, ZERO ), + $ Z( IBEGIN, Q ), 1, + $ Z( IBEGIN, P ), 1 ) + 110 CONTINUE + TMP1 = ONE / SCNRM2( IN, Z( IBEGIN, P ), 1 ) + CALL CSSCAL( IN, TMP1, Z( IBEGIN, P ), 1 ) + ITMP1 = MIN( ITMP1, ISUPPZ( 2*P-1 ) ) + ITMP2 = MAX( ITMP2, ISUPPZ( 2*P ) ) + 120 CONTINUE + DO 130 P = NEWFTT, KTOT + ISUPPZ( 2*P-1 ) = ITMP1 + ISUPPZ( 2*P ) = ITMP2 + 130 CONTINUE + NDONE = NDONE + NEWSIZ + END IF + END IF + NEWFRS = J + 1 + 140 CONTINUE + 150 CONTINUE + NDEPTH = NDEPTH + 1 + GO TO 40 + END IF + J = 2*IBEGIN + DO 160 I = IBEGIN, IEND + ISUPPZ( J-1 ) = ISUPPZ( J-1 ) + OLDIEN + ISUPPZ( J ) = ISUPPZ( J ) + OLDIEN + J = J + 2 + 160 CONTINUE + IBEGIN = IEND + 1 + 170 CONTINUE +* + RETURN +* +* End of CLARRV +* + END diff --git a/costa/native/external/lapack/clartg.f b/costa/native/external/lapack/clartg.f new file mode 100644 index 000000000..49b7b99b9 --- /dev/null +++ b/costa/native/external/lapack/clartg.f @@ -0,0 +1,193 @@ + SUBROUTINE CLARTG( F, G, CS, SN, R ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + REAL CS + COMPLEX F, G, R, SN +* .. +* +* Purpose +* ======= +* +* CLARTG generates a plane rotation so that +* +* [ CS SN ] [ F ] [ R ] +* [ __ ] . [ ] = [ ] where CS**2 + |SN|**2 = 1. +* [ -SN CS ] [ G ] [ 0 ] +* +* This is a faster version of the BLAS1 routine CROTG, except for +* the following differences: +* F and G are unchanged on return. +* If G=0, then CS=1 and SN=0. +* If F=0, then CS=0 and SN is chosen so that R is real. +* +* Arguments +* ========= +* +* F (input) COMPLEX +* The first component of vector to be rotated. +* +* G (input) COMPLEX +* The second component of vector to be rotated. +* +* CS (output) REAL +* The cosine of the rotation. +* +* SN (output) COMPLEX +* The sine of the rotation. +* +* R (output) COMPLEX +* The nonzero component of the rotated vector. +* +* Further Details +* ======= ======= +* +* 3-5-96 - Modified with a new algorithm by W. Kahan and J. Demmel +* +* ===================================================================== +* +* .. Parameters .. + REAL TWO, ONE, ZERO + PARAMETER ( TWO = 2.0E+0, ONE = 1.0E+0, ZERO = 0.0E+0 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL FIRST + INTEGER COUNT, I + REAL D, DI, DR, EPS, F2, F2S, G2, G2S, SAFMIN, + $ SAFMN2, SAFMX2, SCALE + COMPLEX FF, FS, GS +* .. +* .. External Functions .. + REAL SLAMCH, SLAPY2 + EXTERNAL SLAMCH, SLAPY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, INT, LOG, MAX, REAL, + $ SQRT +* .. +* .. Statement Functions .. + REAL ABS1, ABSSQ +* .. +* .. Save statement .. + SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / +* .. +* .. Statement Function definitions .. + ABS1( FF ) = MAX( ABS( REAL( FF ) ), ABS( AIMAG( FF ) ) ) + ABSSQ( FF ) = REAL( FF )**2 + AIMAG( FF )**2 +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + FIRST = .FALSE. + SAFMIN = SLAMCH( 'S' ) + EPS = SLAMCH( 'E' ) + SAFMN2 = SLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) / + $ LOG( SLAMCH( 'B' ) ) / TWO ) + SAFMX2 = ONE / SAFMN2 + END IF + SCALE = MAX( ABS1( F ), ABS1( G ) ) + FS = F + GS = G + COUNT = 0 + IF( SCALE.GE.SAFMX2 ) THEN + 10 CONTINUE + COUNT = COUNT + 1 + FS = FS*SAFMN2 + GS = GS*SAFMN2 + SCALE = SCALE*SAFMN2 + IF( SCALE.GE.SAFMX2 ) + $ GO TO 10 + ELSE IF( SCALE.LE.SAFMN2 ) THEN + IF( G.EQ.CZERO ) THEN + CS = ONE + SN = CZERO + R = F + RETURN + END IF + 20 CONTINUE + COUNT = COUNT - 1 + FS = FS*SAFMX2 + GS = GS*SAFMX2 + SCALE = SCALE*SAFMX2 + IF( SCALE.LE.SAFMN2 ) + $ GO TO 20 + END IF + F2 = ABSSQ( FS ) + G2 = ABSSQ( GS ) + IF( F2.LE.MAX( G2, ONE )*SAFMIN ) THEN +* +* This is a rare case: F is very small. +* + IF( F.EQ.CZERO ) THEN + CS = ZERO + R = SLAPY2( REAL( G ), AIMAG( G ) ) +* Do complex/real division explicitly with two real divisions + D = SLAPY2( REAL( GS ), AIMAG( GS ) ) + SN = CMPLX( REAL( GS ) / D, -AIMAG( GS ) / D ) + RETURN + END IF + F2S = SLAPY2( REAL( FS ), AIMAG( FS ) ) +* G2 and G2S are accurate +* G2 is at least SAFMIN, and G2S is at least SAFMN2 + G2S = SQRT( G2 ) +* Error in CS from underflow in F2S is at most +* UNFL / SAFMN2 .lt. sqrt(UNFL*EPS) .lt. EPS +* If MAX(G2,ONE)=G2, then F2 .lt. G2*SAFMIN, +* and so CS .lt. sqrt(SAFMIN) +* If MAX(G2,ONE)=ONE, then F2 .lt. SAFMIN +* and so CS .lt. sqrt(SAFMIN)/SAFMN2 = sqrt(EPS) +* Therefore, CS = F2S/G2S / sqrt( 1 + (F2S/G2S)**2 ) = F2S/G2S + CS = F2S / G2S +* Make sure abs(FF) = 1 +* Do complex/real division explicitly with 2 real divisions + IF( ABS1( F ).GT.ONE ) THEN + D = SLAPY2( REAL( F ), AIMAG( F ) ) + FF = CMPLX( REAL( F ) / D, AIMAG( F ) / D ) + ELSE + DR = SAFMX2*REAL( F ) + DI = SAFMX2*AIMAG( F ) + D = SLAPY2( DR, DI ) + FF = CMPLX( DR / D, DI / D ) + END IF + SN = FF*CMPLX( REAL( GS ) / G2S, -AIMAG( GS ) / G2S ) + R = CS*F + SN*G + ELSE +* +* This is the most common case. +* Neither F2 nor F2/G2 are less than SAFMIN +* F2S cannot overflow, and it is accurate +* + F2S = SQRT( ONE+G2 / F2 ) +* Do the F2S(real)*FS(complex) multiply with two real multiplies + R = CMPLX( F2S*REAL( FS ), F2S*AIMAG( FS ) ) + CS = ONE / F2S + D = F2 + G2 +* Do complex/real division explicitly with two real divisions + SN = CMPLX( REAL( R ) / D, AIMAG( R ) / D ) + SN = SN*CONJG( GS ) + IF( COUNT.NE.0 ) THEN + IF( COUNT.GT.0 ) THEN + DO 30 I = 1, COUNT + R = R*SAFMX2 + 30 CONTINUE + ELSE + DO 40 I = 1, -COUNT + R = R*SAFMN2 + 40 CONTINUE + END IF + END IF + END IF + RETURN +* +* End of CLARTG +* + END diff --git a/costa/native/external/lapack/clartv.f b/costa/native/external/lapack/clartv.f new file mode 100644 index 000000000..e653f763d --- /dev/null +++ b/costa/native/external/lapack/clartv.f @@ -0,0 +1,79 @@ + SUBROUTINE CLARTV( N, X, INCX, Y, INCY, C, S, INCC ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. + REAL C( * ) + COMPLEX S( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CLARTV applies a vector of complex plane rotations with real cosines +* to elements of the complex vectors x and y. For i = 1,2,...,n +* +* ( x(i) ) := ( c(i) s(i) ) ( x(i) ) +* ( y(i) ) ( -conjg(s(i)) c(i) ) ( y(i) ) +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of plane rotations to be applied. +* +* X (input/output) COMPLEX array, dimension (1+(N-1)*INCX) +* The vector x. +* +* INCX (input) INTEGER +* The increment between elements of X. INCX > 0. +* +* Y (input/output) COMPLEX array, dimension (1+(N-1)*INCY) +* The vector y. +* +* INCY (input) INTEGER +* The increment between elements of Y. INCY > 0. +* +* C (input) REAL array, dimension (1+(N-1)*INCC) +* The cosines of the plane rotations. +* +* S (input) COMPLEX array, dimension (1+(N-1)*INCC) +* The sines of the plane rotations. +* +* INCC (input) INTEGER +* The increment between elements of C and S. INCC > 0. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IC, IX, IY + COMPLEX XI, YI +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* + IX = 1 + IY = 1 + IC = 1 + DO 10 I = 1, N + XI = X( IX ) + YI = Y( IY ) + X( IX ) = C( IC )*XI + S( IC )*YI + Y( IY ) = C( IC )*YI - CONJG( S( IC ) )*XI + IX = IX + INCX + IY = IY + INCY + IC = IC + INCC + 10 CONTINUE + RETURN +* +* End of CLARTV +* + END diff --git a/costa/native/external/lapack/clarz.f b/costa/native/external/lapack/clarz.f new file mode 100644 index 000000000..9c16f2ce2 --- /dev/null +++ b/costa/native/external/lapack/clarz.f @@ -0,0 +1,158 @@ + SUBROUTINE CLARZ( SIDE, M, N, L, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, L, LDC, M, N + COMPLEX TAU +* .. +* .. Array Arguments .. + COMPLEX C( LDC, * ), V( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CLARZ applies a complex elementary reflector H to a complex +* M-by-N matrix C, from either the left or the right. H is represented +* in the form +* +* H = I - tau * v * v' +* +* where tau is a complex scalar and v is a complex vector. +* +* If tau = 0, then H is taken to be the unit matrix. +* +* To apply H' (the conjugate transpose of H), supply conjg(tau) instead +* tau. +* +* H is a product of k elementary reflectors as returned by CTZRZF. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': form H * C +* = 'R': form C * H +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* L (input) INTEGER +* The number of entries of the vector V containing +* the meaningful part of the Householder vectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* V (input) COMPLEX array, dimension (1+(L-1)*abs(INCV)) +* The vector v in the representation of H as returned by +* CTZRZF. V is not used if TAU = 0. +* +* INCV (input) INTEGER +* The increment between elements of v. INCV <> 0. +* +* TAU (input) COMPLEX +* The value tau in the representation of H. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by the matrix H * C if SIDE = 'L', +* or C * H if SIDE = 'R'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX array, dimension +* (N) if SIDE = 'L' +* or (M) if SIDE = 'R' +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGEMV, CGERC, CGERU, CLACGV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C +* + IF( TAU.NE.ZERO ) THEN +* +* w( 1:n ) = conjg( C( 1, 1:n ) ) +* + CALL CCOPY( N, C, LDC, WORK, 1 ) + CALL CLACGV( N, WORK, 1 ) +* +* w( 1:n ) = conjg( w( 1:n ) + C( m-l+1:m, 1:n )' * v( 1:l ) ) +* + CALL CGEMV( 'Conjugate transpose', L, N, ONE, C( M-L+1, 1 ), + $ LDC, V, INCV, ONE, WORK, 1 ) + CALL CLACGV( N, WORK, 1 ) +* +* C( 1, 1:n ) = C( 1, 1:n ) - tau * w( 1:n ) +* + CALL CAXPY( N, -TAU, WORK, 1, C, LDC ) +* +* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... +* tau * v( 1:l ) * conjg( w( 1:n )' ) +* + CALL CGERU( L, N, -TAU, V, INCV, WORK, 1, C( M-L+1, 1 ), + $ LDC ) + END IF +* + ELSE +* +* Form C * H +* + IF( TAU.NE.ZERO ) THEN +* +* w( 1:m ) = C( 1:m, 1 ) +* + CALL CCOPY( M, C, 1, WORK, 1 ) +* +* w( 1:m ) = w( 1:m ) + C( 1:m, n-l+1:n, 1:n ) * v( 1:l ) +* + CALL CGEMV( 'No transpose', M, L, ONE, C( 1, N-L+1 ), LDC, + $ V, INCV, ONE, WORK, 1 ) +* +* C( 1:m, 1 ) = C( 1:m, 1 ) - tau * w( 1:m ) +* + CALL CAXPY( M, -TAU, WORK, 1, C, 1 ) +* +* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... +* tau * w( 1:m ) * v( 1:l )' +* + CALL CGERC( M, L, -TAU, WORK, 1, V, INCV, C( 1, N-L+1 ), + $ LDC ) +* + END IF +* + END IF +* + RETURN +* +* End of CLARZ +* + END diff --git a/costa/native/external/lapack/clarzb.f b/costa/native/external/lapack/clarzb.f new file mode 100644 index 000000000..aa691a2f1 --- /dev/null +++ b/costa/native/external/lapack/clarzb.f @@ -0,0 +1,235 @@ + SUBROUTINE CLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, + $ LDV, T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* December 1, 1999 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, L, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* Purpose +* ======= +* +* CLARZB applies a complex block reflector H or its transpose H**H +* to a complex distributed M-by-N C from the left or the right. +* +* Currently, only STOREV = 'R' and DIRECT = 'B' are supported. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply H or H' from the Left +* = 'R': apply H or H' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply H (No transpose) +* = 'C': apply H' (Conjugate transpose) +* +* DIRECT (input) CHARACTER*1 +* Indicates how H is formed from a product of elementary +* reflectors +* = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) +* = 'B': H = H(k) . . . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Indicates how the vectors which define the elementary +* reflectors are stored: +* = 'C': Columnwise (not supported yet) +* = 'R': Rowwise +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* K (input) INTEGER +* The order of the matrix T (= the number of elementary +* reflectors whose product defines the block reflector). +* +* L (input) INTEGER +* The number of columns of the matrix V containing the +* meaningful part of the Householder reflectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* V (input) COMPLEX array, dimension (LDV,NV). +* If STOREV = 'C', NV = K; if STOREV = 'R', NV = L. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C', LDV >= L; if STOREV = 'R', LDV >= K. +* +* T (input) COMPLEX array, dimension (LDT,K) +* The triangular K-by-K matrix T in the representation of the +* block reflector. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by H*C or H'*C or C*H or C*H'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX array, dimension (LDWORK,K) +* +* LDWORK (input) INTEGER +* The leading dimension of the array WORK. +* If SIDE = 'L', LDWORK >= max(1,N); +* if SIDE = 'R', LDWORK >= max(1,M). +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, INFO, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMM, CLACGV, CTRMM, XERBLA +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* +* Check for currently supported options +* + INFO = 0 + IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLARZB', -INFO ) + RETURN + END IF +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C +* +* W( 1:n, 1:k ) = conjg( C( 1:k, 1:n )' ) +* + DO 10 J = 1, K + CALL CCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W( 1:n, 1:k ) = W( 1:n, 1:k ) + ... +* conjg( C( m-l+1:m, 1:n )' ) * V( 1:k, 1:l )' +* + IF( L.GT.0 ) + $ CALL CGEMM( 'Transpose', 'Conjugate transpose', N, K, L, + $ ONE, C( M-L+1, 1 ), LDC, V, LDV, ONE, WORK, + $ LDWORK ) +* +* W( 1:n, 1:k ) = W( 1:n, 1:k ) * T' or W( 1:m, 1:k ) * T +* + CALL CTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, ONE, T, + $ LDT, WORK, LDWORK ) +* +* C( 1:k, 1:n ) = C( 1:k, 1:n ) - conjg( W( 1:n, 1:k )' ) +* + DO 30 J = 1, N + DO 20 I = 1, K + C( I, J ) = C( I, J ) - WORK( J, I ) + 20 CONTINUE + 30 CONTINUE +* +* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... +* conjg( V( 1:k, 1:l )' ) * conjg( W( 1:n, 1:k )' ) +* + IF( L.GT.0 ) + $ CALL CGEMM( 'Transpose', 'Transpose', L, N, K, -ONE, V, LDV, + $ WORK, LDWORK, ONE, C( M-L+1, 1 ), LDC ) +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' +* +* W( 1:m, 1:k ) = C( 1:m, 1:k ) +* + DO 40 J = 1, K + CALL CCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W( 1:m, 1:k ) = W( 1:m, 1:k ) + ... +* C( 1:m, n-l+1:n ) * conjg( V( 1:k, 1:l )' ) +* + IF( L.GT.0 ) + $ CALL CGEMM( 'No transpose', 'Transpose', M, K, L, ONE, + $ C( 1, N-L+1 ), LDC, V, LDV, ONE, WORK, LDWORK ) +* +* W( 1:m, 1:k ) = W( 1:m, 1:k ) * conjg( T ) or +* W( 1:m, 1:k ) * conjg( T' ) +* + DO 50 J = 1, K + CALL CLACGV( K-J+1, T( J, J ), 1 ) + 50 CONTINUE + CALL CTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, ONE, T, + $ LDT, WORK, LDWORK ) + DO 60 J = 1, K + CALL CLACGV( K-J+1, T( J, J ), 1 ) + 60 CONTINUE +* +* C( 1:m, 1:k ) = C( 1:m, 1:k ) - W( 1:m, 1:k ) +* + DO 80 J = 1, K + DO 70 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 70 CONTINUE + 80 CONTINUE +* +* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... +* W( 1:m, 1:k ) * conjg( V( 1:k, 1:l ) ) +* + DO 90 J = 1, L + CALL CLACGV( K, V( 1, J ), 1 ) + 90 CONTINUE + IF( L.GT.0 ) + $ CALL CGEMM( 'No transpose', 'No transpose', M, L, K, -ONE, + $ WORK, LDWORK, V, LDV, ONE, C( 1, N-L+1 ), LDC ) + DO 100 J = 1, L + CALL CLACGV( K, V( 1, J ), 1 ) + 100 CONTINUE +* + END IF +* + RETURN +* +* End of CLARZB +* + END diff --git a/costa/native/external/lapack/clarzt.f b/costa/native/external/lapack/clarzt.f new file mode 100644 index 000000000..0a35c6f99 --- /dev/null +++ b/costa/native/external/lapack/clarzt.f @@ -0,0 +1,187 @@ + SUBROUTINE CLARZT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + COMPLEX T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* Purpose +* ======= +* +* CLARZT forms the triangular factor T of a complex block reflector +* H of order > n, which is defined as a product of k elementary +* reflectors. +* +* If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +* +* If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +* +* If STOREV = 'C', the vector which defines the elementary reflector +* H(i) is stored in the i-th column of the array V, and +* +* H = I - V * T * V' +* +* If STOREV = 'R', the vector which defines the elementary reflector +* H(i) is stored in the i-th row of the array V, and +* +* H = I - V' * T * V +* +* Currently, only STOREV = 'R' and DIRECT = 'B' are supported. +* +* Arguments +* ========= +* +* DIRECT (input) CHARACTER*1 +* Specifies the order in which the elementary reflectors are +* multiplied to form the block reflector: +* = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) +* = 'B': H = H(k) . . . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Specifies how the vectors which define the elementary +* reflectors are stored (see also Further Details): +* = 'C': columnwise (not supported yet) +* = 'R': rowwise +* +* N (input) INTEGER +* The order of the block reflector H. N >= 0. +* +* K (input) INTEGER +* The order of the triangular factor T (= the number of +* elementary reflectors). K >= 1. +* +* V (input/output) COMPLEX array, dimension +* (LDV,K) if STOREV = 'C' +* (LDV,N) if STOREV = 'R' +* The matrix V. See further details. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i). +* +* T (output) COMPLEX array, dimension (LDT,K) +* The k by k triangular factor T of the block reflector. +* If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +* lower triangular. The rest of the array is not used. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* The shape of the matrix V and the storage of the vectors which define +* the H(i) is best illustrated by the following example with n = 5 and +* k = 3. The elements equal to 1 are not stored; the corresponding +* array elements are modified but restored on exit. The rest of the +* array is not used. +* +* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +* +* ______V_____ +* ( v1 v2 v3 ) / \ +* ( v1 v2 v3 ) ( v1 v1 v1 v1 v1 . . . . 1 ) +* V = ( v1 v2 v3 ) ( v2 v2 v2 v2 v2 . . . 1 ) +* ( v1 v2 v3 ) ( v3 v3 v3 v3 v3 . . 1 ) +* ( v1 v2 v3 ) +* . . . +* . . . +* 1 . . +* 1 . +* 1 +* +* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +* +* ______V_____ +* 1 / \ +* . 1 ( 1 . . . . v1 v1 v1 v1 v1 ) +* . . 1 ( . 1 . . . v2 v2 v2 v2 v2 ) +* . . . ( . . 1 . . v3 v3 v3 v3 v3 ) +* . . . +* ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* V = ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CLACGV, CTRMV, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Check for currently supported options +* + INFO = 0 + IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLARZT', -INFO ) + RETURN + END IF +* + DO 20 I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO 10 J = I, K + T( J, I ) = ZERO + 10 CONTINUE + ELSE +* +* general case +* + IF( I.LT.K ) THEN +* +* T(i+1:k,i) = - tau(i) * V(i+1:k,1:n) * V(i,1:n)' +* + CALL CLACGV( N, V( I, 1 ), LDV ) + CALL CGEMV( 'No transpose', K-I, N, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO, + $ T( I+1, I ), 1 ) + CALL CLACGV( N, V( I, 1 ), LDV ) +* +* T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL CTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + END IF + T( I, I ) = TAU( I ) + END IF + 20 CONTINUE + RETURN +* +* End of CLARZT +* + END diff --git a/costa/native/external/lapack/clascl.f b/costa/native/external/lapack/clascl.f new file mode 100644 index 000000000..2fc056376 --- /dev/null +++ b/costa/native/external/lapack/clascl.f @@ -0,0 +1,268 @@ + SUBROUTINE CLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER TYPE + INTEGER INFO, KL, KU, LDA, M, N + REAL CFROM, CTO +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLASCL multiplies the M by N complex matrix A by the real scalar +* CTO/CFROM. This is done without over/underflow as long as the final +* result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that +* A may be full, upper triangular, lower triangular, upper Hessenberg, +* or banded. +* +* Arguments +* ========= +* +* TYPE (input) CHARACTER*1 +* TYPE indices the storage type of the input matrix. +* = 'G': A is a full matrix. +* = 'L': A is a lower triangular matrix. +* = 'U': A is an upper triangular matrix. +* = 'H': A is an upper Hessenberg matrix. +* = 'B': A is a symmetric band matrix with lower bandwidth KL +* and upper bandwidth KU and with the only the lower +* half stored. +* = 'Q': A is a symmetric band matrix with lower bandwidth KL +* and upper bandwidth KU and with the only the upper +* half stored. +* = 'Z': A is a band matrix with lower bandwidth KL and upper +* bandwidth KU. +* +* KL (input) INTEGER +* The lower bandwidth of A. Referenced only if TYPE = 'B', +* 'Q' or 'Z'. +* +* KU (input) INTEGER +* The upper bandwidth of A. Referenced only if TYPE = 'B', +* 'Q' or 'Z'. +* +* CFROM (input) REAL +* CTO (input) REAL +* The matrix A is multiplied by CTO/CFROM. A(I,J) is computed +* without over/underflow if the final result CTO*A(I,J)/CFROM +* can be represented without over/underflow. CFROM must be +* nonzero. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,M) +* The matrix to be multiplied by CTO/CFROM. See TYPE for the +* storage type. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* INFO (output) INTEGER +* 0 - successful exit +* <0 - if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER I, ITYPE, J, K1, K2, K3, K4 + REAL BIGNUM, CFROM1, CFROMC, CTO1, CTOC, MUL, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 +* + IF( LSAME( TYPE, 'G' ) ) THEN + ITYPE = 0 + ELSE IF( LSAME( TYPE, 'L' ) ) THEN + ITYPE = 1 + ELSE IF( LSAME( TYPE, 'U' ) ) THEN + ITYPE = 2 + ELSE IF( LSAME( TYPE, 'H' ) ) THEN + ITYPE = 3 + ELSE IF( LSAME( TYPE, 'B' ) ) THEN + ITYPE = 4 + ELSE IF( LSAME( TYPE, 'Q' ) ) THEN + ITYPE = 5 + ELSE IF( LSAME( TYPE, 'Z' ) ) THEN + ITYPE = 6 + ELSE + ITYPE = -1 + END IF +* + IF( ITYPE.EQ.-1 ) THEN + INFO = -1 + ELSE IF( CFROM.EQ.ZERO ) THEN + INFO = -4 + ELSE IF( M.LT.0 ) THEN + INFO = -6 + ELSE IF( N.LT.0 .OR. ( ITYPE.EQ.4 .AND. N.NE.M ) .OR. + $ ( ITYPE.EQ.5 .AND. N.NE.M ) ) THEN + INFO = -7 + ELSE IF( ITYPE.LE.3 .AND. LDA.LT.MAX( 1, M ) ) THEN + INFO = -9 + ELSE IF( ITYPE.GE.4 ) THEN + IF( KL.LT.0 .OR. KL.GT.MAX( M-1, 0 ) ) THEN + INFO = -2 + ELSE IF( KU.LT.0 .OR. KU.GT.MAX( N-1, 0 ) .OR. + $ ( ( ITYPE.EQ.4 .OR. ITYPE.EQ.5 ) .AND. KL.NE.KU ) ) + $ THEN + INFO = -3 + ELSE IF( ( ITYPE.EQ.4 .AND. LDA.LT.KL+1 ) .OR. + $ ( ITYPE.EQ.5 .AND. LDA.LT.KU+1 ) .OR. + $ ( ITYPE.EQ.6 .AND. LDA.LT.2*KL+KU+1 ) ) THEN + INFO = -9 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLASCL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* + CFROMC = CFROM + CTOC = CTO +* + 10 CONTINUE + CFROM1 = CFROMC*SMLNUM + CTO1 = CTOC / BIGNUM + IF( ABS( CFROM1 ).GT.ABS( CTOC ) .AND. CTOC.NE.ZERO ) THEN + MUL = SMLNUM + DONE = .FALSE. + CFROMC = CFROM1 + ELSE IF( ABS( CTO1 ).GT.ABS( CFROMC ) ) THEN + MUL = BIGNUM + DONE = .FALSE. + CTOC = CTO1 + ELSE + MUL = CTOC / CFROMC + DONE = .TRUE. + END IF +* + IF( ITYPE.EQ.0 ) THEN +* +* Full matrix +* + DO 30 J = 1, N + DO 20 I = 1, M + A( I, J ) = A( I, J )*MUL + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( ITYPE.EQ.1 ) THEN +* +* Lower triangular matrix +* + DO 50 J = 1, N + DO 40 I = J, M + A( I, J ) = A( I, J )*MUL + 40 CONTINUE + 50 CONTINUE +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Upper triangular matrix +* + DO 70 J = 1, N + DO 60 I = 1, MIN( J, M ) + A( I, J ) = A( I, J )*MUL + 60 CONTINUE + 70 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* Upper Hessenberg matrix +* + DO 90 J = 1, N + DO 80 I = 1, MIN( J+1, M ) + A( I, J ) = A( I, J )*MUL + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Lower half of a symmetric band matrix +* + K3 = KL + 1 + K4 = N + 1 + DO 110 J = 1, N + DO 100 I = 1, MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 100 CONTINUE + 110 CONTINUE +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Upper half of a symmetric band matrix +* + K1 = KU + 2 + K3 = KU + 1 + DO 130 J = 1, N + DO 120 I = MAX( K1-J, 1 ), K3 + A( I, J ) = A( I, J )*MUL + 120 CONTINUE + 130 CONTINUE +* + ELSE IF( ITYPE.EQ.6 ) THEN +* +* Band matrix +* + K1 = KL + KU + 2 + K2 = KL + 1 + K3 = 2*KL + KU + 1 + K4 = KL + KU + 1 + M + DO 150 J = 1, N + DO 140 I = MAX( K1-J, K2 ), MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 140 CONTINUE + 150 CONTINUE +* + END IF +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of CLASCL +* + END diff --git a/costa/native/external/lapack/claset.f b/costa/native/external/lapack/claset.f new file mode 100644 index 000000000..f75a800a4 --- /dev/null +++ b/costa/native/external/lapack/claset.f @@ -0,0 +1,115 @@ + SUBROUTINE CLASET( UPLO, M, N, ALPHA, BETA, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, M, N + COMPLEX ALPHA, BETA +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLASET initializes a 2-D array A to BETA on the diagonal and +* ALPHA on the offdiagonals. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies the part of the matrix A to be set. +* = 'U': Upper triangular part is set. The lower triangle +* is unchanged. +* = 'L': Lower triangular part is set. The upper triangle +* is unchanged. +* Otherwise: All of the matrix A is set. +* +* M (input) INTEGER +* On entry, M specifies the number of rows of A. +* +* N (input) INTEGER +* On entry, N specifies the number of columns of A. +* +* ALPHA (input) COMPLEX +* All the offdiagonal array elements are set to ALPHA. +* +* BETA (input) COMPLEX +* All the diagonal array elements are set to BETA. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the m by n matrix A. +* On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j; +* A(i,i) = BETA , 1 <= i <= min(m,n) +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Set the diagonal to BETA and the strictly upper triangular +* part of the array to ALPHA. +* + DO 20 J = 2, N + DO 10 I = 1, MIN( J-1, M ) + A( I, J ) = ALPHA + 10 CONTINUE + 20 CONTINUE + DO 30 I = 1, MIN( N, M ) + A( I, I ) = BETA + 30 CONTINUE +* + ELSE IF( LSAME( UPLO, 'L' ) ) THEN +* +* Set the diagonal to BETA and the strictly lower triangular +* part of the array to ALPHA. +* + DO 50 J = 1, MIN( M, N ) + DO 40 I = J + 1, M + A( I, J ) = ALPHA + 40 CONTINUE + 50 CONTINUE + DO 60 I = 1, MIN( N, M ) + A( I, I ) = BETA + 60 CONTINUE +* + ELSE +* +* Set the array to BETA on the diagonal and ALPHA on the +* offdiagonal. +* + DO 80 J = 1, N + DO 70 I = 1, M + A( I, J ) = ALPHA + 70 CONTINUE + 80 CONTINUE + DO 90 I = 1, MIN( M, N ) + A( I, I ) = BETA + 90 CONTINUE + END IF +* + RETURN +* +* End of CLASET +* + END diff --git a/costa/native/external/lapack/clasr.f b/costa/native/external/lapack/clasr.f new file mode 100644 index 000000000..eb2103eb0 --- /dev/null +++ b/costa/native/external/lapack/clasr.f @@ -0,0 +1,325 @@ + SUBROUTINE CLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, PIVOT, SIDE + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + REAL C( * ), S( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLASR performs the transformation +* +* A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) +* +* A := A*P', when SIDE = 'R' or 'r' ( Right-hand side ) +* +* where A is an m by n complex matrix and P is an orthogonal matrix, +* consisting of a sequence of plane rotations determined by the +* parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l' +* and z = n when SIDE = 'R' or 'r' ): +* +* When DIRECT = 'F' or 'f' ( Forward sequence ) then +* +* P = P( z - 1 )*...*P( 2 )*P( 1 ), +* +* and when DIRECT = 'B' or 'b' ( Backward sequence ) then +* +* P = P( 1 )*P( 2 )*...*P( z - 1 ), +* +* where P( k ) is a plane rotation matrix for the following planes: +* +* when PIVOT = 'V' or 'v' ( Variable pivot ), +* the plane ( k, k + 1 ) +* +* when PIVOT = 'T' or 't' ( Top pivot ), +* the plane ( 1, k + 1 ) +* +* when PIVOT = 'B' or 'b' ( Bottom pivot ), +* the plane ( k, z ) +* +* c( k ) and s( k ) must contain the cosine and sine that define the +* matrix P( k ). The two by two plane rotation part of the matrix +* P( k ), R( k ), is assumed to be of the form +* +* R( k ) = ( c( k ) s( k ) ). +* ( -s( k ) c( k ) ) +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* Specifies whether the plane rotation matrix P is applied to +* A on the left or the right. +* = 'L': Left, compute A := P*A +* = 'R': Right, compute A:= A*P' +* +* DIRECT (input) CHARACTER*1 +* Specifies whether P is a forward or backward sequence of +* plane rotations. +* = 'F': Forward, P = P( z - 1 )*...*P( 2 )*P( 1 ) +* = 'B': Backward, P = P( 1 )*P( 2 )*...*P( z - 1 ) +* +* PIVOT (input) CHARACTER*1 +* Specifies the plane for which P(k) is a plane rotation +* matrix. +* = 'V': Variable pivot, the plane (k,k+1) +* = 'T': Top pivot, the plane (1,k+1) +* = 'B': Bottom pivot, the plane (k,z) +* +* M (input) INTEGER +* The number of rows of the matrix A. If m <= 1, an immediate +* return is effected. +* +* N (input) INTEGER +* The number of columns of the matrix A. If n <= 1, an +* immediate return is effected. +* +* C, S (input) REAL arrays, dimension +* (M-1) if SIDE = 'L' +* (N-1) if SIDE = 'R' +* c(k) and s(k) contain the cosine and sine that define the +* matrix P(k). The two by two plane rotation part of the +* matrix P(k), R(k), is assumed to be of the form +* R( k ) = ( c( k ) s( k ) ). +* ( -s( k ) c( k ) ) +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* The m by n matrix A. On exit, A is overwritten by P*A if +* SIDE = 'R' or by A*P' if SIDE = 'L'. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J + REAL CTEMP, STEMP + COMPLEX TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.( LSAME( SIDE, 'L' ) .OR. LSAME( SIDE, 'R' ) ) ) THEN + INFO = 1 + ELSE IF( .NOT.( LSAME( PIVOT, 'V' ) .OR. LSAME( PIVOT, + $ 'T' ) .OR. LSAME( PIVOT, 'B' ) ) ) THEN + INFO = 2 + ELSE IF( .NOT.( LSAME( DIRECT, 'F' ) .OR. LSAME( DIRECT, 'B' ) ) ) + $ THEN + INFO = 3 + ELSE IF( M.LT.0 ) THEN + INFO = 4 + ELSE IF( N.LT.0 ) THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = 9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLASR ', INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form P * A +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 20 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 10 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 40 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 30 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 30 CONTINUE + END IF + 40 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 60 J = 2, M + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 50 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 80 J = M, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 70 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 70 CONTINUE + END IF + 80 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 100 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 90 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 90 CONTINUE + END IF + 100 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 120 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 110 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 110 CONTINUE + END IF + 120 CONTINUE + END IF + END IF + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form A * P' +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 140 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 130 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 130 CONTINUE + END IF + 140 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 160 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 150 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 150 CONTINUE + END IF + 160 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 180 J = 2, N + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 170 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 170 CONTINUE + END IF + 180 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 200 J = N, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 190 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 190 CONTINUE + END IF + 200 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 220 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 210 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 210 CONTINUE + END IF + 220 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 240 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 230 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 230 CONTINUE + END IF + 240 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CLASR +* + END diff --git a/costa/native/external/lapack/classq.f b/costa/native/external/lapack/classq.f new file mode 100644 index 000000000..7adcd74b5 --- /dev/null +++ b/costa/native/external/lapack/classq.f @@ -0,0 +1,102 @@ + SUBROUTINE CLASSQ( N, X, INCX, SCALE, SUMSQ ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INCX, N + REAL SCALE, SUMSQ +* .. +* .. Array Arguments .. + COMPLEX X( * ) +* .. +* +* Purpose +* ======= +* +* CLASSQ returns the values scl and ssq such that +* +* ( scl**2 )*ssq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, +* +* where x( i ) = abs( X( 1 + ( i - 1 )*INCX ) ). The value of sumsq is +* assumed to be at least unity and the value of ssq will then satisfy +* +* 1.0 .le. ssq .le. ( sumsq + 2*n ). +* +* scale is assumed to be non-negative and scl returns the value +* +* scl = max( scale, abs( real( x( i ) ) ), abs( aimag( x( i ) ) ) ), +* i +* +* scale and sumsq must be supplied in SCALE and SUMSQ respectively. +* SCALE and SUMSQ are overwritten by scl and ssq respectively. +* +* The routine makes only one pass through the vector X. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of elements to be used from the vector X. +* +* X (input) COMPLEX array, dimension (N) +* The vector x as described above. +* x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. +* +* INCX (input) INTEGER +* The increment between successive values of the vector X. +* INCX > 0. +* +* SCALE (input/output) REAL +* On entry, the value scale in the equation above. +* On exit, SCALE is overwritten with the value scl . +* +* SUMSQ (input/output) REAL +* On entry, the value sumsq in the equation above. +* On exit, SUMSQ is overwritten with the value ssq . +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER IX + REAL TEMP1 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, REAL +* .. +* .. Executable Statements .. +* + IF( N.GT.0 ) THEN + DO 10 IX = 1, 1 + ( N-1 )*INCX, INCX + IF( REAL( X( IX ) ).NE.ZERO ) THEN + TEMP1 = ABS( REAL( X( IX ) ) ) + IF( SCALE.LT.TEMP1 ) THEN + SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2 + SCALE = TEMP1 + ELSE + SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 + END IF + END IF + IF( AIMAG( X( IX ) ).NE.ZERO ) THEN + TEMP1 = ABS( AIMAG( X( IX ) ) ) + IF( SCALE.LT.TEMP1 ) THEN + SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2 + SCALE = TEMP1 + ELSE + SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 + END IF + END IF + 10 CONTINUE + END IF +* + RETURN +* +* End of CLASSQ +* + END diff --git a/costa/native/external/lapack/claswp.f b/costa/native/external/lapack/claswp.f new file mode 100644 index 000000000..f993179bd --- /dev/null +++ b/costa/native/external/lapack/claswp.f @@ -0,0 +1,120 @@ + SUBROUTINE CLASWP( N, A, LDA, K1, K2, IPIV, INCX ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INCX, K1, K2, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLASWP performs a series of row interchanges on the matrix A. +* One row interchange is initiated for each of rows K1 through K2 of A. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of columns of the matrix A. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the matrix of column dimension N to which the row +* interchanges will be applied. +* On exit, the permuted matrix. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* +* K1 (input) INTEGER +* The first element of IPIV for which a row interchange will +* be done. +* +* K2 (input) INTEGER +* The last element of IPIV for which a row interchange will +* be done. +* +* IPIV (input) INTEGER array, dimension (M*abs(INCX)) +* The vector of pivot indices. Only the elements in positions +* K1 through K2 of IPIV are accessed. +* IPIV(K) = L implies rows K and L are to be interchanged. +* +* INCX (input) INTEGER +* The increment between successive values of IPIV. If IPIV +* is negative, the pivots are applied in reverse order. +* +* Further Details +* =============== +* +* Modified by +* R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, I1, I2, INC, IP, IX, IX0, J, K, N32 + COMPLEX TEMP +* .. +* .. Executable Statements .. +* +* Interchange row I with row IPIV(I) for each of rows K1 through K2. +* + IF( INCX.GT.0 ) THEN + IX0 = K1 + I1 = K1 + I2 = K2 + INC = 1 + ELSE IF( INCX.LT.0 ) THEN + IX0 = 1 + ( 1-K2 )*INCX + I1 = K2 + I2 = K1 + INC = -1 + ELSE + RETURN + END IF +* + N32 = ( N / 32 )*32 + IF( N32.NE.0 ) THEN + DO 30 J = 1, N32, 32 + IX = IX0 + DO 20 I = I1, I2, INC + IP = IPIV( IX ) + IF( IP.NE.I ) THEN + DO 10 K = J, J + 31 + TEMP = A( I, K ) + A( I, K ) = A( IP, K ) + A( IP, K ) = TEMP + 10 CONTINUE + END IF + IX = IX + INCX + 20 CONTINUE + 30 CONTINUE + END IF + IF( N32.NE.N ) THEN + N32 = N32 + 1 + IX = IX0 + DO 50 I = I1, I2, INC + IP = IPIV( IX ) + IF( IP.NE.I ) THEN + DO 40 K = N32, N + TEMP = A( I, K ) + A( I, K ) = A( IP, K ) + A( IP, K ) = TEMP + 40 CONTINUE + END IF + IX = IX + INCX + 50 CONTINUE + END IF +* + RETURN +* +* End of CLASWP +* + END diff --git a/costa/native/external/lapack/clasyf.f b/costa/native/external/lapack/clasyf.f new file mode 100644 index 000000000..c35fa8bee --- /dev/null +++ b/costa/native/external/lapack/clasyf.f @@ -0,0 +1,598 @@ + SUBROUTINE CLASYF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), W( LDW, * ) +* .. +* +* Purpose +* ======= +* +* CLASYF computes a partial factorization of a complex symmetric matrix +* A using the Bunch-Kaufman diagonal pivoting method. The partial +* factorization has the form: +* +* A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +* ( 0 U22 ) ( 0 D ) ( U12' U22' ) +* +* A = ( L11 0 ) ( D 0 ) ( L11' L21' ) if UPLO = 'L' +* ( L21 I ) ( 0 A22 ) ( 0 I ) +* +* where the order of D is at most NB. The actual order is returned in +* the argument KB, and is either NB or NB-1, or N if N <= NB. +* Note that U' denotes the transpose of U. +* +* CLASYF is an auxiliary routine called by CSYTRF. It uses blocked code +* (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or +* A22 (if UPLO = 'L'). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NB (input) INTEGER +* The maximum number of columns of the matrix A that should be +* factored. NB should be at least 2 to allow for 2-by-2 pivot +* blocks. +* +* KB (output) INTEGER +* The number of columns of A that were actually factored. +* KB is either NB-1 or NB, or N if N <= NB. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* On exit, A contains details of the partial factorization. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If UPLO = 'U', only the last KB elements of IPIV are set; +* if UPLO = 'L', only the first KB elements are set. +* +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* W (workspace) COMPLEX array, dimension (LDW,NB) +* +* LDW (input) INTEGER +* The leading dimension of the array W. LDW >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* > 0: if INFO = k, D(k,k) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER IMAX, J, JB, JJ, JMAX, JP, K, KK, KKW, KP, + $ KSTEP, KW + REAL ABSAKK, ALPHA, COLMAX, ROWMAX + COMPLEX D11, D21, D22, R1, T, Z +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + EXTERNAL LSAME, ICAMAX +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMM, CGEMV, CSCAL, CSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, MIN, REAL, SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( REAL( Z ) ) + ABS( AIMAG( Z ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* +* KW is the column of W which corresponds to column K of A +* + K = N + 10 CONTINUE + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* +* Copy column K of A to column KW of W and update it +* + CALL CCOPY( K, A( 1, K ), 1, W( 1, KW ), 1 ) + IF( K.LT.N ) + $ CALL CGEMV( 'No transpose', K, N-K, -CONE, A( 1, K+1 ), LDA, + $ W( K, KW+1 ), LDW, CONE, W( 1, KW ), 1 ) +* + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( W( K, KW ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = ICAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = CABS1( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* Copy column IMAX to column KW-1 of W and update it +* + CALL CCOPY( IMAX, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 ) + CALL CCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) + IF( K.LT.N ) + $ CALL CGEMV( 'No transpose', K, N-K, -CONE, + $ A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW, + $ CONE, W( 1, KW-1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = IMAX + ICAMAX( K-IMAX, W( IMAX+1, KW-1 ), 1 ) + ROWMAX = CABS1( W( JMAX, KW-1 ) ) + IF( IMAX.GT.1 ) THEN + JMAX = ICAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, KW-1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( W( IMAX, KW-1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW +* + CALL CCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + KKW = NB + KK - N +* +* Updated column KP is already stored in column KKW of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL CCOPY( K-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + CALL CCOPY( KP, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last KK columns of A and W +* + CALL CSWAP( N-KK+1, A( KK, KK ), LDA, A( KP, KK ), LDA ) + CALL CSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column KW of W now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Store U(k) in column k of A +* + CALL CCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + R1 = CONE / A( K, K ) + CALL CSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns KW and KW-1 of W now +* hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* + IF( K.GT.2 ) THEN +* +* Store U(k) and U(k-1) in columns k and k-1 of A +* + D21 = W( K-1, KW ) + D11 = W( K, KW ) / D21 + D22 = W( K-1, KW-1 ) / D21 + T = CONE / ( D11*D22-CONE ) + D21 = T / D21 + DO 20 J = 1, K - 2 + A( J, K-1 ) = D21*( D11*W( J, KW-1 )-W( J, KW ) ) + A( J, K ) = D21*( D22*W( J, KW )-W( J, KW-1 ) ) + 20 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = W( K-1, KW ) + A( K, K ) = W( K, KW ) + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12' = A11 - U12*W' +* +* computing blocks of NB columns at a time +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + CALL CGEMV( 'No transpose', JJ-J+1, N-K, -CONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, CONE, + $ A( J, JJ ), 1 ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + CALL CGEMM( 'No transpose', 'Transpose', J-1, JB, N-K, + $ -CONE, A( 1, K+1 ), LDA, W( J, KW+1 ), LDW, + $ CONE, A( 1, J ), LDA ) + 50 CONTINUE +* +* Put U12 in standard form by partially undoing the interchanges +* in columns k+1:n +* + J = K + 1 + 60 CONTINUE + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP + J = J + 1 + END IF + J = J + 1 + IF( JP.NE.JJ .AND. J.LE.N ) + $ CALL CSWAP( N-J+1, A( JP, J ), LDA, A( JJ, J ), LDA ) + IF( J.LE.N ) + $ GO TO 60 +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* +* Copy column K of A to column K of W and update it +* + CALL CCOPY( N-K+1, A( K, K ), 1, W( K, K ), 1 ) + CALL CGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), LDA, + $ W( K, 1 ), LDW, CONE, W( K, K ), 1 ) +* + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( W( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + ICAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = CABS1( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* Copy column IMAX to column K+1 of W and update it +* + CALL CCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1 ) + CALL CCOPY( N-IMAX+1, A( IMAX, IMAX ), 1, W( IMAX, K+1 ), + $ 1 ) + CALL CGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), + $ LDA, W( IMAX, 1 ), LDW, CONE, W( K, K+1 ), + $ 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = K - 1 + ICAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = CABS1( W( JMAX, K+1 ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + ICAMAX( N-IMAX, W( IMAX+1, K+1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, K+1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( W( IMAX, K+1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K +* + CALL CCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 +* +* Updated column KP is already stored in column KK of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL CCOPY( KP-K-1, A( K+1, KK ), 1, A( KP, K+1 ), LDA ) + CALL CCOPY( N-KP+1, A( KP, KK ), 1, A( KP, KP ), 1 ) +* +* Interchange rows KK and KP in first KK columns of A and W +* + CALL CSWAP( KK, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL CSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* +* Store L(k) in column k of A +* + CALL CCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN + R1 = CONE / A( K, K ) + CALL CSCAL( N-K, R1, A( K+1, K ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Store L(k) and L(k+1) in columns k and k+1 of A +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / D21 + T = CONE / ( D11*D22-CONE ) + D21 = T / D21 + DO 80 J = K + 2, N + A( J, K ) = D21*( D11*W( J, K )-W( J, K+1 ) ) + A( J, K+1 ) = D21*( D22*W( J, K+1 )-W( J, K ) ) + 80 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = W( K+1, K ) + A( K+1, K+1 ) = W( K+1, K+1 ) + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21' = A22 - L21*W' +* +* computing blocks of NB columns at a time +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + CALL CGEMV( 'No transpose', J+JB-JJ, K-1, -CONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, CONE, + $ A( JJ, JJ ), 1 ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL CGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -CONE, A( J+JB, 1 ), LDA, W( J, 1 ), + $ LDW, CONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Put L21 in standard form by partially undoing the interchanges +* in columns 1:k-1 +* + J = K - 1 + 120 CONTINUE + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP + J = J - 1 + END IF + J = J - 1 + IF( JP.NE.JJ .AND. J.GE.1 ) + $ CALL CSWAP( J, A( JP, 1 ), LDA, A( JJ, 1 ), LDA ) + IF( J.GE.1 ) + $ GO TO 120 +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF + RETURN +* +* End of CLASYF +* + END diff --git a/costa/native/external/lapack/clatbs.f b/costa/native/external/lapack/clatbs.f new file mode 100644 index 000000000..dbfac7532 --- /dev/null +++ b/costa/native/external/lapack/clatbs.f @@ -0,0 +1,909 @@ + SUBROUTINE CLATBS( UPLO, TRANS, DIAG, NORMIN, N, KD, AB, LDAB, X, + $ SCALE, CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, KD, LDAB, N + REAL SCALE +* .. +* .. Array Arguments .. + REAL CNORM( * ) + COMPLEX AB( LDAB, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CLATBS solves one of the triangular systems +* +* A * x = s*b, A**T * x = s*b, or A**H * x = s*b, +* +* with scaling to prevent overflow, where A is an upper or lower +* triangular band matrix. Here A' denotes the transpose of A, x and b +* are n-element vectors, and s is a scaling factor, usually less than +* or equal to 1, chosen so that the components of x will be less than +* the overflow threshold. If the unscaled problem will not cause +* overflow, the Level 2 BLAS routine CTBSV is called. If the matrix A +* is singular (A(j,j) = 0 for some j), then s is set to 0 and a +* non-trivial solution to A*x = 0 is returned. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* TRANS (input) CHARACTER*1 +* Specifies the operation applied to A. +* = 'N': Solve A * x = s*b (No transpose) +* = 'T': Solve A**T * x = s*b (Transpose) +* = 'C': Solve A**H * x = s*b (Conjugate transpose) +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* NORMIN (input) CHARACTER*1 +* Specifies whether CNORM has been set or not. +* = 'Y': CNORM contains the column norms on entry +* = 'N': CNORM is not set on entry. On exit, the norms will +* be computed and stored in CNORM. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of subdiagonals or superdiagonals in the +* triangular matrix A. KD >= 0. +* +* AB (input) COMPLEX array, dimension (LDAB,N) +* The upper or lower triangular band matrix A, stored in the +* first KD+1 rows of the array. The j-th column of A is stored +* in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* X (input/output) COMPLEX array, dimension (N) +* On entry, the right hand side b of the triangular system. +* On exit, X is overwritten by the solution vector x. +* +* SCALE (output) REAL +* The scaling factor s for the triangular system +* A * x = s*b, A**T * x = s*b, or A**H * x = s*b. +* If SCALE = 0, the matrix A is singular or badly scaled, and +* the vector x is an exact or approximate solution to A*x = 0. +* +* CNORM (input or output) REAL array, dimension (N) +* +* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +* contains the norm of the off-diagonal part of the j-th column +* of A. If TRANS = 'N', CNORM(j) must be greater than or equal +* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +* must be greater than or equal to the 1-norm. +* +* If NORMIN = 'N', CNORM is an output argument and CNORM(j) +* returns the 1-norm of the offdiagonal part of the j-th column +* of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* Further Details +* ======= ======= +* +* A rough bound on x is computed; if that is less than overflow, CTBSV +* is called, otherwise, specific code is used which checks for possible +* overflow or divide-by-zero at every operation. +* +* A columnwise scheme is used for solving A*x = b. The basic algorithm +* if A is lower triangular is +* +* x[1:n] := b[1:n] +* for j = 1, ..., n +* x(j) := x(j) / A(j,j) +* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +* end +* +* Define bounds on the components of x after j iterations of the loop: +* M(j) = bound on x[1:j] +* G(j) = bound on x[j+1:n] +* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +* +* Then for iteration j+1 we have +* M(j+1) <= G(j) / | A(j+1,j+1) | +* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +* +* where CNORM(j+1) is greater than or equal to the infinity-norm of +* column j+1 of A, not counting the diagonal. Hence +* +* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +* 1<=i<=j +* and +* +* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +* 1<=i< j +* +* Since |x(j)| <= M(j), we use the Level 2 BLAS routine CTBSV if the +* reciprocal of the largest M(j), j=1,..,n, is larger than +* max(underflow, 1/overflow). +* +* The bound on x(j) is also used to determine when a step in the +* columnwise method can be performed without fear of overflow. If +* the computed bound is greater than a large constant, x is scaled to +* prevent overflow, but if the bound overflows, x is set to 0, x(j) to +* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +* +* Similarly, a row-wise scheme is used to solve A**T *x = b or +* A**H *x = b. The basic algorithm for A upper triangular is +* +* for j = 1, ..., n +* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +* end +* +* We simultaneously compute two bounds +* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +* M(j) = bound on x(i), 1<=i<=j +* +* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +* Then the bound on x(j) is +* +* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +* +* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +* 1<=i<=j +* +* and we can safely call CTBSV if 1/M(n) and 1/G(n) are both greater +* than max(underflow, 1/overflow). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0, + $ TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, J, JFIRST, JINC, JLAST, JLEN, MAIND + REAL BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL, + $ XBND, XJ, XMAX + COMPLEX CSUMJ, TJJS, USCAL, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX, ISAMAX + REAL SCASUM, SLAMCH + COMPLEX CDOTC, CDOTU, CLADIV + EXTERNAL LSAME, ICAMAX, ISAMAX, SCASUM, SLAMCH, CDOTC, + $ CDOTU, CLADIV +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CSSCAL, CTBSV, SLABAD, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1, CABS2 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) + CABS2( ZDUM ) = ABS( REAL( ZDUM ) / 2. ) + + $ ABS( AIMAG( ZDUM ) / 2. ) +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( KD.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLATBS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM / SLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + DO 10 J = 1, N + JLEN = MIN( KD, J-1 ) + CNORM( J ) = SCASUM( JLEN, AB( KD+1-JLEN, J ), 1 ) + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + DO 20 J = 1, N + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) THEN + CNORM( J ) = SCASUM( JLEN, AB( 2, J ), 1 ) + ELSE + CNORM( J ) = ZERO + END IF + 20 CONTINUE + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM/2. +* + IMAX = ISAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM*HALF ) THEN + TSCAL = ONE + ELSE + TSCAL = HALF / ( SMLNUM*TMAX ) + CALL SSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine CTBSV can be used. +* + XMAX = ZERO + DO 30 J = 1, N + XMAX = MAX( XMAX, CABS2( X( J ) ) ) + 30 CONTINUE + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + MAIND = KD + 1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + MAIND = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 60 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* + TJJS = AB( MAIND, J ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = G(j-1) / abs(A(j,j)) +* + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF +* + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + 40 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 50 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 50 CONTINUE + END IF + 60 CONTINUE +* + ELSE +* +* Compute the growth in A**T * x = b or A**H * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + MAIND = KD + 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + MAIND = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 90 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* + TJJS = AB( MAIND, J ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF + 70 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 80 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 80 CONTINUE + END IF + 90 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL CTBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM*HALF ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = ( BIGNUM*HALF ) / XMAX + CALL CSSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + ELSE + XMAX = XMAX*TWO + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + DO 110 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 105 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 100 I = 1, N + X( I ) = ZERO + 100 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 105 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL CSSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) - +* x(j)* A(max(1,j-kd):j-1,j) +* + JLEN = MIN( KD, J-1 ) + CALL CAXPY( JLEN, -X( J )*TSCAL, + $ AB( KD+1-JLEN, J ), 1, X( J-JLEN ), 1 ) + I = ICAMAX( J-1, X, 1 ) + XMAX = CABS1( X( I ) ) + END IF + ELSE IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:min(j+kd,n)) := x(j+1:min(j+kd,n)) - +* x(j) * A(j+1:min(j+kd,n),j) +* + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) + $ CALL CAXPY( JLEN, -X( J )*TSCAL, AB( 2, J ), 1, + $ X( J+1 ), 1 ) + I = J + ICAMAX( N-J, X( J+1 ), 1 ) + XMAX = CABS1( X( I ) ) + END IF + 110 CONTINUE +* + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Solve A**T * x = b +* + DO 150 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = CLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.CMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call CDOTU to perform the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + CSUMJ = CDOTU( JLEN, AB( KD+1-JLEN, J ), 1, + $ X( J-JLEN ), 1 ) + ELSE + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.1 ) + $ CSUMJ = CDOTU( JLEN, AB( 2, J ), 1, X( J+1 ), + $ 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + DO 120 I = 1, JLEN + CSUMJ = CSUMJ + ( AB( KD+I-JLEN, J )*USCAL )* + $ X( J-JLEN-1+I ) + 120 CONTINUE + ELSE + JLEN = MIN( KD, N-J ) + DO 130 I = 1, JLEN + CSUMJ = CSUMJ + ( AB( I+1, J )*USCAL )*X( J+I ) + 130 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 145 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**T *x = 0. +* + DO 140 I = 1, N + X( I ) = ZERO + 140 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 145 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + 150 CONTINUE +* + ELSE +* +* Solve A**H * x = b +* + DO 190 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = CONJG( AB( MAIND, J ) )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = CLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.CMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call CDOTC to perform the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + CSUMJ = CDOTC( JLEN, AB( KD+1-JLEN, J ), 1, + $ X( J-JLEN ), 1 ) + ELSE + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.1 ) + $ CSUMJ = CDOTC( JLEN, AB( 2, J ), 1, X( J+1 ), + $ 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + DO 160 I = 1, JLEN + CSUMJ = CSUMJ + ( CONJG( AB( KD+I-JLEN, J ) )* + $ USCAL )*X( J-JLEN-1+I ) + 160 CONTINUE + ELSE + JLEN = MIN( KD, N-J ) + DO 170 I = 1, JLEN + CSUMJ = CSUMJ + ( CONJG( AB( I+1, J ) )*USCAL )* + $ X( J+I ) + 170 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = CONJG( AB( MAIND, J ) )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 185 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**H *x = 0. +* + DO 180 I = 1, N + X( I ) = ZERO + 180 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 185 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + 190 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL SSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of CLATBS +* + END diff --git a/costa/native/external/lapack/clatdf.f b/costa/native/external/lapack/clatdf.f new file mode 100644 index 000000000..c39102b9b --- /dev/null +++ b/costa/native/external/lapack/clatdf.f @@ -0,0 +1,242 @@ + SUBROUTINE CLATDF( IJOB, N, Z, LDZ, RHS, RDSUM, RDSCAL, IPIV, + $ JPIV ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER IJOB, LDZ, N + REAL RDSCAL, RDSUM +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + COMPLEX RHS( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CLATDF computes the contribution to the reciprocal Dif-estimate +* by solving for x in Z * x = b, where b is chosen such that the norm +* of x is as large as possible. It is assumed that LU decomposition +* of Z has been computed by CGETC2. On entry RHS = f holds the +* contribution from earlier solved sub-systems, and on return RHS = x. +* +* The factorization of Z returned by CGETC2 has the form +* Z = P * L * U * Q, where P and Q are permutation matrices. L is lower +* triangular with unit diagonal elements and U is upper triangular. +* +* Arguments +* ========= +* +* IJOB (input) INTEGER +* IJOB = 2: First compute an approximative null-vector e +* of Z using CGECON, e is normalized and solve for +* Zx = +-e - f with the sign giving the greater value of +* 2-norm(x). About 5 times as expensive as Default. +* IJOB .ne. 2: Local look ahead strategy where +* all entries of the r.h.s. b is choosen as either +1 or +* -1. Default. +* +* N (input) INTEGER +* The number of columns of the matrix Z. +* +* Z (input) REAL array, dimension (LDZ, N) +* On entry, the LU part of the factorization of the n-by-n +* matrix Z computed by CGETC2: Z = P * L * U * Q +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDA >= max(1, N). +* +* RHS (input/output) REAL array, dimension (N). +* On entry, RHS contains contributions from other subsystems. +* On exit, RHS contains the solution of the subsystem with +* entries according to the value of IJOB (see above). +* +* RDSUM (input/output) REAL +* On entry, the sum of squares of computed contributions to +* the Dif-estimate under computation by CTGSYL, where the +* scaling factor RDSCAL (see below) has been factored out. +* On exit, the corresponding sum of squares updated with the +* contributions from the current sub-system. +* If TRANS = 'T' RDSUM is not touched. +* NOTE: RDSUM only makes sense when CTGSY2 is called by CTGSYL. +* +* RDSCAL (input/output) REAL +* On entry, scaling factor used to prevent overflow in RDSUM. +* On exit, RDSCAL is updated w.r.t. the current contributions +* in RDSUM. +* If TRANS = 'T', RDSCAL is not touched. +* NOTE: RDSCAL only makes sense when CTGSY2 is called by +* CTGSYL. +* +* IPIV (input) INTEGER array, dimension (N). +* The pivot indices; for 1 <= i <= N, row i of the +* matrix has been interchanged with row IPIV(i). +* +* JPIV (input) INTEGER array, dimension (N). +* The pivot indices; for 1 <= j <= N, column j of the +* matrix has been interchanged with column JPIV(j). +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* This routine is a further developed implementation of algorithm +* BSOLVE in [1] using complete pivoting in the LU factorization. +* +* [1] Bo Kagstrom and Lars Westin, +* Generalized Schur Methods with Condition Estimators for +* Solving the Generalized Sylvester Equation, IEEE Transactions +* on Automatic Control, Vol. 34, No. 7, July 1989, pp 745-751. +* +* [2] Peter Poromaa, +* On Efficient and Robust Estimators for the Separation +* between two Regular Matrix Pairs with Applications in +* Condition Estimation. Report UMINF-95.05, Department of +* Computing Science, Umea University, S-901 87 Umea, Sweden, +* 1995. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXDIM + PARAMETER ( MAXDIM = 2 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J, K + REAL RTEMP, SCALE, SMINU, SPLUS + COMPLEX BM, BP, PMONE, TEMP +* .. +* .. Local Arrays .. + REAL RWORK( MAXDIM ) + COMPLEX WORK( 4*MAXDIM ), XM( MAXDIM ), XP( MAXDIM ) +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGECON, CGESC2, CLASSQ, CLASWP, + $ CSCAL +* .. +* .. External Functions .. + REAL SCASUM + COMPLEX CDOTC + EXTERNAL SCASUM, CDOTC +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, SQRT +* .. +* .. Executable Statements .. +* + IF( IJOB.NE.2 ) THEN +* +* Apply permutations IPIV to RHS +* + CALL CLASWP( 1, RHS, LDZ, 1, N-1, IPIV, 1 ) +* +* Solve for L-part choosing RHS either to +1 or -1. +* + PMONE = -CONE + DO 10 J = 1, N - 1 + BP = RHS( J ) + CONE + BM = RHS( J ) - CONE + SPLUS = ONE +* +* Lockahead for L- part RHS(1:N-1) = +-1 +* SPLUS and SMIN computed more efficiently than in BSOLVE[1]. +* + SPLUS = SPLUS + REAL( CDOTC( N-J, Z( J+1, J ), 1, Z( J+1, + $ J ), 1 ) ) + SMINU = REAL( CDOTC( N-J, Z( J+1, J ), 1, RHS( J+1 ), 1 ) ) + SPLUS = SPLUS*REAL( RHS( J ) ) + IF( SPLUS.GT.SMINU ) THEN + RHS( J ) = BP + ELSE IF( SMINU.GT.SPLUS ) THEN + RHS( J ) = BM + ELSE +* +* In this case the updating sums are equal and we can +* choose RHS(J) +1 or -1. The first time this happens we +* choose -1, thereafter +1. This is a simple way to get +* good estimates of matrices like Byers well-known example +* (see [1]). (Not done in BSOLVE.) +* + RHS( J ) = RHS( J ) + PMONE + PMONE = CONE + END IF +* +* Compute the remaining r.h.s. +* + TEMP = -RHS( J ) + CALL CAXPY( N-J, TEMP, Z( J+1, J ), 1, RHS( J+1 ), 1 ) + 10 CONTINUE +* +* Solve for U- part, lockahead for RHS(N) = +-1. This is not done +* In BSOLVE and will hopefully give us a better estimate because +* any ill-conditioning of the original matrix is transfered to U +* and not to L. U(N, N) is an approximation to sigma_min(LU). +* + CALL CCOPY( N-1, RHS, 1, WORK, 1 ) + WORK( N ) = RHS( N ) + CONE + RHS( N ) = RHS( N ) - CONE + SPLUS = ZERO + SMINU = ZERO + DO 30 I = N, 1, -1 + TEMP = CONE / Z( I, I ) + WORK( I ) = WORK( I )*TEMP + RHS( I ) = RHS( I )*TEMP + DO 20 K = I + 1, N + WORK( I ) = WORK( I ) - WORK( K )*( Z( I, K )*TEMP ) + RHS( I ) = RHS( I ) - RHS( K )*( Z( I, K )*TEMP ) + 20 CONTINUE + SPLUS = SPLUS + ABS( WORK( I ) ) + SMINU = SMINU + ABS( RHS( I ) ) + 30 CONTINUE + IF( SPLUS.GT.SMINU ) + $ CALL CCOPY( N, WORK, 1, RHS, 1 ) +* +* Apply the permutations JPIV to the computed solution (RHS) +* + CALL CLASWP( 1, RHS, LDZ, 1, N-1, JPIV, -1 ) +* +* Compute the sum of squares +* + CALL CLASSQ( N, RHS, 1, RDSCAL, RDSUM ) + RETURN + END IF +* +* ENTRY IJOB = 2 +* +* Compute approximate nullvector XM of Z +* + CALL CGECON( 'I', N, Z, LDZ, ONE, RTEMP, WORK, RWORK, INFO ) + CALL CCOPY( N, WORK( N+1 ), 1, XM, 1 ) +* +* Compute RHS +* + CALL CLASWP( 1, XM, LDZ, 1, N-1, IPIV, -1 ) + TEMP = CONE / SQRT( CDOTC( N, XM, 1, XM, 1 ) ) + CALL CSCAL( N, TEMP, XM, 1 ) + CALL CCOPY( N, XM, 1, XP, 1 ) + CALL CAXPY( N, CONE, RHS, 1, XP, 1 ) + CALL CAXPY( N, -CONE, XM, 1, RHS, 1 ) + CALL CGESC2( N, Z, LDZ, RHS, IPIV, JPIV, SCALE ) + CALL CGESC2( N, Z, LDZ, XP, IPIV, JPIV, SCALE ) + IF( SCASUM( N, XP, 1 ).GT.SCASUM( N, RHS, 1 ) ) + $ CALL CCOPY( N, XP, 1, RHS, 1 ) +* +* Compute the sum of squares +* + CALL CLASSQ( N, RHS, 1, RDSCAL, RDSUM ) + RETURN +* +* End of CLATDF +* + END diff --git a/costa/native/external/lapack/clatps.f b/costa/native/external/lapack/clatps.f new file mode 100644 index 000000000..aa8274b18 --- /dev/null +++ b/costa/native/external/lapack/clatps.f @@ -0,0 +1,895 @@ + SUBROUTINE CLATPS( UPLO, TRANS, DIAG, NORMIN, N, AP, X, SCALE, + $ CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, N + REAL SCALE +* .. +* .. Array Arguments .. + REAL CNORM( * ) + COMPLEX AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CLATPS solves one of the triangular systems +* +* A * x = s*b, A**T * x = s*b, or A**H * x = s*b, +* +* with scaling to prevent overflow, where A is an upper or lower +* triangular matrix stored in packed form. Here A**T denotes the +* transpose of A, A**H denotes the conjugate transpose of A, x and b +* are n-element vectors, and s is a scaling factor, usually less than +* or equal to 1, chosen so that the components of x will be less than +* the overflow threshold. If the unscaled problem will not cause +* overflow, the Level 2 BLAS routine CTPSV is called. If the matrix A +* is singular (A(j,j) = 0 for some j), then s is set to 0 and a +* non-trivial solution to A*x = 0 is returned. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* TRANS (input) CHARACTER*1 +* Specifies the operation applied to A. +* = 'N': Solve A * x = s*b (No transpose) +* = 'T': Solve A**T * x = s*b (Transpose) +* = 'C': Solve A**H * x = s*b (Conjugate transpose) +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* NORMIN (input) CHARACTER*1 +* Specifies whether CNORM has been set or not. +* = 'Y': CNORM contains the column norms on entry +* = 'N': CNORM is not set on entry. On exit, the norms will +* be computed and stored in CNORM. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input) COMPLEX array, dimension (N*(N+1)/2) +* The upper or lower triangular matrix A, packed columnwise in +* a linear array. The j-th column of A is stored in the array +* AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* X (input/output) COMPLEX array, dimension (N) +* On entry, the right hand side b of the triangular system. +* On exit, X is overwritten by the solution vector x. +* +* SCALE (output) REAL +* The scaling factor s for the triangular system +* A * x = s*b, A**T * x = s*b, or A**H * x = s*b. +* If SCALE = 0, the matrix A is singular or badly scaled, and +* the vector x is an exact or approximate solution to A*x = 0. +* +* CNORM (input or output) REAL array, dimension (N) +* +* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +* contains the norm of the off-diagonal part of the j-th column +* of A. If TRANS = 'N', CNORM(j) must be greater than or equal +* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +* must be greater than or equal to the 1-norm. +* +* If NORMIN = 'N', CNORM is an output argument and CNORM(j) +* returns the 1-norm of the offdiagonal part of the j-th column +* of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* Further Details +* ======= ======= +* +* A rough bound on x is computed; if that is less than overflow, CTPSV +* is called, otherwise, specific code is used which checks for possible +* overflow or divide-by-zero at every operation. +* +* A columnwise scheme is used for solving A*x = b. The basic algorithm +* if A is lower triangular is +* +* x[1:n] := b[1:n] +* for j = 1, ..., n +* x(j) := x(j) / A(j,j) +* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +* end +* +* Define bounds on the components of x after j iterations of the loop: +* M(j) = bound on x[1:j] +* G(j) = bound on x[j+1:n] +* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +* +* Then for iteration j+1 we have +* M(j+1) <= G(j) / | A(j+1,j+1) | +* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +* +* where CNORM(j+1) is greater than or equal to the infinity-norm of +* column j+1 of A, not counting the diagonal. Hence +* +* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +* 1<=i<=j +* and +* +* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +* 1<=i< j +* +* Since |x(j)| <= M(j), we use the Level 2 BLAS routine CTPSV if the +* reciprocal of the largest M(j), j=1,..,n, is larger than +* max(underflow, 1/overflow). +* +* The bound on x(j) is also used to determine when a step in the +* columnwise method can be performed without fear of overflow. If +* the computed bound is greater than a large constant, x is scaled to +* prevent overflow, but if the bound overflows, x is set to 0, x(j) to +* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +* +* Similarly, a row-wise scheme is used to solve A**T *x = b or +* A**H *x = b. The basic algorithm for A upper triangular is +* +* for j = 1, ..., n +* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +* end +* +* We simultaneously compute two bounds +* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +* M(j) = bound on x(i), 1<=i<=j +* +* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +* Then the bound on x(j) is +* +* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +* +* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +* 1<=i<=j +* +* and we can safely call CTPSV if 1/M(n) and 1/G(n) are both greater +* than max(underflow, 1/overflow). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0, + $ TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, IP, J, JFIRST, JINC, JLAST, JLEN + REAL BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL, + $ XBND, XJ, XMAX + COMPLEX CSUMJ, TJJS, USCAL, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX, ISAMAX + REAL SCASUM, SLAMCH + COMPLEX CDOTC, CDOTU, CLADIV + EXTERNAL LSAME, ICAMAX, ISAMAX, SCASUM, SLAMCH, CDOTC, + $ CDOTU, CLADIV +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CSSCAL, CTPSV, SLABAD, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1, CABS2 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) + CABS2( ZDUM ) = ABS( REAL( ZDUM ) / 2. ) + + $ ABS( AIMAG( ZDUM ) / 2. ) +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLATPS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM / SLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + IP = 1 + DO 10 J = 1, N + CNORM( J ) = SCASUM( J-1, AP( IP ), 1 ) + IP = IP + J + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + IP = 1 + DO 20 J = 1, N - 1 + CNORM( J ) = SCASUM( N-J, AP( IP+1 ), 1 ) + IP = IP + N - J + 1 + 20 CONTINUE + CNORM( N ) = ZERO + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM/2. +* + IMAX = ISAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM*HALF ) THEN + TSCAL = ONE + ELSE + TSCAL = HALF / ( SMLNUM*TMAX ) + CALL SSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine CTPSV can be used. +* + XMAX = ZERO + DO 30 J = 1, N + XMAX = MAX( XMAX, CABS2( X( J ) ) ) + 30 CONTINUE + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 60 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = N + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* + TJJS = AP( IP ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = G(j-1) / abs(A(j,j)) +* + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF +* + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + IP = IP + JINC*JLEN + JLEN = JLEN - 1 + 40 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 50 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 50 CONTINUE + END IF + 60 CONTINUE +* + ELSE +* +* Compute the growth in A**T * x = b or A**H * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 90 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = 1 + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* + TJJS = AP( IP ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF + JLEN = JLEN + 1 + IP = IP + JINC*JLEN + 70 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 80 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 80 CONTINUE + END IF + 90 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL CTPSV( UPLO, TRANS, DIAG, N, AP, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM*HALF ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = ( BIGNUM*HALF ) / XMAX + CALL CSSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + ELSE + XMAX = XMAX*TWO + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + IP = JFIRST*( JFIRST+1 ) / 2 + DO 110 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 105 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 100 I = 1, N + X( I ) = ZERO + 100 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 105 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL CSSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +* + CALL CAXPY( J-1, -X( J )*TSCAL, AP( IP-J+1 ), 1, X, + $ 1 ) + I = ICAMAX( J-1, X, 1 ) + XMAX = CABS1( X( I ) ) + END IF + IP = IP - J + ELSE + IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +* + CALL CAXPY( N-J, -X( J )*TSCAL, AP( IP+1 ), 1, + $ X( J+1 ), 1 ) + I = J + ICAMAX( N-J, X( J+1 ), 1 ) + XMAX = CABS1( X( I ) ) + END IF + IP = IP + N - J + 1 + END IF + 110 CONTINUE +* + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Solve A**T * x = b +* + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = 1 + DO 150 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = CLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.CMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call CDOTU to perform the dot product. +* + IF( UPPER ) THEN + CSUMJ = CDOTU( J-1, AP( IP-J+1 ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + CSUMJ = CDOTU( N-J, AP( IP+1 ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 120 I = 1, J - 1 + CSUMJ = CSUMJ + ( AP( IP-J+I )*USCAL )*X( I ) + 120 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 130 I = 1, N - J + CSUMJ = CSUMJ + ( AP( IP+I )*USCAL )*X( J+I ) + 130 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 145 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**T *x = 0. +* + DO 140 I = 1, N + X( I ) = ZERO + 140 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 145 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + JLEN = JLEN + 1 + IP = IP + JINC*JLEN + 150 CONTINUE +* + ELSE +* +* Solve A**H * x = b +* + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = 1 + DO 190 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = CONJG( AP( IP ) )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = CLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.CMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call CDOTC to perform the dot product. +* + IF( UPPER ) THEN + CSUMJ = CDOTC( J-1, AP( IP-J+1 ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + CSUMJ = CDOTC( N-J, AP( IP+1 ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 160 I = 1, J - 1 + CSUMJ = CSUMJ + ( CONJG( AP( IP-J+I ) )*USCAL )* + $ X( I ) + 160 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 170 I = 1, N - J + CSUMJ = CSUMJ + ( CONJG( AP( IP+I ) )*USCAL )* + $ X( J+I ) + 170 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = CONJG( AP( IP ) )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 185 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**H *x = 0. +* + DO 180 I = 1, N + X( I ) = ZERO + 180 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 185 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + JLEN = JLEN + 1 + IP = IP + JINC*JLEN + 190 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL SSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of CLATPS +* + END diff --git a/costa/native/external/lapack/clatrd.f b/costa/native/external/lapack/clatrd.f new file mode 100644 index 000000000..fe44d842a --- /dev/null +++ b/costa/native/external/lapack/clatrd.f @@ -0,0 +1,280 @@ + SUBROUTINE CLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDW, N, NB +* .. +* .. Array Arguments .. + REAL E( * ) + COMPLEX A( LDA, * ), TAU( * ), W( LDW, * ) +* .. +* +* Purpose +* ======= +* +* CLATRD reduces NB rows and columns of a complex Hermitian matrix A to +* Hermitian tridiagonal form by a unitary similarity +* transformation Q' * A * Q, and returns the matrices V and W which are +* needed to apply the transformation to the unreduced part of A. +* +* If UPLO = 'U', CLATRD reduces the last NB rows and columns of a +* matrix, of which the upper triangle is supplied; +* if UPLO = 'L', CLATRD reduces the first NB rows and columns of a +* matrix, of which the lower triangle is supplied. +* +* This is an auxiliary routine called by CHETRD. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER +* Specifies whether the upper or lower triangular part of the +* Hermitian matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. +* +* NB (input) INTEGER +* The number of rows and columns to be reduced. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* On exit: +* if UPLO = 'U', the last NB columns have been reduced to +* tridiagonal form, with the diagonal elements overwriting +* the diagonal elements of A; the elements above the diagonal +* with the array TAU, represent the unitary matrix Q as a +* product of elementary reflectors; +* if UPLO = 'L', the first NB columns have been reduced to +* tridiagonal form, with the diagonal elements overwriting +* the diagonal elements of A; the elements below the diagonal +* with the array TAU, represent the unitary matrix Q as a +* product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* E (output) REAL array, dimension (N-1) +* If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal +* elements of the last NB columns of the reduced matrix; +* if UPLO = 'L', E(1:nb) contains the subdiagonal elements of +* the first NB columns of the reduced matrix. +* +* TAU (output) COMPLEX array, dimension (N-1) +* The scalar factors of the elementary reflectors, stored in +* TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. +* See Further Details. +* +* W (output) COMPLEX array, dimension (LDW,NB) +* The n-by-nb matrix W required to update the unreduced part +* of A. +* +* LDW (input) INTEGER +* The leading dimension of the array W. LDW >= max(1,N). +* +* Further Details +* =============== +* +* If UPLO = 'U', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(n) H(n-1) . . . H(n-nb+1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), +* and tau in TAU(i-1). +* +* If UPLO = 'L', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(1) H(2) . . . H(nb). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), +* and tau in TAU(i). +* +* The elements of the vectors v together form the n-by-nb matrix V +* which is needed, with W, to apply the transformation to the unreduced +* part of the matrix, using a Hermitian rank-2k update of the form: +* A := A - V*W' - W*V'. +* +* The contents of A on exit are illustrated by the following examples +* with n = 5 and nb = 2: +* +* if UPLO = 'U': if UPLO = 'L': +* +* ( a a a v4 v5 ) ( d ) +* ( a a v4 v5 ) ( 1 d ) +* ( a 1 v5 ) ( v1 1 a ) +* ( d 1 ) ( v1 v2 a a ) +* ( d ) ( v1 v2 a a a ) +* +* where d denotes a diagonal element of the reduced matrix, a denotes +* an element of the original matrix that is unchanged, and vi denotes +* an element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE, HALF + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ), + $ HALF = ( 0.5E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, IW + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CGEMV, CHEMV, CLACGV, CLARFG, CSCAL +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, REAL +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Reduce last NB columns of upper triangle +* + DO 10 I = N, N - NB + 1, -1 + IW = I - N + NB + IF( I.LT.N ) THEN +* +* Update A(1:i,i) +* + A( I, I ) = REAL( A( I, I ) ) + CALL CLACGV( N-I, W( I, IW+1 ), LDW ) + CALL CGEMV( 'No transpose', I, N-I, -ONE, A( 1, I+1 ), + $ LDA, W( I, IW+1 ), LDW, ONE, A( 1, I ), 1 ) + CALL CLACGV( N-I, W( I, IW+1 ), LDW ) + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + CALL CGEMV( 'No transpose', I, N-I, -ONE, W( 1, IW+1 ), + $ LDW, A( I, I+1 ), LDA, ONE, A( 1, I ), 1 ) + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + A( I, I ) = REAL( A( I, I ) ) + END IF + IF( I.GT.1 ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(1:i-2,i) +* + ALPHA = A( I-1, I ) + CALL CLARFG( I-1, ALPHA, A( 1, I ), 1, TAU( I-1 ) ) + E( I-1 ) = ALPHA + A( I-1, I ) = ONE +* +* Compute W(1:i-1,i) +* + CALL CHEMV( 'Upper', I-1, ONE, A, LDA, A( 1, I ), 1, + $ ZERO, W( 1, IW ), 1 ) + IF( I.LT.N ) THEN + CALL CGEMV( 'Conjugate transpose', I-1, N-I, ONE, + $ W( 1, IW+1 ), LDW, A( 1, I ), 1, ZERO, + $ W( I+1, IW ), 1 ) + CALL CGEMV( 'No transpose', I-1, N-I, -ONE, + $ A( 1, I+1 ), LDA, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + CALL CGEMV( 'Conjugate transpose', I-1, N-I, ONE, + $ A( 1, I+1 ), LDA, A( 1, I ), 1, ZERO, + $ W( I+1, IW ), 1 ) + CALL CGEMV( 'No transpose', I-1, N-I, -ONE, + $ W( 1, IW+1 ), LDW, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + END IF + CALL CSCAL( I-1, TAU( I-1 ), W( 1, IW ), 1 ) + ALPHA = -HALF*TAU( I-1 )*CDOTC( I-1, W( 1, IW ), 1, + $ A( 1, I ), 1 ) + CALL CAXPY( I-1, ALPHA, A( 1, I ), 1, W( 1, IW ), 1 ) + END IF +* + 10 CONTINUE + ELSE +* +* Reduce first NB columns of lower triangle +* + DO 20 I = 1, NB +* +* Update A(i:n,i) +* + A( I, I ) = REAL( A( I, I ) ) + CALL CLACGV( I-1, W( I, 1 ), LDW ) + CALL CGEMV( 'No transpose', N-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, W( I, 1 ), LDW, ONE, A( I, I ), 1 ) + CALL CLACGV( I-1, W( I, 1 ), LDW ) + CALL CLACGV( I-1, A( I, 1 ), LDA ) + CALL CGEMV( 'No transpose', N-I+1, I-1, -ONE, W( I, 1 ), + $ LDW, A( I, 1 ), LDA, ONE, A( I, I ), 1 ) + CALL CLACGV( I-1, A( I, 1 ), LDA ) + A( I, I ) = REAL( A( I, I ) ) + IF( I.LT.N ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:n,i) +* + ALPHA = A( I+1, I ) + CALL CLARFG( N-I, ALPHA, A( MIN( I+2, N ), I ), 1, + $ TAU( I ) ) + E( I ) = ALPHA + A( I+1, I ) = ONE +* +* Compute W(i+1:n,i) +* + CALL CHEMV( 'Lower', N-I, ONE, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, W( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', N-I, I-1, ONE, + $ W( I+1, 1 ), LDW, A( I+1, I ), 1, ZERO, + $ W( 1, I ), 1 ) + CALL CGEMV( 'No transpose', N-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', N-I, I-1, ONE, + $ A( I+1, 1 ), LDA, A( I+1, I ), 1, ZERO, + $ W( 1, I ), 1 ) + CALL CGEMV( 'No transpose', N-I, I-1, -ONE, W( I+1, 1 ), + $ LDW, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL CSCAL( N-I, TAU( I ), W( I+1, I ), 1 ) + ALPHA = -HALF*TAU( I )*CDOTC( N-I, W( I+1, I ), 1, + $ A( I+1, I ), 1 ) + CALL CAXPY( N-I, ALPHA, A( I+1, I ), 1, W( I+1, I ), 1 ) + END IF +* + 20 CONTINUE + END IF +* + RETURN +* +* End of CLATRD +* + END diff --git a/costa/native/external/lapack/clatrs.f b/costa/native/external/lapack/clatrs.f new file mode 100644 index 000000000..cd9243b1c --- /dev/null +++ b/costa/native/external/lapack/clatrs.f @@ -0,0 +1,880 @@ + SUBROUTINE CLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE, + $ CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, LDA, N + REAL SCALE +* .. +* .. Array Arguments .. + REAL CNORM( * ) + COMPLEX A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CLATRS solves one of the triangular systems +* +* A * x = s*b, A**T * x = s*b, or A**H * x = s*b, +* +* with scaling to prevent overflow. Here A is an upper or lower +* triangular matrix, A**T denotes the transpose of A, A**H denotes the +* conjugate transpose of A, x and b are n-element vectors, and s is a +* scaling factor, usually less than or equal to 1, chosen so that the +* components of x will be less than the overflow threshold. If the +* unscaled problem will not cause overflow, the Level 2 BLAS routine +* CTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j), +* then s is set to 0 and a non-trivial solution to A*x = 0 is returned. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* TRANS (input) CHARACTER*1 +* Specifies the operation applied to A. +* = 'N': Solve A * x = s*b (No transpose) +* = 'T': Solve A**T * x = s*b (Transpose) +* = 'C': Solve A**H * x = s*b (Conjugate transpose) +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* NORMIN (input) CHARACTER*1 +* Specifies whether CNORM has been set or not. +* = 'Y': CNORM contains the column norms on entry +* = 'N': CNORM is not set on entry. On exit, the norms will +* be computed and stored in CNORM. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading n by n +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. If UPLO = 'L', the leading n by n lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max (1,N). +* +* X (input/output) COMPLEX array, dimension (N) +* On entry, the right hand side b of the triangular system. +* On exit, X is overwritten by the solution vector x. +* +* SCALE (output) REAL +* The scaling factor s for the triangular system +* A * x = s*b, A**T * x = s*b, or A**H * x = s*b. +* If SCALE = 0, the matrix A is singular or badly scaled, and +* the vector x is an exact or approximate solution to A*x = 0. +* +* CNORM (input or output) REAL array, dimension (N) +* +* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +* contains the norm of the off-diagonal part of the j-th column +* of A. If TRANS = 'N', CNORM(j) must be greater than or equal +* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +* must be greater than or equal to the 1-norm. +* +* If NORMIN = 'N', CNORM is an output argument and CNORM(j) +* returns the 1-norm of the offdiagonal part of the j-th column +* of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* Further Details +* ======= ======= +* +* A rough bound on x is computed; if that is less than overflow, CTRSV +* is called, otherwise, specific code is used which checks for possible +* overflow or divide-by-zero at every operation. +* +* A columnwise scheme is used for solving A*x = b. The basic algorithm +* if A is lower triangular is +* +* x[1:n] := b[1:n] +* for j = 1, ..., n +* x(j) := x(j) / A(j,j) +* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +* end +* +* Define bounds on the components of x after j iterations of the loop: +* M(j) = bound on x[1:j] +* G(j) = bound on x[j+1:n] +* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +* +* Then for iteration j+1 we have +* M(j+1) <= G(j) / | A(j+1,j+1) | +* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +* +* where CNORM(j+1) is greater than or equal to the infinity-norm of +* column j+1 of A, not counting the diagonal. Hence +* +* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +* 1<=i<=j +* and +* +* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +* 1<=i< j +* +* Since |x(j)| <= M(j), we use the Level 2 BLAS routine CTRSV if the +* reciprocal of the largest M(j), j=1,..,n, is larger than +* max(underflow, 1/overflow). +* +* The bound on x(j) is also used to determine when a step in the +* columnwise method can be performed without fear of overflow. If +* the computed bound is greater than a large constant, x is scaled to +* prevent overflow, but if the bound overflows, x is set to 0, x(j) to +* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +* +* Similarly, a row-wise scheme is used to solve A**T *x = b or +* A**H *x = b. The basic algorithm for A upper triangular is +* +* for j = 1, ..., n +* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +* end +* +* We simultaneously compute two bounds +* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +* M(j) = bound on x(i), 1<=i<=j +* +* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +* Then the bound on x(j) is +* +* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +* +* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +* 1<=i<=j +* +* and we can safely call CTRSV if 1/M(n) and 1/G(n) are both greater +* than max(underflow, 1/overflow). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0, + $ TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, J, JFIRST, JINC, JLAST + REAL BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL, + $ XBND, XJ, XMAX + COMPLEX CSUMJ, TJJS, USCAL, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX, ISAMAX + REAL SCASUM, SLAMCH + COMPLEX CDOTC, CDOTU, CLADIV + EXTERNAL LSAME, ICAMAX, ISAMAX, SCASUM, SLAMCH, CDOTC, + $ CDOTU, CLADIV +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CSSCAL, CTRSV, SLABAD, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1, CABS2 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) + CABS2( ZDUM ) = ABS( REAL( ZDUM ) / 2. ) + + $ ABS( AIMAG( ZDUM ) / 2. ) +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLATRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM / SLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + DO 10 J = 1, N + CNORM( J ) = SCASUM( J-1, A( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + DO 20 J = 1, N - 1 + CNORM( J ) = SCASUM( N-J, A( J+1, J ), 1 ) + 20 CONTINUE + CNORM( N ) = ZERO + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM/2. +* + IMAX = ISAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM*HALF ) THEN + TSCAL = ONE + ELSE + TSCAL = HALF / ( SMLNUM*TMAX ) + CALL SSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine CTRSV can be used. +* + XMAX = ZERO + DO 30 J = 1, N + XMAX = MAX( XMAX, CABS2( X( J ) ) ) + 30 CONTINUE + XBND = XMAX +* + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 60 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* + TJJS = A( J, J ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = G(j-1) / abs(A(j,j)) +* + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF +* + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + 40 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 50 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 50 CONTINUE + END IF + 60 CONTINUE +* + ELSE +* +* Compute the growth in A**T * x = b or A**H * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 90 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* + TJJS = A( J, J ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF + 70 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 80 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 80 CONTINUE + END IF + 90 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL CTRSV( UPLO, TRANS, DIAG, N, A, LDA, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM*HALF ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = ( BIGNUM*HALF ) / XMAX + CALL CSSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + ELSE + XMAX = XMAX*TWO + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + DO 110 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 105 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 100 I = 1, N + X( I ) = ZERO + 100 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 105 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL CSSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +* + CALL CAXPY( J-1, -X( J )*TSCAL, A( 1, J ), 1, X, + $ 1 ) + I = ICAMAX( J-1, X, 1 ) + XMAX = CABS1( X( I ) ) + END IF + ELSE + IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +* + CALL CAXPY( N-J, -X( J )*TSCAL, A( J+1, J ), 1, + $ X( J+1 ), 1 ) + I = J + ICAMAX( N-J, X( J+1 ), 1 ) + XMAX = CABS1( X( I ) ) + END IF + END IF + 110 CONTINUE +* + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Solve A**T * x = b +* + DO 150 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = CLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.CMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call CDOTU to perform the dot product. +* + IF( UPPER ) THEN + CSUMJ = CDOTU( J-1, A( 1, J ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + CSUMJ = CDOTU( N-J, A( J+1, J ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 120 I = 1, J - 1 + CSUMJ = CSUMJ + ( A( I, J )*USCAL )*X( I ) + 120 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 130 I = J + 1, N + CSUMJ = CSUMJ + ( A( I, J )*USCAL )*X( I ) + 130 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 145 + END IF +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**T *x = 0. +* + DO 140 I = 1, N + X( I ) = ZERO + 140 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 145 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + 150 CONTINUE +* + ELSE +* +* Solve A**H * x = b +* + DO 190 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = CONJG( A( J, J ) )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = CLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.CMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call CDOTC to perform the dot product. +* + IF( UPPER ) THEN + CSUMJ = CDOTC( J-1, A( 1, J ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + CSUMJ = CDOTC( N-J, A( J+1, J ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 160 I = 1, J - 1 + CSUMJ = CSUMJ + ( CONJG( A( I, J ) )*USCAL )* + $ X( I ) + 160 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 170 I = J + 1, N + CSUMJ = CSUMJ + ( CONJG( A( I, J ) )*USCAL )* + $ X( I ) + 170 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = CONJG( A( J, J ) )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 185 + END IF +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**H *x = 0. +* + DO 180 I = 1, N + X( I ) = ZERO + 180 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 185 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + 190 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL SSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of CLATRS +* + END diff --git a/costa/native/external/lapack/clatrz.f b/costa/native/external/lapack/clatrz.f new file mode 100644 index 000000000..fa51d60cd --- /dev/null +++ b/costa/native/external/lapack/clatrz.f @@ -0,0 +1,134 @@ + SUBROUTINE CLATRZ( M, N, L, A, LDA, TAU, WORK ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER L, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CLATRZ factors the M-by-(M+L) complex upper trapezoidal matrix +* [ A1 A2 ] = [ A(1:M,1:M) A(1:M,N-L+1:N) ] as ( R 0 ) * Z by means +* of unitary transformations, where Z is an (M+L)-by-(M+L) unitary +* matrix and, R and A1 are M-by-M upper triangular matrices. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* L (input) INTEGER +* The number of columns of the matrix A containing the +* meaningful part of the Householder vectors. N-M >= L >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the leading M-by-N upper trapezoidal part of the +* array A must contain the matrix to be factorized. +* On exit, the leading M-by-M upper triangular part of A +* contains the upper triangular matrix R, and elements N-L+1 to +* N of the first M rows of A, with the array TAU, represent the +* unitary matrix Z as a product of M elementary reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX array, dimension (M) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace) COMPLEX array, dimension (M) +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* The factorization is obtained by Householder's method. The kth +* transformation matrix, Z( k ), which is used to introduce zeros into +* the ( m - k + 1 )th row of A, is given in the form +* +* Z( k ) = ( I 0 ), +* ( 0 T( k ) ) +* +* where +* +* T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), +* ( 0 ) +* ( z( k ) ) +* +* tau is a scalar and z( k ) is an l element vector. tau and z( k ) +* are chosen to annihilate the elements of the kth row of A2. +* +* The scalar tau is returned in the kth element of TAU and the vector +* u( k ) in the kth row of A2, such that the elements of z( k ) are +* in a( k, l + 1 ), ..., a( k, n ). The elements of R are returned in +* the upper triangular part of A1. +* +* Z is given by +* +* Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLARFG, CLARZ +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.EQ.0 ) THEN + RETURN + ELSE IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + RETURN + END IF +* + DO 20 I = M, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* [ A(i,i) A(i,n-l+1:n) ] +* + CALL CLACGV( L, A( I, N-L+1 ), LDA ) + ALPHA = CONJG( A( I, I ) ) + CALL CLARFG( L+1, ALPHA, A( I, N-L+1 ), LDA, TAU( I ) ) + TAU( I ) = CONJG( TAU( I ) ) +* +* Apply H(i) to A(1:i-1,i:n) from the right +* + CALL CLARZ( 'Right', I-1, N-I+1, L, A( I, N-L+1 ), LDA, + $ CONJG( TAU( I ) ), A( 1, I ), LDA, WORK ) + A( I, I ) = CONJG( ALPHA ) +* + 20 CONTINUE +* + RETURN +* +* End of CLATRZ +* + END diff --git a/costa/native/external/lapack/clatzm.f b/costa/native/external/lapack/clatzm.f new file mode 100644 index 000000000..1c2507ffc --- /dev/null +++ b/costa/native/external/lapack/clatzm.f @@ -0,0 +1,147 @@ + SUBROUTINE CLATZM( SIDE, M, N, V, INCV, TAU, C1, C2, LDC, WORK ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + COMPLEX TAU +* .. +* .. Array Arguments .. + COMPLEX C1( LDC, * ), C2( LDC, * ), V( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine CUNMRZ. +* +* CLATZM applies a Householder matrix generated by CTZRQF to a matrix. +* +* Let P = I - tau*u*u', u = ( 1 ), +* ( v ) +* where v is an (m-1) vector if SIDE = 'L', or a (n-1) vector if +* SIDE = 'R'. +* +* If SIDE equals 'L', let +* C = [ C1 ] 1 +* [ C2 ] m-1 +* n +* Then C is overwritten by P*C. +* +* If SIDE equals 'R', let +* C = [ C1, C2 ] m +* 1 n-1 +* Then C is overwritten by C*P. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': form P * C +* = 'R': form C * P +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* V (input) COMPLEX array, dimension +* (1 + (M-1)*abs(INCV)) if SIDE = 'L' +* (1 + (N-1)*abs(INCV)) if SIDE = 'R' +* The vector v in the representation of P. V is not used +* if TAU = 0. +* +* INCV (input) INTEGER +* The increment between elements of v. INCV <> 0 +* +* TAU (input) COMPLEX +* The value tau in the representation of P. +* +* C1 (input/output) COMPLEX array, dimension +* (LDC,N) if SIDE = 'L' +* (M,1) if SIDE = 'R' +* On entry, the n-vector C1 if SIDE = 'L', or the m-vector C1 +* if SIDE = 'R'. +* +* On exit, the first row of P*C if SIDE = 'L', or the first +* column of C*P if SIDE = 'R'. +* +* C2 (input/output) COMPLEX array, dimension +* (LDC, N) if SIDE = 'L' +* (LDC, N-1) if SIDE = 'R' +* On entry, the (m - 1) x n matrix C2 if SIDE = 'L', or the +* m x (n - 1) matrix C2 if SIDE = 'R'. +* +* On exit, rows 2:m of P*C if SIDE = 'L', or columns 2:m of C*P +* if SIDE = 'R'. +* +* LDC (input) INTEGER +* The leading dimension of the arrays C1 and C2. +* LDC >= max(1,M). +* +* WORK (workspace) COMPLEX array, dimension +* (N) if SIDE = 'L' +* (M) if SIDE = 'R' +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGEMV, CGERC, CGERU, CLACGV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( ( MIN( M, N ).EQ.0 ) .OR. ( TAU.EQ.ZERO ) ) + $ RETURN +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* w := conjg( C1 + v' * C2 ) +* + CALL CCOPY( N, C1, LDC, WORK, 1 ) + CALL CLACGV( N, WORK, 1 ) + CALL CGEMV( 'Conjugate transpose', M-1, N, ONE, C2, LDC, V, + $ INCV, ONE, WORK, 1 ) +* +* [ C1 ] := [ C1 ] - tau* [ 1 ] * w' +* [ C2 ] [ C2 ] [ v ] +* + CALL CLACGV( N, WORK, 1 ) + CALL CAXPY( N, -TAU, WORK, 1, C1, LDC ) + CALL CGERU( M-1, N, -TAU, V, INCV, WORK, 1, C2, LDC ) +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* w := C1 + C2 * v +* + CALL CCOPY( M, C1, 1, WORK, 1 ) + CALL CGEMV( 'No transpose', M, N-1, ONE, C2, LDC, V, INCV, ONE, + $ WORK, 1 ) +* +* [ C1, C2 ] := [ C1, C2 ] - tau* w * [ 1 , v'] +* + CALL CAXPY( M, -TAU, WORK, 1, C1, 1 ) + CALL CGERC( M, N-1, -TAU, WORK, 1, V, INCV, C2, LDC ) + END IF +* + RETURN +* +* End of CLATZM +* + END diff --git a/costa/native/external/lapack/clauu2.f b/costa/native/external/lapack/clauu2.f new file mode 100644 index 000000000..e1054964b --- /dev/null +++ b/costa/native/external/lapack/clauu2.f @@ -0,0 +1,144 @@ + SUBROUTINE CLAUU2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLAUU2 computes the product U * U' or L' * L, where the triangular +* factor U or L is stored in the upper or lower triangular part of +* the array A. +* +* If UPLO = 'U' or 'u' then the upper triangle of the result is stored, +* overwriting the factor U in A. +* If UPLO = 'L' or 'l' then the lower triangle of the result is stored, +* overwriting the factor L in A. +* +* This is the unblocked form of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the triangular factor stored in the array A +* is upper or lower triangular: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the triangular factor U or L. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the triangular factor U or L. +* On exit, if UPLO = 'U', the upper triangle of A is +* overwritten with the upper triangle of the product U * U'; +* if UPLO = 'L', the lower triangle of A is overwritten with +* the lower triangle of the product L' * L. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + REAL AII +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CLACGV, CSSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CMPLX, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLAUU2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the product U * U'. +* + DO 10 I = 1, N + AII = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = AII*AII + REAL( CDOTC( N-I, A( I, I+1 ), LDA, + $ A( I, I+1 ), LDA ) ) + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + CALL CGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( I, I+1 ), LDA, CMPLX( AII ), + $ A( 1, I ), 1 ) + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + ELSE + CALL CSSCAL( I, AII, A( 1, I ), 1 ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the product L' * L. +* + DO 20 I = 1, N + AII = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = AII*AII + REAL( CDOTC( N-I, A( I+1, I ), 1, + $ A( I+1, I ), 1 ) ) + CALL CLACGV( I-1, A( I, 1 ), LDA ) + CALL CGEMV( 'Conjugate transpose', N-I, I-1, ONE, + $ A( I+1, 1 ), LDA, A( I+1, I ), 1, + $ CMPLX( AII ), A( I, 1 ), LDA ) + CALL CLACGV( I-1, A( I, 1 ), LDA ) + ELSE + CALL CSSCAL( I, AII, A( I, 1 ), LDA ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of CLAUU2 +* + END diff --git a/costa/native/external/lapack/clauum.f b/costa/native/external/lapack/clauum.f new file mode 100644 index 000000000..01276e40e --- /dev/null +++ b/costa/native/external/lapack/clauum.f @@ -0,0 +1,161 @@ + SUBROUTINE CLAUUM( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CLAUUM computes the product U * U' or L' * L, where the triangular +* factor U or L is stored in the upper or lower triangular part of +* the array A. +* +* If UPLO = 'U' or 'u' then the upper triangle of the result is stored, +* overwriting the factor U in A. +* If UPLO = 'L' or 'l' then the lower triangle of the result is stored, +* overwriting the factor L in A. +* +* This is the blocked form of the algorithm, calling Level 3 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the triangular factor stored in the array A +* is upper or lower triangular: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the triangular factor U or L. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the triangular factor U or L. +* On exit, if UPLO = 'U', the upper triangle of A is +* overwritten with the upper triangle of the product U * U'; +* if UPLO = 'L', the lower triangle of A is overwritten with +* the lower triangle of the product L' * L. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CHERK, CLAUU2, CTRMM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLAUUM', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'CLAUUM', UPLO, N, -1, -1, -1 ) +* + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL CLAUU2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code +* + IF( UPPER ) THEN +* +* Compute the product U * U'. +* + DO 10 I = 1, N, NB + IB = MIN( NB, N-I+1 ) + CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Non-unit', I-1, IB, CONE, A( I, I ), LDA, + $ A( 1, I ), LDA ) + CALL CLAUU2( 'Upper', IB, A( I, I ), LDA, INFO ) + IF( I+IB.LE.N ) THEN + CALL CGEMM( 'No transpose', 'Conjugate transpose', + $ I-1, IB, N-I-IB+1, CONE, A( 1, I+IB ), + $ LDA, A( I, I+IB ), LDA, CONE, A( 1, I ), + $ LDA ) + CALL CHERK( 'Upper', 'No transpose', IB, N-I-IB+1, + $ ONE, A( I, I+IB ), LDA, ONE, A( I, I ), + $ LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute the product L' * L. +* + DO 20 I = 1, N, NB + IB = MIN( NB, N-I+1 ) + CALL CTRMM( 'Left', 'Lower', 'Conjugate transpose', + $ 'Non-unit', IB, I-1, CONE, A( I, I ), LDA, + $ A( I, 1 ), LDA ) + CALL CLAUU2( 'Lower', IB, A( I, I ), LDA, INFO ) + IF( I+IB.LE.N ) THEN + CALL CGEMM( 'Conjugate transpose', 'No transpose', IB, + $ I-1, N-I-IB+1, CONE, A( I+IB, I ), LDA, + $ A( I+IB, 1 ), LDA, CONE, A( I, 1 ), LDA ) + CALL CHERK( 'Lower', 'Conjugate transpose', IB, + $ N-I-IB+1, ONE, A( I+IB, I ), LDA, ONE, + $ A( I, I ), LDA ) + END IF + 20 CONTINUE + END IF + END IF +* + RETURN +* +* End of CLAUUM +* + END diff --git a/costa/native/external/lapack/cpbcon.f b/costa/native/external/lapack/cpbcon.f new file mode 100644 index 000000000..a9ba6b058 --- /dev/null +++ b/costa/native/external/lapack/cpbcon.f @@ -0,0 +1,194 @@ + SUBROUTINE CPBCON( UPLO, N, KD, AB, LDAB, ANORM, RCOND, WORK, + $ RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX AB( LDAB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CPBCON estimates the reciprocal of the condition number (in the +* 1-norm) of a complex Hermitian positive definite band matrix using +* the Cholesky factorization A = U**H*U or A = L*L**H computed by +* CPBTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular factor stored in AB; +* = 'L': Lower triangular factor stored in AB. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +* +* AB (input) COMPLEX array, dimension (LDAB,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**H*U or A = L*L**H of the band matrix A, stored in the +* first KD+1 rows of the array. The j-th column of U or L is +* stored in the j-th column of the array AB as follows: +* if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; +* if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* ANORM (input) REAL +* The 1-norm (or infinity-norm) of the Hermitian band matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + REAL AINVNM, SCALE, SCALEL, SCALEU, SMLNUM + COMPLEX ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH + EXTERNAL LSAME, ICAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACON, CLATBS, CSRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U'). +* + CALL CLATBS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, KD, AB, LDAB, WORK, SCALEL, RWORK, + $ INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL CLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEU, RWORK, INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL CLATBS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L'). +* + CALL CLATBS( 'Lower', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, KD, AB, LDAB, WORK, SCALEU, RWORK, + $ INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = ICAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE +* + RETURN +* +* End of CPBCON +* + END diff --git a/costa/native/external/lapack/cpbequ.f b/costa/native/external/lapack/cpbequ.f new file mode 100644 index 000000000..0f177ae45 --- /dev/null +++ b/costa/native/external/lapack/cpbequ.f @@ -0,0 +1,168 @@ + SUBROUTINE CPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL S( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* CPBEQU computes row and column scalings intended to equilibrate a +* Hermitian positive definite band matrix A and reduce its condition +* number (with respect to the two-norm). S contains the scale factors, +* S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with +* elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This +* choice of S puts the condition number of B within a factor N of the +* smallest possible condition number over all possible diagonal +* scalings. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular of A is stored; +* = 'L': Lower triangular of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input) COMPLEX array, dimension (LDAB,N) +* The upper or lower triangle of the Hermitian band matrix A, +* stored in the first KD+1 rows of the array. The j-th column +* of A is stored in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array A. LDAB >= KD+1. +* +* S (output) REAL array, dimension (N) +* If INFO = 0, S contains the scale factors for A. +* +* SCOND (output) REAL +* If INFO = 0, S contains the ratio of the smallest S(i) to +* the largest S(i). If SCOND >= 0.1 and AMAX is neither too +* large nor too small, it is not worth scaling by S. +* +* AMAX (output) REAL +* Absolute value of largest matrix element. If AMAX is very +* close to overflow or very close to underflow, the matrix +* should be scaled. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the i-th diagonal element is nonpositive. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, J + REAL SMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPBEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* + IF( UPPER ) THEN + J = KD + 1 + ELSE + J = 1 + END IF +* +* Initialize SMIN and AMAX. +* + S( 1 ) = REAL( AB( J, 1 ) ) + SMIN = S( 1 ) + AMAX = S( 1 ) +* +* Find the minimum and maximum diagonal elements. +* + DO 10 I = 2, N + S( I ) = REAL( AB( J, I ) ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 20 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 20 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 30 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 30 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of CPBEQU +* + END diff --git a/costa/native/external/lapack/cpbrfs.f b/costa/native/external/lapack/cpbrfs.f new file mode 100644 index 000000000..8acd89bd8 --- /dev/null +++ b/costa/native/external/lapack/cpbrfs.f @@ -0,0 +1,342 @@ + SUBROUTINE CPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, + $ LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CPBRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is Hermitian positive definite +* and banded, and provides error bounds and backward error estimates +* for the solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AB (input) COMPLEX array, dimension (LDAB,N) +* The upper or lower triangle of the Hermitian band matrix A, +* stored in the first KD+1 rows of the array. The j-th column +* of A is stored in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* AFB (input) COMPLEX array, dimension (LDAFB,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**H*U or A = L*L**H of the band matrix A as computed by +* CPBTRF, in the same storage format as A (see AB). +* +* LDAFB (input) INTEGER +* The leading dimension of the array AFB. LDAFB >= KD+1. +* +* B (input) COMPLEX array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) COMPLEX array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by CPBTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, L, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CHBMV, CLACON, CPBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, MIN, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDAFB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = MIN( N+1, 2*KD+2 ) + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL CCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL CHBMV( UPLO, N, KD, -ONE, AB, LDAB, X( 1, J ), 1, ONE, + $ WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + L = KD + 1 - K + DO 40 I = MAX( 1, K-KD ), K - 1 + RWORK( I ) = RWORK( I ) + CABS1( AB( L+I, K ) )*XK + S = S + CABS1( AB( L+I, K ) )*CABS1( X( I, J ) ) + 40 CONTINUE + RWORK( K ) = RWORK( K ) + ABS( REAL( AB( KD+1, K ) ) )* + $ XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + ABS( REAL( AB( 1, K ) ) )*XK + L = 1 - K + DO 60 I = K + 1, MIN( N, K+KD ) + RWORK( I ) = RWORK( I ) + CABS1( AB( L+I, K ) )*XK + S = S + CABS1( AB( L+I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL CPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK, N, INFO ) + CALL CAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL CPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL CPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of CPBRFS +* + END diff --git a/costa/native/external/lapack/cpbstf.f b/costa/native/external/lapack/cpbstf.f new file mode 100644 index 000000000..07463d4ce --- /dev/null +++ b/costa/native/external/lapack/cpbstf.f @@ -0,0 +1,264 @@ + SUBROUTINE CPBSTF( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + COMPLEX AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* CPBSTF computes a split Cholesky factorization of a complex +* Hermitian positive definite band matrix A. +* +* This routine is designed to be used in conjunction with CHBGST. +* +* The factorization has the form A = S**H*S where S is a band matrix +* of the same bandwidth as A and the following structure: +* +* S = ( U ) +* ( M L ) +* +* where U is upper triangular of order m = (n+kd)/2, and L is lower +* triangular of order n-m. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first kd+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, if INFO = 0, the factor S from the split Cholesky +* factorization A = S**H*S. See Further Details. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the factorization could not be completed, +* because the updated element a(i,i) was negative; the +* matrix A is not positive definite. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 7, KD = 2: +* +* S = ( s11 s12 s13 ) +* ( s22 s23 s24 ) +* ( s33 s34 ) +* ( s44 ) +* ( s53 s54 s55 ) +* ( s64 s65 s66 ) +* ( s75 s76 s77 ) +* +* If UPLO = 'U', the array AB holds: +* +* on entry: on exit: +* +* * * a13 a24 a35 a46 a57 * * s13 s24 s53' s64' s75' +* * a12 a23 a34 a45 a56 a67 * s12 s23 s34 s54' s65' s76' +* a11 a22 a33 a44 a55 a66 a77 s11 s22 s33 s44 s55 s66 s77 +* +* If UPLO = 'L', the array AB holds: +* +* on entry: on exit: +* +* a11 a22 a33 a44 a55 a66 a77 s11 s22 s33 s44 s55 s66 s77 +* a21 a32 a43 a54 a65 a76 * s12' s23' s34' s54 s65 s76 * +* a31 a42 a53 a64 a64 * * s13' s24' s53 s64 s75 * * +* +* Array elements marked * are not used by the routine; s12' denotes +* conjg(s12); the diagonal elements of S are real. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, KLD, KM, M + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHER, CLACGV, CSSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPBSTF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + KLD = MAX( 1, LDAB-1 ) +* +* Set the splitting point m. +* + M = ( N+KD ) / 2 +* + IF( UPPER ) THEN +* +* Factorize A(m+1:n,m+1:n) as L**H*L, and update A(1:m,1:m). +* + DO 10 J = N, M + 1, -1 +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = REAL( AB( KD+1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( KD+1, J ) = AJJ + GO TO 50 + END IF + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ + KM = MIN( J-1, KD ) +* +* Compute elements j-km:j-1 of the j-th column and update the +* the leading submatrix within the band. +* + CALL CSSCAL( KM, ONE / AJJ, AB( KD+1-KM, J ), 1 ) + CALL CHER( 'Upper', KM, -ONE, AB( KD+1-KM, J ), 1, + $ AB( KD+1, J-KM ), KLD ) + 10 CONTINUE +* +* Factorize the updated submatrix A(1:m,1:m) as U**H*U. +* + DO 20 J = 1, M +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = REAL( AB( KD+1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( KD+1, J ) = AJJ + GO TO 50 + END IF + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ + KM = MIN( KD, M-J ) +* +* Compute elements j+1:j+km of the j-th row and update the +* trailing submatrix within the band. +* + IF( KM.GT.0 ) THEN + CALL CSSCAL( KM, ONE / AJJ, AB( KD, J+1 ), KLD ) + CALL CLACGV( KM, AB( KD, J+1 ), KLD ) + CALL CHER( 'Upper', KM, -ONE, AB( KD, J+1 ), KLD, + $ AB( KD+1, J+1 ), KLD ) + CALL CLACGV( KM, AB( KD, J+1 ), KLD ) + END IF + 20 CONTINUE + ELSE +* +* Factorize A(m+1:n,m+1:n) as L**H*L, and update A(1:m,1:m). +* + DO 30 J = N, M + 1, -1 +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = REAL( AB( 1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( 1, J ) = AJJ + GO TO 50 + END IF + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ + KM = MIN( J-1, KD ) +* +* Compute elements j-km:j-1 of the j-th row and update the +* trailing submatrix within the band. +* + CALL CSSCAL( KM, ONE / AJJ, AB( KM+1, J-KM ), KLD ) + CALL CLACGV( KM, AB( KM+1, J-KM ), KLD ) + CALL CHER( 'Lower', KM, -ONE, AB( KM+1, J-KM ), KLD, + $ AB( 1, J-KM ), KLD ) + CALL CLACGV( KM, AB( KM+1, J-KM ), KLD ) + 30 CONTINUE +* +* Factorize the updated submatrix A(1:m,1:m) as U**H*U. +* + DO 40 J = 1, M +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = REAL( AB( 1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( 1, J ) = AJJ + GO TO 50 + END IF + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ + KM = MIN( KD, M-J ) +* +* Compute elements j+1:j+km of the j-th column and update the +* trailing submatrix within the band. +* + IF( KM.GT.0 ) THEN + CALL CSSCAL( KM, ONE / AJJ, AB( 2, J ), 1 ) + CALL CHER( 'Lower', KM, -ONE, AB( 2, J ), 1, + $ AB( 1, J+1 ), KLD ) + END IF + 40 CONTINUE + END IF + RETURN +* + 50 CONTINUE + INFO = J + RETURN +* +* End of CPBSTF +* + END diff --git a/costa/native/external/lapack/cpbsv.f b/costa/native/external/lapack/cpbsv.f new file mode 100644 index 000000000..61d7f164e --- /dev/null +++ b/costa/native/external/lapack/cpbsv.f @@ -0,0 +1,152 @@ + SUBROUTINE CPBSV( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX AB( LDAB, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CPBSV computes the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N Hermitian positive definite band matrix and X +* and B are N-by-NRHS matrices. +* +* The Cholesky decomposition is used to factor A as +* A = U**H * U, if UPLO = 'U', or +* A = L * L**H, if UPLO = 'L', +* where U is an upper triangular band matrix, and L is a lower +* triangular band matrix, with the same number of superdiagonals or +* subdiagonals as A. The factored form of A is then used to solve the +* system of equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(N,j+KD). +* See below for further details. +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U**H*U or A = L*L**H of the band +* matrix A, in the same storage format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i of A is not +* positive definite, so the factorization could not be +* completed, and the solution has not been computed. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 6, KD = 2, and UPLO = 'U': +* +* On entry: On exit: +* +* * * a13 a24 a35 a46 * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* +* Similarly, if UPLO = 'L' the format of A is as follows: +* +* On entry: On exit: +* +* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +* a31 a42 a53 a64 * * l31 l42 l53 l64 * * +* +* Array elements marked * are not used by the routine. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CPBTRF, CPBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPBSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + CALL CPBTRF( UPLO, N, KD, AB, LDAB, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* + END IF + RETURN +* +* End of CPBSV +* + END diff --git a/costa/native/external/lapack/cpbsvx.f b/costa/native/external/lapack/cpbsvx.f new file mode 100644 index 000000000..c43403437 --- /dev/null +++ b/costa/native/external/lapack/cpbsvx.f @@ -0,0 +1,423 @@ + SUBROUTINE CPBSVX( FACT, UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, + $ EQUED, S, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + REAL BERR( * ), FERR( * ), RWORK( * ), S( * ) + COMPLEX AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CPBSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to +* compute the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N Hermitian positive definite band matrix and X +* and B are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'E', real scaling factors are computed to equilibrate +* the system: +* diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +* Whether or not the system will be equilibrated depends on the +* scaling of the matrix A, but if equilibration is used, A is +* overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +* +* 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +* factor the matrix A (after equilibration if FACT = 'E') as +* A = U**H * U, if UPLO = 'U', or +* A = L * L**H, if UPLO = 'L', +* where U is an upper triangular band matrix, and L is a lower +* triangular band matrix. +* +* 3. If the leading i-by-i principal minor is not positive definite, +* then the routine returns with INFO = i. Otherwise, the factored +* form of A is used to estimate the condition number of the matrix +* A. If the reciprocal of the condition number is less than machine +* precision, INFO = N+1 is returned as a warning, but the routine +* still goes on to solve for X and compute error bounds as +* described below. +* +* 4. The system of equations is solved for X using the factored form +* of A. +* +* 5. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* 6. If equilibration was used, the matrix X is premultiplied by +* diag(S) so that it solves the original system before +* equilibration. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of the matrix A is +* supplied on entry, and if not, whether the matrix A should be +* equilibrated before it is factored. +* = 'F': On entry, AFB contains the factored form of A. +* If EQUED = 'Y', the matrix A has been equilibrated +* with scaling factors given by S. AB and AFB will not +* be modified. +* = 'N': The matrix A will be copied to AFB and factored. +* = 'E': The matrix A will be equilibrated if necessary, then +* copied to AFB and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right-hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first KD+1 rows of the array, except +* if FACT = 'F' and EQUED = 'Y', then A must contain the +* equilibrated matrix diag(S)*A*diag(S). The j-th column of A +* is stored in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(N,j+KD). +* See below for further details. +* +* On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +* diag(S)*A*diag(S). +* +* LDAB (input) INTEGER +* The leading dimension of the array A. LDAB >= KD+1. +* +* AFB (input or output) COMPLEX array, dimension (LDAFB,N) +* If FACT = 'F', then AFB is an input argument and on entry +* contains the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H of the band matrix +* A, in the same storage format as A (see AB). If EQUED = 'Y', +* then AFB is the factored form of the equilibrated matrix A. +* +* If FACT = 'N', then AFB is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H. +* +* If FACT = 'E', then AFB is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H of the equilibrated +* matrix A (see the description of A for the form of the +* equilibrated matrix). +* +* LDAFB (input) INTEGER +* The leading dimension of the array AFB. LDAFB >= KD+1. +* +* EQUED (input or output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration (always true if FACT = 'N'). +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* EQUED is an input argument if FACT = 'F'; otherwise, it is an +* output argument. +* +* S (input or output) REAL array, dimension (N) +* The scale factors for A; not accessed if EQUED = 'N'. S is +* an input argument if FACT = 'F'; otherwise, S is an output +* argument. If FACT = 'F' and EQUED = 'Y', each element of S +* must be positive. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +* B is overwritten by diag(S) * B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) COMPLEX array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +* the original system of equations. Note that if EQUED = 'Y', +* A and B are modified on exit, and the solution to the +* equilibrated system is inv(diag(S))*X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) REAL +* The estimate of the reciprocal condition number of the matrix +* A after equilibration (if done). If RCOND is less than the +* machine precision (in particular, if RCOND = 0), the matrix +* is singular to working precision. This condition is +* indicated by a return code of INFO > 0. +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: the leading minor of order i of A is +* not positive definite, so the factorization +* could not be completed, and the solution has not +* been computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 6, KD = 2, and UPLO = 'U': +* +* Two-dimensional storage of the Hermitian matrix A: +* +* a11 a12 a13 +* a22 a23 a24 +* a33 a34 a35 +* a44 a45 a46 +* a55 a56 +* (aij=conjg(aji)) a66 +* +* Band storage of the upper triangle of A: +* +* * * a13 a24 a35 a46 +* * a12 a23 a34 a45 a56 +* a11 a22 a33 a44 a55 a66 +* +* Similarly, if UPLO = 'L' the format of A is as follows: +* +* a11 a22 a33 a44 a55 a66 +* a21 a32 a43 a54 a65 * +* a31 a42 a53 a64 * * +* +* Array elements marked * are not used by the routine. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU, UPPER + INTEGER I, INFEQU, J, J1, J2 + REAL AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHB, SLAMCH + EXTERNAL LSAME, CLANHB, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLACPY, CLAQHB, CPBCON, CPBEQU, CPBRFS, + $ CPBTRF, CPBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + UPPER = LSAME( UPLO, 'U' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( LDAFB.LT.KD+1 ) THEN + INFO = -9 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPBSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL CPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL CLAQHB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + IF( UPPER ) THEN + DO 40 J = 1, N + J1 = MAX( J-KD, 1 ) + CALL CCOPY( J-J1+1, AB( KD+1-J+J1, J ), 1, + $ AFB( KD+1-J+J1, J ), 1 ) + 40 CONTINUE + ELSE + DO 50 J = 1, N + J2 = MIN( J+KD, N ) + CALL CCOPY( J2-J+1, AB( 1, J ), 1, AFB( 1, J ), 1 ) + 50 CONTINUE + END IF +* + CALL CPBTRF( UPLO, N, KD, AFB, LDAFB, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = CLANHB( '1', UPLO, N, KD, AB, LDAB, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL CPBCON( UPLO, N, KD, AFB, LDAFB, ANORM, RCOND, WORK, RWORK, + $ INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution matrix X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CPBTRS( UPLO, N, KD, NRHS, AFB, LDAFB, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL CPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 70 J = 1, NRHS + DO 60 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 60 CONTINUE + 70 CONTINUE + DO 80 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 80 CONTINUE + END IF +* + RETURN +* +* End of CPBSVX +* + END diff --git a/costa/native/external/lapack/cpbtf2.f b/costa/native/external/lapack/cpbtf2.f new file mode 100644 index 000000000..c6edd2637 --- /dev/null +++ b/costa/native/external/lapack/cpbtf2.f @@ -0,0 +1,201 @@ + SUBROUTINE CPBTF2( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + COMPLEX AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* CPBTF2 computes the Cholesky factorization of a complex Hermitian +* positive definite band matrix A. +* +* The factorization has the form +* A = U' * U , if UPLO = 'U', or +* A = L * L', if UPLO = 'L', +* where U is an upper triangular matrix, U' is the conjugate transpose +* of U, and L is lower triangular. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* Hermitian matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of super-diagonals of the matrix A if UPLO = 'U', +* or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U'*U or A = L*L' of the band +* matrix A, in the same storage format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, the leading minor of order k is not +* positive definite, and the factorization could not be +* completed. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 6, KD = 2, and UPLO = 'U': +* +* On entry: On exit: +* +* * * a13 a24 a35 a46 * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* +* Similarly, if UPLO = 'L' the format of A is as follows: +* +* On entry: On exit: +* +* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +* a31 a42 a53 a64 * * l31 l42 l53 l64 * * +* +* Array elements marked * are not used by the routine. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, KLD, KN + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHER, CLACGV, CSSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPBTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + KLD = MAX( 1, LDAB-1 ) +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = REAL( AB( KD+1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( KD+1, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ +* +* Compute elements J+1:J+KN of row J and update the +* trailing submatrix within the band. +* + KN = MIN( KD, N-J ) + IF( KN.GT.0 ) THEN + CALL CSSCAL( KN, ONE / AJJ, AB( KD, J+1 ), KLD ) + CALL CLACGV( KN, AB( KD, J+1 ), KLD ) + CALL CHER( 'Upper', KN, -ONE, AB( KD, J+1 ), KLD, + $ AB( KD+1, J+1 ), KLD ) + CALL CLACGV( KN, AB( KD, J+1 ), KLD ) + END IF + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = REAL( AB( 1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( 1, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ +* +* Compute elements J+1:J+KN of column J and update the +* trailing submatrix within the band. +* + KN = MIN( KD, N-J ) + IF( KN.GT.0 ) THEN + CALL CSSCAL( KN, ONE / AJJ, AB( 2, J ), 1 ) + CALL CHER( 'Lower', KN, -ONE, AB( 2, J ), 1, + $ AB( 1, J+1 ), KLD ) + END IF + 20 CONTINUE + END IF + RETURN +* + 30 CONTINUE + INFO = J + RETURN +* +* End of CPBTF2 +* + END diff --git a/costa/native/external/lapack/cpbtrf.f b/costa/native/external/lapack/cpbtrf.f new file mode 100644 index 000000000..102257dd9 --- /dev/null +++ b/costa/native/external/lapack/cpbtrf.f @@ -0,0 +1,372 @@ + SUBROUTINE CPBTRF( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + COMPLEX AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* CPBTRF computes the Cholesky factorization of a complex Hermitian +* positive definite band matrix A. +* +* The factorization has the form +* A = U**H * U, if UPLO = 'U', or +* A = L * L**H, if UPLO = 'L', +* where U is an upper triangular matrix and L is lower triangular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) COMPLEX array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U**H*U or A = L*L**H of the band +* matrix A, in the same storage format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i is not +* positive definite, and the factorization could not be +* completed. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 6, KD = 2, and UPLO = 'U': +* +* On entry: On exit: +* +* * * a13 a24 a35 a46 * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* +* Similarly, if UPLO = 'L' the format of A is as follows: +* +* On entry: On exit: +* +* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +* a31 a42 a53 a64 * * l31 l42 l53 l64 * * +* +* Array elements marked * are not used by the routine. +* +* Contributed by +* Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989 +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) + INTEGER NBMAX, LDWORK + PARAMETER ( NBMAX = 32, LDWORK = NBMAX+1 ) +* .. +* .. Local Scalars .. + INTEGER I, I2, I3, IB, II, J, JJ, NB +* .. +* .. Local Arrays .. + COMPLEX WORK( LDWORK, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CHERK, CPBTF2, CPOTF2, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.LSAME( UPLO, 'U' ) ) .AND. + $ ( .NOT.LSAME( UPLO, 'L' ) ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPBTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment +* + NB = ILAENV( 1, 'CPBTRF', UPLO, N, KD, -1, -1 ) +* +* The block size must not exceed the semi-bandwidth KD, and must not +* exceed the limit set by the size of the local array WORK. +* + NB = MIN( NB, NBMAX ) +* + IF( NB.LE.1 .OR. NB.GT.KD ) THEN +* +* Use unblocked code +* + CALL CPBTF2( UPLO, N, KD, AB, LDAB, INFO ) + ELSE +* +* Use blocked code +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Compute the Cholesky factorization of a Hermitian band +* matrix, given the upper triangle of the matrix in band +* storage. +* +* Zero the upper triangle of the work array. +* + DO 20 J = 1, NB + DO 10 I = 1, J - 1 + WORK( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Process the band matrix one diagonal block at a time. +* + DO 70 I = 1, N, NB + IB = MIN( NB, N-I+1 ) +* +* Factorize the diagonal block +* + CALL CPOTF2( UPLO, IB, AB( KD+1, I ), LDAB-1, II ) + IF( II.NE.0 ) THEN + INFO = I + II - 1 + GO TO 150 + END IF + IF( I+IB.LE.N ) THEN +* +* Update the relevant part of the trailing submatrix. +* If A11 denotes the diagonal block which has just been +* factorized, then we need to update the remaining +* blocks in the diagram: +* +* A11 A12 A13 +* A22 A23 +* A33 +* +* The numbers of rows and columns in the partitioning +* are IB, I2, I3 respectively. The blocks A12, A22 and +* A23 are empty if IB = KD. The upper triangle of A13 +* lies outside the band. +* + I2 = MIN( KD-IB, N-I-IB+1 ) + I3 = MIN( IB, N-I-KD+1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A12 +* + CALL CTRSM( 'Left', 'Upper', 'Conjugate transpose', + $ 'Non-unit', IB, I2, CONE, + $ AB( KD+1, I ), LDAB-1, + $ AB( KD+1-IB, I+IB ), LDAB-1 ) +* +* Update A22 +* + CALL CHERK( 'Upper', 'Conjugate transpose', I2, IB, + $ -ONE, AB( KD+1-IB, I+IB ), LDAB-1, ONE, + $ AB( KD+1, I+IB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Copy the lower triangle of A13 into the work array. +* + DO 40 JJ = 1, I3 + DO 30 II = JJ, IB + WORK( II, JJ ) = AB( II-JJ+1, JJ+I+KD-1 ) + 30 CONTINUE + 40 CONTINUE +* +* Update A13 (in the work array). +* + CALL CTRSM( 'Left', 'Upper', 'Conjugate transpose', + $ 'Non-unit', IB, I3, CONE, + $ AB( KD+1, I ), LDAB-1, WORK, LDWORK ) +* +* Update A23 +* + IF( I2.GT.0 ) + $ CALL CGEMM( 'Conjugate transpose', + $ 'No transpose', I2, I3, IB, -CONE, + $ AB( KD+1-IB, I+IB ), LDAB-1, WORK, + $ LDWORK, CONE, AB( 1+IB, I+KD ), + $ LDAB-1 ) +* +* Update A33 +* + CALL CHERK( 'Upper', 'Conjugate transpose', I3, IB, + $ -ONE, WORK, LDWORK, ONE, + $ AB( KD+1, I+KD ), LDAB-1 ) +* +* Copy the lower triangle of A13 back into place. +* + DO 60 JJ = 1, I3 + DO 50 II = JJ, IB + AB( II-JJ+1, JJ+I+KD-1 ) = WORK( II, JJ ) + 50 CONTINUE + 60 CONTINUE + END IF + END IF + 70 CONTINUE + ELSE +* +* Compute the Cholesky factorization of a Hermitian band +* matrix, given the lower triangle of the matrix in band +* storage. +* +* Zero the lower triangle of the work array. +* + DO 90 J = 1, NB + DO 80 I = J + 1, NB + WORK( I, J ) = ZERO + 80 CONTINUE + 90 CONTINUE +* +* Process the band matrix one diagonal block at a time. +* + DO 140 I = 1, N, NB + IB = MIN( NB, N-I+1 ) +* +* Factorize the diagonal block +* + CALL CPOTF2( UPLO, IB, AB( 1, I ), LDAB-1, II ) + IF( II.NE.0 ) THEN + INFO = I + II - 1 + GO TO 150 + END IF + IF( I+IB.LE.N ) THEN +* +* Update the relevant part of the trailing submatrix. +* If A11 denotes the diagonal block which has just been +* factorized, then we need to update the remaining +* blocks in the diagram: +* +* A11 +* A21 A22 +* A31 A32 A33 +* +* The numbers of rows and columns in the partitioning +* are IB, I2, I3 respectively. The blocks A21, A22 and +* A32 are empty if IB = KD. The lower triangle of A31 +* lies outside the band. +* + I2 = MIN( KD-IB, N-I-IB+1 ) + I3 = MIN( IB, N-I-KD+1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A21 +* + CALL CTRSM( 'Right', 'Lower', + $ 'Conjugate transpose', 'Non-unit', I2, + $ IB, CONE, AB( 1, I ), LDAB-1, + $ AB( 1+IB, I ), LDAB-1 ) +* +* Update A22 +* + CALL CHERK( 'Lower', 'No transpose', I2, IB, -ONE, + $ AB( 1+IB, I ), LDAB-1, ONE, + $ AB( 1, I+IB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Copy the upper triangle of A31 into the work array. +* + DO 110 JJ = 1, IB + DO 100 II = 1, MIN( JJ, I3 ) + WORK( II, JJ ) = AB( KD+1-JJ+II, JJ+I-1 ) + 100 CONTINUE + 110 CONTINUE +* +* Update A31 (in the work array). +* + CALL CTRSM( 'Right', 'Lower', + $ 'Conjugate transpose', 'Non-unit', I3, + $ IB, CONE, AB( 1, I ), LDAB-1, WORK, + $ LDWORK ) +* +* Update A32 +* + IF( I2.GT.0 ) + $ CALL CGEMM( 'No transpose', + $ 'Conjugate transpose', I3, I2, IB, + $ -CONE, WORK, LDWORK, AB( 1+IB, I ), + $ LDAB-1, CONE, AB( 1+KD-IB, I+IB ), + $ LDAB-1 ) +* +* Update A33 +* + CALL CHERK( 'Lower', 'No transpose', I3, IB, -ONE, + $ WORK, LDWORK, ONE, AB( 1, I+KD ), + $ LDAB-1 ) +* +* Copy the upper triangle of A31 back into place. +* + DO 130 JJ = 1, IB + DO 120 II = 1, MIN( JJ, I3 ) + AB( KD+1-JJ+II, JJ+I-1 ) = WORK( II, JJ ) + 120 CONTINUE + 130 CONTINUE + END IF + END IF + 140 CONTINUE + END IF + END IF + RETURN +* + 150 CONTINUE + RETURN +* +* End of CPBTRF +* + END diff --git a/costa/native/external/lapack/cpbtrs.f b/costa/native/external/lapack/cpbtrs.f new file mode 100644 index 000000000..e86b69565 --- /dev/null +++ b/costa/native/external/lapack/cpbtrs.f @@ -0,0 +1,146 @@ + SUBROUTINE CPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX AB( LDAB, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CPBTRS solves a system of linear equations A*X = B with a Hermitian +* positive definite band matrix A using the Cholesky factorization +* A = U**H*U or A = L*L**H computed by CPBTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular factor stored in AB; +* = 'L': Lower triangular factor stored in AB. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input) COMPLEX array, dimension (LDAB,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**H*U or A = L*L**H of the band matrix A, stored in the +* first KD+1 rows of the array. The j-th column of U or L is +* stored in the j-th column of the array AB as follows: +* if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; +* if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CTBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U'*U. +* + DO 10 J = 1, NRHS +* +* Solve U'*X = B, overwriting B with X. +* + CALL CTBSV( 'Upper', 'Conjugate transpose', 'Non-unit', N, + $ KD, AB, LDAB, B( 1, J ), 1 ) +* +* Solve U*X = B, overwriting B with X. +* + CALL CTBSV( 'Upper', 'No transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* Solve A*X = B where A = L*L'. +* + DO 20 J = 1, NRHS +* +* Solve L*X = B, overwriting B with X. +* + CALL CTBSV( 'Lower', 'No transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) +* +* Solve L'*X = B, overwriting B with X. +* + CALL CTBSV( 'Lower', 'Conjugate transpose', 'Non-unit', N, + $ KD, AB, LDAB, B( 1, J ), 1 ) + 20 CONTINUE + END IF +* + RETURN +* +* End of CPBTRS +* + END diff --git a/costa/native/external/lapack/cpocon.f b/costa/native/external/lapack/cpocon.f new file mode 100644 index 000000000..90eeb4ae6 --- /dev/null +++ b/costa/native/external/lapack/cpocon.f @@ -0,0 +1,180 @@ + SUBROUTINE CPOCON( UPLO, N, A, LDA, ANORM, RCOND, WORK, RWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CPOCON estimates the reciprocal of the condition number (in the +* 1-norm) of a complex Hermitian positive definite matrix using the +* Cholesky factorization A = U**H*U or A = L*L**H computed by CPOTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**H*U or A = L*L**H, as computed by CPOTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* ANORM (input) REAL +* The 1-norm (or infinity-norm) of the Hermitian matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + REAL AINVNM, SCALE, SCALEL, SCALEU, SMLNUM + COMPLEX ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH + EXTERNAL LSAME, ICAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACON, CLATRS, CSRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of inv(A). +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U'). +* + CALL CLATRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, A, LDA, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL CLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SCALEU, RWORK, INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL CLATRS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L'). +* + CALL CLATRS( 'Lower', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, A, LDA, WORK, SCALEU, RWORK, INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = ICAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of CPOCON +* + END diff --git a/costa/native/external/lapack/cpoequ.f b/costa/native/external/lapack/cpoequ.f new file mode 100644 index 000000000..353efbd0b --- /dev/null +++ b/costa/native/external/lapack/cpoequ.f @@ -0,0 +1,138 @@ + SUBROUTINE CPOEQU( N, A, LDA, S, SCOND, AMAX, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL S( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CPOEQU computes row and column scalings intended to equilibrate a +* Hermitian positive definite matrix A and reduce its condition number +* (with respect to the two-norm). S contains the scale factors, +* S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with +* elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This +* choice of S puts the condition number of B within a factor N of the +* smallest possible condition number over all possible diagonal +* scalings. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The N-by-N Hermitian positive definite matrix whose scaling +* factors are to be computed. Only the diagonal elements of A +* are referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* S (output) REAL array, dimension (N) +* If INFO = 0, S contains the scale factors for A. +* +* SCOND (output) REAL +* If INFO = 0, S contains the ratio of the smallest S(i) to +* the largest S(i). If SCOND >= 0.1 and AMAX is neither too +* large nor too small, it is not worth scaling by S. +* +* AMAX (output) REAL +* Absolute value of largest matrix element. If AMAX is very +* close to overflow or very close to underflow, the matrix +* should be scaled. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the i-th diagonal element is nonpositive. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL SMIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* +* Find the minimum and maximum diagonal elements. +* + S( 1 ) = REAL( A( 1, 1 ) ) + SMIN = S( 1 ) + AMAX = S( 1 ) + DO 10 I = 2, N + S( I ) = REAL( A( I, I ) ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 20 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 20 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 30 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 30 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of CPOEQU +* + END diff --git a/costa/native/external/lapack/cporfs.f b/costa/native/external/lapack/cporfs.f new file mode 100644 index 000000000..d0c18012c --- /dev/null +++ b/costa/native/external/lapack/cporfs.f @@ -0,0 +1,333 @@ + SUBROUTINE CPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CPORFS improves the computed solution to a system of linear +* equations when the coefficient matrix is Hermitian positive definite, +* and provides error bounds and backward error estimates for the +* solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The Hermitian matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input) COMPLEX array, dimension (LDAF,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**H*U or A = L*L**H, as computed by CPOTRF. +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* B (input) COMPLEX array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) COMPLEX array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by CPOTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ==================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CHEMV, CLACON, CPOTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPORFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL CCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL CHEMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 40 CONTINUE + RWORK( K ) = RWORK( K ) + ABS( REAL( A( K, K ) ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + ABS( REAL( A( K, K ) ) )*XK + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL CPOTRS( UPLO, N, 1, AF, LDAF, WORK, N, INFO ) + CALL CAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL CPOTRS( UPLO, N, 1, AF, LDAF, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL CPOTRS( UPLO, N, 1, AF, LDAF, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of CPORFS +* + END diff --git a/costa/native/external/lapack/cposv.f b/costa/native/external/lapack/cposv.f new file mode 100644 index 000000000..5c150d958 --- /dev/null +++ b/costa/native/external/lapack/cposv.f @@ -0,0 +1,122 @@ + SUBROUTINE CPOSV( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CPOSV computes the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N Hermitian positive definite matrix and X and B +* are N-by-NRHS matrices. +* +* The Cholesky decomposition is used to factor A as +* A = U**H* U, if UPLO = 'U', or +* A = L * L**H, if UPLO = 'L', +* where U is an upper triangular matrix and L is a lower triangular +* matrix. The factored form of A is then used to solve the system of +* equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i of A is not +* positive definite, so the factorization could not be +* completed, and the solution has not been computed. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CPOTRF, CPOTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + CALL CPOTRF( UPLO, N, A, LDA, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* + END IF + RETURN +* +* End of CPOSV +* + END diff --git a/costa/native/external/lapack/cposvx.f b/costa/native/external/lapack/cposvx.f new file mode 100644 index 000000000..2ae696ca8 --- /dev/null +++ b/costa/native/external/lapack/cposvx.f @@ -0,0 +1,378 @@ + SUBROUTINE CPOSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED, + $ S, B, LDB, X, LDX, RCOND, FERR, BERR, WORK, + $ RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + REAL BERR( * ), FERR( * ), RWORK( * ), S( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CPOSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to +* compute the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N Hermitian positive definite matrix and X and B +* are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'E', real scaling factors are computed to equilibrate +* the system: +* diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +* Whether or not the system will be equilibrated depends on the +* scaling of the matrix A, but if equilibration is used, A is +* overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +* +* 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +* factor the matrix A (after equilibration if FACT = 'E') as +* A = U**H* U, if UPLO = 'U', or +* A = L * L**H, if UPLO = 'L', +* where U is an upper triangular matrix and L is a lower triangular +* matrix. +* +* 3. If the leading i-by-i principal minor is not positive definite, +* then the routine returns with INFO = i. Otherwise, the factored +* form of A is used to estimate the condition number of the matrix +* A. If the reciprocal of the condition number is less than machine +* precision, INFO = N+1 is returned as a warning, but the routine +* still goes on to solve for X and compute error bounds as +* described below. +* +* 4. The system of equations is solved for X using the factored form +* of A. +* +* 5. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* 6. If equilibration was used, the matrix X is premultiplied by +* diag(S) so that it solves the original system before +* equilibration. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of the matrix A is +* supplied on entry, and if not, whether the matrix A should be +* equilibrated before it is factored. +* = 'F': On entry, AF contains the factored form of A. +* If EQUED = 'Y', the matrix A has been equilibrated +* with scaling factors given by S. A and AF will not +* be modified. +* = 'N': The matrix A will be copied to AF and factored. +* = 'E': The matrix A will be equilibrated if necessary, then +* copied to AF and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the Hermitian matrix A, except if FACT = 'F' and +* EQUED = 'Y', then A must contain the equilibrated matrix +* diag(S)*A*diag(S). If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. A is not modified if +* FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. +* +* On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +* diag(S)*A*diag(S). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input or output) COMPLEX array, dimension (LDAF,N) +* If FACT = 'F', then AF is an input argument and on entry +* contains the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H, in the same storage +* format as A. If EQUED .ne. 'N', then AF is the factored form +* of the equilibrated matrix diag(S)*A*diag(S). +* +* If FACT = 'N', then AF is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H of the original +* matrix A. +* +* If FACT = 'E', then AF is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H of the equilibrated +* matrix A (see the description of A for the form of the +* equilibrated matrix). +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* EQUED (input or output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration (always true if FACT = 'N'). +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* EQUED is an input argument if FACT = 'F'; otherwise, it is an +* output argument. +* +* S (input or output) REAL array, dimension (N) +* The scale factors for A; not accessed if EQUED = 'N'. S is +* an input argument if FACT = 'F'; otherwise, S is an output +* argument. If FACT = 'F' and EQUED = 'Y', each element of S +* must be positive. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS righthand side matrix B. +* On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +* B is overwritten by diag(S) * B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) COMPLEX array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +* the original system of equations. Note that if EQUED = 'Y', +* A and B are modified on exit, and the solution to the +* equilibrated system is inv(diag(S))*X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) REAL +* The estimate of the reciprocal condition number of the matrix +* A after equilibration (if done). If RCOND is less than the +* machine precision (in particular, if RCOND = 0), the matrix +* is singular to working precision. This condition is +* indicated by a return code of INFO > 0. +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: the leading minor of order i of A is +* not positive definite, so the factorization +* could not be completed, and the solution has not +* been computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER I, INFEQU, J + REAL AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHE, SLAMCH + EXTERNAL LSAME, CLANHE, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACPY, CLAQHE, CPOCON, CPOEQU, CPORFS, CPOTRF, + $ CPOTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -9 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -10 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL CPOEQU( N, A, LDA, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL CLAQHE( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + CALL CLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL CPOTRF( UPLO, N, AF, LDAF, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = CLANHE( '1', UPLO, N, A, LDA, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL CPOCON( UPLO, N, AF, LDAF, ANORM, RCOND, WORK, RWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution matrix X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CPOTRS( UPLO, N, NRHS, AF, LDAF, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL CPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, LDX, + $ FERR, BERR, WORK, RWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 50 J = 1, NRHS + DO 40 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 40 CONTINUE + 50 CONTINUE + DO 60 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 60 CONTINUE + END IF +* + RETURN +* +* End of CPOSVX +* + END diff --git a/costa/native/external/lapack/cpotf2.f b/costa/native/external/lapack/cpotf2.f new file mode 100644 index 000000000..73cb0b3ea --- /dev/null +++ b/costa/native/external/lapack/cpotf2.f @@ -0,0 +1,175 @@ + SUBROUTINE CPOTF2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CPOTF2 computes the Cholesky factorization of a complex Hermitian +* positive definite matrix A. +* +* The factorization has the form +* A = U' * U , if UPLO = 'U', or +* A = L * L', if UPLO = 'L', +* where U is an upper triangular matrix and L is lower triangular. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* Hermitian matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* n by n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n by n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the factor U or L from the Cholesky +* factorization A = U'*U or A = L*L'. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, the leading minor of order k is not +* positive definite, and the factorization could not be +* completed. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CLACGV, CSSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = REAL( A( J, J ) ) - CDOTC( J-1, A( 1, J ), 1, + $ A( 1, J ), 1 ) + IF( AJJ.LE.ZERO ) THEN + A( J, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of row J. +* + IF( J.LT.N ) THEN + CALL CLACGV( J-1, A( 1, J ), 1 ) + CALL CGEMV( 'Transpose', J-1, N-J, -CONE, A( 1, J+1 ), + $ LDA, A( 1, J ), 1, CONE, A( J, J+1 ), LDA ) + CALL CLACGV( J-1, A( 1, J ), 1 ) + CALL CSSCAL( N-J, ONE / AJJ, A( J, J+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = REAL( A( J, J ) ) - CDOTC( J-1, A( J, 1 ), LDA, + $ A( J, 1 ), LDA ) + IF( AJJ.LE.ZERO ) THEN + A( J, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of column J. +* + IF( J.LT.N ) THEN + CALL CLACGV( J-1, A( J, 1 ), LDA ) + CALL CGEMV( 'No transpose', N-J, J-1, -CONE, A( J+1, 1 ), + $ LDA, A( J, 1 ), LDA, CONE, A( J+1, J ), 1 ) + CALL CLACGV( J-1, A( J, 1 ), LDA ) + CALL CSSCAL( N-J, ONE / AJJ, A( J+1, J ), 1 ) + END IF + 20 CONTINUE + END IF + GO TO 40 +* + 30 CONTINUE + INFO = J +* + 40 CONTINUE + RETURN +* +* End of CPOTF2 +* + END diff --git a/costa/native/external/lapack/cpotrf.f b/costa/native/external/lapack/cpotrf.f new file mode 100644 index 000000000..bbbc6dff5 --- /dev/null +++ b/costa/native/external/lapack/cpotrf.f @@ -0,0 +1,187 @@ + SUBROUTINE CPOTRF( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CPOTRF computes the Cholesky factorization of a complex Hermitian +* positive definite matrix A. +* +* The factorization has the form +* A = U**H * U, if UPLO = 'U', or +* A = L * L**H, if UPLO = 'L', +* where U is an upper triangular matrix and L is lower triangular. +* +* This is the block version of the algorithm, calling Level 3 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i is not +* positive definite, and the factorization could not be +* completed. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + COMPLEX CONE + PARAMETER ( ONE = 1.0E+0, CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CHERK, CPOTF2, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'CPOTRF', UPLO, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + CALL CPOTF2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code. +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + CALL CHERK( 'Upper', 'Conjugate transpose', JB, J-1, + $ -ONE, A( 1, J ), LDA, ONE, A( J, J ), LDA ) + CALL CPOTF2( 'Upper', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( J+JB.LE.N ) THEN +* +* Compute the current block row. +* + CALL CGEMM( 'Conjugate transpose', 'No transpose', JB, + $ N-J-JB+1, J-1, -CONE, A( 1, J ), LDA, + $ A( 1, J+JB ), LDA, CONE, A( J, J+JB ), + $ LDA ) + CALL CTRSM( 'Left', 'Upper', 'Conjugate transpose', + $ 'Non-unit', JB, N-J-JB+1, CONE, A( J, J ), + $ LDA, A( J, J+JB ), LDA ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + CALL CHERK( 'Lower', 'No transpose', JB, J-1, -ONE, + $ A( J, 1 ), LDA, ONE, A( J, J ), LDA ) + CALL CPOTF2( 'Lower', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( J+JB.LE.N ) THEN +* +* Compute the current block column. +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', + $ N-J-JB+1, JB, J-1, -CONE, A( J+JB, 1 ), + $ LDA, A( J, 1 ), LDA, CONE, A( J+JB, J ), + $ LDA ) + CALL CTRSM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Non-unit', N-J-JB+1, JB, CONE, A( J, J ), + $ LDA, A( J+JB, J ), LDA ) + END IF + 20 CONTINUE + END IF + END IF + GO TO 40 +* + 30 CONTINUE + INFO = INFO + J - 1 +* + 40 CONTINUE + RETURN +* +* End of CPOTRF +* + END diff --git a/costa/native/external/lapack/cpotri.f b/costa/native/external/lapack/cpotri.f new file mode 100644 index 000000000..d1dc0291e --- /dev/null +++ b/costa/native/external/lapack/cpotri.f @@ -0,0 +1,97 @@ + SUBROUTINE CPOTRI( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CPOTRI computes the inverse of a complex Hermitian positive definite +* matrix A using the Cholesky factorization A = U**H*U or A = L*L**H +* computed by CPOTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H, as computed by +* CPOTRF. +* On exit, the upper or lower triangle of the (Hermitian) +* inverse of A, overwriting the input factor U or L. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the (i,i) element of the factor U or L is +* zero, and the inverse could not be computed. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLAUUM, CTRTRI, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Invert the triangular Cholesky factor U or L. +* + CALL CTRTRI( UPLO, 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* +* Form inv(U)*inv(U)' or inv(L)'*inv(L). +* + CALL CLAUUM( UPLO, N, A, LDA, INFO ) +* + RETURN +* +* End of CPOTRI +* + END diff --git a/costa/native/external/lapack/cpotrs.f b/costa/native/external/lapack/cpotrs.f new file mode 100644 index 000000000..d779e2977 --- /dev/null +++ b/costa/native/external/lapack/cpotrs.f @@ -0,0 +1,133 @@ + SUBROUTINE CPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CPOTRS solves a system of linear equations A*X = B with a Hermitian +* positive definite matrix A using the Cholesky factorization +* A = U**H*U or A = L*L**H computed by CPOTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**H*U or A = L*L**H, as computed by CPOTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U'*U. +* +* Solve U'*X = B, overwriting B with X. +* + CALL CTRSM( 'Left', 'Upper', 'Conjugate transpose', 'Non-unit', + $ N, NRHS, ONE, A, LDA, B, LDB ) +* +* Solve U*X = B, overwriting B with X. +* + CALL CTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) + ELSE +* +* Solve A*X = B where A = L*L'. +* +* Solve L*X = B, overwriting B with X. +* + CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) +* +* Solve L'*X = B, overwriting B with X. +* + CALL CTRSM( 'Left', 'Lower', 'Conjugate transpose', 'Non-unit', + $ N, NRHS, ONE, A, LDA, B, LDB ) + END IF +* + RETURN +* +* End of CPOTRS +* + END diff --git a/costa/native/external/lapack/cppcon.f b/costa/native/external/lapack/cppcon.f new file mode 100644 index 000000000..8fdddfe17 --- /dev/null +++ b/costa/native/external/lapack/cppcon.f @@ -0,0 +1,179 @@ + SUBROUTINE CPPCON( UPLO, N, AP, ANORM, RCOND, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX AP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CPPCON estimates the reciprocal of the condition number (in the +* 1-norm) of a complex Hermitian positive definite packed matrix using +* the Cholesky factorization A = U**H*U or A = L*L**H computed by +* CPPTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input) COMPLEX array, dimension (N*(N+1)/2) +* The triangular factor U or L from the Cholesky factorization +* A = U**H*U or A = L*L**H, packed columnwise in a linear +* array. The j-th column of U or L is stored in the array AP +* as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +* +* ANORM (input) REAL +* The 1-norm (or infinity-norm) of the Hermitian matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + REAL AINVNM, SCALE, SCALEL, SCALEU, SMLNUM + COMPLEX ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH + EXTERNAL LSAME, ICAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACON, CLATPS, CSRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U'). +* + CALL CLATPS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, AP, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL CLATPS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEU, RWORK, INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL CLATPS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L'). +* + CALL CLATPS( 'Lower', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, AP, WORK, SCALEU, RWORK, INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = ICAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of CPPCON +* + END diff --git a/costa/native/external/lapack/cppequ.f b/costa/native/external/lapack/cppequ.f new file mode 100644 index 000000000..1001cc6e8 --- /dev/null +++ b/costa/native/external/lapack/cppequ.f @@ -0,0 +1,170 @@ + SUBROUTINE CPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL S( * ) + COMPLEX AP( * ) +* .. +* +* Purpose +* ======= +* +* CPPEQU computes row and column scalings intended to equilibrate a +* Hermitian positive definite matrix A in packed storage and reduce +* its condition number (with respect to the two-norm). S contains the +* scale factors, S(i)=1/sqrt(A(i,i)), chosen so that the scaled matrix +* B with elements B(i,j)=S(i)*A(i,j)*S(j) has ones on the diagonal. +* This choice of S puts the condition number of B within a factor N of +* the smallest possible condition number over all possible diagonal +* scalings. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input) COMPLEX array, dimension (N*(N+1)/2) +* The upper or lower triangle of the Hermitian matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* S (output) REAL array, dimension (N) +* If INFO = 0, S contains the scale factors for A. +* +* SCOND (output) REAL +* If INFO = 0, S contains the ratio of the smallest S(i) to +* the largest S(i). If SCOND >= 0.1 and AMAX is neither too +* large nor too small, it is not worth scaling by S. +* +* AMAX (output) REAL +* Absolute value of largest matrix element. If AMAX is very +* close to overflow or very close to underflow, the matrix +* should be scaled. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the i-th diagonal element is nonpositive. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, JJ + REAL SMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPPEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* +* Initialize SMIN and AMAX. +* + S( 1 ) = REAL( AP( 1 ) ) + SMIN = S( 1 ) + AMAX = S( 1 ) +* + IF( UPPER ) THEN +* +* UPLO = 'U': Upper triangle of A is stored. +* Find the minimum and maximum diagonal elements. +* + JJ = 1 + DO 10 I = 2, N + JJ = JJ + I + S( I ) = REAL( AP( JJ ) ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + ELSE +* +* UPLO = 'L': Lower triangle of A is stored. +* Find the minimum and maximum diagonal elements. +* + JJ = 1 + DO 20 I = 2, N + JJ = JJ + N - I + 2 + S( I ) = REAL( AP( JJ ) ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 20 CONTINUE + END IF +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 30 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 30 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 40 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 40 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of CPPEQU +* + END diff --git a/costa/native/external/lapack/cpprfs.f b/costa/native/external/lapack/cpprfs.f new file mode 100644 index 000000000..53e6ff3db --- /dev/null +++ b/costa/native/external/lapack/cpprfs.f @@ -0,0 +1,331 @@ + SUBROUTINE CPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, + $ BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CPPRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is Hermitian positive definite +* and packed, and provides error bounds and backward error estimates +* for the solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input) COMPLEX array, dimension (N*(N+1)/2) +* The upper or lower triangle of the Hermitian matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* AFP (input) COMPLEX array, dimension (N*(N+1)/2) +* The triangular factor U or L from the Cholesky factorization +* A = U**H*U or A = L*L**H, as computed by SPPTRF/CPPTRF, +* packed columnwise in a linear array in the same format as A +* (see AP). +* +* B (input) COMPLEX array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) COMPLEX array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by CPPTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ==================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IK, J, K, KASE, KK, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CHPMV, CLACON, CPPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL CCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL CHPMV( UPLO, N, -CONE, AP, X( 1, J ), 1, CONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + KK = 1 + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + IK = KK + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 40 CONTINUE + RWORK( K ) = RWORK( K ) + ABS( REAL( AP( KK+K-1 ) ) )* + $ XK + S + KK = KK + K + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + ABS( REAL( AP( KK ) ) )*XK + IK = KK + 1 + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + KK = KK + ( N-K+1 ) + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL CPPTRS( UPLO, N, 1, AFP, WORK, N, INFO ) + CALL CAXPY( N, CONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL CPPTRS( UPLO, N, 1, AFP, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL CPPTRS( UPLO, N, 1, AFP, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of CPPRFS +* + END diff --git a/costa/native/external/lapack/cppsv.f b/costa/native/external/lapack/cppsv.f new file mode 100644 index 000000000..73187dfad --- /dev/null +++ b/costa/native/external/lapack/cppsv.f @@ -0,0 +1,134 @@ + SUBROUTINE CPPSV( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CPPSV computes the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N Hermitian positive definite matrix stored in +* packed format and X and B are N-by-NRHS matrices. +* +* The Cholesky decomposition is used to factor A as +* A = U**H* U, if UPLO = 'U', or +* A = L * L**H, if UPLO = 'L', +* where U is an upper triangular matrix and L is a lower triangular +* matrix. The factored form of A is then used to solve the system of +* equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* +* On exit, if INFO = 0, the factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H, in the same storage +* format as A. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i of A is not +* positive definite, so the factorization could not be +* completed, and the solution has not been computed. +* +* Further Details +* =============== +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the Hermitian matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = conjg(aji)) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CPPTRF, CPPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPPSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + CALL CPPTRF( UPLO, N, AP, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO ) +* + END IF + RETURN +* +* End of CPPSV +* + END diff --git a/costa/native/external/lapack/cppsvx.f b/costa/native/external/lapack/cppsvx.f new file mode 100644 index 000000000..07c7047ec --- /dev/null +++ b/costa/native/external/lapack/cppsvx.f @@ -0,0 +1,383 @@ + SUBROUTINE CPPSVX( FACT, UPLO, N, NRHS, AP, AFP, EQUED, S, B, LDB, + $ X, LDX, RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + REAL BERR( * ), FERR( * ), RWORK( * ), S( * ) + COMPLEX AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CPPSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to +* compute the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N Hermitian positive definite matrix stored in +* packed format and X and B are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'E', real scaling factors are computed to equilibrate +* the system: +* diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +* Whether or not the system will be equilibrated depends on the +* scaling of the matrix A, but if equilibration is used, A is +* overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +* +* 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +* factor the matrix A (after equilibration if FACT = 'E') as +* A = U'* U , if UPLO = 'U', or +* A = L * L', if UPLO = 'L', +* where U is an upper triangular matrix, L is a lower triangular +* matrix, and ' indicates conjugate transpose. +* +* 3. If the leading i-by-i principal minor is not positive definite, +* then the routine returns with INFO = i. Otherwise, the factored +* form of A is used to estimate the condition number of the matrix +* A. If the reciprocal of the condition number is less than machine +* precision, INFO = N+1 is returned as a warning, but the routine +* still goes on to solve for X and compute error bounds as +* described below. +* +* 4. The system of equations is solved for X using the factored form +* of A. +* +* 5. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* 6. If equilibration was used, the matrix X is premultiplied by +* diag(S) so that it solves the original system before +* equilibration. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of the matrix A is +* supplied on entry, and if not, whether the matrix A should be +* equilibrated before it is factored. +* = 'F': On entry, AFP contains the factored form of A. +* If EQUED = 'Y', the matrix A has been equilibrated +* with scaling factors given by S. AP and AFP will not +* be modified. +* = 'N': The matrix A will be copied to AFP and factored. +* = 'E': The matrix A will be equilibrated if necessary, then +* copied to AFP and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array, except if FACT = 'F' +* and EQUED = 'Y', then A must contain the equilibrated matrix +* diag(S)*A*diag(S). The j-th column of A is stored in the +* array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. A is not modified if +* FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. +* +* On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +* diag(S)*A*diag(S). +* +* AFP (input or output) COMPLEX array, dimension (N*(N+1)/2) +* If FACT = 'F', then AFP is an input argument and on entry +* contains the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H, in the same storage +* format as A. If EQUED .ne. 'N', then AFP is the factored +* form of the equilibrated matrix A. +* +* If FACT = 'N', then AFP is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H of the original +* matrix A. +* +* If FACT = 'E', then AFP is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H of the equilibrated +* matrix A (see the description of AP for the form of the +* equilibrated matrix). +* +* EQUED (input or output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration (always true if FACT = 'N'). +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* EQUED is an input argument if FACT = 'F'; otherwise, it is an +* output argument. +* +* S (input or output) REAL array, dimension (N) +* The scale factors for A; not accessed if EQUED = 'N'. S is +* an input argument if FACT = 'F'; otherwise, S is an output +* argument. If FACT = 'F' and EQUED = 'Y', each element of S +* must be positive. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +* B is overwritten by diag(S) * B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) COMPLEX array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +* the original system of equations. Note that if EQUED = 'Y', +* A and B are modified on exit, and the solution to the +* equilibrated system is inv(diag(S))*X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) REAL +* The estimate of the reciprocal condition number of the matrix +* A after equilibration (if done). If RCOND is less than the +* machine precision (in particular, if RCOND = 0), the matrix +* is singular to working precision. This condition is +* indicated by a return code of INFO > 0. +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: the leading minor of order i of A is +* not positive definite, so the factorization +* could not be completed, and the solution has not +* been computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* Further Details +* =============== +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the Hermitian matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = conjg(aji)) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER I, INFEQU, J + REAL AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHP, SLAMCH + EXTERNAL LSAME, CLANHP, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLACPY, CLAQHP, CPPCON, CPPEQU, CPPRFS, + $ CPPTRF, CPPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -7 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -8 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPPSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL CPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL CLAQHP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + CALL CCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 ) + CALL CPPTRF( UPLO, N, AFP, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = CLANHP( 'I', UPLO, N, AP, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL CPPCON( UPLO, N, AFP, ANORM, RCOND, WORK, RWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution matrix X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CPPTRS( UPLO, N, NRHS, AFP, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL CPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, BERR, + $ WORK, RWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 50 J = 1, NRHS + DO 40 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 40 CONTINUE + 50 CONTINUE + DO 60 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 60 CONTINUE + END IF +* + RETURN +* +* End of CPPSVX +* + END diff --git a/costa/native/external/lapack/cpptrf.f b/costa/native/external/lapack/cpptrf.f new file mode 100644 index 000000000..a7ced5934 --- /dev/null +++ b/costa/native/external/lapack/cpptrf.f @@ -0,0 +1,179 @@ + SUBROUTINE CPPTRF( UPLO, N, AP, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + COMPLEX AP( * ) +* .. +* +* Purpose +* ======= +* +* CPPTRF computes the Cholesky factorization of a complex Hermitian +* positive definite matrix A stored in packed format. +* +* The factorization has the form +* A = U**H * U, if UPLO = 'U', or +* A = L * L**H, if UPLO = 'L', +* where U is an upper triangular matrix and L is lower triangular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U**H*U or A = L*L**H, in the same +* storage format as A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i is not +* positive definite, and the factorization could not be +* completed. +* +* Further Details +* =============== +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the Hermitian matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = conjg(aji)) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JC, JJ + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CHPR, CSSCAL, CTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPPTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + JJ = 0 + DO 10 J = 1, N + JC = JJ + 1 + JJ = JJ + J +* +* Compute elements 1:J-1 of column J. +* + IF( J.GT.1 ) + $ CALL CTPSV( 'Upper', 'Conjugate transpose', 'Non-unit', + $ J-1, AP, AP( JC ), 1 ) +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = REAL( AP( JJ ) ) - CDOTC( J-1, AP( JC ), 1, AP( JC ), + $ 1 ) + IF( AJJ.LE.ZERO ) THEN + AP( JJ ) = AJJ + GO TO 30 + END IF + AP( JJ ) = SQRT( AJJ ) + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + JJ = 1 + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = REAL( AP( JJ ) ) + IF( AJJ.LE.ZERO ) THEN + AP( JJ ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + AP( JJ ) = AJJ +* +* Compute elements J+1:N of column J and update the trailing +* submatrix. +* + IF( J.LT.N ) THEN + CALL CSSCAL( N-J, ONE / AJJ, AP( JJ+1 ), 1 ) + CALL CHPR( 'Lower', N-J, -ONE, AP( JJ+1 ), 1, + $ AP( JJ+N-J+1 ) ) + JJ = JJ + N - J + 1 + END IF + 20 CONTINUE + END IF + GO TO 40 +* + 30 CONTINUE + INFO = J +* + 40 CONTINUE + RETURN +* +* End of CPPTRF +* + END diff --git a/costa/native/external/lapack/cpptri.f b/costa/native/external/lapack/cpptri.f new file mode 100644 index 000000000..f0e1d6151 --- /dev/null +++ b/costa/native/external/lapack/cpptri.f @@ -0,0 +1,131 @@ + SUBROUTINE CPPTRI( UPLO, N, AP, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + COMPLEX AP( * ) +* .. +* +* Purpose +* ======= +* +* CPPTRI computes the inverse of a complex Hermitian positive definite +* matrix A using the Cholesky factorization A = U**H*U or A = L*L**H +* computed by CPPTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular factor is stored in AP; +* = 'L': Lower triangular factor is stored in AP. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H, packed columnwise as +* a linear array. The j-th column of U or L is stored in the +* array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +* +* On exit, the upper or lower triangle of the (Hermitian) +* inverse of A, overwriting the input factor U or L. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the (i,i) element of the factor U or L is +* zero, and the inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JC, JJ, JJN + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CHPR, CSSCAL, CTPMV, CTPTRI, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPPTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Invert the triangular Cholesky factor U or L. +* + CALL CTPTRI( UPLO, 'Non-unit', N, AP, INFO ) + IF( INFO.GT.0 ) + $ RETURN + IF( UPPER ) THEN +* +* Compute the product inv(U) * inv(U)'. +* + JJ = 0 + DO 10 J = 1, N + JC = JJ + 1 + JJ = JJ + J + IF( J.GT.1 ) + $ CALL CHPR( 'Upper', J-1, ONE, AP( JC ), 1, AP ) + AJJ = AP( JJ ) + CALL CSSCAL( J, AJJ, AP( JC ), 1 ) + 10 CONTINUE +* + ELSE +* +* Compute the product inv(L)' * inv(L). +* + JJ = 1 + DO 20 J = 1, N + JJN = JJ + N - J + 1 + AP( JJ ) = REAL( CDOTC( N-J+1, AP( JJ ), 1, AP( JJ ), 1 ) ) + IF( J.LT.N ) + $ CALL CTPMV( 'Lower', 'Conjugate transpose', 'Non-unit', + $ N-J, AP( JJN ), AP( JJ+1 ), 1 ) + JJ = JJN + 20 CONTINUE + END IF +* + RETURN +* +* End of CPPTRI +* + END diff --git a/costa/native/external/lapack/cpptrs.f b/costa/native/external/lapack/cpptrs.f new file mode 100644 index 000000000..d82999c27 --- /dev/null +++ b/costa/native/external/lapack/cpptrs.f @@ -0,0 +1,135 @@ + SUBROUTINE CPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CPPTRS solves a system of linear equations A*X = B with a Hermitian +* positive definite matrix A in packed storage using the Cholesky +* factorization A = U**H*U or A = L*L**H computed by CPPTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input) COMPLEX array, dimension (N*(N+1)/2) +* The triangular factor U or L from the Cholesky factorization +* A = U**H*U or A = L*L**H, packed columnwise in a linear +* array. The j-th column of U or L is stored in the array AP +* as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U'*U. +* + DO 10 I = 1, NRHS +* +* Solve U'*X = B, overwriting B with X. +* + CALL CTPSV( 'Upper', 'Conjugate transpose', 'Non-unit', N, + $ AP, B( 1, I ), 1 ) +* +* Solve U*X = B, overwriting B with X. +* + CALL CTPSV( 'Upper', 'No transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) + 10 CONTINUE + ELSE +* +* Solve A*X = B where A = L*L'. +* + DO 20 I = 1, NRHS +* +* Solve L*Y = B, overwriting B with X. +* + CALL CTPSV( 'Lower', 'No transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) +* +* Solve L'*X = Y, overwriting B with X. +* + CALL CTPSV( 'Lower', 'Conjugate transpose', 'Non-unit', N, + $ AP, B( 1, I ), 1 ) + 20 CONTINUE + END IF +* + RETURN +* +* End of CPPTRS +* + END diff --git a/costa/native/external/lapack/cptcon.f b/costa/native/external/lapack/cptcon.f new file mode 100644 index 000000000..1b2cd5095 --- /dev/null +++ b/costa/native/external/lapack/cptcon.f @@ -0,0 +1,151 @@ + SUBROUTINE CPTCON( N, D, E, ANORM, RCOND, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + REAL D( * ), RWORK( * ) + COMPLEX E( * ) +* .. +* +* Purpose +* ======= +* +* CPTCON computes the reciprocal of the condition number (in the +* 1-norm) of a complex Hermitian positive definite tridiagonal matrix +* using the factorization A = L*D*L**H or A = U**H*D*U computed by +* CPTTRF. +* +* Norm(inv(A)) is computed by a direct method, and the reciprocal of +* the condition number is computed as +* RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the diagonal matrix D from the +* factorization of A, as computed by CPTTRF. +* +* E (input) COMPLEX array, dimension (N-1) +* The (n-1) off-diagonal elements of the unit bidiagonal factor +* U or L from the factorization of A, as computed by CPTTRF. +* +* ANORM (input) REAL +* The 1-norm of the original matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is the +* 1-norm of inv(A) computed in this routine. +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The method used is described in Nicholas J. Higham, "Efficient +* Algorithms for Computing the Condition Number of a Tridiagonal +* Matrix", SIAM J. Sci. Stat. Comput., Vol. 7, No. 1, January 1986. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IX + REAL AINVNM +* .. +* .. External Functions .. + INTEGER ISAMAX + EXTERNAL ISAMAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPTCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* +* Check that D(1:N) is positive. +* + DO 10 I = 1, N + IF( D( I ).LE.ZERO ) + $ RETURN + 10 CONTINUE +* +* Solve M(A) * x = e, where M(A) = (m(i,j)) is given by +* +* m(i,j) = abs(A(i,j)), i = j, +* m(i,j) = -abs(A(i,j)), i .ne. j, +* +* and e = [ 1, 1, ..., 1 ]'. Note M(A) = M(L)*D*M(L)'. +* +* Solve M(L) * x = e. +* + RWORK( 1 ) = ONE + DO 20 I = 2, N + RWORK( I ) = ONE + RWORK( I-1 )*ABS( E( I-1 ) ) + 20 CONTINUE +* +* Solve D * M(L)' * x = b. +* + RWORK( N ) = RWORK( N ) / D( N ) + DO 30 I = N - 1, 1, -1 + RWORK( I ) = RWORK( I ) / D( I ) + RWORK( I+1 )*ABS( E( I ) ) + 30 CONTINUE +* +* Compute AINVNM = max(x(i)), 1<=i<=n. +* + IX = ISAMAX( N, RWORK, 1 ) + AINVNM = ABS( RWORK( IX ) ) +* +* Compute the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of CPTCON +* + END diff --git a/costa/native/external/lapack/cpteqr.f b/costa/native/external/lapack/cpteqr.f new file mode 100644 index 000000000..ff28815a1 --- /dev/null +++ b/costa/native/external/lapack/cpteqr.f @@ -0,0 +1,191 @@ + SUBROUTINE CPTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ), WORK( * ) + COMPLEX Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CPTEQR computes all eigenvalues and, optionally, eigenvectors of a +* symmetric positive definite tridiagonal matrix by first factoring the +* matrix using SPTTRF and then calling CBDSQR to compute the singular +* values of the bidiagonal factor. +* +* This routine computes the eigenvalues of the positive definite +* tridiagonal matrix to high relative accuracy. This means that if the +* eigenvalues range over many orders of magnitude in size, then the +* small eigenvalues and corresponding eigenvectors will be computed +* more accurately than, for example, with the standard QR method. +* +* The eigenvectors of a full or band positive definite Hermitian matrix +* can also be found if CHETRD, CHPTRD, or CHBTRD has been used to +* reduce this matrix to tridiagonal form. (The reduction to +* tridiagonal form, however, may preclude the possibility of obtaining +* high relative accuracy in the small eigenvalues of the original +* matrix, if these eigenvalues range over many orders of magnitude.) +* +* Arguments +* ========= +* +* COMPZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only. +* = 'V': Compute eigenvectors of original Hermitian +* matrix also. Array Z contains the unitary matrix +* used to reduce the original matrix to tridiagonal +* form. +* = 'I': Compute eigenvectors of tridiagonal matrix also. +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix. +* On normal exit, D contains the eigenvalues, in descending +* order. +* +* E (input/output) REAL array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix. +* On exit, E has been destroyed. +* +* Z (input/output) COMPLEX array, dimension (LDZ, N) +* On entry, if COMPZ = 'V', the unitary matrix used in the +* reduction to tridiagonal form. +* On exit, if COMPZ = 'V', the orthonormal eigenvectors of the +* original Hermitian matrix; +* if COMPZ = 'I', the orthonormal eigenvectors of the +* tridiagonal matrix. +* If INFO > 0 on exit, Z contains the eigenvectors associated +* with only the stored eigenvalues. +* If COMPZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* COMPZ = 'V' or 'I', LDZ >= max(1,N). +* +* WORK (workspace) REAL array, dimension (4*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, and i is: +* <= N the Cholesky factorization of the matrix could +* not be performed because the i-th principal minor +* was not positive definite. +* > N the SVD algorithm failed to converge; +* if INFO = N+i, i off-diagonal elements of the +* bidiagonal factor did not converge to zero. +* +* ==================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CBDSQR, CLASET, SPTTRF, XERBLA +* .. +* .. Local Arrays .. + COMPLEX C( 1, 1 ), VT( 1, 1 ) +* .. +* .. Local Scalars .. + INTEGER I, ICOMPZ, NRU +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPTEQR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ICOMPZ.GT.0 ) + $ Z( 1, 1 ) = CONE + RETURN + END IF + IF( ICOMPZ.EQ.2 ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, Z, LDZ ) +* +* Call SPTTRF to factor the matrix. +* + CALL SPTTRF( N, D, E, INFO ) + IF( INFO.NE.0 ) + $ RETURN + DO 10 I = 1, N + D( I ) = SQRT( D( I ) ) + 10 CONTINUE + DO 20 I = 1, N - 1 + E( I ) = E( I )*D( I ) + 20 CONTINUE +* +* Call CBDSQR to compute the singular values/vectors of the +* bidiagonal factor. +* + IF( ICOMPZ.GT.0 ) THEN + NRU = N + ELSE + NRU = 0 + END IF + CALL CBDSQR( 'Lower', N, 0, NRU, 0, D, E, VT, 1, Z, LDZ, C, 1, + $ WORK, INFO ) +* +* Square the singular values. +* + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + D( I ) = D( I )*D( I ) + 30 CONTINUE + ELSE + INFO = N + INFO + END IF +* + RETURN +* +* End of CPTEQR +* + END diff --git a/costa/native/external/lapack/cptrfs.f b/costa/native/external/lapack/cptrfs.f new file mode 100644 index 000000000..8dc5df10c --- /dev/null +++ b/costa/native/external/lapack/cptrfs.f @@ -0,0 +1,367 @@ + SUBROUTINE CPTRFS( UPLO, N, NRHS, D, E, DF, EF, B, LDB, X, LDX, + $ FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + REAL BERR( * ), D( * ), DF( * ), FERR( * ), + $ RWORK( * ) + COMPLEX B( LDB, * ), E( * ), EF( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CPTRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is Hermitian positive definite +* and tridiagonal, and provides error bounds and backward error +* estimates for the solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the superdiagonal or the subdiagonal of the +* tridiagonal matrix A is stored and the form of the +* factorization: +* = 'U': E is the superdiagonal of A, and A = U**H*D*U; +* = 'L': E is the subdiagonal of A, and A = L*D*L**H. +* (The two forms are equivalent if A is real.) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* D (input) REAL array, dimension (N) +* The n real diagonal elements of the tridiagonal matrix A. +* +* E (input) COMPLEX array, dimension (N-1) +* The (n-1) off-diagonal elements of the tridiagonal matrix A +* (see UPLO). +* +* DF (input) REAL array, dimension (N) +* The n diagonal elements of the diagonal matrix D from +* the factorization computed by CPTTRF. +* +* EF (input) COMPLEX array, dimension (N-1) +* The (n-1) off-diagonal elements of the unit bidiagonal +* factor U or L from the factorization computed by CPTTRF +* (see UPLO). +* +* B (input) COMPLEX array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) COMPLEX array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by CPTTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IX, J, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN + COMPLEX BI, CX, DX, EX, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL LSAME, ISAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CPTTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPTRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = 4 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 100 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X. Also compute +* abs(A)*abs(x) + abs(b) for use in the backward error bound. +* + IF( UPPER ) THEN + IF( N.EQ.1 ) THEN + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + WORK( 1 ) = BI - DX + RWORK( 1 ) = CABS1( BI ) + CABS1( DX ) + ELSE + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + EX = E( 1 )*X( 2, J ) + WORK( 1 ) = BI - DX - EX + RWORK( 1 ) = CABS1( BI ) + CABS1( DX ) + + $ CABS1( E( 1 ) )*CABS1( X( 2, J ) ) + DO 30 I = 2, N - 1 + BI = B( I, J ) + CX = CONJG( E( I-1 ) )*X( I-1, J ) + DX = D( I )*X( I, J ) + EX = E( I )*X( I+1, J ) + WORK( I ) = BI - CX - DX - EX + RWORK( I ) = CABS1( BI ) + + $ CABS1( E( I-1 ) )*CABS1( X( I-1, J ) ) + + $ CABS1( DX ) + CABS1( E( I ) )* + $ CABS1( X( I+1, J ) ) + 30 CONTINUE + BI = B( N, J ) + CX = CONJG( E( N-1 ) )*X( N-1, J ) + DX = D( N )*X( N, J ) + WORK( N ) = BI - CX - DX + RWORK( N ) = CABS1( BI ) + CABS1( E( N-1 ) )* + $ CABS1( X( N-1, J ) ) + CABS1( DX ) + END IF + ELSE + IF( N.EQ.1 ) THEN + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + WORK( 1 ) = BI - DX + RWORK( 1 ) = CABS1( BI ) + CABS1( DX ) + ELSE + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + EX = CONJG( E( 1 ) )*X( 2, J ) + WORK( 1 ) = BI - DX - EX + RWORK( 1 ) = CABS1( BI ) + CABS1( DX ) + + $ CABS1( E( 1 ) )*CABS1( X( 2, J ) ) + DO 40 I = 2, N - 1 + BI = B( I, J ) + CX = E( I-1 )*X( I-1, J ) + DX = D( I )*X( I, J ) + EX = CONJG( E( I ) )*X( I+1, J ) + WORK( I ) = BI - CX - DX - EX + RWORK( I ) = CABS1( BI ) + + $ CABS1( E( I-1 ) )*CABS1( X( I-1, J ) ) + + $ CABS1( DX ) + CABS1( E( I ) )* + $ CABS1( X( I+1, J ) ) + 40 CONTINUE + BI = B( N, J ) + CX = E( N-1 )*X( N-1, J ) + DX = D( N )*X( N, J ) + WORK( N ) = BI - CX - DX + RWORK( N ) = CABS1( BI ) + CABS1( E( N-1 ) )* + $ CABS1( X( N-1, J ) ) + CABS1( DX ) + END IF + END IF +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + S = ZERO + DO 50 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 50 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL CPTTRS( UPLO, N, 1, DF, EF, WORK, N, INFO ) + CALL CAXPY( N, CMPLX( ONE ), WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* + DO 60 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 60 CONTINUE + IX = ISAMAX( N, RWORK, 1 ) + FERR( J ) = RWORK( IX ) +* +* Estimate the norm of inv(A). +* +* Solve M(A) * x = e, where M(A) = (m(i,j)) is given by +* +* m(i,j) = abs(A(i,j)), i = j, +* m(i,j) = -abs(A(i,j)), i .ne. j, +* +* and e = [ 1, 1, ..., 1 ]'. Note M(A) = M(L)*D*M(L)'. +* +* Solve M(L) * x = e. +* + RWORK( 1 ) = ONE + DO 70 I = 2, N + RWORK( I ) = ONE + RWORK( I-1 )*ABS( EF( I-1 ) ) + 70 CONTINUE +* +* Solve D * M(L)' * x = b. +* + RWORK( N ) = RWORK( N ) / DF( N ) + DO 80 I = N - 1, 1, -1 + RWORK( I ) = RWORK( I ) / DF( I ) + + $ RWORK( I+1 )*ABS( EF( I ) ) + 80 CONTINUE +* +* Compute norm(inv(A)) = max(x(i)), 1<=i<=n. +* + IX = ISAMAX( N, RWORK, 1 ) + FERR( J ) = FERR( J )*ABS( RWORK( IX ) ) +* +* Normalize error. +* + LSTRES = ZERO + DO 90 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 90 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 100 CONTINUE +* + RETURN +* +* End of CPTRFS +* + END diff --git a/costa/native/external/lapack/cptsv.f b/costa/native/external/lapack/cptsv.f new file mode 100644 index 000000000..16172d0ea --- /dev/null +++ b/costa/native/external/lapack/cptsv.f @@ -0,0 +1,101 @@ + SUBROUTINE CPTSV( N, NRHS, D, E, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 25, 1997 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL D( * ) + COMPLEX B( LDB, * ), E( * ) +* .. +* +* Purpose +* ======= +* +* CPTSV computes the solution to a complex system of linear equations +* A*X = B, where A is an N-by-N Hermitian positive definite tridiagonal +* matrix, and X and B are N-by-NRHS matrices. +* +* A is factored as A = L*D*L**H, and the factored form of A is then +* used to solve the system of equations. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* A. On exit, the n diagonal elements of the diagonal matrix +* D from the factorization A = L*D*L**H. +* +* E (input/output) COMPLEX array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix A. On exit, the (n-1) subdiagonal elements of the +* unit bidiagonal factor L from the L*D*L**H factorization of +* A. E can also be regarded as the superdiagonal of the unit +* bidiagonal factor U from the U**H*D*U factorization of A. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i is not +* positive definite, and the solution has not been +* computed. The factorization has not been completed +* unless i = N. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL CPTTRF, CPTTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPTSV ', -INFO ) + RETURN + END IF +* +* Compute the L*D*L' (or U'*D*U) factorization of A. +* + CALL CPTTRF( N, D, E, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CPTTRS( 'Lower', N, NRHS, D, E, B, LDB, INFO ) + END IF + RETURN +* +* End of CPTSV +* + END diff --git a/costa/native/external/lapack/cptsvx.f b/costa/native/external/lapack/cptsvx.f new file mode 100644 index 000000000..7ae483786 --- /dev/null +++ b/costa/native/external/lapack/cptsvx.f @@ -0,0 +1,238 @@ + SUBROUTINE CPTSVX( FACT, N, NRHS, D, E, DF, EF, B, LDB, X, LDX, + $ RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER FACT + INTEGER INFO, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + REAL BERR( * ), D( * ), DF( * ), FERR( * ), + $ RWORK( * ) + COMPLEX B( LDB, * ), E( * ), EF( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CPTSVX uses the factorization A = L*D*L**H to compute the solution +* to a complex system of linear equations A*X = B, where A is an +* N-by-N Hermitian positive definite tridiagonal matrix and X and B +* are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'N', the matrix A is factored as A = L*D*L**H, where L +* is a unit lower bidiagonal matrix and D is diagonal. The +* factorization can also be regarded as having the form +* A = U**H*D*U. +* +* 2. If the leading i-by-i principal minor is not positive definite, +* then the routine returns with INFO = i. Otherwise, the factored +* form of A is used to estimate the condition number of the matrix +* A. If the reciprocal of the condition number is less than machine +* precision, INFO = N+1 is returned as a warning, but the routine +* still goes on to solve for X and compute error bounds as +* described below. +* +* 3. The system of equations is solved for X using the factored form +* of A. +* +* 4. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of the matrix +* A is supplied on entry. +* = 'F': On entry, DF and EF contain the factored form of A. +* D, E, DF, and EF will not be modified. +* = 'N': The matrix A will be copied to DF and EF and +* factored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the tridiagonal matrix A. +* +* E (input) COMPLEX array, dimension (N-1) +* The (n-1) subdiagonal elements of the tridiagonal matrix A. +* +* DF (input or output) REAL array, dimension (N) +* If FACT = 'F', then DF is an input argument and on entry +* contains the n diagonal elements of the diagonal matrix D +* from the L*D*L**H factorization of A. +* If FACT = 'N', then DF is an output argument and on exit +* contains the n diagonal elements of the diagonal matrix D +* from the L*D*L**H factorization of A. +* +* EF (input or output) COMPLEX array, dimension (N-1) +* If FACT = 'F', then EF is an input argument and on entry +* contains the (n-1) subdiagonal elements of the unit +* bidiagonal factor L from the L*D*L**H factorization of A. +* If FACT = 'N', then EF is an output argument and on exit +* contains the (n-1) subdiagonal elements of the unit +* bidiagonal factor L from the L*D*L**H factorization of A. +* +* B (input) COMPLEX array, dimension (LDB,NRHS) +* The N-by-NRHS right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) COMPLEX array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) REAL +* The reciprocal condition number of the matrix A. If RCOND +* is less than the machine precision (in particular, if +* RCOND = 0), the matrix is singular to working precision. +* This condition is indicated by a return code of INFO > 0. +* +* FERR (output) REAL array, dimension (NRHS) +* The forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in any +* element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: the leading minor of order i of A is +* not positive definite, so the factorization +* could not be completed, and the solution has not +* been computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT + REAL ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHT, SLAMCH + EXTERNAL LSAME, CLANHT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLACPY, CPTCON, CPTRFS, CPTTRF, CPTTRS, + $ SCOPY, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPTSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the L*D*L' (or U'*D*U) factorization of A. +* + CALL SCOPY( N, D, 1, DF, 1 ) + IF( N.GT.1 ) + $ CALL CCOPY( N-1, E, 1, EF, 1 ) + CALL CPTTRF( N, DF, EF, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = CLANHT( '1', N, D, E ) +* +* Compute the reciprocal of the condition number of A. +* + CALL CPTCON( N, DF, EF, ANORM, RCOND, RWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution vectors X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CPTTRS( 'Lower', N, NRHS, DF, EF, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL CPTRFS( 'Lower', N, NRHS, D, E, DF, EF, B, LDB, X, LDX, FERR, + $ BERR, WORK, RWORK, INFO ) +* + RETURN +* +* End of CPTSVX +* + END diff --git a/costa/native/external/lapack/cpttrf.f b/costa/native/external/lapack/cpttrf.f new file mode 100644 index 000000000..ac451bd70 --- /dev/null +++ b/costa/native/external/lapack/cpttrf.f @@ -0,0 +1,169 @@ + SUBROUTINE CPTTRF( N, D, E, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL D( * ) + COMPLEX E( * ) +* .. +* +* Purpose +* ======= +* +* CPTTRF computes the L*D*L' factorization of a complex Hermitian +* positive definite tridiagonal matrix A. The factorization may also +* be regarded as having the form A = U'*D*U. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* A. On exit, the n diagonal elements of the diagonal matrix +* D from the L*D*L' factorization of A. +* +* E (input/output) COMPLEX array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix A. On exit, the (n-1) subdiagonal elements of the +* unit bidiagonal factor L from the L*D*L' factorization of A. +* E can also be regarded as the superdiagonal of the unit +* bidiagonal factor U from the U'*D*U factorization of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, the leading minor of order k is not +* positive definite; if k < N, the factorization could not +* be completed, while if k = N, the factorization was +* completed, but D(N) = 0. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, I4 + REAL EII, EIR, F, G +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG, CMPLX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'CPTTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the L*D*L' (or U'*D*U) factorization of A. +* + I4 = MOD( N-1, 4 ) + DO 10 I = 1, I4 + IF( D( I ).LE.ZERO ) THEN + INFO = I + GO TO 20 + END IF + EIR = REAL( E( I ) ) + EII = AIMAG( E( I ) ) + F = EIR / D( I ) + G = EII / D( I ) + E( I ) = CMPLX( F, G ) + D( I+1 ) = D( I+1 ) - F*EIR - G*EII + 10 CONTINUE +* + DO 110 I = I4+1, N - 4, 4 +* +* Drop out of the loop if d(i) <= 0: the matrix is not positive +* definite. +* + IF( D( I ).LE.ZERO ) THEN + INFO = I + GO TO 20 + END IF +* +* Solve for e(i) and d(i+1). +* + EIR = REAL( E( I ) ) + EII = AIMAG( E( I ) ) + F = EIR / D( I ) + G = EII / D( I ) + E( I ) = CMPLX( F, G ) + D( I+1 ) = D( I+1 ) - F*EIR - G*EII +* + IF( D( I+1 ).LE.ZERO ) THEN + INFO = I+1 + GO TO 20 + END IF +* +* Solve for e(i+1) and d(i+2). +* + EIR = REAL( E( I+1 ) ) + EII = AIMAG( E( I+1 ) ) + F = EIR / D( I+1 ) + G = EII / D( I+1 ) + E( I+1 ) = CMPLX( F, G ) + D( I+2 ) = D( I+2 ) - F*EIR - G*EII +* + IF( D( I+2 ).LE.ZERO ) THEN + INFO = I+2 + GO TO 20 + END IF +* +* Solve for e(i+2) and d(i+3). +* + EIR = REAL( E( I+2 ) ) + EII = AIMAG( E( I+2 ) ) + F = EIR / D( I+2 ) + G = EII / D( I+2 ) + E( I+2 ) = CMPLX( F, G ) + D( I+3 ) = D( I+3 ) - F*EIR - G*EII +* + IF( D( I+3 ).LE.ZERO ) THEN + INFO = I+3 + GO TO 20 + END IF +* +* Solve for e(i+3) and d(i+4). +* + EIR = REAL( E( I+3 ) ) + EII = AIMAG( E( I+3 ) ) + F = EIR / D( I+3 ) + G = EII / D( I+3 ) + E( I+3 ) = CMPLX( F, G ) + D( I+4 ) = D( I+4 ) - F*EIR - G*EII + 110 CONTINUE +* +* Check d(n) for positive definiteness. +* + IF( D( N ).LE.ZERO ) + $ INFO = N +* + 20 CONTINUE + RETURN +* +* End of CPTTRF +* + END diff --git a/costa/native/external/lapack/cpttrs.f b/costa/native/external/lapack/cpttrs.f new file mode 100644 index 000000000..2438caf1c --- /dev/null +++ b/costa/native/external/lapack/cpttrs.f @@ -0,0 +1,136 @@ + SUBROUTINE CPTTRS( UPLO, N, NRHS, D, E, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL D( * ) + COMPLEX B( LDB, * ), E( * ) +* .. +* +* Purpose +* ======= +* +* CPTTRS solves a tridiagonal system of the form +* A * X = B +* using the factorization A = U'*D*U or A = L*D*L' computed by CPTTRF. +* D is a diagonal matrix specified in the vector D, U (or L) is a unit +* bidiagonal matrix whose superdiagonal (subdiagonal) is specified in +* the vector E, and X and B are N by NRHS matrices. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies the form of the factorization and whether the +* vector E is the superdiagonal of the upper bidiagonal factor +* U or the subdiagonal of the lower bidiagonal factor L. +* = 'U': A = U'*D*U, E is the superdiagonal of U +* = 'L': A = L*D*L', E is the subdiagonal of L +* +* N (input) INTEGER +* The order of the tridiagonal matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the diagonal matrix D from the +* factorization A = U'*D*U or A = L*D*L'. +* +* E (input) COMPLEX array, dimension (N-1) +* If UPLO = 'U', the (n-1) superdiagonal elements of the unit +* bidiagonal factor U from the factorization A = U'*D*U. +* If UPLO = 'L', the (n-1) subdiagonal elements of the unit +* bidiagonal factor L from the factorization A = L*D*L'. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the right hand side vectors B for the system of +* linear equations. +* On exit, the solution vectors, X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER IUPLO, J, JB, NB +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CPTTS2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + UPPER = ( UPLO.EQ.'U' .OR. UPLO.EQ.'u' ) + IF( .NOT.UPPER .AND. .NOT.( UPLO.EQ.'L' .OR. UPLO.EQ.'l' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPTTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Determine the number of right-hand sides to solve at a time. +* + IF( NRHS.EQ.1 ) THEN + NB = 1 + ELSE + NB = MAX( 1, ILAENV( 1, 'CPTTRS', UPLO, N, NRHS, -1, -1 ) ) + END IF +* +* Decode UPLO +* + IF( UPPER ) THEN + IUPLO = 1 + ELSE + IUPLO = 0 + END IF +* + IF( NB.GE.NRHS ) THEN + CALL CPTTS2( IUPLO, N, NRHS, D, E, B, LDB ) + ELSE + DO 10 J = 1, NRHS, NB + JB = MIN( NRHS-J+1, NB ) + CALL CPTTS2( IUPLO, N, JB, D, E, B( 1, J ), LDB ) + 10 CONTINUE + END IF +* + RETURN +* +* End of CPTTRS +* + END diff --git a/costa/native/external/lapack/cptts2.f b/costa/native/external/lapack/cptts2.f new file mode 100644 index 000000000..00e731bdd --- /dev/null +++ b/costa/native/external/lapack/cptts2.f @@ -0,0 +1,177 @@ + SUBROUTINE CPTTS2( IUPLO, N, NRHS, D, E, B, LDB ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER IUPLO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL D( * ) + COMPLEX B( LDB, * ), E( * ) +* .. +* +* Purpose +* ======= +* +* CPTTS2 solves a tridiagonal system of the form +* A * X = B +* using the factorization A = U'*D*U or A = L*D*L' computed by CPTTRF. +* D is a diagonal matrix specified in the vector D, U (or L) is a unit +* bidiagonal matrix whose superdiagonal (subdiagonal) is specified in +* the vector E, and X and B are N by NRHS matrices. +* +* Arguments +* ========= +* +* IUPLO (input) INTEGER +* Specifies the form of the factorization and whether the +* vector E is the superdiagonal of the upper bidiagonal factor +* U or the subdiagonal of the lower bidiagonal factor L. +* = 1: A = U'*D*U, E is the superdiagonal of U +* = 0: A = L*D*L', E is the subdiagonal of L +* +* N (input) INTEGER +* The order of the tridiagonal matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the diagonal matrix D from the +* factorization A = U'*D*U or A = L*D*L'. +* +* E (input) COMPLEX array, dimension (N-1) +* If IUPLO = 1, the (n-1) superdiagonal elements of the unit +* bidiagonal factor U from the factorization A = U'*D*U. +* If IUPLO = 0, the (n-1) subdiagonal elements of the unit +* bidiagonal factor L from the factorization A = L*D*L'. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the right hand side vectors B for the system of +* linear equations. +* On exit, the solution vectors, X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + IF( N.EQ.1 ) + $ CALL CSSCAL( NRHS, 1. / D( 1 ), B, LDB ) + RETURN + END IF +* + IF( IUPLO.EQ.1 ) THEN +* +* Solve A * X = B using the factorization A = U'*D*U, +* overwriting each right hand side vector with its solution. +* + IF( NRHS.LE.2 ) THEN + J = 1 + 5 CONTINUE +* +* Solve U' * x = b. +* + DO 10 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*CONJG( E( I-1 ) ) + 10 CONTINUE +* +* Solve D * U * x = b. +* + DO 20 I = 1, N + B( I, J ) = B( I, J ) / D( I ) + 20 CONTINUE + DO 30 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) - B( I+1, J )*E( I ) + 30 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 5 + END IF + ELSE + DO 60 J = 1, NRHS +* +* Solve U' * x = b. +* + DO 40 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*CONJG( E( I-1 ) ) + 40 CONTINUE +* +* Solve D * U * x = b. +* + B( N, J ) = B( N, J ) / D( N ) + DO 50 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) / D( I ) - B( I+1, J )*E( I ) + 50 CONTINUE + 60 CONTINUE + END IF + ELSE +* +* Solve A * X = B using the factorization A = L*D*L', +* overwriting each right hand side vector with its solution. +* + IF( NRHS.LE.2 ) THEN + J = 1 + 65 CONTINUE +* +* Solve L * x = b. +* + DO 70 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 ) + 70 CONTINUE +* +* Solve D * L' * x = b. +* + DO 80 I = 1, N + B( I, J ) = B( I, J ) / D( I ) + 80 CONTINUE + DO 90 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) - B( I+1, J )*CONJG( E( I ) ) + 90 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 65 + END IF + ELSE + DO 120 J = 1, NRHS +* +* Solve L * x = b. +* + DO 100 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 ) + 100 CONTINUE +* +* Solve D * L' * x = b. +* + B( N, J ) = B( N, J ) / D( N ) + DO 110 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) / D( I ) - + $ B( I+1, J )*CONJG( E( I ) ) + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of CPTTS2 +* + END diff --git a/costa/native/external/lapack/crot.f b/costa/native/external/lapack/crot.f new file mode 100644 index 000000000..4c3b26dc4 --- /dev/null +++ b/costa/native/external/lapack/crot.f @@ -0,0 +1,92 @@ + SUBROUTINE CROT( N, CX, INCX, CY, INCY, C, S ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + INTEGER INCX, INCY, N + REAL C + COMPLEX S +* .. +* .. Array Arguments .. + COMPLEX CX( * ), CY( * ) +* .. +* +* Purpose +* ======= +* +* CROT applies a plane rotation, where the cos (C) is real and the +* sin (S) is complex, and the vectors CX and CY are complex. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of elements in the vectors CX and CY. +* +* CX (input/output) COMPLEX array, dimension (N) +* On input, the vector X. +* On output, CX is overwritten with C*X + S*Y. +* +* INCX (input) INTEGER +* The increment between successive values of CY. INCX <> 0. +* +* CY (input/output) COMPLEX array, dimension (N) +* On input, the vector Y. +* On output, CY is overwritten with -CONJG(S)*X + C*Y. +* +* INCY (input) INTEGER +* The increment between successive values of CY. INCX <> 0. +* +* C (input) REAL +* S (input) COMPLEX +* C and S define a rotation +* [ C S ] +* [ -conjg(S) C ] +* where C*C + S*CONJG(S) = 1.0. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IX, IY + COMPLEX STEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) + $ RETURN + IF( INCX.EQ.1 .AND. INCY.EQ.1 ) + $ GO TO 20 +* +* Code for unequal increments or equal increments not equal to 1 +* + IX = 1 + IY = 1 + IF( INCX.LT.0 ) + $ IX = ( -N+1 )*INCX + 1 + IF( INCY.LT.0 ) + $ IY = ( -N+1 )*INCY + 1 + DO 10 I = 1, N + STEMP = C*CX( IX ) + S*CY( IY ) + CY( IY ) = C*CY( IY ) - CONJG( S )*CX( IX ) + CX( IX ) = STEMP + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + RETURN +* +* Code for both increments equal to 1 +* + 20 CONTINUE + DO 30 I = 1, N + STEMP = C*CX( I ) + S*CY( I ) + CY( I ) = C*CY( I ) - CONJG( S )*CX( I ) + CX( I ) = STEMP + 30 CONTINUE + RETURN + END diff --git a/costa/native/external/lapack/cspcon.f b/costa/native/external/lapack/cspcon.f new file mode 100644 index 000000000..a9df7975a --- /dev/null +++ b/costa/native/external/lapack/cspcon.f @@ -0,0 +1,155 @@ + SUBROUTINE CSPCON( UPLO, N, AP, IPIV, ANORM, RCOND, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CSPCON estimates the reciprocal of the condition number (in the +* 1-norm) of a complex symmetric packed matrix A using the +* factorization A = U*D*U**T or A = L*D*L**T computed by CSPTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input) COMPLEX array, dimension (N*(N+1)/2) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by CSPTRF, stored as a +* packed triangular matrix. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by CSPTRF. +* +* ANORM (input) REAL +* The 1-norm of the original matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IP, KASE + REAL AINVNM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACON, CSPTRS, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + IP = N*( N+1 ) / 2 + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP - I + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + IP = 1 + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP + N - I + 1 + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L') or inv(U*D*U'). +* + CALL CSPTRS( UPLO, N, 1, AP, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of CSPCON +* + END diff --git a/costa/native/external/lapack/cspmv.f b/costa/native/external/lapack/cspmv.f new file mode 100644 index 000000000..cf8d38865 --- /dev/null +++ b/costa/native/external/lapack/cspmv.f @@ -0,0 +1,265 @@ + SUBROUTINE CSPMV( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCX, INCY, N + COMPLEX ALPHA, BETA +* .. +* .. Array Arguments .. + COMPLEX AP( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CSPMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n symmetric matrix, supplied in packed form. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1 +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* AP - COMPLEX array, dimension at least +* ( ( N*( N + 1 ) )/2 ). +* Before entry, with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. +* Before entry, with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. +* Unchanged on exit. +* +* X - COMPLEX array, dimension at least +* ( 1 + ( N - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the N- +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - COMPLEX array, dimension at least +* ( 1 + ( N - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. On exit, Y is overwritten by the updated +* vector y. +* +* INCY - INTEGER +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY + COMPLEX TEMP1, TEMP2 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = 1 + ELSE IF( N.LT.0 ) THEN + INFO = 2 + ELSE IF( INCX.EQ.0 ) THEN + INFO = 6 + ELSE IF( INCY.EQ.0 ) THEN + INFO = 9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSPMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ) .OR. ( ( ALPHA.EQ.ZERO ) .AND. ( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 ) THEN + KX = 1 + ELSE + KX = 1 - ( N-1 )*INCX + END IF + IF( INCY.GT.0 ) THEN + KY = 1 + ELSE + KY = 1 - ( N-1 )*INCY + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE ) THEN + IF( INCY.EQ.1 ) THEN + IF( BETA.EQ.ZERO ) THEN + DO 10 I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20 I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO ) THEN + DO 30 I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40 I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + KK = 1 + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Form y when AP contains the upper triangle. +* + IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN + DO 60 J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + K = KK + DO 50 I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( I ) + K = K + 1 + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*AP( KK+J-1 ) + ALPHA*TEMP2 + KK = KK + J + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80 J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70 K = KK, KK + J - 2 + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*AP( KK+J-1 ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + J + 80 CONTINUE + END IF + ELSE +* +* Form y when AP contains the lower triangle. +* + IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN + DO 100 J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*AP( KK ) + K = KK + 1 + DO 90 I = J + 1, N + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( I ) + K = K + 1 + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + KK = KK + ( N-J+1 ) + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120 J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*AP( KK ) + IX = JX + IY = JY + DO 110 K = KK + 1, KK + N - J + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + ( N-J+1 ) + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of CSPMV +* + END diff --git a/costa/native/external/lapack/cspr.f b/costa/native/external/lapack/cspr.f new file mode 100644 index 000000000..45c505171 --- /dev/null +++ b/costa/native/external/lapack/cspr.f @@ -0,0 +1,214 @@ + SUBROUTINE CSPR( UPLO, N, ALPHA, X, INCX, AP ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCX, N + COMPLEX ALPHA +* .. +* .. Array Arguments .. + COMPLEX AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CSPR performs the symmetric rank 1 operation +* +* A := alpha*x*conjg( x' ) + A, +* +* where alpha is a complex scalar, x is an n element vector and A is an +* n by n symmetric matrix, supplied in packed form. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1 +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX array, dimension at least +* ( 1 + ( N - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the N- +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* AP - COMPLEX array, dimension at least +* ( ( N*( N + 1 ) )/2 ). +* Before entry, with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. On exit, the array +* AP is overwritten by the upper triangular part of the +* updated matrix. +* Before entry, with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. On exit, the array +* AP is overwritten by the lower triangular part of the +* updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, IX, J, JX, K, KK, KX + COMPLEX TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = 1 + ELSE IF( N.LT.0 ) THEN + INFO = 2 + ELSE IF( INCX.EQ.0 ) THEN + INFO = 5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSPR ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ) .OR. ( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 ) THEN + KX = 1 - ( N-1 )*INCX + ELSE IF( INCX.NE.1 ) THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* + KK = 1 + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Form A when upper triangle is stored in AP. +* + IF( INCX.EQ.1 ) THEN + DO 20 J = 1, N + IF( X( J ).NE.ZERO ) THEN + TEMP = ALPHA*X( J ) + K = KK + DO 10 I = 1, J - 1 + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 10 CONTINUE + AP( KK+J-1 ) = AP( KK+J-1 ) + X( J )*TEMP + ELSE + AP( KK+J-1 ) = AP( KK+J-1 ) + END IF + KK = KK + J + 20 CONTINUE + ELSE + JX = KX + DO 40 J = 1, N + IF( X( JX ).NE.ZERO ) THEN + TEMP = ALPHA*X( JX ) + IX = KX + DO 30 K = KK, KK + J - 2 + AP( K ) = AP( K ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + AP( KK+J-1 ) = AP( KK+J-1 ) + X( JX )*TEMP + ELSE + AP( KK+J-1 ) = AP( KK+J-1 ) + END IF + JX = JX + INCX + KK = KK + J + 40 CONTINUE + END IF + ELSE +* +* Form A when lower triangle is stored in AP. +* + IF( INCX.EQ.1 ) THEN + DO 60 J = 1, N + IF( X( J ).NE.ZERO ) THEN + TEMP = ALPHA*X( J ) + AP( KK ) = AP( KK ) + TEMP*X( J ) + K = KK + 1 + DO 50 I = J + 1, N + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 50 CONTINUE + ELSE + AP( KK ) = AP( KK ) + END IF + KK = KK + N - J + 1 + 60 CONTINUE + ELSE + JX = KX + DO 80 J = 1, N + IF( X( JX ).NE.ZERO ) THEN + TEMP = ALPHA*X( JX ) + AP( KK ) = AP( KK ) + TEMP*X( JX ) + IX = JX + DO 70 K = KK + 1, KK + N - J + IX = IX + INCX + AP( K ) = AP( K ) + X( IX )*TEMP + 70 CONTINUE + ELSE + AP( KK ) = AP( KK ) + END IF + JX = JX + INCX + KK = KK + N - J + 1 + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of CSPR +* + END diff --git a/costa/native/external/lapack/csprfs.f b/costa/native/external/lapack/csprfs.f new file mode 100644 index 000000000..ae5c18e34 --- /dev/null +++ b/costa/native/external/lapack/csprfs.f @@ -0,0 +1,336 @@ + SUBROUTINE CSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, + $ FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CSPRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is symmetric indefinite +* and packed, and provides error bounds and backward error estimates +* for the solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input) COMPLEX array, dimension (N*(N+1)/2) +* The upper or lower triangle of the symmetric matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* AFP (input) COMPLEX array, dimension (N*(N+1)/2) +* The factored form of the matrix A. AFP contains the block +* diagonal matrix D and the multipliers used to obtain the +* factor U or L from the factorization A = U*D*U**T or +* A = L*D*L**T as computed by CSPTRF, stored as a packed +* triangular matrix. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by CSPTRF. +* +* B (input) COMPLEX array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) COMPLEX array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by CSPTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IK, J, K, KASE, KK, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CLACON, CSPMV, CSPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL CCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL CSPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + KK = 1 + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + IK = KK + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 40 CONTINUE + RWORK( K ) = RWORK( K ) + CABS1( AP( KK+K-1 ) )*XK + S + KK = KK + K + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + CABS1( AP( KK ) )*XK + IK = KK + 1 + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + KK = KK + ( N-K+1 ) + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL CSPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + CALL CAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL CSPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL CSPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of CSPRFS +* + END diff --git a/costa/native/external/lapack/cspsv.f b/costa/native/external/lapack/cspsv.f new file mode 100644 index 000000000..7c9c86be8 --- /dev/null +++ b/costa/native/external/lapack/cspsv.f @@ -0,0 +1,149 @@ + SUBROUTINE CSPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CSPSV computes the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N symmetric matrix stored in packed format and X +* and B are N-by-NRHS matrices. +* +* The diagonal pivoting method is used to factor A as +* A = U * D * U**T, if UPLO = 'U', or +* A = L * D * L**T, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, D is symmetric and block diagonal with 1-by-1 +* and 2-by-2 diagonal blocks. The factored form of A is then used to +* solve the system of equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**T or A = L*D*L**T as computed by CSPTRF, stored as +* a packed triangular matrix in the same storage format as A. +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D, as +* determined by CSPTRF. If IPIV(k) > 0, then rows and columns +* k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +* diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +* then rows and columns k-1 and -IPIV(k) were interchanged and +* D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +* IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +* -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +* diagonal block. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, so the solution could not be +* computed. +* +* Further Details +* =============== +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the symmetric matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = aji) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSPTRF, CSPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSPSV ', -INFO ) + RETURN + END IF +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL CSPTRF( UPLO, N, AP, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* + END IF + RETURN +* +* End of CSPSV +* + END diff --git a/costa/native/external/lapack/cspsvx.f b/costa/native/external/lapack/cspsvx.f new file mode 100644 index 000000000..ea7b53177 --- /dev/null +++ b/costa/native/external/lapack/cspsvx.f @@ -0,0 +1,279 @@ + SUBROUTINE CSPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, + $ LDX, RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CSPSVX uses the diagonal pivoting factorization A = U*D*U**T or +* A = L*D*L**T to compute the solution to a complex system of linear +* equations A * X = B, where A is an N-by-N symmetric matrix stored +* in packed format and X and B are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'N', the diagonal pivoting method is used to factor A as +* A = U * D * U**T, if UPLO = 'U', or +* A = L * D * L**T, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices and D is symmetric and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. +* +* 2. If some D(i,i)=0, so that D is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 3. The system of equations is solved for X using the factored form +* of A. +* +* 4. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of A has been +* supplied on entry. +* = 'F': On entry, AFP and IPIV contain the factored form +* of A. AP, AFP and IPIV will not be modified. +* = 'N': The matrix A will be copied to AFP and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input) COMPLEX array, dimension (N*(N+1)/2) +* The upper or lower triangle of the symmetric matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* +* AFP (input or output) COMPLEX array, dimension (N*(N+1)/2) +* If FACT = 'F', then AFP is an input argument and on entry +* contains the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**T or A = L*D*L**T as computed by CSPTRF, stored as +* a packed triangular matrix in the same storage format as A. +* +* If FACT = 'N', then AFP is an output argument and on exit +* contains the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**T or A = L*D*L**T as computed by CSPTRF, stored as +* a packed triangular matrix in the same storage format as A. +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains details of the interchanges and the block structure +* of D, as determined by CSPTRF. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains details of the interchanges and the block structure +* of D, as determined by CSPTRF. +* +* B (input) COMPLEX array, dimension (LDB,NRHS) +* The N-by-NRHS right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) COMPLEX array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) REAL +* The estimate of the reciprocal condition number of the matrix +* A. If RCOND is less than the machine precision (in +* particular, if RCOND = 0), the matrix is singular to working +* precision. This condition is indicated by a return code of +* INFO > 0. +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: D(i,i) is exactly zero. The factorization +* has been completed but the factor D is exactly +* singular, so the solution and error bounds could +* not be computed. RCOND = 0 is returned. +* = N+1: D is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* Further Details +* =============== +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the symmetric matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = aji) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT + REAL ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANSP, SLAMCH + EXTERNAL LSAME, CLANSP, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLACPY, CSPCON, CSPRFS, CSPTRF, CSPTRS, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSPSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL CCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 ) + CALL CSPTRF( UPLO, N, AFP, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = CLANSP( 'I', UPLO, N, AP, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL CSPCON( UPLO, N, AFP, IPIV, ANORM, RCOND, WORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution vectors X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CSPTRS( UPLO, N, NRHS, AFP, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL CSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, FERR, + $ BERR, WORK, RWORK, INFO ) +* + RETURN +* +* End of CSPSVX +* + END diff --git a/costa/native/external/lapack/csptrf.f b/costa/native/external/lapack/csptrf.f new file mode 100644 index 000000000..7183cda51 --- /dev/null +++ b/costa/native/external/lapack/csptrf.f @@ -0,0 +1,556 @@ + SUBROUTINE CSPTRF( UPLO, N, AP, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AP( * ) +* .. +* +* Purpose +* ======= +* +* CSPTRF computes the factorization of a complex symmetric matrix A +* stored in packed format using the Bunch-Kaufman diagonal pivoting +* method: +* +* A = U*D*U**T or A = L*D*L**T +* +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is symmetric and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L, stored as a packed triangular +* matrix overwriting A (see below for further details). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, and division by zero will occur if it +* is used to solve a system of equations. +* +* Further Details +* =============== +* +* 5-96 - Based on modifications by J. Lewis, Boeing Computer Services +* Company +* +* If UPLO = 'U', then A = U*D*U', where +* U = P(n)*U(n)* ... *P(k)U(k)* ..., +* i.e., U is a product of terms P(k)*U(k), where k decreases from n to +* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I v 0 ) k-s +* U(k) = ( 0 I 0 ) s +* ( 0 0 I ) n-k +* k-s s n-k +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +* and A(k,k), and v overwrites A(1:k-2,k-1:k). +* +* If UPLO = 'L', then A = L*D*L', where +* L = P(1)*L(1)* ... *P(k)*L(k)* ..., +* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I 0 0 ) k-1 +* L(k) = ( 0 I 0 ) s +* ( 0 v I ) n-k-s+1 +* k-1 s n-k-s+1 +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KC, KK, KNC, KP, KPC, + $ KSTEP, KX, NPP + REAL ABSAKK, ALPHA, COLMAX, ROWMAX + COMPLEX D11, D12, D21, D22, R1, T, WK, WKM1, WKP1, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + EXTERNAL LSAME, ICAMAX +* .. +* .. External Subroutines .. + EXTERNAL CSCAL, CSPR, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL, SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSPTRF', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U' using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + KC = ( N-1 )*N / 2 + 1 + 10 CONTINUE + KNC = KC +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( AP( KC+K-1 ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = ICAMAX( K-1, AP( KC ), 1 ) + COLMAX = CABS1( AP( KC+IMAX-1 ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + ROWMAX = ZERO + JMAX = IMAX + KX = IMAX*( IMAX+1 ) / 2 + IMAX + DO 20 J = IMAX + 1, K + IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = CABS1( AP( KX ) ) + JMAX = J + END IF + KX = KX + J + 20 CONTINUE + KPC = ( IMAX-1 )*IMAX / 2 + 1 + IF( IMAX.GT.1 ) THEN + JMAX = ICAMAX( IMAX-1, AP( KPC ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( AP( KPC+IMAX-1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL CSWAP( KP-1, AP( KNC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 30 J = KP + 1, KK - 1 + KX = KX + J - 1 + T = AP( KNC+J-1 ) + AP( KNC+J-1 ) = AP( KX ) + AP( KX ) = T + 30 CONTINUE + T = AP( KNC+KK-1 ) + AP( KNC+KK-1 ) = AP( KPC+KP-1 ) + AP( KPC+KP-1 ) = T + IF( KSTEP.EQ.2 ) THEN + T = AP( KC+K-2 ) + AP( KC+K-2 ) = AP( KC+KP-1 ) + AP( KC+KP-1 ) = T + END IF + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)' +* + R1 = CONE / AP( KC+K-1 ) + CALL CSPR( UPLO, K-1, -R1, AP( KC ), 1, AP ) +* +* Store U(k) in column k +* + CALL CSCAL( K-1, R1, AP( KC ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )' +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )' +* + IF( K.GT.2 ) THEN +* + D12 = AP( K-1+( K-1 )*K / 2 ) + D22 = AP( K-1+( K-2 )*( K-1 ) / 2 ) / D12 + D11 = AP( K+( K-1 )*K / 2 ) / D12 + T = CONE / ( D11*D22-CONE ) + D12 = T / D12 +* + DO 50 J = K - 2, 1, -1 + WKM1 = D12*( D11*AP( J+( K-2 )*( K-1 ) / 2 )- + $ AP( J+( K-1 )*K / 2 ) ) + WK = D12*( D22*AP( J+( K-1 )*K / 2 )- + $ AP( J+( K-2 )*( K-1 ) / 2 ) ) + DO 40 I = J, 1, -1 + AP( I+( J-1 )*J / 2 ) = AP( I+( J-1 )*J / 2 ) - + $ AP( I+( K-1 )*K / 2 )*WK - + $ AP( I+( K-2 )*( K-1 ) / 2 )*WKM1 + 40 CONTINUE + AP( J+( K-1 )*K / 2 ) = WK + AP( J+( K-2 )*( K-1 ) / 2 ) = WKM1 + 50 CONTINUE +* + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + KC = KNC - K + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L' using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + KC = 1 + NPP = N*( N+1 ) / 2 + 60 CONTINUE + KNC = KC +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( AP( KC ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + ICAMAX( N-K, AP( KC+1 ), 1 ) + COLMAX = CABS1( AP( KC+IMAX-K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + ROWMAX = ZERO + KX = KC + IMAX - K + DO 70 J = K, IMAX - 1 + IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = CABS1( AP( KX ) ) + JMAX = J + END IF + KX = KX + N - J + 70 CONTINUE + KPC = NPP - ( N-IMAX+1 )*( N-IMAX+2 ) / 2 + 1 + IF( IMAX.LT.N ) THEN + JMAX = IMAX + ICAMAX( N-IMAX, AP( KPC+1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( AP( KPC ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC + N - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, AP( KNC+KP-KK+1 ), 1, AP( KPC+1 ), + $ 1 ) + KX = KNC + KP - KK + DO 80 J = KK + 1, KP - 1 + KX = KX + N - J + 1 + T = AP( KNC+J-KK ) + AP( KNC+J-KK ) = AP( KX ) + AP( KX ) = T + 80 CONTINUE + T = AP( KNC ) + AP( KNC ) = AP( KPC ) + AP( KPC ) = T + IF( KSTEP.EQ.2 ) THEN + T = AP( KC+1 ) + AP( KC+1 ) = AP( KC+KP-K ) + AP( KC+KP-K ) = T + END IF + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)' +* + R1 = CONE / AP( KC ) + CALL CSPR( UPLO, N-K, -R1, AP( KC+1 ), 1, + $ AP( KC+N-K+1 ) ) +* +* Store L(k) in column K +* + CALL CSCAL( N-K, R1, AP( KC+1 ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k): columns K and K+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )' +* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )' +* +* where L(k) and L(k+1) are the k-th and (k+1)-th +* columns of L +* + D21 = AP( K+1+( K-1 )*( 2*N-K ) / 2 ) + D11 = AP( K+1+K*( 2*N-K-1 ) / 2 ) / D21 + D22 = AP( K+( K-1 )*( 2*N-K ) / 2 ) / D21 + T = CONE / ( D11*D22-CONE ) + D21 = T / D21 +* + DO 100 J = K + 2, N + WK = D21*( D11*AP( J+( K-1 )*( 2*N-K ) / 2 )- + $ AP( J+K*( 2*N-K-1 ) / 2 ) ) + WKP1 = D21*( D22*AP( J+K*( 2*N-K-1 ) / 2 )- + $ AP( J+( K-1 )*( 2*N-K ) / 2 ) ) + DO 90 I = J, N + AP( I+( J-1 )*( 2*N-J ) / 2 ) = AP( I+( J-1 )* + $ ( 2*N-J ) / 2 ) - AP( I+( K-1 )*( 2*N-K ) / + $ 2 )*WK - AP( I+K*( 2*N-K-1 ) / 2 )*WKP1 + 90 CONTINUE + AP( J+( K-1 )*( 2*N-K ) / 2 ) = WK + AP( J+K*( 2*N-K-1 ) / 2 ) = WKP1 + 100 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + KC = KNC + N - K + 2 + GO TO 60 +* + END IF +* + 110 CONTINUE + RETURN +* +* End of CSPTRF +* + END diff --git a/costa/native/external/lapack/csptri.f b/costa/native/external/lapack/csptri.f new file mode 100644 index 000000000..ace977f01 --- /dev/null +++ b/costa/native/external/lapack/csptri.f @@ -0,0 +1,338 @@ + SUBROUTINE CSPTRI( UPLO, N, AP, IPIV, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CSPTRI computes the inverse of a complex symmetric indefinite matrix +* A in packed storage using the factorization A = U*D*U**T or +* A = L*D*L**T computed by CSPTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the block diagonal matrix D and the multipliers +* used to obtain the factor U or L as computed by CSPTRF, +* stored as a packed triangular matrix. +* +* On exit, if INFO = 0, the (symmetric) inverse of the original +* matrix, stored as a packed triangular matrix. The j-th column +* of inv(A) is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j; +* if UPLO = 'L', +* AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by CSPTRF. +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +* inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KCNEXT, KP, KPC, KSTEP, KX, NPP + COMPLEX AK, AKKP1, AKP1, D, T, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTU + EXTERNAL LSAME, CDOTU +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CSPMV, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSPTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + KP = N*( N+1 ) / 2 + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP - INFO + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + KP = 1 + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP + N - INFO + 1 + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + KCNEXT = KC + K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC+K-1 ) = ONE / AP( KC+K-1 ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL CCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL CSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ), + $ 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ CDOTU( K-1, WORK, 1, AP( KC ), 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = AP( KCNEXT+K-1 ) + AK = AP( KC+K-1 ) / T + AKP1 = AP( KCNEXT+K ) / T + AKKP1 = AP( KCNEXT+K-1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KC+K-1 ) = AKP1 / D + AP( KCNEXT+K ) = AK / D + AP( KCNEXT+K-1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL CCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL CSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ), + $ 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ CDOTU( K-1, WORK, 1, AP( KC ), 1 ) + AP( KCNEXT+K-1 ) = AP( KCNEXT+K-1 ) - + $ CDOTU( K-1, AP( KC ), 1, AP( KCNEXT ), + $ 1 ) + CALL CCOPY( K-1, AP( KCNEXT ), 1, WORK, 1 ) + CALL CSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, + $ AP( KCNEXT ), 1 ) + AP( KCNEXT+K ) = AP( KCNEXT+K ) - + $ CDOTU( K-1, WORK, 1, AP( KCNEXT ), 1 ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT + K + 1 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + KPC = ( KP-1 )*KP / 2 + 1 + CALL CSWAP( KP-1, AP( KC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 40 J = KP + 1, K - 1 + KX = KX + J - 1 + TEMP = AP( KC+J-1 ) + AP( KC+J-1 ) = AP( KX ) + AP( KX ) = TEMP + 40 CONTINUE + TEMP = AP( KC+K-1 ) + AP( KC+K-1 ) = AP( KPC+KP-1 ) + AP( KPC+KP-1 ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC+K+K-1 ) + AP( KC+K+K-1 ) = AP( KC+K+KP-1 ) + AP( KC+K+KP-1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + KC = KCNEXT + GO TO 30 + 50 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + NPP = N*( N+1 ) / 2 + K = N + KC = NPP + 60 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 80 +* + KCNEXT = KC - ( N-K+2 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC ) = ONE / AP( KC ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL CCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL CSPMV( UPLO, N-K, -ONE, AP( KC+N-K+1 ), WORK, 1, + $ ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - CDOTU( N-K, WORK, 1, AP( KC+1 ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = AP( KCNEXT+1 ) + AK = AP( KCNEXT ) / T + AKP1 = AP( KC ) / T + AKKP1 = AP( KCNEXT+1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KCNEXT ) = AKP1 / D + AP( KC ) = AK / D + AP( KCNEXT+1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL CCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL CSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1, + $ ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - CDOTU( N-K, WORK, 1, AP( KC+1 ), + $ 1 ) + AP( KCNEXT+1 ) = AP( KCNEXT+1 ) - + $ CDOTU( N-K, AP( KC+1 ), 1, + $ AP( KCNEXT+2 ), 1 ) + CALL CCOPY( N-K, AP( KCNEXT+2 ), 1, WORK, 1 ) + CALL CSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1, + $ ZERO, AP( KCNEXT+2 ), 1 ) + AP( KCNEXT ) = AP( KCNEXT ) - + $ CDOTU( N-K, WORK, 1, AP( KCNEXT+2 ), 1 ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT - ( N-K+3 ) + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + KPC = NPP - ( N-KP+1 )*( N-KP+2 ) / 2 + 1 + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, AP( KC+KP-K+1 ), 1, AP( KPC+1 ), 1 ) + KX = KC + KP - K + DO 70 J = K + 1, KP - 1 + KX = KX + N - J + 1 + TEMP = AP( KC+J-K ) + AP( KC+J-K ) = AP( KX ) + AP( KX ) = TEMP + 70 CONTINUE + TEMP = AP( KC ) + AP( KC ) = AP( KPC ) + AP( KPC ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC-N+K-1 ) + AP( KC-N+K-1 ) = AP( KC-N+KP-1 ) + AP( KC-N+KP-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + KC = KCNEXT + GO TO 60 + 80 CONTINUE + END IF +* + RETURN +* +* End of CSPTRI +* + END diff --git a/costa/native/external/lapack/csptrs.f b/costa/native/external/lapack/csptrs.f new file mode 100644 index 000000000..0d5219091 --- /dev/null +++ b/costa/native/external/lapack/csptrs.f @@ -0,0 +1,378 @@ + SUBROUTINE CSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CSPTRS solves a system of linear equations A*X = B with a complex +* symmetric matrix A stored in packed format using the factorization +* A = U*D*U**T or A = L*D*L**T computed by CSPTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input) COMPLEX array, dimension (N*(N+1)/2) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by CSPTRF, stored as a +* packed triangular matrix. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by CSPTRF. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KP + COMPLEX AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CGERU, CSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U'. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + KC = KC - K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL CGERU( K-1, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL CSCAL( NRHS, ONE / AP( KC+K-1 ), B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL CSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL CGERU( K-2, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL CGERU( K-2, NRHS, -ONE, AP( KC-( K-1 ) ), 1, + $ B( K-1, 1 ), LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+K-2 ) + AKM1 = AP( KC-1 ) / AKM1K + AK = AP( KC+K-1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / AKM1K + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + KC = KC - K + 1 + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U'*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U'(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL CGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ), + $ 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + K + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U'(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + CALL CGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ), + $ 1, ONE, B( K, 1 ), LDB ) + CALL CGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, + $ AP( KC+K ), 1, ONE, B( K+1, 1 ), LDB ) +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + 2*K + 1 + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L'. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL CGERU( N-K, NRHS, -ONE, AP( KC+1 ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL CSCAL( NRHS, ONE / AP( KC ), B( K, 1 ), LDB ) + KC = KC + N - K + 1 + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL CSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL CGERU( N-K-1, NRHS, -ONE, AP( KC+2 ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL CGERU( N-K-1, NRHS, -ONE, AP( KC+N-K+2 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+1 ) + AKM1 = AP( KC ) / AKM1K + AK = AP( KC+N-K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / AKM1K + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + KC = KC + 2*( N-K ) + 1 + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L'*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + KC = KC - ( N-K+1 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L'(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL CGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L'(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL CGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB ) + CALL CGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC-( N-K ) ), 1, ONE, B( K-1, 1 ), + $ LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC - ( N-K+2 ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of CSPTRS +* + END diff --git a/costa/native/external/lapack/csrot.f b/costa/native/external/lapack/csrot.f new file mode 100644 index 000000000..f0a53ed7f --- /dev/null +++ b/costa/native/external/lapack/csrot.f @@ -0,0 +1,53 @@ + SUBROUTINE CSROT( N, CX, INCX, CY, INCY, C, S ) +* +* applies a plane rotation, where the cos and sin (c and s) are real +* and the vectors cx and cy are complex. +* jack dongarra, linpack, 3/11/78. +* +* ===================================================================== +* +* .. Scalar Arguments .. + INTEGER INCX, INCY, N + REAL C, S +* .. +* .. Array Arguments .. + COMPLEX CX( * ), CY( * ) +* .. +* .. Local Scalars .. + INTEGER I, IX, IY + COMPLEX CTEMP +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) + $ RETURN + IF( INCX.EQ.1 .AND. INCY.EQ.1 ) + $ GO TO 20 +* +* code for unequal increments or equal increments not equal +* to 1 +* + IX = 1 + IY = 1 + IF( INCX.LT.0 ) + $ IX = ( -N+1 )*INCX + 1 + IF( INCY.LT.0 ) + $ IY = ( -N+1 )*INCY + 1 + DO 10 I = 1, N + CTEMP = C*CX( IX ) + S*CY( IY ) + CY( IY ) = C*CY( IY ) - S*CX( IX ) + CX( IX ) = CTEMP + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + RETURN +* +* code for both increments equal to 1 +* + 20 DO 30 I = 1, N + CTEMP = C*CX( I ) + S*CY( I ) + CY( I ) = C*CY( I ) - S*CX( I ) + CX( I ) = CTEMP + 30 CONTINUE + RETURN + END diff --git a/costa/native/external/lapack/csrscl.f b/costa/native/external/lapack/csrscl.f new file mode 100644 index 000000000..9789e0000 --- /dev/null +++ b/costa/native/external/lapack/csrscl.f @@ -0,0 +1,115 @@ + SUBROUTINE CSRSCL( N, SA, SX, INCX ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INCX, N + REAL SA +* .. +* .. Array Arguments .. + COMPLEX SX( * ) +* .. +* +* Purpose +* ======= +* +* CSRSCL multiplies an n-element complex vector x by the real scalar +* 1/a. This is done without overflow or underflow as long as +* the final result x/a does not overflow or underflow. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of components of the vector x. +* +* SA (input) REAL +* The scalar a which is used to divide each component of x. +* SA must be >= 0, or the subroutine will divide by zero. +* +* SX (input/output) COMPLEX array, dimension +* (1+(N-1)*abs(INCX)) +* The n-element vector x. +* +* INCX (input) INTEGER +* The increment between successive values of the vector SX. +* > 0: SX(1) = X(1) and SX(1+(i-1)*INCX) = x(i), 1< i<= n +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + REAL BIGNUM, CDEN, CDEN1, CNUM, CNUM1, MUL, SMLNUM +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL, SLABAD +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Initialize the denominator to SA and the numerator to 1. +* + CDEN = SA + CNUM = ONE +* + 10 CONTINUE + CDEN1 = CDEN*SMLNUM + CNUM1 = CNUM / BIGNUM + IF( ABS( CDEN1 ).GT.ABS( CNUM ) .AND. CNUM.NE.ZERO ) THEN +* +* Pre-multiply X by SMLNUM if CDEN is large compared to CNUM. +* + MUL = SMLNUM + DONE = .FALSE. + CDEN = CDEN1 + ELSE IF( ABS( CNUM1 ).GT.ABS( CDEN ) ) THEN +* +* Pre-multiply X by BIGNUM if CDEN is small compared to CNUM. +* + MUL = BIGNUM + DONE = .FALSE. + CNUM = CNUM1 + ELSE +* +* Multiply X by CNUM / CDEN and return. +* + MUL = CNUM / CDEN + DONE = .TRUE. + END IF +* +* Scale the vector X by MUL +* + CALL CSSCAL( N, MUL, SX, INCX ) +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of CSRSCL +* + END diff --git a/costa/native/external/lapack/cstedc.f b/costa/native/external/lapack/cstedc.f new file mode 100644 index 000000000..e0b166542 --- /dev/null +++ b/costa/native/external/lapack/cstedc.f @@ -0,0 +1,390 @@ + SUBROUTINE CSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, RWORK, + $ LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL D( * ), E( * ), RWORK( * ) + COMPLEX WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CSTEDC computes all eigenvalues and, optionally, eigenvectors of a +* symmetric tridiagonal matrix using the divide and conquer method. +* The eigenvectors of a full or band complex Hermitian matrix can also +* be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this +* matrix to tridiagonal form. +* +* This code makes very mild assumptions about floating point +* arithmetic. It will work on machines with a guard digit in +* add/subtract, or on those binary machines without guard digits +* which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. +* It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. See SLAED3 for details. +* +* Arguments +* ========= +* +* COMPZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only. +* = 'I': Compute eigenvectors of tridiagonal matrix also. +* = 'V': Compute eigenvectors of original Hermitian matrix +* also. On entry, Z contains the unitary matrix used +* to reduce the original matrix to tridiagonal form. +* +* N (input) INTEGER +* The dimension of the symmetric tridiagonal matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the diagonal elements of the tridiagonal matrix. +* On exit, if INFO = 0, the eigenvalues in ascending order. +* +* E (input/output) REAL array, dimension (N-1) +* On entry, the subdiagonal elements of the tridiagonal matrix. +* On exit, E has been destroyed. +* +* Z (input/output) COMPLEX array, dimension (LDZ,N) +* On entry, if COMPZ = 'V', then Z contains the unitary +* matrix used in the reduction to tridiagonal form. +* On exit, if INFO = 0, then if COMPZ = 'V', Z contains the +* orthonormal eigenvectors of the original Hermitian matrix, +* and if COMPZ = 'I', Z contains the orthonormal eigenvectors +* of the symmetric tridiagonal matrix. +* If COMPZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1. +* If eigenvectors are desired, then LDZ >= max(1,N). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1. +* If COMPZ = 'V' and N > 1, LWORK must be at least N*N. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace/output) REAL array, +* dimension (LRWORK) +* On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +* +* LRWORK (input) INTEGER +* The dimension of the array RWORK. +* If COMPZ = 'N' or N <= 1, LRWORK must be at least 1. +* If COMPZ = 'V' and N > 1, LRWORK must be at least +* 1 + 3*N + 2*N*lg N + 3*N**2 , +* where lg( N ) = smallest integer k such +* that 2**k >= N. +* If COMPZ = 'I' and N > 1, LRWORK must be at least +* 1 + 4*N + 2*N**2 . +* +* If LRWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the RWORK array, +* returns this value as the first entry of the RWORK array, and +* no error message related to LRWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If COMPZ = 'N' or N <= 1, LIWORK must be at least 1. +* If COMPZ = 'V' or N > 1, LIWORK must be at least +* 6 + 6*N + 5*N*lg N. +* If COMPZ = 'I' or N > 1, LIWORK must be at least +* 3 + 5*N . +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: The algorithm failed to compute an eigenvalue while +* working on the submatrix lying in rows and columns +* INFO/(N+1) through mod(INFO,N+1). +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER END, I, ICOMPZ, II, J, K, LGN, LIWMIN, LL, + $ LRWMIN, LWMIN, M, SMLSIZ, START + REAL EPS, ORGNRM, P, TINY +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANST + EXTERNAL ILAENV, LSAME, SLAMCH, SLANST +* .. +* .. External Subroutines .. + EXTERNAL CLACPY, CLACRM, CLAED0, CSTEQR, CSWAP, SLASCL, + $ SLASET, SSTEDC, SSTEQR, SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, MOD, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( N.LE.1 .OR. ICOMPZ.LE.0 ) THEN + LWMIN = 1 + LIWMIN = 1 + LRWMIN = 1 + ELSE + LGN = INT( LOG( REAL( N ) ) / LOG( TWO ) ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( ICOMPZ.EQ.1 ) THEN + LWMIN = N*N + LRWMIN = 1 + 3*N + 2*N*LGN + 3*N**2 + LIWMIN = 6 + 6*N + 5*N*LGN + ELSE IF( ICOMPZ.EQ.2 ) THEN + LWMIN = 1 + LRWMIN = 1 + 4*N + 2*N**2 + LIWMIN = 3 + 5*N + END IF + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSTEDC', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( N.EQ.1 ) THEN + IF( ICOMPZ.NE.0 ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* + SMLSIZ = ILAENV( 9, 'CSTEDC', ' ', 0, 0, 0, 0 ) +* +* If the following conditional clause is removed, then the routine +* will use the Divide and Conquer routine to compute only the +* eigenvalues, which requires (3N + 3N**2) real workspace and +* (2 + 5N + 2N lg(N)) integer workspace. +* Since on many architectures SSTERF is much faster than any other +* algorithm for finding eigenvalues only, it is used here +* as the default. +* +* If COMPZ = 'N', use SSTERF to compute the eigenvalues. +* + IF( ICOMPZ.EQ.0 ) THEN + CALL SSTERF( N, D, E, INFO ) + RETURN + END IF +* +* If N is smaller than the minimum divide size (SMLSIZ+1), then +* solve the problem with another solver. +* + IF( N.LE.SMLSIZ ) THEN + IF( ICOMPZ.EQ.0 ) THEN + CALL SSTERF( N, D, E, INFO ) + RETURN + ELSE IF( ICOMPZ.EQ.2 ) THEN + CALL CSTEQR( 'I', N, D, E, Z, LDZ, RWORK, INFO ) + RETURN + ELSE + CALL CSTEQR( 'V', N, D, E, Z, LDZ, RWORK, INFO ) + RETURN + END IF + END IF +* +* If COMPZ = 'I', we simply call SSTEDC instead. +* + IF( ICOMPZ.EQ.2 ) THEN + CALL SLASET( 'Full', N, N, ZERO, ONE, RWORK, N ) + LL = N*N + 1 + CALL SSTEDC( 'I', N, D, E, RWORK, N, RWORK( LL ), LRWORK-LL+1, + $ IWORK, LIWORK, INFO ) + DO 20 J = 1, N + DO 10 I = 1, N + Z( I, J ) = RWORK( ( J-1 )*N+I ) + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* From now on, only option left to be handled is COMPZ = 'V', +* i.e. ICOMPZ = 1. +* +* Scale. +* + ORGNRM = SLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) + $ RETURN +* + EPS = SLAMCH( 'Epsilon' ) +* + START = 1 +* +* while ( START <= N ) +* + 30 CONTINUE + IF( START.LE.N ) THEN +* +* Let END be the position of the next subdiagonal entry such that +* E( END ) <= TINY or END = N if no such subdiagonal exists. The +* matrix identified by the elements between START and END +* constitutes an independent sub-problem. +* + END = START + 40 CONTINUE + IF( END.LT.N ) THEN + TINY = EPS*SQRT( ABS( D( END ) ) )*SQRT( ABS( D( END+1 ) ) ) + IF( ABS( E( END ) ).GT.TINY ) THEN + END = END + 1 + GO TO 40 + END IF + END IF +* +* (Sub) Problem determined. Compute its size and solve it. +* + M = END - START + 1 + IF( M.GT.SMLSIZ ) THEN + INFO = SMLSIZ +* +* Scale. +* + ORGNRM = SLANST( 'M', M, D( START ), E( START ) ) + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, M, 1, D( START ), M, + $ INFO ) + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, M-1, 1, E( START ), + $ M-1, INFO ) +* + CALL CLAED0( N, M, D( START ), E( START ), Z( 1, START ), + $ LDZ, WORK, N, RWORK, IWORK, INFO ) + IF( INFO.GT.0 ) THEN + INFO = ( INFO / ( M+1 )+START-1 )*( N+1 ) + + $ MOD( INFO, ( M+1 ) ) + START - 1 + RETURN + END IF +* +* Scale back. +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, M, 1, D( START ), M, + $ INFO ) +* + ELSE + CALL SSTEQR( 'I', M, D( START ), E( START ), RWORK, M, + $ RWORK( M*M+1 ), INFO ) + CALL CLACRM( N, M, Z( 1, START ), LDZ, RWORK, M, WORK, N, + $ RWORK( M*M+1 ) ) + CALL CLACPY( 'A', N, M, WORK, N, Z( 1, START ), LDZ ) + IF( INFO.GT.0 ) THEN + INFO = START*( N+1 ) + END + RETURN + END IF + END IF +* + START = END + 1 + GO TO 30 + END IF +* +* endwhile +* +* If the problem split any number of times, then the eigenvalues +* will not be properly ordered. Here we permute the eigenvalues +* (and the associated eigenvectors) into ascending order. +* + IF( M.NE.N ) THEN +* +* Use Selection Sort to minimize swaps of eigenvectors +* + DO 60 II = 2, N + I = II - 1 + K = I + P = D( I ) + DO 50 J = II, N + IF( D( J ).LT.P ) THEN + K = J + P = D( J ) + END IF + 50 CONTINUE + IF( K.NE.I ) THEN + D( K ) = D( I ) + D( I ) = P + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 ) + END IF + 60 CONTINUE + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of CSTEDC +* + END diff --git a/costa/native/external/lapack/cstegr.f b/costa/native/external/lapack/cstegr.f new file mode 100644 index 000000000..da04d0c35 --- /dev/null +++ b/costa/native/external/lapack/cstegr.f @@ -0,0 +1,405 @@ + SUBROUTINE CSTEGR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL, + $ M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE + INTEGER IL, INFO, IU, LDZ, LIWORK, LWORK, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + REAL D( * ), E( * ), W( * ), WORK( * ) + COMPLEX Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CSTEGR computes selected eigenvalues and, optionally, eigenvectors +* of a real symmetric tridiagonal matrix T. Eigenvalues and +* eigenvectors can be selected by specifying either a range of values +* or a range of indices for the desired eigenvalues. The eigenvalues +* are computed by the dqds algorithm, while orthogonal eigenvectors are +* computed from various ``good'' L D L^T representations (also known as +* Relatively Robust Representations). Gram-Schmidt orthogonalization is +* avoided as far as possible. More specifically, the various steps of +* the algorithm are as follows. For the i-th unreduced block of T, +* (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T +* is a relatively robust representation, +* (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high +* relative accuracy by the dqds algorithm, +* (c) If there is a cluster of close eigenvalues, "choose" sigma_i +* close to the cluster, and go to step (a), +* (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, +* compute the corresponding eigenvector by forming a +* rank-revealing twisted factorization. +* The desired accuracy of the output can be specified by the input +* parameter ABSTOL. +* +* For more details, see "A new O(n^2) algorithm for the symmetric +* tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, +* Computer Science Division Technical Report No. UCB/CSD-97-971, +* UC Berkeley, May 1997. +* +* Note 1 : Currently CSTEGR is only set up to find ALL the n +* eigenvalues and eigenvectors of T in O(n^2) time +* Note 2 : Currently the routine CSTEIN is called when an appropriate +* sigma_i cannot be chosen in step (c) above. CSTEIN invokes modified +* Gram-Schmidt when eigenvalues are close. +* Note 3 : CSTEGR works only on machines which follow ieee-754 +* floating-point standard in their handling of infinities and NaNs. +* Normal execution of CSTEGR may create NaNs and infinities and hence +* may abort due to a floating point exception in environments which +* do not conform to the ieee standard. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +********** Only RANGE = 'A' is currently supported ********************* +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* T. On exit, D is overwritten. +* +* E (input/output) REAL array, dimension (N) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix T in elements 1 to N-1 of E; E(N) need not be set. +* On exit, E is overwritten. +* +* VL (input) REAL +* VU (input) REAL +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) REAL +* The absolute error tolerance for the +* eigenvalues/eigenvectors. IF JOBZ = 'V', the eigenvalues and +* eigenvectors output have residual norms bounded by ABSTOL, +* and the dot products between different eigenvectors are +* bounded by ABSTOL. If ABSTOL is less than N*EPS*|T|, then +* N*EPS*|T| will be used in its place, where EPS is the +* machine precision and |T| is the 1-norm of the tridiagonal +* matrix. The eigenvalues are computed to an accuracy of +* EPS*|T| irrespective of ABSTOL. If high relative accuracy +* is important, set ABSTOL to DLAMCH( 'Safe minimum' ). +* See Barlow and Demmel "Computing Accurate Eigensystems of +* Scaled Diagonally Dominant Matrices", LAPACK Working Note #7 +* for a discussion of which matrices define their eigenvalues +* to high relative accuracy. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) REAL array, dimension (N) +* The first M elements contain the selected eigenvalues in +* ascending order. +* +* Z (output) COMPLEX array, dimension (LDZ, max(1,M) ) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix T +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* ISUPPZ (output) INTEGER ARRAY, dimension ( 2*max(1,M) ) +* The support of the eigenvectors in Z, i.e., the indices +* indicating the nonzero elements in Z. The i-th eigenvector +* is nonzero only in elements ISUPPZ( 2*i-1 ) through +* ISUPPZ( 2*i ). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal +* (and minimal) LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,18*N) +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. LIWORK >= max(1,10*N) +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = 1, internal error in SLARRE, +* if INFO = 2, internal error in CLARRV. +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* Ken Stanley, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ + INTEGER I, IBEGIN, IEND, IINDBL, IINDWK, IINFO, IINSPL, + $ INDGRS, INDWOF, INDWRK, ITMP, J, JJ, LIWMIN, + $ LWMIN, NSPLIT + REAL BIGNUM, EPS, RMAX, RMIN, SAFMIN, SCALE, SMLNUM, + $ THRESH, TMP, TNRM, TOL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANST + EXTERNAL LSAME, SLAMCH, SLANST +* .. +* .. External Subroutines .. + EXTERNAL CLARRV, CLASET, CSWAP, SLARRE, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) ) + LWMIN = 18*N + LIWMIN = 10*N +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 +* +* The following two lines need to be removed once the +* RANGE = 'V' and RANGE = 'I' options are provided. +* + ELSE IF( VALEIG .OR. INDEIG ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN + INFO = -7 + ELSE IF( INDEIG .AND. IL.LT.1 ) THEN + INFO = -8 +* The following change should be made in DSTEVX also, otherwise +* IL can be specified as N+1 and IU as N. +* ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN + ELSE IF( INDEIG .AND. ( IU.LT.IL .OR. IU.GT.N ) ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -14 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -17 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSTEGR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = D( 1 ) + ELSE + IF( VL.LT.D( 1 ) .AND. VU.GE.D( 1 ) ) THEN + M = 1 + W( 1 ) = D( 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + SCALE = ONE + TNRM = SLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + SCALE = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + SCALE = RMAX / TNRM + END IF + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( N, SCALE, D, 1 ) + CALL SSCAL( N-1, SCALE, E, 1 ) + TNRM = TNRM*SCALE + END IF + INDGRS = 1 + INDWOF = 2*N + 1 + INDWRK = 3*N + 1 +* + IINSPL = 1 + IINDBL = N + 1 + IINDWK = 2*N + 1 +* + CALL CLASET( 'Full', N, N, CZERO, CZERO, Z, LDZ ) +* +* Compute the desired eigenvalues of the tridiagonal after splitting +* into smaller subblocks if the corresponding of-diagonal elements +* are small +* + THRESH = EPS*TNRM + CALL SLARRE( N, D, E, THRESH, NSPLIT, IWORK( IINSPL ), M, W, + $ WORK( INDWOF ), WORK( INDGRS ), WORK( INDWRK ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 1 + RETURN + END IF +* + IF( WANTZ ) THEN +* +* Compute the desired eigenvectors corresponding to the computed +* eigenvalues +* + TOL = MAX( ABSTOL, REAL( N )*THRESH ) + IBEGIN = 1 + DO 20 I = 1, NSPLIT + IEND = IWORK( IINSPL+I-1 ) + DO 10 J = IBEGIN, IEND + IWORK( IINDBL+J-1 ) = I + 10 CONTINUE + IBEGIN = IEND + 1 + 20 CONTINUE +* + CALL CLARRV( N, D, E, IWORK( IINSPL ), M, W, IWORK( IINDBL ), + $ WORK( INDGRS ), TOL, Z, LDZ, ISUPPZ, + $ WORK( INDWRK ), IWORK( IINDWK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 2 + RETURN + END IF +* + END IF +* + IBEGIN = 1 + DO 40 I = 1, NSPLIT + IEND = IWORK( IINSPL+I-1 ) + DO 30 J = IBEGIN, IEND + W( J ) = W( J ) + WORK( INDWOF+I-1 ) + 30 CONTINUE + IBEGIN = IEND + 1 + 40 CONTINUE +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( M, ONE / SCALE, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( NSPLIT.GT.1 ) THEN + DO 60 J = 1, M - 1 + I = 0 + TMP = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP ) THEN + I = JJ + TMP = W( JJ ) + END IF + 50 CONTINUE + IF( I.NE.0 ) THEN + W( I ) = W( J ) + W( J ) = TMP + IF( WANTZ ) THEN + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + ITMP = ISUPPZ( 2*I-1 ) + ISUPPZ( 2*I-1 ) = ISUPPZ( 2*J-1 ) + ISUPPZ( 2*J-1 ) = ITMP + ITMP = ISUPPZ( 2*I ) + ISUPPZ( 2*I ) = ISUPPZ( 2*J ) + ISUPPZ( 2*J ) = ITMP + END IF + END IF + 60 CONTINUE + END IF +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of CSTEGR +* + END diff --git a/costa/native/external/lapack/cstein.f b/costa/native/external/lapack/cstein.f new file mode 100644 index 000000000..39bdc4f86 --- /dev/null +++ b/costa/native/external/lapack/cstein.f @@ -0,0 +1,377 @@ + SUBROUTINE CSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, Z, LDZ, WORK, + $ IWORK, IFAIL, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDZ, M, N +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), IFAIL( * ), ISPLIT( * ), + $ IWORK( * ) + REAL D( * ), E( * ), W( * ), WORK( * ) + COMPLEX Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CSTEIN computes the eigenvectors of a real symmetric tridiagonal +* matrix T corresponding to specified eigenvalues, using inverse +* iteration. +* +* The maximum number of iterations allowed for each eigenvector is +* specified by an internal parameter MAXITS (currently set to 5). +* +* Although the eigenvectors are real, they are stored in a complex +* array, which may be passed to CUNMTR or CUPMTR for back +* transformation to the eigenvectors of a complex Hermitian matrix +* which was reduced to tridiagonal form. +* +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the tridiagonal matrix T. +* +* E (input) REAL array, dimension (N) +* The (n-1) subdiagonal elements of the tridiagonal matrix +* T, stored in elements 1 to N-1; E(N) need not be set. +* +* M (input) INTEGER +* The number of eigenvectors to be found. 0 <= M <= N. +* +* W (input) REAL array, dimension (N) +* The first M elements of W contain the eigenvalues for +* which eigenvectors are to be computed. The eigenvalues +* should be grouped by split-off block and ordered from +* smallest to largest within the block. ( The output array +* W from SSTEBZ with ORDER = 'B' is expected here. ) +* +* IBLOCK (input) INTEGER array, dimension (N) +* The submatrix indices associated with the corresponding +* eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to +* the first submatrix from the top, =2 if W(i) belongs to +* the second submatrix, etc. ( The output array IBLOCK +* from SSTEBZ is expected here. ) +* +* ISPLIT (input) INTEGER array, dimension (N) +* The splitting points, at which T breaks up into submatrices. +* The first submatrix consists of rows/columns 1 to +* ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 +* through ISPLIT( 2 ), etc. +* ( The output array ISPLIT from SSTEBZ is expected here. ) +* +* Z (output) COMPLEX array, dimension (LDZ, M) +* The computed eigenvectors. The eigenvector associated +* with the eigenvalue W(i) is stored in the i-th column of +* Z. Any vector which fails to converge is set to its current +* iterate after MAXITS iterations. +* The imaginary parts of the eigenvectors are set to zero. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= max(1,N). +* +* WORK (workspace) REAL array, dimension (5*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* IFAIL (output) INTEGER array, dimension (M) +* On normal exit, all elements of IFAIL are zero. +* If one or more eigenvectors fail to converge after +* MAXITS iterations, then their indices are stored in +* array IFAIL. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, then i eigenvectors failed to converge +* in MAXITS iterations. Their indices are stored in +* array IFAIL. +* +* Internal Parameters +* =================== +* +* MAXITS INTEGER, default = 5 +* The maximum number of iterations performed. +* +* EXTRA INTEGER, default = 2 +* The number of iterations performed after norm growth +* criterion is satisfied, should be at least 1. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) + REAL ZERO, ONE, TEN, ODM3, ODM1 + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TEN = 1.0E+1, + $ ODM3 = 1.0E-3, ODM1 = 1.0E-1 ) + INTEGER MAXITS, EXTRA + PARAMETER ( MAXITS = 5, EXTRA = 2 ) +* .. +* .. Local Scalars .. + INTEGER B1, BLKSIZ, BN, GPIND, I, IINFO, INDRV1, + $ INDRV2, INDRV3, INDRV4, INDRV5, ITS, J, J1, + $ JBLK, JMAX, JR, NBLK, NRMCHK + REAL CTR, EPS, EPS1, NRM, ONENRM, ORTOL, PERTOL, + $ SCL, SEP, STPCRT, TOL, XJ, XJM +* .. +* .. Local Arrays .. + INTEGER ISEED( 4 ) +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SASUM, SLAMCH, SNRM2 + EXTERNAL ISAMAX, SASUM, SLAMCH, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLAGTF, SLAGTS, SLARNV, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CMPLX, MAX, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + DO 10 I = 1, M + IFAIL( I ) = 0 + 10 CONTINUE +* + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 .OR. M.GT.N ) THEN + INFO = -4 + ELSE IF( LDZ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + DO 20 J = 2, M + IF( IBLOCK( J ).LT.IBLOCK( J-1 ) ) THEN + INFO = -6 + GO TO 30 + END IF + IF( IBLOCK( J ).EQ.IBLOCK( J-1 ) .AND. W( J ).LT.W( J-1 ) ) + $ THEN + INFO = -5 + GO TO 30 + END IF + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSTEIN', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + EPS = SLAMCH( 'Precision' ) +* +* Initialize seed for random number generator SLARNV. +* + DO 40 I = 1, 4 + ISEED( I ) = 1 + 40 CONTINUE +* +* Initialize pointers. +* + INDRV1 = 0 + INDRV2 = INDRV1 + N + INDRV3 = INDRV2 + N + INDRV4 = INDRV3 + N + INDRV5 = INDRV4 + N +* +* Compute eigenvectors of matrix blocks. +* + J1 = 1 + DO 180 NBLK = 1, IBLOCK( M ) +* +* Find starting and ending indices of block nblk. +* + IF( NBLK.EQ.1 ) THEN + B1 = 1 + ELSE + B1 = ISPLIT( NBLK-1 ) + 1 + END IF + BN = ISPLIT( NBLK ) + BLKSIZ = BN - B1 + 1 + IF( BLKSIZ.EQ.1 ) + $ GO TO 60 + GPIND = B1 +* +* Compute reorthogonalization criterion and stopping criterion. +* + ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) ) + ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) ) + DO 50 I = B1 + 1, BN - 1 + ONENRM = MAX( ONENRM, ABS( D( I ) )+ABS( E( I-1 ) )+ + $ ABS( E( I ) ) ) + 50 CONTINUE + ORTOL = ODM3*ONENRM +* + STPCRT = SQRT( ODM1 / BLKSIZ ) +* +* Loop through eigenvalues of block nblk. +* + 60 CONTINUE + JBLK = 0 + DO 170 J = J1, M + IF( IBLOCK( J ).NE.NBLK ) THEN + J1 = J + GO TO 180 + END IF + JBLK = JBLK + 1 + XJ = W( J ) +* +* Skip all the work if the block size is one. +* + IF( BLKSIZ.EQ.1 ) THEN + WORK( INDRV1+1 ) = ONE + GO TO 140 + END IF +* +* If eigenvalues j and j-1 are too close, add a relatively +* small perturbation. +* + IF( JBLK.GT.1 ) THEN + EPS1 = ABS( EPS*XJ ) + PERTOL = TEN*EPS1 + SEP = XJ - XJM + IF( SEP.LT.PERTOL ) + $ XJ = XJM + PERTOL + END IF +* + ITS = 0 + NRMCHK = 0 +* +* Get random starting vector. +* + CALL SLARNV( 2, ISEED, BLKSIZ, WORK( INDRV1+1 ) ) +* +* Copy the matrix T so it won't be destroyed in factorization. +* + CALL SCOPY( BLKSIZ, D( B1 ), 1, WORK( INDRV4+1 ), 1 ) + CALL SCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV2+2 ), 1 ) + CALL SCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV3+1 ), 1 ) +* +* Compute LU factors with partial pivoting ( PT = LU ) +* + TOL = ZERO + CALL SLAGTF( BLKSIZ, WORK( INDRV4+1 ), XJ, WORK( INDRV2+2 ), + $ WORK( INDRV3+1 ), TOL, WORK( INDRV5+1 ), IWORK, + $ IINFO ) +* +* Update iteration count. +* + 70 CONTINUE + ITS = ITS + 1 + IF( ITS.GT.MAXITS ) + $ GO TO 120 +* +* Normalize and scale the righthand side vector Pb. +* + SCL = BLKSIZ*ONENRM*MAX( EPS, + $ ABS( WORK( INDRV4+BLKSIZ ) ) ) / + $ SASUM( BLKSIZ, WORK( INDRV1+1 ), 1 ) + CALL SSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 ) +* +* Solve the system LU = Pb. +* + CALL SLAGTS( -1, BLKSIZ, WORK( INDRV4+1 ), WORK( INDRV2+2 ), + $ WORK( INDRV3+1 ), WORK( INDRV5+1 ), IWORK, + $ WORK( INDRV1+1 ), TOL, IINFO ) +* +* Reorthogonalize by modified Gram-Schmidt if eigenvalues are +* close enough. +* + IF( JBLK.EQ.1 ) + $ GO TO 110 + IF( ABS( XJ-XJM ).GT.ORTOL ) + $ GPIND = J + IF( GPIND.NE.J ) THEN + DO 100 I = GPIND, J - 1 + CTR = ZERO + DO 80 JR = 1, BLKSIZ + CTR = CTR + WORK( INDRV1+JR )* + $ REAL( Z( B1-1+JR, I ) ) + 80 CONTINUE + DO 90 JR = 1, BLKSIZ + WORK( INDRV1+JR ) = WORK( INDRV1+JR ) - + $ CTR*REAL( Z( B1-1+JR, I ) ) + 90 CONTINUE + 100 CONTINUE + END IF +* +* Check the infinity norm of the iterate. +* + 110 CONTINUE + JMAX = ISAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) + NRM = ABS( WORK( INDRV1+JMAX ) ) +* +* Continue for additional iterations after norm reaches +* stopping criterion. +* + IF( NRM.LT.STPCRT ) + $ GO TO 70 + NRMCHK = NRMCHK + 1 + IF( NRMCHK.LT.EXTRA+1 ) + $ GO TO 70 +* + GO TO 130 +* +* If stopping criterion was not satisfied, update info and +* store eigenvector number in array ifail. +* + 120 CONTINUE + INFO = INFO + 1 + IFAIL( INFO ) = J +* +* Accept iterate as jth eigenvector. +* + 130 CONTINUE + SCL = ONE / SNRM2( BLKSIZ, WORK( INDRV1+1 ), 1 ) + JMAX = ISAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) + IF( WORK( INDRV1+JMAX ).LT.ZERO ) + $ SCL = -SCL + CALL SSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 ) + 140 CONTINUE + DO 150 I = 1, N + Z( I, J ) = CZERO + 150 CONTINUE + DO 160 I = 1, BLKSIZ + Z( B1+I-1, J ) = CMPLX( WORK( INDRV1+I ), ZERO ) + 160 CONTINUE +* +* Save the shift to check eigenvalue spacing at next +* iteration. +* + XJM = XJ +* + 170 CONTINUE + 180 CONTINUE +* + RETURN +* +* End of CSTEIN +* + END diff --git a/costa/native/external/lapack/csteqr.f b/costa/native/external/lapack/csteqr.f new file mode 100644 index 000000000..337e919ce --- /dev/null +++ b/costa/native/external/lapack/csteqr.f @@ -0,0 +1,504 @@ + SUBROUTINE CSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ), WORK( * ) + COMPLEX Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CSTEQR computes all eigenvalues and, optionally, eigenvectors of a +* symmetric tridiagonal matrix using the implicit QL or QR method. +* The eigenvectors of a full or band complex Hermitian matrix can also +* be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this +* matrix to tridiagonal form. +* +* Arguments +* ========= +* +* COMPZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only. +* = 'V': Compute eigenvalues and eigenvectors of the original +* Hermitian matrix. On entry, Z must contain the +* unitary matrix used to reduce the original matrix +* to tridiagonal form. +* = 'I': Compute eigenvalues and eigenvectors of the +* tridiagonal matrix. Z is initialized to the identity +* matrix. +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the diagonal elements of the tridiagonal matrix. +* On exit, if INFO = 0, the eigenvalues in ascending order. +* +* E (input/output) REAL array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix. +* On exit, E has been destroyed. +* +* Z (input/output) COMPLEX array, dimension (LDZ, N) +* On entry, if COMPZ = 'V', then Z contains the unitary +* matrix used in the reduction to tridiagonal form. +* On exit, if INFO = 0, then if COMPZ = 'V', Z contains the +* orthonormal eigenvectors of the original Hermitian matrix, +* and if COMPZ = 'I', Z contains the orthonormal eigenvectors +* of the symmetric tridiagonal matrix. +* If COMPZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* eigenvectors are desired, then LDZ >= max(1,N). +* +* WORK (workspace) REAL array, dimension (max(1,2*N-2)) +* If COMPZ = 'N', then WORK is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: the algorithm has failed to find all the eigenvalues in +* a total of 30*N iterations; if INFO = i, then i +* elements of E have not converged to zero; on exit, D +* and E contain the elements of a symmetric tridiagonal +* matrix which is unitarily similar to the original +* matrix. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, THREE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ THREE = 3.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) +* .. +* .. Local Scalars .. + INTEGER I, ICOMPZ, II, ISCALE, J, JTOT, K, L, L1, LEND, + $ LENDM1, LENDP1, LENDSV, LM1, LSV, M, MM, MM1, + $ NM1, NMAXIT + REAL ANORM, B, C, EPS, EPS2, F, G, P, R, RT1, RT2, + $ S, SAFMAX, SAFMIN, SSFMAX, SSFMIN, TST +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANST, SLAPY2 + EXTERNAL LSAME, SLAMCH, SLANST, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL CLASET, CLASR, CSWAP, SLAE2, SLAEV2, SLARTG, + $ SLASCL, SLASRT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSTEQR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ICOMPZ.EQ.2 ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Determine the unit roundoff and over/underflow thresholds. +* + EPS = SLAMCH( 'E' ) + EPS2 = EPS**2 + SAFMIN = SLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + SSFMAX = SQRT( SAFMAX ) / THREE + SSFMIN = SQRT( SAFMIN ) / EPS2 +* +* Compute the eigenvalues and eigenvectors of the tridiagonal +* matrix. +* + IF( ICOMPZ.EQ.2 ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, Z, LDZ ) +* + NMAXIT = N*MAXIT + JTOT = 0 +* +* Determine where the matrix splits and choose QL or QR iteration +* for each block, according to whether top or bottom diagonal +* element is smaller. +* + L1 = 1 + NM1 = N - 1 +* + 10 CONTINUE + IF( L1.GT.N ) + $ GO TO 160 + IF( L1.GT.1 ) + $ E( L1-1 ) = ZERO + IF( L1.LE.NM1 ) THEN + DO 20 M = L1, NM1 + TST = ABS( E( M ) ) + IF( TST.EQ.ZERO ) + $ GO TO 30 + IF( TST.LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+ + $ 1 ) ) ) )*EPS ) THEN + E( M ) = ZERO + GO TO 30 + END IF + 20 CONTINUE + END IF + M = N +* + 30 CONTINUE + L = L1 + LSV = L + LEND = M + LENDSV = LEND + L1 = M + 1 + IF( LEND.EQ.L ) + $ GO TO 10 +* +* Scale submatrix in rows and columns L to LEND +* + ANORM = SLANST( 'I', LEND-L+1, D( L ), E( L ) ) + ISCALE = 0 + IF( ANORM.EQ.ZERO ) + $ GO TO 10 + IF( ANORM.GT.SSFMAX ) THEN + ISCALE = 1 + CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, + $ INFO ) + ELSE IF( ANORM.LT.SSFMIN ) THEN + ISCALE = 2 + CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, + $ INFO ) + END IF +* +* Choose between QL and QR iteration +* + IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN + LEND = LSV + L = LENDSV + END IF +* + IF( LEND.GT.L ) THEN +* +* QL Iteration +* +* Look for small subdiagonal element. +* + 40 CONTINUE + IF( L.NE.LEND ) THEN + LENDM1 = LEND - 1 + DO 50 M = L, LENDM1 + TST = ABS( E( M ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M+1 ) )+ + $ SAFMIN )GO TO 60 + 50 CONTINUE + END IF +* + M = LEND +* + 60 CONTINUE + IF( M.LT.LEND ) + $ E( M ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 80 +* +* If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 +* to compute its eigensystem. +* + IF( M.EQ.L+1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL SLAEV2( D( L ), E( L ), D( L+1 ), RT1, RT2, C, S ) + WORK( L ) = C + WORK( N-1+L ) = S + CALL CLASR( 'R', 'V', 'B', N, 2, WORK( L ), + $ WORK( N-1+L ), Z( 1, L ), LDZ ) + ELSE + CALL SLAE2( D( L ), E( L ), D( L+1 ), RT1, RT2 ) + END IF + D( L ) = RT1 + D( L+1 ) = RT2 + E( L ) = ZERO + L = L + 2 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L+1 )-P ) / ( TWO*E( L ) ) + R = SLAPY2( G, ONE ) + G = D( M ) - P + ( E( L ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + MM1 = M - 1 + DO 70 I = MM1, L, -1 + F = S*E( I ) + B = C*E( I ) + CALL SLARTG( G, F, C, S, R ) + IF( I.NE.M-1 ) + $ E( I+1 ) = R + G = D( I+1 ) - P + R = ( D( I )-G )*S + TWO*C*B + P = S*R + D( I+1 ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = -S + END IF +* + 70 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = M - L + 1 + CALL CLASR( 'R', 'V', 'B', N, MM, WORK( L ), WORK( N-1+L ), + $ Z( 1, L ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( L ) = G + GO TO 40 +* +* Eigenvalue found. +* + 80 CONTINUE + D( L ) = P +* + L = L + 1 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 +* + ELSE +* +* QR Iteration +* +* Look for small superdiagonal element. +* + 90 CONTINUE + IF( L.NE.LEND ) THEN + LENDP1 = LEND + 1 + DO 100 M = L, LENDP1, -1 + TST = ABS( E( M-1 ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M-1 ) )+ + $ SAFMIN )GO TO 110 + 100 CONTINUE + END IF +* + M = LEND +* + 110 CONTINUE + IF( M.GT.LEND ) + $ E( M-1 ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 130 +* +* If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 +* to compute its eigensystem. +* + IF( M.EQ.L-1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL SLAEV2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2, C, S ) + WORK( M ) = C + WORK( N-1+M ) = S + CALL CLASR( 'R', 'V', 'F', N, 2, WORK( M ), + $ WORK( N-1+M ), Z( 1, L-1 ), LDZ ) + ELSE + CALL SLAE2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2 ) + END IF + D( L-1 ) = RT1 + D( L ) = RT2 + E( L-1 ) = ZERO + L = L - 2 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L-1 )-P ) / ( TWO*E( L-1 ) ) + R = SLAPY2( G, ONE ) + G = D( M ) - P + ( E( L-1 ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + LM1 = L - 1 + DO 120 I = M, LM1 + F = S*E( I ) + B = C*E( I ) + CALL SLARTG( G, F, C, S, R ) + IF( I.NE.M ) + $ E( I-1 ) = R + G = D( I ) - P + R = ( D( I+1 )-G )*S + TWO*C*B + P = S*R + D( I ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = S + END IF +* + 120 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = L - M + 1 + CALL CLASR( 'R', 'V', 'F', N, MM, WORK( M ), WORK( N-1+M ), + $ Z( 1, M ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( LM1 ) = G + GO TO 90 +* +* Eigenvalue found. +* + 130 CONTINUE + D( L ) = P +* + L = L - 1 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 +* + END IF +* +* Undo scaling if necessary +* + 140 CONTINUE + IF( ISCALE.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + ELSE IF( ISCALE.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + END IF +* +* Check for no convergence to an eigenvalue after a total +* of N*MAXIT iterations. +* + IF( JTOT.EQ.NMAXIT ) THEN + DO 150 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 150 CONTINUE + RETURN + END IF + GO TO 10 +* +* Order eigenvalues and eigenvectors. +* + 160 CONTINUE + IF( ICOMPZ.EQ.0 ) THEN +* +* Use Quick Sort +* + CALL SLASRT( 'I', N, D, INFO ) +* + ELSE +* +* Use Selection Sort to minimize swaps of eigenvectors +* + DO 180 II = 2, N + I = II - 1 + K = I + P = D( I ) + DO 170 J = II, N + IF( D( J ).LT.P ) THEN + K = J + P = D( J ) + END IF + 170 CONTINUE + IF( K.NE.I ) THEN + D( K ) = D( I ) + D( I ) = P + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 ) + END IF + 180 CONTINUE + END IF + RETURN +* +* End of CSTEQR +* + END diff --git a/costa/native/external/lapack/csycon.f b/costa/native/external/lapack/csycon.f new file mode 100644 index 000000000..252ad585f --- /dev/null +++ b/costa/native/external/lapack/csycon.f @@ -0,0 +1,159 @@ + SUBROUTINE CSYCON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CSYCON estimates the reciprocal of the condition number (in the +* 1-norm) of a complex symmetric matrix A using the factorization +* A = U*D*U**T or A = L*D*L**T computed by CSYTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by CSYTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by CSYTRF. +* +* ANORM (input) REAL +* The 1-norm of the original matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + REAL AINVNM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACON, CSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L') or inv(U*D*U'). +* + CALL CSYTRS( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of CSYCON +* + END diff --git a/costa/native/external/lapack/csymv.f b/costa/native/external/lapack/csymv.f new file mode 100644 index 000000000..6ff7e760c --- /dev/null +++ b/costa/native/external/lapack/csymv.f @@ -0,0 +1,265 @@ + SUBROUTINE CSYMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCX, INCY, LDA, N + COMPLEX ALPHA, BETA +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* CSYMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n symmetric matrix. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1 +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX array, dimension ( LDA, N ) +* Before entry, with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of A is not referenced. +* Before entry, with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of A is not referenced. +* Unchanged on exit. +* +* LDA - INTEGER +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, N ). +* Unchanged on exit. +* +* X - COMPLEX array, dimension at least +* ( 1 + ( N - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the N- +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - COMPLEX array, dimension at least +* ( 1 + ( N - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. On exit, Y is overwritten by the updated +* vector y. +* +* INCY - INTEGER +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY + COMPLEX TEMP1, TEMP2 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = 1 + ELSE IF( N.LT.0 ) THEN + INFO = 2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = 5 + ELSE IF( INCX.EQ.0 ) THEN + INFO = 7 + ELSE IF( INCY.EQ.0 ) THEN + INFO = 10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ) .OR. ( ( ALPHA.EQ.ZERO ) .AND. ( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 ) THEN + KX = 1 + ELSE + KX = 1 - ( N-1 )*INCX + END IF + IF( INCY.GT.0 ) THEN + KY = 1 + ELSE + KY = 1 - ( N-1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE ) THEN + IF( INCY.EQ.1 ) THEN + IF( BETA.EQ.ZERO ) THEN + DO 10 I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20 I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO ) THEN + DO 30 I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40 I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Form y when A is stored in upper triangle. +* + IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN + DO 60 J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + DO 50 I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( I ) + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*A( J, J ) + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80 J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70 I = 1, J - 1 + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y when A is stored in lower triangle. +* + IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN + DO 100 J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*A( J, J ) + DO 90 I = J + 1, N + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( I ) + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120 J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + IX = JX + IY = JY + DO 110 I = J + 1, N + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of CSYMV +* + END diff --git a/costa/native/external/lapack/csyr.f b/costa/native/external/lapack/csyr.f new file mode 100644 index 000000000..94d69db8b --- /dev/null +++ b/costa/native/external/lapack/csyr.f @@ -0,0 +1,199 @@ + SUBROUTINE CSYR( UPLO, N, ALPHA, X, INCX, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCX, LDA, N + COMPLEX ALPHA +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* CSYR performs the symmetric rank 1 operation +* +* A := alpha*x*( x' ) + A, +* +* where alpha is a complex scalar, x is an n element vector and A is an +* n by n symmetric matrix. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1 +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX array, dimension at least +* ( 1 + ( N - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the N- +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* A - COMPLEX array, dimension ( LDA, N ) +* Before entry, with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of A is not referenced. On exit, the +* upper triangular part of the array A is overwritten by the +* upper triangular part of the updated matrix. +* Before entry, with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of A is not referenced. On exit, the +* lower triangular part of the array A is overwritten by the +* lower triangular part of the updated matrix. +* +* LDA - INTEGER +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, N ). +* Unchanged on exit. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, IX, J, JX, KX + COMPLEX TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = 1 + ELSE IF( N.LT.0 ) THEN + INFO = 2 + ELSE IF( INCX.EQ.0 ) THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = 7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYR ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ) .OR. ( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 ) THEN + KX = 1 - ( N-1 )*INCX + ELSE IF( INCX.NE.1 ) THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Form A when A is stored in upper triangle. +* + IF( INCX.EQ.1 ) THEN + DO 20 J = 1, N + IF( X( J ).NE.ZERO ) THEN + TEMP = ALPHA*X( J ) + DO 10 I = 1, J + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40 J = 1, N + IF( X( JX ).NE.ZERO ) THEN + TEMP = ALPHA*X( JX ) + IX = KX + DO 30 I = 1, J + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in lower triangle. +* + IF( INCX.EQ.1 ) THEN + DO 60 J = 1, N + IF( X( J ).NE.ZERO ) THEN + TEMP = ALPHA*X( J ) + DO 50 I = J, N + A( I, J ) = A( I, J ) + X( I )*TEMP + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80 J = 1, N + IF( X( JX ).NE.ZERO ) THEN + TEMP = ALPHA*X( JX ) + IX = JX + DO 70 I = J, N + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of CSYR +* + END diff --git a/costa/native/external/lapack/csyrfs.f b/costa/native/external/lapack/csyrfs.f new file mode 100644 index 000000000..42a25aa8e --- /dev/null +++ b/costa/native/external/lapack/csyrfs.f @@ -0,0 +1,339 @@ + SUBROUTINE CSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, + $ X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CSYRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is symmetric indefinite, and +* provides error bounds and backward error estimates for the solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The symmetric matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input) COMPLEX array, dimension (LDAF,N) +* The factored form of the matrix A. AF contains the block +* diagonal matrix D and the multipliers used to obtain the +* factor U or L from the factorization A = U*D*U**T or +* A = L*D*L**T as computed by CSYTRF. +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by CSYTRF. +* +* B (input) COMPLEX array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) COMPLEX array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by CSYTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CLACON, CSYMV, CSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL CCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL CSYMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 40 CONTINUE + RWORK( K ) = RWORK( K ) + CABS1( A( K, K ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + CABS1( A( K, K ) )*XK + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL CSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + CALL CAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL CSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL CSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of CSYRFS +* + END diff --git a/costa/native/external/lapack/csysv.f b/costa/native/external/lapack/csysv.f new file mode 100644 index 000000000..7c331a643 --- /dev/null +++ b/costa/native/external/lapack/csysv.f @@ -0,0 +1,171 @@ + SUBROUTINE CSYSV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CSYSV computes the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +* matrices. +* +* The diagonal pivoting method is used to factor A as +* A = U * D * U**T, if UPLO = 'U', or +* A = L * D * L**T, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is symmetric and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then +* used to solve the system of equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the block diagonal matrix D and the +* multipliers used to obtain the factor U or L from the +* factorization A = U*D*U**T or A = L*D*L**T as computed by +* CSYTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D, as +* determined by CSYTRF. If IPIV(k) > 0, then rows and columns +* k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +* diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +* then rows and columns k-1 and -IPIV(k) were interchanged and +* D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +* IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +* -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +* diagonal block. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of WORK. LWORK >= 1, and for best performance +* LWORK >= N*NB, where NB is the optimal blocksize for +* CSYTRF. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, so the solution could not be computed. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSYTRF, CSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'CSYTRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYSV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL CSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CSYSV +* + END diff --git a/costa/native/external/lapack/csysvx.f b/costa/native/external/lapack/csysvx.f new file mode 100644 index 000000000..1612dda3f --- /dev/null +++ b/costa/native/external/lapack/csysvx.f @@ -0,0 +1,299 @@ + SUBROUTINE CSYSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, + $ LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK, + $ RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CSYSVX uses the diagonal pivoting factorization to compute the +* solution to a complex system of linear equations A * X = B, +* where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +* matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'N', the diagonal pivoting method is used to factor A. +* The form of the factorization is +* A = U * D * U**T, if UPLO = 'U', or +* A = L * D * L**T, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is symmetric and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. +* +* 2. If some D(i,i)=0, so that D is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 3. The system of equations is solved for X using the factored form +* of A. +* +* 4. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of A has been +* supplied on entry. +* = 'F': On entry, AF and IPIV contain the factored form +* of A. A, AF and IPIV will not be modified. +* = 'N': The matrix A will be copied to AF and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The symmetric matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input or output) COMPLEX array, dimension (LDAF,N) +* If FACT = 'F', then AF is an input argument and on entry +* contains the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**T or A = L*D*L**T as computed by CSYTRF. +* +* If FACT = 'N', then AF is an output argument and on exit +* returns the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**T or A = L*D*L**T. +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains details of the interchanges and the block structure +* of D, as determined by CSYTRF. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains details of the interchanges and the block structure +* of D, as determined by CSYTRF. +* +* B (input) COMPLEX array, dimension (LDB,NRHS) +* The N-by-NRHS right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) COMPLEX array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) REAL +* The estimate of the reciprocal condition number of the matrix +* A. If RCOND is less than the machine precision (in +* particular, if RCOND = 0), the matrix is singular to working +* precision. This condition is indicated by a return code of +* INFO > 0. +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of WORK. LWORK >= 2*N, and for best performance +* LWORK >= N*NB, where NB is the optimal blocksize for +* CSYTRF. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: D(i,i) is exactly zero. The factorization +* has been completed but the factor D is exactly +* singular, so the solution and error bounds could +* not be computed. RCOND = 0 is returned. +* = N+1: D is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOFACT + INTEGER LWKOPT, NB + REAL ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANSY, SLAMCH + EXTERNAL ILAENV, LSAME, CLANSY, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACPY, CSYCON, CSYRFS, CSYTRF, CSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'CSYTRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYSVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL CLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL CSYTRF( UPLO, N, AF, LDAF, IPIV, WORK, LWORK, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = CLANSY( 'I', UPLO, N, A, LDA, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL CSYCON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution vectors X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CSYTRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL CSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CSYSVX +* + END diff --git a/costa/native/external/lapack/csytf2.f b/costa/native/external/lapack/csytf2.f new file mode 100644 index 000000000..1366519ab --- /dev/null +++ b/costa/native/external/lapack/csytf2.f @@ -0,0 +1,515 @@ + SUBROUTINE CSYTF2( UPLO, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CSYTF2 computes the factorization of a complex symmetric matrix A +* using the Bunch-Kaufman diagonal pivoting method: +* +* A = U*D*U' or A = L*D*L' +* +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, U' is the transpose of U, and D is symmetric and +* block diagonal with 1-by-1 and 2-by-2 diagonal blocks. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L (see below for further details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, D(k,k) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, and division by zero will occur if it +* is used to solve a system of equations. +* +* Further Details +* =============== +* +* 1-96 - Based on modifications by J. Lewis, Boeing Computer Services +* Company +* +* If UPLO = 'U', then A = U*D*U', where +* U = P(n)*U(n)* ... *P(k)U(k)* ..., +* i.e., U is a product of terms P(k)*U(k), where k decreases from n to +* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I v 0 ) k-s +* U(k) = ( 0 I 0 ) s +* ( 0 0 I ) n-k +* k-s s n-k +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +* and A(k,k), and v overwrites A(1:k-2,k-1:k). +* +* If UPLO = 'L', then A = L*D*L', where +* L = P(1)*L(1)* ... *P(k)*L(k)* ..., +* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I 0 0 ) k-1 +* L(k) = ( 0 I 0 ) s +* ( 0 v I ) n-k-s+1 +* k-1 s n-k-s+1 +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KK, KP, KSTEP + REAL ABSAKK, ALPHA, COLMAX, ROWMAX + COMPLEX D11, D12, D21, D22, R1, T, WK, WKM1, WKP1, Z +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + EXTERNAL LSAME, ICAMAX +* .. +* .. External Subroutines .. + EXTERNAL CSCAL, CSWAP, CSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL, SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( REAL( Z ) ) + ABS( AIMAG( Z ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTF2', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U' using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 70 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = ICAMAX( K-1, A( 1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = IMAX + ICAMAX( K-IMAX, A( IMAX, IMAX+1 ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + IF( IMAX.GT.1 ) THEN + JMAX = ICAMAX( IMAX-1, A( 1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL CSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) + CALL CSWAP( KK-KP-1, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)' +* + R1 = CONE / A( K, K ) + CALL CSYR( UPLO, K-1, -R1, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL CSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )' +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )' +* + IF( K.GT.2 ) THEN +* + D12 = A( K-1, K ) + D22 = A( K-1, K-1 ) / D12 + D11 = A( K, K ) / D12 + T = CONE / ( D11*D22-CONE ) + D12 = T / D12 +* + DO 30 J = K - 2, 1, -1 + WKM1 = D12*( D11*A( J, K-1 )-A( J, K ) ) + WK = D12*( D22*A( J, K )-A( J, K-1 ) ) + DO 20 I = J, 1, -1 + A( I, J ) = A( I, J ) - A( I, K )*WK - + $ A( I, K-1 )*WKM1 + 20 CONTINUE + A( J, K ) = WK + A( J, K-1 ) = WKM1 + 30 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L' using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 70 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + ICAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = K - 1 + ICAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + ICAMAX( N-IMAX, A( IMAX+1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) + CALL CSWAP( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)' +* + R1 = CONE / A( K, K ) + CALL CSYR( UPLO, N-K, -R1, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column K +* + CALL CSCAL( N-K, R1, A( K+1, K ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k) +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )' +* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )' +* +* where L(k) and L(k+1) are the k-th and (k+1)-th +* columns of L +* + D21 = A( K+1, K ) + D11 = A( K+1, K+1 ) / D21 + D22 = A( K, K ) / D21 + T = CONE / ( D11*D22-CONE ) + D21 = T / D21 +* + DO 60 J = K + 2, N + WK = D21*( D11*A( J, K )-A( J, K+1 ) ) + WKP1 = D21*( D22*A( J, K+1 )-A( J, K ) ) + DO 50 I = J, N + A( I, J ) = A( I, J ) - A( I, K )*WK - + $ A( I, K+1 )*WKP1 + 50 CONTINUE + A( J, K ) = WK + A( J, K+1 ) = WKP1 + 60 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 40 +* + END IF +* + 70 CONTINUE + RETURN +* +* End of CSYTF2 +* + END diff --git a/costa/native/external/lapack/csytrf.f b/costa/native/external/lapack/csytrf.f new file mode 100644 index 000000000..30a20f502 --- /dev/null +++ b/costa/native/external/lapack/csytrf.f @@ -0,0 +1,287 @@ + SUBROUTINE CSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CSYTRF computes the factorization of a complex symmetric matrix A +* using the Bunch-Kaufman diagonal pivoting method. The form of the +* factorization is +* +* A = U*D*U**T or A = L*D*L**T +* +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is symmetric and block diagonal with +* with 1-by-1 and 2-by-2 diagonal blocks. +* +* This is the blocked version of the algorithm, calling Level 3 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L (see below for further details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of WORK. LWORK >=1. For best performance +* LWORK >= N*NB, where NB is the block size returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, and division by zero will occur if it +* is used to solve a system of equations. +* +* Further Details +* =============== +* +* If UPLO = 'U', then A = U*D*U', where +* U = P(n)*U(n)* ... *P(k)U(k)* ..., +* i.e., U is a product of terms P(k)*U(k), where k decreases from n to +* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I v 0 ) k-s +* U(k) = ( 0 I 0 ) s +* ( 0 0 I ) n-k +* k-s s n-k +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +* and A(k,k), and v overwrites A(1:k-2,k-1:k). +* +* If UPLO = 'L', then A = L*D*L', where +* L = P(1)*L(1)* ... *P(k)*L(k)* ..., +* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I 0 0 ) k-1 +* L(k) = ( 0 I 0 ) s +* ( 0 v I ) n-k-s+1 +* k-1 s n-k-s+1 +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CLASYF, CSYTF2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'CSYTRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'CSYTRF', UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U' using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by CLASYF; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 40 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL CLASYF( UPLO, K, NB, KB, A, LDA, IPIV, WORK, N, IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL CSYTF2( UPLO, K, A, LDA, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L' using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by CLASYF; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL CLASYF( UPLO, N-K+1, NB, KB, A( K, K ), LDA, IPIV( K ), + $ WORK, N, IINFO ) + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL CSYTF2( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), IINFO ) + KB = N - K + 1 + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO 30 J = K, K + KB - 1 + IF( IPIV( J ).GT.0 ) THEN + IPIV( J ) = IPIV( J ) + K - 1 + ELSE + IPIV( J ) = IPIV( J ) - K + 1 + END IF + 30 CONTINUE +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* + END IF +* + 40 CONTINUE + WORK( 1 ) = LWKOPT + RETURN +* +* End of CSYTRF +* + END diff --git a/costa/native/external/lapack/csytri.f b/costa/native/external/lapack/csytri.f new file mode 100644 index 000000000..451176426 --- /dev/null +++ b/costa/native/external/lapack/csytri.f @@ -0,0 +1,314 @@ + SUBROUTINE CSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CSYTRI computes the inverse of a complex symmetric indefinite matrix +* A using the factorization A = U*D*U**T or A = L*D*L**T computed by +* CSYTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the block diagonal matrix D and the multipliers +* used to obtain the factor U or L as computed by CSYTRF. +* +* On exit, if INFO = 0, the (symmetric) inverse of the original +* matrix. If UPLO = 'U', the upper triangular part of the +* inverse is formed and the part of A below the diagonal is not +* referenced; if UPLO = 'L' the lower triangular part of the +* inverse is formed and the part of A above the diagonal is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by CSYTRF. +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +* inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K, KP, KSTEP + COMPLEX AK, AKKP1, AKP1, D, T, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTU + EXTERNAL LSAME, CDOTU +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CSWAP, CSYMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL CCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL CSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - CDOTU( K-1, WORK, 1, A( 1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = A( K, K+1 ) + AK = A( K, K ) / T + AKP1 = A( K+1, K+1 ) / T + AKKP1 = A( K, K+1 ) / T + D = T*( AK*AKP1-ONE ) + A( K, K ) = AKP1 / D + A( K+1, K+1 ) = AK / D + A( K, K+1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL CCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL CSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - CDOTU( K-1, WORK, 1, A( 1, K ), + $ 1 ) + A( K, K+1 ) = A( K, K+1 ) - + $ CDOTU( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 ) + CALL CCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 ) + CALL CSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K+1 ), 1 ) + A( K+1, K+1 ) = A( K+1, K+1 ) - + $ CDOTU( K-1, WORK, 1, A( 1, K+1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + CALL CSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + CALL CSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K+1 ) + A( K, K+1 ) = A( KP, K+1 ) + A( KP, K+1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + GO TO 30 + 40 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 50 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 60 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL CCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL CSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - CDOTU( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = A( K, K-1 ) + AK = A( K-1, K-1 ) / T + AKP1 = A( K, K ) / T + AKKP1 = A( K, K-1 ) / T + D = T*( AK*AKP1-ONE ) + A( K-1, K-1 ) = AKP1 / D + A( K, K ) = AK / D + A( K, K-1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL CCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL CSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - CDOTU( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + A( K, K-1 ) = A( K, K-1 ) - + $ CDOTU( N-K, A( K+1, K ), 1, A( K+1, K-1 ), + $ 1 ) + CALL CCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 ) + CALL CSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K-1 ), 1 ) + A( K-1, K-1 ) = A( K-1, K-1 ) - + $ CDOTU( N-K, WORK, 1, A( K+1, K-1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + CALL CSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K-1 ) + A( K, K-1 ) = A( KP, K-1 ) + A( KP, K-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + GO TO 50 + 60 CONTINUE + END IF +* + RETURN +* +* End of CSYTRI +* + END diff --git a/costa/native/external/lapack/csytrs.f b/costa/native/external/lapack/csytrs.f new file mode 100644 index 000000000..e392dcf65 --- /dev/null +++ b/costa/native/external/lapack/csytrs.f @@ -0,0 +1,370 @@ + SUBROUTINE CSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CSYTRS solves a system of linear equations A*X = B with a complex +* symmetric matrix A using the factorization A = U*D*U**T or +* A = L*D*L**T computed by CSYTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by CSYTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by CSYTRF. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP + COMPLEX AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CGERU, CSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U'. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL CGERU( K-1, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL CSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL CSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL CGERU( K-2, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL CGERU( K-2, NRHS, -ONE, A( 1, K-1 ), 1, B( K-1, 1 ), + $ LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K-1, K ) + AKM1 = A( K-1, K-1 ) / AKM1K + AK = A( K, K ) / AKM1K + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / AKM1K + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U'*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U'(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL CGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ), + $ 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U'(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + CALL CGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ), + $ 1, ONE, B( K, 1 ), LDB ) + CALL CGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, + $ A( 1, K+1 ), 1, ONE, B( K+1, 1 ), LDB ) +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L'. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL CGERU( N-K, NRHS, -ONE, A( K+1, K ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL CSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL CSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL CGERU( N-K-1, NRHS, -ONE, A( K+2, K ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL CGERU( N-K-1, NRHS, -ONE, A( K+2, K+1 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K+1, K ) + AKM1 = A( K, K ) / AKM1K + AK = A( K+1, K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / AKM1K + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L'*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L'(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL CGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L'(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL CGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL CGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K-1 ), 1, ONE, B( K-1, 1 ), + $ LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of CSYTRS +* + END diff --git a/costa/native/external/lapack/ctbcon.f b/costa/native/external/lapack/ctbcon.f new file mode 100644 index 000000000..84beed289 --- /dev/null +++ b/costa/native/external/lapack/ctbcon.f @@ -0,0 +1,205 @@ + SUBROUTINE CTBCON( NORM, UPLO, DIAG, N, KD, AB, LDAB, RCOND, WORK, + $ RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, KD, LDAB, N + REAL RCOND +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX AB( LDAB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CTBCON estimates the reciprocal of the condition number of a +* triangular band matrix A, in either the 1-norm or the infinity-norm. +* +* The norm of A is computed and an estimate is obtained for +* norm(inv(A)), then the reciprocal of the condition number is +* computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals or subdiagonals of the +* triangular band matrix A. KD >= 0. +* +* AB (input) COMPLEX array, dimension (LDAB,N) +* The upper or lower triangular band matrix A, stored in the +* first kd+1 rows of the array. The j-th column of A is stored +* in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* If DIAG = 'U', the diagonal elements of A are not referenced +* and are assumed to be 1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + REAL AINVNM, ANORM, SCALE, SMLNUM, XNORM + COMPLEX ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL CLANTB, SLAMCH + EXTERNAL LSAME, ICAMAX, CLANTB, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACON, CLATBS, CSRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = SLAMCH( 'Safe minimum' )*REAL( MAX( N, 1 ) ) +* +* Compute the 1-norm of the triangular matrix A or A'. +* + ANORM = CLANTB( NORM, UPLO, DIAG, N, KD, AB, LDAB, RWORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the 1-norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL CLATBS( UPLO, 'No transpose', DIAG, NORMIN, N, KD, + $ AB, LDAB, WORK, SCALE, RWORK, INFO ) + ELSE +* +* Multiply by inv(A'). +* + CALL CLATBS( UPLO, 'Conjugate transpose', DIAG, NORMIN, + $ N, KD, AB, LDAB, WORK, SCALE, RWORK, INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = ICAMAX( N, WORK, 1 ) + XNORM = CABS1( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of CTBCON +* + END diff --git a/costa/native/external/lapack/ctbrfs.f b/costa/native/external/lapack/ctbrfs.f new file mode 100644 index 000000000..2b6736a33 --- /dev/null +++ b/costa/native/external/lapack/ctbrfs.f @@ -0,0 +1,393 @@ + SUBROUTINE CTBRFS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, + $ LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, KD, LDAB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX AB( LDAB, * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CTBRFS provides error bounds and backward error estimates for the +* solution to a system of linear equations with a triangular band +* coefficient matrix. +* +* The solution matrix X must be computed by CTBTRS or some other +* means before entering this routine. CTBRFS does not do iterative +* refinement because doing so cannot improve the backward error. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals or subdiagonals of the +* triangular band matrix A. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AB (input) COMPLEX array, dimension (LDAB,N) +* The upper or lower triangular band matrix A, stored in the +* first kd+1 rows of the array. The j-th column of A is stored +* in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* If DIAG = 'U', the diagonal elements of A are not referenced +* and are assumed to be 1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* B (input) COMPLEX array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input) COMPLEX array, dimension (LDX,NRHS) +* The solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANSN, TRANST + INTEGER I, J, K, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CLACON, CTBMV, CTBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, MIN, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = KD + 2 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL CCOPY( N, X( 1, J ), 1, WORK, 1 ) + CALL CTBMV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, WORK, 1 ) + CALL CAXPY( N, -ONE, B( 1, J ), 1, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = CABS1( X( K, J ) ) + DO 30 I = MAX( 1, K-KD ), K + RWORK( I ) = RWORK( I ) + + $ CABS1( AB( KD+1+I-K, K ) )*XK + 30 CONTINUE + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = CABS1( X( K, J ) ) + DO 50 I = MAX( 1, K-KD ), K - 1 + RWORK( I ) = RWORK( I ) + + $ CABS1( AB( KD+1+I-K, K ) )*XK + 50 CONTINUE + RWORK( K ) = RWORK( K ) + XK + 60 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = CABS1( X( K, J ) ) + DO 70 I = K, MIN( N, K+KD ) + RWORK( I ) = RWORK( I ) + + $ CABS1( AB( 1+I-K, K ) )*XK + 70 CONTINUE + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = CABS1( X( K, J ) ) + DO 90 I = K + 1, MIN( N, K+KD ) + RWORK( I ) = RWORK( I ) + + $ CABS1( AB( 1+I-K, K ) )*XK + 90 CONTINUE + RWORK( K ) = RWORK( K ) + XK + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A**H)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = MAX( 1, K-KD ), K + S = S + CABS1( AB( KD+1+I-K, K ) )* + $ CABS1( X( I, J ) ) + 110 CONTINUE + RWORK( K ) = RWORK( K ) + S + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = CABS1( X( K, J ) ) + DO 130 I = MAX( 1, K-KD ), K - 1 + S = S + CABS1( AB( KD+1+I-K, K ) )* + $ CABS1( X( I, J ) ) + 130 CONTINUE + RWORK( K ) = RWORK( K ) + S + 140 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, MIN( N, K+KD ) + S = S + CABS1( AB( 1+I-K, K ) )* + $ CABS1( X( I, J ) ) + 150 CONTINUE + RWORK( K ) = RWORK( K ) + S + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = CABS1( X( K, J ) ) + DO 170 I = K + 1, MIN( N, K+KD ) + S = S + CABS1( AB( 1+I-K, K ) )* + $ CABS1( X( I, J ) ) + 170 CONTINUE + RWORK( K ) = RWORK( K ) + S + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL CTBSV( UPLO, TRANST, DIAG, N, KD, AB, LDAB, WORK, + $ 1 ) + DO 220 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 230 CONTINUE + CALL CTBSV( UPLO, TRANSN, DIAG, N, KD, AB, LDAB, WORK, + $ 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of CTBRFS +* + END diff --git a/costa/native/external/lapack/ctbtrs.f b/costa/native/external/lapack/ctbtrs.f new file mode 100644 index 000000000..67865b341 --- /dev/null +++ b/costa/native/external/lapack/ctbtrs.f @@ -0,0 +1,163 @@ + SUBROUTINE CTBTRS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, + $ LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX AB( LDAB, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CTBTRS solves a triangular system of the form +* +* A * X = B, A**T * X = B, or A**H * X = B, +* +* where A is a triangular band matrix of order N, and B is an +* N-by-NRHS matrix. A check is made to verify that A is nonsingular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals or subdiagonals of the +* triangular band matrix A. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input) COMPLEX array, dimension (LDAB,N) +* The upper or lower triangular band matrix A, stored in the +* first kd+1 rows of AB. The j-th column of A is stored +* in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* If DIAG = 'U', the diagonal elements of A are not referenced +* and are assumed to be 1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, if INFO = 0, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the i-th diagonal element of A is zero, +* indicating that the matrix is singular and the +* solutions X have not been computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CTBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOUNIT = LSAME( DIAG, 'N' ) + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + DO 10 INFO = 1, N + IF( AB( KD+1, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE + DO 20 INFO = 1, N + IF( AB( 1, INFO ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF + END IF + INFO = 0 +* +* Solve A * X = B, A**T * X = B, or A**H * X = B. +* + DO 30 J = 1, NRHS + CALL CTBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, B( 1, J ), 1 ) + 30 CONTINUE +* + RETURN +* +* End of CTBTRS +* + END diff --git a/costa/native/external/lapack/ctgevc.f b/costa/native/external/lapack/ctgevc.f new file mode 100644 index 000000000..faec3da87 --- /dev/null +++ b/costa/native/external/lapack/ctgevc.f @@ -0,0 +1,632 @@ + SUBROUTINE CTGEVC( SIDE, HOWMNY, SELECT, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, MM, M, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDA, LDB, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), B( LDB, * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* +* Purpose +* ======= +* +* CTGEVC computes some or all of the right and/or left generalized +* eigenvectors of a pair of complex upper triangular matrices (A,B). +* +* The right generalized eigenvector x and the left generalized +* eigenvector y of (A,B) corresponding to a generalized eigenvalue +* w are defined by: +* +* (A - wB) * x = 0 and y**H * (A - wB) = 0 +* +* where y**H denotes the conjugate tranpose of y. +* +* If an eigenvalue w is determined by zero diagonal elements of both A +* and B, a unit vector is returned as the corresponding eigenvector. +* +* If all eigenvectors are requested, the routine may either return +* the matrices X and/or Y of right or left eigenvectors of (A,B), or +* the products Z*X and/or Q*Y, where Z and Q are input unitary +* matrices. If (A,B) was obtained from the generalized Schur +* factorization of an original pair of matrices +* (A0,B0) = (Q*A*Z**H,Q*B*Z**H), +* then Z*X and Q*Y are the matrices of right or left eigenvectors of +* A. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'R': compute right eigenvectors only; +* = 'L': compute left eigenvectors only; +* = 'B': compute both right and left eigenvectors. +* +* HOWMNY (input) CHARACTER*1 +* = 'A': compute all right and/or left eigenvectors; +* = 'B': compute all right and/or left eigenvectors, and +* backtransform them using the input matrices supplied +* in VR and/or VL; +* = 'S': compute selected right and/or left eigenvectors, +* specified by the logical array SELECT. +* +* SELECT (input) LOGICAL array, dimension (N) +* If HOWMNY='S', SELECT specifies the eigenvectors to be +* computed. +* If HOWMNY='A' or 'B', SELECT is not referenced. +* To select the eigenvector corresponding to the j-th +* eigenvalue, SELECT(j) must be set to .TRUE.. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The upper triangular matrix A. +* +* LDA (input) INTEGER +* The leading dimension of array A. LDA >= max(1,N). +* +* B (input) COMPLEX array, dimension (LDB,N) +* The upper triangular matrix B. B must have real diagonal +* elements. +* +* LDB (input) INTEGER +* The leading dimension of array B. LDB >= max(1,N). +* +* VL (input/output) COMPLEX array, dimension (LDVL,MM) +* On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +* contain an N-by-N matrix Q (usually the unitary matrix Q +* of left Schur vectors returned by CHGEQZ). +* On exit, if SIDE = 'L' or 'B', VL contains: +* if HOWMNY = 'A', the matrix Y of left eigenvectors of (A,B); +* if HOWMNY = 'B', the matrix Q*Y; +* if HOWMNY = 'S', the left eigenvectors of (A,B) specified by +* SELECT, stored consecutively in the columns of +* VL, in the same order as their eigenvalues. +* If SIDE = 'R', VL is not referenced. +* +* LDVL (input) INTEGER +* The leading dimension of array VL. +* LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise. +* +* VR (input/output) COMPLEX array, dimension (LDVR,MM) +* On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +* contain an N-by-N matrix Q (usually the unitary matrix Z +* of right Schur vectors returned by CHGEQZ). +* On exit, if SIDE = 'R' or 'B', VR contains: +* if HOWMNY = 'A', the matrix X of right eigenvectors of (A,B); +* if HOWMNY = 'B', the matrix Z*X; +* if HOWMNY = 'S', the right eigenvectors of (A,B) specified by +* SELECT, stored consecutively in the columns of +* VR, in the same order as their eigenvalues. +* If SIDE = 'L', VR is not referenced. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. +* LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise. +* +* MM (input) INTEGER +* The number of columns in the arrays VL and/or VR. MM >= M. +* +* M (output) INTEGER +* The number of columns in the arrays VL and/or VR actually +* used to store the eigenvectors. If HOWMNY = 'A' or 'B', M +* is set to N. Each selected eigenvector occupies one column. +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL COMPL, COMPR, ILALL, ILBACK, ILBBAD, ILCOMP, + $ LSA, LSB + INTEGER I, IBEG, IEIG, IEND, IHWMNY, IM, ISIDE, ISRC, + $ J, JE, JR + REAL ACOEFA, ACOEFF, ANORM, ASCALE, BCOEFA, BIG, + $ BIGNUM, BNORM, BSCALE, DMIN, SAFMIN, SBETA, + $ SCALE, SMALL, TEMP, ULP, XMAX + COMPLEX BCOEFF, CA, CB, D, SALPHA, SUM, SUMA, SUMB, X +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + COMPLEX CLADIV + EXTERNAL LSAME, SLAMCH, CLADIV +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, SLABAD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL ABS1 +* .. +* .. Statement Function definitions .. + ABS1( X ) = ABS( REAL( X ) ) + ABS( AIMAG( X ) ) +* .. +* .. Executable Statements .. +* +* Decode and Test the input parameters +* + IF( LSAME( HOWMNY, 'A' ) ) THEN + IHWMNY = 1 + ILALL = .TRUE. + ILBACK = .FALSE. + ELSE IF( LSAME( HOWMNY, 'S' ) ) THEN + IHWMNY = 2 + ILALL = .FALSE. + ILBACK = .FALSE. + ELSE IF( LSAME( HOWMNY, 'B' ) .OR. LSAME( HOWMNY, 'T' ) ) THEN + IHWMNY = 3 + ILALL = .TRUE. + ILBACK = .TRUE. + ELSE + IHWMNY = -1 + END IF +* + IF( LSAME( SIDE, 'R' ) ) THEN + ISIDE = 1 + COMPL = .FALSE. + COMPR = .TRUE. + ELSE IF( LSAME( SIDE, 'L' ) ) THEN + ISIDE = 2 + COMPL = .TRUE. + COMPR = .FALSE. + ELSE IF( LSAME( SIDE, 'B' ) ) THEN + ISIDE = 3 + COMPL = .TRUE. + COMPR = .TRUE. + ELSE + ISIDE = -1 + END IF +* + INFO = 0 + IF( ISIDE.LT.0 ) THEN + INFO = -1 + ELSE IF( IHWMNY.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTGEVC', -INFO ) + RETURN + END IF +* +* Count the number of eigenvectors +* + IF( .NOT.ILALL ) THEN + IM = 0 + DO 10 J = 1, N + IF( SELECT( J ) ) + $ IM = IM + 1 + 10 CONTINUE + ELSE + IM = N + END IF +* +* Check diagonal of B +* + ILBBAD = .FALSE. + DO 20 J = 1, N + IF( AIMAG( B( J, J ) ).NE.ZERO ) + $ ILBBAD = .TRUE. + 20 CONTINUE +* + IF( ILBBAD ) THEN + INFO = -7 + ELSE IF( COMPL .AND. LDVL.LT.N .OR. LDVL.LT.1 ) THEN + INFO = -10 + ELSE IF( COMPR .AND. LDVR.LT.N .OR. LDVR.LT.1 ) THEN + INFO = -12 + ELSE IF( MM.LT.IM ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTGEVC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = IM + IF( N.EQ.0 ) + $ RETURN +* +* Machine Constants +* + SAFMIN = SLAMCH( 'Safe minimum' ) + BIG = ONE / SAFMIN + CALL SLABAD( SAFMIN, BIG ) + ULP = SLAMCH( 'Epsilon' )*SLAMCH( 'Base' ) + SMALL = SAFMIN*N / ULP + BIG = ONE / SMALL + BIGNUM = ONE / ( SAFMIN*N ) +* +* Compute the 1-norm of each column of the strictly upper triangular +* part of A and B to check for possible overflow in the triangular +* solver. +* + ANORM = ABS1( A( 1, 1 ) ) + BNORM = ABS1( B( 1, 1 ) ) + RWORK( 1 ) = ZERO + RWORK( N+1 ) = ZERO + DO 40 J = 2, N + RWORK( J ) = ZERO + RWORK( N+J ) = ZERO + DO 30 I = 1, J - 1 + RWORK( J ) = RWORK( J ) + ABS1( A( I, J ) ) + RWORK( N+J ) = RWORK( N+J ) + ABS1( B( I, J ) ) + 30 CONTINUE + ANORM = MAX( ANORM, RWORK( J )+ABS1( A( J, J ) ) ) + BNORM = MAX( BNORM, RWORK( N+J )+ABS1( B( J, J ) ) ) + 40 CONTINUE +* + ASCALE = ONE / MAX( ANORM, SAFMIN ) + BSCALE = ONE / MAX( BNORM, SAFMIN ) +* +* Left eigenvectors +* + IF( COMPL ) THEN + IEIG = 0 +* +* Main loop over eigenvalues +* + DO 140 JE = 1, N + IF( ILALL ) THEN + ILCOMP = .TRUE. + ELSE + ILCOMP = SELECT( JE ) + END IF + IF( ILCOMP ) THEN + IEIG = IEIG + 1 +* + IF( ABS1( A( JE, JE ) ).LE.SAFMIN .AND. + $ ABS( REAL( B( JE, JE ) ) ).LE.SAFMIN ) THEN +* +* Singular matrix pencil -- return unit eigenvector +* + DO 50 JR = 1, N + VL( JR, IEIG ) = CZERO + 50 CONTINUE + VL( IEIG, IEIG ) = CONE + GO TO 140 + END IF +* +* Non-singular eigenvalue: +* Compute coefficients a and b in +* H +* y ( a A - b B ) = 0 +* + TEMP = ONE / MAX( ABS1( A( JE, JE ) )*ASCALE, + $ ABS( REAL( B( JE, JE ) ) )*BSCALE, SAFMIN ) + SALPHA = ( TEMP*A( JE, JE ) )*ASCALE + SBETA = ( TEMP*REAL( B( JE, JE ) ) )*BSCALE + ACOEFF = SBETA*ASCALE + BCOEFF = SALPHA*BSCALE +* +* Scale to avoid underflow +* + LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEFF ).LT.SMALL + LSB = ABS1( SALPHA ).GE.SAFMIN .AND. ABS1( BCOEFF ).LT. + $ SMALL +* + SCALE = ONE + IF( LSA ) + $ SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG ) + IF( LSB ) + $ SCALE = MAX( SCALE, ( SMALL / ABS1( SALPHA ) )* + $ MIN( BNORM, BIG ) ) + IF( LSA .OR. LSB ) THEN + SCALE = MIN( SCALE, ONE / + $ ( SAFMIN*MAX( ONE, ABS( ACOEFF ), + $ ABS1( BCOEFF ) ) ) ) + IF( LSA ) THEN + ACOEFF = ASCALE*( SCALE*SBETA ) + ELSE + ACOEFF = SCALE*ACOEFF + END IF + IF( LSB ) THEN + BCOEFF = BSCALE*( SCALE*SALPHA ) + ELSE + BCOEFF = SCALE*BCOEFF + END IF + END IF +* + ACOEFA = ABS( ACOEFF ) + BCOEFA = ABS1( BCOEFF ) + XMAX = ONE + DO 60 JR = 1, N + WORK( JR ) = CZERO + 60 CONTINUE + WORK( JE ) = CONE + DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN ) +* +* H +* Triangular solve of (a A - b B) y = 0 +* +* H +* (rowwise in (a A - b B) , or columnwise in a A - b B) +* + DO 100 J = JE + 1, N +* +* Compute +* j-1 +* SUM = sum conjg( a*A(k,j) - b*B(k,j) )*x(k) +* k=je +* (Scale if necessary) +* + TEMP = ONE / XMAX + IF( ACOEFA*RWORK( J )+BCOEFA*RWORK( N+J ).GT.BIGNUM* + $ TEMP ) THEN + DO 70 JR = JE, J - 1 + WORK( JR ) = TEMP*WORK( JR ) + 70 CONTINUE + XMAX = ONE + END IF + SUMA = CZERO + SUMB = CZERO +* + DO 80 JR = JE, J - 1 + SUMA = SUMA + CONJG( A( JR, J ) )*WORK( JR ) + SUMB = SUMB + CONJG( B( JR, J ) )*WORK( JR ) + 80 CONTINUE + SUM = ACOEFF*SUMA - CONJG( BCOEFF )*SUMB +* +* Form x(j) = - SUM / conjg( a*A(j,j) - b*B(j,j) ) +* +* with scaling and perturbation of the denominator +* + D = CONJG( ACOEFF*A( J, J )-BCOEFF*B( J, J ) ) + IF( ABS1( D ).LE.DMIN ) + $ D = CMPLX( DMIN ) +* + IF( ABS1( D ).LT.ONE ) THEN + IF( ABS1( SUM ).GE.BIGNUM*ABS1( D ) ) THEN + TEMP = ONE / ABS1( SUM ) + DO 90 JR = JE, J - 1 + WORK( JR ) = TEMP*WORK( JR ) + 90 CONTINUE + XMAX = TEMP*XMAX + SUM = TEMP*SUM + END IF + END IF + WORK( J ) = CLADIV( -SUM, D ) + XMAX = MAX( XMAX, ABS1( WORK( J ) ) ) + 100 CONTINUE +* +* Back transform eigenvector if HOWMNY='B'. +* + IF( ILBACK ) THEN + CALL CGEMV( 'N', N, N+1-JE, CONE, VL( 1, JE ), LDVL, + $ WORK( JE ), 1, CZERO, WORK( N+1 ), 1 ) + ISRC = 2 + IBEG = 1 + ELSE + ISRC = 1 + IBEG = JE + END IF +* +* Copy and scale eigenvector into column of VL +* + XMAX = ZERO + DO 110 JR = IBEG, N + XMAX = MAX( XMAX, ABS1( WORK( ( ISRC-1 )*N+JR ) ) ) + 110 CONTINUE +* + IF( XMAX.GT.SAFMIN ) THEN + TEMP = ONE / XMAX + DO 120 JR = IBEG, N + VL( JR, IEIG ) = TEMP*WORK( ( ISRC-1 )*N+JR ) + 120 CONTINUE + ELSE + IBEG = N + 1 + END IF +* + DO 130 JR = 1, IBEG - 1 + VL( JR, IEIG ) = CZERO + 130 CONTINUE +* + END IF + 140 CONTINUE + END IF +* +* Right eigenvectors +* + IF( COMPR ) THEN + IEIG = IM + 1 +* +* Main loop over eigenvalues +* + DO 250 JE = N, 1, -1 + IF( ILALL ) THEN + ILCOMP = .TRUE. + ELSE + ILCOMP = SELECT( JE ) + END IF + IF( ILCOMP ) THEN + IEIG = IEIG - 1 +* + IF( ABS1( A( JE, JE ) ).LE.SAFMIN .AND. + $ ABS( REAL( B( JE, JE ) ) ).LE.SAFMIN ) THEN +* +* Singular matrix pencil -- return unit eigenvector +* + DO 150 JR = 1, N + VR( JR, IEIG ) = CZERO + 150 CONTINUE + VR( IEIG, IEIG ) = CONE + GO TO 250 + END IF +* +* Non-singular eigenvalue: +* Compute coefficients a and b in +* +* ( a A - b B ) x = 0 +* + TEMP = ONE / MAX( ABS1( A( JE, JE ) )*ASCALE, + $ ABS( REAL( B( JE, JE ) ) )*BSCALE, SAFMIN ) + SALPHA = ( TEMP*A( JE, JE ) )*ASCALE + SBETA = ( TEMP*REAL( B( JE, JE ) ) )*BSCALE + ACOEFF = SBETA*ASCALE + BCOEFF = SALPHA*BSCALE +* +* Scale to avoid underflow +* + LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEFF ).LT.SMALL + LSB = ABS1( SALPHA ).GE.SAFMIN .AND. ABS1( BCOEFF ).LT. + $ SMALL +* + SCALE = ONE + IF( LSA ) + $ SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG ) + IF( LSB ) + $ SCALE = MAX( SCALE, ( SMALL / ABS1( SALPHA ) )* + $ MIN( BNORM, BIG ) ) + IF( LSA .OR. LSB ) THEN + SCALE = MIN( SCALE, ONE / + $ ( SAFMIN*MAX( ONE, ABS( ACOEFF ), + $ ABS1( BCOEFF ) ) ) ) + IF( LSA ) THEN + ACOEFF = ASCALE*( SCALE*SBETA ) + ELSE + ACOEFF = SCALE*ACOEFF + END IF + IF( LSB ) THEN + BCOEFF = BSCALE*( SCALE*SALPHA ) + ELSE + BCOEFF = SCALE*BCOEFF + END IF + END IF +* + ACOEFA = ABS( ACOEFF ) + BCOEFA = ABS1( BCOEFF ) + XMAX = ONE + DO 160 JR = 1, N + WORK( JR ) = CZERO + 160 CONTINUE + WORK( JE ) = CONE + DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN ) +* +* Triangular solve of (a A - b B) x = 0 (columnwise) +* +* WORK(1:j-1) contains sums w, +* WORK(j+1:JE) contains x +* + DO 170 JR = 1, JE - 1 + WORK( JR ) = ACOEFF*A( JR, JE ) - BCOEFF*B( JR, JE ) + 170 CONTINUE + WORK( JE ) = CONE +* + DO 210 J = JE - 1, 1, -1 +* +* Form x(j) := - w(j) / d +* with scaling and perturbation of the denominator +* + D = ACOEFF*A( J, J ) - BCOEFF*B( J, J ) + IF( ABS1( D ).LE.DMIN ) + $ D = CMPLX( DMIN ) +* + IF( ABS1( D ).LT.ONE ) THEN + IF( ABS1( WORK( J ) ).GE.BIGNUM*ABS1( D ) ) THEN + TEMP = ONE / ABS1( WORK( J ) ) + DO 180 JR = 1, JE + WORK( JR ) = TEMP*WORK( JR ) + 180 CONTINUE + END IF + END IF +* + WORK( J ) = CLADIV( -WORK( J ), D ) +* + IF( J.GT.1 ) THEN +* +* w = w + x(j)*(a A(*,j) - b B(*,j) ) with scaling +* + IF( ABS1( WORK( J ) ).GT.ONE ) THEN + TEMP = ONE / ABS1( WORK( J ) ) + IF( ACOEFA*RWORK( J )+BCOEFA*RWORK( N+J ).GE. + $ BIGNUM*TEMP ) THEN + DO 190 JR = 1, JE + WORK( JR ) = TEMP*WORK( JR ) + 190 CONTINUE + END IF + END IF +* + CA = ACOEFF*WORK( J ) + CB = BCOEFF*WORK( J ) + DO 200 JR = 1, J - 1 + WORK( JR ) = WORK( JR ) + CA*A( JR, J ) - + $ CB*B( JR, J ) + 200 CONTINUE + END IF + 210 CONTINUE +* +* Back transform eigenvector if HOWMNY='B'. +* + IF( ILBACK ) THEN + CALL CGEMV( 'N', N, JE, CONE, VR, LDVR, WORK, 1, + $ CZERO, WORK( N+1 ), 1 ) + ISRC = 2 + IEND = N + ELSE + ISRC = 1 + IEND = JE + END IF +* +* Copy and scale eigenvector into column of VR +* + XMAX = ZERO + DO 220 JR = 1, IEND + XMAX = MAX( XMAX, ABS1( WORK( ( ISRC-1 )*N+JR ) ) ) + 220 CONTINUE +* + IF( XMAX.GT.SAFMIN ) THEN + TEMP = ONE / XMAX + DO 230 JR = 1, IEND + VR( JR, IEIG ) = TEMP*WORK( ( ISRC-1 )*N+JR ) + 230 CONTINUE + ELSE + IEND = 0 + END IF +* + DO 240 JR = IEND + 1, N + VR( JR, IEIG ) = CZERO + 240 CONTINUE +* + END IF + 250 CONTINUE + END IF +* + RETURN +* +* End of CTGEVC +* + END diff --git a/costa/native/external/lapack/ctgex2.f b/costa/native/external/lapack/ctgex2.f new file mode 100644 index 000000000..ef3913a7d --- /dev/null +++ b/costa/native/external/lapack/ctgex2.f @@ -0,0 +1,264 @@ + SUBROUTINE CTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, J1, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER INFO, J1, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CTGEX2 swaps adjacent diagonal 1 by 1 blocks (A11,B11) and (A22,B22) +* in an upper triangular matrix pair (A, B) by an unitary equivalence +* transformation. +* +* (A, B) must be in generalized Schur canonical form, that is, A and +* B are both upper triangular. +* +* Optionally, the matrices Q and Z of generalized Schur vectors are +* updated. +* +* Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)' +* Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)' +* +* +* Arguments +* ========= +* +* WANTQ (input) LOGICAL +* .TRUE. : update the left transformation matrix Q; +* .FALSE.: do not update Q. +* +* WANTZ (input) LOGICAL +* .TRUE. : update the right transformation matrix Z; +* .FALSE.: do not update Z. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) COMPLEX arrays, dimensions (LDA,N) +* On entry, the matrix A in the pair (A, B). +* On exit, the updated matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX arrays, dimensions (LDB,N) +* On entry, the matrix B in the pair (A, B). +* On exit, the updated matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* Q (input/output) COMPLEX array, dimension (LDZ,N) +* If WANTQ = .TRUE, on entry, the unitary matrix Q. On exit, +* the updated matrix Q. +* Not referenced if WANTQ = .FALSE.. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= 1; +* If WANTQ = .TRUE., LDQ >= N. +* +* Z (input/output) COMPLEX array, dimension (LDZ,N) +* If WANTZ = .TRUE, on entry, the unitary matrix Z. On exit, +* the updated matrix Z. +* Not referenced if WANTZ = .FALSE.. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1; +* If WANTZ = .TRUE., LDZ >= N. +* +* J1 (input) INTEGER +* The index to the first block (A11, B11). +* +* INFO (output) INTEGER +* =0: Successful exit. +* =1: The transformed matrix pair (A, B) would be too far +* from generalized Schur form; the problem is ill- +* conditioned. (A, B) may have been partially reordered, +* and ILST points to the first row of the current +* position of the block being moved. +* +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* In the current code both weak and strong stability tests are +* performed. The user can omit the strong stability test by changing +* the internal logical parameter WANDS to .FALSE.. See ref. [2] for +* details. +* +* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +* M.S. Moonen et al (eds), Linear Algebra for Large Scale and +* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +* +* [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +* Eigenvalues of a Regular Matrix Pair (A, B) and Condition +* Estimation: Theory, Algorithms and Software, Report UMINF-94.04, +* Department of Computing Science, Umea University, S-901 87 Umea, +* Sweden, 1994. Also as LAPACK Working Note 87. To appear in +* Numerical Algorithms, 1996. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) + REAL TEN + PARAMETER ( TEN = 10.0E+0 ) + INTEGER LDST + PARAMETER ( LDST = 2 ) + LOGICAL WANDS + PARAMETER ( WANDS = .TRUE. ) +* .. +* .. Local Scalars .. + LOGICAL STRONG, WEAK + INTEGER I, M + REAL CQ, CZ, EPS, SA, SB, SCALE, SMLNUM, SS, SUM, + $ THRESH, WS + COMPLEX CDUM, F, G, SQ, SZ +* .. +* .. Local Arrays .. + COMPLEX S( LDST, LDST ), T( LDST, LDST ), WORK( 8 ) +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACPY, CLARTG, CLASSQ, CROT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, REAL, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + M = LDST + WEAK = .FALSE. + STRONG = .FALSE. +* +* Make a local copy of selected block in (A, B) +* + CALL CLACPY( 'Full', M, M, A( J1, J1 ), LDA, S, LDST ) + CALL CLACPY( 'Full', M, M, B( J1, J1 ), LDB, T, LDST ) +* +* Compute the threshold for testing the acceptance of swapping. +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + SCALE = REAL( CZERO ) + SUM = REAL( CONE ) + CALL CLACPY( 'Full', M, M, S, LDST, WORK, M ) + CALL CLACPY( 'Full', M, M, T, LDST, WORK( M*M+1 ), M ) + CALL CLASSQ( 2*M*M, WORK, 1, SCALE, SUM ) + SA = SCALE*SQRT( SUM ) + THRESH = MAX( TEN*EPS*SA, SMLNUM ) +* +* Compute unitary QL and RQ that swap 1-by-1 and 1-by-1 blocks +* using Givens rotations and perform the swap tentatively. +* + F = S( 2, 2 )*T( 1, 1 ) - T( 2, 2 )*S( 1, 1 ) + G = S( 2, 2 )*T( 1, 2 ) - T( 2, 2 )*S( 1, 2 ) + SA = ABS( S( 2, 2 ) ) + SB = ABS( T( 2, 2 ) ) + CALL CLARTG( G, F, CZ, SZ, CDUM ) + SZ = -SZ + CALL CROT( 2, S( 1, 1 ), 1, S( 1, 2 ), 1, CZ, CONJG( SZ ) ) + CALL CROT( 2, T( 1, 1 ), 1, T( 1, 2 ), 1, CZ, CONJG( SZ ) ) + IF( SA.GE.SB ) THEN + CALL CLARTG( S( 1, 1 ), S( 2, 1 ), CQ, SQ, CDUM ) + ELSE + CALL CLARTG( T( 1, 1 ), T( 2, 1 ), CQ, SQ, CDUM ) + END IF + CALL CROT( 2, S( 1, 1 ), LDST, S( 2, 1 ), LDST, CQ, SQ ) + CALL CROT( 2, T( 1, 1 ), LDST, T( 2, 1 ), LDST, CQ, SQ ) +* +* Weak stability test: |S21| + |T21| <= O(EPS F-norm((S, T))) +* + WS = ABS( S( 2, 1 ) ) + ABS( T( 2, 1 ) ) + WEAK = WS.LE.THRESH + IF( .NOT.WEAK ) + $ GO TO 20 +* + IF( WANDS ) THEN +* +* Strong stability test: +* F-norm((A-QL'*S*QR, B-QL'*T*QR)) <= O(EPS*F-norm((A, B))) +* + CALL CLACPY( 'Full', M, M, S, LDST, WORK, M ) + CALL CLACPY( 'Full', M, M, T, LDST, WORK( M*M+1 ), M ) + CALL CROT( 2, WORK, 1, WORK( 3 ), 1, CZ, -CONJG( SZ ) ) + CALL CROT( 2, WORK( 5 ), 1, WORK( 7 ), 1, CZ, -CONJG( SZ ) ) + CALL CROT( 2, WORK, 2, WORK( 2 ), 2, CQ, -SQ ) + CALL CROT( 2, WORK( 5 ), 2, WORK( 6 ), 2, CQ, -SQ ) + DO 10 I = 1, 2 + WORK( I ) = WORK( I ) - A( J1+I-1, J1 ) + WORK( I+2 ) = WORK( I+2 ) - A( J1+I-1, J1+1 ) + WORK( I+4 ) = WORK( I+4 ) - B( J1+I-1, J1 ) + WORK( I+6 ) = WORK( I+6 ) - B( J1+I-1, J1+1 ) + 10 CONTINUE + SCALE = REAL( CZERO ) + SUM = REAL( CONE ) + CALL CLASSQ( 2*M*M, WORK, 1, SCALE, SUM ) + SS = SCALE*SQRT( SUM ) + STRONG = SS.LE.THRESH + IF( .NOT.STRONG ) + $ GO TO 20 + END IF +* +* If the swap is accepted ("weakly" and "strongly"), apply the +* equivalence transformations to the original matrix pair (A,B) +* + CALL CROT( J1+1, A( 1, J1 ), 1, A( 1, J1+1 ), 1, CZ, CONJG( SZ ) ) + CALL CROT( J1+1, B( 1, J1 ), 1, B( 1, J1+1 ), 1, CZ, CONJG( SZ ) ) + CALL CROT( N-J1+1, A( J1, J1 ), LDA, A( J1+1, J1 ), LDA, CQ, SQ ) + CALL CROT( N-J1+1, B( J1, J1 ), LDB, B( J1+1, J1 ), LDB, CQ, SQ ) +* +* Set N1 by N2 (2,1) blocks to 0 +* + A( J1+1, J1 ) = CZERO + B( J1+1, J1 ) = CZERO +* +* Accumulate transformations into Q and Z if requested. +* + IF( WANTZ ) + $ CALL CROT( N, Z( 1, J1 ), 1, Z( 1, J1+1 ), 1, CZ, CONJG( SZ ) ) + IF( WANTQ ) + $ CALL CROT( N, Q( 1, J1 ), 1, Q( 1, J1+1 ), 1, CQ, CONJG( SQ ) ) +* +* Exit with INFO = 0 if swap was successfully performed. +* + RETURN +* +* Exit with INFO = 1 if swap was rejected. +* + 20 CONTINUE + INFO = 1 + RETURN +* +* End of CTGEX2 +* + END diff --git a/costa/native/external/lapack/ctgexc.f b/costa/native/external/lapack/ctgexc.f new file mode 100644 index 000000000..cbc02ba52 --- /dev/null +++ b/costa/native/external/lapack/ctgexc.f @@ -0,0 +1,207 @@ + SUBROUTINE CTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, IFST, ILST, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER IFST, ILST, INFO, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CTGEXC reorders the generalized Schur decomposition of a complex +* matrix pair (A,B), using an unitary equivalence transformation +* (A, B) := Q * (A, B) * Z', so that the diagonal block of (A, B) with +* row index IFST is moved to row ILST. +* +* (A, B) must be in generalized Schur canonical form, that is, A and +* B are both upper triangular. +* +* Optionally, the matrices Q and Z of generalized Schur vectors are +* updated. +* +* Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)' +* Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)' +* +* Arguments +* ========= +* +* WANTQ (input) LOGICAL +* .TRUE. : update the left transformation matrix Q; +* .FALSE.: do not update Q. +* +* WANTZ (input) LOGICAL +* .TRUE. : update the right transformation matrix Z; +* .FALSE.: do not update Z. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the upper triangular matrix A in the pair (A, B). +* On exit, the updated matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX array, dimension (LDB,N) +* On entry, the upper triangular matrix B in the pair (A, B). +* On exit, the updated matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* Q (input/output) COMPLEX array, dimension (LDZ,N) +* On entry, if WANTQ = .TRUE., the unitary matrix Q. +* On exit, the updated matrix Q. +* If WANTQ = .FALSE., Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= 1; +* If WANTQ = .TRUE., LDQ >= N. +* +* Z (input/output) COMPLEX array, dimension (LDZ,N) +* On entry, if WANTZ = .TRUE., the unitary matrix Z. +* On exit, the updated matrix Z. +* If WANTZ = .FALSE., Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1; +* If WANTZ = .TRUE., LDZ >= N. +* +* IFST (input/output) INTEGER +* ILST (input/output) INTEGER +* Specify the reordering of the diagonal blocks of (A, B). +* The block with row index IFST is moved to row ILST, by a +* sequence of swapping between adjacent blocks. +* +* INFO (output) INTEGER +* =0: Successful exit. +* <0: if INFO = -i, the i-th argument had an illegal value. +* =1: The transformed matrix pair (A, B) would be too far +* from generalized Schur form; the problem is ill- +* conditioned. (A, B) may have been partially reordered, +* and ILST points to the first row of the current +* position of the block being moved. +* +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +* M.S. Moonen et al (eds), Linear Algebra for Large Scale and +* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +* +* [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +* Eigenvalues of a Regular Matrix Pair (A, B) and Condition +* Estimation: Theory, Algorithms and Software, Report +* UMINF - 94.04, Department of Computing Science, Umea University, +* S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87. +* To appear in Numerical Algorithms, 1996. +* +* [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +* for Solving the Generalized Sylvester Equation and Estimating the +* Separation between Regular Matrix Pairs, Report UMINF - 93.23, +* Department of Computing Science, Umea University, S-901 87 Umea, +* Sweden, December 1993, Revised April 1994, Also as LAPACK working +* Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1, +* 1996. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER HERE +* .. +* .. External Subroutines .. + EXTERNAL CTGEX2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode and test input arguments. + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. ( LDQ.LT.MAX( 1, N ) ) ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. WANTZ .AND. ( LDZ.LT.MAX( 1, N ) ) ) THEN + INFO = -11 + ELSE IF( IFST.LT.1 .OR. IFST.GT.N ) THEN + INFO = -12 + ELSE IF( ILST.LT.1 .OR. ILST.GT.N ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTGEXC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN + IF( IFST.EQ.ILST ) + $ RETURN +* + IF( IFST.LT.ILST ) THEN +* + HERE = IFST +* + 10 CONTINUE +* +* Swap with next one below +* + CALL CTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, LDZ, + $ HERE, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 1 + IF( HERE.LT.ILST ) + $ GO TO 10 + HERE = HERE - 1 + ELSE + HERE = IFST - 1 +* + 20 CONTINUE +* +* Swap with next one above +* + CALL CTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, LDZ, + $ HERE, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 1 + IF( HERE.GE.ILST ) + $ GO TO 20 + HERE = HERE + 1 + END IF + ILST = HERE + RETURN +* +* End of CTGEXC +* + END diff --git a/costa/native/external/lapack/ctgsen.f b/costa/native/external/lapack/ctgsen.f new file mode 100644 index 000000000..7fe2cff0e --- /dev/null +++ b/costa/native/external/lapack/ctgsen.f @@ -0,0 +1,642 @@ + SUBROUTINE CTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, + $ ALPHA, BETA, Q, LDQ, Z, LDZ, M, PL, PR, DIF, + $ WORK, LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER IJOB, INFO, LDA, LDB, LDQ, LDZ, LIWORK, LWORK, + $ M, N + REAL PL, PR +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + REAL DIF( * ) + COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), Q( LDQ, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* CTGSEN reorders the generalized Schur decomposition of a complex +* matrix pair (A, B) (in terms of an unitary equivalence trans- +* formation Q' * (A, B) * Z), so that a selected cluster of eigenvalues +* appears in the leading diagonal blocks of the pair (A,B). The leading +* columns of Q and Z form unitary bases of the corresponding left and +* right eigenspaces (deflating subspaces). (A, B) must be in +* generalized Schur canonical form, that is, A and B are both upper +* triangular. +* +* CTGSEN also computes the generalized eigenvalues +* +* w(j)= ALPHA(j) / BETA(j) +* +* of the reordered matrix pair (A, B). +* +* Optionally, the routine computes estimates of reciprocal condition +* numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11), +* (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s) +* between the matrix pairs (A11, B11) and (A22,B22) that correspond to +* the selected cluster and the eigenvalues outside the cluster, resp., +* and norms of "projections" onto left and right eigenspaces w.r.t. +* the selected cluster in the (1,1)-block. +* +* +* Arguments +* ========= +* +* IJOB (input) integer +* Specifies whether condition numbers are required for the +* cluster of eigenvalues (PL and PR) or the deflating subspaces +* (Difu and Difl): +* =0: Only reorder w.r.t. SELECT. No extras. +* =1: Reciprocal of norms of "projections" onto left and right +* eigenspaces w.r.t. the selected cluster (PL and PR). +* =2: Upper bounds on Difu and Difl. F-norm-based estimate +* (DIF(1:2)). +* =3: Estimate of Difu and Difl. 1-norm-based estimate +* (DIF(1:2)). +* About 5 times as expensive as IJOB = 2. +* =4: Compute PL, PR and DIF (i.e. 0, 1 and 2 above): Economic +* version to get it all. +* =5: Compute PL, PR and DIF (i.e. 0, 1 and 3 above) +* +* WANTQ (input) LOGICAL +* .TRUE. : update the left transformation matrix Q; +* .FALSE.: do not update Q. +* +* WANTZ (input) LOGICAL +* .TRUE. : update the right transformation matrix Z; +* .FALSE.: do not update Z. +* +* SELECT (input) LOGICAL array, dimension (N) +* SELECT specifies the eigenvalues in the selected cluster. To +* select an eigenvalue w(j), SELECT(j) must be set to +* .TRUE.. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) COMPLEX array, dimension(LDA,N) +* On entry, the upper triangular matrix A, in generalized +* Schur canonical form. +* On exit, A is overwritten by the reordered matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX array, dimension(LDB,N) +* On entry, the upper triangular matrix B, in generalized +* Schur canonical form. +* On exit, B is overwritten by the reordered matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* ALPHA (output) COMPLEX array, dimension (N) +* BETA (output) COMPLEX array, dimension (N) +* The diagonal elements of A and B, respectively, +* when the pair (A,B) has been reduced to generalized Schur +* form. ALPHA(i)/BETA(i) i=1,...,N are the generalized +* eigenvalues. +* +* Q (input/output) COMPLEX array, dimension (LDQ,N) +* On entry, if WANTQ = .TRUE., Q is an N-by-N matrix. +* On exit, Q has been postmultiplied by the left unitary +* transformation matrix which reorder (A, B); The leading M +* columns of Q form orthonormal bases for the specified pair of +* left eigenspaces (deflating subspaces). +* If WANTQ = .FALSE., Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= 1. +* If WANTQ = .TRUE., LDQ >= N. +* +* Z (input/output) COMPLEX array, dimension (LDZ,N) +* On entry, if WANTZ = .TRUE., Z is an N-by-N matrix. +* On exit, Z has been postmultiplied by the left unitary +* transformation matrix which reorder (A, B); The leading M +* columns of Z form orthonormal bases for the specified pair of +* left eigenspaces (deflating subspaces). +* If WANTZ = .FALSE., Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1. +* If WANTZ = .TRUE., LDZ >= N. +* +* M (output) INTEGER +* The dimension of the specified pair of left and right +* eigenspaces, (deflating subspaces) 0 <= M <= N. +* +* PL, PR (output) REAL +* If IJOB = 1, 4 or 5, PL, PR are lower bounds on the +* reciprocal of the norm of "projections" onto left and right +* eigenspace with respect to the selected cluster. +* 0 < PL, PR <= 1. +* If M = 0 or M = N, PL = PR = 1. +* If IJOB = 0, 2 or 3 PL, PR are not referenced. +* +* DIF (output) REAL array, dimension (2). +* If IJOB >= 2, DIF(1:2) store the estimates of Difu and Difl. +* If IJOB = 2 or 4, DIF(1:2) are F-norm-based upper bounds on +* Difu and Difl. If IJOB = 3 or 5, DIF(1:2) are 1-norm-based +* estimates of Difu and Difl, computed using reversed +* communication with CLACON. +* If M = 0 or N, DIF(1:2) = F-norm([A, B]). +* If IJOB = 0 or 1, DIF is not referenced. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* IF IJOB = 0, WORK is not referenced. Otherwise, +* on exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1 +* If IJOB = 1, 2 or 4, LWORK >= 2*M*(N-M) +* If IJOB = 3 or 5, LWORK >= 4*M*(N-M) +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER, dimension (LIWORK) +* IF IJOB = 0, IWORK is not referenced. Otherwise, +* on exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. LIWORK >= 1. +* If IJOB = 1, 2 or 4, LIWORK >= N+2; +* If IJOB = 3 or 5, LIWORK >= MAX(N+2, 2*M*(N-M)); +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* =0: Successful exit. +* <0: If INFO = -i, the i-th argument had an illegal value. +* =1: Reordering of (A, B) failed because the transformed +* matrix pair (A, B) would be too far from generalized +* Schur form; the problem is very ill-conditioned. +* (A, B) may have been partially reordered. +* If requested, 0 is returned in DIF(*), PL and PR. +* +* +* Further Details +* =============== +* +* CTGSEN first collects the selected eigenvalues by computing unitary +* U and W that move them to the top left corner of (A, B). In other +* words, the selected eigenvalues are the eigenvalues of (A11, B11) in +* +* U'*(A, B)*W = (A11 A12) (B11 B12) n1 +* ( 0 A22),( 0 B22) n2 +* n1 n2 n1 n2 +* +* where N = n1+n2 and U' means the conjugate transpose of U. The first +* n1 columns of U and W span the specified pair of left and right +* eigenspaces (deflating subspaces) of (A, B). +* +* If (A, B) has been obtained from the generalized real Schur +* decomposition of a matrix pair (C, D) = Q*(A, B)*Z', then the +* reordered generalized Schur form of (C, D) is given by +* +* (C, D) = (Q*U)*(U'*(A, B)*W)*(Z*W)', +* +* and the first n1 columns of Q*U and Z*W span the corresponding +* deflating subspaces of (C, D) (Q and Z store Q*U and Z*W, resp.). +* +* Note that if the selected eigenvalue is sufficiently ill-conditioned, +* then its value may differ significantly from its value before +* reordering. +* +* The reciprocal condition numbers of the left and right eigenspaces +* spanned by the first n1 columns of U and W (or Q*U and Z*W) may +* be returned in DIF(1:2), corresponding to Difu and Difl, resp. +* +* The Difu and Difl are defined as: +* +* Difu[(A11, B11), (A22, B22)] = sigma-min( Zu ) +* and +* Difl[(A11, B11), (A22, B22)] = Difu[(A22, B22), (A11, B11)], +* +* where sigma-min(Zu) is the smallest singular value of the +* (2*n1*n2)-by-(2*n1*n2) matrix +* +* Zu = [ kron(In2, A11) -kron(A22', In1) ] +* [ kron(In2, B11) -kron(B22', In1) ]. +* +* Here, Inx is the identity matrix of size nx and A22' is the +* transpose of A22. kron(X, Y) is the Kronecker product between +* the matrices X and Y. +* +* When DIF(2) is small, small changes in (A, B) can cause large changes +* in the deflating subspace. An approximate (asymptotic) bound on the +* maximum angular error in the computed deflating subspaces is +* +* EPS * norm((A, B)) / DIF(2), +* +* where EPS is the machine precision. +* +* The reciprocal norm of the projectors on the left and right +* eigenspaces associated with (A11, B11) may be returned in PL and PR. +* They are computed as follows. First we compute L and R so that +* P*(A, B)*Q is block diagonal, where +* +* P = ( I -L ) n1 Q = ( I R ) n1 +* ( 0 I ) n2 and ( 0 I ) n2 +* n1 n2 n1 n2 +* +* and (L, R) is the solution to the generalized Sylvester equation +* +* A11*R - L*A22 = -A12 +* B11*R - L*B22 = -B12 +* +* Then PL = (F-norm(L)**2+1)**(-1/2) and PR = (F-norm(R)**2+1)**(-1/2). +* An approximate (asymptotic) bound on the average absolute error of +* the selected eigenvalues is +* +* EPS * norm((A, B)) / PL. +* +* There are also global error bounds which valid for perturbations up +* to a certain restriction: A lower bound (x) on the smallest +* F-norm(E,F) for which an eigenvalue of (A11, B11) may move and +* coalesce with an eigenvalue of (A22, B22) under perturbation (E,F), +* (i.e. (A + E, B + F), is +* +* x = min(Difu,Difl)/((1/(PL*PL)+1/(PR*PR))**(1/2)+2*max(1/PL,1/PR)). +* +* An approximate bound on x can be computed from DIF(1:2), PL and PR. +* +* If y = ( F-norm(E,F) / x) <= 1, the angles between the perturbed +* (L', R') and unperturbed (L, R) left and right deflating subspaces +* associated with the selected cluster in the (1,1)-blocks can be +* bounded as +* +* max-angle(L, L') <= arctan( y * PL / (1 - y * (1 - PL * PL)**(1/2)) +* max-angle(R, R') <= arctan( y * PR / (1 - y * (1 - PR * PR)**(1/2)) +* +* See LAPACK User's Guide section 4.11 or the following references +* for more information. +* +* Note that if the default method for computing the Frobenius-norm- +* based estimate DIF is not wanted (see CLATDF), then the parameter +* IDIFJB (see below) should be changed from 3 to 4 (routine CLATDF +* (IJOB = 2 will be used)). See CTGSYL for more details. +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* References +* ========== +* +* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +* M.S. Moonen et al (eds), Linear Algebra for Large Scale and +* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +* +* [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +* Eigenvalues of a Regular Matrix Pair (A, B) and Condition +* Estimation: Theory, Algorithms and Software, Report +* UMINF - 94.04, Department of Computing Science, Umea University, +* S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87. +* To appear in Numerical Algorithms, 1996. +* +* [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +* for Solving the Generalized Sylvester Equation and Estimating the +* Separation between Regular Matrix Pairs, Report UMINF - 93.23, +* Department of Computing Science, Umea University, S-901 87 Umea, +* Sweden, December 1993, Revised April 1994, Also as LAPACK working +* Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1, +* 1996. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IDIFJB + PARAMETER ( IDIFJB = 3 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SWAP, WANTD, WANTD1, WANTD2, WANTP + INTEGER I, IERR, IJB, K, KASE, KS, LIWMIN, LWMIN, MN2, + $ N1, N2 + REAL DSCALE, DSUM, RDSCAL, SAFMIN +* .. +* .. External Subroutines .. + REAL SLAMCH + EXTERNAL CLACON, CLACPY, CLASSQ, CSCAL, CTGEXC, CTGSYL, + $ SLAMCH, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CMPLX, CONJG, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + IF( IJOB.LT.0 .OR. IJOB.GT.5 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -13 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTGSEN', -INFO ) + RETURN + END IF +* + IERR = 0 +* + WANTP = IJOB.EQ.1 .OR. IJOB.GE.4 + WANTD1 = IJOB.EQ.2 .OR. IJOB.EQ.4 + WANTD2 = IJOB.EQ.3 .OR. IJOB.EQ.5 + WANTD = WANTD1 .OR. WANTD2 +* +* Set M to the dimension of the specified pair of deflating +* subspaces. +* + M = 0 + DO 10 K = 1, N + ALPHA( K ) = A( K, K ) + BETA( K ) = B( K, K ) + IF( K.LT.N ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + 10 CONTINUE +* + IF( IJOB.EQ.1 .OR. IJOB.EQ.2 .OR. IJOB.EQ.4 ) THEN + LWMIN = MAX( 1, 2*M*(N-M) ) + LIWMIN = MAX( 1, N+2 ) + ELSE IF( IJOB.EQ.3 .OR. IJOB.EQ.5 ) THEN + LWMIN = MAX( 1, 4*M*(N-M) ) + LIWMIN = MAX( 1, 2*M*(N-M), N+2 ) + ELSE + LWMIN = 1 + LIWMIN = 1 + END IF +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -21 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -23 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTGSEN', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.N .OR. M.EQ.0 ) THEN + IF( WANTP ) THEN + PL = ONE + PR = ONE + END IF + IF( WANTD ) THEN + DSCALE = ZERO + DSUM = ONE + DO 20 I = 1, N + CALL CLASSQ( N, A( 1, I ), 1, DSCALE, DSUM ) + CALL CLASSQ( N, B( 1, I ), 1, DSCALE, DSUM ) + 20 CONTINUE + DIF( 1 ) = DSCALE*SQRT( DSUM ) + DIF( 2 ) = DIF( 1 ) + END IF + GO TO 70 + END IF +* +* Get machine constant +* + SAFMIN = SLAMCH( 'S' ) +* +* Collect the selected blocks at the top-left corner of (A, B). +* + KS = 0 + DO 30 K = 1, N + SWAP = SELECT( K ) + IF( SWAP ) THEN + KS = KS + 1 +* +* Swap the K-th block to position KS. Compute unitary Q +* and Z that will swap adjacent diagonal blocks in (A, B). +* + IF( K.NE.KS ) + $ CALL CTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, K, KS, IERR ) +* + IF( IERR.GT.0 ) THEN +* +* Swap is rejected: exit. +* + INFO = 1 + IF( WANTP ) THEN + PL = ZERO + PR = ZERO + END IF + IF( WANTD ) THEN + DIF( 1 ) = ZERO + DIF( 2 ) = ZERO + END IF + GO TO 70 + END IF + END IF + 30 CONTINUE + IF( WANTP ) THEN +* +* Solve generalized Sylvester equation for R and L: +* A11 * R - L * A22 = A12 +* B11 * R - L * B22 = B12 +* + N1 = M + N2 = N - M + I = N1 + 1 + CALL CLACPY( 'Full', N1, N2, A( 1, I ), LDA, WORK, N1 ) + CALL CLACPY( 'Full', N1, N2, B( 1, I ), LDB, WORK( N1*N2+1 ), + $ N1 ) + IJB = 0 + CALL CTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, + $ N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), N1, + $ DSCALE, DIF( 1 ), WORK( N1*N2*2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) +* +* Estimate the reciprocal of norms of "projections" onto +* left and right eigenspaces +* + RDSCAL = ZERO + DSUM = ONE + CALL CLASSQ( N1*N2, WORK, 1, RDSCAL, DSUM ) + PL = RDSCAL*SQRT( DSUM ) + IF( PL.EQ.ZERO ) THEN + PL = ONE + ELSE + PL = DSCALE / ( SQRT( DSCALE*DSCALE / PL+PL )*SQRT( PL ) ) + END IF + RDSCAL = ZERO + DSUM = ONE + CALL CLASSQ( N1*N2, WORK( N1*N2+1 ), 1, RDSCAL, DSUM ) + PR = RDSCAL*SQRT( DSUM ) + IF( PR.EQ.ZERO ) THEN + PR = ONE + ELSE + PR = DSCALE / ( SQRT( DSCALE*DSCALE / PR+PR )*SQRT( PR ) ) + END IF + END IF + IF( WANTD ) THEN +* +* Compute estimates Difu and Difl. +* + IF( WANTD1 ) THEN + N1 = M + N2 = N - M + I = N1 + 1 + IJB = IDIFJB +* +* Frobenius norm-based Difu estimate. +* + CALL CTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, + $ N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), + $ N1, DSCALE, DIF( 1 ), WORK( N1*N2*2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) +* +* Frobenius norm-based Difl estimate. +* + CALL CTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, WORK, + $ N2, B( I, I ), LDB, B, LDB, WORK( N1*N2+1 ), + $ N2, DSCALE, DIF( 2 ), WORK( N1*N2*2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) + ELSE +* +* Compute 1-norm-based estimates of Difu and Difl using +* reversed communication with CLACON. In each step a +* generalized Sylvester equation or a transposed variant +* is solved. +* + KASE = 0 + N1 = M + N2 = N - M + I = N1 + 1 + IJB = 0 + MN2 = 2*N1*N2 +* +* 1-norm-based estimate of Difu. +* + 40 CONTINUE + CALL CLACON( MN2, WORK( MN2+1 ), WORK, DIF( 1 ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve generalized Sylvester equation +* + CALL CTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, + $ WORK, N1, B, LDB, B( I, I ), LDB, + $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), + $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + ELSE +* +* Solve the transposed variant. +* + CALL CTGSYL( 'C', IJB, N1, N2, A, LDA, A( I, I ), LDA, + $ WORK, N1, B, LDB, B( I, I ), LDB, + $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), + $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + END IF + GO TO 40 + END IF + DIF( 1 ) = DSCALE / DIF( 1 ) +* +* 1-norm-based estimate of Difl. +* + 50 CONTINUE + CALL CLACON( MN2, WORK( MN2+1 ), WORK, DIF( 2 ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve generalized Sylvester equation +* + CALL CTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, + $ WORK, N2, B( I, I ), LDB, B, LDB, + $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), + $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + ELSE +* +* Solve the transposed variant. +* + CALL CTGSYL( 'C', IJB, N2, N1, A( I, I ), LDA, A, LDA, + $ WORK, N2, B, LDB, B( I, I ), LDB, + $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), + $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + END IF + GO TO 50 + END IF + DIF( 2 ) = DSCALE / DIF( 2 ) + END IF + END IF +* +* If B(K,K) is complex, make it real and positive (normalization +* of the generalized Schur form) and Store the generalized +* eigenvalues of reordered pair (A, B) +* + DO 60 K = 1, N + DSCALE = ABS( B( K, K ) ) + IF( DSCALE.GT.SAFMIN ) THEN + WORK( 1 ) = CONJG( B( K, K ) / DSCALE ) + WORK( 2 ) = B( K, K ) / DSCALE + B( K, K ) = DSCALE + CALL CSCAL( N-K, WORK( 1 ), B( K, K+1 ), LDB ) + CALL CSCAL( N-K+1, WORK( 1 ), A( K, K ), LDA ) + IF( WANTQ ) + $ CALL CSCAL( N, WORK( 2 ), Q( 1, K ), 1 ) + ELSE + B( K, K ) = CMPLX( ZERO, ZERO ) + END IF +* + ALPHA( K ) = A( K, K ) + BETA( K ) = B( K, K ) +* + 60 CONTINUE +* + 70 CONTINUE +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of CTGSEN +* + END diff --git a/costa/native/external/lapack/ctgsja.f b/costa/native/external/lapack/ctgsja.f new file mode 100644 index 000000000..e9a54241c --- /dev/null +++ b/costa/native/external/lapack/ctgsja.f @@ -0,0 +1,526 @@ + SUBROUTINE CTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, + $ LDB, TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, + $ Q, LDQ, WORK, NCYCLE, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, + $ NCYCLE, P + REAL TOLA, TOLB +* .. +* .. Array Arguments .. + REAL ALPHA( * ), BETA( * ) + COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CTGSJA computes the generalized singular value decomposition (GSVD) +* of two complex upper triangular (or trapezoidal) matrices A and B. +* +* On entry, it is assumed that matrices A and B have the following +* forms, which may be obtained by the preprocessing subroutine CGGSVP +* from a general M-by-N matrix A and P-by-N matrix B: +* +* N-K-L K L +* A = K ( 0 A12 A13 ) if M-K-L >= 0; +* L ( 0 0 A23 ) +* M-K-L ( 0 0 0 ) +* +* N-K-L K L +* A = K ( 0 A12 A13 ) if M-K-L < 0; +* M-K ( 0 0 A23 ) +* +* N-K-L K L +* B = L ( 0 0 B13 ) +* P-L ( 0 0 0 ) +* +* where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular +* upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, +* otherwise A23 is (M-K)-by-L upper trapezoidal. +* +* On exit, +* +* U'*A*Q = D1*( 0 R ), V'*B*Q = D2*( 0 R ), +* +* where U, V and Q are unitary matrices, Z' denotes the conjugate +* transpose of Z, R is a nonsingular upper triangular matrix, and D1 +* and D2 are ``diagonal'' matrices, which are of the following +* structures: +* +* If M-K-L >= 0, +* +* K L +* D1 = K ( I 0 ) +* L ( 0 C ) +* M-K-L ( 0 0 ) +* +* K L +* D2 = L ( 0 S ) +* P-L ( 0 0 ) +* +* N-K-L K L +* ( 0 R ) = K ( 0 R11 R12 ) K +* L ( 0 0 R22 ) L +* +* where +* +* C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), +* S = diag( BETA(K+1), ... , BETA(K+L) ), +* C**2 + S**2 = I. +* +* R is stored in A(1:K+L,N-K-L+1:N) on exit. +* +* If M-K-L < 0, +* +* K M-K K+L-M +* D1 = K ( I 0 0 ) +* M-K ( 0 C 0 ) +* +* K M-K K+L-M +* D2 = M-K ( 0 S 0 ) +* K+L-M ( 0 0 I ) +* P-L ( 0 0 0 ) +* +* N-K-L K M-K K+L-M +* ( 0 R ) = K ( 0 R11 R12 R13 ) +* M-K ( 0 0 R22 R23 ) +* K+L-M ( 0 0 0 R33 ) +* +* where +* C = diag( ALPHA(K+1), ... , ALPHA(M) ), +* S = diag( BETA(K+1), ... , BETA(M) ), +* C**2 + S**2 = I. +* +* R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored +* ( 0 R22 R23 ) +* in B(M-K+1:L,N+M-K-L+1:N) on exit. +* +* The computation of the unitary transformation matrices U, V or Q +* is optional. These matrices may either be formed explicitly, or they +* may be postmultiplied into input matrices U1, V1, or Q1. +* +* Arguments +* ========= +* +* JOBU (input) CHARACTER*1 +* = 'U': U must contain a unitary matrix U1 on entry, and +* the product U1*U is returned; +* = 'I': U is initialized to the unit matrix, and the +* unitary matrix U is returned; +* = 'N': U is not computed. +* +* JOBV (input) CHARACTER*1 +* = 'V': V must contain a unitary matrix V1 on entry, and +* the product V1*V is returned; +* = 'I': V is initialized to the unit matrix, and the +* unitary matrix V is returned; +* = 'N': V is not computed. +* +* JOBQ (input) CHARACTER*1 +* = 'Q': Q must contain a unitary matrix Q1 on entry, and +* the product Q1*Q is returned; +* = 'I': Q is initialized to the unit matrix, and the +* unitary matrix Q is returned; +* = 'N': Q is not computed. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* P (input) INTEGER +* The number of rows of the matrix B. P >= 0. +* +* N (input) INTEGER +* The number of columns of the matrices A and B. N >= 0. +* +* K (input) INTEGER +* L (input) INTEGER +* K and L specify the subblocks in the input matrices A and B: +* A23 = A(K+1:MIN(K+L,M),N-L+1:N) and B13 = B(1:L,,N-L+1:N) +* of A and B, whose GSVD is going to be computed by CTGSJA. +* See Further details. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A(N-K+1:N,1:MIN(K+L,M) ) contains the triangular +* matrix R or part of R. See Purpose for details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) COMPLEX array, dimension (LDB,N) +* On entry, the P-by-N matrix B. +* On exit, if necessary, B(M-K+1:L,N+M-K-L+1:N) contains +* a part of R. See Purpose for details. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,P). +* +* TOLA (input) REAL +* TOLB (input) REAL +* TOLA and TOLB are the convergence criteria for the Jacobi- +* Kogbetliantz iteration procedure. Generally, they are the +* same as used in the preprocessing step, say +* TOLA = MAX(M,N)*norm(A)*MACHEPS, +* TOLB = MAX(P,N)*norm(B)*MACHEPS. +* +* ALPHA (output) REAL array, dimension (N) +* BETA (output) REAL array, dimension (N) +* On exit, ALPHA and BETA contain the generalized singular +* value pairs of A and B; +* ALPHA(1:K) = 1, +* BETA(1:K) = 0, +* and if M-K-L >= 0, +* ALPHA(K+1:K+L) = diag(C), +* BETA(K+1:K+L) = diag(S), +* or if M-K-L < 0, +* ALPHA(K+1:M)= C, ALPHA(M+1:K+L)= 0 +* BETA(K+1:M) = S, BETA(M+1:K+L) = 1. +* Furthermore, if K+L < N, +* ALPHA(K+L+1:N) = 0 +* BETA(K+L+1:N) = 0. +* +* U (input/output) COMPLEX array, dimension (LDU,M) +* On entry, if JOBU = 'U', U must contain a matrix U1 (usually +* the unitary matrix returned by CGGSVP). +* On exit, +* if JOBU = 'I', U contains the unitary matrix U; +* if JOBU = 'U', U contains the product U1*U. +* If JOBU = 'N', U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= max(1,M) if +* JOBU = 'U'; LDU >= 1 otherwise. +* +* V (input/output) COMPLEX array, dimension (LDV,P) +* On entry, if JOBV = 'V', V must contain a matrix V1 (usually +* the unitary matrix returned by CGGSVP). +* On exit, +* if JOBV = 'I', V contains the unitary matrix V; +* if JOBV = 'V', V contains the product V1*V. +* If JOBV = 'N', V is not referenced. +* +* LDV (input) INTEGER +* The leading dimension of the array V. LDV >= max(1,P) if +* JOBV = 'V'; LDV >= 1 otherwise. +* +* Q (input/output) COMPLEX array, dimension (LDQ,N) +* On entry, if JOBQ = 'Q', Q must contain a matrix Q1 (usually +* the unitary matrix returned by CGGSVP). +* On exit, +* if JOBQ = 'I', Q contains the unitary matrix Q; +* if JOBQ = 'Q', Q contains the product Q1*Q. +* If JOBQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N) if +* JOBQ = 'Q'; LDQ >= 1 otherwise. +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* NCYCLE (output) INTEGER +* The number of cycles required for convergence. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* = 1: the procedure does not converge after MAXIT cycles. +* +* Internal Parameters +* =================== +* +* MAXIT INTEGER +* MAXIT specifies the total loops that the iterative procedure +* may take. If after MAXIT cycles, the routine fails to +* converge, we return INFO = 1. +* +* Further Details +* =============== +* +* CTGSJA essentially uses a variant of Kogbetliantz algorithm to reduce +* min(L,M-K)-by-L triangular (or trapezoidal) matrix A23 and L-by-L +* matrix B13 to the form: +* +* U1'*A13*Q1 = C1*R1; V1'*B13*Q1 = S1*R1, +* +* where U1, V1 and Q1 are unitary matrix, and Z' is the conjugate +* transpose of Z. C1 and S1 are diagonal matrices satisfying +* +* C1**2 + S1**2 = I, +* +* and R1 is an L-by-L nonsingular upper triangular matrix. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXIT + PARAMETER ( MAXIT = 40 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. +* + LOGICAL INITQ, INITU, INITV, UPPER, WANTQ, WANTU, WANTV + INTEGER I, J, KCYCLE + REAL A1, A3, B1, B3, CSQ, CSU, CSV, ERROR, GAMMA, + $ RWK, SSMIN + COMPLEX A2, B2, SNQ, SNU, SNV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLAGS2, CLAPLL, CLASET, CROT, CSSCAL, + $ SLARTG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + INITU = LSAME( JOBU, 'I' ) + WANTU = INITU .OR. LSAME( JOBU, 'U' ) +* + INITV = LSAME( JOBV, 'I' ) + WANTV = INITV .OR. LSAME( JOBV, 'V' ) +* + INITQ = LSAME( JOBQ, 'I' ) + WANTQ = INITQ .OR. LSAME( JOBQ, 'Q' ) +* + INFO = 0 + IF( .NOT.( INITU .OR. WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( INITV .OR. WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( INITQ .OR. WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( P.LT.0 ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -12 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -18 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -20 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -22 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTGSJA', -INFO ) + RETURN + END IF +* +* Initialize U, V and Q, if necessary +* + IF( INITU ) + $ CALL CLASET( 'Full', M, M, CZERO, CONE, U, LDU ) + IF( INITV ) + $ CALL CLASET( 'Full', P, P, CZERO, CONE, V, LDV ) + IF( INITQ ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, Q, LDQ ) +* +* Loop until convergence +* + UPPER = .FALSE. + DO 40 KCYCLE = 1, MAXIT +* + UPPER = .NOT.UPPER +* + DO 20 I = 1, L - 1 + DO 10 J = I + 1, L +* + A1 = ZERO + A2 = CZERO + A3 = ZERO + IF( K+I.LE.M ) + $ A1 = REAL( A( K+I, N-L+I ) ) + IF( K+J.LE.M ) + $ A3 = REAL( A( K+J, N-L+J ) ) +* + B1 = REAL( B( I, N-L+I ) ) + B3 = REAL( B( J, N-L+J ) ) +* + IF( UPPER ) THEN + IF( K+I.LE.M ) + $ A2 = A( K+I, N-L+J ) + B2 = B( I, N-L+J ) + ELSE + IF( K+J.LE.M ) + $ A2 = A( K+J, N-L+I ) + B2 = B( J, N-L+I ) + END IF +* + CALL CLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, + $ CSV, SNV, CSQ, SNQ ) +* +* Update (K+I)-th and (K+J)-th rows of matrix A: U'*A +* + IF( K+J.LE.M ) + $ CALL CROT( L, A( K+J, N-L+1 ), LDA, A( K+I, N-L+1 ), + $ LDA, CSU, CONJG( SNU ) ) +* +* Update I-th and J-th rows of matrix B: V'*B +* + CALL CROT( L, B( J, N-L+1 ), LDB, B( I, N-L+1 ), LDB, + $ CSV, CONJG( SNV ) ) +* +* Update (N-L+I)-th and (N-L+J)-th columns of matrices +* A and B: A*Q and B*Q +* + CALL CROT( MIN( K+L, M ), A( 1, N-L+J ), 1, + $ A( 1, N-L+I ), 1, CSQ, SNQ ) +* + CALL CROT( L, B( 1, N-L+J ), 1, B( 1, N-L+I ), 1, CSQ, + $ SNQ ) +* + IF( UPPER ) THEN + IF( K+I.LE.M ) + $ A( K+I, N-L+J ) = CZERO + B( I, N-L+J ) = CZERO + ELSE + IF( K+J.LE.M ) + $ A( K+J, N-L+I ) = CZERO + B( J, N-L+I ) = CZERO + END IF +* +* Ensure that the diagonal elements of A and B are real. +* + IF( K+I.LE.M ) + $ A( K+I, N-L+I ) = REAL( A( K+I, N-L+I ) ) + IF( K+J.LE.M ) + $ A( K+J, N-L+J ) = REAL( A( K+J, N-L+J ) ) + B( I, N-L+I ) = REAL( B( I, N-L+I ) ) + B( J, N-L+J ) = REAL( B( J, N-L+J ) ) +* +* Update unitary matrices U, V, Q, if desired. +* + IF( WANTU .AND. K+J.LE.M ) + $ CALL CROT( M, U( 1, K+J ), 1, U( 1, K+I ), 1, CSU, + $ SNU ) +* + IF( WANTV ) + $ CALL CROT( P, V( 1, J ), 1, V( 1, I ), 1, CSV, SNV ) +* + IF( WANTQ ) + $ CALL CROT( N, Q( 1, N-L+J ), 1, Q( 1, N-L+I ), 1, CSQ, + $ SNQ ) +* + 10 CONTINUE + 20 CONTINUE +* + IF( .NOT.UPPER ) THEN +* +* The matrices A13 and B13 were lower triangular at the start +* of the cycle, and are now upper triangular. +* +* Convergence test: test the parallelism of the corresponding +* rows of A and B. +* + ERROR = ZERO + DO 30 I = 1, MIN( L, M-K ) + CALL CCOPY( L-I+1, A( K+I, N-L+I ), LDA, WORK, 1 ) + CALL CCOPY( L-I+1, B( I, N-L+I ), LDB, WORK( L+1 ), 1 ) + CALL CLAPLL( L-I+1, WORK, 1, WORK( L+1 ), 1, SSMIN ) + ERROR = MAX( ERROR, SSMIN ) + 30 CONTINUE +* + IF( ABS( ERROR ).LE.MIN( TOLA, TOLB ) ) + $ GO TO 50 + END IF +* +* End of cycle loop +* + 40 CONTINUE +* +* The algorithm has not converged after MAXIT cycles. +* + INFO = 1 + GO TO 100 +* + 50 CONTINUE +* +* If ERROR <= MIN(TOLA,TOLB), then the algorithm has converged. +* Compute the generalized singular value pairs (ALPHA, BETA), and +* set the triangular matrix R to array A. +* + DO 60 I = 1, K + ALPHA( I ) = ONE + BETA( I ) = ZERO + 60 CONTINUE +* + DO 70 I = 1, MIN( L, M-K ) +* + A1 = REAL( A( K+I, N-L+I ) ) + B1 = REAL( B( I, N-L+I ) ) +* + IF( A1.NE.ZERO ) THEN + GAMMA = B1 / A1 +* + IF( GAMMA.LT.ZERO ) THEN + CALL CSSCAL( L-I+1, -ONE, B( I, N-L+I ), LDB ) + IF( WANTV ) + $ CALL CSSCAL( P, -ONE, V( 1, I ), 1 ) + END IF +* + CALL SLARTG( ABS( GAMMA ), ONE, BETA( K+I ), ALPHA( K+I ), + $ RWK ) +* + IF( ALPHA( K+I ).GE.BETA( K+I ) ) THEN + CALL CSSCAL( L-I+1, ONE / ALPHA( K+I ), A( K+I, N-L+I ), + $ LDA ) + ELSE + CALL CSSCAL( L-I+1, ONE / BETA( K+I ), B( I, N-L+I ), + $ LDB ) + CALL CCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ), + $ LDA ) + END IF +* + ELSE + ALPHA( K+I ) = ZERO + BETA( K+I ) = ONE + CALL CCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ), + $ LDA ) + END IF + 70 CONTINUE +* +* Post-assignment +* + DO 80 I = M + 1, K + L + ALPHA( I ) = ZERO + BETA( I ) = ONE + 80 CONTINUE +* + IF( K+L.LT.N ) THEN + DO 90 I = K + L + 1, N + ALPHA( I ) = ZERO + BETA( I ) = ZERO + 90 CONTINUE + END IF +* + 100 CONTINUE + NCYCLE = KCYCLE +* + RETURN +* +* End of CTGSJA +* + END diff --git a/costa/native/external/lapack/ctgsna.f b/costa/native/external/lapack/ctgsna.f new file mode 100644 index 000000000..74b4acfbe --- /dev/null +++ b/costa/native/external/lapack/ctgsna.f @@ -0,0 +1,402 @@ + SUBROUTINE CTGSNA( JOB, HOWMNY, SELECT, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, S, DIF, MM, M, WORK, LWORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, JOB + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + REAL DIF( * ), S( * ) + COMPLEX A( LDA, * ), B( LDB, * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CTGSNA estimates reciprocal condition numbers for specified +* eigenvalues and/or eigenvectors of a matrix pair (A, B). +* +* (A, B) must be in generalized Schur canonical form, that is, A and +* B are both upper triangular. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies whether condition numbers are required for +* eigenvalues (S) or eigenvectors (DIF): +* = 'E': for eigenvalues only (S); +* = 'V': for eigenvectors only (DIF); +* = 'B': for both eigenvalues and eigenvectors (S and DIF). +* +* HOWMNY (input) CHARACTER*1 +* = 'A': compute condition numbers for all eigenpairs; +* = 'S': compute condition numbers for selected eigenpairs +* specified by the array SELECT. +* +* SELECT (input) LOGICAL array, dimension (N) +* If HOWMNY = 'S', SELECT specifies the eigenpairs for which +* condition numbers are required. To select condition numbers +* for the corresponding j-th eigenvalue and/or eigenvector, +* SELECT(j) must be set to .TRUE.. +* If HOWMNY = 'A', SELECT is not referenced. +* +* N (input) INTEGER +* The order of the square matrix pair (A, B). N >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The upper triangular matrix A in the pair (A,B). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input) COMPLEX array, dimension (LDB,N) +* The upper triangular matrix B in the pair (A, B). +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* VL (input) COMPLEX array, dimension (LDVL,M) +* IF JOB = 'E' or 'B', VL must contain left eigenvectors of +* (A, B), corresponding to the eigenpairs specified by HOWMNY +* and SELECT. The eigenvectors must be stored in consecutive +* columns of VL, as returned by CTGEVC. +* If JOB = 'V', VL is not referenced. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. LDVL >= 1; and +* If JOB = 'E' or 'B', LDVL >= N. +* +* VR (input) COMPLEX array, dimension (LDVR,M) +* IF JOB = 'E' or 'B', VR must contain right eigenvectors of +* (A, B), corresponding to the eigenpairs specified by HOWMNY +* and SELECT. The eigenvectors must be stored in consecutive +* columns of VR, as returned by CTGEVC. +* If JOB = 'V', VR is not referenced. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. LDVR >= 1; +* If JOB = 'E' or 'B', LDVR >= N. +* +* S (output) REAL array, dimension (MM) +* If JOB = 'E' or 'B', the reciprocal condition numbers of the +* selected eigenvalues, stored in consecutive elements of the +* array. +* If JOB = 'V', S is not referenced. +* +* DIF (output) REAL array, dimension (MM) +* If JOB = 'V' or 'B', the estimated reciprocal condition +* numbers of the selected eigenvectors, stored in consecutive +* elements of the array. +* If the eigenvalues cannot be reordered to compute DIF(j), +* DIF(j) is set to 0; this can only occur when the true value +* would be very small anyway. +* For each eigenvalue/vector specified by SELECT, DIF stores +* a Frobenius norm-based estimate of Difl. +* If JOB = 'E', DIF is not referenced. +* +* MM (input) INTEGER +* The number of elements in the arrays S and DIF. MM >= M. +* +* M (output) INTEGER +* The number of elements of the arrays S and DIF used to store +* the specified condition numbers; for each selected eigenvalue +* one element is used. If HOWMNY = 'A', M is set to N. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* If JOB = 'E', WORK is not referenced. Otherwise, +* on exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1. +* If JOB = 'V' or 'B', LWORK >= 2*N*N. +* +* IWORK (workspace) INTEGER array, dimension (N+2) +* If JOB = 'E', IWORK is not referenced. +* +* INFO (output) INTEGER +* = 0: Successful exit +* < 0: If INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The reciprocal of the condition number of the i-th generalized +* eigenvalue w = (a, b) is defined as +* +* S(I) = (|v'Au|**2 + |v'Bu|**2)**(1/2) / (norm(u)*norm(v)) +* +* where u and v are the right and left eigenvectors of (A, B) +* corresponding to w; |z| denotes the absolute value of the complex +* number, and norm(u) denotes the 2-norm of the vector u. The pair +* (a, b) corresponds to an eigenvalue w = a/b (= v'Au/v'Bu) of the +* matrix pair (A, B). If both a and b equal zero, then (A,B) is +* singular and S(I) = -1 is returned. +* +* An approximate error bound on the chordal distance between the i-th +* computed generalized eigenvalue w and the corresponding exact +* eigenvalue lambda is +* +* chord(w, lambda) <= EPS * norm(A, B) / S(I), +* +* where EPS is the machine precision. +* +* The reciprocal of the condition number of the right eigenvector u +* and left eigenvector v corresponding to the generalized eigenvalue w +* is defined as follows. Suppose +* +* (A, B) = ( a * ) ( b * ) 1 +* ( 0 A22 ),( 0 B22 ) n-1 +* 1 n-1 1 n-1 +* +* Then the reciprocal condition number DIF(I) is +* +* Difl[(a, b), (A22, B22)] = sigma-min( Zl ) +* +* where sigma-min(Zl) denotes the smallest singular value of +* +* Zl = [ kron(a, In-1) -kron(1, A22) ] +* [ kron(b, In-1) -kron(1, B22) ]. +* +* Here In-1 is the identity matrix of size n-1 and X' is the conjugate +* transpose of X. kron(X, Y) is the Kronecker product between the +* matrices X and Y. +* +* We approximate the smallest singular value of Zl with an upper +* bound. This is done by CLATDF. +* +* An approximate error bound for a computed eigenvector VL(i) or +* VR(i) is given by +* +* EPS * norm(A, B) / DIF(i). +* +* See ref. [2-3] for more details and further references. +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* References +* ========== +* +* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +* M.S. Moonen et al (eds), Linear Algebra for Large Scale and +* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +* +* [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +* Eigenvalues of a Regular Matrix Pair (A, B) and Condition +* Estimation: Theory, Algorithms and Software, Report +* UMINF - 94.04, Department of Computing Science, Umea University, +* S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87. +* To appear in Numerical Algorithms, 1996. +* +* [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +* for Solving the Generalized Sylvester Equation and Estimating the +* Separation between Regular Matrix Pairs, Report UMINF - 93.23, +* Department of Computing Science, Umea University, S-901 87 Umea, +* Sweden, December 1993, Revised April 1994, Also as LAPACK Working +* Note 75. +* To appear in ACM Trans. on Math. Software, Vol 22, No 1, 1996. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + INTEGER IDIFJB + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, IDIFJB = 3 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SOMCON, WANTBH, WANTDF, WANTS + INTEGER I, IERR, IFST, ILST, K, KS, LLWRK, LWMIN, + $ N1, N2 + REAL BIGNUM, COND, EPS, LNRM, RNRM, SCALE, SMLNUM + COMPLEX YHAX, YHBX +* .. +* .. Local Arrays .. + COMPLEX DUMMY( 1 ), DUMMY1( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SCNRM2, SLAMCH, SLAPY2 + COMPLEX CDOTC + EXTERNAL LSAME, SCNRM2, SLAMCH, SLAPY2, CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CLACPY, CTGEXC, CTGSYL, SLABAD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CMPLX, MAX +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTDF = LSAME( JOB, 'V' ) .OR. WANTBH +* + SOMCON = LSAME( HOWMNY, 'S' ) +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) +* + IF( LSAME( JOB, 'V' ) .OR. LSAME( JOB, 'B' ) ) THEN + LWMIN = MAX( 1, 2*N*N ) + ELSE + LWMIN = 1 + END IF +* + IF( .NOT.WANTS .AND. .NOT.WANTDF ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( WANTS .AND. LDVL.LT.N ) THEN + INFO = -10 + ELSE IF( WANTS .AND. LDVR.LT.N ) THEN + INFO = -12 + ELSE +* +* Set M to the number of eigenpairs for which condition numbers +* are required, and test MM. +* + IF( SOMCON ) THEN + M = 0 + DO 10 K = 1, N + IF( SELECT( K ) ) + $ M = M + 1 + 10 CONTINUE + ELSE + M = N + END IF +* + IF( MM.LT.M ) THEN + INFO = -15 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTGSNA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + LLWRK = LWORK - 2*N*N + KS = 0 + DO 20 K = 1, N +* +* Determine whether condition numbers are required for the k-th +* eigenpair. +* + IF( SOMCON ) THEN + IF( .NOT.SELECT( K ) ) + $ GO TO 20 + END IF +* + KS = KS + 1 +* + IF( WANTS ) THEN +* +* Compute the reciprocal condition number of the k-th +* eigenvalue. +* + RNRM = SCNRM2( N, VR( 1, KS ), 1 ) + LNRM = SCNRM2( N, VL( 1, KS ), 1 ) + CALL CGEMV( 'N', N, N, CMPLX( ONE, ZERO ), A, LDA, + $ VR( 1, KS ), 1, CMPLX( ZERO, ZERO ), WORK, 1 ) + YHAX = CDOTC( N, WORK, 1, VL( 1, KS ), 1 ) + CALL CGEMV( 'N', N, N, CMPLX( ONE, ZERO ), B, LDB, + $ VR( 1, KS ), 1, CMPLX( ZERO, ZERO ), WORK, 1 ) + YHBX = CDOTC( N, WORK, 1, VL( 1, KS ), 1 ) + COND = SLAPY2( ABS( YHAX ), ABS( YHBX ) ) + IF( COND.EQ.ZERO ) THEN + S( KS ) = -ONE + ELSE + S( KS ) = COND / ( RNRM*LNRM ) + END IF + END IF +* + IF( WANTDF ) THEN + IF( N.EQ.1 ) THEN + DIF( KS ) = SLAPY2( ABS( A( 1, 1 ) ), ABS( B( 1, 1 ) ) ) + GO TO 20 + END IF +* +* Estimate the reciprocal condition number of the k-th +* eigenvectors. +* +* Copy the matrix (A, B) to the array WORK and move the +* (k,k)th pair to the (1,1) position. +* + CALL CLACPY( 'Full', N, N, A, LDA, WORK, N ) + CALL CLACPY( 'Full', N, N, B, LDB, WORK( N*N+1 ), N ) + IFST = K + ILST = 1 +* + CALL CTGEXC( .FALSE., .FALSE., N, WORK, N, WORK( N*N+1 ), N, + $ DUMMY, 1, DUMMY1, 1, IFST, ILST, IERR ) +* + IF( IERR.GT.0 ) THEN +* +* Ill-conditioned problem - swap rejected. +* + DIF( KS ) = ZERO + ELSE +* +* Reordering successful, solve generalized Sylvester +* equation for R and L, +* A22 * R - L * A11 = A12 +* B22 * R - L * B11 = B12, +* and compute estimate of Difl[(A11,B11), (A22, B22)]. +* + N1 = 1 + N2 = N - N1 + I = N*N + 1 + CALL CTGSYL( 'N', IDIFJB, N2, N1, WORK( N*N1+N1+1 ), N, + $ WORK, N, WORK( N1+1 ), N, WORK( N*N1+N1+I ), + $ N, WORK( I ), N, WORK( N1+I ), N, SCALE, + $ DIF( KS ), WORK( N*N*2+1 ), LLWRK, IWORK, + $ IERR ) + END IF + END IF +* + 20 CONTINUE + WORK( 1 ) = LWMIN + RETURN +* +* End of CTGSNA +* + END diff --git a/costa/native/external/lapack/ctgsy2.f b/costa/native/external/lapack/ctgsy2.f new file mode 100644 index 000000000..575df2494 --- /dev/null +++ b/costa/native/external/lapack/ctgsy2.f @@ -0,0 +1,357 @@ + SUBROUTINE CTGSY2( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, RDSUM, RDSCAL, + $ INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, M, N + REAL RDSCAL, RDSUM, SCALE +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ D( LDD, * ), E( LDE, * ), F( LDF, * ) +* .. +* +* Purpose +* ======= +* +* CTGSY2 solves the generalized Sylvester equation +* +* A * R - L * B = scale * C (1) +* D * R - L * E = scale * F +* +* using Level 1 and 2 BLAS, where R and L are unknown M-by-N matrices, +* (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M, +* N-by-N and M-by-N, respectively. A, B, D and E are upper triangular +* (i.e., (A,D) and (B,E) in generalized Schur form). +* +* The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output +* scaling factor chosen to avoid overflow. +* +* In matrix notation solving equation (1) corresponds to solve +* Zx = scale * b, where Z is defined as +* +* Z = [ kron(In, A) -kron(B', Im) ] (2) +* [ kron(In, D) -kron(E', Im) ], +* +* Ik is the identity matrix of size k and X' is the transpose of X. +* kron(X, Y) is the Kronecker product between the matrices X and Y. +* +* If TRANS = 'C', y in the conjugate transposed system Z'y = scale*b +* is solved for, which is equivalent to solve for R and L in +* +* A' * R + D' * L = scale * C (3) +* R * B' + L * E' = scale * -F +* +* This case is used to compute an estimate of Dif[(A, D), (B, E)] = +* = sigma_min(Z) using reverse communicaton with CLACON. +* +* CTGSY2 also (IJOB >= 1) contributes to the computation in CTGSYL +* of an upper bound on the separation between to matrix pairs. Then +* the input (A, D), (B, E) are sub-pencils of two matrix pairs in +* CTGSYL. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER +* = 'N', solve the generalized Sylvester equation (1). +* = 'T': solve the 'transposed' system (3). +* +* IJOB (input) INTEGER +* Specifies what kind of functionality to be performed. +* =0: solve (1) only. +* =1: A contribution from this subsystem to a Frobenius +* norm-based estimate of the separation between two matrix +* pairs is computed. (look ahead strategy is used). +* =2: A contribution from this subsystem to a Frobenius +* norm-based estimate of the separation between two matrix +* pairs is computed. (SGECON on sub-systems is used.) +* Not referenced if TRANS = 'T'. +* +* M (input) INTEGER +* On entry, M specifies the order of A and D, and the row +* dimension of C, F, R and L. +* +* N (input) INTEGER +* On entry, N specifies the order of B and E, and the column +* dimension of C, F, R and L. +* +* A (input) COMPLEX array, dimension (LDA, M) +* On entry, A contains an upper triangular matrix. +* +* LDA (input) INTEGER +* The leading dimension of the matrix A. LDA >= max(1, M). +* +* B (input) COMPLEX array, dimension (LDB, N) +* On entry, B contains an upper triangular matrix. +* +* LDB (input) INTEGER +* The leading dimension of the matrix B. LDB >= max(1, N). +* +* C (input/ output) COMPLEX array, dimension (LDC, N) +* On entry, C contains the right-hand-side of the first matrix +* equation in (1). +* On exit, if IJOB = 0, C has been overwritten by the solution +* R. +* +* LDC (input) INTEGER +* The leading dimension of the matrix C. LDC >= max(1, M). +* +* D (input) COMPLEX array, dimension (LDD, M) +* On entry, D contains an upper triangular matrix. +* +* LDD (input) INTEGER +* The leading dimension of the matrix D. LDD >= max(1, M). +* +* E (input) COMPLEX array, dimension (LDE, N) +* On entry, E contains an upper triangular matrix. +* +* LDE (input) INTEGER +* The leading dimension of the matrix E. LDE >= max(1, N). +* +* F (input/ output) COMPLEX array, dimension (LDF, N) +* On entry, F contains the right-hand-side of the second matrix +* equation in (1). +* On exit, if IJOB = 0, F has been overwritten by the solution +* L. +* +* LDF (input) INTEGER +* The leading dimension of the matrix F. LDF >= max(1, M). +* +* SCALE (output) REAL +* On exit, 0 <= SCALE <= 1. If 0 < SCALE < 1, the solutions +* R and L (C and F on entry) will hold the solutions to a +* slightly perturbed system but the input matrices A, B, D and +* E have not been changed. If SCALE = 0, R and L will hold the +* solutions to the homogeneous system with C = F = 0. +* Normally, SCALE = 1. +* +* RDSUM (input/output) REAL +* On entry, the sum of squares of computed contributions to +* the Dif-estimate under computation by CTGSYL, where the +* scaling factor RDSCAL (see below) has been factored out. +* On exit, the corresponding sum of squares updated with the +* contributions from the current sub-system. +* If TRANS = 'T' RDSUM is not touched. +* NOTE: RDSUM only makes sense when CTGSY2 is called by +* CTGSYL. +* +* RDSCAL (input/output) REAL +* On entry, scaling factor used to prevent overflow in RDSUM. +* On exit, RDSCAL is updated w.r.t. the current contributions +* in RDSUM. +* If TRANS = 'T', RDSCAL is not touched. +* NOTE: RDSCAL only makes sense when CTGSY2 is called by +* CTGSYL. +* +* INFO (output) INTEGER +* On exit, if INFO is set to +* =0: Successful exit +* <0: If INFO = -i, input argument number i is illegal. +* >0: The matrix pairs (A, D) and (B, E) have common or very +* close eigenvalues. +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + INTEGER LDZ + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, LDZ = 2 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + INTEGER I, IERR, J, K + REAL SCALOC + COMPLEX ALPHA +* .. +* .. Local Arrays .. + INTEGER IPIV( LDZ ), JPIV( LDZ ) + COMPLEX RHS( LDZ ), Z( LDZ, LDZ ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CGESC2, CGETC2, CSCAL, CLATDF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CMPLX, CONJG, MAX +* .. +* .. Executable Statements .. +* +* Decode and test input parameters +* + INFO = 0 + IERR = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.2 ) ) THEN + INFO = -2 + ELSE IF( M.LE.0 ) THEN + INFO = -3 + ELSE IF( N.LE.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDD.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDE.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDF.LT.MAX( 1, M ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTGSY2', -INFO ) + RETURN + END IF +* + IF( NOTRAN ) THEN +* +* Solve (I, J) - system +* A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) +* D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) +* for I = M, M - 1, ..., 1; J = 1, 2, ..., N +* + SCALE = ONE + SCALOC = ONE + DO 30 J = 1, N + DO 20 I = M, 1, -1 +* +* Build 2 by 2 system +* + Z( 1, 1 ) = A( I, I ) + Z( 2, 1 ) = D( I, I ) + Z( 1, 2 ) = -B( J, J ) + Z( 2, 2 ) = -E( J, J ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( I, J ) + RHS( 2 ) = F( I, J ) +* +* Solve Z * x = RHS +* + CALL CGETC2( LDZ, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR + IF( IJOB.EQ.0 ) THEN + CALL CGESC2( LDZ, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 10 K = 1, N + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), C( 1, K ), + $ 1 ) + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), F( 1, K ), + $ 1 ) + 10 CONTINUE + SCALE = SCALE*SCALOC + END IF + ELSE + CALL CLATDF( IJOB, LDZ, Z, LDZ, RHS, RDSUM, RDSCAL, + $ IPIV, JPIV ) + END IF +* +* Unpack solution vector(s) +* + C( I, J ) = RHS( 1 ) + F( I, J ) = RHS( 2 ) +* +* Substitute R(I, J) and L(I, J) into remaining equation. +* + IF( I.GT.1 ) THEN + ALPHA = -RHS( 1 ) + CALL CAXPY( I-1, ALPHA, A( 1, I ), 1, C( 1, J ), 1 ) + CALL CAXPY( I-1, ALPHA, D( 1, I ), 1, F( 1, J ), 1 ) + END IF + IF( J.LT.N ) THEN + CALL CAXPY( N-J, RHS( 2 ), B( J, J+1 ), LDB, + $ C( I, J+1 ), LDC ) + CALL CAXPY( N-J, RHS( 2 ), E( J, J+1 ), LDE, + $ F( I, J+1 ), LDF ) + END IF +* + 20 CONTINUE + 30 CONTINUE + ELSE +* +* Solve transposed (I, J) - system: +* A(I, I)' * R(I, J) + D(I, I)' * L(J, J) = C(I, J) +* R(I, I) * B(J, J) + L(I, J) * E(J, J) = -F(I, J) +* for I = 1, 2, ..., M, J = N, N - 1, ..., 1 +* + SCALE = ONE + SCALOC = ONE + DO 80 I = 1, M + DO 70 J = N, 1, -1 +* +* Build 2 by 2 system Z' +* + Z( 1, 1 ) = CONJG( A( I, I ) ) + Z( 2, 1 ) = -CONJG( B( J, J ) ) + Z( 1, 2 ) = CONJG( D( I, I ) ) + Z( 2, 2 ) = -CONJG( E( J, J ) ) +* +* +* Set up right hand side(s) +* + RHS( 1 ) = C( I, J ) + RHS( 2 ) = F( I, J ) +* +* Solve Z' * x = RHS +* + CALL CGETC2( LDZ, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR + CALL CGESC2( LDZ, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 40 K = 1, N + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), C( 1, K ), + $ 1 ) + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), F( 1, K ), + $ 1 ) + 40 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Unpack solution vector(s) +* + C( I, J ) = RHS( 1 ) + F( I, J ) = RHS( 2 ) +* +* Substitute R(I, J) and L(I, J) into remaining equation. +* + DO 50 K = 1, J - 1 + F( I, K ) = F( I, K ) + RHS( 1 )*CONJG( B( K, J ) ) + + $ RHS( 2 )*CONJG( E( K, J ) ) + 50 CONTINUE + DO 60 K = I + 1, M + C( K, J ) = C( K, J ) - CONJG( A( I, K ) )*RHS( 1 ) - + $ CONJG( D( I, K ) )*RHS( 2 ) + 60 CONTINUE +* + 70 CONTINUE + 80 CONTINUE + END IF + RETURN +* +* End of CTGSY2 +* + END diff --git a/costa/native/external/lapack/ctgsyl.f b/costa/native/external/lapack/ctgsyl.f new file mode 100644 index 000000000..f7cdb98be --- /dev/null +++ b/costa/native/external/lapack/ctgsyl.f @@ -0,0 +1,548 @@ + SUBROUTINE CTGSYL( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, DIF, WORK, LWORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, + $ LWORK, M, N + REAL DIF, SCALE +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ D( LDD, * ), E( LDE, * ), F( LDF, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* CTGSYL solves the generalized Sylvester equation: +* +* A * R - L * B = scale * C (1) +* D * R - L * E = scale * F +* +* where R and L are unknown m-by-n matrices, (A, D), (B, E) and +* (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n, +* respectively, with complex entries. A, B, D and E are upper +* triangular (i.e., (A,D) and (B,E) in generalized Schur form). +* +* The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 +* is an output scaling factor chosen to avoid overflow. +* +* In matrix notation (1) is equivalent to solve Zx = scale*b, where Z +* is defined as +* +* Z = [ kron(In, A) -kron(B', Im) ] (2) +* [ kron(In, D) -kron(E', Im) ], +* +* Here Ix is the identity matrix of size x and X' is the conjugate +* transpose of X. Kron(X, Y) is the Kronecker product between the +* matrices X and Y. +* +* If TRANS = 'C', y in the conjugate transposed system Z'*y = scale*b +* is solved for, which is equivalent to solve for R and L in +* +* A' * R + D' * L = scale * C (3) +* R * B' + L * E' = scale * -F +* +* This case (TRANS = 'C') is used to compute an one-norm-based estimate +* of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D) +* and (B,E), using CLACON. +* +* If IJOB >= 1, CTGSYL computes a Frobenius norm-based estimate of +* Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the +* reciprocal of the smallest singular value of Z. +* +* This is a level-3 BLAS algorithm. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* = 'N': solve the generalized sylvester equation (1). +* = 'C': solve the "conjugate transposed" system (3). +* +* IJOB (input) INTEGER +* Specifies what kind of functionality to be performed. +* =0: solve (1) only. +* =1: The functionality of 0 and 3. +* =2: The functionality of 0 and 4. +* =3: Only an estimate of Dif[(A,D), (B,E)] is computed. +* (look ahead strategy is used). +* =4: Only an estimate of Dif[(A,D), (B,E)] is computed. +* (CGECON on sub-systems is used). +* Not referenced if TRANS = 'C'. +* +* M (input) INTEGER +* The order of the matrices A and D, and the row dimension of +* the matrices C, F, R and L. +* +* N (input) INTEGER +* The order of the matrices B and E, and the column dimension +* of the matrices C, F, R and L. +* +* A (input) COMPLEX array, dimension (LDA, M) +* The upper triangular matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1, M). +* +* B (input) COMPLEX array, dimension (LDB, N) +* The upper triangular matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1, N). +* +* C (input/output) COMPLEX array, dimension (LDC, N) +* On entry, C contains the right-hand-side of the first matrix +* equation in (1) or (3). +* On exit, if IJOB = 0, 1 or 2, C has been overwritten by +* the solution R. If IJOB = 3 or 4 and TRANS = 'N', C holds R, +* the solution achieved during the computation of the +* Dif-estimate. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1, M). +* +* D (input) COMPLEX array, dimension (LDD, M) +* The upper triangular matrix D. +* +* LDD (input) INTEGER +* The leading dimension of the array D. LDD >= max(1, M). +* +* E (input) COMPLEX array, dimension (LDE, N) +* The upper triangular matrix E. +* +* LDE (input) INTEGER +* The leading dimension of the array E. LDE >= max(1, N). +* +* F (input/output) COMPLEX array, dimension (LDF, N) +* On entry, F contains the right-hand-side of the second matrix +* equation in (1) or (3). +* On exit, if IJOB = 0, 1 or 2, F has been overwritten by +* the solution L. If IJOB = 3 or 4 and TRANS = 'N', F holds L, +* the solution achieved during the computation of the +* Dif-estimate. +* +* LDF (input) INTEGER +* The leading dimension of the array F. LDF >= max(1, M). +* +* DIF (output) REAL +* On exit DIF is the reciprocal of a lower bound of the +* reciprocal of the Dif-function, i.e. DIF is an upper bound of +* Dif[(A,D), (B,E)] = sigma-min(Z), where Z as in (2). +* IF IJOB = 0 or TRANS = 'C', DIF is not referenced. +* +* SCALE (output) REAL +* On exit SCALE is the scaling factor in (1) or (3). +* If 0 < SCALE < 1, C and F hold the solutions R and L, resp., +* to a slightly perturbed system but the input matrices A, B, +* D and E have not been changed. If SCALE = 0, R and L will +* hold the solutions to the homogenious system with C = F = 0. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* IF IJOB = 0, WORK is not referenced. Otherwise, +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK > = 1. +* If IJOB = 1 or 2 and TRANS = 'N', LWORK >= 2*M*N. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace) INTEGER array, dimension (M+N+2) +* If IJOB = 0, IWORK is not referenced. +* +* INFO (output) INTEGER +* =0: successful exit +* <0: If INFO = -i, the i-th argument had an illegal value. +* >0: (A, D) and (B, E) have common or very close +* eigenvalues. +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* [1] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +* for Solving the Generalized Sylvester Equation and Estimating the +* Separation between Regular Matrix Pairs, Report UMINF - 93.23, +* Department of Computing Science, Umea University, S-901 87 Umea, +* Sweden, December 1993, Revised April 1994, Also as LAPACK Working +* Note 75. To appear in ACM Trans. on Math. Software, Vol 22, +* No 1, 1996. +* +* [2] B. Kagstrom, A Perturbation Analysis of the Generalized Sylvester +* Equation (AR - LB, DR - LE ) = (C, F), SIAM J. Matrix Anal. +* Appl., 15(4):1045-1060, 1994. +* +* [3] B. Kagstrom and L. Westin, Generalized Schur Methods with +* Condition Estimators for Solving the Generalized Sylvester +* Equation, IEEE Transactions on Automatic Control, Vol. 34, No. 7, +* July 1989, pp 745-751. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOTRAN + INTEGER I, IE, IFUNC, IROUND, IS, ISOLVE, J, JE, JS, K, + $ LINFO, LWMIN, MB, NB, P, PQ, Q + REAL DSCALE, DSUM, SCALE2, SCALOC +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMM, CLACPY, CSCAL, CTGSY2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CMPLX, MAX, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test input parameters +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( ( IJOB.EQ.1 .OR. IJOB.EQ.2 ) .AND. NOTRAN ) THEN + LWMIN = MAX( 1, 2*M*N ) + ELSE + LWMIN = 1 + END IF +* + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.4 ) ) THEN + INFO = -2 + ELSE IF( M.LE.0 ) THEN + INFO = -3 + ELSE IF( N.LE.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDD.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDE.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDF.LT.MAX( 1, M ) ) THEN + INFO = -16 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTGSYL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Determine optimal block sizes MB and NB +* + MB = ILAENV( 2, 'CTGSYL', TRANS, M, N, -1, -1 ) + NB = ILAENV( 5, 'CTGSYL', TRANS, M, N, -1, -1 ) +* + ISOLVE = 1 + IFUNC = 0 + IF( IJOB.GE.3 .AND. NOTRAN ) THEN + IFUNC = IJOB - 2 + DO 10 J = 1, N + CALL CCOPY( M, CMPLX( ZERO, ZERO ), 0, C( 1, J ), 1 ) + CALL CCOPY( M, CMPLX( ZERO, ZERO ), 0, F( 1, J ), 1 ) + 10 CONTINUE + ELSE IF( IJOB.GE.1 .AND. NOTRAN ) THEN + ISOLVE = 2 + END IF +* + IF( ( MB.LE.1 .AND. NB.LE.1 ) .OR. ( MB.GE.M .AND. NB.GE.N ) ) + $ THEN +* +* Use unblocked Level 2 solver +* + DO 30 IROUND = 1, ISOLVE +* + SCALE = ONE + DSCALE = ZERO + DSUM = ONE + PQ = M*N + CALL CTGSY2( TRANS, IFUNC, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, DSUM, DSCALE, + $ INFO ) + IF( DSCALE.NE.ZERO ) THEN + IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN + DIF = SQRT( REAL( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) ) + ELSE + DIF = SQRT( REAL( PQ ) ) / ( DSCALE*SQRT( DSUM ) ) + END IF + END IF + IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN + IFUNC = IJOB + SCALE2 = SCALE + CALL CLACPY( 'F', M, N, C, LDC, WORK, M ) + CALL CLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M ) + DO 20 J = 1, N + CALL CCOPY( M, CMPLX( ZERO, ZERO ), 0, C( 1, J ), 1 ) + CALL CCOPY( M, CMPLX( ZERO, ZERO ), 0, F( 1, J ), 1 ) + 20 CONTINUE + ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN + CALL CLACPY( 'F', M, N, WORK, M, C, LDC ) + CALL CLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF ) + SCALE = SCALE2 + END IF + 30 CONTINUE +* + RETURN +* + END IF +* +* Determine block structure of A +* + P = 0 + I = 1 + 40 CONTINUE + IF( I.GT.M ) + $ GO TO 50 + P = P + 1 + IWORK( P ) = I + I = I + MB + IF( I.GE.M ) + $ GO TO 50 + GO TO 40 + 50 CONTINUE + IWORK( P+1 ) = M + 1 + IF( IWORK( P ).EQ.IWORK( P+1 ) ) + $ P = P - 1 +* +* Determine block structure of B +* + Q = P + 1 + J = 1 + 60 CONTINUE + IF( J.GT.N ) + $ GO TO 70 +* + Q = Q + 1 + IWORK( Q ) = J + J = J + NB + IF( J.GE.N ) + $ GO TO 70 + GO TO 60 +* + 70 CONTINUE + IWORK( Q+1 ) = N + 1 + IF( IWORK( Q ).EQ.IWORK( Q+1 ) ) + $ Q = Q - 1 +* + IF( NOTRAN ) THEN + DO 150 IROUND = 1, ISOLVE +* +* Solve (I, J) - subsystem +* A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) +* D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) +* for I = P, P - 1, ..., 1; J = 1, 2, ..., Q +* + PQ = 0 + SCALE = ONE + DSCALE = ZERO + DSUM = ONE + DO 130 J = P + 2, Q + JS = IWORK( J ) + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + DO 120 I = P, 1, -1 + IS = IWORK( I ) + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + CALL CTGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA, + $ B( JS, JS ), LDB, C( IS, JS ), LDC, + $ D( IS, IS ), LDD, E( JS, JS ), LDE, + $ F( IS, JS ), LDF, SCALOC, DSUM, DSCALE, + $ LINFO ) + IF( LINFO.GT.0 ) + $ INFO = LINFO + PQ = PQ + MB*NB + IF( SCALOC.NE.ONE ) THEN + DO 80 K = 1, JS - 1 + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), C( 1, K ), + $ 1 ) + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), F( 1, K ), + $ 1 ) + 80 CONTINUE + DO 90 K = JS, JE + CALL CSCAL( IS-1, CMPLX( SCALOC, ZERO ), + $ C( 1, K ), 1 ) + CALL CSCAL( IS-1, CMPLX( SCALOC, ZERO ), + $ F( 1, K ), 1 ) + 90 CONTINUE + DO 100 K = JS, JE + CALL CSCAL( M-IE, CMPLX( SCALOC, ZERO ), + $ C( IE+1, K ), 1 ) + CALL CSCAL( M-IE, CMPLX( SCALOC, ZERO ), + $ F( IE+1, K ), 1 ) + 100 CONTINUE + DO 110 K = JE + 1, N + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), C( 1, K ), + $ 1 ) + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), F( 1, K ), + $ 1 ) + 110 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Substitute R(I,J) and L(I,J) into remaining equation. +* + IF( I.GT.1 ) THEN + CALL CGEMM( 'N', 'N', IS-1, NB, MB, + $ CMPLX( -ONE, ZERO ), A( 1, IS ), LDA, + $ C( IS, JS ), LDC, CMPLX( ONE, ZERO ), + $ C( 1, JS ), LDC ) + CALL CGEMM( 'N', 'N', IS-1, NB, MB, + $ CMPLX( -ONE, ZERO ), D( 1, IS ), LDD, + $ C( IS, JS ), LDC, CMPLX( ONE, ZERO ), + $ F( 1, JS ), LDF ) + END IF + IF( J.LT.Q ) THEN + CALL CGEMM( 'N', 'N', MB, N-JE, NB, + $ CMPLX( ONE, ZERO ), F( IS, JS ), LDF, + $ B( JS, JE+1 ), LDB, CMPLX( ONE, ZERO ), + $ C( IS, JE+1 ), LDC ) + CALL CGEMM( 'N', 'N', MB, N-JE, NB, + $ CMPLX( ONE, ZERO ), F( IS, JS ), LDF, + $ E( JS, JE+1 ), LDE, CMPLX( ONE, ZERO ), + $ F( IS, JE+1 ), LDF ) + END IF + 120 CONTINUE + 130 CONTINUE + IF( DSCALE.NE.ZERO ) THEN + IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN + DIF = SQRT( REAL( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) ) + ELSE + DIF = SQRT( REAL( PQ ) ) / ( DSCALE*SQRT( DSUM ) ) + END IF + END IF + IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN + IFUNC = IJOB + SCALE2 = SCALE + CALL CLACPY( 'F', M, N, C, LDC, WORK, M ) + CALL CLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M ) + DO 140 J = 1, N + CALL CCOPY( M, CMPLX( ZERO, ZERO ), 0, C( 1, J ), 1 ) + CALL CCOPY( M, CMPLX( ZERO, ZERO ), 0, F( 1, J ), 1 ) + 140 CONTINUE + ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN + CALL CLACPY( 'F', M, N, WORK, M, C, LDC ) + CALL CLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF ) + SCALE = SCALE2 + END IF + 150 CONTINUE + ELSE +* +* Solve transposed (I, J)-subsystem +* A(I, I)' * R(I, J) + D(I, I)' * L(I, J) = C(I, J) +* R(I, J) * B(J, J) + L(I, J) * E(J, J) = -F(I, J) +* for I = 1,2,..., P; J = Q, Q-1,..., 1 +* + SCALE = ONE + DO 210 I = 1, P + IS = IWORK( I ) + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + DO 200 J = Q, P + 2, -1 + JS = IWORK( J ) + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + CALL CTGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA, + $ B( JS, JS ), LDB, C( IS, JS ), LDC, + $ D( IS, IS ), LDD, E( JS, JS ), LDE, + $ F( IS, JS ), LDF, SCALOC, DSUM, DSCALE, + $ LINFO ) + IF( LINFO.GT.0 ) + $ INFO = LINFO + IF( SCALOC.NE.ONE ) THEN + DO 160 K = 1, JS - 1 + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), C( 1, K ), + $ 1 ) + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), F( 1, K ), + $ 1 ) + 160 CONTINUE + DO 170 K = JS, JE + CALL CSCAL( IS-1, CMPLX( SCALOC, ZERO ), C( 1, K ), + $ 1 ) + CALL CSCAL( IS-1, CMPLX( SCALOC, ZERO ), F( 1, K ), + $ 1 ) + 170 CONTINUE + DO 180 K = JS, JE + CALL CSCAL( M-IE, CMPLX( SCALOC, ZERO ), + $ C( IE+1, K ), 1 ) + CALL CSCAL( M-IE, CMPLX( SCALOC, ZERO ), + $ F( IE+1, K ), 1 ) + 180 CONTINUE + DO 190 K = JE + 1, N + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), C( 1, K ), + $ 1 ) + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), F( 1, K ), + $ 1 ) + 190 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Substitute R(I,J) and L(I,J) into remaining equation. +* + IF( J.GT.P+2 ) THEN + CALL CGEMM( 'N', 'C', MB, JS-1, NB, + $ CMPLX( ONE, ZERO ), C( IS, JS ), LDC, + $ B( 1, JS ), LDB, CMPLX( ONE, ZERO ), + $ F( IS, 1 ), LDF ) + CALL CGEMM( 'N', 'C', MB, JS-1, NB, + $ CMPLX( ONE, ZERO ), F( IS, JS ), LDF, + $ E( 1, JS ), LDE, CMPLX( ONE, ZERO ), + $ F( IS, 1 ), LDF ) + END IF + IF( I.LT.P ) THEN + CALL CGEMM( 'C', 'N', M-IE, NB, MB, + $ CMPLX( -ONE, ZERO ), A( IS, IE+1 ), LDA, + $ C( IS, JS ), LDC, CMPLX( ONE, ZERO ), + $ C( IE+1, JS ), LDC ) + CALL CGEMM( 'C', 'N', M-IE, NB, MB, + $ CMPLX( -ONE, ZERO ), D( IS, IE+1 ), LDD, + $ F( IS, JS ), LDF, CMPLX( ONE, ZERO ), + $ C( IE+1, JS ), LDC ) + END IF + 200 CONTINUE + 210 CONTINUE + END IF +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of CTGSYL +* + END diff --git a/costa/native/external/lapack/ctpcon.f b/costa/native/external/lapack/ctpcon.f new file mode 100644 index 000000000..46982d397 --- /dev/null +++ b/costa/native/external/lapack/ctpcon.f @@ -0,0 +1,194 @@ + SUBROUTINE CTPCON( NORM, UPLO, DIAG, N, AP, RCOND, WORK, RWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, N + REAL RCOND +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX AP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CTPCON estimates the reciprocal of the condition number of a packed +* triangular matrix A, in either the 1-norm or the infinity-norm. +* +* The norm of A is computed and an estimate is obtained for +* norm(inv(A)), then the reciprocal of the condition number is +* computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input) COMPLEX array, dimension (N*(N+1)/2) +* The upper or lower triangular matrix A, packed columnwise in +* a linear array. The j-th column of A is stored in the array +* AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* If DIAG = 'U', the diagonal elements of A are not referenced +* and are assumed to be 1. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + REAL AINVNM, ANORM, SCALE, SMLNUM, XNORM + COMPLEX ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL CLANTP, SLAMCH + EXTERNAL LSAME, ICAMAX, CLANTP, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACON, CLATPS, CSRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = SLAMCH( 'Safe minimum' )*REAL( MAX( 1, N ) ) +* +* Compute the norm of the triangular matrix A. +* + ANORM = CLANTP( NORM, UPLO, DIAG, N, AP, RWORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL CLATPS( UPLO, 'No transpose', DIAG, NORMIN, N, AP, + $ WORK, SCALE, RWORK, INFO ) + ELSE +* +* Multiply by inv(A'). +* + CALL CLATPS( UPLO, 'Conjugate transpose', DIAG, NORMIN, + $ N, AP, WORK, SCALE, RWORK, INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = ICAMAX( N, WORK, 1 ) + XNORM = CABS1( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of CTPCON +* + END diff --git a/costa/native/external/lapack/ctprfs.f b/costa/native/external/lapack/ctprfs.f new file mode 100644 index 000000000..b448f2b9c --- /dev/null +++ b/costa/native/external/lapack/ctprfs.f @@ -0,0 +1,387 @@ + SUBROUTINE CTPRFS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, X, LDX, + $ FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX AP( * ), B( LDB, * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CTPRFS provides error bounds and backward error estimates for the +* solution to a system of linear equations with a triangular packed +* coefficient matrix. +* +* The solution matrix X must be computed by CTPTRS or some other +* means before entering this routine. CTPRFS does not do iterative +* refinement because doing so cannot improve the backward error. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input) COMPLEX array, dimension (N*(N+1)/2) +* The upper or lower triangular matrix A, packed columnwise in +* a linear array. The j-th column of A is stored in the array +* AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* If DIAG = 'U', the diagonal elements of A are not referenced +* and are assumed to be 1. +* +* B (input) COMPLEX array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input) COMPLEX array, dimension (LDX,NRHS) +* The solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANSN, TRANST + INTEGER I, J, K, KASE, KC, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CLACON, CTPMV, CTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL CCOPY( N, X( 1, J ), 1, WORK, 1 ) + CALL CTPMV( UPLO, TRANS, DIAG, N, AP, WORK, 1 ) + CALL CAXPY( N, -ONE, B( 1, J ), 1, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + KC = 1 + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = CABS1( X( K, J ) ) + DO 30 I = 1, K + RWORK( I ) = RWORK( I ) + + $ CABS1( AP( KC+I-1 ) )*XK + 30 CONTINUE + KC = KC + K + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = CABS1( X( K, J ) ) + DO 50 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + + $ CABS1( AP( KC+I-1 ) )*XK + 50 CONTINUE + RWORK( K ) = RWORK( K ) + XK + KC = KC + K + 60 CONTINUE + END IF + ELSE + KC = 1 + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = CABS1( X( K, J ) ) + DO 70 I = K, N + RWORK( I ) = RWORK( I ) + + $ CABS1( AP( KC+I-K ) )*XK + 70 CONTINUE + KC = KC + N - K + 1 + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = CABS1( X( K, J ) ) + DO 90 I = K + 1, N + RWORK( I ) = RWORK( I ) + + $ CABS1( AP( KC+I-K ) )*XK + 90 CONTINUE + RWORK( K ) = RWORK( K ) + XK + KC = KC + N - K + 1 + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A**H)*abs(X) + abs(B). +* + IF( UPPER ) THEN + KC = 1 + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = 1, K + S = S + CABS1( AP( KC+I-1 ) )*CABS1( X( I, J ) ) + 110 CONTINUE + RWORK( K ) = RWORK( K ) + S + KC = KC + K + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = CABS1( X( K, J ) ) + DO 130 I = 1, K - 1 + S = S + CABS1( AP( KC+I-1 ) )*CABS1( X( I, J ) ) + 130 CONTINUE + RWORK( K ) = RWORK( K ) + S + KC = KC + K + 140 CONTINUE + END IF + ELSE + KC = 1 + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, N + S = S + CABS1( AP( KC+I-K ) )*CABS1( X( I, J ) ) + 150 CONTINUE + RWORK( K ) = RWORK( K ) + S + KC = KC + N - K + 1 + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = CABS1( X( K, J ) ) + DO 170 I = K + 1, N + S = S + CABS1( AP( KC+I-K ) )*CABS1( X( I, J ) ) + 170 CONTINUE + RWORK( K ) = RWORK( K ) + S + KC = KC + N - K + 1 + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL CTPSV( UPLO, TRANST, DIAG, N, AP, WORK, 1 ) + DO 220 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 230 CONTINUE + CALL CTPSV( UPLO, TRANSN, DIAG, N, AP, WORK, 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of CTPRFS +* + END diff --git a/costa/native/external/lapack/ctptri.f b/costa/native/external/lapack/ctptri.f new file mode 100644 index 000000000..baddb5f71 --- /dev/null +++ b/costa/native/external/lapack/ctptri.f @@ -0,0 +1,177 @@ + SUBROUTINE CTPTRI( UPLO, DIAG, N, AP, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + COMPLEX AP( * ) +* .. +* +* Purpose +* ======= +* +* CTPTRI computes the inverse of a complex upper or lower triangular +* matrix A stored in packed format. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangular matrix A, stored +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*((2*n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* On exit, the (triangular) inverse of the original matrix, in +* the same packed storage format. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, A(i,i) is exactly zero. The triangular +* matrix is singular and its inverse can not be computed. +* +* Further Details +* =============== +* +* A triangular matrix A can be transferred to packed storage using one +* of the following program segments: +* +* UPLO = 'U': UPLO = 'L': +* +* JC = 1 JC = 1 +* DO 2 J = 1, N DO 2 J = 1, N +* DO 1 I = 1, J DO 1 I = J, N +* AP(JC+I-1) = A(I,J) AP(JC+I-J) = A(I,J) +* 1 CONTINUE 1 CONTINUE +* JC = JC + J JC = JC + N - J + 1 +* 2 CONTINUE 2 CONTINUE +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JC, JCLAST, JJ + COMPLEX AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSCAL, CTPMV, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTPTRI', -INFO ) + RETURN + END IF +* +* Check for singularity if non-unit. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + JJ = 0 + DO 10 INFO = 1, N + JJ = JJ + INFO + IF( AP( JJ ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE + JJ = 1 + DO 20 INFO = 1, N + IF( AP( JJ ).EQ.ZERO ) + $ RETURN + JJ = JJ + N - INFO + 1 + 20 CONTINUE + END IF + INFO = 0 + END IF +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix. +* + JC = 1 + DO 30 J = 1, N + IF( NOUNIT ) THEN + AP( JC+J-1 ) = ONE / AP( JC+J-1 ) + AJJ = -AP( JC+J-1 ) + ELSE + AJJ = -ONE + END IF +* +* Compute elements 1:j-1 of j-th column. +* + CALL CTPMV( 'Upper', 'No transpose', DIAG, J-1, AP, + $ AP( JC ), 1 ) + CALL CSCAL( J-1, AJJ, AP( JC ), 1 ) + JC = JC + J + 30 CONTINUE +* + ELSE +* +* Compute inverse of lower triangular matrix. +* + JC = N*( N+1 ) / 2 + DO 40 J = N, 1, -1 + IF( NOUNIT ) THEN + AP( JC ) = ONE / AP( JC ) + AJJ = -AP( JC ) + ELSE + AJJ = -ONE + END IF + IF( J.LT.N ) THEN +* +* Compute elements j+1:n of j-th column. +* + CALL CTPMV( 'Lower', 'No transpose', DIAG, N-J, + $ AP( JCLAST ), AP( JC+1 ), 1 ) + CALL CSCAL( N-J, AJJ, AP( JC+1 ), 1 ) + END IF + JCLAST = JC + JC = JC - N + J - 2 + 40 CONTINUE + END IF +* + RETURN +* +* End of CTPTRI +* + END diff --git a/costa/native/external/lapack/ctptrs.f b/costa/native/external/lapack/ctptrs.f new file mode 100644 index 000000000..c3e481bd2 --- /dev/null +++ b/costa/native/external/lapack/ctptrs.f @@ -0,0 +1,154 @@ + SUBROUTINE CTPTRS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CTPTRS solves a triangular system of the form +* +* A * X = B, A**T * X = B, or A**H * X = B, +* +* where A is a triangular matrix of order N stored in packed format, +* and B is an N-by-NRHS matrix. A check is made to verify that A is +* nonsingular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input) COMPLEX array, dimension (N*(N+1)/2) +* The upper or lower triangular matrix A, packed columnwise in +* a linear array. The j-th column of A is stored in the array +* AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, if INFO = 0, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the i-th diagonal element of A is zero, +* indicating that the matrix is singular and the +* solutions X have not been computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JC +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + JC = 1 + DO 10 INFO = 1, N + IF( AP( JC+INFO-1 ).EQ.ZERO ) + $ RETURN + JC = JC + INFO + 10 CONTINUE + ELSE + JC = 1 + DO 20 INFO = 1, N + IF( AP( JC ).EQ.ZERO ) + $ RETURN + JC = JC + N - INFO + 1 + 20 CONTINUE + END IF + END IF + INFO = 0 +* +* Solve A * x = b, A**T * x = b, or A**H * x = b. +* + DO 30 J = 1, NRHS + CALL CTPSV( UPLO, TRANS, DIAG, N, AP, B( 1, J ), 1 ) + 30 CONTINUE +* + RETURN +* +* End of CTPTRS +* + END diff --git a/costa/native/external/lapack/ctrcon.f b/costa/native/external/lapack/ctrcon.f new file mode 100644 index 000000000..21f9fa3dd --- /dev/null +++ b/costa/native/external/lapack/ctrcon.f @@ -0,0 +1,200 @@ + SUBROUTINE CTRCON( NORM, UPLO, DIAG, N, A, LDA, RCOND, WORK, + $ RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, LDA, N + REAL RCOND +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CTRCON estimates the reciprocal of the condition number of a +* triangular matrix A, in either the 1-norm or the infinity-norm. +* +* The norm of A is computed and an estimate is obtained for +* norm(inv(A)), then the reciprocal of the condition number is +* computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + REAL AINVNM, ANORM, SCALE, SMLNUM, XNORM + COMPLEX ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL CLANTR, SLAMCH + EXTERNAL LSAME, ICAMAX, CLANTR, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACON, CLATRS, CSRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = SLAMCH( 'Safe minimum' )*REAL( MAX( 1, N ) ) +* +* Compute the norm of the triangular matrix A. +* + ANORM = CLANTR( NORM, UPLO, DIAG, N, N, A, LDA, RWORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL CLATRS( UPLO, 'No transpose', DIAG, NORMIN, N, A, + $ LDA, WORK, SCALE, RWORK, INFO ) + ELSE +* +* Multiply by inv(A'). +* + CALL CLATRS( UPLO, 'Conjugate transpose', DIAG, NORMIN, + $ N, A, LDA, WORK, SCALE, RWORK, INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = ICAMAX( N, WORK, 1 ) + XNORM = CABS1( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of CTRCON +* + END diff --git a/costa/native/external/lapack/ctrevc.f b/costa/native/external/lapack/ctrevc.f new file mode 100644 index 000000000..b352c4003 --- /dev/null +++ b/costa/native/external/lapack/ctrevc.f @@ -0,0 +1,390 @@ + SUBROUTINE CTREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, + $ LDVR, MM, M, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDT, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + REAL RWORK( * ) + COMPLEX T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* CTREVC computes some or all of the right and/or left eigenvectors of +* a complex upper triangular matrix T. +* +* The right eigenvector x and the left eigenvector y of T corresponding +* to an eigenvalue w are defined by: +* +* T*x = w*x, y'*T = w*y' +* +* where y' denotes the conjugate transpose of the vector y. +* +* If all eigenvectors are requested, the routine may either return the +* matrices X and/or Y of right or left eigenvectors of T, or the +* products Q*X and/or Q*Y, where Q is an input unitary +* matrix. If T was obtained from the Schur factorization of an +* original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of +* right or left eigenvectors of A. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'R': compute right eigenvectors only; +* = 'L': compute left eigenvectors only; +* = 'B': compute both right and left eigenvectors. +* +* HOWMNY (input) CHARACTER*1 +* = 'A': compute all right and/or left eigenvectors; +* = 'B': compute all right and/or left eigenvectors, +* and backtransform them using the input matrices +* supplied in VR and/or VL; +* = 'S': compute selected right and/or left eigenvectors, +* specified by the logical array SELECT. +* +* SELECT (input) LOGICAL array, dimension (N) +* If HOWMNY = 'S', SELECT specifies the eigenvectors to be +* computed. +* If HOWMNY = 'A' or 'B', SELECT is not referenced. +* To select the eigenvector corresponding to the j-th +* eigenvalue, SELECT(j) must be set to .TRUE.. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input/output) COMPLEX array, dimension (LDT,N) +* The upper triangular matrix T. T is modified, but restored +* on exit. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* VL (input/output) COMPLEX array, dimension (LDVL,MM) +* On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +* contain an N-by-N matrix Q (usually the unitary matrix Q of +* Schur vectors returned by CHSEQR). +* On exit, if SIDE = 'L' or 'B', VL contains: +* if HOWMNY = 'A', the matrix Y of left eigenvectors of T; +* VL is lower triangular. The i-th column +* VL(i) of VL is the eigenvector corresponding +* to T(i,i). +* if HOWMNY = 'B', the matrix Q*Y; +* if HOWMNY = 'S', the left eigenvectors of T specified by +* SELECT, stored consecutively in the columns +* of VL, in the same order as their +* eigenvalues. +* If SIDE = 'R', VL is not referenced. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. LDVL >= max(1,N) if +* SIDE = 'L' or 'B'; LDVL >= 1 otherwise. +* +* VR (input/output) COMPLEX array, dimension (LDVR,MM) +* On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +* contain an N-by-N matrix Q (usually the unitary matrix Q of +* Schur vectors returned by CHSEQR). +* On exit, if SIDE = 'R' or 'B', VR contains: +* if HOWMNY = 'A', the matrix X of right eigenvectors of T; +* VR is upper triangular. The i-th column +* VR(i) of VR is the eigenvector corresponding +* to T(i,i). +* if HOWMNY = 'B', the matrix Q*X; +* if HOWMNY = 'S', the right eigenvectors of T specified by +* SELECT, stored consecutively in the columns +* of VR, in the same order as their +* eigenvalues. +* If SIDE = 'L', VR is not referenced. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. LDVR >= max(1,N) if +* SIDE = 'R' or 'B'; LDVR >= 1 otherwise. +* +* MM (input) INTEGER +* The number of columns in the arrays VL and/or VR. MM >= M. +* +* M (output) INTEGER +* The number of columns in the arrays VL and/or VR actually +* used to store the eigenvectors. If HOWMNY = 'A' or 'B', M +* is set to N. Each selected eigenvector occupies one +* column. +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The algorithm used in this program is basically backward (forward) +* substitution, with scaling to make the the code robust against +* possible overflow. +* +* Each eigenvector is normalized so that the element of largest +* magnitude has magnitude 1; here the magnitude of a complex number +* (x,y) is taken to be |x| + |y|. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CMZERO, CMONE + PARAMETER ( CMZERO = ( 0.0E+0, 0.0E+0 ), + $ CMONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLV, BOTHV, LEFTV, OVER, RIGHTV, SOMEV + INTEGER I, II, IS, J, K, KI + REAL OVFL, REMAX, SCALE, SMIN, SMLNUM, ULP, UNFL + COMPLEX CDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SCASUM, SLAMCH + EXTERNAL LSAME, ICAMAX, SCASUM, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMV, CLATRS, CSSCAL, SLABAD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + BOTHV = LSAME( SIDE, 'B' ) + RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV + LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV +* + ALLV = LSAME( HOWMNY, 'A' ) + OVER = LSAME( HOWMNY, 'B' ) + SOMEV = LSAME( HOWMNY, 'S' ) +* +* Set M to the number of columns required to store the selected +* eigenvectors. +* + IF( SOMEV ) THEN + M = 0 + DO 10 J = 1, N + IF( SELECT( J ) ) + $ M = M + 1 + 10 CONTINUE + ELSE + M = N + END IF +* + INFO = 0 + IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -1 + ELSE IF( .NOT.ALLV .AND. .NOT.OVER .AND. .NOT.SOMEV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN + INFO = -10 + ELSE IF( MM.LT.M ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTREVC', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* Set the constants to control overflow. +* + UNFL = SLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + CALL SLABAD( UNFL, OVFL ) + ULP = SLAMCH( 'Precision' ) + SMLNUM = UNFL*( N / ULP ) +* +* Store the diagonal elements of T in working array WORK. +* + DO 20 I = 1, N + WORK( I+N ) = T( I, I ) + 20 CONTINUE +* +* Compute 1-norm of each column of strictly upper triangular +* part of T to control overflow in triangular solver. +* + RWORK( 1 ) = ZERO + DO 30 J = 2, N + RWORK( J ) = SCASUM( J-1, T( 1, J ), 1 ) + 30 CONTINUE +* + IF( RIGHTV ) THEN +* +* Compute right eigenvectors. +* + IS = M + DO 80 KI = N, 1, -1 +* + IF( SOMEV ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 80 + END IF + SMIN = MAX( ULP*( CABS1( T( KI, KI ) ) ), SMLNUM ) +* + WORK( 1 ) = CMONE +* +* Form right-hand side. +* + DO 40 K = 1, KI - 1 + WORK( K ) = -T( K, KI ) + 40 CONTINUE +* +* Solve the triangular system: +* (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK. +* + DO 50 K = 1, KI - 1 + T( K, K ) = T( K, K ) - T( KI, KI ) + IF( CABS1( T( K, K ) ).LT.SMIN ) + $ T( K, K ) = SMIN + 50 CONTINUE +* + IF( KI.GT.1 ) THEN + CALL CLATRS( 'Upper', 'No transpose', 'Non-unit', 'Y', + $ KI-1, T, LDT, WORK( 1 ), SCALE, RWORK, + $ INFO ) + WORK( KI ) = SCALE + END IF +* +* Copy the vector x or Q*x to VR and normalize. +* + IF( .NOT.OVER ) THEN + CALL CCOPY( KI, WORK( 1 ), 1, VR( 1, IS ), 1 ) +* + II = ICAMAX( KI, VR( 1, IS ), 1 ) + REMAX = ONE / CABS1( VR( II, IS ) ) + CALL CSSCAL( KI, REMAX, VR( 1, IS ), 1 ) +* + DO 60 K = KI + 1, N + VR( K, IS ) = CMZERO + 60 CONTINUE + ELSE + IF( KI.GT.1 ) + $ CALL CGEMV( 'N', N, KI-1, CMONE, VR, LDVR, WORK( 1 ), + $ 1, CMPLX( SCALE ), VR( 1, KI ), 1 ) +* + II = ICAMAX( N, VR( 1, KI ), 1 ) + REMAX = ONE / CABS1( VR( II, KI ) ) + CALL CSSCAL( N, REMAX, VR( 1, KI ), 1 ) + END IF +* +* Set back the original diagonal elements of T. +* + DO 70 K = 1, KI - 1 + T( K, K ) = WORK( K+N ) + 70 CONTINUE +* + IS = IS - 1 + 80 CONTINUE + END IF +* + IF( LEFTV ) THEN +* +* Compute left eigenvectors. +* + IS = 1 + DO 130 KI = 1, N +* + IF( SOMEV ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 130 + END IF + SMIN = MAX( ULP*( CABS1( T( KI, KI ) ) ), SMLNUM ) +* + WORK( N ) = CMONE +* +* Form right-hand side. +* + DO 90 K = KI + 1, N + WORK( K ) = -CONJG( T( KI, K ) ) + 90 CONTINUE +* +* Solve the triangular system: +* (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK. +* + DO 100 K = KI + 1, N + T( K, K ) = T( K, K ) - T( KI, KI ) + IF( CABS1( T( K, K ) ).LT.SMIN ) + $ T( K, K ) = SMIN + 100 CONTINUE +* + IF( KI.LT.N ) THEN + CALL CLATRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ 'Y', N-KI, T( KI+1, KI+1 ), LDT, + $ WORK( KI+1 ), SCALE, RWORK, INFO ) + WORK( KI ) = SCALE + END IF +* +* Copy the vector x or Q*x to VL and normalize. +* + IF( .NOT.OVER ) THEN + CALL CCOPY( N-KI+1, WORK( KI ), 1, VL( KI, IS ), 1 ) +* + II = ICAMAX( N-KI+1, VL( KI, IS ), 1 ) + KI - 1 + REMAX = ONE / CABS1( VL( II, IS ) ) + CALL CSSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 ) +* + DO 110 K = 1, KI - 1 + VL( K, IS ) = CMZERO + 110 CONTINUE + ELSE + IF( KI.LT.N ) + $ CALL CGEMV( 'N', N, N-KI, CMONE, VL( 1, KI+1 ), LDVL, + $ WORK( KI+1 ), 1, CMPLX( SCALE ), + $ VL( 1, KI ), 1 ) +* + II = ICAMAX( N, VL( 1, KI ), 1 ) + REMAX = ONE / CABS1( VL( II, KI ) ) + CALL CSSCAL( N, REMAX, VL( 1, KI ), 1 ) + END IF +* +* Set back the original diagonal elements of T. +* + DO 120 K = KI + 1, N + T( K, K ) = WORK( K+N ) + 120 CONTINUE +* + IS = IS + 1 + 130 CONTINUE + END IF +* + RETURN +* +* End of CTREVC +* + END diff --git a/costa/native/external/lapack/ctrexc.f b/costa/native/external/lapack/ctrexc.f new file mode 100644 index 000000000..a9de47b69 --- /dev/null +++ b/costa/native/external/lapack/ctrexc.f @@ -0,0 +1,162 @@ + SUBROUTINE CTREXC( COMPQ, N, T, LDT, Q, LDQ, IFST, ILST, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER COMPQ + INTEGER IFST, ILST, INFO, LDQ, LDT, N +* .. +* .. Array Arguments .. + COMPLEX Q( LDQ, * ), T( LDT, * ) +* .. +* +* Purpose +* ======= +* +* CTREXC reorders the Schur factorization of a complex matrix +* A = Q*T*Q**H, so that the diagonal element of T with row index IFST +* is moved to row ILST. +* +* The Schur form T is reordered by a unitary similarity transformation +* Z**H*T*Z, and optionally the matrix Q of Schur vectors is updated by +* postmultplying it with Z. +* +* Arguments +* ========= +* +* COMPQ (input) CHARACTER*1 +* = 'V': update the matrix Q of Schur vectors; +* = 'N': do not update Q. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input/output) COMPLEX array, dimension (LDT,N) +* On entry, the upper triangular matrix T. +* On exit, the reordered upper triangular matrix. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* Q (input/output) COMPLEX array, dimension (LDQ,N) +* On entry, if COMPQ = 'V', the matrix Q of Schur vectors. +* On exit, if COMPQ = 'V', Q has been postmultiplied by the +* unitary transformation matrix Z which reorders T. +* If COMPQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* IFST (input) INTEGER +* ILST (input) INTEGER +* Specify the reordering of the diagonal elements of T: +* The element with row index IFST is moved to row ILST by a +* sequence of transpositions between adjacent elements. +* 1 <= IFST <= N; 1 <= ILST <= N. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL WANTQ + INTEGER K, M1, M2, M3 + REAL CS + COMPLEX SN, T11, T22, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLARTG, CROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters. +* + INFO = 0 + WANTQ = LSAME( COMPQ, 'V' ) + IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.MAX( 1, N ) ) ) THEN + INFO = -6 + ELSE IF( IFST.LT.1 .OR. IFST.GT.N ) THEN + INFO = -7 + ELSE IF( ILST.LT.1 .OR. ILST.GT.N ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTREXC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.1 .OR. IFST.EQ.ILST ) + $ RETURN +* + IF( IFST.LT.ILST ) THEN +* +* Move the IFST-th diagonal element forward down the diagonal. +* + M1 = 0 + M2 = -1 + M3 = 1 + ELSE +* +* Move the IFST-th diagonal element backward up the diagonal. +* + M1 = -1 + M2 = 0 + M3 = -1 + END IF +* + DO 10 K = IFST + M1, ILST + M2, M3 +* +* Interchange the k-th and (k+1)-th diagonal elements. +* + T11 = T( K, K ) + T22 = T( K+1, K+1 ) +* +* Determine the transformation to perform the interchange. +* + CALL CLARTG( T( K, K+1 ), T22-T11, CS, SN, TEMP ) +* +* Apply transformation to the matrix T. +* + IF( K+2.LE.N ) + $ CALL CROT( N-K-1, T( K, K+2 ), LDT, T( K+1, K+2 ), LDT, CS, + $ SN ) + CALL CROT( K-1, T( 1, K ), 1, T( 1, K+1 ), 1, CS, CONJG( SN ) ) +* + T( K, K ) = T22 + T( K+1, K+1 ) = T11 +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL CROT( N, Q( 1, K ), 1, Q( 1, K+1 ), 1, CS, + $ CONJG( SN ) ) + END IF +* + 10 CONTINUE +* + RETURN +* +* End of CTREXC +* + END diff --git a/costa/native/external/lapack/ctrrfs.f b/costa/native/external/lapack/ctrrfs.f new file mode 100644 index 000000000..c9e84c3b3 --- /dev/null +++ b/costa/native/external/lapack/ctrrfs.f @@ -0,0 +1,378 @@ + SUBROUTINE CTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDA, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* CTRRFS provides error bounds and backward error estimates for the +* solution to a system of linear equations with a triangular +* coefficient matrix. +* +* The solution matrix X must be computed by CTRTRS or some other +* means before entering this routine. CTRRFS does not do iterative +* refinement because doing so cannot improve the backward error. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input) COMPLEX array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input) COMPLEX array, dimension (LDX,NRHS) +* The solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX array, dimension (2*N) +* +* RWORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANSN, TRANST + INTEGER I, J, K, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CLACON, CTRMV, CTRSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL CCOPY( N, X( 1, J ), 1, WORK, 1 ) + CALL CTRMV( UPLO, TRANS, DIAG, N, A, LDA, WORK, 1 ) + CALL CAXPY( N, -ONE, B( 1, J ), 1, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = CABS1( X( K, J ) ) + DO 30 I = 1, K + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + 30 CONTINUE + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = CABS1( X( K, J ) ) + DO 50 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + 50 CONTINUE + RWORK( K ) = RWORK( K ) + XK + 60 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = CABS1( X( K, J ) ) + DO 70 I = K, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + 70 CONTINUE + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = CABS1( X( K, J ) ) + DO 90 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + 90 CONTINUE + RWORK( K ) = RWORK( K ) + XK + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A**H)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = 1, K + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 110 CONTINUE + RWORK( K ) = RWORK( K ) + S + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = CABS1( X( K, J ) ) + DO 130 I = 1, K - 1 + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 130 CONTINUE + RWORK( K ) = RWORK( K ) + S + 140 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, N + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 150 CONTINUE + RWORK( K ) = RWORK( K ) + S + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = CABS1( X( K, J ) ) + DO 170 I = K + 1, N + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 170 CONTINUE + RWORK( K ) = RWORK( K ) + S + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL CTRSV( UPLO, TRANST, DIAG, N, A, LDA, WORK, 1 ) + DO 220 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 230 CONTINUE + CALL CTRSV( UPLO, TRANSN, DIAG, N, A, LDA, WORK, 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of CTRRFS +* + END diff --git a/costa/native/external/lapack/ctrsen.f b/costa/native/external/lapack/ctrsen.f new file mode 100644 index 000000000..9b622d687 --- /dev/null +++ b/costa/native/external/lapack/ctrsen.f @@ -0,0 +1,358 @@ + SUBROUTINE CTRSEN( JOB, COMPQ, SELECT, N, T, LDT, Q, LDQ, W, M, S, + $ SEP, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, JOB + INTEGER INFO, LDQ, LDT, LWORK, M, N + REAL S, SEP +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + COMPLEX Q( LDQ, * ), T( LDT, * ), W( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CTRSEN reorders the Schur factorization of a complex matrix +* A = Q*T*Q**H, so that a selected cluster of eigenvalues appears in +* the leading positions on the diagonal of the upper triangular matrix +* T, and the leading columns of Q form an orthonormal basis of the +* corresponding right invariant subspace. +* +* Optionally the routine computes the reciprocal condition numbers of +* the cluster of eigenvalues and/or the invariant subspace. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies whether condition numbers are required for the +* cluster of eigenvalues (S) or the invariant subspace (SEP): +* = 'N': none; +* = 'E': for eigenvalues only (S); +* = 'V': for invariant subspace only (SEP); +* = 'B': for both eigenvalues and invariant subspace (S and +* SEP). +* +* COMPQ (input) CHARACTER*1 +* = 'V': update the matrix Q of Schur vectors; +* = 'N': do not update Q. +* +* SELECT (input) LOGICAL array, dimension (N) +* SELECT specifies the eigenvalues in the selected cluster. To +* select the j-th eigenvalue, SELECT(j) must be set to .TRUE.. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input/output) COMPLEX array, dimension (LDT,N) +* On entry, the upper triangular matrix T. +* On exit, T is overwritten by the reordered matrix T, with the +* selected eigenvalues as the leading diagonal elements. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* Q (input/output) COMPLEX array, dimension (LDQ,N) +* On entry, if COMPQ = 'V', the matrix Q of Schur vectors. +* On exit, if COMPQ = 'V', Q has been postmultiplied by the +* unitary transformation matrix which reorders T; the leading M +* columns of Q form an orthonormal basis for the specified +* invariant subspace. +* If COMPQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. +* LDQ >= 1; and if COMPQ = 'V', LDQ >= N. +* +* W (output) COMPLEX array, dimension (N) +* The reordered eigenvalues of T, in the same order as they +* appear on the diagonal of T. +* +* M (output) INTEGER +* The dimension of the specified invariant subspace. +* 0 <= M <= N. +* +* S (output) REAL +* If JOB = 'E' or 'B', S is a lower bound on the reciprocal +* condition number for the selected cluster of eigenvalues. +* S cannot underestimate the true reciprocal condition number +* by more than a factor of sqrt(N). If M = 0 or N, S = 1. +* If JOB = 'N' or 'V', S is not referenced. +* +* SEP (output) REAL +* If JOB = 'V' or 'B', SEP is the estimated reciprocal +* condition number of the specified invariant subspace. If +* M = 0 or N, SEP = norm(T). +* If JOB = 'N' or 'E', SEP is not referenced. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* If JOB = 'N', WORK is not referenced. Otherwise, +* on exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If JOB = 'N', LWORK >= 1; +* if JOB = 'E', LWORK = M*(N-M); +* if JOB = 'V' or 'B', LWORK >= 2*M*(N-M). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* CTRSEN first collects the selected eigenvalues by computing a unitary +* transformation Z to move them to the top left corner of T. In other +* words, the selected eigenvalues are the eigenvalues of T11 in: +* +* Z'*T*Z = ( T11 T12 ) n1 +* ( 0 T22 ) n2 +* n1 n2 +* +* where N = n1+n2 and Z' means the conjugate transpose of Z. The first +* n1 columns of Z span the specified invariant subspace of T. +* +* If T has been obtained from the Schur factorization of a matrix +* A = Q*T*Q', then the reordered Schur factorization of A is given by +* A = (Q*Z)*(Z'*T*Z)*(Q*Z)', and the first n1 columns of Q*Z span the +* corresponding invariant subspace of A. +* +* The reciprocal condition number of the average of the eigenvalues of +* T11 may be returned in S. S lies between 0 (very badly conditioned) +* and 1 (very well conditioned). It is computed as follows. First we +* compute R so that +* +* P = ( I R ) n1 +* ( 0 0 ) n2 +* n1 n2 +* +* is the projector on the invariant subspace associated with T11. +* R is the solution of the Sylvester equation: +* +* T11*R - R*T22 = T12. +* +* Let F-norm(M) denote the Frobenius-norm of M and 2-norm(M) denote +* the two-norm of M. Then S is computed as the lower bound +* +* (1 + F-norm(R)**2)**(-1/2) +* +* on the reciprocal of 2-norm(P), the true reciprocal condition number. +* S cannot underestimate 1 / 2-norm(P) by more than a factor of +* sqrt(N). +* +* An approximate error bound for the computed average of the +* eigenvalues of T11 is +* +* EPS * norm(T) / S +* +* where EPS is the machine precision. +* +* The reciprocal condition number of the right invariant subspace +* spanned by the first n1 columns of Z (or of Q*Z) is returned in SEP. +* SEP is defined as the separation of T11 and T22: +* +* sep( T11, T22 ) = sigma-min( C ) +* +* where sigma-min(C) is the smallest singular value of the +* n1*n2-by-n1*n2 matrix +* +* C = kprod( I(n2), T11 ) - kprod( transpose(T22), I(n1) ) +* +* I(m) is an m by m identity matrix, and kprod denotes the Kronecker +* product. We estimate sigma-min(C) by the reciprocal of an estimate of +* the 1-norm of inverse(C). The true reciprocal 1-norm of inverse(C) +* cannot differ from sigma-min(C) by more than a factor of sqrt(n1*n2). +* +* When SEP is small, small changes in T can cause large changes in +* the invariant subspace. An approximate bound on the maximum angular +* error in the computed right invariant subspace is +* +* EPS * norm(T) / SEP +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTBH, WANTQ, WANTS, WANTSP + INTEGER IERR, K, KASE, KS, LWMIN, N1, N2, NN + REAL EST, RNORM, SCALE +* .. +* .. Local Arrays .. + REAL RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANGE + EXTERNAL LSAME, CLANGE +* .. +* .. External Subroutines .. + EXTERNAL CLACON, CLACPY, CTREXC, CTRSYL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters. +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH + WANTQ = LSAME( COMPQ, 'V' ) +* +* Set M to the number of selected eigenvalues. +* + M = 0 + DO 10 K = 1, N + IF( SELECT( K ) ) + $ M = M + 1 + 10 CONTINUE +* + N1 = M + N2 = N - M + NN = N1*N2 +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) +* + IF( WANTSP ) THEN + LWMIN = MAX( 1, 2*NN ) + ELSE IF( LSAME( JOB, 'N' ) ) THEN + LWMIN = 1 + ELSE IF( LSAME( JOB, 'E' ) ) THEN + LWMIN = MAX( 1, NN ) + END IF +* + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.WANTS .AND. .NOT.WANTSP ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRSEN', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.N .OR. M.EQ.0 ) THEN + IF( WANTS ) + $ S = ONE + IF( WANTSP ) + $ SEP = CLANGE( '1', N, N, T, LDT, RWORK ) + GO TO 40 + END IF +* +* Collect the selected eigenvalues at the top left corner of T. +* + KS = 0 + DO 20 K = 1, N + IF( SELECT( K ) ) THEN + KS = KS + 1 +* +* Swap the K-th eigenvalue to position KS. +* + IF( K.NE.KS ) + $ CALL CTREXC( COMPQ, N, T, LDT, Q, LDQ, K, KS, IERR ) + END IF + 20 CONTINUE +* + IF( WANTS ) THEN +* +* Solve the Sylvester equation for R: +* +* T11*R - R*T22 = scale*T12 +* + CALL CLACPY( 'F', N1, N2, T( 1, N1+1 ), LDT, WORK, N1 ) + CALL CTRSYL( 'N', 'N', -1, N1, N2, T, LDT, T( N1+1, N1+1 ), + $ LDT, WORK, N1, SCALE, IERR ) +* +* Estimate the reciprocal of the condition number of the cluster +* of eigenvalues. +* + RNORM = CLANGE( 'F', N1, N2, WORK, N1, RWORK ) + IF( RNORM.EQ.ZERO ) THEN + S = ONE + ELSE + S = SCALE / ( SQRT( SCALE*SCALE / RNORM+RNORM )* + $ SQRT( RNORM ) ) + END IF + END IF +* + IF( WANTSP ) THEN +* +* Estimate sep(T11,T22). +* + EST = ZERO + KASE = 0 + 30 CONTINUE + CALL CLACON( NN, WORK( NN+1 ), WORK, EST, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve T11*R - R*T22 = scale*X. +* + CALL CTRSYL( 'N', 'N', -1, N1, N2, T, LDT, + $ T( N1+1, N1+1 ), LDT, WORK, N1, SCALE, + $ IERR ) + ELSE +* +* Solve T11'*R - R*T22' = scale*X. +* + CALL CTRSYL( 'C', 'C', -1, N1, N2, T, LDT, + $ T( N1+1, N1+1 ), LDT, WORK, N1, SCALE, + $ IERR ) + END IF + GO TO 30 + END IF +* + SEP = SCALE / EST + END IF +* + 40 CONTINUE +* +* Copy reordered eigenvalues to W. +* + DO 50 K = 1, N + W( K ) = T( K, K ) + 50 CONTINUE +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of CTRSEN +* + END diff --git a/costa/native/external/lapack/ctrsna.f b/costa/native/external/lapack/ctrsna.f new file mode 100644 index 000000000..c4b3079ab --- /dev/null +++ b/costa/native/external/lapack/ctrsna.f @@ -0,0 +1,354 @@ + SUBROUTINE CTRSNA( JOB, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, + $ LDVR, S, SEP, MM, M, WORK, LDWORK, RWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, JOB + INTEGER INFO, LDT, LDVL, LDVR, LDWORK, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + REAL RWORK( * ), S( * ), SEP( * ) + COMPLEX T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( LDWORK, * ) +* .. +* +* Purpose +* ======= +* +* CTRSNA estimates reciprocal condition numbers for specified +* eigenvalues and/or right eigenvectors of a complex upper triangular +* matrix T (or of any matrix Q*T*Q**H with Q unitary). +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies whether condition numbers are required for +* eigenvalues (S) or eigenvectors (SEP): +* = 'E': for eigenvalues only (S); +* = 'V': for eigenvectors only (SEP); +* = 'B': for both eigenvalues and eigenvectors (S and SEP). +* +* HOWMNY (input) CHARACTER*1 +* = 'A': compute condition numbers for all eigenpairs; +* = 'S': compute condition numbers for selected eigenpairs +* specified by the array SELECT. +* +* SELECT (input) LOGICAL array, dimension (N) +* If HOWMNY = 'S', SELECT specifies the eigenpairs for which +* condition numbers are required. To select condition numbers +* for the j-th eigenpair, SELECT(j) must be set to .TRUE.. +* If HOWMNY = 'A', SELECT is not referenced. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input) COMPLEX array, dimension (LDT,N) +* The upper triangular matrix T. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* VL (input) COMPLEX array, dimension (LDVL,M) +* If JOB = 'E' or 'B', VL must contain left eigenvectors of T +* (or of any Q*T*Q**H with Q unitary), corresponding to the +* eigenpairs specified by HOWMNY and SELECT. The eigenvectors +* must be stored in consecutive columns of VL, as returned by +* CHSEIN or CTREVC. +* If JOB = 'V', VL is not referenced. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. +* LDVL >= 1; and if JOB = 'E' or 'B', LDVL >= N. +* +* VR (input) COMPLEX array, dimension (LDVR,M) +* If JOB = 'E' or 'B', VR must contain right eigenvectors of T +* (or of any Q*T*Q**H with Q unitary), corresponding to the +* eigenpairs specified by HOWMNY and SELECT. The eigenvectors +* must be stored in consecutive columns of VR, as returned by +* CHSEIN or CTREVC. +* If JOB = 'V', VR is not referenced. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. +* LDVR >= 1; and if JOB = 'E' or 'B', LDVR >= N. +* +* S (output) REAL array, dimension (MM) +* If JOB = 'E' or 'B', the reciprocal condition numbers of the +* selected eigenvalues, stored in consecutive elements of the +* array. Thus S(j), SEP(j), and the j-th columns of VL and VR +* all correspond to the same eigenpair (but not in general the +* j-th eigenpair, unless all eigenpairs are selected). +* If JOB = 'V', S is not referenced. +* +* SEP (output) REAL array, dimension (MM) +* If JOB = 'V' or 'B', the estimated reciprocal condition +* numbers of the selected eigenvectors, stored in consecutive +* elements of the array. +* If JOB = 'E', SEP is not referenced. +* +* MM (input) INTEGER +* The number of elements in the arrays S (if JOB = 'E' or 'B') +* and/or SEP (if JOB = 'V' or 'B'). MM >= M. +* +* M (output) INTEGER +* The number of elements of the arrays S and/or SEP actually +* used to store the estimated condition numbers. +* If HOWMNY = 'A', M is set to N. +* +* WORK (workspace) COMPLEX array, dimension (LDWORK,N+1) +* If JOB = 'E', WORK is not referenced. +* +* LDWORK (input) INTEGER +* The leading dimension of the array WORK. +* LDWORK >= 1; and if JOB = 'V' or 'B', LDWORK >= N. +* +* RWORK (workspace) REAL array, dimension (N) +* If JOB = 'E', RWORK is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The reciprocal of the condition number of an eigenvalue lambda is +* defined as +* +* S(lambda) = |v'*u| / (norm(u)*norm(v)) +* +* where u and v are the right and left eigenvectors of T corresponding +* to lambda; v' denotes the conjugate transpose of v, and norm(u) +* denotes the Euclidean norm. These reciprocal condition numbers always +* lie between zero (very badly conditioned) and one (very well +* conditioned). If n = 1, S(lambda) is defined to be 1. +* +* An approximate error bound for a computed eigenvalue W(i) is given by +* +* EPS * norm(T) / S(i) +* +* where EPS is the machine precision. +* +* The reciprocal of the condition number of the right eigenvector u +* corresponding to lambda is defined as follows. Suppose +* +* T = ( lambda c ) +* ( 0 T22 ) +* +* Then the reciprocal condition number is +* +* SEP( lambda, T22 ) = sigma-min( T22 - lambda*I ) +* +* where sigma-min denotes the smallest singular value. We approximate +* the smallest singular value by the reciprocal of an estimate of the +* one-norm of the inverse of T22 - lambda*I. If n = 1, SEP(1) is +* defined to be abs(T(1,1)). +* +* An approximate error bound for a computed right eigenvector VR(i) +* is given by +* +* EPS * norm(T) / SEP(i) +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0+0 ) +* .. +* .. Local Scalars .. + LOGICAL SOMCON, WANTBH, WANTS, WANTSP + CHARACTER NORMIN + INTEGER I, IERR, IX, J, K, KASE, KS + REAL BIGNUM, EPS, EST, LNRM, RNRM, SCALE, SMLNUM, + $ XNORM + COMPLEX CDUM, PROD +* .. +* .. Local Arrays .. + COMPLEX DUMMY( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SCNRM2, SLAMCH + COMPLEX CDOTC + EXTERNAL LSAME, ICAMAX, SCNRM2, SLAMCH, CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CLACON, CLACPY, CLATRS, CSRSCL, CTREXC, SLABAD, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH +* + SOMCON = LSAME( HOWMNY, 'S' ) +* +* Set M to the number of eigenpairs for which condition numbers are +* to be computed. +* + IF( SOMCON ) THEN + M = 0 + DO 10 J = 1, N + IF( SELECT( J ) ) + $ M = M + 1 + 10 CONTINUE + ELSE + M = N + END IF +* + INFO = 0 + IF( .NOT.WANTS .AND. .NOT.WANTSP ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVL.LT.1 .OR. ( WANTS .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( WANTS .AND. LDVR.LT.N ) ) THEN + INFO = -10 + ELSE IF( MM.LT.M ) THEN + INFO = -13 + ELSE IF( LDWORK.LT.1 .OR. ( WANTSP .AND. LDWORK.LT.N ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRSNA', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( SOMCON ) THEN + IF( .NOT.SELECT( 1 ) ) + $ RETURN + END IF + IF( WANTS ) + $ S( 1 ) = ONE + IF( WANTSP ) + $ SEP( 1 ) = ABS( T( 1, 1 ) ) + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* + KS = 1 + DO 50 K = 1, N +* + IF( SOMCON ) THEN + IF( .NOT.SELECT( K ) ) + $ GO TO 50 + END IF +* + IF( WANTS ) THEN +* +* Compute the reciprocal condition number of the k-th +* eigenvalue. +* + PROD = CDOTC( N, VR( 1, KS ), 1, VL( 1, KS ), 1 ) + RNRM = SCNRM2( N, VR( 1, KS ), 1 ) + LNRM = SCNRM2( N, VL( 1, KS ), 1 ) + S( KS ) = ABS( PROD ) / ( RNRM*LNRM ) +* + END IF +* + IF( WANTSP ) THEN +* +* Estimate the reciprocal condition number of the k-th +* eigenvector. +* +* Copy the matrix T to the array WORK and swap the k-th +* diagonal element to the (1,1) position. +* + CALL CLACPY( 'Full', N, N, T, LDT, WORK, LDWORK ) + CALL CTREXC( 'No Q', N, WORK, LDWORK, DUMMY, 1, K, 1, IERR ) +* +* Form C = T22 - lambda*I in WORK(2:N,2:N). +* + DO 20 I = 2, N + WORK( I, I ) = WORK( I, I ) - WORK( 1, 1 ) + 20 CONTINUE +* +* Estimate a lower bound for the 1-norm of inv(C'). The 1st +* and (N+1)th columns of WORK are used to store work vectors. +* + SEP( KS ) = ZERO + EST = ZERO + KASE = 0 + NORMIN = 'N' + 30 CONTINUE + CALL CLACON( N-1, WORK( 1, N+1 ), WORK, EST, KASE ) +* + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve C'*x = scale*b +* + CALL CLATRS( 'Upper', 'Conjugate transpose', + $ 'Nonunit', NORMIN, N-1, WORK( 2, 2 ), + $ LDWORK, WORK, SCALE, RWORK, IERR ) + ELSE +* +* Solve C*x = scale*b +* + CALL CLATRS( 'Upper', 'No transpose', 'Nonunit', + $ NORMIN, N-1, WORK( 2, 2 ), LDWORK, WORK, + $ SCALE, RWORK, IERR ) + END IF + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN +* +* Multiply by 1/SCALE if doing so will not cause +* overflow. +* + IX = ICAMAX( N-1, WORK, 1 ) + XNORM = CABS1( WORK( IX, 1 ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 40 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 30 + END IF +* + SEP( KS ) = ONE / MAX( EST, SMLNUM ) + END IF +* + 40 CONTINUE + KS = KS + 1 + 50 CONTINUE + RETURN +* +* End of CTRSNA +* + END diff --git a/costa/native/external/lapack/ctrsyl.f b/costa/native/external/lapack/ctrsyl.f new file mode 100644 index 000000000..aa0faa5aa --- /dev/null +++ b/costa/native/external/lapack/ctrsyl.f @@ -0,0 +1,368 @@ + SUBROUTINE CTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, + $ LDC, SCALE, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER TRANA, TRANB + INTEGER INFO, ISGN, LDA, LDB, LDC, M, N + REAL SCALE +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* CTRSYL solves the complex Sylvester matrix equation: +* +* op(A)*X + X*op(B) = scale*C or +* op(A)*X - X*op(B) = scale*C, +* +* where op(A) = A or A**H, and A and B are both upper triangular. A is +* M-by-M and B is N-by-N; the right hand side C and the solution X are +* M-by-N; and scale is an output scale factor, set <= 1 to avoid +* overflow in X. +* +* Arguments +* ========= +* +* TRANA (input) CHARACTER*1 +* Specifies the option op(A): +* = 'N': op(A) = A (No transpose) +* = 'C': op(A) = A**H (Conjugate transpose) +* +* TRANB (input) CHARACTER*1 +* Specifies the option op(B): +* = 'N': op(B) = B (No transpose) +* = 'C': op(B) = B**H (Conjugate transpose) +* +* ISGN (input) INTEGER +* Specifies the sign in the equation: +* = +1: solve op(A)*X + X*op(B) = scale*C +* = -1: solve op(A)*X - X*op(B) = scale*C +* +* M (input) INTEGER +* The order of the matrix A, and the number of rows in the +* matrices X and C. M >= 0. +* +* N (input) INTEGER +* The order of the matrix B, and the number of columns in the +* matrices X and C. N >= 0. +* +* A (input) COMPLEX array, dimension (LDA,M) +* The upper triangular matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input) COMPLEX array, dimension (LDB,N) +* The upper triangular matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N right hand side matrix C. +* On exit, C is overwritten by the solution matrix X. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M) +* +* SCALE (output) REAL +* The scale factor, scale, set <= 1 to avoid overflow in X. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* = 1: A and B have common or very close eigenvalues; perturbed +* values were used to solve the equation (but the matrices +* A and B are unchanged). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRNA, NOTRNB + INTEGER J, K, L + REAL BIGNUM, DA11, DB, EPS, SCALOC, SGN, SMIN, + $ SMLNUM + COMPLEX A11, SUML, SUMR, VEC, X11 +* .. +* .. Local Arrays .. + REAL DUM( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANGE, SLAMCH + COMPLEX CDOTC, CDOTU, CLADIV + EXTERNAL LSAME, CLANGE, SLAMCH, CDOTC, CDOTU, CLADIV +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL, SLABAD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Decode and Test input parameters +* + NOTRNA = LSAME( TRANA, 'N' ) + NOTRNB = LSAME( TRANB, 'N' ) +* + INFO = 0 + IF( .NOT.NOTRNA .AND. .NOT.LSAME( TRANA, 'T' ) .AND. .NOT. + $ LSAME( TRANA, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRNB .AND. .NOT.LSAME( TRANB, 'T' ) .AND. .NOT. + $ LSAME( TRANB, 'C' ) ) THEN + INFO = -2 + ELSE IF( ISGN.NE.1 .AND. ISGN.NE.-1 ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRSYL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Set constants to control overflow +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM*REAL( M*N ) / EPS + BIGNUM = ONE / SMLNUM + SMIN = MAX( SMLNUM, EPS*CLANGE( 'M', M, M, A, LDA, DUM ), + $ EPS*CLANGE( 'M', N, N, B, LDB, DUM ) ) + SCALE = ONE + SGN = ISGN +* + IF( NOTRNA .AND. NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* bottom-left corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* M L-1 +* R(K,L) = SUM [A(K,I)*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)]. +* I=K+1 J=1 +* + DO 30 L = 1, N + DO 20 K = M, 1, -1 +* + SUML = CDOTU( M-K, A( K, MIN( K+1, M ) ), LDA, + $ C( MIN( K+1, M ), L ), 1 ) + SUMR = CDOTU( L-1, C( K, 1 ), LDC, B( 1, L ), 1 ) + VEC = C( K, L ) - ( SUML+SGN*SUMR ) +* + SCALOC = ONE + A11 = A( K, K ) + SGN*B( L, L ) + DA11 = ABS( REAL( A11 ) ) + ABS( AIMAG( A11 ) ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( REAL( VEC ) ) + ABS( AIMAG( VEC ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X11 = CLADIV( VEC*CMPLX( SCALOC ), A11 ) +* + IF( SCALOC.NE.ONE ) THEN + DO 10 J = 1, N + CALL CSSCAL( M, SCALOC, C( 1, J ), 1 ) + 10 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K, L ) = X11 +* + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( .NOT.NOTRNA .AND. NOTRNB ) THEN +* +* Solve A' *X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* upper-left corner column by column by +* +* A'(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* K-1 L-1 +* R(K,L) = SUM [A'(I,K)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)] +* I=1 J=1 +* + DO 60 L = 1, N + DO 50 K = 1, M +* + SUML = CDOTC( K-1, A( 1, K ), 1, C( 1, L ), 1 ) + SUMR = CDOTU( L-1, C( K, 1 ), LDC, B( 1, L ), 1 ) + VEC = C( K, L ) - ( SUML+SGN*SUMR ) +* + SCALOC = ONE + A11 = CONJG( A( K, K ) ) + SGN*B( L, L ) + DA11 = ABS( REAL( A11 ) ) + ABS( AIMAG( A11 ) ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( REAL( VEC ) ) + ABS( AIMAG( VEC ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF +* + X11 = CLADIV( VEC*CMPLX( SCALOC ), A11 ) +* + IF( SCALOC.NE.ONE ) THEN + DO 40 J = 1, N + CALL CSSCAL( M, SCALOC, C( 1, J ), 1 ) + 40 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K, L ) = X11 +* + 50 CONTINUE + 60 CONTINUE +* + ELSE IF( .NOT.NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A'*X + ISGN*X*B' = C. +* +* The (K,L)th block of X is determined starting from +* upper-right corner column by column by +* +* A'(K,K)*X(K,L) + ISGN*X(K,L)*B'(L,L) = C(K,L) - R(K,L) +* +* Where +* K-1 +* R(K,L) = SUM [A'(I,K)*X(I,L)] + +* I=1 +* N +* ISGN*SUM [X(K,J)*B'(L,J)]. +* J=L+1 +* + DO 90 L = N, 1, -1 + DO 80 K = 1, M +* + SUML = CDOTC( K-1, A( 1, K ), 1, C( 1, L ), 1 ) + SUMR = CDOTC( N-L, C( K, MIN( L+1, N ) ), LDC, + $ B( L, MIN( L+1, N ) ), LDB ) + VEC = C( K, L ) - ( SUML+SGN*CONJG( SUMR ) ) +* + SCALOC = ONE + A11 = CONJG( A( K, K )+SGN*B( L, L ) ) + DA11 = ABS( REAL( A11 ) ) + ABS( AIMAG( A11 ) ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( REAL( VEC ) ) + ABS( AIMAG( VEC ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF +* + X11 = CLADIV( VEC*CMPLX( SCALOC ), A11 ) +* + IF( SCALOC.NE.ONE ) THEN + DO 70 J = 1, N + CALL CSSCAL( M, SCALOC, C( 1, J ), 1 ) + 70 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K, L ) = X11 +* + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B' = C. +* +* The (K,L)th block of X is determined starting from +* bottom-left corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B'(L,L) = C(K,L) - R(K,L) +* +* Where +* M N +* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B'(L,J)] +* I=K+1 J=L+1 +* + DO 120 L = N, 1, -1 + DO 110 K = M, 1, -1 +* + SUML = CDOTU( M-K, A( K, MIN( K+1, M ) ), LDA, + $ C( MIN( K+1, M ), L ), 1 ) + SUMR = CDOTC( N-L, C( K, MIN( L+1, N ) ), LDC, + $ B( L, MIN( L+1, N ) ), LDB ) + VEC = C( K, L ) - ( SUML+SGN*CONJG( SUMR ) ) +* + SCALOC = ONE + A11 = A( K, K ) + SGN*CONJG( B( L, L ) ) + DA11 = ABS( REAL( A11 ) ) + ABS( AIMAG( A11 ) ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( REAL( VEC ) ) + ABS( AIMAG( VEC ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF +* + X11 = CLADIV( VEC*CMPLX( SCALOC ), A11 ) +* + IF( SCALOC.NE.ONE ) THEN + DO 100 J = 1, N + CALL CSSCAL( M, SCALOC, C( 1, J ), 1 ) + 100 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K, L ) = X11 +* + 110 CONTINUE + 120 CONTINUE +* + END IF +* + RETURN +* +* End of CTRSYL +* + END diff --git a/costa/native/external/lapack/ctrti2.f b/costa/native/external/lapack/ctrti2.f new file mode 100644 index 000000000..a8a379bfe --- /dev/null +++ b/costa/native/external/lapack/ctrti2.f @@ -0,0 +1,147 @@ + SUBROUTINE CTRTI2( UPLO, DIAG, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CTRTI2 computes the inverse of a complex upper or lower triangular +* matrix. +* +* This is the Level 2 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the triangular matrix A. If UPLO = 'U', the +* leading n by n upper triangular part of the array A contains +* the upper triangular matrix, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n by n lower triangular part of the array A contains +* the lower triangular matrix, and the strictly upper +* triangular part of A is not referenced. If DIAG = 'U', the +* diagonal elements of A are also not referenced and are +* assumed to be 1. +* +* On exit, the (triangular) inverse of the original matrix, in +* the same storage format. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J + COMPLEX AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSCAL, CTRMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRTI2', -INFO ) + RETURN + END IF +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix. +* + DO 10 J = 1, N + IF( NOUNIT ) THEN + A( J, J ) = ONE / A( J, J ) + AJJ = -A( J, J ) + ELSE + AJJ = -ONE + END IF +* +* Compute elements 1:j-1 of j-th column. +* + CALL CTRMV( 'Upper', 'No transpose', DIAG, J-1, A, LDA, + $ A( 1, J ), 1 ) + CALL CSCAL( J-1, AJJ, A( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* Compute inverse of lower triangular matrix. +* + DO 20 J = N, 1, -1 + IF( NOUNIT ) THEN + A( J, J ) = ONE / A( J, J ) + AJJ = -A( J, J ) + ELSE + AJJ = -ONE + END IF + IF( J.LT.N ) THEN +* +* Compute elements j+1:n of j-th column. +* + CALL CTRMV( 'Lower', 'No transpose', DIAG, N-J, + $ A( J+1, J+1 ), LDA, A( J+1, J ), 1 ) + CALL CSCAL( N-J, AJJ, A( J+1, J ), 1 ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of CTRTI2 +* + END diff --git a/costa/native/external/lapack/ctrtri.f b/costa/native/external/lapack/ctrtri.f new file mode 100644 index 000000000..ffd79b870 --- /dev/null +++ b/costa/native/external/lapack/ctrtri.f @@ -0,0 +1,178 @@ + SUBROUTINE CTRTRI( UPLO, DIAG, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* CTRTRI computes the inverse of a complex upper or lower triangular +* matrix A. +* +* This is the Level 3 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the triangular matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of the array A contains +* the upper triangular matrix, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of the array A contains +* the lower triangular matrix, and the strictly upper +* triangular part of A is not referenced. If DIAG = 'U', the +* diagonal elements of A are also not referenced and are +* assumed to be 1. +* On exit, the (triangular) inverse of the original matrix, in +* the same storage format. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, A(i,i) is exactly zero. The triangular +* matrix is singular and its inverse can not be computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JB, NB, NN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CTRMM, CTRSM, CTRTI2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity if non-unit. +* + IF( NOUNIT ) THEN + DO 10 INFO = 1, N + IF( A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + INFO = 0 + END IF +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'CTRTRI', UPLO // DIAG, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL CTRTI2( UPLO, DIAG, N, A, LDA, INFO ) + ELSE +* +* Use blocked code +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix +* + DO 20 J = 1, N, NB + JB = MIN( NB, N-J+1 ) +* +* Compute rows 1:j-1 of current block column +* + CALL CTRMM( 'Left', 'Upper', 'No transpose', DIAG, J-1, + $ JB, ONE, A, LDA, A( 1, J ), LDA ) + CALL CTRSM( 'Right', 'Upper', 'No transpose', DIAG, J-1, + $ JB, -ONE, A( J, J ), LDA, A( 1, J ), LDA ) +* +* Compute inverse of current diagonal block +* + CALL CTRTI2( 'Upper', DIAG, JB, A( J, J ), LDA, INFO ) + 20 CONTINUE + ELSE +* +* Compute inverse of lower triangular matrix +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 30 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) + IF( J+JB.LE.N ) THEN +* +* Compute rows j+jb:n of current block column +* + CALL CTRMM( 'Left', 'Lower', 'No transpose', DIAG, + $ N-J-JB+1, JB, ONE, A( J+JB, J+JB ), LDA, + $ A( J+JB, J ), LDA ) + CALL CTRSM( 'Right', 'Lower', 'No transpose', DIAG, + $ N-J-JB+1, JB, -ONE, A( J, J ), LDA, + $ A( J+JB, J ), LDA ) + END IF +* +* Compute inverse of current diagonal block +* + CALL CTRTI2( 'Lower', DIAG, JB, A( J, J ), LDA, INFO ) + 30 CONTINUE + END IF + END IF +* + RETURN +* +* End of CTRTRI +* + END diff --git a/costa/native/external/lapack/ctrtrs.f b/costa/native/external/lapack/ctrtrs.f new file mode 100644 index 000000000..9ec56c4ab --- /dev/null +++ b/costa/native/external/lapack/ctrtrs.f @@ -0,0 +1,149 @@ + SUBROUTINE CTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* CTRTRS solves a triangular system of the form +* +* A * X = B, A**T * X = B, or A**H * X = B, +* +* where A is a triangular matrix of order N, and B is an N-by-NRHS +* matrix. A check is made to verify that A is nonsingular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) COMPLEX array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, if INFO = 0, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the i-th diagonal element of A is zero, +* indicating that the matrix is singular and the solutions +* X have not been computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + DO 10 INFO = 1, N + IF( A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + END IF + INFO = 0 +* +* Solve A * x = b, A**T * x = b, or A**H * x = b. +* + CALL CTRSM( 'Left', UPLO, TRANS, DIAG, N, NRHS, ONE, A, LDA, B, + $ LDB ) +* + RETURN +* +* End of CTRTRS +* + END diff --git a/costa/native/external/lapack/ctzrqf.f b/costa/native/external/lapack/ctzrqf.f new file mode 100644 index 000000000..1da25fbd4 --- /dev/null +++ b/costa/native/external/lapack/ctzrqf.f @@ -0,0 +1,174 @@ + SUBROUTINE CTZRQF( M, N, A, LDA, TAU, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine CTZRZF. +* +* CTZRQF reduces the M-by-N ( M<=N ) complex upper trapezoidal matrix A +* to upper triangular form by means of unitary transformations. +* +* The upper trapezoidal matrix A is factored as +* +* A = ( R 0 ) * Z, +* +* where Z is an N-by-N unitary matrix and R is an M-by-M upper +* triangular matrix. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= M. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the leading M-by-N upper trapezoidal part of the +* array A must contain the matrix to be factorized. +* On exit, the leading M-by-M upper triangular part of A +* contains the upper triangular matrix R, and elements M+1 to +* N of the first M rows of A, with the array TAU, represent the +* unitary matrix Z as a product of M elementary reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX array, dimension (M) +* The scalar factors of the elementary reflectors. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The factorization is obtained by Householder's method. The kth +* transformation matrix, Z( k ), whose conjugate transpose is used to +* introduce zeros into the (m - k + 1)th row of A, is given in the form +* +* Z( k ) = ( I 0 ), +* ( 0 T( k ) ) +* +* where +* +* T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), +* ( 0 ) +* ( z( k ) ) +* +* tau is a scalar and z( k ) is an ( n - m ) element vector. +* tau and z( k ) are chosen to annihilate the elements of the kth row +* of X. +* +* The scalar tau is returned in the kth element of TAU and the vector +* u( k ) in the kth row of A, such that the elements of z( k ) are +* in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in +* the upper triangular part of A. +* +* Z is given by +* +* Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CONE, CZERO + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ), + $ CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K, M1 + COMPLEX ALPHA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGEMV, CGERC, CLACGV, CLARFG, + $ XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTZRQF', -INFO ) + RETURN + END IF +* +* Perform the factorization. +* + IF( M.EQ.0 ) + $ RETURN + IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = CZERO + 10 CONTINUE + ELSE + M1 = MIN( M+1, N ) + DO 20 K = M, 1, -1 +* +* Use a Householder reflection to zero the kth row of A. +* First set up the reflection. +* + A( K, K ) = CONJG( A( K, K ) ) + CALL CLACGV( N-M, A( K, M1 ), LDA ) + ALPHA = A( K, K ) + CALL CLARFG( N-M+1, ALPHA, A( K, M1 ), LDA, TAU( K ) ) + A( K, K ) = ALPHA + TAU( K ) = CONJG( TAU( K ) ) +* + IF( TAU( K ).NE.CZERO .AND. K.GT.1 ) THEN +* +* We now perform the operation A := A*P( k )'. +* +* Use the first ( k - 1 ) elements of TAU to store a( k ), +* where a( k ) consists of the first ( k - 1 ) elements of +* the kth column of A. Also let B denote the first +* ( k - 1 ) rows of the last ( n - m ) columns of A. +* + CALL CCOPY( K-1, A( 1, K ), 1, TAU, 1 ) +* +* Form w = a( k ) + B*z( k ) in TAU. +* + CALL CGEMV( 'No transpose', K-1, N-M, CONE, A( 1, M1 ), + $ LDA, A( K, M1 ), LDA, CONE, TAU, 1 ) +* +* Now form a( k ) := a( k ) - conjg(tau)*w +* and B := B - conjg(tau)*w*z( k )'. +* + CALL CAXPY( K-1, -CONJG( TAU( K ) ), TAU, 1, A( 1, K ), + $ 1 ) + CALL CGERC( K-1, N-M, -CONJG( TAU( K ) ), TAU, 1, + $ A( K, M1 ), LDA, A( 1, M1 ), LDA ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of CTZRQF +* + END diff --git a/costa/native/external/lapack/ctzrzf.f b/costa/native/external/lapack/ctzrzf.f new file mode 100644 index 000000000..f380dfba1 --- /dev/null +++ b/costa/native/external/lapack/ctzrzf.f @@ -0,0 +1,241 @@ + SUBROUTINE CTZRZF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CTZRZF reduces the M-by-N ( M<=N ) complex upper trapezoidal matrix A +* to upper triangular form by means of unitary transformations. +* +* The upper trapezoidal matrix A is factored as +* +* A = ( R 0 ) * Z, +* +* where Z is an N-by-N unitary matrix and R is an M-by-M upper +* triangular matrix. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the leading M-by-N upper trapezoidal part of the +* array A must contain the matrix to be factorized. +* On exit, the leading M-by-M upper triangular part of A +* contains the upper triangular matrix R, and elements M+1 to +* N of the first M rows of A, with the array TAU, represent the +* unitary matrix Z as a product of M elementary reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX array, dimension (M) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* The factorization is obtained by Householder's method. The kth +* transformation matrix, Z( k ), which is used to introduce zeros into +* the ( m - k + 1 )th row of A, is given in the form +* +* Z( k ) = ( I 0 ), +* ( 0 T( k ) ) +* +* where +* +* T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), +* ( 0 ) +* ( z( k ) ) +* +* tau is a scalar and z( k ) is an ( n - m ) element vector. +* tau and z( k ) are chosen to annihilate the elements of the kth row +* of X. +* +* The scalar tau is returned in the kth element of TAU and the vector +* u( k ) in the kth row of A, such that the elements of z( k ) are +* in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in +* the upper triangular part of A. +* +* Z is given by +* +* Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IWS, KI, KK, LDWORK, LWKOPT, M1, MU, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CLARZB, CLARZT, CLATRZ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. +* + NB = ILAENV( 1, 'CGERQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTZRZF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + ELSE IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = M + IF( NB.GT.1 .AND. NB.LT.M ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CGERQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.M ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CGERQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.M .AND. NX.LT.M ) THEN +* +* Use blocked code initially. +* The last kk rows are handled by the block method. +* + M1 = MIN( M+1, N ) + KI = ( ( M-NX-1 ) / NB )*NB + KK = MIN( M, KI+NB ) +* + DO 20 I = M - KK + KI + 1, M - KK + 1, -NB + IB = MIN( M-I+1, NB ) +* +* Compute the TZ factorization of the current block +* A(i:i+ib-1,i:n) +* + CALL CLATRZ( IB, N-I+1, N-M, A( I, I ), LDA, TAU( I ), + $ WORK ) + IF( I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL CLARZT( 'Backward', 'Rowwise', N-M, IB, A( I, M1 ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:i-1,i:n) from the right +* + CALL CLARZB( 'Right', 'No transpose', 'Backward', + $ 'Rowwise', I-1, N-I+1, IB, N-M, A( I, M1 ), + $ LDA, WORK, LDWORK, A( 1, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 20 CONTINUE + MU = I + NB - 1 + ELSE + MU = M + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 ) + $ CALL CLATRZ( MU, N, N-M, A, LDA, TAU, WORK ) +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CTZRZF +* + END diff --git a/costa/native/external/lapack/cung2l.f b/costa/native/external/lapack/cung2l.f new file mode 100644 index 000000000..bc8c56abd --- /dev/null +++ b/costa/native/external/lapack/cung2l.f @@ -0,0 +1,129 @@ + SUBROUTINE CUNG2L( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNG2L generates an m by n complex matrix Q with orthonormal columns, +* which is defined as the last n columns of a product of k elementary +* reflectors of order m +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by CGEQLF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the (n-k+i)-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by CGEQLF in the last k columns of its array +* argument A. +* On exit, the m-by-n matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGEQLF. +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, II, J, L +* .. +* .. External Subroutines .. + EXTERNAL CLARF, CSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNG2L', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns 1:n-k to columns of the unit matrix +* + DO 20 J = 1, N - K + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( M-N+J, J ) = ONE + 20 CONTINUE +* + DO 40 I = 1, K + II = N - K + I +* +* Apply H(i) to A(1:m-k+i,1:n-k+i) from the left +* + A( M-N+II, II ) = ONE + CALL CLARF( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), A, + $ LDA, WORK ) + CALL CSCAL( M-N+II-1, -TAU( I ), A( 1, II ), 1 ) + A( M-N+II, II ) = ONE - TAU( I ) +* +* Set A(m-k+i+1:m,n-k+i) to zero +* + DO 30 L = M - N + II + 1, M + A( L, II ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of CUNG2L +* + END diff --git a/costa/native/external/lapack/cung2r.f b/costa/native/external/lapack/cung2r.f new file mode 100644 index 000000000..290c96d4c --- /dev/null +++ b/costa/native/external/lapack/cung2r.f @@ -0,0 +1,131 @@ + SUBROUTINE CUNG2R( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNG2R generates an m by n complex matrix Q with orthonormal columns, +* which is defined as the first n columns of a product of k elementary +* reflectors of order m +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by CGEQRF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the i-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by CGEQRF in the first k columns of its array +* argument A. +* On exit, the m by n matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGEQRF. +* +* WORK (workspace) COMPLEX array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL CLARF, CSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNG2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns k+1:n to columns of the unit matrix +* + DO 20 J = K + 1, N + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( J, J ) = ONE + 20 CONTINUE +* + DO 40 I = K, 1, -1 +* +* Apply H(i) to A(i:m,i:n) from the left +* + IF( I.LT.N ) THEN + A( I, I ) = ONE + CALL CLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) + END IF + IF( I.LT.M ) + $ CALL CSCAL( M-I, -TAU( I ), A( I+1, I ), 1 ) + A( I, I ) = ONE - TAU( I ) +* +* Set A(1:i-1,i) to zero +* + DO 30 L = 1, I - 1 + A( L, I ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of CUNG2R +* + END diff --git a/costa/native/external/lapack/cungbr.f b/costa/native/external/lapack/cungbr.f new file mode 100644 index 000000000..8894670e7 --- /dev/null +++ b/costa/native/external/lapack/cungbr.f @@ -0,0 +1,246 @@ + SUBROUTINE CUNGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER VECT + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNGBR generates one of the complex unitary matrices Q or P**H +* determined by CGEBRD when reducing a complex matrix A to bidiagonal +* form: A = Q * B * P**H. Q and P**H are defined as products of +* elementary reflectors H(i) or G(i) respectively. +* +* If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q +* is of order M: +* if m >= k, Q = H(1) H(2) . . . H(k) and CUNGBR returns the first n +* columns of Q, where m >= n >= k; +* if m < k, Q = H(1) H(2) . . . H(m-1) and CUNGBR returns Q as an +* M-by-M matrix. +* +* If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H +* is of order N: +* if k < n, P**H = G(k) . . . G(2) G(1) and CUNGBR returns the first m +* rows of P**H, where n >= m >= k; +* if k >= n, P**H = G(n-1) . . . G(2) G(1) and CUNGBR returns P**H as +* an N-by-N matrix. +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* Specifies whether the matrix Q or the matrix P**H is +* required, as defined in the transformation applied by CGEBRD: +* = 'Q': generate Q; +* = 'P': generate P**H. +* +* M (input) INTEGER +* The number of rows of the matrix Q or P**H to be returned. +* M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q or P**H to be returned. +* N >= 0. +* If VECT = 'Q', M >= N >= min(M,K); +* if VECT = 'P', N >= M >= min(N,K). +* +* K (input) INTEGER +* If VECT = 'Q', the number of columns in the original M-by-K +* matrix reduced by CGEBRD. +* If VECT = 'P', the number of rows in the original K-by-N +* matrix reduced by CGEBRD. +* K >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the vectors which define the elementary reflectors, +* as returned by CGEBRD. +* On exit, the M-by-N matrix Q or P**H. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= M. +* +* TAU (input) COMPLEX array, dimension +* (min(M,K)) if VECT = 'Q' +* (min(N,K)) if VECT = 'P' +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i) or G(i), which determines Q or P**H, as +* returned by CGEBRD in its array argument TAUQ or TAUP. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,min(M,N)). +* For optimum performance LWORK >= min(M,N)*NB, where NB +* is the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTQ + INTEGER I, IINFO, J, LWKOPT, MN, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL CUNGLQ, CUNGQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + WANTQ = LSAME( VECT, 'Q' ) + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 .OR. ( WANTQ .AND. ( N.GT.M .OR. N.LT.MIN( M, + $ K ) ) ) .OR. ( .NOT.WANTQ .AND. ( M.GT.N .OR. M.LT. + $ MIN( N, K ) ) ) ) THEN + INFO = -3 + ELSE IF( K.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LWORK.LT.MAX( 1, MN ) .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( WANTQ ) THEN + NB = ILAENV( 1, 'CUNGQR', ' ', M, N, K, -1 ) + ELSE + NB = ILAENV( 1, 'CUNGLQ', ' ', M, N, K, -1 ) + END IF + LWKOPT = MAX( 1, MN )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGBR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( WANTQ ) THEN +* +* Form Q, determined by a call to CGEBRD to reduce an m-by-k +* matrix +* + IF( M.GE.K ) THEN +* +* If m >= k, assume m >= n >= k +* + CALL CUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* If m < k, assume m = n +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q +* to those of the unit matrix +* + DO 20 J = M, 2, -1 + A( 1, J ) = ZERO + DO 10 I = J + 1, M + A( I, J ) = A( I, J-1 ) + 10 CONTINUE + 20 CONTINUE + A( 1, 1 ) = ONE + DO 30 I = 2, M + A( I, 1 ) = ZERO + 30 CONTINUE + IF( M.GT.1 ) THEN +* +* Form Q(2:m,2:m) +* + CALL CUNGQR( M-1, M-1, M-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + ELSE +* +* Form P', determined by a call to CGEBRD to reduce a k-by-n +* matrix +* + IF( K.LT.N ) THEN +* +* If k < n, assume k <= m <= n +* + CALL CUNGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* If k >= n, assume m = n +* +* Shift the vectors which define the elementary reflectors one +* row downward, and set the first row and column of P' to +* those of the unit matrix +* + A( 1, 1 ) = ONE + DO 40 I = 2, N + A( I, 1 ) = ZERO + 40 CONTINUE + DO 60 J = 2, N + DO 50 I = J - 1, 2, -1 + A( I, J ) = A( I-1, J ) + 50 CONTINUE + A( 1, J ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Form P'(2:n,2:n) +* + CALL CUNGLQ( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNGBR +* + END diff --git a/costa/native/external/lapack/cunghr.f b/costa/native/external/lapack/cunghr.f new file mode 100644 index 000000000..704acdc25 --- /dev/null +++ b/costa/native/external/lapack/cunghr.f @@ -0,0 +1,166 @@ + SUBROUTINE CUNGHR( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNGHR generates a complex unitary matrix Q which is defined as the +* product of IHI-ILO elementary reflectors of order N, as returned by +* CGEHRD: +* +* Q = H(ilo) H(ilo+1) . . . H(ihi-1). +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix Q. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* ILO and IHI must have the same values as in the previous call +* of CGEHRD. Q is equal to the unit matrix except in the +* submatrix Q(ilo+1:ihi,ilo+1:ihi). +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the vectors which define the elementary reflectors, +* as returned by CGEHRD. +* On exit, the N-by-N unitary matrix Q. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (input) COMPLEX array, dimension (N-1) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGEHRD. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= IHI-ILO. +* For optimum performance LWORK >= (IHI-ILO)*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LWKOPT, NB, NH +* .. +* .. External Subroutines .. + EXTERNAL CUNGQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NH = IHI - ILO + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, NH ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'CUNGQR', ' ', NH, NH, NH, -1 ) + LWKOPT = MAX( 1, NH )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGHR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first ilo and the last n-ihi +* rows and columns to those of the unit matrix +* + DO 40 J = IHI, ILO + 1, -1 + DO 10 I = 1, J - 1 + A( I, J ) = ZERO + 10 CONTINUE + DO 20 I = J + 1, IHI + A( I, J ) = A( I, J-1 ) + 20 CONTINUE + DO 30 I = IHI + 1, N + A( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + DO 60 J = 1, ILO + DO 50 I = 1, N + A( I, J ) = ZERO + 50 CONTINUE + A( J, J ) = ONE + 60 CONTINUE + DO 80 J = IHI + 1, N + DO 70 I = 1, N + A( I, J ) = ZERO + 70 CONTINUE + A( J, J ) = ONE + 80 CONTINUE +* + IF( NH.GT.0 ) THEN +* +* Generate Q(ilo+1:ihi,ilo+1:ihi) +* + CALL CUNGQR( NH, NH, NH, A( ILO+1, ILO+1 ), LDA, TAU( ILO ), + $ WORK, LWORK, IINFO ) + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNGHR +* + END diff --git a/costa/native/external/lapack/cungl2.f b/costa/native/external/lapack/cungl2.f new file mode 100644 index 000000000..68d0f7a73 --- /dev/null +++ b/costa/native/external/lapack/cungl2.f @@ -0,0 +1,137 @@ + SUBROUTINE CUNGL2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNGL2 generates an m-by-n complex matrix Q with orthonormal rows, +* which is defined as the first m rows of a product of k elementary +* reflectors of order n +* +* Q = H(k)' . . . H(2)' H(1)' +* +* as returned by CGELQF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. N >= M. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. M >= K >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the i-th row must contain the vector which defines +* the elementary reflector H(i), for i = 1,2,...,k, as returned +* by CGELQF in the first k rows of its array argument A. +* On exit, the m by n matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGELQF. +* +* WORK (workspace) COMPLEX array, dimension (M) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLARF, CSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGL2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) + $ RETURN +* + IF( K.LT.M ) THEN +* +* Initialise rows k+1:m to rows of the unit matrix +* + DO 20 J = 1, N + DO 10 L = K + 1, M + A( L, J ) = ZERO + 10 CONTINUE + IF( J.GT.K .AND. J.LE.M ) + $ A( J, J ) = ONE + 20 CONTINUE + END IF +* + DO 40 I = K, 1, -1 +* +* Apply H(i)' to A(i:m,i:n) from the right +* + IF( I.LT.N ) THEN + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + IF( I.LT.M ) THEN + A( I, I ) = ONE + CALL CLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ CONJG( TAU( I ) ), A( I+1, I ), LDA, WORK ) + END IF + CALL CSCAL( N-I, -TAU( I ), A( I, I+1 ), LDA ) + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + END IF + A( I, I ) = ONE - CONJG( TAU( I ) ) +* +* Set A(i,1:i-1,i) to zero +* + DO 30 L = 1, I - 1 + A( I, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of CUNGL2 +* + END diff --git a/costa/native/external/lapack/cunglq.f b/costa/native/external/lapack/cunglq.f new file mode 100644 index 000000000..494649482 --- /dev/null +++ b/costa/native/external/lapack/cunglq.f @@ -0,0 +1,216 @@ + SUBROUTINE CUNGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNGLQ generates an M-by-N complex matrix Q with orthonormal rows, +* which is defined as the first M rows of a product of K elementary +* reflectors of order N +* +* Q = H(k)' . . . H(2)' H(1)' +* +* as returned by CGELQF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. N >= M. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. M >= K >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the i-th row must contain the vector which defines +* the elementary reflector H(i), for i = 1,2,...,k, as returned +* by CGELQF in the first k rows of its array argument A. +* On exit, the M-by-N matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGELQF. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit; +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CLARFB, CLARFT, CUNGL2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'CUNGLQ', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, M )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CUNGLQ', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNGLQ', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk rows are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(kk+1:m,1:kk) to zero. +* + DO 20 J = 1, KK + DO 10 I = KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.M ) + $ CALL CUNGL2( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.M ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL CLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H' to A(i+ib:m,i:n) from the right +* + CALL CLARFB( 'Right', 'Conjugate transpose', 'Forward', + $ 'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ), + $ LDA, WORK, LDWORK, A( I+IB, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H' to columns i:n of current block +* + CALL CUNGL2( IB, N-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set columns 1:i-1 of current block to zero +* + DO 40 J = 1, I - 1 + DO 30 L = I, I + IB - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of CUNGLQ +* + END diff --git a/costa/native/external/lapack/cungql.f b/costa/native/external/lapack/cungql.f new file mode 100644 index 000000000..1f5e4c954 --- /dev/null +++ b/costa/native/external/lapack/cungql.f @@ -0,0 +1,214 @@ + SUBROUTINE CUNGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNGQL generates an M-by-N complex matrix Q with orthonormal columns, +* which is defined as the last N columns of a product of K elementary +* reflectors of order M +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by CGEQLF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the (n-k+i)-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by CGEQLF in the last k columns of its array +* argument A. +* On exit, the M-by-N matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGEQLF. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KK, L, LDWORK, LWKOPT, + $ NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CLARFB, CLARFT, CUNG2L, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'CUNGQL', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, N )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGQL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CUNGQL', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNGQL', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the first block. +* The last kk columns are handled by the block method. +* + KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB ) +* +* Set A(m-kk+1:m,1:n-kk) to zero. +* + DO 20 J = 1, N - KK + DO 10 I = M - KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the first or only block. +* + CALL CUNG2L( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = K - KK + 1, K, NB + IB = MIN( NB, K-I+1 ) + IF( N-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL CLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left +* + CALL CLARFB( 'Left', 'No transpose', 'Backward', + $ 'Columnwise', M-K+I+IB-1, N-K+I-1, IB, + $ A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows 1:m-k+i+ib-1 of current block +* + CALL CUNG2L( M-K+I+IB-1, IB, IB, A( 1, N-K+I ), LDA, + $ TAU( I ), WORK, IINFO ) +* +* Set rows m-k+i+ib:m of current block to zero +* + DO 40 J = N - K + I, N - K + I + IB - 1 + DO 30 L = M - K + I + IB, M + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of CUNGQL +* + END diff --git a/costa/native/external/lapack/cungqr.f b/costa/native/external/lapack/cungqr.f new file mode 100644 index 000000000..4dc4035ca --- /dev/null +++ b/costa/native/external/lapack/cungqr.f @@ -0,0 +1,217 @@ + SUBROUTINE CUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNGQR generates an M-by-N complex matrix Q with orthonormal columns, +* which is defined as the first N columns of a product of K elementary +* reflectors of order M +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by CGEQRF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the i-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by CGEQRF in the first k columns of its array +* argument A. +* On exit, the M-by-N matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGEQRF. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CLARFB, CLARFT, CUNG2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'CUNGQR', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, N )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CUNGQR', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNGQR', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk columns are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(1:kk,kk+1:n) to zero. +* + DO 20 J = KK + 1, N + DO 10 I = 1, KK + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.N ) + $ CALL CUNG2R( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL CLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i:m,i+ib:n) from the left +* + CALL CLARFB( 'Left', 'No transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows i:m of current block +* + CALL CUNG2R( M-I+1, IB, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set rows 1:i-1 of current block to zero +* + DO 40 J = I, I + IB - 1 + DO 30 L = 1, I - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of CUNGQR +* + END diff --git a/costa/native/external/lapack/cungr2.f b/costa/native/external/lapack/cungr2.f new file mode 100644 index 000000000..8cf1cd309 --- /dev/null +++ b/costa/native/external/lapack/cungr2.f @@ -0,0 +1,135 @@ + SUBROUTINE CUNGR2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNGR2 generates an m by n complex matrix Q with orthonormal rows, +* which is defined as the last m rows of a product of k elementary +* reflectors of order n +* +* Q = H(1)' H(2)' . . . H(k)' +* +* as returned by CGERQF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. N >= M. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. M >= K >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the (m-k+i)-th row must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by CGERQF in the last k rows of its array argument +* A. +* On exit, the m-by-n matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGERQF. +* +* WORK (workspace) COMPLEX array, dimension (M) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, II, J, L +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLARF, CSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGR2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) + $ RETURN +* + IF( K.LT.M ) THEN +* +* Initialise rows 1:m-k to rows of the unit matrix +* + DO 20 J = 1, N + DO 10 L = 1, M - K + A( L, J ) = ZERO + 10 CONTINUE + IF( J.GT.N-M .AND. J.LE.N-K ) + $ A( M-N+J, J ) = ONE + 20 CONTINUE + END IF +* + DO 40 I = 1, K + II = M - K + I +* +* Apply H(i)' to A(1:m-k+i,1:n-k+i) from the right +* + CALL CLACGV( N-M+II-1, A( II, 1 ), LDA ) + A( II, N-M+II ) = ONE + CALL CLARF( 'Right', II-1, N-M+II, A( II, 1 ), LDA, + $ CONJG( TAU( I ) ), A, LDA, WORK ) + CALL CSCAL( N-M+II-1, -TAU( I ), A( II, 1 ), LDA ) + CALL CLACGV( N-M+II-1, A( II, 1 ), LDA ) + A( II, N-M+II ) = ONE - CONJG( TAU( I ) ) +* +* Set A(m-k+i,n-k+i+1:n) to zero +* + DO 30 L = N - M + II + 1, N + A( II, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of CUNGR2 +* + END diff --git a/costa/native/external/lapack/cungrq.f b/costa/native/external/lapack/cungrq.f new file mode 100644 index 000000000..a9e07e114 --- /dev/null +++ b/costa/native/external/lapack/cungrq.f @@ -0,0 +1,215 @@ + SUBROUTINE CUNGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNGRQ generates an M-by-N complex matrix Q with orthonormal rows, +* which is defined as the last M rows of a product of K elementary +* reflectors of order N +* +* Q = H(1)' H(2)' . . . H(k)' +* +* as returned by CGERQF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. N >= M. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. M >= K >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the (m-k+i)-th row must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by CGERQF in the last k rows of its array argument +* A. +* On exit, the M-by-N matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGERQF. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, II, IINFO, IWS, J, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CLARFB, CLARFT, CUNGR2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'CUNGRQ', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, M )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGRQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CUNGRQ', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNGRQ', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the first block. +* The last kk rows are handled by the block method. +* + KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB ) +* +* Set A(1:m-kk,n-kk+1:n) to zero. +* + DO 20 J = N - KK + 1, N + DO 10 I = 1, M - KK + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the first or only block. +* + CALL CUNGR2( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = K - KK + 1, K, NB + IB = MIN( NB, K-I+1 ) + II = M - K + I + IF( II.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL CLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, + $ A( II, 1 ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H' to A(1:m-k+i-1,1:n-k+i+ib-1) from the right +* + CALL CLARFB( 'Right', 'Conjugate transpose', 'Backward', + $ 'Rowwise', II-1, N-K+I+IB-1, IB, A( II, 1 ), + $ LDA, WORK, LDWORK, A, LDA, WORK( IB+1 ), + $ LDWORK ) + END IF +* +* Apply H' to columns 1:n-k+i+ib-1 of current block +* + CALL CUNGR2( IB, N-K+I+IB-1, IB, A( II, 1 ), LDA, TAU( I ), + $ WORK, IINFO ) +* +* Set columns n-k+i+ib:n of current block to zero +* + DO 40 L = N - K + I + IB, N + DO 30 J = II, II + IB - 1 + A( J, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of CUNGRQ +* + END diff --git a/costa/native/external/lapack/cungtr.f b/costa/native/external/lapack/cungtr.f new file mode 100644 index 000000000..a2ea7a680 --- /dev/null +++ b/costa/native/external/lapack/cungtr.f @@ -0,0 +1,185 @@ + SUBROUTINE CUNGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNGTR generates a complex unitary matrix Q which is defined as the +* product of n-1 elementary reflectors of order N, as returned by +* CHETRD: +* +* if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), +* +* if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A contains elementary reflectors +* from CHETRD; +* = 'L': Lower triangle of A contains elementary reflectors +* from CHETRD. +* +* N (input) INTEGER +* The order of the matrix Q. N >= 0. +* +* A (input/output) COMPLEX array, dimension (LDA,N) +* On entry, the vectors which define the elementary reflectors, +* as returned by CHETRD. +* On exit, the N-by-N unitary matrix Q. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= N. +* +* TAU (input) COMPLEX array, dimension (N-1) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CHETRD. +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= N-1. +* For optimum performance LWORK >= (N-1)*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, J, LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL CUNGQL, CUNGQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + IF ( UPPER ) THEN + NB = ILAENV( 1, 'CUNGQL', ' ', N-1, N-1, N-1, -1 ) + ELSE + NB = ILAENV( 1, 'CUNGQR', ' ', N-1, N-1, N-1, -1 ) + END IF + LWKOPT = MAX( 1, N-1 )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGTR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( UPPER ) THEN +* +* Q was determined by a call to CHETRD with UPLO = 'U' +* +* Shift the vectors which define the elementary reflectors one +* column to the left, and set the last row and column of Q to +* those of the unit matrix +* + DO 20 J = 1, N - 1 + DO 10 I = 1, J - 1 + A( I, J ) = A( I, J+1 ) + 10 CONTINUE + A( N, J ) = ZERO + 20 CONTINUE + DO 30 I = 1, N - 1 + A( I, N ) = ZERO + 30 CONTINUE + A( N, N ) = ONE +* +* Generate Q(1:n-1,1:n-1) +* + CALL CUNGQL( N-1, N-1, N-1, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* Q was determined by a call to CHETRD with UPLO = 'L'. +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q to +* those of the unit matrix +* + DO 50 J = N, 2, -1 + A( 1, J ) = ZERO + DO 40 I = J + 1, N + A( I, J ) = A( I, J-1 ) + 40 CONTINUE + 50 CONTINUE + A( 1, 1 ) = ONE + DO 60 I = 2, N + A( I, 1 ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Generate Q(2:n,2:n) +* + CALL CUNGQR( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNGTR +* + END diff --git a/costa/native/external/lapack/cunm2l.f b/costa/native/external/lapack/cunm2l.f new file mode 100644 index 000000000..dbf85c57e --- /dev/null +++ b/costa/native/external/lapack/cunm2l.f @@ -0,0 +1,197 @@ + SUBROUTINE CUNM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNM2L overwrites the general complex m-by-n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'C', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'C', +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by CGEQLF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'C': apply Q' (Conjugate transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) COMPLEX array, dimension (LDA,K) +* The i-th column must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* CGEQLF in the last k columns of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If SIDE = 'L', LDA >= max(1,M); +* if SIDE = 'R', LDA >= max(1,N). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGEQLF. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the m-by-n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, MI, NI, NQ + COMPLEX AII, TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNM2L', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. NOTRAN .OR. .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)' is applied to C(1:m-k+i,1:n) +* + MI = M - K + I + ELSE +* +* H(i) or H(i)' is applied to C(1:m,1:n-k+i) +* + NI = N - K + I + END IF +* +* Apply H(i) or H(i)' +* + IF( NOTRAN ) THEN + TAUI = TAU( I ) + ELSE + TAUI = CONJG( TAU( I ) ) + END IF + AII = A( NQ-K+I, I ) + A( NQ-K+I, I ) = ONE + CALL CLARF( SIDE, MI, NI, A( 1, I ), 1, TAUI, C, LDC, WORK ) + A( NQ-K+I, I ) = AII + 10 CONTINUE + RETURN +* +* End of CUNM2L +* + END diff --git a/costa/native/external/lapack/cunm2r.f b/costa/native/external/lapack/cunm2r.f new file mode 100644 index 000000000..b05d13546 --- /dev/null +++ b/costa/native/external/lapack/cunm2r.f @@ -0,0 +1,202 @@ + SUBROUTINE CUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNM2R overwrites the general complex m-by-n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'C', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'C', +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by CGEQRF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'C': apply Q' (Conjugate transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) COMPLEX array, dimension (LDA,K) +* The i-th column must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* CGEQRF in the first k columns of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If SIDE = 'L', LDA >= max(1,M); +* if SIDE = 'R', LDA >= max(1,N). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGEQRF. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the m-by-n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ + COMPLEX AII, TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNM2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) or H(i)' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) or H(i)' +* + IF( NOTRAN ) THEN + TAUI = TAU( I ) + ELSE + TAUI = CONJG( TAU( I ) ) + END IF + AII = A( I, I ) + A( I, I ) = ONE + CALL CLARF( SIDE, MI, NI, A( I, I ), 1, TAUI, C( IC, JC ), LDC, + $ WORK ) + A( I, I ) = AII + 10 CONTINUE + RETURN +* +* End of CUNM2R +* + END diff --git a/costa/native/external/lapack/cunmbr.f b/costa/native/external/lapack/cunmbr.f new file mode 100644 index 000000000..2f93df663 --- /dev/null +++ b/costa/native/external/lapack/cunmbr.f @@ -0,0 +1,282 @@ + SUBROUTINE CUNMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, + $ LDC, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, VECT + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* If VECT = 'Q', CUNMBR overwrites the general complex M-by-N matrix C +* with +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'C': Q**H * C C * Q**H +* +* If VECT = 'P', CUNMBR overwrites the general complex M-by-N matrix C +* with +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': P * C C * P +* TRANS = 'C': P**H * C C * P**H +* +* Here Q and P**H are the unitary matrices determined by CGEBRD when +* reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q +* and P**H are defined as products of elementary reflectors H(i) and +* G(i) respectively. +* +* Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the +* order of the unitary matrix Q or P**H that is applied. +* +* If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: +* if nq >= k, Q = H(1) H(2) . . . H(k); +* if nq < k, Q = H(1) H(2) . . . H(nq-1). +* +* If VECT = 'P', A is assumed to have been a K-by-NQ matrix: +* if k < nq, P = G(1) G(2) . . . G(k); +* if k >= nq, P = G(1) G(2) . . . G(nq-1). +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* = 'Q': apply Q or Q**H; +* = 'P': apply P or P**H. +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q, Q**H, P or P**H from the Left; +* = 'R': apply Q, Q**H, P or P**H from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q or P; +* = 'C': Conjugate transpose, apply Q**H or P**H. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* If VECT = 'Q', the number of columns in the original +* matrix reduced by CGEBRD. +* If VECT = 'P', the number of rows in the original +* matrix reduced by CGEBRD. +* K >= 0. +* +* A (input) COMPLEX array, dimension +* (LDA,min(nq,K)) if VECT = 'Q' +* (LDA,nq) if VECT = 'P' +* The vectors which define the elementary reflectors H(i) and +* G(i), whose products determine the matrices Q and P, as +* returned by CGEBRD. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If VECT = 'Q', LDA >= max(1,nq); +* if VECT = 'P', LDA >= max(1,min(nq,K)). +* +* TAU (input) COMPLEX array, dimension (min(nq,K)) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i) or G(i) which determines Q or P, as returned +* by CGEBRD in the array argument TAUQ or TAUP. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q +* or P*C or P**H*C or C*P or C*P**H. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL APPLYQ, LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL CUNMLQ, CUNMQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + APPLYQ = LSAME( VECT, 'Q' ) + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q or P and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.APPLYQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( K.LT.0 ) THEN + INFO = -6 + ELSE IF( ( APPLYQ .AND. LDA.LT.MAX( 1, NQ ) ) .OR. + $ ( .NOT.APPLYQ .AND. LDA.LT.MAX( 1, MIN( NQ, K ) ) ) ) + $ THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( APPLYQ ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'CUNMQR', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'CUNMQR', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + ELSE + IF( LEFT ) THEN + NB = ILAENV( 1, 'CUNMLQ', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'CUNMLQ', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMBR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + END IF +* +* Quick return if possible +* + WORK( 1 ) = 1 + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + IF( APPLYQ ) THEN +* +* Apply Q +* + IF( NQ.GE.K ) THEN +* +* Q was determined by a call to CGEBRD with nq >= k +* + CALL CUNMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, IINFO ) + ELSE IF( NQ.GT.1 ) THEN +* +* Q was determined by a call to CGEBRD with nq < k +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + I1 = 2 + I2 = 1 + ELSE + MI = M + NI = N - 1 + I1 = 1 + I2 = 2 + END IF + CALL CUNMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU, + $ C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + ELSE +* +* Apply P +* + IF( NOTRAN ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF + IF( NQ.GT.K ) THEN +* +* P was determined by a call to CGEBRD with nq > k +* + CALL CUNMLQ( SIDE, TRANST, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, IINFO ) + ELSE IF( NQ.GT.1 ) THEN +* +* P was determined by a call to CGEBRD with nq <= k +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + I1 = 2 + I2 = 1 + ELSE + MI = M + NI = N - 1 + I1 = 1 + I2 = 2 + END IF + CALL CUNMLQ( SIDE, TRANST, MI, NI, NQ-1, A( 1, 2 ), LDA, + $ TAU, C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNMBR +* + END diff --git a/costa/native/external/lapack/cunmhr.f b/costa/native/external/lapack/cunmhr.f new file mode 100644 index 000000000..8bbfb57ea --- /dev/null +++ b/costa/native/external/lapack/cunmhr.f @@ -0,0 +1,203 @@ + SUBROUTINE CUNMHR( SIDE, TRANS, M, N, ILO, IHI, A, LDA, TAU, C, + $ LDC, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER IHI, ILO, INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNMHR overwrites the general complex M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'C': Q**H * C C * Q**H +* +* where Q is a complex unitary matrix of order nq, with nq = m if +* SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +* IHI-ILO elementary reflectors, as returned by CGEHRD: +* +* Q = H(ilo) H(ilo+1) . . . H(ihi-1). +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**H from the Left; +* = 'R': apply Q or Q**H from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'C': apply Q**H (Conjugate transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* ILO and IHI must have the same values as in the previous call +* of CGEHRD. Q is equal to the unit matrix except in the +* submatrix Q(ilo+1:ihi,ilo+1:ihi). +* If SIDE = 'L', then 1 <= ILO <= IHI <= M, if M > 0, and +* ILO = 1 and IHI = 0, if M = 0; +* if SIDE = 'R', then 1 <= ILO <= IHI <= N, if N > 0, and +* ILO = 1 and IHI = 0, if N = 0. +* +* A (input) COMPLEX array, dimension +* (LDA,M) if SIDE = 'L' +* (LDA,N) if SIDE = 'R' +* The vectors which define the elementary reflectors, as +* returned by CGEHRD. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. +* +* TAU (input) COMPLEX array, dimension +* (M-1) if SIDE = 'L' +* (N-1) if SIDE = 'R' +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGEHRD. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, LQUERY + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NH, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL CUNMQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NH = IHI - ILO + LEFT = LSAME( SIDE, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) + $ THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, NQ ) ) THEN + INFO = -5 + ELSE IF( IHI.LT.MIN( ILO, NQ ) .OR. IHI.GT.NQ ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'CUNMQR', SIDE // TRANS, NH, N, NH, -1 ) + ELSE + NB = ILAENV( 1, 'CUNMQR', SIDE // TRANS, M, NH, NH, -1 ) + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMHR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. NH.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( LEFT ) THEN + MI = NH + NI = N + I1 = ILO + 1 + I2 = 1 + ELSE + MI = M + NI = NH + I1 = 1 + I2 = ILO + 1 + END IF +* + CALL CUNMQR( SIDE, TRANS, MI, NI, NH, A( ILO+1, ILO ), LDA, + $ TAU( ILO ), C( I1, I2 ), LDC, WORK, LWORK, IINFO ) +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNMHR +* + END diff --git a/costa/native/external/lapack/cunml2.f b/costa/native/external/lapack/cunml2.f new file mode 100644 index 000000000..7aca75b94 --- /dev/null +++ b/costa/native/external/lapack/cunml2.f @@ -0,0 +1,206 @@ + SUBROUTINE CUNML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNML2 overwrites the general complex m-by-n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'C', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'C', +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(k)' . . . H(2)' H(1)' +* +* as returned by CGELQF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'C': apply Q' (Conjugate transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) COMPLEX array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* CGELQF in the first k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGELQF. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the m-by-n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ + COMPLEX AII, TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNML2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. NOTRAN .OR. .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) or H(i)' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) or H(i)' +* + IF( NOTRAN ) THEN + TAUI = CONJG( TAU( I ) ) + ELSE + TAUI = TAU( I ) + END IF + IF( I.LT.NQ ) + $ CALL CLACGV( NQ-I, A( I, I+1 ), LDA ) + AII = A( I, I ) + A( I, I ) = ONE + CALL CLARF( SIDE, MI, NI, A( I, I ), LDA, TAUI, C( IC, JC ), + $ LDC, WORK ) + A( I, I ) = AII + IF( I.LT.NQ ) + $ CALL CLACGV( NQ-I, A( I, I+1 ), LDA ) + 10 CONTINUE + RETURN +* +* End of CUNML2 +* + END diff --git a/costa/native/external/lapack/cunmlq.f b/costa/native/external/lapack/cunmlq.f new file mode 100644 index 000000000..1d420ea0a --- /dev/null +++ b/costa/native/external/lapack/cunmlq.f @@ -0,0 +1,269 @@ + SUBROUTINE CUNMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNMLQ overwrites the general complex M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'C': Q**H * C C * Q**H +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(k)' . . . H(2)' H(1)' +* +* as returned by CGELQF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**H from the Left; +* = 'R': apply Q or Q**H from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'C': Conjugate transpose, apply Q**H. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) COMPLEX array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* CGELQF in the first k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGELQF. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + COMPLEX T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CLARFB, CLARFT, CUNML2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'CUNMLQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNMLQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL CUNML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + IF( NOTRAN ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL CLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H' +* + CALL CLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB, + $ A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, WORK, + $ LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNMLQ +* + END diff --git a/costa/native/external/lapack/cunmql.f b/costa/native/external/lapack/cunmql.f new file mode 100644 index 000000000..cbaca868f --- /dev/null +++ b/costa/native/external/lapack/cunmql.f @@ -0,0 +1,258 @@ + SUBROUTINE CUNMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNMQL overwrites the general complex M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'C': Q**H * C C * Q**H +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by CGEQLF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**H from the Left; +* = 'R': apply Q or Q**H from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'C': Transpose, apply Q**H. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) COMPLEX array, dimension (LDA,K) +* The i-th column must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* CGEQLF in the last k columns of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If SIDE = 'L', LDA >= max(1,M); +* if SIDE = 'R', LDA >= max(1,N). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGEQLF. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT, + $ MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + COMPLEX T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CLARFB, CLARFT, CUNM2L, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'CUNMQL', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMQL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNMQL', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL CUNM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL CLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB, + $ A( 1, I ), LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H' is applied to C(1:m-k+i+ib-1,1:n) +* + MI = M - K + I + IB - 1 + ELSE +* +* H or H' is applied to C(1:m,1:n-k+i+ib-1) +* + NI = N - K + I + IB - 1 + END IF +* +* Apply H or H' +* + CALL CLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI, + $ IB, A( 1, I ), LDA, T, LDT, C, LDC, WORK, + $ LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNMQL +* + END diff --git a/costa/native/external/lapack/cunmqr.f b/costa/native/external/lapack/cunmqr.f new file mode 100644 index 000000000..0c2aa4563 --- /dev/null +++ b/costa/native/external/lapack/cunmqr.f @@ -0,0 +1,262 @@ + SUBROUTINE CUNMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNMQR overwrites the general complex M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'C': Q**H * C C * Q**H +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by CGEQRF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**H from the Left; +* = 'R': apply Q or Q**H from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'C': Conjugate transpose, apply Q**H. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) COMPLEX array, dimension (LDA,K) +* The i-th column must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* CGEQRF in the first k columns of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If SIDE = 'L', LDA >= max(1,M); +* if SIDE = 'R', LDA >= max(1,N). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGEQRF. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + COMPLEX T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CLARFB, CLARFT, CUNM2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'CUNMQR', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNMQR', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL CUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL CLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H' +* + CALL CLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI, + $ IB, A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, + $ WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNMQR +* + END diff --git a/costa/native/external/lapack/cunmr2.f b/costa/native/external/lapack/cunmr2.f new file mode 100644 index 000000000..7fc45626e --- /dev/null +++ b/costa/native/external/lapack/cunmr2.f @@ -0,0 +1,199 @@ + SUBROUTINE CUNMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNMR2 overwrites the general complex m-by-n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'C', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'C', +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(1)' H(2)' . . . H(k)' +* +* as returned by CGERQF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'C': apply Q' (Conjugate transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) COMPLEX array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* CGERQF in the last k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGERQF. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the m-by-n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, MI, NI, NQ + COMPLEX AII, TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMR2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)' is applied to C(1:m-k+i,1:n) +* + MI = M - K + I + ELSE +* +* H(i) or H(i)' is applied to C(1:m,1:n-k+i) +* + NI = N - K + I + END IF +* +* Apply H(i) or H(i)' +* + IF( NOTRAN ) THEN + TAUI = CONJG( TAU( I ) ) + ELSE + TAUI = TAU( I ) + END IF + CALL CLACGV( NQ-K+I-1, A( I, 1 ), LDA ) + AII = A( I, NQ-K+I ) + A( I, NQ-K+I ) = ONE + CALL CLARF( SIDE, MI, NI, A( I, 1 ), LDA, TAUI, C, LDC, WORK ) + A( I, NQ-K+I ) = AII + CALL CLACGV( NQ-K+I-1, A( I, 1 ), LDA ) + 10 CONTINUE + RETURN +* +* End of CUNMR2 +* + END diff --git a/costa/native/external/lapack/cunmr3.f b/costa/native/external/lapack/cunmr3.f new file mode 100644 index 000000000..38c8eca46 --- /dev/null +++ b/costa/native/external/lapack/cunmr3.f @@ -0,0 +1,213 @@ + SUBROUTINE CUNMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, L, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNMR3 overwrites the general complex m by n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'C', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'C', +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by CTZRZF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'C': apply Q' (Conjugate transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* L (input) INTEGER +* The number of columns of the matrix A containing +* the meaningful part of the Householder reflectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* A (input) COMPLEX array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* CTZRZF in the last k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CTZRZF. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the m-by-n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JA, JC, MI, NI, NQ + COMPLEX TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLARZ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR. + $ ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMR3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JA = M - L + 1 + JC = 1 + ELSE + MI = M + JA = N - L + 1 + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) or H(i)' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) or H(i)' +* + IF( NOTRAN ) THEN + TAUI = TAU( I ) + ELSE + TAUI = CONJG( TAU( I ) ) + END IF + CALL CLARZ( SIDE, MI, NI, L, A( I, JA ), LDA, TAUI, + $ C( IC, JC ), LDC, WORK ) +* + 10 CONTINUE +* + RETURN +* +* End of CUNMR3 +* + END diff --git a/costa/native/external/lapack/cunmrq.f b/costa/native/external/lapack/cunmrq.f new file mode 100644 index 000000000..89f9b6c37 --- /dev/null +++ b/costa/native/external/lapack/cunmrq.f @@ -0,0 +1,265 @@ + SUBROUTINE CUNMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNMRQ overwrites the general complex M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'C': Q**H * C C * Q**H +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(1)' H(2)' . . . H(k)' +* +* as returned by CGERQF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**H from the Left; +* = 'R': apply Q or Q**H from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'C': Transpose, apply Q**H. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) COMPLEX array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* CGERQF in the last k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CGERQF. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT, + $ MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + COMPLEX T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CLARFB, CLARFT, CUNMR2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'CUNMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMRQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL CUNMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + IF( NOTRAN ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL CLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB, + $ A( I, 1 ), LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H' is applied to C(1:m-k+i+ib-1,1:n) +* + MI = M - K + I + IB - 1 + ELSE +* +* H or H' is applied to C(1:m,1:n-k+i+ib-1) +* + NI = N - K + I + IB - 1 + END IF +* +* Apply H or H' +* + CALL CLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, + $ IB, A( I, 1 ), LDA, T, LDT, C, LDC, WORK, + $ LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNMRQ +* + END diff --git a/costa/native/external/lapack/cunmrz.f b/costa/native/external/lapack/cunmrz.f new file mode 100644 index 000000000..b157af671 --- /dev/null +++ b/costa/native/external/lapack/cunmrz.f @@ -0,0 +1,294 @@ + SUBROUTINE CUNMRZ( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, L, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNMRZ overwrites the general complex M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'C': Q**H * C C * Q**H +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by CTZRZF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**H from the Left; +* = 'R': apply Q or Q**H from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'C': Conjugate transpose, apply Q**H. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* L (input) INTEGER +* The number of columns of the matrix A containing +* the meaningful part of the Householder reflectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* A (input) COMPLEX array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* CTZRZF in the last k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) COMPLEX array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CTZRZF. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JA, JC, + $ LDWORK, LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + COMPLEX T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CLARZB, CLARZT, CUNMR3, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR. + $ ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'CUNMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMRZ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'CUNMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL CUNMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + JA = M - L + 1 + ELSE + MI = M + IC = 1 + JA = N - L + 1 + END IF +* + IF( NOTRAN ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL CLARZT( 'Backward', 'Rowwise', L, IB, A( I, JA ), LDA, + $ TAU( I ), T, LDT ) +* + IF( LEFT ) THEN +* +* H or H' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H' +* + CALL CLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, + $ IB, L, A( I, JA ), LDA, T, LDT, C( IC, JC ), + $ LDC, WORK, LDWORK ) + 10 CONTINUE +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CUNMRZ +* + END diff --git a/costa/native/external/lapack/cunmtr.f b/costa/native/external/lapack/cunmtr.f new file mode 100644 index 000000000..aa2992474 --- /dev/null +++ b/costa/native/external/lapack/cunmtr.f @@ -0,0 +1,224 @@ + SUBROUTINE CUNMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, UPLO + INTEGER INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUNMTR overwrites the general complex M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'C': Q**H * C C * Q**H +* +* where Q is a complex unitary matrix of order nq, with nq = m if +* SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +* nq-1 elementary reflectors, as returned by CHETRD: +* +* if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); +* +* if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**H from the Left; +* = 'R': apply Q or Q**H from the Right. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A contains elementary reflectors +* from CHETRD; +* = 'L': Lower triangle of A contains elementary reflectors +* from CHETRD. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'C': Conjugate transpose, apply Q**H. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* A (input) COMPLEX array, dimension +* (LDA,M) if SIDE = 'L' +* (LDA,N) if SIDE = 'R' +* The vectors which define the elementary reflectors, as +* returned by CHETRD. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. +* +* TAU (input) COMPLEX array, dimension +* (M-1) if SIDE = 'L' +* (N-1) if SIDE = 'R' +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CHETRD. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) COMPLEX array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >=M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, UPPER + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL CUNMQL, CUNMQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) + $ THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( UPPER ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'CUNMQL', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'CUNMQL', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + ELSE + IF( LEFT ) THEN + NB = ILAENV( 1, 'CUNMQR', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'CUNMQR', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMTR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. NQ.EQ.1 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + ELSE + MI = M + NI = N - 1 + END IF +* + IF( UPPER ) THEN +* +* Q was determined by a call to CHETRD with UPLO = 'U' +* + CALL CUNMQL( SIDE, TRANS, MI, NI, NQ-1, A( 1, 2 ), LDA, TAU, C, + $ LDC, WORK, LWORK, IINFO ) + ELSE +* +* Q was determined by a call to CHETRD with UPLO = 'L' +* + IF( LEFT ) THEN + I1 = 2 + I2 = 1 + ELSE + I1 = 1 + I2 = 2 + END IF + CALL CUNMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU, + $ C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNMTR +* + END diff --git a/costa/native/external/lapack/cupgtr.f b/costa/native/external/lapack/cupgtr.f new file mode 100644 index 000000000..82e84b611 --- /dev/null +++ b/costa/native/external/lapack/cupgtr.f @@ -0,0 +1,162 @@ + SUBROUTINE CUPGTR( UPLO, N, AP, TAU, Q, LDQ, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDQ, N +* .. +* .. Array Arguments .. + COMPLEX AP( * ), Q( LDQ, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUPGTR generates a complex unitary matrix Q which is defined as the +* product of n-1 elementary reflectors H(i) of order n, as returned by +* CHPTRD using packed storage: +* +* if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), +* +* if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular packed storage used in previous +* call to CHPTRD; +* = 'L': Lower triangular packed storage used in previous +* call to CHPTRD. +* +* N (input) INTEGER +* The order of the matrix Q. N >= 0. +* +* AP (input) COMPLEX array, dimension (N*(N+1)/2) +* The vectors which define the elementary reflectors, as +* returned by CHPTRD. +* +* TAU (input) COMPLEX array, dimension (N-1) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CHPTRD. +* +* Q (output) COMPLEX array, dimension (LDQ,N) +* The N-by-N unitary matrix Q. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* WORK (workspace) COMPLEX array, dimension (N-1) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IINFO, IJ, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CUNG2L, CUNG2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUPGTR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Q was determined by a call to CHPTRD with UPLO = 'U' +* +* Unpack the vectors which define the elementary reflectors and +* set the last row and column of Q equal to those of the unit +* matrix +* + IJ = 2 + DO 20 J = 1, N - 1 + DO 10 I = 1, J - 1 + Q( I, J ) = AP( IJ ) + IJ = IJ + 1 + 10 CONTINUE + IJ = IJ + 2 + Q( N, J ) = CZERO + 20 CONTINUE + DO 30 I = 1, N - 1 + Q( I, N ) = CZERO + 30 CONTINUE + Q( N, N ) = CONE +* +* Generate Q(1:n-1,1:n-1) +* + CALL CUNG2L( N-1, N-1, N-1, Q, LDQ, TAU, WORK, IINFO ) +* + ELSE +* +* Q was determined by a call to CHPTRD with UPLO = 'L'. +* +* Unpack the vectors which define the elementary reflectors and +* set the first row and column of Q equal to those of the unit +* matrix +* + Q( 1, 1 ) = CONE + DO 40 I = 2, N + Q( I, 1 ) = CZERO + 40 CONTINUE + IJ = 3 + DO 60 J = 2, N + Q( 1, J ) = CZERO + DO 50 I = J + 1, N + Q( I, J ) = AP( IJ ) + IJ = IJ + 1 + 50 CONTINUE + IJ = IJ + 2 + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Generate Q(2:n,2:n) +* + CALL CUNG2R( N-1, N-1, N-1, Q( 2, 2 ), LDQ, TAU, WORK, + $ IINFO ) + END IF + END IF + RETURN +* +* End of CUPGTR +* + END diff --git a/costa/native/external/lapack/cupmtr.f b/costa/native/external/lapack/cupmtr.f new file mode 100644 index 000000000..d9013f769 --- /dev/null +++ b/costa/native/external/lapack/cupmtr.f @@ -0,0 +1,268 @@ + SUBROUTINE CUPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, UPLO + INTEGER INFO, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX AP( * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* CUPMTR overwrites the general complex M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'C': Q**H * C C * Q**H +* +* where Q is a complex unitary matrix of order nq, with nq = m if +* SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +* nq-1 elementary reflectors, as returned by CHPTRD using packed +* storage: +* +* if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); +* +* if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**H from the Left; +* = 'R': apply Q or Q**H from the Right. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular packed storage used in previous +* call to CHPTRD; +* = 'L': Lower triangular packed storage used in previous +* call to CHPTRD. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'C': Conjugate transpose, apply Q**H. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* AP (input) COMPLEX array, dimension +* (M*(M+1)/2) if SIDE = 'L' +* (N*(N+1)/2) if SIDE = 'R' +* The vectors which define the elementary reflectors, as +* returned by CHPTRD. AP is modified by the routine but +* restored on exit. +* +* TAU (input) COMPLEX array, dimension (M-1) if SIDE = 'L' +* or (N-1) if SIDE = 'R' +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by CHPTRD. +* +* C (input/output) COMPLEX array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX array, dimension +* (N) if SIDE = 'L' +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL FORWRD, LEFT, NOTRAN, UPPER + INTEGER I, I1, I2, I3, IC, II, JC, MI, NI, NQ + COMPLEX AII, TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + UPPER = LSAME( UPLO, 'U' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUPMTR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Q was determined by a call to CHPTRD with UPLO = 'U' +* + FORWRD = ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) +* + IF( FORWRD ) THEN + I1 = 1 + I2 = NQ - 1 + I3 = 1 + II = 2 + ELSE + I1 = NQ - 1 + I2 = 1 + I3 = -1 + II = NQ*( NQ+1 ) / 2 - 1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)' is applied to C(1:i,1:n) +* + MI = I + ELSE +* +* H(i) or H(i)' is applied to C(1:m,1:i) +* + NI = I + END IF +* +* Apply H(i) or H(i)' +* + IF( NOTRAN ) THEN + TAUI = TAU( I ) + ELSE + TAUI = CONJG( TAU( I ) ) + END IF + AII = AP( II ) + AP( II ) = ONE + CALL CLARF( SIDE, MI, NI, AP( II-I+1 ), 1, TAUI, C, LDC, + $ WORK ) + AP( II ) = AII +* + IF( FORWRD ) THEN + II = II + I + 2 + ELSE + II = II - I - 1 + END IF + 10 CONTINUE + ELSE +* +* Q was determined by a call to CHPTRD with UPLO = 'L'. +* + FORWRD = ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) +* + IF( FORWRD ) THEN + I1 = 1 + I2 = NQ - 1 + I3 = 1 + II = 2 + ELSE + I1 = NQ - 1 + I2 = 1 + I3 = -1 + II = NQ*( NQ+1 ) / 2 - 1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 20 I = I1, I2, I3 + AII = AP( II ) + AP( II ) = ONE + IF( LEFT ) THEN +* +* H(i) or H(i)' is applied to C(i+1:m,1:n) +* + MI = M - I + IC = I + 1 + ELSE +* +* H(i) or H(i)' is applied to C(1:m,i+1:n) +* + NI = N - I + JC = I + 1 + END IF +* +* Apply H(i) or H(i)' +* + IF( NOTRAN ) THEN + TAUI = TAU( I ) + ELSE + TAUI = CONJG( TAU( I ) ) + END IF + CALL CLARF( SIDE, MI, NI, AP( II ), 1, TAUI, C( IC, JC ), + $ LDC, WORK ) + AP( II ) = AII +* + IF( FORWRD ) THEN + II = II + NQ - I + 1 + ELSE + II = II - NQ + I - 2 + END IF + 20 CONTINUE + END IF + RETURN +* +* End of CUPMTR +* + END diff --git a/costa/native/external/lapack/dbdsdc.f b/costa/native/external/lapack/dbdsdc.f new file mode 100644 index 000000000..17828d161 --- /dev/null +++ b/costa/native/external/lapack/dbdsdc.f @@ -0,0 +1,427 @@ + SUBROUTINE DBDSDC( UPLO, COMPQ, N, D, E, U, LDU, VT, LDVT, Q, IQ, + $ WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* December 1, 1999 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, UPLO + INTEGER INFO, LDU, LDVT, N +* .. +* .. Array Arguments .. + INTEGER IQ( * ), IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), Q( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DBDSDC computes the singular value decomposition (SVD) of a real +* N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, +* using a divide and conquer method, where S is a diagonal matrix +* with non-negative diagonal elements (the singular values of B), and +* U and VT are orthogonal matrices of left and right singular vectors, +* respectively. DBDSDC can be used to compute all singular values, +* and optionally, singular vectors or singular vectors in compact form. +* +* This code makes very mild assumptions about floating point +* arithmetic. It will work on machines with a guard digit in +* add/subtract, or on those binary machines without guard digits +* which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. +* It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. See DLASD3 for details. +* +* The code currently call DLASDQ if singular values only are desired. +* However, it can be slightly modified to compute singular values +* using the divide and conquer method. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': B is upper bidiagonal. +* = 'L': B is lower bidiagonal. +* +* COMPQ (input) CHARACTER*1 +* Specifies whether singular vectors are to be computed +* as follows: +* = 'N': Compute singular values only; +* = 'P': Compute singular values and compute singular +* vectors in compact form; +* = 'I': Compute singular values and singular vectors. +* +* N (input) INTEGER +* The order of the matrix B. N >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the n diagonal elements of the bidiagonal matrix B. +* On exit, if INFO=0, the singular values of B. +* +* E (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the elements of E contain the offdiagonal +* elements of the bidiagonal matrix whose SVD is desired. +* On exit, E has been destroyed. +* +* U (output) DOUBLE PRECISION array, dimension (LDU,N) +* If COMPQ = 'I', then: +* On exit, if INFO = 0, U contains the left singular vectors +* of the bidiagonal matrix. +* For other values of COMPQ, U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= 1. +* If singular vectors are desired, then LDU >= max( 1, N ). +* +* VT (output) DOUBLE PRECISION array, dimension (LDVT,N) +* If COMPQ = 'I', then: +* On exit, if INFO = 0, VT' contains the right singular +* vectors of the bidiagonal matrix. +* For other values of COMPQ, VT is not referenced. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. LDVT >= 1. +* If singular vectors are desired, then LDVT >= max( 1, N ). +* +* Q (output) DOUBLE PRECISION array, dimension (LDQ) +* If COMPQ = 'P', then: +* On exit, if INFO = 0, Q and IQ contain the left +* and right singular vectors in a compact form, +* requiring O(N log N) space instead of 2*N**2. +* In particular, Q contains all the DOUBLE PRECISION data in +* LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) +* words of memory, where SMLSIZ is returned by ILAENV and +* is equal to the maximum size of the subproblems at the +* bottom of the computation tree (usually about 25). +* For other values of COMPQ, Q is not referenced. +* +* IQ (output) INTEGER array, dimension (LDIQ) +* If COMPQ = 'P', then: +* On exit, if INFO = 0, Q and IQ contain the left +* and right singular vectors in a compact form, +* requiring O(N log N) space instead of 2*N**2. +* In particular, IQ contains all INTEGER data in +* LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) +* words of memory, where SMLSIZ is returned by ILAENV and +* is equal to the maximum size of the subproblems at the +* bottom of the computation tree (usually about 25). +* For other values of COMPQ, IQ is not referenced. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) +* If COMPQ = 'N' then LWORK >= (4 * N). +* If COMPQ = 'P' then LWORK >= (6 * N). +* If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). +* +* IWORK (workspace) INTEGER array, dimension (8*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: The algorithm failed to compute an singular value. +* The update process of divide and conquer failed. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER DIFL, DIFR, GIVCOL, GIVNUM, GIVPTR, I, IC, + $ ICOMPQ, IERR, II, IS, IU, IUPLO, IVT, J, K, KK, + $ MLVL, NM1, NSIZE, PERM, POLES, QSTART, SMLSIZ, + $ SMLSZP, SQRE, START, WSTART, Z + DOUBLE PRECISION CS, EPS, ORGNRM, P, R, SN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL LSAME, ILAENV, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLARTG, DLASCL, DLASD0, DLASDA, DLASDQ, + $ DLASET, DLASR, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, LOG, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IUPLO = 0 + IF( LSAME( UPLO, 'U' ) ) + $ IUPLO = 1 + IF( LSAME( UPLO, 'L' ) ) + $ IUPLO = 2 + IF( LSAME( COMPQ, 'N' ) ) THEN + ICOMPQ = 0 + ELSE IF( LSAME( COMPQ, 'P' ) ) THEN + ICOMPQ = 1 + ELSE IF( LSAME( COMPQ, 'I' ) ) THEN + ICOMPQ = 2 + ELSE + ICOMPQ = -1 + END IF + IF( IUPLO.EQ.0 ) THEN + INFO = -1 + ELSE IF( ICOMPQ.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ( LDU.LT.1 ) .OR. ( ( ICOMPQ.EQ.2 ) .AND. ( LDU.LT. + $ N ) ) ) THEN + INFO = -7 + ELSE IF( ( LDVT.LT.1 ) .OR. ( ( ICOMPQ.EQ.2 ) .AND. ( LDVT.LT. + $ N ) ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DBDSDC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + SMLSIZ = ILAENV( 9, 'DBDSDC', ' ', 0, 0, 0, 0 ) + IF( N.EQ.1 ) THEN + IF( ICOMPQ.EQ.1 ) THEN + Q( 1 ) = SIGN( ONE, D( 1 ) ) + Q( 1+SMLSIZ*N ) = ONE + ELSE IF( ICOMPQ.EQ.2 ) THEN + U( 1, 1 ) = SIGN( ONE, D( 1 ) ) + VT( 1, 1 ) = ONE + END IF + D( 1 ) = ABS( D( 1 ) ) + RETURN + END IF + NM1 = N - 1 +* +* If matrix lower bidiagonal, rotate to be upper bidiagonal +* by applying Givens rotations on the left +* + WSTART = 1 + QSTART = 3 + IF( ICOMPQ.EQ.1 ) THEN + CALL DCOPY( N, D, 1, Q( 1 ), 1 ) + CALL DCOPY( N-1, E, 1, Q( N+1 ), 1 ) + END IF + IF( IUPLO.EQ.2 ) THEN + QSTART = 5 + WSTART = 2*N - 1 + DO 10 I = 1, N - 1 + CALL DLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( ICOMPQ.EQ.1 ) THEN + Q( I+2*N ) = CS + Q( I+3*N ) = SN + ELSE IF( ICOMPQ.EQ.2 ) THEN + WORK( I ) = CS + WORK( NM1+I ) = -SN + END IF + 10 CONTINUE + END IF +* +* If ICOMPQ = 0, use DLASDQ to compute the singular values. +* + IF( ICOMPQ.EQ.0 ) THEN + CALL DLASDQ( 'U', 0, N, 0, 0, 0, D, E, VT, LDVT, U, LDU, U, + $ LDU, WORK( WSTART ), INFO ) + GO TO 40 + END IF +* +* If N is smaller than the minimum divide size SMLSIZ, then solve +* the problem with another solver. +* + IF( N.LE.SMLSIZ ) THEN + IF( ICOMPQ.EQ.2 ) THEN + CALL DLASET( 'A', N, N, ZERO, ONE, U, LDU ) + CALL DLASET( 'A', N, N, ZERO, ONE, VT, LDVT ) + CALL DLASDQ( 'U', 0, N, N, N, 0, D, E, VT, LDVT, U, LDU, U, + $ LDU, WORK( WSTART ), INFO ) + ELSE IF( ICOMPQ.EQ.1 ) THEN + IU = 1 + IVT = IU + N + CALL DLASET( 'A', N, N, ZERO, ONE, Q( IU+( QSTART-1 )*N ), + $ N ) + CALL DLASET( 'A', N, N, ZERO, ONE, Q( IVT+( QSTART-1 )*N ), + $ N ) + CALL DLASDQ( 'U', 0, N, N, N, 0, D, E, + $ Q( IVT+( QSTART-1 )*N ), N, + $ Q( IU+( QSTART-1 )*N ), N, + $ Q( IU+( QSTART-1 )*N ), N, WORK( WSTART ), + $ INFO ) + END IF + GO TO 40 + END IF +* + IF( ICOMPQ.EQ.2 ) THEN + CALL DLASET( 'A', N, N, ZERO, ONE, U, LDU ) + CALL DLASET( 'A', N, N, ZERO, ONE, VT, LDVT ) + END IF +* +* Scale. +* + ORGNRM = DLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) + $ RETURN + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, IERR ) + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, IERR ) +* + EPS = DLAMCH( 'Epsilon' ) +* + MLVL = INT( LOG( DBLE( N ) / DBLE( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1 + SMLSZP = SMLSIZ + 1 +* + IF( ICOMPQ.EQ.1 ) THEN + IU = 1 + IVT = 1 + SMLSIZ + DIFL = IVT + SMLSZP + DIFR = DIFL + MLVL + Z = DIFR + MLVL*2 + IC = Z + MLVL + IS = IC + 1 + POLES = IS + 1 + GIVNUM = POLES + 2*MLVL +* + K = 1 + GIVPTR = 2 + PERM = 3 + GIVCOL = PERM + MLVL + END IF +* + DO 20 I = 1, N + IF( ABS( D( I ) ).LT.EPS ) THEN + D( I ) = SIGN( EPS, D( I ) ) + END IF + 20 CONTINUE +* + START = 1 + SQRE = 0 +* + DO 30 I = 1, NM1 + IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN +* +* Subproblem found. First determine its size and then +* apply divide and conquer on it. +* + IF( I.LT.NM1 ) THEN +* +* A subproblem with E(I) small for I < NM1. +* + NSIZE = I - START + 1 + ELSE IF( ABS( E( I ) ).GE.EPS ) THEN +* +* A subproblem with E(NM1) not too small but I = NM1. +* + NSIZE = N - START + 1 + ELSE +* +* A subproblem with E(NM1) small. This implies an +* 1-by-1 subproblem at D(N). Solve this 1-by-1 problem +* first. +* + NSIZE = I - START + 1 + IF( ICOMPQ.EQ.2 ) THEN + U( N, N ) = SIGN( ONE, D( N ) ) + VT( N, N ) = ONE + ELSE IF( ICOMPQ.EQ.1 ) THEN + Q( N+( QSTART-1 )*N ) = SIGN( ONE, D( N ) ) + Q( N+( SMLSIZ+QSTART-1 )*N ) = ONE + END IF + D( N ) = ABS( D( N ) ) + END IF + IF( ICOMPQ.EQ.2 ) THEN + CALL DLASD0( NSIZE, SQRE, D( START ), E( START ), + $ U( START, START ), LDU, VT( START, START ), + $ LDVT, SMLSIZ, IWORK, WORK( WSTART ), INFO ) + ELSE + CALL DLASDA( ICOMPQ, SMLSIZ, NSIZE, SQRE, D( START ), + $ E( START ), Q( START+( IU+QSTART-2 )*N ), N, + $ Q( START+( IVT+QSTART-2 )*N ), + $ IQ( START+K*N ), Q( START+( DIFL+QSTART-2 )* + $ N ), Q( START+( DIFR+QSTART-2 )*N ), + $ Q( START+( Z+QSTART-2 )*N ), + $ Q( START+( POLES+QSTART-2 )*N ), + $ IQ( START+GIVPTR*N ), IQ( START+GIVCOL*N ), + $ N, IQ( START+PERM*N ), + $ Q( START+( GIVNUM+QSTART-2 )*N ), + $ Q( START+( IC+QSTART-2 )*N ), + $ Q( START+( IS+QSTART-2 )*N ), + $ WORK( WSTART ), IWORK, INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + END IF + START = I + 1 + END IF + 30 CONTINUE +* +* Unscale +* + CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, IERR ) + 40 CONTINUE +* +* Use Selection Sort to minimize swaps of singular vectors +* + DO 60 II = 2, N + I = II - 1 + KK = I + P = D( I ) + DO 50 J = II, N + IF( D( J ).GT.P ) THEN + KK = J + P = D( J ) + END IF + 50 CONTINUE + IF( KK.NE.I ) THEN + D( KK ) = D( I ) + D( I ) = P + IF( ICOMPQ.EQ.1 ) THEN + IQ( I ) = KK + ELSE IF( ICOMPQ.EQ.2 ) THEN + CALL DSWAP( N, U( 1, I ), 1, U( 1, KK ), 1 ) + CALL DSWAP( N, VT( I, 1 ), LDVT, VT( KK, 1 ), LDVT ) + END IF + ELSE IF( ICOMPQ.EQ.1 ) THEN + IQ( I ) = I + END IF + 60 CONTINUE +* +* If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO +* + IF( ICOMPQ.EQ.1 ) THEN + IF( IUPLO.EQ.1 ) THEN + IQ( N ) = 1 + ELSE + IQ( N ) = 0 + END IF + END IF +* +* If B is lower bidiagonal, update U by those Givens rotations +* which rotated B to be upper bidiagonal +* + IF( ( IUPLO.EQ.2 ) .AND. ( ICOMPQ.EQ.2 ) ) + $ CALL DLASR( 'L', 'V', 'B', N, N, WORK( 1 ), WORK( N ), U, LDU ) +* + RETURN +* +* End of DBDSDC +* + END diff --git a/costa/native/external/lapack/dbdsqr.f b/costa/native/external/lapack/dbdsqr.f new file mode 100644 index 000000000..678a24408 --- /dev/null +++ b/costa/native/external/lapack/dbdsqr.f @@ -0,0 +1,733 @@ + SUBROUTINE DBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, + $ LDU, C, LDC, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), D( * ), E( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DBDSQR computes the singular value decomposition (SVD) of a real +* N-by-N (upper or lower) bidiagonal matrix B: B = Q * S * P' (P' +* denotes the transpose of P), where S is a diagonal matrix with +* non-negative diagonal elements (the singular values of B), and Q +* and P are orthogonal matrices. +* +* The routine computes S, and optionally computes U * Q, P' * VT, +* or Q' * C, for given real input matrices U, VT, and C. +* +* See "Computing Small Singular Values of Bidiagonal Matrices With +* Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, +* LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, +* no. 5, pp. 873-912, Sept 1990) and +* "Accurate singular values and differential qd algorithms," by +* B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics +* Department, University of California at Berkeley, July 1992 +* for a detailed description of the algorithm. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': B is upper bidiagonal; +* = 'L': B is lower bidiagonal. +* +* N (input) INTEGER +* The order of the matrix B. N >= 0. +* +* NCVT (input) INTEGER +* The number of columns of the matrix VT. NCVT >= 0. +* +* NRU (input) INTEGER +* The number of rows of the matrix U. NRU >= 0. +* +* NCC (input) INTEGER +* The number of columns of the matrix C. NCC >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the n diagonal elements of the bidiagonal matrix B. +* On exit, if INFO=0, the singular values of B in decreasing +* order. +* +* E (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the elements of E contain the +* offdiagonal elements of the bidiagonal matrix whose SVD +* is desired. On normal exit (INFO = 0), E is destroyed. +* If the algorithm does not converge (INFO > 0), D and E +* will contain the diagonal and superdiagonal elements of a +* bidiagonal matrix orthogonally equivalent to the one given +* as input. E(N) is used for workspace. +* +* VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) +* On entry, an N-by-NCVT matrix VT. +* On exit, VT is overwritten by P' * VT. +* VT is not referenced if NCVT = 0. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. +* LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. +* +* U (input/output) DOUBLE PRECISION array, dimension (LDU, N) +* On entry, an NRU-by-N matrix U. +* On exit, U is overwritten by U * Q. +* U is not referenced if NRU = 0. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= max(1,NRU). +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) +* On entry, an N-by-NCC matrix C. +* On exit, C is overwritten by Q' * C. +* C is not referenced if NCC = 0. +* +* LDC (input) INTEGER +* The leading dimension of the array C. +* LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: If INFO = -i, the i-th argument had an illegal value +* > 0: the algorithm did not converge; D and E contain the +* elements of a bidiagonal matrix which is orthogonally +* similar to the input matrix B; if INFO = i, i +* elements of E have not converged to zero. +* +* Internal Parameters +* =================== +* +* TOLMUL DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8))) +* TOLMUL controls the convergence criterion of the QR loop. +* If it is positive, TOLMUL*EPS is the desired relative +* precision in the computed singular values. +* If it is negative, abs(TOLMUL*EPS*sigma_max) is the +* desired absolute accuracy in the computed singular +* values (corresponds to relative accuracy +* abs(TOLMUL*EPS) in the largest singular value. +* abs(TOLMUL) should be between 1 and 1/EPS, and preferably +* between 10 (for fast convergence) and .1/EPS +* (for there to be some accuracy in the results). +* Default is to lose at either one eighth or 2 of the +* available decimal digits in each computed singular value +* (whichever is smaller). +* +* MAXITR INTEGER, default = 6 +* MAXITR controls the maximum number of passes of the +* algorithm through its inner loop. The algorithms stops +* (and so fails to converge) if the number of passes +* through the inner loop exceeds MAXITR*N**2. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + DOUBLE PRECISION NEGONE + PARAMETER ( NEGONE = -1.0D0 ) + DOUBLE PRECISION HNDRTH + PARAMETER ( HNDRTH = 0.01D0 ) + DOUBLE PRECISION TEN + PARAMETER ( TEN = 10.0D0 ) + DOUBLE PRECISION HNDRD + PARAMETER ( HNDRD = 100.0D0 ) + DOUBLE PRECISION MEIGTH + PARAMETER ( MEIGTH = -0.125D0 ) + INTEGER MAXITR + PARAMETER ( MAXITR = 6 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, ROTATE + INTEGER I, IDIR, ISUB, ITER, J, LL, LLL, M, MAXIT, NM1, + $ NM12, NM13, OLDLL, OLDM + DOUBLE PRECISION ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU, + $ OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL, + $ SINR, SLL, SMAX, SMIN, SMINL, SMINLO, SMINOA, + $ SN, THRESH, TOL, TOLMUL, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLARTG, DLAS2, DLASQ1, DLASR, DLASV2, DROT, + $ DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LOWER ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NCVT.LT.0 ) THEN + INFO = -3 + ELSE IF( NRU.LT.0 ) THEN + INFO = -4 + ELSE IF( NCC.LT.0 ) THEN + INFO = -5 + ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR. + $ ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN + INFO = -9 + ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN + INFO = -11 + ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR. + $ ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DBDSQR', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN + IF( N.EQ.1 ) + $ GO TO 160 +* +* ROTATE is true if any singular vectors desired, false otherwise +* + ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 ) +* +* If no singular vectors desired, use qd algorithm +* + IF( .NOT.ROTATE ) THEN + CALL DLASQ1( N, D, E, WORK, INFO ) + RETURN + END IF +* + NM1 = N - 1 + NM12 = NM1 + NM1 + NM13 = NM12 + NM1 + IDIR = 0 +* +* Get machine constants +* + EPS = DLAMCH( 'Epsilon' ) + UNFL = DLAMCH( 'Safe minimum' ) +* +* If matrix lower bidiagonal, rotate to be upper bidiagonal +* by applying Givens rotations on the left +* + IF( LOWER ) THEN + DO 10 I = 1, N - 1 + CALL DLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + WORK( I ) = CS + WORK( NM1+I ) = SN + 10 CONTINUE +* +* Update singular vectors if desired +* + IF( NRU.GT.0 ) + $ CALL DLASR( 'R', 'V', 'F', NRU, N, WORK( 1 ), WORK( N ), U, + $ LDU ) + IF( NCC.GT.0 ) + $ CALL DLASR( 'L', 'V', 'F', N, NCC, WORK( 1 ), WORK( N ), C, + $ LDC ) + END IF +* +* Compute singular values to relative accuracy TOL +* (By setting TOL to be negative, algorithm will compute +* singular values to absolute accuracy ABS(TOL)*norm(input matrix)) +* + TOLMUL = MAX( TEN, MIN( HNDRD, EPS**MEIGTH ) ) + TOL = TOLMUL*EPS +* +* Compute approximate maximum, minimum singular values +* + SMAX = ZERO + DO 20 I = 1, N + SMAX = MAX( SMAX, ABS( D( I ) ) ) + 20 CONTINUE + DO 30 I = 1, N - 1 + SMAX = MAX( SMAX, ABS( E( I ) ) ) + 30 CONTINUE + SMINL = ZERO + IF( TOL.GE.ZERO ) THEN +* +* Relative accuracy desired +* + SMINOA = ABS( D( 1 ) ) + IF( SMINOA.EQ.ZERO ) + $ GO TO 50 + MU = SMINOA + DO 40 I = 2, N + MU = ABS( D( I ) )*( MU / ( MU+ABS( E( I-1 ) ) ) ) + SMINOA = MIN( SMINOA, MU ) + IF( SMINOA.EQ.ZERO ) + $ GO TO 50 + 40 CONTINUE + 50 CONTINUE + SMINOA = SMINOA / SQRT( DBLE( N ) ) + THRESH = MAX( TOL*SMINOA, MAXITR*N*N*UNFL ) + ELSE +* +* Absolute accuracy desired +* + THRESH = MAX( ABS( TOL )*SMAX, MAXITR*N*N*UNFL ) + END IF +* +* Prepare for main iteration loop for the singular values +* (MAXIT is the maximum number of passes through the inner +* loop permitted before nonconvergence signalled.) +* + MAXIT = MAXITR*N*N + ITER = 0 + OLDLL = -1 + OLDM = -1 +* +* M points to last element of unconverged part of matrix +* + M = N +* +* Begin main iteration loop +* + 60 CONTINUE +* +* Check for convergence or exceeding iteration count +* + IF( M.LE.1 ) + $ GO TO 160 + IF( ITER.GT.MAXIT ) + $ GO TO 200 +* +* Find diagonal block of matrix to work on +* + IF( TOL.LT.ZERO .AND. ABS( D( M ) ).LE.THRESH ) + $ D( M ) = ZERO + SMAX = ABS( D( M ) ) + SMIN = SMAX + DO 70 LLL = 1, M - 1 + LL = M - LLL + ABSS = ABS( D( LL ) ) + ABSE = ABS( E( LL ) ) + IF( TOL.LT.ZERO .AND. ABSS.LE.THRESH ) + $ D( LL ) = ZERO + IF( ABSE.LE.THRESH ) + $ GO TO 80 + SMIN = MIN( SMIN, ABSS ) + SMAX = MAX( SMAX, ABSS, ABSE ) + 70 CONTINUE + LL = 0 + GO TO 90 + 80 CONTINUE + E( LL ) = ZERO +* +* Matrix splits since E(LL) = 0 +* + IF( LL.EQ.M-1 ) THEN +* +* Convergence of bottom singular value, return to top of loop +* + M = M - 1 + GO TO 60 + END IF + 90 CONTINUE + LL = LL + 1 +* +* E(LL) through E(M-1) are nonzero, E(LL-1) is zero +* + IF( LL.EQ.M-1 ) THEN +* +* 2 by 2 block, handle separately +* + CALL DLASV2( D( M-1 ), E( M-1 ), D( M ), SIGMN, SIGMX, SINR, + $ COSR, SINL, COSL ) + D( M-1 ) = SIGMX + E( M-1 ) = ZERO + D( M ) = SIGMN +* +* Compute singular vectors, if desired +* + IF( NCVT.GT.0 ) + $ CALL DROT( NCVT, VT( M-1, 1 ), LDVT, VT( M, 1 ), LDVT, COSR, + $ SINR ) + IF( NRU.GT.0 ) + $ CALL DROT( NRU, U( 1, M-1 ), 1, U( 1, M ), 1, COSL, SINL ) + IF( NCC.GT.0 ) + $ CALL DROT( NCC, C( M-1, 1 ), LDC, C( M, 1 ), LDC, COSL, + $ SINL ) + M = M - 2 + GO TO 60 + END IF +* +* If working on new submatrix, choose shift direction +* (from larger end diagonal element towards smaller) +* + IF( LL.GT.OLDM .OR. M.LT.OLDLL ) THEN + IF( ABS( D( LL ) ).GE.ABS( D( M ) ) ) THEN +* +* Chase bulge from top (big end) to bottom (small end) +* + IDIR = 1 + ELSE +* +* Chase bulge from bottom (big end) to top (small end) +* + IDIR = 2 + END IF + END IF +* +* Apply convergence tests +* + IF( IDIR.EQ.1 ) THEN +* +* Run convergence test in forward direction +* First apply standard test to bottom of matrix +* + IF( ABS( E( M-1 ) ).LE.ABS( TOL )*ABS( D( M ) ) .OR. + $ ( TOL.LT.ZERO .AND. ABS( E( M-1 ) ).LE.THRESH ) ) THEN + E( M-1 ) = ZERO + GO TO 60 + END IF +* + IF( TOL.GE.ZERO ) THEN +* +* If relative accuracy desired, +* apply convergence criterion forward +* + MU = ABS( D( LL ) ) + SMINL = MU + DO 100 LLL = LL, M - 1 + IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN + E( LLL ) = ZERO + GO TO 60 + END IF + SMINLO = SMINL + MU = ABS( D( LLL+1 ) )*( MU / ( MU+ABS( E( LLL ) ) ) ) + SMINL = MIN( SMINL, MU ) + 100 CONTINUE + END IF +* + ELSE +* +* Run convergence test in backward direction +* First apply standard test to top of matrix +* + IF( ABS( E( LL ) ).LE.ABS( TOL )*ABS( D( LL ) ) .OR. + $ ( TOL.LT.ZERO .AND. ABS( E( LL ) ).LE.THRESH ) ) THEN + E( LL ) = ZERO + GO TO 60 + END IF +* + IF( TOL.GE.ZERO ) THEN +* +* If relative accuracy desired, +* apply convergence criterion backward +* + MU = ABS( D( M ) ) + SMINL = MU + DO 110 LLL = M - 1, LL, -1 + IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN + E( LLL ) = ZERO + GO TO 60 + END IF + SMINLO = SMINL + MU = ABS( D( LLL ) )*( MU / ( MU+ABS( E( LLL ) ) ) ) + SMINL = MIN( SMINL, MU ) + 110 CONTINUE + END IF + END IF + OLDLL = LL + OLDM = M +* +* Compute shift. First, test if shifting would ruin relative +* accuracy, and if so set the shift to zero. +* + IF( TOL.GE.ZERO .AND. N*TOL*( SMINL / SMAX ).LE. + $ MAX( EPS, HNDRTH*TOL ) ) THEN +* +* Use a zero shift to avoid loss of relative accuracy +* + SHIFT = ZERO + ELSE +* +* Compute the shift from 2-by-2 block at end of matrix +* + IF( IDIR.EQ.1 ) THEN + SLL = ABS( D( LL ) ) + CALL DLAS2( D( M-1 ), E( M-1 ), D( M ), SHIFT, R ) + ELSE + SLL = ABS( D( M ) ) + CALL DLAS2( D( LL ), E( LL ), D( LL+1 ), SHIFT, R ) + END IF +* +* Test if shift negligible, and if so set to zero +* + IF( SLL.GT.ZERO ) THEN + IF( ( SHIFT / SLL )**2.LT.EPS ) + $ SHIFT = ZERO + END IF + END IF +* +* Increment iteration count +* + ITER = ITER + M - LL +* +* If SHIFT = 0, do simplified QR iteration +* + IF( SHIFT.EQ.ZERO ) THEN + IF( IDIR.EQ.1 ) THEN +* +* Chase bulge from top to bottom +* Save cosines and sines for later singular vector updates +* + CS = ONE + OLDCS = ONE + DO 120 I = LL, M - 1 + CALL DLARTG( D( I )*CS, E( I ), CS, SN, R ) + IF( I.GT.LL ) + $ E( I-1 ) = OLDSN*R + CALL DLARTG( OLDCS*R, D( I+1 )*SN, OLDCS, OLDSN, D( I ) ) + WORK( I-LL+1 ) = CS + WORK( I-LL+1+NM1 ) = SN + WORK( I-LL+1+NM12 ) = OLDCS + WORK( I-LL+1+NM13 ) = OLDSN + 120 CONTINUE + H = D( M )*CS + D( M ) = H*OLDCS + E( M-1 ) = H*OLDSN +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ), + $ WORK( N ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL DLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ), + $ WORK( NM13+1 ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ), + $ WORK( NM13+1 ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( M-1 ) ).LE.THRESH ) + $ E( M-1 ) = ZERO +* + ELSE +* +* Chase bulge from bottom to top +* Save cosines and sines for later singular vector updates +* + CS = ONE + OLDCS = ONE + DO 130 I = M, LL + 1, -1 + CALL DLARTG( D( I )*CS, E( I-1 ), CS, SN, R ) + IF( I.LT.M ) + $ E( I ) = OLDSN*R + CALL DLARTG( OLDCS*R, D( I-1 )*SN, OLDCS, OLDSN, D( I ) ) + WORK( I-LL ) = CS + WORK( I-LL+NM1 ) = -SN + WORK( I-LL+NM12 ) = OLDCS + WORK( I-LL+NM13 ) = -OLDSN + 130 CONTINUE + H = D( LL )*CS + D( LL ) = H*OLDCS + E( LL ) = H*OLDSN +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ), + $ WORK( NM13+1 ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL DLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ), + $ WORK( N ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ), + $ WORK( N ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( LL ) ).LE.THRESH ) + $ E( LL ) = ZERO + END IF + ELSE +* +* Use nonzero shift +* + IF( IDIR.EQ.1 ) THEN +* +* Chase bulge from top to bottom +* Save cosines and sines for later singular vector updates +* + F = ( ABS( D( LL ) )-SHIFT )* + $ ( SIGN( ONE, D( LL ) )+SHIFT / D( LL ) ) + G = E( LL ) + DO 140 I = LL, M - 1 + CALL DLARTG( F, G, COSR, SINR, R ) + IF( I.GT.LL ) + $ E( I-1 ) = R + F = COSR*D( I ) + SINR*E( I ) + E( I ) = COSR*E( I ) - SINR*D( I ) + G = SINR*D( I+1 ) + D( I+1 ) = COSR*D( I+1 ) + CALL DLARTG( F, G, COSL, SINL, R ) + D( I ) = R + F = COSL*E( I ) + SINL*D( I+1 ) + D( I+1 ) = COSL*D( I+1 ) - SINL*E( I ) + IF( I.LT.M-1 ) THEN + G = SINL*E( I+1 ) + E( I+1 ) = COSL*E( I+1 ) + END IF + WORK( I-LL+1 ) = COSR + WORK( I-LL+1+NM1 ) = SINR + WORK( I-LL+1+NM12 ) = COSL + WORK( I-LL+1+NM13 ) = SINL + 140 CONTINUE + E( M-1 ) = F +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ), + $ WORK( N ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL DLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ), + $ WORK( NM13+1 ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ), + $ WORK( NM13+1 ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( M-1 ) ).LE.THRESH ) + $ E( M-1 ) = ZERO +* + ELSE +* +* Chase bulge from bottom to top +* Save cosines and sines for later singular vector updates +* + F = ( ABS( D( M ) )-SHIFT )*( SIGN( ONE, D( M ) )+SHIFT / + $ D( M ) ) + G = E( M-1 ) + DO 150 I = M, LL + 1, -1 + CALL DLARTG( F, G, COSR, SINR, R ) + IF( I.LT.M ) + $ E( I ) = R + F = COSR*D( I ) + SINR*E( I-1 ) + E( I-1 ) = COSR*E( I-1 ) - SINR*D( I ) + G = SINR*D( I-1 ) + D( I-1 ) = COSR*D( I-1 ) + CALL DLARTG( F, G, COSL, SINL, R ) + D( I ) = R + F = COSL*E( I-1 ) + SINL*D( I-1 ) + D( I-1 ) = COSL*D( I-1 ) - SINL*E( I-1 ) + IF( I.GT.LL+1 ) THEN + G = SINL*E( I-2 ) + E( I-2 ) = COSL*E( I-2 ) + END IF + WORK( I-LL ) = COSR + WORK( I-LL+NM1 ) = -SINR + WORK( I-LL+NM12 ) = COSL + WORK( I-LL+NM13 ) = -SINL + 150 CONTINUE + E( LL ) = F +* +* Test convergence +* + IF( ABS( E( LL ) ).LE.THRESH ) + $ E( LL ) = ZERO +* +* Update singular vectors if desired +* + IF( NCVT.GT.0 ) + $ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ), + $ WORK( NM13+1 ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL DLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ), + $ WORK( N ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ), + $ WORK( N ), C( LL, 1 ), LDC ) + END IF + END IF +* +* QR iteration finished, go back and check convergence +* + GO TO 60 +* +* All singular values converged, so make them positive +* + 160 CONTINUE + DO 170 I = 1, N + IF( D( I ).LT.ZERO ) THEN + D( I ) = -D( I ) +* +* Change sign of singular vectors, if desired +* + IF( NCVT.GT.0 ) + $ CALL DSCAL( NCVT, NEGONE, VT( I, 1 ), LDVT ) + END IF + 170 CONTINUE +* +* Sort the singular values into decreasing order (insertion sort on +* singular values, but only one transposition per singular vector) +* + DO 190 I = 1, N - 1 +* +* Scan for smallest D(I) +* + ISUB = 1 + SMIN = D( 1 ) + DO 180 J = 2, N + 1 - I + IF( D( J ).LE.SMIN ) THEN + ISUB = J + SMIN = D( J ) + END IF + 180 CONTINUE + IF( ISUB.NE.N+1-I ) THEN +* +* Swap singular values and vectors +* + D( ISUB ) = D( N+1-I ) + D( N+1-I ) = SMIN + IF( NCVT.GT.0 ) + $ CALL DSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( N+1-I, 1 ), + $ LDVT ) + IF( NRU.GT.0 ) + $ CALL DSWAP( NRU, U( 1, ISUB ), 1, U( 1, N+1-I ), 1 ) + IF( NCC.GT.0 ) + $ CALL DSWAP( NCC, C( ISUB, 1 ), LDC, C( N+1-I, 1 ), LDC ) + END IF + 190 CONTINUE + GO TO 220 +* +* Maximum number of iterations exceeded, failure to converge +* + 200 CONTINUE + INFO = 0 + DO 210 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 210 CONTINUE + 220 CONTINUE + RETURN +* +* End of DBDSQR +* + END diff --git a/costa/native/external/lapack/ddisna.f b/costa/native/external/lapack/ddisna.f new file mode 100644 index 000000000..b17771154 --- /dev/null +++ b/costa/native/external/lapack/ddisna.f @@ -0,0 +1,180 @@ + SUBROUTINE DDISNA( JOB, M, N, D, SEP, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER INFO, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), SEP( * ) +* .. +* +* Purpose +* ======= +* +* DDISNA computes the reciprocal condition numbers for the eigenvectors +* of a real symmetric or complex Hermitian matrix or for the left or +* right singular vectors of a general m-by-n matrix. The reciprocal +* condition number is the 'gap' between the corresponding eigenvalue or +* singular value and the nearest other one. +* +* The bound on the error, measured by angle in radians, in the I-th +* computed vector is given by +* +* DLAMCH( 'E' ) * ( ANORM / SEP( I ) ) +* +* where ANORM = 2-norm(A) = max( abs( D(j) ) ). SEP(I) is not allowed +* to be smaller than DLAMCH( 'E' )*ANORM in order to limit the size of +* the error bound. +* +* DDISNA may also be used to compute error bounds for eigenvectors of +* the generalized symmetric definite eigenproblem. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies for which problem the reciprocal condition numbers +* should be computed: +* = 'E': the eigenvectors of a symmetric/Hermitian matrix; +* = 'L': the left singular vectors of a general matrix; +* = 'R': the right singular vectors of a general matrix. +* +* M (input) INTEGER +* The number of rows of the matrix. M >= 0. +* +* N (input) INTEGER +* If JOB = 'L' or 'R', the number of columns of the matrix, +* in which case N >= 0. Ignored if JOB = 'E'. +* +* D (input) DOUBLE PRECISION array, dimension (M) if JOB = 'E' +* dimension (min(M,N)) if JOB = 'L' or 'R' +* The eigenvalues (if JOB = 'E') or singular values (if JOB = +* 'L' or 'R') of the matrix, in either increasing or decreasing +* order. If singular values, they must be non-negative. +* +* SEP (output) DOUBLE PRECISION array, dimension (M) if JOB = 'E' +* dimension (min(M,N)) if JOB = 'L' or 'R' +* The reciprocal condition numbers of the vectors. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL DECR, EIGEN, INCR, LEFT, RIGHT, SING + INTEGER I, K + DOUBLE PRECISION ANORM, EPS, NEWGAP, OLDGAP, SAFMIN, THRESH +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + EIGEN = LSAME( JOB, 'E' ) + LEFT = LSAME( JOB, 'L' ) + RIGHT = LSAME( JOB, 'R' ) + SING = LEFT .OR. RIGHT + IF( EIGEN ) THEN + K = M + ELSE IF( SING ) THEN + K = MIN( M, N ) + END IF + IF( .NOT.EIGEN .AND. .NOT.SING ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( K.LT.0 ) THEN + INFO = -3 + ELSE + INCR = .TRUE. + DECR = .TRUE. + DO 10 I = 1, K - 1 + IF( INCR ) + $ INCR = INCR .AND. D( I ).LE.D( I+1 ) + IF( DECR ) + $ DECR = DECR .AND. D( I ).GE.D( I+1 ) + 10 CONTINUE + IF( SING .AND. K.GT.0 ) THEN + IF( INCR ) + $ INCR = INCR .AND. ZERO.LE.D( 1 ) + IF( DECR ) + $ DECR = DECR .AND. D( K ).GE.ZERO + END IF + IF( .NOT.( INCR .OR. DECR ) ) + $ INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DDISNA', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) + $ RETURN +* +* Compute reciprocal condition numbers +* + IF( K.EQ.1 ) THEN + SEP( 1 ) = DLAMCH( 'O' ) + ELSE + OLDGAP = ABS( D( 2 )-D( 1 ) ) + SEP( 1 ) = OLDGAP + DO 20 I = 2, K - 1 + NEWGAP = ABS( D( I+1 )-D( I ) ) + SEP( I ) = MIN( OLDGAP, NEWGAP ) + OLDGAP = NEWGAP + 20 CONTINUE + SEP( K ) = OLDGAP + END IF + IF( SING ) THEN + IF( ( LEFT .AND. M.GT.N ) .OR. ( RIGHT .AND. M.LT.N ) ) THEN + IF( INCR ) + $ SEP( 1 ) = MIN( SEP( 1 ), D( 1 ) ) + IF( DECR ) + $ SEP( K ) = MIN( SEP( K ), D( K ) ) + END IF + END IF +* +* Ensure that reciprocal condition numbers are not less than +* threshold, in order to limit the size of the error bound +* + EPS = DLAMCH( 'E' ) + SAFMIN = DLAMCH( 'S' ) + ANORM = MAX( ABS( D( 1 ) ), ABS( D( K ) ) ) + IF( ANORM.EQ.ZERO ) THEN + THRESH = EPS + ELSE + THRESH = MAX( EPS*ANORM, SAFMIN ) + END IF + DO 30 I = 1, K + SEP( I ) = MAX( SEP( I ), THRESH ) + 30 CONTINUE +* + RETURN +* +* End of DDISNA +* + END diff --git a/costa/native/external/lapack/dgbbrd.f b/costa/native/external/lapack/dgbbrd.f new file mode 100644 index 000000000..a04475259 --- /dev/null +++ b/costa/native/external/lapack/dgbbrd.f @@ -0,0 +1,444 @@ + SUBROUTINE DGBBRD( VECT, M, N, NCC, KL, KU, AB, LDAB, D, E, Q, + $ LDQ, PT, LDPT, C, LDC, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER VECT + INTEGER INFO, KL, KU, LDAB, LDC, LDPT, LDQ, M, N, NCC +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), C( LDC, * ), D( * ), E( * ), + $ PT( LDPT, * ), Q( LDQ, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGBBRD reduces a real general m-by-n band matrix A to upper +* bidiagonal form B by an orthogonal transformation: Q' * A * P = B. +* +* The routine computes B, and optionally forms Q or P', or computes +* Q'*C for a given matrix C. +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* Specifies whether or not the matrices Q and P' are to be +* formed. +* = 'N': do not form Q or P'; +* = 'Q': form Q only; +* = 'P': form P' only; +* = 'B': form both. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NCC (input) INTEGER +* The number of columns of the matrix C. NCC >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals of the matrix A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals of the matrix A. KU >= 0. +* +* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) +* On entry, the m-by-n band matrix A, stored in rows 1 to +* KL+KU+1. The j-th column of A is stored in the j-th column of +* the array AB as follows: +* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). +* On exit, A is overwritten by values generated during the +* reduction. +* +* LDAB (input) INTEGER +* The leading dimension of the array A. LDAB >= KL+KU+1. +* +* D (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The diagonal elements of the bidiagonal matrix B. +* +* E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) +* The superdiagonal elements of the bidiagonal matrix B. +* +* Q (output) DOUBLE PRECISION array, dimension (LDQ,M) +* If VECT = 'Q' or 'B', the m-by-m orthogonal matrix Q. +* If VECT = 'N' or 'P', the array Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. +* LDQ >= max(1,M) if VECT = 'Q' or 'B'; LDQ >= 1 otherwise. +* +* PT (output) DOUBLE PRECISION array, dimension (LDPT,N) +* If VECT = 'P' or 'B', the n-by-n orthogonal matrix P'. +* If VECT = 'N' or 'Q', the array PT is not referenced. +* +* LDPT (input) INTEGER +* The leading dimension of the array PT. +* LDPT >= max(1,N) if VECT = 'P' or 'B'; LDPT >= 1 otherwise. +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC,NCC) +* On entry, an m-by-ncc matrix C. +* On exit, C is overwritten by Q'*C. +* C is not referenced if NCC = 0. +* +* LDC (input) INTEGER +* The leading dimension of the array C. +* LDC >= max(1,M) if NCC > 0; LDC >= 1 if NCC = 0. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (2*max(M,N)) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL WANTB, WANTC, WANTPT, WANTQ + INTEGER I, INCA, J, J1, J2, KB, KB1, KK, KLM, KLU1, + $ KUN, L, MINMN, ML, ML0, MN, MU, MU0, NR, NRT + DOUBLE PRECISION RA, RB, RC, RS +* .. +* .. External Subroutines .. + EXTERNAL DLARGV, DLARTG, DLARTV, DLASET, DROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTB = LSAME( VECT, 'B' ) + WANTQ = LSAME( VECT, 'Q' ) .OR. WANTB + WANTPT = LSAME( VECT, 'P' ) .OR. WANTB + WANTC = NCC.GT.0 + KLU1 = KL + KU + 1 + INFO = 0 + IF( .NOT.WANTQ .AND. .NOT.WANTPT .AND. .NOT.LSAME( VECT, 'N' ) ) + $ THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NCC.LT.0 ) THEN + INFO = -4 + ELSE IF( KL.LT.0 ) THEN + INFO = -5 + ELSE IF( KU.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KLU1 ) THEN + INFO = -8 + ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. LDQ.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDPT.LT.1 .OR. WANTPT .AND. LDPT.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDC.LT.1 .OR. WANTC .AND. LDC.LT.MAX( 1, M ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGBBRD', -INFO ) + RETURN + END IF +* +* Initialize Q and P' to the unit matrix, if needed +* + IF( WANTQ ) + $ CALL DLASET( 'Full', M, M, ZERO, ONE, Q, LDQ ) + IF( WANTPT ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, PT, LDPT ) +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + MINMN = MIN( M, N ) +* + IF( KL+KU.GT.1 ) THEN +* +* Reduce to upper bidiagonal form if KU > 0; if KU = 0, reduce +* first to lower bidiagonal form and then transform to upper +* bidiagonal +* + IF( KU.GT.0 ) THEN + ML0 = 1 + MU0 = 2 + ELSE + ML0 = 2 + MU0 = 1 + END IF +* +* Wherever possible, plane rotations are generated and applied in +* vector operations of length NR over the index set J1:J2:KLU1. +* +* The sines of the plane rotations are stored in WORK(1:max(m,n)) +* and the cosines in WORK(max(m,n)+1:2*max(m,n)). +* + MN = MAX( M, N ) + KLM = MIN( M-1, KL ) + KUN = MIN( N-1, KU ) + KB = KLM + KUN + KB1 = KB + 1 + INCA = KB1*LDAB + NR = 0 + J1 = KLM + 2 + J2 = 1 - KUN +* + DO 90 I = 1, MINMN +* +* Reduce i-th column and i-th row of matrix to bidiagonal form +* + ML = KLM + 1 + MU = KUN + 1 + DO 80 KK = 1, KB + J1 = J1 + KB + J2 = J2 + KB +* +* generate plane rotations to annihilate nonzero elements +* which have been created below the band +* + IF( NR.GT.0 ) + $ CALL DLARGV( NR, AB( KLU1, J1-KLM-1 ), INCA, + $ WORK( J1 ), KB1, WORK( MN+J1 ), KB1 ) +* +* apply plane rotations from the left +* + DO 10 L = 1, KB + IF( J2-KLM+L-1.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( KLU1-L, J1-KLM+L-1 ), INCA, + $ AB( KLU1-L+1, J1-KLM+L-1 ), INCA, + $ WORK( MN+J1 ), WORK( J1 ), KB1 ) + 10 CONTINUE +* + IF( ML.GT.ML0 ) THEN + IF( ML.LE.M-I+1 ) THEN +* +* generate plane rotation to annihilate a(i+ml-1,i) +* within the band, and apply rotation from the left +* + CALL DLARTG( AB( KU+ML-1, I ), AB( KU+ML, I ), + $ WORK( MN+I+ML-1 ), WORK( I+ML-1 ), + $ RA ) + AB( KU+ML-1, I ) = RA + IF( I.LT.N ) + $ CALL DROT( MIN( KU+ML-2, N-I ), + $ AB( KU+ML-2, I+1 ), LDAB-1, + $ AB( KU+ML-1, I+1 ), LDAB-1, + $ WORK( MN+I+ML-1 ), WORK( I+ML-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KB1 + END IF +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + DO 20 J = J1, J2, KB1 + CALL DROT( M, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ WORK( MN+J ), WORK( J ) ) + 20 CONTINUE + END IF +* + IF( WANTC ) THEN +* +* apply plane rotations to C +* + DO 30 J = J1, J2, KB1 + CALL DROT( NCC, C( J-1, 1 ), LDC, C( J, 1 ), LDC, + $ WORK( MN+J ), WORK( J ) ) + 30 CONTINUE + END IF +* + IF( J2+KUN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KB1 + END IF +* + DO 40 J = J1, J2, KB1 +* +* create nonzero element a(j-1,j+ku) above the band +* and store it in WORK(n+1:2*n) +* + WORK( J+KUN ) = WORK( J )*AB( 1, J+KUN ) + AB( 1, J+KUN ) = WORK( MN+J )*AB( 1, J+KUN ) + 40 CONTINUE +* +* generate plane rotations to annihilate nonzero elements +* which have been generated above the band +* + IF( NR.GT.0 ) + $ CALL DLARGV( NR, AB( 1, J1+KUN-1 ), INCA, + $ WORK( J1+KUN ), KB1, WORK( MN+J1+KUN ), + $ KB1 ) +* +* apply plane rotations from the right +* + DO 50 L = 1, KB + IF( J2+L-1.GT.M ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( L+1, J1+KUN-1 ), INCA, + $ AB( L, J1+KUN ), INCA, + $ WORK( MN+J1+KUN ), WORK( J1+KUN ), + $ KB1 ) + 50 CONTINUE +* + IF( ML.EQ.ML0 .AND. MU.GT.MU0 ) THEN + IF( MU.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i,i+mu-1) +* within the band, and apply rotation from the right +* + CALL DLARTG( AB( KU-MU+3, I+MU-2 ), + $ AB( KU-MU+2, I+MU-1 ), + $ WORK( MN+I+MU-1 ), WORK( I+MU-1 ), + $ RA ) + AB( KU-MU+3, I+MU-2 ) = RA + CALL DROT( MIN( KL+MU-2, M-I ), + $ AB( KU-MU+4, I+MU-2 ), 1, + $ AB( KU-MU+3, I+MU-1 ), 1, + $ WORK( MN+I+MU-1 ), WORK( I+MU-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KB1 + END IF +* + IF( WANTPT ) THEN +* +* accumulate product of plane rotations in P' +* + DO 60 J = J1, J2, KB1 + CALL DROT( N, PT( J+KUN-1, 1 ), LDPT, + $ PT( J+KUN, 1 ), LDPT, WORK( MN+J+KUN ), + $ WORK( J+KUN ) ) + 60 CONTINUE + END IF +* + IF( J2+KB.GT.M ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KB1 + END IF +* + DO 70 J = J1, J2, KB1 +* +* create nonzero element a(j+kl+ku,j+ku-1) below the +* band and store it in WORK(1:n) +* + WORK( J+KB ) = WORK( J+KUN )*AB( KLU1, J+KUN ) + AB( KLU1, J+KUN ) = WORK( MN+J+KUN )*AB( KLU1, J+KUN ) + 70 CONTINUE +* + IF( ML.GT.ML0 ) THEN + ML = ML - 1 + ELSE + MU = MU - 1 + END IF + 80 CONTINUE + 90 CONTINUE + END IF +* + IF( KU.EQ.0 .AND. KL.GT.0 ) THEN +* +* A has been reduced to lower bidiagonal form +* +* Transform lower bidiagonal form to upper bidiagonal by applying +* plane rotations from the left, storing diagonal elements in D +* and off-diagonal elements in E +* + DO 100 I = 1, MIN( M-1, N ) + CALL DLARTG( AB( 1, I ), AB( 2, I ), RC, RS, RA ) + D( I ) = RA + IF( I.LT.N ) THEN + E( I ) = RS*AB( 1, I+1 ) + AB( 1, I+1 ) = RC*AB( 1, I+1 ) + END IF + IF( WANTQ ) + $ CALL DROT( M, Q( 1, I ), 1, Q( 1, I+1 ), 1, RC, RS ) + IF( WANTC ) + $ CALL DROT( NCC, C( I, 1 ), LDC, C( I+1, 1 ), LDC, RC, + $ RS ) + 100 CONTINUE + IF( M.LE.N ) + $ D( M ) = AB( 1, M ) + ELSE IF( KU.GT.0 ) THEN +* +* A has been reduced to upper bidiagonal form +* + IF( M.LT.N ) THEN +* +* Annihilate a(m,m+1) by applying plane rotations from the +* right, storing diagonal elements in D and off-diagonal +* elements in E +* + RB = AB( KU, M+1 ) + DO 110 I = M, 1, -1 + CALL DLARTG( AB( KU+1, I ), RB, RC, RS, RA ) + D( I ) = RA + IF( I.GT.1 ) THEN + RB = -RS*AB( KU, I ) + E( I-1 ) = RC*AB( KU, I ) + END IF + IF( WANTPT ) + $ CALL DROT( N, PT( I, 1 ), LDPT, PT( M+1, 1 ), LDPT, + $ RC, RS ) + 110 CONTINUE + ELSE +* +* Copy off-diagonal elements to E and diagonal elements to D +* + DO 120 I = 1, MINMN - 1 + E( I ) = AB( KU, I+1 ) + 120 CONTINUE + DO 130 I = 1, MINMN + D( I ) = AB( KU+1, I ) + 130 CONTINUE + END IF + ELSE +* +* A is diagonal. Set elements of E to zero and copy diagonal +* elements to D. +* + DO 140 I = 1, MINMN - 1 + E( I ) = ZERO + 140 CONTINUE + DO 150 I = 1, MINMN + D( I ) = AB( 1, I ) + 150 CONTINUE + END IF + RETURN +* +* End of DGBBRD +* + END diff --git a/costa/native/external/lapack/dgbcon.f b/costa/native/external/lapack/dgbcon.f new file mode 100644 index 000000000..ba613b735 --- /dev/null +++ b/costa/native/external/lapack/dgbcon.f @@ -0,0 +1,222 @@ + SUBROUTINE DGBCON( NORM, N, KL, KU, AB, LDAB, IPIV, ANORM, RCOND, + $ WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, KL, KU, LDAB, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGBCON estimates the reciprocal of the condition number of a real +* general band matrix A, in either the 1-norm or the infinity-norm, +* using the LU factorization computed by DGBTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) +* Details of the LU factorization of the band matrix A, as +* computed by DGBTRF. U is stored as an upper triangular band +* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +* the multipliers used during the factorization are stored in +* rows KL+KU+2 to 2*KL+KU+1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= N, row i of the matrix was +* interchanged with row IPIV(i). +* +* ANORM (input) DOUBLE PRECISION +* If NORM = '1' or 'O', the 1-norm of the original matrix A. +* If NORM = 'I', the infinity-norm of the original matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LNOTI, ONENRM + CHARACTER NORMIN + INTEGER IX, J, JP, KASE, KASE1, KD, LM + DOUBLE PRECISION AINVNM, SCALE, SMLNUM, T +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DDOT, DLAMCH + EXTERNAL LSAME, IDAMAX, DDOT, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DLACON, DLATBS, DRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN + INFO = -6 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KD = KL + KU + 1 + LNOTI = KL.GT.0 + KASE = 0 + 10 CONTINUE + CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + IF( LNOTI ) THEN + DO 20 J = 1, N - 1 + LM = MIN( KL, N-J ) + JP = IPIV( J ) + T = WORK( JP ) + IF( JP.NE.J ) THEN + WORK( JP ) = WORK( J ) + WORK( J ) = T + END IF + CALL DAXPY( LM, -T, AB( KD+1, J ), 1, WORK( J+1 ), 1 ) + 20 CONTINUE + END IF +* +* Multiply by inv(U). +* + CALL DLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ KL+KU, AB, LDAB, WORK, SCALE, WORK( 2*N+1 ), + $ INFO ) + ELSE +* +* Multiply by inv(U'). +* + CALL DLATBS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, + $ KL+KU, AB, LDAB, WORK, SCALE, WORK( 2*N+1 ), + $ INFO ) +* +* Multiply by inv(L'). +* + IF( LNOTI ) THEN + DO 30 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + WORK( J ) = WORK( J ) - DDOT( LM, AB( KD+1, J ), 1, + $ WORK( J+1 ), 1 ) + JP = IPIV( J ) + IF( JP.NE.J ) THEN + T = WORK( JP ) + WORK( JP ) = WORK( J ) + WORK( J ) = T + END IF + 30 CONTINUE + END IF + END IF +* +* Divide X by 1/SCALE if doing so will not cause overflow. +* + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = IDAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 40 + CALL DRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 40 CONTINUE + RETURN +* +* End of DGBCON +* + END diff --git a/costa/native/external/lapack/dgbequ.f b/costa/native/external/lapack/dgbequ.f new file mode 100644 index 000000000..309798c98 --- /dev/null +++ b/costa/native/external/lapack/dgbequ.f @@ -0,0 +1,240 @@ + SUBROUTINE DGBEQU( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N + DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), C( * ), R( * ) +* .. +* +* Purpose +* ======= +* +* DGBEQU computes row and column scalings intended to equilibrate an +* M-by-N band matrix A and reduce its condition number. R returns the +* row scale factors and C the column scale factors, chosen to try to +* make the largest element in each row and column of the matrix B with +* elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. +* +* R(i) and C(j) are restricted to be between SMLNUM = smallest safe +* number and BIGNUM = largest safe number. Use of these scaling +* factors is not guaranteed to reduce the condition number of A but +* works well in practice. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) +* The band matrix A, stored in rows 1 to KL+KU+1. The j-th +* column of A is stored in the j-th column of the array AB as +* follows: +* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KL+KU+1. +* +* R (output) DOUBLE PRECISION array, dimension (M) +* If INFO = 0, or INFO > M, R contains the row scale factors +* for A. +* +* C (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, C contains the column scale factors for A. +* +* ROWCND (output) DOUBLE PRECISION +* If INFO = 0 or INFO > M, ROWCND contains the ratio of the +* smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +* AMAX is neither too large nor too small, it is not worth +* scaling by R. +* +* COLCND (output) DOUBLE PRECISION +* If INFO = 0, COLCND contains the ratio of the smallest +* C(i) to the largest C(i). If COLCND >= 0.1, it is not +* worth scaling by C. +* +* AMAX (output) DOUBLE PRECISION +* Absolute value of largest matrix element. If AMAX is very +* close to overflow or very close to underflow, the matrix +* should be scaled. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= M: the i-th row of A is exactly zero +* > M: the (i-M)-th column of A is exactly zero +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, KD + DOUBLE PRECISION BIGNUM, RCMAX, RCMIN, SMLNUM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGBEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + KD = KU + 1 + DO 30 J = 1, N + DO 20 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + R( I ) = MAX( R( I ), ABS( AB( KD+I-J, J ) ) ) + 20 CONTINUE + 30 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)) +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + KD = KU + 1 + DO 90 J = 1, N + DO 80 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + C( J ) = MAX( C( J ), ABS( AB( KD+I-J, J ) )*R( I ) ) + 80 CONTINUE + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)) +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of DGBEQU +* + END diff --git a/costa/native/external/lapack/dgbrfs.f b/costa/native/external/lapack/dgbrfs.f new file mode 100644 index 000000000..226fd7736 --- /dev/null +++ b/costa/native/external/lapack/dgbrfs.f @@ -0,0 +1,351 @@ + SUBROUTINE DGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, + $ IPIV, B, LDB, X, LDX, FERR, BERR, WORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DGBRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is banded, and provides +* error bounds and backward error estimates for the solution. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose = Transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) +* The original band matrix A, stored in rows 1 to KL+KU+1. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KL+KU+1. +* +* AFB (input) DOUBLE PRECISION array, dimension (LDAFB,N) +* Details of the LU factorization of the band matrix A, as +* computed by DGBTRF. U is stored as an upper triangular band +* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +* the multipliers used during the factorization are stored in +* rows KL+KU+2 to 2*KL+KU+1. +* +* LDAFB (input) INTEGER +* The leading dimension of the array AFB. LDAFB >= 2*KL*KU+1. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from DGBTRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by DGBTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANST + INTEGER COUNT, I, J, K, KASE, KK, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGBMV, DGBTRS, DLACON, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -7 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -9 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = MIN( KL+KU+2, N+1 ) + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL DGBMV( TRANS, N, N, KL, KU, -ONE, AB, LDAB, X( 1, J ), 1, + $ ONE, WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(op(A))*abs(X) + abs(B). +* + IF( NOTRAN ) THEN + DO 50 K = 1, N + KK = KU + 1 - K + XK = ABS( X( K, J ) ) + DO 40 I = MAX( 1, K-KU ), MIN( N, K+KL ) + WORK( I ) = WORK( I ) + ABS( AB( KK+I, K ) )*XK + 40 CONTINUE + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + KK = KU + 1 - K + DO 60 I = MAX( 1, K-KU ), MIN( N, K+KL ) + S = S + ABS( AB( KK+I, K ) )*ABS( X( I, J ) ) + 60 CONTINUE + WORK( K ) = WORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL DGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK( N+1 ), N, INFO ) + CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use DLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL DLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**T). +* + CALL DGBTRS( TRANST, N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK( N+1 ), N, INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( N+I )*WORK( I ) + 110 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( N+I )*WORK( I ) + 120 CONTINUE + CALL DGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK( N+1 ), N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of DGBRFS +* + END diff --git a/costa/native/external/lapack/dgbsv.f b/costa/native/external/lapack/dgbsv.f new file mode 100644 index 000000000..0d35fe88b --- /dev/null +++ b/costa/native/external/lapack/dgbsv.f @@ -0,0 +1,143 @@ + SUBROUTINE DGBSV( N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION AB( LDAB, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DGBSV computes the solution to a real system of linear equations +* A * X = B, where A is a band matrix of order N with KL subdiagonals +* and KU superdiagonals, and X and B are N-by-NRHS matrices. +* +* The LU decomposition with partial pivoting and row interchanges is +* used to factor A as A = L * U, where L is a product of permutation +* and unit lower triangular matrices with KL subdiagonals, and U is +* upper triangular with KL+KU superdiagonals. The factored form of A +* is then used to solve the system of equations A * X = B. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows KL+1 to +* 2*KL+KU+1; rows 1 to KL of the array need not be set. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(KL+KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+KL) +* On exit, details of the factorization: U is stored as an +* upper triangular band matrix with KL+KU superdiagonals in +* rows 1 to KL+KU+1, and the multipliers used during the +* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +* See below for further details. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (output) INTEGER array, dimension (N) +* The pivot indices that define the permutation matrix P; +* row i of the matrix was interchanged with row IPIV(i). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and the solution has not been computed. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* M = N = 6, KL = 2, KU = 1: +* +* On entry: On exit: +* +* * * * + + + * * * u14 u25 u36 +* * * + + + + * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +* a31 a42 a53 a64 * * m31 m42 m53 m64 * * +* +* Array elements marked * are not used by the routine; elements marked +* + need not be set on entry, but are required by the routine to store +* elements of U because of fill-in resulting from the row interchanges. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL DGBTRF, DGBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( KL.LT.0 ) THEN + INFO = -2 + ELSE IF( KU.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGBSV ', -INFO ) + RETURN + END IF +* +* Compute the LU factorization of the band matrix A. +* + CALL DGBTRF( N, N, KL, KU, AB, LDAB, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL DGBTRS( 'No transpose', N, KL, KU, NRHS, AB, LDAB, IPIV, + $ B, LDB, INFO ) + END IF + RETURN +* +* End of DGBSV +* + END diff --git a/costa/native/external/lapack/dgbsvx.f b/costa/native/external/lapack/dgbsvx.f new file mode 100644 index 000000000..3e4961404 --- /dev/null +++ b/costa/native/external/lapack/dgbsvx.f @@ -0,0 +1,517 @@ + SUBROUTINE DGBSVX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, + $ LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, + $ RCOND, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ BERR( * ), C( * ), FERR( * ), R( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DGBSVX uses the LU factorization to compute the solution to a real +* system of linear equations A * X = B, A**T * X = B, or A**H * X = B, +* where A is a band matrix of order N with KL subdiagonals and KU +* superdiagonals, and X and B are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed by this subroutine: +* +* 1. If FACT = 'E', real scaling factors are computed to equilibrate +* the system: +* TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +* TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +* TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +* Whether or not the system will be equilibrated depends on the +* scaling of the matrix A, but if equilibration is used, A is +* overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +* or diag(C)*B (if TRANS = 'T' or 'C'). +* +* 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the +* matrix A (after equilibration if FACT = 'E') as +* A = L * U, +* where L is a product of permutation and unit lower triangular +* matrices with KL subdiagonals, and U is upper triangular with +* KL+KU superdiagonals. +* +* 3. If some U(i,i)=0, so that U is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 4. The system of equations is solved for X using the factored form +* of A. +* +* 5. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* 6. If equilibration was used, the matrix X is premultiplied by +* diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +* that it solves the original system before equilibration. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of the matrix A is +* supplied on entry, and if not, whether the matrix A should be +* equilibrated before it is factored. +* = 'F': On entry, AFB and IPIV contain the factored form of +* A. If EQUED is not 'N', the matrix A has been +* equilibrated with scaling factors given by R and C. +* AB, AFB, and IPIV are not modified. +* = 'N': The matrix A will be copied to AFB and factored. +* = 'E': The matrix A will be equilibrated if necessary, then +* copied to AFB and factored. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations. +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Transpose) +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +* +* If FACT = 'F' and EQUED is not 'N', then A must have been +* equilibrated by the scaling factors in R and/or C. AB is not +* modified if FACT = 'F' or 'N', or if FACT = 'E' and +* EQUED = 'N' on exit. +* +* On exit, if EQUED .ne. 'N', A is scaled as follows: +* EQUED = 'R': A := diag(R) * A +* EQUED = 'C': A := A * diag(C) +* EQUED = 'B': A := diag(R) * A * diag(C). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KL+KU+1. +* +* AFB (input or output) DOUBLE PRECISION array, dimension (LDAFB,N) +* If FACT = 'F', then AFB is an input argument and on entry +* contains details of the LU factorization of the band matrix +* A, as computed by DGBTRF. U is stored as an upper triangular +* band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, +* and the multipliers used during the factorization are stored +* in rows KL+KU+2 to 2*KL+KU+1. If EQUED .ne. 'N', then AFB is +* the factored form of the equilibrated matrix A. +* +* If FACT = 'N', then AFB is an output argument and on exit +* returns details of the LU factorization of A. +* +* If FACT = 'E', then AFB is an output argument and on exit +* returns details of the LU factorization of the equilibrated +* matrix A (see the description of AB for the form of the +* equilibrated matrix). +* +* LDAFB (input) INTEGER +* The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains the pivot indices from the factorization A = L*U +* as computed by DGBTRF; row i of the matrix was interchanged +* with row IPIV(i). +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains the pivot indices from the factorization A = L*U +* of the original matrix A. +* +* If FACT = 'E', then IPIV is an output argument and on exit +* contains the pivot indices from the factorization A = L*U +* of the equilibrated matrix A. +* +* EQUED (input or output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration (always true if FACT = 'N'). +* = 'R': Row equilibration, i.e., A has been premultiplied by +* diag(R). +* = 'C': Column equilibration, i.e., A has been postmultiplied +* by diag(C). +* = 'B': Both row and column equilibration, i.e., A has been +* replaced by diag(R) * A * diag(C). +* EQUED is an input argument if FACT = 'F'; otherwise, it is an +* output argument. +* +* R (input or output) DOUBLE PRECISION array, dimension (N) +* The row scale factors for A. If EQUED = 'R' or 'B', A is +* multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +* is not accessed. R is an input argument if FACT = 'F'; +* otherwise, R is an output argument. If FACT = 'F' and +* EQUED = 'R' or 'B', each element of R must be positive. +* +* C (input or output) DOUBLE PRECISION array, dimension (N) +* The column scale factors for A. If EQUED = 'C' or 'B', A is +* multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +* is not accessed. C is an input argument if FACT = 'F'; +* otherwise, C is an output argument. If FACT = 'F' and +* EQUED = 'C' or 'B', each element of C must be positive. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, +* if EQUED = 'N', B is not modified; +* if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +* diag(R)*B; +* if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +* overwritten by diag(C)*B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X +* to the original system of equations. Note that A and B are +* modified on exit if EQUED .ne. 'N', and the solution to the +* equilibrated system is inv(diag(C))*X if TRANS = 'N' and +* EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' +* and EQUED = 'R' or 'B'. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) DOUBLE PRECISION +* The estimate of the reciprocal condition number of the matrix +* A after equilibration (if done). If RCOND is less than the +* machine precision (in particular, if RCOND = 0), the matrix +* is singular to working precision. This condition is +* indicated by a return code of INFO > 0. +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (3*N) +* On exit, WORK(1) contains the reciprocal pivot growth +* factor norm(A)/norm(U). The "max absolute element" norm is +* used. If WORK(1) is much less than 1, then the stability +* of the LU factorization of the (equilibrated) matrix A +* could be poor. This also means that the solution X, condition +* estimator RCOND, and forward error bound FERR could be +* unreliable. If factorization fails with 0 0: if INFO = i, and i is +* <= N: U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, so the solution and error bounds +* could not be computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + CHARACTER NORM + INTEGER I, INFEQU, J, J1, J2 + DOUBLE PRECISION AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN, + $ ROWCND, RPVGRW, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANGB, DLANTB + EXTERNAL LSAME, DLAMCH, DLANGB, DLANTB +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGBCON, DGBEQU, DGBRFS, DGBTRF, DGBTRS, + $ DLACPY, DLAQGB, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KL.LT.0 ) THEN + INFO = -4 + ELSE IF( KU.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -8 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -10 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -12 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -13 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -14 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -16 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -18 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGBSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL DGBEQU( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL DLAQGB( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = R( I )*B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( COLEQU ) THEN + DO 60 J = 1, NRHS + DO 50 I = 1, N + B( I, J ) = C( I )*B( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of the band matrix A. +* + DO 70 J = 1, N + J1 = MAX( J-KU, 1 ) + J2 = MIN( J+KL, N ) + CALL DCOPY( J2-J1+1, AB( KU+1-J+J1, J ), 1, + $ AFB( KL+KU+1-J+J1, J ), 1 ) + 70 CONTINUE +* + CALL DGBTRF( N, N, KL, KU, AFB, LDAFB, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) THEN +* +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + ANORM = ZERO + DO 90 J = 1, INFO + DO 80 I = MAX( KU+2-J, 1 ), + $ MIN( N+KU+1-J, KL+KU+1 ) + ANORM = MAX( ANORM, ABS( AB( I, J ) ) ) + 80 CONTINUE + 90 CONTINUE + RPVGRW = DLANTB( 'M', 'U', 'N', INFO, + $ MIN( INFO-1, KL+KU ), AFB( MAX( 1, + $ KL+KU+2-INFO ), 1 ), LDAFB, WORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = ANORM / RPVGRW + END IF + WORK( 1 ) = RPVGRW + RCOND = ZERO + END IF + RETURN + END IF + END IF +* +* Compute the norm of the matrix A and the +* reciprocal pivot growth factor RPVGRW. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = DLANGB( NORM, N, KL, KU, AB, LDAB, WORK ) + RPVGRW = DLANTB( 'M', 'U', 'N', N, KL+KU, AFB, LDAFB, WORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = DLANGB( 'M', N, KL, KU, AB, LDAB, WORK ) / RPVGRW + END IF +* +* Compute the reciprocal of the condition number of A. +* + CALL DGBCON( NORM, N, KL, KU, AFB, LDAFB, IPIV, ANORM, RCOND, + $ WORK, IWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution matrix X. +* + CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL DGBTRS( TRANS, N, KL, KU, NRHS, AFB, LDAFB, IPIV, X, LDX, + $ INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL DGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, IPIV, + $ B, LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( NOTRAN ) THEN + IF( COLEQU ) THEN + DO 110 J = 1, NRHS + DO 100 I = 1, N + X( I, J ) = C( I )*X( I, J ) + 100 CONTINUE + 110 CONTINUE + DO 120 J = 1, NRHS + FERR( J ) = FERR( J ) / COLCND + 120 CONTINUE + END IF + ELSE IF( ROWEQU ) THEN + DO 140 J = 1, NRHS + DO 130 I = 1, N + X( I, J ) = R( I )*X( I, J ) + 130 CONTINUE + 140 CONTINUE + DO 150 J = 1, NRHS + FERR( J ) = FERR( J ) / ROWCND + 150 CONTINUE + END IF +* + WORK( 1 ) = RPVGRW + RETURN +* +* End of DGBSVX +* + END diff --git a/costa/native/external/lapack/dgbtf2.f b/costa/native/external/lapack/dgbtf2.f new file mode 100644 index 000000000..5e25629fc --- /dev/null +++ b/costa/native/external/lapack/dgbtf2.f @@ -0,0 +1,203 @@ + SUBROUTINE DGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* DGBTF2 computes an LU factorization of a real m-by-n band matrix A +* using partial pivoting with row interchanges. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows KL+1 to +* 2*KL+KU+1; rows 1 to KL of the array need not be set. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +* +* On exit, details of the factorization: U is stored as an +* upper triangular band matrix with KL+KU superdiagonals in +* rows 1 to KL+KU+1, and the multipliers used during the +* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +* See below for further details. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* M = N = 6, KL = 2, KU = 1: +* +* On entry: On exit: +* +* * * * + + + * * * u14 u25 u36 +* * * + + + + * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +* a31 a42 a53 a64 * * m31 m42 m53 m64 * * +* +* Array elements marked * are not used by the routine; elements marked +* + need not be set on entry, but are required by the routine to store +* elements of U, because of fill-in resulting from the row +* interchanges. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, JP, JU, KM, KV +* .. +* .. External Functions .. + INTEGER IDAMAX + EXTERNAL IDAMAX +* .. +* .. External Subroutines .. + EXTERNAL DGER, DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* KV is the number of superdiagonals in the factor U, allowing for +* fill-in. +* + KV = KU + KL +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KV+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGBTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Gaussian elimination with partial pivoting +* +* Set fill-in elements in columns KU+2 to KV to zero. +* + DO 20 J = KU + 2, MIN( KV, N ) + DO 10 I = KV - J + 2, KL + AB( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* JU is the index of the last column affected by the current stage +* of the factorization. +* + JU = 1 +* + DO 40 J = 1, MIN( M, N ) +* +* Set fill-in elements in column J+KV to zero. +* + IF( J+KV.LE.N ) THEN + DO 30 I = 1, KL + AB( I, J+KV ) = ZERO + 30 CONTINUE + END IF +* +* Find pivot and test for singularity. KM is the number of +* subdiagonal elements in the current column. +* + KM = MIN( KL, M-J ) + JP = IDAMAX( KM+1, AB( KV+1, J ), 1 ) + IPIV( J ) = JP + J - 1 + IF( AB( KV+JP, J ).NE.ZERO ) THEN + JU = MAX( JU, MIN( J+KU+JP-1, N ) ) +* +* Apply interchange to columns J to JU. +* + IF( JP.NE.1 ) + $ CALL DSWAP( JU-J+1, AB( KV+JP, J ), LDAB-1, + $ AB( KV+1, J ), LDAB-1 ) +* + IF( KM.GT.0 ) THEN +* +* Compute multipliers. +* + CALL DSCAL( KM, ONE / AB( KV+1, J ), AB( KV+2, J ), 1 ) +* +* Update trailing submatrix within the band. +* + IF( JU.GT.J ) + $ CALL DGER( KM, JU-J, -ONE, AB( KV+2, J ), 1, + $ AB( KV, J+1 ), LDAB-1, AB( KV+1, J+1 ), + $ LDAB-1 ) + END IF + ELSE +* +* If pivot is zero, set INFO to the index of the pivot +* unless a zero pivot has already been found. +* + IF( INFO.EQ.0 ) + $ INFO = J + END IF + 40 CONTINUE + RETURN +* +* End of DGBTF2 +* + END diff --git a/costa/native/external/lapack/dgbtrf.f b/costa/native/external/lapack/dgbtrf.f new file mode 100644 index 000000000..c6e3d0a9c --- /dev/null +++ b/costa/native/external/lapack/dgbtrf.f @@ -0,0 +1,442 @@ + SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* DGBTRF computes an LU factorization of a real m-by-n band matrix A +* using partial pivoting with row interchanges. +* +* This is the blocked version of the algorithm, calling Level 3 BLAS. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows KL+1 to +* 2*KL+KU+1; rows 1 to KL of the array need not be set. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +* +* On exit, details of the factorization: U is stored as an +* upper triangular band matrix with KL+KU superdiagonals in +* rows 1 to KL+KU+1, and the multipliers used during the +* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +* See below for further details. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* M = N = 6, KL = 2, KU = 1: +* +* On entry: On exit: +* +* * * * + + + * * * u14 u25 u36 +* * * + + + + * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +* a31 a42 a53 a64 * * m31 m42 m53 m64 * * +* +* Array elements marked * are not used by the routine; elements marked +* + need not be set on entry, but are required by the routine to store +* elements of U because of fill-in resulting from the row interchanges. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) + INTEGER NBMAX, LDWORK + PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 ) +* .. +* .. Local Scalars .. + INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP, + $ JU, K2, KM, KV, NB, NW + DOUBLE PRECISION TEMP +* .. +* .. Local Arrays .. + DOUBLE PRECISION WORK13( LDWORK, NBMAX ), + $ WORK31( LDWORK, NBMAX ) +* .. +* .. External Functions .. + INTEGER IDAMAX, ILAENV + EXTERNAL IDAMAX, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGBTF2, DGEMM, DGER, DLASWP, DSCAL, + $ DSWAP, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* KV is the number of superdiagonals in the factor U, allowing for +* fill-in +* + KV = KU + KL +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KV+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGBTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment +* + NB = ILAENV( 1, 'DGBTRF', ' ', M, N, KL, KU ) +* +* The block size must not exceed the limit set by the size of the +* local arrays WORK13 and WORK31. +* + NB = MIN( NB, NBMAX ) +* + IF( NB.LE.1 .OR. NB.GT.KL ) THEN +* +* Use unblocked code +* + CALL DGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) + ELSE +* +* Use blocked code +* +* Zero the superdiagonal elements of the work array WORK13 +* + DO 20 J = 1, NB + DO 10 I = 1, J - 1 + WORK13( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Zero the subdiagonal elements of the work array WORK31 +* + DO 40 J = 1, NB + DO 30 I = J + 1, NB + WORK31( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Gaussian elimination with partial pivoting +* +* Set fill-in elements in columns KU+2 to KV to zero +* + DO 60 J = KU + 2, MIN( KV, N ) + DO 50 I = KV - J + 2, KL + AB( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE +* +* JU is the index of the last column affected by the current +* stage of the factorization +* + JU = 1 +* + DO 180 J = 1, MIN( M, N ), NB + JB = MIN( NB, MIN( M, N )-J+1 ) +* +* The active part of the matrix is partitioned +* +* A11 A12 A13 +* A21 A22 A23 +* A31 A32 A33 +* +* Here A11, A21 and A31 denote the current block of JB columns +* which is about to be factorized. The number of rows in the +* partitioning are JB, I2, I3 respectively, and the numbers +* of columns are JB, J2, J3. The superdiagonal elements of A13 +* and the subdiagonal elements of A31 lie outside the band. +* + I2 = MIN( KL-JB, M-J-JB+1 ) + I3 = MIN( JB, M-J-KL+1 ) +* +* J2 and J3 are computed after JU has been updated. +* +* Factorize the current block of JB columns +* + DO 80 JJ = J, J + JB - 1 +* +* Set fill-in elements in column JJ+KV to zero +* + IF( JJ+KV.LE.N ) THEN + DO 70 I = 1, KL + AB( I, JJ+KV ) = ZERO + 70 CONTINUE + END IF +* +* Find pivot and test for singularity. KM is the number of +* subdiagonal elements in the current column. +* + KM = MIN( KL, M-JJ ) + JP = IDAMAX( KM+1, AB( KV+1, JJ ), 1 ) + IPIV( JJ ) = JP + JJ - J + IF( AB( KV+JP, JJ ).NE.ZERO ) THEN + JU = MAX( JU, MIN( JJ+KU+JP-1, N ) ) + IF( JP.NE.1 ) THEN +* +* Apply interchange to columns J to J+JB-1 +* + IF( JP+JJ-1.LT.J+KL ) THEN +* + CALL DSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1, + $ AB( KV+JP+JJ-J, J ), LDAB-1 ) + ELSE +* +* The interchange affects columns J to JJ-1 of A31 +* which are stored in the work array WORK31 +* + CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) + CALL DSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1, + $ AB( KV+JP, JJ ), LDAB-1 ) + END IF + END IF +* +* Compute multipliers +* + CALL DSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ), + $ 1 ) +* +* Update trailing submatrix within the band and within +* the current block. JM is the index of the last column +* which needs to be updated. +* + JM = MIN( JU, J+JB-1 ) + IF( JM.GT.JJ ) + $ CALL DGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1, + $ AB( KV, JJ+1 ), LDAB-1, + $ AB( KV+1, JJ+1 ), LDAB-1 ) + ELSE +* +* If pivot is zero, set INFO to the index of the pivot +* unless a zero pivot has already been found. +* + IF( INFO.EQ.0 ) + $ INFO = JJ + END IF +* +* Copy current column of A31 into the work array WORK31 +* + NW = MIN( JJ-J+1, I3 ) + IF( NW.GT.0 ) + $ CALL DCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1, + $ WORK31( 1, JJ-J+1 ), 1 ) + 80 CONTINUE + IF( J+JB.LE.N ) THEN +* +* Apply the row interchanges to the other blocks. +* + J2 = MIN( JU-J+1, KV ) - JB + J3 = MAX( 0, JU-J-KV+1 ) +* +* Use DLASWP to apply the row interchanges to A12, A22, and +* A32. +* + CALL DLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB, + $ IPIV( J ), 1 ) +* +* Adjust the pivot indices. +* + DO 90 I = J, J + JB - 1 + IPIV( I ) = IPIV( I ) + J - 1 + 90 CONTINUE +* +* Apply the row interchanges to A13, A23, and A33 +* columnwise. +* + K2 = J - 1 + JB + J2 + DO 110 I = 1, J3 + JJ = K2 + I + DO 100 II = J + I - 1, J + JB - 1 + IP = IPIV( II ) + IF( IP.NE.II ) THEN + TEMP = AB( KV+1+II-JJ, JJ ) + AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ ) + AB( KV+1+IP-JJ, JJ ) = TEMP + END IF + 100 CONTINUE + 110 CONTINUE +* +* Update the relevant part of the trailing submatrix +* + IF( J2.GT.0 ) THEN +* +* Update A12 +* + CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, J2, ONE, AB( KV+1, J ), LDAB-1, + $ AB( KV+1-JB, J+JB ), LDAB-1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A22 +* + CALL DGEMM( 'No transpose', 'No transpose', I2, J2, + $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, + $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, + $ AB( KV+1, J+JB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Update A32 +* + CALL DGEMM( 'No transpose', 'No transpose', I3, J2, + $ JB, -ONE, WORK31, LDWORK, + $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, + $ AB( KV+KL+1-JB, J+JB ), LDAB-1 ) + END IF + END IF +* + IF( J3.GT.0 ) THEN +* +* Copy the lower triangle of A13 into the work array +* WORK13 +* + DO 130 JJ = 1, J3 + DO 120 II = JJ, JB + WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 ) + 120 CONTINUE + 130 CONTINUE +* +* Update A13 in the work array +* + CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, J3, ONE, AB( KV+1, J ), LDAB-1, + $ WORK13, LDWORK ) +* + IF( I2.GT.0 ) THEN +* +* Update A23 +* + CALL DGEMM( 'No transpose', 'No transpose', I2, J3, + $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, + $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ), + $ LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Update A33 +* + CALL DGEMM( 'No transpose', 'No transpose', I3, J3, + $ JB, -ONE, WORK31, LDWORK, WORK13, + $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 ) + END IF +* +* Copy the lower triangle of A13 back into place +* + DO 150 JJ = 1, J3 + DO 140 II = JJ, JB + AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ ) + 140 CONTINUE + 150 CONTINUE + END IF + ELSE +* +* Adjust the pivot indices. +* + DO 160 I = J, J + JB - 1 + IPIV( I ) = IPIV( I ) + J - 1 + 160 CONTINUE + END IF +* +* Partially undo the interchanges in the current block to +* restore the upper triangular form of A31 and copy the upper +* triangle of A31 back into place +* + DO 170 JJ = J + JB - 1, J, -1 + JP = IPIV( JJ ) - JJ + 1 + IF( JP.NE.1 ) THEN +* +* Apply interchange to columns J to JJ-1 +* + IF( JP+JJ-1.LT.J+KL ) THEN +* +* The interchange does not affect A31 +* + CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ AB( KV+JP+JJ-J, J ), LDAB-1 ) + ELSE +* +* The interchange does affect A31 +* + CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) + END IF + END IF +* +* Copy the current column of A31 back into place +* + NW = MIN( I3, JJ-J+1 ) + IF( NW.GT.0 ) + $ CALL DCOPY( NW, WORK31( 1, JJ-J+1 ), 1, + $ AB( KV+KL+1-JJ+J, JJ ), 1 ) + 170 CONTINUE + 180 CONTINUE + END IF +* + RETURN +* +* End of DGBTRF +* + END diff --git a/costa/native/external/lapack/dgbtrs.f b/costa/native/external/lapack/dgbtrs.f new file mode 100644 index 000000000..26fdf91eb --- /dev/null +++ b/costa/native/external/lapack/dgbtrs.f @@ -0,0 +1,187 @@ + SUBROUTINE DGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION AB( LDAB, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DGBTRS solves a system of linear equations +* A * X = B or A' * X = B +* with a general band matrix A using the LU factorization computed +* by DGBTRF. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations. +* = 'N': A * X = B (No transpose) +* = 'T': A'* X = B (Transpose) +* = 'C': A'* X = B (Conjugate transpose = Transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) +* Details of the LU factorization of the band matrix A, as +* computed by DGBTRF. U is stored as an upper triangular band +* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +* the multipliers used during the factorization are stored in +* rows KL+KU+2 to 2*KL+KU+1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= N, row i of the matrix was +* interchanged with row IPIV(i). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LNOTI, NOTRAN + INTEGER I, J, KD, L, LM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DGER, DSWAP, DTBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.( 2*KL+KU+1 ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + KD = KU + KL + 1 + LNOTI = KL.GT.0 +* + IF( NOTRAN ) THEN +* +* Solve A*X = B. +* +* Solve L*X = B, overwriting B with X. +* +* L is represented as a product of permutations and unit lower +* triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1), +* where each transformation L(i) is a rank-one modification of +* the identity matrix. +* + IF( LNOTI ) THEN + DO 10 J = 1, N - 1 + LM = MIN( KL, N-J ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL DSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + CALL DGER( LM, NRHS, -ONE, AB( KD+1, J ), 1, B( J, 1 ), + $ LDB, B( J+1, 1 ), LDB ) + 10 CONTINUE + END IF +* + DO 20 I = 1, NRHS +* +* Solve U*X = B, overwriting B with X. +* + CALL DTBSV( 'Upper', 'No transpose', 'Non-unit', N, KL+KU, + $ AB, LDAB, B( 1, I ), 1 ) + 20 CONTINUE +* + ELSE +* +* Solve A'*X = B. +* + DO 30 I = 1, NRHS +* +* Solve U'*X = B, overwriting B with X. +* + CALL DTBSV( 'Upper', 'Transpose', 'Non-unit', N, KL+KU, AB, + $ LDAB, B( 1, I ), 1 ) + 30 CONTINUE +* +* Solve L'*X = B, overwriting B with X. +* + IF( LNOTI ) THEN + DO 40 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + CALL DGEMV( 'Transpose', LM, NRHS, -ONE, B( J+1, 1 ), + $ LDB, AB( KD+1, J ), 1, ONE, B( J, 1 ), LDB ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL DSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + 40 CONTINUE + END IF + END IF + RETURN +* +* End of DGBTRS +* + END diff --git a/costa/native/external/lapack/dgebak.f b/costa/native/external/lapack/dgebak.f new file mode 100644 index 000000000..7604639e2 --- /dev/null +++ b/costa/native/external/lapack/dgebak.f @@ -0,0 +1,189 @@ + SUBROUTINE DGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOB, SIDE + INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION SCALE( * ), V( LDV, * ) +* .. +* +* Purpose +* ======= +* +* DGEBAK forms the right or left eigenvectors of a real general matrix +* by backward transformation on the computed eigenvectors of the +* balanced matrix output by DGEBAL. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the type of backward transformation required: +* = 'N', do nothing, return immediately; +* = 'P', do backward transformation for permutation only; +* = 'S', do backward transformation for scaling only; +* = 'B', do backward transformations for both permutation and +* scaling. +* JOB must be the same as the argument JOB supplied to DGEBAL. +* +* SIDE (input) CHARACTER*1 +* = 'R': V contains right eigenvectors; +* = 'L': V contains left eigenvectors. +* +* N (input) INTEGER +* The number of rows of the matrix V. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* The integers ILO and IHI determined by DGEBAL. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* SCALE (input) DOUBLE PRECISION array, dimension (N) +* Details of the permutation and scaling factors, as returned +* by DGEBAL. +* +* M (input) INTEGER +* The number of columns of the matrix V. M >= 0. +* +* V (input/output) DOUBLE PRECISION array, dimension (LDV,M) +* On entry, the matrix of right or left eigenvectors to be +* transformed, as returned by DHSEIN or DTREVC. +* On exit, V is overwritten by the transformed eigenvectors. +* +* LDV (input) INTEGER +* The leading dimension of the array V. LDV >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFTV, RIGHTV + INTEGER I, II, K + DOUBLE PRECISION S +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and Test the input parameters +* + RIGHTV = LSAME( SIDE, 'R' ) + LEFTV = LSAME( SIDE, 'L' ) +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -7 + ELSE IF( LDV.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEBAK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( M.EQ.0 ) + $ RETURN + IF( LSAME( JOB, 'N' ) ) + $ RETURN +* + IF( ILO.EQ.IHI ) + $ GO TO 30 +* +* Backward balance +* + IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN +* + IF( RIGHTV ) THEN + DO 10 I = ILO, IHI + S = SCALE( I ) + CALL DSCAL( M, S, V( I, 1 ), LDV ) + 10 CONTINUE + END IF +* + IF( LEFTV ) THEN + DO 20 I = ILO, IHI + S = ONE / SCALE( I ) + CALL DSCAL( M, S, V( I, 1 ), LDV ) + 20 CONTINUE + END IF +* + END IF +* +* Backward permutation +* +* For I = ILO-1 step -1 until 1, +* IHI+1 step 1 until N do -- +* + 30 CONTINUE + IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN + IF( RIGHTV ) THEN + DO 40 II = 1, N + I = II + IF( I.GE.ILO .AND. I.LE.IHI ) + $ GO TO 40 + IF( I.LT.ILO ) + $ I = ILO - II + K = SCALE( I ) + IF( K.EQ.I ) + $ GO TO 40 + CALL DSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 40 CONTINUE + END IF +* + IF( LEFTV ) THEN + DO 50 II = 1, N + I = II + IF( I.GE.ILO .AND. I.LE.IHI ) + $ GO TO 50 + IF( I.LT.ILO ) + $ I = ILO - II + K = SCALE( I ) + IF( K.EQ.I ) + $ GO TO 50 + CALL DSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 50 CONTINUE + END IF + END IF +* + RETURN +* +* End of DGEBAK +* + END diff --git a/costa/native/external/lapack/dgebal.f b/costa/native/external/lapack/dgebal.f new file mode 100644 index 000000000..63e0d1dfe --- /dev/null +++ b/costa/native/external/lapack/dgebal.f @@ -0,0 +1,323 @@ + SUBROUTINE DGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), SCALE( * ) +* .. +* +* Purpose +* ======= +* +* DGEBAL balances a general real matrix A. This involves, first, +* permuting A by a similarity transformation to isolate eigenvalues +* in the first 1 to ILO-1 and last IHI+1 to N elements on the +* diagonal; and second, applying a diagonal similarity transformation +* to rows and columns ILO to IHI to make the rows and columns as +* close in norm as possible. Both steps are optional. +* +* Balancing may reduce the 1-norm of the matrix, and improve the +* accuracy of the computed eigenvalues and/or eigenvectors. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the operations to be performed on A: +* = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 +* for i = 1,...,N; +* = 'P': permute only; +* = 'S': scale only; +* = 'B': both permute and scale. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the input matrix A. +* On exit, A is overwritten by the balanced matrix. +* If JOB = 'N', A is not referenced. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* ILO (output) INTEGER +* IHI (output) INTEGER +* ILO and IHI are set to integers such that on exit +* A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. +* If JOB = 'N' or 'S', ILO = 1 and IHI = N. +* +* SCALE (output) DOUBLE PRECISION array, dimension (N) +* Details of the permutations and scaling factors applied to +* A. If P(j) is the index of the row and column interchanged +* with row and column j and D(j) is the scaling factor +* applied to row and column j, then +* SCALE(j) = P(j) for j = 1,...,ILO-1 +* = D(j) for j = ILO,...,IHI +* = P(j) for j = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The permutations consist of row and column interchanges which put +* the matrix in the form +* +* ( T1 X Y ) +* P A P = ( 0 B Z ) +* ( 0 0 T2 ) +* +* where T1 and T2 are upper triangular matrices whose eigenvalues lie +* along the diagonal. The column indices ILO and IHI mark the starting +* and ending columns of the submatrix B. Balancing consists of applying +* a diagonal similarity transformation inv(D) * B * D to make the +* 1-norms of each row of B and its corresponding column nearly equal. +* The output matrix is +* +* ( T1 X*D Y ) +* ( 0 inv(D)*B*D inv(D)*Z ). +* ( 0 0 T2 ) +* +* Information about the permutations P and the diagonal matrix D is +* returned in the vector SCALE. +* +* This subroutine is based on the EISPACK routine BALANC. +* +* Modified by Tzu-Yi Chen, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION SCLFAC + PARAMETER ( SCLFAC = 0.8D+1 ) + DOUBLE PRECISION FACTOR + PARAMETER ( FACTOR = 0.95D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOCONV + INTEGER I, ICA, IEXC, IRA, J, K, L, M + DOUBLE PRECISION C, CA, F, G, R, RA, S, SFMAX1, SFMAX2, SFMIN1, + $ SFMIN2 +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IDAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEBAL', -INFO ) + RETURN + END IF +* + K = 1 + L = N +* + IF( N.EQ.0 ) + $ GO TO 210 +* + IF( LSAME( JOB, 'N' ) ) THEN + DO 10 I = 1, N + SCALE( I ) = ONE + 10 CONTINUE + GO TO 210 + END IF +* + IF( LSAME( JOB, 'S' ) ) + $ GO TO 120 +* +* Permutation to isolate eigenvalues if possible +* + GO TO 50 +* +* Row and column exchange. +* + 20 CONTINUE + SCALE( M ) = J + IF( J.EQ.M ) + $ GO TO 30 +* + CALL DSWAP( L, A( 1, J ), 1, A( 1, M ), 1 ) + CALL DSWAP( N-K+1, A( J, K ), LDA, A( M, K ), LDA ) +* + 30 CONTINUE + GO TO ( 40, 80 )IEXC +* +* Search for rows isolating an eigenvalue and push them down. +* + 40 CONTINUE + IF( L.EQ.1 ) + $ GO TO 210 + L = L - 1 +* + 50 CONTINUE + DO 70 J = L, 1, -1 +* + DO 60 I = 1, L + IF( I.EQ.J ) + $ GO TO 60 + IF( A( J, I ).NE.ZERO ) + $ GO TO 70 + 60 CONTINUE +* + M = L + IEXC = 1 + GO TO 20 + 70 CONTINUE +* + GO TO 90 +* +* Search for columns isolating an eigenvalue and push them left. +* + 80 CONTINUE + K = K + 1 +* + 90 CONTINUE + DO 110 J = K, L +* + DO 100 I = K, L + IF( I.EQ.J ) + $ GO TO 100 + IF( A( I, J ).NE.ZERO ) + $ GO TO 110 + 100 CONTINUE +* + M = K + IEXC = 2 + GO TO 20 + 110 CONTINUE +* + 120 CONTINUE + DO 130 I = K, L + SCALE( I ) = ONE + 130 CONTINUE +* + IF( LSAME( JOB, 'P' ) ) + $ GO TO 210 +* +* Balance the submatrix in rows K to L. +* +* Iterative loop for norm reduction +* + SFMIN1 = DLAMCH( 'S' ) / DLAMCH( 'P' ) + SFMAX1 = ONE / SFMIN1 + SFMIN2 = SFMIN1*SCLFAC + SFMAX2 = ONE / SFMIN2 + 140 CONTINUE + NOCONV = .FALSE. +* + DO 200 I = K, L + C = ZERO + R = ZERO +* + DO 150 J = K, L + IF( J.EQ.I ) + $ GO TO 150 + C = C + ABS( A( J, I ) ) + R = R + ABS( A( I, J ) ) + 150 CONTINUE + ICA = IDAMAX( L, A( 1, I ), 1 ) + CA = ABS( A( ICA, I ) ) + IRA = IDAMAX( N-K+1, A( I, K ), LDA ) + RA = ABS( A( I, IRA+K-1 ) ) +* +* Guard against zero C or R due to underflow. +* + IF( C.EQ.ZERO .OR. R.EQ.ZERO ) + $ GO TO 200 + G = R / SCLFAC + F = ONE + S = C + R + 160 CONTINUE + IF( C.GE.G .OR. MAX( F, C, CA ).GE.SFMAX2 .OR. + $ MIN( R, G, RA ).LE.SFMIN2 )GO TO 170 + F = F*SCLFAC + C = C*SCLFAC + CA = CA*SCLFAC + R = R / SCLFAC + G = G / SCLFAC + RA = RA / SCLFAC + GO TO 160 +* + 170 CONTINUE + G = C / SCLFAC + 180 CONTINUE + IF( G.LT.R .OR. MAX( R, RA ).GE.SFMAX2 .OR. + $ MIN( F, C, G, CA ).LE.SFMIN2 )GO TO 190 + F = F / SCLFAC + C = C / SCLFAC + G = G / SCLFAC + CA = CA / SCLFAC + R = R*SCLFAC + RA = RA*SCLFAC + GO TO 180 +* +* Now balance. +* + 190 CONTINUE + IF( ( C+R ).GE.FACTOR*S ) + $ GO TO 200 + IF( F.LT.ONE .AND. SCALE( I ).LT.ONE ) THEN + IF( F*SCALE( I ).LE.SFMIN1 ) + $ GO TO 200 + END IF + IF( F.GT.ONE .AND. SCALE( I ).GT.ONE ) THEN + IF( SCALE( I ).GE.SFMAX1 / F ) + $ GO TO 200 + END IF + G = ONE / F + SCALE( I ) = SCALE( I )*F + NOCONV = .TRUE. +* + CALL DSCAL( N-K+1, G, A( I, K ), LDA ) + CALL DSCAL( L, F, A( 1, I ), 1 ) +* + 200 CONTINUE +* + IF( NOCONV ) + $ GO TO 140 +* + 210 CONTINUE + ILO = K + IHI = L +* + RETURN +* +* End of DGEBAL +* + END diff --git a/costa/native/external/lapack/dgebd2.f b/costa/native/external/lapack/dgebd2.f new file mode 100644 index 000000000..d4ed81f8b --- /dev/null +++ b/costa/native/external/lapack/dgebd2.f @@ -0,0 +1,238 @@ + SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ), + $ TAUQ( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGEBD2 reduces a real general m by n matrix A to upper or lower +* bidiagonal form B by an orthogonal transformation: Q' * A * P = B. +* +* If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows in the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns in the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the m by n general matrix to be reduced. +* On exit, +* if m >= n, the diagonal and the first superdiagonal are +* overwritten with the upper bidiagonal matrix B; the +* elements below the diagonal, with the array TAUQ, represent +* the orthogonal matrix Q as a product of elementary +* reflectors, and the elements above the first superdiagonal, +* with the array TAUP, represent the orthogonal matrix P as +* a product of elementary reflectors; +* if m < n, the diagonal and the first subdiagonal are +* overwritten with the lower bidiagonal matrix B; the +* elements below the first subdiagonal, with the array TAUQ, +* represent the orthogonal matrix Q as a product of +* elementary reflectors, and the elements above the diagonal, +* with the array TAUP, represent the orthogonal matrix P as +* a product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* D (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The diagonal elements of the bidiagonal matrix B: +* D(i) = A(i,i). +* +* E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) +* The off-diagonal elements of the bidiagonal matrix B: +* if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; +* if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. +* +* TAUQ (output) DOUBLE PRECISION array dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix Q. See Further Details. +* +* TAUP (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix P. See Further Details. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (max(M,N)) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrices Q and P are represented as products of elementary +* reflectors: +* +* If m >= n, +* +* Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are real scalars, and v and u are real vectors; +* v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); +* u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); +* tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* If m < n, +* +* Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are real scalars, and v and u are real vectors; +* v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); +* u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); +* tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* The contents of A on exit are illustrated by the following examples: +* +* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +* +* ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +* ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +* ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +* ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +* ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +* ( v1 v2 v3 v4 v5 ) +* +* where d and e denote diagonal and off-diagonal elements of B, vi +* denotes an element of the vector defining H(i), and ui an element of +* the vector defining G(i). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.LT.0 ) THEN + CALL XERBLA( 'DGEBD2', -INFO ) + RETURN + END IF +* + IF( M.GE.N ) THEN +* +* Reduce to upper bidiagonal form +* + DO 10 I = 1, N +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAUQ( I ) ) + D( I ) = A( I, I ) + A( I, I ) = ONE +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAUQ( I ), + $ A( I, I+1 ), LDA, WORK ) + A( I, I ) = D( I ) +* + IF( I.LT.N ) THEN +* +* Generate elementary reflector G(i) to annihilate +* A(i,i+2:n) +* + CALL DLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ), + $ LDA, TAUP( I ) ) + E( I ) = A( I, I+1 ) + A( I, I+1 ) = ONE +* +* Apply G(i) to A(i+1:m,i+1:n) from the right +* + CALL DLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA, + $ TAUP( I ), A( I+1, I+1 ), LDA, WORK ) + A( I, I+1 ) = E( I ) + ELSE + TAUP( I ) = ZERO + END IF + 10 CONTINUE + ELSE +* +* Reduce to lower bidiagonal form +* + DO 20 I = 1, M +* +* Generate elementary reflector G(i) to annihilate A(i,i+1:n) +* + CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA, + $ TAUP( I ) ) + D( I ) = A( I, I ) + A( I, I ) = ONE +* +* Apply G(i) to A(i+1:m,i:n) from the right +* + CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAUP( I ), + $ A( MIN( I+1, M ), I ), LDA, WORK ) + A( I, I ) = D( I ) +* + IF( I.LT.M ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:m,i) +* + CALL DLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1, + $ TAUQ( I ) ) + E( I ) = A( I+1, I ) + A( I+1, I ) = ONE +* +* Apply H(i) to A(i+1:m,i+1:n) from the left +* + CALL DLARF( 'Left', M-I, N-I, A( I+1, I ), 1, TAUQ( I ), + $ A( I+1, I+1 ), LDA, WORK ) + A( I+1, I ) = E( I ) + ELSE + TAUQ( I ) = ZERO + END IF + 20 CONTINUE + END IF + RETURN +* +* End of DGEBD2 +* + END diff --git a/costa/native/external/lapack/dgebrd.f b/costa/native/external/lapack/dgebrd.f new file mode 100644 index 000000000..75184b846 --- /dev/null +++ b/costa/native/external/lapack/dgebrd.f @@ -0,0 +1,269 @@ + SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ), + $ TAUQ( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGEBRD reduces a general real M-by-N matrix A to upper or lower +* bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. +* +* If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows in the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns in the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N general matrix to be reduced. +* On exit, +* if m >= n, the diagonal and the first superdiagonal are +* overwritten with the upper bidiagonal matrix B; the +* elements below the diagonal, with the array TAUQ, represent +* the orthogonal matrix Q as a product of elementary +* reflectors, and the elements above the first superdiagonal, +* with the array TAUP, represent the orthogonal matrix P as +* a product of elementary reflectors; +* if m < n, the diagonal and the first subdiagonal are +* overwritten with the lower bidiagonal matrix B; the +* elements below the first subdiagonal, with the array TAUQ, +* represent the orthogonal matrix Q as a product of +* elementary reflectors, and the elements above the diagonal, +* with the array TAUP, represent the orthogonal matrix P as +* a product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* D (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The diagonal elements of the bidiagonal matrix B: +* D(i) = A(i,i). +* +* E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) +* The off-diagonal elements of the bidiagonal matrix B: +* if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; +* if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. +* +* TAUQ (output) DOUBLE PRECISION array dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix Q. See Further Details. +* +* TAUP (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix P. See Further Details. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,M,N). +* For optimum performance LWORK >= (M+N)*NB, where NB +* is the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrices Q and P are represented as products of elementary +* reflectors: +* +* If m >= n, +* +* Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are real scalars, and v and u are real vectors; +* v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); +* u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); +* tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* If m < n, +* +* Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are real scalars, and v and u are real vectors; +* v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); +* u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); +* tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* The contents of A on exit are illustrated by the following examples: +* +* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +* +* ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +* ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +* ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +* ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +* ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +* ( v1 v2 v3 v4 v5 ) +* +* where d and e denote diagonal and off-diagonal elements of B, vi +* denotes an element of the vector defining H(i), and ui an element of +* the vector defining G(i). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB, + $ NBMIN, NX + DOUBLE PRECISION WS +* .. +* .. External Subroutines .. + EXTERNAL DGEBD2, DGEMM, DLABRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB = MAX( 1, ILAENV( 1, 'DGEBRD', ' ', M, N, -1, -1 ) ) + LWKOPT = ( M+N )*NB + WORK( 1 ) = DBLE( LWKOPT ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M, N ) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + IF( INFO.LT.0 ) THEN + CALL XERBLA( 'DGEBRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + MINMN = MIN( M, N ) + IF( MINMN.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + WS = MAX( M, N ) + LDWRKX = M + LDWRKY = N +* + IF( NB.GT.1 .AND. NB.LT.MINMN ) THEN +* +* Set the crossover point NX. +* + NX = MAX( NB, ILAENV( 3, 'DGEBRD', ' ', M, N, -1, -1 ) ) +* +* Determine when to switch from blocked to unblocked code. +* + IF( NX.LT.MINMN ) THEN + WS = ( M+N )*NB + IF( LWORK.LT.WS ) THEN +* +* Not enough work space for the optimal NB, consider using +* a smaller block size. +* + NBMIN = ILAENV( 2, 'DGEBRD', ' ', M, N, -1, -1 ) + IF( LWORK.GE.( M+N )*NBMIN ) THEN + NB = LWORK / ( M+N ) + ELSE + NB = 1 + NX = MINMN + END IF + END IF + END IF + ELSE + NX = MINMN + END IF +* + DO 30 I = 1, MINMN - NX, NB +* +* Reduce rows and columns i:i+nb-1 to bidiagonal form and return +* the matrices X and Y which are needed to update the unreduced +* part of the matrix +* + CALL DLABRD( M-I+1, N-I+1, NB, A( I, I ), LDA, D( I ), E( I ), + $ TAUQ( I ), TAUP( I ), WORK, LDWRKX, + $ WORK( LDWRKX*NB+1 ), LDWRKY ) +* +* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update +* of the form A := A - V*Y' - X*U' +* + CALL DGEMM( 'No transpose', 'Transpose', M-I-NB+1, N-I-NB+1, + $ NB, -ONE, A( I+NB, I ), LDA, + $ WORK( LDWRKX*NB+NB+1 ), LDWRKY, ONE, + $ A( I+NB, I+NB ), LDA ) + CALL DGEMM( 'No transpose', 'No transpose', M-I-NB+1, N-I-NB+1, + $ NB, -ONE, WORK( NB+1 ), LDWRKX, A( I, I+NB ), LDA, + $ ONE, A( I+NB, I+NB ), LDA ) +* +* Copy diagonal and off-diagonal elements of B back into A +* + IF( M.GE.N ) THEN + DO 10 J = I, I + NB - 1 + A( J, J ) = D( J ) + A( J, J+1 ) = E( J ) + 10 CONTINUE + ELSE + DO 20 J = I, I + NB - 1 + A( J, J ) = D( J ) + A( J+1, J ) = E( J ) + 20 CONTINUE + END IF + 30 CONTINUE +* +* Use unblocked code to reduce the remainder of the matrix +* + CALL DGEBD2( M-I+1, N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAUQ( I ), TAUP( I ), WORK, IINFO ) + WORK( 1 ) = WS + RETURN +* +* End of DGEBRD +* + END diff --git a/costa/native/external/lapack/dgecon.f b/costa/native/external/lapack/dgecon.f new file mode 100644 index 000000000..f6bd485f4 --- /dev/null +++ b/costa/native/external/lapack/dgecon.f @@ -0,0 +1,181 @@ + SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGECON estimates the reciprocal of the condition number of a general +* real matrix A, in either the 1-norm or the infinity-norm, using +* the LU factorization computed by DGETRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The factors L and U from the factorization A = P*L*U +* as computed by DGETRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* ANORM (input) DOUBLE PRECISION +* If NORM = '1' or 'O', the 1-norm of the original matrix A. +* If NORM = 'I', the infinity-norm of the original matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ONENRM + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + DOUBLE PRECISION AINVNM, SCALE, SL, SMLNUM, SU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IDAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLACON, DLATRS, DRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGECON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + CALL DLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, N, A, + $ LDA, WORK, SL, WORK( 2*N+1 ), INFO ) +* +* Multiply by inv(U). +* + CALL DLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SU, WORK( 3*N+1 ), INFO ) + ELSE +* +* Multiply by inv(U'). +* + CALL DLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, A, + $ LDA, WORK, SU, WORK( 3*N+1 ), INFO ) +* +* Multiply by inv(L'). +* + CALL DLATRS( 'Lower', 'Transpose', 'Unit', NORMIN, N, A, + $ LDA, WORK, SL, WORK( 2*N+1 ), INFO ) + END IF +* +* Divide X by 1/(SL*SU) if doing so will not cause overflow. +* + SCALE = SL*SU + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = IDAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL DRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of DGECON +* + END diff --git a/costa/native/external/lapack/dgeequ.f b/costa/native/external/lapack/dgeequ.f new file mode 100644 index 000000000..a1be3c169 --- /dev/null +++ b/costa/native/external/lapack/dgeequ.f @@ -0,0 +1,226 @@ + SUBROUTINE DGEEQU( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N + DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( * ), R( * ) +* .. +* +* Purpose +* ======= +* +* DGEEQU computes row and column scalings intended to equilibrate an +* M-by-N matrix A and reduce its condition number. R returns the row +* scale factors and C the column scale factors, chosen to try to make +* the largest element in each row and column of the matrix B with +* elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. +* +* R(i) and C(j) are restricted to be between SMLNUM = smallest safe +* number and BIGNUM = largest safe number. Use of these scaling +* factors is not guaranteed to reduce the condition number of A but +* works well in practice. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The M-by-N matrix whose equilibration factors are +* to be computed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* R (output) DOUBLE PRECISION array, dimension (M) +* If INFO = 0 or INFO > M, R contains the row scale factors +* for A. +* +* C (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, C contains the column scale factors for A. +* +* ROWCND (output) DOUBLE PRECISION +* If INFO = 0 or INFO > M, ROWCND contains the ratio of the +* smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +* AMAX is neither too large nor too small, it is not worth +* scaling by R. +* +* COLCND (output) DOUBLE PRECISION +* If INFO = 0, COLCND contains the ratio of the smallest +* C(i) to the largest C(i). If COLCND >= 0.1, it is not +* worth scaling by C. +* +* AMAX (output) DOUBLE PRECISION +* Absolute value of largest matrix element. If AMAX is very +* close to overflow or very close to underflow, the matrix +* should be scaled. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= M: the i-th row of A is exactly zero +* > M: the (i-M)-th column of A is exactly zero +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION BIGNUM, RCMAX, RCMIN, SMLNUM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + DO 30 J = 1, N + DO 20 I = 1, M + R( I ) = MAX( R( I ), ABS( A( I, J ) ) ) + 20 CONTINUE + 30 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)) +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + DO 90 J = 1, N + DO 80 I = 1, M + C( J ) = MAX( C( J ), ABS( A( I, J ) )*R( I ) ) + 80 CONTINUE + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)) +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of DGEEQU +* + END diff --git a/costa/native/external/lapack/dgees.f b/costa/native/external/lapack/dgees.f new file mode 100644 index 000000000..18f097041 --- /dev/null +++ b/costa/native/external/lapack/dgees.f @@ -0,0 +1,431 @@ + SUBROUTINE DGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, WR, WI, + $ VS, LDVS, WORK, LWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVS, SORT + INTEGER INFO, LDA, LDVS, LWORK, N, SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + DOUBLE PRECISION A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ), + $ WR( * ) +* .. +* .. Function Arguments .. + LOGICAL SELECT + EXTERNAL SELECT +* .. +* +* Purpose +* ======= +* +* DGEES computes for an N-by-N real nonsymmetric matrix A, the +* eigenvalues, the real Schur form T, and, optionally, the matrix of +* Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). +* +* Optionally, it also orders the eigenvalues on the diagonal of the +* real Schur form so that selected eigenvalues are at the top left. +* The leading columns of Z then form an orthonormal basis for the +* invariant subspace corresponding to the selected eigenvalues. +* +* A matrix is in real Schur form if it is upper quasi-triangular with +* 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in the +* form +* [ a b ] +* [ c a ] +* +* where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). +* +* Arguments +* ========= +* +* JOBVS (input) CHARACTER*1 +* = 'N': Schur vectors are not computed; +* = 'V': Schur vectors are computed. +* +* SORT (input) CHARACTER*1 +* Specifies whether or not to order the eigenvalues on the +* diagonal of the Schur form. +* = 'N': Eigenvalues are not ordered; +* = 'S': Eigenvalues are ordered (see SELECT). +* +* SELECT (input) LOGICAL FUNCTION of two DOUBLE PRECISION arguments +* SELECT must be declared EXTERNAL in the calling subroutine. +* If SORT = 'S', SELECT is used to select eigenvalues to sort +* to the top left of the Schur form. +* If SORT = 'N', SELECT is not referenced. +* An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if +* SELECT(WR(j),WI(j)) is true; i.e., if either one of a complex +* conjugate pair of eigenvalues is selected, then both complex +* eigenvalues are selected. +* Note that a selected complex eigenvalue may no longer +* satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since +* ordering may change the value of complex eigenvalues +* (especially if the eigenvalue is ill-conditioned); in this +* case INFO is set to N+2 (see INFO below). +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the N-by-N matrix A. +* On exit, A has been overwritten by its real Schur form T. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* SDIM (output) INTEGER +* If SORT = 'N', SDIM = 0. +* If SORT = 'S', SDIM = number of eigenvalues (after sorting) +* for which SELECT is true. (Complex conjugate +* pairs for which SELECT is true for either +* eigenvalue count as 2.) +* +* WR (output) DOUBLE PRECISION array, dimension (N) +* WI (output) DOUBLE PRECISION array, dimension (N) +* WR and WI contain the real and imaginary parts, +* respectively, of the computed eigenvalues in the same order +* that they appear on the diagonal of the output Schur form T. +* Complex conjugate pairs of eigenvalues will appear +* consecutively with the eigenvalue having the positive +* imaginary part first. +* +* VS (output) DOUBLE PRECISION array, dimension (LDVS,N) +* If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur +* vectors. +* If JOBVS = 'N', VS is not referenced. +* +* LDVS (input) INTEGER +* The leading dimension of the array VS. LDVS >= 1; if +* JOBVS = 'V', LDVS >= N. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) contains the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,3*N). +* For good performance, LWORK must generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* BWORK (workspace) LOGICAL array, dimension (N) +* Not referenced if SORT = 'N'. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, and i is +* <= N: the QR algorithm failed to compute all the +* eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI +* contain those eigenvalues which have converged; if +* JOBVS = 'V', VS contains the matrix which reduces A +* to its partially converged Schur form. +* = N+1: the eigenvalues could not be reordered because some +* eigenvalues were too close to separate (the problem +* is very ill-conditioned); +* = N+2: after reordering, roundoff changed values of some +* complex eigenvalues so that leading eigenvalues in +* the Schur form no longer satisfy SELECT=.TRUE. This +* could also be caused by underflow due to scaling. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, LASTSL, LQUERY, LST2SL, SCALEA, WANTST, + $ WANTVS + INTEGER HSWORK, I, I1, I2, IBAL, ICOND, IERR, IEVAL, + $ IHI, ILO, INXT, IP, ITAU, IWRK, K, MAXB, + $ MAXWRK, MINWRK + DOUBLE PRECISION ANRM, BIGNUM, CSCALE, EPS, S, SEP, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEBAK, DGEBAL, DGEHRD, DHSEQR, DLACPY, + $ DLASCL, DORGHR, DSWAP, DTRSEN, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVS = LSAME( JOBVS, 'V' ) + WANTST = LSAME( SORT, 'S' ) + IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by DHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MAXWRK = 2*N + N*ILAENV( 1, 'DGEHRD', ' ', N, 1, N, 0 ) + MINWRK = MAX( 1, 3*N ) + IF( .NOT.WANTVS ) THEN + MAXB = MAX( ILAENV( 8, 'DHSEQR', 'SN', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'DHSEQR', 'SN', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, N+HSWORK, 1 ) + ELSE + MAXWRK = MAX( MAXWRK, 2*N+( N-1 )* + $ ILAENV( 1, 'DORGHR', ' ', N, 1, N, -1 ) ) + MAXB = MAX( ILAENV( 8, 'DHSEQR', 'EN', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'DHSEQR', 'EN', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, N+HSWORK, 1 ) + END IF + WORK( 1 ) = MAXWRK + END IF + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEES ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL DLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (Workspace: need N) +* + IBAL = 1 + CALL DGEBAL( 'P', N, A, LDA, ILO, IHI, WORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (Workspace: need 3*N, prefer 2*N+N*NB) +* + ITAU = N + IBAL + IWRK = N + ITAU + CALL DGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVS ) THEN +* +* Copy Householder vectors to VS +* + CALL DLACPY( 'L', N, N, A, LDA, VS, LDVS ) +* +* Generate orthogonal matrix in VS +* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* + CALL DORGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) + END IF +* + SDIM = 0 +* +* Perform QR iteration, accumulating Schur vectors in VS if desired +* (Workspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL DHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, WR, WI, VS, LDVS, + $ WORK( IWRK ), LWORK-IWRK+1, IEVAL ) + IF( IEVAL.GT.0 ) + $ INFO = IEVAL +* +* Sort eigenvalues if desired +* + IF( WANTST .AND. INFO.EQ.0 ) THEN + IF( SCALEA ) THEN + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WR, N, IERR ) + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WI, N, IERR ) + END IF + DO 10 I = 1, N + BWORK( I ) = SELECT( WR( I ), WI( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues and transform Schur vectors +* (Workspace: none needed) +* + CALL DTRSEN( 'N', JOBVS, BWORK, N, A, LDA, VS, LDVS, WR, WI, + $ SDIM, S, SEP, WORK( IWRK ), LWORK-IWRK+1, IDUM, 1, + $ ICOND ) + IF( ICOND.GT.0 ) + $ INFO = N + ICOND + END IF +* + IF( WANTVS ) THEN +* +* Undo balancing +* (Workspace: need N) +* + CALL DGEBAK( 'P', 'R', N, ILO, IHI, WORK( IBAL ), N, VS, LDVS, + $ IERR ) + END IF +* + IF( SCALEA ) THEN +* +* Undo scaling for the Schur form of A +* + CALL DLASCL( 'H', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR ) + CALL DCOPY( N, A, LDA+1, WR, 1 ) + IF( CSCALE.EQ.SMLNUM ) THEN +* +* If scaling back towards underflow, adjust WI if an +* offdiagonal element of a 2-by-2 block in the Schur form +* underflows. +* + IF( IEVAL.GT.0 ) THEN + I1 = IEVAL + 1 + I2 = IHI - 1 + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, + $ MAX( ILO-1, 1 ), IERR ) + ELSE IF( WANTST ) THEN + I1 = 1 + I2 = N - 1 + ELSE + I1 = ILO + I2 = IHI - 1 + END IF + INXT = I1 - 1 + DO 20 I = I1, I2 + IF( I.LT.INXT ) + $ GO TO 20 + IF( WI( I ).EQ.ZERO ) THEN + INXT = I + 1 + ELSE + IF( A( I+1, I ).EQ.ZERO ) THEN + WI( I ) = ZERO + WI( I+1 ) = ZERO + ELSE IF( A( I+1, I ).NE.ZERO .AND. A( I, I+1 ).EQ. + $ ZERO ) THEN + WI( I ) = ZERO + WI( I+1 ) = ZERO + IF( I.GT.1 ) + $ CALL DSWAP( I-1, A( 1, I ), 1, A( 1, I+1 ), 1 ) + IF( N.GT.I+1 ) + $ CALL DSWAP( N-I-1, A( I, I+2 ), LDA, + $ A( I+1, I+2 ), LDA ) + CALL DSWAP( N, VS( 1, I ), 1, VS( 1, I+1 ), 1 ) + A( I, I+1 ) = A( I+1, I ) + A( I+1, I ) = ZERO + END IF + INXT = I + 2 + END IF + 20 CONTINUE + END IF +* +* Undo scaling for the imaginary part of the eigenvalues +* + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N-IEVAL, 1, + $ WI( IEVAL+1 ), MAX( N-IEVAL, 1 ), IERR ) + END IF +* + IF( WANTST .AND. INFO.EQ.0 ) THEN +* +* Check if reordering successful +* + LASTSL = .TRUE. + LST2SL = .TRUE. + SDIM = 0 + IP = 0 + DO 30 I = 1, N + CURSL = SELECT( WR( I ), WI( I ) ) + IF( WI( I ).EQ.ZERO ) THEN + IF( CURSL ) + $ SDIM = SDIM + 1 + IP = 0 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + ELSE + IF( IP.EQ.1 ) THEN +* +* Last eigenvalue of conjugate pair +* + CURSL = CURSL .OR. LASTSL + LASTSL = CURSL + IF( CURSL ) + $ SDIM = SDIM + 2 + IP = -1 + IF( CURSL .AND. .NOT.LST2SL ) + $ INFO = N + 2 + ELSE +* +* First eigenvalue of conjugate pair +* + IP = 1 + END IF + END IF + LST2SL = LASTSL + LASTSL = CURSL + 30 CONTINUE + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of DGEES +* + END diff --git a/costa/native/external/lapack/dgeesx.f b/costa/native/external/lapack/dgeesx.f new file mode 100644 index 000000000..bda013487 --- /dev/null +++ b/costa/native/external/lapack/dgeesx.f @@ -0,0 +1,502 @@ + SUBROUTINE DGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM, + $ WR, WI, VS, LDVS, RCONDE, RCONDV, WORK, LWORK, + $ IWORK, LIWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVS, SENSE, SORT + INTEGER INFO, LDA, LDVS, LIWORK, LWORK, N, SDIM + DOUBLE PRECISION RCONDE, RCONDV +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ), + $ WR( * ) +* .. +* .. Function Arguments .. + LOGICAL SELECT + EXTERNAL SELECT +* .. +* +* Purpose +* ======= +* +* DGEESX computes for an N-by-N real nonsymmetric matrix A, the +* eigenvalues, the real Schur form T, and, optionally, the matrix of +* Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). +* +* Optionally, it also orders the eigenvalues on the diagonal of the +* real Schur form so that selected eigenvalues are at the top left; +* computes a reciprocal condition number for the average of the +* selected eigenvalues (RCONDE); and computes a reciprocal condition +* number for the right invariant subspace corresponding to the +* selected eigenvalues (RCONDV). The leading columns of Z form an +* orthonormal basis for this invariant subspace. +* +* For further explanation of the reciprocal condition numbers RCONDE +* and RCONDV, see Section 4.10 of the LAPACK Users' Guide (where +* these quantities are called s and sep respectively). +* +* A real matrix is in real Schur form if it is upper quasi-triangular +* with 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in +* the form +* [ a b ] +* [ c a ] +* +* where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). +* +* Arguments +* ========= +* +* JOBVS (input) CHARACTER*1 +* = 'N': Schur vectors are not computed; +* = 'V': Schur vectors are computed. +* +* SORT (input) CHARACTER*1 +* Specifies whether or not to order the eigenvalues on the +* diagonal of the Schur form. +* = 'N': Eigenvalues are not ordered; +* = 'S': Eigenvalues are ordered (see SELECT). +* +* SELECT (input) LOGICAL FUNCTION of two DOUBLE PRECISION arguments +* SELECT must be declared EXTERNAL in the calling subroutine. +* If SORT = 'S', SELECT is used to select eigenvalues to sort +* to the top left of the Schur form. +* If SORT = 'N', SELECT is not referenced. +* An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if +* SELECT(WR(j),WI(j)) is true; i.e., if either one of a +* complex conjugate pair of eigenvalues is selected, then both +* are. Note that a selected complex eigenvalue may no longer +* satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since +* ordering may change the value of complex eigenvalues +* (especially if the eigenvalue is ill-conditioned); in this +* case INFO may be set to N+3 (see INFO below). +* +* SENSE (input) CHARACTER*1 +* Determines which reciprocal condition numbers are computed. +* = 'N': None are computed; +* = 'E': Computed for average of selected eigenvalues only; +* = 'V': Computed for selected right invariant subspace only; +* = 'B': Computed for both. +* If SENSE = 'E', 'V' or 'B', SORT must equal 'S'. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) +* On entry, the N-by-N matrix A. +* On exit, A is overwritten by its real Schur form T. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* SDIM (output) INTEGER +* If SORT = 'N', SDIM = 0. +* If SORT = 'S', SDIM = number of eigenvalues (after sorting) +* for which SELECT is true. (Complex conjugate +* pairs for which SELECT is true for either +* eigenvalue count as 2.) +* +* WR (output) DOUBLE PRECISION array, dimension (N) +* WI (output) DOUBLE PRECISION array, dimension (N) +* WR and WI contain the real and imaginary parts, respectively, +* of the computed eigenvalues, in the same order that they +* appear on the diagonal of the output Schur form T. Complex +* conjugate pairs of eigenvalues appear consecutively with the +* eigenvalue having the positive imaginary part first. +* +* VS (output) DOUBLE PRECISION array, dimension (LDVS,N) +* If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur +* vectors. +* If JOBVS = 'N', VS is not referenced. +* +* LDVS (input) INTEGER +* The leading dimension of the array VS. LDVS >= 1, and if +* JOBVS = 'V', LDVS >= N. +* +* RCONDE (output) DOUBLE PRECISION +* If SENSE = 'E' or 'B', RCONDE contains the reciprocal +* condition number for the average of the selected eigenvalues. +* Not referenced if SENSE = 'N' or 'V'. +* +* RCONDV (output) DOUBLE PRECISION +* If SENSE = 'V' or 'B', RCONDV contains the reciprocal +* condition number for the selected right invariant subspace. +* Not referenced if SENSE = 'N' or 'E'. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,3*N). +* Also, if SENSE = 'E' or 'V' or 'B', +* LWORK >= N+2*SDIM*(N-SDIM), where SDIM is the number of +* selected eigenvalues computed by this routine. Note that +* N+2*SDIM*(N-SDIM) <= N+N*N/2. +* For good performance, LWORK must generally be larger. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* Not referenced if SENSE = 'N' or 'E'. +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* LIWORK >= 1; if SENSE = 'V' or 'B', LIWORK >= SDIM*(N-SDIM). +* +* BWORK (workspace) LOGICAL array, dimension (N) +* Not referenced if SORT = 'N'. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, and i is +* <= N: the QR algorithm failed to compute all the +* eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI +* contain those eigenvalues which have converged; if +* JOBVS = 'V', VS contains the transformation which +* reduces A to its partially converged Schur form. +* = N+1: the eigenvalues could not be reordered because some +* eigenvalues were too close to separate (the problem +* is very ill-conditioned); +* = N+2: after reordering, roundoff changed values of some +* complex eigenvalues so that leading eigenvalues in +* the Schur form no longer satisfy SELECT=.TRUE. This +* could also be caused by underflow due to scaling. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, LASTSL, LST2SL, SCALEA, WANTSB, WANTSE, + $ WANTSN, WANTST, WANTSV, WANTVS + INTEGER HSWORK, I, I1, I2, IBAL, ICOND, IERR, IEVAL, + $ IHI, ILO, INXT, IP, ITAU, IWRK, K, MAXB, + $ MAXWRK, MINWRK + DOUBLE PRECISION ANRM, BIGNUM, CSCALE, EPS, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEBAK, DGEBAL, DGEHRD, DHSEQR, DLACPY, + $ DLASCL, DORGHR, DSWAP, DTRSEN, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + WANTVS = LSAME( JOBVS, 'V' ) + WANTST = LSAME( SORT, 'S' ) + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) + IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR. + $ ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN + INFO = -12 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "RWorkspace:" describe the +* minimal amount of real workspace needed at that point in the +* code, as well as the preferred amount for good performance. +* IWorkspace refers to integer workspace. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by DHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case. +* If SENSE = 'E', 'V' or 'B', then the amount of workspace needed +* depends on SDIM, which is computed by the routine DTRSEN later +* in the code.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. LWORK.GE.1 ) THEN + MAXWRK = 2*N + N*ILAENV( 1, 'DGEHRD', ' ', N, 1, N, 0 ) + MINWRK = MAX( 1, 3*N ) + IF( .NOT.WANTVS ) THEN + MAXB = MAX( ILAENV( 8, 'DHSEQR', 'SN', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'DHSEQR', 'SN', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, N+HSWORK, 1 ) + ELSE + MAXWRK = MAX( MAXWRK, 2*N+( N-1 )* + $ ILAENV( 1, 'DORGHR', ' ', N, 1, N, -1 ) ) + MAXB = MAX( ILAENV( 8, 'DHSEQR', 'SV', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'DHSEQR', 'SV', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, N+HSWORK, 1 ) + END IF + WORK( 1 ) = MAXWRK + END IF + IF( LWORK.LT.MINWRK ) THEN + INFO = -16 + END IF + IF( LIWORK.LT.1 ) THEN + INFO = -18 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEESX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL DLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (RWorkspace: need N) +* + IBAL = 1 + CALL DGEBAL( 'P', N, A, LDA, ILO, IHI, WORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (RWorkspace: need 3*N, prefer 2*N+N*NB) +* + ITAU = N + IBAL + IWRK = N + ITAU + CALL DGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVS ) THEN +* +* Copy Householder vectors to VS +* + CALL DLACPY( 'L', N, N, A, LDA, VS, LDVS ) +* +* Generate orthogonal matrix in VS +* (RWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* + CALL DORGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) + END IF +* + SDIM = 0 +* +* Perform QR iteration, accumulating Schur vectors in VS if desired +* (RWorkspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL DHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, WR, WI, VS, LDVS, + $ WORK( IWRK ), LWORK-IWRK+1, IEVAL ) + IF( IEVAL.GT.0 ) + $ INFO = IEVAL +* +* Sort eigenvalues if desired +* + IF( WANTST .AND. INFO.EQ.0 ) THEN + IF( SCALEA ) THEN + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WR, N, IERR ) + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WI, N, IERR ) + END IF + DO 10 I = 1, N + BWORK( I ) = SELECT( WR( I ), WI( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues, transform Schur vectors, and compute +* reciprocal condition numbers +* (RWorkspace: if SENSE is not 'N', need N+2*SDIM*(N-SDIM) +* otherwise, need N ) +* (IWorkspace: if SENSE is 'V' or 'B', need SDIM*(N-SDIM) +* otherwise, need 0 ) +* + CALL DTRSEN( SENSE, JOBVS, BWORK, N, A, LDA, VS, LDVS, WR, WI, + $ SDIM, RCONDE, RCONDV, WORK( IWRK ), LWORK-IWRK+1, + $ IWORK, LIWORK, ICOND ) + IF( .NOT.WANTSN ) + $ MAXWRK = MAX( MAXWRK, N+2*SDIM*( N-SDIM ) ) + IF( ICOND.EQ.-15 ) THEN +* +* Not enough real workspace +* + INFO = -16 + ELSE IF( ICOND.EQ.-17 ) THEN +* +* Not enough integer workspace +* + INFO = -18 + ELSE IF( ICOND.GT.0 ) THEN +* +* DTRSEN failed to reorder or to restore standard Schur form +* + INFO = ICOND + N + END IF + END IF +* + IF( WANTVS ) THEN +* +* Undo balancing +* (RWorkspace: need N) +* + CALL DGEBAK( 'P', 'R', N, ILO, IHI, WORK( IBAL ), N, VS, LDVS, + $ IERR ) + END IF +* + IF( SCALEA ) THEN +* +* Undo scaling for the Schur form of A +* + CALL DLASCL( 'H', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR ) + CALL DCOPY( N, A, LDA+1, WR, 1 ) + IF( ( WANTSV .OR. WANTSB ) .AND. INFO.EQ.0 ) THEN + DUM( 1 ) = RCONDV + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR ) + RCONDV = DUM( 1 ) + END IF + IF( CSCALE.EQ.SMLNUM ) THEN +* +* If scaling back towards underflow, adjust WI if an +* offdiagonal element of a 2-by-2 block in the Schur form +* underflows. +* + IF( IEVAL.GT.0 ) THEN + I1 = IEVAL + 1 + I2 = IHI - 1 + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, N, + $ IERR ) + ELSE IF( WANTST ) THEN + I1 = 1 + I2 = N - 1 + ELSE + I1 = ILO + I2 = IHI - 1 + END IF + INXT = I1 - 1 + DO 20 I = I1, I2 + IF( I.LT.INXT ) + $ GO TO 20 + IF( WI( I ).EQ.ZERO ) THEN + INXT = I + 1 + ELSE + IF( A( I+1, I ).EQ.ZERO ) THEN + WI( I ) = ZERO + WI( I+1 ) = ZERO + ELSE IF( A( I+1, I ).NE.ZERO .AND. A( I, I+1 ).EQ. + $ ZERO ) THEN + WI( I ) = ZERO + WI( I+1 ) = ZERO + IF( I.GT.1 ) + $ CALL DSWAP( I-1, A( 1, I ), 1, A( 1, I+1 ), 1 ) + IF( N.GT.I+1 ) + $ CALL DSWAP( N-I-1, A( I, I+2 ), LDA, + $ A( I+1, I+2 ), LDA ) + CALL DSWAP( N, VS( 1, I ), 1, VS( 1, I+1 ), 1 ) + A( I, I+1 ) = A( I+1, I ) + A( I+1, I ) = ZERO + END IF + INXT = I + 2 + END IF + 20 CONTINUE + END IF + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N-IEVAL, 1, + $ WI( IEVAL+1 ), MAX( N-IEVAL, 1 ), IERR ) + END IF +* + IF( WANTST .AND. INFO.EQ.0 ) THEN +* +* Check if reordering successful +* + LASTSL = .TRUE. + LST2SL = .TRUE. + SDIM = 0 + IP = 0 + DO 30 I = 1, N + CURSL = SELECT( WR( I ), WI( I ) ) + IF( WI( I ).EQ.ZERO ) THEN + IF( CURSL ) + $ SDIM = SDIM + 1 + IP = 0 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + ELSE + IF( IP.EQ.1 ) THEN +* +* Last eigenvalue of conjugate pair +* + CURSL = CURSL .OR. LASTSL + LASTSL = CURSL + IF( CURSL ) + $ SDIM = SDIM + 2 + IP = -1 + IF( CURSL .AND. .NOT.LST2SL ) + $ INFO = N + 2 + ELSE +* +* First eigenvalue of conjugate pair +* + IP = 1 + END IF + END IF + LST2SL = LASTSL + LASTSL = CURSL + 30 CONTINUE + END IF +* + WORK( 1 ) = MAXWRK + IF( WANTSV .OR. WANTSB ) THEN + IWORK( 1 ) = SDIM*( N-SDIM ) + ELSE + IWORK( 1 ) = 1 + END IF +* + RETURN +* +* End of DGEESX +* + END diff --git a/costa/native/external/lapack/dgeev.f b/costa/native/external/lapack/dgeev.f new file mode 100644 index 000000000..cdd3d2d6e --- /dev/null +++ b/costa/native/external/lapack/dgeev.f @@ -0,0 +1,409 @@ + SUBROUTINE DGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR, + $ LDVR, WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* December 8, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WI( * ), WORK( * ), WR( * ) +* .. +* +* Purpose +* ======= +* +* DGEEV computes for an N-by-N real nonsymmetric matrix A, the +* eigenvalues and, optionally, the left and/or right eigenvectors. +* +* The right eigenvector v(j) of A satisfies +* A * v(j) = lambda(j) * v(j) +* where lambda(j) is its eigenvalue. +* The left eigenvector u(j) of A satisfies +* u(j)**H * A = lambda(j) * u(j)**H +* where u(j)**H denotes the conjugate transpose of u(j). +* +* The computed eigenvectors are normalized to have Euclidean norm +* equal to 1 and largest component real. +* +* Arguments +* ========= +* +* JOBVL (input) CHARACTER*1 +* = 'N': left eigenvectors of A are not computed; +* = 'V': left eigenvectors of A are computed. +* +* JOBVR (input) CHARACTER*1 +* = 'N': right eigenvectors of A are not computed; +* = 'V': right eigenvectors of A are computed. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the N-by-N matrix A. +* On exit, A has been overwritten. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* WR (output) DOUBLE PRECISION array, dimension (N) +* WI (output) DOUBLE PRECISION array, dimension (N) +* WR and WI contain the real and imaginary parts, +* respectively, of the computed eigenvalues. Complex +* conjugate pairs of eigenvalues appear consecutively +* with the eigenvalue having the positive imaginary part +* first. +* +* VL (output) DOUBLE PRECISION array, dimension (LDVL,N) +* If JOBVL = 'V', the left eigenvectors u(j) are stored one +* after another in the columns of VL, in the same order +* as their eigenvalues. +* If JOBVL = 'N', VL is not referenced. +* If the j-th eigenvalue is real, then u(j) = VL(:,j), +* the j-th column of VL. +* If the j-th and (j+1)-st eigenvalues form a complex +* conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and +* u(j+1) = VL(:,j) - i*VL(:,j+1). +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. LDVL >= 1; if +* JOBVL = 'V', LDVL >= N. +* +* VR (output) DOUBLE PRECISION array, dimension (LDVR,N) +* If JOBVR = 'V', the right eigenvectors v(j) are stored one +* after another in the columns of VR, in the same order +* as their eigenvalues. +* If JOBVR = 'N', VR is not referenced. +* If the j-th eigenvalue is real, then v(j) = VR(:,j), +* the j-th column of VR. +* If the j-th and (j+1)-st eigenvalues form a complex +* conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and +* v(j+1) = VR(:,j) - i*VR(:,j+1). +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. LDVR >= 1; if +* JOBVR = 'V', LDVR >= N. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,3*N), and +* if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N. For good +* performance, LWORK must generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the QR algorithm failed to compute all the +* eigenvalues, and no eigenvectors have been computed; +* elements i+1:N of WR and WI contain eigenvalues which +* have converged. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTVL, WANTVR + CHARACTER SIDE + INTEGER HSWORK, I, IBAL, IERR, IHI, ILO, ITAU, IWRK, K, + $ MAXB, MAXWRK, MINWRK, NOUT + DOUBLE PRECISION ANRM, BIGNUM, CS, CSCALE, EPS, R, SCL, SMLNUM, + $ SN +* .. +* .. Local Arrays .. + LOGICAL SELECT( 1 ) + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DGEBAK, DGEBAL, DGEHRD, DHSEQR, DLACPY, DLARTG, + $ DLASCL, DORGHR, DROT, DSCAL, DTREVC, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX, ILAENV + DOUBLE PRECISION DLAMCH, DLANGE, DLAPY2, DNRM2 + EXTERNAL LSAME, IDAMAX, ILAENV, DLAMCH, DLANGE, DLAPY2, + $ DNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVL = LSAME( JOBVL, 'V' ) + WANTVR = LSAME( JOBVR, 'V' ) + IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN + INFO = -9 + ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by DHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MAXWRK = 2*N + N*ILAENV( 1, 'DGEHRD', ' ', N, 1, N, 0 ) + IF( ( .NOT.WANTVL ) .AND. ( .NOT.WANTVR ) ) THEN + MINWRK = MAX( 1, 3*N ) + MAXB = MAX( ILAENV( 8, 'DHSEQR', 'EN', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'DHSEQR', 'EN', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, N+1, N+HSWORK ) + ELSE + MINWRK = MAX( 1, 4*N ) + MAXWRK = MAX( MAXWRK, 2*N+( N-1 )* + $ ILAENV( 1, 'DORGHR', ' ', N, 1, N, -1 ) ) + MAXB = MAX( ILAENV( 8, 'DHSEQR', 'SV', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'DHSEQR', 'SV', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, N+1, N+HSWORK ) + MAXWRK = MAX( MAXWRK, 4*N ) + END IF + WORK( 1 ) = MAXWRK + END IF + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL DLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Balance the matrix +* (Workspace: need N) +* + IBAL = 1 + CALL DGEBAL( 'B', N, A, LDA, ILO, IHI, WORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (Workspace: need 3*N, prefer 2*N+N*NB) +* + ITAU = IBAL + N + IWRK = ITAU + N + CALL DGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVL ) THEN +* +* Want left eigenvectors +* Copy Householder vectors to VL +* + SIDE = 'L' + CALL DLACPY( 'L', N, N, A, LDA, VL, LDVL ) +* +* Generate orthogonal matrix in VL +* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* + CALL DORGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VL +* (Workspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL DHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VL, LDVL, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + IF( WANTVR ) THEN +* +* Want left and right eigenvectors +* Copy Schur vectors to VR +* + SIDE = 'B' + CALL DLACPY( 'F', N, N, VL, LDVL, VR, LDVR ) + END IF +* + ELSE IF( WANTVR ) THEN +* +* Want right eigenvectors +* Copy Householder vectors to VR +* + SIDE = 'R' + CALL DLACPY( 'L', N, N, A, LDA, VR, LDVR ) +* +* Generate orthogonal matrix in VR +* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* + CALL DORGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VR +* (Workspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL DHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + ELSE +* +* Compute eigenvalues only +* (Workspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL DHSEQR( 'E', 'N', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) + END IF +* +* If INFO > 0 from DHSEQR, then quit +* + IF( INFO.GT.0 ) + $ GO TO 50 +* + IF( WANTVL .OR. WANTVR ) THEN +* +* Compute left and/or right eigenvectors +* (Workspace: need 4*N) +* + CALL DTREVC( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ N, NOUT, WORK( IWRK ), IERR ) + END IF +* + IF( WANTVL ) THEN +* +* Undo balancing of left eigenvectors +* (Workspace: need N) +* + CALL DGEBAK( 'B', 'L', N, ILO, IHI, WORK( IBAL ), N, VL, LDVL, + $ IERR ) +* +* Normalize left eigenvectors and make largest component real +* + DO 20 I = 1, N + IF( WI( I ).EQ.ZERO ) THEN + SCL = ONE / DNRM2( N, VL( 1, I ), 1 ) + CALL DSCAL( N, SCL, VL( 1, I ), 1 ) + ELSE IF( WI( I ).GT.ZERO ) THEN + SCL = ONE / DLAPY2( DNRM2( N, VL( 1, I ), 1 ), + $ DNRM2( N, VL( 1, I+1 ), 1 ) ) + CALL DSCAL( N, SCL, VL( 1, I ), 1 ) + CALL DSCAL( N, SCL, VL( 1, I+1 ), 1 ) + DO 10 K = 1, N + WORK( IWRK+K-1 ) = VL( K, I )**2 + VL( K, I+1 )**2 + 10 CONTINUE + K = IDAMAX( N, WORK( IWRK ), 1 ) + CALL DLARTG( VL( K, I ), VL( K, I+1 ), CS, SN, R ) + CALL DROT( N, VL( 1, I ), 1, VL( 1, I+1 ), 1, CS, SN ) + VL( K, I+1 ) = ZERO + END IF + 20 CONTINUE + END IF +* + IF( WANTVR ) THEN +* +* Undo balancing of right eigenvectors +* (Workspace: need N) +* + CALL DGEBAK( 'B', 'R', N, ILO, IHI, WORK( IBAL ), N, VR, LDVR, + $ IERR ) +* +* Normalize right eigenvectors and make largest component real +* + DO 40 I = 1, N + IF( WI( I ).EQ.ZERO ) THEN + SCL = ONE / DNRM2( N, VR( 1, I ), 1 ) + CALL DSCAL( N, SCL, VR( 1, I ), 1 ) + ELSE IF( WI( I ).GT.ZERO ) THEN + SCL = ONE / DLAPY2( DNRM2( N, VR( 1, I ), 1 ), + $ DNRM2( N, VR( 1, I+1 ), 1 ) ) + CALL DSCAL( N, SCL, VR( 1, I ), 1 ) + CALL DSCAL( N, SCL, VR( 1, I+1 ), 1 ) + DO 30 K = 1, N + WORK( IWRK+K-1 ) = VR( K, I )**2 + VR( K, I+1 )**2 + 30 CONTINUE + K = IDAMAX( N, WORK( IWRK ), 1 ) + CALL DLARTG( VR( K, I ), VR( K, I+1 ), CS, SN, R ) + CALL DROT( N, VR( 1, I ), 1, VR( 1, I+1 ), 1, CS, SN ) + VR( K, I+1 ) = ZERO + END IF + 40 CONTINUE + END IF +* +* Undo scaling if necessary +* + 50 CONTINUE + IF( SCALEA ) THEN + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WR( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WI( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + IF( INFO.GT.0 ) THEN + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WR, N, + $ IERR ) + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, N, + $ IERR ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of DGEEV +* + END diff --git a/costa/native/external/lapack/dgeevx.f b/costa/native/external/lapack/dgeevx.f new file mode 100644 index 000000000..f6f173b23 --- /dev/null +++ b/costa/native/external/lapack/dgeevx.f @@ -0,0 +1,544 @@ + SUBROUTINE DGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, WR, WI, + $ VL, LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, + $ RCONDE, RCONDV, WORK, LWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER BALANC, JOBVL, JOBVR, SENSE + INTEGER IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N + DOUBLE PRECISION ABNRM +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), RCONDE( * ), RCONDV( * ), + $ SCALE( * ), VL( LDVL, * ), VR( LDVR, * ), + $ WI( * ), WORK( * ), WR( * ) +* .. +* +* Purpose +* ======= +* +* DGEEVX computes for an N-by-N real nonsymmetric matrix A, the +* eigenvalues and, optionally, the left and/or right eigenvectors. +* +* Optionally also, it computes a balancing transformation to improve +* the conditioning of the eigenvalues and eigenvectors (ILO, IHI, +* SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues +* (RCONDE), and reciprocal condition numbers for the right +* eigenvectors (RCONDV). +* +* The right eigenvector v(j) of A satisfies +* A * v(j) = lambda(j) * v(j) +* where lambda(j) is its eigenvalue. +* The left eigenvector u(j) of A satisfies +* u(j)**H * A = lambda(j) * u(j)**H +* where u(j)**H denotes the conjugate transpose of u(j). +* +* The computed eigenvectors are normalized to have Euclidean norm +* equal to 1 and largest component real. +* +* Balancing a matrix means permuting the rows and columns to make it +* more nearly upper triangular, and applying a diagonal similarity +* transformation D * A * D**(-1), where D is a diagonal matrix, to +* make its rows and columns closer in norm and the condition numbers +* of its eigenvalues and eigenvectors smaller. The computed +* reciprocal condition numbers correspond to the balanced matrix. +* Permuting rows and columns will not change the condition numbers +* (in exact arithmetic) but diagonal scaling will. For further +* explanation of balancing, see section 4.10.2 of the LAPACK +* Users' Guide. +* +* Arguments +* ========= +* +* BALANC (input) CHARACTER*1 +* Indicates how the input matrix should be diagonally scaled +* and/or permuted to improve the conditioning of its +* eigenvalues. +* = 'N': Do not diagonally scale or permute; +* = 'P': Perform permutations to make the matrix more nearly +* upper triangular. Do not diagonally scale; +* = 'S': Diagonally scale the matrix, i.e. replace A by +* D*A*D**(-1), where D is a diagonal matrix chosen +* to make the rows and columns of A more equal in +* norm. Do not permute; +* = 'B': Both diagonally scale and permute A. +* +* Computed reciprocal condition numbers will be for the matrix +* after balancing and/or permuting. Permuting does not change +* condition numbers (in exact arithmetic), but balancing does. +* +* JOBVL (input) CHARACTER*1 +* = 'N': left eigenvectors of A are not computed; +* = 'V': left eigenvectors of A are computed. +* If SENSE = 'E' or 'B', JOBVL must = 'V'. +* +* JOBVR (input) CHARACTER*1 +* = 'N': right eigenvectors of A are not computed; +* = 'V': right eigenvectors of A are computed. +* If SENSE = 'E' or 'B', JOBVR must = 'V'. +* +* SENSE (input) CHARACTER*1 +* Determines which reciprocal condition numbers are computed. +* = 'N': None are computed; +* = 'E': Computed for eigenvalues only; +* = 'V': Computed for right eigenvectors only; +* = 'B': Computed for eigenvalues and right eigenvectors. +* +* If SENSE = 'E' or 'B', both left and right eigenvectors +* must also be computed (JOBVL = 'V' and JOBVR = 'V'). +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the N-by-N matrix A. +* On exit, A has been overwritten. If JOBVL = 'V' or +* JOBVR = 'V', A contains the real Schur form of the balanced +* version of the input matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* WR (output) DOUBLE PRECISION array, dimension (N) +* WI (output) DOUBLE PRECISION array, dimension (N) +* WR and WI contain the real and imaginary parts, +* respectively, of the computed eigenvalues. Complex +* conjugate pairs of eigenvalues will appear consecutively +* with the eigenvalue having the positive imaginary part +* first. +* +* VL (output) DOUBLE PRECISION array, dimension (LDVL,N) +* If JOBVL = 'V', the left eigenvectors u(j) are stored one +* after another in the columns of VL, in the same order +* as their eigenvalues. +* If JOBVL = 'N', VL is not referenced. +* If the j-th eigenvalue is real, then u(j) = VL(:,j), +* the j-th column of VL. +* If the j-th and (j+1)-st eigenvalues form a complex +* conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and +* u(j+1) = VL(:,j) - i*VL(:,j+1). +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. LDVL >= 1; if +* JOBVL = 'V', LDVL >= N. +* +* VR (output) DOUBLE PRECISION array, dimension (LDVR,N) +* If JOBVR = 'V', the right eigenvectors v(j) are stored one +* after another in the columns of VR, in the same order +* as their eigenvalues. +* If JOBVR = 'N', VR is not referenced. +* If the j-th eigenvalue is real, then v(j) = VR(:,j), +* the j-th column of VR. +* If the j-th and (j+1)-st eigenvalues form a complex +* conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and +* v(j+1) = VR(:,j) - i*VR(:,j+1). +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. LDVR >= 1, and if +* JOBVR = 'V', LDVR >= N. +* +* ILO,IHI (output) INTEGER +* ILO and IHI are integer values determined when A was +* balanced. The balanced A(i,j) = 0 if I > J and +* J = 1,...,ILO-1 or I = IHI+1,...,N. +* +* SCALE (output) DOUBLE PRECISION array, dimension (N) +* Details of the permutations and scaling factors applied +* when balancing A. If P(j) is the index of the row and column +* interchanged with row and column j, and D(j) is the scaling +* factor applied to row and column j, then +* SCALE(J) = P(J), for J = 1,...,ILO-1 +* = D(J), for J = ILO,...,IHI +* = P(J) for J = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* ABNRM (output) DOUBLE PRECISION +* The one-norm of the balanced matrix (the maximum +* of the sum of absolute values of elements of any column). +* +* RCONDE (output) DOUBLE PRECISION array, dimension (N) +* RCONDE(j) is the reciprocal condition number of the j-th +* eigenvalue. +* +* RCONDV (output) DOUBLE PRECISION array, dimension (N) +* RCONDV(j) is the reciprocal condition number of the j-th +* right eigenvector. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. If SENSE = 'N' or 'E', +* LWORK >= max(1,2*N), and if JOBVL = 'V' or JOBVR = 'V', +* LWORK >= 3*N. If SENSE = 'V' or 'B', LWORK >= N*(N+6). +* For good performance, LWORK must generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace) INTEGER array, dimension (2*N-2) +* If SENSE = 'N' or 'E', not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the QR algorithm failed to compute all the +* eigenvalues, and no eigenvectors or condition numbers +* have been computed; elements 1:ILO-1 and i+1:N of WR +* and WI contain eigenvalues which have converged. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTVL, WANTVR, WNTSNB, WNTSNE, + $ WNTSNN, WNTSNV + CHARACTER JOB, SIDE + INTEGER HSWORK, I, ICOND, IERR, ITAU, IWRK, K, MAXB, + $ MAXWRK, MINWRK, NOUT + DOUBLE PRECISION ANRM, BIGNUM, CS, CSCALE, EPS, R, SCL, SMLNUM, + $ SN +* .. +* .. Local Arrays .. + LOGICAL SELECT( 1 ) + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DGEBAK, DGEBAL, DGEHRD, DHSEQR, DLACPY, DLARTG, + $ DLASCL, DORGHR, DROT, DSCAL, DTREVC, DTRSNA, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX, ILAENV + DOUBLE PRECISION DLAMCH, DLANGE, DLAPY2, DNRM2 + EXTERNAL LSAME, IDAMAX, ILAENV, DLAMCH, DLANGE, DLAPY2, + $ DNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVL = LSAME( JOBVL, 'V' ) + WANTVR = LSAME( JOBVR, 'V' ) + WNTSNN = LSAME( SENSE, 'N' ) + WNTSNE = LSAME( SENSE, 'E' ) + WNTSNV = LSAME( SENSE, 'V' ) + WNTSNB = LSAME( SENSE, 'B' ) + IF( .NOT.( LSAME( BALANC, 'N' ) .OR. LSAME( BALANC, + $ 'S' ) .OR. LSAME( BALANC, 'P' ) .OR. LSAME( BALANC, 'B' ) ) ) + $ THEN + INFO = -1 + ELSE IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( .NOT.( WNTSNN .OR. WNTSNE .OR. WNTSNB .OR. WNTSNV ) .OR. + $ ( ( WNTSNE .OR. WNTSNB ) .AND. .NOT.( WANTVL .AND. + $ WANTVR ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN + INFO = -11 + ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN + INFO = -13 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by DHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MAXWRK = N + N*ILAENV( 1, 'DGEHRD', ' ', N, 1, N, 0 ) + IF( ( .NOT.WANTVL ) .AND. ( .NOT.WANTVR ) ) THEN + MINWRK = MAX( 1, 2*N ) + IF( .NOT.WNTSNN ) + $ MINWRK = MAX( MINWRK, N*N+6*N ) + MAXB = MAX( ILAENV( 8, 'DHSEQR', 'SN', N, 1, N, -1 ), 2 ) + IF( WNTSNN ) THEN + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'DHSEQR', 'EN', N, + $ 1, N, -1 ) ) ) + ELSE + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'DHSEQR', 'SN', N, + $ 1, N, -1 ) ) ) + END IF + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, 1, HSWORK ) + IF( .NOT.WNTSNN ) + $ MAXWRK = MAX( MAXWRK, N*N+6*N ) + ELSE + MINWRK = MAX( 1, 3*N ) + IF( ( .NOT.WNTSNN ) .AND. ( .NOT.WNTSNE ) ) + $ MINWRK = MAX( MINWRK, N*N+6*N ) + MAXB = MAX( ILAENV( 8, 'DHSEQR', 'SN', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'DHSEQR', 'EN', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, 1, HSWORK ) + MAXWRK = MAX( MAXWRK, N+( N-1 )* + $ ILAENV( 1, 'DORGHR', ' ', N, 1, N, -1 ) ) + IF( ( .NOT.WNTSNN ) .AND. ( .NOT.WNTSNE ) ) + $ MAXWRK = MAX( MAXWRK, N*N+6*N ) + MAXWRK = MAX( MAXWRK, 3*N, 1 ) + END IF + WORK( 1 ) = MAXWRK + END IF + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -21 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ICOND = 0 + ANRM = DLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL DLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Balance the matrix and compute ABNRM +* + CALL DGEBAL( BALANC, N, A, LDA, ILO, IHI, SCALE, IERR ) + ABNRM = DLANGE( '1', N, N, A, LDA, DUM ) + IF( SCALEA ) THEN + DUM( 1 ) = ABNRM + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR ) + ABNRM = DUM( 1 ) + END IF +* +* Reduce to upper Hessenberg form +* (Workspace: need 2*N, prefer N+N*NB) +* + ITAU = 1 + IWRK = ITAU + N + CALL DGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVL ) THEN +* +* Want left eigenvectors +* Copy Householder vectors to VL +* + SIDE = 'L' + CALL DLACPY( 'L', N, N, A, LDA, VL, LDVL ) +* +* Generate orthogonal matrix in VL +* (Workspace: need 2*N-1, prefer N+(N-1)*NB) +* + CALL DORGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VL +* (Workspace: need 1, prefer HSWORK (see comments) ) +* + IWRK = ITAU + CALL DHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VL, LDVL, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + IF( WANTVR ) THEN +* +* Want left and right eigenvectors +* Copy Schur vectors to VR +* + SIDE = 'B' + CALL DLACPY( 'F', N, N, VL, LDVL, VR, LDVR ) + END IF +* + ELSE IF( WANTVR ) THEN +* +* Want right eigenvectors +* Copy Householder vectors to VR +* + SIDE = 'R' + CALL DLACPY( 'L', N, N, A, LDA, VR, LDVR ) +* +* Generate orthogonal matrix in VR +* (Workspace: need 2*N-1, prefer N+(N-1)*NB) +* + CALL DORGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VR +* (Workspace: need 1, prefer HSWORK (see comments) ) +* + IWRK = ITAU + CALL DHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + ELSE +* +* Compute eigenvalues only +* If condition numbers desired, compute Schur form +* + IF( WNTSNN ) THEN + JOB = 'E' + ELSE + JOB = 'S' + END IF +* +* (Workspace: need 1, prefer HSWORK (see comments) ) +* + IWRK = ITAU + CALL DHSEQR( JOB, 'N', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) + END IF +* +* If INFO > 0 from DHSEQR, then quit +* + IF( INFO.GT.0 ) + $ GO TO 50 +* + IF( WANTVL .OR. WANTVR ) THEN +* +* Compute left and/or right eigenvectors +* (Workspace: need 3*N) +* + CALL DTREVC( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ N, NOUT, WORK( IWRK ), IERR ) + END IF +* +* Compute condition numbers if desired +* (Workspace: need N*N+6*N unless SENSE = 'E') +* + IF( .NOT.WNTSNN ) THEN + CALL DTRSNA( SENSE, 'A', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ RCONDE, RCONDV, N, NOUT, WORK( IWRK ), N, IWORK, + $ ICOND ) + END IF +* + IF( WANTVL ) THEN +* +* Undo balancing of left eigenvectors +* + CALL DGEBAK( BALANC, 'L', N, ILO, IHI, SCALE, N, VL, LDVL, + $ IERR ) +* +* Normalize left eigenvectors and make largest component real +* + DO 20 I = 1, N + IF( WI( I ).EQ.ZERO ) THEN + SCL = ONE / DNRM2( N, VL( 1, I ), 1 ) + CALL DSCAL( N, SCL, VL( 1, I ), 1 ) + ELSE IF( WI( I ).GT.ZERO ) THEN + SCL = ONE / DLAPY2( DNRM2( N, VL( 1, I ), 1 ), + $ DNRM2( N, VL( 1, I+1 ), 1 ) ) + CALL DSCAL( N, SCL, VL( 1, I ), 1 ) + CALL DSCAL( N, SCL, VL( 1, I+1 ), 1 ) + DO 10 K = 1, N + WORK( K ) = VL( K, I )**2 + VL( K, I+1 )**2 + 10 CONTINUE + K = IDAMAX( N, WORK, 1 ) + CALL DLARTG( VL( K, I ), VL( K, I+1 ), CS, SN, R ) + CALL DROT( N, VL( 1, I ), 1, VL( 1, I+1 ), 1, CS, SN ) + VL( K, I+1 ) = ZERO + END IF + 20 CONTINUE + END IF +* + IF( WANTVR ) THEN +* +* Undo balancing of right eigenvectors +* + CALL DGEBAK( BALANC, 'R', N, ILO, IHI, SCALE, N, VR, LDVR, + $ IERR ) +* +* Normalize right eigenvectors and make largest component real +* + DO 40 I = 1, N + IF( WI( I ).EQ.ZERO ) THEN + SCL = ONE / DNRM2( N, VR( 1, I ), 1 ) + CALL DSCAL( N, SCL, VR( 1, I ), 1 ) + ELSE IF( WI( I ).GT.ZERO ) THEN + SCL = ONE / DLAPY2( DNRM2( N, VR( 1, I ), 1 ), + $ DNRM2( N, VR( 1, I+1 ), 1 ) ) + CALL DSCAL( N, SCL, VR( 1, I ), 1 ) + CALL DSCAL( N, SCL, VR( 1, I+1 ), 1 ) + DO 30 K = 1, N + WORK( K ) = VR( K, I )**2 + VR( K, I+1 )**2 + 30 CONTINUE + K = IDAMAX( N, WORK, 1 ) + CALL DLARTG( VR( K, I ), VR( K, I+1 ), CS, SN, R ) + CALL DROT( N, VR( 1, I ), 1, VR( 1, I+1 ), 1, CS, SN ) + VR( K, I+1 ) = ZERO + END IF + 40 CONTINUE + END IF +* +* Undo scaling if necessary +* + 50 CONTINUE + IF( SCALEA ) THEN + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WR( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WI( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + IF( INFO.EQ.0 ) THEN + IF( ( WNTSNV .OR. WNTSNB ) .AND. ICOND.EQ.0 ) + $ CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, RCONDV, N, + $ IERR ) + ELSE + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WR, N, + $ IERR ) + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, N, + $ IERR ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of DGEEVX +* + END diff --git a/costa/native/external/lapack/dgegs.f b/costa/native/external/lapack/dgegs.f new file mode 100644 index 000000000..25a6c4b50 --- /dev/null +++ b/costa/native/external/lapack/dgegs.f @@ -0,0 +1,470 @@ + SUBROUTINE DGEGS( JOBVSL, JOBVSR, N, A, LDA, B, LDB, ALPHAR, + $ ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), VSL( LDVSL, * ), + $ VSR( LDVSR, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine DGGES. +* +* DGEGS computes for a pair of N-by-N real nonsymmetric matrices A, B: +* the generalized eigenvalues (alphar +/- alphai*i, beta), the real +* Schur form (A, B), and optionally left and/or right Schur vectors +* (VSL and VSR). +* +* (If only the generalized eigenvalues are needed, use the driver DGEGV +* instead.) +* +* A generalized eigenvalue for a pair of matrices (A,B) is, roughly +* speaking, a scalar w or a ratio alpha/beta = w, such that A - w*B +* is singular. It is usually represented as the pair (alpha,beta), +* as there is a reasonable interpretation for beta=0, and even for +* both being zero. A good beginning reference is the book, "Matrix +* Computations", by G. Golub & C. van Loan (Johns Hopkins U. Press) +* +* The (generalized) Schur form of a pair of matrices is the result of +* multiplying both matrices on the left by one orthogonal matrix and +* both on the right by another orthogonal matrix, these two orthogonal +* matrices being chosen so as to bring the pair of matrices into +* (real) Schur form. +* +* A pair of matrices A, B is in generalized real Schur form if B is +* upper triangular with non-negative diagonal and A is block upper +* triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond +* to real generalized eigenvalues, while 2-by-2 blocks of A will be +* "standardized" by making the corresponding elements of B have the +* form: +* [ a 0 ] +* [ 0 b ] +* +* and the pair of corresponding 2-by-2 blocks in A and B will +* have a complex conjugate pair of generalized eigenvalues. +* +* The left and right Schur vectors are the columns of VSL and VSR, +* respectively, where VSL and VSR are the orthogonal matrices +* which reduce A and B to Schur form: +* +* Schur form of (A,B) = ( (VSL)**T A (VSR), (VSL)**T B (VSR) ) +* +* Arguments +* ========= +* +* JOBVSL (input) CHARACTER*1 +* = 'N': do not compute the left Schur vectors; +* = 'V': compute the left Schur vectors. +* +* JOBVSR (input) CHARACTER*1 +* = 'N': do not compute the right Schur vectors; +* = 'V': compute the right Schur vectors. +* +* N (input) INTEGER +* The order of the matrices A, B, VSL, and VSR. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) +* On entry, the first of the pair of matrices whose generalized +* eigenvalues and (optionally) Schur vectors are to be +* computed. +* On exit, the generalized Schur form of A. +* Note: to avoid overflow, the Frobenius norm of the matrix +* A should be less than the overflow threshold. +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) +* On entry, the second of the pair of matrices whose +* generalized eigenvalues and (optionally) Schur vectors are +* to be computed. +* On exit, the generalized Schur form of B. +* Note: to avoid overflow, the Frobenius norm of the matrix +* B should be less than the overflow threshold. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* ALPHAR (output) DOUBLE PRECISION array, dimension (N) +* ALPHAI (output) DOUBLE PRECISION array, dimension (N) +* BETA (output) DOUBLE PRECISION array, dimension (N) +* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +* be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i, +* j=1,...,N and BETA(j),j=1,...,N are the diagonals of the +* complex Schur form (A,B) that would result if the 2-by-2 +* diagonal blocks of the real Schur form of (A,B) were further +* reduced to triangular form using 2-by-2 complex unitary +* transformations. If ALPHAI(j) is zero, then the j-th +* eigenvalue is real; if positive, then the j-th and (j+1)-st +* eigenvalues are a complex conjugate pair, with ALPHAI(j+1) +* negative. +* +* Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +* may easily over- or underflow, and BETA(j) may even be zero. +* Thus, the user should avoid naively computing the ratio +* alpha/beta. However, ALPHAR and ALPHAI will be always less +* than and usually comparable with norm(A) in magnitude, and +* BETA always less than and usually comparable with norm(B). +* +* VSL (output) DOUBLE PRECISION array, dimension (LDVSL,N) +* If JOBVSL = 'V', VSL will contain the left Schur vectors. +* (See "Purpose", above.) +* Not referenced if JOBVSL = 'N'. +* +* LDVSL (input) INTEGER +* The leading dimension of the matrix VSL. LDVSL >=1, and +* if JOBVSL = 'V', LDVSL >= N. +* +* VSR (output) DOUBLE PRECISION array, dimension (LDVSR,N) +* If JOBVSR = 'V', VSR will contain the right Schur vectors. +* (See "Purpose", above.) +* Not referenced if JOBVSR = 'N'. +* +* LDVSR (input) INTEGER +* The leading dimension of the matrix VSR. LDVSR >= 1, and +* if JOBVSR = 'V', LDVSR >= N. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,4*N). +* For good performance, LWORK must generally be larger. +* To compute the optimal value of LWORK, call ILAENV to get +* blocksizes (for DGEQRF, DORMQR, and DORGQR.) Then compute: +* NB -- MAX of the blocksizes for DGEQRF, DORMQR, and DORGQR +* The optimal LWORK is 2*N + N*(NB+1). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* = 1,...,N: +* The QZ iteration failed. (A,B) are not in Schur +* form, but ALPHAR(j), ALPHAI(j), and BETA(j) should +* be correct for j=INFO+1,...,N. +* > N: errors that usually indicate LAPACK problems: +* =N+1: error return from DGGBAL +* =N+2: error return from DGEQRF +* =N+3: error return from DORMQR +* =N+4: error return from DORGQR +* =N+5: error return from DGGHRD +* =N+6: error return from DHGEQZ (other than failed +* iteration) +* =N+7: error return from DGGBAK (computing VSL) +* =N+8: error return from DGGBAK (computing VSR) +* =N+9: error return from DLASCL (various places) +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILVSL, ILVSR, LQUERY + INTEGER ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT, ILO, + $ IRIGHT, IROWS, ITAU, IWORK, LOPT, LWKMIN, + $ LWKOPT, NB, NB1, NB2, NB3 + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SAFMIN, SMLNUM +* .. +* .. External Subroutines .. + EXTERNAL DGEQRF, DGGBAK, DGGBAL, DGGHRD, DHGEQZ, DLACPY, + $ DLASCL, DLASET, DORGQR, DORMQR, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* +* Test the input arguments +* + LWKMIN = MAX( 4*N, 1 ) + LWKOPT = LWKMIN + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -12 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -14 + ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -16 + END IF +* + IF( INFO.EQ.0 ) THEN + NB1 = ILAENV( 1, 'DGEQRF', ' ', N, N, -1, -1 ) + NB2 = ILAENV( 1, 'DORMQR', ' ', N, N, N, -1 ) + NB3 = ILAENV( 1, 'DORGQR', ' ', N, N, N, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LOPT = 2*N + N*( NB+1 ) + WORK( 1 ) = LOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEGS ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'E' )*DLAMCH( 'B' ) + SAFMIN = DLAMCH( 'S' ) + SMLNUM = N*SAFMIN / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF +* + IF( ILASCL ) THEN + CALL DLASCL( 'G', -1, -1, ANRM, ANRMTO, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = DLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF +* + IF( ILBSCL ) THEN + CALL DLASCL( 'G', -1, -1, BNRM, BNRMTO, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* +* Permute the matrix to make it more nearly triangular +* Workspace layout: (2*N words -- "work..." not actually used) +* left_permutation, right_permutation, work... +* + ILEFT = 1 + IRIGHT = N + 1 + IWORK = IRIGHT + N + CALL DGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWORK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 1 + GO TO 10 + END IF +* +* Reduce B to triangular form, and initialize VSL and/or VSR +* Workspace layout: ("work..." must have at least N words) +* left_permutation, right_permutation, tau, work... +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = IWORK + IWORK = ITAU + IROWS + CALL DGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 2 + GO TO 10 + END IF +* + CALL DORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ), + $ LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 3 + GO TO 10 + END IF +* + IF( ILVSL ) THEN + CALL DLASET( 'Full', N, N, ZERO, ONE, VSL, LDVSL ) + CALL DLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + CALL DORGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK, + $ IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 4 + GO TO 10 + END IF + END IF +* + IF( ILVSR ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* + CALL DGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 5 + GO TO 10 + END IF +* +* Perform QZ algorithm, computing Schur vectors if desired +* Workspace layout: ("work..." must have at least 1 word) +* left_permutation, right_permutation, work... +* + IWORK = ITAU + CALL DHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN + INFO = IINFO + ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN + INFO = IINFO - N + ELSE + INFO = N + 6 + END IF + GO TO 10 + END IF +* +* Apply permutation to VSL and VSR +* + IF( ILVSL ) THEN + CALL DGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSL, LDVSL, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 7 + GO TO 10 + END IF + END IF + IF( ILVSR ) THEN + CALL DGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSR, LDVSR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 8 + GO TO 10 + END IF + END IF +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL DLASCL( 'H', -1, -1, ANRMTO, ANRM, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + CALL DLASCL( 'G', -1, -1, ANRMTO, ANRM, N, 1, ALPHAR, N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + CALL DLASCL( 'G', -1, -1, ANRMTO, ANRM, N, 1, ALPHAI, N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* + IF( ILBSCL ) THEN + CALL DLASCL( 'U', -1, -1, BNRMTO, BNRM, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + CALL DLASCL( 'G', -1, -1, BNRMTO, BNRM, N, 1, BETA, N, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* + 10 CONTINUE + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DGEGS +* + END diff --git a/costa/native/external/lapack/dgegv.f b/costa/native/external/lapack/dgegv.f new file mode 100644 index 000000000..67e1bba94 --- /dev/null +++ b/costa/native/external/lapack/dgegv.f @@ -0,0 +1,641 @@ + SUBROUTINE DGEGV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI, + $ BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine DGGEV. +* +* DGEGV computes for a pair of n-by-n real nonsymmetric matrices A and +* B, the generalized eigenvalues (alphar +/- alphai*i, beta), and +* optionally, the left and/or right generalized eigenvectors (VL and +* VR). +* +* A generalized eigenvalue for a pair of matrices (A,B) is, roughly +* speaking, a scalar w or a ratio alpha/beta = w, such that A - w*B +* is singular. It is usually represented as the pair (alpha,beta), +* as there is a reasonable interpretation for beta=0, and even for +* both being zero. A good beginning reference is the book, "Matrix +* Computations", by G. Golub & C. van Loan (Johns Hopkins U. Press) +* +* A right generalized eigenvector corresponding to a generalized +* eigenvalue w for a pair of matrices (A,B) is a vector r such +* that (A - w B) r = 0 . A left generalized eigenvector is a vector +* l such that l**H * (A - w B) = 0, where l**H is the +* conjugate-transpose of l. +* +* Note: this routine performs "full balancing" on A and B -- see +* "Further Details", below. +* +* Arguments +* ========= +* +* JOBVL (input) CHARACTER*1 +* = 'N': do not compute the left generalized eigenvectors; +* = 'V': compute the left generalized eigenvectors. +* +* JOBVR (input) CHARACTER*1 +* = 'N': do not compute the right generalized eigenvectors; +* = 'V': compute the right generalized eigenvectors. +* +* N (input) INTEGER +* The order of the matrices A, B, VL, and VR. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) +* On entry, the first of the pair of matrices whose +* generalized eigenvalues and (optionally) generalized +* eigenvectors are to be computed. +* On exit, the contents will have been destroyed. (For a +* description of the contents of A on exit, see "Further +* Details", below.) +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) +* On entry, the second of the pair of matrices whose +* generalized eigenvalues and (optionally) generalized +* eigenvectors are to be computed. +* On exit, the contents will have been destroyed. (For a +* description of the contents of B on exit, see "Further +* Details", below.) +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* ALPHAR (output) DOUBLE PRECISION array, dimension (N) +* ALPHAI (output) DOUBLE PRECISION array, dimension (N) +* BETA (output) DOUBLE PRECISION array, dimension (N) +* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +* be the generalized eigenvalues. If ALPHAI(j) is zero, then +* the j-th eigenvalue is real; if positive, then the j-th and +* (j+1)-st eigenvalues are a complex conjugate pair, with +* ALPHAI(j+1) negative. +* +* Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +* may easily over- or underflow, and BETA(j) may even be zero. +* Thus, the user should avoid naively computing the ratio +* alpha/beta. However, ALPHAR and ALPHAI will be always less +* than and usually comparable with norm(A) in magnitude, and +* BETA always less than and usually comparable with norm(B). +* +* VL (output) DOUBLE PRECISION array, dimension (LDVL,N) +* If JOBVL = 'V', the left generalized eigenvectors. (See +* "Purpose", above.) Real eigenvectors take one column, +* complex take two columns, the first for the real part and +* the second for the imaginary part. Complex eigenvectors +* correspond to an eigenvalue with positive imaginary part. +* Each eigenvector will be scaled so the largest component +* will have abs(real part) + abs(imag. part) = 1, *except* +* that for eigenvalues with alpha=beta=0, a zero vector will +* be returned as the corresponding eigenvector. +* Not referenced if JOBVL = 'N'. +* +* LDVL (input) INTEGER +* The leading dimension of the matrix VL. LDVL >= 1, and +* if JOBVL = 'V', LDVL >= N. +* +* VR (output) DOUBLE PRECISION array, dimension (LDVR,N) +* If JOBVR = 'V', the right generalized eigenvectors. (See +* "Purpose", above.) Real eigenvectors take one column, +* complex take two columns, the first for the real part and +* the second for the imaginary part. Complex eigenvectors +* correspond to an eigenvalue with positive imaginary part. +* Each eigenvector will be scaled so the largest component +* will have abs(real part) + abs(imag. part) = 1, *except* +* that for eigenvalues with alpha=beta=0, a zero vector will +* be returned as the corresponding eigenvector. +* Not referenced if JOBVR = 'N'. +* +* LDVR (input) INTEGER +* The leading dimension of the matrix VR. LDVR >= 1, and +* if JOBVR = 'V', LDVR >= N. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,8*N). +* For good performance, LWORK must generally be larger. +* To compute the optimal value of LWORK, call ILAENV to get +* blocksizes (for DGEQRF, DORMQR, and DORGQR.) Then compute: +* NB -- MAX of the blocksizes for DGEQRF, DORMQR, and DORGQR; +* The optimal LWORK is: +* 2*N + MAX( 6*N, N*(NB+1) ). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* = 1,...,N: +* The QZ iteration failed. No eigenvectors have been +* calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) +* should be correct for j=INFO+1,...,N. +* > N: errors that usually indicate LAPACK problems: +* =N+1: error return from DGGBAL +* =N+2: error return from DGEQRF +* =N+3: error return from DORMQR +* =N+4: error return from DORGQR +* =N+5: error return from DGGHRD +* =N+6: error return from DHGEQZ (other than failed +* iteration) +* =N+7: error return from DTGEVC +* =N+8: error return from DGGBAK (computing VL) +* =N+9: error return from DGGBAK (computing VR) +* =N+10: error return from DLASCL (various calls) +* +* Further Details +* =============== +* +* Balancing +* --------- +* +* This driver calls DGGBAL to both permute and scale rows and columns +* of A and B. The permutations PL and PR are chosen so that PL*A*PR +* and PL*B*R will be upper triangular except for the diagonal blocks +* A(i:j,i:j) and B(i:j,i:j), with i and j as close together as +* possible. The diagonal scaling matrices DL and DR are chosen so +* that the pair DL*PL*A*PR*DR, DL*PL*B*PR*DR have elements close to +* one (except for the elements that start out zero.) +* +* After the eigenvalues and eigenvectors of the balanced matrices +* have been computed, DGGBAK transforms the eigenvectors back to what +* they would have been (in perfect arithmetic) if they had not been +* balanced. +* +* Contents of A and B on Exit +* -------- -- - --- - -- ---- +* +* If any eigenvectors are computed (either JOBVL='V' or JOBVR='V' or +* both), then on exit the arrays A and B will contain the real Schur +* form[*] of the "balanced" versions of A and B. If no eigenvectors +* are computed, then only the diagonal blocks will be correct. +* +* [*] See DHGEQZ, DGEGS, or read the book "Matrix Computations", +* by Golub & van Loan, pub. by Johns Hopkins U. Press. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL ILIMIT, ILV, ILVL, ILVR, LQUERY + CHARACTER CHTEMP + INTEGER ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT, ILO, + $ IN, IRIGHT, IROWS, ITAU, IWORK, JC, JR, LOPT, + $ LWKMIN, LWKOPT, NB, NB1, NB2, NB3 + DOUBLE PRECISION ABSAI, ABSAR, ABSB, ANRM, ANRM1, ANRM2, BNRM, + $ BNRM1, BNRM2, EPS, ONEPLS, SAFMAX, SAFMIN, + $ SALFAI, SALFAR, SBETA, SCALE, TEMP +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DGEQRF, DGGBAK, DGGBAL, DGGHRD, DHGEQZ, DLACPY, + $ DLASCL, DLASET, DORGQR, DORMQR, DTGEVC, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, MAX +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* +* Test the input arguments +* + LWKMIN = MAX( 8*N, 1 ) + LWKOPT = LWKMIN + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -12 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -14 + ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -16 + END IF +* + IF( INFO.EQ.0 ) THEN + NB1 = ILAENV( 1, 'DGEQRF', ' ', N, N, -1, -1 ) + NB2 = ILAENV( 1, 'DORMQR', ' ', N, N, N, -1 ) + NB3 = ILAENV( 1, 'DORGQR', ' ', N, N, N, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LOPT = 2*N + MAX( 6*N, N*( NB+1 ) ) + WORK( 1 ) = LOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEGV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'E' )*DLAMCH( 'B' ) + SAFMIN = DLAMCH( 'S' ) + SAFMIN = SAFMIN + SAFMIN + SAFMAX = ONE / SAFMIN + ONEPLS = ONE + ( 4*EPS ) +* +* Scale A +* + ANRM = DLANGE( 'M', N, N, A, LDA, WORK ) + ANRM1 = ANRM + ANRM2 = ONE + IF( ANRM.LT.ONE ) THEN + IF( SAFMAX*ANRM.LT.ONE ) THEN + ANRM1 = SAFMIN + ANRM2 = SAFMAX*ANRM + END IF + END IF +* + IF( ANRM.GT.ZERO ) THEN + CALL DLASCL( 'G', -1, -1, ANRM, ONE, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 10 + RETURN + END IF + END IF +* +* Scale B +* + BNRM = DLANGE( 'M', N, N, B, LDB, WORK ) + BNRM1 = BNRM + BNRM2 = ONE + IF( BNRM.LT.ONE ) THEN + IF( SAFMAX*BNRM.LT.ONE ) THEN + BNRM1 = SAFMIN + BNRM2 = SAFMAX*BNRM + END IF + END IF +* + IF( BNRM.GT.ZERO ) THEN + CALL DLASCL( 'G', -1, -1, BNRM, ONE, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 10 + RETURN + END IF + END IF +* +* Permute the matrix to make it more nearly triangular +* Workspace layout: (8*N words -- "work" requires 6*N words) +* left_permutation, right_permutation, work... +* + ILEFT = 1 + IRIGHT = N + 1 + IWORK = IRIGHT + N + CALL DGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWORK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 1 + GO TO 120 + END IF +* +* Reduce B to triangular form, and initialize VL and/or VR +* Workspace layout: ("work..." must have at least N words) +* left_permutation, right_permutation, tau, work... +* + IROWS = IHI + 1 - ILO + IF( ILV ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = IWORK + IWORK = ITAU + IROWS + CALL DGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 2 + GO TO 120 + END IF +* + CALL DORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ), + $ LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 3 + GO TO 120 + END IF +* + IF( ILVL ) THEN + CALL DLASET( 'Full', N, N, ZERO, ONE, VL, LDVL ) + CALL DLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + CALL DORGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK, + $ IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 4 + GO TO 120 + END IF + END IF +* + IF( ILVR ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* + IF( ILV ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL DGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IINFO ) + ELSE + CALL DGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IINFO ) + END IF + IF( IINFO.NE.0 ) THEN + INFO = N + 5 + GO TO 120 + END IF +* +* Perform QZ algorithm +* Workspace layout: ("work..." must have at least 1 word) +* left_permutation, right_permutation, work... +* + IWORK = ITAU + IF( ILV ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF + CALL DHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN + INFO = IINFO + ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN + INFO = IINFO - N + ELSE + INFO = N + 6 + END IF + GO TO 120 + END IF +* + IF( ILV ) THEN +* +* Compute Eigenvectors (DTGEVC requires 6*N words of workspace) +* + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF +* + CALL DTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, N, IN, WORK( IWORK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 7 + GO TO 120 + END IF +* +* Undo balancing on VL and VR, rescale +* + IF( ILVL ) THEN + CALL DGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VL, LDVL, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 8 + GO TO 120 + END IF + DO 50 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 50 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 10 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) ) ) + 10 CONTINUE + ELSE + DO 20 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) )+ + $ ABS( VL( JR, JC+1 ) ) ) + 20 CONTINUE + END IF + IF( TEMP.LT.SAFMIN ) + $ GO TO 50 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 30 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 30 CONTINUE + ELSE + DO 40 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + VL( JR, JC+1 ) = VL( JR, JC+1 )*TEMP + 40 CONTINUE + END IF + 50 CONTINUE + END IF + IF( ILVR ) THEN + CALL DGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VR, LDVR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + GO TO 120 + END IF + DO 100 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 100 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 60 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) ) ) + 60 CONTINUE + ELSE + DO 70 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) )+ + $ ABS( VR( JR, JC+1 ) ) ) + 70 CONTINUE + END IF + IF( TEMP.LT.SAFMIN ) + $ GO TO 100 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 80 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 80 CONTINUE + ELSE + DO 90 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + VR( JR, JC+1 ) = VR( JR, JC+1 )*TEMP + 90 CONTINUE + END IF + 100 CONTINUE + END IF +* +* End of eigenvector calculation +* + END IF +* +* Undo scaling in alpha, beta +* +* Note: this does not give the alpha and beta for the unscaled +* problem. +* +* Un-scaling is limited to avoid underflow in alpha and beta +* if they are significant. +* + DO 110 JC = 1, N + ABSAR = ABS( ALPHAR( JC ) ) + ABSAI = ABS( ALPHAI( JC ) ) + ABSB = ABS( BETA( JC ) ) + SALFAR = ANRM*ALPHAR( JC ) + SALFAI = ANRM*ALPHAI( JC ) + SBETA = BNRM*BETA( JC ) + ILIMIT = .FALSE. + SCALE = ONE +* +* Check for significant underflow in ALPHAI +* + IF( ABS( SALFAI ).LT.SAFMIN .AND. ABSAI.GE. + $ MAX( SAFMIN, EPS*ABSAR, EPS*ABSB ) ) THEN + ILIMIT = .TRUE. + SCALE = ( ONEPLS*SAFMIN / ANRM1 ) / + $ MAX( ONEPLS*SAFMIN, ANRM2*ABSAI ) +* + ELSE IF( SALFAI.EQ.ZERO ) THEN +* +* If insignificant underflow in ALPHAI, then make the +* conjugate eigenvalue real. +* + IF( ALPHAI( JC ).LT.ZERO .AND. JC.GT.1 ) THEN + ALPHAI( JC-1 ) = ZERO + ELSE IF( ALPHAI( JC ).GT.ZERO .AND. JC.LT.N ) THEN + ALPHAI( JC+1 ) = ZERO + END IF + END IF +* +* Check for significant underflow in ALPHAR +* + IF( ABS( SALFAR ).LT.SAFMIN .AND. ABSAR.GE. + $ MAX( SAFMIN, EPS*ABSAI, EPS*ABSB ) ) THEN + ILIMIT = .TRUE. + SCALE = MAX( SCALE, ( ONEPLS*SAFMIN / ANRM1 ) / + $ MAX( ONEPLS*SAFMIN, ANRM2*ABSAR ) ) + END IF +* +* Check for significant underflow in BETA +* + IF( ABS( SBETA ).LT.SAFMIN .AND. ABSB.GE. + $ MAX( SAFMIN, EPS*ABSAR, EPS*ABSAI ) ) THEN + ILIMIT = .TRUE. + SCALE = MAX( SCALE, ( ONEPLS*SAFMIN / BNRM1 ) / + $ MAX( ONEPLS*SAFMIN, BNRM2*ABSB ) ) + END IF +* +* Check for possible overflow when limiting scaling +* + IF( ILIMIT ) THEN + TEMP = ( SCALE*SAFMIN )*MAX( ABS( SALFAR ), ABS( SALFAI ), + $ ABS( SBETA ) ) + IF( TEMP.GT.ONE ) + $ SCALE = SCALE / TEMP + IF( SCALE.LT.ONE ) + $ ILIMIT = .FALSE. + END IF +* +* Recompute un-scaled ALPHAR, ALPHAI, BETA if necessary. +* + IF( ILIMIT ) THEN + SALFAR = ( SCALE*ALPHAR( JC ) )*ANRM + SALFAI = ( SCALE*ALPHAI( JC ) )*ANRM + SBETA = ( SCALE*BETA( JC ) )*BNRM + END IF + ALPHAR( JC ) = SALFAR + ALPHAI( JC ) = SALFAI + BETA( JC ) = SBETA + 110 CONTINUE +* + 120 CONTINUE + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DGEGV +* + END diff --git a/costa/native/external/lapack/dgehd2.f b/costa/native/external/lapack/dgehd2.f new file mode 100644 index 000000000..b2345717c --- /dev/null +++ b/costa/native/external/lapack/dgehd2.f @@ -0,0 +1,150 @@ + SUBROUTINE DGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGEHD2 reduces a real general matrix A to upper Hessenberg form H by +* an orthogonal similarity transformation: Q' * A * Q = H . +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that A is already upper triangular in rows +* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +* set by a previous call to DGEBAL; otherwise they should be +* set to 1 and N respectively. See Further Details. +* 1 <= ILO <= IHI <= max(1,N). +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the n by n general matrix to be reduced. +* On exit, the upper triangle and the first subdiagonal of A +* are overwritten with the upper Hessenberg matrix H, and the +* elements below the first subdiagonal, with the array TAU, +* represent the orthogonal matrix Q as a product of elementary +* reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (output) DOUBLE PRECISION array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of (ihi-ilo) elementary +* reflectors +* +* Q = H(ilo) H(ilo+1) . . . H(ihi-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on +* exit in A(i+2:ihi,i), and tau in TAU(i). +* +* The contents of A are illustrated by the following example, with +* n = 7, ilo = 2 and ihi = 6: +* +* on entry, on exit, +* +* ( a a a a a a a ) ( a a h h h h a ) +* ( a a a a a a ) ( a h h h h a ) +* ( a a a a a a ) ( h h h h h h ) +* ( a a a a a a ) ( v2 h h h h h ) +* ( a a a a a a ) ( v2 v3 h h h h ) +* ( a a a a a a ) ( v2 v3 v4 h h h ) +* ( a ) ( a ) +* +* where a denotes an element of the original matrix A, h denotes a +* modified element of the upper Hessenberg matrix H, and vi denotes an +* element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION AII +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEHD2', -INFO ) + RETURN + END IF +* + DO 10 I = ILO, IHI - 1 +* +* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) +* + CALL DLARFG( IHI-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1, + $ TAU( I ) ) + AII = A( I+1, I ) + A( I+1, I ) = ONE +* +* Apply H(i) to A(1:ihi,i+1:ihi) from the right +* + CALL DLARF( 'Right', IHI, IHI-I, A( I+1, I ), 1, TAU( I ), + $ A( 1, I+1 ), LDA, WORK ) +* +* Apply H(i) to A(i+1:ihi,i+1:n) from the left +* + CALL DLARF( 'Left', IHI-I, N-I, A( I+1, I ), 1, TAU( I ), + $ A( I+1, I+1 ), LDA, WORK ) +* + A( I+1, I ) = AII + 10 CONTINUE +* + RETURN +* +* End of DGEHD2 +* + END diff --git a/costa/native/external/lapack/dgehrd.f b/costa/native/external/lapack/dgehrd.f new file mode 100644 index 000000000..e9ffcfad2 --- /dev/null +++ b/costa/native/external/lapack/dgehrd.f @@ -0,0 +1,255 @@ + SUBROUTINE DGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGEHRD reduces a real general matrix A to upper Hessenberg form H by +* an orthogonal similarity transformation: Q' * A * Q = H . +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that A is already upper triangular in rows +* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +* set by a previous call to DGEBAL; otherwise they should be +* set to 1 and N respectively. See Further Details. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the N-by-N general matrix to be reduced. +* On exit, the upper triangle and the first subdiagonal of A +* are overwritten with the upper Hessenberg matrix H, and the +* elements below the first subdiagonal, with the array TAU, +* represent the orthogonal matrix Q as a product of elementary +* reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (output) DOUBLE PRECISION array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to +* zero. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of (ihi-ilo) elementary +* reflectors +* +* Q = H(ilo) H(ilo+1) . . . H(ihi-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on +* exit in A(i+2:ihi,i), and tau in TAU(i). +* +* The contents of A are illustrated by the following example, with +* n = 7, ilo = 2 and ihi = 6: +* +* on entry, on exit, +* +* ( a a a a a a a ) ( a a h h h h a ) +* ( a a a a a a ) ( a h h h h a ) +* ( a a a a a a ) ( h h h h h h ) +* ( a a a a a a ) ( v2 h h h h h ) +* ( a a a a a a ) ( v2 v3 h h h h ) +* ( a a a a a a ) ( v2 v3 v4 h h h ) +* ( a ) ( a ) +* +* where a denotes an element of the original matrix A, h denotes a +* modified element of the upper Hessenberg matrix H, and vi denotes an +* element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, LDWORK, LWKOPT, NB, NBMIN, + $ NH, NX + DOUBLE PRECISION EI +* .. +* .. Local Arrays .. + DOUBLE PRECISION T( LDT, NBMAX ) +* .. +* .. External Subroutines .. + EXTERNAL DGEHD2, DGEMM, DLAHRD, DLARFB, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB = MIN( NBMAX, ILAENV( 1, 'DGEHRD', ' ', N, ILO, IHI, -1 ) ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEHRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero +* + DO 10 I = 1, ILO - 1 + TAU( I ) = ZERO + 10 CONTINUE + DO 20 I = MAX( 1, IHI ), N - 1 + TAU( I ) = ZERO + 20 CONTINUE +* +* Quick return if possible +* + NH = IHI - ILO + 1 + IF( NH.LE.1 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine the block size. +* + NB = MIN( NBMAX, ILAENV( 1, 'DGEHRD', ' ', N, ILO, IHI, -1 ) ) + NBMIN = 2 + IWS = 1 + IF( NB.GT.1 .AND. NB.LT.NH ) THEN +* +* Determine when to cross over from blocked to unblocked code +* (last block is always handled by unblocked code). +* + NX = MAX( NB, ILAENV( 3, 'DGEHRD', ' ', N, ILO, IHI, -1 ) ) + IF( NX.LT.NH ) THEN +* +* Determine if workspace is large enough for blocked code. +* + IWS = N*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code. +* + NBMIN = MAX( 2, ILAENV( 2, 'DGEHRD', ' ', N, ILO, IHI, + $ -1 ) ) + IF( LWORK.GE.N*NBMIN ) THEN + NB = LWORK / N + ELSE + NB = 1 + END IF + END IF + END IF + END IF + LDWORK = N +* + IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN +* +* Use unblocked code below +* + I = ILO +* + ELSE +* +* Use blocked code +* + DO 30 I = ILO, IHI - 1 - NX, NB + IB = MIN( NB, IHI-I ) +* +* Reduce columns i:i+ib-1 to Hessenberg form, returning the +* matrices V and T of the block reflector H = I - V*T*V' +* which performs the reduction, and also the matrix Y = A*V*T +* + CALL DLAHRD( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT, + $ WORK, LDWORK ) +* +* Apply the block reflector H to A(1:ihi,i+ib:ihi) from the +* right, computing A := A - Y * V'. V(i+ib,ib-1) must be set +* to 1. +* + EI = A( I+IB, I+IB-1 ) + A( I+IB, I+IB-1 ) = ONE + CALL DGEMM( 'No transpose', 'Transpose', IHI, IHI-I-IB+1, + $ IB, -ONE, WORK, LDWORK, A( I+IB, I ), LDA, ONE, + $ A( 1, I+IB ), LDA ) + A( I+IB, I+IB-1 ) = EI +* +* Apply the block reflector H to A(i+1:ihi,i+ib:n) from the +* left +* + CALL DLARFB( 'Left', 'Transpose', 'Forward', 'Columnwise', + $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, T, LDT, + $ A( I+1, I+IB ), LDA, WORK, LDWORK ) + 30 CONTINUE + END IF +* +* Use unblocked code to reduce the rest of the matrix +* + CALL DGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO ) + WORK( 1 ) = IWS +* + RETURN +* +* End of DGEHRD +* + END diff --git a/costa/native/external/lapack/dgelq2.f b/costa/native/external/lapack/dgelq2.f new file mode 100644 index 000000000..a12048779 --- /dev/null +++ b/costa/native/external/lapack/dgelq2.f @@ -0,0 +1,122 @@ + SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGELQ2 computes an LQ factorization of a real m by n matrix A: +* A = L * Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the m by n matrix A. +* On exit, the elements on and below the diagonal of the array +* contain the m by min(m,n) lower trapezoidal matrix L (L is +* lower triangular if m <= n); the elements above the diagonal, +* with the array TAU, represent the orthogonal matrix Q as a +* product of elementary reflectors (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (M) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(k) . . . H(2) H(1), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), +* and tau in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + DOUBLE PRECISION AII +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGELQ2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i,i+1:n) +* + CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA, + $ TAU( I ) ) + IF( I.LT.M ) THEN +* +* Apply H(i) to A(i+1:m,i:n) from the right +* + AII = A( I, I ) + A( I, I ) = ONE + CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ), + $ A( I+1, I ), LDA, WORK ) + A( I, I ) = AII + END IF + 10 CONTINUE + RETURN +* +* End of DGELQ2 +* + END diff --git a/costa/native/external/lapack/dgelqf.f b/costa/native/external/lapack/dgelqf.f new file mode 100644 index 000000000..13d2d2f9f --- /dev/null +++ b/costa/native/external/lapack/dgelqf.f @@ -0,0 +1,196 @@ + SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGELQF computes an LQ factorization of a real M-by-N matrix A: +* A = L * Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the elements on and below the diagonal of the array +* contain the m-by-min(m,n) lower trapezoidal matrix L (L is +* lower triangular if m <= n); the elements above the diagonal, +* with the array TAU, represent the orthogonal matrix Q as a +* product of elementary reflectors (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(k) . . . H(2) H(1), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), +* and tau in TAU(i). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL DGELQ2, DLARFB, DLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGELQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DGELQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DGELQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the LQ factorization of the current block +* A(i:i+ib-1,i:n) +* + CALL DGELQ2( IB, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.M ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i+ib:m,i:n) from the right +* + CALL DLARFB( 'Right', 'No transpose', 'Forward', + $ 'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ), + $ LDA, WORK, LDWORK, A( I+IB, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) + $ CALL DGELQ2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of DGELQF +* + END diff --git a/costa/native/external/lapack/dgels.f b/costa/native/external/lapack/dgels.f new file mode 100644 index 000000000..a7d6519fa --- /dev/null +++ b/costa/native/external/lapack/dgels.f @@ -0,0 +1,403 @@ + SUBROUTINE DGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGELS solves overdetermined or underdetermined real linear systems +* involving an M-by-N matrix A, or its transpose, using a QR or LQ +* factorization of A. It is assumed that A has full rank. +* +* The following options are provided: +* +* 1. If TRANS = 'N' and m >= n: find the least squares solution of +* an overdetermined system, i.e., solve the least squares problem +* minimize || B - A*X ||. +* +* 2. If TRANS = 'N' and m < n: find the minimum norm solution of +* an underdetermined system A * X = B. +* +* 3. If TRANS = 'T' and m >= n: find the minimum norm solution of +* an undetermined system A**T * X = B. +* +* 4. If TRANS = 'T' and m < n: find the least squares solution of +* an overdetermined system, i.e., solve the least squares problem +* minimize || B - A**T * X ||. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution +* matrix X. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER +* = 'N': the linear system involves A; +* = 'T': the linear system involves A**T. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of +* columns of the matrices B and X. NRHS >=0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, +* if M >= N, A is overwritten by details of its QR +* factorization as returned by DGEQRF; +* if M < N, A is overwritten by details of its LQ +* factorization as returned by DGELQF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the matrix B of right hand side vectors, stored +* columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +* if TRANS = 'T'. +* On exit, B is overwritten by the solution vectors, stored +* columnwise: +* if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +* squares solution vectors; the residual sum of squares for the +* solution in each column is given by the sum of squares of +* elements N+1 to M in that column; +* if TRANS = 'N' and m < n, rows 1 to N of B contain the +* minimum norm solution vectors; +* if TRANS = 'T' and m >= n, rows 1 to M of B contain the +* minimum norm solution vectors; +* if TRANS = 'T' and m < n, rows 1 to M of B contain the +* least squares solution vectors; the residual sum of squares +* for the solution in each column is given by the sum of +* squares of elements M+1 to N in that column. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= MAX(1,M,N). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* LWORK >= max( 1, MN + max( MN, NRHS ) ). +* For optimal performance, +* LWORK >= max( 1, MN + max( MN, NRHS )*NB ). +* where MN = min(M,N) and NB is the optimum block size. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TPSD + INTEGER BROW, I, IASCL, IBSCL, J, MN, NB, SCLLEN, WSIZE + DOUBLE PRECISION ANRM, BIGNUM, BNRM, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. External Subroutines .. + EXTERNAL DGELQF, DGEQRF, DLASCL, DLASET, DORMLQ, DORMQR, + $ DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'T' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, MN+MAX( MN, NRHS ) ) .AND. .NOT.LQUERY ) + $ THEN + INFO = -10 + END IF +* +* Figure out optimal block size +* + IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN +* + TPSD = .TRUE. + IF( LSAME( TRANS, 'N' ) ) + $ TPSD = .FALSE. +* + IF( M.GE.N ) THEN + NB = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + IF( TPSD ) THEN + NB = MAX( NB, ILAENV( 1, 'DORMQR', 'LN', M, NRHS, N, + $ -1 ) ) + ELSE + NB = MAX( NB, ILAENV( 1, 'DORMQR', 'LT', M, NRHS, N, + $ -1 ) ) + END IF + ELSE + NB = ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 ) + IF( TPSD ) THEN + NB = MAX( NB, ILAENV( 1, 'DORMLQ', 'LT', N, NRHS, M, + $ -1 ) ) + ELSE + NB = MAX( NB, ILAENV( 1, 'DORMLQ', 'LN', N, NRHS, M, + $ -1 ) ) + END IF + END IF +* + WSIZE = MAX( 1, MN+MAX( MN, NRHS )*NB ) + WORK( 1 ) = DBLE( WSIZE ) +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGELS ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + GO TO 50 + END IF +* + BROW = M + IF( TPSD ) + $ BROW = N + BNRM = DLANGE( 'M', BROW, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF( M.GE.N ) THEN +* +* compute QR factorization of A +* + CALL DGEQRF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least N, optimally N*NB +* + IF( .NOT.TPSD ) THEN +* +* Least-Squares Problem min || A * X - B || +* +* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) +* + CALL DORMQR( 'Left', 'Transpose', M, NRHS, N, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* +* B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL DTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) +* + SCLLEN = N +* + ELSE +* +* Overdetermined system of equations A' * X = B +* +* B(1:N,1:NRHS) := inv(R') * B(1:N,1:NRHS) +* + CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) +* +* B(N+1:M,1:NRHS) = ZERO +* + DO 20 J = 1, NRHS + DO 10 I = N + 1, M + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) +* + CALL DORMQR( 'Left', 'No transpose', M, NRHS, N, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* + SCLLEN = M +* + END IF +* + ELSE +* +* Compute LQ factorization of A +* + CALL DGELQF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least M, optimally M*NB. +* + IF( .NOT.TPSD ) THEN +* +* underdetermined system of equations A * X = B +* +* B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', M, + $ NRHS, ONE, A, LDA, B, LDB ) +* +* B(M+1:N,1:NRHS) = 0 +* + DO 40 J = 1, NRHS + DO 30 I = M + 1, N + B( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Q(1:N,:)' * B(1:M,1:NRHS) +* + CALL DORMLQ( 'Left', 'Transpose', N, NRHS, M, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* + SCLLEN = N +* + ELSE +* +* overdetermined system min || A' * X - B || +* +* B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) +* + CALL DORMLQ( 'Left', 'No transpose', N, NRHS, M, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* +* B(1:M,1:NRHS) := inv(L') * B(1:M,1:NRHS) +* + CALL DTRSM( 'Left', 'Lower', 'Transpose', 'Non-unit', M, + $ NRHS, ONE, A, LDA, B, LDB ) +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + 50 CONTINUE + WORK( 1 ) = DBLE( WSIZE ) +* + RETURN +* +* End of DGELS +* + END diff --git a/costa/native/external/lapack/dgelsd.f b/costa/native/external/lapack/dgelsd.f new file mode 100644 index 000000000..0d4de0fdd --- /dev/null +++ b/costa/native/external/lapack/dgelsd.f @@ -0,0 +1,529 @@ + SUBROUTINE DGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, + $ WORK, LWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGELSD computes the minimum-norm solution to a real linear least +* squares problem: +* minimize 2-norm(| b - A*x |) +* using the singular value decomposition (SVD) of A. A is an M-by-N +* matrix which may be rank-deficient. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution +* matrix X. +* +* The problem is solved in three steps: +* (1) Reduce the coefficient matrix A to bidiagonal form with +* Householder transformations, reducing the original problem +* into a "bidiagonal least squares problem" (BLS) +* (2) Solve the BLS using a divide and conquer approach. +* (3) Apply back all the Householder tranformations to solve +* the original least squares problem. +* +* The effective rank of A is determined by treating as zero those +* singular values which are less than RCOND times the largest singular +* value. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of A. M >= 0. +* +* N (input) INTEGER +* The number of columns of A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A has been destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the M-by-NRHS right hand side matrix B. +* On exit, B is overwritten by the N-by-NRHS solution +* matrix X. If m >= n and RANK = n, the residual +* sum-of-squares for the solution in the i-th column is given +* by the sum of squares of elements n+1:m in that column. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,max(M,N)). +* +* S (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The singular values of A in decreasing order. +* The condition number of A in the 2-norm = S(1)/S(min(m,n)). +* +* RCOND (input) DOUBLE PRECISION +* RCOND is used to determine the effective rank of A. +* Singular values S(i) <= RCOND*S(1) are treated as zero. +* If RCOND < 0, machine precision is used instead. +* +* RANK (output) INTEGER +* The effective rank of A, i.e., the number of singular values +* which are greater than RCOND*S(1). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK must be at least 1. +* The exact minimum amount of workspace needed depends on M, +* N and NRHS. As long as LWORK is at least +* 12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2, +* if M is greater than or equal to N or +* 12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2, +* if M is less than N, the code will execute correctly. +* SMLSIZ is returned by ILAENV and is equal to the maximum +* size of the subproblems at the bottom of the computation +* tree (usually about 25), and +* NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) +* For good performance, LWORK should generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace) INTEGER array, dimension (LIWORK) +* LIWORK >= 3 * MINMN * NLVL + 11 * MINMN, +* where MINMN = MIN( M,N ). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: the algorithm for computing the SVD failed to converge; +* if INFO = i, i off-diagonal elements of an intermediate +* bidiagonal form did not converge to zero. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER IASCL, IBSCL, IE, IL, ITAU, ITAUP, ITAUQ, + $ LDWORK, MAXMN, MAXWRK, MINMN, MINWRK, MM, + $ MNTHR, NLVL, NWORK, SMLSIZ, WLALSD + DOUBLE PRECISION ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM +* .. +* .. External Subroutines .. + EXTERNAL DGEBRD, DGELQF, DGEQRF, DLABAD, DLACPY, DLALSD, + $ DLASCL, DLASET, DORMBR, DORMLQ, DORMQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, INT, LOG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + MNTHR = ILAENV( 6, 'DGELSD', ' ', M, N, NRHS, -1 ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN + INFO = -7 + END IF +* + SMLSIZ = ILAENV( 9, 'DGELSD', ' ', 0, 0, 0, 0 ) +* +* Compute workspace. +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + MINWRK = 1 + MINMN = MAX( 1, MINMN ) + NLVL = MAX( INT( LOG( DBLE( MINMN ) / DBLE( SMLSIZ+1 ) ) / + $ LOG( TWO ) ) + 1, 0 ) +* + IF( INFO.EQ.0 ) THEN + MAXWRK = 0 + MM = M + IF( M.GE.N .AND. M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns. +* + MM = N + MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'DGEQRF', ' ', M, N, + $ -1, -1 ) ) + MAXWRK = MAX( MAXWRK, N+NRHS* + $ ILAENV( 1, 'DORMQR', 'LT', M, NRHS, N, -1 ) ) + END IF + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined. +* + MAXWRK = MAX( MAXWRK, 3*N+( MM+N )* + $ ILAENV( 1, 'DGEBRD', ' ', MM, N, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*N+NRHS* + $ ILAENV( 1, 'DORMBR', 'QLT', MM, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*N+( N-1 )* + $ ILAENV( 1, 'DORMBR', 'PLN', N, NRHS, N, -1 ) ) + WLALSD = 9*N+2*N*SMLSIZ+8*N*NLVL+N*NRHS+(SMLSIZ+1)**2 + MAXWRK = MAX( MAXWRK, 3*N+WLALSD ) + MINWRK = MAX( 3*N+MM, 3*N+NRHS, 3*N+WLALSD ) + END IF + IF( N.GT.M ) THEN + WLALSD = 9*M+2*M*SMLSIZ+8*M*NLVL+M*NRHS+(SMLSIZ+1)**2 + IF( N.GE.MNTHR ) THEN +* +* Path 2a - underdetermined, with many more columns +* than rows. +* + MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 ) + MAXWRK = MAX( MAXWRK, M*M+4*M+2*M* + $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M+4*M+NRHS* + $ ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M+4*M+( M-1 )* + $ ILAENV( 1, 'DORMBR', 'PLN', M, NRHS, M, -1 ) ) + IF( NRHS.GT.1 ) THEN + MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS ) + ELSE + MAXWRK = MAX( MAXWRK, M*M+2*M ) + END IF + MAXWRK = MAX( MAXWRK, M+NRHS* + $ ILAENV( 1, 'DORMLQ', 'LT', N, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M+4*M+WLALSD ) + ELSE +* +* Path 2 - remaining underdetermined cases. +* + MAXWRK = 3*M + ( N+M )*ILAENV( 1, 'DGEBRD', ' ', M, N, + $ -1, -1 ) + MAXWRK = MAX( MAXWRK, 3*M+NRHS* + $ ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*M+M* + $ ILAENV( 1, 'DORMBR', 'PLN', N, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*M+WLALSD ) + END IF + MINWRK = MAX( 3*M+NRHS, 3*M+M, 3*M+WLALSD ) + END IF + MINWRK = MIN( MINWRK, MAXWRK ) + WORK( 1 ) = MAXWRK + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGELSD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + GO TO 10 + END IF +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters. +* + EPS = DLAMCH( 'P' ) + SFMIN = DLAMCH( 'S' ) + SMLNUM = SFMIN / EPS + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A if max entry outside range [SMLNUM,BIGNUM]. +* + ANRM = DLANGE( 'M', M, N, A, LDA, WORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM. +* + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM. +* + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 ) + RANK = 0 + GO TO 10 + END IF +* +* Scale B if max entry outside range [SMLNUM,BIGNUM]. +* + BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM. +* + CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM. +* + CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* If M < N make sure certain entries of B are zero. +* + IF( M.LT.N ) + $ CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB ) +* +* Overdetermined case. +* + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined. +* + MM = M + IF( M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns. +* + MM = N + ITAU = 1 + NWORK = ITAU + N +* +* Compute A=Q*R. +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Multiply B by transpose(Q). +* (Workspace: need N+NRHS, prefer N+NRHS*NB) +* + CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Zero out below R. +* + IF( N.GT.1 ) THEN + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) + END IF + END IF +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in A. +* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) +* + CALL DGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of R. +* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) +* + CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL DLALSD( 'U', SMLSIZ, N, NRHS, S, WORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of R. +* + CALL DORMBR( 'P', 'L', 'N', N, NRHS, N, A, LDA, WORK( ITAUP ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+ + $ MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN +* +* Path 2a - underdetermined, with many more columns than rows +* and sufficient workspace for an efficient algorithm. +* + LDWORK = M + IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ), + $ M*LDA+M+M*NRHS ) )LDWORK = LDA + ITAU = 1 + NWORK = M + 1 +* +* Compute A=L*Q. +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) + IL = NWORK +* +* Copy L to WORK(IL), zeroing out above its diagonal. +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ), + $ LDWORK ) + IE = IL + LDWORK*M + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL). +* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) +* + CALL DGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of L. +* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) +* + CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUQ ), B, LDB, WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL DLALSD( 'U', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of L. +* + CALL DORMBR( 'P', 'L', 'N', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUP ), B, LDB, WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Zero out below first M rows of B. +* + CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB ) + NWORK = ITAU + M +* +* Multiply transpose(Q) by B. +* (Workspace: need M+NRHS, prefer M+NRHS*NB) +* + CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + ELSE +* +* Path 2 - remaining underdetermined cases. +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize A. +* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +* + CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors. +* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) +* + CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL DLALSD( 'L', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of A. +* + CALL DORMBR( 'P', 'L', 'N', N, NRHS, M, A, LDA, WORK( ITAUP ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + END IF +* +* Undo scaling. +* + IF( IASCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 10 CONTINUE + WORK( 1 ) = MAXWRK + RETURN +* +* End of DGELSD +* + END diff --git a/costa/native/external/lapack/dgelss.f b/costa/native/external/lapack/dgelss.f new file mode 100644 index 000000000..a437767db --- /dev/null +++ b/costa/native/external/lapack/dgelss.f @@ -0,0 +1,613 @@ + SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, + $ WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGELSS computes the minimum norm solution to a real linear least +* squares problem: +* +* Minimize 2-norm(| b - A*x |). +* +* using the singular value decomposition (SVD) of A. A is an M-by-N +* matrix which may be rank-deficient. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix +* X. +* +* The effective rank of A is determined by treating as zero those +* singular values which are less than RCOND times the largest singular +* value. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the first min(m,n) rows of A are overwritten with +* its right singular vectors, stored rowwise. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the M-by-NRHS right hand side matrix B. +* On exit, B is overwritten by the N-by-NRHS solution +* matrix X. If m >= n and RANK = n, the residual +* sum-of-squares for the solution in the i-th column is given +* by the sum of squares of elements n+1:m in that column. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,max(M,N)). +* +* S (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The singular values of A in decreasing order. +* The condition number of A in the 2-norm = S(1)/S(min(m,n)). +* +* RCOND (input) DOUBLE PRECISION +* RCOND is used to determine the effective rank of A. +* Singular values S(i) <= RCOND*S(1) are treated as zero. +* If RCOND < 0, machine precision is used instead. +* +* RANK (output) INTEGER +* The effective rank of A, i.e., the number of singular values +* which are greater than RCOND*S(1). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1, and also: +* LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS ) +* For good performance, LWORK should generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: the algorithm for computing the SVD failed to converge; +* if INFO = i, i off-diagonal elements of an intermediate +* bidiagonal form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER BDSPAC, BL, CHUNK, I, IASCL, IBSCL, IE, IL, + $ ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN, + $ MAXWRK, MINMN, MINWRK, MM, MNTHR + DOUBLE PRECISION ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR +* .. +* .. Local Arrays .. + DOUBLE PRECISION VDUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DBDSQR, DCOPY, DGEBRD, DGELQF, DGEMM, DGEMV, + $ DGEQRF, DLABAD, DLACPY, DLASCL, DLASET, DORGBR, + $ DORMBR, DORMLQ, DORMQR, DRSCL, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + MNTHR = ILAENV( 6, 'DGELSS', ' ', M, N, NRHS, -1 ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MAXWRK = 0 + MM = M + IF( M.GE.N .AND. M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns +* + MM = N + MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'DGEQRF', ' ', M, N, + $ -1, -1 ) ) + MAXWRK = MAX( MAXWRK, N+NRHS* + $ ILAENV( 1, 'DORMQR', 'LT', M, NRHS, N, -1 ) ) + END IF + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined +* +* Compute workspace needed for DBDSQR +* + BDSPAC = MAX( 1, 5*N ) + MAXWRK = MAX( MAXWRK, 3*N+( MM+N )* + $ ILAENV( 1, 'DGEBRD', ' ', MM, N, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*N+NRHS* + $ ILAENV( 1, 'DORMBR', 'QLT', MM, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*N+( N-1 )* + $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MAXWRK = MAX( MAXWRK, N*NRHS ) + MINWRK = MAX( 3*N+MM, 3*N+NRHS, BDSPAC ) + MAXWRK = MAX( MINWRK, MAXWRK ) + END IF + IF( N.GT.M ) THEN +* +* Compute workspace needed for DBDSQR +* + BDSPAC = MAX( 1, 5*M ) + MINWRK = MAX( 3*M+NRHS, 3*M+N, BDSPAC ) + IF( N.GE.MNTHR ) THEN +* +* Path 2a - underdetermined, with many more columns +* than rows +* + MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 ) + MAXWRK = MAX( MAXWRK, M*M+4*M+2*M* + $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M+4*M+NRHS* + $ ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M+4*M+( M-1 )* + $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M+M+BDSPAC ) + IF( NRHS.GT.1 ) THEN + MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS ) + ELSE + MAXWRK = MAX( MAXWRK, M*M+2*M ) + END IF + MAXWRK = MAX( MAXWRK, M+NRHS* + $ ILAENV( 1, 'DORMLQ', 'LT', N, NRHS, M, -1 ) ) + ELSE +* +* Path 2 - underdetermined +* + MAXWRK = 3*M + ( N+M )*ILAENV( 1, 'DGEBRD', ' ', M, N, + $ -1, -1 ) + MAXWRK = MAX( MAXWRK, 3*M+NRHS* + $ ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*M+M* + $ ILAENV( 1, 'DORGBR', 'P', M, N, M, -1 ) ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MAXWRK = MAX( MAXWRK, N*NRHS ) + END IF + END IF + MAXWRK = MAX( MINWRK, MAXWRK ) + WORK( 1 ) = MAXWRK + END IF +* + MINWRK = MAX( MINWRK, 1 ) + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) + $ INFO = -12 + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGELSS', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + EPS = DLAMCH( 'P' ) + SFMIN = DLAMCH( 'S' ) + SMLNUM = SFMIN / EPS + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', M, N, A, LDA, WORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 ) + RANK = 0 + GO TO 70 + END IF +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Overdetermined case +* + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined +* + MM = M + IF( M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns +* + MM = N + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Multiply B by transpose(Q) +* (Workspace: need N+NRHS, prefer N+NRHS*NB) +* + CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Zero out below R +* + IF( N.GT.1 ) + $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) + END IF +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in A +* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) +* + CALL DGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of R +* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) +* + CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors of R in A +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration +* multiply B by transpose of left singular vectors +* compute right singular vectors in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, N, 0, NRHS, S, WORK( IE ), A, LDA, VDUM, + $ 1, B, LDB, WORK( IWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 10 I = 1, N + IF( S( I ).GT.THR ) THEN + CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) + END IF + 10 CONTINUE +* +* Multiply B by right singular vectors +* (Workspace: need N, prefer N*NRHS) +* + IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN + CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, A, LDA, B, LDB, ZERO, + $ WORK, LDB ) + CALL DLACPY( 'G', N, NRHS, WORK, LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = LWORK / N + DO 20 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL DGEMM( 'T', 'N', N, BL, N, ONE, A, LDA, B( 1, I ), + $ LDB, ZERO, WORK, N ) + CALL DLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB ) + 20 CONTINUE + ELSE + CALL DGEMV( 'T', N, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 ) + CALL DCOPY( N, WORK, 1, B, 1 ) + END IF +* + ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+ + $ MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN +* +* Path 2a - underdetermined, with many more columns than rows +* and sufficient workspace for an efficient algorithm +* + LDWORK = M + IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ), + $ M*LDA+M+M*NRHS ) )LDWORK = LDA + ITAU = 1 + IWORK = M + 1 +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) + IL = IWORK +* +* Copy L to WORK(IL), zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ), + $ LDWORK ) + IE = IL + LDWORK*M + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL) +* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) +* + CALL DGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of L +* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) +* + CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUQ ), B, LDB, WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors of R in WORK(IL) +* (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, +* computing right singular vectors of L in WORK(IL) and +* multiplying B by transpose of left singular vectors +* (Workspace: need M*M+M+BDSPAC) +* + CALL DBDSQR( 'U', M, M, 0, NRHS, S, WORK( IE ), WORK( IL ), + $ LDWORK, A, LDA, B, LDB, WORK( IWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 30 I = 1, M + IF( S( I ).GT.THR ) THEN + CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) + END IF + 30 CONTINUE + IWORK = IE +* +* Multiply B by right singular vectors of L in WORK(IL) +* (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS) +* + IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN + CALL DGEMM( 'T', 'N', M, NRHS, M, ONE, WORK( IL ), LDWORK, + $ B, LDB, ZERO, WORK( IWORK ), LDB ) + CALL DLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = ( LWORK-IWORK+1 ) / M + DO 40 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL DGEMM( 'T', 'N', M, BL, M, ONE, WORK( IL ), LDWORK, + $ B( 1, I ), LDB, ZERO, WORK( IWORK ), N ) + CALL DLACPY( 'G', M, BL, WORK( IWORK ), N, B( 1, I ), + $ LDB ) + 40 CONTINUE + ELSE + CALL DGEMV( 'T', M, M, ONE, WORK( IL ), LDWORK, B( 1, 1 ), + $ 1, ZERO, WORK( IWORK ), 1 ) + CALL DCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 ) + END IF +* +* Zero out below first M rows of B +* + CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB ) + IWORK = ITAU + M +* +* Multiply transpose(Q) by B +* (Workspace: need M+NRHS, prefer M+NRHS*NB) +* + CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* + ELSE +* +* Path 2 - remaining underdetermined cases +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +* + CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors +* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) +* + CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors in A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, +* computing right singular vectors of A in A and +* multiplying B by transpose of left singular vectors +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'L', M, N, 0, NRHS, S, WORK( IE ), A, LDA, VDUM, + $ 1, B, LDB, WORK( IWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 50 I = 1, M + IF( S( I ).GT.THR ) THEN + CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) + END IF + 50 CONTINUE +* +* Multiply B by right singular vectors of A +* (Workspace: need N, prefer N*NRHS) +* + IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN + CALL DGEMM( 'T', 'N', N, NRHS, M, ONE, A, LDA, B, LDB, ZERO, + $ WORK, LDB ) + CALL DLACPY( 'F', N, NRHS, WORK, LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = LWORK / N + DO 60 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL DGEMM( 'T', 'N', N, BL, M, ONE, A, LDA, B( 1, I ), + $ LDB, ZERO, WORK, N ) + CALL DLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB ) + 60 CONTINUE + ELSE + CALL DGEMV( 'T', M, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 ) + CALL DCOPY( N, WORK, 1, B, 1 ) + END IF + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 70 CONTINUE + WORK( 1 ) = MAXWRK + RETURN +* +* End of DGELSS +* + END diff --git a/costa/native/external/lapack/dgelsx.f b/costa/native/external/lapack/dgelsx.f new file mode 100644 index 000000000..2a009a4c3 --- /dev/null +++ b/costa/native/external/lapack/dgelsx.f @@ -0,0 +1,350 @@ + SUBROUTINE DGELSX( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, + $ WORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, M, N, NRHS, RANK + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine DGELSY. +* +* DGELSX computes the minimum-norm solution to a real linear least +* squares problem: +* minimize || A * X - B || +* using a complete orthogonal factorization of A. A is an M-by-N +* matrix which may be rank-deficient. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution +* matrix X. +* +* The routine first computes a QR factorization with column pivoting: +* A * P = Q * [ R11 R12 ] +* [ 0 R22 ] +* with R11 defined as the largest leading submatrix whose estimated +* condition number is less than 1/RCOND. The order of R11, RANK, +* is the effective rank of A. +* +* Then, R22 is considered to be negligible, and R12 is annihilated +* by orthogonal transformations from the right, arriving at the +* complete orthogonal factorization: +* A * P = Q * [ T11 0 ] * Z +* [ 0 0 ] +* The minimum-norm solution is then +* X = P * Z' [ inv(T11)*Q1'*B ] +* [ 0 ] +* where Q1 consists of the first RANK columns of Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of +* columns of matrices B and X. NRHS >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A has been overwritten by details of its +* complete orthogonal factorization. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the M-by-NRHS right hand side matrix B. +* On exit, the N-by-NRHS solution matrix X. +* If m >= n and RANK = n, the residual sum-of-squares for +* the solution in the i-th column is given by the sum of +* squares of elements N+1:M in that column. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M,N). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(i) .ne. 0, the i-th column of A is an +* initial column, otherwise it is a free column. Before +* the QR factorization of A, all initial columns are +* permuted to the leading positions; only the remaining +* free columns are moved as a result of column pivoting +* during the factorization. +* On exit, if JPVT(i) = k, then the i-th column of A*P +* was the k-th column of A. +* +* RCOND (input) DOUBLE PRECISION +* RCOND is used to determine the effective rank of A, which +* is defined as the order of the largest leading triangular +* submatrix R11 in the QR factorization with pivoting of A, +* whose estimated condition number < 1/RCOND. +* +* RANK (output) INTEGER +* The effective rank of A, i.e., the order of the submatrix +* R11. This is the same as the order of the submatrix T11 +* in the complete orthogonal factorization of A. +* +* WORK (workspace) DOUBLE PRECISION array, dimension +* (max( min(M,N)+3*N, 2*min(M,N)+NRHS )), +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IMAX, IMIN + PARAMETER ( IMAX = 1, IMIN = 2 ) + DOUBLE PRECISION ZERO, ONE, DONE, NTDONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, DONE = ZERO, + $ NTDONE = ONE ) +* .. +* .. Local Scalars .. + INTEGER I, IASCL, IBSCL, ISMAX, ISMIN, J, K, MN + DOUBLE PRECISION ANRM, BIGNUM, BNRM, C1, C2, S1, S2, SMAX, + $ SMAXPR, SMIN, SMINPR, SMLNUM, T1, T2 +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL DLAMCH, DLANGE +* .. +* .. External Subroutines .. + EXTERNAL DGEQPF, DLAIC1, DLASCL, DLASET, DLATZM, DORM2R, + $ DTRSM, DTZRQF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + MN = MIN( M, N ) + ISMIN = MN + 1 + ISMAX = 2*MN + 1 +* +* Test the input arguments. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -7 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGELSX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max elements outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', M, N, A, LDA, WORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + RANK = 0 + GO TO 100 + END IF +* + BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Compute QR factorization with column pivoting of A: +* A * P = Q * R +* + CALL DGEQPF( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ), INFO ) +* +* workspace 3*N. Details of Householder rotations stored +* in WORK(1:MN). +* +* Determine RANK using incremental condition estimation +* + WORK( ISMIN ) = ONE + WORK( ISMAX ) = ONE + SMAX = ABS( A( 1, 1 ) ) + SMIN = SMAX + IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN + RANK = 0 + CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + GO TO 100 + ELSE + RANK = 1 + END IF +* + 10 CONTINUE + IF( RANK.LT.MN ) THEN + I = RANK + 1 + CALL DLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ), + $ A( I, I ), SMINPR, S1, C1 ) + CALL DLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ), + $ A( I, I ), SMAXPR, S2, C2 ) +* + IF( SMAXPR*RCOND.LE.SMINPR ) THEN + DO 20 I = 1, RANK + WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 ) + WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 ) + 20 CONTINUE + WORK( ISMIN+RANK ) = C1 + WORK( ISMAX+RANK ) = C2 + SMIN = SMINPR + SMAX = SMAXPR + RANK = RANK + 1 + GO TO 10 + END IF + END IF +* +* Logically partition R = [ R11 R12 ] +* [ 0 R22 ] +* where R11 = R(1:RANK,1:RANK) +* +* [R11,R12] = [ T11, 0 ] * Y +* + IF( RANK.LT.N ) + $ CALL DTZRQF( RANK, N, A, LDA, WORK( MN+1 ), INFO ) +* +* Details of Householder rotations stored in WORK(MN+1:2*MN) +* +* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) +* + CALL DORM2R( 'Left', 'Transpose', M, NRHS, MN, A, LDA, WORK( 1 ), + $ B, LDB, WORK( 2*MN+1 ), INFO ) +* +* workspace NRHS +* +* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) +* + CALL DTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK, + $ NRHS, ONE, A, LDA, B, LDB ) +* + DO 40 I = RANK + 1, N + DO 30 J = 1, NRHS + B( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS) +* + IF( RANK.LT.N ) THEN + DO 50 I = 1, RANK + CALL DLATZM( 'Left', N-RANK+1, NRHS, A( I, RANK+1 ), LDA, + $ WORK( MN+I ), B( I, 1 ), B( RANK+1, 1 ), LDB, + $ WORK( 2*MN+1 ) ) + 50 CONTINUE + END IF +* +* workspace NRHS +* +* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) +* + DO 90 J = 1, NRHS + DO 60 I = 1, N + WORK( 2*MN+I ) = NTDONE + 60 CONTINUE + DO 80 I = 1, N + IF( WORK( 2*MN+I ).EQ.NTDONE ) THEN + IF( JPVT( I ).NE.I ) THEN + K = I + T1 = B( K, J ) + T2 = B( JPVT( K ), J ) + 70 CONTINUE + B( JPVT( K ), J ) = T1 + WORK( 2*MN+K ) = DONE + T1 = T2 + K = JPVT( K ) + T2 = B( JPVT( K ), J ) + IF( JPVT( K ).NE.I ) + $ GO TO 70 + B( I, J ) = T1 + WORK( 2*MN+K ) = DONE + END IF + END IF + 80 CONTINUE + 90 CONTINUE +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 100 CONTINUE +* + RETURN +* +* End of DGELSX +* + END diff --git a/costa/native/external/lapack/dgelsy.f b/costa/native/external/lapack/dgelsy.f new file mode 100644 index 000000000..5690c5403 --- /dev/null +++ b/costa/native/external/lapack/dgelsy.f @@ -0,0 +1,379 @@ + SUBROUTINE DGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, + $ WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGELSY computes the minimum-norm solution to a real linear least +* squares problem: +* minimize || A * X - B || +* using a complete orthogonal factorization of A. A is an M-by-N +* matrix which may be rank-deficient. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution +* matrix X. +* +* The routine first computes a QR factorization with column pivoting: +* A * P = Q * [ R11 R12 ] +* [ 0 R22 ] +* with R11 defined as the largest leading submatrix whose estimated +* condition number is less than 1/RCOND. The order of R11, RANK, +* is the effective rank of A. +* +* Then, R22 is considered to be negligible, and R12 is annihilated +* by orthogonal transformations from the right, arriving at the +* complete orthogonal factorization: +* A * P = Q * [ T11 0 ] * Z +* [ 0 0 ] +* The minimum-norm solution is then +* X = P * Z' [ inv(T11)*Q1'*B ] +* [ 0 ] +* where Q1 consists of the first RANK columns of Q. +* +* This routine is basically identical to the original xGELSX except +* three differences: +* o The call to the subroutine xGEQPF has been substituted by the +* the call to the subroutine xGEQP3. This subroutine is a Blas-3 +* version of the QR factorization with column pivoting. +* o Matrix B (the right hand side) is updated with Blas-3. +* o The permutation of matrix B (the right hand side) is faster and +* more simple. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of +* columns of matrices B and X. NRHS >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A has been overwritten by details of its +* complete orthogonal factorization. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the M-by-NRHS right hand side matrix B. +* On exit, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M,N). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +* to the front of AP, otherwise column i is a free column. +* On exit, if JPVT(i) = k, then the i-th column of AP +* was the k-th column of A. +* +* RCOND (input) DOUBLE PRECISION +* RCOND is used to determine the effective rank of A, which +* is defined as the order of the largest leading triangular +* submatrix R11 in the QR factorization with pivoting of A, +* whose estimated condition number < 1/RCOND. +* +* RANK (output) INTEGER +* The effective rank of A, i.e., the order of the submatrix +* R11. This is the same as the order of the submatrix T11 +* in the complete orthogonal factorization of A. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* The unblocked strategy requires that: +* LWORK >= MAX( MN+3*N+1, 2*MN+NRHS ), +* where MN = min( M, N ). +* The block algorithm requires that: +* LWORK >= MAX( MN+2*N+NB*(N+1), 2*MN+NB*NRHS ), +* where NB is an upper bound on the blocksize returned +* by ILAENV for the routines DGEQP3, DTZRZF, STZRQF, DORMQR, +* and DORMRZ. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: If INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IMAX, IMIN + PARAMETER ( IMAX = 1, IMIN = 2 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IASCL, IBSCL, ISMAX, ISMIN, J, LWKOPT, MN, + $ NB, NB1, NB2, NB3, NB4 + DOUBLE PRECISION ANRM, BIGNUM, BNRM, C1, C2, S1, S2, SMAX, + $ SMAXPR, SMIN, SMINPR, SMLNUM, WSIZE +* .. +* .. External Functions .. + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL ILAENV, DLAMCH, DLANGE +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEQP3, DLABAD, DLAIC1, DLASCL, DLASET, + $ DORMQR, DORMRZ, DTRSM, DTZRZF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* + MN = MIN( M, N ) + ISMIN = MN + 1 + ISMAX = 2*MN + 1 +* +* Test the input arguments. +* + INFO = 0 + NB1 = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'DGERQF', ' ', M, N, -1, -1 ) + NB3 = ILAENV( 1, 'DORMQR', ' ', M, N, NRHS, -1 ) + NB4 = ILAENV( 1, 'DORMRQ', ' ', M, N, NRHS, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKOPT = MAX( 1, MN+2*N+NB*( N+1 ), 2*MN+NB*NRHS ) + WORK( 1 ) = DBLE( LWKOPT ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.MAX( 1, MN+3*N+1, 2*MN+NRHS ) .AND. .NOT. + $ LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGELSY', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max entries outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', M, N, A, LDA, WORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + RANK = 0 + GO TO 70 + END IF +* + BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Compute QR factorization with column pivoting of A: +* A * P = Q * R +* + CALL DGEQP3( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ), + $ LWORK-MN, INFO ) + WSIZE = MN + WORK( MN+1 ) +* +* workspace: MN+2*N+NB*(N+1). +* Details of Householder rotations stored in WORK(1:MN). +* +* Determine RANK using incremental condition estimation +* + WORK( ISMIN ) = ONE + WORK( ISMAX ) = ONE + SMAX = ABS( A( 1, 1 ) ) + SMIN = SMAX + IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN + RANK = 0 + CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + GO TO 70 + ELSE + RANK = 1 + END IF +* + 10 CONTINUE + IF( RANK.LT.MN ) THEN + I = RANK + 1 + CALL DLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ), + $ A( I, I ), SMINPR, S1, C1 ) + CALL DLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ), + $ A( I, I ), SMAXPR, S2, C2 ) +* + IF( SMAXPR*RCOND.LE.SMINPR ) THEN + DO 20 I = 1, RANK + WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 ) + WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 ) + 20 CONTINUE + WORK( ISMIN+RANK ) = C1 + WORK( ISMAX+RANK ) = C2 + SMIN = SMINPR + SMAX = SMAXPR + RANK = RANK + 1 + GO TO 10 + END IF + END IF +* +* workspace: 3*MN. +* +* Logically partition R = [ R11 R12 ] +* [ 0 R22 ] +* where R11 = R(1:RANK,1:RANK) +* +* [R11,R12] = [ T11, 0 ] * Y +* + IF( RANK.LT.N ) + $ CALL DTZRZF( RANK, N, A, LDA, WORK( MN+1 ), WORK( 2*MN+1 ), + $ LWORK-2*MN, INFO ) +* +* workspace: 2*MN. +* Details of Householder rotations stored in WORK(MN+1:2*MN) +* +* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) +* + CALL DORMQR( 'Left', 'Transpose', M, NRHS, MN, A, LDA, WORK( 1 ), + $ B, LDB, WORK( 2*MN+1 ), LWORK-2*MN, INFO ) + WSIZE = MAX( WSIZE, 2*MN+WORK( 2*MN+1 ) ) +* +* workspace: 2*MN+NB*NRHS. +* +* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) +* + CALL DTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK, + $ NRHS, ONE, A, LDA, B, LDB ) +* + DO 40 J = 1, NRHS + DO 30 I = RANK + 1, N + B( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS) +* + IF( RANK.LT.N ) THEN + CALL DORMRZ( 'Left', 'Transpose', N, NRHS, RANK, N-RANK, A, + $ LDA, WORK( MN+1 ), B, LDB, WORK( 2*MN+1 ), + $ LWORK-2*MN, INFO ) + END IF +* +* workspace: 2*MN+NRHS. +* +* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) +* + DO 60 J = 1, NRHS + DO 50 I = 1, N + WORK( JPVT( I ) ) = B( I, J ) + 50 CONTINUE + CALL DCOPY( N, WORK( 1 ), 1, B( 1, J ), 1 ) + 60 CONTINUE +* +* workspace: N. +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 70 CONTINUE + WORK( 1 ) = DBLE( LWKOPT ) +* + RETURN +* +* End of DGELSY +* + END diff --git a/costa/native/external/lapack/dgeql2.f b/costa/native/external/lapack/dgeql2.f new file mode 100644 index 000000000..3df1b0f14 --- /dev/null +++ b/costa/native/external/lapack/dgeql2.f @@ -0,0 +1,123 @@ + SUBROUTINE DGEQL2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGEQL2 computes a QL factorization of a real m by n matrix A: +* A = Q * L. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the m by n matrix A. +* On exit, if m >= n, the lower triangle of the subarray +* A(m-n+1:m,1:n) contains the n by n lower triangular matrix L; +* if m <= n, the elements on and below the (n-m)-th +* superdiagonal contain the m by n lower trapezoidal matrix L; +* the remaining elements, with the array TAU, represent the +* orthogonal matrix Q as a product of elementary reflectors +* (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(k) . . . H(2) H(1), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in +* A(1:m-k+i-1,n-k+i), and tau in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + DOUBLE PRECISION AII +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEQL2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = K, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* A(1:m-k+i-1,n-k+i) +* + CALL DLARFG( M-K+I, A( M-K+I, N-K+I ), A( 1, N-K+I ), 1, + $ TAU( I ) ) +* +* Apply H(i) to A(1:m-k+i,1:n-k+i-1) from the left +* + AII = A( M-K+I, N-K+I ) + A( M-K+I, N-K+I ) = ONE + CALL DLARF( 'Left', M-K+I, N-K+I-1, A( 1, N-K+I ), 1, TAU( I ), + $ A, LDA, WORK ) + A( M-K+I, N-K+I ) = AII + 10 CONTINUE + RETURN +* +* End of DGEQL2 +* + END diff --git a/costa/native/external/lapack/dgeqlf.f b/costa/native/external/lapack/dgeqlf.f new file mode 100644 index 000000000..3bfd1d766 --- /dev/null +++ b/costa/native/external/lapack/dgeqlf.f @@ -0,0 +1,205 @@ + SUBROUTINE DGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGEQLF computes a QL factorization of a real M-by-N matrix A: +* A = Q * L. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, +* if m >= n, the lower triangle of the subarray +* A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L; +* if m <= n, the elements on and below the (n-m)-th +* superdiagonal contain the M-by-N lower trapezoidal matrix L; +* the remaining elements, with the array TAU, represent the +* orthogonal matrix Q as a product of elementary reflectors +* (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(k) . . . H(2) H(1), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in +* A(1:m-k+i-1,n-k+i), and tau in TAU(i). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT, + $ MU, NB, NBMIN, NU, NX +* .. +* .. External Subroutines .. + EXTERNAL DGEQL2, DLARFB, DLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'DGEQLF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEQLF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DGEQLF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DGEQLF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially. +* The last kk columns are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* + DO 10 I = K - KK + KI + 1, K - KK + 1, -NB + IB = MIN( K-I+1, NB ) +* +* Compute the QL factorization of the current block +* A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1) +* + CALL DGEQL2( M-K+I+IB-1, IB, A( 1, N-K+I ), LDA, TAU( I ), + $ WORK, IINFO ) + IF( N-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL DLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H' to A(1:m-k+i+ib-1,1:n-k+i-1) from the left +* + CALL DLARFB( 'Left', 'Transpose', 'Backward', + $ 'Columnwise', M-K+I+IB-1, N-K+I-1, IB, + $ A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + MU = M - K + I + NB - 1 + NU = N - K + I + NB - 1 + ELSE + MU = M + NU = N + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 .AND. NU.GT.0 ) + $ CALL DGEQL2( MU, NU, A, LDA, TAU, WORK, IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of DGEQLF +* + END diff --git a/costa/native/external/lapack/dgeqp3.f b/costa/native/external/lapack/dgeqp3.f new file mode 100644 index 000000000..8ccde4506 --- /dev/null +++ b/costa/native/external/lapack/dgeqp3.f @@ -0,0 +1,279 @@ + SUBROUTINE DGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGEQP3 computes a QR factorization with column pivoting of a +* matrix A: A*P = Q*R using Level 3 BLAS. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the upper triangle of the array contains the +* min(M,N)-by-N upper trapezoidal matrix R; the elements below +* the diagonal, together with the array TAU, represent the +* orthogonal matrix Q as a product of min(M,N) elementary +* reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(J).ne.0, the J-th column of A is permuted +* to the front of A*P (a leading column); if JPVT(J)=0, +* the J-th column of A is a free column. +* On exit, if JPVT(J)=K, then the J-th column of A*P was the +* the K-th column of A. +* +* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO=0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 3*N+1. +* For optimal performance LWORK >= 2*N+( N+1 )*NB, where NB +* is the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real/complex scalar, and v is a real/complex vector +* with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in +* A(i+1:m,i), and tau in TAU(i). +* +* Based on contributions by +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* X. Sun, Computer Science Dept., Duke University, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER INB, INBMIN, IXOVER + PARAMETER ( INB = 1, INBMIN = 2, IXOVER = 3 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER FJB, IWS, J, JB, LWKOPT, MINMN, MINWS, NA, NB, + $ NBMIN, NFXD, NX, SM, SMINMN, SN, TOPBMN +* .. +* .. External Subroutines .. + EXTERNAL DGEQRF, DLAQP2, DLAQPS, DORMQR, DSWAP, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + DOUBLE PRECISION DNRM2 + EXTERNAL ILAENV, DNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* + IWS = 3*N + 1 + MINMN = MIN( M, N ) +* +* Test input arguments +* ==================== +* + INFO = 0 + NB = ILAENV( INB, 'DGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = 2*N + ( N+1 )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( ( LWORK.LT.IWS ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEQP3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( MINMN.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Move initial columns up front. +* + NFXD = 1 + DO 10 J = 1, N + IF( JPVT( J ).NE.0 ) THEN + IF( J.NE.NFXD ) THEN + CALL DSWAP( M, A( 1, J ), 1, A( 1, NFXD ), 1 ) + JPVT( J ) = JPVT( NFXD ) + JPVT( NFXD ) = J + ELSE + JPVT( J ) = J + END IF + NFXD = NFXD + 1 + ELSE + JPVT( J ) = J + END IF + 10 CONTINUE + NFXD = NFXD - 1 +* +* Factorize fixed columns +* ======================= +* +* Compute the QR factorization of fixed columns and update +* remaining columns. +* + IF( NFXD.GT.0 ) THEN + NA = MIN( M, NFXD ) +*CC CALL DGEQR2( M, NA, A, LDA, TAU, WORK, INFO ) + CALL DGEQRF( M, NA, A, LDA, TAU, WORK, LWORK, INFO ) + IWS = MAX( IWS, INT( WORK( 1 ) ) ) + IF( NA.LT.N ) THEN +*CC CALL DORM2R( 'Left', 'Transpose', M, N-NA, NA, A, LDA, +*CC $ TAU, A( 1, NA+1 ), LDA, WORK, INFO ) + CALL DORMQR( 'Left', 'Transpose', M, N-NA, NA, A, LDA, TAU, + $ A( 1, NA+1 ), LDA, WORK, LWORK, INFO ) + IWS = MAX( IWS, INT( WORK( 1 ) ) ) + END IF + END IF +* +* Factorize free columns +* ====================== +* + IF( NFXD.LT.MINMN ) THEN +* + SM = M - NFXD + SN = N - NFXD + SMINMN = MINMN - NFXD +* +* Determine the block size. +* + NB = ILAENV( INB, 'DGEQRF', ' ', SM, SN, -1, -1 ) + NBMIN = 2 + NX = 0 +* + IF( ( NB.GT.1 ) .AND. ( NB.LT.SMINMN ) ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( IXOVER, 'DGEQRF', ' ', SM, SN, -1, + $ -1 ) ) +* +* + IF( NX.LT.SMINMN ) THEN +* +* Determine if workspace is large enough for blocked code. +* + MINWS = 2*SN + ( SN+1 )*NB + IWS = MAX( IWS, MINWS ) + IF( LWORK.LT.MINWS ) THEN +* +* Not enough workspace to use optimal NB: Reduce NB and +* determine the minimum value of NB. +* + NB = ( LWORK-2*SN ) / ( SN+1 ) + NBMIN = MAX( 2, ILAENV( INBMIN, 'DGEQRF', ' ', SM, SN, + $ -1, -1 ) ) +* +* + END IF + END IF + END IF +* +* Initialize partial column norms. The first N elements of work +* store the exact column norms. +* + DO 20 J = NFXD + 1, N + WORK( J ) = DNRM2( SM, A( NFXD+1, J ), 1 ) + WORK( N+J ) = WORK( J ) + 20 CONTINUE +* + IF( ( NB.GE.NBMIN ) .AND. ( NB.LT.SMINMN ) .AND. + $ ( NX.LT.SMINMN ) ) THEN +* +* Use blocked code initially. +* + J = NFXD + 1 +* +* Compute factorization: while loop. +* +* + TOPBMN = MINMN - NX + 30 CONTINUE + IF( J.LE.TOPBMN ) THEN + JB = MIN( NB, TOPBMN-J+1 ) +* +* Factorize JB columns among columns J:N. +* + CALL DLAQPS( M, N-J+1, J-1, JB, FJB, A( 1, J ), LDA, + $ JPVT( J ), TAU( J ), WORK( J ), WORK( N+J ), + $ WORK( 2*N+1 ), WORK( 2*N+JB+1 ), N-J+1 ) +* + J = J + FJB + GO TO 30 + END IF + ELSE + J = NFXD + 1 + END IF +* +* Use unblocked code to factor the last or only block. +* +* + IF( J.LE.MINMN ) + $ CALL DLAQP2( M, N-J+1, J-1, A( 1, J ), LDA, JPVT( J ), + $ TAU( J ), WORK( J ), WORK( N+J ), + $ WORK( 2*N+1 ) ) +* + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of DGEQP3 +* + END diff --git a/costa/native/external/lapack/dgeqpf.f b/costa/native/external/lapack/dgeqpf.f new file mode 100644 index 000000000..a33bee9d2 --- /dev/null +++ b/costa/native/external/lapack/dgeqpf.f @@ -0,0 +1,222 @@ + SUBROUTINE DGEQPF( M, N, A, LDA, JPVT, TAU, WORK, INFO ) +* +* -- LAPACK test routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine DGEQP3. +* +* DGEQPF computes a QR factorization with column pivoting of a +* real M-by-N matrix A: A*P = Q*R. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0 +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the upper triangle of the array contains the +* min(M,N)-by-N upper triangular matrix R; the elements +* below the diagonal, together with the array TAU, +* represent the orthogonal matrix Q as a product of +* min(m,n) elementary reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +* to the front of A*P (a leading column); if JPVT(i) = 0, +* the i-th column of A is a free column. +* On exit, if JPVT(i) = k, then the i-th column of A*P +* was the k-th column of A. +* +* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(n) +* +* Each H(i) has the form +* +* H = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i). +* +* The matrix P is represented in jpvt as follows: If +* jpvt(j) = i +* then the jth column of P is the ith canonical unit vector. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, ITEMP, J, MA, MN, PVT + DOUBLE PRECISION AII, TEMP, TEMP2 +* .. +* .. External Subroutines .. + EXTERNAL DGEQR2, DLARF, DLARFG, DORM2R, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DNRM2 + EXTERNAL IDAMAX, DNRM2 +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEQPF', -INFO ) + RETURN + END IF +* + MN = MIN( M, N ) +* +* Move initial columns up front +* + ITEMP = 1 + DO 10 I = 1, N + IF( JPVT( I ).NE.0 ) THEN + IF( I.NE.ITEMP ) THEN + CALL DSWAP( M, A( 1, I ), 1, A( 1, ITEMP ), 1 ) + JPVT( I ) = JPVT( ITEMP ) + JPVT( ITEMP ) = I + ELSE + JPVT( I ) = I + END IF + ITEMP = ITEMP + 1 + ELSE + JPVT( I ) = I + END IF + 10 CONTINUE + ITEMP = ITEMP - 1 +* +* Compute the QR factorization and update remaining columns +* + IF( ITEMP.GT.0 ) THEN + MA = MIN( ITEMP, M ) + CALL DGEQR2( M, MA, A, LDA, TAU, WORK, INFO ) + IF( MA.LT.N ) THEN + CALL DORM2R( 'Left', 'Transpose', M, N-MA, MA, A, LDA, TAU, + $ A( 1, MA+1 ), LDA, WORK, INFO ) + END IF + END IF +* + IF( ITEMP.LT.MN ) THEN +* +* Initialize partial column norms. The first n elements of +* work store the exact column norms. +* + DO 20 I = ITEMP + 1, N + WORK( I ) = DNRM2( M-ITEMP, A( ITEMP+1, I ), 1 ) + WORK( N+I ) = WORK( I ) + 20 CONTINUE +* +* Compute factorization +* + DO 40 I = ITEMP + 1, MN +* +* Determine ith pivot column and swap if necessary +* + PVT = ( I-1 ) + IDAMAX( N-I+1, WORK( I ), 1 ) +* + IF( PVT.NE.I ) THEN + CALL DSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( I ) + JPVT( I ) = ITEMP + WORK( PVT ) = WORK( I ) + WORK( N+PVT ) = WORK( N+I ) + END IF +* +* Generate elementary reflector H(i) +* + IF( I.LT.M ) THEN + CALL DLARFG( M-I+1, A( I, I ), A( I+1, I ), 1, TAU( I ) ) + ELSE + CALL DLARFG( 1, A( M, M ), A( M, M ), 1, TAU( M ) ) + END IF +* + IF( I.LT.N ) THEN +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + AII = A( I, I ) + A( I, I ) = ONE + CALL DLARF( 'LEFT', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK( 2*N+1 ) ) + A( I, I ) = AII + END IF +* +* Update partial column norms +* + DO 30 J = I + 1, N + IF( WORK( J ).NE.ZERO ) THEN + TEMP = ONE - ( ABS( A( I, J ) ) / WORK( J ) )**2 + TEMP = MAX( TEMP, ZERO ) + TEMP2 = ONE + 0.05D0*TEMP* + $ ( WORK( J ) / WORK( N+J ) )**2 + IF( TEMP2.EQ.ONE ) THEN + IF( M-I.GT.0 ) THEN + WORK( J ) = DNRM2( M-I, A( I+1, J ), 1 ) + WORK( N+J ) = WORK( J ) + ELSE + WORK( J ) = ZERO + WORK( N+J ) = ZERO + END IF + ELSE + WORK( J ) = WORK( J )*SQRT( TEMP ) + END IF + END IF + 30 CONTINUE +* + 40 CONTINUE + END IF + RETURN +* +* End of DGEQPF +* + END diff --git a/costa/native/external/lapack/dgeqr2.f b/costa/native/external/lapack/dgeqr2.f new file mode 100644 index 000000000..b1ea5152e --- /dev/null +++ b/costa/native/external/lapack/dgeqr2.f @@ -0,0 +1,122 @@ + SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGEQR2 computes a QR factorization of a real m by n matrix A: +* A = Q * R. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the m by n matrix A. +* On exit, the elements on and above the diagonal of the array +* contain the min(m,n) by n upper trapezoidal matrix R (R is +* upper triangular if m >= n); the elements below the diagonal, +* with the array TAU, represent the orthogonal matrix Q as a +* product of elementary reflectors (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +* and tau in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + DOUBLE PRECISION AII +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEQR2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAU( I ) ) + IF( I.LT.N ) THEN +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + AII = A( I, I ) + A( I, I ) = ONE + CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) + A( I, I ) = AII + END IF + 10 CONTINUE + RETURN +* +* End of DGEQR2 +* + END diff --git a/costa/native/external/lapack/dgeqrf.f b/costa/native/external/lapack/dgeqrf.f new file mode 100644 index 000000000..7a40b537f --- /dev/null +++ b/costa/native/external/lapack/dgeqrf.f @@ -0,0 +1,197 @@ + SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGEQRF computes a QR factorization of a real M-by-N matrix A: +* A = Q * R. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the elements on and above the diagonal of the array +* contain the min(M,N)-by-N upper trapezoidal matrix R (R is +* upper triangular if m >= n); the elements below the diagonal, +* with the array TAU, represent the orthogonal matrix Q as a +* product of min(m,n) elementary reflectors (see Further +* Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +* and tau in TAU(i). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL DGEQR2, DLARFB, DLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DGEQRF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DGEQRF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the QR factorization of the current block +* A(i:m,i:i+ib-1) +* + CALL DGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H' to A(i:m,i+ib:n) from the left +* + CALL DLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) + $ CALL DGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of DGEQRF +* + END diff --git a/costa/native/external/lapack/dgerfs.f b/costa/native/external/lapack/dgerfs.f new file mode 100644 index 000000000..aa6c13415 --- /dev/null +++ b/costa/native/external/lapack/dgerfs.f @@ -0,0 +1,332 @@ + SUBROUTINE DGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, + $ X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DGERFS improves the computed solution to a system of linear +* equations and provides error bounds and backward error estimates for +* the solution. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose = Transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The original N-by-N matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) +* The factors L and U from the factorization A = P*L*U +* as computed by DGETRF. +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from DGETRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by DGETRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANST + INTEGER COUNT, I, J, K, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGEMV, DGETRS, DLACON, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGERFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL DGEMV( TRANS, N, N, -ONE, A, LDA, X( 1, J ), 1, ONE, + $ WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(op(A))*abs(X) + abs(B). +* + IF( NOTRAN ) THEN + DO 50 K = 1, N + XK = ABS( X( K, J ) ) + DO 40 I = 1, N + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + 40 CONTINUE + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + DO 60 I = 1, N + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 60 CONTINUE + WORK( K ) = WORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL DGETRS( TRANS, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N, + $ INFO ) + CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use DLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL DLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**T). +* + CALL DGETRS( TRANST, N, 1, AF, LDAF, IPIV, WORK( N+1 ), + $ N, INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 110 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 120 CONTINUE + CALL DGETRS( TRANS, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N, + $ INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of DGERFS +* + END diff --git a/costa/native/external/lapack/dgerq2.f b/costa/native/external/lapack/dgerq2.f new file mode 100644 index 000000000..cdba313da --- /dev/null +++ b/costa/native/external/lapack/dgerq2.f @@ -0,0 +1,123 @@ + SUBROUTINE DGERQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGERQ2 computes an RQ factorization of a real m by n matrix A: +* A = R * Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the m by n matrix A. +* On exit, if m <= n, the upper triangle of the subarray +* A(1:m,n-m+1:n) contains the m by m upper triangular matrix R; +* if m >= n, the elements on and above the (m-n)-th subdiagonal +* contain the m by n upper trapezoidal matrix R; the remaining +* elements, with the array TAU, represent the orthogonal matrix +* Q as a product of elementary reflectors (see Further +* Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (M) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in +* A(m-k+i,1:n-k+i-1), and tau in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + DOUBLE PRECISION AII +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGERQ2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = K, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* A(m-k+i,1:n-k+i-1) +* + CALL DLARFG( N-K+I, A( M-K+I, N-K+I ), A( M-K+I, 1 ), LDA, + $ TAU( I ) ) +* +* Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right +* + AII = A( M-K+I, N-K+I ) + A( M-K+I, N-K+I ) = ONE + CALL DLARF( 'Right', M-K+I-1, N-K+I, A( M-K+I, 1 ), LDA, + $ TAU( I ), A, LDA, WORK ) + A( M-K+I, N-K+I ) = AII + 10 CONTINUE + RETURN +* +* End of DGERQ2 +* + END diff --git a/costa/native/external/lapack/dgerqf.f b/costa/native/external/lapack/dgerqf.f new file mode 100644 index 000000000..f5940f232 --- /dev/null +++ b/costa/native/external/lapack/dgerqf.f @@ -0,0 +1,205 @@ + SUBROUTINE DGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGERQF computes an RQ factorization of a real M-by-N matrix A: +* A = R * Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, +* if m <= n, the upper triangle of the subarray +* A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R; +* if m >= n, the elements on and above the (m-n)-th subdiagonal +* contain the M-by-N upper trapezoidal matrix R; +* the remaining elements, with the array TAU, represent the +* orthogonal matrix Q as a product of min(m,n) elementary +* reflectors (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in +* A(m-k+i,1:n-k+i-1), and tau in TAU(i). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT, + $ MU, NB, NBMIN, NU, NX +* .. +* .. External Subroutines .. + EXTERNAL DGERQ2, DLARFB, DLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'DGERQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGERQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DGERQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DGERQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially. +* The last kk rows are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* + DO 10 I = K - KK + KI + 1, K - KK + 1, -NB + IB = MIN( K-I+1, NB ) +* +* Compute the RQ factorization of the current block +* A(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1) +* + CALL DGERQ2( IB, N-K+I+IB-1, A( M-K+I, 1 ), LDA, TAU( I ), + $ WORK, IINFO ) + IF( M-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL DLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, + $ A( M-K+I, 1 ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right +* + CALL DLARFB( 'Right', 'No transpose', 'Backward', + $ 'Rowwise', M-K+I-1, N-K+I+IB-1, IB, + $ A( M-K+I, 1 ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + MU = M - K + I + NB - 1 + NU = N - K + I + NB - 1 + ELSE + MU = M + NU = N + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 .AND. NU.GT.0 ) + $ CALL DGERQ2( MU, NU, A, LDA, TAU, WORK, IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of DGERQF +* + END diff --git a/costa/native/external/lapack/dgesc2.f b/costa/native/external/lapack/dgesc2.f new file mode 100644 index 000000000..33ac0e603 --- /dev/null +++ b/costa/native/external/lapack/dgesc2.f @@ -0,0 +1,133 @@ + SUBROUTINE DGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER LDA, N + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + DOUBLE PRECISION A( LDA, * ), RHS( * ) +* .. +* +* Purpose +* ======= +* +* DGESC2 solves a system of linear equations +* +* A * X = scale* RHS +* +* with a general N-by-N matrix A using the LU factorization with +* complete pivoting computed by DGETC2. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the LU part of the factorization of the n-by-n +* matrix A computed by DGETC2: A = P * L * U * Q +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1, N). +* +* RHS (input/output) DOUBLE PRECISION array, dimension (N). +* On entry, the right hand side vector b. +* On exit, the solution vector X. +* +* IPIV (iput) INTEGER array, dimension (N). +* The pivot indices; for 1 <= i <= N, row i of the +* matrix has been interchanged with row IPIV(i). +* +* JPIV (iput) INTEGER array, dimension (N). +* The pivot indices; for 1 <= j <= N, column j of the +* matrix has been interchanged with column JPIV(j). +* +* SCALE (output) DOUBLE PRECISION +* On exit, SCALE contains the scale factor. SCALE is chosen +* 0 <= SCALE <= 1 to prevent owerflow in the solution. +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, TWO + PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION BIGNUM, EPS, SMLNUM, TEMP +* .. +* .. External Subroutines .. + EXTERNAL DLASWP, DSCAL +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL IDAMAX, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Set constant to control owerflow +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Apply permutations IPIV to RHS +* + CALL DLASWP( 1, RHS, LDA, 1, N-1, IPIV, 1 ) +* +* Solve for L part +* + DO 20 I = 1, N - 1 + DO 10 J = I + 1, N + RHS( J ) = RHS( J ) - A( J, I )*RHS( I ) + 10 CONTINUE + 20 CONTINUE +* +* Solve for U part +* + SCALE = ONE +* +* Check for scaling +* + I = IDAMAX( N, RHS, 1 ) + IF( TWO*SMLNUM*ABS( RHS( I ) ).GT.ABS( A( N, N ) ) ) THEN + TEMP = ( ONE / TWO ) / ABS( RHS( I ) ) + CALL DSCAL( N, TEMP, RHS( 1 ), 1 ) + SCALE = SCALE*TEMP + END IF +* + DO 40 I = N, 1, -1 + TEMP = ONE / A( I, I ) + RHS( I ) = RHS( I )*TEMP + DO 30 J = I + 1, N + RHS( I ) = RHS( I ) - RHS( J )*( A( I, J )*TEMP ) + 30 CONTINUE + 40 CONTINUE +* +* Apply permutations JPIV to the solution (RHS) +* + CALL DLASWP( 1, RHS, LDA, 1, N-1, JPIV, -1 ) + RETURN +* +* End of DGESC2 +* + END diff --git a/costa/native/external/lapack/dgesdd.f b/costa/native/external/lapack/dgesdd.f new file mode 100644 index 000000000..51e55e60b --- /dev/null +++ b/costa/native/external/lapack/dgesdd.f @@ -0,0 +1,1335 @@ + SUBROUTINE DGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, + $ LWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ + INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGESDD computes the singular value decomposition (SVD) of a real +* M-by-N matrix A, optionally computing the left and right singular +* vectors. If singular vectors are desired, it uses a +* divide-and-conquer algorithm. +* +* The SVD is written +* +* A = U * SIGMA * transpose(V) +* +* where SIGMA is an M-by-N matrix which is zero except for its +* min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and +* V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA +* are the singular values of A; they are real and non-negative, and +* are returned in descending order. The first min(m,n) columns of +* U and V are the left and right singular vectors of A. +* +* Note that the routine returns VT = V**T, not V. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* Specifies options for computing all or part of the matrix U: +* = 'A': all M columns of U and all N rows of V**T are +* returned in the arrays U and VT; +* = 'S': the first min(M,N) columns of U and the first +* min(M,N) rows of V**T are returned in the arrays U +* and VT; +* = 'O': If M >= N, the first N columns of U are overwritten +* on the array A and all rows of V**T are returned in +* the array VT; +* otherwise, all columns of U are returned in the +* array U and the first M rows of V**T are overwritten +* in the array VT; +* = 'N': no columns of U or rows of V**T are computed. +* +* M (input) INTEGER +* The number of rows of the input matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the input matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, +* if JOBZ = 'O', A is overwritten with the first N columns +* of U (the left singular vectors, stored +* columnwise) if M >= N; +* A is overwritten with the first M rows +* of V**T (the right singular vectors, stored +* rowwise) otherwise. +* if JOBZ .ne. 'O', the contents of A are destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* S (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The singular values of A, sorted so that S(i) >= S(i+1). +* +* U (output) DOUBLE PRECISION array, dimension (LDU,UCOL) +* UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; +* UCOL = min(M,N) if JOBZ = 'S'. +* If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M +* orthogonal matrix U; +* if JOBZ = 'S', U contains the first min(M,N) columns of U +* (the left singular vectors, stored columnwise); +* if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= 1; if +* JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. +* +* VT (output) DOUBLE PRECISION array, dimension (LDVT,N) +* If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the +* N-by-N orthogonal matrix V**T; +* if JOBZ = 'S', VT contains the first min(M,N) rows of +* V**T (the right singular vectors, stored rowwise); +* if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. LDVT >= 1; if +* JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; +* if JOBZ = 'S', LDVT >= min(M,N). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK; +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1. +* If JOBZ = 'N', +* LWORK >= 3*min(M,N) + max(max(M,N),6*min(M,N)). +* If JOBZ = 'O', +* LWORK >= 3*min(M,N)*min(M,N) + +* max(max(M,N),5*min(M,N)*min(M,N)+4*min(M,N)). +* If JOBZ = 'S' or 'A' +* LWORK >= 3*min(M,N)*min(M,N) + +* max(max(M,N),4*min(M,N)*min(M,N)+4*min(M,N)). +* For good performance, LWORK should generally be larger. +* If LWORK < 0 but other input arguments are legal, WORK(1) +* returns the optimal LWORK. +* +* IWORK (workspace) INTEGER array, dimension (8*min(M,N)) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: DBDSDC did not converge, updating process failed. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS + INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IL, + $ IR, ISCL, ITAU, ITAUP, ITAUQ, IU, IVT, LDWKVT, + $ LDWRKL, LDWRKR, LDWRKU, MAXWRK, MINMN, MINWRK, + $ MNTHR, NWORK, WRKBL + DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DBDSDC, DGEBRD, DGELQF, DGEMM, DGEQRF, DLACPY, + $ DLASCL, DLASET, DORGBR, DORGLQ, DORGQR, DORMBR, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL DLAMCH, DLANGE, ILAENV, LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, INT, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + MNTHR = INT( MINMN*11.0D0 / 6.0D0 ) + WNTQA = LSAME( JOBZ, 'A' ) + WNTQS = LSAME( JOBZ, 'S' ) + WNTQAS = WNTQA .OR. WNTQS + WNTQO = LSAME( JOBZ, 'O' ) + WNTQN = LSAME( JOBZ, 'N' ) + MINWRK = 1 + MAXWRK = 1 + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.( WNTQA .OR. WNTQS .OR. WNTQO .OR. WNTQN ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDU.LT.1 .OR. ( WNTQAS .AND. LDU.LT.M ) .OR. + $ ( WNTQO .AND. M.LT.N .AND. LDU.LT.M ) ) THEN + INFO = -8 + ELSE IF( LDVT.LT.1 .OR. ( WNTQA .AND. LDVT.LT.N ) .OR. + $ ( WNTQS .AND. LDVT.LT.MINMN ) .OR. + $ ( WNTQO .AND. M.GE.N .AND. LDVT.LT.N ) ) THEN + INFO = -10 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 .AND. M.GT.0 .AND. N.GT.0 ) THEN + IF( M.GE.N ) THEN +* +* Compute space needed for DBDSDC +* + IF( WNTQN ) THEN + BDSPAC = 7*N + ELSE + BDSPAC = 3*N*N + 4*N + END IF + IF( M.GE.MNTHR ) THEN + IF( WNTQN ) THEN +* +* Path 1 (M much larger than N, JOBZ='N') +* + WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, + $ -1 ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) ) + MAXWRK = MAX( WRKBL, BDSPAC+N ) + MINWRK = BDSPAC + N + ELSE IF( WNTQO ) THEN +* +* Path 2 (M much larger than N, JOBZ='O') +* + WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'DORMBR', 'QLN', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'DORMBR', 'PRT', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC+3*N ) + MAXWRK = WRKBL + 2*N*N + MINWRK = BDSPAC + 2*N*N + 3*N + ELSE IF( WNTQS ) THEN +* +* Path 3 (M much larger than N, JOBZ='S') +* + WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'DORMBR', 'QLN', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'DORMBR', 'PRT', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC+3*N ) + MAXWRK = WRKBL + N*N + MINWRK = BDSPAC + N*N + 3*N + ELSE IF( WNTQA ) THEN +* +* Path 4 (M much larger than N, JOBZ='A') +* + WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M, + $ M, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'DORMBR', 'QLN', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'DORMBR', 'PRT', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC+3*N ) + MAXWRK = WRKBL + N*N + MINWRK = BDSPAC + N*N + 3*N + END IF + ELSE +* +* Path 5 (M at least N, but not much larger) +* + WRKBL = 3*N + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N, -1, + $ -1 ) + IF( WNTQN ) THEN + MAXWRK = MAX( WRKBL, BDSPAC+3*N ) + MINWRK = 3*N + MAX( M, BDSPAC ) + ELSE IF( WNTQO ) THEN + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'DORMBR', 'QLN', M, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'DORMBR', 'PRT', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC+3*N ) + MAXWRK = WRKBL + M*N + MINWRK = 3*N + MAX( M, N*N+BDSPAC ) + ELSE IF( WNTQS ) THEN + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'DORMBR', 'QLN', M, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'DORMBR', 'PRT', N, N, N, -1 ) ) + MAXWRK = MAX( WRKBL, BDSPAC+3*N ) + MINWRK = 3*N + MAX( M, BDSPAC ) + ELSE IF( WNTQA ) THEN + WRKBL = MAX( WRKBL, 3*N+M* + $ ILAENV( 1, 'DORMBR', 'QLN', M, M, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'DORMBR', 'PRT', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, BDSPAC+3*N ) + MINWRK = 3*N + MAX( M, BDSPAC ) + END IF + END IF + ELSE +* +* Compute space needed for DBDSDC +* + IF( WNTQN ) THEN + BDSPAC = 7*M + ELSE + BDSPAC = 3*M*M + 4*M + END IF + IF( N.GE.MNTHR ) THEN + IF( WNTQN ) THEN +* +* Path 1t (N much larger than M, JOBZ='N') +* + WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, + $ -1 ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) ) + MAXWRK = MAX( WRKBL, BDSPAC+M ) + MINWRK = BDSPAC + M + ELSE IF( WNTQO ) THEN +* +* Path 2t (N much larger than M, JOBZ='O') +* + WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'DORMBR', 'QLN', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'DORMBR', 'PRT', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC+3*M ) + MAXWRK = WRKBL + 2*M*M + MINWRK = BDSPAC + 2*M*M + 3*M + ELSE IF( WNTQS ) THEN +* +* Path 3t (N much larger than M, JOBZ='S') +* + WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'DORMBR', 'QLN', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'DORMBR', 'PRT', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC+3*M ) + MAXWRK = WRKBL + M*M + MINWRK = BDSPAC + M*M + 3*M + ELSE IF( WNTQA ) THEN +* +* Path 4t (N much larger than M, JOBZ='A') +* + WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'DORMBR', 'QLN', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'DORMBR', 'PRT', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC+3*M ) + MAXWRK = WRKBL + M*M + MINWRK = BDSPAC + M*M + 3*M + END IF + ELSE +* +* Path 5t (N greater than M, but not much larger) +* + WRKBL = 3*M + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N, -1, + $ -1 ) + IF( WNTQN ) THEN + MAXWRK = MAX( WRKBL, BDSPAC+3*M ) + MINWRK = 3*M + MAX( N, BDSPAC ) + ELSE IF( WNTQO ) THEN + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'DORMBR', 'QLN', M, M, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'DORMBR', 'PRT', M, N, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC+3*M ) + MAXWRK = WRKBL + M*N + MINWRK = 3*M + MAX( N, M*M+BDSPAC ) + ELSE IF( WNTQS ) THEN + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'DORMBR', 'QLN', M, M, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'DORMBR', 'PRT', M, N, M, -1 ) ) + MAXWRK = MAX( WRKBL, BDSPAC+3*M ) + MINWRK = 3*M + MAX( N, BDSPAC ) + ELSE IF( WNTQA ) THEN + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'DORMBR', 'QLN', M, M, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'DORMBR', 'PRT', N, N, M, -1 ) ) + MAXWRK = MAX( WRKBL, BDSPAC+3*M ) + MINWRK = 3*M + MAX( N, BDSPAC ) + END IF + END IF + END IF + WORK( 1 ) = MAXWRK + END IF +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGESDD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + IF( LWORK.GE.1 ) + $ WORK( 1 ) = ONE + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce using the QR +* decomposition (if sufficient workspace available) +* + IF( M.GE.MNTHR ) THEN +* + IF( WNTQN ) THEN +* +* Path 1 (M much larger than N, JOBZ='N') +* No singular vectors to be computed +* + ITAU = 1 + NWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Zero out below R +* + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + NWORK = IE + N +* +* Perform bidiagonal SVD, computing singular values only +* (Workspace: need N+BDSPAC) +* + CALL DBDSDC( 'U', 'N', N, S, WORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, WORK( NWORK ), IWORK, INFO ) +* + ELSE IF( WNTQO ) THEN +* +* Path 2 (M much larger than N, JOBZ = 'O') +* N left singular vectors to be overwritten on A and +* N right singular vectors to be computed in VT +* + IR = 1 +* +* WORK(IR) is LDWRKR by N +* + IF( LWORK.GE.LDA*N+N*N+3*N+BDSPAC ) THEN + LDWRKR = LDA + ELSE + LDWRKR = ( LWORK-N*N-3*N-BDSPAC ) / N + END IF + ITAU = IR + LDWRKR*N + NWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ), + $ LDWRKR ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in VT, copying result to WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* WORK(IU) is N by N +* + IU = NWORK + NWORK = IU + N*N +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in WORK(IU) and computing right +* singular vectors of bidiagonal matrix in VT +* (Workspace: need N+N*N+BDSPAC) +* + CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), N, + $ VT, LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite WORK(IU) by left singular vectors of R +* and VT by right singular vectors of R +* (Workspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) +* + CALL DORMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IU ), N, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL DORMBR( 'P', 'R', 'T', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in WORK(IR) and copying to A +* (Workspace: need 2*N*N, prefer N*N+M*N) +* + DO 10 I = 1, M, LDWRKR + CHUNK = MIN( M-I+1, LDWRKR ) + CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ), + $ LDA, WORK( IU ), N, ZERO, WORK( IR ), + $ LDWRKR ) + CALL DLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR, + $ A( I, 1 ), LDA ) + 10 CONTINUE +* + ELSE IF( WNTQS ) THEN +* +* Path 3 (M much larger than N, JOBZ='S') +* N left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IR = 1 +* +* WORK(IR) is N by N +* + LDWRKR = N + ITAU = IR + LDWRKR*N + NWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ), + $ LDWRKR ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagoal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* (Workspace: need N+BDSPAC) +* + CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite U by left singular vectors of R and VT +* by right singular vectors of R +* (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* + CALL DORMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* + CALL DORMBR( 'P', 'R', 'T', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in U +* (Workspace: need N*N) +* + CALL DLACPY( 'F', N, N, U, LDU, WORK( IR ), LDWRKR ) + CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA, WORK( IR ), + $ LDWRKR, ZERO, U, LDU ) +* + ELSE IF( WNTQA ) THEN +* +* Path 4 (M much larger than N, JOBZ='A') +* M left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IU = 1 +* +* WORK(IU) is N by N +* + LDWRKU = N + ITAU = IU + LDWRKU*N + NWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) + CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Produce R in A, zeroing out other entries +* + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in A +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in WORK(IU) and computing right +* singular vectors of bidiagonal matrix in VT +* (Workspace: need N+N*N+BDSPAC) +* + CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), N, + $ VT, LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite WORK(IU) by left singular vectors of R and VT +* by right singular vectors of R +* (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* + CALL DORMBR( 'Q', 'L', 'N', N, N, N, A, LDA, + $ WORK( ITAUQ ), WORK( IU ), LDWRKU, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + CALL DORMBR( 'P', 'R', 'T', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (Workspace: need N*N) +* + CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU, WORK( IU ), + $ LDWRKU, ZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL DLACPY( 'F', M, N, A, LDA, U, LDU ) +* + END IF +* + ELSE +* +* M .LT. MNTHR +* +* Path 5 (M at least N, but not much larger) +* Reduce to bidiagonal form without QR decomposition +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize A +* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) +* + CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IF( WNTQN ) THEN +* +* Perform bidiagonal SVD, only computing singular values +* (Workspace: need N+BDSPAC) +* + CALL DBDSDC( 'U', 'N', N, S, WORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, WORK( NWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN + IU = NWORK + IF( LWORK.GE.M*N+3*N+BDSPAC ) THEN +* +* WORK( IU ) is M by N +* + LDWRKU = M + NWORK = IU + LDWRKU*N + CALL DLASET( 'F', M, N, ZERO, ZERO, WORK( IU ), + $ LDWRKU ) + ELSE +* +* WORK( IU ) is N by N +* + LDWRKU = N + NWORK = IU + LDWRKU*N +* +* WORK(IR) is LDWRKR by N +* + IR = NWORK + LDWRKR = ( LWORK-N*N-3*N ) / N + END IF + NWORK = IU + LDWRKU*N +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in WORK(IU) and computing right +* singular vectors of bidiagonal matrix in VT +* (Workspace: need N+N*N+BDSPAC) +* + CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), + $ LDWRKU, VT, LDVT, DUM, IDUM, WORK( NWORK ), + $ IWORK, INFO ) +* +* Overwrite VT by right singular vectors of A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DORMBR( 'P', 'R', 'T', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* + IF( LWORK.GE.M*N+3*N+BDSPAC ) THEN +* +* Overwrite WORK(IU) by left singular vectors of A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DORMBR( 'Q', 'L', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), WORK( IU ), LDWRKU, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy left singular vectors of A from WORK(IU) to A +* + CALL DLACPY( 'F', M, N, WORK( IU ), LDWRKU, A, LDA ) + ELSE +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by left singular vectors of +* bidiagonal matrix in WORK(IU), storing result in +* WORK(IR) and copying to A +* (Workspace: need 2*N*N, prefer N*N+M*N) +* + DO 20 I = 1, M, LDWRKR + CHUNK = MIN( M-I+1, LDWRKR ) + CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ), + $ LDA, WORK( IU ), LDWRKU, ZERO, + $ WORK( IR ), LDWRKR ) + CALL DLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR, + $ A( I, 1 ), LDA ) + 20 CONTINUE + END IF +* + ELSE IF( WNTQS ) THEN +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* (Workspace: need N+BDSPAC) +* + CALL DLASET( 'F', M, N, ZERO, ZERO, U, LDU ) + CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite U by left singular vectors of A and VT +* by right singular vectors of A +* (Workspace: need 3*N, prefer 2*N+N*NB) +* + CALL DORMBR( 'Q', 'L', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL DORMBR( 'P', 'R', 'T', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + ELSE IF( WNTQA ) THEN +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* (Workspace: need N+BDSPAC) +* + CALL DLASET( 'F', M, M, ZERO, ZERO, U, LDU ) + CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Set the right corner of U to identity matrix +* + CALL DLASET( 'F', M-N, M-N, ZERO, ONE, U( N+1, N+1 ), + $ LDU ) +* +* Overwrite U by left singular vectors of A and VT +* by right singular vectors of A +* (Workspace: need N*N+2*N+M, prefer N*N+2*N+M*NB) +* + CALL DORMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL DORMBR( 'P', 'R', 'T', N, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + END IF +* + END IF +* + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce using the LQ decomposition (if +* sufficient workspace available) +* + IF( N.GE.MNTHR ) THEN +* + IF( WNTQN ) THEN +* +* Path 1t (N much larger than M, JOBZ='N') +* No singular vectors to be computed +* + ITAU = 1 + NWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Zero out above L +* + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA ) + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + NWORK = IE + M +* +* Perform bidiagonal SVD, computing singular values only +* (Workspace: need M+BDSPAC) +* + CALL DBDSDC( 'U', 'N', M, S, WORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, WORK( NWORK ), IWORK, INFO ) +* + ELSE IF( WNTQO ) THEN +* +* Path 2t (N much larger than M, JOBZ='O') +* M right singular vectors to be overwritten on A and +* M left singular vectors to be computed in U +* + IVT = 1 +* +* IVT is M by M +* + IL = IVT + M*M + IF( LWORK.GE.M*N+M*M+3*M+BDSPAC ) THEN +* +* WORK(IL) is M by N +* + LDWRKL = M + CHUNK = N + ELSE + LDWRKL = M + CHUNK = ( LWORK-M*M ) / M + END IF + ITAU = IL + LDWRKL*M + NWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy L to WORK(IL), zeroing about above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IL+LDWRKL ), LDWRKL ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL) +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL DGEBRD( M, M, WORK( IL ), LDWRKL, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U, and computing right singular +* vectors of bidiagonal matrix in WORK(IVT) +* (Workspace: need M+M*M+BDSPAC) +* + CALL DBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU, + $ WORK( IVT ), M, DUM, IDUM, WORK( NWORK ), + $ IWORK, INFO ) +* +* Overwrite U by left singular vectors of L and WORK(IVT) +* by right singular vectors of L +* (Workspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) +* + CALL DORMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL DORMBR( 'P', 'R', 'T', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUP ), WORK( IVT ), M, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply right singular vectors of L in WORK(IVT) by Q +* in A, storing result in WORK(IL) and copying to A +* (Workspace: need 2*M*M, prefer M*M+M*N) +* + DO 30 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IVT ), M, + $ A( 1, I ), LDA, ZERO, WORK( IL ), LDWRKL ) + CALL DLACPY( 'F', M, BLK, WORK( IL ), LDWRKL, + $ A( 1, I ), LDA ) + 30 CONTINUE +* + ELSE IF( WNTQS ) THEN +* +* Path 3t (N much larger than M, JOBZ='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IL = 1 +* +* WORK(IL) is M by M +* + LDWRKL = M + ITAU = IL + LDWRKL*M + NWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy L to WORK(IL), zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IL+LDWRKL ), LDWRKL ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL DGEBRD( M, M, WORK( IL ), LDWRKL, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* (Workspace: need M+BDSPAC) +* + CALL DBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite U by left singular vectors of L and VT +* by right singular vectors of L +* (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* + CALL DORMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL DORMBR( 'P', 'R', 'T', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Multiply right singular vectors of L in WORK(IL) by +* Q in A, storing result in VT +* (Workspace: need M*M) +* + CALL DLACPY( 'F', M, M, VT, LDVT, WORK( IL ), LDWRKL ) + CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IL ), LDWRKL, + $ A, LDA, ZERO, VT, LDVT ) +* + ELSE IF( WNTQA ) THEN +* +* Path 4t (N much larger than M, JOBZ='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IVT = 1 +* +* WORK(IVT) is M by M +* + LDWKVT = M + ITAU = IVT + LDWKVT*M + NWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Produce L in A, zeroing out other entries +* + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in A +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in WORK(IVT) +* (Workspace: need M+M*M+BDSPAC) +* + CALL DBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU, + $ WORK( IVT ), LDWKVT, DUM, IDUM, + $ WORK( NWORK ), IWORK, INFO ) +* +* Overwrite U by left singular vectors of L and WORK(IVT) +* by right singular vectors of L +* (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* + CALL DORMBR( 'Q', 'L', 'N', M, M, M, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL DORMBR( 'P', 'R', 'T', M, M, M, A, LDA, + $ WORK( ITAUP ), WORK( IVT ), LDWKVT, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply right singular vectors of L in WORK(IVT) by +* Q in VT, storing result in A +* (Workspace: need M*M) +* + CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IVT ), LDWKVT, + $ VT, LDVT, ZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + END IF +* + ELSE +* +* N .LT. MNTHR +* +* Path 5t (N greater than M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize A +* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +* + CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IF( WNTQN ) THEN +* +* Perform bidiagonal SVD, only computing singular values +* (Workspace: need M+BDSPAC) +* + CALL DBDSDC( 'L', 'N', M, S, WORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, WORK( NWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN + LDWKVT = M + IVT = NWORK + IF( LWORK.GE.M*N+3*M+BDSPAC ) THEN +* +* WORK( IVT ) is M by N +* + CALL DLASET( 'F', M, N, ZERO, ZERO, WORK( IVT ), + $ LDWKVT ) + NWORK = IVT + LDWKVT*N + ELSE +* +* WORK( IVT ) is M by M +* + NWORK = IVT + LDWKVT*M + IL = NWORK +* +* WORK(IL) is M by CHUNK +* + CHUNK = ( LWORK-M*M-3*M ) / M + END IF +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in WORK(IVT) +* (Workspace: need M*M+BDSPAC) +* + CALL DBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, + $ WORK( IVT ), LDWKVT, DUM, IDUM, + $ WORK( NWORK ), IWORK, INFO ) +* +* Overwrite U by left singular vectors of A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DORMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* + IF( LWORK.GE.M*N+3*M+BDSPAC ) THEN +* +* Overwrite WORK(IVT) by left singular vectors of A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DORMBR( 'P', 'R', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), WORK( IVT ), LDWKVT, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy right singular vectors of A from WORK(IVT) to A +* + CALL DLACPY( 'F', M, N, WORK( IVT ), LDWKVT, A, LDA ) + ELSE +* +* Generate P**T in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by right singular vectors of +* bidiagonal matrix in WORK(IVT), storing result in +* WORK(IL) and copying to A +* (Workspace: need 2*M*M, prefer M*M+M*N) +* + DO 40 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IVT ), + $ LDWKVT, A( 1, I ), LDA, ZERO, + $ WORK( IL ), M ) + CALL DLACPY( 'F', M, BLK, WORK( IL ), M, A( 1, I ), + $ LDA ) + 40 CONTINUE + END IF + ELSE IF( WNTQS ) THEN +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* (Workspace: need M+BDSPAC) +* + CALL DLASET( 'F', M, N, ZERO, ZERO, VT, LDVT ) + CALL DBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite U by left singular vectors of A and VT +* by right singular vectors of A +* (Workspace: need 3*M, prefer 2*M+M*NB) +* + CALL DORMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL DORMBR( 'P', 'R', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + ELSE IF( WNTQA ) THEN +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* (Workspace: need M+BDSPAC) +* + CALL DLASET( 'F', N, N, ZERO, ZERO, VT, LDVT ) + CALL DBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Set the right corner of VT to identity matrix +* + CALL DLASET( 'F', N-M, N-M, ZERO, ONE, VT( M+1, M+1 ), + $ LDVT ) +* +* Overwrite U by left singular vectors of A and VT +* by right singular vectors of A +* (Workspace: need 2*M+N, prefer 2*M+N*NB) +* + CALL DORMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL DORMBR( 'P', 'R', 'T', N, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + END IF +* + END IF +* + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( ANRM.LT.SMLNUM ) + $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = DBLE( MAXWRK ) +* + RETURN +* +* End of DGESDD +* + END diff --git a/costa/native/external/lapack/dgesv.f b/costa/native/external/lapack/dgesv.f new file mode 100644 index 000000000..3dbd27ff0 --- /dev/null +++ b/costa/native/external/lapack/dgesv.f @@ -0,0 +1,108 @@ + SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DGESV computes the solution to a real system of linear equations +* A * X = B, +* where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +* +* The LU decomposition with partial pivoting and row interchanges is +* used to factor A as +* A = P * L * U, +* where P is a permutation matrix, L is unit lower triangular, and U is +* upper triangular. The factored form of A is then used to solve the +* system of equations A * X = B. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the N-by-N coefficient matrix A. +* On exit, the factors L and U from the factorization +* A = P*L*U; the unit diagonal elements of L are not stored. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* The pivot indices that define the permutation matrix P; +* row i of the matrix was interchanged with row IPIV(i). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS matrix of right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, so the solution could not be computed. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL DGETRF, DGETRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGESV ', -INFO ) + RETURN + END IF +* +* Compute the LU factorization of A. +* + CALL DGETRF( N, N, A, LDA, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL DGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, B, LDB, + $ INFO ) + END IF + RETURN +* +* End of DGESV +* + END diff --git a/costa/native/external/lapack/dgesvd.f b/costa/native/external/lapack/dgesvd.f new file mode 100644 index 000000000..95b9b3e87 --- /dev/null +++ b/costa/native/external/lapack/dgesvd.f @@ -0,0 +1,3417 @@ + SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, + $ WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBU, JOBVT + INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGESVD computes the singular value decomposition (SVD) of a real +* M-by-N matrix A, optionally computing the left and/or right singular +* vectors. The SVD is written +* +* A = U * SIGMA * transpose(V) +* +* where SIGMA is an M-by-N matrix which is zero except for its +* min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and +* V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA +* are the singular values of A; they are real and non-negative, and +* are returned in descending order. The first min(m,n) columns of +* U and V are the left and right singular vectors of A. +* +* Note that the routine returns V**T, not V. +* +* Arguments +* ========= +* +* JOBU (input) CHARACTER*1 +* Specifies options for computing all or part of the matrix U: +* = 'A': all M columns of U are returned in array U: +* = 'S': the first min(m,n) columns of U (the left singular +* vectors) are returned in the array U; +* = 'O': the first min(m,n) columns of U (the left singular +* vectors) are overwritten on the array A; +* = 'N': no columns of U (no left singular vectors) are +* computed. +* +* JOBVT (input) CHARACTER*1 +* Specifies options for computing all or part of the matrix +* V**T: +* = 'A': all N rows of V**T are returned in the array VT; +* = 'S': the first min(m,n) rows of V**T (the right singular +* vectors) are returned in the array VT; +* = 'O': the first min(m,n) rows of V**T (the right singular +* vectors) are overwritten on the array A; +* = 'N': no rows of V**T (no right singular vectors) are +* computed. +* +* JOBVT and JOBU cannot both be 'O'. +* +* M (input) INTEGER +* The number of rows of the input matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the input matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, +* if JOBU = 'O', A is overwritten with the first min(m,n) +* columns of U (the left singular vectors, +* stored columnwise); +* if JOBVT = 'O', A is overwritten with the first min(m,n) +* rows of V**T (the right singular vectors, +* stored rowwise); +* if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A +* are destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* S (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The singular values of A, sorted so that S(i) >= S(i+1). +* +* U (output) DOUBLE PRECISION array, dimension (LDU,UCOL) +* (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. +* If JOBU = 'A', U contains the M-by-M orthogonal matrix U; +* if JOBU = 'S', U contains the first min(m,n) columns of U +* (the left singular vectors, stored columnwise); +* if JOBU = 'N' or 'O', U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= 1; if +* JOBU = 'S' or 'A', LDU >= M. +* +* VT (output) DOUBLE PRECISION array, dimension (LDVT,N) +* If JOBVT = 'A', VT contains the N-by-N orthogonal matrix +* V**T; +* if JOBVT = 'S', VT contains the first min(m,n) rows of +* V**T (the right singular vectors, stored rowwise); +* if JOBVT = 'N' or 'O', VT is not referenced. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. LDVT >= 1; if +* JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK; +* if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged +* superdiagonal elements of an upper bidiagonal matrix B +* whose diagonal is in S (not necessarily sorted). B +* satisfies A = U * B * VT, so it has the same singular values +* as A, and singular vectors related by U and VT. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1. +* LWORK >= MAX(3*MIN(M,N)+MAX(M,N),5*MIN(M,N)). +* For good performance, LWORK should generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if DBDSQR did not converge, INFO specifies how many +* superdiagonals of an intermediate bidiagonal form B +* did not converge to zero. See the description of WORK +* above for details. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS, + $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS + INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL, + $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU, + $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU, + $ NRVT, WRKBL + DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DBDSQR, DGEBRD, DGELQF, DGEMM, DGEQRF, DLACPY, + $ DLASCL, DLASET, DORGBR, DORGLQ, DORGQR, DORMBR, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 ) + WNTUA = LSAME( JOBU, 'A' ) + WNTUS = LSAME( JOBU, 'S' ) + WNTUAS = WNTUA .OR. WNTUS + WNTUO = LSAME( JOBU, 'O' ) + WNTUN = LSAME( JOBU, 'N' ) + WNTVA = LSAME( JOBVT, 'A' ) + WNTVS = LSAME( JOBVT, 'S' ) + WNTVAS = WNTVA .OR. WNTVS + WNTVO = LSAME( JOBVT, 'O' ) + WNTVN = LSAME( JOBVT, 'N' ) + MINWRK = 1 + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR. + $ ( WNTVO .AND. WNTUO ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN + INFO = -9 + ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR. + $ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) .AND. M.GT.0 .AND. + $ N.GT.0 ) THEN + IF( M.GE.N ) THEN +* +* Compute space needed for DBDSQR +* + BDSPAC = 5*N + IF( M.GE.MNTHR ) THEN + IF( WNTUN ) THEN +* +* Path 1 (M much larger than N, JOBU='N') +* + MAXWRK = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, + $ -1 ) + MAXWRK = MAX( MAXWRK, 3*N+2*N* + $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) ) + IF( WNTVO .OR. WNTVAS ) + $ MAXWRK = MAX( MAXWRK, 3*N+( N-1 )* + $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 4*N, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTUO .AND. WNTVN ) THEN +* +* Path 2 (M much larger than N, JOBU='O', JOBVT='N') +* + WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N ) + MINWRK = MAX( 3*N+M, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTUO .AND. WNTVAS ) THEN +* +* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or +* 'A') +* + WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+( N-1 )* + $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N ) + MINWRK = MAX( 3*N+M, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTUS .AND. WNTVN ) THEN +* +* Path 4 (M much larger than N, JOBU='S', JOBVT='N') +* + WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTUS .AND. WNTVO ) THEN +* +* Path 5 (M much larger than N, JOBU='S', JOBVT='O') +* + WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+( N-1 )* + $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTUS .AND. WNTVAS ) THEN +* +* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or +* 'A') +* + WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+( N-1 )* + $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTUA .AND. WNTVN ) THEN +* +* Path 7 (M much larger than N, JOBU='A', JOBVT='N') +* + WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M, + $ M, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTUA .AND. WNTVO ) THEN +* +* Path 8 (M much larger than N, JOBU='A', JOBVT='O') +* + WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M, + $ M, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+( N-1 )* + $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTUA .AND. WNTVAS ) THEN +* +* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or +* 'A') +* + WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M, + $ M, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+( N-1 )* + $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + END IF + ELSE +* +* Path 10 (M at least N, but not much larger) +* + MAXWRK = 3*N + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N, + $ -1, -1 ) + IF( WNTUS .OR. WNTUO ) + $ MAXWRK = MAX( MAXWRK, 3*N+N* + $ ILAENV( 1, 'DORGBR', 'Q', M, N, N, -1 ) ) + IF( WNTUA ) + $ MAXWRK = MAX( MAXWRK, 3*N+M* + $ ILAENV( 1, 'DORGBR', 'Q', M, M, N, -1 ) ) + IF( .NOT.WNTVN ) + $ MAXWRK = MAX( MAXWRK, 3*N+( N-1 )* + $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 3*N+M, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + END IF + ELSE +* +* Compute space needed for DBDSQR +* + BDSPAC = 5*M + IF( N.GE.MNTHR ) THEN + IF( WNTVN ) THEN +* +* Path 1t(N much larger than M, JOBVT='N') +* + MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, + $ -1 ) + MAXWRK = MAX( MAXWRK, 3*M+2*M* + $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) ) + IF( WNTUO .OR. WNTUAS ) + $ MAXWRK = MAX( MAXWRK, 3*M+M* + $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 4*M, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTVO .AND. WNTUN ) THEN +* +* Path 2t(N much larger than M, JOBU='N', JOBVT='O') +* + WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M ) + MINWRK = MAX( 3*M+N, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTVO .AND. WNTUAS ) THEN +* +* Path 3t(N much larger than M, JOBU='S' or 'A', +* JOBVT='O') +* + WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M ) + MINWRK = MAX( 3*M+N, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTVS .AND. WNTUN ) THEN +* +* Path 4t(N much larger than M, JOBU='N', JOBVT='S') +* + WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTVS .AND. WNTUO ) THEN +* +* Path 5t(N much larger than M, JOBU='O', JOBVT='S') +* + WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTVS .AND. WNTUAS ) THEN +* +* Path 6t(N much larger than M, JOBU='S' or 'A', +* JOBVT='S') +* + WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTVA .AND. WNTUN ) THEN +* +* Path 7t(N much larger than M, JOBU='N', JOBVT='A') +* + WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTVA .AND. WNTUO ) THEN +* +* Path 8t(N much larger than M, JOBU='O', JOBVT='A') +* + WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTVA .AND. WNTUAS ) THEN +* +* Path 9t(N much larger than M, JOBU='S' or 'A', +* JOBVT='A') +* + WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + END IF + ELSE +* +* Path 10t(N greater than M, but not much larger) +* + MAXWRK = 3*M + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N, + $ -1, -1 ) + IF( WNTVS .OR. WNTVO ) + $ MAXWRK = MAX( MAXWRK, 3*M+M* + $ ILAENV( 1, 'DORGBR', 'P', M, N, M, -1 ) ) + IF( WNTVA ) + $ MAXWRK = MAX( MAXWRK, 3*M+N* + $ ILAENV( 1, 'DORGBR', 'P', N, N, M, -1 ) ) + IF( .NOT.WNTUN ) + $ MAXWRK = MAX( MAXWRK, 3*M+( M-1 )* + $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 3*M+N, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + END IF + END IF + WORK( 1 ) = MAXWRK + END IF +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGESVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + IF( LWORK.GE.1 ) + $ WORK( 1 ) = ONE + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce using the QR +* decomposition (if sufficient workspace available) +* + IF( M.GE.MNTHR ) THEN +* + IF( WNTUN ) THEN +* +* Path 1 (M much larger than N, JOBU='N') +* No left singular vectors to be computed +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Zero out below R +* + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + NCVT = 0 + IF( WNTVO .OR. WNTVAS ) THEN +* +* If right singular vectors desired, generate P'. +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + NCVT = N + END IF + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in A if desired +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA, + $ DUM, 1, DUM, 1, WORK( IWORK ), INFO ) +* +* If right singular vectors desired in VT, copy them there +* + IF( WNTVAS ) + $ CALL DLACPY( 'F', N, N, A, LDA, VT, LDVT ) +* + ELSE IF( WNTUO .AND. WNTVN ) THEN +* +* Path 2 (M much larger than N, JOBU='O', JOBVT='N') +* N left singular vectors to be overwritten on A and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN +* +* WORK(IU) is LDA by N, WORK(IR) is LDA by N +* + LDWRKU = LDA + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN +* +* WORK(IU) is LDA by N, WORK(IR) is N by N +* + LDWRKU = LDA + LDWRKR = N + ELSE +* +* WORK(IU) is LDWRKU by N, WORK(IR) is N by N +* + LDWRKU = ( LWORK-N*N-N ) / N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IR) and zero out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ), + $ LDWRKR ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing R +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (Workspace: need N*N+BDSPAC) +* + CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1, + $ WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + N +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in WORK(IU) and copying to A +* (Workspace: need N*N+2*N, prefer N*N+M*N+N) +* + DO 10 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ), + $ LDA, WORK( IR ), LDWRKR, ZERO, + $ WORK( IU ), LDWRKU ) + CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 10 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize A +* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) +* + CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing A +* (Workspace: need 4*N, prefer 3*N+N*NB) +* + CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1, + $ A, LDA, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUO .AND. WNTVAS ) THEN +* +* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') +* N left singular vectors to be overwritten on A and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + LDWRKR = N + ELSE +* +* WORK(IU) is LDWRKU by N and WORK(IR) is N by N +* + LDWRKU = ( LWORK-N*N-N ) / N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, VT( 2, 1 ), + $ LDVT ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT, copying result to WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR ) +* +* Generate left vectors bidiagonalizing R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in VT +* (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) and computing right +* singular vectors of R in VT +* (Workspace: need N*N+BDSPAC) +* + CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT, + $ WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + N +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in WORK(IU) and copying to A +* (Workspace: need N*N+2*N, prefer N*N+M*N+N) +* + DO 20 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ), + $ LDA, WORK( IR ), LDWRKR, ZERO, + $ WORK( IU ), LDWRKU ) + CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 20 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, VT( 2, 1 ), + $ LDVT ) +* +* Generate Q in A +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in A by left vectors bidiagonalizing R +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), A, LDA, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in VT +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT, + $ A, LDA, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUS ) THEN +* + IF( WNTVN ) THEN +* +* Path 4 (M much larger than N, JOBU='S', JOBVT='N') +* N left singular vectors to be computed in U and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IR) is LDA by N +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is N by N +* + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (Workspace: need N*N+BDSPAC) +* + CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, + $ 1, WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in U +* (Workspace: need N*N) +* + CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA, + $ WORK( IR ), LDWRKR, ZERO, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), + $ LDA ) +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left vectors bidiagonalizing R +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, + $ 1, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVO ) THEN +* +* Path 5 (M much larger than N, JOBU='S', JOBVT='O') +* N left singular vectors to be computed in U and +* N right singular vectors to be overwritten on A +* + IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = N + ELSE +* +* WORK(IU) is N by N and WORK(IR) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* + CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*N*N+4*N, +* prefer 2*N*N+3*N+2*N*NB) +* + CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*N*N+4*N-1, +* prefer 2*N*N+3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in WORK(IR) +* (Workspace: need 2*N*N+BDSPAC) +* + CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, WORK( IU ), + $ LDWRKU, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in U +* (Workspace: need N*N) +* + CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA, + $ WORK( IU ), LDWRKU, ZERO, U, LDU ) +* +* Copy right singular vectors of R to A +* (Workspace: need N*N) +* + CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), + $ LDA ) +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left vectors bidiagonalizing R +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in A +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A, + $ LDA, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVAS ) THEN +* +* Path 6 (M much larger than N, JOBU='S', JOBVT='S' +* or 'A') +* N left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is N by N +* + LDWRKU = N + END IF + ITAU = IU + LDWRKU*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to VT +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, + $ LDVT ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need N*N+4*N-1, +* prefer N*N+3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in VT +* (Workspace: need N*N+BDSPAC) +* + CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, + $ LDVT, WORK( IU ), LDWRKU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in U +* (Workspace: need N*N) +* + CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA, + $ WORK( IU ), LDWRKU, ZERO, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, VT( 2, 1 ), + $ LDVT ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in VT +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + ELSE IF( WNTUA ) THEN +* + IF( WNTVN ) THEN +* +* Path 7 (M much larger than N, JOBU='A', JOBVT='N') +* M left singular vectors to be computed in U and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IR) is LDA by N +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is N by N +* + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in U +* (Workspace: need N*N+N+M, prefer N*N+N+M*NB) +* + CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (Workspace: need N*N+BDSPAC) +* + CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, + $ 1, WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IR), storing result in A +* (Workspace: need N*N) +* + CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU, + $ WORK( IR ), LDWRKR, ZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL DLACPY( 'F', M, N, A, LDA, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N+M, prefer N+M*NB) +* + CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), + $ LDA ) +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in A +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, + $ 1, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVO ) THEN +* +* Path 8 (M much larger than N, JOBU='A', JOBVT='O') +* M left singular vectors to be computed in U and +* N right singular vectors to be overwritten on A +* + IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = N + ELSE +* +* WORK(IU) is N by N and WORK(IR) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) +* + CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*N*N+4*N, +* prefer 2*N*N+3*N+2*N*NB) +* + CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*N*N+4*N-1, +* prefer 2*N*N+3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in WORK(IR) +* (Workspace: need 2*N*N+BDSPAC) +* + CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, WORK( IU ), + $ LDWRKU, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (Workspace: need N*N) +* + CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU, + $ WORK( IU ), LDWRKU, ZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL DLACPY( 'F', M, N, A, LDA, U, LDU ) +* +* Copy right singular vectors of R from WORK(IR) to A +* + CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N+M, prefer N+M*NB) +* + CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), + $ LDA ) +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in A +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in A +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A, + $ LDA, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVAS ) THEN +* +* Path 9 (M much larger than N, JOBU='A', JOBVT='S' +* or 'A') +* M left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is N by N +* + LDWRKU = N + END IF + ITAU = IU + LDWRKU*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N*N+N+M, prefer N*N+N+M*NB) +* + CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to VT +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, + $ LDVT ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need N*N+4*N-1, +* prefer N*N+3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in VT +* (Workspace: need N*N+BDSPAC) +* + CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, + $ LDVT, WORK( IU ), LDWRKU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (Workspace: need N*N) +* + CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU, + $ WORK( IU ), LDWRKU, ZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL DLACPY( 'F', M, N, A, LDA, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N+M, prefer N+M*NB) +* + CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R from A to VT, zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, VT( 2, 1 ), + $ LDVT ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in VT +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* M .LT. MNTHR +* +* Path 10 (M at least N, but not much larger) +* Reduce to bidiagonal form without QR decomposition +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize A +* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) +* + CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUAS ) THEN +* +* If left singular vectors desired in U, copy result to U +* and generate left bidiagonalizing vectors in U +* (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB) +* + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) + IF( WNTUS ) + $ NCU = N + IF( WNTUA ) + $ NCU = M + CALL DORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVAS ) THEN +* +* If right singular vectors desired in VT, copy result to +* VT and generate right bidiagonalizing vectors in VT +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTUO ) THEN +* +* If left singular vectors desired in A, generate left +* bidiagonalizing vectors in A +* (Workspace: need 4*N, prefer 3*N+N*NB) +* + CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVO ) THEN +* +* If right singular vectors desired in A, generate right +* bidiagonalizing vectors in A +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IWORK = IE + N + IF( WNTUAS .OR. WNTUO ) + $ NRU = M + IF( WNTUN ) + $ NRU = 0 + IF( WNTVAS .OR. WNTVO ) + $ NCVT = N + IF( WNTVN ) + $ NCVT = 0 + IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA, + $ U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in A and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO ) + END IF +* + END IF +* + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce using the LQ decomposition (if +* sufficient workspace available) +* + IF( N.GE.MNTHR ) THEN +* + IF( WNTVN ) THEN +* +* Path 1t(N much larger than M, JOBVT='N') +* No right singular vectors to be computed +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Zero out above L +* + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA ) + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUO .OR. WNTUAS ) THEN +* +* If left singular vectors desired, generate Q +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IWORK = IE + M + NRU = 0 + IF( WNTUO .OR. WNTUAS ) + $ NRU = M +* +* Perform bidiagonal QR iteration, computing left singular +* vectors of A in A if desired +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A, + $ LDA, DUM, 1, WORK( IWORK ), INFO ) +* +* If left singular vectors desired in U, copy them there +* + IF( WNTUAS ) + $ CALL DLACPY( 'F', M, M, A, LDA, U, LDU ) +* + ELSE IF( WNTVO .AND. WNTUN ) THEN +* +* Path 2t(N much larger than M, JOBU='N', JOBVT='O') +* M right singular vectors to be overwritten on A and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is M by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = M + ELSE +* +* WORK(IU) is M by CHUNK and WORK(IR) is M by M +* + LDWRKU = M + CHUNK = ( LWORK-M*M-M ) / M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IR) and zero out above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing L +* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M+BDSPAC) +* + CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + M +* +* Multiply right singular vectors of L in WORK(IR) by Q +* in A, storing result in WORK(IU) and copying to A +* (Workspace: need M*M+2*M, prefer M*M+M*N+M) +* + DO 30 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ), + $ LDWRKR, A( 1, I ), LDA, ZERO, + $ WORK( IU ), LDWRKU ) + CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, + $ A( 1, I ), LDA ) + 30 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +* + CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA, + $ DUM, 1, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTVO .AND. WNTUAS ) THEN +* +* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') +* M right singular vectors to be overwritten on A and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is M by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = M + ELSE +* +* WORK(IU) is M by CHUNK and WORK(IR) is M by M +* + LDWRKU = M + CHUNK = ( LWORK-M*M-M ) / M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing about above it +* + CALL DLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U, copying result to WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL DGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR ) +* +* Generate right vectors bidiagonalizing L in WORK(IR) +* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing L in U +* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) +* + CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U, and computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M+BDSPAC) +* + CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, U, LDU, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + M +* +* Multiply right singular vectors of L in WORK(IR) by Q +* in A, storing result in WORK(IU) and copying to A +* (Workspace: need M*M+2*M, prefer M*M+M*N+M)) +* + DO 40 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ), + $ LDWRKR, A( 1, I ), LDA, ZERO, + $ WORK( IU ), LDWRKU ) + CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, + $ A( 1, I ), LDA ) + 40 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) +* +* Generate Q in A +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL DGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in A +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU, + $ WORK( ITAUP ), A, LDA, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing L in U +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA, + $ U, LDU, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTVS ) THEN +* + IF( WNTUN ) THEN +* +* Path 4t(N much larger than M, JOBU='N', JOBVT='S') +* M right singular vectors to be computed in VT and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IR) is LDA by M +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is M by M +* + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IR), zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing L in +* WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M+BDSPAC) +* + CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IR) by +* Q in A, storing result in VT +* (Workspace: need M*M) +* + CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ), + $ LDWRKR, A, LDA, ZERO, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy result to VT +* + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT, + $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUO ) THEN +* +* Path 5t(N much larger than M, JOBU='O', JOBVT='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be overwritten on A +* + IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is LDA by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is M by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = M + ELSE +* +* WORK(IU) is M by M and WORK(IR) is M by M +* + LDWRKU = M + IR = IU + LDWRKU*M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out below it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* + CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*M*M+4*M, +* prefer 2*M*M+3*M+2*M*NB) +* + CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*M*M+4*M-1, +* prefer 2*M*M+3*M+(M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) +* + CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in WORK(IR) and computing +* right singular vectors of L in WORK(IU) +* (Workspace: need 2*M*M+BDSPAC) +* + CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, WORK( IR ), + $ LDWRKR, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in A, storing result in VT +* (Workspace: need M*M) +* + CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, A, LDA, ZERO, VT, LDVT ) +* +* Copy left singular vectors of L to A +* (Workspace: need M*M) +* + CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors of L in A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, compute left +* singular vectors of A in A and compute right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUAS ) THEN +* +* Path 6t(N much larger than M, JOBU='S' or 'A', +* JOBVT='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is LDA by M +* + LDWRKU = M + END IF + ITAU = IU + LDWRKU*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, + $ LDU ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need M*M+4*M-1, +* prefer M*M+3*M+(M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) +* + CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U and computing right +* singular vectors of L in WORK(IU) +* (Workspace: need M*M+BDSPAC) +* + CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, U, LDU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in A, storing result in VT +* (Workspace: need M*M) +* + CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, A, LDA, ZERO, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL DGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in U by Q +* in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + ELSE IF( WNTVA ) THEN +* + IF( WNTUN ) THEN +* +* Path 7t(N much larger than M, JOBU='N', JOBVT='A') +* N right singular vectors to be computed in VT and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IR) is LDA by M +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is M by M +* + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Copy L to WORK(IR), zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in VT +* (Workspace: need M*M+M+N, prefer M*M+M+N*NB) +* + CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (Workspace: need M*M+4*M-1, +* prefer M*M+3*M+(M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M+BDSPAC) +* + CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IR) by +* Q in VT, storing result in A +* (Workspace: need M*M) +* + CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ), + $ LDWRKR, VT, LDVT, ZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M+N, prefer M+N*NB) +* + CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in A by Q +* in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT, + $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUO ) THEN +* +* Path 8t(N much larger than M, JOBU='O', JOBVT='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be overwritten on A +* + IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is LDA by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is M by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = M + ELSE +* +* WORK(IU) is M by M and WORK(IR) is M by M +* + LDWRKU = M + IR = IU + LDWRKU*M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) +* + CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*M*M+4*M, +* prefer 2*M*M+3*M+2*M*NB) +* + CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*M*M+4*M-1, +* prefer 2*M*M+3*M+(M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) +* + CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in WORK(IR) and computing +* right singular vectors of L in WORK(IU) +* (Workspace: need 2*M*M+BDSPAC) +* + CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, WORK( IR ), + $ LDWRKR, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in VT, storing result in A +* (Workspace: need M*M) +* + CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, VT, LDVT, ZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* +* Copy left singular vectors of A from WORK(IR) to A +* + CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M+N, prefer M+N*NB) +* + CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in A by Q +* in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUAS ) THEN +* +* Path 9t(N much larger than M, JOBU='S' or 'A', +* JOBVT='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IU) is LDA by M +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is M by M +* + LDWRKU = M + END IF + ITAU = IU + LDWRKU*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M*M+M+N, prefer M*M+M+N*NB) +* + CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, + $ LDU ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) +* + CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U and computing right +* singular vectors of L in WORK(IU) +* (Workspace: need M*M+BDSPAC) +* + CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, U, LDU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in VT, storing result in A +* (Workspace: need M*M) +* + CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, VT, LDVT, ZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M+N, prefer M+N*NB) +* + CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL DGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in U by Q +* in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* N .LT. MNTHR +* +* Path 10t(N greater than M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +* + CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUAS ) THEN +* +* If left singular vectors desired in U, copy result to U +* and generate left bidiagonalizing vectors in U +* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) +* + CALL DLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL DORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVAS ) THEN +* +* If right singular vectors desired in VT, copy result to +* VT and generate right bidiagonalizing vectors in VT +* (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB) +* + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) + IF( WNTVA ) + $ NRVT = N + IF( WNTVS ) + $ NRVT = M + CALL DORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTUO ) THEN +* +* If left singular vectors desired in A, generate left +* bidiagonalizing vectors in A +* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) +* + CALL DORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVO ) THEN +* +* If right singular vectors desired in A, generate right +* bidiagonalizing vectors in A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IWORK = IE + M + IF( WNTUAS .OR. WNTUO ) + $ NRU = M + IF( WNTUN ) + $ NRU = 0 + IF( WNTVAS .OR. WNTVO ) + $ NCVT = N + IF( WNTVN ) + $ NCVT = 0 + IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA, + $ U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in A and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO ) + END IF +* + END IF +* + END IF +* +* If DBDSQR failed to converge, copy unconverged superdiagonals +* to WORK( 2:MINMN ) +* + IF( INFO.NE.0 ) THEN + IF( IE.GT.2 ) THEN + DO 50 I = 1, MINMN - 1 + WORK( I+1 ) = WORK( I+IE-1 ) + 50 CONTINUE + END IF + IF( IE.LT.2 ) THEN + DO 60 I = MINMN - 1, 1, -1 + WORK( I+1 ) = WORK( I+IE-1 ) + 60 CONTINUE + END IF + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM ) + $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ), + $ MINMN, IERR ) + IF( ANRM.LT.SMLNUM ) + $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM ) + $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ), + $ MINMN, IERR ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of DGESVD +* + END diff --git a/costa/native/external/lapack/dgesvx.f b/costa/native/external/lapack/dgesvx.f new file mode 100644 index 000000000..0019e5bbd --- /dev/null +++ b/costa/native/external/lapack/dgesvx.f @@ -0,0 +1,482 @@ + SUBROUTINE DGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), C( * ), FERR( * ), R( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DGESVX uses the LU factorization to compute the solution to a real +* system of linear equations +* A * X = B, +* where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'E', real scaling factors are computed to equilibrate +* the system: +* TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +* TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +* TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +* Whether or not the system will be equilibrated depends on the +* scaling of the matrix A, but if equilibration is used, A is +* overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +* or diag(C)*B (if TRANS = 'T' or 'C'). +* +* 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the +* matrix A (after equilibration if FACT = 'E') as +* A = P * L * U, +* where P is a permutation matrix, L is a unit lower triangular +* matrix, and U is upper triangular. +* +* 3. If some U(i,i)=0, so that U is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 4. The system of equations is solved for X using the factored form +* of A. +* +* 5. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* 6. If equilibration was used, the matrix X is premultiplied by +* diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +* that it solves the original system before equilibration. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of the matrix A is +* supplied on entry, and if not, whether the matrix A should be +* equilibrated before it is factored. +* = 'F': On entry, AF and IPIV contain the factored form of A. +* If EQUED is not 'N', the matrix A has been +* equilibrated with scaling factors given by R and C. +* A, AF, and IPIV are not modified. +* = 'N': The matrix A will be copied to AF and factored. +* = 'E': The matrix A will be equilibrated if necessary, then +* copied to AF and factored. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Transpose) +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the N-by-N matrix A. If FACT = 'F' and EQUED is +* not 'N', then A must have been equilibrated by the scaling +* factors in R and/or C. A is not modified if FACT = 'F' or +* 'N', or if FACT = 'E' and EQUED = 'N' on exit. +* +* On exit, if EQUED .ne. 'N', A is scaled as follows: +* EQUED = 'R': A := diag(R) * A +* EQUED = 'C': A := A * diag(C) +* EQUED = 'B': A := diag(R) * A * diag(C). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input or output) DOUBLE PRECISION array, dimension (LDAF,N) +* If FACT = 'F', then AF is an input argument and on entry +* contains the factors L and U from the factorization +* A = P*L*U as computed by DGETRF. If EQUED .ne. 'N', then +* AF is the factored form of the equilibrated matrix A. +* +* If FACT = 'N', then AF is an output argument and on exit +* returns the factors L and U from the factorization A = P*L*U +* of the original matrix A. +* +* If FACT = 'E', then AF is an output argument and on exit +* returns the factors L and U from the factorization A = P*L*U +* of the equilibrated matrix A (see the description of A for +* the form of the equilibrated matrix). +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains the pivot indices from the factorization A = P*L*U +* as computed by DGETRF; row i of the matrix was interchanged +* with row IPIV(i). +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains the pivot indices from the factorization A = P*L*U +* of the original matrix A. +* +* If FACT = 'E', then IPIV is an output argument and on exit +* contains the pivot indices from the factorization A = P*L*U +* of the equilibrated matrix A. +* +* EQUED (input or output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration (always true if FACT = 'N'). +* = 'R': Row equilibration, i.e., A has been premultiplied by +* diag(R). +* = 'C': Column equilibration, i.e., A has been postmultiplied +* by diag(C). +* = 'B': Both row and column equilibration, i.e., A has been +* replaced by diag(R) * A * diag(C). +* EQUED is an input argument if FACT = 'F'; otherwise, it is an +* output argument. +* +* R (input or output) DOUBLE PRECISION array, dimension (N) +* The row scale factors for A. If EQUED = 'R' or 'B', A is +* multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +* is not accessed. R is an input argument if FACT = 'F'; +* otherwise, R is an output argument. If FACT = 'F' and +* EQUED = 'R' or 'B', each element of R must be positive. +* +* C (input or output) DOUBLE PRECISION array, dimension (N) +* The column scale factors for A. If EQUED = 'C' or 'B', A is +* multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +* is not accessed. C is an input argument if FACT = 'F'; +* otherwise, C is an output argument. If FACT = 'F' and +* EQUED = 'C' or 'B', each element of C must be positive. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, +* if EQUED = 'N', B is not modified; +* if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +* diag(R)*B; +* if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +* overwritten by diag(C)*B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X +* to the original system of equations. Note that A and B are +* modified on exit if EQUED .ne. 'N', and the solution to the +* equilibrated system is inv(diag(C))*X if TRANS = 'N' and +* EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' +* and EQUED = 'R' or 'B'. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) DOUBLE PRECISION +* The estimate of the reciprocal condition number of the matrix +* A after equilibration (if done). If RCOND is less than the +* machine precision (in particular, if RCOND = 0), the matrix +* is singular to working precision. This condition is +* indicated by a return code of INFO > 0. +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (4*N) +* On exit, WORK(1) contains the reciprocal pivot growth +* factor norm(A)/norm(U). The "max absolute element" norm is +* used. If WORK(1) is much less than 1, then the stability +* of the LU factorization of the (equilibrated) matrix A +* could be poor. This also means that the solution X, condition +* estimator RCOND, and forward error bound FERR could be +* unreliable. If factorization fails with 0 0: if INFO = i, and i is +* <= N: U(i,i) is exactly zero. The factorization has +* been completed, but the factor U is exactly +* singular, so the solution and error bounds +* could not be computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + CHARACTER NORM + INTEGER I, INFEQU, J + DOUBLE PRECISION AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN, + $ ROWCND, RPVGRW, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANGE, DLANTR + EXTERNAL LSAME, DLAMCH, DLANGE, DLANTR +* .. +* .. External Subroutines .. + EXTERNAL DGECON, DGEEQU, DGERFS, DGETRF, DGETRS, DLACPY, + $ DLAQGE, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -12 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGESVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL DGEEQU( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL DLAQGE( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = R( I )*B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( COLEQU ) THEN + DO 60 J = 1, NRHS + DO 50 I = 1, N + B( I, J ) = C( I )*B( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of A. +* + CALL DLACPY( 'Full', N, N, A, LDA, AF, LDAF ) + CALL DGETRF( N, N, AF, LDAF, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) THEN +* +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + RPVGRW = DLANTR( 'M', 'U', 'N', INFO, INFO, AF, LDAF, + $ WORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = DLANGE( 'M', N, INFO, A, LDA, WORK ) / RPVGRW + END IF + WORK( 1 ) = RPVGRW + RCOND = ZERO + END IF + RETURN + END IF + END IF +* +* Compute the norm of the matrix A and the +* reciprocal pivot growth factor RPVGRW. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = DLANGE( NORM, N, N, A, LDA, WORK ) + RPVGRW = DLANTR( 'M', 'U', 'N', N, N, AF, LDAF, WORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = DLANGE( 'M', N, N, A, LDA, WORK ) / RPVGRW + END IF +* +* Compute the reciprocal of the condition number of A. +* + CALL DGECON( NORM, N, AF, LDAF, ANORM, RCOND, WORK, IWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution matrix X. +* + CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL DGETRS( TRANS, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL DGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X, + $ LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( NOTRAN ) THEN + IF( COLEQU ) THEN + DO 80 J = 1, NRHS + DO 70 I = 1, N + X( I, J ) = C( I )*X( I, J ) + 70 CONTINUE + 80 CONTINUE + DO 90 J = 1, NRHS + FERR( J ) = FERR( J ) / COLCND + 90 CONTINUE + END IF + ELSE IF( ROWEQU ) THEN + DO 110 J = 1, NRHS + DO 100 I = 1, N + X( I, J ) = R( I )*X( I, J ) + 100 CONTINUE + 110 CONTINUE + DO 120 J = 1, NRHS + FERR( J ) = FERR( J ) / ROWCND + 120 CONTINUE + END IF +* + WORK( 1 ) = RPVGRW + RETURN +* +* End of DGESVX +* + END diff --git a/costa/native/external/lapack/dgetc2.f b/costa/native/external/lapack/dgetc2.f new file mode 100644 index 000000000..d7ac5868a --- /dev/null +++ b/costa/native/external/lapack/dgetc2.f @@ -0,0 +1,147 @@ + SUBROUTINE DGETC2( N, A, LDA, IPIV, JPIV, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* DGETC2 computes an LU factorization with complete pivoting of the +* n-by-n matrix A. The factorization has the form A = P * L * U * Q, +* where P and Q are permutation matrices, L is lower triangular with +* unit diagonal elements and U is upper triangular. +* +* This is the Level 2 BLAS algorithm. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) +* On entry, the n-by-n matrix A to be factored. +* On exit, the factors L and U from the factorization +* A = P*L*U*Q; the unit diagonal elements of L are not stored. +* If U(k, k) appears to be less than SMIN, U(k, k) is given the +* value of SMIN, i.e., giving a nonsingular perturbed system. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension(N). +* The pivot indices; for 1 <= i <= N, row i of the +* matrix has been interchanged with row IPIV(i). +* +* JPIV (output) INTEGER array, dimension(N). +* The pivot indices; for 1 <= j <= N, column j of the +* matrix has been interchanged with column JPIV(j). +* +* INFO (output) INTEGER +* = 0: successful exit +* > 0: if INFO = k, U(k, k) is likely to produce owerflow if +* we try to solve for x in Ax = b. So U is perturbed to +* avoid the overflow. +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IP, IPV, J, JP, JPV + DOUBLE PRECISION BIGNUM, EPS, SMIN, SMLNUM, XMAX +* .. +* .. External Subroutines .. + EXTERNAL DGER, DSWAP +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Set constants to control overflow +* + INFO = 0 + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Factorize A using complete pivoting. +* Set pivots less than SMIN to SMIN. +* + DO 40 I = 1, N - 1 +* +* Find max element in matrix A +* + XMAX = ZERO + DO 20 IP = I, N + DO 10 JP = I, N + IF( ABS( A( IP, JP ) ).GE.XMAX ) THEN + XMAX = ABS( A( IP, JP ) ) + IPV = IP + JPV = JP + END IF + 10 CONTINUE + 20 CONTINUE + IF( I.EQ.1 ) + $ SMIN = MAX( EPS*XMAX, SMLNUM ) +* +* Swap rows +* + IF( IPV.NE.I ) + $ CALL DSWAP( N, A( IPV, 1 ), LDA, A( I, 1 ), LDA ) + IPIV( I ) = IPV +* +* Swap columns +* + IF( JPV.NE.I ) + $ CALL DSWAP( N, A( 1, JPV ), 1, A( 1, I ), 1 ) + JPIV( I ) = JPV +* +* Check for singularity +* + IF( ABS( A( I, I ) ).LT.SMIN ) THEN + INFO = I + A( I, I ) = SMIN + END IF + DO 30 J = I + 1, N + A( J, I ) = A( J, I ) / A( I, I ) + 30 CONTINUE + CALL DGER( N-I, N-I, -ONE, A( I+1, I ), 1, A( I, I+1 ), LDA, + $ A( I+1, I+1 ), LDA ) + 40 CONTINUE +* + IF( ABS( A( N, N ) ).LT.SMIN ) THEN + INFO = N + A( N, N ) = SMIN + END IF +* + RETURN +* +* End of DGETC2 +* + END diff --git a/costa/native/external/lapack/dgetf2.f b/costa/native/external/lapack/dgetf2.f new file mode 100644 index 000000000..e7d9b03de --- /dev/null +++ b/costa/native/external/lapack/dgetf2.f @@ -0,0 +1,135 @@ + SUBROUTINE DGETF2( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* DGETF2 computes an LU factorization of a general m-by-n matrix A +* using partial pivoting with row interchanges. +* +* The factorization has the form +* A = P * L * U +* where P is a permutation matrix, L is lower triangular with unit +* diagonal elements (lower trapezoidal if m > n), and U is upper +* triangular (upper trapezoidal if m < n). +* +* This is the right-looking Level 2 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the m by n matrix to be factored. +* On exit, the factors L and U from the factorization +* A = P*L*U; the unit diagonal elements of L are not stored. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, U(k,k) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER J, JP +* .. +* .. External Functions .. + INTEGER IDAMAX + EXTERNAL IDAMAX +* .. +* .. External Subroutines .. + EXTERNAL DGER, DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGETF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + DO 10 J = 1, MIN( M, N ) +* +* Find pivot and test for singularity. +* + JP = J - 1 + IDAMAX( M-J+1, A( J, J ), 1 ) + IPIV( J ) = JP + IF( A( JP, J ).NE.ZERO ) THEN +* +* Apply the interchange to columns 1:N. +* + IF( JP.NE.J ) + $ CALL DSWAP( N, A( J, 1 ), LDA, A( JP, 1 ), LDA ) +* +* Compute elements J+1:M of J-th column. +* + IF( J.LT.M ) + $ CALL DSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 ) +* + ELSE IF( INFO.EQ.0 ) THEN +* + INFO = J + END IF +* + IF( J.LT.MIN( M, N ) ) THEN +* +* Update trailing submatrix. +* + CALL DGER( M-J, N-J, -ONE, A( J+1, J ), 1, A( J, J+1 ), LDA, + $ A( J+1, J+1 ), LDA ) + END IF + 10 CONTINUE + RETURN +* +* End of DGETF2 +* + END diff --git a/costa/native/external/lapack/dgetrf.f b/costa/native/external/lapack/dgetrf.f new file mode 100644 index 000000000..2e0a4c4e7 --- /dev/null +++ b/costa/native/external/lapack/dgetrf.f @@ -0,0 +1,160 @@ + SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* DGETRF computes an LU factorization of a general M-by-N matrix A +* using partial pivoting with row interchanges. +* +* The factorization has the form +* A = P * L * U +* where P is a permutation matrix, L is lower triangular with unit +* diagonal elements (lower trapezoidal if m > n), and U is upper +* triangular (upper trapezoidal if m < n). +* +* This is the right-looking Level 3 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N matrix to be factored. +* On exit, the factors L and U from the factorization +* A = P*L*U; the unit diagonal elements of L are not stored. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO, J, JB, NB +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DGETF2, DLASWP, DTRSM, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'DGETRF', ' ', M, N, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN +* +* Use unblocked code. +* + CALL DGETF2( M, N, A, LDA, IPIV, INFO ) + ELSE +* +* Use blocked code. +* + DO 20 J = 1, MIN( M, N ), NB + JB = MIN( MIN( M, N )-J+1, NB ) +* +* Factor diagonal and subdiagonal blocks and test for exact +* singularity. +* + CALL DGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO ) +* +* Adjust INFO and the pivot indices. +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + J - 1 + DO 10 I = J, MIN( M, J+JB-1 ) + IPIV( I ) = J - 1 + IPIV( I ) + 10 CONTINUE +* +* Apply interchanges to columns 1:J-1. +* + CALL DLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 ) +* + IF( J+JB.LE.N ) THEN +* +* Apply interchanges to columns J+JB:N. +* + CALL DLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1, + $ IPIV, 1 ) +* +* Compute block row of U. +* + CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB, + $ N-J-JB+1, ONE, A( J, J ), LDA, A( J, J+JB ), + $ LDA ) + IF( J+JB.LE.M ) THEN +* +* Update trailing submatrix. +* + CALL DGEMM( 'No transpose', 'No transpose', M-J-JB+1, + $ N-J-JB+1, JB, -ONE, A( J+JB, J ), LDA, + $ A( J, J+JB ), LDA, ONE, A( J+JB, J+JB ), + $ LDA ) + END IF + END IF + 20 CONTINUE + END IF + RETURN +* +* End of DGETRF +* + END diff --git a/costa/native/external/lapack/dgetri.f b/costa/native/external/lapack/dgetri.f new file mode 100644 index 000000000..c67a34803 --- /dev/null +++ b/costa/native/external/lapack/dgetri.f @@ -0,0 +1,193 @@ + SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGETRI computes the inverse of a matrix using the LU factorization +* computed by DGETRF. +* +* This method inverts U and then computes inv(A) by solving the system +* inv(A)*L = inv(U) for inv(A). +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the factors L and U from the factorization +* A = P*L*U as computed by DGETRF. +* On exit, if INFO = 0, the inverse of the original matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from DGETRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO=0, then WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimal performance LWORK >= N*NB, where NB is +* the optimal blocksize returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is +* singular and its inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB, + $ NBMIN, NN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DGEMV, DSWAP, DTRSM, DTRTRI, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NB = ILAENV( 1, 'DGETRI', ' ', N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGETRI', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form inv(U). If INFO > 0 from DTRTRI, then U is singular, +* and the inverse is not computed. +* + CALL DTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = MAX( LDWORK*NB, 1 ) + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DGETRI', ' ', N, -1, -1, -1 ) ) + END IF + ELSE + IWS = N + END IF +* +* Solve the equation inv(A)*L = inv(U) for inv(A). +* + IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + DO 20 J = N, 1, -1 +* +* Copy current column of L to WORK and replace with zeros. +* + DO 10 I = J + 1, N + WORK( I ) = A( I, J ) + A( I, J ) = ZERO + 10 CONTINUE +* +* Compute current column of inv(A). +* + IF( J.LT.N ) + $ CALL DGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ), + $ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 ) + 20 CONTINUE + ELSE +* +* Use blocked code. +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 50 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) +* +* Copy current block column of L to WORK and replace with +* zeros. +* + DO 40 JJ = J, J + JB - 1 + DO 30 I = JJ + 1, N + WORK( I+( JJ-J )*LDWORK ) = A( I, JJ ) + A( I, JJ ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Compute current block column of inv(A). +* + IF( J+JB.LE.N ) + $ CALL DGEMM( 'No transpose', 'No transpose', N, JB, + $ N-J-JB+1, -ONE, A( 1, J+JB ), LDA, + $ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA ) + CALL DTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, + $ ONE, WORK( J ), LDWORK, A( 1, J ), LDA ) + 50 CONTINUE + END IF +* +* Apply column interchanges. +* + DO 60 J = N - 1, 1, -1 + JP = IPIV( J ) + IF( JP.NE.J ) + $ CALL DSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 ) + 60 CONTINUE +* + WORK( 1 ) = IWS + RETURN +* +* End of DGETRI +* + END diff --git a/costa/native/external/lapack/dgetrs.f b/costa/native/external/lapack/dgetrs.f new file mode 100644 index 000000000..652257634 --- /dev/null +++ b/costa/native/external/lapack/dgetrs.f @@ -0,0 +1,150 @@ + SUBROUTINE DGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DGETRS solves a system of linear equations +* A * X = B or A' * X = B +* with a general N-by-N matrix A using the LU factorization computed +* by DGETRF. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A'* X = B (Transpose) +* = 'C': A'* X = B (Conjugate transpose = Transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The factors L and U from the factorization A = P*L*U +* as computed by DGETRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from DGETRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLASWP, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGETRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( NOTRAN ) THEN +* +* Solve A * X = B. +* +* Apply row interchanges to the right hand sides. +* + CALL DLASWP( NRHS, B, LDB, 1, N, IPIV, 1 ) +* +* Solve L*X = B, overwriting B with X. +* + CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) +* +* Solve U*X = B, overwriting B with X. +* + CALL DTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) + ELSE +* +* Solve A' * X = B. +* +* Solve U'*X = B, overwriting B with X. +* + CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) +* +* Solve L'*X = B, overwriting B with X. +* + CALL DTRSM( 'Left', 'Lower', 'Transpose', 'Unit', N, NRHS, ONE, + $ A, LDA, B, LDB ) +* +* Apply row interchanges to the solution vectors. +* + CALL DLASWP( NRHS, B, LDB, 1, N, IPIV, -1 ) + END IF +* + RETURN +* +* End of DGETRS +* + END diff --git a/costa/native/external/lapack/dggbak.f b/costa/native/external/lapack/dggbak.f new file mode 100644 index 000000000..75ffd51e3 --- /dev/null +++ b/costa/native/external/lapack/dggbak.f @@ -0,0 +1,216 @@ + SUBROUTINE DGGBAK( JOB, SIDE, N, ILO, IHI, LSCALE, RSCALE, M, V, + $ LDV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOB, SIDE + INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION LSCALE( * ), RSCALE( * ), V( LDV, * ) +* .. +* +* Purpose +* ======= +* +* DGGBAK forms the right or left eigenvectors of a real generalized +* eigenvalue problem A*x = lambda*B*x, by backward transformation on +* the computed eigenvectors of the balanced pair of matrices output by +* DGGBAL. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the type of backward transformation required: +* = 'N': do nothing, return immediately; +* = 'P': do backward transformation for permutation only; +* = 'S': do backward transformation for scaling only; +* = 'B': do backward transformations for both permutation and +* scaling. +* JOB must be the same as the argument JOB supplied to DGGBAL. +* +* SIDE (input) CHARACTER*1 +* = 'R': V contains right eigenvectors; +* = 'L': V contains left eigenvectors. +* +* N (input) INTEGER +* The number of rows of the matrix V. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* The integers ILO and IHI determined by DGGBAL. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* LSCALE (input) DOUBLE PRECISION array, dimension (N) +* Details of the permutations and/or scaling factors applied +* to the left side of A and B, as returned by DGGBAL. +* +* RSCALE (input) DOUBLE PRECISION array, dimension (N) +* Details of the permutations and/or scaling factors applied +* to the right side of A and B, as returned by DGGBAL. +* +* M (input) INTEGER +* The number of columns of the matrix V. M >= 0. +* +* V (input/output) DOUBLE PRECISION array, dimension (LDV,M) +* On entry, the matrix of right or left eigenvectors to be +* transformed, as returned by DTGEVC. +* On exit, V is overwritten by the transformed eigenvectors. +* +* LDV (input) INTEGER +* The leading dimension of the matrix V. LDV >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* See R.C. Ward, Balancing the generalized eigenvalue problem, +* SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFTV, RIGHTV + INTEGER I, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + RIGHTV = LSAME( SIDE, 'R' ) + LEFTV = LSAME( SIDE, 'L' ) +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 ) THEN + INFO = -4 + ELSE IF( IHI.LT.ILO .OR. IHI.GT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -6 + ELSE IF( LDV.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGBAK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( M.EQ.0 ) + $ RETURN + IF( LSAME( JOB, 'N' ) ) + $ RETURN +* + IF( ILO.EQ.IHI ) + $ GO TO 30 +* +* Backward balance +* + IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN +* +* Backward transformation on right eigenvectors +* + IF( RIGHTV ) THEN + DO 10 I = ILO, IHI + CALL DSCAL( M, RSCALE( I ), V( I, 1 ), LDV ) + 10 CONTINUE + END IF +* +* Backward transformation on left eigenvectors +* + IF( LEFTV ) THEN + DO 20 I = ILO, IHI + CALL DSCAL( M, LSCALE( I ), V( I, 1 ), LDV ) + 20 CONTINUE + END IF + END IF +* +* Backward permutation +* + 30 CONTINUE + IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN +* +* Backward permutation on right eigenvectors +* + IF( RIGHTV ) THEN + IF( ILO.EQ.1 ) + $ GO TO 50 +* + DO 40 I = ILO - 1, 1, -1 + K = RSCALE( I ) + IF( K.EQ.I ) + $ GO TO 40 + CALL DSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 40 CONTINUE +* + 50 CONTINUE + IF( IHI.EQ.N ) + $ GO TO 70 + DO 60 I = IHI + 1, N + K = RSCALE( I ) + IF( K.EQ.I ) + $ GO TO 60 + CALL DSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 60 CONTINUE + END IF +* +* Backward permutation on left eigenvectors +* + 70 CONTINUE + IF( LEFTV ) THEN + IF( ILO.EQ.1 ) + $ GO TO 90 + DO 80 I = ILO - 1, 1, -1 + K = LSCALE( I ) + IF( K.EQ.I ) + $ GO TO 80 + CALL DSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 80 CONTINUE +* + 90 CONTINUE + IF( IHI.EQ.N ) + $ GO TO 110 + DO 100 I = IHI + 1, N + K = LSCALE( I ) + IF( K.EQ.I ) + $ GO TO 100 + CALL DSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 100 CONTINUE + END IF + END IF +* + 110 CONTINUE +* + RETURN +* +* End of DGGBAK +* + END diff --git a/costa/native/external/lapack/dggbal.f b/costa/native/external/lapack/dggbal.f new file mode 100644 index 000000000..03fd54506 --- /dev/null +++ b/costa/native/external/lapack/dggbal.f @@ -0,0 +1,461 @@ + SUBROUTINE DGGBAL( JOB, N, A, LDA, B, LDB, ILO, IHI, LSCALE, + $ RSCALE, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER IHI, ILO, INFO, LDA, LDB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), LSCALE( * ), + $ RSCALE( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGGBAL balances a pair of general real matrices (A,B). This +* involves, first, permuting A and B by similarity transformations to +* isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N +* elements on the diagonal; and second, applying a diagonal similarity +* transformation to rows and columns ILO to IHI to make the rows +* and columns as close in norm as possible. Both steps are optional. +* +* Balancing may reduce the 1-norm of the matrices, and improve the +* accuracy of the computed eigenvalues and/or eigenvectors in the +* generalized eigenvalue problem A*x = lambda*B*x. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the operations to be performed on A and B: +* = 'N': none: simply set ILO = 1, IHI = N, LSCALE(I) = 1.0 +* and RSCALE(I) = 1.0 for i = 1,...,N. +* = 'P': permute only; +* = 'S': scale only; +* = 'B': both permute and scale. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the input matrix A. +* On exit, A is overwritten by the balanced matrix. +* If JOB = 'N', A is not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,N) +* On entry, the input matrix B. +* On exit, B is overwritten by the balanced matrix. +* If JOB = 'N', B is not referenced. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* ILO (output) INTEGER +* IHI (output) INTEGER +* ILO and IHI are set to integers such that on exit +* A(i,j) = 0 and B(i,j) = 0 if i > j and +* j = 1,...,ILO-1 or i = IHI+1,...,N. +* If JOB = 'N' or 'S', ILO = 1 and IHI = N. +* +* LSCALE (output) DOUBLE PRECISION array, dimension (N) +* Details of the permutations and scaling factors applied +* to the left side of A and B. If P(j) is the index of the +* row interchanged with row j, and D(j) +* is the scaling factor applied to row j, then +* LSCALE(j) = P(j) for J = 1,...,ILO-1 +* = D(j) for J = ILO,...,IHI +* = P(j) for J = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* RSCALE (output) DOUBLE PRECISION array, dimension (N) +* Details of the permutations and scaling factors applied +* to the right side of A and B. If P(j) is the index of the +* column interchanged with column j, and D(j) +* is the scaling factor applied to column j, then +* LSCALE(j) = P(j) for J = 1,...,ILO-1 +* = D(j) for J = ILO,...,IHI +* = P(j) for J = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (6*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* See R.C. WARD, Balancing the generalized eigenvalue problem, +* SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION THREE, SCLFAC + PARAMETER ( THREE = 3.0D+0, SCLFAC = 1.0D+1 ) +* .. +* .. Local Scalars .. + INTEGER I, ICAB, IFLOW, IP1, IR, IRAB, IT, J, JC, JP1, + $ K, KOUNT, L, LCAB, LM1, LRAB, LSFMAX, LSFMIN, + $ M, NR, NRP2 + DOUBLE PRECISION ALPHA, BASL, BETA, CAB, CMAX, COEF, COEF2, + $ COEF5, COR, EW, EWC, GAMMA, PGAMMA, RAB, SFMAX, + $ SFMIN, SUM, T, TA, TB, TC +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DDOT, DLAMCH + EXTERNAL LSAME, IDAMAX, DDOT, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, LOG10, MAX, MIN, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGBAL', -INFO ) + RETURN + END IF +* + K = 1 + L = N +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( JOB, 'N' ) ) THEN + ILO = 1 + IHI = N + DO 10 I = 1, N + LSCALE( I ) = ONE + RSCALE( I ) = ONE + 10 CONTINUE + RETURN + END IF +* + IF( K.EQ.L ) THEN + ILO = 1 + IHI = 1 + LSCALE( 1 ) = ONE + RSCALE( 1 ) = ONE + RETURN + END IF +* + IF( LSAME( JOB, 'S' ) ) + $ GO TO 190 +* + GO TO 30 +* +* Permute the matrices A and B to isolate the eigenvalues. +* +* Find row with one nonzero in columns 1 through L +* + 20 CONTINUE + L = LM1 + IF( L.NE.1 ) + $ GO TO 30 +* + RSCALE( 1 ) = 1 + LSCALE( 1 ) = 1 + GO TO 190 +* + 30 CONTINUE + LM1 = L - 1 + DO 80 I = L, 1, -1 + DO 40 J = 1, LM1 + JP1 = J + 1 + IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO ) + $ GO TO 50 + 40 CONTINUE + J = L + GO TO 70 +* + 50 CONTINUE + DO 60 J = JP1, L + IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO ) + $ GO TO 80 + 60 CONTINUE + J = JP1 - 1 +* + 70 CONTINUE + M = L + IFLOW = 1 + GO TO 160 + 80 CONTINUE + GO TO 100 +* +* Find column with one nonzero in rows K through N +* + 90 CONTINUE + K = K + 1 +* + 100 CONTINUE + DO 150 J = K, L + DO 110 I = K, LM1 + IP1 = I + 1 + IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO ) + $ GO TO 120 + 110 CONTINUE + I = L + GO TO 140 + 120 CONTINUE + DO 130 I = IP1, L + IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO ) + $ GO TO 150 + 130 CONTINUE + I = IP1 - 1 + 140 CONTINUE + M = K + IFLOW = 2 + GO TO 160 + 150 CONTINUE + GO TO 190 +* +* Permute rows M and I +* + 160 CONTINUE + LSCALE( M ) = I + IF( I.EQ.M ) + $ GO TO 170 + CALL DSWAP( N-K+1, A( I, K ), LDA, A( M, K ), LDA ) + CALL DSWAP( N-K+1, B( I, K ), LDB, B( M, K ), LDB ) +* +* Permute columns M and J +* + 170 CONTINUE + RSCALE( M ) = J + IF( J.EQ.M ) + $ GO TO 180 + CALL DSWAP( L, A( 1, J ), 1, A( 1, M ), 1 ) + CALL DSWAP( L, B( 1, J ), 1, B( 1, M ), 1 ) +* + 180 CONTINUE + GO TO ( 20, 90 )IFLOW +* + 190 CONTINUE + ILO = K + IHI = L +* + IF( ILO.EQ.IHI ) + $ RETURN +* + IF( LSAME( JOB, 'P' ) ) + $ RETURN +* +* Balance the submatrix in rows ILO to IHI. +* + NR = IHI - ILO + 1 + DO 200 I = ILO, IHI + RSCALE( I ) = ZERO + LSCALE( I ) = ZERO +* + WORK( I ) = ZERO + WORK( I+N ) = ZERO + WORK( I+2*N ) = ZERO + WORK( I+3*N ) = ZERO + WORK( I+4*N ) = ZERO + WORK( I+5*N ) = ZERO + 200 CONTINUE +* +* Compute right side vector in resulting linear equations +* + BASL = LOG10( SCLFAC ) + DO 240 I = ILO, IHI + DO 230 J = ILO, IHI + TB = B( I, J ) + TA = A( I, J ) + IF( TA.EQ.ZERO ) + $ GO TO 210 + TA = LOG10( ABS( TA ) ) / BASL + 210 CONTINUE + IF( TB.EQ.ZERO ) + $ GO TO 220 + TB = LOG10( ABS( TB ) ) / BASL + 220 CONTINUE + WORK( I+4*N ) = WORK( I+4*N ) - TA - TB + WORK( J+5*N ) = WORK( J+5*N ) - TA - TB + 230 CONTINUE + 240 CONTINUE +* + COEF = ONE / DBLE( 2*NR ) + COEF2 = COEF*COEF + COEF5 = HALF*COEF2 + NRP2 = NR + 2 + BETA = ZERO + IT = 1 +* +* Start generalized conjugate gradient iteration +* + 250 CONTINUE +* + GAMMA = DDOT( NR, WORK( ILO+4*N ), 1, WORK( ILO+4*N ), 1 ) + + $ DDOT( NR, WORK( ILO+5*N ), 1, WORK( ILO+5*N ), 1 ) +* + EW = ZERO + EWC = ZERO + DO 260 I = ILO, IHI + EW = EW + WORK( I+4*N ) + EWC = EWC + WORK( I+5*N ) + 260 CONTINUE +* + GAMMA = COEF*GAMMA - COEF2*( EW**2+EWC**2 ) - COEF5*( EW-EWC )**2 + IF( GAMMA.EQ.ZERO ) + $ GO TO 350 + IF( IT.NE.1 ) + $ BETA = GAMMA / PGAMMA + T = COEF5*( EWC-THREE*EW ) + TC = COEF5*( EW-THREE*EWC ) +* + CALL DSCAL( NR, BETA, WORK( ILO ), 1 ) + CALL DSCAL( NR, BETA, WORK( ILO+N ), 1 ) +* + CALL DAXPY( NR, COEF, WORK( ILO+4*N ), 1, WORK( ILO+N ), 1 ) + CALL DAXPY( NR, COEF, WORK( ILO+5*N ), 1, WORK( ILO ), 1 ) +* + DO 270 I = ILO, IHI + WORK( I ) = WORK( I ) + TC + WORK( I+N ) = WORK( I+N ) + T + 270 CONTINUE +* +* Apply matrix to vector +* + DO 300 I = ILO, IHI + KOUNT = 0 + SUM = ZERO + DO 290 J = ILO, IHI + IF( A( I, J ).EQ.ZERO ) + $ GO TO 280 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( J ) + 280 CONTINUE + IF( B( I, J ).EQ.ZERO ) + $ GO TO 290 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( J ) + 290 CONTINUE + WORK( I+2*N ) = DBLE( KOUNT )*WORK( I+N ) + SUM + 300 CONTINUE +* + DO 330 J = ILO, IHI + KOUNT = 0 + SUM = ZERO + DO 320 I = ILO, IHI + IF( A( I, J ).EQ.ZERO ) + $ GO TO 310 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( I+N ) + 310 CONTINUE + IF( B( I, J ).EQ.ZERO ) + $ GO TO 320 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( I+N ) + 320 CONTINUE + WORK( J+3*N ) = DBLE( KOUNT )*WORK( J ) + SUM + 330 CONTINUE +* + SUM = DDOT( NR, WORK( ILO+N ), 1, WORK( ILO+2*N ), 1 ) + + $ DDOT( NR, WORK( ILO ), 1, WORK( ILO+3*N ), 1 ) + ALPHA = GAMMA / SUM +* +* Determine correction to current iteration +* + CMAX = ZERO + DO 340 I = ILO, IHI + COR = ALPHA*WORK( I+N ) + IF( ABS( COR ).GT.CMAX ) + $ CMAX = ABS( COR ) + LSCALE( I ) = LSCALE( I ) + COR + COR = ALPHA*WORK( I ) + IF( ABS( COR ).GT.CMAX ) + $ CMAX = ABS( COR ) + RSCALE( I ) = RSCALE( I ) + COR + 340 CONTINUE + IF( CMAX.LT.HALF ) + $ GO TO 350 +* + CALL DAXPY( NR, -ALPHA, WORK( ILO+2*N ), 1, WORK( ILO+4*N ), 1 ) + CALL DAXPY( NR, -ALPHA, WORK( ILO+3*N ), 1, WORK( ILO+5*N ), 1 ) +* + PGAMMA = GAMMA + IT = IT + 1 + IF( IT.LE.NRP2 ) + $ GO TO 250 +* +* End generalized conjugate gradient iteration +* + 350 CONTINUE + SFMIN = DLAMCH( 'S' ) + SFMAX = ONE / SFMIN + LSFMIN = INT( LOG10( SFMIN ) / BASL+ONE ) + LSFMAX = INT( LOG10( SFMAX ) / BASL ) + DO 360 I = ILO, IHI + IRAB = IDAMAX( N-ILO+1, A( I, ILO ), LDA ) + RAB = ABS( A( I, IRAB+ILO-1 ) ) + IRAB = IDAMAX( N-ILO+1, B( I, ILO ), LDA ) + RAB = MAX( RAB, ABS( B( I, IRAB+ILO-1 ) ) ) + LRAB = INT( LOG10( RAB+SFMIN ) / BASL+ONE ) + IR = LSCALE( I ) + SIGN( HALF, LSCALE( I ) ) + IR = MIN( MAX( IR, LSFMIN ), LSFMAX, LSFMAX-LRAB ) + LSCALE( I ) = SCLFAC**IR + ICAB = IDAMAX( IHI, A( 1, I ), 1 ) + CAB = ABS( A( ICAB, I ) ) + ICAB = IDAMAX( IHI, B( 1, I ), 1 ) + CAB = MAX( CAB, ABS( B( ICAB, I ) ) ) + LCAB = INT( LOG10( CAB+SFMIN ) / BASL+ONE ) + JC = RSCALE( I ) + SIGN( HALF, RSCALE( I ) ) + JC = MIN( MAX( JC, LSFMIN ), LSFMAX, LSFMAX-LCAB ) + RSCALE( I ) = SCLFAC**JC + 360 CONTINUE +* +* Row scaling of matrices A and B +* + DO 370 I = ILO, IHI + CALL DSCAL( N-ILO+1, LSCALE( I ), A( I, ILO ), LDA ) + CALL DSCAL( N-ILO+1, LSCALE( I ), B( I, ILO ), LDB ) + 370 CONTINUE +* +* Column scaling of matrices A and B +* + DO 380 J = ILO, IHI + CALL DSCAL( IHI, RSCALE( J ), A( 1, J ), 1 ) + CALL DSCAL( IHI, RSCALE( J ), B( 1, J ), 1 ) + 380 CONTINUE +* + RETURN +* +* End of DGGBAL +* + END diff --git a/costa/native/external/lapack/dgges.f b/costa/native/external/lapack/dgges.f new file mode 100644 index 000000000..ce29aa527 --- /dev/null +++ b/costa/native/external/lapack/dgges.f @@ -0,0 +1,550 @@ + SUBROUTINE DGGES( JOBVSL, JOBVSR, SORT, DELCTG, N, A, LDA, B, LDB, + $ SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, + $ LDVSR, WORK, LWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR, SORT + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), VSL( LDVSL, * ), + $ VSR( LDVSR, * ), WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL DELCTG + EXTERNAL DELCTG +* .. +* +* Purpose +* ======= +* +* DGGES computes for a pair of N-by-N real nonsymmetric matrices (A,B), +* the generalized eigenvalues, the generalized real Schur form (S,T), +* optionally, the left and/or right matrices of Schur vectors (VSL and +* VSR). This gives the generalized Schur factorization +* +* (A,B) = ( (VSL)*S*(VSR)**T, (VSL)*T*(VSR)**T ) +* +* Optionally, it also orders the eigenvalues so that a selected cluster +* of eigenvalues appears in the leading diagonal blocks of the upper +* quasi-triangular matrix S and the upper triangular matrix T.The +* leading columns of VSL and VSR then form an orthonormal basis for the +* corresponding left and right eigenspaces (deflating subspaces). +* +* (If only the generalized eigenvalues are needed, use the driver +* DGGEV instead, which is faster.) +* +* A generalized eigenvalue for a pair of matrices (A,B) is a scalar w +* or a ratio alpha/beta = w, such that A - w*B is singular. It is +* usually represented as the pair (alpha,beta), as there is a +* reasonable interpretation for beta=0 or both being zero. +* +* A pair of matrices (S,T) is in generalized real Schur form if T is +* upper triangular with non-negative diagonal and S is block upper +* triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond +* to real generalized eigenvalues, while 2-by-2 blocks of S will be +* "standardized" by making the corresponding elements of T have the +* form: +* [ a 0 ] +* [ 0 b ] +* +* and the pair of corresponding 2-by-2 blocks in S and T will have a +* complex conjugate pair of generalized eigenvalues. +* +* +* Arguments +* ========= +* +* JOBVSL (input) CHARACTER*1 +* = 'N': do not compute the left Schur vectors; +* = 'V': compute the left Schur vectors. +* +* JOBVSR (input) CHARACTER*1 +* = 'N': do not compute the right Schur vectors; +* = 'V': compute the right Schur vectors. +* +* SORT (input) CHARACTER*1 +* Specifies whether or not to order the eigenvalues on the +* diagonal of the generalized Schur form. +* = 'N': Eigenvalues are not ordered; +* = 'S': Eigenvalues are ordered (see DELZTG); +* +* DELZTG (input) LOGICAL FUNCTION of three DOUBLE PRECISION arguments +* DELZTG must be declared EXTERNAL in the calling subroutine. +* If SORT = 'N', DELZTG is not referenced. +* If SORT = 'S', DELZTG is used to select eigenvalues to sort +* to the top left of the Schur form. +* An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if +* DELZTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either +* one of a complex conjugate pair of eigenvalues is selected, +* then both complex eigenvalues are selected. +* +* Note that in the ill-conditioned case, a selected complex +* eigenvalue may no longer satisfy DELZTG(ALPHAR(j),ALPHAI(j), +* BETA(j)) = .TRUE. after ordering. INFO is to be set to N+2 +* in this case. +* +* N (input) INTEGER +* The order of the matrices A, B, VSL, and VSR. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) +* On entry, the first of the pair of matrices. +* On exit, A has been overwritten by its generalized Schur +* form S. +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) +* On entry, the second of the pair of matrices. +* On exit, B has been overwritten by its generalized Schur +* form T. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* SDIM (output) INTEGER +* If SORT = 'N', SDIM = 0. +* If SORT = 'S', SDIM = number of eigenvalues (after sorting) +* for which DELZTG is true. (Complex conjugate pairs for which +* DELZTG is true for either eigenvalue count as 2.) +* +* ALPHAR (output) DOUBLE PRECISION array, dimension (N) +* ALPHAI (output) DOUBLE PRECISION array, dimension (N) +* BETA (output) DOUBLE PRECISION array, dimension (N) +* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +* be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i, +* and BETA(j),j=1,...,N are the diagonals of the complex Schur +* form (S,T) that would result if the 2-by-2 diagonal blocks of +* the real Schur form of (A,B) were further reduced to +* triangular form using 2-by-2 complex unitary transformations. +* If ALPHAI(j) is zero, then the j-th eigenvalue is real; if +* positive, then the j-th and (j+1)-st eigenvalues are a +* complex conjugate pair, with ALPHAI(j+1) negative. +* +* Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +* may easily over- or underflow, and BETA(j) may even be zero. +* Thus, the user should avoid naively computing the ratio. +* However, ALPHAR and ALPHAI will be always less than and +* usually comparable with norm(A) in magnitude, and BETA always +* less than and usually comparable with norm(B). +* +* VSL (output) DOUBLE PRECISION array, dimension (LDVSL,N) +* If JOBVSL = 'V', VSL will contain the left Schur vectors. +* Not referenced if JOBVSL = 'N'. +* +* LDVSL (input) INTEGER +* The leading dimension of the matrix VSL. LDVSL >=1, and +* if JOBVSL = 'V', LDVSL >= N. +* +* VSR (output) DOUBLE PRECISION array, dimension (LDVSR,N) +* If JOBVSR = 'V', VSR will contain the right Schur vectors. +* Not referenced if JOBVSR = 'N'. +* +* LDVSR (input) INTEGER +* The leading dimension of the matrix VSR. LDVSR >= 1, and +* if JOBVSR = 'V', LDVSR >= N. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 8*N+16. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* BWORK (workspace) LOGICAL array, dimension (N) +* Not referenced if SORT = 'N'. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* = 1,...,N: +* The QZ iteration failed. (A,B) are not in Schur +* form, but ALPHAR(j), ALPHAI(j), and BETA(j) should +* be correct for j=INFO+1,...,N. +* > N: =N+1: other than QZ iteration failed in DHGEQZ. +* =N+2: after reordering, roundoff changed values of +* some complex eigenvalues so that leading +* eigenvalues in the Generalized Schur form no +* longer satisfy DELZTG=.TRUE. This could also +* be caused due to scaling. +* =N+3: reordering failed in DTGSEN. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL, + $ LQUERY, LST2SL, WANTST + INTEGER I, ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, + $ ILO, IP, IRIGHT, IROWS, ITAU, IWRK, MAXWRK, + $ MINWRK + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PVSL, + $ PVSR, SAFMAX, SAFMIN, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + DOUBLE PRECISION DIF( 2 ) +* .. +* .. External Subroutines .. + EXTERNAL DGEQRF, DGGBAK, DGGBAL, DGGHRD, DHGEQZ, DLABAD, + $ DLACPY, DLASCL, DLASET, DORGQR, DORMQR, DTGSEN, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* + WANTST = LSAME( SORT, 'S' ) +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -15 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -17 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MINWRK = 7*( N+1 ) + 16 + MAXWRK = 7*( N+1 ) + N*ILAENV( 1, 'DGEQRF', ' ', N, 1, N, 0 ) + + $ 16 + IF( ILVSL ) THEN + MAXWRK = MAX( MAXWRK, 7*( N+1 )+N* + $ ILAENV( 1, 'DORGQR', ' ', N, 1, N, -1 ) ) + END IF + WORK( 1 ) = MAXWRK + END IF +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) + $ INFO = -19 + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGES ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SAFMIN = DLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + CALL DLABAD( SAFMIN, SAFMAX ) + SMLNUM = SQRT( SAFMIN ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL DLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = DLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL DLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (Workspace: need 6*N + 2*N space for storing balancing factors) +* + ILEFT = 1 + IRIGHT = N + 1 + IWRK = IRIGHT + N + CALL DGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = IWRK + IWRK = ITAU + IROWS + CALL DGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* (Workspace: need N, prefer N*NB) +* + CALL DORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VSL +* (Workspace: need N, prefer N*NB) +* + IF( ILVSL ) THEN + CALL DLASET( 'Full', N, N, ZERO, ONE, VSL, LDVSL ) + CALL DLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + CALL DORGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VSR +* + IF( ILVSR ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + CALL DGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IERR ) +* +* Perform QZ algorithm, computing Schur vectors if desired +* (Workspace: need N) +* + IWRK = ITAU + CALL DHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 50 + END IF +* +* Sort eigenvalues ALPHA/BETA if desired +* (Workspace: need 4*N+16 ) +* + SDIM = 0 + IF( WANTST ) THEN +* +* Undo scaling on eigenvalues before DELZTGing +* + IF( ILASCL ) THEN + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, + $ IERR ) + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, + $ IERR ) + END IF + IF( ILBSCL ) + $ CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* +* Select eigenvalues +* + DO 10 I = 1, N + BWORK( I ) = DELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) ) + 10 CONTINUE +* + CALL DTGSEN( 0, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, ALPHAR, + $ ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, SDIM, PVSL, + $ PVSR, DIF, WORK( IWRK ), LWORK-IWRK+1, IDUM, 1, + $ IERR ) + IF( IERR.EQ.1 ) + $ INFO = N + 3 +* + END IF +* +* Apply back-permutation to VSL and VSR +* (Workspace: none needed) +* + IF( ILVSL ) + $ CALL DGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSL, LDVSL, IERR ) +* + IF( ILVSR ) + $ CALL DGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSR, LDVSR, IERR ) +* +* Check if unscaling would cause over/underflow, if so, rescale +* (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of +* B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I) +* + IF( ILASCL ) THEN + DO 20 I = 1, N + IF( ALPHAI( I ).NE.ZERO ) THEN + IF( ( ALPHAR( I ) / SAFMAX ).GT.( ANRMTO / ANRM ) .OR. + $ ( SAFMIN / ALPHAR( I ) ).GT.( ANRM / ANRMTO ) ) THEN + WORK( 1 ) = ABS( A( I, I ) / ALPHAR( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + ELSE IF( ( ALPHAI( I ) / SAFMAX ).GT. + $ ( ANRMTO / ANRM ) .OR. + $ ( SAFMIN / ALPHAI( I ) ).GT.( ANRM / ANRMTO ) ) + $ THEN + WORK( 1 ) = ABS( A( I, I+1 ) / ALPHAI( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + END IF + END IF + 20 CONTINUE + END IF +* + IF( ILBSCL ) THEN + DO 30 I = 1, N + IF( ALPHAI( I ).NE.ZERO ) THEN + IF( ( BETA( I ) / SAFMAX ).GT.( BNRMTO / BNRM ) .OR. + $ ( SAFMIN / BETA( I ) ).GT.( BNRM / BNRMTO ) ) THEN + WORK( 1 ) = ABS( B( I, I ) / BETA( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + END IF + END IF + 30 CONTINUE + END IF +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL DLASCL( 'H', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR ) + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR ) + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL DLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR ) + CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + IF( WANTST ) THEN +* +* Check if reordering is correct +* + LASTSL = .TRUE. + LST2SL = .TRUE. + SDIM = 0 + IP = 0 + DO 40 I = 1, N + CURSL = DELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) ) + IF( ALPHAI( I ).EQ.ZERO ) THEN + IF( CURSL ) + $ SDIM = SDIM + 1 + IP = 0 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + ELSE + IF( IP.EQ.1 ) THEN +* +* Last eigenvalue of conjugate pair +* + CURSL = CURSL .OR. LASTSL + LASTSL = CURSL + IF( CURSL ) + $ SDIM = SDIM + 2 + IP = -1 + IF( CURSL .AND. .NOT.LST2SL ) + $ INFO = N + 2 + ELSE +* +* First eigenvalue of conjugate pair +* + IP = 1 + END IF + END IF + LST2SL = LASTSL + LASTSL = CURSL + 40 CONTINUE +* + END IF +* + 50 CONTINUE +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of DGGES +* + END diff --git a/costa/native/external/lapack/dggesx.f b/costa/native/external/lapack/dggesx.f new file mode 100644 index 000000000..17688320d --- /dev/null +++ b/costa/native/external/lapack/dggesx.f @@ -0,0 +1,640 @@ + SUBROUTINE DGGESX( JOBVSL, JOBVSR, SORT, DELCTG, SENSE, N, A, LDA, + $ B, LDB, SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, + $ VSR, LDVSR, RCONDE, RCONDV, WORK, LWORK, IWORK, + $ LIWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR, SENSE, SORT + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LIWORK, LWORK, N, + $ SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), RCONDE( 2 ), + $ RCONDV( 2 ), VSL( LDVSL, * ), VSR( LDVSR, * ), + $ WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL DELCTG + EXTERNAL DELCTG +* .. +* +* Purpose +* ======= +* +* DGGESX computes for a pair of N-by-N real nonsymmetric matrices +* (A,B), the generalized eigenvalues, the real Schur form (S,T), and, +* optionally, the left and/or right matrices of Schur vectors (VSL and +* VSR). This gives the generalized Schur factorization +* +* (A,B) = ( (VSL) S (VSR)**T, (VSL) T (VSR)**T ) +* +* Optionally, it also orders the eigenvalues so that a selected cluster +* of eigenvalues appears in the leading diagonal blocks of the upper +* quasi-triangular matrix S and the upper triangular matrix T; computes +* a reciprocal condition number for the average of the selected +* eigenvalues (RCONDE); and computes a reciprocal condition number for +* the right and left deflating subspaces corresponding to the selected +* eigenvalues (RCONDV). The leading columns of VSL and VSR then form +* an orthonormal basis for the corresponding left and right eigenspaces +* (deflating subspaces). +* +* A generalized eigenvalue for a pair of matrices (A,B) is a scalar w +* or a ratio alpha/beta = w, such that A - w*B is singular. It is +* usually represented as the pair (alpha,beta), as there is a +* reasonable interpretation for beta=0 or for both being zero. +* +* A pair of matrices (S,T) is in generalized real Schur form if T is +* upper triangular with non-negative diagonal and S is block upper +* triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond +* to real generalized eigenvalues, while 2-by-2 blocks of S will be +* "standardized" by making the corresponding elements of T have the +* form: +* [ a 0 ] +* [ 0 b ] +* +* and the pair of corresponding 2-by-2 blocks in S and T will have a +* complex conjugate pair of generalized eigenvalues. +* +* +* Arguments +* ========= +* +* JOBVSL (input) CHARACTER*1 +* = 'N': do not compute the left Schur vectors; +* = 'V': compute the left Schur vectors. +* +* JOBVSR (input) CHARACTER*1 +* = 'N': do not compute the right Schur vectors; +* = 'V': compute the right Schur vectors. +* +* SORT (input) CHARACTER*1 +* Specifies whether or not to order the eigenvalues on the +* diagonal of the generalized Schur form. +* = 'N': Eigenvalues are not ordered; +* = 'S': Eigenvalues are ordered (see DELZTG). +* +* DELZTG (input) LOGICAL FUNCTION of three DOUBLE PRECISION arguments +* DELZTG must be declared EXTERNAL in the calling subroutine. +* If SORT = 'N', DELZTG is not referenced. +* If SORT = 'S', DELZTG is used to select eigenvalues to sort +* to the top left of the Schur form. +* An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if +* DELZTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either +* one of a complex conjugate pair of eigenvalues is selected, +* then both complex eigenvalues are selected. +* Note that a selected complex eigenvalue may no longer satisfy +* DELZTG(ALPHAR(j),ALPHAI(j),BETA(j)) = .TRUE. after ordering, +* since ordering may change the value of complex eigenvalues +* (especially if the eigenvalue is ill-conditioned), in this +* case INFO is set to N+3. +* +* SENSE (input) CHARACTER +* Determines which reciprocal condition numbers are computed. +* = 'N' : None are computed; +* = 'E' : Computed for average of selected eigenvalues only; +* = 'V' : Computed for selected deflating subspaces only; +* = 'B' : Computed for both. +* If SENSE = 'E', 'V', or 'B', SORT must equal 'S'. +* +* N (input) INTEGER +* The order of the matrices A, B, VSL, and VSR. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) +* On entry, the first of the pair of matrices. +* On exit, A has been overwritten by its generalized Schur +* form S. +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) +* On entry, the second of the pair of matrices. +* On exit, B has been overwritten by its generalized Schur +* form T. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* SDIM (output) INTEGER +* If SORT = 'N', SDIM = 0. +* If SORT = 'S', SDIM = number of eigenvalues (after sorting) +* for which DELZTG is true. (Complex conjugate pairs for which +* DELZTG is true for either eigenvalue count as 2.) +* +* ALPHAR (output) DOUBLE PRECISION array, dimension (N) +* ALPHAI (output) DOUBLE PRECISION array, dimension (N) +* BETA (output) DOUBLE PRECISION array, dimension (N) +* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +* be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i +* and BETA(j),j=1,...,N are the diagonals of the complex Schur +* form (S,T) that would result if the 2-by-2 diagonal blocks of +* the real Schur form of (A,B) were further reduced to +* triangular form using 2-by-2 complex unitary transformations. +* If ALPHAI(j) is zero, then the j-th eigenvalue is real; if +* positive, then the j-th and (j+1)-st eigenvalues are a +* complex conjugate pair, with ALPHAI(j+1) negative. +* +* Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +* may easily over- or underflow, and BETA(j) may even be zero. +* Thus, the user should avoid naively computing the ratio. +* However, ALPHAR and ALPHAI will be always less than and +* usually comparable with norm(A) in magnitude, and BETA always +* less than and usually comparable with norm(B). +* +* VSL (output) DOUBLE PRECISION array, dimension (LDVSL,N) +* If JOBVSL = 'V', VSL will contain the left Schur vectors. +* Not referenced if JOBVSL = 'N'. +* +* LDVSL (input) INTEGER +* The leading dimension of the matrix VSL. LDVSL >=1, and +* if JOBVSL = 'V', LDVSL >= N. +* +* VSR (output) DOUBLE PRECISION array, dimension (LDVSR,N) +* If JOBVSR = 'V', VSR will contain the right Schur vectors. +* Not referenced if JOBVSR = 'N'. +* +* LDVSR (input) INTEGER +* The leading dimension of the matrix VSR. LDVSR >= 1, and +* if JOBVSR = 'V', LDVSR >= N. +* +* RCONDE (output) DOUBLE PRECISION array, dimension ( 2 ) +* If SENSE = 'E' or 'B', RCONDE(1) and RCONDE(2) contain the +* reciprocal condition numbers for the average of the selected +* eigenvalues. +* Not referenced if SENSE = 'N' or 'V'. +* +* RCONDV (output) DOUBLE PRECISION array, dimension ( 2 ) +* If SENSE = 'V' or 'B', RCONDV(1) and RCONDV(2) contain the +* reciprocal condition numbers for the selected deflating +* subspaces. +* Not referenced if SENSE = 'N' or 'E'. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 8*(N+1)+16. +* If SENSE = 'E', 'V', or 'B', +* LWORK >= MAX( 8*(N+1)+16, 2*SDIM*(N-SDIM) ). +* +* IWORK (workspace) INTEGER array, dimension (LIWORK) +* Not referenced if SENSE = 'N'. +* +* LIWORK (input) INTEGER +* The dimension of the array WORK. LIWORK >= N+6. +* +* BWORK (workspace) LOGICAL array, dimension (N) +* Not referenced if SORT = 'N'. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* = 1,...,N: +* The QZ iteration failed. (A,B) are not in Schur +* form, but ALPHAR(j), ALPHAI(j), and BETA(j) should +* be correct for j=INFO+1,...,N. +* > N: =N+1: other than QZ iteration failed in DHGEQZ +* =N+2: after reordering, roundoff changed values of +* some complex eigenvalues so that leading +* eigenvalues in the Generalized Schur form no +* longer satisfy DELZTG=.TRUE. This could also +* be caused due to scaling. +* =N+3: reordering failed in DTGSEN. +* +* Further details +* =============== +* +* An approximate (asymptotic) bound on the average absolute error of +* the selected eigenvalues is +* +* EPS * norm((A, B)) / RCONDE( 1 ). +* +* An approximate (asymptotic) bound on the maximum angular error in +* the computed deflating subspaces is +* +* EPS * norm((A, B)) / RCONDV( 2 ). +* +* See LAPACK User's Guide, section 4.11 for more information. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL, + $ LST2SL, WANTSB, WANTSE, WANTSN, WANTST, WANTSV + INTEGER I, ICOLS, IERR, IHI, IJOB, IJOBVL, IJOBVR, + $ ILEFT, ILO, IP, IRIGHT, IROWS, ITAU, IWRK, + $ LIWMIN, MAXWRK, MINWRK + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PL, + $ PR, SAFMAX, SAFMIN, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION DIF( 2 ) +* .. +* .. External Subroutines .. + EXTERNAL DGEQRF, DGGBAK, DGGBAL, DGGHRD, DHGEQZ, DLABAD, + $ DLACPY, DLASCL, DLASET, DORGQR, DORMQR, DTGSEN, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* + WANTST = LSAME( SORT, 'S' ) + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) + IF( WANTSN ) THEN + IJOB = 0 + IWORK( 1 ) = 1 + ELSE IF( WANTSE ) THEN + IJOB = 1 + ELSE IF( WANTSV ) THEN + IJOB = 2 + ELSE IF( WANTSB ) THEN + IJOB = 4 + END IF +* +* Test the input arguments +* + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR. + $ ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -16 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -18 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. LWORK.GE.1 ) THEN + MINWRK = 8*( N+1 ) + 16 + MAXWRK = 7*( N+1 ) + N*ILAENV( 1, 'DGEQRF', ' ', N, 1, N, 0 ) + + $ 16 + IF( ILVSL ) THEN + MAXWRK = MAX( MAXWRK, 8*( N+1 )+N* + $ ILAENV( 1, 'DORGQR', ' ', N, 1, N, -1 )+16 ) + END IF + WORK( 1 ) = MAXWRK + END IF + IF( .NOT.WANTSN ) THEN + LIWMIN = 1 + ELSE + LIWMIN = N + 6 + END IF + IWORK( 1 ) = LIWMIN +* + IF( INFO.EQ.0 .AND. LWORK.LT.MINWRK ) THEN + INFO = -22 + ELSE IF( INFO.EQ.0 .AND. IJOB.GE.1 ) THEN + IF( LIWORK.LT.LIWMIN ) + $ INFO = -24 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGESX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SAFMIN = DLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + CALL DLABAD( SAFMIN, SAFMAX ) + SMLNUM = SQRT( SAFMIN ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL DLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = DLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL DLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (Workspace: need 6*N + 2*N for permutation parameters) +* + ILEFT = 1 + IRIGHT = N + 1 + IWRK = IRIGHT + N + CALL DGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = IWRK + IWRK = ITAU + IROWS + CALL DGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* (Workspace: need N, prefer N*NB) +* + CALL DORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VSL +* (Workspace: need N, prefer N*NB) +* + IF( ILVSL ) THEN + CALL DLASET( 'Full', N, N, ZERO, ONE, VSL, LDVSL ) + CALL DLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + CALL DORGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VSR +* + IF( ILVSR ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + CALL DGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IERR ) +* + SDIM = 0 +* +* Perform QZ algorithm, computing Schur vectors if desired +* (Workspace: need N) +* + IWRK = ITAU + CALL DHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 60 + END IF +* +* Sort eigenvalues ALPHA/BETA and compute the reciprocal of +* condition number(s) +* (Workspace: If IJOB >= 1, need MAX( 8*(N+1), 2*SDIM*(N-SDIM) ) +* otherwise, need 8*(N+1) ) +* + IF( WANTST ) THEN +* +* Undo scaling on eigenvalues before DELZTGing +* + IF( ILASCL ) THEN + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, + $ IERR ) + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, + $ IERR ) + END IF + IF( ILBSCL ) + $ CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* +* Select eigenvalues +* + DO 10 I = 1, N + BWORK( I ) = DELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues, transform Generalized Schur vectors, and +* compute reciprocal condition numbers +* + CALL DTGSEN( IJOB, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, + $ SDIM, PL, PR, DIF, WORK( IWRK ), LWORK-IWRK+1, + $ IWORK, LIWORK, IERR ) +* + IF( IJOB.GE.1 ) + $ MAXWRK = MAX( MAXWRK, 2*SDIM*( N-SDIM ) ) + IF( IERR.EQ.-22 ) THEN +* +* not enough real workspace +* + INFO = -22 + ELSE + RCONDE( 1 ) = PL + RCONDE( 2 ) = PR + RCONDV( 1 ) = DIF( 1 ) + RCONDV( 2 ) = DIF( 2 ) + IF( IERR.EQ.1 ) + $ INFO = N + 3 + END IF +* + END IF +* +* Apply permutation to VSL and VSR +* (Workspace: none needed) +* + IF( ILVSL ) + $ CALL DGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSL, LDVSL, IERR ) +* + IF( ILVSR ) + $ CALL DGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSR, LDVSR, IERR ) +* +* Check if unscaling would cause over/underflow, if so, rescale +* (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of +* B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I) +* + IF( ILASCL ) THEN + DO 20 I = 1, N + IF( ALPHAI( I ).NE.ZERO ) THEN + IF( ( ALPHAR( I ) / SAFMAX ).GT.( ANRMTO / ANRM ) .OR. + $ ( SAFMIN / ALPHAR( I ) ).GT.( ANRM / ANRMTO ) ) THEN + WORK( 1 ) = ABS( A( I, I ) / ALPHAR( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + ELSE IF( ( ALPHAI( I ) / SAFMAX ).GT. + $ ( ANRMTO / ANRM ) .OR. + $ ( SAFMIN / ALPHAI( I ) ).GT.( ANRM / ANRMTO ) ) + $ THEN + WORK( 1 ) = ABS( A( I, I+1 ) / ALPHAI( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + END IF + END IF + 20 CONTINUE + END IF +* + IF( ILBSCL ) THEN + DO 30 I = 1, N + IF( ALPHAI( I ).NE.ZERO ) THEN + IF( ( BETA( I ) / SAFMAX ).GT.( BNRMTO / BNRM ) .OR. + $ ( SAFMIN / BETA( I ) ).GT.( BNRM / BNRMTO ) ) THEN + WORK( 1 ) = ABS( B( I, I ) / BETA( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + END IF + END IF + 30 CONTINUE + END IF +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL DLASCL( 'H', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR ) + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR ) + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL DLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR ) + CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + 40 CONTINUE +* + IF( WANTST ) THEN +* +* Check if reordering is correct +* + LASTSL = .TRUE. + LST2SL = .TRUE. + SDIM = 0 + IP = 0 + DO 50 I = 1, N + CURSL = DELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) ) + IF( ALPHAI( I ).EQ.ZERO ) THEN + IF( CURSL ) + $ SDIM = SDIM + 1 + IP = 0 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + ELSE + IF( IP.EQ.1 ) THEN +* +* Last eigenvalue of conjugate pair +* + CURSL = CURSL .OR. LASTSL + LASTSL = CURSL + IF( CURSL ) + $ SDIM = SDIM + 2 + IP = -1 + IF( CURSL .AND. .NOT.LST2SL ) + $ INFO = N + 2 + ELSE +* +* First eigenvalue of conjugate pair +* + IP = 1 + END IF + END IF + LST2SL = LASTSL + LASTSL = CURSL + 50 CONTINUE +* + END IF +* + 60 CONTINUE +* + WORK( 1 ) = MAXWRK + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of DGGESX +* + END diff --git a/costa/native/external/lapack/dggev.f b/costa/native/external/lapack/dggev.f new file mode 100644 index 000000000..38b3a8bbe --- /dev/null +++ b/costa/native/external/lapack/dggev.f @@ -0,0 +1,481 @@ + SUBROUTINE DGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI, + $ BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGGEV computes for a pair of N-by-N real nonsymmetric matrices (A,B) +* the generalized eigenvalues, and optionally, the left and/or right +* generalized eigenvectors. +* +* A generalized eigenvalue for a pair of matrices (A,B) is a scalar +* lambda or a ratio alpha/beta = lambda, such that A - lambda*B is +* singular. It is usually represented as the pair (alpha,beta), as +* there is a reasonable interpretation for beta=0, and even for both +* being zero. +* +* The right eigenvector v(j) corresponding to the eigenvalue lambda(j) +* of (A,B) satisfies +* +* A * v(j) = lambda(j) * B * v(j). +* +* The left eigenvector u(j) corresponding to the eigenvalue lambda(j) +* of (A,B) satisfies +* +* u(j)**H * A = lambda(j) * u(j)**H * B . +* +* where u(j)**H is the conjugate-transpose of u(j). +* +* +* Arguments +* ========= +* +* JOBVL (input) CHARACTER*1 +* = 'N': do not compute the left generalized eigenvectors; +* = 'V': compute the left generalized eigenvectors. +* +* JOBVR (input) CHARACTER*1 +* = 'N': do not compute the right generalized eigenvectors; +* = 'V': compute the right generalized eigenvectors. +* +* N (input) INTEGER +* The order of the matrices A, B, VL, and VR. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) +* On entry, the matrix A in the pair (A,B). +* On exit, A has been overwritten. +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) +* On entry, the matrix B in the pair (A,B). +* On exit, B has been overwritten. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* ALPHAR (output) DOUBLE PRECISION array, dimension (N) +* ALPHAI (output) DOUBLE PRECISION array, dimension (N) +* BETA (output) DOUBLE PRECISION array, dimension (N) +* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +* be the generalized eigenvalues. If ALPHAI(j) is zero, then +* the j-th eigenvalue is real; if positive, then the j-th and +* (j+1)-st eigenvalues are a complex conjugate pair, with +* ALPHAI(j+1) negative. +* +* Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +* may easily over- or underflow, and BETA(j) may even be zero. +* Thus, the user should avoid naively computing the ratio +* alpha/beta. However, ALPHAR and ALPHAI will be always less +* than and usually comparable with norm(A) in magnitude, and +* BETA always less than and usually comparable with norm(B). +* +* VL (output) DOUBLE PRECISION array, dimension (LDVL,N) +* If JOBVL = 'V', the left eigenvectors u(j) are stored one +* after another in the columns of VL, in the same order as +* their eigenvalues. If the j-th eigenvalue is real, then +* u(j) = VL(:,j), the j-th column of VL. If the j-th and +* (j+1)-th eigenvalues form a complex conjugate pair, then +* u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1). +* Each eigenvector will be scaled so the largest component have +* abs(real part)+abs(imag. part)=1. +* Not referenced if JOBVL = 'N'. +* +* LDVL (input) INTEGER +* The leading dimension of the matrix VL. LDVL >= 1, and +* if JOBVL = 'V', LDVL >= N. +* +* VR (output) DOUBLE PRECISION array, dimension (LDVR,N) +* If JOBVR = 'V', the right eigenvectors v(j) are stored one +* after another in the columns of VR, in the same order as +* their eigenvalues. If the j-th eigenvalue is real, then +* v(j) = VR(:,j), the j-th column of VR. If the j-th and +* (j+1)-th eigenvalues form a complex conjugate pair, then +* v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1). +* Each eigenvector will be scaled so the largest component have +* abs(real part)+abs(imag. part)=1. +* Not referenced if JOBVR = 'N'. +* +* LDVR (input) INTEGER +* The leading dimension of the matrix VR. LDVR >= 1, and +* if JOBVR = 'V', LDVR >= N. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,8*N). +* For good performance, LWORK must generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* = 1,...,N: +* The QZ iteration failed. No eigenvectors have been +* calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) +* should be correct for j=INFO+1,...,N. +* > N: =N+1: other than QZ iteration failed in DHGEQZ. +* =N+2: error return from DTGEVC. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY + CHARACTER CHTEMP + INTEGER ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, ILO, + $ IN, IRIGHT, IROWS, ITAU, IWRK, JC, JR, MAXWRK, + $ MINWRK + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SMLNUM, TEMP +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DGEQRF, DGGBAK, DGGBAL, DGGHRD, DHGEQZ, DLACPY, + $ DLASCL, DLASET, DORGQR, DORMQR, DTGEVC, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -12 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -14 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. The workspace is +* computed assuming ILO = 1 and IHI = N, the worst case.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MAXWRK = 7*N + N*ILAENV( 1, 'DGEQRF', ' ', N, 1, N, 0 ) + MINWRK = MAX( 1, 8*N ) + WORK( 1 ) = MAXWRK + END IF +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) + $ INFO = -16 +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL DLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = DLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL DLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrices A, B to isolate eigenvalues if possible +* (Workspace: need 6*N) +* + ILEFT = 1 + IRIGHT = N + 1 + IWRK = IRIGHT + N + CALL DGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + IF( ILV ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = IWRK + IWRK = ITAU + IROWS + CALL DGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* (Workspace: need N, prefer N*NB) +* + CALL DORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VL +* (Workspace: need N, prefer N*NB) +* + IF( ILVL ) THEN + CALL DLASET( 'Full', N, N, ZERO, ONE, VL, LDVL ) + CALL DLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + CALL DORGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VR +* + IF( ILVR ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + IF( ILV ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL DGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IERR ) + ELSE + CALL DGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR ) + END IF +* +* Perform QZ algorithm (Compute eigenvalues, and optionally, the +* Schur forms and Schur vectors) +* (Workspace: need N) +* + IWRK = ITAU + IF( ILV ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF + CALL DHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 110 + END IF +* +* Compute Eigenvectors +* (Workspace: need 6*N) +* + IF( ILV ) THEN + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF + CALL DTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, N, IN, WORK( IWRK ), IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 110 + END IF +* +* Undo balancing on VL and VR and normalization +* (Workspace: none needed) +* + IF( ILVL ) THEN + CALL DGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VL, LDVL, IERR ) + DO 50 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 50 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 10 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) ) ) + 10 CONTINUE + ELSE + DO 20 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) )+ + $ ABS( VL( JR, JC+1 ) ) ) + 20 CONTINUE + END IF + IF( TEMP.LT.SMLNUM ) + $ GO TO 50 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 30 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 30 CONTINUE + ELSE + DO 40 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + VL( JR, JC+1 ) = VL( JR, JC+1 )*TEMP + 40 CONTINUE + END IF + 50 CONTINUE + END IF + IF( ILVR ) THEN + CALL DGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VR, LDVR, IERR ) + DO 100 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 100 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 60 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) ) ) + 60 CONTINUE + ELSE + DO 70 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) )+ + $ ABS( VR( JR, JC+1 ) ) ) + 70 CONTINUE + END IF + IF( TEMP.LT.SMLNUM ) + $ GO TO 100 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 80 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 80 CONTINUE + ELSE + DO 90 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + VR( JR, JC+1 ) = VR( JR, JC+1 )*TEMP + 90 CONTINUE + END IF + 100 CONTINUE + END IF +* +* End of eigenvector calculation +* + END IF +* +* Undo scaling if necessary +* + IF( ILASCL ) THEN + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR ) + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + 110 CONTINUE +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of DGGEV +* + END diff --git a/costa/native/external/lapack/dggevx.f b/costa/native/external/lapack/dggevx.f new file mode 100644 index 000000000..0676e1b99 --- /dev/null +++ b/costa/native/external/lapack/dggevx.f @@ -0,0 +1,698 @@ + SUBROUTINE DGGEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, ILO, + $ IHI, LSCALE, RSCALE, ABNRM, BBNRM, RCONDE, + $ RCONDV, WORK, LWORK, IWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER BALANC, JOBVL, JOBVR, SENSE + INTEGER IHI, ILO, INFO, LDA, LDB, LDVL, LDVR, LWORK, N + DOUBLE PRECISION ABNRM, BBNRM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), LSCALE( * ), + $ RCONDE( * ), RCONDV( * ), RSCALE( * ), + $ VL( LDVL, * ), VR( LDVR, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGGEVX computes for a pair of N-by-N real nonsymmetric matrices (A,B) +* the generalized eigenvalues, and optionally, the left and/or right +* generalized eigenvectors. +* +* Optionally also, it computes a balancing transformation to improve +* the conditioning of the eigenvalues and eigenvectors (ILO, IHI, +* LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for +* the eigenvalues (RCONDE), and reciprocal condition numbers for the +* right eigenvectors (RCONDV). +* +* A generalized eigenvalue for a pair of matrices (A,B) is a scalar +* lambda or a ratio alpha/beta = lambda, such that A - lambda*B is +* singular. It is usually represented as the pair (alpha,beta), as +* there is a reasonable interpretation for beta=0, and even for both +* being zero. +* +* The right eigenvector v(j) corresponding to the eigenvalue lambda(j) +* of (A,B) satisfies +* +* A * v(j) = lambda(j) * B * v(j) . +* +* The left eigenvector u(j) corresponding to the eigenvalue lambda(j) +* of (A,B) satisfies +* +* u(j)**H * A = lambda(j) * u(j)**H * B. +* +* where u(j)**H is the conjugate-transpose of u(j). +* +* +* Arguments +* ========= +* +* BALANC (input) CHARACTER*1 +* Specifies the balance option to be performed. +* = 'N': do not diagonally scale or permute; +* = 'P': permute only; +* = 'S': scale only; +* = 'B': both permute and scale. +* Computed reciprocal condition numbers will be for the +* matrices after permuting and/or balancing. Permuting does +* not change condition numbers (in exact arithmetic), but +* balancing does. +* +* JOBVL (input) CHARACTER*1 +* = 'N': do not compute the left generalized eigenvectors; +* = 'V': compute the left generalized eigenvectors. +* +* JOBVR (input) CHARACTER*1 +* = 'N': do not compute the right generalized eigenvectors; +* = 'V': compute the right generalized eigenvectors. +* +* SENSE (input) CHARACTER*1 +* Determines which reciprocal condition numbers are computed. +* = 'N': none are computed; +* = 'E': computed for eigenvalues only; +* = 'V': computed for eigenvectors only; +* = 'B': computed for eigenvalues and eigenvectors. +* +* N (input) INTEGER +* The order of the matrices A, B, VL, and VR. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) +* On entry, the matrix A in the pair (A,B). +* On exit, A has been overwritten. If JOBVL='V' or JOBVR='V' +* or both, then A contains the first part of the real Schur +* form of the "balanced" versions of the input A and B. +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) +* On entry, the matrix B in the pair (A,B). +* On exit, B has been overwritten. If JOBVL='V' or JOBVR='V' +* or both, then B contains the second part of the real Schur +* form of the "balanced" versions of the input A and B. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* ALPHAR (output) DOUBLE PRECISION array, dimension (N) +* ALPHAI (output) DOUBLE PRECISION array, dimension (N) +* BETA (output) DOUBLE PRECISION array, dimension (N) +* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +* be the generalized eigenvalues. If ALPHAI(j) is zero, then +* the j-th eigenvalue is real; if positive, then the j-th and +* (j+1)-st eigenvalues are a complex conjugate pair, with +* ALPHAI(j+1) negative. +* +* Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +* may easily over- or underflow, and BETA(j) may even be zero. +* Thus, the user should avoid naively computing the ratio +* ALPHA/BETA. However, ALPHAR and ALPHAI will be always less +* than and usually comparable with norm(A) in magnitude, and +* BETA always less than and usually comparable with norm(B). +* +* VL (output) DOUBLE PRECISION array, dimension (LDVL,N) +* If JOBVL = 'V', the left eigenvectors u(j) are stored one +* after another in the columns of VL, in the same order as +* their eigenvalues. If the j-th eigenvalue is real, then +* u(j) = VL(:,j), the j-th column of VL. If the j-th and +* (j+1)-th eigenvalues form a complex conjugate pair, then +* u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1). +* Each eigenvector will be scaled so the largest component have +* abs(real part) + abs(imag. part) = 1. +* Not referenced if JOBVL = 'N'. +* +* LDVL (input) INTEGER +* The leading dimension of the matrix VL. LDVL >= 1, and +* if JOBVL = 'V', LDVL >= N. +* +* VR (output) DOUBLE PRECISION array, dimension (LDVR,N) +* If JOBVR = 'V', the right eigenvectors v(j) are stored one +* after another in the columns of VR, in the same order as +* their eigenvalues. If the j-th eigenvalue is real, then +* v(j) = VR(:,j), the j-th column of VR. If the j-th and +* (j+1)-th eigenvalues form a complex conjugate pair, then +* v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1). +* Each eigenvector will be scaled so the largest component have +* abs(real part) + abs(imag. part) = 1. +* Not referenced if JOBVR = 'N'. +* +* LDVR (input) INTEGER +* The leading dimension of the matrix VR. LDVR >= 1, and +* if JOBVR = 'V', LDVR >= N. +* +* ILO,IHI (output) INTEGER +* ILO and IHI are integer values such that on exit +* A(i,j) = 0 and B(i,j) = 0 if i > j and +* j = 1,...,ILO-1 or i = IHI+1,...,N. +* If BALANC = 'N' or 'S', ILO = 1 and IHI = N. +* +* LSCALE (output) DOUBLE PRECISION array, dimension (N) +* Details of the permutations and scaling factors applied +* to the left side of A and B. If PL(j) is the index of the +* row interchanged with row j, and DL(j) is the scaling +* factor applied to row j, then +* LSCALE(j) = PL(j) for j = 1,...,ILO-1 +* = DL(j) for j = ILO,...,IHI +* = PL(j) for j = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* RSCALE (output) DOUBLE PRECISION array, dimension (N) +* Details of the permutations and scaling factors applied +* to the right side of A and B. If PR(j) is the index of the +* column interchanged with column j, and DR(j) is the scaling +* factor applied to column j, then +* RSCALE(j) = PR(j) for j = 1,...,ILO-1 +* = DR(j) for j = ILO,...,IHI +* = PR(j) for j = IHI+1,...,N +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* ABNRM (output) DOUBLE PRECISION +* The one-norm of the balanced matrix A. +* +* BBNRM (output) DOUBLE PRECISION +* The one-norm of the balanced matrix B. +* +* RCONDE (output) DOUBLE PRECISION array, dimension (N) +* If SENSE = 'E' or 'B', the reciprocal condition numbers of +* the selected eigenvalues, stored in consecutive elements of +* the array. For a complex conjugate pair of eigenvalues two +* consecutive elements of RCONDE are set to the same value. +* Thus RCONDE(j), RCONDV(j), and the j-th columns of VL and VR +* all correspond to the same eigenpair (but not in general the +* j-th eigenpair, unless all eigenpairs are selected). +* If SENSE = 'V', RCONDE is not referenced. +* +* RCONDV (output) DOUBLE PRECISION array, dimension (N) +* If SENSE = 'V' or 'B', the estimated reciprocal condition +* numbers of the selected eigenvectors, stored in consecutive +* elements of the array. For a complex eigenvector two +* consecutive elements of RCONDV are set to the same value. If +* the eigenvalues cannot be reordered to compute RCONDV(j), +* RCONDV(j) is set to 0; this can only occur when the true +* value would be very small anyway. +* If SENSE = 'E', RCONDV is not referenced. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,6*N). +* If SENSE = 'E', LWORK >= 12*N. +* If SENSE = 'V' or 'B', LWORK >= 2*N*N+12*N+16. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace) INTEGER array, dimension (N+6) +* If SENSE = 'E', IWORK is not referenced. +* +* BWORK (workspace) LOGICAL array, dimension (N) +* If SENSE = 'N', BWORK is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* = 1,...,N: +* The QZ iteration failed. No eigenvectors have been +* calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) +* should be correct for j=INFO+1,...,N. +* > N: =N+1: other than QZ iteration failed in DHGEQZ. +* =N+2: error return from DTGEVC. +* +* Further Details +* =============== +* +* Balancing a matrix pair (A,B) includes, first, permuting rows and +* columns to isolate eigenvalues, second, applying diagonal similarity +* transformation to the rows and columns to make the rows and columns +* as close in norm as possible. The computed reciprocal condition +* numbers correspond to the balanced matrix. Permuting rows and columns +* will not change the condition numbers (in exact arithmetic) but +* diagonal scaling will. For further explanation of balancing, see +* section 4.11.1.2 of LAPACK Users' Guide. +* +* An approximate error bound on the chordal distance between the i-th +* computed generalized eigenvalue w and the corresponding exact +* eigenvalue lambda is +* +* chord(w, lambda) <= EPS * norm(ABNRM, BBNRM) / RCONDE(I) +* +* An approximate error bound for the angle between the i-th computed +* eigenvector VL(i) or VR(i) is given by +* +* EPS * norm(ABNRM, BBNRM) / DIF(i). +* +* For further explanation of the reciprocal condition numbers RCONDE +* and RCONDV, see section 4.11 of LAPACK User's Guide. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY, PAIR, + $ WANTSB, WANTSE, WANTSN, WANTSV + CHARACTER CHTEMP + INTEGER I, ICOLS, IERR, IJOBVL, IJOBVR, IN, IROWS, + $ ITAU, IWRK, IWRK1, J, JC, JR, M, MAXWRK, + $ MINWRK, MM + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SMLNUM, TEMP +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DGEQRF, DGGBAK, DGGBAL, DGGHRD, DHGEQZ, DLACPY, + $ DLASCL, DLASET, DORGQR, DORMQR, DTGEVC, DTGSNA, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( BALANC, 'N' ) .OR. LSAME( BALANC, + $ 'S' ) .OR. LSAME( BALANC, 'P' ) .OR. LSAME( BALANC, 'B' ) ) ) + $ THEN + INFO = -1 + ELSE IF( IJOBVL.LE.0 ) THEN + INFO = -2 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -3 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSB .OR. WANTSV ) ) + $ THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -14 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -16 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. The workspace is +* computed assuming ILO = 1 and IHI = N, the worst case.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MAXWRK = 5*N + N*ILAENV( 1, 'DGEQRF', ' ', N, 1, N, 0 ) + MINWRK = MAX( 1, 6*N ) + IF( WANTSE ) THEN + MINWRK = MAX( 1, 12*N ) + ELSE IF( WANTSV .OR. WANTSB ) THEN + MINWRK = 2*N*N + 12*N + 16 + MAXWRK = MAX( MAXWRK, 2*N*N+12*N+16 ) + END IF + WORK( 1 ) = MAXWRK + END IF +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -26 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL DLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = DLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL DLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute and/or balance the matrix pair (A,B) +* (Workspace: need 6*N) +* + CALL DGGBAL( BALANC, N, A, LDA, B, LDB, ILO, IHI, LSCALE, RSCALE, + $ WORK, IERR ) +* +* Compute ABNRM and BBNRM +* + ABNRM = DLANGE( '1', N, N, A, LDA, WORK( 1 ) ) + IF( ILASCL ) THEN + WORK( 1 ) = ABNRM + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, 1, 1, WORK( 1 ), 1, + $ IERR ) + ABNRM = WORK( 1 ) + END IF +* + BBNRM = DLANGE( '1', N, N, B, LDB, WORK( 1 ) ) + IF( ILBSCL ) THEN + WORK( 1 ) = BBNRM + CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, 1, 1, WORK( 1 ), 1, + $ IERR ) + BBNRM = WORK( 1 ) + END IF +* +* Reduce B to triangular form (QR decomposition of B) +* (Workspace: need N, prefer N*NB ) +* + IROWS = IHI + 1 - ILO + IF( ILV .OR. .NOT.WANTSN ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = 1 + IWRK = ITAU + IROWS + CALL DGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to A +* (Workspace: need N, prefer N*NB) +* + CALL DORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VL and/or VR +* (Workspace: need N, prefer N*NB) +* + IF( ILVL ) THEN + CALL DLASET( 'Full', N, N, ZERO, ONE, VL, LDVL ) + CALL DLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + CALL DORGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* + IF( ILVR ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + IF( ILV .OR. .NOT.WANTSN ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL DGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IERR ) + ELSE + CALL DGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR ) + END IF +* +* Perform QZ algorithm (Compute eigenvalues, and optionally, the +* Schur forms and Schur vectors) +* (Workspace: need N) +* + IF( ILV .OR. .NOT.WANTSN ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF +* + CALL DHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, WORK, + $ LWORK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 130 + END IF +* +* Compute Eigenvectors and estimate condition numbers if desired +* (Workspace: DTGEVC: need 6*N +* DTGSNA: need 2*N*(N+2)+16 if SENSE = 'V' or 'B', +* need N otherwise ) +* + IF( ILV .OR. .NOT.WANTSN ) THEN + IF( ILV ) THEN + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF +* + CALL DTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, N, IN, WORK, IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 130 + END IF + END IF +* + IF( .NOT.WANTSN ) THEN +* +* compute eigenvectors (DTGEVC) and estimate condition +* numbers (DTGSNA). Note that the definition of the condition +* number is not invariant under transformation (u,v) to +* (Q*u, Z*v), where (u,v) are eigenvectors of the generalized +* Schur form (S,T), Q and Z are orthogonal matrices. In order +* to avoid using extra 2*N*N workspace, we have to recalculate +* eigenvectors and estimate one condition numbers at a time. +* + PAIR = .FALSE. + DO 20 I = 1, N +* + IF( PAIR ) THEN + PAIR = .FALSE. + GO TO 20 + END IF + MM = 1 + IF( I.LT.N ) THEN + IF( A( I+1, I ).NE.ZERO ) THEN + PAIR = .TRUE. + MM = 2 + END IF + END IF +* + DO 10 J = 1, N + BWORK( J ) = .FALSE. + 10 CONTINUE + IF( MM.EQ.1 ) THEN + BWORK( I ) = .TRUE. + ELSE IF( MM.EQ.2 ) THEN + BWORK( I ) = .TRUE. + BWORK( I+1 ) = .TRUE. + END IF +* + IWRK = MM*N + 1 + IWRK1 = IWRK + MM*N +* +* Compute a pair of left and right eigenvectors. +* (compute workspace: need up to 4*N + 6*N) +* + IF( WANTSE .OR. WANTSB ) THEN + CALL DTGEVC( 'B', 'S', BWORK, N, A, LDA, B, LDB, + $ WORK( 1 ), N, WORK( IWRK ), N, MM, M, + $ WORK( IWRK1 ), IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 130 + END IF + END IF +* + CALL DTGSNA( SENSE, 'S', BWORK, N, A, LDA, B, LDB, + $ WORK( 1 ), N, WORK( IWRK ), N, RCONDE( I ), + $ RCONDV( I ), MM, M, WORK( IWRK1 ), + $ LWORK-IWRK1+1, IWORK, IERR ) +* + 20 CONTINUE + END IF + END IF +* +* Undo balancing on VL and VR and normalization +* (Workspace: none needed) +* + IF( ILVL ) THEN + CALL DGGBAK( BALANC, 'L', N, ILO, IHI, LSCALE, RSCALE, N, VL, + $ LDVL, IERR ) +* + DO 70 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 70 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 30 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) ) ) + 30 CONTINUE + ELSE + DO 40 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) )+ + $ ABS( VL( JR, JC+1 ) ) ) + 40 CONTINUE + END IF + IF( TEMP.LT.SMLNUM ) + $ GO TO 70 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 50 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 50 CONTINUE + ELSE + DO 60 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + VL( JR, JC+1 ) = VL( JR, JC+1 )*TEMP + 60 CONTINUE + END IF + 70 CONTINUE + END IF + IF( ILVR ) THEN + CALL DGGBAK( BALANC, 'R', N, ILO, IHI, LSCALE, RSCALE, N, VR, + $ LDVR, IERR ) + DO 120 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 120 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 80 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) ) ) + 80 CONTINUE + ELSE + DO 90 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) )+ + $ ABS( VR( JR, JC+1 ) ) ) + 90 CONTINUE + END IF + IF( TEMP.LT.SMLNUM ) + $ GO TO 120 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 100 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 100 CONTINUE + ELSE + DO 110 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + VR( JR, JC+1 ) = VR( JR, JC+1 )*TEMP + 110 CONTINUE + END IF + 120 CONTINUE + END IF +* +* Undo scaling if necessary +* + IF( ILASCL ) THEN + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR ) + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + 130 CONTINUE + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of DGGEVX +* + END diff --git a/costa/native/external/lapack/dggglm.f b/costa/native/external/lapack/dggglm.f new file mode 100644 index 000000000..6504276cf --- /dev/null +++ b/costa/native/external/lapack/dggglm.f @@ -0,0 +1,212 @@ + SUBROUTINE DGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), D( * ), WORK( * ), + $ X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* DGGGLM solves a general Gauss-Markov linear model (GLM) problem: +* +* minimize || y ||_2 subject to d = A*x + B*y +* x +* +* where A is an N-by-M matrix, B is an N-by-P matrix, and d is a +* given N-vector. It is assumed that M <= N <= M+P, and +* +* rank(A) = M and rank( A B ) = N. +* +* Under these assumptions, the constrained equation is always +* consistent, and there is a unique solution x and a minimal 2-norm +* solution y, which is obtained using a generalized QR factorization +* of A and B. +* +* In particular, if matrix B is square nonsingular, then the problem +* GLM is equivalent to the following weighted linear least squares +* problem +* +* minimize || inv(B)*(d-A*x) ||_2 +* x +* +* where inv(B) denotes the inverse of B. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of rows of the matrices A and B. N >= 0. +* +* M (input) INTEGER +* The number of columns of the matrix A. 0 <= M <= N. +* +* P (input) INTEGER +* The number of columns of the matrix B. P >= N-M. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,M) +* On entry, the N-by-M matrix A. +* On exit, A is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,P) +* On entry, the N-by-P matrix B. +* On exit, B is destroyed. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, D is the left hand side of the GLM equation. +* On exit, D is destroyed. +* +* X (output) DOUBLE PRECISION array, dimension (M) +* Y (output) DOUBLE PRECISION array, dimension (P) +* On exit, X and Y are the solutions of the GLM problem. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N+M+P). +* For optimum performance, LWORK >= M+min(N,P)+max(N,P)*NB, +* where NB is an upper bound for the optimal blocksizes for +* DGEQRF, SGERQF, DORMQR and SORMRQ. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* =================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, LOPT, LWKOPT, NB, NB1, NB2, NB3, NB4, NP +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMV, DGGQRF, DORMQR, DORMRQ, DTRSV, + $ XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NP = MIN( N, P ) + NB1 = ILAENV( 1, 'DGEQRF', ' ', N, M, -1, -1 ) + NB2 = ILAENV( 1, 'DGERQF', ' ', N, M, -1, -1 ) + NB3 = ILAENV( 1, 'DORMQR', ' ', N, M, P, -1 ) + NB4 = ILAENV( 1, 'DORMRQ', ' ', N, M, P, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKOPT = M + NP + MAX( N, P )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 .OR. M.GT.N ) THEN + INFO = -2 + ELSE IF( P.LT.0 .OR. P.LT.N-M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.MAX( 1, N+M+P ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGGLM', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the GQR factorization of matrices A and B: +* +* Q'*A = ( R11 ) M, Q'*B*Z' = ( T11 T12 ) M +* ( 0 ) N-M ( 0 T22 ) N-M +* M M+P-N N-M +* +* where R11 and T22 are upper triangular, and Q and Z are +* orthogonal. +* + CALL DGGQRF( N, M, P, A, LDA, WORK, B, LDB, WORK( M+1 ), + $ WORK( M+NP+1 ), LWORK-M-NP, INFO ) + LOPT = WORK( M+NP+1 ) +* +* Update left-hand-side vector d = Q'*d = ( d1 ) M +* ( d2 ) N-M +* + CALL DORMQR( 'Left', 'Transpose', N, 1, M, A, LDA, WORK, D, + $ MAX( 1, N ), WORK( M+NP+1 ), LWORK-M-NP, INFO ) + LOPT = MAX( LOPT, INT( WORK( M+NP+1 ) ) ) +* +* Solve T22*y2 = d2 for y2 +* + CALL DTRSV( 'Upper', 'No transpose', 'Non unit', N-M, + $ B( M+1, M+P-N+1 ), LDB, D( M+1 ), 1 ) + CALL DCOPY( N-M, D( M+1 ), 1, Y( M+P-N+1 ), 1 ) +* +* Set y1 = 0 +* + DO 10 I = 1, M + P - N + Y( I ) = ZERO + 10 CONTINUE +* +* Update d1 = d1 - T12*y2 +* + CALL DGEMV( 'No transpose', M, N-M, -ONE, B( 1, M+P-N+1 ), LDB, + $ Y( M+P-N+1 ), 1, ONE, D, 1 ) +* +* Solve triangular system: R11*x = d1 +* + CALL DTRSV( 'Upper', 'No Transpose', 'Non unit', M, A, LDA, D, 1 ) +* +* Copy D to X +* + CALL DCOPY( M, D, 1, X, 1 ) +* +* Backward transformation y = Z'*y +* + CALL DORMRQ( 'Left', 'Transpose', P, 1, NP, + $ B( MAX( 1, N-P+1 ), 1 ), LDB, WORK( M+1 ), Y, + $ MAX( 1, P ), WORK( M+NP+1 ), LWORK-M-NP, INFO ) + WORK( 1 ) = M + NP + MAX( LOPT, INT( WORK( M+NP+1 ) ) ) +* + RETURN +* +* End of DGGGLM +* + END diff --git a/costa/native/external/lapack/dgghrd.f b/costa/native/external/lapack/dgghrd.f new file mode 100644 index 000000000..9473fb580 --- /dev/null +++ b/costa/native/external/lapack/dgghrd.f @@ -0,0 +1,253 @@ + SUBROUTINE DGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, + $ LDQ, Z, LDZ, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, COMPZ + INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DGGHRD reduces a pair of real matrices (A,B) to generalized upper +* Hessenberg form using orthogonal transformations, where A is a +* general matrix and B is upper triangular: Q' * A * Z = H and +* Q' * B * Z = T, where H is upper Hessenberg, T is upper triangular, +* and Q and Z are orthogonal, and ' means transpose. +* +* The orthogonal matrices Q and Z are determined as products of Givens +* rotations. They may either be formed explicitly, or they may be +* postmultiplied into input matrices Q1 and Z1, so that +* +* Q1 * A * Z1' = (Q1*Q) * H * (Z1*Z)' +* Q1 * B * Z1' = (Q1*Q) * T * (Z1*Z)' +* +* Arguments +* ========= +* +* COMPQ (input) CHARACTER*1 +* = 'N': do not compute Q; +* = 'I': Q is initialized to the unit matrix, and the +* orthogonal matrix Q is returned; +* = 'V': Q must contain an orthogonal matrix Q1 on entry, +* and the product Q1*Q is returned. +* +* COMPZ (input) CHARACTER*1 +* = 'N': do not compute Z; +* = 'I': Z is initialized to the unit matrix, and the +* orthogonal matrix Z is returned; +* = 'V': Z must contain an orthogonal matrix Z1 on entry, +* and the product Z1*Z is returned. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that A is already upper triangular in rows and +* columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally set +* by a previous call to DGGBAL; otherwise they should be set +* to 1 and N respectively. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) +* On entry, the N-by-N general matrix to be reduced. +* On exit, the upper triangle and the first subdiagonal of A +* are overwritten with the upper Hessenberg matrix H, and the +* rest is set to zero. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) +* On entry, the N-by-N upper triangular matrix B. +* On exit, the upper triangular matrix T = Q' B Z. The +* elements below the diagonal are set to zero. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) +* If COMPQ='N': Q is not referenced. +* If COMPQ='I': on entry, Q need not be set, and on exit it +* contains the orthogonal matrix Q, where Q' +* is the product of the Givens transformations +* which are applied to A and B on the left. +* If COMPQ='V': on entry, Q must contain an orthogonal matrix +* Q1, and on exit this is overwritten by Q1*Q. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. +* LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. +* +* Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) +* If COMPZ='N': Z is not referenced. +* If COMPZ='I': on entry, Z need not be set, and on exit it +* contains the orthogonal matrix Z, which is +* the product of the Givens transformations +* which are applied to A and B on the right. +* If COMPZ='V': on entry, Z must contain an orthogonal matrix +* Z1, and on exit this is overwritten by Z1*Z. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. +* LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* This routine reduces A to Hessenberg and B to triangular form by +* an unblocked reduction, as described in _Matrix_Computations_, +* by Golub and Van Loan (Johns Hopkins Press.) +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ILQ, ILZ + INTEGER ICOMPQ, ICOMPZ, JCOL, JROW + DOUBLE PRECISION C, S, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLARTG, DLASET, DROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode COMPQ +* + IF( LSAME( COMPQ, 'N' ) ) THEN + ILQ = .FALSE. + ICOMPQ = 1 + ELSE IF( LSAME( COMPQ, 'V' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 2 + ELSE IF( LSAME( COMPQ, 'I' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 3 + ELSE + ICOMPQ = 0 + END IF +* +* Decode COMPZ +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ILZ = .FALSE. + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 2 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 3 + ELSE + ICOMPZ = 0 + END IF +* +* Test the input parameters. +* + INFO = 0 + IF( ICOMPQ.LE.0 ) THEN + INFO = -1 + ELSE IF( ICOMPZ.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 ) THEN + INFO = -4 + ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( ( ILQ .AND. LDQ.LT.N ) .OR. LDQ.LT.1 ) THEN + INFO = -11 + ELSE IF( ( ILZ .AND. LDZ.LT.N ) .OR. LDZ.LT.1 ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGHRD', -INFO ) + RETURN + END IF +* +* Initialize Q and Z if desired. +* + IF( ICOMPQ.EQ.3 ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) + IF( ICOMPZ.EQ.3 ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* +* Zero out lower triangle of B +* + DO 20 JCOL = 1, N - 1 + DO 10 JROW = JCOL + 1, N + B( JROW, JCOL ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Reduce A and B +* + DO 40 JCOL = ILO, IHI - 2 +* + DO 30 JROW = IHI, JCOL + 2, -1 +* +* Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL) +* + TEMP = A( JROW-1, JCOL ) + CALL DLARTG( TEMP, A( JROW, JCOL ), C, S, + $ A( JROW-1, JCOL ) ) + A( JROW, JCOL ) = ZERO + CALL DROT( N-JCOL, A( JROW-1, JCOL+1 ), LDA, + $ A( JROW, JCOL+1 ), LDA, C, S ) + CALL DROT( N+2-JROW, B( JROW-1, JROW-1 ), LDB, + $ B( JROW, JROW-1 ), LDB, C, S ) + IF( ILQ ) + $ CALL DROT( N, Q( 1, JROW-1 ), 1, Q( 1, JROW ), 1, C, S ) +* +* Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1) +* + TEMP = B( JROW, JROW ) + CALL DLARTG( TEMP, B( JROW, JROW-1 ), C, S, + $ B( JROW, JROW ) ) + B( JROW, JROW-1 ) = ZERO + CALL DROT( IHI, A( 1, JROW ), 1, A( 1, JROW-1 ), 1, C, S ) + CALL DROT( JROW-1, B( 1, JROW ), 1, B( 1, JROW-1 ), 1, C, + $ S ) + IF( ILZ ) + $ CALL DROT( N, Z( 1, JROW ), 1, Z( 1, JROW-1 ), 1, C, S ) + 30 CONTINUE + 40 CONTINUE +* + RETURN +* +* End of DGGHRD +* + END diff --git a/costa/native/external/lapack/dgglse.f b/costa/native/external/lapack/dgglse.f new file mode 100644 index 000000000..f83bae247 --- /dev/null +++ b/costa/native/external/lapack/dgglse.f @@ -0,0 +1,217 @@ + SUBROUTINE DGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( * ), D( * ), + $ WORK( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DGGLSE solves the linear equality-constrained least squares (LSE) +* problem: +* +* minimize || c - A*x ||_2 subject to B*x = d +* +* where A is an M-by-N matrix, B is a P-by-N matrix, c is a given +* M-vector, and d is a given P-vector. It is assumed that +* P <= N <= M+P, and +* +* rank(B) = P and rank( ( A ) ) = N. +* ( ( B ) ) +* +* These conditions ensure that the LSE problem has a unique solution, +* which is obtained using a GRQ factorization of the matrices B and A. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrices A and B. N >= 0. +* +* P (input) INTEGER +* The number of rows of the matrix B. 0 <= P <= N <= M+P. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,N) +* On entry, the P-by-N matrix B. +* On exit, B is destroyed. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,P). +* +* C (input/output) DOUBLE PRECISION array, dimension (M) +* On entry, C contains the right hand side vector for the +* least squares part of the LSE problem. +* On exit, the residual sum of squares for the solution +* is given by the sum of squares of elements N-P+1 to M of +* vector C. +* +* D (input/output) DOUBLE PRECISION array, dimension (P) +* On entry, D contains the right hand side vector for the +* constrained equation. +* On exit, D is destroyed. +* +* X (output) DOUBLE PRECISION array, dimension (N) +* On exit, X is the solution of the LSE problem. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M+N+P). +* For optimum performance LWORK >= P+min(M,N)+max(M,N)*NB, +* where NB is an upper bound for the optimal blocksizes for +* DGEQRF, SGERQF, DORMQR and SORMRQ. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKOPT, MN, NB, NB1, NB2, NB3, NB4, NR +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGEMV, DGGRQF, DORMQR, DORMRQ, + $ DTRMV, DTRSV, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + MN = MIN( M, N ) + NB1 = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'DGERQF', ' ', M, N, -1, -1 ) + NB3 = ILAENV( 1, 'DORMQR', ' ', M, N, P, -1 ) + NB4 = ILAENV( 1, 'DORMRQ', ' ', M, N, P, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKOPT = P + MN + MAX( M, N )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( P.LT.0 .OR. P.GT.N .OR. P.LT.N-M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.MAX( 1, M+N+P ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGLSE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the GRQ factorization of matrices B and A: +* +* B*Q' = ( 0 T12 ) P Z'*A*Q' = ( R11 R12 ) N-P +* N-P P ( 0 R22 ) M+P-N +* N-P P +* +* where T12 and R11 are upper triangular, and Q and Z are +* orthogonal. +* + CALL DGGRQF( P, M, N, B, LDB, WORK, A, LDA, WORK( P+1 ), + $ WORK( P+MN+1 ), LWORK-P-MN, INFO ) + LOPT = WORK( P+MN+1 ) +* +* Update c = Z'*c = ( c1 ) N-P +* ( c2 ) M+P-N +* + CALL DORMQR( 'Left', 'Transpose', M, 1, MN, A, LDA, WORK( P+1 ), + $ C, MAX( 1, M ), WORK( P+MN+1 ), LWORK-P-MN, INFO ) + LOPT = MAX( LOPT, INT( WORK( P+MN+1 ) ) ) +* +* Solve T12*x2 = d for x2 +* + CALL DTRSV( 'Upper', 'No transpose', 'Non unit', P, B( 1, N-P+1 ), + $ LDB, D, 1 ) +* +* Update c1 +* + CALL DGEMV( 'No transpose', N-P, P, -ONE, A( 1, N-P+1 ), LDA, D, + $ 1, ONE, C, 1 ) +* +* Sovle R11*x1 = c1 for x1 +* + CALL DTRSV( 'Upper', 'No transpose', 'Non unit', N-P, A, LDA, C, + $ 1 ) +* +* Put the solutions in X +* + CALL DCOPY( N-P, C, 1, X, 1 ) + CALL DCOPY( P, D, 1, X( N-P+1 ), 1 ) +* +* Compute the residual vector: +* + IF( M.LT.N ) THEN + NR = M + P - N + CALL DGEMV( 'No transpose', NR, N-M, -ONE, A( N-P+1, M+1 ), + $ LDA, D( NR+1 ), 1, ONE, C( N-P+1 ), 1 ) + ELSE + NR = P + END IF + CALL DTRMV( 'Upper', 'No transpose', 'Non unit', NR, + $ A( N-P+1, N-P+1 ), LDA, D, 1 ) + CALL DAXPY( NR, -ONE, D, 1, C( N-P+1 ), 1 ) +* +* Backward transformation x = Q'*x +* + CALL DORMRQ( 'Left', 'Transpose', N, 1, P, B, LDB, WORK( 1 ), X, + $ N, WORK( P+MN+1 ), LWORK-P-MN, INFO ) + WORK( 1 ) = P + MN + MAX( LOPT, INT( WORK( P+MN+1 ) ) ) +* + RETURN +* +* End of DGGLSE +* + END diff --git a/costa/native/external/lapack/dggqrf.f b/costa/native/external/lapack/dggqrf.f new file mode 100644 index 000000000..44c6954b1 --- /dev/null +++ b/costa/native/external/lapack/dggqrf.f @@ -0,0 +1,212 @@ + SUBROUTINE DGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGGQRF computes a generalized QR factorization of an N-by-M matrix A +* and an N-by-P matrix B: +* +* A = Q*R, B = Q*T*Z, +* +* where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal +* matrix, and R and T assume one of the forms: +* +* if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, +* ( 0 ) N-M N M-N +* M +* +* where R11 is upper triangular, and +* +* if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, +* P-N N ( T21 ) P +* P +* +* where T12 or T21 is upper triangular. +* +* In particular, if B is square and nonsingular, the GQR factorization +* of A and B implicitly gives the QR factorization of inv(B)*A: +* +* inv(B)*A = Z'*(inv(T)*R) +* +* where inv(B) denotes the inverse of the matrix B, and Z' denotes the +* transpose of the matrix Z. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of rows of the matrices A and B. N >= 0. +* +* M (input) INTEGER +* The number of columns of the matrix A. M >= 0. +* +* P (input) INTEGER +* The number of columns of the matrix B. P >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,M) +* On entry, the N-by-M matrix A. +* On exit, the elements on and above the diagonal of the array +* contain the min(N,M)-by-M upper trapezoidal matrix R (R is +* upper triangular if N >= M); the elements below the diagonal, +* with the array TAUA, represent the orthogonal matrix Q as a +* product of min(N,M) elementary reflectors (see Further +* Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAUA (output) DOUBLE PRECISION array, dimension (min(N,M)) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix Q (see Further Details). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,P) +* On entry, the N-by-P matrix B. +* On exit, if N <= P, the upper triangle of the subarray +* B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; +* if N > P, the elements on and above the (N-P)-th subdiagonal +* contain the N-by-P upper trapezoidal matrix T; the remaining +* elements, with the array TAUB, represent the orthogonal +* matrix Z as a product of elementary reflectors (see Further +* Details). +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* TAUB (output) DOUBLE PRECISION array, dimension (min(N,P)) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix Z (see Further Details). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N,M,P). +* For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3), +* where NB1 is the optimal blocksize for the QR factorization +* of an N-by-M matrix, NB2 is the optimal blocksize for the +* RQ factorization of an N-by-P matrix, and NB3 is the optimal +* blocksize for a call of DORMQR. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(n,m). +* +* Each H(i) has the form +* +* H(i) = I - taua * v * v' +* +* where taua is a real scalar, and v is a real vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), +* and taua in TAUA(i). +* To form Q explicitly, use LAPACK subroutine DORGQR. +* To use Q to update another matrix, use LAPACK subroutine DORMQR. +* +* The matrix Z is represented as a product of elementary reflectors +* +* Z = H(1) H(2) . . . H(k), where k = min(n,p). +* +* Each H(i) has the form +* +* H(i) = I - taub * v * v' +* +* where taub is a real scalar, and v is a real vector with +* v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in +* B(n-k+i,1:p-k+i-1), and taub in TAUB(i). +* To form Z explicitly, use LAPACK subroutine DORGRQ. +* To use Z to update another matrix, use LAPACK subroutine DORMRQ. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKOPT, NB, NB1, NB2, NB3 +* .. +* .. External Subroutines .. + EXTERNAL DGEQRF, DGERQF, DORMQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB1 = ILAENV( 1, 'DGEQRF', ' ', N, M, -1, -1 ) + NB2 = ILAENV( 1, 'DGERQF', ' ', N, P, -1, -1 ) + NB3 = ILAENV( 1, 'DORMQR', ' ', N, M, P, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LWKOPT = MAX( N, M, P )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( P.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, N, M, P ) .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* QR factorization of N-by-M matrix A: A = Q*R +* + CALL DGEQRF( N, M, A, LDA, TAUA, WORK, LWORK, INFO ) + LOPT = WORK( 1 ) +* +* Update B := Q'*B. +* + CALL DORMQR( 'Left', 'Transpose', N, P, MIN( N, M ), A, LDA, TAUA, + $ B, LDB, WORK, LWORK, INFO ) + LOPT = MAX( LOPT, INT( WORK( 1 ) ) ) +* +* RQ factorization of N-by-P matrix B: B = T*Z. +* + CALL DGERQF( N, P, B, LDB, TAUB, WORK, LWORK, INFO ) + WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) ) +* + RETURN +* +* End of DGGQRF +* + END diff --git a/costa/native/external/lapack/dggrqf.f b/costa/native/external/lapack/dggrqf.f new file mode 100644 index 000000000..349f8d460 --- /dev/null +++ b/costa/native/external/lapack/dggrqf.f @@ -0,0 +1,212 @@ + SUBROUTINE DGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGGRQF computes a generalized RQ factorization of an M-by-N matrix A +* and a P-by-N matrix B: +* +* A = R*Q, B = Z*T*Q, +* +* where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal +* matrix, and R and T assume one of the forms: +* +* if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, +* N-M M ( R21 ) N +* N +* +* where R12 or R21 is upper triangular, and +* +* if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, +* ( 0 ) P-N P N-P +* N +* +* where T11 is upper triangular. +* +* In particular, if B is square and nonsingular, the GRQ factorization +* of A and B implicitly gives the RQ factorization of A*inv(B): +* +* A*inv(B) = (R*inv(T))*Z' +* +* where inv(B) denotes the inverse of the matrix B, and Z' denotes the +* transpose of the matrix Z. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* P (input) INTEGER +* The number of rows of the matrix B. P >= 0. +* +* N (input) INTEGER +* The number of columns of the matrices A and B. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, if M <= N, the upper triangle of the subarray +* A(1:M,N-M+1:N) contains the M-by-M upper triangular matrix R; +* if M > N, the elements on and above the (M-N)-th subdiagonal +* contain the M-by-N upper trapezoidal matrix R; the remaining +* elements, with the array TAUA, represent the orthogonal +* matrix Q as a product of elementary reflectors (see Further +* Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAUA (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix Q (see Further Details). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,N) +* On entry, the P-by-N matrix B. +* On exit, the elements on and above the diagonal of the array +* contain the min(P,N)-by-N upper trapezoidal matrix T (T is +* upper triangular if P >= N); the elements below the diagonal, +* with the array TAUB, represent the orthogonal matrix Z as a +* product of elementary reflectors (see Further Details). +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,P). +* +* TAUB (output) DOUBLE PRECISION array, dimension (min(P,N)) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix Z (see Further Details). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N,M,P). +* For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3), +* where NB1 is the optimal blocksize for the RQ factorization +* of an M-by-N matrix, NB2 is the optimal blocksize for the +* QR factorization of a P-by-N matrix, and NB3 is the optimal +* blocksize for a call of DORMRQ. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INF0= -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - taua * v * v' +* +* where taua is a real scalar, and v is a real vector with +* v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in +* A(m-k+i,1:n-k+i-1), and taua in TAUA(i). +* To form Q explicitly, use LAPACK subroutine DORGRQ. +* To use Q to update another matrix, use LAPACK subroutine DORMRQ. +* +* The matrix Z is represented as a product of elementary reflectors +* +* Z = H(1) H(2) . . . H(k), where k = min(p,n). +* +* Each H(i) has the form +* +* H(i) = I - taub * v * v' +* +* where taub is a real scalar, and v is a real vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in B(i+1:p,i), +* and taub in TAUB(i). +* To form Z explicitly, use LAPACK subroutine DORGQR. +* To use Z to update another matrix, use LAPACK subroutine DORMQR. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKOPT, NB, NB1, NB2, NB3 +* .. +* .. External Subroutines .. + EXTERNAL DGEQRF, DGERQF, DORMRQ, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB1 = ILAENV( 1, 'DGERQF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'DGEQRF', ' ', P, N, -1, -1 ) + NB3 = ILAENV( 1, 'DORMRQ', ' ', M, N, P, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LWKOPT = MAX( N, M, P )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( P.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, M, P, N ) .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGRQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* RQ factorization of M-by-N matrix A: A = R*Q +* + CALL DGERQF( M, N, A, LDA, TAUA, WORK, LWORK, INFO ) + LOPT = WORK( 1 ) +* +* Update B := B*Q' +* + CALL DORMRQ( 'Right', 'Transpose', P, N, MIN( M, N ), + $ A( MAX( 1, M-N+1 ), 1 ), LDA, TAUA, B, LDB, WORK, + $ LWORK, INFO ) + LOPT = MAX( LOPT, INT( WORK( 1 ) ) ) +* +* QR factorization of P-by-N matrix B: B = Z*T +* + CALL DGEQRF( P, N, B, LDB, TAUB, WORK, LWORK, INFO ) + WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) ) +* + RETURN +* +* End of DGGRQF +* + END diff --git a/costa/native/external/lapack/dggsvd.f b/costa/native/external/lapack/dggsvd.f new file mode 100644 index 000000000..dd148186f --- /dev/null +++ b/costa/native/external/lapack/dggsvd.f @@ -0,0 +1,336 @@ + SUBROUTINE DGGSVD( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, + $ LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, + $ IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), Q( LDQ, * ), U( LDU, * ), + $ V( LDV, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGGSVD computes the generalized singular value decomposition (GSVD) +* of an M-by-N real matrix A and P-by-N real matrix B: +* +* U'*A*Q = D1*( 0 R ), V'*B*Q = D2*( 0 R ) +* +* where U, V and Q are orthogonal matrices, and Z' is the transpose +* of Z. Let K+L = the effective numerical rank of the matrix (A',B')', +* then R is a K+L-by-K+L nonsingular upper triangular matrix, D1 and +* D2 are M-by-(K+L) and P-by-(K+L) "diagonal" matrices and of the +* following structures, respectively: +* +* If M-K-L >= 0, +* +* K L +* D1 = K ( I 0 ) +* L ( 0 C ) +* M-K-L ( 0 0 ) +* +* K L +* D2 = L ( 0 S ) +* P-L ( 0 0 ) +* +* N-K-L K L +* ( 0 R ) = K ( 0 R11 R12 ) +* L ( 0 0 R22 ) +* +* where +* +* C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), +* S = diag( BETA(K+1), ... , BETA(K+L) ), +* C**2 + S**2 = I. +* +* R is stored in A(1:K+L,N-K-L+1:N) on exit. +* +* If M-K-L < 0, +* +* K M-K K+L-M +* D1 = K ( I 0 0 ) +* M-K ( 0 C 0 ) +* +* K M-K K+L-M +* D2 = M-K ( 0 S 0 ) +* K+L-M ( 0 0 I ) +* P-L ( 0 0 0 ) +* +* N-K-L K M-K K+L-M +* ( 0 R ) = K ( 0 R11 R12 R13 ) +* M-K ( 0 0 R22 R23 ) +* K+L-M ( 0 0 0 R33 ) +* +* where +* +* C = diag( ALPHA(K+1), ... , ALPHA(M) ), +* S = diag( BETA(K+1), ... , BETA(M) ), +* C**2 + S**2 = I. +* +* (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored +* ( 0 R22 R23 ) +* in B(M-K+1:L,N+M-K-L+1:N) on exit. +* +* The routine computes C, S, R, and optionally the orthogonal +* transformation matrices U, V and Q. +* +* In particular, if B is an N-by-N nonsingular matrix, then the GSVD of +* A and B implicitly gives the SVD of A*inv(B): +* A*inv(B) = U*(D1*inv(D2))*V'. +* If ( A',B')' has orthonormal columns, then the GSVD of A and B is +* also equal to the CS decomposition of A and B. Furthermore, the GSVD +* can be used to derive the solution of the eigenvalue problem: +* A'*A x = lambda* B'*B x. +* In some literature, the GSVD of A and B is presented in the form +* U'*A*X = ( 0 D1 ), V'*B*X = ( 0 D2 ) +* where U and V are orthogonal and X is nonsingular, D1 and D2 are +* ``diagonal''. The former GSVD form can be converted to the latter +* form by taking the nonsingular matrix X as +* +* X = Q*( I 0 ) +* ( 0 inv(R) ). +* +* Arguments +* ========= +* +* JOBU (input) CHARACTER*1 +* = 'U': Orthogonal matrix U is computed; +* = 'N': U is not computed. +* +* JOBV (input) CHARACTER*1 +* = 'V': Orthogonal matrix V is computed; +* = 'N': V is not computed. +* +* JOBQ (input) CHARACTER*1 +* = 'Q': Orthogonal matrix Q is computed; +* = 'N': Q is not computed. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrices A and B. N >= 0. +* +* P (input) INTEGER +* The number of rows of the matrix B. P >= 0. +* +* K (output) INTEGER +* L (output) INTEGER +* On exit, K and L specify the dimension of the subblocks +* described in the Purpose section. +* K + L = effective numerical rank of (A',B')'. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A contains the triangular matrix R, or part of R. +* See Purpose for details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,N) +* On entry, the P-by-N matrix B. +* On exit, B contains the triangular matrix R if M-K-L < 0. +* See Purpose for details. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDA >= max(1,P). +* +* ALPHA (output) DOUBLE PRECISION array, dimension (N) +* BETA (output) DOUBLE PRECISION array, dimension (N) +* On exit, ALPHA and BETA contain the generalized singular +* value pairs of A and B; +* ALPHA(1:K) = 1, +* BETA(1:K) = 0, +* and if M-K-L >= 0, +* ALPHA(K+1:K+L) = C, +* BETA(K+1:K+L) = S, +* or if M-K-L < 0, +* ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0 +* BETA(K+1:M) =S, BETA(M+1:K+L) =1 +* and +* ALPHA(K+L+1:N) = 0 +* BETA(K+L+1:N) = 0 +* +* U (output) DOUBLE PRECISION array, dimension (LDU,M) +* If JOBU = 'U', U contains the M-by-M orthogonal matrix U. +* If JOBU = 'N', U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= max(1,M) if +* JOBU = 'U'; LDU >= 1 otherwise. +* +* V (output) DOUBLE PRECISION array, dimension (LDV,P) +* If JOBV = 'V', V contains the P-by-P orthogonal matrix V. +* If JOBV = 'N', V is not referenced. +* +* LDV (input) INTEGER +* The leading dimension of the array V. LDV >= max(1,P) if +* JOBV = 'V'; LDV >= 1 otherwise. +* +* Q (output) DOUBLE PRECISION array, dimension (LDQ,N) +* If JOBQ = 'Q', Q contains the N-by-N orthogonal matrix Q. +* If JOBQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N) if +* JOBQ = 'Q'; LDQ >= 1 otherwise. +* +* WORK (workspace) DOUBLE PRECISION array, +* dimension (max(3*N,M,P)+N) +* +* IWORK (workspace/output) INTEGER array, dimension (N) +* On exit, IWORK stores the sorting information. More +* precisely, the following loop will sort ALPHA +* for I = K+1, min(M,K+L) +* swap ALPHA(I) and ALPHA(IWORK(I)) +* endfor +* such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N). +* +* INFO (output)INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, the Jacobi-type procedure failed to +* converge. For further details, see subroutine DTGSJA. +* +* Internal Parameters +* =================== +* +* TOLA DOUBLE PRECISION +* TOLB DOUBLE PRECISION +* TOLA and TOLB are the thresholds to determine the effective +* rank of (A',B')'. Generally, they are set to +* TOLA = MAX(M,N)*norm(A)*MAZHEPS, +* TOLB = MAX(P,N)*norm(B)*MAZHEPS. +* The size of TOLA and TOLB may affect the size of backward +* errors of the decomposition. +* +* Further Details +* =============== +* +* 2-96 Based on modifications by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL WANTQ, WANTU, WANTV + INTEGER I, IBND, ISUB, J, NCYCLE + DOUBLE PRECISION ANORM, BNORM, SMAX, TEMP, TOLA, TOLB, ULP, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, DLAMCH, DLANGE +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGGSVP, DTGSJA, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( P.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -12 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGSVD', -INFO ) + RETURN + END IF +* +* Compute the Frobenius norm of matrices A and B +* + ANORM = DLANGE( '1', M, N, A, LDA, WORK ) + BNORM = DLANGE( '1', P, N, B, LDB, WORK ) +* +* Get machine precision and set up threshold for determining +* the effective numerical rank of the matrices A and B. +* + ULP = DLAMCH( 'Precision' ) + UNFL = DLAMCH( 'Safe Minimum' ) + TOLA = MAX( M, N )*MAX( ANORM, UNFL )*ULP + TOLB = MAX( P, N )*MAX( BNORM, UNFL )*ULP +* +* Preprocessing +* + CALL DGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, TOLA, + $ TOLB, K, L, U, LDU, V, LDV, Q, LDQ, IWORK, WORK, + $ WORK( N+1 ), INFO ) +* +* Compute the GSVD of two upper "triangular" matrices +* + CALL DTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, LDB, + $ TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, + $ WORK, NCYCLE, INFO ) +* +* Sort the singular values and store the pivot indices in IWORK +* Copy ALPHA to WORK, then sort ALPHA in WORK +* + CALL DCOPY( N, ALPHA, 1, WORK, 1 ) + IBND = MIN( L, M-K ) + DO 20 I = 1, IBND +* +* Scan for largest ALPHA(K+I) +* + ISUB = I + SMAX = WORK( K+I ) + DO 10 J = I + 1, IBND + TEMP = WORK( K+J ) + IF( TEMP.GT.SMAX ) THEN + ISUB = J + SMAX = TEMP + END IF + 10 CONTINUE + IF( ISUB.NE.I ) THEN + WORK( K+ISUB ) = WORK( K+I ) + WORK( K+I ) = SMAX + IWORK( K+I ) = K + ISUB + ELSE + IWORK( K+I ) = K + I + END IF + 20 CONTINUE +* + RETURN +* +* End of DGGSVD +* + END diff --git a/costa/native/external/lapack/dggsvp.f b/costa/native/external/lapack/dggsvp.f new file mode 100644 index 000000000..c6475734a --- /dev/null +++ b/costa/native/external/lapack/dggsvp.f @@ -0,0 +1,394 @@ + SUBROUTINE DGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, + $ TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, + $ IWORK, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P + DOUBLE PRECISION TOLA, TOLB +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGGSVP computes orthogonal matrices U, V and Q such that +* +* N-K-L K L +* U'*A*Q = K ( 0 A12 A13 ) if M-K-L >= 0; +* L ( 0 0 A23 ) +* M-K-L ( 0 0 0 ) +* +* N-K-L K L +* = K ( 0 A12 A13 ) if M-K-L < 0; +* M-K ( 0 0 A23 ) +* +* N-K-L K L +* V'*B*Q = L ( 0 0 B13 ) +* P-L ( 0 0 0 ) +* +* where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular +* upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, +* otherwise A23 is (M-K)-by-L upper trapezoidal. K+L = the effective +* numerical rank of the (M+P)-by-N matrix (A',B')'. Z' denotes the +* transpose of Z. +* +* This decomposition is the preprocessing step for computing the +* Generalized Singular Value Decomposition (GSVD), see subroutine +* DGGSVD. +* +* Arguments +* ========= +* +* JOBU (input) CHARACTER*1 +* = 'U': Orthogonal matrix U is computed; +* = 'N': U is not computed. +* +* JOBV (input) CHARACTER*1 +* = 'V': Orthogonal matrix V is computed; +* = 'N': V is not computed. +* +* JOBQ (input) CHARACTER*1 +* = 'Q': Orthogonal matrix Q is computed; +* = 'N': Q is not computed. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* P (input) INTEGER +* The number of rows of the matrix B. P >= 0. +* +* N (input) INTEGER +* The number of columns of the matrices A and B. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A contains the triangular (or trapezoidal) matrix +* described in the Purpose section. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,N) +* On entry, the P-by-N matrix B. +* On exit, B contains the triangular matrix described in +* the Purpose section. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,P). +* +* TOLA (input) DOUBLE PRECISION +* TOLB (input) DOUBLE PRECISION +* TOLA and TOLB are the thresholds to determine the effective +* numerical rank of matrix B and a subblock of A. Generally, +* they are set to +* TOLA = MAX(M,N)*norm(A)*MAZHEPS, +* TOLB = MAX(P,N)*norm(B)*MAZHEPS. +* The size of TOLA and TOLB may affect the size of backward +* errors of the decomposition. +* +* K (output) INTEGER +* L (output) INTEGER +* On exit, K and L specify the dimension of the subblocks +* described in Purpose. +* K + L = effective numerical rank of (A',B')'. +* +* U (output) DOUBLE PRECISION array, dimension (LDU,M) +* If JOBU = 'U', U contains the orthogonal matrix U. +* If JOBU = 'N', U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= max(1,M) if +* JOBU = 'U'; LDU >= 1 otherwise. +* +* V (output) DOUBLE PRECISION array, dimension (LDV,M) +* If JOBV = 'V', V contains the orthogonal matrix V. +* If JOBV = 'N', V is not referenced. +* +* LDV (input) INTEGER +* The leading dimension of the array V. LDV >= max(1,P) if +* JOBV = 'V'; LDV >= 1 otherwise. +* +* Q (output) DOUBLE PRECISION array, dimension (LDQ,N) +* If JOBQ = 'Q', Q contains the orthogonal matrix Q. +* If JOBQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N) if +* JOBQ = 'Q'; LDQ >= 1 otherwise. +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* TAU (workspace) DOUBLE PRECISION array, dimension (N) +* +* WORK (workspace) DOUBLE PRECISION array, dimension (max(3*N,M,P)) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* +* Further Details +* =============== +* +* The subroutine uses LAPACK subroutine DGEQPF for the QR factorization +* with column pivoting to detect the effective numerical rank of the +* a matrix. It may be replaced by a better rank determination strategy. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL FORWRD, WANTQ, WANTU, WANTV + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGEQPF, DGEQR2, DGERQ2, DLACPY, DLAPMT, DLASET, + $ DORG2R, DORM2R, DORMR2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) + FORWRD = .TRUE. +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( P.LT.0 ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -10 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGSVP', -INFO ) + RETURN + END IF +* +* QR with column pivoting of B: B*P = V*( S11 S12 ) +* ( 0 0 ) +* + DO 10 I = 1, N + IWORK( I ) = 0 + 10 CONTINUE + CALL DGEQPF( P, N, B, LDB, IWORK, TAU, WORK, INFO ) +* +* Update A := A*P +* + CALL DLAPMT( FORWRD, M, N, A, LDA, IWORK ) +* +* Determine the effective rank of matrix B. +* + L = 0 + DO 20 I = 1, MIN( P, N ) + IF( ABS( B( I, I ) ).GT.TOLB ) + $ L = L + 1 + 20 CONTINUE +* + IF( WANTV ) THEN +* +* Copy the details of V, and form V. +* + CALL DLASET( 'Full', P, P, ZERO, ZERO, V, LDV ) + IF( P.GT.1 ) + $ CALL DLACPY( 'Lower', P-1, N, B( 2, 1 ), LDB, V( 2, 1 ), + $ LDV ) + CALL DORG2R( P, P, MIN( P, N ), V, LDV, TAU, WORK, INFO ) + END IF +* +* Clean up B +* + DO 40 J = 1, L - 1 + DO 30 I = J + 1, L + B( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + IF( P.GT.L ) + $ CALL DLASET( 'Full', P-L, N, ZERO, ZERO, B( L+1, 1 ), LDB ) +* + IF( WANTQ ) THEN +* +* Set Q = I and Update Q := Q*P +* + CALL DLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) + CALL DLAPMT( FORWRD, N, N, Q, LDQ, IWORK ) + END IF +* + IF( P.GE.L .AND. N.NE.L ) THEN +* +* RQ factorization of (S11 S12): ( S11 S12 ) = ( 0 S12 )*Z +* + CALL DGERQ2( L, N, B, LDB, TAU, WORK, INFO ) +* +* Update A := A*Z' +* + CALL DORMR2( 'Right', 'Transpose', M, N, L, B, LDB, TAU, A, + $ LDA, WORK, INFO ) +* + IF( WANTQ ) THEN +* +* Update Q := Q*Z' +* + CALL DORMR2( 'Right', 'Transpose', N, N, L, B, LDB, TAU, Q, + $ LDQ, WORK, INFO ) + END IF +* +* Clean up B +* + CALL DLASET( 'Full', L, N-L, ZERO, ZERO, B, LDB ) + DO 60 J = N - L + 1, N + DO 50 I = J - N + L + 1, L + B( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE +* + END IF +* +* Let N-L L +* A = ( A11 A12 ) M, +* +* then the following does the complete QR decomposition of A11: +* +* A11 = U*( 0 T12 )*P1' +* ( 0 0 ) +* + DO 70 I = 1, N - L + IWORK( I ) = 0 + 70 CONTINUE + CALL DGEQPF( M, N-L, A, LDA, IWORK, TAU, WORK, INFO ) +* +* Determine the effective rank of A11 +* + K = 0 + DO 80 I = 1, MIN( M, N-L ) + IF( ABS( A( I, I ) ).GT.TOLA ) + $ K = K + 1 + 80 CONTINUE +* +* Update A12 := U'*A12, where A12 = A( 1:M, N-L+1:N ) +* + CALL DORM2R( 'Left', 'Transpose', M, L, MIN( M, N-L ), A, LDA, + $ TAU, A( 1, N-L+1 ), LDA, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Copy the details of U, and form U +* + CALL DLASET( 'Full', M, M, ZERO, ZERO, U, LDU ) + IF( M.GT.1 ) + $ CALL DLACPY( 'Lower', M-1, N-L, A( 2, 1 ), LDA, U( 2, 1 ), + $ LDU ) + CALL DORG2R( M, M, MIN( M, N-L ), U, LDU, TAU, WORK, INFO ) + END IF +* + IF( WANTQ ) THEN +* +* Update Q( 1:N, 1:N-L ) = Q( 1:N, 1:N-L )*P1 +* + CALL DLAPMT( FORWRD, N, N-L, Q, LDQ, IWORK ) + END IF +* +* Clean up A: set the strictly lower triangular part of +* A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0. +* + DO 100 J = 1, K - 1 + DO 90 I = J + 1, K + A( I, J ) = ZERO + 90 CONTINUE + 100 CONTINUE + IF( M.GT.K ) + $ CALL DLASET( 'Full', M-K, N-L, ZERO, ZERO, A( K+1, 1 ), LDA ) +* + IF( N-L.GT.K ) THEN +* +* RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1 +* + CALL DGERQ2( K, N-L, A, LDA, TAU, WORK, INFO ) +* + IF( WANTQ ) THEN +* +* Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1' +* + CALL DORMR2( 'Right', 'Transpose', N, N-L, K, A, LDA, TAU, + $ Q, LDQ, WORK, INFO ) + END IF +* +* Clean up A +* + CALL DLASET( 'Full', K, N-L-K, ZERO, ZERO, A, LDA ) + DO 120 J = N - L - K + 1, N - L + DO 110 I = J - N + L + K + 1, K + A( I, J ) = ZERO + 110 CONTINUE + 120 CONTINUE +* + END IF +* + IF( M.GT.K ) THEN +* +* QR factorization of A( K+1:M,N-L+1:N ) +* + CALL DGEQR2( M-K, L, A( K+1, N-L+1 ), LDA, TAU, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Update U(:,K+1:M) := U(:,K+1:M)*U1 +* + CALL DORM2R( 'Right', 'No transpose', M, M-K, MIN( M-K, L ), + $ A( K+1, N-L+1 ), LDA, TAU, U( 1, K+1 ), LDU, + $ WORK, INFO ) + END IF +* +* Clean up +* + DO 140 J = N - L + 1, N + DO 130 I = J - N + K + L + 1, M + A( I, J ) = ZERO + 130 CONTINUE + 140 CONTINUE +* + END IF +* + RETURN +* +* End of DGGSVP +* + END diff --git a/costa/native/external/lapack/dgtcon.f b/costa/native/external/lapack/dgtcon.f new file mode 100644 index 000000000..4b5c0a2c2 --- /dev/null +++ b/costa/native/external/lapack/dgtcon.f @@ -0,0 +1,166 @@ + SUBROUTINE DGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND, + $ WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION D( * ), DL( * ), DU( * ), DU2( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGTCON estimates the reciprocal of the condition number of a real +* tridiagonal matrix A using the LU factorization as computed by +* DGTTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* DL (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) multipliers that define the matrix L from the +* LU factorization of A as computed by DGTTRF. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the upper triangular matrix U from +* the LU factorization of A. +* +* DU (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) elements of the first superdiagonal of U. +* +* DU2 (input) DOUBLE PRECISION array, dimension (N-2) +* The (n-2) elements of the second superdiagonal of U. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* ANORM (input) DOUBLE PRECISION +* If NORM = '1' or 'O', the 1-norm of the original matrix A. +* If NORM = 'I', the infinity-norm of the original matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ONENRM + INTEGER I, KASE, KASE1 + DOUBLE PRECISION AINVNM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGTTRS, DLACON, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGTCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* +* Check that D(1:N) is non-zero. +* + DO 10 I = 1, N + IF( D( I ).EQ.ZERO ) + $ RETURN + 10 CONTINUE +* + AINVNM = ZERO + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 20 CONTINUE + CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(U)*inv(L). +* + CALL DGTTRS( 'No transpose', N, 1, DL, D, DU, DU2, IPIV, + $ WORK, N, INFO ) + ELSE +* +* Multiply by inv(L')*inv(U'). +* + CALL DGTTRS( 'Transpose', N, 1, DL, D, DU, DU2, IPIV, WORK, + $ N, INFO ) + END IF + GO TO 20 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of DGTCON +* + END diff --git a/costa/native/external/lapack/dgtrfs.f b/costa/native/external/lapack/dgtrfs.f new file mode 100644 index 000000000..96915ec9d --- /dev/null +++ b/costa/native/external/lapack/dgtrfs.f @@ -0,0 +1,357 @@ + SUBROUTINE DGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, + $ IPIV, B, LDB, X, LDX, FERR, BERR, WORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION B( LDB, * ), BERR( * ), D( * ), DF( * ), + $ DL( * ), DLF( * ), DU( * ), DU2( * ), DUF( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DGTRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is tridiagonal, and provides +* error bounds and backward error estimates for the solution. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose = Transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) subdiagonal elements of A. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The diagonal elements of A. +* +* DU (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) superdiagonal elements of A. +* +* DLF (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) multipliers that define the matrix L from the +* LU factorization of A as computed by DGTTRF. +* +* DF (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the upper triangular matrix U from +* the LU factorization of A. +* +* DUF (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) elements of the first superdiagonal of U. +* +* DU2 (input) DOUBLE PRECISION array, dimension (N-2) +* The (n-2) elements of the second superdiagonal of U. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by DGTTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANSN, TRANST + INTEGER COUNT, I, J, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGTTRS, DLACON, DLAGTM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGTRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'T' + ELSE + TRANSN = 'T' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = 4 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 110 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL DLAGTM( TRANS, N, 1, -ONE, DL, D, DU, X( 1, J ), LDX, ONE, + $ WORK( N+1 ), N ) +* +* Compute abs(op(A))*abs(x) + abs(b) for use in the backward +* error bound. +* + IF( NOTRAN ) THEN + IF( N.EQ.1 ) THEN + WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) ) + ELSE + WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) ) + + $ ABS( DU( 1 )*X( 2, J ) ) + DO 30 I = 2, N - 1 + WORK( I ) = ABS( B( I, J ) ) + + $ ABS( DL( I-1 )*X( I-1, J ) ) + + $ ABS( D( I )*X( I, J ) ) + + $ ABS( DU( I )*X( I+1, J ) ) + 30 CONTINUE + WORK( N ) = ABS( B( N, J ) ) + + $ ABS( DL( N-1 )*X( N-1, J ) ) + + $ ABS( D( N )*X( N, J ) ) + END IF + ELSE + IF( N.EQ.1 ) THEN + WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) ) + ELSE + WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) ) + + $ ABS( DL( 1 )*X( 2, J ) ) + DO 40 I = 2, N - 1 + WORK( I ) = ABS( B( I, J ) ) + + $ ABS( DU( I-1 )*X( I-1, J ) ) + + $ ABS( D( I )*X( I, J ) ) + + $ ABS( DL( I )*X( I+1, J ) ) + 40 CONTINUE + WORK( N ) = ABS( B( N, J ) ) + + $ ABS( DU( N-1 )*X( N-1, J ) ) + + $ ABS( D( N )*X( N, J ) ) + END IF + END IF +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + S = ZERO + DO 50 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 50 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL DGTTRS( TRANS, N, 1, DLF, DF, DUF, DU2, IPIV, + $ WORK( N+1 ), N, INFO ) + CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use DLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 60 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 60 CONTINUE +* + KASE = 0 + 70 CONTINUE + CALL DLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**T). +* + CALL DGTTRS( TRANST, N, 1, DLF, DF, DUF, DU2, IPIV, + $ WORK( N+1 ), N, INFO ) + DO 80 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 80 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 90 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 90 CONTINUE + CALL DGTTRS( TRANSN, N, 1, DLF, DF, DUF, DU2, IPIV, + $ WORK( N+1 ), N, INFO ) + END IF + GO TO 70 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 100 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 100 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 110 CONTINUE +* + RETURN +* +* End of DGTRFS +* + END diff --git a/costa/native/external/lapack/dgtsv.f b/costa/native/external/lapack/dgtsv.f new file mode 100644 index 000000000..dcf1c1970 --- /dev/null +++ b/costa/native/external/lapack/dgtsv.f @@ -0,0 +1,263 @@ + SUBROUTINE DGTSV( N, NRHS, DL, D, DU, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION B( LDB, * ), D( * ), DL( * ), DU( * ) +* .. +* +* Purpose +* ======= +* +* DGTSV solves the equation +* +* A*X = B, +* +* where A is an n by n tridiagonal matrix, by Gaussian elimination with +* partial pivoting. +* +* Note that the equation A'*X = B may be solved by interchanging the +* order of the arguments DU and DL. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input/output) DOUBLE PRECISION array, dimension (N-1) +* On entry, DL must contain the (n-1) sub-diagonal elements of +* A. +* +* On exit, DL is overwritten by the (n-2) elements of the +* second super-diagonal of the upper triangular matrix U from +* the LU factorization of A, in DL(1), ..., DL(n-2). +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, D must contain the diagonal elements of A. +* +* On exit, D is overwritten by the n diagonal elements of U. +* +* DU (input/output) DOUBLE PRECISION array, dimension (N-1) +* On entry, DU must contain the (n-1) super-diagonal elements +* of A. +* +* On exit, DU is overwritten by the (n-1) elements of the first +* super-diagonal of U. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the N by NRHS matrix of right hand side matrix B. +* On exit, if INFO = 0, the N by NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero, and the solution +* has not been computed. The factorization has not been +* completed unless i = N. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION FACT, TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGTSV ', -INFO ) + RETURN + END IF +* + IF( N.EQ.0 ) + $ RETURN +* + IF( NRHS.EQ.1 ) THEN + DO 10 I = 1, N - 2 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN +* +* No row interchange required +* + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + B( I+1, 1 ) = B( I+1, 1 ) - FACT*B( I, 1 ) + ELSE + INFO = I + RETURN + END IF + DL( I ) = ZERO + ELSE +* +* Interchange rows I and I+1 +* + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + TEMP = D( I+1 ) + D( I+1 ) = DU( I ) - FACT*TEMP + DL( I ) = DU( I+1 ) + DU( I+1 ) = -FACT*DL( I ) + DU( I ) = TEMP + TEMP = B( I, 1 ) + B( I, 1 ) = B( I+1, 1 ) + B( I+1, 1 ) = TEMP - FACT*B( I+1, 1 ) + END IF + 10 CONTINUE + IF( N.GT.1 ) THEN + I = N - 1 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + B( I+1, 1 ) = B( I+1, 1 ) - FACT*B( I, 1 ) + ELSE + INFO = I + RETURN + END IF + ELSE + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + TEMP = D( I+1 ) + D( I+1 ) = DU( I ) - FACT*TEMP + DU( I ) = TEMP + TEMP = B( I, 1 ) + B( I, 1 ) = B( I+1, 1 ) + B( I+1, 1 ) = TEMP - FACT*B( I+1, 1 ) + END IF + END IF + IF( D( N ).EQ.ZERO ) THEN + INFO = N + RETURN + END IF + ELSE + DO 40 I = 1, N - 2 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN +* +* No row interchange required +* + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + DO 20 J = 1, NRHS + B( I+1, J ) = B( I+1, J ) - FACT*B( I, J ) + 20 CONTINUE + ELSE + INFO = I + RETURN + END IF + DL( I ) = ZERO + ELSE +* +* Interchange rows I and I+1 +* + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + TEMP = D( I+1 ) + D( I+1 ) = DU( I ) - FACT*TEMP + DL( I ) = DU( I+1 ) + DU( I+1 ) = -FACT*DL( I ) + DU( I ) = TEMP + DO 30 J = 1, NRHS + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - FACT*B( I+1, J ) + 30 CONTINUE + END IF + 40 CONTINUE + IF( N.GT.1 ) THEN + I = N - 1 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + DO 50 J = 1, NRHS + B( I+1, J ) = B( I+1, J ) - FACT*B( I, J ) + 50 CONTINUE + ELSE + INFO = I + RETURN + END IF + ELSE + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + TEMP = D( I+1 ) + D( I+1 ) = DU( I ) - FACT*TEMP + DU( I ) = TEMP + DO 60 J = 1, NRHS + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - FACT*B( I+1, J ) + 60 CONTINUE + END IF + END IF + IF( D( N ).EQ.ZERO ) THEN + INFO = N + RETURN + END IF + END IF +* +* Back solve with the matrix U from the factorization. +* + IF( NRHS.LE.2 ) THEN + J = 1 + 70 CONTINUE + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / D( N-1 ) + DO 80 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DL( I )* + $ B( I+2, J ) ) / D( I ) + 80 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 70 + END IF + ELSE + DO 100 J = 1, NRHS + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 90 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DL( I )* + $ B( I+2, J ) ) / D( I ) + 90 CONTINUE + 100 CONTINUE + END IF +* + RETURN +* +* End of DGTSV +* + END diff --git a/costa/native/external/lapack/dgtsvx.f b/costa/native/external/lapack/dgtsvx.f new file mode 100644 index 000000000..f6d0abe68 --- /dev/null +++ b/costa/native/external/lapack/dgtsvx.f @@ -0,0 +1,293 @@ + SUBROUTINE DGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, + $ DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER FACT, TRANS + INTEGER INFO, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION B( LDB, * ), BERR( * ), D( * ), DF( * ), + $ DL( * ), DLF( * ), DU( * ), DU2( * ), DUF( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DGTSVX uses the LU factorization to compute the solution to a real +* system of linear equations A * X = B or A**T * X = B, +* where A is a tridiagonal matrix of order N and X and B are N-by-NRHS +* matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'N', the LU decomposition is used to factor the matrix A +* as A = L * U, where L is a product of permutation and unit lower +* bidiagonal matrices and U is upper triangular with nonzeros in +* only the main diagonal and first two superdiagonals. +* +* 2. If some U(i,i)=0, so that U is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 3. The system of equations is solved for X using the factored form +* of A. +* +* 4. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of A has been +* supplied on entry. +* = 'F': DLF, DF, DUF, DU2, and IPIV contain the factored +* form of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV +* will not be modified. +* = 'N': The matrix will be copied to DLF, DF, and DUF +* and factored. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose = Transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) subdiagonal elements of A. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of A. +* +* DU (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) superdiagonal elements of A. +* +* DLF (input or output) DOUBLE PRECISION array, dimension (N-1) +* If FACT = 'F', then DLF is an input argument and on entry +* contains the (n-1) multipliers that define the matrix L from +* the LU factorization of A as computed by DGTTRF. +* +* If FACT = 'N', then DLF is an output argument and on exit +* contains the (n-1) multipliers that define the matrix L from +* the LU factorization of A. +* +* DF (input or output) DOUBLE PRECISION array, dimension (N) +* If FACT = 'F', then DF is an input argument and on entry +* contains the n diagonal elements of the upper triangular +* matrix U from the LU factorization of A. +* +* If FACT = 'N', then DF is an output argument and on exit +* contains the n diagonal elements of the upper triangular +* matrix U from the LU factorization of A. +* +* DUF (input or output) DOUBLE PRECISION array, dimension (N-1) +* If FACT = 'F', then DUF is an input argument and on entry +* contains the (n-1) elements of the first superdiagonal of U. +* +* If FACT = 'N', then DUF is an output argument and on exit +* contains the (n-1) elements of the first superdiagonal of U. +* +* DU2 (input or output) DOUBLE PRECISION array, dimension (N-2) +* If FACT = 'F', then DU2 is an input argument and on entry +* contains the (n-2) elements of the second superdiagonal of +* U. +* +* If FACT = 'N', then DU2 is an output argument and on exit +* contains the (n-2) elements of the second superdiagonal of +* U. +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains the pivot indices from the LU factorization of A as +* computed by DGTTRF. +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains the pivot indices from the LU factorization of A; +* row i of the matrix was interchanged with row IPIV(i). +* IPIV(i) will always be either i or i+1; IPIV(i) = i indicates +* a row interchange was not required. +* +* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) +* The N-by-NRHS right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) DOUBLE PRECISION +* The estimate of the reciprocal condition number of the matrix +* A. If RCOND is less than the machine precision (in +* particular, if RCOND = 0), the matrix is singular to working +* precision. This condition is indicated by a return code of +* INFO > 0. +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: U(i,i) is exactly zero. The factorization +* has not been completed unless i = N, but the +* factor U is exactly singular, so the solution +* and error bounds could not be computed. +* RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT, NOTRAN + CHARACTER NORM + DOUBLE PRECISION ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANGT + EXTERNAL LSAME, DLAMCH, DLANGT +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGTCON, DGTRFS, DGTTRF, DGTTRS, DLACPY, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGTSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the LU factorization of A. +* + CALL DCOPY( N, D, 1, DF, 1 ) + IF( N.GT.1 ) THEN + CALL DCOPY( N-1, DL, 1, DLF, 1 ) + CALL DCOPY( N-1, DU, 1, DUF, 1 ) + END IF + CALL DGTTRF( N, DLF, DF, DUF, DU2, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = DLANGT( NORM, N, DL, D, DU ) +* +* Compute the reciprocal of the condition number of A. +* + CALL DGTCON( NORM, N, DLF, DF, DUF, DU2, IPIV, ANORM, RCOND, WORK, + $ IWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution vectors X. +* + CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL DGTTRS( TRANS, N, NRHS, DLF, DF, DUF, DU2, IPIV, X, LDX, + $ INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL DGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, IPIV, + $ B, LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* + RETURN +* +* End of DGTSVX +* + END diff --git a/costa/native/external/lapack/dgttrf.f b/costa/native/external/lapack/dgttrf.f new file mode 100644 index 000000000..2aaa8fd61 --- /dev/null +++ b/costa/native/external/lapack/dgttrf.f @@ -0,0 +1,169 @@ + SUBROUTINE DGTTRF( N, DL, D, DU, DU2, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* Purpose +* ======= +* +* DGTTRF computes an LU factorization of a real tridiagonal matrix A +* using elimination with partial pivoting and row interchanges. +* +* The factorization has the form +* A = L * U +* where L is a product of permutation and unit lower bidiagonal +* matrices and U is upper triangular with nonzeros in only the main +* diagonal and first two superdiagonals. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. +* +* DL (input/output) DOUBLE PRECISION array, dimension (N-1) +* On entry, DL must contain the (n-1) sub-diagonal elements of +* A. +* +* On exit, DL is overwritten by the (n-1) multipliers that +* define the matrix L from the LU factorization of A. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, D must contain the diagonal elements of A. +* +* On exit, D is overwritten by the n diagonal elements of the +* upper triangular matrix U from the LU factorization of A. +* +* DU (input/output) DOUBLE PRECISION array, dimension (N-1) +* On entry, DU must contain the (n-1) super-diagonal elements +* of A. +* +* On exit, DU is overwritten by the (n-1) elements of the first +* super-diagonal of U. +* +* DU2 (output) DOUBLE PRECISION array, dimension (N-2) +* On exit, DU2 is overwritten by the (n-2) elements of the +* second super-diagonal of U. +* +* IPIV (output) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, U(k,k) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION FACT, TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'DGTTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Initialize IPIV(i) = i and DU2(I) = 0 +* + DO 10 I = 1, N + IPIV( I ) = I + 10 CONTINUE + DO 20 I = 1, N - 2 + DU2( I ) = ZERO + 20 CONTINUE +* + DO 30 I = 1, N - 2 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN +* +* No row interchange required, eliminate DL(I) +* + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + DL( I ) = FACT + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + END IF + ELSE +* +* Interchange rows I and I+1, eliminate DL(I) +* + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + DL( I ) = FACT + TEMP = DU( I ) + DU( I ) = D( I+1 ) + D( I+1 ) = TEMP - FACT*D( I+1 ) + DU2( I ) = DU( I+1 ) + DU( I+1 ) = -FACT*DU( I+1 ) + IPIV( I ) = I + 1 + END IF + 30 CONTINUE + IF( N.GT.1 ) THEN + I = N - 1 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + DL( I ) = FACT + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + END IF + ELSE + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + DL( I ) = FACT + TEMP = DU( I ) + DU( I ) = D( I+1 ) + D( I+1 ) = TEMP - FACT*D( I+1 ) + IPIV( I ) = I + 1 + END IF + END IF +* +* Check for a zero on the diagonal of U. +* + DO 40 I = 1, N + IF( D( I ).EQ.ZERO ) THEN + INFO = I + GO TO 50 + END IF + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of DGTTRF +* + END diff --git a/costa/native/external/lapack/dgttrs.f b/costa/native/external/lapack/dgttrs.f new file mode 100644 index 000000000..2f5ad1694 --- /dev/null +++ b/costa/native/external/lapack/dgttrs.f @@ -0,0 +1,141 @@ + SUBROUTINE DGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* Purpose +* ======= +* +* DGTTRS solves one of the systems of equations +* A*X = B or A'*X = B, +* with a tridiagonal matrix A using the LU factorization computed +* by DGTTRF. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER +* Specifies the form of the system of equations. +* = 'N': A * X = B (No transpose) +* = 'T': A'* X = B (Transpose) +* = 'C': A'* X = B (Conjugate transpose = Transpose) +* +* N (input) INTEGER +* The order of the matrix A. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) multipliers that define the matrix L from the +* LU factorization of A. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the upper triangular matrix U from +* the LU factorization of A. +* +* DU (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) elements of the first super-diagonal of U. +* +* DU2 (input) DOUBLE PRECISION array, dimension (N-2) +* The (n-2) elements of the second super-diagonal of U. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the matrix of right hand side vectors B. +* On exit, B is overwritten by the solution vectors X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL NOTRAN + INTEGER ITRANS, J, JB, NB +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DGTTS2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOTRAN = ( TRANS.EQ.'N' .OR. TRANS.EQ.'n' ) + IF( .NOT.NOTRAN .AND. .NOT.( TRANS.EQ.'T' .OR. TRANS.EQ. + $ 't' ) .AND. .NOT.( TRANS.EQ.'C' .OR. TRANS.EQ.'c' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGTTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Decode TRANS +* + IF( NOTRAN ) THEN + ITRANS = 0 + ELSE + ITRANS = 1 + END IF +* +* Determine the number of right-hand sides to solve at a time. +* + IF( NRHS.EQ.1 ) THEN + NB = 1 + ELSE + NB = MAX( 1, ILAENV( 1, 'DGTTRS', TRANS, N, NRHS, -1, -1 ) ) + END IF +* + IF( NB.GE.NRHS ) THEN + CALL DGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) + ELSE + DO 10 J = 1, NRHS, NB + JB = MIN( NRHS-J+1, NB ) + CALL DGTTS2( ITRANS, N, JB, DL, D, DU, DU2, IPIV, B( 1, J ), + $ LDB ) + 10 CONTINUE + END IF +* +* End of DGTTRS +* + END diff --git a/costa/native/external/lapack/dgtts2.f b/costa/native/external/lapack/dgtts2.f new file mode 100644 index 000000000..67d9f9e68 --- /dev/null +++ b/costa/native/external/lapack/dgtts2.f @@ -0,0 +1,197 @@ + SUBROUTINE DGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER ITRANS, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* Purpose +* ======= +* +* DGTTS2 solves one of the systems of equations +* A*X = B or A'*X = B, +* with a tridiagonal matrix A using the LU factorization computed +* by DGTTRF. +* +* Arguments +* ========= +* +* ITRANS (input) INTEGER +* Specifies the form of the system of equations. +* = 0: A * X = B (No transpose) +* = 1: A'* X = B (Transpose) +* = 2: A'* X = B (Conjugate transpose = Transpose) +* +* N (input) INTEGER +* The order of the matrix A. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) multipliers that define the matrix L from the +* LU factorization of A. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the upper triangular matrix U from +* the LU factorization of A. +* +* DU (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) elements of the first super-diagonal of U. +* +* DU2 (input) DOUBLE PRECISION array, dimension (N-2) +* The (n-2) elements of the second super-diagonal of U. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the matrix of right hand side vectors B. +* On exit, B is overwritten by the solution vectors X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IP, J + DOUBLE PRECISION TEMP +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( ITRANS.EQ.0 ) THEN +* +* Solve A*X = B using the LU factorization of A, +* overwriting each right hand side vector with its solution. +* + IF( NRHS.LE.1 ) THEN + J = 1 + 10 CONTINUE +* +* Solve L*x = b. +* + DO 20 I = 1, N - 1 + IP = IPIV( I ) + TEMP = B( I+1-IP+I, J ) - DL( I )*B( IP, J ) + B( I, J ) = B( IP, J ) + B( I+1, J ) = TEMP + 20 CONTINUE +* +* Solve U*x = b. +* + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 30 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )* + $ B( I+2, J ) ) / D( I ) + 30 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 10 + END IF + ELSE + DO 60 J = 1, NRHS +* +* Solve L*x = b. +* + DO 40 I = 1, N - 1 + IF( IPIV( I ).EQ.I ) THEN + B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J ) + ELSE + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - DL( I )*B( I, J ) + END IF + 40 CONTINUE +* +* Solve U*x = b. +* + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 50 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )* + $ B( I+2, J ) ) / D( I ) + 50 CONTINUE + 60 CONTINUE + END IF + ELSE +* +* Solve A' * X = B. +* + IF( NRHS.LE.1 ) THEN +* +* Solve U'*x = b. +* + J = 1 + 70 CONTINUE + B( 1, J ) = B( 1, J ) / D( 1 ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) + DO 80 I = 3, N + B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )-DU2( I-2 )* + $ B( I-2, J ) ) / D( I ) + 80 CONTINUE +* +* Solve L'*x = b. +* + DO 90 I = N - 1, 1, -1 + IP = IPIV( I ) + TEMP = B( I, J ) - DL( I )*B( I+1, J ) + B( I, J ) = B( IP, J ) + B( IP, J ) = TEMP + 90 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 70 + END IF +* + ELSE + DO 120 J = 1, NRHS +* +* Solve U'*x = b. +* + B( 1, J ) = B( 1, J ) / D( 1 ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) + DO 100 I = 3, N + B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )- + $ DU2( I-2 )*B( I-2, J ) ) / D( I ) + 100 CONTINUE + DO 110 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - DL( I )*B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - DL( I )*TEMP + B( I, J ) = TEMP + END IF + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* +* End of DGTTS2 +* + END diff --git a/costa/native/external/lapack/dhgeqz.f b/costa/native/external/lapack/dhgeqz.f new file mode 100644 index 000000000..0e8af2e5e --- /dev/null +++ b/costa/native/external/lapack/dhgeqz.f @@ -0,0 +1,1243 @@ + SUBROUTINE DHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, Q, LDQ, Z, LDZ, WORK, + $ LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, COMPZ, JOB + INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), Q( LDQ, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DHGEQZ implements a single-/double-shift version of the QZ method for +* finding the generalized eigenvalues +* +* w(j)=(ALPHAR(j) + i*ALPHAI(j))/BETAR(j) of the equation +* +* det( A - w(i) B ) = 0 +* +* In addition, the pair A,B may be reduced to generalized Schur form: +* B is upper triangular, and A is block upper triangular, where the +* diagonal blocks are either 1-by-1 or 2-by-2, the 2-by-2 blocks having +* complex generalized eigenvalues (see the description of the argument +* JOB.) +* +* If JOB='S', then the pair (A,B) is simultaneously reduced to Schur +* form by applying one orthogonal tranformation (usually called Q) on +* the left and another (usually called Z) on the right. The 2-by-2 +* upper-triangular diagonal blocks of B corresponding to 2-by-2 blocks +* of A will be reduced to positive diagonal matrices. (I.e., +* if A(j+1,j) is non-zero, then B(j+1,j)=B(j,j+1)=0 and B(j,j) and +* B(j+1,j+1) will be positive.) +* +* If JOB='E', then at each iteration, the same transformations +* are computed, but they are only applied to those parts of A and B +* which are needed to compute ALPHAR, ALPHAI, and BETAR. +* +* If JOB='S' and COMPQ and COMPZ are 'V' or 'I', then the orthogonal +* transformations used to reduce (A,B) are accumulated into the arrays +* Q and Z s.t.: +* +* Q(in) A(in) Z(in)* = Q(out) A(out) Z(out)* +* Q(in) B(in) Z(in)* = Q(out) B(out) Z(out)* +* +* Ref: C.B. Moler & G.W. Stewart, "An Algorithm for Generalized Matrix +* Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), +* pp. 241--256. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* = 'E': compute only ALPHAR, ALPHAI, and BETA. A and B will +* not necessarily be put into generalized Schur form. +* = 'S': put A and B into generalized Schur form, as well +* as computing ALPHAR, ALPHAI, and BETA. +* +* COMPQ (input) CHARACTER*1 +* = 'N': do not modify Q. +* = 'V': multiply the array Q on the right by the transpose of +* the orthogonal tranformation that is applied to the +* left side of A and B to reduce them to Schur form. +* = 'I': like COMPQ='V', except that Q will be initialized to +* the identity first. +* +* COMPZ (input) CHARACTER*1 +* = 'N': do not modify Z. +* = 'V': multiply the array Z on the right by the orthogonal +* tranformation that is applied to the right side of +* A and B to reduce them to Schur form. +* = 'I': like COMPZ='V', except that Z will be initialized to +* the identity first. +* +* N (input) INTEGER +* The order of the matrices A, B, Q, and Z. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that A is already upper triangular in rows and +* columns 1:ILO-1 and IHI+1:N. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) +* On entry, the N-by-N upper Hessenberg matrix A. Elements +* below the subdiagonal must be zero. +* If JOB='S', then on exit A and B will have been +* simultaneously reduced to generalized Schur form. +* If JOB='E', then on exit A will have been destroyed. +* The diagonal blocks will be correct, but the off-diagonal +* portion will be meaningless. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max( 1, N ). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) +* On entry, the N-by-N upper triangular matrix B. Elements +* below the diagonal must be zero. 2-by-2 blocks in B +* corresponding to 2-by-2 blocks in A will be reduced to +* positive diagonal form. (I.e., if A(j+1,j) is non-zero, +* then B(j+1,j)=B(j,j+1)=0 and B(j,j) and B(j+1,j+1) will be +* positive.) +* If JOB='S', then on exit A and B will have been +* simultaneously reduced to Schur form. +* If JOB='E', then on exit B will have been destroyed. +* Elements corresponding to diagonal blocks of A will be +* correct, but the off-diagonal portion will be meaningless. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max( 1, N ). +* +* ALPHAR (output) DOUBLE PRECISION array, dimension (N) +* ALPHAR(1:N) will be set to real parts of the diagonal +* elements of A that would result from reducing A and B to +* Schur form and then further reducing them both to triangular +* form using unitary transformations s.t. the diagonal of B +* was non-negative real. Thus, if A(j,j) is in a 1-by-1 block +* (i.e., A(j+1,j)=A(j,j+1)=0), then ALPHAR(j)=A(j,j). +* Note that the (real or complex) values +* (ALPHAR(j) + i*ALPHAI(j))/BETA(j), j=1,...,N, are the +* generalized eigenvalues of the matrix pencil A - wB. +* +* ALPHAI (output) DOUBLE PRECISION array, dimension (N) +* ALPHAI(1:N) will be set to imaginary parts of the diagonal +* elements of A that would result from reducing A and B to +* Schur form and then further reducing them both to triangular +* form using unitary transformations s.t. the diagonal of B +* was non-negative real. Thus, if A(j,j) is in a 1-by-1 block +* (i.e., A(j+1,j)=A(j,j+1)=0), then ALPHAR(j)=0. +* Note that the (real or complex) values +* (ALPHAR(j) + i*ALPHAI(j))/BETA(j), j=1,...,N, are the +* generalized eigenvalues of the matrix pencil A - wB. +* +* BETA (output) DOUBLE PRECISION array, dimension (N) +* BETA(1:N) will be set to the (real) diagonal elements of B +* that would result from reducing A and B to Schur form and +* then further reducing them both to triangular form using +* unitary transformations s.t. the diagonal of B was +* non-negative real. Thus, if A(j,j) is in a 1-by-1 block +* (i.e., A(j+1,j)=A(j,j+1)=0), then BETA(j)=B(j,j). +* Note that the (real or complex) values +* (ALPHAR(j) + i*ALPHAI(j))/BETA(j), j=1,...,N, are the +* generalized eigenvalues of the matrix pencil A - wB. +* (Note that BETA(1:N) will always be non-negative, and no +* BETAI is necessary.) +* +* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) +* If COMPQ='N', then Q will not be referenced. +* If COMPQ='V' or 'I', then the transpose of the orthogonal +* transformations which are applied to A and B on the left +* will be applied to the array Q on the right. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= 1. +* If COMPQ='V' or 'I', then LDQ >= N. +* +* Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) +* If COMPZ='N', then Z will not be referenced. +* If COMPZ='V' or 'I', then the orthogonal transformations +* which are applied to A and B on the right will be applied +* to the array Z on the right. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1. +* If COMPZ='V' or 'I', then LDZ >= N. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO >= 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* = 1,...,N: the QZ iteration did not converge. (A,B) is not +* in Schur form, but ALPHAR(i), ALPHAI(i), and +* BETA(i), i=INFO+1,...,N should be correct. +* = N+1,...,2*N: the shift calculation failed. (A,B) is not +* in Schur form, but ALPHAR(i), ALPHAI(i), and +* BETA(i), i=INFO-N+1,...,N should be correct. +* > 2*N: various "impossible" errors. +* +* Further Details +* =============== +* +* Iteration counters: +* +* JITER -- counts iterations. +* IITER -- counts iterations run since ILAST was last +* changed. This is therefore reset only when a 1-by-1 or +* 2-by-2 block deflates off the bottom. +* +* ===================================================================== +* +* .. Parameters .. +* $ SAFETY = 1.0E+0 ) + DOUBLE PRECISION HALF, ZERO, ONE, SAFETY + PARAMETER ( HALF = 0.5D+0, ZERO = 0.0D+0, ONE = 1.0D+0, + $ SAFETY = 1.0D+2 ) +* .. +* .. Local Scalars .. + LOGICAL ILAZR2, ILAZRO, ILPIVT, ILQ, ILSCHR, ILZ, + $ LQUERY + INTEGER ICOMPQ, ICOMPZ, IFIRST, IFRSTM, IITER, ILAST, + $ ILASTM, IN, ISCHUR, ISTART, J, JC, JCH, JITER, + $ JR, MAXIT + DOUBLE PRECISION A11, A12, A1I, A1R, A21, A22, A2I, A2R, AD11, + $ AD11L, AD12, AD12L, AD21, AD21L, AD22, AD22L, + $ AD32L, AN, ANORM, ASCALE, ATOL, B11, B1A, B1I, + $ B1R, B22, B2A, B2I, B2R, BN, BNORM, BSCALE, + $ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL, + $ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX, + $ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T, + $ TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L, + $ U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR, + $ WR2 +* .. +* .. Local Arrays .. + DOUBLE PRECISION V( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANHS, DLAPY2, DLAPY3 + EXTERNAL LSAME, DLAMCH, DLANHS, DLAPY2, DLAPY3 +* .. +* .. External Subroutines .. + EXTERNAL DLAG2, DLARFG, DLARTG, DLASET, DLASV2, DROT, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Decode JOB, COMPQ, COMPZ +* + IF( LSAME( JOB, 'E' ) ) THEN + ILSCHR = .FALSE. + ISCHUR = 1 + ELSE IF( LSAME( JOB, 'S' ) ) THEN + ILSCHR = .TRUE. + ISCHUR = 2 + ELSE + ISCHUR = 0 + END IF +* + IF( LSAME( COMPQ, 'N' ) ) THEN + ILQ = .FALSE. + ICOMPQ = 1 + ELSE IF( LSAME( COMPQ, 'V' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 2 + ELSE IF( LSAME( COMPQ, 'I' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 3 + ELSE + ICOMPQ = 0 + END IF +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ILZ = .FALSE. + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 2 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 3 + ELSE + ICOMPZ = 0 + END IF +* +* Check Argument Values +* + INFO = 0 + WORK( 1 ) = MAX( 1, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( ISCHUR.EQ.0 ) THEN + INFO = -1 + ELSE IF( ICOMPQ.EQ.0 ) THEN + INFO = -2 + ELSE IF( ICOMPZ.EQ.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( ILO.LT.1 ) THEN + INFO = -5 + ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN + INFO = -6 + ELSE IF( LDA.LT.N ) THEN + INFO = -8 + ELSE IF( LDB.LT.N ) THEN + INFO = -10 + ELSE IF( LDQ.LT.1 .OR. ( ILQ .AND. LDQ.LT.N ) ) THEN + INFO = -15 + ELSE IF( LDZ.LT.1 .OR. ( ILZ .AND. LDZ.LT.N ) ) THEN + INFO = -17 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DHGEQZ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + WORK( 1 ) = DBLE( 1 ) + RETURN + END IF +* +* Initialize Q and Z +* + IF( ICOMPQ.EQ.3 ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) + IF( ICOMPZ.EQ.3 ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* +* Machine Constants +* + IN = IHI + 1 - ILO + SAFMIN = DLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + ULP = DLAMCH( 'E' )*DLAMCH( 'B' ) + ANORM = DLANHS( 'F', IN, A( ILO, ILO ), LDA, WORK ) + BNORM = DLANHS( 'F', IN, B( ILO, ILO ), LDB, WORK ) + ATOL = MAX( SAFMIN, ULP*ANORM ) + BTOL = MAX( SAFMIN, ULP*BNORM ) + ASCALE = ONE / MAX( SAFMIN, ANORM ) + BSCALE = ONE / MAX( SAFMIN, BNORM ) +* +* Set Eigenvalues IHI+1:N +* + DO 30 J = IHI + 1, N + IF( B( J, J ).LT.ZERO ) THEN + IF( ILSCHR ) THEN + DO 10 JR = 1, J + A( JR, J ) = -A( JR, J ) + B( JR, J ) = -B( JR, J ) + 10 CONTINUE + ELSE + A( J, J ) = -A( J, J ) + B( J, J ) = -B( J, J ) + END IF + IF( ILZ ) THEN + DO 20 JR = 1, N + Z( JR, J ) = -Z( JR, J ) + 20 CONTINUE + END IF + END IF + ALPHAR( J ) = A( J, J ) + ALPHAI( J ) = ZERO + BETA( J ) = B( J, J ) + 30 CONTINUE +* +* If IHI < ILO, skip QZ steps +* + IF( IHI.LT.ILO ) + $ GO TO 380 +* +* MAIN QZ ITERATION LOOP +* +* Initialize dynamic indices +* +* Eigenvalues ILAST+1:N have been found. +* Column operations modify rows IFRSTM:whatever. +* Row operations modify columns whatever:ILASTM. +* +* If only eigenvalues are being computed, then +* IFRSTM is the row of the last splitting row above row ILAST; +* this is always at least ILO. +* IITER counts iterations since the last eigenvalue was found, +* to tell when to use an extraordinary shift. +* MAXIT is the maximum number of QZ sweeps allowed. +* + ILAST = IHI + IF( ILSCHR ) THEN + IFRSTM = 1 + ILASTM = N + ELSE + IFRSTM = ILO + ILASTM = IHI + END IF + IITER = 0 + ESHIFT = ZERO + MAXIT = 30*( IHI-ILO+1 ) +* + DO 360 JITER = 1, MAXIT +* +* Split the matrix if possible. +* +* Two tests: +* 1: A(j,j-1)=0 or j=ILO +* 2: B(j,j)=0 +* + IF( ILAST.EQ.ILO ) THEN +* +* Special case: j=ILAST +* + GO TO 80 + ELSE + IF( ABS( A( ILAST, ILAST-1 ) ).LE.ATOL ) THEN + A( ILAST, ILAST-1 ) = ZERO + GO TO 80 + END IF + END IF +* + IF( ABS( B( ILAST, ILAST ) ).LE.BTOL ) THEN + B( ILAST, ILAST ) = ZERO + GO TO 70 + END IF +* +* General case: j unfl ) +* __ +* (sA - wB) ( CZ -SZ ) +* ( SZ CZ ) +* + C11R = S1*A11 - WR*B11 + C11I = -WI*B11 + C12 = S1*A12 + C21 = S1*A21 + C22R = S1*A22 - WR*B22 + C22I = -WI*B22 +* + IF( ABS( C11R )+ABS( C11I )+ABS( C12 ).GT.ABS( C21 )+ + $ ABS( C22R )+ABS( C22I ) ) THEN + T = DLAPY3( C12, C11R, C11I ) + CZ = C12 / T + SZR = -C11R / T + SZI = -C11I / T + ELSE + CZ = DLAPY2( C22R, C22I ) + IF( CZ.LE.SAFMIN ) THEN + CZ = ZERO + SZR = ONE + SZI = ZERO + ELSE + TEMPR = C22R / CZ + TEMPI = C22I / CZ + T = DLAPY2( CZ, C21 ) + CZ = CZ / T + SZR = -C21*TEMPR / T + SZI = C21*TEMPI / T + END IF + END IF +* +* Compute Givens rotation on left +* +* ( CQ SQ ) +* ( __ ) A or B +* ( -SQ CQ ) +* + AN = ABS( A11 ) + ABS( A12 ) + ABS( A21 ) + ABS( A22 ) + BN = ABS( B11 ) + ABS( B22 ) + WABS = ABS( WR ) + ABS( WI ) + IF( S1*AN.GT.WABS*BN ) THEN + CQ = CZ*B11 + SQR = SZR*B22 + SQI = -SZI*B22 + ELSE + A1R = CZ*A11 + SZR*A12 + A1I = SZI*A12 + A2R = CZ*A21 + SZR*A22 + A2I = SZI*A22 + CQ = DLAPY2( A1R, A1I ) + IF( CQ.LE.SAFMIN ) THEN + CQ = ZERO + SQR = ONE + SQI = ZERO + ELSE + TEMPR = A1R / CQ + TEMPI = A1I / CQ + SQR = TEMPR*A2R + TEMPI*A2I + SQI = TEMPI*A2R - TEMPR*A2I + END IF + END IF + T = DLAPY3( CQ, SQR, SQI ) + CQ = CQ / T + SQR = SQR / T + SQI = SQI / T +* +* Compute diagonal elements of QBZ +* + TEMPR = SQR*SZR - SQI*SZI + TEMPI = SQR*SZI + SQI*SZR + B1R = CQ*CZ*B11 + TEMPR*B22 + B1I = TEMPI*B22 + B1A = DLAPY2( B1R, B1I ) + B2R = CQ*CZ*B22 + TEMPR*B11 + B2I = -TEMPI*B11 + B2A = DLAPY2( B2R, B2I ) +* +* Normalize so beta > 0, and Im( alpha1 ) > 0 +* + BETA( ILAST-1 ) = B1A + BETA( ILAST ) = B2A + ALPHAR( ILAST-1 ) = ( WR*B1A )*S1INV + ALPHAI( ILAST-1 ) = ( WI*B1A )*S1INV + ALPHAR( ILAST ) = ( WR*B2A )*S1INV + ALPHAI( ILAST ) = -( WI*B2A )*S1INV +* +* Step 3: Go to next block -- exit if finished. +* + ILAST = IFIRST - 1 + IF( ILAST.LT.ILO ) + $ GO TO 380 +* +* Reset counters +* + IITER = 0 + ESHIFT = ZERO + IF( .NOT.ILSCHR ) THEN + ILASTM = ILAST + IF( IFRSTM.GT.ILAST ) + $ IFRSTM = ILO + END IF + GO TO 350 + ELSE +* +* Usual case: 3x3 or larger block, using Francis implicit +* double-shift +* +* 2 +* Eigenvalue equation is w - c w + d = 0, +* +* -1 2 -1 +* so compute 1st column of (A B ) - c A B + d +* using the formula in QZIT (from EISPACK) +* +* We assume that the block is at least 3x3 +* + AD11 = ( ASCALE*A( ILAST-1, ILAST-1 ) ) / + $ ( BSCALE*B( ILAST-1, ILAST-1 ) ) + AD21 = ( ASCALE*A( ILAST, ILAST-1 ) ) / + $ ( BSCALE*B( ILAST-1, ILAST-1 ) ) + AD12 = ( ASCALE*A( ILAST-1, ILAST ) ) / + $ ( BSCALE*B( ILAST, ILAST ) ) + AD22 = ( ASCALE*A( ILAST, ILAST ) ) / + $ ( BSCALE*B( ILAST, ILAST ) ) + U12 = B( ILAST-1, ILAST ) / B( ILAST, ILAST ) + AD11L = ( ASCALE*A( IFIRST, IFIRST ) ) / + $ ( BSCALE*B( IFIRST, IFIRST ) ) + AD21L = ( ASCALE*A( IFIRST+1, IFIRST ) ) / + $ ( BSCALE*B( IFIRST, IFIRST ) ) + AD12L = ( ASCALE*A( IFIRST, IFIRST+1 ) ) / + $ ( BSCALE*B( IFIRST+1, IFIRST+1 ) ) + AD22L = ( ASCALE*A( IFIRST+1, IFIRST+1 ) ) / + $ ( BSCALE*B( IFIRST+1, IFIRST+1 ) ) + AD32L = ( ASCALE*A( IFIRST+2, IFIRST+1 ) ) / + $ ( BSCALE*B( IFIRST+1, IFIRST+1 ) ) + U12L = B( IFIRST, IFIRST+1 ) / B( IFIRST+1, IFIRST+1 ) +* + V( 1 ) = ( AD11-AD11L )*( AD22-AD11L ) - AD12*AD21 + + $ AD21*U12*AD11L + ( AD12L-AD11L*U12L )*AD21L + V( 2 ) = ( ( AD22L-AD11L )-AD21L*U12L-( AD11-AD11L )- + $ ( AD22-AD11L )+AD21*U12 )*AD21L + V( 3 ) = AD32L*AD21L +* + ISTART = IFIRST +* + CALL DLARFG( 3, V( 1 ), V( 2 ), 1, TAU ) + V( 1 ) = ONE +* +* Sweep +* + DO 290 J = ISTART, ILAST - 2 +* +* All but last elements: use 3x3 Householder transforms. +* +* Zero (j-1)st column of A +* + IF( J.GT.ISTART ) THEN + V( 1 ) = A( J, J-1 ) + V( 2 ) = A( J+1, J-1 ) + V( 3 ) = A( J+2, J-1 ) +* + CALL DLARFG( 3, A( J, J-1 ), V( 2 ), 1, TAU ) + V( 1 ) = ONE + A( J+1, J-1 ) = ZERO + A( J+2, J-1 ) = ZERO + END IF +* + DO 230 JC = J, ILASTM + TEMP = TAU*( A( J, JC )+V( 2 )*A( J+1, JC )+V( 3 )* + $ A( J+2, JC ) ) + A( J, JC ) = A( J, JC ) - TEMP + A( J+1, JC ) = A( J+1, JC ) - TEMP*V( 2 ) + A( J+2, JC ) = A( J+2, JC ) - TEMP*V( 3 ) + TEMP2 = TAU*( B( J, JC )+V( 2 )*B( J+1, JC )+V( 3 )* + $ B( J+2, JC ) ) + B( J, JC ) = B( J, JC ) - TEMP2 + B( J+1, JC ) = B( J+1, JC ) - TEMP2*V( 2 ) + B( J+2, JC ) = B( J+2, JC ) - TEMP2*V( 3 ) + 230 CONTINUE + IF( ILQ ) THEN + DO 240 JR = 1, N + TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )* + $ Q( JR, J+2 ) ) + Q( JR, J ) = Q( JR, J ) - TEMP + Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 ) + Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 ) + 240 CONTINUE + END IF +* +* Zero j-th column of B (see DLAGBC for details) +* +* Swap rows to pivot +* + ILPIVT = .FALSE. + TEMP = MAX( ABS( B( J+1, J+1 ) ), ABS( B( J+1, J+2 ) ) ) + TEMP2 = MAX( ABS( B( J+2, J+1 ) ), ABS( B( J+2, J+2 ) ) ) + IF( MAX( TEMP, TEMP2 ).LT.SAFMIN ) THEN + SCALE = ZERO + U1 = ONE + U2 = ZERO + GO TO 250 + ELSE IF( TEMP.GE.TEMP2 ) THEN + W11 = B( J+1, J+1 ) + W21 = B( J+2, J+1 ) + W12 = B( J+1, J+2 ) + W22 = B( J+2, J+2 ) + U1 = B( J+1, J ) + U2 = B( J+2, J ) + ELSE + W21 = B( J+1, J+1 ) + W11 = B( J+2, J+1 ) + W22 = B( J+1, J+2 ) + W12 = B( J+2, J+2 ) + U2 = B( J+1, J ) + U1 = B( J+2, J ) + END IF +* +* Swap columns if nec. +* + IF( ABS( W12 ).GT.ABS( W11 ) ) THEN + ILPIVT = .TRUE. + TEMP = W12 + TEMP2 = W22 + W12 = W11 + W22 = W21 + W11 = TEMP + W21 = TEMP2 + END IF +* +* LU-factor +* + TEMP = W21 / W11 + U2 = U2 - TEMP*U1 + W22 = W22 - TEMP*W12 + W21 = ZERO +* +* Compute SCALE +* + SCALE = ONE + IF( ABS( W22 ).LT.SAFMIN ) THEN + SCALE = ZERO + U2 = ONE + U1 = -W12 / W11 + GO TO 250 + END IF + IF( ABS( W22 ).LT.ABS( U2 ) ) + $ SCALE = ABS( W22 / U2 ) + IF( ABS( W11 ).LT.ABS( U1 ) ) + $ SCALE = MIN( SCALE, ABS( W11 / U1 ) ) +* +* Solve +* + U2 = ( SCALE*U2 ) / W22 + U1 = ( SCALE*U1-W12*U2 ) / W11 +* + 250 CONTINUE + IF( ILPIVT ) THEN + TEMP = U2 + U2 = U1 + U1 = TEMP + END IF +* +* Compute Householder Vector +* + T = SQRT( SCALE**2+U1**2+U2**2 ) + TAU = ONE + SCALE / T + VS = -ONE / ( SCALE+T ) + V( 1 ) = ONE + V( 2 ) = VS*U1 + V( 3 ) = VS*U2 +* +* Apply transformations from the right. +* + DO 260 JR = IFRSTM, MIN( J+3, ILAST ) + TEMP = TAU*( A( JR, J )+V( 2 )*A( JR, J+1 )+V( 3 )* + $ A( JR, J+2 ) ) + A( JR, J ) = A( JR, J ) - TEMP + A( JR, J+1 ) = A( JR, J+1 ) - TEMP*V( 2 ) + A( JR, J+2 ) = A( JR, J+2 ) - TEMP*V( 3 ) + 260 CONTINUE + DO 270 JR = IFRSTM, J + 2 + TEMP = TAU*( B( JR, J )+V( 2 )*B( JR, J+1 )+V( 3 )* + $ B( JR, J+2 ) ) + B( JR, J ) = B( JR, J ) - TEMP + B( JR, J+1 ) = B( JR, J+1 ) - TEMP*V( 2 ) + B( JR, J+2 ) = B( JR, J+2 ) - TEMP*V( 3 ) + 270 CONTINUE + IF( ILZ ) THEN + DO 280 JR = 1, N + TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )* + $ Z( JR, J+2 ) ) + Z( JR, J ) = Z( JR, J ) - TEMP + Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 ) + Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 ) + 280 CONTINUE + END IF + B( J+1, J ) = ZERO + B( J+2, J ) = ZERO + 290 CONTINUE +* +* Last elements: Use Givens rotations +* +* Rotations from the left +* + J = ILAST - 1 + TEMP = A( J, J-1 ) + CALL DLARTG( TEMP, A( J+1, J-1 ), C, S, A( J, J-1 ) ) + A( J+1, J-1 ) = ZERO +* + DO 300 JC = J, ILASTM + TEMP = C*A( J, JC ) + S*A( J+1, JC ) + A( J+1, JC ) = -S*A( J, JC ) + C*A( J+1, JC ) + A( J, JC ) = TEMP + TEMP2 = C*B( J, JC ) + S*B( J+1, JC ) + B( J+1, JC ) = -S*B( J, JC ) + C*B( J+1, JC ) + B( J, JC ) = TEMP2 + 300 CONTINUE + IF( ILQ ) THEN + DO 310 JR = 1, N + TEMP = C*Q( JR, J ) + S*Q( JR, J+1 ) + Q( JR, J+1 ) = -S*Q( JR, J ) + C*Q( JR, J+1 ) + Q( JR, J ) = TEMP + 310 CONTINUE + END IF +* +* Rotations from the right. +* + TEMP = B( J+1, J+1 ) + CALL DLARTG( TEMP, B( J+1, J ), C, S, B( J+1, J+1 ) ) + B( J+1, J ) = ZERO +* + DO 320 JR = IFRSTM, ILAST + TEMP = C*A( JR, J+1 ) + S*A( JR, J ) + A( JR, J ) = -S*A( JR, J+1 ) + C*A( JR, J ) + A( JR, J+1 ) = TEMP + 320 CONTINUE + DO 330 JR = IFRSTM, ILAST - 1 + TEMP = C*B( JR, J+1 ) + S*B( JR, J ) + B( JR, J ) = -S*B( JR, J+1 ) + C*B( JR, J ) + B( JR, J+1 ) = TEMP + 330 CONTINUE + IF( ILZ ) THEN + DO 340 JR = 1, N + TEMP = C*Z( JR, J+1 ) + S*Z( JR, J ) + Z( JR, J ) = -S*Z( JR, J+1 ) + C*Z( JR, J ) + Z( JR, J+1 ) = TEMP + 340 CONTINUE + END IF +* +* End of Double-Shift code +* + END IF +* + GO TO 350 +* +* End of iteration loop +* + 350 CONTINUE + 360 CONTINUE +* +* Drop-through = non-convergence +* + 370 CONTINUE + INFO = ILAST + GO TO 420 +* +* Successful completion of all QZ steps +* + 380 CONTINUE +* +* Set Eigenvalues 1:ILO-1 +* + DO 410 J = 1, ILO - 1 + IF( B( J, J ).LT.ZERO ) THEN + IF( ILSCHR ) THEN + DO 390 JR = 1, J + A( JR, J ) = -A( JR, J ) + B( JR, J ) = -B( JR, J ) + 390 CONTINUE + ELSE + A( J, J ) = -A( J, J ) + B( J, J ) = -B( J, J ) + END IF + IF( ILZ ) THEN + DO 400 JR = 1, N + Z( JR, J ) = -Z( JR, J ) + 400 CONTINUE + END IF + END IF + ALPHAR( J ) = A( J, J ) + ALPHAI( J ) = ZERO + BETA( J ) = B( J, J ) + 410 CONTINUE +* +* Normal Termination +* + INFO = 0 +* +* Exit (other than argument error) -- return optimal workspace size +* + 420 CONTINUE + WORK( 1 ) = DBLE( N ) + RETURN +* +* End of DHGEQZ +* + END diff --git a/costa/native/external/lapack/dhsein.f b/costa/native/external/lapack/dhsein.f new file mode 100644 index 000000000..e68f7a464 --- /dev/null +++ b/costa/native/external/lapack/dhsein.f @@ -0,0 +1,412 @@ + SUBROUTINE DHSEIN( SIDE, EIGSRC, INITV, SELECT, N, H, LDH, WR, WI, + $ VL, LDVL, VR, LDVR, MM, M, WORK, IFAILL, + $ IFAILR, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER EIGSRC, INITV, SIDE + INTEGER INFO, LDH, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IFAILL( * ), IFAILR( * ) + DOUBLE PRECISION H( LDH, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WI( * ), WORK( * ), WR( * ) +* .. +* +* Purpose +* ======= +* +* DHSEIN uses inverse iteration to find specified right and/or left +* eigenvectors of a real upper Hessenberg matrix H. +* +* The right eigenvector x and the left eigenvector y of the matrix H +* corresponding to an eigenvalue w are defined by: +* +* H * x = w * x, y**h * H = w * y**h +* +* where y**h denotes the conjugate transpose of the vector y. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'R': compute right eigenvectors only; +* = 'L': compute left eigenvectors only; +* = 'B': compute both right and left eigenvectors. +* +* EIGSRC (input) CHARACTER*1 +* Specifies the source of eigenvalues supplied in (WR,WI): +* = 'Q': the eigenvalues were found using DHSEQR; thus, if +* H has zero subdiagonal elements, and so is +* block-triangular, then the j-th eigenvalue can be +* assumed to be an eigenvalue of the block containing +* the j-th row/column. This property allows DHSEIN to +* perform inverse iteration on just one diagonal block. +* = 'N': no assumptions are made on the correspondence +* between eigenvalues and diagonal blocks. In this +* case, DHSEIN must always perform inverse iteration +* using the whole matrix H. +* +* INITV (input) CHARACTER*1 +* = 'N': no initial vectors are supplied; +* = 'U': user-supplied initial vectors are stored in the arrays +* VL and/or VR. +* +* SELECT (input/output) LOGICAL array, dimension (N) +* Specifies the eigenvectors to be computed. To select the +* real eigenvector corresponding to a real eigenvalue WR(j), +* SELECT(j) must be set to .TRUE.. To select the complex +* eigenvector corresponding to a complex eigenvalue +* (WR(j),WI(j)), with complex conjugate (WR(j+1),WI(j+1)), +* either SELECT(j) or SELECT(j+1) or both must be set to +* .TRUE.; then on exit SELECT(j) is .TRUE. and SELECT(j+1) is +* .FALSE.. +* +* N (input) INTEGER +* The order of the matrix H. N >= 0. +* +* H (input) DOUBLE PRECISION array, dimension (LDH,N) +* The upper Hessenberg matrix H. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH >= max(1,N). +* +* WR (input/output) DOUBLE PRECISION array, dimension (N) +* WI (input) DOUBLE PRECISION array, dimension (N) +* On entry, the real and imaginary parts of the eigenvalues of +* H; a complex conjugate pair of eigenvalues must be stored in +* consecutive elements of WR and WI. +* On exit, WR may have been altered since close eigenvalues +* are perturbed slightly in searching for independent +* eigenvectors. +* +* VL (input/output) DOUBLE PRECISION array, dimension (LDVL,MM) +* On entry, if INITV = 'U' and SIDE = 'L' or 'B', VL must +* contain starting vectors for the inverse iteration for the +* left eigenvectors; the starting vector for each eigenvector +* must be in the same column(s) in which the eigenvector will +* be stored. +* On exit, if SIDE = 'L' or 'B', the left eigenvectors +* specified by SELECT will be stored consecutively in the +* columns of VL, in the same order as their eigenvalues. A +* complex eigenvector corresponding to a complex eigenvalue is +* stored in two consecutive columns, the first holding the real +* part and the second the imaginary part. +* If SIDE = 'R', VL is not referenced. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. +* LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise. +* +* VR (input/output) DOUBLE PRECISION array, dimension (LDVR,MM) +* On entry, if INITV = 'U' and SIDE = 'R' or 'B', VR must +* contain starting vectors for the inverse iteration for the +* right eigenvectors; the starting vector for each eigenvector +* must be in the same column(s) in which the eigenvector will +* be stored. +* On exit, if SIDE = 'R' or 'B', the right eigenvectors +* specified by SELECT will be stored consecutively in the +* columns of VR, in the same order as their eigenvalues. A +* complex eigenvector corresponding to a complex eigenvalue is +* stored in two consecutive columns, the first holding the real +* part and the second the imaginary part. +* If SIDE = 'L', VR is not referenced. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. +* LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise. +* +* MM (input) INTEGER +* The number of columns in the arrays VL and/or VR. MM >= M. +* +* M (output) INTEGER +* The number of columns in the arrays VL and/or VR required to +* store the eigenvectors; each selected real eigenvector +* occupies one column and each selected complex eigenvector +* occupies two columns. +* +* WORK (workspace) DOUBLE PRECISION array, dimension ((N+2)*N) +* +* IFAILL (output) INTEGER array, dimension (MM) +* If SIDE = 'L' or 'B', IFAILL(i) = j > 0 if the left +* eigenvector in the i-th column of VL (corresponding to the +* eigenvalue w(j)) failed to converge; IFAILL(i) = 0 if the +* eigenvector converged satisfactorily. If the i-th and (i+1)th +* columns of VL hold a complex eigenvector, then IFAILL(i) and +* IFAILL(i+1) are set to the same value. +* If SIDE = 'R', IFAILL is not referenced. +* +* IFAILR (output) INTEGER array, dimension (MM) +* If SIDE = 'R' or 'B', IFAILR(i) = j > 0 if the right +* eigenvector in the i-th column of VR (corresponding to the +* eigenvalue w(j)) failed to converge; IFAILR(i) = 0 if the +* eigenvector converged satisfactorily. If the i-th and (i+1)th +* columns of VR hold a complex eigenvector, then IFAILR(i) and +* IFAILR(i+1) are set to the same value. +* If SIDE = 'L', IFAILR is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, i is the number of eigenvectors which +* failed to converge; see IFAILL and IFAILR for further +* details. +* +* Further Details +* =============== +* +* Each eigenvector is normalized so that the element of largest +* magnitude has magnitude 1; here the magnitude of a complex number +* (x,y) is taken to be |x|+|y|. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL BOTHV, FROMQR, LEFTV, NOINIT, PAIR, RIGHTV + INTEGER I, IINFO, K, KL, KLN, KR, KSI, KSR, LDWORK + DOUBLE PRECISION BIGNUM, EPS3, HNORM, SMLNUM, ULP, UNFL, WKI, + $ WKR +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANHS + EXTERNAL LSAME, DLAMCH, DLANHS +* .. +* .. External Subroutines .. + EXTERNAL DLAEIN, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters. +* + BOTHV = LSAME( SIDE, 'B' ) + RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV + LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV +* + FROMQR = LSAME( EIGSRC, 'Q' ) +* + NOINIT = LSAME( INITV, 'N' ) +* +* Set M to the number of columns required to store the selected +* eigenvectors, and standardize the array SELECT. +* + M = 0 + PAIR = .FALSE. + DO 10 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + SELECT( K ) = .FALSE. + ELSE + IF( WI( K ).EQ.ZERO ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( K ) .OR. SELECT( K+1 ) ) THEN + SELECT( K ) = .TRUE. + M = M + 2 + END IF + END IF + END IF + 10 CONTINUE +* + INFO = 0 + IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -1 + ELSE IF( .NOT.FROMQR .AND. .NOT.LSAME( EIGSRC, 'N' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOINIT .AND. .NOT.LSAME( INITV, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDH.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN + INFO = -11 + ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN + INFO = -13 + ELSE IF( MM.LT.M ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DHSEIN', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* Set machine-dependent constants. +* + UNFL = DLAMCH( 'Safe minimum' ) + ULP = DLAMCH( 'Precision' ) + SMLNUM = UNFL*( N / ULP ) + BIGNUM = ( ONE-ULP ) / SMLNUM +* + LDWORK = N + 1 +* + KL = 1 + KLN = 0 + IF( FROMQR ) THEN + KR = 0 + ELSE + KR = N + END IF + KSR = 1 +* + DO 120 K = 1, N + IF( SELECT( K ) ) THEN +* +* Compute eigenvector(s) corresponding to W(K). +* + IF( FROMQR ) THEN +* +* If affiliation of eigenvalues is known, check whether +* the matrix splits. +* +* Determine KL and KR such that 1 <= KL <= K <= KR <= N +* and H(KL,KL-1) and H(KR+1,KR) are zero (or KL = 1 or +* KR = N). +* +* Then inverse iteration can be performed with the +* submatrix H(KL:N,KL:N) for a left eigenvector, and with +* the submatrix H(1:KR,1:KR) for a right eigenvector. +* + DO 20 I = K, KL + 1, -1 + IF( H( I, I-1 ).EQ.ZERO ) + $ GO TO 30 + 20 CONTINUE + 30 CONTINUE + KL = I + IF( K.GT.KR ) THEN + DO 40 I = K, N - 1 + IF( H( I+1, I ).EQ.ZERO ) + $ GO TO 50 + 40 CONTINUE + 50 CONTINUE + KR = I + END IF + END IF +* + IF( KL.NE.KLN ) THEN + KLN = KL +* +* Compute infinity-norm of submatrix H(KL:KR,KL:KR) if it +* has not ben computed before. +* + HNORM = DLANHS( 'I', KR-KL+1, H( KL, KL ), LDH, WORK ) + IF( HNORM.GT.ZERO ) THEN + EPS3 = HNORM*ULP + ELSE + EPS3 = SMLNUM + END IF + END IF +* +* Perturb eigenvalue if it is close to any previous +* selected eigenvalues affiliated to the submatrix +* H(KL:KR,KL:KR). Close roots are modified by EPS3. +* + WKR = WR( K ) + WKI = WI( K ) + 60 CONTINUE + DO 70 I = K - 1, KL, -1 + IF( SELECT( I ) .AND. ABS( WR( I )-WKR )+ + $ ABS( WI( I )-WKI ).LT.EPS3 ) THEN + WKR = WKR + EPS3 + GO TO 60 + END IF + 70 CONTINUE + WR( K ) = WKR +* + PAIR = WKI.NE.ZERO + IF( PAIR ) THEN + KSI = KSR + 1 + ELSE + KSI = KSR + END IF + IF( LEFTV ) THEN +* +* Compute left eigenvector. +* + CALL DLAEIN( .FALSE., NOINIT, N-KL+1, H( KL, KL ), LDH, + $ WKR, WKI, VL( KL, KSR ), VL( KL, KSI ), + $ WORK, LDWORK, WORK( N*N+N+1 ), EPS3, SMLNUM, + $ BIGNUM, IINFO ) + IF( IINFO.GT.0 ) THEN + IF( PAIR ) THEN + INFO = INFO + 2 + ELSE + INFO = INFO + 1 + END IF + IFAILL( KSR ) = K + IFAILL( KSI ) = K + ELSE + IFAILL( KSR ) = 0 + IFAILL( KSI ) = 0 + END IF + DO 80 I = 1, KL - 1 + VL( I, KSR ) = ZERO + 80 CONTINUE + IF( PAIR ) THEN + DO 90 I = 1, KL - 1 + VL( I, KSI ) = ZERO + 90 CONTINUE + END IF + END IF + IF( RIGHTV ) THEN +* +* Compute right eigenvector. +* + CALL DLAEIN( .TRUE., NOINIT, KR, H, LDH, WKR, WKI, + $ VR( 1, KSR ), VR( 1, KSI ), WORK, LDWORK, + $ WORK( N*N+N+1 ), EPS3, SMLNUM, BIGNUM, + $ IINFO ) + IF( IINFO.GT.0 ) THEN + IF( PAIR ) THEN + INFO = INFO + 2 + ELSE + INFO = INFO + 1 + END IF + IFAILR( KSR ) = K + IFAILR( KSI ) = K + ELSE + IFAILR( KSR ) = 0 + IFAILR( KSI ) = 0 + END IF + DO 100 I = KR + 1, N + VR( I, KSR ) = ZERO + 100 CONTINUE + IF( PAIR ) THEN + DO 110 I = KR + 1, N + VR( I, KSI ) = ZERO + 110 CONTINUE + END IF + END IF +* + IF( PAIR ) THEN + KSR = KSR + 2 + ELSE + KSR = KSR + 1 + END IF + END IF + 120 CONTINUE +* + RETURN +* +* End of DHSEIN +* + END diff --git a/costa/native/external/lapack/dhseqr.f b/costa/native/external/lapack/dhseqr.f new file mode 100644 index 000000000..48dbadac7 --- /dev/null +++ b/costa/native/external/lapack/dhseqr.f @@ -0,0 +1,467 @@ + SUBROUTINE DHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, WR, WI, Z, + $ LDZ, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER COMPZ, JOB + INTEGER IHI, ILO, INFO, LDH, LDZ, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION H( LDH, * ), WI( * ), WORK( * ), WR( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DHSEQR computes the eigenvalues of a real upper Hessenberg matrix H +* and, optionally, the matrices T and Z from the Schur decomposition +* H = Z T Z**T, where T is an upper quasi-triangular matrix (the Schur +* form), and Z is the orthogonal matrix of Schur vectors. +* +* Optionally Z may be postmultiplied into an input orthogonal matrix Q, +* so that this routine can give the Schur factorization of a matrix A +* which has been reduced to the Hessenberg form H by the orthogonal +* matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* = 'E': compute eigenvalues only; +* = 'S': compute eigenvalues and the Schur form T. +* +* COMPZ (input) CHARACTER*1 +* = 'N': no Schur vectors are computed; +* = 'I': Z is initialized to the unit matrix and the matrix Z +* of Schur vectors of H is returned; +* = 'V': Z must contain an orthogonal matrix Q on entry, and +* the product Q*Z is returned. +* +* N (input) INTEGER +* The order of the matrix H. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that H is already upper triangular in rows +* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +* set by a previous call to DGEBAL, and then passed to SGEHRD +* when the matrix output by DGEBAL is reduced to Hessenberg +* form. Otherwise ILO and IHI should be set to 1 and N +* respectively. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* H (input/output) DOUBLE PRECISION array, dimension (LDH,N) +* On entry, the upper Hessenberg matrix H. +* On exit, if JOB = 'S', H contains the upper quasi-triangular +* matrix T from the Schur decomposition (the Schur form); +* 2-by-2 diagonal blocks (corresponding to complex conjugate +* pairs of eigenvalues) are returned in standard form, with +* H(i,i) = H(i+1,i+1) and H(i+1,i)*H(i,i+1) < 0. If JOB = 'E', +* the contents of H are unspecified on exit. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH >= max(1,N). +* +* WR (output) DOUBLE PRECISION array, dimension (N) +* WI (output) DOUBLE PRECISION array, dimension (N) +* The real and imaginary parts, respectively, of the computed +* eigenvalues. If two eigenvalues are computed as a complex +* conjugate pair, they are stored in consecutive elements of +* WR and WI, say the i-th and (i+1)th, with WI(i) > 0 and +* WI(i+1) < 0. If JOB = 'S', the eigenvalues are stored in the +* same order as on the diagonal of the Schur form returned in +* H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 +* diagonal block, WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and +* WI(i+1) = -WI(i). +* +* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) +* If COMPZ = 'N': Z is not referenced. +* If COMPZ = 'I': on entry, Z need not be set, and on exit, Z +* contains the orthogonal matrix Z of the Schur vectors of H. +* If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, +* which is assumed to be equal to the unit matrix except for +* the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. +* Normally Q is the orthogonal matrix generated by DORGHR after +* the call to DGEHRD which formed the Hessenberg matrix H. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. +* LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, DHSEQR failed to compute all of the +* eigenvalues in a total of 30*(IHI-ILO+1) iterations; +* elements 1:ilo-1 and i+1:n of WR and WI contain those +* eigenvalues which have been successfully computed. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) + DOUBLE PRECISION CONST + PARAMETER ( CONST = 1.5D+0 ) + INTEGER NSMAX, LDS + PARAMETER ( NSMAX = 15, LDS = NSMAX ) +* .. +* .. Local Scalars .. + LOGICAL INITZ, LQUERY, WANTT, WANTZ + INTEGER I, I1, I2, IERR, II, ITEMP, ITN, ITS, J, K, L, + $ MAXB, NH, NR, NS, NV + DOUBLE PRECISION ABSW, OVFL, SMLNUM, TAU, TEMP, TST1, ULP, UNFL +* .. +* .. Local Arrays .. + DOUBLE PRECISION S( LDS, NSMAX ), V( NSMAX+1 ), VV( NSMAX+1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX, ILAENV + DOUBLE PRECISION DLAMCH, DLANHS, DLAPY2 + EXTERNAL LSAME, IDAMAX, ILAENV, DLAMCH, DLANHS, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMV, DLACPY, DLAHQR, DLARFG, DLARFX, + $ DLASET, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTT = LSAME( JOB, 'S' ) + INITZ = LSAME( COMPZ, 'I' ) + WANTZ = INITZ .OR. LSAME( COMPZ, 'V' ) +* + INFO = 0 + WORK( 1 ) = MAX( 1, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( JOB, 'E' ) .AND. .NOT.WANTT ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPZ, 'N' ) .AND. .NOT.WANTZ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -5 + ELSE IF( LDH.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDZ.LT.1 .OR. WANTZ .AND. LDZ.LT.MAX( 1, N ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DHSEQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Initialize Z, if necessary +* + IF( INITZ ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* +* Store the eigenvalues isolated by DGEBAL. +* + DO 10 I = 1, ILO - 1 + WR( I ) = H( I, I ) + WI( I ) = ZERO + 10 CONTINUE + DO 20 I = IHI + 1, N + WR( I ) = H( I, I ) + WI( I ) = ZERO + 20 CONTINUE +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN + IF( ILO.EQ.IHI ) THEN + WR( ILO ) = H( ILO, ILO ) + WI( ILO ) = ZERO + RETURN + END IF +* +* Set rows and columns ILO to IHI to zero below the first +* subdiagonal. +* + DO 40 J = ILO, IHI - 2 + DO 30 I = J + 2, N + H( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + NH = IHI - ILO + 1 +* +* Determine the order of the multi-shift QR algorithm to be used. +* + NS = ILAENV( 4, 'DHSEQR', JOB // COMPZ, N, ILO, IHI, -1 ) + MAXB = ILAENV( 8, 'DHSEQR', JOB // COMPZ, N, ILO, IHI, -1 ) + IF( NS.LE.2 .OR. NS.GT.NH .OR. MAXB.GE.NH ) THEN +* +* Use the standard double-shift algorithm +* + CALL DLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, ILO, + $ IHI, Z, LDZ, INFO ) + RETURN + END IF + MAXB = MAX( 3, MAXB ) + NS = MIN( NS, MAXB, NSMAX ) +* +* Now 2 < NS <= MAXB < NH. +* +* Set machine-dependent constants for the stopping criterion. +* If norm(H) <= sqrt(OVFL), overflow should not occur. +* + UNFL = DLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + CALL DLABAD( UNFL, OVFL ) + ULP = DLAMCH( 'Precision' ) + SMLNUM = UNFL*( NH / ULP ) +* +* I1 and I2 are the indices of the first row and last column of H +* to which transformations must be applied. If eigenvalues only are +* being computed, I1 and I2 are set inside the main loop. +* + IF( WANTT ) THEN + I1 = 1 + I2 = N + END IF +* +* ITN is the total number of multiple-shift QR iterations allowed. +* + ITN = 30*NH +* +* The main loop begins here. I is the loop index and decreases from +* IHI to ILO in steps of at most MAXB. Each iteration of the loop +* works with the active submatrix in rows and columns L to I. +* Eigenvalues I+1 to IHI have already converged. Either L = ILO or +* H(L,L-1) is negligible so that the matrix splits. +* + I = IHI + 50 CONTINUE + L = ILO + IF( I.LT.ILO ) + $ GO TO 170 +* +* Perform multiple-shift QR iterations on rows and columns ILO to I +* until a submatrix of order at most MAXB splits off at the bottom +* because a subdiagonal element has become negligible. +* + DO 150 ITS = 0, ITN +* +* Look for a single small subdiagonal element. +* + DO 60 K = I, L + 1, -1 + TST1 = ABS( H( K-1, K-1 ) ) + ABS( H( K, K ) ) + IF( TST1.EQ.ZERO ) + $ TST1 = DLANHS( '1', I-L+1, H( L, L ), LDH, WORK ) + IF( ABS( H( K, K-1 ) ).LE.MAX( ULP*TST1, SMLNUM ) ) + $ GO TO 70 + 60 CONTINUE + 70 CONTINUE + L = K + IF( L.GT.ILO ) THEN +* +* H(L,L-1) is negligible. +* + H( L, L-1 ) = ZERO + END IF +* +* Exit from loop if a submatrix of order <= MAXB has split off. +* + IF( L.GE.I-MAXB+1 ) + $ GO TO 160 +* +* Now the active submatrix is in rows and columns L to I. If +* eigenvalues only are being computed, only the active submatrix +* need be transformed. +* + IF( .NOT.WANTT ) THEN + I1 = L + I2 = I + END IF +* + IF( ITS.EQ.20 .OR. ITS.EQ.30 ) THEN +* +* Exceptional shifts. +* + DO 80 II = I - NS + 1, I + WR( II ) = CONST*( ABS( H( II, II-1 ) )+ + $ ABS( H( II, II ) ) ) + WI( II ) = ZERO + 80 CONTINUE + ELSE +* +* Use eigenvalues of trailing submatrix of order NS as shifts. +* + CALL DLACPY( 'Full', NS, NS, H( I-NS+1, I-NS+1 ), LDH, S, + $ LDS ) + CALL DLAHQR( .FALSE., .FALSE., NS, 1, NS, S, LDS, + $ WR( I-NS+1 ), WI( I-NS+1 ), 1, NS, Z, LDZ, + $ IERR ) + IF( IERR.GT.0 ) THEN +* +* If DLAHQR failed to compute all NS eigenvalues, use the +* unconverged diagonal elements as the remaining shifts. +* + DO 90 II = 1, IERR + WR( I-NS+II ) = S( II, II ) + WI( I-NS+II ) = ZERO + 90 CONTINUE + END IF + END IF +* +* Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) +* where G is the Hessenberg submatrix H(L:I,L:I) and w is +* the vector of shifts (stored in WR and WI). The result is +* stored in the local array V. +* + V( 1 ) = ONE + DO 100 II = 2, NS + 1 + V( II ) = ZERO + 100 CONTINUE + NV = 1 + DO 120 J = I - NS + 1, I + IF( WI( J ).GE.ZERO ) THEN + IF( WI( J ).EQ.ZERO ) THEN +* +* real shift +* + CALL DCOPY( NV+1, V, 1, VV, 1 ) + CALL DGEMV( 'No transpose', NV+1, NV, ONE, H( L, L ), + $ LDH, VV, 1, -WR( J ), V, 1 ) + NV = NV + 1 + ELSE IF( WI( J ).GT.ZERO ) THEN +* +* complex conjugate pair of shifts +* + CALL DCOPY( NV+1, V, 1, VV, 1 ) + CALL DGEMV( 'No transpose', NV+1, NV, ONE, H( L, L ), + $ LDH, V, 1, -TWO*WR( J ), VV, 1 ) + ITEMP = IDAMAX( NV+1, VV, 1 ) + TEMP = ONE / MAX( ABS( VV( ITEMP ) ), SMLNUM ) + CALL DSCAL( NV+1, TEMP, VV, 1 ) + ABSW = DLAPY2( WR( J ), WI( J ) ) + TEMP = ( TEMP*ABSW )*ABSW + CALL DGEMV( 'No transpose', NV+2, NV+1, ONE, + $ H( L, L ), LDH, VV, 1, TEMP, V, 1 ) + NV = NV + 2 + END IF +* +* Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, +* reset it to the unit vector. +* + ITEMP = IDAMAX( NV, V, 1 ) + TEMP = ABS( V( ITEMP ) ) + IF( TEMP.EQ.ZERO ) THEN + V( 1 ) = ONE + DO 110 II = 2, NV + V( II ) = ZERO + 110 CONTINUE + ELSE + TEMP = MAX( TEMP, SMLNUM ) + CALL DSCAL( NV, ONE / TEMP, V, 1 ) + END IF + END IF + 120 CONTINUE +* +* Multiple-shift QR step +* + DO 140 K = L, I - 1 +* +* The first iteration of this loop determines a reflection G +* from the vector V and applies it from left and right to H, +* thus creating a nonzero bulge below the subdiagonal. +* +* Each subsequent iteration determines a reflection G to +* restore the Hessenberg form in the (K-1)th column, and thus +* chases the bulge one step toward the bottom of the active +* submatrix. NR is the order of G. +* + NR = MIN( NS+1, I-K+1 ) + IF( K.GT.L ) + $ CALL DCOPY( NR, H( K, K-1 ), 1, V, 1 ) + CALL DLARFG( NR, V( 1 ), V( 2 ), 1, TAU ) + IF( K.GT.L ) THEN + H( K, K-1 ) = V( 1 ) + DO 130 II = K + 1, I + H( II, K-1 ) = ZERO + 130 CONTINUE + END IF + V( 1 ) = ONE +* +* Apply G from the left to transform the rows of the matrix in +* columns K to I2. +* + CALL DLARFX( 'Left', NR, I2-K+1, V, TAU, H( K, K ), LDH, + $ WORK ) +* +* Apply G from the right to transform the columns of the +* matrix in rows I1 to min(K+NR,I). +* + CALL DLARFX( 'Right', MIN( K+NR, I )-I1+1, NR, V, TAU, + $ H( I1, K ), LDH, WORK ) +* + IF( WANTZ ) THEN +* +* Accumulate transformations in the matrix Z +* + CALL DLARFX( 'Right', NH, NR, V, TAU, Z( ILO, K ), LDZ, + $ WORK ) + END IF + 140 CONTINUE +* + 150 CONTINUE +* +* Failure to converge in remaining number of iterations +* + INFO = I + RETURN +* + 160 CONTINUE +* +* A submatrix of order <= MAXB in rows and columns L to I has split +* off. Use the double-shift QR algorithm to handle it. +* + CALL DLAHQR( WANTT, WANTZ, N, L, I, H, LDH, WR, WI, ILO, IHI, Z, + $ LDZ, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* +* Decrement number of remaining iterations, and return to start of +* the main loop with a new value of I. +* + ITN = ITN - ITS + I = L - 1 + GO TO 50 +* + 170 CONTINUE + WORK( 1 ) = MAX( 1, N ) + RETURN +* +* End of DHSEQR +* + END diff --git a/costa/native/external/lapack/dlabad.f b/costa/native/external/lapack/dlabad.f new file mode 100644 index 000000000..2a80925be --- /dev/null +++ b/costa/native/external/lapack/dlabad.f @@ -0,0 +1,56 @@ + SUBROUTINE DLABAD( SMALL, LARGE ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + DOUBLE PRECISION LARGE, SMALL +* .. +* +* Purpose +* ======= +* +* DLABAD takes as input the values computed by DLAMCH for underflow and +* overflow, and returns the square root of each of these values if the +* log of LARGE is sufficiently large. This subroutine is intended to +* identify machines with a large exponent range, such as the Crays, and +* redefine the underflow and overflow limits to be the square roots of +* the values computed by DLAMCH. This subroutine is needed because +* DLAMCH does not compensate for poor arithmetic in the upper half of +* the exponent range, as is found on a Cray. +* +* Arguments +* ========= +* +* SMALL (input/output) DOUBLE PRECISION +* On entry, the underflow threshold as computed by DLAMCH. +* On exit, if LOG10(LARGE) is sufficiently large, the square +* root of SMALL, otherwise unchanged. +* +* LARGE (input/output) DOUBLE PRECISION +* On entry, the overflow threshold as computed by DLAMCH. +* On exit, if LOG10(LARGE) is sufficiently large, the square +* root of LARGE, otherwise unchanged. +* +* ===================================================================== +* +* .. Intrinsic Functions .. + INTRINSIC LOG10, SQRT +* .. +* .. Executable Statements .. +* +* If it looks like we're on a Cray, take the square root of +* SMALL and LARGE to avoid overflow and underflow problems. +* + IF( LOG10( LARGE ).GT.2000.D0 ) THEN + SMALL = SQRT( SMALL ) + LARGE = SQRT( LARGE ) + END IF +* + RETURN +* +* End of DLABAD +* + END diff --git a/costa/native/external/lapack/dlabrd.f b/costa/native/external/lapack/dlabrd.f new file mode 100644 index 000000000..bea251b58 --- /dev/null +++ b/costa/native/external/lapack/dlabrd.f @@ -0,0 +1,291 @@ + SUBROUTINE DLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y, + $ LDY ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER LDA, LDX, LDY, M, N, NB +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ), + $ TAUQ( * ), X( LDX, * ), Y( LDY, * ) +* .. +* +* Purpose +* ======= +* +* DLABRD reduces the first NB rows and columns of a real general +* m by n matrix A to upper or lower bidiagonal form by an orthogonal +* transformation Q' * A * P, and returns the matrices X and Y which +* are needed to apply the transformation to the unreduced part of A. +* +* If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower +* bidiagonal form. +* +* This is an auxiliary routine called by DGEBRD +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows in the matrix A. +* +* N (input) INTEGER +* The number of columns in the matrix A. +* +* NB (input) INTEGER +* The number of leading rows and columns of A to be reduced. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the m by n general matrix to be reduced. +* On exit, the first NB rows and columns of the matrix are +* overwritten; the rest of the array is unchanged. +* If m >= n, elements on and below the diagonal in the first NB +* columns, with the array TAUQ, represent the orthogonal +* matrix Q as a product of elementary reflectors; and +* elements above the diagonal in the first NB rows, with the +* array TAUP, represent the orthogonal matrix P as a product +* of elementary reflectors. +* If m < n, elements below the diagonal in the first NB +* columns, with the array TAUQ, represent the orthogonal +* matrix Q as a product of elementary reflectors, and +* elements on and above the diagonal in the first NB rows, +* with the array TAUP, represent the orthogonal matrix P as +* a product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* D (output) DOUBLE PRECISION array, dimension (NB) +* The diagonal elements of the first NB rows and columns of +* the reduced matrix. D(i) = A(i,i). +* +* E (output) DOUBLE PRECISION array, dimension (NB) +* The off-diagonal elements of the first NB rows and columns of +* the reduced matrix. +* +* TAUQ (output) DOUBLE PRECISION array dimension (NB) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix Q. See Further Details. +* +* TAUP (output) DOUBLE PRECISION array, dimension (NB) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix P. See Further Details. +* +* X (output) DOUBLE PRECISION array, dimension (LDX,NB) +* The m-by-nb matrix X required to update the unreduced part +* of A. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= M. +* +* Y (output) DOUBLE PRECISION array, dimension (LDY,NB) +* The n-by-nb matrix Y required to update the unreduced part +* of A. +* +* LDY (output) INTEGER +* The leading dimension of the array Y. LDY >= N. +* +* Further Details +* =============== +* +* The matrices Q and P are represented as products of elementary +* reflectors: +* +* Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are real scalars, and v and u are real vectors. +* +* If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in +* A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in +* A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in +* A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in +* A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* The elements of the vectors v and u together form the m-by-nb matrix +* V and the nb-by-n matrix U' which are needed, with X and Y, to apply +* the transformation to the unreduced part of the matrix, using a block +* update of the form: A := A - V*Y' - X*U'. +* +* The contents of A on exit are illustrated by the following examples +* with nb = 2: +* +* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +* +* ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) +* ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) +* ( v1 v2 a a a ) ( v1 1 a a a a ) +* ( v1 v2 a a a ) ( v1 v2 a a a a ) +* ( v1 v2 a a a ) ( v1 v2 a a a a ) +* ( v1 v2 a a a ) +* +* where a denotes an element of the original matrix which is unchanged, +* vi denotes an element of the vector defining H(i), and ui an element +* of the vector defining G(i). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DLARFG, DSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( M.GE.N ) THEN +* +* Reduce to upper bidiagonal form +* + DO 10 I = 1, NB +* +* Update A(i:m,i) +* + CALL DGEMV( 'No transpose', M-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, Y( I, 1 ), LDY, ONE, A( I, I ), 1 ) + CALL DGEMV( 'No transpose', M-I+1, I-1, -ONE, X( I, 1 ), + $ LDX, A( 1, I ), 1, ONE, A( I, I ), 1 ) +* +* Generate reflection Q(i) to annihilate A(i+1:m,i) +* + CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAUQ( I ) ) + D( I ) = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = ONE +* +* Compute Y(i+1:n,i) +* + CALL DGEMV( 'Transpose', M-I+1, N-I, ONE, A( I, I+1 ), + $ LDA, A( I, I ), 1, ZERO, Y( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', M-I+1, I-1, ONE, A( I, 1 ), LDA, + $ A( I, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL DGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ), + $ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', M-I+1, I-1, ONE, X( I, 1 ), LDX, + $ A( I, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL DGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ), + $ LDA, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL DSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 ) +* +* Update A(i,i+1:n) +* + CALL DGEMV( 'No transpose', N-I, I, -ONE, Y( I+1, 1 ), + $ LDY, A( I, 1 ), LDA, ONE, A( I, I+1 ), LDA ) + CALL DGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ), + $ LDA, X( I, 1 ), LDX, ONE, A( I, I+1 ), LDA ) +* +* Generate reflection P(i) to annihilate A(i,i+2:n) +* + CALL DLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ), + $ LDA, TAUP( I ) ) + E( I ) = A( I, I+1 ) + A( I, I+1 ) = ONE +* +* Compute X(i+1:m,i) +* + CALL DGEMV( 'No transpose', M-I, N-I, ONE, A( I+1, I+1 ), + $ LDA, A( I, I+1 ), LDA, ZERO, X( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', N-I, I, ONE, Y( I+1, 1 ), LDY, + $ A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 ) + CALL DGEMV( 'No transpose', M-I, I, -ONE, A( I+1, 1 ), + $ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL DGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 ) + CALL DGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ), + $ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL DSCAL( M-I, TAUP( I ), X( I+1, I ), 1 ) + END IF + 10 CONTINUE + ELSE +* +* Reduce to lower bidiagonal form +* + DO 20 I = 1, NB +* +* Update A(i,i:n) +* + CALL DGEMV( 'No transpose', N-I+1, I-1, -ONE, Y( I, 1 ), + $ LDY, A( I, 1 ), LDA, ONE, A( I, I ), LDA ) + CALL DGEMV( 'Transpose', I-1, N-I+1, -ONE, A( 1, I ), LDA, + $ X( I, 1 ), LDX, ONE, A( I, I ), LDA ) +* +* Generate reflection P(i) to annihilate A(i,i+1:n) +* + CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA, + $ TAUP( I ) ) + D( I ) = A( I, I ) + IF( I.LT.M ) THEN + A( I, I ) = ONE +* +* Compute X(i+1:m,i) +* + CALL DGEMV( 'No transpose', M-I, N-I+1, ONE, A( I+1, I ), + $ LDA, A( I, I ), LDA, ZERO, X( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', N-I+1, I-1, ONE, Y( I, 1 ), LDY, + $ A( I, I ), LDA, ZERO, X( 1, I ), 1 ) + CALL DGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL DGEMV( 'No transpose', I-1, N-I+1, ONE, A( 1, I ), + $ LDA, A( I, I ), LDA, ZERO, X( 1, I ), 1 ) + CALL DGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ), + $ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL DSCAL( M-I, TAUP( I ), X( I+1, I ), 1 ) +* +* Update A(i+1:m,i) +* + CALL DGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, Y( I, 1 ), LDY, ONE, A( I+1, I ), 1 ) + CALL DGEMV( 'No transpose', M-I, I, -ONE, X( I+1, 1 ), + $ LDX, A( 1, I ), 1, ONE, A( I+1, I ), 1 ) +* +* Generate reflection Q(i) to annihilate A(i+2:m,i) +* + CALL DLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1, + $ TAUQ( I ) ) + E( I ) = A( I+1, I ) + A( I+1, I ) = ONE +* +* Compute Y(i+1:n,i) +* + CALL DGEMV( 'Transpose', M-I, N-I, ONE, A( I+1, I+1 ), + $ LDA, A( I+1, I ), 1, ZERO, Y( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', M-I, I-1, ONE, A( I+1, 1 ), LDA, + $ A( I+1, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL DGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ), + $ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', M-I, I, ONE, X( I+1, 1 ), LDX, + $ A( I+1, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL DGEMV( 'Transpose', I, N-I, -ONE, A( 1, I+1 ), LDA, + $ Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL DSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 ) + END IF + 20 CONTINUE + END IF + RETURN +* +* End of DLABRD +* + END diff --git a/costa/native/external/lapack/dlacon.f b/costa/native/external/lapack/dlacon.f new file mode 100644 index 000000000..4efa0e816 --- /dev/null +++ b/costa/native/external/lapack/dlacon.f @@ -0,0 +1,204 @@ + SUBROUTINE DLACON( N, V, X, ISGN, EST, KASE ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER KASE, N + DOUBLE PRECISION EST +* .. +* .. Array Arguments .. + INTEGER ISGN( * ) + DOUBLE PRECISION V( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DLACON estimates the 1-norm of a square, real matrix A. +* Reverse communication is used for evaluating matrix-vector products. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. N >= 1. +* +* V (workspace) DOUBLE PRECISION array, dimension (N) +* On the final return, V = A*W, where EST = norm(V)/norm(W) +* (W is not returned). +* +* X (input/output) DOUBLE PRECISION array, dimension (N) +* On an intermediate return, X should be overwritten by +* A * X, if KASE=1, +* A' * X, if KASE=2, +* and DLACON must be re-called with all the other parameters +* unchanged. +* +* ISGN (workspace) INTEGER array, dimension (N) +* +* EST (output) DOUBLE PRECISION +* An estimate (a lower bound) for norm(A). +* +* KASE (input/output) INTEGER +* On the initial call to DLACON, KASE should be 0. +* On an intermediate return, KASE will be 1 or 2, indicating +* whether X should be overwritten by A * X or A' * X. +* On the final return from DLACON, KASE will again be 0. +* +* Further Details +* ======= ======= +* +* Contributed by Nick Higham, University of Manchester. +* Originally named SONEST, dated March 16, 1988. +* +* Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of +* a real or complex matrix, with applications to condition estimation", +* ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, ITER, J, JLAST, JUMP + DOUBLE PRECISION ALTSGN, ESTOLD, TEMP +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DASUM + EXTERNAL IDAMAX, DASUM +* .. +* .. External Subroutines .. + EXTERNAL DCOPY +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, NINT, SIGN +* .. +* .. Save statement .. + SAVE +* .. +* .. Executable Statements .. +* + IF( KASE.EQ.0 ) THEN + DO 10 I = 1, N + X( I ) = ONE / DBLE( N ) + 10 CONTINUE + KASE = 1 + JUMP = 1 + RETURN + END IF +* + GO TO ( 20, 40, 70, 110, 140 )JUMP +* +* ................ ENTRY (JUMP = 1) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. +* + 20 CONTINUE + IF( N.EQ.1 ) THEN + V( 1 ) = X( 1 ) + EST = ABS( V( 1 ) ) +* ... QUIT + GO TO 150 + END IF + EST = DASUM( N, X, 1 ) +* + DO 30 I = 1, N + X( I ) = SIGN( ONE, X( I ) ) + ISGN( I ) = NINT( X( I ) ) + 30 CONTINUE + KASE = 2 + JUMP = 2 + RETURN +* +* ................ ENTRY (JUMP = 2) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X. +* + 40 CONTINUE + J = IDAMAX( N, X, 1 ) + ITER = 2 +* +* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. +* + 50 CONTINUE + DO 60 I = 1, N + X( I ) = ZERO + 60 CONTINUE + X( J ) = ONE + KASE = 1 + JUMP = 3 + RETURN +* +* ................ ENTRY (JUMP = 3) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 70 CONTINUE + CALL DCOPY( N, X, 1, V, 1 ) + ESTOLD = EST + EST = DASUM( N, V, 1 ) + DO 80 I = 1, N + IF( NINT( SIGN( ONE, X( I ) ) ).NE.ISGN( I ) ) + $ GO TO 90 + 80 CONTINUE +* REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. + GO TO 120 +* + 90 CONTINUE +* TEST FOR CYCLING. + IF( EST.LE.ESTOLD ) + $ GO TO 120 +* + DO 100 I = 1, N + X( I ) = SIGN( ONE, X( I ) ) + ISGN( I ) = NINT( X( I ) ) + 100 CONTINUE + KASE = 2 + JUMP = 4 + RETURN +* +* ................ ENTRY (JUMP = 4) +* X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X. +* + 110 CONTINUE + JLAST = J + J = IDAMAX( N, X, 1 ) + IF( ( X( JLAST ).NE.ABS( X( J ) ) ) .AND. ( ITER.LT.ITMAX ) ) THEN + ITER = ITER + 1 + GO TO 50 + END IF +* +* ITERATION COMPLETE. FINAL STAGE. +* + 120 CONTINUE + ALTSGN = ONE + DO 130 I = 1, N + X( I ) = ALTSGN*( ONE+DBLE( I-1 ) / DBLE( N-1 ) ) + ALTSGN = -ALTSGN + 130 CONTINUE + KASE = 1 + JUMP = 5 + RETURN +* +* ................ ENTRY (JUMP = 5) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 140 CONTINUE + TEMP = TWO*( DASUM( N, X, 1 ) / DBLE( 3*N ) ) + IF( TEMP.GT.EST ) THEN + CALL DCOPY( N, X, 1, V, 1 ) + EST = TEMP + END IF +* + 150 CONTINUE + KASE = 0 + RETURN +* +* End of DLACON +* + END diff --git a/costa/native/external/lapack/dlacpy.f b/costa/native/external/lapack/dlacpy.f new file mode 100644 index 000000000..afec37171 --- /dev/null +++ b/costa/native/external/lapack/dlacpy.f @@ -0,0 +1,88 @@ + SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDB, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DLACPY copies all or part of a two-dimensional matrix A to another +* matrix B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies the part of the matrix A to be copied to B. +* = 'U': Upper triangular part +* = 'L': Lower triangular part +* Otherwise: All of the matrix A +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The m by n matrix A. If UPLO = 'U', only the upper triangle +* or trapezoid is accessed; if UPLO = 'L', only the lower +* triangle or trapezoid is accessed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (output) DOUBLE PRECISION array, dimension (LDB,N) +* On exit, B = A in the locations specified by UPLO. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( J, M ) + B( I, J ) = A( I, J ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( LSAME( UPLO, 'L' ) ) THEN + DO 40 J = 1, N + DO 30 I = J, M + B( I, J ) = A( I, J ) + 30 CONTINUE + 40 CONTINUE + ELSE + DO 60 J = 1, N + DO 50 I = 1, M + B( I, J ) = A( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF + RETURN +* +* End of DLACPY +* + END diff --git a/costa/native/external/lapack/dladiv.f b/costa/native/external/lapack/dladiv.f new file mode 100644 index 000000000..205716257 --- /dev/null +++ b/costa/native/external/lapack/dladiv.f @@ -0,0 +1,63 @@ + SUBROUTINE DLADIV( A, B, C, D, P, Q ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + DOUBLE PRECISION A, B, C, D, P, Q +* .. +* +* Purpose +* ======= +* +* DLADIV performs complex division in real arithmetic +* +* a + i*b +* p + i*q = --------- +* c + i*d +* +* The algorithm is due to Robert L. Smith and can be found +* in D. Knuth, The art of Computer Programming, Vol.2, p.195 +* +* Arguments +* ========= +* +* A (input) DOUBLE PRECISION +* B (input) DOUBLE PRECISION +* C (input) DOUBLE PRECISION +* D (input) DOUBLE PRECISION +* The scalars a, b, c, and d in the above expression. +* +* P (output) DOUBLE PRECISION +* Q (output) DOUBLE PRECISION +* The scalars p and q in the above expression. +* +* ===================================================================== +* +* .. Local Scalars .. + DOUBLE PRECISION E, F +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + IF( ABS( D ).LT.ABS( C ) ) THEN + E = D / C + F = C + D*E + P = ( A+B*E ) / F + Q = ( B-A*E ) / F + ELSE + E = C / D + F = D + C*E + P = ( B+A*E ) / F + Q = ( -A+B*E ) / F + END IF +* + RETURN +* +* End of DLADIV +* + END diff --git a/costa/native/external/lapack/dlae2.f b/costa/native/external/lapack/dlae2.f new file mode 100644 index 000000000..57c69b6dd --- /dev/null +++ b/costa/native/external/lapack/dlae2.f @@ -0,0 +1,124 @@ + SUBROUTINE DLAE2( A, B, C, RT1, RT2 ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + DOUBLE PRECISION A, B, C, RT1, RT2 +* .. +* +* Purpose +* ======= +* +* DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix +* [ A B ] +* [ B C ]. +* On return, RT1 is the eigenvalue of larger absolute value, and RT2 +* is the eigenvalue of smaller absolute value. +* +* Arguments +* ========= +* +* A (input) DOUBLE PRECISION +* The (1,1) element of the 2-by-2 matrix. +* +* B (input) DOUBLE PRECISION +* The (1,2) and (2,1) elements of the 2-by-2 matrix. +* +* C (input) DOUBLE PRECISION +* The (2,2) element of the 2-by-2 matrix. +* +* RT1 (output) DOUBLE PRECISION +* The eigenvalue of larger absolute value. +* +* RT2 (output) DOUBLE PRECISION +* The eigenvalue of smaller absolute value. +* +* Further Details +* =============== +* +* RT1 is accurate to a few ulps barring over/underflow. +* +* RT2 may be inaccurate if there is massive cancellation in the +* determinant A*C-B*B; higher precision or correctly rounded or +* correctly truncated arithmetic would be needed to compute RT2 +* accurately in all cases. +* +* Overflow is possible only if RT1 is within a factor of 5 of overflow. +* Underflow is harmless if the input data is 0 or exceeds +* underflow_threshold / macheps. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D0 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION HALF + PARAMETER ( HALF = 0.5D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION AB, ACMN, ACMX, ADF, DF, RT, SM, TB +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* +* Compute the eigenvalues +* + SM = A + C + DF = A - C + ADF = ABS( DF ) + TB = B + B + AB = ABS( TB ) + IF( ABS( A ).GT.ABS( C ) ) THEN + ACMX = A + ACMN = C + ELSE + ACMX = C + ACMN = A + END IF + IF( ADF.GT.AB ) THEN + RT = ADF*SQRT( ONE+( AB / ADF )**2 ) + ELSE IF( ADF.LT.AB ) THEN + RT = AB*SQRT( ONE+( ADF / AB )**2 ) + ELSE +* +* Includes case AB=ADF=0 +* + RT = AB*SQRT( TWO ) + END IF + IF( SM.LT.ZERO ) THEN + RT1 = HALF*( SM-RT ) +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE IF( SM.GT.ZERO ) THEN + RT1 = HALF*( SM+RT ) +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE +* +* Includes case RT1 = RT2 = 0 +* + RT1 = HALF*RT + RT2 = -HALF*RT + END IF + RETURN +* +* End of DLAE2 +* + END diff --git a/costa/native/external/lapack/dlaebz.f b/costa/native/external/lapack/dlaebz.f new file mode 100644 index 000000000..c2c06c66e --- /dev/null +++ b/costa/native/external/lapack/dlaebz.f @@ -0,0 +1,552 @@ + SUBROUTINE DLAEBZ( IJOB, NITMAX, N, MMAX, MINP, NBMIN, ABSTOL, + $ RELTOL, PIVMIN, D, E, E2, NVAL, AB, C, MOUT, + $ NAB, WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER IJOB, INFO, MINP, MMAX, MOUT, N, NBMIN, NITMAX + DOUBLE PRECISION ABSTOL, PIVMIN, RELTOL +* .. +* .. Array Arguments .. + INTEGER IWORK( * ), NAB( MMAX, * ), NVAL( * ) + DOUBLE PRECISION AB( MMAX, * ), C( * ), D( * ), E( * ), E2( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLAEBZ contains the iteration loops which compute and use the +* function N(w), which is the count of eigenvalues of a symmetric +* tridiagonal matrix T less than or equal to its argument w. It +* performs a choice of two types of loops: +* +* IJOB=1, followed by +* IJOB=2: It takes as input a list of intervals and returns a list of +* sufficiently small intervals whose union contains the same +* eigenvalues as the union of the original intervals. +* The input intervals are (AB(j,1),AB(j,2)], j=1,...,MINP. +* The output interval (AB(j,1),AB(j,2)] will contain +* eigenvalues NAB(j,1)+1,...,NAB(j,2), where 1 <= j <= MOUT. +* +* IJOB=3: It performs a binary search in each input interval +* (AB(j,1),AB(j,2)] for a point w(j) such that +* N(w(j))=NVAL(j), and uses C(j) as the starting point of +* the search. If such a w(j) is found, then on output +* AB(j,1)=AB(j,2)=w. If no such w(j) is found, then on output +* (AB(j,1),AB(j,2)] will be a small interval containing the +* point where N(w) jumps through NVAL(j), unless that point +* lies outside the initial interval. +* +* Note that the intervals are in all cases half-open intervals, +* i.e., of the form (a,b] , which includes b but not a . +* +* To avoid underflow, the matrix should be scaled so that its largest +* element is no greater than overflow**(1/2) * underflow**(1/4) +* in absolute value. To assure the most accurate computation +* of small eigenvalues, the matrix should be scaled to be +* not much smaller than that, either. +* +* See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal +* Matrix", Report CS41, Computer Science Dept., Stanford +* University, July 21, 1966 +* +* Note: the arguments are, in general, *not* checked for unreasonable +* values. +* +* Arguments +* ========= +* +* IJOB (input) INTEGER +* Specifies what is to be done: +* = 1: Compute NAB for the initial intervals. +* = 2: Perform bisection iteration to find eigenvalues of T. +* = 3: Perform bisection iteration to invert N(w), i.e., +* to find a point which has a specified number of +* eigenvalues of T to its left. +* Other values will cause DLAEBZ to return with INFO=-1. +* +* NITMAX (input) INTEGER +* The maximum number of "levels" of bisection to be +* performed, i.e., an interval of width W will not be made +* smaller than 2^(-NITMAX) * W. If not all intervals +* have converged after NITMAX iterations, then INFO is set +* to the number of non-converged intervals. +* +* N (input) INTEGER +* The dimension n of the tridiagonal matrix T. It must be at +* least 1. +* +* MMAX (input) INTEGER +* The maximum number of intervals. If more than MMAX intervals +* are generated, then DLAEBZ will quit with INFO=MMAX+1. +* +* MINP (input) INTEGER +* The initial number of intervals. It may not be greater than +* MMAX. +* +* NBMIN (input) INTEGER +* The smallest number of intervals that should be processed +* using a vector loop. If zero, then only the scalar loop +* will be used. +* +* ABSTOL (input) DOUBLE PRECISION +* The minimum (absolute) width of an interval. When an +* interval is narrower than ABSTOL, or than RELTOL times the +* larger (in magnitude) endpoint, then it is considered to be +* sufficiently small, i.e., converged. This must be at least +* zero. +* +* RELTOL (input) DOUBLE PRECISION +* The minimum relative width of an interval. When an interval +* is narrower than ABSTOL, or than RELTOL times the larger (in +* magnitude) endpoint, then it is considered to be +* sufficiently small, i.e., converged. Note: this should +* always be at least radix*machine epsilon. +* +* PIVMIN (input) DOUBLE PRECISION +* The minimum absolute value of a "pivot" in the Sturm +* sequence loop. This *must* be at least max |e(j)**2| * +* safe_min and at least safe_min, where safe_min is at least +* the smallest number that can divide one without overflow. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The diagonal elements of the tridiagonal matrix T. +* +* E (input) DOUBLE PRECISION array, dimension (N) +* The offdiagonal elements of the tridiagonal matrix T in +* positions 1 through N-1. E(N) is arbitrary. +* +* E2 (input) DOUBLE PRECISION array, dimension (N) +* The squares of the offdiagonal elements of the tridiagonal +* matrix T. E2(N) is ignored. +* +* NVAL (input/output) INTEGER array, dimension (MINP) +* If IJOB=1 or 2, not referenced. +* If IJOB=3, the desired values of N(w). The elements of NVAL +* will be reordered to correspond with the intervals in AB. +* Thus, NVAL(j) on output will not, in general be the same as +* NVAL(j) on input, but it will correspond with the interval +* (AB(j,1),AB(j,2)] on output. +* +* AB (input/output) DOUBLE PRECISION array, dimension (MMAX,2) +* The endpoints of the intervals. AB(j,1) is a(j), the left +* endpoint of the j-th interval, and AB(j,2) is b(j), the +* right endpoint of the j-th interval. The input intervals +* will, in general, be modified, split, and reordered by the +* calculation. +* +* C (input/output) DOUBLE PRECISION array, dimension (MMAX) +* If IJOB=1, ignored. +* If IJOB=2, workspace. +* If IJOB=3, then on input C(j) should be initialized to the +* first search point in the binary search. +* +* MOUT (output) INTEGER +* If IJOB=1, the number of eigenvalues in the intervals. +* If IJOB=2 or 3, the number of intervals output. +* If IJOB=3, MOUT will equal MINP. +* +* NAB (input/output) INTEGER array, dimension (MMAX,2) +* If IJOB=1, then on output NAB(i,j) will be set to N(AB(i,j)). +* If IJOB=2, then on input, NAB(i,j) should be set. It must +* satisfy the condition: +* N(AB(i,1)) <= NAB(i,1) <= NAB(i,2) <= N(AB(i,2)), +* which means that in interval i only eigenvalues +* NAB(i,1)+1,...,NAB(i,2) will be considered. Usually, +* NAB(i,j)=N(AB(i,j)), from a previous call to DLAEBZ with +* IJOB=1. +* On output, NAB(i,j) will contain +* max(na(k),min(nb(k),N(AB(i,j)))), where k is the index of +* the input interval that the output interval +* (AB(j,1),AB(j,2)] came from, and na(k) and nb(k) are the +* the input values of NAB(k,1) and NAB(k,2). +* If IJOB=3, then on output, NAB(i,j) contains N(AB(i,j)), +* unless N(w) > NVAL(i) for all search points w , in which +* case NAB(i,1) will not be modified, i.e., the output +* value will be the same as the input value (modulo +* reorderings -- see NVAL and AB), or unless N(w) < NVAL(i) +* for all search points w , in which case NAB(i,2) will +* not be modified. Normally, NAB should be set to some +* distinctive value(s) before DLAEBZ is called. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (MMAX) +* Workspace. +* +* IWORK (workspace) INTEGER array, dimension (MMAX) +* Workspace. +* +* INFO (output) INTEGER +* = 0: All intervals converged. +* = 1--MMAX: The last INFO intervals did not converge. +* = MMAX+1: More than MMAX intervals were generated. +* +* Further Details +* =============== +* +* This routine is intended to be called only by other LAPACK +* routines, thus the interface is less user-friendly. It is intended +* for two purposes: +* +* (a) finding eigenvalues. In this case, DLAEBZ should have one or +* more initial intervals set up in AB, and DLAEBZ should be called +* with IJOB=1. This sets up NAB, and also counts the eigenvalues. +* Intervals with no eigenvalues would usually be thrown out at +* this point. Also, if not all the eigenvalues in an interval i +* are desired, NAB(i,1) can be increased or NAB(i,2) decreased. +* For example, set NAB(i,1)=NAB(i,2)-1 to get the largest +* eigenvalue. DLAEBZ is then called with IJOB=2 and MMAX +* no smaller than the value of MOUT returned by the call with +* IJOB=1. After this (IJOB=2) call, eigenvalues NAB(i,1)+1 +* through NAB(i,2) are approximately AB(i,1) (or AB(i,2)) to the +* tolerance specified by ABSTOL and RELTOL. +* +* (b) finding an interval (a',b'] containing eigenvalues w(f),...,w(l). +* In this case, start with a Gershgorin interval (a,b). Set up +* AB to contain 2 search intervals, both initially (a,b). One +* NVAL element should contain f-1 and the other should contain l +* , while C should contain a and b, resp. NAB(i,1) should be -1 +* and NAB(i,2) should be N+1, to flag an error if the desired +* interval does not lie in (a,b). DLAEBZ is then called with +* IJOB=3. On exit, if w(f-1) < w(f), then one of the intervals -- +* j -- will have AB(j,1)=AB(j,2) and NAB(j,1)=NAB(j,2)=f-1, while +* if, to the specified tolerance, w(f-k)=...=w(f+r), k > 0 and r +* >= 0, then the interval will have N(AB(j,1))=NAB(j,1)=f-k and +* N(AB(j,2))=NAB(j,2)=f+r. The cases w(l) < w(l+1) and +* w(l-r)=...=w(l+k) are handled similarly. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, TWO, HALF + PARAMETER ( ZERO = 0.0D0, TWO = 2.0D0, + $ HALF = 1.0D0 / TWO ) +* .. +* .. Local Scalars .. + INTEGER ITMP1, ITMP2, J, JI, JIT, JP, KF, KFNEW, KL, + $ KLNEW + DOUBLE PRECISION TMP1, TMP2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Check for Errors +* + INFO = 0 + IF( IJOB.LT.1 .OR. IJOB.GT.3 ) THEN + INFO = -1 + RETURN + END IF +* +* Initialize NAB +* + IF( IJOB.EQ.1 ) THEN +* +* Compute the number of eigenvalues in the initial intervals. +* + MOUT = 0 +*DIR$ NOVECTOR + DO 30 JI = 1, MINP + DO 20 JP = 1, 2 + TMP1 = D( 1 ) - AB( JI, JP ) + IF( ABS( TMP1 ).LT.PIVMIN ) + $ TMP1 = -PIVMIN + NAB( JI, JP ) = 0 + IF( TMP1.LE.ZERO ) + $ NAB( JI, JP ) = 1 +* + DO 10 J = 2, N + TMP1 = D( J ) - E2( J-1 ) / TMP1 - AB( JI, JP ) + IF( ABS( TMP1 ).LT.PIVMIN ) + $ TMP1 = -PIVMIN + IF( TMP1.LE.ZERO ) + $ NAB( JI, JP ) = NAB( JI, JP ) + 1 + 10 CONTINUE + 20 CONTINUE + MOUT = MOUT + NAB( JI, 2 ) - NAB( JI, 1 ) + 30 CONTINUE + RETURN + END IF +* +* Initialize for loop +* +* KF and KL have the following meaning: +* Intervals 1,...,KF-1 have converged. +* Intervals KF,...,KL still need to be refined. +* + KF = 1 + KL = MINP +* +* If IJOB=2, initialize C. +* If IJOB=3, use the user-supplied starting point. +* + IF( IJOB.EQ.2 ) THEN + DO 40 JI = 1, MINP + C( JI ) = HALF*( AB( JI, 1 )+AB( JI, 2 ) ) + 40 CONTINUE + END IF +* +* Iteration loop +* + DO 130 JIT = 1, NITMAX +* +* Loop over intervals +* + IF( KL-KF+1.GE.NBMIN .AND. NBMIN.GT.0 ) THEN +* +* Begin of Parallel Version of the loop +* + DO 60 JI = KF, KL +* +* Compute N(c), the number of eigenvalues less than c +* + WORK( JI ) = D( 1 ) - C( JI ) + IWORK( JI ) = 0 + IF( WORK( JI ).LE.PIVMIN ) THEN + IWORK( JI ) = 1 + WORK( JI ) = MIN( WORK( JI ), -PIVMIN ) + END IF +* + DO 50 J = 2, N + WORK( JI ) = D( J ) - E2( J-1 ) / WORK( JI ) - C( JI ) + IF( WORK( JI ).LE.PIVMIN ) THEN + IWORK( JI ) = IWORK( JI ) + 1 + WORK( JI ) = MIN( WORK( JI ), -PIVMIN ) + END IF + 50 CONTINUE + 60 CONTINUE +* + IF( IJOB.LE.2 ) THEN +* +* IJOB=2: Choose all intervals containing eigenvalues. +* + KLNEW = KL + DO 70 JI = KF, KL +* +* Insure that N(w) is monotone +* + IWORK( JI ) = MIN( NAB( JI, 2 ), + $ MAX( NAB( JI, 1 ), IWORK( JI ) ) ) +* +* Update the Queue -- add intervals if both halves +* contain eigenvalues. +* + IF( IWORK( JI ).EQ.NAB( JI, 2 ) ) THEN +* +* No eigenvalue in the upper interval: +* just use the lower interval. +* + AB( JI, 2 ) = C( JI ) +* + ELSE IF( IWORK( JI ).EQ.NAB( JI, 1 ) ) THEN +* +* No eigenvalue in the lower interval: +* just use the upper interval. +* + AB( JI, 1 ) = C( JI ) + ELSE + KLNEW = KLNEW + 1 + IF( KLNEW.LE.MMAX ) THEN +* +* Eigenvalue in both intervals -- add upper to +* queue. +* + AB( KLNEW, 2 ) = AB( JI, 2 ) + NAB( KLNEW, 2 ) = NAB( JI, 2 ) + AB( KLNEW, 1 ) = C( JI ) + NAB( KLNEW, 1 ) = IWORK( JI ) + AB( JI, 2 ) = C( JI ) + NAB( JI, 2 ) = IWORK( JI ) + ELSE + INFO = MMAX + 1 + END IF + END IF + 70 CONTINUE + IF( INFO.NE.0 ) + $ RETURN + KL = KLNEW + ELSE +* +* IJOB=3: Binary search. Keep only the interval containing +* w s.t. N(w) = NVAL +* + DO 80 JI = KF, KL + IF( IWORK( JI ).LE.NVAL( JI ) ) THEN + AB( JI, 1 ) = C( JI ) + NAB( JI, 1 ) = IWORK( JI ) + END IF + IF( IWORK( JI ).GE.NVAL( JI ) ) THEN + AB( JI, 2 ) = C( JI ) + NAB( JI, 2 ) = IWORK( JI ) + END IF + 80 CONTINUE + END IF +* + ELSE +* +* End of Parallel Version of the loop +* +* Begin of Serial Version of the loop +* + KLNEW = KL + DO 100 JI = KF, KL +* +* Compute N(w), the number of eigenvalues less than w +* + TMP1 = C( JI ) + TMP2 = D( 1 ) - TMP1 + ITMP1 = 0 + IF( TMP2.LE.PIVMIN ) THEN + ITMP1 = 1 + TMP2 = MIN( TMP2, -PIVMIN ) + END IF +* +* A series of compiler directives to defeat vectorization +* for the next loop +* +*$PL$ CMCHAR=' ' +CDIR$ NEXTSCALAR +C$DIR SCALAR +CDIR$ NEXT SCALAR +CVD$L NOVECTOR +CDEC$ NOVECTOR +CVD$ NOVECTOR +*VDIR NOVECTOR +*VOCL LOOP,SCALAR +CIBM PREFER SCALAR +*$PL$ CMCHAR='*' +* + DO 90 J = 2, N + TMP2 = D( J ) - E2( J-1 ) / TMP2 - TMP1 + IF( TMP2.LE.PIVMIN ) THEN + ITMP1 = ITMP1 + 1 + TMP2 = MIN( TMP2, -PIVMIN ) + END IF + 90 CONTINUE +* + IF( IJOB.LE.2 ) THEN +* +* IJOB=2: Choose all intervals containing eigenvalues. +* +* Insure that N(w) is monotone +* + ITMP1 = MIN( NAB( JI, 2 ), + $ MAX( NAB( JI, 1 ), ITMP1 ) ) +* +* Update the Queue -- add intervals if both halves +* contain eigenvalues. +* + IF( ITMP1.EQ.NAB( JI, 2 ) ) THEN +* +* No eigenvalue in the upper interval: +* just use the lower interval. +* + AB( JI, 2 ) = TMP1 +* + ELSE IF( ITMP1.EQ.NAB( JI, 1 ) ) THEN +* +* No eigenvalue in the lower interval: +* just use the upper interval. +* + AB( JI, 1 ) = TMP1 + ELSE IF( KLNEW.LT.MMAX ) THEN +* +* Eigenvalue in both intervals -- add upper to queue. +* + KLNEW = KLNEW + 1 + AB( KLNEW, 2 ) = AB( JI, 2 ) + NAB( KLNEW, 2 ) = NAB( JI, 2 ) + AB( KLNEW, 1 ) = TMP1 + NAB( KLNEW, 1 ) = ITMP1 + AB( JI, 2 ) = TMP1 + NAB( JI, 2 ) = ITMP1 + ELSE + INFO = MMAX + 1 + RETURN + END IF + ELSE +* +* IJOB=3: Binary search. Keep only the interval +* containing w s.t. N(w) = NVAL +* + IF( ITMP1.LE.NVAL( JI ) ) THEN + AB( JI, 1 ) = TMP1 + NAB( JI, 1 ) = ITMP1 + END IF + IF( ITMP1.GE.NVAL( JI ) ) THEN + AB( JI, 2 ) = TMP1 + NAB( JI, 2 ) = ITMP1 + END IF + END IF + 100 CONTINUE + KL = KLNEW +* +* End of Serial Version of the loop +* + END IF +* +* Check for convergence +* + KFNEW = KF + DO 110 JI = KF, KL + TMP1 = ABS( AB( JI, 2 )-AB( JI, 1 ) ) + TMP2 = MAX( ABS( AB( JI, 2 ) ), ABS( AB( JI, 1 ) ) ) + IF( TMP1.LT.MAX( ABSTOL, PIVMIN, RELTOL*TMP2 ) .OR. + $ NAB( JI, 1 ).GE.NAB( JI, 2 ) ) THEN +* +* Converged -- Swap with position KFNEW, +* then increment KFNEW +* + IF( JI.GT.KFNEW ) THEN + TMP1 = AB( JI, 1 ) + TMP2 = AB( JI, 2 ) + ITMP1 = NAB( JI, 1 ) + ITMP2 = NAB( JI, 2 ) + AB( JI, 1 ) = AB( KFNEW, 1 ) + AB( JI, 2 ) = AB( KFNEW, 2 ) + NAB( JI, 1 ) = NAB( KFNEW, 1 ) + NAB( JI, 2 ) = NAB( KFNEW, 2 ) + AB( KFNEW, 1 ) = TMP1 + AB( KFNEW, 2 ) = TMP2 + NAB( KFNEW, 1 ) = ITMP1 + NAB( KFNEW, 2 ) = ITMP2 + IF( IJOB.EQ.3 ) THEN + ITMP1 = NVAL( JI ) + NVAL( JI ) = NVAL( KFNEW ) + NVAL( KFNEW ) = ITMP1 + END IF + END IF + KFNEW = KFNEW + 1 + END IF + 110 CONTINUE + KF = KFNEW +* +* Choose Midpoints +* + DO 120 JI = KF, KL + C( JI ) = HALF*( AB( JI, 1 )+AB( JI, 2 ) ) + 120 CONTINUE +* +* If no more intervals to refine, quit. +* + IF( KF.GT.KL ) + $ GO TO 140 + 130 CONTINUE +* +* Converged +* + 140 CONTINUE + INFO = MAX( KL+1-KF, 0 ) + MOUT = KL +* + RETURN +* +* End of DLAEBZ +* + END diff --git a/costa/native/external/lapack/dlaed0.f b/costa/native/external/lapack/dlaed0.f new file mode 100644 index 000000000..6259d61d3 --- /dev/null +++ b/costa/native/external/lapack/dlaed0.f @@ -0,0 +1,350 @@ + SUBROUTINE DLAED0( ICOMPQ, QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS, + $ WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, LDQ, LDQS, N, QSIZ +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), Q( LDQ, * ), QSTORE( LDQS, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLAED0 computes all eigenvalues and corresponding eigenvectors of a +* symmetric tridiagonal matrix using the divide and conquer method. +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* = 0: Compute eigenvalues only. +* = 1: Compute eigenvectors of original dense symmetric matrix +* also. On entry, Q contains the orthogonal matrix used +* to reduce the original matrix to tridiagonal form. +* = 2: Compute eigenvalues and eigenvectors of tridiagonal +* matrix. +* +* QSIZ (input) INTEGER +* The dimension of the orthogonal matrix used to reduce +* the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. +* +* N (input) INTEGER +* The dimension of the symmetric tridiagonal matrix. N >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the main diagonal of the tridiagonal matrix. +* On exit, its eigenvalues. +* +* E (input) DOUBLE PRECISION array, dimension (N-1) +* The off-diagonal elements of the tridiagonal matrix. +* On exit, E has been destroyed. +* +* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) +* On entry, Q must contain an N-by-N orthogonal matrix. +* If ICOMPQ = 0 Q is not referenced. +* If ICOMPQ = 1 On entry, Q is a subset of the columns of the +* orthogonal matrix used to reduce the full +* matrix to tridiagonal form corresponding to +* the subset of the full matrix which is being +* decomposed at this time. +* If ICOMPQ = 2 On entry, Q will be the identity matrix. +* On exit, Q contains the eigenvectors of the +* tridiagonal matrix. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. If eigenvectors are +* desired, then LDQ >= max(1,N). In any case, LDQ >= 1. +* +* QSTORE (workspace) DOUBLE PRECISION array, dimension (LDQS, N) +* Referenced only when ICOMPQ = 1. Used to store parts of +* the eigenvector matrix when the updating matrix multiplies +* take place. +* +* LDQS (input) INTEGER +* The leading dimension of the array QSTORE. If ICOMPQ = 1, +* then LDQS >= max(1,N). In any case, LDQS >= 1. +* +* WORK (workspace) DOUBLE PRECISION array, +* If ICOMPQ = 0 or 1, the dimension of WORK must be at least +* 1 + 3*N + 2*N*lg N + 2*N**2 +* ( lg( N ) = smallest integer k +* such that 2^k >= N ) +* If ICOMPQ = 2, the dimension of WORK must be at least +* 4*N + N**2. +* +* IWORK (workspace) INTEGER array, +* If ICOMPQ = 0 or 1, the dimension of IWORK must be at least +* 6 + 6*N + 5*N*lg N. +* ( lg( N ) = smallest integer k +* such that 2^k >= N ) +* If ICOMPQ = 2, the dimension of IWORK must be at least +* 3 + 5*N. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: The algorithm failed to compute an eigenvalue while +* working on the submatrix lying in rows and columns +* INFO/(N+1) through mod(INFO,N+1). +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.D0, ONE = 1.D0, TWO = 2.D0 ) +* .. +* .. Local Scalars .. + INTEGER CURLVL, CURPRB, CURR, I, IGIVCL, IGIVNM, + $ IGIVPT, INDXQ, IPERM, IPRMPT, IQ, IQPTR, IWREM, + $ J, K, LGN, MATSIZ, MSD2, SMLSIZ, SMM1, SPM1, + $ SPM2, SUBMAT, SUBPBS, TLVLS + DOUBLE PRECISION TEMP +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DLACPY, DLAED1, DLAED7, DSTEQR, + $ XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, LOG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.2 ) THEN + INFO = -1 + ELSE IF( ( ICOMPQ.EQ.1 ) .AND. ( QSIZ.LT.MAX( 0, N ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDQS.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAED0', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + SMLSIZ = ILAENV( 9, 'DLAED0', ' ', 0, 0, 0, 0 ) +* +* Determine the size and placement of the submatrices, and save in +* the leading elements of IWORK. +* + IWORK( 1 ) = N + SUBPBS = 1 + TLVLS = 0 + 10 CONTINUE + IF( IWORK( SUBPBS ).GT.SMLSIZ ) THEN + DO 20 J = SUBPBS, 1, -1 + IWORK( 2*J ) = ( IWORK( J )+1 ) / 2 + IWORK( 2*J-1 ) = IWORK( J ) / 2 + 20 CONTINUE + TLVLS = TLVLS + 1 + SUBPBS = 2*SUBPBS + GO TO 10 + END IF + DO 30 J = 2, SUBPBS + IWORK( J ) = IWORK( J ) + IWORK( J-1 ) + 30 CONTINUE +* +* Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 +* using rank-1 modifications (cuts). +* + SPM1 = SUBPBS - 1 + DO 40 I = 1, SPM1 + SUBMAT = IWORK( I ) + 1 + SMM1 = SUBMAT - 1 + D( SMM1 ) = D( SMM1 ) - ABS( E( SMM1 ) ) + D( SUBMAT ) = D( SUBMAT ) - ABS( E( SMM1 ) ) + 40 CONTINUE +* + INDXQ = 4*N + 3 + IF( ICOMPQ.NE.2 ) THEN +* +* Set up workspaces for eigenvalues only/accumulate new vectors +* routine +* + TEMP = LOG( DBLE( N ) ) / LOG( TWO ) + LGN = INT( TEMP ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IPRMPT = INDXQ + N + 1 + IPERM = IPRMPT + N*LGN + IQPTR = IPERM + N*LGN + IGIVPT = IQPTR + N + 2 + IGIVCL = IGIVPT + N*LGN +* + IGIVNM = 1 + IQ = IGIVNM + 2*N*LGN + IWREM = IQ + N**2 + 1 +* +* Initialize pointers +* + DO 50 I = 0, SUBPBS + IWORK( IPRMPT+I ) = 1 + IWORK( IGIVPT+I ) = 1 + 50 CONTINUE + IWORK( IQPTR ) = 1 + END IF +* +* Solve each submatrix eigenproblem at the bottom of the divide and +* conquer tree. +* + CURR = 0 + DO 70 I = 0, SPM1 + IF( I.EQ.0 ) THEN + SUBMAT = 1 + MATSIZ = IWORK( 1 ) + ELSE + SUBMAT = IWORK( I ) + 1 + MATSIZ = IWORK( I+1 ) - IWORK( I ) + END IF + IF( ICOMPQ.EQ.2 ) THEN + CALL DSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ), + $ Q( SUBMAT, SUBMAT ), LDQ, WORK, INFO ) + IF( INFO.NE.0 ) + $ GO TO 130 + ELSE + CALL DSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ), + $ WORK( IQ-1+IWORK( IQPTR+CURR ) ), MATSIZ, WORK, + $ INFO ) + IF( INFO.NE.0 ) + $ GO TO 130 + IF( ICOMPQ.EQ.1 ) THEN + CALL DGEMM( 'N', 'N', QSIZ, MATSIZ, MATSIZ, ONE, + $ Q( 1, SUBMAT ), LDQ, WORK( IQ-1+IWORK( IQPTR+ + $ CURR ) ), MATSIZ, ZERO, QSTORE( 1, SUBMAT ), + $ LDQS ) + END IF + IWORK( IQPTR+CURR+1 ) = IWORK( IQPTR+CURR ) + MATSIZ**2 + CURR = CURR + 1 + END IF + K = 1 + DO 60 J = SUBMAT, IWORK( I+1 ) + IWORK( INDXQ+J ) = K + K = K + 1 + 60 CONTINUE + 70 CONTINUE +* +* Successively merge eigensystems of adjacent submatrices +* into eigensystem for the corresponding larger matrix. +* +* while ( SUBPBS > 1 ) +* + CURLVL = 1 + 80 CONTINUE + IF( SUBPBS.GT.1 ) THEN + SPM2 = SUBPBS - 2 + DO 90 I = 0, SPM2, 2 + IF( I.EQ.0 ) THEN + SUBMAT = 1 + MATSIZ = IWORK( 2 ) + MSD2 = IWORK( 1 ) + CURPRB = 0 + ELSE + SUBMAT = IWORK( I ) + 1 + MATSIZ = IWORK( I+2 ) - IWORK( I ) + MSD2 = MATSIZ / 2 + CURPRB = CURPRB + 1 + END IF +* +* Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) +* into an eigensystem of size MATSIZ. +* DLAED1 is used only for the full eigensystem of a tridiagonal +* matrix. +* DLAED7 handles the cases in which eigenvalues only or eigenvalues +* and eigenvectors of a full symmetric matrix (which was reduced to +* tridiagonal form) are desired. +* + IF( ICOMPQ.EQ.2 ) THEN + CALL DLAED1( MATSIZ, D( SUBMAT ), Q( SUBMAT, SUBMAT ), + $ LDQ, IWORK( INDXQ+SUBMAT ), + $ E( SUBMAT+MSD2-1 ), MSD2, WORK, + $ IWORK( SUBPBS+1 ), INFO ) + ELSE + CALL DLAED7( ICOMPQ, MATSIZ, QSIZ, TLVLS, CURLVL, CURPRB, + $ D( SUBMAT ), QSTORE( 1, SUBMAT ), LDQS, + $ IWORK( INDXQ+SUBMAT ), E( SUBMAT+MSD2-1 ), + $ MSD2, WORK( IQ ), IWORK( IQPTR ), + $ IWORK( IPRMPT ), IWORK( IPERM ), + $ IWORK( IGIVPT ), IWORK( IGIVCL ), + $ WORK( IGIVNM ), WORK( IWREM ), + $ IWORK( SUBPBS+1 ), INFO ) + END IF + IF( INFO.NE.0 ) + $ GO TO 130 + IWORK( I / 2+1 ) = IWORK( I+2 ) + 90 CONTINUE + SUBPBS = SUBPBS / 2 + CURLVL = CURLVL + 1 + GO TO 80 + END IF +* +* end while +* +* Re-merge the eigenvalues/vectors which were deflated at the final +* merge step. +* + IF( ICOMPQ.EQ.1 ) THEN + DO 100 I = 1, N + J = IWORK( INDXQ+I ) + WORK( I ) = D( J ) + CALL DCOPY( QSIZ, QSTORE( 1, J ), 1, Q( 1, I ), 1 ) + 100 CONTINUE + CALL DCOPY( N, WORK, 1, D, 1 ) + ELSE IF( ICOMPQ.EQ.2 ) THEN + DO 110 I = 1, N + J = IWORK( INDXQ+I ) + WORK( I ) = D( J ) + CALL DCOPY( N, Q( 1, J ), 1, WORK( N*I+1 ), 1 ) + 110 CONTINUE + CALL DCOPY( N, WORK, 1, D, 1 ) + CALL DLACPY( 'A', N, N, WORK( N+1 ), N, Q, LDQ ) + ELSE + DO 120 I = 1, N + J = IWORK( INDXQ+I ) + WORK( I ) = D( J ) + 120 CONTINUE + CALL DCOPY( N, WORK, 1, D, 1 ) + END IF + GO TO 140 +* + 130 CONTINUE + INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1 +* + 140 CONTINUE + RETURN +* +* End of DLAED0 +* + END diff --git a/costa/native/external/lapack/dlaed1.f b/costa/native/external/lapack/dlaed1.f new file mode 100644 index 000000000..674ee80c5 --- /dev/null +++ b/costa/native/external/lapack/dlaed1.f @@ -0,0 +1,196 @@ + SUBROUTINE DLAED1( N, D, Q, LDQ, INDXQ, RHO, CUTPNT, WORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER CUTPNT, INFO, LDQ, N + DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. + INTEGER INDXQ( * ), IWORK( * ) + DOUBLE PRECISION D( * ), Q( LDQ, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLAED1 computes the updated eigensystem of a diagonal +* matrix after modification by a rank-one symmetric matrix. This +* routine is used only for the eigenproblem which requires all +* eigenvalues and eigenvectors of a tridiagonal matrix. DLAED7 handles +* the case in which eigenvalues only or eigenvalues and eigenvectors +* of a full symmetric matrix (which was reduced to tridiagonal form) +* are desired. +* +* T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) +* +* where Z = Q'u, u is a vector of length N with ones in the +* CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. +* +* The eigenvectors of the original matrix are stored in Q, and the +* eigenvalues are in D. The algorithm consists of three stages: +* +* The first stage consists of deflating the size of the problem +* when there are multiple eigenvalues or if there is a zero in +* the Z vector. For each such occurence the dimension of the +* secular equation problem is reduced by one. This stage is +* performed by the routine DLAED2. +* +* The second stage consists of calculating the updated +* eigenvalues. This is done by finding the roots of the secular +* equation via the routine DLAED4 (as called by DLAED3). +* This routine also calculates the eigenvectors of the current +* problem. +* +* The final stage consists of computing the updated eigenvectors +* directly using the updated eigenvalues. The eigenvectors for +* the current problem are multiplied with the eigenvectors from +* the overall problem. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The dimension of the symmetric tridiagonal matrix. N >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the eigenvalues of the rank-1-perturbed matrix. +* On exit, the eigenvalues of the repaired matrix. +* +* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) +* On entry, the eigenvectors of the rank-1-perturbed matrix. +* On exit, the eigenvectors of the repaired tridiagonal matrix. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* INDXQ (input/output) INTEGER array, dimension (N) +* On entry, the permutation which separately sorts the two +* subproblems in D into ascending order. +* On exit, the permutation which will reintegrate the +* subproblems back into sorted order, +* i.e. D( INDXQ( I = 1, N ) ) will be in ascending order. +* +* RHO (input) DOUBLE PRECISION +* The subdiagonal entry used to create the rank-1 modification. +* +* CUTPNT (input) INTEGER +* The location of the last eigenvalue in the leading sub-matrix. +* min(1,N) <= CUTPNT <= N/2. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (4*N + N**2) +* +* IWORK (workspace) INTEGER array, dimension (4*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an eigenvalue did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* Modified by Francoise Tisseur, University of Tennessee. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER COLTYP, I, IDLMDA, INDX, INDXC, INDXP, IQ2, IS, + $ IW, IZ, K, N1, N2, ZPP1 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLAED2, DLAED3, DLAMRG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( MIN( 1, N / 2 ).GT.CUTPNT .OR. ( N / 2 ).LT.CUTPNT ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAED1', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* The following values are integer pointers which indicate +* the portion of the workspace +* used by a particular array in DLAED2 and DLAED3. +* + IZ = 1 + IDLMDA = IZ + N + IW = IDLMDA + N + IQ2 = IW + N +* + INDX = 1 + INDXC = INDX + N + COLTYP = INDXC + N + INDXP = COLTYP + N +* +* +* Form the z-vector which consists of the last row of Q_1 and the +* first row of Q_2. +* + CALL DCOPY( CUTPNT, Q( CUTPNT, 1 ), LDQ, WORK( IZ ), 1 ) + ZPP1 = CUTPNT + 1 + CALL DCOPY( N-CUTPNT, Q( ZPP1, ZPP1 ), LDQ, WORK( IZ+CUTPNT ), 1 ) +* +* Deflate eigenvalues. +* + CALL DLAED2( K, N, CUTPNT, D, Q, LDQ, INDXQ, RHO, WORK( IZ ), + $ WORK( IDLMDA ), WORK( IW ), WORK( IQ2 ), + $ IWORK( INDX ), IWORK( INDXC ), IWORK( INDXP ), + $ IWORK( COLTYP ), INFO ) +* + IF( INFO.NE.0 ) + $ GO TO 20 +* +* Solve Secular Equation. +* + IF( K.NE.0 ) THEN + IS = ( IWORK( COLTYP )+IWORK( COLTYP+1 ) )*CUTPNT + + $ ( IWORK( COLTYP+1 )+IWORK( COLTYP+2 ) )*( N-CUTPNT ) + IQ2 + CALL DLAED3( K, N, CUTPNT, D, Q, LDQ, RHO, WORK( IDLMDA ), + $ WORK( IQ2 ), IWORK( INDXC ), IWORK( COLTYP ), + $ WORK( IW ), WORK( IS ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 20 +* +* Prepare the INDXQ sorting permutation. +* + N1 = K + N2 = N - K + CALL DLAMRG( N1, N2, D, 1, -1, INDXQ ) + ELSE + DO 10 I = 1, N + INDXQ( I ) = I + 10 CONTINUE + END IF +* + 20 CONTINUE + RETURN +* +* End of DLAED1 +* + END diff --git a/costa/native/external/lapack/dlaed2.f b/costa/native/external/lapack/dlaed2.f new file mode 100644 index 000000000..17e13505e --- /dev/null +++ b/costa/native/external/lapack/dlaed2.f @@ -0,0 +1,435 @@ + SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W, + $ Q2, INDX, INDXC, INDXP, COLTYP, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDQ, N, N1 + DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. + INTEGER COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ), + $ INDXQ( * ) + DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ), + $ W( * ), Z( * ) +* .. +* +* Purpose +* ======= +* +* DLAED2 merges the two sets of eigenvalues together into a single +* sorted set. Then it tries to deflate the size of the problem. +* There are two ways in which deflation can occur: when two or more +* eigenvalues are close together or if there is a tiny entry in the +* Z vector. For each such occurrence the order of the related secular +* equation problem is reduced by one. +* +* Arguments +* ========= +* +* K (output) INTEGER +* The number of non-deflated eigenvalues, and the order of the +* related secular equation. 0 <= K <=N. +* +* N (input) INTEGER +* The dimension of the symmetric tridiagonal matrix. N >= 0. +* +* N1 (input) INTEGER +* The location of the last eigenvalue in the leading sub-matrix. +* min(1,N) <= N1 <= N/2. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, D contains the eigenvalues of the two submatrices to +* be combined. +* On exit, D contains the trailing (N-K) updated eigenvalues +* (those which were deflated) sorted into increasing order. +* +* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) +* On entry, Q contains the eigenvectors of two submatrices in +* the two square blocks with corners at (1,1), (N1,N1) +* and (N1+1, N1+1), (N,N). +* On exit, Q contains the trailing (N-K) updated eigenvectors +* (those which were deflated) in its last N-K columns. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* INDXQ (input/output) INTEGER array, dimension (N) +* The permutation which separately sorts the two sub-problems +* in D into ascending order. Note that elements in the second +* half of this permutation must first have N1 added to their +* values. Destroyed on exit. +* +* RHO (input/output) DOUBLE PRECISION +* On entry, the off-diagonal element associated with the rank-1 +* cut which originally split the two submatrices which are now +* being recombined. +* On exit, RHO has been modified to the value required by +* DLAED3. +* +* Z (input) DOUBLE PRECISION array, dimension (N) +* On entry, Z contains the updating vector (the last +* row of the first sub-eigenvector matrix and the first row of +* the second sub-eigenvector matrix). +* On exit, the contents of Z have been destroyed by the updating +* process. +* +* DLAMDA (output) DOUBLE PRECISION array, dimension (N) +* A copy of the first K eigenvalues which will be used by +* DLAED3 to form the secular equation. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* The first k values of the final deflation-altered z-vector +* which will be passed to DLAED3. +* +* Q2 (output) DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2) +* A copy of the first K eigenvectors which will be used by +* DLAED3 in a matrix multiply (DGEMM) to solve for the new +* eigenvectors. +* +* INDX (workspace) INTEGER array, dimension (N) +* The permutation used to sort the contents of DLAMDA into +* ascending order. +* +* INDXC (output) INTEGER array, dimension (N) +* The permutation used to arrange the columns of the deflated +* Q matrix into three groups: the first group contains non-zero +* elements only at and above N1, the second contains +* non-zero elements only below N1, and the third is dense. +* +* INDXP (workspace) INTEGER array, dimension (N) +* The permutation used to place deflated values of D at the end +* of the array. INDXP(1:K) points to the nondeflated D-values +* and INDXP(K+1:N) points to the deflated eigenvalues. +* +* COLTYP (workspace/output) INTEGER array, dimension (N) +* During execution, a label which will indicate which of the +* following types a column in the Q2 matrix is: +* 1 : non-zero in the upper half only; +* 2 : dense; +* 3 : non-zero in the lower half only; +* 4 : deflated. +* On exit, COLTYP(i) is the number of columns of type i, +* for i=1 to 4 only. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* Modified by Francoise Tisseur, University of Tennessee. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION MONE, ZERO, ONE, TWO, EIGHT + PARAMETER ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0, + $ TWO = 2.0D0, EIGHT = 8.0D0 ) +* .. +* .. Local Arrays .. + INTEGER CTOT( 4 ), PSM( 4 ) +* .. +* .. Local Scalars .. + INTEGER CT, I, IMAX, IQ1, IQ2, J, JMAX, JS, K2, N1P1, + $ N2, NJ, PJ + DOUBLE PRECISION C, EPS, S, T, TAU, TOL +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DLAPY2 + EXTERNAL IDAMAX, DLAMCH, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLACPY, DLAMRG, DROT, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( MIN( 1, ( N / 2 ) ).GT.N1 .OR. ( N / 2 ).LT.N1 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAED2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + N2 = N - N1 + N1P1 = N1 + 1 +* + IF( RHO.LT.ZERO ) THEN + CALL DSCAL( N2, MONE, Z( N1P1 ), 1 ) + END IF +* +* Normalize z so that norm(z) = 1. Since z is the concatenation of +* two normalized vectors, norm2(z) = sqrt(2). +* + T = ONE / SQRT( TWO ) + CALL DSCAL( N, T, Z, 1 ) +* +* RHO = ABS( norm(z)**2 * RHO ) +* + RHO = ABS( TWO*RHO ) +* +* Sort the eigenvalues into increasing order +* + DO 10 I = N1P1, N + INDXQ( I ) = INDXQ( I ) + N1 + 10 CONTINUE +* +* re-integrate the deflated parts from the last pass +* + DO 20 I = 1, N + DLAMDA( I ) = D( INDXQ( I ) ) + 20 CONTINUE + CALL DLAMRG( N1, N2, DLAMDA, 1, 1, INDXC ) + DO 30 I = 1, N + INDX( I ) = INDXQ( INDXC( I ) ) + 30 CONTINUE +* +* Calculate the allowable deflation tolerance +* + IMAX = IDAMAX( N, Z, 1 ) + JMAX = IDAMAX( N, D, 1 ) + EPS = DLAMCH( 'Epsilon' ) + TOL = EIGHT*EPS*MAX( ABS( D( JMAX ) ), ABS( Z( IMAX ) ) ) +* +* If the rank-1 modifier is small enough, no more needs to be done +* except to reorganize Q so that its columns correspond with the +* elements in D. +* + IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN + K = 0 + IQ2 = 1 + DO 40 J = 1, N + I = INDX( J ) + CALL DCOPY( N, Q( 1, I ), 1, Q2( IQ2 ), 1 ) + DLAMDA( J ) = D( I ) + IQ2 = IQ2 + N + 40 CONTINUE + CALL DLACPY( 'A', N, N, Q2, N, Q, LDQ ) + CALL DCOPY( N, DLAMDA, 1, D, 1 ) + GO TO 190 + END IF +* +* If there are multiple eigenvalues then the problem deflates. Here +* the number of equal eigenvalues are found. As each equal +* eigenvalue is found, an elementary reflector is computed to rotate +* the corresponding eigensubspace so that the corresponding +* components of Z are zero in this new basis. +* + DO 50 I = 1, N1 + COLTYP( I ) = 1 + 50 CONTINUE + DO 60 I = N1P1, N + COLTYP( I ) = 3 + 60 CONTINUE +* +* + K = 0 + K2 = N + 1 + DO 70 J = 1, N + NJ = INDX( J ) + IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + COLTYP( NJ ) = 4 + INDXP( K2 ) = NJ + IF( J.EQ.N ) + $ GO TO 100 + ELSE + PJ = NJ + GO TO 80 + END IF + 70 CONTINUE + 80 CONTINUE + J = J + 1 + NJ = INDX( J ) + IF( J.GT.N ) + $ GO TO 100 + IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + COLTYP( NJ ) = 4 + INDXP( K2 ) = NJ + ELSE +* +* Check if eigenvalues are close enough to allow deflation. +* + S = Z( PJ ) + C = Z( NJ ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = DLAPY2( C, S ) + T = D( NJ ) - D( PJ ) + C = C / TAU + S = -S / TAU + IF( ABS( T*C*S ).LE.TOL ) THEN +* +* Deflation is possible. +* + Z( NJ ) = TAU + Z( PJ ) = ZERO + IF( COLTYP( NJ ).NE.COLTYP( PJ ) ) + $ COLTYP( NJ ) = 2 + COLTYP( PJ ) = 4 + CALL DROT( N, Q( 1, PJ ), 1, Q( 1, NJ ), 1, C, S ) + T = D( PJ )*C**2 + D( NJ )*S**2 + D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2 + D( PJ ) = T + K2 = K2 - 1 + I = 1 + 90 CONTINUE + IF( K2+I.LE.N ) THEN + IF( D( PJ ).LT.D( INDXP( K2+I ) ) ) THEN + INDXP( K2+I-1 ) = INDXP( K2+I ) + INDXP( K2+I ) = PJ + I = I + 1 + GO TO 90 + ELSE + INDXP( K2+I-1 ) = PJ + END IF + ELSE + INDXP( K2+I-1 ) = PJ + END IF + PJ = NJ + ELSE + K = K + 1 + DLAMDA( K ) = D( PJ ) + W( K ) = Z( PJ ) + INDXP( K ) = PJ + PJ = NJ + END IF + END IF + GO TO 80 + 100 CONTINUE +* +* Record the last eigenvalue. +* + K = K + 1 + DLAMDA( K ) = D( PJ ) + W( K ) = Z( PJ ) + INDXP( K ) = PJ +* +* Count up the total number of the various types of columns, then +* form a permutation which positions the four column types into +* four uniform groups (although one or more of these groups may be +* empty). +* + DO 110 J = 1, 4 + CTOT( J ) = 0 + 110 CONTINUE + DO 120 J = 1, N + CT = COLTYP( J ) + CTOT( CT ) = CTOT( CT ) + 1 + 120 CONTINUE +* +* PSM(*) = Position in SubMatrix (of types 1 through 4) +* + PSM( 1 ) = 1 + PSM( 2 ) = 1 + CTOT( 1 ) + PSM( 3 ) = PSM( 2 ) + CTOT( 2 ) + PSM( 4 ) = PSM( 3 ) + CTOT( 3 ) + K = N - CTOT( 4 ) +* +* Fill out the INDXC array so that the permutation which it induces +* will place all type-1 columns first, all type-2 columns next, +* then all type-3's, and finally all type-4's. +* + DO 130 J = 1, N + JS = INDXP( J ) + CT = COLTYP( JS ) + INDX( PSM( CT ) ) = JS + INDXC( PSM( CT ) ) = J + PSM( CT ) = PSM( CT ) + 1 + 130 CONTINUE +* +* Sort the eigenvalues and corresponding eigenvectors into DLAMDA +* and Q2 respectively. The eigenvalues/vectors which were not +* deflated go into the first K slots of DLAMDA and Q2 respectively, +* while those which were deflated go into the last N - K slots. +* + I = 1 + IQ1 = 1 + IQ2 = 1 + ( CTOT( 1 )+CTOT( 2 ) )*N1 + DO 140 J = 1, CTOT( 1 ) + JS = INDX( I ) + CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 ) + Z( I ) = D( JS ) + I = I + 1 + IQ1 = IQ1 + N1 + 140 CONTINUE +* + DO 150 J = 1, CTOT( 2 ) + JS = INDX( I ) + CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 ) + CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 ) + Z( I ) = D( JS ) + I = I + 1 + IQ1 = IQ1 + N1 + IQ2 = IQ2 + N2 + 150 CONTINUE +* + DO 160 J = 1, CTOT( 3 ) + JS = INDX( I ) + CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 ) + Z( I ) = D( JS ) + I = I + 1 + IQ2 = IQ2 + N2 + 160 CONTINUE +* + IQ1 = IQ2 + DO 170 J = 1, CTOT( 4 ) + JS = INDX( I ) + CALL DCOPY( N, Q( 1, JS ), 1, Q2( IQ2 ), 1 ) + IQ2 = IQ2 + N + Z( I ) = D( JS ) + I = I + 1 + 170 CONTINUE +* +* The deflated eigenvalues and their corresponding vectors go back +* into the last N - K slots of D and Q respectively. +* + CALL DLACPY( 'A', N, CTOT( 4 ), Q2( IQ1 ), N, Q( 1, K+1 ), LDQ ) + CALL DCOPY( N-K, Z( K+1 ), 1, D( K+1 ), 1 ) +* +* Copy CTOT into COLTYP for referencing in DLAED3. +* + DO 180 J = 1, 4 + COLTYP( J ) = CTOT( J ) + 180 CONTINUE +* + 190 CONTINUE + RETURN +* +* End of DLAED2 +* + END diff --git a/costa/native/external/lapack/dlaed3.f b/costa/native/external/lapack/dlaed3.f new file mode 100644 index 000000000..5f14ab958 --- /dev/null +++ b/costa/native/external/lapack/dlaed3.f @@ -0,0 +1,265 @@ + SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX, + $ CTOT, W, S, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDQ, N, N1 + DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. + INTEGER CTOT( * ), INDX( * ) + DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ), + $ S( * ), W( * ) +* .. +* +* Purpose +* ======= +* +* DLAED3 finds the roots of the secular equation, as defined by the +* values in D, W, and RHO, between 1 and K. It makes the +* appropriate calls to DLAED4 and then updates the eigenvectors by +* multiplying the matrix of eigenvectors of the pair of eigensystems +* being combined by the matrix of eigenvectors of the K-by-K system +* which is solved here. +* +* This code makes very mild assumptions about floating point +* arithmetic. It will work on machines with a guard digit in +* add/subtract, or on those binary machines without guard digits +* which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. +* It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* K (input) INTEGER +* The number of terms in the rational function to be solved by +* DLAED4. K >= 0. +* +* N (input) INTEGER +* The number of rows and columns in the Q matrix. +* N >= K (deflation may result in N>K). +* +* N1 (input) INTEGER +* The location of the last eigenvalue in the leading submatrix. +* min(1,N) <= N1 <= N/2. +* +* D (output) DOUBLE PRECISION array, dimension (N) +* D(I) contains the updated eigenvalues for +* 1 <= I <= K. +* +* Q (output) DOUBLE PRECISION array, dimension (LDQ,N) +* Initially the first K columns are used as workspace. +* On output the columns 1 to K contain +* the updated eigenvectors. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* RHO (input) DOUBLE PRECISION +* The value of the parameter in the rank one update equation. +* RHO >= 0 required. +* +* DLAMDA (input/output) DOUBLE PRECISION array, dimension (K) +* The first K elements of this array contain the old roots +* of the deflated updating problem. These are the poles +* of the secular equation. May be changed on output by +* having lowest order bit set to zero on Cray X-MP, Cray Y-MP, +* Cray-2, or Cray C-90, as described above. +* +* Q2 (input) DOUBLE PRECISION array, dimension (LDQ2, N) +* The first K columns of this matrix contain the non-deflated +* eigenvectors for the split problem. +* +* INDX (input) INTEGER array, dimension (N) +* The permutation used to arrange the columns of the deflated +* Q matrix into three groups (see DLAED2). +* The rows of the eigenvectors found by DLAED4 must be likewise +* permuted before the matrix multiply can take place. +* +* CTOT (input) INTEGER array, dimension (4) +* A count of the total number of the various types of columns +* in Q, as described in INDX. The fourth column type is any +* column which has been deflated. +* +* W (input/output) DOUBLE PRECISION array, dimension (K) +* The first K elements of this array contain the components +* of the deflation-adjusted updating vector. Destroyed on +* output. +* +* S (workspace) DOUBLE PRECISION array, dimension (N1 + 1)*K +* Will contain the eigenvectors of the repaired matrix which +* will be multiplied by the previously accumulated eigenvectors +* to update the system. +* +* LDS (input) INTEGER +* The leading dimension of S. LDS >= max(1,K). +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an eigenvalue did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* Modified by Francoise Tisseur, University of Tennessee. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I, II, IQ2, J, N12, N2, N23 + DOUBLE PRECISION TEMP +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3, DNRM2 + EXTERNAL DLAMC3, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DLACPY, DLAED4, DLASET, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( K.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.K ) THEN + INFO = -2 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAED3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) + $ RETURN +* +* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can +* be computed with high relative accuracy (barring over/underflow). +* This is a problem on machines without a guard digit in +* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). +* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), +* which on any of these machines zeros out the bottommost +* bit of DLAMDA(I) if it is 1; this makes the subsequent +* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation +* occurs. On binary machines with a guard digit (almost all +* machines) it does not change DLAMDA(I) at all. On hexadecimal +* and decimal machines with a guard digit, it slightly +* changes the bottommost bits of DLAMDA(I). It does not account +* for hexadecimal or decimal machines without guard digits +* (we know of none). We use a subroutine call to compute +* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating +* this code. +* + DO 10 I = 1, K + DLAMDA( I ) = DLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I ) + 10 CONTINUE +* + DO 20 J = 1, K + CALL DLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO ) +* +* If the zero finder fails, the computation is terminated. +* + IF( INFO.NE.0 ) + $ GO TO 120 + 20 CONTINUE +* + IF( K.EQ.1 ) + $ GO TO 110 + IF( K.EQ.2 ) THEN + DO 30 J = 1, K + W( 1 ) = Q( 1, J ) + W( 2 ) = Q( 2, J ) + II = INDX( 1 ) + Q( 1, J ) = W( II ) + II = INDX( 2 ) + Q( 2, J ) = W( II ) + 30 CONTINUE + GO TO 110 + END IF +* +* Compute updated W. +* + CALL DCOPY( K, W, 1, S, 1 ) +* +* Initialize W(I) = Q(I,I) +* + CALL DCOPY( K, Q, LDQ+1, W, 1 ) + DO 60 J = 1, K + DO 40 I = 1, J - 1 + W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) ) + 40 CONTINUE + DO 50 I = J + 1, K + W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) ) + 50 CONTINUE + 60 CONTINUE + DO 70 I = 1, K + W( I ) = SIGN( SQRT( -W( I ) ), S( I ) ) + 70 CONTINUE +* +* Compute eigenvectors of the modified rank-1 modification. +* + DO 100 J = 1, K + DO 80 I = 1, K + S( I ) = W( I ) / Q( I, J ) + 80 CONTINUE + TEMP = DNRM2( K, S, 1 ) + DO 90 I = 1, K + II = INDX( I ) + Q( I, J ) = S( II ) / TEMP + 90 CONTINUE + 100 CONTINUE +* +* Compute the updated eigenvectors. +* + 110 CONTINUE +* + N2 = N - N1 + N12 = CTOT( 1 ) + CTOT( 2 ) + N23 = CTOT( 2 ) + CTOT( 3 ) +* + CALL DLACPY( 'A', N23, K, Q( CTOT( 1 )+1, 1 ), LDQ, S, N23 ) + IQ2 = N1*N12 + 1 + IF( N23.NE.0 ) THEN + CALL DGEMM( 'N', 'N', N2, K, N23, ONE, Q2( IQ2 ), N2, S, N23, + $ ZERO, Q( N1+1, 1 ), LDQ ) + ELSE + CALL DLASET( 'A', N2, K, ZERO, ZERO, Q( N1+1, 1 ), LDQ ) + END IF +* + CALL DLACPY( 'A', N12, K, Q, LDQ, S, N12 ) + IF( N12.NE.0 ) THEN + CALL DGEMM( 'N', 'N', N1, K, N12, ONE, Q2, N1, S, N12, ZERO, Q, + $ LDQ ) + ELSE + CALL DLASET( 'A', N1, K, ZERO, ZERO, Q( 1, 1 ), LDQ ) + END IF +* +* + 120 CONTINUE + RETURN +* +* End of DLAED3 +* + END diff --git a/costa/native/external/lapack/dlaed4.f b/costa/native/external/lapack/dlaed4.f new file mode 100644 index 000000000..b82cb1146 --- /dev/null +++ b/costa/native/external/lapack/dlaed4.f @@ -0,0 +1,846 @@ + SUBROUTINE DLAED4( N, I, D, Z, DELTA, RHO, DLAM, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* December 23, 1999 +* +* .. Scalar Arguments .. + INTEGER I, INFO, N + DOUBLE PRECISION DLAM, RHO +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), DELTA( * ), Z( * ) +* .. +* +* Purpose +* ======= +* +* This subroutine computes the I-th updated eigenvalue of a symmetric +* rank-one modification to a diagonal matrix whose elements are +* given in the array d, and that +* +* D(i) < D(j) for i < j +* +* and that RHO > 0. This is arranged by the calling routine, and is +* no loss in generality. The rank-one modified system is thus +* +* diag( D ) + RHO * Z * Z_transpose. +* +* where we assume the Euclidean norm of Z is 1. +* +* The method consists of approximating the rational functions in the +* secular equation by simpler interpolating rational functions. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The length of all arrays. +* +* I (input) INTEGER +* The index of the eigenvalue to be computed. 1 <= I <= N. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The original eigenvalues. It is assumed that they are in +* order, D(I) < D(J) for I < J. +* +* Z (input) DOUBLE PRECISION array, dimension (N) +* The components of the updating vector. +* +* DELTA (output) DOUBLE PRECISION array, dimension (N) +* If N .ne. 1, DELTA contains (D(j) - lambda_I) in its j-th +* component. If N = 1, then DELTA(1) = 1. The vector DELTA +* contains the information necessary to construct the +* eigenvectors. +* +* RHO (input) DOUBLE PRECISION +* The scalar in the symmetric updating formula. +* +* DLAM (output) DOUBLE PRECISION +* The computed lambda_I, the I-th updated eigenvalue. +* +* INFO (output) INTEGER +* = 0: successful exit +* > 0: if INFO = 1, the updating process failed. +* +* Internal Parameters +* =================== +* +* Logical variable ORGATI (origin-at-i?) is used for distinguishing +* whether D(i) or D(i+1) is treated as the origin. +* +* ORGATI = .true. origin at i +* ORGATI = .false. origin at i+1 +* +* Logical variable SWTCH3 (switch-for-3-poles?) is for noting +* if we are working with THREE poles! +* +* MAXIT is the maximum number of iterations allowed for each +* eigenvalue. +* +* Further Details +* =============== +* +* Based on contributions by +* Ren-Cang Li, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) + DOUBLE PRECISION ZERO, ONE, TWO, THREE, FOUR, EIGHT, TEN + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ THREE = 3.0D0, FOUR = 4.0D0, EIGHT = 8.0D0, + $ TEN = 10.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL ORGATI, SWTCH, SWTCH3 + INTEGER II, IIM1, IIP1, IP1, ITER, J, NITER + DOUBLE PRECISION A, B, C, DEL, DLTLB, DLTUB, DPHI, DPSI, DW, + $ EPS, ERRETM, ETA, MIDPT, PHI, PREW, PSI, + $ RHOINV, TAU, TEMP, TEMP1, W +* .. +* .. Local Arrays .. + DOUBLE PRECISION ZZ( 3 ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLAED5, DLAED6 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Since this routine is called in an inner loop, we do no argument +* checking. +* +* Quick return for N=1 and 2. +* + INFO = 0 + IF( N.EQ.1 ) THEN +* +* Presumably, I=1 upon entry +* + DLAM = D( 1 ) + RHO*Z( 1 )*Z( 1 ) + DELTA( 1 ) = ONE + RETURN + END IF + IF( N.EQ.2 ) THEN + CALL DLAED5( I, D, Z, DELTA, RHO, DLAM ) + RETURN + END IF +* +* Compute machine epsilon +* + EPS = DLAMCH( 'Epsilon' ) + RHOINV = ONE / RHO +* +* The case I = N +* + IF( I.EQ.N ) THEN +* +* Initialize some basic variables +* + II = N - 1 + NITER = 1 +* +* Calculate initial guess +* + MIDPT = RHO / TWO +* +* If ||Z||_2 is not one, then TEMP should be set to +* RHO * ||Z||_2^2 / TWO +* + DO 10 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - MIDPT + 10 CONTINUE +* + PSI = ZERO + DO 20 J = 1, N - 2 + PSI = PSI + Z( J )*Z( J ) / DELTA( J ) + 20 CONTINUE +* + C = RHOINV + PSI + W = C + Z( II )*Z( II ) / DELTA( II ) + + $ Z( N )*Z( N ) / DELTA( N ) +* + IF( W.LE.ZERO ) THEN + TEMP = Z( N-1 )*Z( N-1 ) / ( D( N )-D( N-1 )+RHO ) + + $ Z( N )*Z( N ) / RHO + IF( C.LE.TEMP ) THEN + TAU = RHO + ELSE + DEL = D( N ) - D( N-1 ) + A = -C*DEL + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N ) + B = Z( N )*Z( N )*DEL + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A ) + ELSE + TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C ) + END IF + END IF +* +* It can be proved that +* D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO +* + DLTLB = MIDPT + DLTUB = RHO + ELSE + DEL = D( N ) - D( N-1 ) + A = -C*DEL + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N ) + B = Z( N )*Z( N )*DEL + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A ) + ELSE + TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C ) + END IF +* +* It can be proved that +* D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2 +* + DLTLB = ZERO + DLTUB = MIDPT + END IF +* + DO 30 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - TAU + 30 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 40 J = 1, II + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 40 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / DELTA( N ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + DLAM = D( I ) + TAU + GO TO 250 + END IF +* + IF( W.LE.ZERO ) THEN + DLTLB = MAX( DLTLB, TAU ) + ELSE + DLTUB = MIN( DLTUB, TAU ) + END IF +* +* Calculate the new step +* + NITER = NITER + 1 + C = W - DELTA( N-1 )*DPSI - DELTA( N )*DPHI + A = ( DELTA( N-1 )+DELTA( N ) )*W - + $ DELTA( N-1 )*DELTA( N )*( DPSI+DPHI ) + B = DELTA( N-1 )*DELTA( N )*W + IF( C.LT.ZERO ) + $ C = ABS( C ) + IF( C.EQ.ZERO ) THEN +* ETA = B/A +* ETA = RHO - TAU + ETA = DLTUB - TAU + ELSE IF( A.GE.ZERO ) THEN + ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GT.ZERO ) + $ ETA = -W / ( DPSI+DPHI ) + TEMP = TAU + ETA + IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( DLTUB-TAU ) / TWO + ELSE + ETA = ( DLTLB-TAU ) / TWO + END IF + END IF + DO 50 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + 50 CONTINUE +* + TAU = TAU + ETA +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 60 J = 1, II + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 60 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / DELTA( N ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI +* +* Main loop to update the values of the array DELTA +* + ITER = NITER + 1 +* + DO 90 NITER = ITER, MAXIT +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + DLAM = D( I ) + TAU + GO TO 250 + END IF +* + IF( W.LE.ZERO ) THEN + DLTLB = MAX( DLTLB, TAU ) + ELSE + DLTUB = MIN( DLTUB, TAU ) + END IF +* +* Calculate the new step +* + C = W - DELTA( N-1 )*DPSI - DELTA( N )*DPHI + A = ( DELTA( N-1 )+DELTA( N ) )*W - + $ DELTA( N-1 )*DELTA( N )*( DPSI+DPHI ) + B = DELTA( N-1 )*DELTA( N )*W + IF( A.GE.ZERO ) THEN + ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GT.ZERO ) + $ ETA = -W / ( DPSI+DPHI ) + TEMP = TAU + ETA + IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( DLTUB-TAU ) / TWO + ELSE + ETA = ( DLTLB-TAU ) / TWO + END IF + END IF + DO 70 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + 70 CONTINUE +* + TAU = TAU + ETA +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 80 J = 1, II + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 80 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / DELTA( N ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI + 90 CONTINUE +* +* Return with INFO = 1, NITER = MAXIT and not converged +* + INFO = 1 + DLAM = D( I ) + TAU + GO TO 250 +* +* End for the case I = N +* + ELSE +* +* The case for I < N +* + NITER = 1 + IP1 = I + 1 +* +* Calculate initial guess +* + DEL = D( IP1 ) - D( I ) + MIDPT = DEL / TWO + DO 100 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - MIDPT + 100 CONTINUE +* + PSI = ZERO + DO 110 J = 1, I - 1 + PSI = PSI + Z( J )*Z( J ) / DELTA( J ) + 110 CONTINUE +* + PHI = ZERO + DO 120 J = N, I + 2, -1 + PHI = PHI + Z( J )*Z( J ) / DELTA( J ) + 120 CONTINUE + C = RHOINV + PSI + PHI + W = C + Z( I )*Z( I ) / DELTA( I ) + + $ Z( IP1 )*Z( IP1 ) / DELTA( IP1 ) +* + IF( W.GT.ZERO ) THEN +* +* d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 +* +* We choose d(i) as origin. +* + ORGATI = .TRUE. + A = C*DEL + Z( I )*Z( I ) + Z( IP1 )*Z( IP1 ) + B = Z( I )*Z( I )*DEL + IF( A.GT.ZERO ) THEN + TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + ELSE + TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + END IF + DLTLB = ZERO + DLTUB = MIDPT + ELSE +* +* (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) +* +* We choose d(i+1) as origin. +* + ORGATI = .FALSE. + A = C*DEL - Z( I )*Z( I ) - Z( IP1 )*Z( IP1 ) + B = Z( IP1 )*Z( IP1 )*DEL + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( A-SQRT( ABS( A*A+FOUR*B*C ) ) ) + ELSE + TAU = -( A+SQRT( ABS( A*A+FOUR*B*C ) ) ) / ( TWO*C ) + END IF + DLTLB = -MIDPT + DLTUB = ZERO + END IF +* + IF( ORGATI ) THEN + DO 130 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - TAU + 130 CONTINUE + ELSE + DO 140 J = 1, N + DELTA( J ) = ( D( J )-D( IP1 ) ) - TAU + 140 CONTINUE + END IF + IF( ORGATI ) THEN + II = I + ELSE + II = I + 1 + END IF + IIM1 = II - 1 + IIP1 = II + 1 +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 150 J = 1, IIM1 + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 150 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 160 J = N, IIP1, -1 + TEMP = Z( J ) / DELTA( J ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 160 CONTINUE +* + W = RHOINV + PHI + PSI +* +* W is the value of the secular function with +* its ii-th element removed. +* + SWTCH3 = .FALSE. + IF( ORGATI ) THEN + IF( W.LT.ZERO ) + $ SWTCH3 = .TRUE. + ELSE + IF( W.GT.ZERO ) + $ SWTCH3 = .TRUE. + END IF + IF( II.EQ.1 .OR. II.EQ.N ) + $ SWTCH3 = .FALSE. +* + TEMP = Z( II ) / DELTA( II ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = W + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU )*DW +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + IF( ORGATI ) THEN + DLAM = D( I ) + TAU + ELSE + DLAM = D( IP1 ) + TAU + END IF + GO TO 250 + END IF +* + IF( W.LE.ZERO ) THEN + DLTLB = MAX( DLTLB, TAU ) + ELSE + DLTUB = MIN( DLTUB, TAU ) + END IF +* +* Calculate the new step +* + NITER = NITER + 1 + IF( .NOT.SWTCH3 ) THEN + IF( ORGATI ) THEN + C = W - DELTA( IP1 )*DW - ( D( I )-D( IP1 ) )* + $ ( Z( I ) / DELTA( I ) )**2 + ELSE + C = W - DELTA( I )*DW - ( D( IP1 )-D( I ) )* + $ ( Z( IP1 ) / DELTA( IP1 ) )**2 + END IF + A = ( DELTA( I )+DELTA( IP1 ) )*W - + $ DELTA( I )*DELTA( IP1 )*DW + B = DELTA( I )*DELTA( IP1 )*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DELTA( IP1 )*DELTA( IP1 )* + $ ( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + DELTA( I )*DELTA( I )* + $ ( DPSI+DPHI ) + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + ELSE +* +* Interpolation using THREE most relevant poles +* + TEMP = RHOINV + PSI + PHI + IF( ORGATI ) THEN + TEMP1 = Z( IIM1 ) / DELTA( IIM1 ) + TEMP1 = TEMP1*TEMP1 + C = TEMP - DELTA( IIP1 )*( DPSI+DPHI ) - + $ ( D( IIM1 )-D( IIP1 ) )*TEMP1 + ZZ( 1 ) = Z( IIM1 )*Z( IIM1 ) + ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )* + $ ( ( DPSI-TEMP1 )+DPHI ) + ELSE + TEMP1 = Z( IIP1 ) / DELTA( IIP1 ) + TEMP1 = TEMP1*TEMP1 + C = TEMP - DELTA( IIM1 )*( DPSI+DPHI ) - + $ ( D( IIP1 )-D( IIM1 ) )*TEMP1 + ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )* + $ ( DPSI+( DPHI-TEMP1 ) ) + ZZ( 3 ) = Z( IIP1 )*Z( IIP1 ) + END IF + ZZ( 2 ) = Z( II )*Z( II ) + CALL DLAED6( NITER, ORGATI, C, DELTA( IIM1 ), ZZ, W, ETA, + $ INFO ) + IF( INFO.NE.0 ) + $ GO TO 250 + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GE.ZERO ) + $ ETA = -W / DW + TEMP = TAU + ETA + IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( DLTUB-TAU ) / TWO + ELSE + ETA = ( DLTLB-TAU ) / TWO + END IF + END IF +* + PREW = W +* + 170 CONTINUE + DO 180 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + 180 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 190 J = 1, IIM1 + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 190 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 200 J = N, IIP1, -1 + TEMP = Z( J ) / DELTA( J ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 200 CONTINUE +* + TEMP = Z( II ) / DELTA( II ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = RHOINV + PHI + PSI + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU+ETA )*DW +* + SWTCH = .FALSE. + IF( ORGATI ) THEN + IF( -W.GT.ABS( PREW ) / TEN ) + $ SWTCH = .TRUE. + ELSE + IF( W.GT.ABS( PREW ) / TEN ) + $ SWTCH = .TRUE. + END IF +* + TAU = TAU + ETA +* +* Main loop to update the values of the array DELTA +* + ITER = NITER + 1 +* + DO 240 NITER = ITER, MAXIT +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + IF( ORGATI ) THEN + DLAM = D( I ) + TAU + ELSE + DLAM = D( IP1 ) + TAU + END IF + GO TO 250 + END IF +* + IF( W.LE.ZERO ) THEN + DLTLB = MAX( DLTLB, TAU ) + ELSE + DLTUB = MIN( DLTUB, TAU ) + END IF +* +* Calculate the new step +* + IF( .NOT.SWTCH3 ) THEN + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + C = W - DELTA( IP1 )*DW - + $ ( D( I )-D( IP1 ) )*( Z( I ) / DELTA( I ) )**2 + ELSE + C = W - DELTA( I )*DW - ( D( IP1 )-D( I ) )* + $ ( Z( IP1 ) / DELTA( IP1 ) )**2 + END IF + ELSE + TEMP = Z( II ) / DELTA( II ) + IF( ORGATI ) THEN + DPSI = DPSI + TEMP*TEMP + ELSE + DPHI = DPHI + TEMP*TEMP + END IF + C = W - DELTA( I )*DPSI - DELTA( IP1 )*DPHI + END IF + A = ( DELTA( I )+DELTA( IP1 ) )*W - + $ DELTA( I )*DELTA( IP1 )*DW + B = DELTA( I )*DELTA( IP1 )*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DELTA( IP1 )* + $ DELTA( IP1 )*( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + + $ DELTA( I )*DELTA( I )*( DPSI+DPHI ) + END IF + ELSE + A = DELTA( I )*DELTA( I )*DPSI + + $ DELTA( IP1 )*DELTA( IP1 )*DPHI + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + ELSE +* +* Interpolation using THREE most relevant poles +* + TEMP = RHOINV + PSI + PHI + IF( SWTCH ) THEN + C = TEMP - DELTA( IIM1 )*DPSI - DELTA( IIP1 )*DPHI + ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*DPSI + ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*DPHI + ELSE + IF( ORGATI ) THEN + TEMP1 = Z( IIM1 ) / DELTA( IIM1 ) + TEMP1 = TEMP1*TEMP1 + C = TEMP - DELTA( IIP1 )*( DPSI+DPHI ) - + $ ( D( IIM1 )-D( IIP1 ) )*TEMP1 + ZZ( 1 ) = Z( IIM1 )*Z( IIM1 ) + ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )* + $ ( ( DPSI-TEMP1 )+DPHI ) + ELSE + TEMP1 = Z( IIP1 ) / DELTA( IIP1 ) + TEMP1 = TEMP1*TEMP1 + C = TEMP - DELTA( IIM1 )*( DPSI+DPHI ) - + $ ( D( IIP1 )-D( IIM1 ) )*TEMP1 + ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )* + $ ( DPSI+( DPHI-TEMP1 ) ) + ZZ( 3 ) = Z( IIP1 )*Z( IIP1 ) + END IF + END IF + CALL DLAED6( NITER, ORGATI, C, DELTA( IIM1 ), ZZ, W, ETA, + $ INFO ) + IF( INFO.NE.0 ) + $ GO TO 250 + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GE.ZERO ) + $ ETA = -W / DW + TEMP = TAU + ETA + IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( DLTUB-TAU ) / TWO + ELSE + ETA = ( DLTLB-TAU ) / TWO + END IF + END IF +* + DO 210 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + 210 CONTINUE +* + TAU = TAU + ETA + PREW = W +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 220 J = 1, IIM1 + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 220 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 230 J = N, IIP1, -1 + TEMP = Z( J ) / DELTA( J ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 230 CONTINUE +* + TEMP = Z( II ) / DELTA( II ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = RHOINV + PHI + PSI + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU )*DW + IF( W*PREW.GT.ZERO .AND. ABS( W ).GT.ABS( PREW ) / TEN ) + $ SWTCH = .NOT.SWTCH +* + 240 CONTINUE +* +* Return with INFO = 1, NITER = MAXIT and not converged +* + INFO = 1 + IF( ORGATI ) THEN + DLAM = D( I ) + TAU + ELSE + DLAM = D( IP1 ) + TAU + END IF +* + END IF +* + 250 CONTINUE +* + RETURN +* +* End of DLAED4 +* + END diff --git a/costa/native/external/lapack/dlaed5.f b/costa/native/external/lapack/dlaed5.f new file mode 100644 index 000000000..8b8f9447b --- /dev/null +++ b/costa/native/external/lapack/dlaed5.f @@ -0,0 +1,125 @@ + SUBROUTINE DLAED5( I, D, Z, DELTA, RHO, DLAM ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER I + DOUBLE PRECISION DLAM, RHO +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( 2 ), DELTA( 2 ), Z( 2 ) +* .. +* +* Purpose +* ======= +* +* This subroutine computes the I-th eigenvalue of a symmetric rank-one +* modification of a 2-by-2 diagonal matrix +* +* diag( D ) + RHO * Z * transpose(Z) . +* +* The diagonal elements in the array D are assumed to satisfy +* +* D(i) < D(j) for i < j . +* +* We also assume RHO > 0 and that the Euclidean norm of the vector +* Z is one. +* +* Arguments +* ========= +* +* I (input) INTEGER +* The index of the eigenvalue to be computed. I = 1 or I = 2. +* +* D (input) DOUBLE PRECISION array, dimension (2) +* The original eigenvalues. We assume D(1) < D(2). +* +* Z (input) DOUBLE PRECISION array, dimension (2) +* The components of the updating vector. +* +* DELTA (output) DOUBLE PRECISION array, dimension (2) +* The vector DELTA contains the information necessary +* to construct the eigenvectors. +* +* RHO (input) DOUBLE PRECISION +* The scalar in the symmetric updating formula. +* +* DLAM (output) DOUBLE PRECISION +* The computed lambda_I, the I-th updated eigenvalue. +* +* Further Details +* =============== +* +* Based on contributions by +* Ren-Cang Li, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, FOUR + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ FOUR = 4.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION B, C, DEL, TAU, TEMP, W +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + DEL = D( 2 ) - D( 1 ) + IF( I.EQ.1 ) THEN + W = ONE + TWO*RHO*( Z( 2 )*Z( 2 )-Z( 1 )*Z( 1 ) ) / DEL + IF( W.GT.ZERO ) THEN + B = DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 1 )*Z( 1 )*DEL +* +* B > ZERO, always +* + TAU = TWO*C / ( B+SQRT( ABS( B*B-FOUR*C ) ) ) + DLAM = D( 1 ) + TAU + DELTA( 1 ) = -Z( 1 ) / TAU + DELTA( 2 ) = Z( 2 ) / ( DEL-TAU ) + ELSE + B = -DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 2 )*Z( 2 )*DEL + IF( B.GT.ZERO ) THEN + TAU = -TWO*C / ( B+SQRT( B*B+FOUR*C ) ) + ELSE + TAU = ( B-SQRT( B*B+FOUR*C ) ) / TWO + END IF + DLAM = D( 2 ) + TAU + DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) + DELTA( 2 ) = -Z( 2 ) / TAU + END IF + TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) + DELTA( 1 ) = DELTA( 1 ) / TEMP + DELTA( 2 ) = DELTA( 2 ) / TEMP + ELSE +* +* Now I=2 +* + B = -DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 2 )*Z( 2 )*DEL + IF( B.GT.ZERO ) THEN + TAU = ( B+SQRT( B*B+FOUR*C ) ) / TWO + ELSE + TAU = TWO*C / ( -B+SQRT( B*B+FOUR*C ) ) + END IF + DLAM = D( 2 ) + TAU + DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) + DELTA( 2 ) = -Z( 2 ) / TAU + TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) + DELTA( 1 ) = DELTA( 1 ) / TEMP + DELTA( 2 ) = DELTA( 2 ) / TEMP + END IF + RETURN +* +* End OF DLAED5 +* + END diff --git a/costa/native/external/lapack/dlaed6.f b/costa/native/external/lapack/dlaed6.f new file mode 100644 index 000000000..8beb171a2 --- /dev/null +++ b/costa/native/external/lapack/dlaed6.f @@ -0,0 +1,299 @@ + SUBROUTINE DLAED6( KNITER, ORGATI, RHO, D, Z, FINIT, TAU, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + LOGICAL ORGATI + INTEGER INFO, KNITER + DOUBLE PRECISION FINIT, RHO, TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( 3 ), Z( 3 ) +* .. +* +* Purpose +* ======= +* +* DLAED6 computes the positive or negative root (closest to the origin) +* of +* z(1) z(2) z(3) +* f(x) = rho + --------- + ---------- + --------- +* d(1)-x d(2)-x d(3)-x +* +* It is assumed that +* +* if ORGATI = .true. the root is between d(2) and d(3); +* otherwise it is between d(1) and d(2) +* +* This routine will be called by DLAED4 when necessary. In most cases, +* the root sought is the smallest in magnitude, though it might not be +* in some extremely rare situations. +* +* Arguments +* ========= +* +* KNITER (input) INTEGER +* Refer to DLAED4 for its significance. +* +* ORGATI (input) LOGICAL +* If ORGATI is true, the needed root is between d(2) and +* d(3); otherwise it is between d(1) and d(2). See +* DLAED4 for further details. +* +* RHO (input) DOUBLE PRECISION +* Refer to the equation f(x) above. +* +* D (input) DOUBLE PRECISION array, dimension (3) +* D satisfies d(1) < d(2) < d(3). +* +* Z (input) DOUBLE PRECISION array, dimension (3) +* Each of the elements in z must be positive. +* +* FINIT (input) DOUBLE PRECISION +* The value of f at 0. It is more accurate than the one +* evaluated inside this routine (if someone wants to do +* so). +* +* TAU (output) DOUBLE PRECISION +* The root of the equation f(x). +* +* INFO (output) INTEGER +* = 0: successful exit +* > 0: if INFO = 1, failure to converge +* +* Further Details +* =============== +* +* Based on contributions by +* Ren-Cang Li, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXIT + PARAMETER ( MAXIT = 20 ) + DOUBLE PRECISION ZERO, ONE, TWO, THREE, FOUR, EIGHT + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ THREE = 3.0D0, FOUR = 4.0D0, EIGHT = 8.0D0 ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Local Arrays .. + DOUBLE PRECISION DSCALE( 3 ), ZSCALE( 3 ) +* .. +* .. Local Scalars .. + LOGICAL FIRST, SCALE + INTEGER I, ITER, NITER + DOUBLE PRECISION A, B, BASE, C, DDF, DF, EPS, ERRETM, ETA, F, + $ FC, SCLFAC, SCLINV, SMALL1, SMALL2, SMINV1, + $ SMINV2, TEMP, TEMP1, TEMP2, TEMP3, TEMP4 +* .. +* .. Save statement .. + SAVE FIRST, SMALL1, SMINV1, SMALL2, SMINV2, EPS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, MIN, SQRT +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / +* .. +* .. Executable Statements .. +* + INFO = 0 +* + NITER = 1 + TAU = ZERO + IF( KNITER.EQ.2 ) THEN + IF( ORGATI ) THEN + TEMP = ( D( 3 )-D( 2 ) ) / TWO + C = RHO + Z( 1 ) / ( ( D( 1 )-D( 2 ) )-TEMP ) + A = C*( D( 2 )+D( 3 ) ) + Z( 2 ) + Z( 3 ) + B = C*D( 2 )*D( 3 ) + Z( 2 )*D( 3 ) + Z( 3 )*D( 2 ) + ELSE + TEMP = ( D( 1 )-D( 2 ) ) / TWO + C = RHO + Z( 3 ) / ( ( D( 3 )-D( 2 ) )-TEMP ) + A = C*( D( 1 )+D( 2 ) ) + Z( 1 ) + Z( 2 ) + B = C*D( 1 )*D( 2 ) + Z( 1 )*D( 2 ) + Z( 2 )*D( 1 ) + END IF + TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) ) + A = A / TEMP + B = B / TEMP + C = C / TEMP + IF( C.EQ.ZERO ) THEN + TAU = B / A + ELSE IF( A.LE.ZERO ) THEN + TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + TEMP = RHO + Z( 1 ) / ( D( 1 )-TAU ) + + $ Z( 2 ) / ( D( 2 )-TAU ) + Z( 3 ) / ( D( 3 )-TAU ) + IF( ABS( FINIT ).LE.ABS( TEMP ) ) + $ TAU = ZERO + END IF +* +* On first call to routine, get machine parameters for +* possible scaling to avoid overflow +* + IF( FIRST ) THEN + EPS = DLAMCH( 'Epsilon' ) + BASE = DLAMCH( 'Base' ) + SMALL1 = BASE**( INT( LOG( DLAMCH( 'SafMin' ) ) / LOG( BASE ) / + $ THREE ) ) + SMINV1 = ONE / SMALL1 + SMALL2 = SMALL1*SMALL1 + SMINV2 = SMINV1*SMINV1 + FIRST = .FALSE. + END IF +* +* Determine if scaling of inputs necessary to avoid overflow +* when computing 1/TEMP**3 +* + IF( ORGATI ) THEN + TEMP = MIN( ABS( D( 2 )-TAU ), ABS( D( 3 )-TAU ) ) + ELSE + TEMP = MIN( ABS( D( 1 )-TAU ), ABS( D( 2 )-TAU ) ) + END IF + SCALE = .FALSE. + IF( TEMP.LE.SMALL1 ) THEN + SCALE = .TRUE. + IF( TEMP.LE.SMALL2 ) THEN +* +* Scale up by power of radix nearest 1/SAFMIN**(2/3) +* + SCLFAC = SMINV2 + SCLINV = SMALL2 + ELSE +* +* Scale up by power of radix nearest 1/SAFMIN**(1/3) +* + SCLFAC = SMINV1 + SCLINV = SMALL1 + END IF +* +* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) +* + DO 10 I = 1, 3 + DSCALE( I ) = D( I )*SCLFAC + ZSCALE( I ) = Z( I )*SCLFAC + 10 CONTINUE + TAU = TAU*SCLFAC + ELSE +* +* Copy D and Z to DSCALE and ZSCALE +* + DO 20 I = 1, 3 + DSCALE( I ) = D( I ) + ZSCALE( I ) = Z( I ) + 20 CONTINUE + END IF +* + FC = ZERO + DF = ZERO + DDF = ZERO + DO 30 I = 1, 3 + TEMP = ONE / ( DSCALE( I )-TAU ) + TEMP1 = ZSCALE( I )*TEMP + TEMP2 = TEMP1*TEMP + TEMP3 = TEMP2*TEMP + FC = FC + TEMP1 / DSCALE( I ) + DF = DF + TEMP2 + DDF = DDF + TEMP3 + 30 CONTINUE + F = FINIT + TAU*FC +* + IF( ABS( F ).LE.ZERO ) + $ GO TO 60 +* +* Iteration begins +* +* It is not hard to see that +* +* 1) Iterations will go up monotonically +* if FINIT < 0; +* +* 2) Iterations will go down monotonically +* if FINIT > 0. +* + ITER = NITER + 1 +* + DO 50 NITER = ITER, MAXIT +* + IF( ORGATI ) THEN + TEMP1 = DSCALE( 2 ) - TAU + TEMP2 = DSCALE( 3 ) - TAU + ELSE + TEMP1 = DSCALE( 1 ) - TAU + TEMP2 = DSCALE( 2 ) - TAU + END IF + A = ( TEMP1+TEMP2 )*F - TEMP1*TEMP2*DF + B = TEMP1*TEMP2*F + C = F - ( TEMP1+TEMP2 )*DF + TEMP1*TEMP2*DDF + TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) ) + A = A / TEMP + B = B / TEMP + C = C / TEMP + IF( C.EQ.ZERO ) THEN + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + IF( F*ETA.GE.ZERO ) THEN + ETA = -F / DF + END IF +* + TEMP = ETA + TAU + IF( ORGATI ) THEN + IF( ETA.GT.ZERO .AND. TEMP.GE.DSCALE( 3 ) ) + $ ETA = ( DSCALE( 3 )-TAU ) / TWO + IF( ETA.LT.ZERO .AND. TEMP.LE.DSCALE( 2 ) ) + $ ETA = ( DSCALE( 2 )-TAU ) / TWO + ELSE + IF( ETA.GT.ZERO .AND. TEMP.GE.DSCALE( 2 ) ) + $ ETA = ( DSCALE( 2 )-TAU ) / TWO + IF( ETA.LT.ZERO .AND. TEMP.LE.DSCALE( 1 ) ) + $ ETA = ( DSCALE( 1 )-TAU ) / TWO + END IF + TAU = TAU + ETA +* + FC = ZERO + ERRETM = ZERO + DF = ZERO + DDF = ZERO + DO 40 I = 1, 3 + TEMP = ONE / ( DSCALE( I )-TAU ) + TEMP1 = ZSCALE( I )*TEMP + TEMP2 = TEMP1*TEMP + TEMP3 = TEMP2*TEMP + TEMP4 = TEMP1 / DSCALE( I ) + FC = FC + TEMP4 + ERRETM = ERRETM + ABS( TEMP4 ) + DF = DF + TEMP2 + DDF = DDF + TEMP3 + 40 CONTINUE + F = FINIT + TAU*FC + ERRETM = EIGHT*( ABS( FINIT )+ABS( TAU )*ERRETM ) + + $ ABS( TAU )*DF + IF( ABS( F ).LE.EPS*ERRETM ) + $ GO TO 60 + 50 CONTINUE + INFO = 1 + 60 CONTINUE +* +* Undo scaling +* + IF( SCALE ) + $ TAU = TAU*SCLINV + RETURN +* +* End of DLAED6 +* + END diff --git a/costa/native/external/lapack/dlaed7.f b/costa/native/external/lapack/dlaed7.f new file mode 100644 index 000000000..cfa84b609 --- /dev/null +++ b/costa/native/external/lapack/dlaed7.f @@ -0,0 +1,288 @@ + SUBROUTINE DLAED7( ICOMPQ, N, QSIZ, TLVLS, CURLVL, CURPBM, D, Q, + $ LDQ, INDXQ, RHO, CUTPNT, QSTORE, QPTR, PRMPTR, + $ PERM, GIVPTR, GIVCOL, GIVNUM, WORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER CURLVL, CURPBM, CUTPNT, ICOMPQ, INFO, LDQ, N, + $ QSIZ, TLVLS + DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), GIVPTR( * ), INDXQ( * ), + $ IWORK( * ), PERM( * ), PRMPTR( * ), QPTR( * ) + DOUBLE PRECISION D( * ), GIVNUM( 2, * ), Q( LDQ, * ), + $ QSTORE( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLAED7 computes the updated eigensystem of a diagonal +* matrix after modification by a rank-one symmetric matrix. This +* routine is used only for the eigenproblem which requires all +* eigenvalues and optionally eigenvectors of a dense symmetric matrix +* that has been reduced to tridiagonal form. DLAED1 handles +* the case in which all eigenvalues and eigenvectors of a symmetric +* tridiagonal matrix are desired. +* +* T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) +* +* where Z = Q'u, u is a vector of length N with ones in the +* CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. +* +* The eigenvectors of the original matrix are stored in Q, and the +* eigenvalues are in D. The algorithm consists of three stages: +* +* The first stage consists of deflating the size of the problem +* when there are multiple eigenvalues or if there is a zero in +* the Z vector. For each such occurence the dimension of the +* secular equation problem is reduced by one. This stage is +* performed by the routine DLAED8. +* +* The second stage consists of calculating the updated +* eigenvalues. This is done by finding the roots of the secular +* equation via the routine DLAED4 (as called by DLAED9). +* This routine also calculates the eigenvectors of the current +* problem. +* +* The final stage consists of computing the updated eigenvectors +* directly using the updated eigenvalues. The eigenvectors for +* the current problem are multiplied with the eigenvectors from +* the overall problem. +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* = 0: Compute eigenvalues only. +* = 1: Compute eigenvectors of original dense symmetric matrix +* also. On entry, Q contains the orthogonal matrix used +* to reduce the original matrix to tridiagonal form. +* +* N (input) INTEGER +* The dimension of the symmetric tridiagonal matrix. N >= 0. +* +* QSIZ (input) INTEGER +* The dimension of the orthogonal matrix used to reduce +* the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. +* +* TLVLS (input) INTEGER +* The total number of merging levels in the overall divide and +* conquer tree. +* +* CURLVL (input) INTEGER +* The current level in the overall merge routine, +* 0 <= CURLVL <= TLVLS. +* +* CURPBM (input) INTEGER +* The current problem in the current level in the overall +* merge routine (counting from upper left to lower right). +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the eigenvalues of the rank-1-perturbed matrix. +* On exit, the eigenvalues of the repaired matrix. +* +* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) +* On entry, the eigenvectors of the rank-1-perturbed matrix. +* On exit, the eigenvectors of the repaired tridiagonal matrix. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* INDXQ (output) INTEGER array, dimension (N) +* The permutation which will reintegrate the subproblem just +* solved back into sorted order, i.e., D( INDXQ( I = 1, N ) ) +* will be in ascending order. +* +* RHO (input) DOUBLE PRECISION +* The subdiagonal element used to create the rank-1 +* modification. +* +* CUTPNT (input) INTEGER +* Contains the location of the last eigenvalue in the leading +* sub-matrix. min(1,N) <= CUTPNT <= N. +* +* QSTORE (input/output) DOUBLE PRECISION array, dimension (N**2+1) +* Stores eigenvectors of submatrices encountered during +* divide and conquer, packed together. QPTR points to +* beginning of the submatrices. +* +* QPTR (input/output) INTEGER array, dimension (N+2) +* List of indices pointing to beginning of submatrices stored +* in QSTORE. The submatrices are numbered starting at the +* bottom left of the divide and conquer tree, from left to +* right and bottom to top. +* +* PRMPTR (input) INTEGER array, dimension (N lg N) +* Contains a list of pointers which indicate where in PERM a +* level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) +* indicates the size of the permutation and also the size of +* the full, non-deflated problem. +* +* PERM (input) INTEGER array, dimension (N lg N) +* Contains the permutations (from deflation and sorting) to be +* applied to each eigenblock. +* +* GIVPTR (input) INTEGER array, dimension (N lg N) +* Contains a list of pointers which indicate where in GIVCOL a +* level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) +* indicates the number of Givens rotations. +* +* GIVCOL (input) INTEGER array, dimension (2, N lg N) +* Each pair of numbers indicates a pair of columns to take place +* in a Givens rotation. +* +* GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) +* Each number indicates the S value to be used in the +* corresponding Givens rotation. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N+QSIZ*N) +* +* IWORK (workspace) INTEGER array, dimension (4*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an eigenvalue did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER COLTYP, CURR, I, IDLMDA, INDX, INDXC, INDXP, + $ IQ2, IS, IW, IZ, K, LDQ2, N1, N2, PTR +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DLAED8, DLAED9, DLAEDA, DLAMRG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ICOMPQ.EQ.1 .AND. QSIZ.LT.N ) THEN + INFO = -4 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( MIN( 1, N ).GT.CUTPNT .OR. N.LT.CUTPNT ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAED7', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* The following values are for bookkeeping purposes only. They are +* integer pointers which indicate the portion of the workspace +* used by a particular array in DLAED8 and DLAED9. +* + IF( ICOMPQ.EQ.1 ) THEN + LDQ2 = QSIZ + ELSE + LDQ2 = N + END IF +* + IZ = 1 + IDLMDA = IZ + N + IW = IDLMDA + N + IQ2 = IW + N + IS = IQ2 + N*LDQ2 +* + INDX = 1 + INDXC = INDX + N + COLTYP = INDXC + N + INDXP = COLTYP + N +* +* Form the z-vector which consists of the last row of Q_1 and the +* first row of Q_2. +* + PTR = 1 + 2**TLVLS + DO 10 I = 1, CURLVL - 1 + PTR = PTR + 2**( TLVLS-I ) + 10 CONTINUE + CURR = PTR + CURPBM + CALL DLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR, + $ GIVCOL, GIVNUM, QSTORE, QPTR, WORK( IZ ), + $ WORK( IZ+N ), INFO ) +* +* When solving the final problem, we no longer need the stored data, +* so we will overwrite the data from this level onto the previously +* used storage space. +* + IF( CURLVL.EQ.TLVLS ) THEN + QPTR( CURR ) = 1 + PRMPTR( CURR ) = 1 + GIVPTR( CURR ) = 1 + END IF +* +* Sort and Deflate eigenvalues. +* + CALL DLAED8( ICOMPQ, K, N, QSIZ, D, Q, LDQ, INDXQ, RHO, CUTPNT, + $ WORK( IZ ), WORK( IDLMDA ), WORK( IQ2 ), LDQ2, + $ WORK( IW ), PERM( PRMPTR( CURR ) ), GIVPTR( CURR+1 ), + $ GIVCOL( 1, GIVPTR( CURR ) ), + $ GIVNUM( 1, GIVPTR( CURR ) ), IWORK( INDXP ), + $ IWORK( INDX ), INFO ) + PRMPTR( CURR+1 ) = PRMPTR( CURR ) + N + GIVPTR( CURR+1 ) = GIVPTR( CURR+1 ) + GIVPTR( CURR ) +* +* Solve Secular Equation. +* + IF( K.NE.0 ) THEN + CALL DLAED9( K, 1, K, N, D, WORK( IS ), K, RHO, WORK( IDLMDA ), + $ WORK( IW ), QSTORE( QPTR( CURR ) ), K, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( ICOMPQ.EQ.1 ) THEN + CALL DGEMM( 'N', 'N', QSIZ, K, K, ONE, WORK( IQ2 ), LDQ2, + $ QSTORE( QPTR( CURR ) ), K, ZERO, Q, LDQ ) + END IF + QPTR( CURR+1 ) = QPTR( CURR ) + K**2 +* +* Prepare the INDXQ sorting permutation. +* + N1 = K + N2 = N - K + CALL DLAMRG( N1, N2, D, 1, -1, INDXQ ) + ELSE + QPTR( CURR+1 ) = QPTR( CURR ) + DO 20 I = 1, N + INDXQ( I ) = I + 20 CONTINUE + END IF +* + 30 CONTINUE + RETURN +* +* End of DLAED7 +* + END diff --git a/costa/native/external/lapack/dlaed8.f b/costa/native/external/lapack/dlaed8.f new file mode 100644 index 000000000..58404b8f5 --- /dev/null +++ b/costa/native/external/lapack/dlaed8.f @@ -0,0 +1,400 @@ + SUBROUTINE DLAED8( ICOMPQ, K, N, QSIZ, D, Q, LDQ, INDXQ, RHO, + $ CUTPNT, Z, DLAMDA, Q2, LDQ2, W, PERM, GIVPTR, + $ GIVCOL, GIVNUM, INDXP, INDX, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER CUTPNT, GIVPTR, ICOMPQ, INFO, K, LDQ, LDQ2, N, + $ QSIZ + DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), INDX( * ), INDXP( * ), + $ INDXQ( * ), PERM( * ) + DOUBLE PRECISION D( * ), DLAMDA( * ), GIVNUM( 2, * ), + $ Q( LDQ, * ), Q2( LDQ2, * ), W( * ), Z( * ) +* .. +* +* Purpose +* ======= +* +* DLAED8 merges the two sets of eigenvalues together into a single +* sorted set. Then it tries to deflate the size of the problem. +* There are two ways in which deflation can occur: when two or more +* eigenvalues are close together or if there is a tiny element in the +* Z vector. For each such occurrence the order of the related secular +* equation problem is reduced by one. +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* = 0: Compute eigenvalues only. +* = 1: Compute eigenvectors of original dense symmetric matrix +* also. On entry, Q contains the orthogonal matrix used +* to reduce the original matrix to tridiagonal form. +* +* K (output) INTEGER +* The number of non-deflated eigenvalues, and the order of the +* related secular equation. +* +* N (input) INTEGER +* The dimension of the symmetric tridiagonal matrix. N >= 0. +* +* QSIZ (input) INTEGER +* The dimension of the orthogonal matrix used to reduce +* the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the eigenvalues of the two submatrices to be +* combined. On exit, the trailing (N-K) updated eigenvalues +* (those which were deflated) sorted into increasing order. +* +* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) +* If ICOMPQ = 0, Q is not referenced. Otherwise, +* on entry, Q contains the eigenvectors of the partially solved +* system which has been previously updated in matrix +* multiplies with other partially solved eigensystems. +* On exit, Q contains the trailing (N-K) updated eigenvectors +* (those which were deflated) in its last N-K columns. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* INDXQ (input) INTEGER array, dimension (N) +* The permutation which separately sorts the two sub-problems +* in D into ascending order. Note that elements in the second +* half of this permutation must first have CUTPNT added to +* their values in order to be accurate. +* +* RHO (input/output) DOUBLE PRECISION +* On entry, the off-diagonal element associated with the rank-1 +* cut which originally split the two submatrices which are now +* being recombined. +* On exit, RHO has been modified to the value required by +* DLAED3. +* +* CUTPNT (input) INTEGER +* The location of the last eigenvalue in the leading +* sub-matrix. min(1,N) <= CUTPNT <= N. +* +* Z (input) DOUBLE PRECISION array, dimension (N) +* On entry, Z contains the updating vector (the last row of +* the first sub-eigenvector matrix and the first row of the +* second sub-eigenvector matrix). +* On exit, the contents of Z are destroyed by the updating +* process. +* +* DLAMDA (output) DOUBLE PRECISION array, dimension (N) +* A copy of the first K eigenvalues which will be used by +* DLAED3 to form the secular equation. +* +* Q2 (output) DOUBLE PRECISION array, dimension (LDQ2,N) +* If ICOMPQ = 0, Q2 is not referenced. Otherwise, +* a copy of the first K eigenvectors which will be used by +* DLAED7 in a matrix multiply (DGEMM) to update the new +* eigenvectors. +* +* LDQ2 (input) INTEGER +* The leading dimension of the array Q2. LDQ2 >= max(1,N). +* +* W (output) DOUBLE PRECISION array, dimension (N) +* The first k values of the final deflation-altered z-vector and +* will be passed to DLAED3. +* +* PERM (output) INTEGER array, dimension (N) +* The permutations (from deflation and sorting) to be applied +* to each eigenblock. +* +* GIVPTR (output) INTEGER +* The number of Givens rotations which took place in this +* subproblem. +* +* GIVCOL (output) INTEGER array, dimension (2, N) +* Each pair of numbers indicates a pair of columns to take place +* in a Givens rotation. +* +* GIVNUM (output) DOUBLE PRECISION array, dimension (2, N) +* Each number indicates the S value to be used in the +* corresponding Givens rotation. +* +* INDXP (workspace) INTEGER array, dimension (N) +* The permutation used to place deflated values of D at the end +* of the array. INDXP(1:K) points to the nondeflated D-values +* and INDXP(K+1:N) points to the deflated eigenvalues. +* +* INDX (workspace) INTEGER array, dimension (N) +* The permutation used to sort the contents of D into ascending +* order. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION MONE, ZERO, ONE, TWO, EIGHT + PARAMETER ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0, + $ TWO = 2.0D0, EIGHT = 8.0D0 ) +* .. +* .. Local Scalars .. +* + INTEGER I, IMAX, J, JLAM, JMAX, JP, K2, N1, N1P1, N2 + DOUBLE PRECISION C, EPS, S, T, TAU, TOL +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DLAPY2 + EXTERNAL IDAMAX, DLAMCH, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLACPY, DLAMRG, DROT, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ICOMPQ.EQ.1 .AND. QSIZ.LT.N ) THEN + INFO = -4 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( CUTPNT.LT.MIN( 1, N ) .OR. CUTPNT.GT.N ) THEN + INFO = -10 + ELSE IF( LDQ2.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAED8', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + N1 = CUTPNT + N2 = N - N1 + N1P1 = N1 + 1 +* + IF( RHO.LT.ZERO ) THEN + CALL DSCAL( N2, MONE, Z( N1P1 ), 1 ) + END IF +* +* Normalize z so that norm(z) = 1 +* + T = ONE / SQRT( TWO ) + DO 10 J = 1, N + INDX( J ) = J + 10 CONTINUE + CALL DSCAL( N, T, Z, 1 ) + RHO = ABS( TWO*RHO ) +* +* Sort the eigenvalues into increasing order +* + DO 20 I = CUTPNT + 1, N + INDXQ( I ) = INDXQ( I ) + CUTPNT + 20 CONTINUE + DO 30 I = 1, N + DLAMDA( I ) = D( INDXQ( I ) ) + W( I ) = Z( INDXQ( I ) ) + 30 CONTINUE + I = 1 + J = CUTPNT + 1 + CALL DLAMRG( N1, N2, DLAMDA, 1, 1, INDX ) + DO 40 I = 1, N + D( I ) = DLAMDA( INDX( I ) ) + Z( I ) = W( INDX( I ) ) + 40 CONTINUE +* +* Calculate the allowable deflation tolerence +* + IMAX = IDAMAX( N, Z, 1 ) + JMAX = IDAMAX( N, D, 1 ) + EPS = DLAMCH( 'Epsilon' ) + TOL = EIGHT*EPS*ABS( D( JMAX ) ) +* +* If the rank-1 modifier is small enough, no more needs to be done +* except to reorganize Q so that its columns correspond with the +* elements in D. +* + IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN + K = 0 + IF( ICOMPQ.EQ.0 ) THEN + DO 50 J = 1, N + PERM( J ) = INDXQ( INDX( J ) ) + 50 CONTINUE + ELSE + DO 60 J = 1, N + PERM( J ) = INDXQ( INDX( J ) ) + CALL DCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 ) + 60 CONTINUE + CALL DLACPY( 'A', QSIZ, N, Q2( 1, 1 ), LDQ2, Q( 1, 1 ), + $ LDQ ) + END IF + RETURN + END IF +* +* If there are multiple eigenvalues then the problem deflates. Here +* the number of equal eigenvalues are found. As each equal +* eigenvalue is found, an elementary reflector is computed to rotate +* the corresponding eigensubspace so that the corresponding +* components of Z are zero in this new basis. +* + K = 0 + GIVPTR = 0 + K2 = N + 1 + DO 70 J = 1, N + IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + INDXP( K2 ) = J + IF( J.EQ.N ) + $ GO TO 110 + ELSE + JLAM = J + GO TO 80 + END IF + 70 CONTINUE + 80 CONTINUE + J = J + 1 + IF( J.GT.N ) + $ GO TO 100 + IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + INDXP( K2 ) = J + ELSE +* +* Check if eigenvalues are close enough to allow deflation. +* + S = Z( JLAM ) + C = Z( J ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = DLAPY2( C, S ) + T = D( J ) - D( JLAM ) + C = C / TAU + S = -S / TAU + IF( ABS( T*C*S ).LE.TOL ) THEN +* +* Deflation is possible. +* + Z( J ) = TAU + Z( JLAM ) = ZERO +* +* Record the appropriate Givens rotation +* + GIVPTR = GIVPTR + 1 + GIVCOL( 1, GIVPTR ) = INDXQ( INDX( JLAM ) ) + GIVCOL( 2, GIVPTR ) = INDXQ( INDX( J ) ) + GIVNUM( 1, GIVPTR ) = C + GIVNUM( 2, GIVPTR ) = S + IF( ICOMPQ.EQ.1 ) THEN + CALL DROT( QSIZ, Q( 1, INDXQ( INDX( JLAM ) ) ), 1, + $ Q( 1, INDXQ( INDX( J ) ) ), 1, C, S ) + END IF + T = D( JLAM )*C*C + D( J )*S*S + D( J ) = D( JLAM )*S*S + D( J )*C*C + D( JLAM ) = T + K2 = K2 - 1 + I = 1 + 90 CONTINUE + IF( K2+I.LE.N ) THEN + IF( D( JLAM ).LT.D( INDXP( K2+I ) ) ) THEN + INDXP( K2+I-1 ) = INDXP( K2+I ) + INDXP( K2+I ) = JLAM + I = I + 1 + GO TO 90 + ELSE + INDXP( K2+I-1 ) = JLAM + END IF + ELSE + INDXP( K2+I-1 ) = JLAM + END IF + JLAM = J + ELSE + K = K + 1 + W( K ) = Z( JLAM ) + DLAMDA( K ) = D( JLAM ) + INDXP( K ) = JLAM + JLAM = J + END IF + END IF + GO TO 80 + 100 CONTINUE +* +* Record the last eigenvalue. +* + K = K + 1 + W( K ) = Z( JLAM ) + DLAMDA( K ) = D( JLAM ) + INDXP( K ) = JLAM +* + 110 CONTINUE +* +* Sort the eigenvalues and corresponding eigenvectors into DLAMDA +* and Q2 respectively. The eigenvalues/vectors which were not +* deflated go into the first K slots of DLAMDA and Q2 respectively, +* while those which were deflated go into the last N - K slots. +* + IF( ICOMPQ.EQ.0 ) THEN + DO 120 J = 1, N + JP = INDXP( J ) + DLAMDA( J ) = D( JP ) + PERM( J ) = INDXQ( INDX( JP ) ) + 120 CONTINUE + ELSE + DO 130 J = 1, N + JP = INDXP( J ) + DLAMDA( J ) = D( JP ) + PERM( J ) = INDXQ( INDX( JP ) ) + CALL DCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 ) + 130 CONTINUE + END IF +* +* The deflated eigenvalues and their corresponding vectors go back +* into the last N - K slots of D and Q respectively. +* + IF( K.LT.N ) THEN + IF( ICOMPQ.EQ.0 ) THEN + CALL DCOPY( N-K, DLAMDA( K+1 ), 1, D( K+1 ), 1 ) + ELSE + CALL DCOPY( N-K, DLAMDA( K+1 ), 1, D( K+1 ), 1 ) + CALL DLACPY( 'A', QSIZ, N-K, Q2( 1, K+1 ), LDQ2, + $ Q( 1, K+1 ), LDQ ) + END IF + END IF +* + RETURN +* +* End of DLAED8 +* + END diff --git a/costa/native/external/lapack/dlaed9.f b/costa/native/external/lapack/dlaed9.f new file mode 100644 index 000000000..e18630f33 --- /dev/null +++ b/costa/native/external/lapack/dlaed9.f @@ -0,0 +1,206 @@ + SUBROUTINE DLAED9( K, KSTART, KSTOP, N, D, Q, LDQ, RHO, DLAMDA, W, + $ S, LDS, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, K, KSTART, KSTOP, LDQ, LDS, N + DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), S( LDS, * ), + $ W( * ) +* .. +* +* Purpose +* ======= +* +* DLAED9 finds the roots of the secular equation, as defined by the +* values in D, Z, and RHO, between KSTART and KSTOP. It makes the +* appropriate calls to DLAED4 and then stores the new matrix of +* eigenvectors for use in calculating the next level of Z vectors. +* +* Arguments +* ========= +* +* K (input) INTEGER +* The number of terms in the rational function to be solved by +* DLAED4. K >= 0. +* +* KSTART (input) INTEGER +* KSTOP (input) INTEGER +* The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP +* are to be computed. 1 <= KSTART <= KSTOP <= K. +* +* N (input) INTEGER +* The number of rows and columns in the Q matrix. +* N >= K (delation may result in N > K). +* +* D (output) DOUBLE PRECISION array, dimension (N) +* D(I) contains the updated eigenvalues +* for KSTART <= I <= KSTOP. +* +* Q (workspace) DOUBLE PRECISION array, dimension (LDQ,N) +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max( 1, N ). +* +* RHO (input) DOUBLE PRECISION +* The value of the parameter in the rank one update equation. +* RHO >= 0 required. +* +* DLAMDA (input) DOUBLE PRECISION array, dimension (K) +* The first K elements of this array contain the old roots +* of the deflated updating problem. These are the poles +* of the secular equation. +* +* W (input) DOUBLE PRECISION array, dimension (K) +* The first K elements of this array contain the components +* of the deflation-adjusted updating vector. +* +* S (output) DOUBLE PRECISION array, dimension (LDS, K) +* Will contain the eigenvectors of the repaired matrix which +* will be stored for subsequent Z vector calculation and +* multiplied by the previously accumulated eigenvectors +* to update the system. +* +* LDS (input) INTEGER +* The leading dimension of S. LDS >= max( 1, K ). +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an eigenvalue did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION TEMP +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3, DNRM2 + EXTERNAL DLAMC3, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLAED4, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( K.LT.0 ) THEN + INFO = -1 + ELSE IF( KSTART.LT.1 .OR. KSTART.GT.MAX( 1, K ) ) THEN + INFO = -2 + ELSE IF( MAX( 1, KSTOP ).LT.KSTART .OR. KSTOP.GT.MAX( 1, K ) ) + $ THEN + INFO = -3 + ELSE IF( N.LT.K ) THEN + INFO = -4 + ELSE IF( LDQ.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDS.LT.MAX( 1, K ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAED9', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) + $ RETURN +* +* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can +* be computed with high relative accuracy (barring over/underflow). +* This is a problem on machines without a guard digit in +* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). +* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), +* which on any of these machines zeros out the bottommost +* bit of DLAMDA(I) if it is 1; this makes the subsequent +* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation +* occurs. On binary machines with a guard digit (almost all +* machines) it does not change DLAMDA(I) at all. On hexadecimal +* and decimal machines with a guard digit, it slightly +* changes the bottommost bits of DLAMDA(I). It does not account +* for hexadecimal or decimal machines without guard digits +* (we know of none). We use a subroutine call to compute +* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating +* this code. +* + DO 10 I = 1, N + DLAMDA( I ) = DLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I ) + 10 CONTINUE +* + DO 20 J = KSTART, KSTOP + CALL DLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO ) +* +* If the zero finder fails, the computation is terminated. +* + IF( INFO.NE.0 ) + $ GO TO 120 + 20 CONTINUE +* + IF( K.EQ.1 .OR. K.EQ.2 ) THEN + DO 40 I = 1, K + DO 30 J = 1, K + S( J, I ) = Q( J, I ) + 30 CONTINUE + 40 CONTINUE + GO TO 120 + END IF +* +* Compute updated W. +* + CALL DCOPY( K, W, 1, S, 1 ) +* +* Initialize W(I) = Q(I,I) +* + CALL DCOPY( K, Q, LDQ+1, W, 1 ) + DO 70 J = 1, K + DO 50 I = 1, J - 1 + W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) ) + 50 CONTINUE + DO 60 I = J + 1, K + W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) ) + 60 CONTINUE + 70 CONTINUE + DO 80 I = 1, K + W( I ) = SIGN( SQRT( -W( I ) ), S( I, 1 ) ) + 80 CONTINUE +* +* Compute eigenvectors of the modified rank-1 modification. +* + DO 110 J = 1, K + DO 90 I = 1, K + Q( I, J ) = W( I ) / Q( I, J ) + 90 CONTINUE + TEMP = DNRM2( K, Q( 1, J ), 1 ) + DO 100 I = 1, K + S( I, J ) = Q( I, J ) / TEMP + 100 CONTINUE + 110 CONTINUE +* + 120 CONTINUE + RETURN +* +* End of DLAED9 +* + END diff --git a/costa/native/external/lapack/dlaeda.f b/costa/native/external/lapack/dlaeda.f new file mode 100644 index 000000000..96bcbb867 --- /dev/null +++ b/costa/native/external/lapack/dlaeda.f @@ -0,0 +1,218 @@ + SUBROUTINE DLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR, + $ GIVCOL, GIVNUM, Q, QPTR, Z, ZTEMP, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER CURLVL, CURPBM, INFO, N, TLVLS +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), GIVPTR( * ), PERM( * ), + $ PRMPTR( * ), QPTR( * ) + DOUBLE PRECISION GIVNUM( 2, * ), Q( * ), Z( * ), ZTEMP( * ) +* .. +* +* Purpose +* ======= +* +* DLAEDA computes the Z vector corresponding to the merge step in the +* CURLVLth step of the merge process with TLVLS steps for the CURPBMth +* problem. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The dimension of the symmetric tridiagonal matrix. N >= 0. +* +* TLVLS (input) INTEGER +* The total number of merging levels in the overall divide and +* conquer tree. +* +* CURLVL (input) INTEGER +* The current level in the overall merge routine, +* 0 <= curlvl <= tlvls. +* +* CURPBM (input) INTEGER +* The current problem in the current level in the overall +* merge routine (counting from upper left to lower right). +* +* PRMPTR (input) INTEGER array, dimension (N lg N) +* Contains a list of pointers which indicate where in PERM a +* level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) +* indicates the size of the permutation and incidentally the +* size of the full, non-deflated problem. +* +* PERM (input) INTEGER array, dimension (N lg N) +* Contains the permutations (from deflation and sorting) to be +* applied to each eigenblock. +* +* GIVPTR (input) INTEGER array, dimension (N lg N) +* Contains a list of pointers which indicate where in GIVCOL a +* level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) +* indicates the number of Givens rotations. +* +* GIVCOL (input) INTEGER array, dimension (2, N lg N) +* Each pair of numbers indicates a pair of columns to take place +* in a Givens rotation. +* +* GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) +* Each number indicates the S value to be used in the +* corresponding Givens rotation. +* +* Q (input) DOUBLE PRECISION array, dimension (N**2) +* Contains the square eigenblocks from previous levels, the +* starting positions for blocks are given by QPTR. +* +* QPTR (input) INTEGER array, dimension (N+2) +* Contains a list of pointers which indicate where in Q an +* eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates +* the size of the block. +* +* Z (output) DOUBLE PRECISION array, dimension (N) +* On output this vector contains the updating vector (the last +* row of the first sub-eigenvector matrix and the first row of +* the second sub-eigenvector matrix). +* +* ZTEMP (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + INTEGER BSIZ1, BSIZ2, CURR, I, K, MID, PSIZ1, PSIZ2, + $ PTR, ZPTR1 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMV, DROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, INT, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -1 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAEDA', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine location of first number in second half. +* + MID = N / 2 + 1 +* +* Gather last/first rows of appropriate eigenblocks into center of Z +* + PTR = 1 +* +* Determine location of lowest level subproblem in the full storage +* scheme +* + CURR = PTR + CURPBM*2**CURLVL + 2**( CURLVL-1 ) - 1 +* +* Determine size of these matrices. We add HALF to the value of +* the SQRT in case the machine underestimates one of these square +* roots. +* + BSIZ1 = INT( HALF+SQRT( DBLE( QPTR( CURR+1 )-QPTR( CURR ) ) ) ) + BSIZ2 = INT( HALF+SQRT( DBLE( QPTR( CURR+2 )-QPTR( CURR+1 ) ) ) ) + DO 10 K = 1, MID - BSIZ1 - 1 + Z( K ) = ZERO + 10 CONTINUE + CALL DCOPY( BSIZ1, Q( QPTR( CURR )+BSIZ1-1 ), BSIZ1, + $ Z( MID-BSIZ1 ), 1 ) + CALL DCOPY( BSIZ2, Q( QPTR( CURR+1 ) ), BSIZ2, Z( MID ), 1 ) + DO 20 K = MID + BSIZ2, N + Z( K ) = ZERO + 20 CONTINUE +* +* Loop thru remaining levels 1 -> CURLVL applying the Givens +* rotations and permutation and then multiplying the center matrices +* against the current Z. +* + PTR = 2**TLVLS + 1 + DO 70 K = 1, CURLVL - 1 + CURR = PTR + CURPBM*2**( CURLVL-K ) + 2**( CURLVL-K-1 ) - 1 + PSIZ1 = PRMPTR( CURR+1 ) - PRMPTR( CURR ) + PSIZ2 = PRMPTR( CURR+2 ) - PRMPTR( CURR+1 ) + ZPTR1 = MID - PSIZ1 +* +* Apply Givens at CURR and CURR+1 +* + DO 30 I = GIVPTR( CURR ), GIVPTR( CURR+1 ) - 1 + CALL DROT( 1, Z( ZPTR1+GIVCOL( 1, I )-1 ), 1, + $ Z( ZPTR1+GIVCOL( 2, I )-1 ), 1, GIVNUM( 1, I ), + $ GIVNUM( 2, I ) ) + 30 CONTINUE + DO 40 I = GIVPTR( CURR+1 ), GIVPTR( CURR+2 ) - 1 + CALL DROT( 1, Z( MID-1+GIVCOL( 1, I ) ), 1, + $ Z( MID-1+GIVCOL( 2, I ) ), 1, GIVNUM( 1, I ), + $ GIVNUM( 2, I ) ) + 40 CONTINUE + PSIZ1 = PRMPTR( CURR+1 ) - PRMPTR( CURR ) + PSIZ2 = PRMPTR( CURR+2 ) - PRMPTR( CURR+1 ) + DO 50 I = 0, PSIZ1 - 1 + ZTEMP( I+1 ) = Z( ZPTR1+PERM( PRMPTR( CURR )+I )-1 ) + 50 CONTINUE + DO 60 I = 0, PSIZ2 - 1 + ZTEMP( PSIZ1+I+1 ) = Z( MID+PERM( PRMPTR( CURR+1 )+I )-1 ) + 60 CONTINUE +* +* Multiply Blocks at CURR and CURR+1 +* +* Determine size of these matrices. We add HALF to the value of +* the SQRT in case the machine underestimates one of these +* square roots. +* + BSIZ1 = INT( HALF+SQRT( DBLE( QPTR( CURR+1 )-QPTR( CURR ) ) ) ) + BSIZ2 = INT( HALF+SQRT( DBLE( QPTR( CURR+2 )-QPTR( CURR+ + $ 1 ) ) ) ) + IF( BSIZ1.GT.0 ) THEN + CALL DGEMV( 'T', BSIZ1, BSIZ1, ONE, Q( QPTR( CURR ) ), + $ BSIZ1, ZTEMP( 1 ), 1, ZERO, Z( ZPTR1 ), 1 ) + END IF + CALL DCOPY( PSIZ1-BSIZ1, ZTEMP( BSIZ1+1 ), 1, Z( ZPTR1+BSIZ1 ), + $ 1 ) + IF( BSIZ2.GT.0 ) THEN + CALL DGEMV( 'T', BSIZ2, BSIZ2, ONE, Q( QPTR( CURR+1 ) ), + $ BSIZ2, ZTEMP( PSIZ1+1 ), 1, ZERO, Z( MID ), 1 ) + END IF + CALL DCOPY( PSIZ2-BSIZ2, ZTEMP( PSIZ1+BSIZ2+1 ), 1, + $ Z( MID+BSIZ2 ), 1 ) +* + PTR = PTR + 2**( TLVLS-K ) + 70 CONTINUE +* + RETURN +* +* End of DLAEDA +* + END diff --git a/costa/native/external/lapack/dlaein.f b/costa/native/external/lapack/dlaein.f new file mode 100644 index 000000000..4186b82be --- /dev/null +++ b/costa/native/external/lapack/dlaein.f @@ -0,0 +1,532 @@ + SUBROUTINE DLAEIN( RIGHTV, NOINIT, N, H, LDH, WR, WI, VR, VI, B, + $ LDB, WORK, EPS3, SMLNUM, BIGNUM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + LOGICAL NOINIT, RIGHTV + INTEGER INFO, LDB, LDH, N + DOUBLE PRECISION BIGNUM, EPS3, SMLNUM, WI, WR +* .. +* .. Array Arguments .. + DOUBLE PRECISION B( LDB, * ), H( LDH, * ), VI( * ), VR( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLAEIN uses inverse iteration to find a right or left eigenvector +* corresponding to the eigenvalue (WR,WI) of a real upper Hessenberg +* matrix H. +* +* Arguments +* ========= +* +* RIGHTV (input) LOGICAL +* = .TRUE. : compute right eigenvector; +* = .FALSE.: compute left eigenvector. +* +* NOINIT (input) LOGICAL +* = .TRUE. : no initial vector supplied in (VR,VI). +* = .FALSE.: initial vector supplied in (VR,VI). +* +* N (input) INTEGER +* The order of the matrix H. N >= 0. +* +* H (input) DOUBLE PRECISION array, dimension (LDH,N) +* The upper Hessenberg matrix H. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH >= max(1,N). +* +* WR (input) DOUBLE PRECISION +* WI (input) DOUBLE PRECISION +* The real and imaginary parts of the eigenvalue of H whose +* corresponding right or left eigenvector is to be computed. +* +* VR (input/output) DOUBLE PRECISION array, dimension (N) +* VI (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, if NOINIT = .FALSE. and WI = 0.0, VR must contain +* a real starting vector for inverse iteration using the real +* eigenvalue WR; if NOINIT = .FALSE. and WI.ne.0.0, VR and VI +* must contain the real and imaginary parts of a complex +* starting vector for inverse iteration using the complex +* eigenvalue (WR,WI); otherwise VR and VI need not be set. +* On exit, if WI = 0.0 (real eigenvalue), VR contains the +* computed real eigenvector; if WI.ne.0.0 (complex eigenvalue), +* VR and VI contain the real and imaginary parts of the +* computed complex eigenvector. The eigenvector is normalized +* so that the component of largest magnitude has magnitude 1; +* here the magnitude of a complex number (x,y) is taken to be +* |x| + |y|. +* VI is not referenced if WI = 0.0. +* +* B (workspace) DOUBLE PRECISION array, dimension (LDB,N) +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= N+1. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* EPS3 (input) DOUBLE PRECISION +* A small machine-dependent value which is used to perturb +* close eigenvalues, and to replace zero pivots. +* +* SMLNUM (input) DOUBLE PRECISION +* A machine-dependent value close to the underflow threshold. +* +* BIGNUM (input) DOUBLE PRECISION +* A machine-dependent value close to the overflow threshold. +* +* INFO (output) INTEGER +* = 0: successful exit +* = 1: inverse iteration did not converge; VR is set to the +* last iterate, and so is VI if WI.ne.0.0. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TENTH + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TENTH = 1.0D-1 ) +* .. +* .. Local Scalars .. + CHARACTER NORMIN, TRANS + INTEGER I, I1, I2, I3, IERR, ITS, J + DOUBLE PRECISION ABSBII, ABSBJJ, EI, EJ, GROWTO, NORM, NRMSML, + $ REC, ROOTN, SCALE, TEMP, VCRIT, VMAX, VNORM, W, + $ W1, X, XI, XR, Y +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DASUM, DLAPY2, DNRM2 + EXTERNAL IDAMAX, DASUM, DLAPY2, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DLADIV, DLATRS, DSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* GROWTO is the threshold used in the acceptance test for an +* eigenvector. +* + ROOTN = SQRT( DBLE( N ) ) + GROWTO = TENTH / ROOTN + NRMSML = MAX( ONE, EPS3*ROOTN )*SMLNUM +* +* Form B = H - (WR,WI)*I (except that the subdiagonal elements and +* the imaginary parts of the diagonal elements are not stored). +* + DO 20 J = 1, N + DO 10 I = 1, J - 1 + B( I, J ) = H( I, J ) + 10 CONTINUE + B( J, J ) = H( J, J ) - WR + 20 CONTINUE +* + IF( WI.EQ.ZERO ) THEN +* +* Real eigenvalue. +* + IF( NOINIT ) THEN +* +* Set initial vector. +* + DO 30 I = 1, N + VR( I ) = EPS3 + 30 CONTINUE + ELSE +* +* Scale supplied initial vector. +* + VNORM = DNRM2( N, VR, 1 ) + CALL DSCAL( N, ( EPS3*ROOTN ) / MAX( VNORM, NRMSML ), VR, + $ 1 ) + END IF +* + IF( RIGHTV ) THEN +* +* LU decomposition with partial pivoting of B, replacing zero +* pivots by EPS3. +* + DO 60 I = 1, N - 1 + EI = H( I+1, I ) + IF( ABS( B( I, I ) ).LT.ABS( EI ) ) THEN +* +* Interchange rows and eliminate. +* + X = B( I, I ) / EI + B( I, I ) = EI + DO 40 J = I + 1, N + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - X*TEMP + B( I, J ) = TEMP + 40 CONTINUE + ELSE +* +* Eliminate without interchange. +* + IF( B( I, I ).EQ.ZERO ) + $ B( I, I ) = EPS3 + X = EI / B( I, I ) + IF( X.NE.ZERO ) THEN + DO 50 J = I + 1, N + B( I+1, J ) = B( I+1, J ) - X*B( I, J ) + 50 CONTINUE + END IF + END IF + 60 CONTINUE + IF( B( N, N ).EQ.ZERO ) + $ B( N, N ) = EPS3 +* + TRANS = 'N' +* + ELSE +* +* UL decomposition with partial pivoting of B, replacing zero +* pivots by EPS3. +* + DO 90 J = N, 2, -1 + EJ = H( J, J-1 ) + IF( ABS( B( J, J ) ).LT.ABS( EJ ) ) THEN +* +* Interchange columns and eliminate. +* + X = B( J, J ) / EJ + B( J, J ) = EJ + DO 70 I = 1, J - 1 + TEMP = B( I, J-1 ) + B( I, J-1 ) = B( I, J ) - X*TEMP + B( I, J ) = TEMP + 70 CONTINUE + ELSE +* +* Eliminate without interchange. +* + IF( B( J, J ).EQ.ZERO ) + $ B( J, J ) = EPS3 + X = EJ / B( J, J ) + IF( X.NE.ZERO ) THEN + DO 80 I = 1, J - 1 + B( I, J-1 ) = B( I, J-1 ) - X*B( I, J ) + 80 CONTINUE + END IF + END IF + 90 CONTINUE + IF( B( 1, 1 ).EQ.ZERO ) + $ B( 1, 1 ) = EPS3 +* + TRANS = 'T' +* + END IF +* + NORMIN = 'N' + DO 110 ITS = 1, N +* +* Solve U*x = scale*v for a right eigenvector +* or U'*x = scale*v for a left eigenvector, +* overwriting x on v. +* + CALL DLATRS( 'Upper', TRANS, 'Nonunit', NORMIN, N, B, LDB, + $ VR, SCALE, WORK, IERR ) + NORMIN = 'Y' +* +* Test for sufficient growth in the norm of v. +* + VNORM = DASUM( N, VR, 1 ) + IF( VNORM.GE.GROWTO*SCALE ) + $ GO TO 120 +* +* Choose new orthogonal starting vector and try again. +* + TEMP = EPS3 / ( ROOTN+ONE ) + VR( 1 ) = EPS3 + DO 100 I = 2, N + VR( I ) = TEMP + 100 CONTINUE + VR( N-ITS+1 ) = VR( N-ITS+1 ) - EPS3*ROOTN + 110 CONTINUE +* +* Failure to find eigenvector in N iterations. +* + INFO = 1 +* + 120 CONTINUE +* +* Normalize eigenvector. +* + I = IDAMAX( N, VR, 1 ) + CALL DSCAL( N, ONE / ABS( VR( I ) ), VR, 1 ) + ELSE +* +* Complex eigenvalue. +* + IF( NOINIT ) THEN +* +* Set initial vector. +* + DO 130 I = 1, N + VR( I ) = EPS3 + VI( I ) = ZERO + 130 CONTINUE + ELSE +* +* Scale supplied initial vector. +* + NORM = DLAPY2( DNRM2( N, VR, 1 ), DNRM2( N, VI, 1 ) ) + REC = ( EPS3*ROOTN ) / MAX( NORM, NRMSML ) + CALL DSCAL( N, REC, VR, 1 ) + CALL DSCAL( N, REC, VI, 1 ) + END IF +* + IF( RIGHTV ) THEN +* +* LU decomposition with partial pivoting of B, replacing zero +* pivots by EPS3. +* +* The imaginary part of the (i,j)-th element of U is stored in +* B(j+1,i). +* + B( 2, 1 ) = -WI + DO 140 I = 2, N + B( I+1, 1 ) = ZERO + 140 CONTINUE +* + DO 170 I = 1, N - 1 + ABSBII = DLAPY2( B( I, I ), B( I+1, I ) ) + EI = H( I+1, I ) + IF( ABSBII.LT.ABS( EI ) ) THEN +* +* Interchange rows and eliminate. +* + XR = B( I, I ) / EI + XI = B( I+1, I ) / EI + B( I, I ) = EI + B( I+1, I ) = ZERO + DO 150 J = I + 1, N + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - XR*TEMP + B( J+1, I+1 ) = B( J+1, I ) - XI*TEMP + B( I, J ) = TEMP + B( J+1, I ) = ZERO + 150 CONTINUE + B( I+2, I ) = -WI + B( I+1, I+1 ) = B( I+1, I+1 ) - XI*WI + B( I+2, I+1 ) = B( I+2, I+1 ) + XR*WI + ELSE +* +* Eliminate without interchanging rows. +* + IF( ABSBII.EQ.ZERO ) THEN + B( I, I ) = EPS3 + B( I+1, I ) = ZERO + ABSBII = EPS3 + END IF + EI = ( EI / ABSBII ) / ABSBII + XR = B( I, I )*EI + XI = -B( I+1, I )*EI + DO 160 J = I + 1, N + B( I+1, J ) = B( I+1, J ) - XR*B( I, J ) + + $ XI*B( J+1, I ) + B( J+1, I+1 ) = -XR*B( J+1, I ) - XI*B( I, J ) + 160 CONTINUE + B( I+2, I+1 ) = B( I+2, I+1 ) - WI + END IF +* +* Compute 1-norm of offdiagonal elements of i-th row. +* + WORK( I ) = DASUM( N-I, B( I, I+1 ), LDB ) + + $ DASUM( N-I, B( I+2, I ), 1 ) + 170 CONTINUE + IF( B( N, N ).EQ.ZERO .AND. B( N+1, N ).EQ.ZERO ) + $ B( N, N ) = EPS3 + WORK( N ) = ZERO +* + I1 = N + I2 = 1 + I3 = -1 + ELSE +* +* UL decomposition with partial pivoting of conjg(B), +* replacing zero pivots by EPS3. +* +* The imaginary part of the (i,j)-th element of U is stored in +* B(j+1,i). +* + B( N+1, N ) = WI + DO 180 J = 1, N - 1 + B( N+1, J ) = ZERO + 180 CONTINUE +* + DO 210 J = N, 2, -1 + EJ = H( J, J-1 ) + ABSBJJ = DLAPY2( B( J, J ), B( J+1, J ) ) + IF( ABSBJJ.LT.ABS( EJ ) ) THEN +* +* Interchange columns and eliminate +* + XR = B( J, J ) / EJ + XI = B( J+1, J ) / EJ + B( J, J ) = EJ + B( J+1, J ) = ZERO + DO 190 I = 1, J - 1 + TEMP = B( I, J-1 ) + B( I, J-1 ) = B( I, J ) - XR*TEMP + B( J, I ) = B( J+1, I ) - XI*TEMP + B( I, J ) = TEMP + B( J+1, I ) = ZERO + 190 CONTINUE + B( J+1, J-1 ) = WI + B( J-1, J-1 ) = B( J-1, J-1 ) + XI*WI + B( J, J-1 ) = B( J, J-1 ) - XR*WI + ELSE +* +* Eliminate without interchange. +* + IF( ABSBJJ.EQ.ZERO ) THEN + B( J, J ) = EPS3 + B( J+1, J ) = ZERO + ABSBJJ = EPS3 + END IF + EJ = ( EJ / ABSBJJ ) / ABSBJJ + XR = B( J, J )*EJ + XI = -B( J+1, J )*EJ + DO 200 I = 1, J - 1 + B( I, J-1 ) = B( I, J-1 ) - XR*B( I, J ) + + $ XI*B( J+1, I ) + B( J, I ) = -XR*B( J+1, I ) - XI*B( I, J ) + 200 CONTINUE + B( J, J-1 ) = B( J, J-1 ) + WI + END IF +* +* Compute 1-norm of offdiagonal elements of j-th column. +* + WORK( J ) = DASUM( J-1, B( 1, J ), 1 ) + + $ DASUM( J-1, B( J+1, 1 ), LDB ) + 210 CONTINUE + IF( B( 1, 1 ).EQ.ZERO .AND. B( 2, 1 ).EQ.ZERO ) + $ B( 1, 1 ) = EPS3 + WORK( 1 ) = ZERO +* + I1 = 1 + I2 = N + I3 = 1 + END IF +* + DO 270 ITS = 1, N + SCALE = ONE + VMAX = ONE + VCRIT = BIGNUM +* +* Solve U*(xr,xi) = scale*(vr,vi) for a right eigenvector, +* or U'*(xr,xi) = scale*(vr,vi) for a left eigenvector, +* overwriting (xr,xi) on (vr,vi). +* + DO 250 I = I1, I2, I3 +* + IF( WORK( I ).GT.VCRIT ) THEN + REC = ONE / VMAX + CALL DSCAL( N, REC, VR, 1 ) + CALL DSCAL( N, REC, VI, 1 ) + SCALE = SCALE*REC + VMAX = ONE + VCRIT = BIGNUM + END IF +* + XR = VR( I ) + XI = VI( I ) + IF( RIGHTV ) THEN + DO 220 J = I + 1, N + XR = XR - B( I, J )*VR( J ) + B( J+1, I )*VI( J ) + XI = XI - B( I, J )*VI( J ) - B( J+1, I )*VR( J ) + 220 CONTINUE + ELSE + DO 230 J = 1, I - 1 + XR = XR - B( J, I )*VR( J ) + B( I+1, J )*VI( J ) + XI = XI - B( J, I )*VI( J ) - B( I+1, J )*VR( J ) + 230 CONTINUE + END IF +* + W = ABS( B( I, I ) ) + ABS( B( I+1, I ) ) + IF( W.GT.SMLNUM ) THEN + IF( W.LT.ONE ) THEN + W1 = ABS( XR ) + ABS( XI ) + IF( W1.GT.W*BIGNUM ) THEN + REC = ONE / W1 + CALL DSCAL( N, REC, VR, 1 ) + CALL DSCAL( N, REC, VI, 1 ) + XR = VR( I ) + XI = VI( I ) + SCALE = SCALE*REC + VMAX = VMAX*REC + END IF + END IF +* +* Divide by diagonal element of B. +* + CALL DLADIV( XR, XI, B( I, I ), B( I+1, I ), VR( I ), + $ VI( I ) ) + VMAX = MAX( ABS( VR( I ) )+ABS( VI( I ) ), VMAX ) + VCRIT = BIGNUM / VMAX + ELSE + DO 240 J = 1, N + VR( J ) = ZERO + VI( J ) = ZERO + 240 CONTINUE + VR( I ) = ONE + VI( I ) = ONE + SCALE = ZERO + VMAX = ONE + VCRIT = BIGNUM + END IF + 250 CONTINUE +* +* Test for sufficient growth in the norm of (VR,VI). +* + VNORM = DASUM( N, VR, 1 ) + DASUM( N, VI, 1 ) + IF( VNORM.GE.GROWTO*SCALE ) + $ GO TO 280 +* +* Choose a new orthogonal starting vector and try again. +* + Y = EPS3 / ( ROOTN+ONE ) + VR( 1 ) = EPS3 + VI( 1 ) = ZERO +* + DO 260 I = 2, N + VR( I ) = Y + VI( I ) = ZERO + 260 CONTINUE + VR( N-ITS+1 ) = VR( N-ITS+1 ) - EPS3*ROOTN + 270 CONTINUE +* +* Failure to find eigenvector in N iterations +* + INFO = 1 +* + 280 CONTINUE +* +* Normalize eigenvector. +* + VNORM = ZERO + DO 290 I = 1, N + VNORM = MAX( VNORM, ABS( VR( I ) )+ABS( VI( I ) ) ) + 290 CONTINUE + CALL DSCAL( N, ONE / VNORM, VR, 1 ) + CALL DSCAL( N, ONE / VNORM, VI, 1 ) +* + END IF +* + RETURN +* +* End of DLAEIN +* + END diff --git a/costa/native/external/lapack/dlaev2.f b/costa/native/external/lapack/dlaev2.f new file mode 100644 index 000000000..7d7219de4 --- /dev/null +++ b/costa/native/external/lapack/dlaev2.f @@ -0,0 +1,170 @@ + SUBROUTINE DLAEV2( A, B, C, RT1, RT2, CS1, SN1 ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + DOUBLE PRECISION A, B, C, CS1, RT1, RT2, SN1 +* .. +* +* Purpose +* ======= +* +* DLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix +* [ A B ] +* [ B C ]. +* On return, RT1 is the eigenvalue of larger absolute value, RT2 is the +* eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right +* eigenvector for RT1, giving the decomposition +* +* [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] +* [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. +* +* Arguments +* ========= +* +* A (input) DOUBLE PRECISION +* The (1,1) element of the 2-by-2 matrix. +* +* B (input) DOUBLE PRECISION +* The (1,2) element and the conjugate of the (2,1) element of +* the 2-by-2 matrix. +* +* C (input) DOUBLE PRECISION +* The (2,2) element of the 2-by-2 matrix. +* +* RT1 (output) DOUBLE PRECISION +* The eigenvalue of larger absolute value. +* +* RT2 (output) DOUBLE PRECISION +* The eigenvalue of smaller absolute value. +* +* CS1 (output) DOUBLE PRECISION +* SN1 (output) DOUBLE PRECISION +* The vector (CS1, SN1) is a unit right eigenvector for RT1. +* +* Further Details +* =============== +* +* RT1 is accurate to a few ulps barring over/underflow. +* +* RT2 may be inaccurate if there is massive cancellation in the +* determinant A*C-B*B; higher precision or correctly rounded or +* correctly truncated arithmetic would be needed to compute RT2 +* accurately in all cases. +* +* CS1 and SN1 are accurate to a few ulps barring over/underflow. +* +* Overflow is possible only if RT1 is within a factor of 5 of overflow. +* Underflow is harmless if the input data is 0 or exceeds +* underflow_threshold / macheps. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D0 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION HALF + PARAMETER ( HALF = 0.5D0 ) +* .. +* .. Local Scalars .. + INTEGER SGN1, SGN2 + DOUBLE PRECISION AB, ACMN, ACMX, ACS, ADF, CS, CT, DF, RT, SM, + $ TB, TN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* +* Compute the eigenvalues +* + SM = A + C + DF = A - C + ADF = ABS( DF ) + TB = B + B + AB = ABS( TB ) + IF( ABS( A ).GT.ABS( C ) ) THEN + ACMX = A + ACMN = C + ELSE + ACMX = C + ACMN = A + END IF + IF( ADF.GT.AB ) THEN + RT = ADF*SQRT( ONE+( AB / ADF )**2 ) + ELSE IF( ADF.LT.AB ) THEN + RT = AB*SQRT( ONE+( ADF / AB )**2 ) + ELSE +* +* Includes case AB=ADF=0 +* + RT = AB*SQRT( TWO ) + END IF + IF( SM.LT.ZERO ) THEN + RT1 = HALF*( SM-RT ) + SGN1 = -1 +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE IF( SM.GT.ZERO ) THEN + RT1 = HALF*( SM+RT ) + SGN1 = 1 +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE +* +* Includes case RT1 = RT2 = 0 +* + RT1 = HALF*RT + RT2 = -HALF*RT + SGN1 = 1 + END IF +* +* Compute the eigenvector +* + IF( DF.GE.ZERO ) THEN + CS = DF + RT + SGN2 = 1 + ELSE + CS = DF - RT + SGN2 = -1 + END IF + ACS = ABS( CS ) + IF( ACS.GT.AB ) THEN + CT = -TB / CS + SN1 = ONE / SQRT( ONE+CT*CT ) + CS1 = CT*SN1 + ELSE + IF( AB.EQ.ZERO ) THEN + CS1 = ONE + SN1 = ZERO + ELSE + TN = -CS / TB + CS1 = ONE / SQRT( ONE+TN*TN ) + SN1 = TN*CS1 + END IF + END IF + IF( SGN1.EQ.SGN2 ) THEN + TN = CS1 + CS1 = -SN1 + SN1 = TN + END IF + RETURN +* +* End of DLAEV2 +* + END diff --git a/costa/native/external/lapack/dlaexc.f b/costa/native/external/lapack/dlaexc.f new file mode 100644 index 000000000..05f80dcc6 --- /dev/null +++ b/costa/native/external/lapack/dlaexc.f @@ -0,0 +1,355 @@ + SUBROUTINE DLAEXC( WANTQ, N, T, LDT, Q, LDQ, J1, N1, N2, WORK, + $ INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + LOGICAL WANTQ + INTEGER INFO, J1, LDQ, LDT, N, N1, N2 +* .. +* .. Array Arguments .. + DOUBLE PRECISION Q( LDQ, * ), T( LDT, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLAEXC swaps adjacent diagonal blocks T11 and T22 of order 1 or 2 in +* an upper quasi-triangular matrix T by an orthogonal similarity +* transformation. +* +* T must be in Schur canonical form, that is, block upper triangular +* with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block +* has its diagonal elemnts equal and its off-diagonal elements of +* opposite sign. +* +* Arguments +* ========= +* +* WANTQ (input) LOGICAL +* = .TRUE. : accumulate the transformation in the matrix Q; +* = .FALSE.: do not accumulate the transformation. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input/output) DOUBLE PRECISION array, dimension (LDT,N) +* On entry, the upper quasi-triangular matrix T, in Schur +* canonical form. +* On exit, the updated matrix T, again in Schur canonical form. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) +* On entry, if WANTQ is .TRUE., the orthogonal matrix Q. +* On exit, if WANTQ is .TRUE., the updated matrix Q. +* If WANTQ is .FALSE., Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. +* LDQ >= 1; and if WANTQ is .TRUE., LDQ >= N. +* +* J1 (input) INTEGER +* The index of the first row of the first block T11. +* +* N1 (input) INTEGER +* The order of the first block T11. N1 = 0, 1 or 2. +* +* N2 (input) INTEGER +* The order of the second block T22. N2 = 0, 1 or 2. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* = 1: the transformed matrix T would be too far from Schur +* form; the blocks are not swapped and T and Q are +* unchanged. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION TEN + PARAMETER ( TEN = 1.0D+1 ) + INTEGER LDD, LDX + PARAMETER ( LDD = 4, LDX = 2 ) +* .. +* .. Local Scalars .. + INTEGER IERR, J2, J3, J4, K, ND + DOUBLE PRECISION CS, DNORM, EPS, SCALE, SMLNUM, SN, T11, T22, + $ T33, TAU, TAU1, TAU2, TEMP, THRESH, WI1, WI2, + $ WR1, WR2, XNORM +* .. +* .. Local Arrays .. + DOUBLE PRECISION D( LDD, 4 ), U( 3 ), U1( 3 ), U2( 3 ), + $ X( LDX, 2 ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL DLAMCH, DLANGE +* .. +* .. External Subroutines .. + EXTERNAL DLACPY, DLANV2, DLARFG, DLARFX, DLARTG, DLASY2, + $ DROT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 .OR. N1.EQ.0 .OR. N2.EQ.0 ) + $ RETURN + IF( J1+N1.GT.N ) + $ RETURN +* + J2 = J1 + 1 + J3 = J1 + 2 + J4 = J1 + 3 +* + IF( N1.EQ.1 .AND. N2.EQ.1 ) THEN +* +* Swap two 1-by-1 blocks. +* + T11 = T( J1, J1 ) + T22 = T( J2, J2 ) +* +* Determine the transformation to perform the interchange. +* + CALL DLARTG( T( J1, J2 ), T22-T11, CS, SN, TEMP ) +* +* Apply transformation to the matrix T. +* + IF( J3.LE.N ) + $ CALL DROT( N-J1-1, T( J1, J3 ), LDT, T( J2, J3 ), LDT, CS, + $ SN ) + CALL DROT( J1-1, T( 1, J1 ), 1, T( 1, J2 ), 1, CS, SN ) +* + T( J1, J1 ) = T22 + T( J2, J2 ) = T11 +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL DROT( N, Q( 1, J1 ), 1, Q( 1, J2 ), 1, CS, SN ) + END IF +* + ELSE +* +* Swapping involves at least one 2-by-2 block. +* +* Copy the diagonal block of order N1+N2 to the local array D +* and compute its norm. +* + ND = N1 + N2 + CALL DLACPY( 'Full', ND, ND, T( J1, J1 ), LDT, D, LDD ) + DNORM = DLANGE( 'Max', ND, ND, D, LDD, WORK ) +* +* Compute machine-dependent threshold for test for accepting +* swap. +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + THRESH = MAX( TEN*EPS*DNORM, SMLNUM ) +* +* Solve T11*X - X*T22 = scale*T12 for X. +* + CALL DLASY2( .FALSE., .FALSE., -1, N1, N2, D, LDD, + $ D( N1+1, N1+1 ), LDD, D( 1, N1+1 ), LDD, SCALE, X, + $ LDX, XNORM, IERR ) +* +* Swap the adjacent diagonal blocks. +* + K = N1 + N1 + N2 - 3 + GO TO ( 10, 20, 30 )K +* + 10 CONTINUE +* +* N1 = 1, N2 = 2: generate elementary reflector H so that: +* +* ( scale, X11, X12 ) H = ( 0, 0, * ) +* + U( 1 ) = SCALE + U( 2 ) = X( 1, 1 ) + U( 3 ) = X( 1, 2 ) + CALL DLARFG( 3, U( 3 ), U, 1, TAU ) + U( 3 ) = ONE + T11 = T( J1, J1 ) +* +* Perform swap provisionally on diagonal block in D. +* + CALL DLARFX( 'L', 3, 3, U, TAU, D, LDD, WORK ) + CALL DLARFX( 'R', 3, 3, U, TAU, D, LDD, WORK ) +* +* Test whether to reject swap. +* + IF( MAX( ABS( D( 3, 1 ) ), ABS( D( 3, 2 ) ), ABS( D( 3, + $ 3 )-T11 ) ).GT.THRESH )GO TO 50 +* +* Accept swap: apply transformation to the entire matrix T. +* + CALL DLARFX( 'L', 3, N-J1+1, U, TAU, T( J1, J1 ), LDT, WORK ) + CALL DLARFX( 'R', J2, 3, U, TAU, T( 1, J1 ), LDT, WORK ) +* + T( J3, J1 ) = ZERO + T( J3, J2 ) = ZERO + T( J3, J3 ) = T11 +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL DLARFX( 'R', N, 3, U, TAU, Q( 1, J1 ), LDQ, WORK ) + END IF + GO TO 40 +* + 20 CONTINUE +* +* N1 = 2, N2 = 1: generate elementary reflector H so that: +* +* H ( -X11 ) = ( * ) +* ( -X21 ) = ( 0 ) +* ( scale ) = ( 0 ) +* + U( 1 ) = -X( 1, 1 ) + U( 2 ) = -X( 2, 1 ) + U( 3 ) = SCALE + CALL DLARFG( 3, U( 1 ), U( 2 ), 1, TAU ) + U( 1 ) = ONE + T33 = T( J3, J3 ) +* +* Perform swap provisionally on diagonal block in D. +* + CALL DLARFX( 'L', 3, 3, U, TAU, D, LDD, WORK ) + CALL DLARFX( 'R', 3, 3, U, TAU, D, LDD, WORK ) +* +* Test whether to reject swap. +* + IF( MAX( ABS( D( 2, 1 ) ), ABS( D( 3, 1 ) ), ABS( D( 1, + $ 1 )-T33 ) ).GT.THRESH )GO TO 50 +* +* Accept swap: apply transformation to the entire matrix T. +* + CALL DLARFX( 'R', J3, 3, U, TAU, T( 1, J1 ), LDT, WORK ) + CALL DLARFX( 'L', 3, N-J1, U, TAU, T( J1, J2 ), LDT, WORK ) +* + T( J1, J1 ) = T33 + T( J2, J1 ) = ZERO + T( J3, J1 ) = ZERO +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL DLARFX( 'R', N, 3, U, TAU, Q( 1, J1 ), LDQ, WORK ) + END IF + GO TO 40 +* + 30 CONTINUE +* +* N1 = 2, N2 = 2: generate elementary reflectors H(1) and H(2) so +* that: +* +* H(2) H(1) ( -X11 -X12 ) = ( * * ) +* ( -X21 -X22 ) ( 0 * ) +* ( scale 0 ) ( 0 0 ) +* ( 0 scale ) ( 0 0 ) +* + U1( 1 ) = -X( 1, 1 ) + U1( 2 ) = -X( 2, 1 ) + U1( 3 ) = SCALE + CALL DLARFG( 3, U1( 1 ), U1( 2 ), 1, TAU1 ) + U1( 1 ) = ONE +* + TEMP = -TAU1*( X( 1, 2 )+U1( 2 )*X( 2, 2 ) ) + U2( 1 ) = -TEMP*U1( 2 ) - X( 2, 2 ) + U2( 2 ) = -TEMP*U1( 3 ) + U2( 3 ) = SCALE + CALL DLARFG( 3, U2( 1 ), U2( 2 ), 1, TAU2 ) + U2( 1 ) = ONE +* +* Perform swap provisionally on diagonal block in D. +* + CALL DLARFX( 'L', 3, 4, U1, TAU1, D, LDD, WORK ) + CALL DLARFX( 'R', 4, 3, U1, TAU1, D, LDD, WORK ) + CALL DLARFX( 'L', 3, 4, U2, TAU2, D( 2, 1 ), LDD, WORK ) + CALL DLARFX( 'R', 4, 3, U2, TAU2, D( 1, 2 ), LDD, WORK ) +* +* Test whether to reject swap. +* + IF( MAX( ABS( D( 3, 1 ) ), ABS( D( 3, 2 ) ), ABS( D( 4, 1 ) ), + $ ABS( D( 4, 2 ) ) ).GT.THRESH )GO TO 50 +* +* Accept swap: apply transformation to the entire matrix T. +* + CALL DLARFX( 'L', 3, N-J1+1, U1, TAU1, T( J1, J1 ), LDT, WORK ) + CALL DLARFX( 'R', J4, 3, U1, TAU1, T( 1, J1 ), LDT, WORK ) + CALL DLARFX( 'L', 3, N-J1+1, U2, TAU2, T( J2, J1 ), LDT, WORK ) + CALL DLARFX( 'R', J4, 3, U2, TAU2, T( 1, J2 ), LDT, WORK ) +* + T( J3, J1 ) = ZERO + T( J3, J2 ) = ZERO + T( J4, J1 ) = ZERO + T( J4, J2 ) = ZERO +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL DLARFX( 'R', N, 3, U1, TAU1, Q( 1, J1 ), LDQ, WORK ) + CALL DLARFX( 'R', N, 3, U2, TAU2, Q( 1, J2 ), LDQ, WORK ) + END IF +* + 40 CONTINUE +* + IF( N2.EQ.2 ) THEN +* +* Standardize new 2-by-2 block T11 +* + CALL DLANV2( T( J1, J1 ), T( J1, J2 ), T( J2, J1 ), + $ T( J2, J2 ), WR1, WI1, WR2, WI2, CS, SN ) + CALL DROT( N-J1-1, T( J1, J1+2 ), LDT, T( J2, J1+2 ), LDT, + $ CS, SN ) + CALL DROT( J1-1, T( 1, J1 ), 1, T( 1, J2 ), 1, CS, SN ) + IF( WANTQ ) + $ CALL DROT( N, Q( 1, J1 ), 1, Q( 1, J2 ), 1, CS, SN ) + END IF +* + IF( N1.EQ.2 ) THEN +* +* Standardize new 2-by-2 block T22 +* + J3 = J1 + N2 + J4 = J3 + 1 + CALL DLANV2( T( J3, J3 ), T( J3, J4 ), T( J4, J3 ), + $ T( J4, J4 ), WR1, WI1, WR2, WI2, CS, SN ) + IF( J3+2.LE.N ) + $ CALL DROT( N-J3-1, T( J3, J3+2 ), LDT, T( J4, J3+2 ), + $ LDT, CS, SN ) + CALL DROT( J3-1, T( 1, J3 ), 1, T( 1, J4 ), 1, CS, SN ) + IF( WANTQ ) + $ CALL DROT( N, Q( 1, J3 ), 1, Q( 1, J4 ), 1, CS, SN ) + END IF +* + END IF + RETURN +* +* Exit with INFO = 1 if swap was rejected. +* + 50 CONTINUE + INFO = 1 + RETURN +* +* End of DLAEXC +* + END diff --git a/costa/native/external/lapack/dlag2.f b/costa/native/external/lapack/dlag2.f new file mode 100644 index 000000000..3cce61bb1 --- /dev/null +++ b/costa/native/external/lapack/dlag2.f @@ -0,0 +1,301 @@ + SUBROUTINE DLAG2( A, LDA, B, LDB, SAFMIN, SCALE1, SCALE2, WR1, + $ WR2, WI ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER LDA, LDB + DOUBLE PRECISION SAFMIN, SCALE1, SCALE2, WI, WR1, WR2 +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DLAG2 computes the eigenvalues of a 2 x 2 generalized eigenvalue +* problem A - w B, with scaling as necessary to avoid over-/underflow. +* +* The scaling factor "s" results in a modified eigenvalue equation +* +* s A - w B +* +* where s is a non-negative scaling factor chosen so that w, w B, +* and s A do not overflow and, if possible, do not underflow, either. +* +* Arguments +* ========= +* +* A (input) DOUBLE PRECISION array, dimension (LDA, 2) +* On entry, the 2 x 2 matrix A. It is assumed that its 1-norm +* is less than 1/SAFMIN. Entries less than +* sqrt(SAFMIN)*norm(A) are subject to being treated as zero. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= 2. +* +* B (input) DOUBLE PRECISION array, dimension (LDB, 2) +* On entry, the 2 x 2 upper triangular matrix B. It is +* assumed that the one-norm of B is less than 1/SAFMIN. The +* diagonals should be at least sqrt(SAFMIN) times the largest +* element of B (in absolute value); if a diagonal is smaller +* than that, then +/- sqrt(SAFMIN) will be used instead of +* that diagonal. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= 2. +* +* SAFMIN (input) DOUBLE PRECISION +* The smallest positive number s.t. 1/SAFMIN does not +* overflow. (This should always be DLAMCH('S') -- it is an +* argument in order to avoid having to call DLAMCH frequently.) +* +* SCALE1 (output) DOUBLE PRECISION +* A scaling factor used to avoid over-/underflow in the +* eigenvalue equation which defines the first eigenvalue. If +* the eigenvalues are complex, then the eigenvalues are +* ( WR1 +/- WI i ) / SCALE1 (which may lie outside the +* exponent range of the machine), SCALE1=SCALE2, and SCALE1 +* will always be positive. If the eigenvalues are real, then +* the first (real) eigenvalue is WR1 / SCALE1 , but this may +* overflow or underflow, and in fact, SCALE1 may be zero or +* less than the underflow threshhold if the exact eigenvalue +* is sufficiently large. +* +* SCALE2 (output) DOUBLE PRECISION +* A scaling factor used to avoid over-/underflow in the +* eigenvalue equation which defines the second eigenvalue. If +* the eigenvalues are complex, then SCALE2=SCALE1. If the +* eigenvalues are real, then the second (real) eigenvalue is +* WR2 / SCALE2 , but this may overflow or underflow, and in +* fact, SCALE2 may be zero or less than the underflow +* threshhold if the exact eigenvalue is sufficiently large. +* +* WR1 (output) DOUBLE PRECISION +* If the eigenvalue is real, then WR1 is SCALE1 times the +* eigenvalue closest to the (2,2) element of A B**(-1). If the +* eigenvalue is complex, then WR1=WR2 is SCALE1 times the real +* part of the eigenvalues. +* +* WR2 (output) DOUBLE PRECISION +* If the eigenvalue is real, then WR2 is SCALE2 times the +* other eigenvalue. If the eigenvalue is complex, then +* WR1=WR2 is SCALE1 times the real part of the eigenvalues. +* +* WI (output) DOUBLE PRECISION +* If the eigenvalue is real, then WI is zero. If the +* eigenvalue is complex, then WI is SCALE1 times the imaginary +* part of the eigenvalues. WI will always be non-negative. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) + DOUBLE PRECISION HALF + PARAMETER ( HALF = ONE / TWO ) + DOUBLE PRECISION FUZZY1 + PARAMETER ( FUZZY1 = ONE+1.0D-5 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION A11, A12, A21, A22, ABI22, ANORM, AS11, AS12, + $ AS22, ASCALE, B11, B12, B22, BINV11, BINV22, + $ BMIN, BNORM, BSCALE, BSIZE, C1, C2, C3, C4, C5, + $ DIFF, DISCR, PP, QQ, R, RTMAX, RTMIN, S1, S2, + $ SAFMAX, SHIFT, SS, SUM, WABS, WBIG, WDET, + $ WSCALE, WSIZE, WSMALL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SIGN, SQRT +* .. +* .. Executable Statements .. +* + RTMIN = SQRT( SAFMIN ) + RTMAX = ONE / RTMIN + SAFMAX = ONE / SAFMIN +* +* Scale A +* + ANORM = MAX( ABS( A( 1, 1 ) )+ABS( A( 2, 1 ) ), + $ ABS( A( 1, 2 ) )+ABS( A( 2, 2 ) ), SAFMIN ) + ASCALE = ONE / ANORM + A11 = ASCALE*A( 1, 1 ) + A21 = ASCALE*A( 2, 1 ) + A12 = ASCALE*A( 1, 2 ) + A22 = ASCALE*A( 2, 2 ) +* +* Perturb B if necessary to insure non-singularity +* + B11 = B( 1, 1 ) + B12 = B( 1, 2 ) + B22 = B( 2, 2 ) + BMIN = RTMIN*MAX( ABS( B11 ), ABS( B12 ), ABS( B22 ), RTMIN ) + IF( ABS( B11 ).LT.BMIN ) + $ B11 = SIGN( BMIN, B11 ) + IF( ABS( B22 ).LT.BMIN ) + $ B22 = SIGN( BMIN, B22 ) +* +* Scale B +* + BNORM = MAX( ABS( B11 ), ABS( B12 )+ABS( B22 ), SAFMIN ) + BSIZE = MAX( ABS( B11 ), ABS( B22 ) ) + BSCALE = ONE / BSIZE + B11 = B11*BSCALE + B12 = B12*BSCALE + B22 = B22*BSCALE +* +* Compute larger eigenvalue by method described by C. van Loan +* +* ( AS is A shifted by -SHIFT*B ) +* + BINV11 = ONE / B11 + BINV22 = ONE / B22 + S1 = A11*BINV11 + S2 = A22*BINV22 + IF( ABS( S1 ).LE.ABS( S2 ) ) THEN + AS12 = A12 - S1*B12 + AS22 = A22 - S1*B22 + SS = A21*( BINV11*BINV22 ) + ABI22 = AS22*BINV22 - SS*B12 + PP = HALF*ABI22 + SHIFT = S1 + ELSE + AS12 = A12 - S2*B12 + AS11 = A11 - S2*B11 + SS = A21*( BINV11*BINV22 ) + ABI22 = -SS*B12 + PP = HALF*( AS11*BINV11+ABI22 ) + SHIFT = S2 + END IF + QQ = SS*AS12 + IF( ABS( PP*RTMIN ).GE.ONE ) THEN + DISCR = ( RTMIN*PP )**2 + QQ*SAFMIN + R = SQRT( ABS( DISCR ) )*RTMAX + ELSE + IF( PP**2+ABS( QQ ).LE.SAFMIN ) THEN + DISCR = ( RTMAX*PP )**2 + QQ*SAFMAX + R = SQRT( ABS( DISCR ) )*RTMIN + ELSE + DISCR = PP**2 + QQ + R = SQRT( ABS( DISCR ) ) + END IF + END IF +* +* Note: the test of R in the following IF is to cover the case when +* DISCR is small and negative and is flushed to zero during +* the calculation of R. On machines which have a consistent +* flush-to-zero threshhold and handle numbers above that +* threshhold correctly, it would not be necessary. +* + IF( DISCR.GE.ZERO .OR. R.EQ.ZERO ) THEN + SUM = PP + SIGN( R, PP ) + DIFF = PP - SIGN( R, PP ) + WBIG = SHIFT + SUM +* +* Compute smaller eigenvalue +* + WSMALL = SHIFT + DIFF + IF( HALF*ABS( WBIG ).GT.MAX( ABS( WSMALL ), SAFMIN ) ) THEN + WDET = ( A11*A22-A12*A21 )*( BINV11*BINV22 ) + WSMALL = WDET / WBIG + END IF +* +* Choose (real) eigenvalue closest to 2,2 element of A*B**(-1) +* for WR1. +* + IF( PP.GT.ABI22 ) THEN + WR1 = MIN( WBIG, WSMALL ) + WR2 = MAX( WBIG, WSMALL ) + ELSE + WR1 = MAX( WBIG, WSMALL ) + WR2 = MIN( WBIG, WSMALL ) + END IF + WI = ZERO + ELSE +* +* Complex eigenvalues +* + WR1 = SHIFT + PP + WR2 = WR1 + WI = R + END IF +* +* Further scaling to avoid underflow and overflow in computing +* SCALE1 and overflow in computing w*B. +* +* This scale factor (WSCALE) is bounded from above using C1 and C2, +* and from below using C3 and C4. +* C1 implements the condition s A must never overflow. +* C2 implements the condition w B must never overflow. +* C3, with C2, +* implement the condition that s A - w B must never overflow. +* C4 implements the condition s should not underflow. +* C5 implements the condition max(s,|w|) should be at least 2. +* + C1 = BSIZE*( SAFMIN*MAX( ONE, ASCALE ) ) + C2 = SAFMIN*MAX( ONE, BNORM ) + C3 = BSIZE*SAFMIN + IF( ASCALE.LE.ONE .AND. BSIZE.LE.ONE ) THEN + C4 = MIN( ONE, ( ASCALE / SAFMIN )*BSIZE ) + ELSE + C4 = ONE + END IF + IF( ASCALE.LE.ONE .OR. BSIZE.LE.ONE ) THEN + C5 = MIN( ONE, ASCALE*BSIZE ) + ELSE + C5 = ONE + END IF +* +* Scale first eigenvalue +* + WABS = ABS( WR1 ) + ABS( WI ) + WSIZE = MAX( SAFMIN, C1, FUZZY1*( WABS*C2+C3 ), + $ MIN( C4, HALF*MAX( WABS, C5 ) ) ) + IF( WSIZE.NE.ONE ) THEN + WSCALE = ONE / WSIZE + IF( WSIZE.GT.ONE ) THEN + SCALE1 = ( MAX( ASCALE, BSIZE )*WSCALE )* + $ MIN( ASCALE, BSIZE ) + ELSE + SCALE1 = ( MIN( ASCALE, BSIZE )*WSCALE )* + $ MAX( ASCALE, BSIZE ) + END IF + WR1 = WR1*WSCALE + IF( WI.NE.ZERO ) THEN + WI = WI*WSCALE + WR2 = WR1 + SCALE2 = SCALE1 + END IF + ELSE + SCALE1 = ASCALE*BSIZE + SCALE2 = SCALE1 + END IF +* +* Scale second eigenvalue (if real) +* + IF( WI.EQ.ZERO ) THEN + WSIZE = MAX( SAFMIN, C1, FUZZY1*( ABS( WR2 )*C2+C3 ), + $ MIN( C4, HALF*MAX( ABS( WR2 ), C5 ) ) ) + IF( WSIZE.NE.ONE ) THEN + WSCALE = ONE / WSIZE + IF( WSIZE.GT.ONE ) THEN + SCALE2 = ( MAX( ASCALE, BSIZE )*WSCALE )* + $ MIN( ASCALE, BSIZE ) + ELSE + SCALE2 = ( MIN( ASCALE, BSIZE )*WSCALE )* + $ MAX( ASCALE, BSIZE ) + END IF + WR2 = WR2*WSCALE + ELSE + SCALE2 = ASCALE*BSIZE + END IF + END IF +* +* End of DLAG2 +* + RETURN + END diff --git a/costa/native/external/lapack/dlags2.f b/costa/native/external/lapack/dlags2.f new file mode 100644 index 000000000..bf2acdefb --- /dev/null +++ b/costa/native/external/lapack/dlags2.f @@ -0,0 +1,270 @@ + SUBROUTINE DLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, CSV, + $ SNV, CSQ, SNQ ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + LOGICAL UPPER + DOUBLE PRECISION A1, A2, A3, B1, B2, B3, CSQ, CSU, CSV, SNQ, + $ SNU, SNV +* .. +* +* Purpose +* ======= +* +* DLAGS2 computes 2-by-2 orthogonal matrices U, V and Q, such +* that if ( UPPER ) then +* +* U'*A*Q = U'*( A1 A2 )*Q = ( x 0 ) +* ( 0 A3 ) ( x x ) +* and +* V'*B*Q = V'*( B1 B2 )*Q = ( x 0 ) +* ( 0 B3 ) ( x x ) +* +* or if ( .NOT.UPPER ) then +* +* U'*A*Q = U'*( A1 0 )*Q = ( x x ) +* ( A2 A3 ) ( 0 x ) +* and +* V'*B*Q = V'*( B1 0 )*Q = ( x x ) +* ( B2 B3 ) ( 0 x ) +* +* The rows of the transformed A and B are parallel, where +* +* U = ( CSU SNU ), V = ( CSV SNV ), Q = ( CSQ SNQ ) +* ( -SNU CSU ) ( -SNV CSV ) ( -SNQ CSQ ) +* +* Z' denotes the transpose of Z. +* +* +* Arguments +* ========= +* +* UPPER (input) LOGICAL +* = .TRUE.: the input matrices A and B are upper triangular. +* = .FALSE.: the input matrices A and B are lower triangular. +* +* A1 (input) DOUBLE PRECISION +* A2 (input) DOUBLE PRECISION +* A3 (input) DOUBLE PRECISION +* On entry, A1, A2 and A3 are elements of the input 2-by-2 +* upper (lower) triangular matrix A. +* +* B1 (input) DOUBLE PRECISION +* B2 (input) DOUBLE PRECISION +* B3 (input) DOUBLE PRECISION +* On entry, B1, B2 and B3 are elements of the input 2-by-2 +* upper (lower) triangular matrix B. +* +* CSU (output) DOUBLE PRECISION +* SNU (output) DOUBLE PRECISION +* The desired orthogonal matrix U. +* +* CSV (output) DOUBLE PRECISION +* SNV (output) DOUBLE PRECISION +* The desired orthogonal matrix V. +* +* CSQ (output) DOUBLE PRECISION +* SNQ (output) DOUBLE PRECISION +* The desired orthogonal matrix Q. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION A, AUA11, AUA12, AUA21, AUA22, AVB11, AVB12, + $ AVB21, AVB22, B, C, CSL, CSR, D, R, S1, S2, + $ SNL, SNR, UA11, UA11R, UA12, UA21, UA22, UA22R, + $ VB11, VB11R, VB12, VB21, VB22, VB22R +* .. +* .. External Subroutines .. + EXTERNAL DLARTG, DLASV2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + IF( UPPER ) THEN +* +* Input matrices A and B are upper triangular matrices +* +* Form matrix C = A*adj(B) = ( a b ) +* ( 0 d ) +* + A = A1*B3 + D = A3*B1 + B = A2*B1 - A1*B2 +* +* The SVD of real 2-by-2 triangular C +* +* ( CSL -SNL )*( A B )*( CSR SNR ) = ( R 0 ) +* ( SNL CSL ) ( 0 D ) ( -SNR CSR ) ( 0 T ) +* + CALL DLASV2( A, B, D, S1, S2, SNR, CSR, SNL, CSL ) +* + IF( ABS( CSL ).GE.ABS( SNL ) .OR. ABS( CSR ).GE.ABS( SNR ) ) + $ THEN +* +* Compute the (1,1) and (1,2) elements of U'*A and V'*B, +* and (1,2) element of |U|'*|A| and |V|'*|B|. +* + UA11R = CSL*A1 + UA12 = CSL*A2 + SNL*A3 +* + VB11R = CSR*B1 + VB12 = CSR*B2 + SNR*B3 +* + AUA12 = ABS( CSL )*ABS( A2 ) + ABS( SNL )*ABS( A3 ) + AVB12 = ABS( CSR )*ABS( B2 ) + ABS( SNR )*ABS( B3 ) +* +* zero (1,2) elements of U'*A and V'*B +* + IF( ( ABS( UA11R )+ABS( UA12 ) ).NE.ZERO ) THEN + IF( AUA12 / ( ABS( UA11R )+ABS( UA12 ) ).LE.AVB12 / + $ ( ABS( VB11R )+ABS( VB12 ) ) ) THEN + CALL DLARTG( -UA11R, UA12, CSQ, SNQ, R ) + ELSE + CALL DLARTG( -VB11R, VB12, CSQ, SNQ, R ) + END IF + ELSE + CALL DLARTG( -VB11R, VB12, CSQ, SNQ, R ) + END IF +* + CSU = CSL + SNU = -SNL + CSV = CSR + SNV = -SNR +* + ELSE +* +* Compute the (2,1) and (2,2) elements of U'*A and V'*B, +* and (2,2) element of |U|'*|A| and |V|'*|B|. +* + UA21 = -SNL*A1 + UA22 = -SNL*A2 + CSL*A3 +* + VB21 = -SNR*B1 + VB22 = -SNR*B2 + CSR*B3 +* + AUA22 = ABS( SNL )*ABS( A2 ) + ABS( CSL )*ABS( A3 ) + AVB22 = ABS( SNR )*ABS( B2 ) + ABS( CSR )*ABS( B3 ) +* +* zero (2,2) elements of U'*A and V'*B, and then swap. +* + IF( ( ABS( UA21 )+ABS( UA22 ) ).NE.ZERO ) THEN + IF( AUA22 / ( ABS( UA21 )+ABS( UA22 ) ).LE.AVB22 / + $ ( ABS( VB21 )+ABS( VB22 ) ) ) THEN + CALL DLARTG( -UA21, UA22, CSQ, SNQ, R ) + ELSE + CALL DLARTG( -VB21, VB22, CSQ, SNQ, R ) + END IF + ELSE + CALL DLARTG( -VB21, VB22, CSQ, SNQ, R ) + END IF +* + CSU = SNL + SNU = CSL + CSV = SNR + SNV = CSR +* + END IF +* + ELSE +* +* Input matrices A and B are lower triangular matrices +* +* Form matrix C = A*adj(B) = ( a 0 ) +* ( c d ) +* + A = A1*B3 + D = A3*B1 + C = A2*B3 - A3*B2 +* +* The SVD of real 2-by-2 triangular C +* +* ( CSL -SNL )*( A 0 )*( CSR SNR ) = ( R 0 ) +* ( SNL CSL ) ( C D ) ( -SNR CSR ) ( 0 T ) +* + CALL DLASV2( A, C, D, S1, S2, SNR, CSR, SNL, CSL ) +* + IF( ABS( CSR ).GE.ABS( SNR ) .OR. ABS( CSL ).GE.ABS( SNL ) ) + $ THEN +* +* Compute the (2,1) and (2,2) elements of U'*A and V'*B, +* and (2,1) element of |U|'*|A| and |V|'*|B|. +* + UA21 = -SNR*A1 + CSR*A2 + UA22R = CSR*A3 +* + VB21 = -SNL*B1 + CSL*B2 + VB22R = CSL*B3 +* + AUA21 = ABS( SNR )*ABS( A1 ) + ABS( CSR )*ABS( A2 ) + AVB21 = ABS( SNL )*ABS( B1 ) + ABS( CSL )*ABS( B2 ) +* +* zero (2,1) elements of U'*A and V'*B. +* + IF( ( ABS( UA21 )+ABS( UA22R ) ).NE.ZERO ) THEN + IF( AUA21 / ( ABS( UA21 )+ABS( UA22R ) ).LE.AVB21 / + $ ( ABS( VB21 )+ABS( VB22R ) ) ) THEN + CALL DLARTG( UA22R, UA21, CSQ, SNQ, R ) + ELSE + CALL DLARTG( VB22R, VB21, CSQ, SNQ, R ) + END IF + ELSE + CALL DLARTG( VB22R, VB21, CSQ, SNQ, R ) + END IF +* + CSU = CSR + SNU = -SNR + CSV = CSL + SNV = -SNL +* + ELSE +* +* Compute the (1,1) and (1,2) elements of U'*A and V'*B, +* and (1,1) element of |U|'*|A| and |V|'*|B|. +* + UA11 = CSR*A1 + SNR*A2 + UA12 = SNR*A3 +* + VB11 = CSL*B1 + SNL*B2 + VB12 = SNL*B3 +* + AUA11 = ABS( CSR )*ABS( A1 ) + ABS( SNR )*ABS( A2 ) + AVB11 = ABS( CSL )*ABS( B1 ) + ABS( SNL )*ABS( B2 ) +* +* zero (1,1) elements of U'*A and V'*B, and then swap. +* + IF( ( ABS( UA11 )+ABS( UA12 ) ).NE.ZERO ) THEN + IF( AUA11 / ( ABS( UA11 )+ABS( UA12 ) ).LE.AVB11 / + $ ( ABS( VB11 )+ABS( VB12 ) ) ) THEN + CALL DLARTG( UA12, UA11, CSQ, SNQ, R ) + ELSE + CALL DLARTG( VB12, VB11, CSQ, SNQ, R ) + END IF + ELSE + CALL DLARTG( VB12, VB11, CSQ, SNQ, R ) + END IF +* + CSU = SNR + SNU = CSR + CSV = SNL + SNV = CSL +* + END IF +* + END IF +* + RETURN +* +* End of DLAGS2 +* + END diff --git a/costa/native/external/lapack/dlagtf.f b/costa/native/external/lapack/dlagtf.f new file mode 100644 index 000000000..b127f6b4c --- /dev/null +++ b/costa/native/external/lapack/dlagtf.f @@ -0,0 +1,191 @@ + SUBROUTINE DLAGTF( N, A, LAMBDA, B, C, TOL, D, IN, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, N + DOUBLE PRECISION LAMBDA, TOL +* .. +* .. Array Arguments .. + INTEGER IN( * ) + DOUBLE PRECISION A( * ), B( * ), C( * ), D( * ) +* .. +* +* Purpose +* ======= +* +* DLAGTF factorizes the matrix (T - lambda*I), where T is an n by n +* tridiagonal matrix and lambda is a scalar, as +* +* T - lambda*I = PLU, +* +* where P is a permutation matrix, L is a unit lower tridiagonal matrix +* with at most one non-zero sub-diagonal elements per column and U is +* an upper triangular matrix with at most two non-zero super-diagonal +* elements per column. +* +* The factorization is obtained by Gaussian elimination with partial +* pivoting and implicit row scaling. +* +* The parameter LAMBDA is included in the routine so that DLAGTF may +* be used, in conjunction with DLAGTS, to obtain eigenvectors of T by +* inverse iteration. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix T. +* +* A (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, A must contain the diagonal elements of T. +* +* On exit, A is overwritten by the n diagonal elements of the +* upper triangular matrix U of the factorization of T. +* +* LAMBDA (input) DOUBLE PRECISION +* On entry, the scalar lambda. +* +* B (input/output) DOUBLE PRECISION array, dimension (N-1) +* On entry, B must contain the (n-1) super-diagonal elements of +* T. +* +* On exit, B is overwritten by the (n-1) super-diagonal +* elements of the matrix U of the factorization of T. +* +* C (input/output) DOUBLE PRECISION array, dimension (N-1) +* On entry, C must contain the (n-1) sub-diagonal elements of +* T. +* +* On exit, C is overwritten by the (n-1) sub-diagonal elements +* of the matrix L of the factorization of T. +* +* TOL (input) DOUBLE PRECISION +* On entry, a relative tolerance used to indicate whether or +* not the matrix (T - lambda*I) is nearly singular. TOL should +* normally be chose as approximately the largest relative error +* in the elements of T. For example, if the elements of T are +* correct to about 4 significant figures, then TOL should be +* set to about 5*10**(-4). If TOL is supplied as less than eps, +* where eps is the relative machine precision, then the value +* eps is used in place of TOL. +* +* D (output) DOUBLE PRECISION array, dimension (N-2) +* On exit, D is overwritten by the (n-2) second super-diagonal +* elements of the matrix U of the factorization of T. +* +* IN (output) INTEGER array, dimension (N) +* On exit, IN contains details of the permutation matrix P. If +* an interchange occurred at the kth step of the elimination, +* then IN(k) = 1, otherwise IN(k) = 0. The element IN(n) +* returns the smallest positive integer j such that +* +* abs( u(j,j) ).le. norm( (T - lambda*I)(j) )*TOL, +* +* where norm( A(j) ) denotes the sum of the absolute values of +* the jth row of the matrix A. If no such j exists then IN(n) +* is returned as zero. If IN(n) is returned as positive, then a +* diagonal element of U is small, indicating that +* (T - lambda*I) is singular or nearly singular, +* +* INFO (output) INTEGER +* = 0 : successful exit +* .lt. 0: if INFO = -k, the kth argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER K + DOUBLE PRECISION EPS, MULT, PIV1, PIV2, SCALE1, SCALE2, TEMP, TL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'DLAGTF', -INFO ) + RETURN + END IF +* + IF( N.EQ.0 ) + $ RETURN +* + A( 1 ) = A( 1 ) - LAMBDA + IN( N ) = 0 + IF( N.EQ.1 ) THEN + IF( A( 1 ).EQ.ZERO ) + $ IN( 1 ) = 1 + RETURN + END IF +* + EPS = DLAMCH( 'Epsilon' ) +* + TL = MAX( TOL, EPS ) + SCALE1 = ABS( A( 1 ) ) + ABS( B( 1 ) ) + DO 10 K = 1, N - 1 + A( K+1 ) = A( K+1 ) - LAMBDA + SCALE2 = ABS( C( K ) ) + ABS( A( K+1 ) ) + IF( K.LT.( N-1 ) ) + $ SCALE2 = SCALE2 + ABS( B( K+1 ) ) + IF( A( K ).EQ.ZERO ) THEN + PIV1 = ZERO + ELSE + PIV1 = ABS( A( K ) ) / SCALE1 + END IF + IF( C( K ).EQ.ZERO ) THEN + IN( K ) = 0 + PIV2 = ZERO + SCALE1 = SCALE2 + IF( K.LT.( N-1 ) ) + $ D( K ) = ZERO + ELSE + PIV2 = ABS( C( K ) ) / SCALE2 + IF( PIV2.LE.PIV1 ) THEN + IN( K ) = 0 + SCALE1 = SCALE2 + C( K ) = C( K ) / A( K ) + A( K+1 ) = A( K+1 ) - C( K )*B( K ) + IF( K.LT.( N-1 ) ) + $ D( K ) = ZERO + ELSE + IN( K ) = 1 + MULT = A( K ) / C( K ) + A( K ) = C( K ) + TEMP = A( K+1 ) + A( K+1 ) = B( K ) - MULT*TEMP + IF( K.LT.( N-1 ) ) THEN + D( K ) = B( K+1 ) + B( K+1 ) = -MULT*D( K ) + END IF + B( K ) = TEMP + C( K ) = MULT + END IF + END IF + IF( ( MAX( PIV1, PIV2 ).LE.TL ) .AND. ( IN( N ).EQ.0 ) ) + $ IN( N ) = K + 10 CONTINUE + IF( ( ABS( A( N ) ).LE.SCALE1*TL ) .AND. ( IN( N ).EQ.0 ) ) + $ IN( N ) = N +* + RETURN +* +* End of DLAGTF +* + END diff --git a/costa/native/external/lapack/dlagtm.f b/costa/native/external/lapack/dlagtm.f new file mode 100644 index 000000000..6ffc9ea21 --- /dev/null +++ b/costa/native/external/lapack/dlagtm.f @@ -0,0 +1,191 @@ + SUBROUTINE DLAGTM( TRANS, N, NRHS, ALPHA, DL, D, DU, X, LDX, BETA, + $ B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER LDB, LDX, N, NRHS + DOUBLE PRECISION ALPHA, BETA +* .. +* .. Array Arguments .. + DOUBLE PRECISION B( LDB, * ), D( * ), DL( * ), DU( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DLAGTM performs a matrix-vector product of the form +* +* B := alpha * A * X + beta * B +* +* where A is a tridiagonal matrix of order N, B and X are N by NRHS +* matrices, and alpha and beta are real scalars, each of which may be +* 0., 1., or -1. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER +* Specifies the operation applied to A. +* = 'N': No transpose, B := alpha * A * X + beta * B +* = 'T': Transpose, B := alpha * A'* X + beta * B +* = 'C': Conjugate transpose = Transpose +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices X and B. +* +* ALPHA (input) DOUBLE PRECISION +* The scalar alpha. ALPHA must be 0., 1., or -1.; otherwise, +* it is assumed to be 0. +* +* DL (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) sub-diagonal elements of T. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The diagonal elements of T. +* +* DU (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) super-diagonal elements of T. +* +* X (input) DOUBLE PRECISION array, dimension (LDX,NRHS) +* The N by NRHS matrix X. +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(N,1). +* +* BETA (input) DOUBLE PRECISION +* The scalar beta. BETA must be 0., 1., or -1.; otherwise, +* it is assumed to be 1. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the N by NRHS matrix B. +* On exit, B is overwritten by the matrix expression +* B := alpha * A * X + beta * B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(N,1). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) + $ RETURN +* +* Multiply B by BETA if BETA.NE.1. +* + IF( BETA.EQ.ZERO ) THEN + DO 20 J = 1, NRHS + DO 10 I = 1, N + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE IF( BETA.EQ.-ONE ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = -B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF +* + IF( ALPHA.EQ.ONE ) THEN + IF( LSAME( TRANS, 'N' ) ) THEN +* +* Compute B := B + A*X +* + DO 60 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + + $ DU( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) + DL( N-1 )*X( N-1, J ) + + $ D( N )*X( N, J ) + DO 50 I = 2, N - 1 + B( I, J ) = B( I, J ) + DL( I-1 )*X( I-1, J ) + + $ D( I )*X( I, J ) + DU( I )*X( I+1, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE +* +* Compute B := B + A'*X +* + DO 80 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + + $ DL( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) + DU( N-1 )*X( N-1, J ) + + $ D( N )*X( N, J ) + DO 70 I = 2, N - 1 + B( I, J ) = B( I, J ) + DU( I-1 )*X( I-1, J ) + + $ D( I )*X( I, J ) + DL( I )*X( I+1, J ) + 70 CONTINUE + END IF + 80 CONTINUE + END IF + ELSE IF( ALPHA.EQ.-ONE ) THEN + IF( LSAME( TRANS, 'N' ) ) THEN +* +* Compute B := B - A*X +* + DO 100 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) - + $ DU( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) - DL( N-1 )*X( N-1, J ) - + $ D( N )*X( N, J ) + DO 90 I = 2, N - 1 + B( I, J ) = B( I, J ) - DL( I-1 )*X( I-1, J ) - + $ D( I )*X( I, J ) - DU( I )*X( I+1, J ) + 90 CONTINUE + END IF + 100 CONTINUE + ELSE +* +* Compute B := B - A'*X +* + DO 120 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) - + $ DL( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) - DU( N-1 )*X( N-1, J ) - + $ D( N )*X( N, J ) + DO 110 I = 2, N - 1 + B( I, J ) = B( I, J ) - DU( I-1 )*X( I-1, J ) - + $ D( I )*X( I, J ) - DL( I )*X( I+1, J ) + 110 CONTINUE + END IF + 120 CONTINUE + END IF + END IF + RETURN +* +* End of DLAGTM +* + END diff --git a/costa/native/external/lapack/dlagts.f b/costa/native/external/lapack/dlagts.f new file mode 100644 index 000000000..3bd41ff38 --- /dev/null +++ b/costa/native/external/lapack/dlagts.f @@ -0,0 +1,305 @@ + SUBROUTINE DLAGTS( JOB, N, A, B, C, D, IN, Y, TOL, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, JOB, N + DOUBLE PRECISION TOL +* .. +* .. Array Arguments .. + INTEGER IN( * ) + DOUBLE PRECISION A( * ), B( * ), C( * ), D( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* DLAGTS may be used to solve one of the systems of equations +* +* (T - lambda*I)*x = y or (T - lambda*I)'*x = y, +* +* where T is an n by n tridiagonal matrix, for x, following the +* factorization of (T - lambda*I) as +* +* (T - lambda*I) = P*L*U , +* +* by routine DLAGTF. The choice of equation to be solved is +* controlled by the argument JOB, and in each case there is an option +* to perturb zero or very small diagonal elements of U, this option +* being intended for use in applications such as inverse iteration. +* +* Arguments +* ========= +* +* JOB (input) INTEGER +* Specifies the job to be performed by DLAGTS as follows: +* = 1: The equations (T - lambda*I)x = y are to be solved, +* but diagonal elements of U are not to be perturbed. +* = -1: The equations (T - lambda*I)x = y are to be solved +* and, if overflow would otherwise occur, the diagonal +* elements of U are to be perturbed. See argument TOL +* below. +* = 2: The equations (T - lambda*I)'x = y are to be solved, +* but diagonal elements of U are not to be perturbed. +* = -2: The equations (T - lambda*I)'x = y are to be solved +* and, if overflow would otherwise occur, the diagonal +* elements of U are to be perturbed. See argument TOL +* below. +* +* N (input) INTEGER +* The order of the matrix T. +* +* A (input) DOUBLE PRECISION array, dimension (N) +* On entry, A must contain the diagonal elements of U as +* returned from DLAGTF. +* +* B (input) DOUBLE PRECISION array, dimension (N-1) +* On entry, B must contain the first super-diagonal elements of +* U as returned from DLAGTF. +* +* C (input) DOUBLE PRECISION array, dimension (N-1) +* On entry, C must contain the sub-diagonal elements of L as +* returned from DLAGTF. +* +* D (input) DOUBLE PRECISION array, dimension (N-2) +* On entry, D must contain the second super-diagonal elements +* of U as returned from DLAGTF. +* +* IN (input) INTEGER array, dimension (N) +* On entry, IN must contain details of the matrix P as returned +* from DLAGTF. +* +* Y (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the right hand side vector y. +* On exit, Y is overwritten by the solution vector x. +* +* TOL (input/output) DOUBLE PRECISION +* On entry, with JOB .lt. 0, TOL should be the minimum +* perturbation to be made to very small diagonal elements of U. +* TOL should normally be chosen as about eps*norm(U), where eps +* is the relative machine precision, but if TOL is supplied as +* non-positive, then it is reset to eps*max( abs( u(i,j) ) ). +* If JOB .gt. 0 then TOL is not referenced. +* +* On exit, TOL is changed as described above, only if TOL is +* non-positive on entry. Otherwise TOL is unchanged. +* +* INFO (output) INTEGER +* = 0 : successful exit +* .lt. 0: if INFO = -i, the i-th argument had an illegal value +* .gt. 0: overflow would occur when computing the INFO(th) +* element of the solution vector x. This can only occur +* when JOB is supplied as positive and either means +* that a diagonal element of U is very small, or that +* the elements of the right-hand side vector y are very +* large. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER K + DOUBLE PRECISION ABSAK, AK, BIGNUM, EPS, PERT, SFMIN, TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SIGN +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( ( ABS( JOB ).GT.2 ) .OR. ( JOB.EQ.0 ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAGTS', -INFO ) + RETURN + END IF +* + IF( N.EQ.0 ) + $ RETURN +* + EPS = DLAMCH( 'Epsilon' ) + SFMIN = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SFMIN +* + IF( JOB.LT.0 ) THEN + IF( TOL.LE.ZERO ) THEN + TOL = ABS( A( 1 ) ) + IF( N.GT.1 ) + $ TOL = MAX( TOL, ABS( A( 2 ) ), ABS( B( 1 ) ) ) + DO 10 K = 3, N + TOL = MAX( TOL, ABS( A( K ) ), ABS( B( K-1 ) ), + $ ABS( D( K-2 ) ) ) + 10 CONTINUE + TOL = TOL*EPS + IF( TOL.EQ.ZERO ) + $ TOL = EPS + END IF + END IF +* + IF( ABS( JOB ).EQ.1 ) THEN + DO 20 K = 2, N + IF( IN( K-1 ).EQ.0 ) THEN + Y( K ) = Y( K ) - C( K-1 )*Y( K-1 ) + ELSE + TEMP = Y( K-1 ) + Y( K-1 ) = Y( K ) + Y( K ) = TEMP - C( K-1 )*Y( K ) + END IF + 20 CONTINUE + IF( JOB.EQ.1 ) THEN + DO 30 K = N, 1, -1 + IF( K.LE.N-2 ) THEN + TEMP = Y( K ) - B( K )*Y( K+1 ) - D( K )*Y( K+2 ) + ELSE IF( K.EQ.N-1 ) THEN + TEMP = Y( K ) - B( K )*Y( K+1 ) + ELSE + TEMP = Y( K ) + END IF + AK = A( K ) + ABSAK = ABS( AK ) + IF( ABSAK.LT.ONE ) THEN + IF( ABSAK.LT.SFMIN ) THEN + IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK ) + $ THEN + INFO = K + RETURN + ELSE + TEMP = TEMP*BIGNUM + AK = AK*BIGNUM + END IF + ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN + INFO = K + RETURN + END IF + END IF + Y( K ) = TEMP / AK + 30 CONTINUE + ELSE + DO 50 K = N, 1, -1 + IF( K.LE.N-2 ) THEN + TEMP = Y( K ) - B( K )*Y( K+1 ) - D( K )*Y( K+2 ) + ELSE IF( K.EQ.N-1 ) THEN + TEMP = Y( K ) - B( K )*Y( K+1 ) + ELSE + TEMP = Y( K ) + END IF + AK = A( K ) + PERT = SIGN( TOL, AK ) + 40 CONTINUE + ABSAK = ABS( AK ) + IF( ABSAK.LT.ONE ) THEN + IF( ABSAK.LT.SFMIN ) THEN + IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK ) + $ THEN + AK = AK + PERT + PERT = 2*PERT + GO TO 40 + ELSE + TEMP = TEMP*BIGNUM + AK = AK*BIGNUM + END IF + ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN + AK = AK + PERT + PERT = 2*PERT + GO TO 40 + END IF + END IF + Y( K ) = TEMP / AK + 50 CONTINUE + END IF + ELSE +* +* Come to here if JOB = 2 or -2 +* + IF( JOB.EQ.2 ) THEN + DO 60 K = 1, N + IF( K.GE.3 ) THEN + TEMP = Y( K ) - B( K-1 )*Y( K-1 ) - D( K-2 )*Y( K-2 ) + ELSE IF( K.EQ.2 ) THEN + TEMP = Y( K ) - B( K-1 )*Y( K-1 ) + ELSE + TEMP = Y( K ) + END IF + AK = A( K ) + ABSAK = ABS( AK ) + IF( ABSAK.LT.ONE ) THEN + IF( ABSAK.LT.SFMIN ) THEN + IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK ) + $ THEN + INFO = K + RETURN + ELSE + TEMP = TEMP*BIGNUM + AK = AK*BIGNUM + END IF + ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN + INFO = K + RETURN + END IF + END IF + Y( K ) = TEMP / AK + 60 CONTINUE + ELSE + DO 80 K = 1, N + IF( K.GE.3 ) THEN + TEMP = Y( K ) - B( K-1 )*Y( K-1 ) - D( K-2 )*Y( K-2 ) + ELSE IF( K.EQ.2 ) THEN + TEMP = Y( K ) - B( K-1 )*Y( K-1 ) + ELSE + TEMP = Y( K ) + END IF + AK = A( K ) + PERT = SIGN( TOL, AK ) + 70 CONTINUE + ABSAK = ABS( AK ) + IF( ABSAK.LT.ONE ) THEN + IF( ABSAK.LT.SFMIN ) THEN + IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK ) + $ THEN + AK = AK + PERT + PERT = 2*PERT + GO TO 70 + ELSE + TEMP = TEMP*BIGNUM + AK = AK*BIGNUM + END IF + ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN + AK = AK + PERT + PERT = 2*PERT + GO TO 70 + END IF + END IF + Y( K ) = TEMP / AK + 80 CONTINUE + END IF +* + DO 90 K = N, 2, -1 + IF( IN( K-1 ).EQ.0 ) THEN + Y( K-1 ) = Y( K-1 ) - C( K-1 )*Y( K ) + ELSE + TEMP = Y( K-1 ) + Y( K-1 ) = Y( K ) + Y( K ) = TEMP - C( K-1 )*Y( K ) + END IF + 90 CONTINUE + END IF +* +* End of DLAGTS +* + END diff --git a/costa/native/external/lapack/dlagv2.f b/costa/native/external/lapack/dlagv2.f new file mode 100644 index 000000000..2ed2473b0 --- /dev/null +++ b/costa/native/external/lapack/dlagv2.f @@ -0,0 +1,290 @@ + SUBROUTINE DLAGV2( A, LDA, B, LDB, ALPHAR, ALPHAI, BETA, CSL, SNL, + $ CSR, SNR ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER LDA, LDB + DOUBLE PRECISION CSL, CSR, SNL, SNR +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), ALPHAI( 2 ), ALPHAR( 2 ), + $ B( LDB, * ), BETA( 2 ) +* .. +* +* Purpose +* ======= +* +* DLAGV2 computes the Generalized Schur factorization of a real 2-by-2 +* matrix pencil (A,B) where B is upper triangular. This routine +* computes orthogonal (rotation) matrices given by CSL, SNL and CSR, +* SNR such that +* +* 1) if the pencil (A,B) has two real eigenvalues (include 0/0 or 1/0 +* types), then +* +* [ a11 a12 ] := [ CSL SNL ] [ a11 a12 ] [ CSR -SNR ] +* [ 0 a22 ] [ -SNL CSL ] [ a21 a22 ] [ SNR CSR ] +* +* [ b11 b12 ] := [ CSL SNL ] [ b11 b12 ] [ CSR -SNR ] +* [ 0 b22 ] [ -SNL CSL ] [ 0 b22 ] [ SNR CSR ], +* +* 2) if the pencil (A,B) has a pair of complex conjugate eigenvalues, +* then +* +* [ a11 a12 ] := [ CSL SNL ] [ a11 a12 ] [ CSR -SNR ] +* [ a21 a22 ] [ -SNL CSL ] [ a21 a22 ] [ SNR CSR ] +* +* [ b11 0 ] := [ CSL SNL ] [ b11 b12 ] [ CSR -SNR ] +* [ 0 b22 ] [ -SNL CSL ] [ 0 b22 ] [ SNR CSR ] +* +* where b11 >= b22 > 0. +* +* +* Arguments +* ========= +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA, 2) +* On entry, the 2 x 2 matrix A. +* On exit, A is overwritten by the ``A-part'' of the +* generalized Schur form. +* +* LDA (input) INTEGER +* THe leading dimension of the array A. LDA >= 2. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB, 2) +* On entry, the upper triangular 2 x 2 matrix B. +* On exit, B is overwritten by the ``B-part'' of the +* generalized Schur form. +* +* LDB (input) INTEGER +* THe leading dimension of the array B. LDB >= 2. +* +* ALPHAR (output) DOUBLE PRECISION array, dimension (2) +* ALPHAI (output) DOUBLE PRECISION array, dimension (2) +* BETA (output) DOUBLE PRECISION array, dimension (2) +* (ALPHAR(k)+i*ALPHAI(k))/BETA(k) are the eigenvalues of the +* pencil (A,B), k=1,2, i = sqrt(-1). Note that BETA(k) may +* be zero. +* +* CSL (output) DOUBLE PRECISION +* The cosine of the left rotation matrix. +* +* SNL (output) DOUBLE PRECISION +* The sine of the left rotation matrix. +* +* CSR (output) DOUBLE PRECISION +* The cosine of the right rotation matrix. +* +* SNR (output) DOUBLE PRECISION +* The sine of the right rotation matrix. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION ANORM, ASCALE, BNORM, BSCALE, H1, H2, H3, QQ, + $ R, RR, SAFMIN, SCALE1, SCALE2, T, ULP, WI, WR1, + $ WR2 +* .. +* .. External Subroutines .. + EXTERNAL DLAG2, DLARTG, DLASV2, DROT +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLAPY2 + EXTERNAL DLAMCH, DLAPY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + SAFMIN = DLAMCH( 'S' ) + ULP = DLAMCH( 'P' ) +* +* Scale A +* + ANORM = MAX( ABS( A( 1, 1 ) )+ABS( A( 2, 1 ) ), + $ ABS( A( 1, 2 ) )+ABS( A( 2, 2 ) ), SAFMIN ) + ASCALE = ONE / ANORM + A( 1, 1 ) = ASCALE*A( 1, 1 ) + A( 1, 2 ) = ASCALE*A( 1, 2 ) + A( 2, 1 ) = ASCALE*A( 2, 1 ) + A( 2, 2 ) = ASCALE*A( 2, 2 ) +* +* Scale B +* + BNORM = MAX( ABS( B( 1, 1 ) ), ABS( B( 1, 2 ) )+ABS( B( 2, 2 ) ), + $ SAFMIN ) + BSCALE = ONE / BNORM + B( 1, 1 ) = BSCALE*B( 1, 1 ) + B( 1, 2 ) = BSCALE*B( 1, 2 ) + B( 2, 2 ) = BSCALE*B( 2, 2 ) +* +* Check if A can be deflated +* + IF( ABS( A( 2, 1 ) ).LE.ULP ) THEN + CSL = ONE + SNL = ZERO + CSR = ONE + SNR = ZERO + A( 2, 1 ) = ZERO + B( 2, 1 ) = ZERO +* +* Check if B is singular +* + ELSE IF( ABS( B( 1, 1 ) ).LE.ULP ) THEN + CALL DLARTG( A( 1, 1 ), A( 2, 1 ), CSL, SNL, R ) + CSR = ONE + SNR = ZERO + CALL DROT( 2, A( 1, 1 ), LDA, A( 2, 1 ), LDA, CSL, SNL ) + CALL DROT( 2, B( 1, 1 ), LDB, B( 2, 1 ), LDB, CSL, SNL ) + A( 2, 1 ) = ZERO + B( 1, 1 ) = ZERO + B( 2, 1 ) = ZERO +* + ELSE IF( ABS( B( 2, 2 ) ).LE.ULP ) THEN + CALL DLARTG( A( 2, 2 ), A( 2, 1 ), CSR, SNR, T ) + SNR = -SNR + CALL DROT( 2, A( 1, 1 ), 1, A( 1, 2 ), 1, CSR, SNR ) + CALL DROT( 2, B( 1, 1 ), 1, B( 1, 2 ), 1, CSR, SNR ) + CSL = ONE + SNL = ZERO + A( 2, 1 ) = ZERO + B( 2, 1 ) = ZERO + B( 2, 2 ) = ZERO +* + ELSE +* +* B is nonsingular, first compute the eigenvalues of (A,B) +* + CALL DLAG2( A, LDA, B, LDB, SAFMIN, SCALE1, SCALE2, WR1, WR2, + $ WI ) +* + IF( WI.EQ.ZERO ) THEN +* +* two real eigenvalues, compute s*A-w*B +* + H1 = SCALE1*A( 1, 1 ) - WR1*B( 1, 1 ) + H2 = SCALE1*A( 1, 2 ) - WR1*B( 1, 2 ) + H3 = SCALE1*A( 2, 2 ) - WR1*B( 2, 2 ) +* + RR = DLAPY2( H1, H2 ) + QQ = DLAPY2( SCALE1*A( 2, 1 ), H3 ) +* + IF( RR.GT.QQ ) THEN +* +* find right rotation matrix to zero 1,1 element of +* (sA - wB) +* + CALL DLARTG( H2, H1, CSR, SNR, T ) +* + ELSE +* +* find right rotation matrix to zero 2,1 element of +* (sA - wB) +* + CALL DLARTG( H3, SCALE1*A( 2, 1 ), CSR, SNR, T ) +* + END IF +* + SNR = -SNR + CALL DROT( 2, A( 1, 1 ), 1, A( 1, 2 ), 1, CSR, SNR ) + CALL DROT( 2, B( 1, 1 ), 1, B( 1, 2 ), 1, CSR, SNR ) +* +* compute inf norms of A and B +* + H1 = MAX( ABS( A( 1, 1 ) )+ABS( A( 1, 2 ) ), + $ ABS( A( 2, 1 ) )+ABS( A( 2, 2 ) ) ) + H2 = MAX( ABS( B( 1, 1 ) )+ABS( B( 1, 2 ) ), + $ ABS( B( 2, 1 ) )+ABS( B( 2, 2 ) ) ) +* + IF( ( SCALE1*H1 ).GE.ABS( WR1 )*H2 ) THEN +* +* find left rotation matrix Q to zero out B(2,1) +* + CALL DLARTG( B( 1, 1 ), B( 2, 1 ), CSL, SNL, R ) +* + ELSE +* +* find left rotation matrix Q to zero out A(2,1) +* + CALL DLARTG( A( 1, 1 ), A( 2, 1 ), CSL, SNL, R ) +* + END IF +* + CALL DROT( 2, A( 1, 1 ), LDA, A( 2, 1 ), LDA, CSL, SNL ) + CALL DROT( 2, B( 1, 1 ), LDB, B( 2, 1 ), LDB, CSL, SNL ) +* + A( 2, 1 ) = ZERO + B( 2, 1 ) = ZERO +* + ELSE +* +* a pair of complex conjugate eigenvalues +* first compute the SVD of the matrix B +* + CALL DLASV2( B( 1, 1 ), B( 1, 2 ), B( 2, 2 ), R, T, SNR, + $ CSR, SNL, CSL ) +* +* Form (A,B) := Q(A,B)Z' where Q is left rotation matrix and +* Z is right rotation matrix computed from DLASV2 +* + CALL DROT( 2, A( 1, 1 ), LDA, A( 2, 1 ), LDA, CSL, SNL ) + CALL DROT( 2, B( 1, 1 ), LDB, B( 2, 1 ), LDB, CSL, SNL ) + CALL DROT( 2, A( 1, 1 ), 1, A( 1, 2 ), 1, CSR, SNR ) + CALL DROT( 2, B( 1, 1 ), 1, B( 1, 2 ), 1, CSR, SNR ) +* + B( 2, 1 ) = ZERO + B( 1, 2 ) = ZERO +* + END IF +* + END IF +* +* Unscaling +* + A( 1, 1 ) = ANORM*A( 1, 1 ) + A( 2, 1 ) = ANORM*A( 2, 1 ) + A( 1, 2 ) = ANORM*A( 1, 2 ) + A( 2, 2 ) = ANORM*A( 2, 2 ) + B( 1, 1 ) = BNORM*B( 1, 1 ) + B( 2, 1 ) = BNORM*B( 2, 1 ) + B( 1, 2 ) = BNORM*B( 1, 2 ) + B( 2, 2 ) = BNORM*B( 2, 2 ) +* + IF( WI.EQ.ZERO ) THEN + ALPHAR( 1 ) = A( 1, 1 ) + ALPHAR( 2 ) = A( 2, 2 ) + ALPHAI( 1 ) = ZERO + ALPHAI( 2 ) = ZERO + BETA( 1 ) = B( 1, 1 ) + BETA( 2 ) = B( 2, 2 ) + ELSE + ALPHAR( 1 ) = ANORM*WR1 / SCALE1 / BNORM + ALPHAI( 1 ) = ANORM*WI / SCALE1 / BNORM + ALPHAR( 2 ) = ALPHAR( 1 ) + ALPHAI( 2 ) = -ALPHAI( 1 ) + BETA( 1 ) = ONE + BETA( 2 ) = ONE + END IF +* + 10 CONTINUE +* + RETURN +* +* End of DLAGV2 +* + END diff --git a/costa/native/external/lapack/dlahqr.f b/costa/native/external/lapack/dlahqr.f new file mode 100644 index 000000000..2e2f25b6d --- /dev/null +++ b/costa/native/external/lapack/dlahqr.f @@ -0,0 +1,435 @@ + SUBROUTINE DLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, + $ ILOZ, IHIZ, Z, LDZ, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + LOGICAL WANTT, WANTZ + INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION H( LDH, * ), WI( * ), WR( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DLAHQR is an auxiliary routine called by DHSEQR to update the +* eigenvalues and Schur decomposition already computed by DHSEQR, by +* dealing with the Hessenberg submatrix in rows and columns ILO to IHI. +* +* Arguments +* ========= +* +* WANTT (input) LOGICAL +* = .TRUE. : the full Schur form T is required; +* = .FALSE.: only eigenvalues are required. +* +* WANTZ (input) LOGICAL +* = .TRUE. : the matrix of Schur vectors Z is required; +* = .FALSE.: Schur vectors are not required. +* +* N (input) INTEGER +* The order of the matrix H. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that H is already upper quasi-triangular in +* rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless +* ILO = 1). DLAHQR works primarily with the Hessenberg +* submatrix in rows and columns ILO to IHI, but applies +* transformations to all of H if WANTT is .TRUE.. +* 1 <= ILO <= max(1,IHI); IHI <= N. +* +* H (input/output) DOUBLE PRECISION array, dimension (LDH,N) +* On entry, the upper Hessenberg matrix H. +* On exit, if WANTT is .TRUE., H is upper quasi-triangular in +* rows and columns ILO:IHI, with any 2-by-2 diagonal blocks in +* standard form. If WANTT is .FALSE., the contents of H are +* unspecified on exit. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH >= max(1,N). +* +* WR (output) DOUBLE PRECISION array, dimension (N) +* WI (output) DOUBLE PRECISION array, dimension (N) +* The real and imaginary parts, respectively, of the computed +* eigenvalues ILO to IHI are stored in the corresponding +* elements of WR and WI. If two eigenvalues are computed as a +* complex conjugate pair, they are stored in consecutive +* elements of WR and WI, say the i-th and (i+1)th, with +* WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the +* eigenvalues are stored in the same order as on the diagonal +* of the Schur form returned in H, with WR(i) = H(i,i), and, if +* H(i:i+1,i:i+1) is a 2-by-2 diagonal block, +* WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). +* +* ILOZ (input) INTEGER +* IHIZ (input) INTEGER +* Specify the rows of Z to which transformations must be +* applied if WANTZ is .TRUE.. +* 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. +* +* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) +* If WANTZ is .TRUE., on entry Z must contain the current +* matrix Z of transformations accumulated by DHSEQR, and on +* exit Z has been updated; transformations are applied only to +* the submatrix Z(ILOZ:IHIZ,ILO:IHI). +* If WANTZ is .FALSE., Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* > 0: DLAHQR failed to compute all the eigenvalues ILO to IHI +* in a total of 30*(IHI-ILO+1) iterations; if INFO = i, +* elements i+1:ihi of WR and WI contain those eigenvalues +* which have been successfully computed. +* +* Further Details +* =============== +* +* 2-96 Based on modifications by +* David Day, Sandia National Laboratory, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, HALF + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, HALF = 0.5D0 ) + DOUBLE PRECISION DAT1, DAT2 + PARAMETER ( DAT1 = 0.75D+0, DAT2 = -0.4375D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, I2, ITN, ITS, J, K, L, M, NH, NR, NZ + DOUBLE PRECISION AVE, CS, DISC, H00, H10, H11, H12, H21, H22, + $ H33, H33S, H43H34, H44, H44S, OVFL, S, SMLNUM, + $ SN, SUM, T1, T2, T3, TST1, ULP, UNFL, V1, V2, + $ V3 +* .. +* .. Local Arrays .. + DOUBLE PRECISION V( 3 ), WORK( 1 ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLANHS + EXTERNAL DLAMCH, DLANHS +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLANV2, DLARFG, DROT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SIGN, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( ILO.EQ.IHI ) THEN + WR( ILO ) = H( ILO, ILO ) + WI( ILO ) = ZERO + RETURN + END IF +* + NH = IHI - ILO + 1 + NZ = IHIZ - ILOZ + 1 +* +* Set machine-dependent constants for the stopping criterion. +* If norm(H) <= sqrt(OVFL), overflow should not occur. +* + UNFL = DLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + CALL DLABAD( UNFL, OVFL ) + ULP = DLAMCH( 'Precision' ) + SMLNUM = UNFL*( NH / ULP ) +* +* I1 and I2 are the indices of the first row and last column of H +* to which transformations must be applied. If eigenvalues only are +* being computed, I1 and I2 are set inside the main loop. +* + IF( WANTT ) THEN + I1 = 1 + I2 = N + END IF +* +* ITN is the total number of QR iterations allowed. +* + ITN = 30*NH +* +* The main loop begins here. I is the loop index and decreases from +* IHI to ILO in steps of 1 or 2. Each iteration of the loop works +* with the active submatrix in rows and columns L to I. +* Eigenvalues I+1 to IHI have already converged. Either L = ILO or +* H(L,L-1) is negligible so that the matrix splits. +* + I = IHI + 10 CONTINUE + L = ILO + IF( I.LT.ILO ) + $ GO TO 150 +* +* Perform QR iterations on rows and columns ILO to I until a +* submatrix of order 1 or 2 splits off at the bottom because a +* subdiagonal element has become negligible. +* + DO 130 ITS = 0, ITN +* +* Look for a single small subdiagonal element. +* + DO 20 K = I, L + 1, -1 + TST1 = ABS( H( K-1, K-1 ) ) + ABS( H( K, K ) ) + IF( TST1.EQ.ZERO ) + $ TST1 = DLANHS( '1', I-L+1, H( L, L ), LDH, WORK ) + IF( ABS( H( K, K-1 ) ).LE.MAX( ULP*TST1, SMLNUM ) ) + $ GO TO 30 + 20 CONTINUE + 30 CONTINUE + L = K + IF( L.GT.ILO ) THEN +* +* H(L,L-1) is negligible +* + H( L, L-1 ) = ZERO + END IF +* +* Exit from loop if a submatrix of order 1 or 2 has split off. +* + IF( L.GE.I-1 ) + $ GO TO 140 +* +* Now the active submatrix is in rows and columns L to I. If +* eigenvalues only are being computed, only the active submatrix +* need be transformed. +* + IF( .NOT.WANTT ) THEN + I1 = L + I2 = I + END IF +* + IF( ITS.EQ.10 .OR. ITS.EQ.20 ) THEN +* +* Exceptional shift. +* + S = ABS( H( I, I-1 ) ) + ABS( H( I-1, I-2 ) ) + H44 = DAT1*S + H( I, I ) + H33 = H44 + H43H34 = DAT2*S*S + ELSE +* +* Prepare to use Francis' double shift +* (i.e. 2nd degree generalized Rayleigh quotient) +* + H44 = H( I, I ) + H33 = H( I-1, I-1 ) + H43H34 = H( I, I-1 )*H( I-1, I ) + S = H( I-1, I-2 )*H( I-1, I-2 ) + DISC = ( H33-H44 )*HALF + DISC = DISC*DISC + H43H34 + IF( DISC.GT.ZERO ) THEN +* +* Real roots: use Wilkinson's shift twice +* + DISC = SQRT( DISC ) + AVE = HALF*( H33+H44 ) + IF( ABS( H33 )-ABS( H44 ).GT.ZERO ) THEN + H33 = H33*H44 - H43H34 + H44 = H33 / ( SIGN( DISC, AVE )+AVE ) + ELSE + H44 = SIGN( DISC, AVE ) + AVE + END IF + H33 = H44 + H43H34 = ZERO + END IF + END IF +* +* Look for two consecutive small subdiagonal elements. +* + DO 40 M = I - 2, L, -1 +* Determine the effect of starting the double-shift QR +* iteration at row M, and see if this would make H(M,M-1) +* negligible. +* + H11 = H( M, M ) + H22 = H( M+1, M+1 ) + H21 = H( M+1, M ) + H12 = H( M, M+1 ) + H44S = H44 - H11 + H33S = H33 - H11 + V1 = ( H33S*H44S-H43H34 ) / H21 + H12 + V2 = H22 - H11 - H33S - H44S + V3 = H( M+2, M+1 ) + S = ABS( V1 ) + ABS( V2 ) + ABS( V3 ) + V1 = V1 / S + V2 = V2 / S + V3 = V3 / S + V( 1 ) = V1 + V( 2 ) = V2 + V( 3 ) = V3 + IF( M.EQ.L ) + $ GO TO 50 + H00 = H( M-1, M-1 ) + H10 = H( M, M-1 ) + TST1 = ABS( V1 )*( ABS( H00 )+ABS( H11 )+ABS( H22 ) ) + IF( ABS( H10 )*( ABS( V2 )+ABS( V3 ) ).LE.ULP*TST1 ) + $ GO TO 50 + 40 CONTINUE + 50 CONTINUE +* +* Double-shift QR step +* + DO 120 K = M, I - 1 +* +* The first iteration of this loop determines a reflection G +* from the vector V and applies it from left and right to H, +* thus creating a nonzero bulge below the subdiagonal. +* +* Each subsequent iteration determines a reflection G to +* restore the Hessenberg form in the (K-1)th column, and thus +* chases the bulge one step toward the bottom of the active +* submatrix. NR is the order of G. +* + NR = MIN( 3, I-K+1 ) + IF( K.GT.M ) + $ CALL DCOPY( NR, H( K, K-1 ), 1, V, 1 ) + CALL DLARFG( NR, V( 1 ), V( 2 ), 1, T1 ) + IF( K.GT.M ) THEN + H( K, K-1 ) = V( 1 ) + H( K+1, K-1 ) = ZERO + IF( K.LT.I-1 ) + $ H( K+2, K-1 ) = ZERO + ELSE IF( M.GT.L ) THEN + H( K, K-1 ) = -H( K, K-1 ) + END IF + V2 = V( 2 ) + T2 = T1*V2 + IF( NR.EQ.3 ) THEN + V3 = V( 3 ) + T3 = T1*V3 +* +* Apply G from the left to transform the rows of the matrix +* in columns K to I2. +* + DO 60 J = K, I2 + SUM = H( K, J ) + V2*H( K+1, J ) + V3*H( K+2, J ) + H( K, J ) = H( K, J ) - SUM*T1 + H( K+1, J ) = H( K+1, J ) - SUM*T2 + H( K+2, J ) = H( K+2, J ) - SUM*T3 + 60 CONTINUE +* +* Apply G from the right to transform the columns of the +* matrix in rows I1 to min(K+3,I). +* + DO 70 J = I1, MIN( K+3, I ) + SUM = H( J, K ) + V2*H( J, K+1 ) + V3*H( J, K+2 ) + H( J, K ) = H( J, K ) - SUM*T1 + H( J, K+1 ) = H( J, K+1 ) - SUM*T2 + H( J, K+2 ) = H( J, K+2 ) - SUM*T3 + 70 CONTINUE +* + IF( WANTZ ) THEN +* +* Accumulate transformations in the matrix Z +* + DO 80 J = ILOZ, IHIZ + SUM = Z( J, K ) + V2*Z( J, K+1 ) + V3*Z( J, K+2 ) + Z( J, K ) = Z( J, K ) - SUM*T1 + Z( J, K+1 ) = Z( J, K+1 ) - SUM*T2 + Z( J, K+2 ) = Z( J, K+2 ) - SUM*T3 + 80 CONTINUE + END IF + ELSE IF( NR.EQ.2 ) THEN +* +* Apply G from the left to transform the rows of the matrix +* in columns K to I2. +* + DO 90 J = K, I2 + SUM = H( K, J ) + V2*H( K+1, J ) + H( K, J ) = H( K, J ) - SUM*T1 + H( K+1, J ) = H( K+1, J ) - SUM*T2 + 90 CONTINUE +* +* Apply G from the right to transform the columns of the +* matrix in rows I1 to min(K+3,I). +* + DO 100 J = I1, I + SUM = H( J, K ) + V2*H( J, K+1 ) + H( J, K ) = H( J, K ) - SUM*T1 + H( J, K+1 ) = H( J, K+1 ) - SUM*T2 + 100 CONTINUE +* + IF( WANTZ ) THEN +* +* Accumulate transformations in the matrix Z +* + DO 110 J = ILOZ, IHIZ + SUM = Z( J, K ) + V2*Z( J, K+1 ) + Z( J, K ) = Z( J, K ) - SUM*T1 + Z( J, K+1 ) = Z( J, K+1 ) - SUM*T2 + 110 CONTINUE + END IF + END IF + 120 CONTINUE +* + 130 CONTINUE +* +* Failure to converge in remaining number of iterations +* + INFO = I + RETURN +* + 140 CONTINUE +* + IF( L.EQ.I ) THEN +* +* H(I,I-1) is negligible: one eigenvalue has converged. +* + WR( I ) = H( I, I ) + WI( I ) = ZERO + ELSE IF( L.EQ.I-1 ) THEN +* +* H(I-1,I-2) is negligible: a pair of eigenvalues have converged. +* +* Transform the 2-by-2 submatrix to standard Schur form, +* and compute and store the eigenvalues. +* + CALL DLANV2( H( I-1, I-1 ), H( I-1, I ), H( I, I-1 ), + $ H( I, I ), WR( I-1 ), WI( I-1 ), WR( I ), WI( I ), + $ CS, SN ) +* + IF( WANTT ) THEN +* +* Apply the transformation to the rest of H. +* + IF( I2.GT.I ) + $ CALL DROT( I2-I, H( I-1, I+1 ), LDH, H( I, I+1 ), LDH, + $ CS, SN ) + CALL DROT( I-I1-1, H( I1, I-1 ), 1, H( I1, I ), 1, CS, SN ) + END IF + IF( WANTZ ) THEN +* +* Apply the transformation to Z. +* + CALL DROT( NZ, Z( ILOZ, I-1 ), 1, Z( ILOZ, I ), 1, CS, SN ) + END IF + END IF +* +* Decrement number of remaining iterations, and return to start of +* the main loop with new value of I. +* + ITN = ITN - ITS + I = L - 1 + GO TO 10 +* + 150 CONTINUE + RETURN +* +* End of DLAHQR +* + END diff --git a/costa/native/external/lapack/dlahrd.f b/costa/native/external/lapack/dlahrd.f new file mode 100644 index 000000000..9d473350d --- /dev/null +++ b/costa/native/external/lapack/dlahrd.f @@ -0,0 +1,206 @@ + SUBROUTINE DLAHRD( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), T( LDT, NB ), TAU( NB ), + $ Y( LDY, NB ) +* .. +* +* Purpose +* ======= +* +* DLAHRD reduces the first NB columns of a real general n-by-(n-k+1) +* matrix A so that elements below the k-th subdiagonal are zero. The +* reduction is performed by an orthogonal similarity transformation +* Q' * A * Q. The routine returns the matrices V and T which determine +* Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. +* +* This is an auxiliary routine called by DGEHRD. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. +* +* K (input) INTEGER +* The offset for the reduction. Elements below the k-th +* subdiagonal in the first NB columns are reduced to zero. +* +* NB (input) INTEGER +* The number of columns to be reduced. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N-K+1) +* On entry, the n-by-(n-k+1) general matrix A. +* On exit, the elements on and above the k-th subdiagonal in +* the first NB columns are overwritten with the corresponding +* elements of the reduced matrix; the elements below the k-th +* subdiagonal, with the array TAU, represent the matrix Q as a +* product of elementary reflectors. The other columns of A are +* unchanged. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (output) DOUBLE PRECISION array, dimension (NB) +* The scalar factors of the elementary reflectors. See Further +* Details. +* +* T (output) DOUBLE PRECISION array, dimension (LDT,NB) +* The upper triangular matrix T. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= NB. +* +* Y (output) DOUBLE PRECISION array, dimension (LDY,NB) +* The n-by-nb matrix Y. +* +* LDY (input) INTEGER +* The leading dimension of the array Y. LDY >= N. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of nb elementary reflectors +* +* Q = H(1) H(2) . . . H(nb). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in +* A(i+k+1:n,i), and tau in TAU(i). +* +* The elements of the vectors v together form the (n-k+1)-by-nb matrix +* V which is needed, with T and Y, to apply the transformation to the +* unreduced part of the matrix, using an update of the form: +* A := (I - V*T*V') * (A - Y*V'). +* +* The contents of A on exit are illustrated by the following example +* with n = 7, k = 3 and nb = 2: +* +* ( a h a a a ) +* ( a h a a a ) +* ( a h a a a ) +* ( h h a a a ) +* ( v1 h a a a ) +* ( v1 v2 a a a ) +* ( v1 v2 a a a ) +* +* where a denotes an element of the original matrix A, h denotes a +* modified element of the upper Hessenberg matrix H, and vi denotes an +* element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION EI +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGEMV, DLARFG, DSCAL, DTRMV +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, NB + IF( I.GT.1 ) THEN +* +* Update A(1:n,i) +* +* Compute i-th column of A - Y * V' +* + CALL DGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, + $ A( K+I-1, 1 ), LDA, ONE, A( 1, I ), 1 ) +* +* Apply I - V * T' * V' to this column (call it b) from the +* left, using the last column of T as workspace +* +* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) +* ( V2 ) ( b2 ) +* +* where V1 is unit lower triangular +* +* w := V1' * b1 +* + CALL DCOPY( I-1, A( K+1, I ), 1, T( 1, NB ), 1 ) + CALL DTRMV( 'Lower', 'Transpose', 'Unit', I-1, A( K+1, 1 ), + $ LDA, T( 1, NB ), 1 ) +* +* w := w + V2'*b2 +* + CALL DGEMV( 'Transpose', N-K-I+1, I-1, ONE, A( K+I, 1 ), + $ LDA, A( K+I, I ), 1, ONE, T( 1, NB ), 1 ) +* +* w := T'*w +* + CALL DTRMV( 'Upper', 'Transpose', 'Non-unit', I-1, T, LDT, + $ T( 1, NB ), 1 ) +* +* b2 := b2 - V2*w +* + CALL DGEMV( 'No transpose', N-K-I+1, I-1, -ONE, A( K+I, 1 ), + $ LDA, T( 1, NB ), 1, ONE, A( K+I, I ), 1 ) +* +* b1 := b1 - V1*w +* + CALL DTRMV( 'Lower', 'No transpose', 'Unit', I-1, + $ A( K+1, 1 ), LDA, T( 1, NB ), 1 ) + CALL DAXPY( I-1, -ONE, T( 1, NB ), 1, A( K+1, I ), 1 ) +* + A( K+I-1, I-1 ) = EI + END IF +* +* Generate the elementary reflector H(i) to annihilate +* A(k+i+1:n,i) +* + CALL DLARFG( N-K-I+1, A( K+I, I ), A( MIN( K+I+1, N ), I ), 1, + $ TAU( I ) ) + EI = A( K+I, I ) + A( K+I, I ) = ONE +* +* Compute Y(1:n,i) +* + CALL DGEMV( 'No transpose', N, N-K-I+1, ONE, A( 1, I+1 ), LDA, + $ A( K+I, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL DGEMV( 'Transpose', N-K-I+1, I-1, ONE, A( K+I, 1 ), LDA, + $ A( K+I, I ), 1, ZERO, T( 1, I ), 1 ) + CALL DGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, T( 1, I ), 1, + $ ONE, Y( 1, I ), 1 ) + CALL DSCAL( N, TAU( I ), Y( 1, I ), 1 ) +* +* Compute T(1:i,i) +* + CALL DSCAL( I-1, -TAU( I ), T( 1, I ), 1 ) + CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, LDT, + $ T( 1, I ), 1 ) + T( I, I ) = TAU( I ) +* + 10 CONTINUE + A( K+NB, NB ) = EI +* + RETURN +* +* End of DLAHRD +* + END diff --git a/costa/native/external/lapack/dlaic1.f b/costa/native/external/lapack/dlaic1.f new file mode 100644 index 000000000..7a6bf93f7 --- /dev/null +++ b/costa/native/external/lapack/dlaic1.f @@ -0,0 +1,293 @@ + SUBROUTINE DLAIC1( JOB, J, X, SEST, W, GAMMA, SESTPR, S, C ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + INTEGER J, JOB + DOUBLE PRECISION C, GAMMA, S, SEST, SESTPR +* .. +* .. Array Arguments .. + DOUBLE PRECISION W( J ), X( J ) +* .. +* +* Purpose +* ======= +* +* DLAIC1 applies one step of incremental condition estimation in +* its simplest version: +* +* Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j +* lower triangular matrix L, such that +* twonorm(L*x) = sest +* Then DLAIC1 computes sestpr, s, c such that +* the vector +* [ s*x ] +* xhat = [ c ] +* is an approximate singular vector of +* [ L 0 ] +* Lhat = [ w' gamma ] +* in the sense that +* twonorm(Lhat*xhat) = sestpr. +* +* Depending on JOB, an estimate for the largest or smallest singular +* value is computed. +* +* Note that [s c]' and sestpr**2 is an eigenpair of the system +* +* diag(sest*sest, 0) + [alpha gamma] * [ alpha ] +* [ gamma ] +* +* where alpha = x'*w. +* +* Arguments +* ========= +* +* JOB (input) INTEGER +* = 1: an estimate for the largest singular value is computed. +* = 2: an estimate for the smallest singular value is computed. +* +* J (input) INTEGER +* Length of X and W +* +* X (input) DOUBLE PRECISION array, dimension (J) +* The j-vector x. +* +* SEST (input) DOUBLE PRECISION +* Estimated singular value of j by j matrix L +* +* W (input) DOUBLE PRECISION array, dimension (J) +* The j-vector w. +* +* GAMMA (input) DOUBLE PRECISION +* The diagonal element gamma. +* +* SEDTPR (output) DOUBLE PRECISION +* Estimated singular value of (j+1) by (j+1) matrix Lhat. +* +* S (output) DOUBLE PRECISION +* Sine needed in forming xhat. +* +* C (output) DOUBLE PRECISION +* Cosine needed in forming xhat. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 ) + DOUBLE PRECISION HALF, FOUR + PARAMETER ( HALF = 0.5D0, FOUR = 4.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION ABSALP, ABSEST, ABSGAM, ALPHA, B, COSINE, EPS, + $ NORMA, S1, S2, SINE, T, TEST, TMP, ZETA1, ZETA2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SIGN, SQRT +* .. +* .. External Functions .. + DOUBLE PRECISION DDOT, DLAMCH + EXTERNAL DDOT, DLAMCH +* .. +* .. Executable Statements .. +* + EPS = DLAMCH( 'Epsilon' ) + ALPHA = DDOT( J, X, 1, W, 1 ) +* + ABSALP = ABS( ALPHA ) + ABSGAM = ABS( GAMMA ) + ABSEST = ABS( SEST ) +* + IF( JOB.EQ.1 ) THEN +* +* Estimating largest singular value +* +* special cases +* + IF( SEST.EQ.ZERO ) THEN + S1 = MAX( ABSGAM, ABSALP ) + IF( S1.EQ.ZERO ) THEN + S = ZERO + C = ONE + SESTPR = ZERO + ELSE + S = ALPHA / S1 + C = GAMMA / S1 + TMP = SQRT( S*S+C*C ) + S = S / TMP + C = C / TMP + SESTPR = S1*TMP + END IF + RETURN + ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN + S = ONE + C = ZERO + TMP = MAX( ABSEST, ABSALP ) + S1 = ABSEST / TMP + S2 = ABSALP / TMP + SESTPR = TMP*SQRT( S1*S1+S2*S2 ) + RETURN + ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN + S1 = ABSGAM + S2 = ABSEST + IF( S1.LE.S2 ) THEN + S = ONE + C = ZERO + SESTPR = S2 + ELSE + S = ZERO + C = ONE + SESTPR = S1 + END IF + RETURN + ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN + S1 = ABSGAM + S2 = ABSALP + IF( S1.LE.S2 ) THEN + TMP = S1 / S2 + S = SQRT( ONE+TMP*TMP ) + SESTPR = S2*S + C = ( GAMMA / S2 ) / S + S = SIGN( ONE, ALPHA ) / S + ELSE + TMP = S2 / S1 + C = SQRT( ONE+TMP*TMP ) + SESTPR = S1*C + S = ( ALPHA / S1 ) / C + C = SIGN( ONE, GAMMA ) / C + END IF + RETURN + ELSE +* +* normal case +* + ZETA1 = ALPHA / ABSEST + ZETA2 = GAMMA / ABSEST +* + B = ( ONE-ZETA1*ZETA1-ZETA2*ZETA2 )*HALF + C = ZETA1*ZETA1 + IF( B.GT.ZERO ) THEN + T = C / ( B+SQRT( B*B+C ) ) + ELSE + T = SQRT( B*B+C ) - B + END IF +* + SINE = -ZETA1 / T + COSINE = -ZETA2 / ( ONE+T ) + TMP = SQRT( SINE*SINE+COSINE*COSINE ) + S = SINE / TMP + C = COSINE / TMP + SESTPR = SQRT( T+ONE )*ABSEST + RETURN + END IF +* + ELSE IF( JOB.EQ.2 ) THEN +* +* Estimating smallest singular value +* +* special cases +* + IF( SEST.EQ.ZERO ) THEN + SESTPR = ZERO + IF( MAX( ABSGAM, ABSALP ).EQ.ZERO ) THEN + SINE = ONE + COSINE = ZERO + ELSE + SINE = -GAMMA + COSINE = ALPHA + END IF + S1 = MAX( ABS( SINE ), ABS( COSINE ) ) + S = SINE / S1 + C = COSINE / S1 + TMP = SQRT( S*S+C*C ) + S = S / TMP + C = C / TMP + RETURN + ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN + S = ZERO + C = ONE + SESTPR = ABSGAM + RETURN + ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN + S1 = ABSGAM + S2 = ABSEST + IF( S1.LE.S2 ) THEN + S = ZERO + C = ONE + SESTPR = S1 + ELSE + S = ONE + C = ZERO + SESTPR = S2 + END IF + RETURN + ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN + S1 = ABSGAM + S2 = ABSALP + IF( S1.LE.S2 ) THEN + TMP = S1 / S2 + C = SQRT( ONE+TMP*TMP ) + SESTPR = ABSEST*( TMP / C ) + S = -( GAMMA / S2 ) / C + C = SIGN( ONE, ALPHA ) / C + ELSE + TMP = S2 / S1 + S = SQRT( ONE+TMP*TMP ) + SESTPR = ABSEST / S + C = ( ALPHA / S1 ) / S + S = -SIGN( ONE, GAMMA ) / S + END IF + RETURN + ELSE +* +* normal case +* + ZETA1 = ALPHA / ABSEST + ZETA2 = GAMMA / ABSEST +* + NORMA = MAX( ONE+ZETA1*ZETA1+ABS( ZETA1*ZETA2 ), + $ ABS( ZETA1*ZETA2 )+ZETA2*ZETA2 ) +* +* See if root is closer to zero or to ONE +* + TEST = ONE + TWO*( ZETA1-ZETA2 )*( ZETA1+ZETA2 ) + IF( TEST.GE.ZERO ) THEN +* +* root is close to zero, compute directly +* + B = ( ZETA1*ZETA1+ZETA2*ZETA2+ONE )*HALF + C = ZETA2*ZETA2 + T = C / ( B+SQRT( ABS( B*B-C ) ) ) + SINE = ZETA1 / ( ONE-T ) + COSINE = -ZETA2 / T + SESTPR = SQRT( T+FOUR*EPS*EPS*NORMA )*ABSEST + ELSE +* +* root is closer to ONE, shift by that amount +* + B = ( ZETA2*ZETA2+ZETA1*ZETA1-ONE )*HALF + C = ZETA1*ZETA1 + IF( B.GE.ZERO ) THEN + T = -C / ( B+SQRT( B*B+C ) ) + ELSE + T = B - SQRT( B*B+C ) + END IF + SINE = -ZETA1 / T + COSINE = -ZETA2 / ( ONE+T ) + SESTPR = SQRT( ONE+T+FOUR*EPS*EPS*NORMA )*ABSEST + END IF + TMP = SQRT( SINE*SINE+COSINE*COSINE ) + S = SINE / TMP + C = COSINE / TMP + RETURN +* + END IF + END IF + RETURN +* +* End of DLAIC1 +* + END diff --git a/costa/native/external/lapack/dlaln2.f b/costa/native/external/lapack/dlaln2.f new file mode 100644 index 000000000..e594b5a5b --- /dev/null +++ b/costa/native/external/lapack/dlaln2.f @@ -0,0 +1,508 @@ + SUBROUTINE DLALN2( LTRANS, NA, NW, SMIN, CA, A, LDA, D1, D2, B, + $ LDB, WR, WI, X, LDX, SCALE, XNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + LOGICAL LTRANS + INTEGER INFO, LDA, LDB, LDX, NA, NW + DOUBLE PRECISION CA, D1, D2, SCALE, SMIN, WI, WR, XNORM +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DLALN2 solves a system of the form (ca A - w D ) X = s B +* or (ca A' - w D) X = s B with possible scaling ("s") and +* perturbation of A. (A' means A-transpose.) +* +* A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA +* real diagonal matrix, w is a real or complex value, and X and B are +* NA x 1 matrices -- real if w is real, complex if w is complex. NA +* may be 1 or 2. +* +* If w is complex, X and B are represented as NA x 2 matrices, +* the first column of each being the real part and the second +* being the imaginary part. +* +* "s" is a scaling factor (.LE. 1), computed by DLALN2, which is +* so chosen that X can be computed without overflow. X is further +* scaled if necessary to assure that norm(ca A - w D)*norm(X) is less +* than overflow. +* +* If both singular values of (ca A - w D) are less than SMIN, +* SMIN*identity will be used instead of (ca A - w D). If only one +* singular value is less than SMIN, one element of (ca A - w D) will be +* perturbed enough to make the smallest singular value roughly SMIN. +* If both singular values are at least SMIN, (ca A - w D) will not be +* perturbed. In any case, the perturbation will be at most some small +* multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values +* are computed by infinity-norm approximations, and thus will only be +* correct to a factor of 2 or so. +* +* Note: all input quantities are assumed to be smaller than overflow +* by a reasonable factor. (See BIGNUM.) +* +* Arguments +* ========== +* +* LTRANS (input) LOGICAL +* =.TRUE.: A-transpose will be used. +* =.FALSE.: A will be used (not transposed.) +* +* NA (input) INTEGER +* The size of the matrix A. It may (only) be 1 or 2. +* +* NW (input) INTEGER +* 1 if "w" is real, 2 if "w" is complex. It may only be 1 +* or 2. +* +* SMIN (input) DOUBLE PRECISION +* The desired lower bound on the singular values of A. This +* should be a safe distance away from underflow or overflow, +* say, between (underflow/machine precision) and (machine +* precision * overflow ). (See BIGNUM and ULP.) +* +* CA (input) DOUBLE PRECISION +* The coefficient c, which A is multiplied by. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,NA) +* The NA x NA matrix A. +* +* LDA (input) INTEGER +* The leading dimension of A. It must be at least NA. +* +* D1 (input) DOUBLE PRECISION +* The 1,1 element in the diagonal matrix D. +* +* D2 (input) DOUBLE PRECISION +* The 2,2 element in the diagonal matrix D. Not used if NW=1. +* +* B (input) DOUBLE PRECISION array, dimension (LDB,NW) +* The NA x NW matrix B (right-hand side). If NW=2 ("w" is +* complex), column 1 contains the real part of B and column 2 +* contains the imaginary part. +* +* LDB (input) INTEGER +* The leading dimension of B. It must be at least NA. +* +* WR (input) DOUBLE PRECISION +* The real part of the scalar "w". +* +* WI (input) DOUBLE PRECISION +* The imaginary part of the scalar "w". Not used if NW=1. +* +* X (output) DOUBLE PRECISION array, dimension (LDX,NW) +* The NA x NW matrix X (unknowns), as computed by DLALN2. +* If NW=2 ("w" is complex), on exit, column 1 will contain +* the real part of X and column 2 will contain the imaginary +* part. +* +* LDX (input) INTEGER +* The leading dimension of X. It must be at least NA. +* +* SCALE (output) DOUBLE PRECISION +* The scale factor that B must be multiplied by to insure +* that overflow does not occur when computing X. Thus, +* (ca A - w D) X will be SCALE*B, not B (ignoring +* perturbations of A.) It will be at most 1. +* +* XNORM (output) DOUBLE PRECISION +* The infinity-norm of X, when X is regarded as an NA x NW +* real matrix. +* +* INFO (output) INTEGER +* An error flag. It will be set to zero if no error occurs, +* a negative number if an argument is in error, or a positive +* number if ca A - w D had to be perturbed. +* The possible values are: +* = 0: No error occurred, and (ca A - w D) did not have to be +* perturbed. +* = 1: (ca A - w D) had to be perturbed to make its smallest +* (or only) singular value greater than SMIN. +* NOTE: In the interests of speed, this routine does not +* check the inputs for errors. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D0 ) +* .. +* .. Local Scalars .. + INTEGER ICMAX, J + DOUBLE PRECISION BBND, BI1, BI2, BIGNUM, BNORM, BR1, BR2, CI21, + $ CI22, CMAX, CNORM, CR21, CR22, CSI, CSR, LI21, + $ LR21, SMINI, SMLNUM, TEMP, U22ABS, UI11, UI11R, + $ UI12, UI12S, UI22, UR11, UR11R, UR12, UR12S, + $ UR22, XI1, XI2, XR1, XR2 +* .. +* .. Local Arrays .. + LOGICAL RSWAP( 4 ), ZSWAP( 4 ) + INTEGER IPIVOT( 4, 4 ) + DOUBLE PRECISION CI( 2, 2 ), CIV( 4 ), CR( 2, 2 ), CRV( 4 ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Equivalences .. + EQUIVALENCE ( CI( 1, 1 ), CIV( 1 ) ), + $ ( CR( 1, 1 ), CRV( 1 ) ) +* .. +* .. Data statements .. + DATA ZSWAP / .FALSE., .FALSE., .TRUE., .TRUE. / + DATA RSWAP / .FALSE., .TRUE., .FALSE., .TRUE. / + DATA IPIVOT / 1, 2, 3, 4, 2, 1, 4, 3, 3, 4, 1, 2, 4, + $ 3, 2, 1 / +* .. +* .. Executable Statements .. +* +* Compute BIGNUM +* + SMLNUM = TWO*DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + SMINI = MAX( SMIN, SMLNUM ) +* +* Don't check for input errors +* + INFO = 0 +* +* Standard Initializations +* + SCALE = ONE +* + IF( NA.EQ.1 ) THEN +* +* 1 x 1 (i.e., scalar) system C X = B +* + IF( NW.EQ.1 ) THEN +* +* Real 1x1 system. +* +* C = ca A - w D +* + CSR = CA*A( 1, 1 ) - WR*D1 + CNORM = ABS( CSR ) +* +* If | C | < SMINI, use C = SMINI +* + IF( CNORM.LT.SMINI ) THEN + CSR = SMINI + CNORM = SMINI + INFO = 1 + END IF +* +* Check scaling for X = B / C +* + BNORM = ABS( B( 1, 1 ) ) + IF( CNORM.LT.ONE .AND. BNORM.GT.ONE ) THEN + IF( BNORM.GT.BIGNUM*CNORM ) + $ SCALE = ONE / BNORM + END IF +* +* Compute X +* + X( 1, 1 ) = ( B( 1, 1 )*SCALE ) / CSR + XNORM = ABS( X( 1, 1 ) ) + ELSE +* +* Complex 1x1 system (w is complex) +* +* C = ca A - w D +* + CSR = CA*A( 1, 1 ) - WR*D1 + CSI = -WI*D1 + CNORM = ABS( CSR ) + ABS( CSI ) +* +* If | C | < SMINI, use C = SMINI +* + IF( CNORM.LT.SMINI ) THEN + CSR = SMINI + CSI = ZERO + CNORM = SMINI + INFO = 1 + END IF +* +* Check scaling for X = B / C +* + BNORM = ABS( B( 1, 1 ) ) + ABS( B( 1, 2 ) ) + IF( CNORM.LT.ONE .AND. BNORM.GT.ONE ) THEN + IF( BNORM.GT.BIGNUM*CNORM ) + $ SCALE = ONE / BNORM + END IF +* +* Compute X +* + CALL DLADIV( SCALE*B( 1, 1 ), SCALE*B( 1, 2 ), CSR, CSI, + $ X( 1, 1 ), X( 1, 2 ) ) + XNORM = ABS( X( 1, 1 ) ) + ABS( X( 1, 2 ) ) + END IF +* + ELSE +* +* 2x2 System +* +* Compute the real part of C = ca A - w D (or ca A' - w D ) +* + CR( 1, 1 ) = CA*A( 1, 1 ) - WR*D1 + CR( 2, 2 ) = CA*A( 2, 2 ) - WR*D2 + IF( LTRANS ) THEN + CR( 1, 2 ) = CA*A( 2, 1 ) + CR( 2, 1 ) = CA*A( 1, 2 ) + ELSE + CR( 2, 1 ) = CA*A( 2, 1 ) + CR( 1, 2 ) = CA*A( 1, 2 ) + END IF +* + IF( NW.EQ.1 ) THEN +* +* Real 2x2 system (w is real) +* +* Find the largest element in C +* + CMAX = ZERO + ICMAX = 0 +* + DO 10 J = 1, 4 + IF( ABS( CRV( J ) ).GT.CMAX ) THEN + CMAX = ABS( CRV( J ) ) + ICMAX = J + END IF + 10 CONTINUE +* +* If norm(C) < SMINI, use SMINI*identity. +* + IF( CMAX.LT.SMINI ) THEN + BNORM = MAX( ABS( B( 1, 1 ) ), ABS( B( 2, 1 ) ) ) + IF( SMINI.LT.ONE .AND. BNORM.GT.ONE ) THEN + IF( BNORM.GT.BIGNUM*SMINI ) + $ SCALE = ONE / BNORM + END IF + TEMP = SCALE / SMINI + X( 1, 1 ) = TEMP*B( 1, 1 ) + X( 2, 1 ) = TEMP*B( 2, 1 ) + XNORM = TEMP*BNORM + INFO = 1 + RETURN + END IF +* +* Gaussian elimination with complete pivoting. +* + UR11 = CRV( ICMAX ) + CR21 = CRV( IPIVOT( 2, ICMAX ) ) + UR12 = CRV( IPIVOT( 3, ICMAX ) ) + CR22 = CRV( IPIVOT( 4, ICMAX ) ) + UR11R = ONE / UR11 + LR21 = UR11R*CR21 + UR22 = CR22 - UR12*LR21 +* +* If smaller pivot < SMINI, use SMINI +* + IF( ABS( UR22 ).LT.SMINI ) THEN + UR22 = SMINI + INFO = 1 + END IF + IF( RSWAP( ICMAX ) ) THEN + BR1 = B( 2, 1 ) + BR2 = B( 1, 1 ) + ELSE + BR1 = B( 1, 1 ) + BR2 = B( 2, 1 ) + END IF + BR2 = BR2 - LR21*BR1 + BBND = MAX( ABS( BR1*( UR22*UR11R ) ), ABS( BR2 ) ) + IF( BBND.GT.ONE .AND. ABS( UR22 ).LT.ONE ) THEN + IF( BBND.GE.BIGNUM*ABS( UR22 ) ) + $ SCALE = ONE / BBND + END IF +* + XR2 = ( BR2*SCALE ) / UR22 + XR1 = ( SCALE*BR1 )*UR11R - XR2*( UR11R*UR12 ) + IF( ZSWAP( ICMAX ) ) THEN + X( 1, 1 ) = XR2 + X( 2, 1 ) = XR1 + ELSE + X( 1, 1 ) = XR1 + X( 2, 1 ) = XR2 + END IF + XNORM = MAX( ABS( XR1 ), ABS( XR2 ) ) +* +* Further scaling if norm(A) norm(X) > overflow +* + IF( XNORM.GT.ONE .AND. CMAX.GT.ONE ) THEN + IF( XNORM.GT.BIGNUM / CMAX ) THEN + TEMP = CMAX / BIGNUM + X( 1, 1 ) = TEMP*X( 1, 1 ) + X( 2, 1 ) = TEMP*X( 2, 1 ) + XNORM = TEMP*XNORM + SCALE = TEMP*SCALE + END IF + END IF + ELSE +* +* Complex 2x2 system (w is complex) +* +* Find the largest element in C +* + CI( 1, 1 ) = -WI*D1 + CI( 2, 1 ) = ZERO + CI( 1, 2 ) = ZERO + CI( 2, 2 ) = -WI*D2 + CMAX = ZERO + ICMAX = 0 +* + DO 20 J = 1, 4 + IF( ABS( CRV( J ) )+ABS( CIV( J ) ).GT.CMAX ) THEN + CMAX = ABS( CRV( J ) ) + ABS( CIV( J ) ) + ICMAX = J + END IF + 20 CONTINUE +* +* If norm(C) < SMINI, use SMINI*identity. +* + IF( CMAX.LT.SMINI ) THEN + BNORM = MAX( ABS( B( 1, 1 ) )+ABS( B( 1, 2 ) ), + $ ABS( B( 2, 1 ) )+ABS( B( 2, 2 ) ) ) + IF( SMINI.LT.ONE .AND. BNORM.GT.ONE ) THEN + IF( BNORM.GT.BIGNUM*SMINI ) + $ SCALE = ONE / BNORM + END IF + TEMP = SCALE / SMINI + X( 1, 1 ) = TEMP*B( 1, 1 ) + X( 2, 1 ) = TEMP*B( 2, 1 ) + X( 1, 2 ) = TEMP*B( 1, 2 ) + X( 2, 2 ) = TEMP*B( 2, 2 ) + XNORM = TEMP*BNORM + INFO = 1 + RETURN + END IF +* +* Gaussian elimination with complete pivoting. +* + UR11 = CRV( ICMAX ) + UI11 = CIV( ICMAX ) + CR21 = CRV( IPIVOT( 2, ICMAX ) ) + CI21 = CIV( IPIVOT( 2, ICMAX ) ) + UR12 = CRV( IPIVOT( 3, ICMAX ) ) + UI12 = CIV( IPIVOT( 3, ICMAX ) ) + CR22 = CRV( IPIVOT( 4, ICMAX ) ) + CI22 = CIV( IPIVOT( 4, ICMAX ) ) + IF( ICMAX.EQ.1 .OR. ICMAX.EQ.4 ) THEN +* +* Code when off-diagonals of pivoted C are real +* + IF( ABS( UR11 ).GT.ABS( UI11 ) ) THEN + TEMP = UI11 / UR11 + UR11R = ONE / ( UR11*( ONE+TEMP**2 ) ) + UI11R = -TEMP*UR11R + ELSE + TEMP = UR11 / UI11 + UI11R = -ONE / ( UI11*( ONE+TEMP**2 ) ) + UR11R = -TEMP*UI11R + END IF + LR21 = CR21*UR11R + LI21 = CR21*UI11R + UR12S = UR12*UR11R + UI12S = UR12*UI11R + UR22 = CR22 - UR12*LR21 + UI22 = CI22 - UR12*LI21 + ELSE +* +* Code when diagonals of pivoted C are real +* + UR11R = ONE / UR11 + UI11R = ZERO + LR21 = CR21*UR11R + LI21 = CI21*UR11R + UR12S = UR12*UR11R + UI12S = UI12*UR11R + UR22 = CR22 - UR12*LR21 + UI12*LI21 + UI22 = -UR12*LI21 - UI12*LR21 + END IF + U22ABS = ABS( UR22 ) + ABS( UI22 ) +* +* If smaller pivot < SMINI, use SMINI +* + IF( U22ABS.LT.SMINI ) THEN + UR22 = SMINI + UI22 = ZERO + INFO = 1 + END IF + IF( RSWAP( ICMAX ) ) THEN + BR2 = B( 1, 1 ) + BR1 = B( 2, 1 ) + BI2 = B( 1, 2 ) + BI1 = B( 2, 2 ) + ELSE + BR1 = B( 1, 1 ) + BR2 = B( 2, 1 ) + BI1 = B( 1, 2 ) + BI2 = B( 2, 2 ) + END IF + BR2 = BR2 - LR21*BR1 + LI21*BI1 + BI2 = BI2 - LI21*BR1 - LR21*BI1 + BBND = MAX( ( ABS( BR1 )+ABS( BI1 ) )* + $ ( U22ABS*( ABS( UR11R )+ABS( UI11R ) ) ), + $ ABS( BR2 )+ABS( BI2 ) ) + IF( BBND.GT.ONE .AND. U22ABS.LT.ONE ) THEN + IF( BBND.GE.BIGNUM*U22ABS ) THEN + SCALE = ONE / BBND + BR1 = SCALE*BR1 + BI1 = SCALE*BI1 + BR2 = SCALE*BR2 + BI2 = SCALE*BI2 + END IF + END IF +* + CALL DLADIV( BR2, BI2, UR22, UI22, XR2, XI2 ) + XR1 = UR11R*BR1 - UI11R*BI1 - UR12S*XR2 + UI12S*XI2 + XI1 = UI11R*BR1 + UR11R*BI1 - UI12S*XR2 - UR12S*XI2 + IF( ZSWAP( ICMAX ) ) THEN + X( 1, 1 ) = XR2 + X( 2, 1 ) = XR1 + X( 1, 2 ) = XI2 + X( 2, 2 ) = XI1 + ELSE + X( 1, 1 ) = XR1 + X( 2, 1 ) = XR2 + X( 1, 2 ) = XI1 + X( 2, 2 ) = XI2 + END IF + XNORM = MAX( ABS( XR1 )+ABS( XI1 ), ABS( XR2 )+ABS( XI2 ) ) +* +* Further scaling if norm(A) norm(X) > overflow +* + IF( XNORM.GT.ONE .AND. CMAX.GT.ONE ) THEN + IF( XNORM.GT.BIGNUM / CMAX ) THEN + TEMP = CMAX / BIGNUM + X( 1, 1 ) = TEMP*X( 1, 1 ) + X( 2, 1 ) = TEMP*X( 2, 1 ) + X( 1, 2 ) = TEMP*X( 1, 2 ) + X( 2, 2 ) = TEMP*X( 2, 2 ) + XNORM = TEMP*XNORM + SCALE = TEMP*SCALE + END IF + END IF + END IF + END IF +* + RETURN +* +* End of DLALN2 +* + END diff --git a/costa/native/external/lapack/dlals0.f b/costa/native/external/lapack/dlals0.f new file mode 100644 index 000000000..a60ada8d9 --- /dev/null +++ b/costa/native/external/lapack/dlals0.f @@ -0,0 +1,375 @@ + SUBROUTINE DLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX, + $ PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, + $ POLES, DIFL, DIFR, Z, K, C, S, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* December 1, 1999 +* +* .. Scalar Arguments .. + INTEGER GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL, + $ LDGNUM, NL, NR, NRHS, SQRE + DOUBLE PRECISION C, S +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), PERM( * ) + DOUBLE PRECISION B( LDB, * ), BX( LDBX, * ), DIFL( * ), + $ DIFR( LDGNUM, * ), GIVNUM( LDGNUM, * ), + $ POLES( LDGNUM, * ), WORK( * ), Z( * ) +* .. +* +* Purpose +* ======= +* +* DLALS0 applies back the multiplying factors of either the left or the +* right singular vector matrix of a diagonal matrix appended by a row +* to the right hand side matrix B in solving the least squares problem +* using the divide-and-conquer SVD approach. +* +* For the left singular vector matrix, three types of orthogonal +* matrices are involved: +* +* (1L) Givens rotations: the number of such rotations is GIVPTR; the +* pairs of columns/rows they were applied to are stored in GIVCOL; +* and the C- and S-values of these rotations are stored in GIVNUM. +* +* (2L) Permutation. The (NL+1)-st row of B is to be moved to the first +* row, and for J=2:N, PERM(J)-th row of B is to be moved to the +* J-th row. +* +* (3L) The left singular vector matrix of the remaining matrix. +* +* For the right singular vector matrix, four types of orthogonal +* matrices are involved: +* +* (1R) The right singular vector matrix of the remaining matrix. +* +* (2R) If SQRE = 1, one extra Givens rotation to generate the right +* null space. +* +* (3R) The inverse transformation of (2L). +* +* (4R) The inverse transformation of (1L). +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether singular vectors are to be computed in +* factored form: +* = 0: Left singular vector matrix. +* = 1: Right singular vector matrix. +* +* NL (input) INTEGER +* The row dimension of the upper block. NL >= 1. +* +* NR (input) INTEGER +* The row dimension of the lower block. NR >= 1. +* +* SQRE (input) INTEGER +* = 0: the lower block is an NR-by-NR square matrix. +* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +* +* The bidiagonal matrix has row dimension N = NL + NR + 1, +* and column dimension M = N + SQRE. +* +* NRHS (input) INTEGER +* The number of columns of B and BX. NRHS must be at least 1. +* +* B (input/output) DOUBLE PRECISION array, dimension ( LDB, NRHS ) +* On input, B contains the right hand sides of the least +* squares problem in rows 1 through M. On output, B contains +* the solution X in rows 1 through N. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB must be at least +* max(1,MAX( M, N ) ). +* +* BX (workspace) DOUBLE PRECISION array, dimension ( LDBX, NRHS ) +* +* LDBX (input) INTEGER +* The leading dimension of BX. +* +* PERM (input) INTEGER array, dimension ( N ) +* The permutations (from deflation and sorting) applied +* to the two blocks. +* +* GIVPTR (input) INTEGER +* The number of Givens rotations which took place in this +* subproblem. +* +* GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) +* Each pair of numbers indicates a pair of rows/columns +* involved in a Givens rotation. +* +* LDGCOL (input) INTEGER +* The leading dimension of GIVCOL, must be at least N. +* +* GIVNUM (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) +* Each number indicates the C or S value used in the +* corresponding Givens rotation. +* +* LDGNUM (input) INTEGER +* The leading dimension of arrays DIFR, POLES and +* GIVNUM, must be at least K. +* +* POLES (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) +* On entry, POLES(1:K, 1) contains the new singular +* values obtained from solving the secular equation, and +* POLES(1:K, 2) is an array containing the poles in the secular +* equation. +* +* DIFL (input) DOUBLE PRECISION array, dimension ( K ). +* On entry, DIFL(I) is the distance between I-th updated +* (undeflated) singular value and the I-th (undeflated) old +* singular value. +* +* DIFR (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ). +* On entry, DIFR(I, 1) contains the distances between I-th +* updated (undeflated) singular value and the I+1-th +* (undeflated) old singular value. And DIFR(I, 2) is the +* normalizing factor for the I-th right singular vector. +* +* Z (input) DOUBLE PRECISION array, dimension ( K ) +* Contain the components of the deflation-adjusted updating row +* vector. +* +* K (input) INTEGER +* Contains the dimension of the non-deflated matrix, +* This is the order of the related secular equation. 1 <= K <=N. +* +* C (input) DOUBLE PRECISION +* C contains garbage if SQRE =0 and the C-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* S (input) DOUBLE PRECISION +* S contains garbage if SQRE =0 and the S-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* WORK (workspace) DOUBLE PRECISION array, dimension ( K ) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO, NEGONE + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0, NEGONE = -1.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, M, N, NLP1 + DOUBLE PRECISION DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, TEMP +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMV, DLACPY, DLASCL, DROT, DSCAL, + $ XERBLA +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3, DNRM2 + EXTERNAL DLAMC3, DNRM2 +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( NL.LT.1 ) THEN + INFO = -2 + ELSE IF( NR.LT.1 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + END IF +* + N = NL + NR + 1 +* + IF( NRHS.LT.1 ) THEN + INFO = -5 + ELSE IF( LDB.LT.N ) THEN + INFO = -7 + ELSE IF( LDBX.LT.N ) THEN + INFO = -9 + ELSE IF( GIVPTR.LT.0 ) THEN + INFO = -11 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -13 + ELSE IF( LDGNUM.LT.N ) THEN + INFO = -15 + ELSE IF( K.LT.1 ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLALS0', -INFO ) + RETURN + END IF +* + M = N + SQRE + NLP1 = NL + 1 +* + IF( ICOMPQ.EQ.0 ) THEN +* +* Apply back orthogonal transformations from the left. +* +* Step (1L): apply back the Givens rotations performed. +* + DO 10 I = 1, GIVPTR + CALL DROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB, + $ B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ), + $ GIVNUM( I, 1 ) ) + 10 CONTINUE +* +* Step (2L): permute rows of B. +* + CALL DCOPY( NRHS, B( NLP1, 1 ), LDB, BX( 1, 1 ), LDBX ) + DO 20 I = 2, N + CALL DCOPY( NRHS, B( PERM( I ), 1 ), LDB, BX( I, 1 ), LDBX ) + 20 CONTINUE +* +* Step (3L): apply the inverse of the left singular vector +* matrix to BX. +* + IF( K.EQ.1 ) THEN + CALL DCOPY( NRHS, BX, LDBX, B, LDB ) + IF( Z( 1 ).LT.ZERO ) THEN + CALL DSCAL( NRHS, NEGONE, B, LDB ) + END IF + ELSE + DO 50 J = 1, K + DIFLJ = DIFL( J ) + DJ = POLES( J, 1 ) + DSIGJ = -POLES( J, 2 ) + IF( J.LT.K ) THEN + DIFRJ = -DIFR( J, 1 ) + DSIGJP = -POLES( J+1, 2 ) + END IF + IF( ( Z( J ).EQ.ZERO ) .OR. ( POLES( J, 2 ).EQ.ZERO ) ) + $ THEN + WORK( J ) = ZERO + ELSE + WORK( J ) = -POLES( J, 2 )*Z( J ) / DIFLJ / + $ ( POLES( J, 2 )+DJ ) + END IF + DO 30 I = 1, J - 1 + IF( ( Z( I ).EQ.ZERO ) .OR. + $ ( POLES( I, 2 ).EQ.ZERO ) ) THEN + WORK( I ) = ZERO + ELSE + WORK( I ) = POLES( I, 2 )*Z( I ) / + $ ( DLAMC3( POLES( I, 2 ), DSIGJ )- + $ DIFLJ ) / ( POLES( I, 2 )+DJ ) + END IF + 30 CONTINUE + DO 40 I = J + 1, K + IF( ( Z( I ).EQ.ZERO ) .OR. + $ ( POLES( I, 2 ).EQ.ZERO ) ) THEN + WORK( I ) = ZERO + ELSE + WORK( I ) = POLES( I, 2 )*Z( I ) / + $ ( DLAMC3( POLES( I, 2 ), DSIGJP )+ + $ DIFRJ ) / ( POLES( I, 2 )+DJ ) + END IF + 40 CONTINUE + WORK( 1 ) = NEGONE + TEMP = DNRM2( K, WORK, 1 ) + CALL DGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO, + $ B( J, 1 ), LDB ) + CALL DLASCL( 'G', 0, 0, TEMP, ONE, 1, NRHS, B( J, 1 ), + $ LDB, INFO ) + 50 CONTINUE + END IF +* +* Move the deflated rows of BX to B also. +* + IF( K.LT.MAX( M, N ) ) + $ CALL DLACPY( 'A', N-K, NRHS, BX( K+1, 1 ), LDBX, + $ B( K+1, 1 ), LDB ) + ELSE +* +* Apply back the right orthogonal transformations. +* +* Step (1R): apply back the new right singular vector matrix +* to B. +* + IF( K.EQ.1 ) THEN + CALL DCOPY( NRHS, B, LDB, BX, LDBX ) + ELSE + DO 80 J = 1, K + DSIGJ = POLES( J, 2 ) + IF( Z( J ).EQ.ZERO ) THEN + WORK( J ) = ZERO + ELSE + WORK( J ) = -Z( J ) / DIFL( J ) / + $ ( DSIGJ+POLES( J, 1 ) ) / DIFR( J, 2 ) + END IF + DO 60 I = 1, J - 1 + IF( Z( J ).EQ.ZERO ) THEN + WORK( I ) = ZERO + ELSE + WORK( I ) = Z( J ) / ( DLAMC3( DSIGJ, -POLES( I+1, + $ 2 ) )-DIFR( I, 1 ) ) / + $ ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 ) + END IF + 60 CONTINUE + DO 70 I = J + 1, K + IF( Z( J ).EQ.ZERO ) THEN + WORK( I ) = ZERO + ELSE + WORK( I ) = Z( J ) / ( DLAMC3( DSIGJ, -POLES( I, + $ 2 ) )-DIFL( I ) ) / + $ ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 ) + END IF + 70 CONTINUE + CALL DGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO, + $ BX( J, 1 ), LDBX ) + 80 CONTINUE + END IF +* +* Step (2R): if SQRE = 1, apply back the rotation that is +* related to the right null space of the subproblem. +* + IF( SQRE.EQ.1 ) THEN + CALL DCOPY( NRHS, B( M, 1 ), LDB, BX( M, 1 ), LDBX ) + CALL DROT( NRHS, BX( 1, 1 ), LDBX, BX( M, 1 ), LDBX, C, S ) + END IF + IF( K.LT.MAX( M, N ) ) + $ CALL DLACPY( 'A', N-K, NRHS, B( K+1, 1 ), LDB, BX( K+1, 1 ), + $ LDBX ) +* +* Step (3R): permute rows of B. +* + CALL DCOPY( NRHS, BX( 1, 1 ), LDBX, B( NLP1, 1 ), LDB ) + IF( SQRE.EQ.1 ) THEN + CALL DCOPY( NRHS, BX( M, 1 ), LDBX, B( M, 1 ), LDB ) + END IF + DO 90 I = 2, N + CALL DCOPY( NRHS, BX( I, 1 ), LDBX, B( PERM( I ), 1 ), LDB ) + 90 CONTINUE +* +* Step (4R): apply back the Givens rotations performed. +* + DO 100 I = GIVPTR, 1, -1 + CALL DROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB, + $ B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ), + $ -GIVNUM( I, 1 ) ) + 100 CONTINUE + END IF +* + RETURN +* +* End of DLALS0 +* + END diff --git a/costa/native/external/lapack/dlalsa.f b/costa/native/external/lapack/dlalsa.f new file mode 100644 index 000000000..d7ecb6037 --- /dev/null +++ b/costa/native/external/lapack/dlalsa.f @@ -0,0 +1,363 @@ + SUBROUTINE DLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U, + $ LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR, + $ GIVCOL, LDGCOL, PERM, GIVNUM, C, S, WORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS, + $ SMLSIZ +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ), + $ K( * ), PERM( LDGCOL, * ) + DOUBLE PRECISION B( LDB, * ), BX( LDBX, * ), C( * ), + $ DIFL( LDU, * ), DIFR( LDU, * ), + $ GIVNUM( LDU, * ), POLES( LDU, * ), S( * ), + $ U( LDU, * ), VT( LDU, * ), WORK( * ), + $ Z( LDU, * ) +* .. +* +* Purpose +* ======= +* +* DLALSA is an itermediate step in solving the least squares problem +* by computing the SVD of the coefficient matrix in compact form (The +* singular vectors are computed as products of simple orthorgonal +* matrices.). +* +* If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector +* matrix of an upper bidiagonal matrix to the right hand side; and if +* ICOMPQ = 1, DLALSA applies the right singular vector matrix to the +* right hand side. The singular vector matrices were generated in +* compact form by DLALSA. +* +* Arguments +* ========= +* +* +* ICOMPQ (input) INTEGER +* Specifies whether the left or the right singular vector +* matrix is involved. +* = 0: Left singular vector matrix +* = 1: Right singular vector matrix +* +* SMLSIZ (input) INTEGER +* The maximum size of the subproblems at the bottom of the +* computation tree. +* +* N (input) INTEGER +* The row and column dimensions of the upper bidiagonal matrix. +* +* NRHS (input) INTEGER +* The number of columns of B and BX. NRHS must be at least 1. +* +* B (input) DOUBLE PRECISION array, dimension ( LDB, NRHS ) +* On input, B contains the right hand sides of the least +* squares problem in rows 1 through M. On output, B contains +* the solution X in rows 1 through N. +* +* LDB (input) INTEGER +* The leading dimension of B in the calling subprogram. +* LDB must be at least max(1,MAX( M, N ) ). +* +* BX (output) DOUBLE PRECISION array, dimension ( LDBX, NRHS ) +* On exit, the result of applying the left or right singular +* vector matrix to B. +* +* LDBX (input) INTEGER +* The leading dimension of BX. +* +* U (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ). +* On entry, U contains the left singular vector matrices of all +* subproblems at the bottom level. +* +* LDU (input) INTEGER, LDU = > N. +* The leading dimension of arrays U, VT, DIFL, DIFR, +* POLES, GIVNUM, and Z. +* +* VT (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ). +* On entry, VT' contains the right singular vector matrices of +* all subproblems at the bottom level. +* +* K (input) INTEGER array, dimension ( N ). +* +* DIFL (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). +* where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. +* +* DIFR (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). +* On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record +* distances between singular values on the I-th level and +* singular values on the (I -1)-th level, and DIFR(*, 2 * I) +* record the normalizing factors of the right singular vectors +* matrices of subproblems on I-th level. +* +* Z (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). +* On entry, Z(1, I) contains the components of the deflation- +* adjusted updating row vector for subproblems on the I-th +* level. +* +* POLES (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). +* On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old +* singular values involved in the secular equations on the I-th +* level. +* +* GIVPTR (input) INTEGER array, dimension ( N ). +* On entry, GIVPTR( I ) records the number of Givens +* rotations performed on the I-th problem on the computation +* tree. +* +* GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). +* On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the +* locations of Givens rotations performed on the I-th level on +* the computation tree. +* +* LDGCOL (input) INTEGER, LDGCOL = > N. +* The leading dimension of arrays GIVCOL and PERM. +* +* PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). +* On entry, PERM(*, I) records permutations done on the I-th +* level of the computation tree. +* +* GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). +* On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- +* values of Givens rotations performed on the I-th level on the +* computation tree. +* +* C (input) DOUBLE PRECISION array, dimension ( N ). +* On entry, if the I-th subproblem is not square, +* C( I ) contains the C-value of a Givens rotation related to +* the right null space of the I-th subproblem. +* +* S (input) DOUBLE PRECISION array, dimension ( N ). +* On entry, if the I-th subproblem is not square, +* S( I ) contains the S-value of a Givens rotation related to +* the right null space of the I-th subproblem. +* +* WORK (workspace) DOUBLE PRECISION array. +* The dimension must be at least N. +* +* IWORK (workspace) INTEGER array. +* The dimension must be at least 3 * N +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, IC, IM1, INODE, J, LF, LL, LVL, LVL2, + $ ND, NDB1, NDIML, NDIMR, NL, NLF, NLP1, NLVL, + $ NR, NRF, NRP1, SQRE +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DLALS0, DLASDT, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( SMLSIZ.LT.3 ) THEN + INFO = -2 + ELSE IF( N.LT.SMLSIZ ) THEN + INFO = -3 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -4 + ELSE IF( LDB.LT.N ) THEN + INFO = -6 + ELSE IF( LDBX.LT.N ) THEN + INFO = -8 + ELSE IF( LDU.LT.N ) THEN + INFO = -10 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -19 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLALSA', -INFO ) + RETURN + END IF +* +* Book-keeping and setting up the computation tree. +* + INODE = 1 + NDIML = INODE + N + NDIMR = NDIML + N +* + CALL DLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ), + $ IWORK( NDIMR ), SMLSIZ ) +* +* The following code applies back the left singular vector factors. +* For applying back the right singular vector factors, go to 50. +* + IF( ICOMPQ.EQ.1 ) THEN + GO TO 50 + END IF +* +* The nodes on the bottom level of the tree were solved +* by DLASDQ. The corresponding left and right singular vector +* matrices are in explicit form. First apply back the left +* singular vector matrices. +* + NDB1 = ( ND+1 ) / 2 + DO 10 I = NDB1, ND +* +* IC : center row of each node +* NL : number of rows of left subproblem +* NR : number of rows of right subproblem +* NLF: starting row of the left subproblem +* NRF: starting row of the right subproblem +* + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NR = IWORK( NDIMR+I1 ) + NLF = IC - NL + NRF = IC + 1 + CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, + $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) + CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, + $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) + 10 CONTINUE +* +* Next copy the rows of B that correspond to unchanged rows +* in the bidiagonal matrix to BX. +* + DO 20 I = 1, ND + IC = IWORK( INODE+I-1 ) + CALL DCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX ) + 20 CONTINUE +* +* Finally go through the left singular vector matrices of all +* the other subproblems bottom-up on the tree. +* + J = 2**NLVL + SQRE = 0 +* + DO 40 LVL = NLVL, 1, -1 + LVL2 = 2*LVL - 1 +* +* find the first node LF and last node LL on +* the current level LVL +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 30 I = LF, LL + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + J = J - 1 + CALL DLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX, + $ B( NLF, 1 ), LDB, PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ), + $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ), + $ Z( NLF, LVL ), K( J ), C( J ), S( J ), WORK, + $ INFO ) + 30 CONTINUE + 40 CONTINUE + GO TO 90 +* +* ICOMPQ = 1: applying back the right singular vector factors. +* + 50 CONTINUE +* +* First now go through the right singular vector matrices of all +* the tree nodes top-down. +* + J = 0 + DO 70 LVL = 1, NLVL + LVL2 = 2*LVL - 1 +* +* Find the first node LF and last node LL on +* the current level LVL. +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 60 I = LL, LF, -1 + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + IF( I.EQ.LL ) THEN + SQRE = 0 + ELSE + SQRE = 1 + END IF + J = J + 1 + CALL DLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB, + $ BX( NLF, 1 ), LDBX, PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ), + $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ), + $ Z( NLF, LVL ), K( J ), C( J ), S( J ), WORK, + $ INFO ) + 60 CONTINUE + 70 CONTINUE +* +* The nodes on the bottom level of the tree were solved +* by DLASDQ. The corresponding right singular vector +* matrices are in explicit form. Apply them back. +* + NDB1 = ( ND+1 ) / 2 + DO 80 I = NDB1, ND + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NR = IWORK( NDIMR+I1 ) + NLP1 = NL + 1 + IF( I.EQ.ND ) THEN + NRP1 = NR + ELSE + NRP1 = NR + 1 + END IF + NLF = IC - NL + NRF = IC + 1 + CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, + $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) + CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, + $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) + 80 CONTINUE +* + 90 CONTINUE +* + RETURN +* +* End of DLALSA +* + END diff --git a/costa/native/external/lapack/dlalsd.f b/costa/native/external/lapack/dlalsd.f new file mode 100644 index 000000000..188694aeb --- /dev/null +++ b/costa/native/external/lapack/dlalsd.f @@ -0,0 +1,433 @@ + SUBROUTINE DLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND, + $ RANK, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS, RANK, SMLSIZ + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION B( LDB, * ), D( * ), E( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLALSD uses the singular value decomposition of A to solve the least +* squares problem of finding X to minimize the Euclidean norm of each +* column of A*X-B, where A is N-by-N upper bidiagonal, and X and B +* are N-by-NRHS. The solution X overwrites B. +* +* The singular values of A smaller than RCOND times the largest +* singular value are treated as zero in solving the least squares +* problem; in this case a minimum norm solution is returned. +* The actual singular values are returned in D in ascending order. +* +* This code makes very mild assumptions about floating point +* arithmetic. It will work on machines with a guard digit in +* add/subtract, or on those binary machines without guard digits +* which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. +* It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': D and E define an upper bidiagonal matrix. +* = 'L': D and E define a lower bidiagonal matrix. +* +* SMLSIZ (input) INTEGER +* The maximum size of the subproblems at the bottom of the +* computation tree. +* +* N (input) INTEGER +* The dimension of the bidiagonal matrix. N >= 0. +* +* NRHS (input) INTEGER +* The number of columns of B. NRHS must be at least 1. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry D contains the main diagonal of the bidiagonal +* matrix. On exit, if INFO = 0, D contains its singular values. +* +* E (input) DOUBLE PRECISION array, dimension (N-1) +* Contains the super-diagonal entries of the bidiagonal matrix. +* On exit, E has been destroyed. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On input, B contains the right hand sides of the least +* squares problem. On output, B contains the solution X. +* +* LDB (input) INTEGER +* The leading dimension of B in the calling subprogram. +* LDB must be at least max(1,N). +* +* RCOND (input) DOUBLE PRECISION +* The singular values of A less than or equal to RCOND times +* the largest singular value are treated as zero in solving +* the least squares problem. If RCOND is negative, +* machine precision is used instead. +* For example, if diag(S)*X=B were the least squares problem, +* where diag(S) is a diagonal matrix of singular values, the +* solution would be X(i) = B(i) / S(i) if S(i) is greater than +* RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to +* RCOND*max(S). +* +* RANK (output) INTEGER +* The number of singular values of A greater than RCOND times +* the largest singular value. +* +* WORK (workspace) DOUBLE PRECISION array, dimension at least +* (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2), +* where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1). +* +* IWORK (workspace) INTEGER array, dimension at least +* (3*N*NLVL + 11*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: The algorithm failed to compute an singular value while +* working on the submatrix lying in rows and columns +* INFO/(N+1) through MOD(INFO,N+1). +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 ) +* .. +* .. Local Scalars .. + INTEGER BX, BXST, C, DIFL, DIFR, GIVCOL, GIVNUM, + $ GIVPTR, I, ICMPQ1, ICMPQ2, IWK, J, K, NLVL, + $ NM1, NSIZE, NSUB, NWORK, PERM, POLES, S, SIZEI, + $ SMLSZP, SQRE, ST, ST1, U, VT, Z + DOUBLE PRECISION CS, EPS, ORGNRM, R, SN, TOL +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL IDAMAX, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DLACPY, DLALSA, DLARTG, DLASCL, + $ DLASDA, DLASDQ, DLASET, DLASRT, DROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, LOG, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -4 + ELSE IF( ( LDB.LT.1 ) .OR. ( LDB.LT.N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLALSD', -INFO ) + RETURN + END IF +* + EPS = DLAMCH( 'Epsilon' ) +* +* Set up the tolerance. +* + IF( ( RCOND.LE.ZERO ) .OR. ( RCOND.GE.ONE ) ) THEN + RCOND = EPS + END IF +* + RANK = 0 +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + IF( D( 1 ).EQ.ZERO ) THEN + CALL DLASET( 'A', 1, NRHS, ZERO, ZERO, B, LDB ) + ELSE + RANK = 1 + CALL DLASCL( 'G', 0, 0, D( 1 ), ONE, 1, NRHS, B, LDB, INFO ) + D( 1 ) = ABS( D( 1 ) ) + END IF + RETURN + END IF +* +* Rotate the matrix if it is lower bidiagonal. +* + IF( UPLO.EQ.'L' ) THEN + DO 10 I = 1, N - 1 + CALL DLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( NRHS.EQ.1 ) THEN + CALL DROT( 1, B( I, 1 ), 1, B( I+1, 1 ), 1, CS, SN ) + ELSE + WORK( I*2-1 ) = CS + WORK( I*2 ) = SN + END IF + 10 CONTINUE + IF( NRHS.GT.1 ) THEN + DO 30 I = 1, NRHS + DO 20 J = 1, N - 1 + CS = WORK( J*2-1 ) + SN = WORK( J*2 ) + CALL DROT( 1, B( J, I ), 1, B( J+1, I ), 1, CS, SN ) + 20 CONTINUE + 30 CONTINUE + END IF + END IF +* +* Scale. +* + NM1 = N - 1 + ORGNRM = DLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) THEN + CALL DLASET( 'A', N, NRHS, ZERO, ZERO, B, LDB ) + RETURN + END IF +* + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO ) + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, INFO ) +* +* If N is smaller than the minimum divide size SMLSIZ, then solve +* the problem with another solver. +* + IF( N.LE.SMLSIZ ) THEN + NWORK = 1 + N*N + CALL DLASET( 'A', N, N, ZERO, ONE, WORK, N ) + CALL DLASDQ( 'U', 0, N, N, 0, NRHS, D, E, WORK, N, WORK, N, B, + $ LDB, WORK( NWORK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + TOL = RCOND*ABS( D( IDAMAX( N, D, 1 ) ) ) + DO 40 I = 1, N + IF( D( I ).LE.TOL ) THEN + CALL DLASET( 'A', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) + ELSE + CALL DLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, B( I, 1 ), + $ LDB, INFO ) + RANK = RANK + 1 + END IF + 40 CONTINUE + CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO, + $ WORK( NWORK ), N ) + CALL DLACPY( 'A', N, NRHS, WORK( NWORK ), N, B, LDB ) +* +* Unscale. +* + CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) + CALL DLASRT( 'D', N, D, INFO ) + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO ) +* + RETURN + END IF +* +* Book-keeping and setting up some constants. +* + NLVL = INT( LOG( DBLE( N ) / DBLE( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1 +* + SMLSZP = SMLSIZ + 1 +* + U = 1 + VT = 1 + SMLSIZ*N + DIFL = VT + SMLSZP*N + DIFR = DIFL + NLVL*N + Z = DIFR + NLVL*N*2 + C = Z + NLVL*N + S = C + N + POLES = S + N + GIVNUM = POLES + 2*NLVL*N + BX = GIVNUM + 2*NLVL*N + NWORK = BX + N*NRHS +* + SIZEI = 1 + N + K = SIZEI + N + GIVPTR = K + N + PERM = GIVPTR + N + GIVCOL = PERM + NLVL*N + IWK = GIVCOL + NLVL*N*2 +* + ST = 1 + SQRE = 0 + ICMPQ1 = 1 + ICMPQ2 = 0 + NSUB = 0 +* + DO 50 I = 1, N + IF( ABS( D( I ) ).LT.EPS ) THEN + D( I ) = SIGN( EPS, D( I ) ) + END IF + 50 CONTINUE +* + DO 60 I = 1, NM1 + IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN + NSUB = NSUB + 1 + IWORK( NSUB ) = ST +* +* Subproblem found. First determine its size and then +* apply divide and conquer on it. +* + IF( I.LT.NM1 ) THEN +* +* A subproblem with E(I) small for I < NM1. +* + NSIZE = I - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + ELSE IF( ABS( E( I ) ).GE.EPS ) THEN +* +* A subproblem with E(NM1) not too small but I = NM1. +* + NSIZE = N - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + ELSE +* +* A subproblem with E(NM1) small. This implies an +* 1-by-1 subproblem at D(N), which is not solved +* explicitly. +* + NSIZE = I - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + NSUB = NSUB + 1 + IWORK( NSUB ) = N + IWORK( SIZEI+NSUB-1 ) = 1 + CALL DCOPY( NRHS, B( N, 1 ), LDB, WORK( BX+NM1 ), N ) + END IF + ST1 = ST - 1 + IF( NSIZE.EQ.1 ) THEN +* +* This is a 1-by-1 subproblem and is not solved +* explicitly. +* + CALL DCOPY( NRHS, B( ST, 1 ), LDB, WORK( BX+ST1 ), N ) + ELSE IF( NSIZE.LE.SMLSIZ ) THEN +* +* This is a small subproblem and is solved by DLASDQ. +* + CALL DLASET( 'A', NSIZE, NSIZE, ZERO, ONE, + $ WORK( VT+ST1 ), N ) + CALL DLASDQ( 'U', 0, NSIZE, NSIZE, 0, NRHS, D( ST ), + $ E( ST ), WORK( VT+ST1 ), N, WORK( NWORK ), + $ N, B( ST, 1 ), LDB, WORK( NWORK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + CALL DLACPY( 'A', NSIZE, NRHS, B( ST, 1 ), LDB, + $ WORK( BX+ST1 ), N ) + ELSE +* +* A large problem. Solve it using divide and conquer. +* + CALL DLASDA( ICMPQ1, SMLSIZ, NSIZE, SQRE, D( ST ), + $ E( ST ), WORK( U+ST1 ), N, WORK( VT+ST1 ), + $ IWORK( K+ST1 ), WORK( DIFL+ST1 ), + $ WORK( DIFR+ST1 ), WORK( Z+ST1 ), + $ WORK( POLES+ST1 ), IWORK( GIVPTR+ST1 ), + $ IWORK( GIVCOL+ST1 ), N, IWORK( PERM+ST1 ), + $ WORK( GIVNUM+ST1 ), WORK( C+ST1 ), + $ WORK( S+ST1 ), WORK( NWORK ), IWORK( IWK ), + $ INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + BXST = BX + ST1 + CALL DLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, B( ST, 1 ), + $ LDB, WORK( BXST ), N, WORK( U+ST1 ), N, + $ WORK( VT+ST1 ), IWORK( K+ST1 ), + $ WORK( DIFL+ST1 ), WORK( DIFR+ST1 ), + $ WORK( Z+ST1 ), WORK( POLES+ST1 ), + $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N, + $ IWORK( PERM+ST1 ), WORK( GIVNUM+ST1 ), + $ WORK( C+ST1 ), WORK( S+ST1 ), WORK( NWORK ), + $ IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + END IF + ST = I + 1 + END IF + 60 CONTINUE +* +* Apply the singular values and treat the tiny ones as zero. +* + TOL = RCOND*ABS( D( IDAMAX( N, D, 1 ) ) ) +* + DO 70 I = 1, N +* +* Some of the elements in D can be negative because 1-by-1 +* subproblems were not solved explicitly. +* + IF( ABS( D( I ) ).LE.TOL ) THEN + CALL DLASET( 'A', 1, NRHS, ZERO, ZERO, WORK( BX+I-1 ), N ) + ELSE + RANK = RANK + 1 + CALL DLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, + $ WORK( BX+I-1 ), N, INFO ) + END IF + D( I ) = ABS( D( I ) ) + 70 CONTINUE +* +* Now apply back the right singular vectors. +* + ICMPQ2 = 1 + DO 80 I = 1, NSUB + ST = IWORK( I ) + ST1 = ST - 1 + NSIZE = IWORK( SIZEI+I-1 ) + BXST = BX + ST1 + IF( NSIZE.EQ.1 ) THEN + CALL DCOPY( NRHS, WORK( BXST ), N, B( ST, 1 ), LDB ) + ELSE IF( NSIZE.LE.SMLSIZ ) THEN + CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ WORK( VT+ST1 ), N, WORK( BXST ), N, ZERO, + $ B( ST, 1 ), LDB ) + ELSE + CALL DLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, WORK( BXST ), N, + $ B( ST, 1 ), LDB, WORK( U+ST1 ), N, + $ WORK( VT+ST1 ), IWORK( K+ST1 ), + $ WORK( DIFL+ST1 ), WORK( DIFR+ST1 ), + $ WORK( Z+ST1 ), WORK( POLES+ST1 ), + $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N, + $ IWORK( PERM+ST1 ), WORK( GIVNUM+ST1 ), + $ WORK( C+ST1 ), WORK( S+ST1 ), WORK( NWORK ), + $ IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + END IF + 80 CONTINUE +* +* Unscale and sort the singular values. +* + CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) + CALL DLASRT( 'D', N, D, INFO ) + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO ) +* + RETURN +* +* End of DLALSD +* + END diff --git a/costa/native/external/lapack/dlamch.f b/costa/native/external/lapack/dlamch.f new file mode 100644 index 000000000..64ac3becd --- /dev/null +++ b/costa/native/external/lapack/dlamch.f @@ -0,0 +1,857 @@ + DOUBLE PRECISION FUNCTION DLAMCH( CMACH ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER CMACH +* .. +* +* Purpose +* ======= +* +* DLAMCH determines double precision machine parameters. +* +* Arguments +* ========= +* +* CMACH (input) CHARACTER*1 +* Specifies the value to be returned by DLAMCH: +* = 'E' or 'e', DLAMCH := eps +* = 'S' or 's , DLAMCH := sfmin +* = 'B' or 'b', DLAMCH := base +* = 'P' or 'p', DLAMCH := eps*base +* = 'N' or 'n', DLAMCH := t +* = 'R' or 'r', DLAMCH := rnd +* = 'M' or 'm', DLAMCH := emin +* = 'U' or 'u', DLAMCH := rmin +* = 'L' or 'l', DLAMCH := emax +* = 'O' or 'o', DLAMCH := rmax +* +* where +* +* eps = relative machine precision +* sfmin = safe minimum, such that 1/sfmin does not overflow +* base = base of the machine +* prec = eps*base +* t = number of (base) digits in the mantissa +* rnd = 1.0 when rounding occurs in addition, 0.0 otherwise +* emin = minimum exponent before (gradual) underflow +* rmin = underflow threshold - base**(emin-1) +* emax = largest exponent before overflow +* rmax = overflow threshold - (base**emax)*(1-eps) +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL FIRST, LRND + INTEGER BETA, IMAX, IMIN, IT + DOUBLE PRECISION BASE, EMAX, EMIN, EPS, PREC, RMACH, RMAX, RMIN, + $ RND, SFMIN, SMALL, T +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLAMC2 +* .. +* .. Save statement .. + SAVE FIRST, EPS, SFMIN, BASE, T, RND, EMIN, RMIN, + $ EMAX, RMAX, PREC +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + FIRST = .FALSE. + CALL DLAMC2( BETA, IT, LRND, EPS, IMIN, RMIN, IMAX, RMAX ) + BASE = BETA + T = IT + IF( LRND ) THEN + RND = ONE + EPS = ( BASE**( 1-IT ) ) / 2 + ELSE + RND = ZERO + EPS = BASE**( 1-IT ) + END IF + PREC = EPS*BASE + EMIN = IMIN + EMAX = IMAX + SFMIN = RMIN + SMALL = ONE / RMAX + IF( SMALL.GE.SFMIN ) THEN +* +* Use SMALL plus a bit, to avoid the possibility of rounding +* causing overflow when computing 1/sfmin. +* + SFMIN = SMALL*( ONE+EPS ) + END IF + END IF +* + IF( LSAME( CMACH, 'E' ) ) THEN + RMACH = EPS + ELSE IF( LSAME( CMACH, 'S' ) ) THEN + RMACH = SFMIN + ELSE IF( LSAME( CMACH, 'B' ) ) THEN + RMACH = BASE + ELSE IF( LSAME( CMACH, 'P' ) ) THEN + RMACH = PREC + ELSE IF( LSAME( CMACH, 'N' ) ) THEN + RMACH = T + ELSE IF( LSAME( CMACH, 'R' ) ) THEN + RMACH = RND + ELSE IF( LSAME( CMACH, 'M' ) ) THEN + RMACH = EMIN + ELSE IF( LSAME( CMACH, 'U' ) ) THEN + RMACH = RMIN + ELSE IF( LSAME( CMACH, 'L' ) ) THEN + RMACH = EMAX + ELSE IF( LSAME( CMACH, 'O' ) ) THEN + RMACH = RMAX + END IF +* + DLAMCH = RMACH + RETURN +* +* End of DLAMCH +* + END +* +************************************************************************ +* + SUBROUTINE DLAMC1( BETA, T, RND, IEEE1 ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + LOGICAL IEEE1, RND + INTEGER BETA, T +* .. +* +* Purpose +* ======= +* +* DLAMC1 determines the machine parameters given by BETA, T, RND, and +* IEEE1. +* +* Arguments +* ========= +* +* BETA (output) INTEGER +* The base of the machine. +* +* T (output) INTEGER +* The number of ( BETA ) digits in the mantissa. +* +* RND (output) LOGICAL +* Specifies whether proper rounding ( RND = .TRUE. ) or +* chopping ( RND = .FALSE. ) occurs in addition. This may not +* be a reliable guide to the way in which the machine performs +* its arithmetic. +* +* IEEE1 (output) LOGICAL +* Specifies whether rounding appears to be done in the IEEE +* 'round to nearest' style. +* +* Further Details +* =============== +* +* The routine is based on the routine ENVRON by Malcolm and +* incorporates suggestions by Gentleman and Marovich. See +* +* Malcolm M. A. (1972) Algorithms to reveal properties of +* floating-point arithmetic. Comms. of the ACM, 15, 949-951. +* +* Gentleman W. M. and Marovich S. B. (1974) More on algorithms +* that reveal properties of floating point arithmetic units. +* Comms. of the ACM, 17, 276-277. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL FIRST, LIEEE1, LRND + INTEGER LBETA, LT + DOUBLE PRECISION A, B, C, F, ONE, QTR, SAVEC, T1, T2 +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3 + EXTERNAL DLAMC3 +* .. +* .. Save statement .. + SAVE FIRST, LIEEE1, LBETA, LRND, LT +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + FIRST = .FALSE. + ONE = 1 +* +* LBETA, LIEEE1, LT and LRND are the local values of BETA, +* IEEE1, T and RND. +* +* Throughout this routine we use the function DLAMC3 to ensure +* that relevant values are stored and not held in registers, or +* are not affected by optimizers. +* +* Compute a = 2.0**m with the smallest positive integer m such +* that +* +* fl( a + 1.0 ) = a. +* + A = 1 + C = 1 +* +*+ WHILE( C.EQ.ONE )LOOP + 10 CONTINUE + IF( C.EQ.ONE ) THEN + A = 2*A + C = DLAMC3( A, ONE ) + C = DLAMC3( C, -A ) + GO TO 10 + END IF +*+ END WHILE +* +* Now compute b = 2.0**m with the smallest positive integer m +* such that +* +* fl( a + b ) .gt. a. +* + B = 1 + C = DLAMC3( A, B ) +* +*+ WHILE( C.EQ.A )LOOP + 20 CONTINUE + IF( C.EQ.A ) THEN + B = 2*B + C = DLAMC3( A, B ) + GO TO 20 + END IF +*+ END WHILE +* +* Now compute the base. a and c are neighbouring floating point +* numbers in the interval ( beta**t, beta**( t + 1 ) ) and so +* their difference is beta. Adding 0.25 to c is to ensure that it +* is truncated to beta and not ( beta - 1 ). +* + QTR = ONE / 4 + SAVEC = C + C = DLAMC3( C, -A ) + LBETA = C + QTR +* +* Now determine whether rounding or chopping occurs, by adding a +* bit less than beta/2 and a bit more than beta/2 to a. +* + B = LBETA + F = DLAMC3( B / 2, -B / 100 ) + C = DLAMC3( F, A ) + IF( C.EQ.A ) THEN + LRND = .TRUE. + ELSE + LRND = .FALSE. + END IF + F = DLAMC3( B / 2, B / 100 ) + C = DLAMC3( F, A ) + IF( ( LRND ) .AND. ( C.EQ.A ) ) + $ LRND = .FALSE. +* +* Try and decide whether rounding is done in the IEEE 'round to +* nearest' style. B/2 is half a unit in the last place of the two +* numbers A and SAVEC. Furthermore, A is even, i.e. has last bit +* zero, and SAVEC is odd. Thus adding B/2 to A should not change +* A, but adding B/2 to SAVEC should change SAVEC. +* + T1 = DLAMC3( B / 2, A ) + T2 = DLAMC3( B / 2, SAVEC ) + LIEEE1 = ( T1.EQ.A ) .AND. ( T2.GT.SAVEC ) .AND. LRND +* +* Now find the mantissa, t. It should be the integer part of +* log to the base beta of a, however it is safer to determine t +* by powering. So we find t as the smallest positive integer for +* which +* +* fl( beta**t + 1.0 ) = 1.0. +* + LT = 0 + A = 1 + C = 1 +* +*+ WHILE( C.EQ.ONE )LOOP + 30 CONTINUE + IF( C.EQ.ONE ) THEN + LT = LT + 1 + A = A*LBETA + C = DLAMC3( A, ONE ) + C = DLAMC3( C, -A ) + GO TO 30 + END IF +*+ END WHILE +* + END IF +* + BETA = LBETA + T = LT + RND = LRND + IEEE1 = LIEEE1 + RETURN +* +* End of DLAMC1 +* + END +* +************************************************************************ +* + SUBROUTINE DLAMC2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + LOGICAL RND + INTEGER BETA, EMAX, EMIN, T + DOUBLE PRECISION EPS, RMAX, RMIN +* .. +* +* Purpose +* ======= +* +* DLAMC2 determines the machine parameters specified in its argument +* list. +* +* Arguments +* ========= +* +* BETA (output) INTEGER +* The base of the machine. +* +* T (output) INTEGER +* The number of ( BETA ) digits in the mantissa. +* +* RND (output) LOGICAL +* Specifies whether proper rounding ( RND = .TRUE. ) or +* chopping ( RND = .FALSE. ) occurs in addition. This may not +* be a reliable guide to the way in which the machine performs +* its arithmetic. +* +* EPS (output) DOUBLE PRECISION +* The smallest positive number such that +* +* fl( 1.0 - EPS ) .LT. 1.0, +* +* where fl denotes the computed value. +* +* EMIN (output) INTEGER +* The minimum exponent before (gradual) underflow occurs. +* +* RMIN (output) DOUBLE PRECISION +* The smallest normalized number for the machine, given by +* BASE**( EMIN - 1 ), where BASE is the floating point value +* of BETA. +* +* EMAX (output) INTEGER +* The maximum exponent before overflow occurs. +* +* RMAX (output) DOUBLE PRECISION +* The largest positive number for the machine, given by +* BASE**EMAX * ( 1 - EPS ), where BASE is the floating point +* value of BETA. +* +* Further Details +* =============== +* +* The computation of EPS is based on a routine PARANOIA by +* W. Kahan of the University of California at Berkeley. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL FIRST, IEEE, IWARN, LIEEE1, LRND + INTEGER GNMIN, GPMIN, I, LBETA, LEMAX, LEMIN, LT, + $ NGNMIN, NGPMIN + DOUBLE PRECISION A, B, C, HALF, LEPS, LRMAX, LRMIN, ONE, RBASE, + $ SIXTH, SMALL, THIRD, TWO, ZERO +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3 + EXTERNAL DLAMC3 +* .. +* .. External Subroutines .. + EXTERNAL DLAMC1, DLAMC4, DLAMC5 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Save statement .. + SAVE FIRST, IWARN, LBETA, LEMAX, LEMIN, LEPS, LRMAX, + $ LRMIN, LT +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / , IWARN / .FALSE. / +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + FIRST = .FALSE. + ZERO = 0 + ONE = 1 + TWO = 2 +* +* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of +* BETA, T, RND, EPS, EMIN and RMIN. +* +* Throughout this routine we use the function DLAMC3 to ensure +* that relevant values are stored and not held in registers, or +* are not affected by optimizers. +* +* DLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. +* + CALL DLAMC1( LBETA, LT, LRND, LIEEE1 ) +* +* Start to find EPS. +* + B = LBETA + A = B**( -LT ) + LEPS = A +* +* Try some tricks to see whether or not this is the correct EPS. +* + B = TWO / 3 + HALF = ONE / 2 + SIXTH = DLAMC3( B, -HALF ) + THIRD = DLAMC3( SIXTH, SIXTH ) + B = DLAMC3( THIRD, -HALF ) + B = DLAMC3( B, SIXTH ) + B = ABS( B ) + IF( B.LT.LEPS ) + $ B = LEPS +* + LEPS = 1 +* +*+ WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP + 10 CONTINUE + IF( ( LEPS.GT.B ) .AND. ( B.GT.ZERO ) ) THEN + LEPS = B + C = DLAMC3( HALF*LEPS, ( TWO**5 )*( LEPS**2 ) ) + C = DLAMC3( HALF, -C ) + B = DLAMC3( HALF, C ) + C = DLAMC3( HALF, -B ) + B = DLAMC3( HALF, C ) + GO TO 10 + END IF +*+ END WHILE +* + IF( A.LT.LEPS ) + $ LEPS = A +* +* Computation of EPS complete. +* +* Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). +* Keep dividing A by BETA until (gradual) underflow occurs. This +* is detected when we cannot recover the previous A. +* + RBASE = ONE / LBETA + SMALL = ONE + DO 20 I = 1, 3 + SMALL = DLAMC3( SMALL*RBASE, ZERO ) + 20 CONTINUE + A = DLAMC3( ONE, SMALL ) + CALL DLAMC4( NGPMIN, ONE, LBETA ) + CALL DLAMC4( NGNMIN, -ONE, LBETA ) + CALL DLAMC4( GPMIN, A, LBETA ) + CALL DLAMC4( GNMIN, -A, LBETA ) + IEEE = .FALSE. +* + IF( ( NGPMIN.EQ.NGNMIN ) .AND. ( GPMIN.EQ.GNMIN ) ) THEN + IF( NGPMIN.EQ.GPMIN ) THEN + LEMIN = NGPMIN +* ( Non twos-complement machines, no gradual underflow; +* e.g., VAX ) + ELSE IF( ( GPMIN-NGPMIN ).EQ.3 ) THEN + LEMIN = NGPMIN - 1 + LT + IEEE = .TRUE. +* ( Non twos-complement machines, with gradual underflow; +* e.g., IEEE standard followers ) + ELSE + LEMIN = MIN( NGPMIN, GPMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF +* + ELSE IF( ( NGPMIN.EQ.GPMIN ) .AND. ( NGNMIN.EQ.GNMIN ) ) THEN + IF( ABS( NGPMIN-NGNMIN ).EQ.1 ) THEN + LEMIN = MAX( NGPMIN, NGNMIN ) +* ( Twos-complement machines, no gradual underflow; +* e.g., CYBER 205 ) + ELSE + LEMIN = MIN( NGPMIN, NGNMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF +* + ELSE IF( ( ABS( NGPMIN-NGNMIN ).EQ.1 ) .AND. + $ ( GPMIN.EQ.GNMIN ) ) THEN + IF( ( GPMIN-MIN( NGPMIN, NGNMIN ) ).EQ.3 ) THEN + LEMIN = MAX( NGPMIN, NGNMIN ) - 1 + LT +* ( Twos-complement machines with gradual underflow; +* no known machine ) + ELSE + LEMIN = MIN( NGPMIN, NGNMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF +* + ELSE + LEMIN = MIN( NGPMIN, NGNMIN, GPMIN, GNMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF +*** +* Comment out this if block if EMIN is ok + IF( IWARN ) THEN + FIRST = .TRUE. + WRITE( 6, FMT = 9999 )LEMIN + END IF +*** +* +* Assume IEEE arithmetic if we found denormalised numbers above, +* or if arithmetic seems to round in the IEEE style, determined +* in routine DLAMC1. A true IEEE machine should have both things +* true; however, faulty machines may have one or the other. +* + IEEE = IEEE .OR. LIEEE1 +* +* Compute RMIN by successive division by BETA. We could compute +* RMIN as BASE**( EMIN - 1 ), but some machines underflow during +* this computation. +* + LRMIN = 1 + DO 30 I = 1, 1 - LEMIN + LRMIN = DLAMC3( LRMIN*RBASE, ZERO ) + 30 CONTINUE +* +* Finally, call DLAMC5 to compute EMAX and RMAX. +* + CALL DLAMC5( LBETA, LT, LEMIN, IEEE, LEMAX, LRMAX ) + END IF +* + BETA = LBETA + T = LT + RND = LRND + EPS = LEPS + EMIN = LEMIN + RMIN = LRMIN + EMAX = LEMAX + RMAX = LRMAX +* + RETURN +* + 9999 FORMAT( / / ' WARNING. The value EMIN may be incorrect:-', + $ ' EMIN = ', I8, / + $ ' If, after inspection, the value EMIN looks', + $ ' acceptable please comment out ', + $ / ' the IF block as marked within the code of routine', + $ ' DLAMC2,', / ' otherwise supply EMIN explicitly.', / ) +* +* End of DLAMC2 +* + END +* +************************************************************************ +* + DOUBLE PRECISION FUNCTION DLAMC3( A, B ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + DOUBLE PRECISION A, B +* .. +* +* Purpose +* ======= +* +* DLAMC3 is intended to force A and B to be stored prior to doing +* the addition of A and B , for use in situations where optimizers +* might hold one of these in a register. +* +* Arguments +* ========= +* +* A, B (input) DOUBLE PRECISION +* The values A and B. +* +* ===================================================================== +* +* .. Executable Statements .. +* + DLAMC3 = A + B +* + RETURN +* +* End of DLAMC3 +* + END +* +************************************************************************ +* + SUBROUTINE DLAMC4( EMIN, START, BASE ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + INTEGER BASE, EMIN + DOUBLE PRECISION START +* .. +* +* Purpose +* ======= +* +* DLAMC4 is a service routine for DLAMC2. +* +* Arguments +* ========= +* +* EMIN (output) EMIN +* The minimum exponent before (gradual) underflow, computed by +* setting A = START and dividing by BASE until the previous A +* can not be recovered. +* +* START (input) DOUBLE PRECISION +* The starting point for determining EMIN. +* +* BASE (input) INTEGER +* The base of the machine. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION A, B1, B2, C1, C2, D1, D2, ONE, RBASE, ZERO +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3 + EXTERNAL DLAMC3 +* .. +* .. Executable Statements .. +* + A = START + ONE = 1 + RBASE = ONE / BASE + ZERO = 0 + EMIN = 1 + B1 = DLAMC3( A*RBASE, ZERO ) + C1 = A + C2 = A + D1 = A + D2 = A +*+ WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. +* $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP + 10 CONTINUE + IF( ( C1.EQ.A ) .AND. ( C2.EQ.A ) .AND. ( D1.EQ.A ) .AND. + $ ( D2.EQ.A ) ) THEN + EMIN = EMIN - 1 + A = B1 + B1 = DLAMC3( A / BASE, ZERO ) + C1 = DLAMC3( B1*BASE, ZERO ) + D1 = ZERO + DO 20 I = 1, BASE + D1 = D1 + B1 + 20 CONTINUE + B2 = DLAMC3( A*RBASE, ZERO ) + C2 = DLAMC3( B2 / RBASE, ZERO ) + D2 = ZERO + DO 30 I = 1, BASE + D2 = D2 + B2 + 30 CONTINUE + GO TO 10 + END IF +*+ END WHILE +* + RETURN +* +* End of DLAMC4 +* + END +* +************************************************************************ +* + SUBROUTINE DLAMC5( BETA, P, EMIN, IEEE, EMAX, RMAX ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + LOGICAL IEEE + INTEGER BETA, EMAX, EMIN, P + DOUBLE PRECISION RMAX +* .. +* +* Purpose +* ======= +* +* DLAMC5 attempts to compute RMAX, the largest machine floating-point +* number, without overflow. It assumes that EMAX + abs(EMIN) sum +* approximately to a power of 2. It will fail on machines where this +* assumption does not hold, for example, the Cyber 205 (EMIN = -28625, +* EMAX = 28718). It will also fail if the value supplied for EMIN is +* too large (i.e. too close to zero), probably with overflow. +* +* Arguments +* ========= +* +* BETA (input) INTEGER +* The base of floating-point arithmetic. +* +* P (input) INTEGER +* The number of base BETA digits in the mantissa of a +* floating-point value. +* +* EMIN (input) INTEGER +* The minimum exponent before (gradual) underflow. +* +* IEEE (input) LOGICAL +* A logical flag specifying whether or not the arithmetic +* system is thought to comply with the IEEE standard. +* +* EMAX (output) INTEGER +* The largest exponent before overflow +* +* RMAX (output) DOUBLE PRECISION +* The largest machine floating-point number. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + INTEGER EXBITS, EXPSUM, I, LEXP, NBITS, TRY, UEXP + DOUBLE PRECISION OLDY, RECBAS, Y, Z +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3 + EXTERNAL DLAMC3 +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* First compute LEXP and UEXP, two powers of 2 that bound +* abs(EMIN). We then assume that EMAX + abs(EMIN) will sum +* approximately to the bound that is closest to abs(EMIN). +* (EMAX is the exponent of the required number RMAX). +* + LEXP = 1 + EXBITS = 1 + 10 CONTINUE + TRY = LEXP*2 + IF( TRY.LE.( -EMIN ) ) THEN + LEXP = TRY + EXBITS = EXBITS + 1 + GO TO 10 + END IF + IF( LEXP.EQ.-EMIN ) THEN + UEXP = LEXP + ELSE + UEXP = TRY + EXBITS = EXBITS + 1 + END IF +* +* Now -LEXP is less than or equal to EMIN, and -UEXP is greater +* than or equal to EMIN. EXBITS is the number of bits needed to +* store the exponent. +* + IF( ( UEXP+EMIN ).GT.( -LEXP-EMIN ) ) THEN + EXPSUM = 2*LEXP + ELSE + EXPSUM = 2*UEXP + END IF +* +* EXPSUM is the exponent range, approximately equal to +* EMAX - EMIN + 1 . +* + EMAX = EXPSUM + EMIN - 1 + NBITS = 1 + EXBITS + P +* +* NBITS is the total number of bits needed to store a +* floating-point number. +* + IF( ( MOD( NBITS, 2 ).EQ.1 ) .AND. ( BETA.EQ.2 ) ) THEN +* +* Either there are an odd number of bits used to store a +* floating-point number, which is unlikely, or some bits are +* not used in the representation of numbers, which is possible, +* (e.g. Cray machines) or the mantissa has an implicit bit, +* (e.g. IEEE machines, Dec Vax machines), which is perhaps the +* most likely. We have to assume the last alternative. +* If this is true, then we need to reduce EMAX by one because +* there must be some way of representing zero in an implicit-bit +* system. On machines like Cray, we are reducing EMAX by one +* unnecessarily. +* + EMAX = EMAX - 1 + END IF +* + IF( IEEE ) THEN +* +* Assume we are on an IEEE machine which reserves one exponent +* for infinity and NaN. +* + EMAX = EMAX - 1 + END IF +* +* Now create RMAX, the largest machine number, which should +* be equal to (1.0 - BETA**(-P)) * BETA**EMAX . +* +* First compute 1.0 - BETA**(-P), being careful that the +* result is less than 1.0 . +* + RECBAS = ONE / BETA + Z = BETA - ONE + Y = ZERO + DO 20 I = 1, P + Z = Z*RECBAS + IF( Y.LT.ONE ) + $ OLDY = Y + Y = DLAMC3( Y, Z ) + 20 CONTINUE + IF( Y.GE.ONE ) + $ Y = OLDY +* +* Now multiply by BETA**EMAX to get RMAX. +* + DO 30 I = 1, EMAX + Y = DLAMC3( Y*BETA, ZERO ) + 30 CONTINUE +* + RMAX = Y + RETURN +* +* End of DLAMC5 +* + END diff --git a/costa/native/external/lapack/dlamrg.f b/costa/native/external/lapack/dlamrg.f new file mode 100644 index 000000000..ac52dd4be --- /dev/null +++ b/costa/native/external/lapack/dlamrg.f @@ -0,0 +1,104 @@ + SUBROUTINE DLAMRG( N1, N2, A, DTRD1, DTRD2, INDEX ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER DTRD1, DTRD2, N1, N2 +* .. +* .. Array Arguments .. + INTEGER INDEX( * ) + DOUBLE PRECISION A( * ) +* .. +* +* Purpose +* ======= +* +* DLAMRG will create a permutation list which will merge the elements +* of A (which is composed of two independently sorted sets) into a +* single set which is sorted in ascending order. +* +* Arguments +* ========= +* +* N1 (input) INTEGER +* N2 (input) INTEGER +* These arguements contain the respective lengths of the two +* sorted lists to be merged. +* +* A (input) DOUBLE PRECISION array, dimension (N1+N2) +* The first N1 elements of A contain a list of numbers which +* are sorted in either ascending or descending order. Likewise +* for the final N2 elements. +* +* DTRD1 (input) INTEGER +* DTRD2 (input) INTEGER +* These are the strides to be taken through the array A. +* Allowable strides are 1 and -1. They indicate whether a +* subset of A is sorted in ascending (DTRDx = 1) or descending +* (DTRDx = -1) order. +* +* INDEX (output) INTEGER array, dimension (N1+N2) +* On exit this array will contain a permutation such that +* if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be +* sorted in ascending order. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IND1, IND2, N1SV, N2SV +* .. +* .. Executable Statements .. +* + N1SV = N1 + N2SV = N2 + IF( DTRD1.GT.0 ) THEN + IND1 = 1 + ELSE + IND1 = N1 + END IF + IF( DTRD2.GT.0 ) THEN + IND2 = 1 + N1 + ELSE + IND2 = N1 + N2 + END IF + I = 1 +* while ( (N1SV > 0) & (N2SV > 0) ) + 10 CONTINUE + IF( N1SV.GT.0 .AND. N2SV.GT.0 ) THEN + IF( A( IND1 ).LE.A( IND2 ) ) THEN + INDEX( I ) = IND1 + I = I + 1 + IND1 = IND1 + DTRD1 + N1SV = N1SV - 1 + ELSE + INDEX( I ) = IND2 + I = I + 1 + IND2 = IND2 + DTRD2 + N2SV = N2SV - 1 + END IF + GO TO 10 + END IF +* end while + IF( N1SV.EQ.0 ) THEN + DO 20 N1SV = 1, N2SV + INDEX( I ) = IND2 + I = I + 1 + IND2 = IND2 + DTRD2 + 20 CONTINUE + ELSE +* N2SV .EQ. 0 + DO 30 N2SV = 1, N1SV + INDEX( I ) = IND1 + I = I + 1 + IND1 = IND1 + DTRD1 + 30 CONTINUE + END IF +* + RETURN +* +* End of DLAMRG +* + END diff --git a/costa/native/external/lapack/dlangb.f b/costa/native/external/lapack/dlangb.f new file mode 100644 index 000000000..9aea88339 --- /dev/null +++ b/costa/native/external/lapack/dlangb.f @@ -0,0 +1,155 @@ + DOUBLE PRECISION FUNCTION DLANGB( NORM, N, KL, KU, AB, LDAB, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER KL, KU, LDAB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLANGB returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of an +* n by n band matrix A, with kl sub-diagonals and ku super-diagonals. +* +* Description +* =========== +* +* DLANGB returns the value +* +* DLANGB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in DLANGB as described +* above. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, DLANGB is +* set to zero. +* +* KL (input) INTEGER +* The number of sub-diagonals of the matrix A. KL >= 0. +* +* KU (input) INTEGER +* The number of super-diagonals of the matrix A. KU >= 0. +* +* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) +* The band matrix A, stored in rows 1 to KL+KU+1. The j-th +* column of A is stored in the j-th column of the array AB as +* follows: +* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KL+KU+1. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), +* where LWORK >= N when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, K, L + DOUBLE PRECISION SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 30 CONTINUE + VALUE = MAX( VALUE, SUM ) + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, N + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + K = KU + 1 - J + DO 60 I = MAX( 1, J-KU ), MIN( N, J+KL ) + WORK( I ) = WORK( I ) + ABS( AB( K+I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + L = MAX( 1, J-KU ) + K = KU + 1 - J + L + CALL DLASSQ( MIN( N, J+KL )-L+1, AB( K, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + DLANGB = VALUE + RETURN +* +* End of DLANGB +* + END diff --git a/costa/native/external/lapack/dlange.f b/costa/native/external/lapack/dlange.f new file mode 100644 index 000000000..202ba6047 --- /dev/null +++ b/costa/native/external/lapack/dlange.f @@ -0,0 +1,145 @@ + DOUBLE PRECISION FUNCTION DLANGE( NORM, M, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLANGE returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* real matrix A. +* +* Description +* =========== +* +* DLANGE returns the value +* +* DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in DLANGE as described +* above. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. When M = 0, +* DLANGE is set to zero. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. When N = 0, +* DLANGE is set to zero. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The m by n matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(M,1). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), +* where LWORK >= M when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( MIN( M, N ).EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = 1, M + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = 1, M + SUM = SUM + ABS( A( I, J ) ) + 30 CONTINUE + VALUE = MAX( VALUE, SUM ) + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, M + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + DO 60 I = 1, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, M + VALUE = MAX( VALUE, WORK( I ) ) + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + CALL DLASSQ( M, A( 1, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + DLANGE = VALUE + RETURN +* +* End of DLANGE +* + END diff --git a/costa/native/external/lapack/dlangt.f b/costa/native/external/lapack/dlangt.f new file mode 100644 index 000000000..7d507bc81 --- /dev/null +++ b/costa/native/external/lapack/dlangt.f @@ -0,0 +1,142 @@ + DOUBLE PRECISION FUNCTION DLANGT( NORM, N, DL, D, DU ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), DL( * ), DU( * ) +* .. +* +* Purpose +* ======= +* +* DLANGT returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* real tridiagonal matrix A. +* +* Description +* =========== +* +* DLANGT returns the value +* +* DLANGT = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in DLANGT as described +* above. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, DLANGT is +* set to zero. +* +* DL (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) sub-diagonal elements of A. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The diagonal elements of A. +* +* DU (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) super-diagonal elements of A. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION ANORM, SCALE, SUM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + ANORM = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + ANORM = ABS( D( N ) ) + DO 10 I = 1, N - 1 + ANORM = MAX( ANORM, ABS( DL( I ) ) ) + ANORM = MAX( ANORM, ABS( D( I ) ) ) + ANORM = MAX( ANORM, ABS( DU( I ) ) ) + 10 CONTINUE + ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN +* +* Find norm1(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = MAX( ABS( D( 1 ) )+ABS( DL( 1 ) ), + $ ABS( D( N ) )+ABS( DU( N-1 ) ) ) + DO 20 I = 2, N - 1 + ANORM = MAX( ANORM, ABS( D( I ) )+ABS( DL( I ) )+ + $ ABS( DU( I-1 ) ) ) + 20 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = MAX( ABS( D( 1 ) )+ABS( DU( 1 ) ), + $ ABS( D( N ) )+ABS( DL( N-1 ) ) ) + DO 30 I = 2, N - 1 + ANORM = MAX( ANORM, ABS( D( I ) )+ABS( DU( I ) )+ + $ ABS( DL( I-1 ) ) ) + 30 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + CALL DLASSQ( N, D, 1, SCALE, SUM ) + IF( N.GT.1 ) THEN + CALL DLASSQ( N-1, DL, 1, SCALE, SUM ) + CALL DLASSQ( N-1, DU, 1, SCALE, SUM ) + END IF + ANORM = SCALE*SQRT( SUM ) + END IF +* + DLANGT = ANORM + RETURN +* +* End of DLANGT +* + END diff --git a/costa/native/external/lapack/dlanhs.f b/costa/native/external/lapack/dlanhs.f new file mode 100644 index 000000000..45939f0ab --- /dev/null +++ b/costa/native/external/lapack/dlanhs.f @@ -0,0 +1,142 @@ + DOUBLE PRECISION FUNCTION DLANHS( NORM, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLANHS returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* Hessenberg matrix A. +* +* Description +* =========== +* +* DLANHS returns the value +* +* DLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in DLANHS as described +* above. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, DLANHS is +* set to zero. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The n by n upper Hessenberg matrix A; the part of A below the +* first sub-diagonal is not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(N,1). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), +* where LWORK >= N when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = 1, MIN( N, J+1 ) + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = 1, MIN( N, J+1 ) + SUM = SUM + ABS( A( I, J ) ) + 30 CONTINUE + VALUE = MAX( VALUE, SUM ) + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, N + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + DO 60 I = 1, MIN( N, J+1 ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + CALL DLASSQ( MIN( N, J+1 ), A( 1, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + DLANHS = VALUE + RETURN +* +* End of DLANHS +* + END diff --git a/costa/native/external/lapack/dlansb.f b/costa/native/external/lapack/dlansb.f new file mode 100644 index 000000000..3a6893bc8 --- /dev/null +++ b/costa/native/external/lapack/dlansb.f @@ -0,0 +1,187 @@ + DOUBLE PRECISION FUNCTION DLANSB( NORM, UPLO, N, K, AB, LDAB, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER K, LDAB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLANSB returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of an +* n by n symmetric band matrix A, with k super-diagonals. +* +* Description +* =========== +* +* DLANSB returns the value +* +* DLANSB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in DLANSB as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* band matrix A is supplied. +* = 'U': Upper triangular part is supplied +* = 'L': Lower triangular part is supplied +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, DLANSB is +* set to zero. +* +* K (input) INTEGER +* The number of super-diagonals or sub-diagonals of the +* band matrix A. K >= 0. +* +* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) +* The upper or lower triangle of the symmetric band matrix A, +* stored in the first K+1 rows of AB. The j-th column of A is +* stored in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= K+1. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), +* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +* WORK is not referenced. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L + DOUBLE PRECISION ABSA, SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = MAX( K+2-J, 1 ), K + 1 + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = 1, MIN( N+1-J, K+1 ) + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + L = K + 1 - J + DO 50 I = MAX( 1, J-K ), J - 1 + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( AB( K+1, J ) ) + 60 CONTINUE + DO 70 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( AB( 1, J ) ) + L = 1 - J + DO 90 I = J + 1, MIN( N, J+K ) + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + VALUE = MAX( VALUE, SUM ) + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( K.GT.0 ) THEN + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL DLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ), + $ 1, SCALE, SUM ) + 110 CONTINUE + L = K + 1 + ELSE + DO 120 J = 1, N - 1 + CALL DLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, + $ SUM ) + 120 CONTINUE + L = 1 + END IF + SUM = 2*SUM + ELSE + L = 1 + END IF + CALL DLASSQ( N, AB( L, 1 ), LDAB, SCALE, SUM ) + VALUE = SCALE*SQRT( SUM ) + END IF +* + DLANSB = VALUE + RETURN +* +* End of DLANSB +* + END diff --git a/costa/native/external/lapack/dlansp.f b/costa/native/external/lapack/dlansp.f new file mode 100644 index 000000000..52c283ebb --- /dev/null +++ b/costa/native/external/lapack/dlansp.f @@ -0,0 +1,197 @@ + DOUBLE PRECISION FUNCTION DLANSP( NORM, UPLO, N, AP, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLANSP returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* real symmetric matrix A, supplied in packed form. +* +* Description +* =========== +* +* DLANSP returns the value +* +* DLANSP = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in DLANSP as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is supplied. +* = 'U': Upper triangular part of A is supplied +* = 'L': Lower triangular part of A is supplied +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, DLANSP is +* set to zero. +* +* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* The upper or lower triangle of the symmetric matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), +* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +* WORK is not referenced. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, K + DOUBLE PRECISION ABSA, SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + K = 1 + DO 20 J = 1, N + DO 10 I = K, K + J - 1 + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 10 CONTINUE + K = K + J + 20 CONTINUE + ELSE + K = 1 + DO 40 J = 1, N + DO 30 I = K, K + N - J + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 30 CONTINUE + K = K + N - J + 1 + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + K = 1 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 50 CONTINUE + WORK( J ) = SUM + ABS( AP( K ) ) + K = K + 1 + 60 CONTINUE + DO 70 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( AP( K ) ) + K = K + 1 + DO 90 I = J + 1, N + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 90 CONTINUE + VALUE = MAX( VALUE, SUM ) + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + K = 2 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL DLASSQ( J-1, AP( K ), 1, SCALE, SUM ) + K = K + J + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL DLASSQ( N-J, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 120 CONTINUE + END IF + SUM = 2*SUM + K = 1 + DO 130 I = 1, N + IF( AP( K ).NE.ZERO ) THEN + ABSA = ABS( AP( K ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + IF( LSAME( UPLO, 'U' ) ) THEN + K = K + I + 1 + ELSE + K = K + N - I + 1 + END IF + 130 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + DLANSP = VALUE + RETURN +* +* End of DLANSP +* + END diff --git a/costa/native/external/lapack/dlanst.f b/costa/native/external/lapack/dlanst.f new file mode 100644 index 000000000..e604348c3 --- /dev/null +++ b/costa/native/external/lapack/dlanst.f @@ -0,0 +1,125 @@ + DOUBLE PRECISION FUNCTION DLANST( NORM, N, D, E ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) +* .. +* +* Purpose +* ======= +* +* DLANST returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* real symmetric tridiagonal matrix A. +* +* Description +* =========== +* +* DLANST returns the value +* +* DLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in DLANST as described +* above. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, DLANST is +* set to zero. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The diagonal elements of A. +* +* E (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) sub-diagonal or super-diagonal elements of A. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION ANORM, SCALE, SUM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + ANORM = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + ANORM = ABS( D( N ) ) + DO 10 I = 1, N - 1 + ANORM = MAX( ANORM, ABS( D( I ) ) ) + ANORM = MAX( ANORM, ABS( E( I ) ) ) + 10 CONTINUE + ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' .OR. + $ LSAME( NORM, 'I' ) ) THEN +* +* Find norm1(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = MAX( ABS( D( 1 ) )+ABS( E( 1 ) ), + $ ABS( E( N-1 ) )+ABS( D( N ) ) ) + DO 20 I = 2, N - 1 + ANORM = MAX( ANORM, ABS( D( I ) )+ABS( E( I ) )+ + $ ABS( E( I-1 ) ) ) + 20 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( N.GT.1 ) THEN + CALL DLASSQ( N-1, E, 1, SCALE, SUM ) + SUM = 2*SUM + END IF + CALL DLASSQ( N, D, 1, SCALE, SUM ) + ANORM = SCALE*SQRT( SUM ) + END IF +* + DLANST = ANORM + RETURN +* +* End of DLANST +* + END diff --git a/costa/native/external/lapack/dlansy.f b/costa/native/external/lapack/dlansy.f new file mode 100644 index 000000000..73d2ab71c --- /dev/null +++ b/costa/native/external/lapack/dlansy.f @@ -0,0 +1,174 @@ + DOUBLE PRECISION FUNCTION DLANSY( NORM, UPLO, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLANSY returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* real symmetric matrix A. +* +* Description +* =========== +* +* DLANSY returns the value +* +* DLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in DLANSY as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is to be referenced. +* = 'U': Upper triangular part of A is referenced +* = 'L': Lower triangular part of A is referenced +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, DLANSY is +* set to zero. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The symmetric matrix A. If UPLO = 'U', the leading n by n +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading n by n lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(N,1). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), +* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +* WORK is not referenced. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION ABSA, SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, J + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = J, N + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( A( J, J ) ) + 60 CONTINUE + DO 70 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( A( J, J ) ) + DO 90 I = J + 1, N + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + VALUE = MAX( VALUE, SUM ) + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL DLASSQ( J-1, A( 1, J ), 1, SCALE, SUM ) + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL DLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM ) + 120 CONTINUE + END IF + SUM = 2*SUM + CALL DLASSQ( N, A, LDA+1, SCALE, SUM ) + VALUE = SCALE*SQRT( SUM ) + END IF +* + DLANSY = VALUE + RETURN +* +* End of DLANSY +* + END diff --git a/costa/native/external/lapack/dlantb.f b/costa/native/external/lapack/dlantb.f new file mode 100644 index 000000000..5792bd64e --- /dev/null +++ b/costa/native/external/lapack/dlantb.f @@ -0,0 +1,285 @@ + DOUBLE PRECISION FUNCTION DLANTB( NORM, UPLO, DIAG, N, K, AB, + $ LDAB, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER K, LDAB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLANTB returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of an +* n by n triangular band matrix A, with ( k + 1 ) diagonals. +* +* Description +* =========== +* +* DLANTB returns the value +* +* DLANTB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in DLANTB as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, DLANTB is +* set to zero. +* +* K (input) INTEGER +* The number of super-diagonals of the matrix A if UPLO = 'U', +* or the number of sub-diagonals of the matrix A if UPLO = 'L'. +* K >= 0. +* +* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) +* The upper or lower triangular band matrix A, stored in the +* first k+1 rows of AB. The j-th column of A is stored +* in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). +* Note that when DIAG = 'U', the elements of the array AB +* corresponding to the diagonal elements of the matrix A are +* not referenced, but are assumed to be one. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= K+1. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), +* where LWORK >= N when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J, L + DOUBLE PRECISION SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = MAX( K+2-J, 1 ), K + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = 2, MIN( N+1-J, K+1 ) + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = MAX( K+2-J, 1 ), K + 1 + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = 1, MIN( N+1-J, K+1 ) + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 90 I = MAX( K+2-J, 1 ), K + SUM = SUM + ABS( AB( I, J ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = MAX( K+2-J, 1 ), K + 1 + SUM = SUM + ABS( AB( I, J ) ) + 100 CONTINUE + END IF + VALUE = MAX( VALUE, SUM ) + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = 2, MIN( N+1-J, K+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = 1, MIN( N+1-J, K+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 130 CONTINUE + END IF + VALUE = MAX( VALUE, SUM ) + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, N + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + L = K + 1 - J + DO 160 I = MAX( 1, J-K ), J - 1 + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 160 CONTINUE + 170 CONTINUE + ELSE + DO 180 I = 1, N + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + L = K + 1 - J + DO 190 I = MAX( 1, J-K ), J + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 230 J = 1, N + L = 1 - J + DO 220 I = J + 1, MIN( N, J+K ) + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 220 CONTINUE + 230 CONTINUE + ELSE + DO 240 I = 1, N + WORK( I ) = ZERO + 240 CONTINUE + DO 260 J = 1, N + L = 1 - J + DO 250 I = J, MIN( N, J+K ) + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 250 CONTINUE + 260 CONTINUE + END IF + END IF + DO 270 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 270 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + IF( K.GT.0 ) THEN + DO 280 J = 2, N + CALL DLASSQ( MIN( J-1, K ), + $ AB( MAX( K+2-J, 1 ), J ), 1, SCALE, + $ SUM ) + 280 CONTINUE + END IF + ELSE + SCALE = ZERO + SUM = ONE + DO 290 J = 1, N + CALL DLASSQ( MIN( J, K+1 ), AB( MAX( K+2-J, 1 ), J ), + $ 1, SCALE, SUM ) + 290 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + IF( K.GT.0 ) THEN + DO 300 J = 1, N - 1 + CALL DLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, + $ SUM ) + 300 CONTINUE + END IF + ELSE + SCALE = ZERO + SUM = ONE + DO 310 J = 1, N + CALL DLASSQ( MIN( N-J+1, K+1 ), AB( 1, J ), 1, SCALE, + $ SUM ) + 310 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + DLANTB = VALUE + RETURN +* +* End of DLANTB +* + END diff --git a/costa/native/external/lapack/dlantp.f b/costa/native/external/lapack/dlantp.f new file mode 100644 index 000000000..019dcfaa6 --- /dev/null +++ b/costa/native/external/lapack/dlantp.f @@ -0,0 +1,286 @@ + DOUBLE PRECISION FUNCTION DLANTP( NORM, UPLO, DIAG, N, AP, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLANTP returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* triangular matrix A, supplied in packed form. +* +* Description +* =========== +* +* DLANTP returns the value +* +* DLANTP = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in DLANTP as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, DLANTP is +* set to zero. +* +* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* The upper or lower triangular matrix A, packed columnwise in +* a linear array. The j-th column of A is stored in the array +* AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* Note that when DIAG = 'U', the elements of the array AP +* corresponding to the diagonal elements of the matrix A are +* not referenced, but are assumed to be one. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), +* where LWORK >= N when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J, K + DOUBLE PRECISION SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + K = 1 + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = K, K + J - 2 + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 10 CONTINUE + K = K + J + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = K + 1, K + N - J + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 30 CONTINUE + K = K + N - J + 1 + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = K, K + J - 1 + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 50 CONTINUE + K = K + J + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = K, K + N - J + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 70 CONTINUE + K = K + N - J + 1 + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + K = 1 + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 90 I = K, K + J - 2 + SUM = SUM + ABS( AP( I ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = K, K + J - 1 + SUM = SUM + ABS( AP( I ) ) + 100 CONTINUE + END IF + K = K + J + VALUE = MAX( VALUE, SUM ) + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = K + 1, K + N - J + SUM = SUM + ABS( AP( I ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = K, K + N - J + SUM = SUM + ABS( AP( I ) ) + 130 CONTINUE + END IF + K = K + N - J + 1 + VALUE = MAX( VALUE, SUM ) + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + K = 1 + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, N + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + DO 160 I = 1, J - 1 + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 160 CONTINUE + K = K + 1 + 170 CONTINUE + ELSE + DO 180 I = 1, N + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + DO 190 I = 1, J + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 230 J = 1, N + K = K + 1 + DO 220 I = J + 1, N + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 220 CONTINUE + 230 CONTINUE + ELSE + DO 240 I = 1, N + WORK( I ) = ZERO + 240 CONTINUE + DO 260 J = 1, N + DO 250 I = J, N + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 250 CONTINUE + 260 CONTINUE + END IF + END IF + VALUE = ZERO + DO 270 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 270 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + K = 2 + DO 280 J = 2, N + CALL DLASSQ( J-1, AP( K ), 1, SCALE, SUM ) + K = K + J + 280 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + K = 1 + DO 290 J = 1, N + CALL DLASSQ( J, AP( K ), 1, SCALE, SUM ) + K = K + J + 290 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + K = 2 + DO 300 J = 1, N - 1 + CALL DLASSQ( N-J, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 300 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + K = 1 + DO 310 J = 1, N + CALL DLASSQ( N-J+1, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 310 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + DLANTP = VALUE + RETURN +* +* End of DLANTP +* + END diff --git a/costa/native/external/lapack/dlantr.f b/costa/native/external/lapack/dlantr.f new file mode 100644 index 000000000..19e9b5d92 --- /dev/null +++ b/costa/native/external/lapack/dlantr.f @@ -0,0 +1,277 @@ + DOUBLE PRECISION FUNCTION DLANTR( NORM, UPLO, DIAG, M, N, A, LDA, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLANTR returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* trapezoidal or triangular matrix A. +* +* Description +* =========== +* +* DLANTR returns the value +* +* DLANTR = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in DLANTR as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower trapezoidal. +* = 'U': Upper trapezoidal +* = 'L': Lower trapezoidal +* Note that A is triangular instead of trapezoidal if M = N. +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A has unit diagonal. +* = 'N': Non-unit diagonal +* = 'U': Unit diagonal +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0, and if +* UPLO = 'U', M <= N. When M = 0, DLANTR is set to zero. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0, and if +* UPLO = 'L', N <= M. When N = 0, DLANTR is set to zero. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The trapezoidal matrix A (A is triangular if M = N). +* If UPLO = 'U', the leading m by n upper trapezoidal part of +* the array A contains the upper trapezoidal matrix, and the +* strictly lower triangular part of A is not referenced. +* If UPLO = 'L', the leading m by n lower trapezoidal part of +* the array A contains the lower trapezoidal matrix, and the +* strictly upper triangular part of A is not referenced. Note +* that when DIAG = 'U', the diagonal elements of A are not +* referenced and are assumed to be one. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(M,1). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), +* where LWORK >= M when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J + DOUBLE PRECISION SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( MIN( M, N ).EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( M, J-1 ) + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = J + 1, M + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = 1, MIN( M, J ) + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = J, M + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( ( UDIAG ) .AND. ( J.LE.M ) ) THEN + SUM = ONE + DO 90 I = 1, J - 1 + SUM = SUM + ABS( A( I, J ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = 1, MIN( M, J ) + SUM = SUM + ABS( A( I, J ) ) + 100 CONTINUE + END IF + VALUE = MAX( VALUE, SUM ) + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = J + 1, M + SUM = SUM + ABS( A( I, J ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = J, M + SUM = SUM + ABS( A( I, J ) ) + 130 CONTINUE + END IF + VALUE = MAX( VALUE, SUM ) + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, M + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + DO 160 I = 1, MIN( M, J-1 ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 160 CONTINUE + 170 CONTINUE + ELSE + DO 180 I = 1, M + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + DO 190 I = 1, MIN( M, J ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 220 I = N + 1, M + WORK( I ) = ZERO + 220 CONTINUE + DO 240 J = 1, N + DO 230 I = J + 1, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 230 CONTINUE + 240 CONTINUE + ELSE + DO 250 I = 1, M + WORK( I ) = ZERO + 250 CONTINUE + DO 270 J = 1, N + DO 260 I = J, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 260 CONTINUE + 270 CONTINUE + END IF + END IF + VALUE = ZERO + DO 280 I = 1, M + VALUE = MAX( VALUE, WORK( I ) ) + 280 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = MIN( M, N ) + DO 290 J = 2, N + CALL DLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM ) + 290 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + DO 300 J = 1, N + CALL DLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM ) + 300 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = MIN( M, N ) + DO 310 J = 1, N + CALL DLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE, + $ SUM ) + 310 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + DO 320 J = 1, N + CALL DLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM ) + 320 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + DLANTR = VALUE + RETURN +* +* End of DLANTR +* + END diff --git a/costa/native/external/lapack/dlanv2.f b/costa/native/external/lapack/dlanv2.f new file mode 100644 index 000000000..ccc6ef62a --- /dev/null +++ b/costa/native/external/lapack/dlanv2.f @@ -0,0 +1,206 @@ + SUBROUTINE DLANV2( A, B, C, D, RT1R, RT1I, RT2R, RT2I, CS, SN ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + DOUBLE PRECISION A, B, C, CS, D, RT1I, RT1R, RT2I, RT2R, SN +* .. +* +* Purpose +* ======= +* +* DLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric +* matrix in standard form: +* +* [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] +* [ C D ] [ SN CS ] [ CC DD ] [-SN CS ] +* +* where either +* 1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or +* 2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex +* conjugate eigenvalues. +* +* Arguments +* ========= +* +* A (input/output) DOUBLE PRECISION +* B (input/output) DOUBLE PRECISION +* C (input/output) DOUBLE PRECISION +* D (input/output) DOUBLE PRECISION +* On entry, the elements of the input matrix. +* On exit, they are overwritten by the elements of the +* standardised Schur form. +* +* RT1R (output) DOUBLE PRECISION +* RT1I (output) DOUBLE PRECISION +* RT2R (output) DOUBLE PRECISION +* RT2I (output) DOUBLE PRECISION +* The real and imaginary parts of the eigenvalues. If the +* eigenvalues are a complex conjugate pair, RT1I > 0. +* +* CS (output) DOUBLE PRECISION +* SN (output) DOUBLE PRECISION +* Parameters of the rotation matrix. +* +* Further Details +* =============== +* +* Modified by V. Sima, Research Institute for Informatics, Bucharest, +* Romania, to reduce the risk of cancellation errors, +* when computing real eigenvalues, and to ensure, if possible, that +* abs(RT1R) >= abs(RT2R). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION MULTPL + PARAMETER ( MULTPL = 4.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION AA, BB, BCMAX, BCMIS, CC, CS1, DD, EPS, P, SAB, + $ SAC, SCALE, SIGMA, SN1, TAU, TEMP, Z +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLAPY2 + EXTERNAL DLAMCH, DLAPY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SIGN, SQRT +* .. +* .. Executable Statements .. +* + EPS = DLAMCH( 'P' ) + IF( C.EQ.ZERO ) THEN + CS = ONE + SN = ZERO + GO TO 10 +* + ELSE IF( B.EQ.ZERO ) THEN +* +* Swap rows and columns +* + CS = ZERO + SN = ONE + TEMP = D + D = A + A = TEMP + B = -C + C = ZERO + GO TO 10 + ELSE IF( ( A-D ).EQ.ZERO .AND. SIGN( ONE, B ).NE.SIGN( ONE, C ) ) + $ THEN + CS = ONE + SN = ZERO + GO TO 10 + ELSE +* + TEMP = A - D + P = HALF*TEMP + BCMAX = MAX( ABS( B ), ABS( C ) ) + BCMIS = MIN( ABS( B ), ABS( C ) )*SIGN( ONE, B )*SIGN( ONE, C ) + SCALE = MAX( ABS( P ), BCMAX ) + Z = ( P / SCALE )*P + ( BCMAX / SCALE )*BCMIS +* +* If Z is of the order of the machine accuracy, postpone the +* decision on the nature of eigenvalues +* + IF( Z.GE.MULTPL*EPS ) THEN +* +* Real eigenvalues. Compute A and D. +* + Z = P + SIGN( SQRT( SCALE )*SQRT( Z ), P ) + A = D + Z + D = D - ( BCMAX / Z )*BCMIS +* +* Compute B and the rotation matrix +* + TAU = DLAPY2( C, Z ) + CS = Z / TAU + SN = C / TAU + B = B - C + C = ZERO + ELSE +* +* Complex eigenvalues, or real (almost) equal eigenvalues. +* Make diagonal elements equal. +* + SIGMA = B + C + TAU = DLAPY2( SIGMA, TEMP ) + CS = SQRT( HALF*( ONE+ABS( SIGMA ) / TAU ) ) + SN = -( P / ( TAU*CS ) )*SIGN( ONE, SIGMA ) +* +* Compute [ AA BB ] = [ A B ] [ CS -SN ] +* [ CC DD ] [ C D ] [ SN CS ] +* + AA = A*CS + B*SN + BB = -A*SN + B*CS + CC = C*CS + D*SN + DD = -C*SN + D*CS +* +* Compute [ A B ] = [ CS SN ] [ AA BB ] +* [ C D ] [-SN CS ] [ CC DD ] +* + A = AA*CS + CC*SN + B = BB*CS + DD*SN + C = -AA*SN + CC*CS + D = -BB*SN + DD*CS +* + TEMP = HALF*( A+D ) + A = TEMP + D = TEMP +* + IF( C.NE.ZERO ) THEN + IF( B.NE.ZERO ) THEN + IF( SIGN( ONE, B ).EQ.SIGN( ONE, C ) ) THEN +* +* Real eigenvalues: reduce to upper triangular form +* + SAB = SQRT( ABS( B ) ) + SAC = SQRT( ABS( C ) ) + P = SIGN( SAB*SAC, C ) + TAU = ONE / SQRT( ABS( B+C ) ) + A = TEMP + P + D = TEMP - P + B = B - C + C = ZERO + CS1 = SAB*TAU + SN1 = SAC*TAU + TEMP = CS*CS1 - SN*SN1 + SN = CS*SN1 + SN*CS1 + CS = TEMP + END IF + ELSE + B = -C + C = ZERO + TEMP = CS + CS = -SN + SN = TEMP + END IF + END IF + END IF +* + END IF +* + 10 CONTINUE +* +* Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I). +* + RT1R = A + RT2R = D + IF( C.EQ.ZERO ) THEN + RT1I = ZERO + RT2I = ZERO + ELSE + RT1I = SQRT( ABS( B ) )*SQRT( ABS( C ) ) + RT2I = -RT1I + END IF + RETURN +* +* End of DLANV2 +* + END diff --git a/costa/native/external/lapack/dlapll.f b/costa/native/external/lapack/dlapll.f new file mode 100644 index 000000000..9ae679831 --- /dev/null +++ b/costa/native/external/lapack/dlapll.f @@ -0,0 +1,100 @@ + SUBROUTINE DLAPLL( N, X, INCX, Y, INCY, SSMIN ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INCX, INCY, N + DOUBLE PRECISION SSMIN +* .. +* .. Array Arguments .. + DOUBLE PRECISION X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* Given two column vectors X and Y, let +* +* A = ( X Y ). +* +* The subroutine first computes the QR factorization of A = Q*R, +* and then computes the SVD of the 2-by-2 upper triangular matrix R. +* The smaller singular value of R is returned in SSMIN, which is used +* as the measurement of the linear dependency of the vectors X and Y. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The length of the vectors X and Y. +* +* X (input/output) DOUBLE PRECISION array, +* dimension (1+(N-1)*INCX) +* On entry, X contains the N-vector X. +* On exit, X is overwritten. +* +* INCX (input) INTEGER +* The increment between successive elements of X. INCX > 0. +* +* Y (input/output) DOUBLE PRECISION array, +* dimension (1+(N-1)*INCY) +* On entry, Y contains the N-vector Y. +* On exit, Y is overwritten. +* +* INCY (input) INTEGER +* The increment between successive elements of Y. INCY > 0. +* +* SSMIN (output) DOUBLE PRECISION +* The smallest singular value of the N-by-2 matrix A = ( X Y ). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION A11, A12, A22, C, SSMAX, TAU +* .. +* .. External Functions .. + DOUBLE PRECISION DDOT + EXTERNAL DDOT +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DLARFG, DLAS2 +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + SSMIN = ZERO + RETURN + END IF +* +* Compute the QR factorization of the N-by-2 matrix ( X Y ) +* + CALL DLARFG( N, X( 1 ), X( 1+INCX ), INCX, TAU ) + A11 = X( 1 ) + X( 1 ) = ONE +* + C = -TAU*DDOT( N, X, INCX, Y, INCY ) + CALL DAXPY( N, C, X, INCX, Y, INCY ) +* + CALL DLARFG( N-1, Y( 1+INCY ), Y( 1+2*INCY ), INCY, TAU ) +* + A12 = Y( 1 ) + A22 = Y( 1+INCY ) +* +* Compute the SVD of 2-by-2 Upper triangular matrix. +* + CALL DLAS2( A11, A12, A22, SSMIN, SSMAX ) +* + RETURN +* +* End of DLAPLL +* + END diff --git a/costa/native/external/lapack/dlapmt.f b/costa/native/external/lapack/dlapmt.f new file mode 100644 index 000000000..80b55c35d --- /dev/null +++ b/costa/native/external/lapack/dlapmt.f @@ -0,0 +1,135 @@ + SUBROUTINE DLAPMT( FORWRD, M, N, X, LDX, K ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + LOGICAL FORWRD + INTEGER LDX, M, N +* .. +* .. Array Arguments .. + INTEGER K( * ) + DOUBLE PRECISION X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DLAPMT rearranges the columns of the M by N matrix X as specified +* by the permutation K(1),K(2),...,K(N) of the integers 1,...,N. +* If FORWRD = .TRUE., forward permutation: +* +* X(*,K(J)) is moved X(*,J) for J = 1,2,...,N. +* +* If FORWRD = .FALSE., backward permutation: +* +* X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N. +* +* Arguments +* ========= +* +* FORWRD (input) LOGICAL +* = .TRUE., forward permutation +* = .FALSE., backward permutation +* +* M (input) INTEGER +* The number of rows of the matrix X. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix X. N >= 0. +* +* X (input/output) DOUBLE PRECISION array, dimension (LDX,N) +* On entry, the M by N matrix X. +* On exit, X contains the permuted matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X, LDX >= MAX(1,M). +* +* K (input) INTEGER array, dimension (N) +* On entry, K contains the permutation vector. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, II, IN, J + DOUBLE PRECISION TEMP +* .. +* .. Executable Statements .. +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, N + K( I ) = -K( I ) + 10 CONTINUE +* + IF( FORWRD ) THEN +* +* Forward permutation +* + DO 50 I = 1, N +* + IF( K( I ).GT.0 ) + $ GO TO 40 +* + J = I + K( J ) = -K( J ) + IN = K( J ) +* + 20 CONTINUE + IF( K( IN ).GT.0 ) + $ GO TO 40 +* + DO 30 II = 1, M + TEMP = X( II, J ) + X( II, J ) = X( II, IN ) + X( II, IN ) = TEMP + 30 CONTINUE +* + K( IN ) = -K( IN ) + J = IN + IN = K( IN ) + GO TO 20 +* + 40 CONTINUE +* + 50 CONTINUE +* + ELSE +* +* Backward permutation +* + DO 90 I = 1, N +* + IF( K( I ).GT.0 ) + $ GO TO 80 +* + K( I ) = -K( I ) + J = K( I ) + 60 CONTINUE + IF( J.EQ.I ) + $ GO TO 80 +* + DO 70 II = 1, M + TEMP = X( II, I ) + X( II, I ) = X( II, J ) + X( II, J ) = TEMP + 70 CONTINUE +* + K( J ) = -K( J ) + J = K( J ) + GO TO 60 +* + 80 CONTINUE +* + 90 CONTINUE +* + END IF +* + RETURN +* +* End of DLAPMT +* + END diff --git a/costa/native/external/lapack/dlapy2.f b/costa/native/external/lapack/dlapy2.f new file mode 100644 index 000000000..f3d732da4 --- /dev/null +++ b/costa/native/external/lapack/dlapy2.f @@ -0,0 +1,54 @@ + DOUBLE PRECISION FUNCTION DLAPY2( X, Y ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + DOUBLE PRECISION X, Y +* .. +* +* Purpose +* ======= +* +* DLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary +* overflow. +* +* Arguments +* ========= +* +* X (input) DOUBLE PRECISION +* Y (input) DOUBLE PRECISION +* X and Y specify the values x and y. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION W, XABS, YABS, Z +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + XABS = ABS( X ) + YABS = ABS( Y ) + W = MAX( XABS, YABS ) + Z = MIN( XABS, YABS ) + IF( Z.EQ.ZERO ) THEN + DLAPY2 = W + ELSE + DLAPY2 = W*SQRT( ONE+( Z / W )**2 ) + END IF + RETURN +* +* End of DLAPY2 +* + END diff --git a/costa/native/external/lapack/dlapy3.f b/costa/native/external/lapack/dlapy3.f new file mode 100644 index 000000000..8c1483263 --- /dev/null +++ b/costa/native/external/lapack/dlapy3.f @@ -0,0 +1,54 @@ + DOUBLE PRECISION FUNCTION DLAPY3( X, Y, Z ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + DOUBLE PRECISION X, Y, Z +* .. +* +* Purpose +* ======= +* +* DLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause +* unnecessary overflow. +* +* Arguments +* ========= +* +* X (input) DOUBLE PRECISION +* Y (input) DOUBLE PRECISION +* Z (input) DOUBLE PRECISION +* X, Y and Z specify the values x, y and z. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION W, XABS, YABS, ZABS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + XABS = ABS( X ) + YABS = ABS( Y ) + ZABS = ABS( Z ) + W = MAX( XABS, YABS, ZABS ) + IF( W.EQ.ZERO ) THEN + DLAPY3 = ZERO + ELSE + DLAPY3 = W*SQRT( ( XABS / W )**2+( YABS / W )**2+ + $ ( ZABS / W )**2 ) + END IF + RETURN +* +* End of DLAPY3 +* + END diff --git a/costa/native/external/lapack/dlaqgb.f b/costa/native/external/lapack/dlaqgb.f new file mode 100644 index 000000000..2f47b456c --- /dev/null +++ b/costa/native/external/lapack/dlaqgb.f @@ -0,0 +1,169 @@ + SUBROUTINE DLAQGB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER EQUED + INTEGER KL, KU, LDAB, M, N + DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), C( * ), R( * ) +* .. +* +* Purpose +* ======= +* +* DLAQGB equilibrates a general M by N band matrix A with KL +* subdiagonals and KU superdiagonals using the row and scaling factors +* in the vectors R and C. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +* +* On exit, the equilibrated matrix, in the same storage format +* as A. See EQUED for the form of the equilibrated matrix. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDA >= KL+KU+1. +* +* R (output) DOUBLE PRECISION array, dimension (M) +* The row scale factors for A. +* +* C (output) DOUBLE PRECISION array, dimension (N) +* The column scale factors for A. +* +* ROWCND (output) DOUBLE PRECISION +* Ratio of the smallest R(i) to the largest R(i). +* +* COLCND (output) DOUBLE PRECISION +* Ratio of the smallest C(i) to the largest C(i). +* +* AMAX (input) DOUBLE PRECISION +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration +* = 'R': Row equilibration, i.e., A has been premultiplied by +* diag(R). +* = 'C': Column equilibration, i.e., A has been postmultiplied +* by diag(C). +* = 'B': Both row and column equilibration, i.e., A has been +* replaced by diag(R) * A * diag(C). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if row or column scaling +* should be done based on the ratio of the row or column scaling +* factors. If ROWCND < THRESH, row scaling is done, and if +* COLCND < THRESH, column scaling is done. +* +* LARGE and SMALL are threshold values used to decide if row scaling +* should be done based on the absolute size of the largest matrix +* element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) + $ THEN +* +* No row scaling +* + IF( COLCND.GE.THRESH ) THEN +* +* No column scaling +* + EQUED = 'N' + ELSE +* +* Column scaling +* + DO 20 J = 1, N + CJ = C( J ) + DO 10 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = CJ*AB( KU+1+I-J, J ) + 10 CONTINUE + 20 CONTINUE + EQUED = 'C' + END IF + ELSE IF( COLCND.GE.THRESH ) THEN +* +* Row scaling, no column scaling +* + DO 40 J = 1, N + DO 30 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = R( I )*AB( KU+1+I-J, J ) + 30 CONTINUE + 40 CONTINUE + EQUED = 'R' + ELSE +* +* Row and column scaling +* + DO 60 J = 1, N + CJ = C( J ) + DO 50 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = CJ*R( I )*AB( KU+1+I-J, J ) + 50 CONTINUE + 60 CONTINUE + EQUED = 'B' + END IF +* + RETURN +* +* End of DLAQGB +* + END diff --git a/costa/native/external/lapack/dlaqge.f b/costa/native/external/lapack/dlaqge.f new file mode 100644 index 000000000..d93956fcf --- /dev/null +++ b/costa/native/external/lapack/dlaqge.f @@ -0,0 +1,155 @@ + SUBROUTINE DLAQGE( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER EQUED + INTEGER LDA, M, N + DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( * ), R( * ) +* .. +* +* Purpose +* ======= +* +* DLAQGE equilibrates a general M by N matrix A using the row and +* scaling factors in the vectors R and C. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M by N matrix A. +* On exit, the equilibrated matrix. See EQUED for the form of +* the equilibrated matrix. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(M,1). +* +* R (input) DOUBLE PRECISION array, dimension (M) +* The row scale factors for A. +* +* C (input) DOUBLE PRECISION array, dimension (N) +* The column scale factors for A. +* +* ROWCND (input) DOUBLE PRECISION +* Ratio of the smallest R(i) to the largest R(i). +* +* COLCND (input) DOUBLE PRECISION +* Ratio of the smallest C(i) to the largest C(i). +* +* AMAX (input) DOUBLE PRECISION +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration +* = 'R': Row equilibration, i.e., A has been premultiplied by +* diag(R). +* = 'C': Column equilibration, i.e., A has been postmultiplied +* by diag(C). +* = 'B': Both row and column equilibration, i.e., A has been +* replaced by diag(R) * A * diag(C). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if row or column scaling +* should be done based on the ratio of the row or column scaling +* factors. If ROWCND < THRESH, row scaling is done, and if +* COLCND < THRESH, column scaling is done. +* +* LARGE and SMALL are threshold values used to decide if row scaling +* should be done based on the absolute size of the largest matrix +* element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) + $ THEN +* +* No row scaling +* + IF( COLCND.GE.THRESH ) THEN +* +* No column scaling +* + EQUED = 'N' + ELSE +* +* Column scaling +* + DO 20 J = 1, N + CJ = C( J ) + DO 10 I = 1, M + A( I, J ) = CJ*A( I, J ) + 10 CONTINUE + 20 CONTINUE + EQUED = 'C' + END IF + ELSE IF( COLCND.GE.THRESH ) THEN +* +* Row scaling, no column scaling +* + DO 40 J = 1, N + DO 30 I = 1, M + A( I, J ) = R( I )*A( I, J ) + 30 CONTINUE + 40 CONTINUE + EQUED = 'R' + ELSE +* +* Row and column scaling +* + DO 60 J = 1, N + CJ = C( J ) + DO 50 I = 1, M + A( I, J ) = CJ*R( I )*A( I, J ) + 50 CONTINUE + 60 CONTINUE + EQUED = 'B' + END IF +* + RETURN +* +* End of DLAQGE +* + END diff --git a/costa/native/external/lapack/dlaqp2.f b/costa/native/external/lapack/dlaqp2.f new file mode 100644 index 000000000..720b740dc --- /dev/null +++ b/costa/native/external/lapack/dlaqp2.f @@ -0,0 +1,166 @@ + SUBROUTINE DLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER LDA, M, N, OFFSET +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION A( LDA, * ), TAU( * ), VN1( * ), VN2( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLAQP2 computes a QR factorization with column pivoting of +* the block A(OFFSET+1:M,1:N). +* The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* OFFSET (input) INTEGER +* The number of rows of the matrix A that must be pivoted +* but no factorized. OFFSET >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the upper triangle of block A(OFFSET+1:M,1:N) is +* the triangular factor obtained; the elements in block +* A(OFFSET+1:M,1:N) below the diagonal, together with the +* array TAU, represent the orthogonal matrix Q as a product of +* elementary reflectors. Block A(1:OFFSET,1:N) has been +* accordingly pivoted, but no factorized. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +* to the front of A*P (a leading column); if JPVT(i) = 0, +* the i-th column of A is a free column. +* On exit, if JPVT(i) = k, then the i-th column of A*P +* was the k-th column of A. +* +* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors. +* +* VN1 (input/output) DOUBLE PRECISION array, dimension (N) +* The vector with the partial column norms. +* +* VN2 (input/output) DOUBLE PRECISION array, dimension (N) +* The vector with the exact column norms. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* Further Details +* =============== +* +* Based on contributions by +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* X. Sun, Computer Science Dept., Duke University, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, ITEMP, J, MN, OFFPI, PVT + DOUBLE PRECISION AII, TEMP, TEMP2 +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DLARFG, DSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DNRM2 + EXTERNAL IDAMAX, DNRM2 +* .. +* .. Executable Statements .. +* + MN = MIN( M-OFFSET, N ) +* +* Compute factorization. +* + DO 20 I = 1, MN +* + OFFPI = OFFSET + I +* +* Determine ith pivot column and swap if necessary. +* + PVT = ( I-1 ) + IDAMAX( N-I+1, VN1( I ), 1 ) +* + IF( PVT.NE.I ) THEN + CALL DSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( I ) + JPVT( I ) = ITEMP + VN1( PVT ) = VN1( I ) + VN2( PVT ) = VN2( I ) + END IF +* +* Generate elementary reflector H(i). +* + IF( OFFPI.LT.M ) THEN + CALL DLARFG( M-OFFPI+1, A( OFFPI, I ), A( OFFPI+1, I ), 1, + $ TAU( I ) ) + ELSE + CALL DLARFG( 1, A( M, I ), A( M, I ), 1, TAU( I ) ) + END IF +* + IF( I.LT.N ) THEN +* +* Apply H(i)' to A(offset+i:m,i+1:n) from the left. +* + AII = A( OFFPI, I ) + A( OFFPI, I ) = ONE + CALL DLARF( 'Left', M-OFFPI+1, N-I, A( OFFPI, I ), 1, + $ TAU( I ), A( OFFPI, I+1 ), LDA, WORK( 1 ) ) + A( OFFPI, I ) = AII + END IF +* +* Update partial column norms. +* + DO 10 J = I + 1, N + IF( VN1( J ).NE.ZERO ) THEN + TEMP = ONE - ( ABS( A( OFFPI, J ) ) / VN1( J ) )**2 + TEMP = MAX( TEMP, ZERO ) + TEMP2 = ONE + 0.05D0*TEMP*( VN1( J ) / VN2( J ) )**2 + IF( TEMP2.EQ.ONE ) THEN + IF( OFFPI.LT.M ) THEN + VN1( J ) = DNRM2( M-OFFPI, A( OFFPI+1, J ), 1 ) + VN2( J ) = VN1( J ) + ELSE + VN1( J ) = ZERO + VN2( J ) = ZERO + END IF + ELSE + VN1( J ) = VN1( J )*SQRT( TEMP ) + END IF + END IF + 10 CONTINUE +* + 20 CONTINUE +* + RETURN +* +* End of DLAQP2 +* + END diff --git a/costa/native/external/lapack/dlaqps.f b/costa/native/external/lapack/dlaqps.f new file mode 100644 index 000000000..3190e3d7f --- /dev/null +++ b/costa/native/external/lapack/dlaqps.f @@ -0,0 +1,245 @@ + SUBROUTINE DLAQPS( M, N, OFFSET, NB, KB, A, LDA, JPVT, TAU, VN1, + $ VN2, AUXV, F, LDF ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER KB, LDA, LDF, M, N, NB, OFFSET +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION A( LDA, * ), AUXV( * ), F( LDF, * ), TAU( * ), + $ VN1( * ), VN2( * ) +* .. +* +* Purpose +* ======= +* +* DLAQPS computes a step of QR factorization with column pivoting +* of a real M-by-N matrix A by using Blas-3. It tries to factorize +* NB columns from A starting from the row OFFSET+1, and updates all +* of the matrix with Blas-3 xGEMM. +* +* In some cases, due to catastrophic cancellations, it cannot +* factorize NB columns. Hence, the actual number of factorized +* columns is returned in KB. +* +* Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0 +* +* OFFSET (input) INTEGER +* The number of rows of A that have been factorized in +* previous steps. +* +* NB (input) INTEGER +* The number of columns to factorize. +* +* KB (output) INTEGER +* The number of columns actually factorized. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, block A(OFFSET+1:M,1:KB) is the triangular +* factor obtained and block A(1:OFFSET,1:N) has been +* accordingly pivoted, but no factorized. +* The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has +* been updated. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* JPVT(I) = K <==> Column K of the full matrix A has been +* permuted into position I in AP. +* +* TAU (output) DOUBLE PRECISION array, dimension (KB) +* The scalar factors of the elementary reflectors. +* +* VN1 (input/output) DOUBLE PRECISION array, dimension (N) +* The vector with the partial column norms. +* +* VN2 (input/output) DOUBLE PRECISION array, dimension (N) +* The vector with the exact column norms. +* +* AUXV (input/output) DOUBLE PRECISION array, dimension (NB) +* Auxiliar vector. +* +* F (input/output) DOUBLE PRECISION array, dimension (LDF,NB) +* Matrix F' = L*Y'*A. +* +* LDF (input) INTEGER +* The leading dimension of the array F. LDF >= max(1,N). +* +* Further Details +* =============== +* +* Based on contributions by +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* X. Sun, Computer Science Dept., Duke University, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER ITEMP, J, K, LASTRK, LSTICC, PVT, RK + DOUBLE PRECISION AKK, TEMP, TEMP2 +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DGEMV, DLARFG, DSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN, NINT, SQRT +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DNRM2 + EXTERNAL IDAMAX, DNRM2 +* .. +* .. Executable Statements .. +* + LASTRK = MIN( M, N+OFFSET ) + LSTICC = 0 + K = 0 +* +* Beginning of while loop. +* + 10 CONTINUE + IF( ( K.LT.NB ) .AND. ( LSTICC.EQ.0 ) ) THEN + K = K + 1 + RK = OFFSET + K +* +* Determine ith pivot column and swap if necessary +* + PVT = ( K-1 ) + IDAMAX( N-K+1, VN1( K ), 1 ) + IF( PVT.NE.K ) THEN + CALL DSWAP( M, A( 1, PVT ), 1, A( 1, K ), 1 ) + CALL DSWAP( K-1, F( PVT, 1 ), LDF, F( K, 1 ), LDF ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( K ) + JPVT( K ) = ITEMP + VN1( PVT ) = VN1( K ) + VN2( PVT ) = VN2( K ) + END IF +* +* Apply previous Householder reflectors to column K: +* A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)'. +* + IF( K.GT.1 ) THEN + CALL DGEMV( 'No transpose', M-RK+1, K-1, -ONE, A( RK, 1 ), + $ LDA, F( K, 1 ), LDF, ONE, A( RK, K ), 1 ) + END IF +* +* Generate elementary reflector H(k). +* + IF( RK.LT.M ) THEN + CALL DLARFG( M-RK+1, A( RK, K ), A( RK+1, K ), 1, TAU( K ) ) + ELSE + CALL DLARFG( 1, A( RK, K ), A( RK, K ), 1, TAU( K ) ) + END IF +* + AKK = A( RK, K ) + A( RK, K ) = ONE +* +* Compute Kth column of F: +* +* Compute F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)'*A(RK:M,K). +* + IF( K.LT.N ) THEN + CALL DGEMV( 'Transpose', M-RK+1, N-K, TAU( K ), + $ A( RK, K+1 ), LDA, A( RK, K ), 1, ZERO, + $ F( K+1, K ), 1 ) + END IF +* +* Padding F(1:K,K) with zeros. +* + DO 20 J = 1, K + F( J, K ) = ZERO + 20 CONTINUE +* +* Incremental updating of F: +* F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)' +* *A(RK:M,K). +* + IF( K.GT.1 ) THEN + CALL DGEMV( 'Transpose', M-RK+1, K-1, -TAU( K ), A( RK, 1 ), + $ LDA, A( RK, K ), 1, ZERO, AUXV( 1 ), 1 ) +* + CALL DGEMV( 'No transpose', N, K-1, ONE, F( 1, 1 ), LDF, + $ AUXV( 1 ), 1, ONE, F( 1, K ), 1 ) + END IF +* +* Update the current row of A: +* A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)'. +* + IF( K.LT.N ) THEN + CALL DGEMV( 'No transpose', N-K, K, -ONE, F( K+1, 1 ), LDF, + $ A( RK, 1 ), LDA, ONE, A( RK, K+1 ), LDA ) + END IF +* +* Update partial column norms. +* + IF( RK.LT.LASTRK ) THEN + DO 30 J = K + 1, N + IF( VN1( J ).NE.ZERO ) THEN + TEMP = ABS( A( RK, J ) ) / VN1( J ) + TEMP = MAX( ZERO, ( ONE+TEMP )*( ONE-TEMP ) ) + TEMP2 = ONE + 0.05D0*TEMP*( VN1( J ) / VN2( J ) )**2 + IF( TEMP2.EQ.ONE ) THEN + VN2( J ) = DBLE( LSTICC ) + LSTICC = J + ELSE + VN1( J ) = VN1( J )*SQRT( TEMP ) + END IF + END IF + 30 CONTINUE + END IF +* + A( RK, K ) = AKK +* +* End of while loop. +* + GO TO 10 + END IF + KB = K + RK = OFFSET + KB +* +* Apply the block reflector to the rest of the matrix: +* A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) - +* A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)'. +* + IF( KB.LT.MIN( N, M-OFFSET ) ) THEN + CALL DGEMM( 'No transpose', 'Transpose', M-RK, N-KB, KB, -ONE, + $ A( RK+1, 1 ), LDA, F( KB+1, 1 ), LDF, ONE, + $ A( RK+1, KB+1 ), LDA ) + END IF +* +* Recomputation of difficult columns. +* + 40 CONTINUE + IF( LSTICC.GT.0 ) THEN + ITEMP = NINT( VN2( LSTICC ) ) + VN1( LSTICC ) = DNRM2( M-RK, A( RK+1, LSTICC ), 1 ) + VN2( LSTICC ) = VN1( LSTICC ) + LSTICC = ITEMP + GO TO 40 + END IF +* + RETURN +* +* End of DLAQPS +* + END diff --git a/costa/native/external/lapack/dlaqsb.f b/costa/native/external/lapack/dlaqsb.f new file mode 100644 index 000000000..83aaf20ee --- /dev/null +++ b/costa/native/external/lapack/dlaqsb.f @@ -0,0 +1,149 @@ + SUBROUTINE DLAQSB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER KD, LDAB, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), S( * ) +* .. +* +* Purpose +* ======= +* +* DLAQSB equilibrates a symmetric band matrix A using the scaling +* factors in the vector S. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of super-diagonals of the matrix A if UPLO = 'U', +* or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U'*U or A = L*L' of the band +* matrix A, in the same storage format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* S (output) DOUBLE PRECISION array, dimension (N) +* The scale factors for A. +* +* SCOND (input) DOUBLE PRECISION +* Ratio of the smallest S(i) to the largest S(i). +* +* AMAX (input) DOUBLE PRECISION +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies whether or not equilibration was done. +* = 'N': No equilibration. +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if scaling should be done +* based on the ratio of the scaling factors. If SCOND < THRESH, +* scaling is done. +* +* LARGE and SMALL are threshold values used to decide if scaling should +* be done based on the absolute size of the largest matrix element. +* If AMAX > LARGE or AMAX < SMALL, scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored in band format. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = MAX( 1, J-KD ), J + AB( KD+1+I-J, J ) = CJ*S( I )*AB( KD+1+I-J, J ) + 10 CONTINUE + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, MIN( N, J+KD ) + AB( 1+I-J, J ) = CJ*S( I )*AB( 1+I-J, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of DLAQSB +* + END diff --git a/costa/native/external/lapack/dlaqsp.f b/costa/native/external/lapack/dlaqsp.f new file mode 100644 index 000000000..3c6db870c --- /dev/null +++ b/costa/native/external/lapack/dlaqsp.f @@ -0,0 +1,141 @@ + SUBROUTINE DLAQSP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), S( * ) +* .. +* +* Purpose +* ======= +* +* DLAQSP equilibrates a symmetric matrix A using the scaling factors +* in the vector S. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the equilibrated matrix: diag(S) * A * diag(S), in +* the same storage format as A. +* +* S (input) DOUBLE PRECISION array, dimension (N) +* The scale factors for A. +* +* SCOND (input) DOUBLE PRECISION +* Ratio of the smallest S(i) to the largest S(i). +* +* AMAX (input) DOUBLE PRECISION +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies whether or not equilibration was done. +* = 'N': No equilibration. +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if scaling should be done +* based on the ratio of the scaling factors. If SCOND < THRESH, +* scaling is done. +* +* LARGE and SMALL are threshold values used to decide if scaling should +* be done based on the absolute size of the largest matrix element. +* If AMAX > LARGE or AMAX < SMALL, scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, JC + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + JC = 1 + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J + AP( JC+I-1 ) = CJ*S( I )*AP( JC+I-1 ) + 10 CONTINUE + JC = JC + J + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + JC = 1 + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, N + AP( JC+I-J ) = CJ*S( I )*AP( JC+I-J ) + 30 CONTINUE + JC = JC + N - J + 1 + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of DLAQSP +* + END diff --git a/costa/native/external/lapack/dlaqsy.f b/costa/native/external/lapack/dlaqsy.f new file mode 100644 index 000000000..b285f4bc4 --- /dev/null +++ b/costa/native/external/lapack/dlaqsy.f @@ -0,0 +1,142 @@ + SUBROUTINE DLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER LDA, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), S( * ) +* .. +* +* Purpose +* ======= +* +* DLAQSY equilibrates a symmetric matrix A using the scaling factors +* in the vector S. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* n by n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n by n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if EQUED = 'Y', the equilibrated matrix: +* diag(S) * A * diag(S). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(N,1). +* +* S (input) DOUBLE PRECISION array, dimension (N) +* The scale factors for A. +* +* SCOND (input) DOUBLE PRECISION +* Ratio of the smallest S(i) to the largest S(i). +* +* AMAX (input) DOUBLE PRECISION +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies whether or not equilibration was done. +* = 'N': No equilibration. +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if scaling should be done +* based on the ratio of the scaling factors. If SCOND < THRESH, +* scaling is done. +* +* LARGE and SMALL are threshold values used to decide if scaling should +* be done based on the absolute size of the largest matrix element. +* If AMAX > LARGE or AMAX < SMALL, scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J + A( I, J ) = CJ*S( I )*A( I, J ) + 10 CONTINUE + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, N + A( I, J ) = CJ*S( I )*A( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of DLAQSY +* + END diff --git a/costa/native/external/lapack/dlaqtr.f b/costa/native/external/lapack/dlaqtr.f new file mode 100644 index 000000000..f1dbeb7db --- /dev/null +++ b/costa/native/external/lapack/dlaqtr.f @@ -0,0 +1,666 @@ + SUBROUTINE DLAQTR( LTRAN, LREAL, N, T, LDT, B, W, SCALE, X, WORK, + $ INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + LOGICAL LREAL, LTRAN + INTEGER INFO, LDT, N + DOUBLE PRECISION SCALE, W +* .. +* .. Array Arguments .. + DOUBLE PRECISION B( * ), T( LDT, * ), WORK( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DLAQTR solves the real quasi-triangular system +* +* op(T)*p = scale*c, if LREAL = .TRUE. +* +* or the complex quasi-triangular systems +* +* op(T + iB)*(p+iq) = scale*(c+id), if LREAL = .FALSE. +* +* in real arithmetic, where T is upper quasi-triangular. +* If LREAL = .FALSE., then the first diagonal block of T must be +* 1 by 1, B is the specially structured matrix +* +* B = [ b(1) b(2) ... b(n) ] +* [ w ] +* [ w ] +* [ . ] +* [ w ] +* +* op(A) = A or A', A' denotes the conjugate transpose of +* matrix A. +* +* On input, X = [ c ]. On output, X = [ p ]. +* [ d ] [ q ] +* +* This subroutine is designed for the condition number estimation +* in routine DTRSNA. +* +* Arguments +* ========= +* +* LTRAN (input) LOGICAL +* On entry, LTRAN specifies the option of conjugate transpose: +* = .FALSE., op(T+i*B) = T+i*B, +* = .TRUE., op(T+i*B) = (T+i*B)'. +* +* LREAL (input) LOGICAL +* On entry, LREAL specifies the input matrix structure: +* = .FALSE., the input is complex +* = .TRUE., the input is real +* +* N (input) INTEGER +* On entry, N specifies the order of T+i*B. N >= 0. +* +* T (input) DOUBLE PRECISION array, dimension (LDT,N) +* On entry, T contains a matrix in Schur canonical form. +* If LREAL = .FALSE., then the first diagonal block of T mu +* be 1 by 1. +* +* LDT (input) INTEGER +* The leading dimension of the matrix T. LDT >= max(1,N). +* +* B (input) DOUBLE PRECISION array, dimension (N) +* On entry, B contains the elements to form the matrix +* B as described above. +* If LREAL = .TRUE., B is not referenced. +* +* W (input) DOUBLE PRECISION +* On entry, W is the diagonal element of the matrix B. +* If LREAL = .TRUE., W is not referenced. +* +* SCALE (output) DOUBLE PRECISION +* On exit, SCALE is the scale factor. +* +* X (input/output) DOUBLE PRECISION array, dimension (2*N) +* On entry, X contains the right hand side of the system. +* On exit, X is overwritten by the solution. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* On exit, INFO is set to +* 0: successful exit. +* 1: the some diagonal 1 by 1 block has been perturbed by +* a small number SMIN to keep nonsingularity. +* 2: the some diagonal 2 by 2 block has been perturbed by +* a small number in DLALN2 to keep nonsingularity. +* NOTE: In the interests of speed, this routine does not +* check the inputs for errors. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + INTEGER I, IERR, J, J1, J2, JNEXT, K, N1, N2 + DOUBLE PRECISION BIGNUM, EPS, REC, SCALOC, SI, SMIN, SMINW, + $ SMLNUM, SR, TJJ, TMP, XJ, XMAX, XNORM, Z +* .. +* .. Local Arrays .. + DOUBLE PRECISION D( 2, 2 ), V( 2, 2 ) +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DASUM, DDOT, DLAMCH, DLANGE + EXTERNAL IDAMAX, DASUM, DDOT, DLAMCH, DLANGE +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DLADIV, DLALN2, DSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Do not test the input parameters for errors +* + NOTRAN = .NOT.LTRAN + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Set constants to control overflow +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM +* + XNORM = DLANGE( 'M', N, N, T, LDT, D ) + IF( .NOT.LREAL ) + $ XNORM = MAX( XNORM, ABS( W ), DLANGE( 'M', N, 1, B, N, D ) ) + SMIN = MAX( SMLNUM, EPS*XNORM ) +* +* Compute 1-norm of each column of strictly upper triangular +* part of T to control overflow in triangular solver. +* + WORK( 1 ) = ZERO + DO 10 J = 2, N + WORK( J ) = DASUM( J-1, T( 1, J ), 1 ) + 10 CONTINUE +* + IF( .NOT.LREAL ) THEN + DO 20 I = 2, N + WORK( I ) = WORK( I ) + ABS( B( I ) ) + 20 CONTINUE + END IF +* + N2 = 2*N + N1 = N + IF( .NOT.LREAL ) + $ N1 = N2 + K = IDAMAX( N1, X, 1 ) + XMAX = ABS( X( K ) ) + SCALE = ONE +* + IF( XMAX.GT.BIGNUM ) THEN + SCALE = BIGNUM / XMAX + CALL DSCAL( N1, SCALE, X, 1 ) + XMAX = BIGNUM + END IF +* + IF( LREAL ) THEN +* + IF( NOTRAN ) THEN +* +* Solve T*p = scale*c +* + JNEXT = N + DO 30 J = N, 1, -1 + IF( J.GT.JNEXT ) + $ GO TO 30 + J1 = J + J2 = J + JNEXT = J - 1 + IF( J.GT.1 ) THEN + IF( T( J, J-1 ).NE.ZERO ) THEN + J1 = J - 1 + JNEXT = J - 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* Meet 1 by 1 diagonal block +* +* Scale to avoid overflow when computing +* x(j) = b(j)/T(j,j) +* + XJ = ABS( X( J1 ) ) + TJJ = ABS( T( J1, J1 ) ) + TMP = T( J1, J1 ) + IF( TJJ.LT.SMIN ) THEN + TMP = SMIN + TJJ = SMIN + INFO = 1 + END IF +* + IF( XJ.EQ.ZERO ) + $ GO TO 30 +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.BIGNUM*TJJ ) THEN + REC = ONE / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J1 ) = X( J1 ) / TMP + XJ = ABS( X( J1 ) ) +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j1 of T. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( WORK( J1 ).GT.( BIGNUM-XMAX )*REC ) THEN + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + END IF + IF( J1.GT.1 ) THEN + CALL DAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 ) + K = IDAMAX( J1-1, X, 1 ) + XMAX = ABS( X( K ) ) + END IF +* + ELSE +* +* Meet 2 by 2 diagonal block +* +* Call 2 by 2 linear system solve, to take +* care of possible overflow by scaling factor. +* + D( 1, 1 ) = X( J1 ) + D( 2, 1 ) = X( J2 ) + CALL DLALN2( .FALSE., 2, 1, SMIN, ONE, T( J1, J1 ), + $ LDT, ONE, ONE, D, 2, ZERO, ZERO, V, 2, + $ SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 2 +* + IF( SCALOC.NE.ONE ) THEN + CALL DSCAL( N, SCALOC, X, 1 ) + SCALE = SCALE*SCALOC + END IF + X( J1 ) = V( 1, 1 ) + X( J2 ) = V( 2, 1 ) +* +* Scale V(1,1) (= X(J1)) and/or V(2,1) (=X(J2)) +* to avoid overflow in updating right-hand side. +* + XJ = MAX( ABS( V( 1, 1 ) ), ABS( V( 2, 1 ) ) ) + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( MAX( WORK( J1 ), WORK( J2 ) ).GT. + $ ( BIGNUM-XMAX )*REC ) THEN + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + END IF +* +* Update right-hand side +* + IF( J1.GT.1 ) THEN + CALL DAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 ) + CALL DAXPY( J1-1, -X( J2 ), T( 1, J2 ), 1, X, 1 ) + K = IDAMAX( J1-1, X, 1 ) + XMAX = ABS( X( K ) ) + END IF +* + END IF +* + 30 CONTINUE +* + ELSE +* +* Solve T'*p = scale*c +* + JNEXT = 1 + DO 40 J = 1, N + IF( J.LT.JNEXT ) + $ GO TO 40 + J1 = J + J2 = J + JNEXT = J + 1 + IF( J.LT.N ) THEN + IF( T( J+1, J ).NE.ZERO ) THEN + J2 = J + 1 + JNEXT = J + 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1 by 1 diagonal block +* +* Scale if necessary to avoid overflow in forming the +* right-hand side element by inner product. +* + XJ = ABS( X( J1 ) ) + IF( XMAX.GT.ONE ) THEN + REC = ONE / XMAX + IF( WORK( J1 ).GT.( BIGNUM-XJ )*REC ) THEN + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + X( J1 ) = X( J1 ) - DDOT( J1-1, T( 1, J1 ), 1, X, 1 ) +* + XJ = ABS( X( J1 ) ) + TJJ = ABS( T( J1, J1 ) ) + TMP = T( J1, J1 ) + IF( TJJ.LT.SMIN ) THEN + TMP = SMIN + TJJ = SMIN + INFO = 1 + END IF +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.BIGNUM*TJJ ) THEN + REC = ONE / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J1 ) = X( J1 ) / TMP + XMAX = MAX( XMAX, ABS( X( J1 ) ) ) +* + ELSE +* +* 2 by 2 diagonal block +* +* Scale if necessary to avoid overflow in forming the +* right-hand side elements by inner product. +* + XJ = MAX( ABS( X( J1 ) ), ABS( X( J2 ) ) ) + IF( XMAX.GT.ONE ) THEN + REC = ONE / XMAX + IF( MAX( WORK( J2 ), WORK( J1 ) ).GT.( BIGNUM-XJ )* + $ REC ) THEN + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + D( 1, 1 ) = X( J1 ) - DDOT( J1-1, T( 1, J1 ), 1, X, + $ 1 ) + D( 2, 1 ) = X( J2 ) - DDOT( J1-1, T( 1, J2 ), 1, X, + $ 1 ) +* + CALL DLALN2( .TRUE., 2, 1, SMIN, ONE, T( J1, J1 ), + $ LDT, ONE, ONE, D, 2, ZERO, ZERO, V, 2, + $ SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 2 +* + IF( SCALOC.NE.ONE ) THEN + CALL DSCAL( N, SCALOC, X, 1 ) + SCALE = SCALE*SCALOC + END IF + X( J1 ) = V( 1, 1 ) + X( J2 ) = V( 2, 1 ) + XMAX = MAX( ABS( X( J1 ) ), ABS( X( J2 ) ), XMAX ) +* + END IF + 40 CONTINUE + END IF +* + ELSE +* + SMINW = MAX( EPS*ABS( W ), SMIN ) + IF( NOTRAN ) THEN +* +* Solve (T + iB)*(p+iq) = c+id +* + JNEXT = N + DO 70 J = N, 1, -1 + IF( J.GT.JNEXT ) + $ GO TO 70 + J1 = J + J2 = J + JNEXT = J - 1 + IF( J.GT.1 ) THEN + IF( T( J, J-1 ).NE.ZERO ) THEN + J1 = J - 1 + JNEXT = J - 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1 by 1 diagonal block +* +* Scale if necessary to avoid overflow in division +* + Z = W + IF( J1.EQ.1 ) + $ Z = B( 1 ) + XJ = ABS( X( J1 ) ) + ABS( X( N+J1 ) ) + TJJ = ABS( T( J1, J1 ) ) + ABS( Z ) + TMP = T( J1, J1 ) + IF( TJJ.LT.SMINW ) THEN + TMP = SMINW + TJJ = SMINW + INFO = 1 + END IF +* + IF( XJ.EQ.ZERO ) + $ GO TO 70 +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.BIGNUM*TJJ ) THEN + REC = ONE / XJ + CALL DSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + CALL DLADIV( X( J1 ), X( N+J1 ), TMP, Z, SR, SI ) + X( J1 ) = SR + X( N+J1 ) = SI + XJ = ABS( X( J1 ) ) + ABS( X( N+J1 ) ) +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j1 of T. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( WORK( J1 ).GT.( BIGNUM-XMAX )*REC ) THEN + CALL DSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + END IF + END IF +* + IF( J1.GT.1 ) THEN + CALL DAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 ) + CALL DAXPY( J1-1, -X( N+J1 ), T( 1, J1 ), 1, + $ X( N+1 ), 1 ) +* + X( 1 ) = X( 1 ) + B( J1 )*X( N+J1 ) + X( N+1 ) = X( N+1 ) - B( J1 )*X( J1 ) +* + XMAX = ZERO + DO 50 K = 1, J1 - 1 + XMAX = MAX( XMAX, ABS( X( K ) )+ + $ ABS( X( K+N ) ) ) + 50 CONTINUE + END IF +* + ELSE +* +* Meet 2 by 2 diagonal block +* + D( 1, 1 ) = X( J1 ) + D( 2, 1 ) = X( J2 ) + D( 1, 2 ) = X( N+J1 ) + D( 2, 2 ) = X( N+J2 ) + CALL DLALN2( .FALSE., 2, 2, SMINW, ONE, T( J1, J1 ), + $ LDT, ONE, ONE, D, 2, ZERO, -W, V, 2, + $ SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 2 +* + IF( SCALOC.NE.ONE ) THEN + CALL DSCAL( 2*N, SCALOC, X, 1 ) + SCALE = SCALOC*SCALE + END IF + X( J1 ) = V( 1, 1 ) + X( J2 ) = V( 2, 1 ) + X( N+J1 ) = V( 1, 2 ) + X( N+J2 ) = V( 2, 2 ) +* +* Scale X(J1), .... to avoid overflow in +* updating right hand side. +* + XJ = MAX( ABS( V( 1, 1 ) )+ABS( V( 1, 2 ) ), + $ ABS( V( 2, 1 ) )+ABS( V( 2, 2 ) ) ) + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( MAX( WORK( J1 ), WORK( J2 ) ).GT. + $ ( BIGNUM-XMAX )*REC ) THEN + CALL DSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + END IF + END IF +* +* Update the right-hand side. +* + IF( J1.GT.1 ) THEN + CALL DAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 ) + CALL DAXPY( J1-1, -X( J2 ), T( 1, J2 ), 1, X, 1 ) +* + CALL DAXPY( J1-1, -X( N+J1 ), T( 1, J1 ), 1, + $ X( N+1 ), 1 ) + CALL DAXPY( J1-1, -X( N+J2 ), T( 1, J2 ), 1, + $ X( N+1 ), 1 ) +* + X( 1 ) = X( 1 ) + B( J1 )*X( N+J1 ) + + $ B( J2 )*X( N+J2 ) + X( N+1 ) = X( N+1 ) - B( J1 )*X( J1 ) - + $ B( J2 )*X( J2 ) +* + XMAX = ZERO + DO 60 K = 1, J1 - 1 + XMAX = MAX( ABS( X( K ) )+ABS( X( K+N ) ), + $ XMAX ) + 60 CONTINUE + END IF +* + END IF + 70 CONTINUE +* + ELSE +* +* Solve (T + iB)'*(p+iq) = c+id +* + JNEXT = 1 + DO 80 J = 1, N + IF( J.LT.JNEXT ) + $ GO TO 80 + J1 = J + J2 = J + JNEXT = J + 1 + IF( J.LT.N ) THEN + IF( T( J+1, J ).NE.ZERO ) THEN + J2 = J + 1 + JNEXT = J + 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1 by 1 diagonal block +* +* Scale if necessary to avoid overflow in forming the +* right-hand side element by inner product. +* + XJ = ABS( X( J1 ) ) + ABS( X( J1+N ) ) + IF( XMAX.GT.ONE ) THEN + REC = ONE / XMAX + IF( WORK( J1 ).GT.( BIGNUM-XJ )*REC ) THEN + CALL DSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + X( J1 ) = X( J1 ) - DDOT( J1-1, T( 1, J1 ), 1, X, 1 ) + X( N+J1 ) = X( N+J1 ) - DDOT( J1-1, T( 1, J1 ), 1, + $ X( N+1 ), 1 ) + IF( J1.GT.1 ) THEN + X( J1 ) = X( J1 ) - B( J1 )*X( N+1 ) + X( N+J1 ) = X( N+J1 ) + B( J1 )*X( 1 ) + END IF + XJ = ABS( X( J1 ) ) + ABS( X( J1+N ) ) +* + Z = W + IF( J1.EQ.1 ) + $ Z = B( 1 ) +* +* Scale if necessary to avoid overflow in +* complex division +* + TJJ = ABS( T( J1, J1 ) ) + ABS( Z ) + TMP = T( J1, J1 ) + IF( TJJ.LT.SMINW ) THEN + TMP = SMINW + TJJ = SMINW + INFO = 1 + END IF +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.BIGNUM*TJJ ) THEN + REC = ONE / XJ + CALL DSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + CALL DLADIV( X( J1 ), X( N+J1 ), TMP, -Z, SR, SI ) + X( J1 ) = SR + X( J1+N ) = SI + XMAX = MAX( ABS( X( J1 ) )+ABS( X( J1+N ) ), XMAX ) +* + ELSE +* +* 2 by 2 diagonal block +* +* Scale if necessary to avoid overflow in forming the +* right-hand side element by inner product. +* + XJ = MAX( ABS( X( J1 ) )+ABS( X( N+J1 ) ), + $ ABS( X( J2 ) )+ABS( X( N+J2 ) ) ) + IF( XMAX.GT.ONE ) THEN + REC = ONE / XMAX + IF( MAX( WORK( J1 ), WORK( J2 ) ).GT. + $ ( BIGNUM-XJ ) / XMAX ) THEN + CALL DSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + D( 1, 1 ) = X( J1 ) - DDOT( J1-1, T( 1, J1 ), 1, X, + $ 1 ) + D( 2, 1 ) = X( J2 ) - DDOT( J1-1, T( 1, J2 ), 1, X, + $ 1 ) + D( 1, 2 ) = X( N+J1 ) - DDOT( J1-1, T( 1, J1 ), 1, + $ X( N+1 ), 1 ) + D( 2, 2 ) = X( N+J2 ) - DDOT( J1-1, T( 1, J2 ), 1, + $ X( N+1 ), 1 ) + D( 1, 1 ) = D( 1, 1 ) - B( J1 )*X( N+1 ) + D( 2, 1 ) = D( 2, 1 ) - B( J2 )*X( N+1 ) + D( 1, 2 ) = D( 1, 2 ) + B( J1 )*X( 1 ) + D( 2, 2 ) = D( 2, 2 ) + B( J2 )*X( 1 ) +* + CALL DLALN2( .TRUE., 2, 2, SMINW, ONE, T( J1, J1 ), + $ LDT, ONE, ONE, D, 2, ZERO, W, V, 2, + $ SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 2 +* + IF( SCALOC.NE.ONE ) THEN + CALL DSCAL( N2, SCALOC, X, 1 ) + SCALE = SCALOC*SCALE + END IF + X( J1 ) = V( 1, 1 ) + X( J2 ) = V( 2, 1 ) + X( N+J1 ) = V( 1, 2 ) + X( N+J2 ) = V( 2, 2 ) + XMAX = MAX( ABS( X( J1 ) )+ABS( X( N+J1 ) ), + $ ABS( X( J2 ) )+ABS( X( N+J2 ) ), XMAX ) +* + END IF +* + 80 CONTINUE +* + END IF +* + END IF +* + RETURN +* +* End of DLAQTR +* + END diff --git a/costa/native/external/lapack/dlar1v.f b/costa/native/external/lapack/dlar1v.f new file mode 100644 index 000000000..0e93ea5ea --- /dev/null +++ b/costa/native/external/lapack/dlar1v.f @@ -0,0 +1,323 @@ + SUBROUTINE DLAR1V( N, B1, BN, SIGMA, D, L, LD, LLD, GERSCH, Z, + $ ZTZ, MINGMA, R, ISUPPZ, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER B1, BN, N, R + DOUBLE PRECISION MINGMA, SIGMA, ZTZ +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ) + DOUBLE PRECISION D( * ), GERSCH( * ), L( * ), LD( * ), LLD( * ), + $ WORK( * ), Z( * ) +* .. +* +* Purpose +* ======= +* +* DLAR1V computes the (scaled) r-th column of the inverse of +* the sumbmatrix in rows B1 through BN of the tridiagonal matrix +* L D L^T - sigma I. The following steps accomplish this computation : +* (a) Stationary qd transform, L D L^T - sigma I = L(+) D(+) L(+)^T, +* (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T, +* (c) Computation of the diagonal elements of the inverse of +* L D L^T - sigma I by combining the above transforms, and choosing +* r as the index where the diagonal of the inverse is (one of the) +* largest in magnitude. +* (d) Computation of the (scaled) r-th column of the inverse using the +* twisted factorization obtained by combining the top part of the +* the stationary and the bottom part of the progressive transform. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix L D L^T. +* +* B1 (input) INTEGER +* First index of the submatrix of L D L^T. +* +* BN (input) INTEGER +* Last index of the submatrix of L D L^T. +* +* SIGMA (input) DOUBLE PRECISION +* The shift. Initially, when R = 0, SIGMA should be a good +* approximation to an eigenvalue of L D L^T. +* +* L (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) subdiagonal elements of the unit bidiagonal matrix +* L, in elements 1 to N-1. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the diagonal matrix D. +* +* LD (input) DOUBLE PRECISION array, dimension (N-1) +* The n-1 elements L(i)*D(i). +* +* LLD (input) DOUBLE PRECISION array, dimension (N-1) +* The n-1 elements L(i)*L(i)*D(i). +* +* GERSCH (input) DOUBLE PRECISION array, dimension (2*N) +* The n Gerschgorin intervals. These are used to restrict +* the initial search for R, when R is input as 0. +* +* Z (output) DOUBLE PRECISION array, dimension (N) +* The (scaled) r-th column of the inverse. Z(R) is returned +* to be 1. +* +* ZTZ (output) DOUBLE PRECISION +* The square of the norm of Z. +* +* MINGMA (output) DOUBLE PRECISION +* The reciprocal of the largest (in magnitude) diagonal +* element of the inverse of L D L^T - sigma I. +* +* R (input/output) INTEGER +* Initially, R should be input to be 0 and is then output as +* the index where the diagonal element of the inverse is +* largest in magnitude. In later iterations, this same value +* of R should be input. +* +* ISUPPZ (output) INTEGER array, dimension (2) +* The support of the vector in Z, i.e., the vector Z is +* nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER BLKSIZ + PARAMETER ( BLKSIZ = 32 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL SAWNAN + INTEGER FROM, I, INDP, INDS, INDUMN, J, R1, R2, TO + DOUBLE PRECISION DMINUS, DPLUS, EPS, S, TMP +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + EPS = DLAMCH( 'Precision' ) + IF( R.EQ.0 ) THEN +* +* Eliminate the top and bottom indices from the possible values +* of R where the desired eigenvector is largest in magnitude. +* + R1 = B1 + DO 10 I = B1, BN + IF( SIGMA.GE.GERSCH( 2*I-1 ) .OR. SIGMA.LE.GERSCH( 2*I ) ) + $ THEN + R1 = I + GO TO 20 + END IF + 10 CONTINUE + 20 CONTINUE + R2 = BN + DO 30 I = BN, B1, -1 + IF( SIGMA.GE.GERSCH( 2*I-1 ) .OR. SIGMA.LE.GERSCH( 2*I ) ) + $ THEN + R2 = I + GO TO 40 + END IF + 30 CONTINUE + 40 CONTINUE + ELSE + R1 = R + R2 = R + END IF +* + INDUMN = N + INDS = 2*N + 1 + INDP = 3*N + 1 + SAWNAN = .FALSE. +* +* Compute the stationary transform (using the differential form) +* untill the index R2 +* + IF( B1.EQ.1 ) THEN + WORK( INDS ) = ZERO + ELSE + WORK( INDS ) = LLD( B1-1 ) + END IF + S = WORK( INDS ) - SIGMA + DO 50 I = B1, R2 - 1 + DPLUS = D( I ) + S + WORK( I ) = LD( I ) / DPLUS + WORK( INDS+I ) = S*WORK( I )*L( I ) + S = WORK( INDS+I ) - SIGMA + 50 CONTINUE +* + IF( .NOT.( S.GT.ZERO .OR. S.LT.ONE ) ) THEN +* +* Run a slower version of the above loop if a NaN is detected +* + SAWNAN = .TRUE. + J = B1 + 1 + 60 CONTINUE + IF( WORK( INDS+J ).GT.ZERO .OR. WORK( INDS+J ).LT.ONE ) THEN + J = J + 1 + GO TO 60 + END IF + WORK( INDS+J ) = LLD( J ) + S = WORK( INDS+J ) - SIGMA + DO 70 I = J + 1, R2 - 1 + DPLUS = D( I ) + S + WORK( I ) = LD( I ) / DPLUS + IF( WORK( I ).EQ.ZERO ) THEN + WORK( INDS+I ) = LLD( I ) + ELSE + WORK( INDS+I ) = S*WORK( I )*L( I ) + END IF + S = WORK( INDS+I ) - SIGMA + 70 CONTINUE + END IF + WORK( INDP+BN-1 ) = D( BN ) - SIGMA + DO 80 I = BN - 1, R1, -1 + DMINUS = LLD( I ) + WORK( INDP+I ) + TMP = D( I ) / DMINUS + WORK( INDUMN+I ) = L( I )*TMP + WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - SIGMA + 80 CONTINUE + TMP = WORK( INDP+R1-1 ) + IF( .NOT.( TMP.GT.ZERO .OR. TMP.LT.ONE ) ) THEN +* +* Run a slower version of the above loop if a NaN is detected +* + SAWNAN = .TRUE. + J = BN - 3 + 90 CONTINUE + IF( WORK( INDP+J ).GT.ZERO .OR. WORK( INDP+J ).LT.ONE ) THEN + J = J - 1 + GO TO 90 + END IF + WORK( INDP+J ) = D( J+1 ) - SIGMA + DO 100 I = J, R1, -1 + DMINUS = LLD( I ) + WORK( INDP+I ) + TMP = D( I ) / DMINUS + WORK( INDUMN+I ) = L( I )*TMP + IF( TMP.EQ.ZERO ) THEN + WORK( INDP+I-1 ) = D( I ) - SIGMA + ELSE + WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - SIGMA + END IF + 100 CONTINUE + END IF +* +* Find the index (from R1 to R2) of the largest (in magnitude) +* diagonal element of the inverse +* + MINGMA = WORK( INDS+R1-1 ) + WORK( INDP+R1-1 ) + IF( MINGMA.EQ.ZERO ) + $ MINGMA = EPS*WORK( INDS+R1-1 ) + R = R1 + DO 110 I = R1, R2 - 1 + TMP = WORK( INDS+I ) + WORK( INDP+I ) + IF( TMP.EQ.ZERO ) + $ TMP = EPS*WORK( INDS+I ) + IF( ABS( TMP ).LT.ABS( MINGMA ) ) THEN + MINGMA = TMP + R = I + 1 + END IF + 110 CONTINUE +* +* Compute the (scaled) r-th column of the inverse +* + ISUPPZ( 1 ) = B1 + ISUPPZ( 2 ) = BN + Z( R ) = ONE + ZTZ = ONE + IF( .NOT.SAWNAN ) THEN + FROM = R - 1 + TO = MAX( R-BLKSIZ, B1 ) + 120 CONTINUE + IF( FROM.GE.B1 ) THEN + DO 130 I = FROM, TO, -1 + Z( I ) = -( WORK( I )*Z( I+1 ) ) + ZTZ = ZTZ + Z( I )*Z( I ) + 130 CONTINUE + IF( ABS( Z( TO ) ).LE.EPS .AND. ABS( Z( TO+1 ) ).LE.EPS ) + $ THEN + ISUPPZ( 1 ) = TO + 2 + ELSE + FROM = TO - 1 + TO = MAX( TO-BLKSIZ, B1 ) + GO TO 120 + END IF + END IF + FROM = R + 1 + TO = MIN( R+BLKSIZ, BN ) + 140 CONTINUE + IF( FROM.LE.BN ) THEN + DO 150 I = FROM, TO + Z( I ) = -( WORK( INDUMN+I-1 )*Z( I-1 ) ) + ZTZ = ZTZ + Z( I )*Z( I ) + 150 CONTINUE + IF( ABS( Z( TO ) ).LE.EPS .AND. ABS( Z( TO-1 ) ).LE.EPS ) + $ THEN + ISUPPZ( 2 ) = TO - 2 + ELSE + FROM = TO + 1 + TO = MIN( TO+BLKSIZ, BN ) + GO TO 140 + END IF + END IF + ELSE + DO 160 I = R - 1, B1, -1 + IF( Z( I+1 ).EQ.ZERO ) THEN + Z( I ) = -( LD( I+1 ) / LD( I ) )*Z( I+2 ) + ELSE IF( ABS( Z( I+1 ) ).LE.EPS .AND. ABS( Z( I+2 ) ).LE. + $ EPS ) THEN + ISUPPZ( 1 ) = I + 3 + GO TO 170 + ELSE + Z( I ) = -( WORK( I )*Z( I+1 ) ) + END IF + ZTZ = ZTZ + Z( I )*Z( I ) + 160 CONTINUE + 170 CONTINUE + DO 180 I = R, BN - 1 + IF( Z( I ).EQ.ZERO ) THEN + Z( I+1 ) = -( LD( I-1 ) / LD( I ) )*Z( I-1 ) + ELSE IF( ABS( Z( I ) ).LE.EPS .AND. ABS( Z( I-1 ) ).LE.EPS ) + $ THEN + ISUPPZ( 2 ) = I - 2 + GO TO 190 + ELSE + Z( I+1 ) = -( WORK( INDUMN+I )*Z( I ) ) + END IF + ZTZ = ZTZ + Z( I+1 )*Z( I+1 ) + 180 CONTINUE + 190 CONTINUE + END IF + DO 200 I = B1, ISUPPZ( 1 ) - 3 + Z( I ) = ZERO + 200 CONTINUE + DO 210 I = ISUPPZ( 2 ) + 3, BN + Z( I ) = ZERO + 210 CONTINUE +* + RETURN +* +* End of DLAR1V +* + END diff --git a/costa/native/external/lapack/dlar2v.f b/costa/native/external/lapack/dlar2v.f new file mode 100644 index 000000000..83176732e --- /dev/null +++ b/costa/native/external/lapack/dlar2v.f @@ -0,0 +1,87 @@ + SUBROUTINE DLAR2V( N, X, Y, Z, INCX, C, S, INCC ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ), S( * ), X( * ), Y( * ), Z( * ) +* .. +* +* Purpose +* ======= +* +* DLAR2V applies a vector of real plane rotations from both sides to +* a sequence of 2-by-2 real symmetric matrices, defined by the elements +* of the vectors x, y and z. For i = 1,2,...,n +* +* ( x(i) z(i) ) := ( c(i) s(i) ) ( x(i) z(i) ) ( c(i) -s(i) ) +* ( z(i) y(i) ) ( -s(i) c(i) ) ( z(i) y(i) ) ( s(i) c(i) ) +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of plane rotations to be applied. +* +* X (input/output) DOUBLE PRECISION array, +* dimension (1+(N-1)*INCX) +* The vector x. +* +* Y (input/output) DOUBLE PRECISION array, +* dimension (1+(N-1)*INCX) +* The vector y. +* +* Z (input/output) DOUBLE PRECISION array, +* dimension (1+(N-1)*INCX) +* The vector z. +* +* INCX (input) INTEGER +* The increment between elements of X, Y and Z. INCX > 0. +* +* C (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) +* The cosines of the plane rotations. +* +* S (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) +* The sines of the plane rotations. +* +* INCC (input) INTEGER +* The increment between elements of C and S. INCC > 0. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IC, IX + DOUBLE PRECISION CI, SI, T1, T2, T3, T4, T5, T6, XI, YI, ZI +* .. +* .. Executable Statements .. +* + IX = 1 + IC = 1 + DO 10 I = 1, N + XI = X( IX ) + YI = Y( IX ) + ZI = Z( IX ) + CI = C( IC ) + SI = S( IC ) + T1 = SI*ZI + T2 = CI*ZI + T3 = T2 - SI*XI + T4 = T2 + SI*YI + T5 = CI*XI + T1 + T6 = CI*YI - T1 + X( IX ) = CI*T5 + SI*T4 + Y( IX ) = CI*T6 - SI*T3 + Z( IX ) = CI*T4 - SI*T5 + IX = IX + INCX + IC = IC + INCC + 10 CONTINUE +* +* End of DLAR2V +* + RETURN + END diff --git a/costa/native/external/lapack/dlarf.f b/costa/native/external/lapack/dlarf.f new file mode 100644 index 000000000..e81b6092b --- /dev/null +++ b/costa/native/external/lapack/dlarf.f @@ -0,0 +1,116 @@ + SUBROUTINE DLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLARF applies a real elementary reflector H to a real m by n matrix +* C, from either the left or the right. H is represented in the form +* +* H = I - tau * v * v' +* +* where tau is a real scalar and v is a real vector. +* +* If tau = 0, then H is taken to be the unit matrix. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': form H * C +* = 'R': form C * H +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* V (input) DOUBLE PRECISION array, dimension +* (1 + (M-1)*abs(INCV)) if SIDE = 'L' +* or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +* The vector v in the representation of H. V is not used if +* TAU = 0. +* +* INCV (input) INTEGER +* The increment between elements of v. INCV <> 0. +* +* TAU (input) DOUBLE PRECISION +* The value tau in the representation of H. +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) +* On entry, the m by n matrix C. +* On exit, C is overwritten by the matrix H * C if SIDE = 'L', +* or C * H if SIDE = 'R'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) DOUBLE PRECISION array, dimension +* (N) if SIDE = 'L' +* or (M) if SIDE = 'R' +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DGER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C +* + IF( TAU.NE.ZERO ) THEN +* +* w := C' * v +* + CALL DGEMV( 'Transpose', M, N, ONE, C, LDC, V, INCV, ZERO, + $ WORK, 1 ) +* +* C := C - v * w' +* + CALL DGER( M, N, -TAU, V, INCV, WORK, 1, C, LDC ) + END IF + ELSE +* +* Form C * H +* + IF( TAU.NE.ZERO ) THEN +* +* w := C * v +* + CALL DGEMV( 'No transpose', M, N, ONE, C, LDC, V, INCV, + $ ZERO, WORK, 1 ) +* +* C := C - w * v' +* + CALL DGER( M, N, -TAU, WORK, 1, V, INCV, C, LDC ) + END IF + END IF + RETURN +* +* End of DLARF +* + END diff --git a/costa/native/external/lapack/dlarfb.f b/costa/native/external/lapack/dlarfb.f new file mode 100644 index 000000000..e36a7fd5f --- /dev/null +++ b/costa/native/external/lapack/dlarfb.f @@ -0,0 +1,588 @@ + SUBROUTINE DLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, + $ T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* Purpose +* ======= +* +* DLARFB applies a real block reflector H or its transpose H' to a +* real m by n matrix C, from either the left or the right. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply H or H' from the Left +* = 'R': apply H or H' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply H (No transpose) +* = 'T': apply H' (Transpose) +* +* DIRECT (input) CHARACTER*1 +* Indicates how H is formed from a product of elementary +* reflectors +* = 'F': H = H(1) H(2) . . . H(k) (Forward) +* = 'B': H = H(k) . . . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Indicates how the vectors which define the elementary +* reflectors are stored: +* = 'C': Columnwise +* = 'R': Rowwise +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* K (input) INTEGER +* The order of the matrix T (= the number of elementary +* reflectors whose product defines the block reflector). +* +* V (input) DOUBLE PRECISION array, dimension +* (LDV,K) if STOREV = 'C' +* (LDV,M) if STOREV = 'R' and SIDE = 'L' +* (LDV,N) if STOREV = 'R' and SIDE = 'R' +* The matrix V. See further details. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); +* if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); +* if STOREV = 'R', LDV >= K. +* +* T (input) DOUBLE PRECISION array, dimension (LDT,K) +* The triangular k by k matrix T in the representation of the +* block reflector. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) +* On entry, the m by n matrix C. +* On exit, C is overwritten by H*C or H'*C or C*H or C*H'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDA >= max(1,M). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LDWORK,K) +* +* LDWORK (input) INTEGER +* The leading dimension of the array WORK. +* If SIDE = 'L', LDWORK >= max(1,N); +* if SIDE = 'R', LDWORK >= max(1,M). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DTRMM +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( STOREV, 'C' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 ) (first K rows) +* ( V2 ) +* where V1 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) +* +* W := C1' +* + DO 10 J = 1, K + CALL DCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W := W * V1 +* + CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N, + $ K, ONE, V, LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C2'*V2 +* + CALL DGEMM( 'Transpose', 'No transpose', N, K, M-K, + $ ONE, C( K+1, 1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL DTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W' +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2 * W' +* + CALL DGEMM( 'No transpose', 'Transpose', M-K, N, K, + $ -ONE, V( K+1, 1 ), LDV, WORK, LDWORK, ONE, + $ C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1' +* + CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', N, K, + $ ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W' +* + DO 30 J = 1, K + DO 20 I = 1, N + C( J, I ) = C( J, I ) - WORK( I, J ) + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C1 +* + DO 40 J = 1, K + CALL DCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W := W * V1 +* + CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M, + $ K, ONE, V, LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C2 * V2 +* + CALL DGEMM( 'No transpose', 'No transpose', M, K, N-K, + $ ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL DTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V' +* + IF( N.GT.K ) THEN +* +* C2 := C2 - W * V2' +* + CALL DGEMM( 'No transpose', 'Transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V( K+1, 1 ), LDV, ONE, + $ C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1' +* + CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', M, K, + $ ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 60 J = 1, K + DO 50 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + ELSE +* +* Let V = ( V1 ) +* ( V2 ) (last K rows) +* where V2 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) +* +* W := C2' +* + DO 70 J = 1, K + CALL DCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 ) + 70 CONTINUE +* +* W := W * V2 +* + CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N, + $ K, ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1'*V1 +* + CALL DGEMM( 'Transpose', 'No transpose', N, K, M-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL DTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W' +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1 * W' +* + CALL DGEMM( 'No transpose', 'Transpose', M-K, N, K, + $ -ONE, V, LDV, WORK, LDWORK, ONE, C, LDC ) + END IF +* +* W := W * V2' +* + CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', N, K, + $ ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK ) +* +* C2 := C2 - W' +* + DO 90 J = 1, K + DO 80 I = 1, N + C( M-K+J, I ) = C( M-K+J, I ) - WORK( I, J ) + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C2 +* + DO 100 J = 1, K + CALL DCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 100 CONTINUE +* +* W := W * V2 +* + CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M, + $ K, ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1 +* + CALL DGEMM( 'No transpose', 'No transpose', M, K, N-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL DTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V' +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1' +* + CALL DGEMM( 'No transpose', 'Transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC ) + END IF +* +* W := W * V2' +* + CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', M, K, + $ ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK ) +* +* C2 := C2 - W +* + DO 120 J = 1, K + DO 110 I = 1, M + C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J ) + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* + ELSE IF( LSAME( STOREV, 'R' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 V2 ) (V1: first K columns) +* where V1 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) +* +* W := C1' +* + DO 130 J = 1, K + CALL DCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 130 CONTINUE +* +* W := W * V1' +* + CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', N, K, + $ ONE, V, LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C2'*V2' +* + CALL DGEMM( 'Transpose', 'Transpose', N, K, M-K, ONE, + $ C( K+1, 1 ), LDC, V( 1, K+1 ), LDV, ONE, + $ WORK, LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL DTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V' * W' +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2' * W' +* + CALL DGEMM( 'Transpose', 'Transpose', M-K, N, K, -ONE, + $ V( 1, K+1 ), LDV, WORK, LDWORK, ONE, + $ C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N, + $ K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W' +* + DO 150 J = 1, K + DO 140 I = 1, N + C( J, I ) = C( J, I ) - WORK( I, J ) + 140 CONTINUE + 150 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) +* +* W := C1 +* + DO 160 J = 1, K + CALL DCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 160 CONTINUE +* +* W := W * V1' +* + CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', M, K, + $ ONE, V, LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C2 * V2' +* + CALL DGEMM( 'No transpose', 'Transpose', M, K, N-K, + $ ONE, C( 1, K+1 ), LDC, V( 1, K+1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL DTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( N.GT.K ) THEN +* +* C2 := C2 - W * V2 +* + CALL DGEMM( 'No transpose', 'No transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V( 1, K+1 ), LDV, ONE, + $ C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M, + $ K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 180 J = 1, K + DO 170 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 170 CONTINUE + 180 CONTINUE +* + END IF +* + ELSE +* +* Let V = ( V1 V2 ) (V2: last K columns) +* where V2 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) +* +* W := C2' +* + DO 190 J = 1, K + CALL DCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 ) + 190 CONTINUE +* +* W := W * V2' +* + CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', N, K, + $ ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1'*V1' +* + CALL DGEMM( 'Transpose', 'Transpose', N, K, M-K, ONE, + $ C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL DTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V' * W' +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1' * W' +* + CALL DGEMM( 'Transpose', 'Transpose', M-K, N, K, -ONE, + $ V, LDV, WORK, LDWORK, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N, + $ K, ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK ) +* +* C2 := C2 - W' +* + DO 210 J = 1, K + DO 200 I = 1, N + C( M-K+J, I ) = C( M-K+J, I ) - WORK( I, J ) + 200 CONTINUE + 210 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) +* +* W := C2 +* + DO 220 J = 1, K + CALL DCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 220 CONTINUE +* +* W := W * V2' +* + CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', M, K, + $ ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1' +* + CALL DGEMM( 'No transpose', 'Transpose', M, K, N-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL DTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1 +* + CALL DGEMM( 'No transpose', 'No transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M, + $ K, ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 240 J = 1, K + DO 230 I = 1, M + C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J ) + 230 CONTINUE + 240 CONTINUE +* + END IF +* + END IF + END IF +* + RETURN +* +* End of DLARFB +* + END diff --git a/costa/native/external/lapack/dlarfg.f b/costa/native/external/lapack/dlarfg.f new file mode 100644 index 000000000..0e67a7eb5 --- /dev/null +++ b/costa/native/external/lapack/dlarfg.f @@ -0,0 +1,138 @@ + SUBROUTINE DLARFG( N, ALPHA, X, INCX, TAU ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INCX, N + DOUBLE PRECISION ALPHA, TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION X( * ) +* .. +* +* Purpose +* ======= +* +* DLARFG generates a real elementary reflector H of order n, such +* that +* +* H * ( alpha ) = ( beta ), H' * H = I. +* ( x ) ( 0 ) +* +* where alpha and beta are scalars, and x is an (n-1)-element real +* vector. H is represented in the form +* +* H = I - tau * ( 1 ) * ( 1 v' ) , +* ( v ) +* +* where tau is a real scalar and v is a real (n-1)-element +* vector. +* +* If the elements of x are all zero, then tau = 0 and H is taken to be +* the unit matrix. +* +* Otherwise 1 <= tau <= 2. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the elementary reflector. +* +* ALPHA (input/output) DOUBLE PRECISION +* On entry, the value alpha. +* On exit, it is overwritten with the value beta. +* +* X (input/output) DOUBLE PRECISION array, dimension +* (1+(N-2)*abs(INCX)) +* On entry, the vector x. +* On exit, it is overwritten with the vector v. +* +* INCX (input) INTEGER +* The increment between elements of X. INCX > 0. +* +* TAU (output) DOUBLE PRECISION +* The value tau. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER J, KNT + DOUBLE PRECISION BETA, RSAFMN, SAFMIN, XNORM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLAPY2, DNRM2 + EXTERNAL DLAMCH, DLAPY2, DNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN +* .. +* .. External Subroutines .. + EXTERNAL DSCAL +* .. +* .. Executable Statements .. +* + IF( N.LE.1 ) THEN + TAU = ZERO + RETURN + END IF +* + XNORM = DNRM2( N-1, X, INCX ) +* + IF( XNORM.EQ.ZERO ) THEN +* +* H = I +* + TAU = ZERO + ELSE +* +* general case +* + BETA = -SIGN( DLAPY2( ALPHA, XNORM ), ALPHA ) + SAFMIN = DLAMCH( 'S' ) / DLAMCH( 'E' ) + IF( ABS( BETA ).LT.SAFMIN ) THEN +* +* XNORM, BETA may be inaccurate; scale X and recompute them +* + RSAFMN = ONE / SAFMIN + KNT = 0 + 10 CONTINUE + KNT = KNT + 1 + CALL DSCAL( N-1, RSAFMN, X, INCX ) + BETA = BETA*RSAFMN + ALPHA = ALPHA*RSAFMN + IF( ABS( BETA ).LT.SAFMIN ) + $ GO TO 10 +* +* New BETA is at most 1, at least SAFMIN +* + XNORM = DNRM2( N-1, X, INCX ) + BETA = -SIGN( DLAPY2( ALPHA, XNORM ), ALPHA ) + TAU = ( BETA-ALPHA ) / BETA + CALL DSCAL( N-1, ONE / ( ALPHA-BETA ), X, INCX ) +* +* If ALPHA is subnormal, it may lose relative accuracy +* + ALPHA = BETA + DO 20 J = 1, KNT + ALPHA = ALPHA*SAFMIN + 20 CONTINUE + ELSE + TAU = ( BETA-ALPHA ) / BETA + CALL DSCAL( N-1, ONE / ( ALPHA-BETA ), X, INCX ) + ALPHA = BETA + END IF + END IF +* + RETURN +* +* End of DLARFG +* + END diff --git a/costa/native/external/lapack/dlarft.f b/costa/native/external/lapack/dlarft.f new file mode 100644 index 000000000..147b22d95 --- /dev/null +++ b/costa/native/external/lapack/dlarft.f @@ -0,0 +1,218 @@ + SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* Purpose +* ======= +* +* DLARFT forms the triangular factor T of a real block reflector H +* of order n, which is defined as a product of k elementary reflectors. +* +* If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +* +* If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +* +* If STOREV = 'C', the vector which defines the elementary reflector +* H(i) is stored in the i-th column of the array V, and +* +* H = I - V * T * V' +* +* If STOREV = 'R', the vector which defines the elementary reflector +* H(i) is stored in the i-th row of the array V, and +* +* H = I - V' * T * V +* +* Arguments +* ========= +* +* DIRECT (input) CHARACTER*1 +* Specifies the order in which the elementary reflectors are +* multiplied to form the block reflector: +* = 'F': H = H(1) H(2) . . . H(k) (Forward) +* = 'B': H = H(k) . . . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Specifies how the vectors which define the elementary +* reflectors are stored (see also Further Details): +* = 'C': columnwise +* = 'R': rowwise +* +* N (input) INTEGER +* The order of the block reflector H. N >= 0. +* +* K (input) INTEGER +* The order of the triangular factor T (= the number of +* elementary reflectors). K >= 1. +* +* V (input/output) DOUBLE PRECISION array, dimension +* (LDV,K) if STOREV = 'C' +* (LDV,N) if STOREV = 'R' +* The matrix V. See further details. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +* +* TAU (input) DOUBLE PRECISION array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i). +* +* T (output) DOUBLE PRECISION array, dimension (LDT,K) +* The k by k triangular factor T of the block reflector. +* If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +* lower triangular. The rest of the array is not used. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* Further Details +* =============== +* +* The shape of the matrix V and the storage of the vectors which define +* the H(i) is best illustrated by the following example with n = 5 and +* k = 3. The elements equal to 1 are not stored; the corresponding +* array elements are modified but restored on exit. The rest of the +* array is not used. +* +* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +* +* V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +* ( v1 1 ) ( 1 v2 v2 v2 ) +* ( v1 v2 1 ) ( 1 v3 v3 ) +* ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* +* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +* +* V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +* ( v1 v2 v3 ) ( v2 v2 v2 1 ) +* ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +* ( 1 v3 ) +* ( 1 ) +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION VII +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DTRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 20 I = 1, K + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO 10 J = 1, I + T( J, I ) = ZERO + 10 CONTINUE + ELSE +* +* general case +* + VII = V( I, I ) + V( I, I ) = ONE + IF( LSAME( STOREV, 'C' ) ) THEN +* +* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) +* + CALL DGEMV( 'Transpose', N-I+1, I-1, -TAU( I ), + $ V( I, 1 ), LDV, V( I, I ), 1, ZERO, + $ T( 1, I ), 1 ) + ELSE +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' +* + CALL DGEMV( 'No transpose', I-1, N-I+1, -TAU( I ), + $ V( 1, I ), LDV, V( I, I ), LDV, ZERO, + $ T( 1, I ), 1 ) + END IF + V( I, I ) = VII +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + END IF + 20 CONTINUE + ELSE + DO 40 I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO 30 J = I, K + T( J, I ) = ZERO + 30 CONTINUE + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN + VII = V( N-K+I, I ) + V( N-K+I, I ) = ONE +* +* T(i+1:k,i) := +* - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) +* + CALL DGEMV( 'Transpose', N-K+I, K-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( 1, I ), 1, ZERO, + $ T( I+1, I ), 1 ) + V( N-K+I, I ) = VII + ELSE + VII = V( I, N-K+I ) + V( I, N-K+I ) = ONE +* +* T(i+1:k,i) := +* - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' +* + CALL DGEMV( 'No transpose', K-I, N-K+I, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO, + $ T( I+1, I ), 1 ) + V( I, N-K+I ) = VII + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL DTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + END IF + T( I, I ) = TAU( I ) + END IF + 40 CONTINUE + END IF + RETURN +* +* End of DLARFT +* + END diff --git a/costa/native/external/lapack/dlarfx.f b/costa/native/external/lapack/dlarfx.f new file mode 100644 index 000000000..178a20ae4 --- /dev/null +++ b/costa/native/external/lapack/dlarfx.f @@ -0,0 +1,639 @@ + SUBROUTINE DLARFX( SIDE, M, N, V, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER LDC, M, N + DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLARFX applies a real elementary reflector H to a real m by n +* matrix C, from either the left or the right. H is represented in the +* form +* +* H = I - tau * v * v' +* +* where tau is a real scalar and v is a real vector. +* +* If tau = 0, then H is taken to be the unit matrix +* +* This version uses inline code if H has order < 11. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': form H * C +* = 'R': form C * H +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* V (input) DOUBLE PRECISION array, dimension (M) if SIDE = 'L' +* or (N) if SIDE = 'R' +* The vector v in the representation of H. +* +* TAU (input) DOUBLE PRECISION +* The value tau in the representation of H. +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) +* On entry, the m by n matrix C. +* On exit, C is overwritten by the matrix H * C if SIDE = 'L', +* or C * H if SIDE = 'R'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDA >= (1,M). +* +* WORK (workspace) DOUBLE PRECISION array, dimension +* (N) if SIDE = 'L' +* or (M) if SIDE = 'R' +* WORK is not referenced if H has order < 11. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER J + DOUBLE PRECISION SUM, T1, T10, T2, T3, T4, T5, T6, T7, T8, T9, + $ V1, V10, V2, V3, V4, V5, V6, V7, V8, V9 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DGER +* .. +* .. Executable Statements .. +* + IF( TAU.EQ.ZERO ) + $ RETURN + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C, where H has order m. +* + GO TO ( 10, 30, 50, 70, 90, 110, 130, 150, + $ 170, 190 )M +* +* Code for general M +* +* w := C'*v +* + CALL DGEMV( 'Transpose', M, N, ONE, C, LDC, V, 1, ZERO, WORK, + $ 1 ) +* +* C := C - tau * v * w' +* + CALL DGER( M, N, -TAU, V, 1, WORK, 1, C, LDC ) + GO TO 410 + 10 CONTINUE +* +* Special code for 1 x 1 Householder +* + T1 = ONE - TAU*V( 1 )*V( 1 ) + DO 20 J = 1, N + C( 1, J ) = T1*C( 1, J ) + 20 CONTINUE + GO TO 410 + 30 CONTINUE +* +* Special code for 2 x 2 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + DO 40 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + 40 CONTINUE + GO TO 410 + 50 CONTINUE +* +* Special code for 3 x 3 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + DO 60 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + 60 CONTINUE + GO TO 410 + 70 CONTINUE +* +* Special code for 4 x 4 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + DO 80 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + 80 CONTINUE + GO TO 410 + 90 CONTINUE +* +* Special code for 5 x 5 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + DO 100 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + 100 CONTINUE + GO TO 410 + 110 CONTINUE +* +* Special code for 6 x 6 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + DO 120 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + 120 CONTINUE + GO TO 410 + 130 CONTINUE +* +* Special code for 7 x 7 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + DO 140 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + 140 CONTINUE + GO TO 410 + 150 CONTINUE +* +* Special code for 8 x 8 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + DO 160 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + 160 CONTINUE + GO TO 410 + 170 CONTINUE +* +* Special code for 9 x 9 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + V9 = V( 9 ) + T9 = TAU*V9 + DO 180 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + C( 9, J ) = C( 9, J ) - SUM*T9 + 180 CONTINUE + GO TO 410 + 190 CONTINUE +* +* Special code for 10 x 10 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + V9 = V( 9 ) + T9 = TAU*V9 + V10 = V( 10 ) + T10 = TAU*V10 + DO 200 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) + + $ V10*C( 10, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + C( 9, J ) = C( 9, J ) - SUM*T9 + C( 10, J ) = C( 10, J ) - SUM*T10 + 200 CONTINUE + GO TO 410 + ELSE +* +* Form C * H, where H has order n. +* + GO TO ( 210, 230, 250, 270, 290, 310, 330, 350, + $ 370, 390 )N +* +* Code for general N +* +* w := C * v +* + CALL DGEMV( 'No transpose', M, N, ONE, C, LDC, V, 1, ZERO, + $ WORK, 1 ) +* +* C := C - tau * w * v' +* + CALL DGER( M, N, -TAU, WORK, 1, V, 1, C, LDC ) + GO TO 410 + 210 CONTINUE +* +* Special code for 1 x 1 Householder +* + T1 = ONE - TAU*V( 1 )*V( 1 ) + DO 220 J = 1, M + C( J, 1 ) = T1*C( J, 1 ) + 220 CONTINUE + GO TO 410 + 230 CONTINUE +* +* Special code for 2 x 2 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + DO 240 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + 240 CONTINUE + GO TO 410 + 250 CONTINUE +* +* Special code for 3 x 3 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + DO 260 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + 260 CONTINUE + GO TO 410 + 270 CONTINUE +* +* Special code for 4 x 4 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + DO 280 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + 280 CONTINUE + GO TO 410 + 290 CONTINUE +* +* Special code for 5 x 5 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + DO 300 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + 300 CONTINUE + GO TO 410 + 310 CONTINUE +* +* Special code for 6 x 6 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + DO 320 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + 320 CONTINUE + GO TO 410 + 330 CONTINUE +* +* Special code for 7 x 7 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + DO 340 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + 340 CONTINUE + GO TO 410 + 350 CONTINUE +* +* Special code for 8 x 8 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + DO 360 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + 360 CONTINUE + GO TO 410 + 370 CONTINUE +* +* Special code for 9 x 9 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + V9 = V( 9 ) + T9 = TAU*V9 + DO 380 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + C( J, 9 ) = C( J, 9 ) - SUM*T9 + 380 CONTINUE + GO TO 410 + 390 CONTINUE +* +* Special code for 10 x 10 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + V9 = V( 9 ) + T9 = TAU*V9 + V10 = V( 10 ) + T10 = TAU*V10 + DO 400 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) + + $ V10*C( J, 10 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + C( J, 9 ) = C( J, 9 ) - SUM*T9 + C( J, 10 ) = C( J, 10 ) - SUM*T10 + 400 CONTINUE + GO TO 410 + END IF + 410 CONTINUE + RETURN +* +* End of DLARFX +* + END diff --git a/costa/native/external/lapack/dlargv.f b/costa/native/external/lapack/dlargv.f new file mode 100644 index 000000000..6b1c10411 --- /dev/null +++ b/costa/native/external/lapack/dlargv.f @@ -0,0 +1,100 @@ + SUBROUTINE DLARGV( N, X, INCX, Y, INCY, C, INCC ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* DLARGV generates a vector of real plane rotations, determined by +* elements of the real vectors x and y. For i = 1,2,...,n +* +* ( c(i) s(i) ) ( x(i) ) = ( a(i) ) +* ( -s(i) c(i) ) ( y(i) ) = ( 0 ) +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of plane rotations to be generated. +* +* X (input/output) DOUBLE PRECISION array, +* dimension (1+(N-1)*INCX) +* On entry, the vector x. +* On exit, x(i) is overwritten by a(i), for i = 1,...,n. +* +* INCX (input) INTEGER +* The increment between elements of X. INCX > 0. +* +* Y (input/output) DOUBLE PRECISION array, +* dimension (1+(N-1)*INCY) +* On entry, the vector y. +* On exit, the sines of the plane rotations. +* +* INCY (input) INTEGER +* The increment between elements of Y. INCY > 0. +* +* C (output) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) +* The cosines of the plane rotations. +* +* INCC (input) INTEGER +* The increment between elements of C. INCC > 0. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IC, IX, IY + DOUBLE PRECISION F, G, T, TT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IX = 1 + IY = 1 + IC = 1 + DO 10 I = 1, N + F = X( IX ) + G = Y( IY ) + IF( G.EQ.ZERO ) THEN + C( IC ) = ONE + ELSE IF( F.EQ.ZERO ) THEN + C( IC ) = ZERO + Y( IY ) = ONE + X( IX ) = G + ELSE IF( ABS( F ).GT.ABS( G ) ) THEN + T = G / F + TT = SQRT( ONE+T*T ) + C( IC ) = ONE / TT + Y( IY ) = T*C( IC ) + X( IX ) = F*TT + ELSE + T = F / G + TT = SQRT( ONE+T*T ) + Y( IY ) = ONE / TT + C( IC ) = T*Y( IY ) + X( IX ) = G*TT + END IF + IC = IC + INCC + IY = IY + INCY + IX = IX + INCX + 10 CONTINUE + RETURN +* +* End of DLARGV +* + END diff --git a/costa/native/external/lapack/dlarnv.f b/costa/native/external/lapack/dlarnv.f new file mode 100644 index 000000000..46928d5fe --- /dev/null +++ b/costa/native/external/lapack/dlarnv.f @@ -0,0 +1,116 @@ + SUBROUTINE DLARNV( IDIST, ISEED, N, X ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER IDIST, N +* .. +* .. Array Arguments .. + INTEGER ISEED( 4 ) + DOUBLE PRECISION X( * ) +* .. +* +* Purpose +* ======= +* +* DLARNV returns a vector of n random real numbers from a uniform or +* normal distribution. +* +* Arguments +* ========= +* +* IDIST (input) INTEGER +* Specifies the distribution of the random numbers: +* = 1: uniform (0,1) +* = 2: uniform (-1,1) +* = 3: normal (0,1) +* +* ISEED (input/output) INTEGER array, dimension (4) +* On entry, the seed of the random number generator; the array +* elements must be between 0 and 4095, and ISEED(4) must be +* odd. +* On exit, the seed is updated. +* +* N (input) INTEGER +* The number of random numbers to be generated. +* +* X (output) DOUBLE PRECISION array, dimension (N) +* The generated random numbers. +* +* Further Details +* =============== +* +* This routine calls the auxiliary routine DLARUV to generate random +* real numbers from a uniform (0,1) distribution, in batches of up to +* 128 using vectorisable code. The Box-Muller method is used to +* transform numbers from a uniform to a normal distribution. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, TWO + PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0 ) + INTEGER LV + PARAMETER ( LV = 128 ) + DOUBLE PRECISION TWOPI + PARAMETER ( TWOPI = 6.2831853071795864769252867663D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IL, IL2, IV +* .. +* .. Local Arrays .. + DOUBLE PRECISION U( LV ) +* .. +* .. Intrinsic Functions .. + INTRINSIC COS, LOG, MIN, SQRT +* .. +* .. External Subroutines .. + EXTERNAL DLARUV +* .. +* .. Executable Statements .. +* + DO 40 IV = 1, N, LV / 2 + IL = MIN( LV / 2, N-IV+1 ) + IF( IDIST.EQ.3 ) THEN + IL2 = 2*IL + ELSE + IL2 = IL + END IF +* +* Call DLARUV to generate IL2 numbers from a uniform (0,1) +* distribution (IL2 <= LV) +* + CALL DLARUV( ISEED, IL2, U ) +* + IF( IDIST.EQ.1 ) THEN +* +* Copy generated numbers +* + DO 10 I = 1, IL + X( IV+I-1 ) = U( I ) + 10 CONTINUE + ELSE IF( IDIST.EQ.2 ) THEN +* +* Convert generated numbers to uniform (-1,1) distribution +* + DO 20 I = 1, IL + X( IV+I-1 ) = TWO*U( I ) - ONE + 20 CONTINUE + ELSE IF( IDIST.EQ.3 ) THEN +* +* Convert generated numbers to normal (0,1) distribution +* + DO 30 I = 1, IL + X( IV+I-1 ) = SQRT( -TWO*LOG( U( 2*I-1 ) ) )* + $ COS( TWOPI*U( 2*I ) ) + 30 CONTINUE + END IF + 40 CONTINUE + RETURN +* +* End of DLARNV +* + END diff --git a/costa/native/external/lapack/dlarrb.f b/costa/native/external/lapack/dlarrb.f new file mode 100644 index 000000000..01ad7bd23 --- /dev/null +++ b/costa/native/external/lapack/dlarrb.f @@ -0,0 +1,282 @@ + SUBROUTINE DLARRB( N, D, L, LD, LLD, IFIRST, ILAST, SIGMA, RELTOL, + $ W, WGAP, WERR, WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER IFIRST, ILAST, INFO, N + DOUBLE PRECISION RELTOL, SIGMA +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION D( * ), L( * ), LD( * ), LLD( * ), W( * ), + $ WERR( * ), WGAP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* Given the relatively robust representation(RRR) L D L^T, DLARRB +* does ``limited'' bisection to locate the eigenvalues of L D L^T, +* W( IFIRST ) thru' W( ILAST ), to more accuracy. Intervals +* [left, right] are maintained by storing their mid-points and +* semi-widths in the arrays W and WERR respectively. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the diagonal matrix D. +* +* L (input) DOUBLE PRECISION array, dimension (N-1) +* The n-1 subdiagonal elements of the unit bidiagonal matrix L. +* +* LD (input) DOUBLE PRECISION array, dimension (N-1) +* The n-1 elements L(i)*D(i). +* +* LLD (input) DOUBLE PRECISION array, dimension (N-1) +* The n-1 elements L(i)*L(i)*D(i). +* +* IFIRST (input) INTEGER +* The index of the first eigenvalue in the cluster. +* +* ILAST (input) INTEGER +* The index of the last eigenvalue in the cluster. +* +* SIGMA (input) DOUBLE PRECISION +* The shift used to form L D L^T (see DLARRF). +* +* RELTOL (input) DOUBLE PRECISION +* The relative tolerance. +* +* W (input/output) DOUBLE PRECISION array, dimension (N) +* On input, W( IFIRST ) thru' W( ILAST ) are estimates of the +* corresponding eigenvalues of L D L^T. +* On output, these estimates are ``refined''. +* +* WGAP (input/output) DOUBLE PRECISION array, dimension (N) +* The gaps between the eigenvalues of L D L^T. Very small +* gaps are changed on output. +* +* WERR (input/output) DOUBLE PRECISION array, dimension (N) +* On input, WERR( IFIRST ) thru' WERR( ILAST ) are the errors +* in the estimates W( IFIRST ) thru' W( ILAST ). +* On output, these are the ``refined'' errors. +* +*****Reminder to Inder --- WORK is never used in this subroutine ***** +* WORK (input) DOUBLE PRECISION array, dimension (???) +* Workspace. +* +* IWORK (input) INTEGER array, dimension (2*N) +* Workspace. +* +*****Reminder to Inder --- INFO is never set in this subroutine ****** +* INFO (output) INTEGER +* Error flag. +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, TWO, HALF + PARAMETER ( ZERO = 0.0D0, TWO = 2.0D0, HALF = 0.5D0 ) +* .. +* .. Local Scalars .. + INTEGER CNT, I, I1, I2, INITI1, INITI2, J, K, NCNVRG, + $ NEIG, NINT, NRIGHT, OLNINT + DOUBLE PRECISION DELTA, EPS, GAP, LEFT, MID, PERT, RIGHT, S, + $ THRESH, TMP, WIDTH +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + EPS = DLAMCH( 'Precision' ) + I1 = IFIRST + I2 = IFIRST + NEIG = ILAST - IFIRST + 1 + NCNVRG = 0 + THRESH = RELTOL + DO 10 I = IFIRST, ILAST + IWORK( I ) = 0 + PERT = EPS*( ABS( SIGMA )+ABS( W( I ) ) ) + WERR( I ) = WERR( I ) + PERT + IF( WGAP( I ).LT.PERT ) + $ WGAP( I ) = PERT + 10 CONTINUE + DO 20 I = I1, ILAST + IF( I.EQ.1 ) THEN + GAP = WGAP( I ) + ELSE IF( I.EQ.N ) THEN + GAP = WGAP( I-1 ) + ELSE + GAP = MIN( WGAP( I-1 ), WGAP( I ) ) + END IF + IF( WERR( I ).LT.THRESH*GAP ) THEN + NCNVRG = NCNVRG + 1 + IWORK( I ) = 1 + IF( I1.EQ.I ) + $ I1 = I1 + 1 + ELSE + I2 = I + END IF + 20 CONTINUE +* +* Initialize the unconverged intervals. +* + I = I1 + NINT = 0 + RIGHT = ZERO + 30 CONTINUE + IF( I.LE.I2 ) THEN + IF( IWORK( I ).EQ.0 ) THEN + DELTA = EPS + LEFT = W( I ) - WERR( I ) +* +* Do while( CNT(LEFT).GT.I-1 ) +* + 40 CONTINUE + IF( I.GT.I1 .AND. LEFT.LE.RIGHT ) THEN + LEFT = RIGHT + CNT = I - 1 + ELSE + S = -LEFT + CNT = 0 + DO 50 J = 1, N - 1 + TMP = D( J ) + S + S = S*( LD( J ) / TMP )*L( J ) - LEFT + IF( TMP.LT.ZERO ) + $ CNT = CNT + 1 + 50 CONTINUE + TMP = D( N ) + S + IF( TMP.LT.ZERO ) + $ CNT = CNT + 1 + IF( CNT.GT.I-1 ) THEN + DELTA = TWO*DELTA + LEFT = LEFT - ( ABS( SIGMA )+ABS( LEFT ) )*DELTA + GO TO 40 + END IF + END IF + DELTA = EPS + RIGHT = W( I ) + WERR( I ) +* +* Do while( CNT(RIGHT).LT.I ) +* + 60 CONTINUE + S = -RIGHT + CNT = 0 + DO 70 J = 1, N - 1 + TMP = D( J ) + S + S = S*( LD( J ) / TMP )*L( J ) - RIGHT + IF( TMP.LT.ZERO ) + $ CNT = CNT + 1 + 70 CONTINUE + TMP = D( N ) + S + IF( TMP.LT.ZERO ) + $ CNT = CNT + 1 + IF( CNT.LT.I ) THEN + DELTA = TWO*DELTA + RIGHT = RIGHT + ( ABS( SIGMA )+ABS( RIGHT ) )*DELTA + GO TO 60 + END IF + WERR( I ) = LEFT + W( I ) = RIGHT + IWORK( N+I ) = CNT + NINT = NINT + 1 + I = CNT + 1 + ELSE + I = I + 1 + END IF + GO TO 30 + END IF +* +* While( NCNVRG.LT.NEIG ) +* + INITI1 = I1 + INITI2 = I2 + 80 CONTINUE + IF( NCNVRG.LT.NEIG ) THEN + OLNINT = NINT + I = I1 + DO 100 K = 1, OLNINT + NRIGHT = IWORK( N+I ) + IF( IWORK( I ).EQ.0 ) THEN + MID = HALF*( WERR( I )+W( I ) ) + S = -MID + CNT = 0 + DO 90 J = 1, N - 1 + TMP = D( J ) + S + S = S*( LD( J ) / TMP )*L( J ) - MID + IF( TMP.LT.ZERO ) + $ CNT = CNT + 1 + 90 CONTINUE + TMP = D( N ) + S + IF( TMP.LT.ZERO ) + $ CNT = CNT + 1 + CNT = MAX( I-1, MIN( NRIGHT, CNT ) ) + IF( I.EQ.NRIGHT ) THEN + IF( I.EQ.IFIRST ) THEN + GAP = WERR( I+1 ) - W( I ) + ELSE IF( I.EQ.ILAST ) THEN + GAP = WERR( I ) - W( I-1 ) + ELSE + GAP = MIN( WERR( I+1 )-W( I ), WERR( I )-W( I-1 ) ) + END IF + WIDTH = W( I ) - MID + IF( WIDTH.LT.THRESH*GAP ) THEN + NCNVRG = NCNVRG + 1 + IWORK( I ) = 1 + IF( I1.EQ.I ) THEN + I1 = I1 + 1 + NINT = NINT - 1 + END IF + END IF + END IF + IF( IWORK( I ).EQ.0 ) + $ I2 = K + IF( CNT.EQ.I-1 ) THEN + WERR( I ) = MID + ELSE IF( CNT.EQ.NRIGHT ) THEN + W( I ) = MID + ELSE + IWORK( N+I ) = CNT + NINT = NINT + 1 + WERR( CNT+1 ) = MID + W( CNT+1 ) = W( I ) + W( I ) = MID + I = CNT + 1 + IWORK( N+I ) = NRIGHT + END IF + END IF + I = NRIGHT + 1 + 100 CONTINUE + NINT = NINT - OLNINT + I2 + GO TO 80 + END IF + DO 110 I = INITI1, INITI2 + W( I ) = HALF*( WERR( I )+W( I ) ) + WERR( I ) = W( I ) - WERR( I ) + 110 CONTINUE +* + RETURN +* +* End of DLARRB +* + END diff --git a/costa/native/external/lapack/dlarre.f b/costa/native/external/lapack/dlarre.f new file mode 100644 index 000000000..d15d5cbdb --- /dev/null +++ b/costa/native/external/lapack/dlarre.f @@ -0,0 +1,318 @@ + SUBROUTINE DLARRE( N, D, E, TOL, NSPLIT, ISPLIT, M, W, WOFF, + $ GERSCH, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, M, N, NSPLIT + DOUBLE PRECISION TOL +* .. +* .. Array Arguments .. + INTEGER ISPLIT( * ) + DOUBLE PRECISION D( * ), E( * ), GERSCH( * ), W( * ), WOFF( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* Given the tridiagonal matrix T, DLARRE sets "small" off-diagonal +* elements to zero, and for each unreduced block T_i, it finds +* (i) the numbers sigma_i +* (ii) the base T_i - sigma_i I = L_i D_i L_i^T representations and +* (iii) eigenvalues of each L_i D_i L_i^T. +* The representations and eigenvalues found are then used by +* DSTEGR to compute the eigenvectors of a symmetric tridiagonal +* matrix. Currently, the base representations are limited to being +* positive or negative definite, and the eigenvalues of the definite +* matrices are found by the dqds algorithm (subroutine DLASQ2). As +* an added benefit, DLARRE also outputs the n Gerschgorin +* intervals for each L_i D_i L_i^T. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal +* matrix T. +* On exit, the n diagonal elements of the diagonal +* matrices D_i. +* +* E (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix T; E(N) need not be set. +* On exit, the subdiagonal elements of the unit bidiagonal +* matrices L_i. +* +* TOL (input) DOUBLE PRECISION +* The threshold for splitting. If on input |E(i)| < TOL, then +* the matrix T is split into smaller blocks. +* +* NSPLIT (input) INTEGER +* The number of blocks T splits into. 1 <= NSPLIT <= N. +* +* ISPLIT (output) INTEGER array, dimension (2*N) +* The splitting points, at which T breaks up into submatrices. +* The first submatrix consists of rows/columns 1 to ISPLIT(1), +* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), +* etc., and the NSPLIT-th consists of rows/columns +* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. +* +* M (output) INTEGER +* The total number of eigenvalues (of all the L_i D_i L_i^T) +* found. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* The first M elements contain the eigenvalues. The +* eigenvalues of each of the blocks, L_i D_i L_i^T, are +* sorted in ascending order. +* +* WOFF (output) DOUBLE PRECISION array, dimension (N) +* The NSPLIT base points sigma_i. +* +* GERSCH (output) DOUBLE PRECISION array, dimension (2*N) +* The n Gerschgorin intervals. +* +* WORK (input) DOUBLE PRECISION array, dimension (4*N???) +* Workspace. +* +* INFO (output) INTEGER +* Output error code from DLASQ2 +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, FOUR, FOURTH + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ FOUR = 4.0D0, FOURTH = ONE / FOUR ) +* .. +* .. Local Scalars .. + INTEGER CNT, I, IBEGIN, IEND, IN, J, JBLK, MAXCNT + DOUBLE PRECISION DELTA, EPS, GL, GU, NRM, OFFD, S, SGNDEF, + $ SIGMA, TAU, TMP1, WIDTH +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLASQ2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + EPS = DLAMCH( 'Precision' ) +* +* Compute Splitting Points +* + NSPLIT = 1 + DO 10 I = 1, N - 1 + IF( ABS( E( I ) ).LE.TOL ) THEN + ISPLIT( NSPLIT ) = I + NSPLIT = NSPLIT + 1 + END IF + 10 CONTINUE + ISPLIT( NSPLIT ) = N +* + IBEGIN = 1 + DO 170 JBLK = 1, NSPLIT + IEND = ISPLIT( JBLK ) + IF( IBEGIN.EQ.IEND ) THEN + W( IBEGIN ) = D( IBEGIN ) + WOFF( JBLK ) = ZERO + IBEGIN = IEND + 1 + GO TO 170 + END IF + IN = IEND - IBEGIN + 1 +* +* Form the n Gerschgorin intervals +* + GL = D( IBEGIN ) - ABS( E( IBEGIN ) ) + GU = D( IBEGIN ) + ABS( E( IBEGIN ) ) + GERSCH( 2*IBEGIN-1 ) = GL + GERSCH( 2*IBEGIN ) = GU + GERSCH( 2*IEND-1 ) = D( IEND ) - ABS( E( IEND-1 ) ) + GERSCH( 2*IEND ) = D( IEND ) + ABS( E( IEND-1 ) ) + GL = MIN( GERSCH( 2*IEND-1 ), GL ) + GU = MAX( GERSCH( 2*IEND ), GU ) + DO 20 I = IBEGIN + 1, IEND - 1 + OFFD = ABS( E( I-1 ) ) + ABS( E( I ) ) + GERSCH( 2*I-1 ) = D( I ) - OFFD + GL = MIN( GERSCH( 2*I-1 ), GL ) + GERSCH( 2*I ) = D( I ) + OFFD + GU = MAX( GERSCH( 2*I ), GU ) + 20 CONTINUE + NRM = MAX( ABS( GL ), ABS( GU ) ) +* +* Find the number SIGMA where the base representation +* T - sigma I = L D L^T is to be formed. +* + WIDTH = GU - GL + DO 30 I = IBEGIN, IEND - 1 + WORK( I ) = E( I )*E( I ) + 30 CONTINUE + DO 50 J = 1, 2 + IF( J.EQ.1 ) THEN + TAU = GL + FOURTH*WIDTH + ELSE + TAU = GU - FOURTH*WIDTH + END IF + TMP1 = D( IBEGIN ) - TAU + IF( TMP1.LT.ZERO ) THEN + CNT = 1 + ELSE + CNT = 0 + END IF + DO 40 I = IBEGIN + 1, IEND + TMP1 = D( I ) - TAU - WORK( I-1 ) / TMP1 + IF( TMP1.LT.ZERO ) + $ CNT = CNT + 1 + 40 CONTINUE + IF( CNT.EQ.0 ) THEN + GL = TAU + ELSE IF( CNT.EQ.IN ) THEN + GU = TAU + END IF + IF( J.EQ.1 ) THEN + MAXCNT = CNT + SIGMA = GL + SGNDEF = ONE + ELSE + IF( IN-CNT.GT.MAXCNT ) THEN + SIGMA = GU + SGNDEF = -ONE + END IF + END IF + 50 CONTINUE +* +* Find the base L D L^T representation +* + WORK( 3*IN ) = ONE + DELTA = EPS + TAU = SGNDEF*NRM + 60 CONTINUE + SIGMA = SIGMA - DELTA*TAU + WORK( 1 ) = D( IBEGIN ) - SIGMA + J = IBEGIN + DO 70 I = 1, IN - 1 + WORK( 2*IN+I ) = ONE / WORK( 2*I-1 ) + TMP1 = E( J )*WORK( 2*IN+I ) + WORK( 2*I+1 ) = ( D( J+1 )-SIGMA ) - TMP1*E( J ) + WORK( 2*I ) = TMP1 + J = J + 1 + 70 CONTINUE + DO 80 I = IN, 1, -1 + TMP1 = SGNDEF*WORK( 2*I-1 ) + IF( TMP1.LT.ZERO .OR. WORK( 2*IN+I ).EQ.ZERO .OR. .NOT. + $ ( TMP1.GT.ZERO .OR. TMP1.LT.ONE ) ) THEN + DELTA = TWO*DELTA + GO TO 60 + END IF + J = J - 1 + 80 CONTINUE +* + J = IBEGIN + D( IBEGIN ) = WORK( 1 ) + WORK( 1 ) = ABS( WORK( 1 ) ) + DO 90 I = 1, IN - 1 + TMP1 = E( J ) + E( J ) = WORK( 2*I ) + WORK( 2*I ) = ABS( TMP1*WORK( 2*I ) ) + J = J + 1 + D( J ) = WORK( 2*I+1 ) + WORK( 2*I+1 ) = ABS( WORK( 2*I+1 ) ) + 90 CONTINUE +* + CALL DLASQ2( IN, WORK, INFO ) +* + TAU = SGNDEF*WORK( IN ) + WORK( 3*IN ) = ONE + DELTA = TWO*EPS + 100 CONTINUE + TAU = TAU*( ONE-DELTA ) +* + S = -TAU + J = IBEGIN + DO 110 I = 1, IN - 1 + WORK( I ) = D( J ) + S + WORK( 2*IN+I ) = ONE / WORK( I ) +* WORK( N+I ) = ( E( I ) * D( I ) ) / WORK( I ) + WORK( IN+I ) = ( E( J )*D( J ) )*WORK( 2*IN+I ) + S = S*WORK( IN+I )*E( J ) - TAU + J = J + 1 + 110 CONTINUE + WORK( IN ) = D( IEND ) + S +* +* Checking to see if all the diagonal elements of the new +* L D L^T representation have the same sign +* + DO 120 I = IN, 1, -1 + TMP1 = SGNDEF*WORK( I ) + IF( TMP1.LT.ZERO .OR. WORK( 2*IN+I ).EQ.ZERO .OR. .NOT. + $ ( TMP1.GT.ZERO .OR. TMP1.LT.ONE ) ) THEN + DELTA = TWO*DELTA + GO TO 100 + END IF + 120 CONTINUE +* + SIGMA = SIGMA + TAU + CALL DCOPY( IN, WORK, 1, D( IBEGIN ), 1 ) + CALL DCOPY( IN-1, WORK( IN+1 ), 1, E( IBEGIN ), 1 ) + WOFF( JBLK ) = SIGMA +* +* Update the n Gerschgorin intervals +* + DO 130 I = IBEGIN, IEND + GERSCH( 2*I-1 ) = GERSCH( 2*I-1 ) - SIGMA + GERSCH( 2*I ) = GERSCH( 2*I ) - SIGMA + 130 CONTINUE +* +* Compute the eigenvalues of L D L^T. +* + J = IBEGIN + DO 140 I = 1, IN - 1 + WORK( 2*I-1 ) = ABS( D( J ) ) + WORK( 2*I ) = E( J )*E( J )*WORK( 2*I-1 ) + J = J + 1 + 140 CONTINUE + WORK( 2*IN-1 ) = ABS( D( IEND ) ) +* + CALL DLASQ2( IN, WORK, INFO ) +* + J = IBEGIN + IF( SGNDEF.GT.ZERO ) THEN + DO 150 I = 1, IN + W( J ) = WORK( IN-I+1 ) + J = J + 1 + 150 CONTINUE + ELSE + DO 160 I = 1, IN + W( J ) = -WORK( I ) + J = J + 1 + 160 CONTINUE + END IF + IBEGIN = IEND + 1 + 170 CONTINUE + M = N +* + RETURN +* +* End of DLARRE +* + END diff --git a/costa/native/external/lapack/dlarrf.f b/costa/native/external/lapack/dlarrf.f new file mode 100644 index 000000000..e3994f953 --- /dev/null +++ b/costa/native/external/lapack/dlarrf.f @@ -0,0 +1,150 @@ + SUBROUTINE DLARRF( N, D, L, LD, LLD, IFIRST, ILAST, W, DPLUS, + $ LPLUS, WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER IFIRST, ILAST, INFO, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION D( * ), DPLUS( * ), L( * ), LD( * ), LLD( * ), + $ LPLUS( * ), W( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* Given the initial representation L D L^T and its cluster of close +* eigenvalues (in a relative measure), W( IFIRST ), W( IFIRST+1 ), ... +* W( ILAST ), DLARRF finds a new relatively robust representation +* L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the +* eigenvalues of L(+) D(+) L(+)^T is relatively isolated. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the diagonal matrix D. +* +* L (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) subdiagonal elements of the unit bidiagonal +* matrix L. +* +* LD (input) DOUBLE PRECISION array, dimension (N-1) +* The n-1 elements L(i)*D(i). +* +* LLD (input) DOUBLE PRECISION array, dimension (N-1) +* The n-1 elements L(i)*L(i)*D(i). +* +* IFIRST (input) INTEGER +* The index of the first eigenvalue in the cluster. +* +* ILAST (input) INTEGER +* The index of the last eigenvalue in the cluster. +* +* W (input/output) DOUBLE PRECISION array, dimension (N) +* On input, the eigenvalues of L D L^T in ascending order. +* W( IFIRST ) through W( ILAST ) form the cluster of relatively +* close eigenalues. +* On output, W( IFIRST ) thru' W( ILAST ) are estimates of the +* corresponding eigenvalues of L(+) D(+) L(+)^T. +* +* SIGMA (input) DOUBLE PRECISION +* The shift used to form L(+) D(+) L(+)^T. +* +* DPLUS (output) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the diagonal matrix D(+). +* +* LPLUS (output) DOUBLE PRECISION array, dimension (N) +* The first (n-1) elements of LPLUS contain the subdiagonal +* elements of the unit bidiagonal matrix L(+). LPLUS( N ) is +* set to SIGMA. +* +* WORK (input) DOUBLE PRECISION array, dimension (???) +* Workspace. +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, TWO + PARAMETER ( ZERO = 0.0D0, TWO = 2.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION DELTA, EPS, S, SIGMA +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + INFO = 0 + EPS = DLAMCH( 'Precision' ) + IF( IFIRST.EQ.1 ) THEN + SIGMA = W( IFIRST ) + ELSE IF( ILAST.EQ.N ) THEN + SIGMA = W( ILAST ) + ELSE + INFO = 1 + RETURN + END IF +* +* Compute the new relatively robust representation (RRR) +* + DELTA = TWO*EPS + 10 CONTINUE + IF( IFIRST.EQ.1 ) THEN + SIGMA = SIGMA - ABS( SIGMA )*DELTA + ELSE + SIGMA = SIGMA + ABS( SIGMA )*DELTA + END IF + S = -SIGMA + DO 20 I = 1, N - 1 + DPLUS( I ) = D( I ) + S + LPLUS( I ) = LD( I ) / DPLUS( I ) + S = S*LPLUS( I )*L( I ) - SIGMA + 20 CONTINUE + DPLUS( N ) = D( N ) + S + IF( IFIRST.EQ.1 ) THEN + DO 30 I = 1, N + IF( DPLUS( I ).LT.ZERO ) THEN + DELTA = TWO*DELTA + GO TO 10 + END IF + 30 CONTINUE + ELSE + DO 40 I = 1, N + IF( DPLUS( I ).GT.ZERO ) THEN + DELTA = TWO*DELTA + GO TO 10 + END IF + 40 CONTINUE + END IF + DO 50 I = IFIRST, ILAST + W( I ) = W( I ) - SIGMA + 50 CONTINUE + LPLUS( N ) = SIGMA +* + RETURN +* +* End of DLARRF +* + END diff --git a/costa/native/external/lapack/dlarrv.f b/costa/native/external/lapack/dlarrv.f new file mode 100644 index 000000000..87eaf65b0 --- /dev/null +++ b/costa/native/external/lapack/dlarrv.f @@ -0,0 +1,419 @@ + SUBROUTINE DLARRV( N, D, L, ISPLIT, M, W, IBLOCK, GERSCH, TOL, Z, + $ LDZ, ISUPPZ, WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDZ, M, N + DOUBLE PRECISION TOL +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), ISPLIT( * ), ISUPPZ( * ), + $ IWORK( * ) + DOUBLE PRECISION D( * ), GERSCH( * ), L( * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DLARRV computes the eigenvectors of the tridiagonal matrix +* T = L D L^T given L, D and the eigenvalues of L D L^T. +* The input eigenvalues should have high relative accuracy with +* respect to the entries of L and D. The desired accuracy of the +* output can be specified by the input parameter TOL. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the n diagonal elements of the diagonal matrix D. +* On exit, D may be overwritten. +* +* L (input/output) DOUBLE PRECISION array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the unit +* bidiagonal matrix L in elements 1 to N-1 of L. L(N) need +* not be set. On exit, L is overwritten. +* +* ISPLIT (input) INTEGER array, dimension (N) +* The splitting points, at which T breaks up into submatrices. +* The first submatrix consists of rows/columns 1 to +* ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 +* through ISPLIT( 2 ), etc. +* +* TOL (input) DOUBLE PRECISION +* The absolute error tolerance for the +* eigenvalues/eigenvectors. +* Errors in the input eigenvalues must be bounded by TOL. +* The eigenvectors output have residual norms +* bounded by TOL, and the dot products between different +* eigenvectors are bounded by TOL. TOL must be at least +* N*EPS*|T|, where EPS is the machine precision and |T| is +* the 1-norm of the tridiagonal matrix. +* +* M (input) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (input) DOUBLE PRECISION array, dimension (N) +* The first M elements of W contain the eigenvalues for +* which eigenvectors are to be computed. The eigenvalues +* should be grouped by split-off block and ordered from +* smallest to largest within the block ( The output array +* W from DLARRE is expected here ). +* Errors in W must be bounded by TOL (see above). +* +* IBLOCK (input) INTEGER array, dimension (N) +* The submatrix indices associated with the corresponding +* eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to +* the first submatrix from the top, =2 if W(i) belongs to +* the second submatrix, etc. +* +* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix T +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* ISUPPZ (output) INTEGER ARRAY, dimension ( 2*max(1,M) ) +* The support of the eigenvectors in Z, i.e., the indices +* indicating the nonzero elements in Z. The i-th eigenvector +* is nonzero only in elements ISUPPZ( 2*i-1 ) through +* ISUPPZ( 2*i ). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (13*N) +* +* IWORK (workspace) INTEGER array, dimension (6*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = 1, internal error in DLARRB +* if INFO = 2, internal error in DSTEIN +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MGSSIZ + PARAMETER ( MGSSIZ = 20 ) + DOUBLE PRECISION ZERO, ONE, FOUR + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, FOUR = 4.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL MGSCLS + INTEGER I, IBEGIN, IEND, IINDC1, IINDC2, IINDR, IINDWK, + $ IINFO, IM, IN, INDERR, INDGAP, INDLD, INDLLD, + $ INDWRK, ITER, ITMP1, ITMP2, J, JBLK, K, KTOT, + $ LSBDPT, MAXITR, NCLUS, NDEPTH, NDONE, NEWCLS, + $ NEWFRS, NEWFTT, NEWLST, NEWSIZ, NSPLIT, OLDCLS, + $ OLDFST, OLDIEN, OLDLST, OLDNCL, P, Q + DOUBLE PRECISION EPS, GAP, LAMBDA, MGSTOL, MINGMA, MINRGP, + $ NRMINV, RELGAP, RELTOL, RESID, RQCORR, SIGMA, + $ TMP1, ZTZ +* .. +* .. External Functions .. + DOUBLE PRECISION DDOT, DLAMCH, DNRM2 + EXTERNAL DDOT, DLAMCH, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DLAR1V, DLARRB, DLARRF, DLASET, + $ DSCAL, DSTEIN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN, SQRT +* .. +* .. Local Arrays .. + INTEGER TEMP( 1 ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INDERR = N + 1 + INDLD = 2*N + INDLLD = 3*N + INDGAP = 4*N + INDWRK = 5*N + 1 +* + IINDR = N + IINDC1 = 2*N + IINDC2 = 3*N + IINDWK = 4*N + 1 +* + EPS = DLAMCH( 'Precision' ) +* + DO 10 I = 1, 2*N + IWORK( I ) = 0 + 10 CONTINUE + DO 20 I = 1, M + WORK( INDERR+I-1 ) = EPS*ABS( W( I ) ) + 20 CONTINUE + CALL DLASET( 'Full', N, N, ZERO, ZERO, Z, LDZ ) + MGSTOL = 5.0D0*EPS +* + NSPLIT = IBLOCK( M ) + IBEGIN = 1 + DO 170 JBLK = 1, NSPLIT + IEND = ISPLIT( JBLK ) +* +* Find the eigenvectors of the submatrix indexed IBEGIN +* through IEND. +* + IF( IBEGIN.EQ.IEND ) THEN + Z( IBEGIN, IBEGIN ) = ONE + ISUPPZ( 2*IBEGIN-1 ) = IBEGIN + ISUPPZ( 2*IBEGIN ) = IBEGIN + IBEGIN = IEND + 1 + GO TO 170 + END IF + OLDIEN = IBEGIN - 1 + IN = IEND - OLDIEN + RELTOL = MIN( 1.0D-2, ONE / DBLE( IN ) ) + IM = IN + CALL DCOPY( IM, W( IBEGIN ), 1, WORK, 1 ) + DO 30 I = 1, IN - 1 + WORK( INDGAP+I ) = WORK( I+1 ) - WORK( I ) + 30 CONTINUE + WORK( INDGAP+IN ) = MAX( ABS( WORK( IN ) ), EPS ) + NDONE = 0 +* + NDEPTH = 0 + LSBDPT = 1 + NCLUS = 1 + IWORK( IINDC1+1 ) = 1 + IWORK( IINDC1+2 ) = IN +* +* While( NDONE.LT.IM ) do +* + 40 CONTINUE + IF( NDONE.LT.IM ) THEN + OLDNCL = NCLUS + NCLUS = 0 + LSBDPT = 1 - LSBDPT + DO 150 I = 1, OLDNCL + IF( LSBDPT.EQ.0 ) THEN + OLDCLS = IINDC1 + NEWCLS = IINDC2 + ELSE + OLDCLS = IINDC2 + NEWCLS = IINDC1 + END IF +* +* If NDEPTH > 1, retrieve the relatively robust +* representation (RRR) and perform limited bisection +* (if necessary) to get approximate eigenvalues. +* + J = OLDCLS + 2*I + OLDFST = IWORK( J-1 ) + OLDLST = IWORK( J ) + IF( NDEPTH.GT.0 ) THEN + J = OLDIEN + OLDFST + CALL DCOPY( IN, Z( IBEGIN, J ), 1, D( IBEGIN ), 1 ) + CALL DCOPY( IN, Z( IBEGIN, J+1 ), 1, L( IBEGIN ), 1 ) + SIGMA = L( IEND ) + END IF + K = IBEGIN + DO 50 J = 1, IN - 1 + WORK( INDLD+J ) = D( K )*L( K ) + WORK( INDLLD+J ) = WORK( INDLD+J )*L( K ) + K = K + 1 + 50 CONTINUE + IF( NDEPTH.GT.0 ) THEN + CALL DLARRB( IN, D( IBEGIN ), L( IBEGIN ), + $ WORK( INDLD+1 ), WORK( INDLLD+1 ), + $ OLDFST, OLDLST, SIGMA, RELTOL, WORK, + $ WORK( INDGAP+1 ), WORK( INDERR ), + $ WORK( INDWRK ), IWORK( IINDWK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 1 + RETURN + END IF + END IF +* +* Classify eigenvalues of the current representation (RRR) +* as (i) isolated, (ii) loosely clustered or (iii) tightly +* clustered +* + NEWFRS = OLDFST + DO 140 J = OLDFST, OLDLST + IF( J.EQ.OLDLST .OR. WORK( INDGAP+J ).GE.RELTOL* + $ ABS( WORK( J ) ) ) THEN + NEWLST = J + ELSE +* +* continue (to the next loop) +* + RELGAP = WORK( INDGAP+J ) / ABS( WORK( J ) ) + IF( J.EQ.NEWFRS ) THEN + MINRGP = RELGAP + ELSE + MINRGP = MIN( MINRGP, RELGAP ) + END IF + GO TO 140 + END IF + NEWSIZ = NEWLST - NEWFRS + 1 + MAXITR = 10 + NEWFTT = OLDIEN + NEWFRS + IF( NEWSIZ.GT.1 ) THEN + MGSCLS = NEWSIZ.LE.MGSSIZ .AND. MINRGP.GE.MGSTOL + IF( .NOT.MGSCLS ) THEN + CALL DLARRF( IN, D( IBEGIN ), L( IBEGIN ), + $ WORK( INDLD+1 ), WORK( INDLLD+1 ), + $ NEWFRS, NEWLST, WORK, + $ Z( IBEGIN, NEWFTT ), + $ Z( IBEGIN, NEWFTT+1 ), + $ WORK( INDWRK ), IWORK( IINDWK ), + $ INFO ) + IF( INFO.EQ.0 ) THEN + NCLUS = NCLUS + 1 + K = NEWCLS + 2*NCLUS + IWORK( K-1 ) = NEWFRS + IWORK( K ) = NEWLST + ELSE + INFO = 0 + IF( MINRGP.GE.MGSTOL ) THEN + MGSCLS = .TRUE. + ELSE +* +* Call DSTEIN to process this tight cluster. +* This happens only if MINRGP <= MGSTOL +* and DLARRF returns INFO = 1. The latter +* means that a new RRR to "break" the +* cluster could not be found. +* + WORK( INDWRK ) = D( IBEGIN ) + DO 60 K = 1, IN - 1 + WORK( INDWRK+K ) = D( IBEGIN+K ) + + $ WORK( INDLLD+K ) + 60 CONTINUE + DO 70 K = 1, NEWSIZ + IWORK( IINDWK+K-1 ) = 1 + 70 CONTINUE + DO 80 K = NEWFRS, NEWLST + ISUPPZ( 2*( IBEGIN+K )-3 ) = 1 + ISUPPZ( 2*( IBEGIN+K )-2 ) = IN + 80 CONTINUE + TEMP( 1 ) = IN + CALL DSTEIN( IN, WORK( INDWRK ), + $ WORK( INDLD+1 ), NEWSIZ, + $ WORK( NEWFRS ), + $ IWORK( IINDWK ), TEMP( 1 ), + $ Z( IBEGIN, NEWFTT ), LDZ, + $ WORK( INDWRK+IN ), + $ IWORK( IINDWK+IN ), + $ IWORK( IINDWK+2*IN ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 2 + RETURN + END IF + NDONE = NDONE + NEWSIZ + END IF + END IF + END IF + ELSE + MGSCLS = .FALSE. + END IF + IF( NEWSIZ.EQ.1 .OR. MGSCLS ) THEN + KTOT = NEWFTT + DO 100 K = NEWFRS, NEWLST + ITER = 0 + 90 CONTINUE + LAMBDA = WORK( K ) + CALL DLAR1V( IN, 1, IN, LAMBDA, D( IBEGIN ), + $ L( IBEGIN ), WORK( INDLD+1 ), + $ WORK( INDLLD+1 ), + $ GERSCH( 2*OLDIEN+1 ), + $ Z( IBEGIN, KTOT ), ZTZ, MINGMA, + $ IWORK( IINDR+KTOT ), + $ ISUPPZ( 2*KTOT-1 ), + $ WORK( INDWRK ) ) + TMP1 = ONE / ZTZ + NRMINV = SQRT( TMP1 ) + RESID = ABS( MINGMA )*NRMINV + RQCORR = MINGMA*TMP1 + IF( K.EQ.IN ) THEN + GAP = WORK( INDGAP+K-1 ) + ELSE IF( K.EQ.1 ) THEN + GAP = WORK( INDGAP+K ) + ELSE + GAP = MIN( WORK( INDGAP+K-1 ), + $ WORK( INDGAP+K ) ) + END IF + ITER = ITER + 1 + IF( RESID.GT.TOL*GAP .AND. ABS( RQCORR ).GT. + $ FOUR*EPS*ABS( LAMBDA ) ) THEN + WORK( K ) = LAMBDA + RQCORR + IF( ITER.LT.MAXITR ) THEN + GO TO 90 + END IF + END IF + IWORK( KTOT ) = 1 + IF( NEWSIZ.EQ.1 ) + $ NDONE = NDONE + 1 + CALL DSCAL( IN, NRMINV, Z( IBEGIN, KTOT ), 1 ) + KTOT = KTOT + 1 + 100 CONTINUE + IF( NEWSIZ.GT.1 ) THEN + ITMP1 = ISUPPZ( 2*NEWFTT-1 ) + ITMP2 = ISUPPZ( 2*NEWFTT ) + KTOT = OLDIEN + NEWLST + DO 120 P = NEWFTT + 1, KTOT + DO 110 Q = NEWFTT, P - 1 + TMP1 = -DDOT( IN, Z( IBEGIN, P ), 1, + $ Z( IBEGIN, Q ), 1 ) + CALL DAXPY( IN, TMP1, Z( IBEGIN, Q ), 1, + $ Z( IBEGIN, P ), 1 ) + 110 CONTINUE + TMP1 = ONE / DNRM2( IN, Z( IBEGIN, P ), 1 ) + CALL DSCAL( IN, TMP1, Z( IBEGIN, P ), 1 ) + ITMP1 = MIN( ITMP1, ISUPPZ( 2*P-1 ) ) + ITMP2 = MAX( ITMP2, ISUPPZ( 2*P ) ) + 120 CONTINUE + DO 130 P = NEWFTT, KTOT + ISUPPZ( 2*P-1 ) = ITMP1 + ISUPPZ( 2*P ) = ITMP2 + 130 CONTINUE + NDONE = NDONE + NEWSIZ + END IF + END IF + NEWFRS = J + 1 + 140 CONTINUE + 150 CONTINUE + NDEPTH = NDEPTH + 1 + GO TO 40 + END IF + J = 2*IBEGIN + DO 160 I = IBEGIN, IEND + ISUPPZ( J-1 ) = ISUPPZ( J-1 ) + OLDIEN + ISUPPZ( J ) = ISUPPZ( J ) + OLDIEN + J = J + 2 + 160 CONTINUE + IBEGIN = IEND + 1 + 170 CONTINUE +* + RETURN +* +* End of DLARRV +* + END diff --git a/costa/native/external/lapack/dlartg.f b/costa/native/external/lapack/dlartg.f new file mode 100644 index 000000000..c0cd3d142 --- /dev/null +++ b/costa/native/external/lapack/dlartg.f @@ -0,0 +1,143 @@ + SUBROUTINE DLARTG( F, G, CS, SN, R ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + DOUBLE PRECISION CS, F, G, R, SN +* .. +* +* Purpose +* ======= +* +* DLARTG generate a plane rotation so that +* +* [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. +* [ -SN CS ] [ G ] [ 0 ] +* +* This is a slower, more accurate version of the BLAS1 routine DROTG, +* with the following other differences: +* F and G are unchanged on return. +* If G=0, then CS=1 and SN=0. +* If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any +* floating point operations (saves work in DBDSQR when +* there are zeros on the diagonal). +* +* If F exceeds G in magnitude, CS will be positive. +* +* Arguments +* ========= +* +* F (input) DOUBLE PRECISION +* The first component of vector to be rotated. +* +* G (input) DOUBLE PRECISION +* The second component of vector to be rotated. +* +* CS (output) DOUBLE PRECISION +* The cosine of the rotation. +* +* SN (output) DOUBLE PRECISION +* The sine of the rotation. +* +* R (output) DOUBLE PRECISION +* The nonzero component of the rotated vector. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL FIRST + INTEGER COUNT, I + DOUBLE PRECISION EPS, F1, G1, SAFMIN, SAFMN2, SAFMX2, SCALE +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, SQRT +* .. +* .. Save statement .. + SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + FIRST = .FALSE. + SAFMIN = DLAMCH( 'S' ) + EPS = DLAMCH( 'E' ) + SAFMN2 = DLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) / + $ LOG( DLAMCH( 'B' ) ) / TWO ) + SAFMX2 = ONE / SAFMN2 + END IF + IF( G.EQ.ZERO ) THEN + CS = ONE + SN = ZERO + R = F + ELSE IF( F.EQ.ZERO ) THEN + CS = ZERO + SN = ONE + R = G + ELSE + F1 = F + G1 = G + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.GE.SAFMX2 ) THEN + COUNT = 0 + 10 CONTINUE + COUNT = COUNT + 1 + F1 = F1*SAFMN2 + G1 = G1*SAFMN2 + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.GE.SAFMX2 ) + $ GO TO 10 + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + DO 20 I = 1, COUNT + R = R*SAFMX2 + 20 CONTINUE + ELSE IF( SCALE.LE.SAFMN2 ) THEN + COUNT = 0 + 30 CONTINUE + COUNT = COUNT + 1 + F1 = F1*SAFMX2 + G1 = G1*SAFMX2 + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.LE.SAFMN2 ) + $ GO TO 30 + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + DO 40 I = 1, COUNT + R = R*SAFMN2 + 40 CONTINUE + ELSE + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + END IF + IF( ABS( F ).GT.ABS( G ) .AND. CS.LT.ZERO ) THEN + CS = -CS + SN = -SN + R = -R + END IF + END IF + RETURN +* +* End of DLARTG +* + END diff --git a/costa/native/external/lapack/dlartv.f b/costa/native/external/lapack/dlartv.f new file mode 100644 index 000000000..26d15ad45 --- /dev/null +++ b/costa/native/external/lapack/dlartv.f @@ -0,0 +1,77 @@ + SUBROUTINE DLARTV( N, X, INCX, Y, INCY, C, S, INCC ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ), S( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* DLARTV applies a vector of real plane rotations to elements of the +* real vectors x and y. For i = 1,2,...,n +* +* ( x(i) ) := ( c(i) s(i) ) ( x(i) ) +* ( y(i) ) ( -s(i) c(i) ) ( y(i) ) +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of plane rotations to be applied. +* +* X (input/output) DOUBLE PRECISION array, +* dimension (1+(N-1)*INCX) +* The vector x. +* +* INCX (input) INTEGER +* The increment between elements of X. INCX > 0. +* +* Y (input/output) DOUBLE PRECISION array, +* dimension (1+(N-1)*INCY) +* The vector y. +* +* INCY (input) INTEGER +* The increment between elements of Y. INCY > 0. +* +* C (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) +* The cosines of the plane rotations. +* +* S (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) +* The sines of the plane rotations. +* +* INCC (input) INTEGER +* The increment between elements of C and S. INCC > 0. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IC, IX, IY + DOUBLE PRECISION XI, YI +* .. +* .. Executable Statements .. +* + IX = 1 + IY = 1 + IC = 1 + DO 10 I = 1, N + XI = X( IX ) + YI = Y( IY ) + X( IX ) = C( IC )*XI + S( IC )*YI + Y( IY ) = C( IC )*YI - S( IC )*XI + IX = IX + INCX + IY = IY + INCY + IC = IC + INCC + 10 CONTINUE + RETURN +* +* End of DLARTV +* + END diff --git a/costa/native/external/lapack/dlaruv.f b/costa/native/external/lapack/dlaruv.f new file mode 100644 index 000000000..003769065 --- /dev/null +++ b/costa/native/external/lapack/dlaruv.f @@ -0,0 +1,368 @@ + SUBROUTINE DLARUV( ISEED, N, X ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + INTEGER N +* .. +* .. Array Arguments .. + INTEGER ISEED( 4 ) + DOUBLE PRECISION X( N ) +* .. +* +* Purpose +* ======= +* +* DLARUV returns a vector of n random real numbers from a uniform (0,1) +* distribution (n <= 128). +* +* This is an auxiliary routine called by DLARNV and ZLARNV. +* +* Arguments +* ========= +* +* ISEED (input/output) INTEGER array, dimension (4) +* On entry, the seed of the random number generator; the array +* elements must be between 0 and 4095, and ISEED(4) must be +* odd. +* On exit, the seed is updated. +* +* N (input) INTEGER +* The number of random numbers to be generated. N <= 128. +* +* X (output) DOUBLE PRECISION array, dimension (N) +* The generated random numbers. +* +* Further Details +* =============== +* +* This routine uses a multiplicative congruential method with modulus +* 2**48 and multiplier 33952834046453 (see G.S.Fishman, +* 'Multiplicative congruential random number generators with modulus +* 2**b: an exhaustive analysis for b = 32 and a partial analysis for +* b = 48', Math. Comp. 189, pp 331-344, 1990). +* +* 48-bit integers are stored in 4 integer array elements with 12 bits +* per element. Hence the routine is portable across machines with +* integers of 32 bits or more. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + INTEGER LV, IPW2 + DOUBLE PRECISION R + PARAMETER ( LV = 128, IPW2 = 4096, R = ONE / IPW2 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, I2, I3, I4, IT1, IT2, IT3, IT4, J +* .. +* .. Local Arrays .. + INTEGER MM( LV, 4 ) +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MIN, MOD +* .. +* .. Data statements .. + DATA ( MM( 1, J ), J = 1, 4 ) / 494, 322, 2508, + $ 2549 / + DATA ( MM( 2, J ), J = 1, 4 ) / 2637, 789, 3754, + $ 1145 / + DATA ( MM( 3, J ), J = 1, 4 ) / 255, 1440, 1766, + $ 2253 / + DATA ( MM( 4, J ), J = 1, 4 ) / 2008, 752, 3572, + $ 305 / + DATA ( MM( 5, J ), J = 1, 4 ) / 1253, 2859, 2893, + $ 3301 / + DATA ( MM( 6, J ), J = 1, 4 ) / 3344, 123, 307, + $ 1065 / + DATA ( MM( 7, J ), J = 1, 4 ) / 4084, 1848, 1297, + $ 3133 / + DATA ( MM( 8, J ), J = 1, 4 ) / 1739, 643, 3966, + $ 2913 / + DATA ( MM( 9, J ), J = 1, 4 ) / 3143, 2405, 758, + $ 3285 / + DATA ( MM( 10, J ), J = 1, 4 ) / 3468, 2638, 2598, + $ 1241 / + DATA ( MM( 11, J ), J = 1, 4 ) / 688, 2344, 3406, + $ 1197 / + DATA ( MM( 12, J ), J = 1, 4 ) / 1657, 46, 2922, + $ 3729 / + DATA ( MM( 13, J ), J = 1, 4 ) / 1238, 3814, 1038, + $ 2501 / + DATA ( MM( 14, J ), J = 1, 4 ) / 3166, 913, 2934, + $ 1673 / + DATA ( MM( 15, J ), J = 1, 4 ) / 1292, 3649, 2091, + $ 541 / + DATA ( MM( 16, J ), J = 1, 4 ) / 3422, 339, 2451, + $ 2753 / + DATA ( MM( 17, J ), J = 1, 4 ) / 1270, 3808, 1580, + $ 949 / + DATA ( MM( 18, J ), J = 1, 4 ) / 2016, 822, 1958, + $ 2361 / + DATA ( MM( 19, J ), J = 1, 4 ) / 154, 2832, 2055, + $ 1165 / + DATA ( MM( 20, J ), J = 1, 4 ) / 2862, 3078, 1507, + $ 4081 / + DATA ( MM( 21, J ), J = 1, 4 ) / 697, 3633, 1078, + $ 2725 / + DATA ( MM( 22, J ), J = 1, 4 ) / 1706, 2970, 3273, + $ 3305 / + DATA ( MM( 23, J ), J = 1, 4 ) / 491, 637, 17, + $ 3069 / + DATA ( MM( 24, J ), J = 1, 4 ) / 931, 2249, 854, + $ 3617 / + DATA ( MM( 25, J ), J = 1, 4 ) / 1444, 2081, 2916, + $ 3733 / + DATA ( MM( 26, J ), J = 1, 4 ) / 444, 4019, 3971, + $ 409 / + DATA ( MM( 27, J ), J = 1, 4 ) / 3577, 1478, 2889, + $ 2157 / + DATA ( MM( 28, J ), J = 1, 4 ) / 3944, 242, 3831, + $ 1361 / + DATA ( MM( 29, J ), J = 1, 4 ) / 2184, 481, 2621, + $ 3973 / + DATA ( MM( 30, J ), J = 1, 4 ) / 1661, 2075, 1541, + $ 1865 / + DATA ( MM( 31, J ), J = 1, 4 ) / 3482, 4058, 893, + $ 2525 / + DATA ( MM( 32, J ), J = 1, 4 ) / 657, 622, 736, + $ 1409 / + DATA ( MM( 33, J ), J = 1, 4 ) / 3023, 3376, 3992, + $ 3445 / + DATA ( MM( 34, J ), J = 1, 4 ) / 3618, 812, 787, + $ 3577 / + DATA ( MM( 35, J ), J = 1, 4 ) / 1267, 234, 2125, + $ 77 / + DATA ( MM( 36, J ), J = 1, 4 ) / 1828, 641, 2364, + $ 3761 / + DATA ( MM( 37, J ), J = 1, 4 ) / 164, 4005, 2460, + $ 2149 / + DATA ( MM( 38, J ), J = 1, 4 ) / 3798, 1122, 257, + $ 1449 / + DATA ( MM( 39, J ), J = 1, 4 ) / 3087, 3135, 1574, + $ 3005 / + DATA ( MM( 40, J ), J = 1, 4 ) / 2400, 2640, 3912, + $ 225 / + DATA ( MM( 41, J ), J = 1, 4 ) / 2870, 2302, 1216, + $ 85 / + DATA ( MM( 42, J ), J = 1, 4 ) / 3876, 40, 3248, + $ 3673 / + DATA ( MM( 43, J ), J = 1, 4 ) / 1905, 1832, 3401, + $ 3117 / + DATA ( MM( 44, J ), J = 1, 4 ) / 1593, 2247, 2124, + $ 3089 / + DATA ( MM( 45, J ), J = 1, 4 ) / 1797, 2034, 2762, + $ 1349 / + DATA ( MM( 46, J ), J = 1, 4 ) / 1234, 2637, 149, + $ 2057 / + DATA ( MM( 47, J ), J = 1, 4 ) / 3460, 1287, 2245, + $ 413 / + DATA ( MM( 48, J ), J = 1, 4 ) / 328, 1691, 166, + $ 65 / + DATA ( MM( 49, J ), J = 1, 4 ) / 2861, 496, 466, + $ 1845 / + DATA ( MM( 50, J ), J = 1, 4 ) / 1950, 1597, 4018, + $ 697 / + DATA ( MM( 51, J ), J = 1, 4 ) / 617, 2394, 1399, + $ 3085 / + DATA ( MM( 52, J ), J = 1, 4 ) / 2070, 2584, 190, + $ 3441 / + DATA ( MM( 53, J ), J = 1, 4 ) / 3331, 1843, 2879, + $ 1573 / + DATA ( MM( 54, J ), J = 1, 4 ) / 769, 336, 153, + $ 3689 / + DATA ( MM( 55, J ), J = 1, 4 ) / 1558, 1472, 2320, + $ 2941 / + DATA ( MM( 56, J ), J = 1, 4 ) / 2412, 2407, 18, + $ 929 / + DATA ( MM( 57, J ), J = 1, 4 ) / 2800, 433, 712, + $ 533 / + DATA ( MM( 58, J ), J = 1, 4 ) / 189, 2096, 2159, + $ 2841 / + DATA ( MM( 59, J ), J = 1, 4 ) / 287, 1761, 2318, + $ 4077 / + DATA ( MM( 60, J ), J = 1, 4 ) / 2045, 2810, 2091, + $ 721 / + DATA ( MM( 61, J ), J = 1, 4 ) / 1227, 566, 3443, + $ 2821 / + DATA ( MM( 62, J ), J = 1, 4 ) / 2838, 442, 1510, + $ 2249 / + DATA ( MM( 63, J ), J = 1, 4 ) / 209, 41, 449, + $ 2397 / + DATA ( MM( 64, J ), J = 1, 4 ) / 2770, 1238, 1956, + $ 2817 / + DATA ( MM( 65, J ), J = 1, 4 ) / 3654, 1086, 2201, + $ 245 / + DATA ( MM( 66, J ), J = 1, 4 ) / 3993, 603, 3137, + $ 1913 / + DATA ( MM( 67, J ), J = 1, 4 ) / 192, 840, 3399, + $ 1997 / + DATA ( MM( 68, J ), J = 1, 4 ) / 2253, 3168, 1321, + $ 3121 / + DATA ( MM( 69, J ), J = 1, 4 ) / 3491, 1499, 2271, + $ 997 / + DATA ( MM( 70, J ), J = 1, 4 ) / 2889, 1084, 3667, + $ 1833 / + DATA ( MM( 71, J ), J = 1, 4 ) / 2857, 3438, 2703, + $ 2877 / + DATA ( MM( 72, J ), J = 1, 4 ) / 2094, 2408, 629, + $ 1633 / + DATA ( MM( 73, J ), J = 1, 4 ) / 1818, 1589, 2365, + $ 981 / + DATA ( MM( 74, J ), J = 1, 4 ) / 688, 2391, 2431, + $ 2009 / + DATA ( MM( 75, J ), J = 1, 4 ) / 1407, 288, 1113, + $ 941 / + DATA ( MM( 76, J ), J = 1, 4 ) / 634, 26, 3922, + $ 2449 / + DATA ( MM( 77, J ), J = 1, 4 ) / 3231, 512, 2554, + $ 197 / + DATA ( MM( 78, J ), J = 1, 4 ) / 815, 1456, 184, + $ 2441 / + DATA ( MM( 79, J ), J = 1, 4 ) / 3524, 171, 2099, + $ 285 / + DATA ( MM( 80, J ), J = 1, 4 ) / 1914, 1677, 3228, + $ 1473 / + DATA ( MM( 81, J ), J = 1, 4 ) / 516, 2657, 4012, + $ 2741 / + DATA ( MM( 82, J ), J = 1, 4 ) / 164, 2270, 1921, + $ 3129 / + DATA ( MM( 83, J ), J = 1, 4 ) / 303, 2587, 3452, + $ 909 / + DATA ( MM( 84, J ), J = 1, 4 ) / 2144, 2961, 3901, + $ 2801 / + DATA ( MM( 85, J ), J = 1, 4 ) / 3480, 1970, 572, + $ 421 / + DATA ( MM( 86, J ), J = 1, 4 ) / 119, 1817, 3309, + $ 4073 / + DATA ( MM( 87, J ), J = 1, 4 ) / 3357, 676, 3171, + $ 2813 / + DATA ( MM( 88, J ), J = 1, 4 ) / 837, 1410, 817, + $ 2337 / + DATA ( MM( 89, J ), J = 1, 4 ) / 2826, 3723, 3039, + $ 1429 / + DATA ( MM( 90, J ), J = 1, 4 ) / 2332, 2803, 1696, + $ 1177 / + DATA ( MM( 91, J ), J = 1, 4 ) / 2089, 3185, 1256, + $ 1901 / + DATA ( MM( 92, J ), J = 1, 4 ) / 3780, 184, 3715, + $ 81 / + DATA ( MM( 93, J ), J = 1, 4 ) / 1700, 663, 2077, + $ 1669 / + DATA ( MM( 94, J ), J = 1, 4 ) / 3712, 499, 3019, + $ 2633 / + DATA ( MM( 95, J ), J = 1, 4 ) / 150, 3784, 1497, + $ 2269 / + DATA ( MM( 96, J ), J = 1, 4 ) / 2000, 1631, 1101, + $ 129 / + DATA ( MM( 97, J ), J = 1, 4 ) / 3375, 1925, 717, + $ 1141 / + DATA ( MM( 98, J ), J = 1, 4 ) / 1621, 3912, 51, + $ 249 / + DATA ( MM( 99, J ), J = 1, 4 ) / 3090, 1398, 981, + $ 3917 / + DATA ( MM( 100, J ), J = 1, 4 ) / 3765, 1349, 1978, + $ 2481 / + DATA ( MM( 101, J ), J = 1, 4 ) / 1149, 1441, 1813, + $ 3941 / + DATA ( MM( 102, J ), J = 1, 4 ) / 3146, 2224, 3881, + $ 2217 / + DATA ( MM( 103, J ), J = 1, 4 ) / 33, 2411, 76, + $ 2749 / + DATA ( MM( 104, J ), J = 1, 4 ) / 3082, 1907, 3846, + $ 3041 / + DATA ( MM( 105, J ), J = 1, 4 ) / 2741, 3192, 3694, + $ 1877 / + DATA ( MM( 106, J ), J = 1, 4 ) / 359, 2786, 1682, + $ 345 / + DATA ( MM( 107, J ), J = 1, 4 ) / 3316, 382, 124, + $ 2861 / + DATA ( MM( 108, J ), J = 1, 4 ) / 1749, 37, 1660, + $ 1809 / + DATA ( MM( 109, J ), J = 1, 4 ) / 185, 759, 3997, + $ 3141 / + DATA ( MM( 110, J ), J = 1, 4 ) / 2784, 2948, 479, + $ 2825 / + DATA ( MM( 111, J ), J = 1, 4 ) / 2202, 1862, 1141, + $ 157 / + DATA ( MM( 112, J ), J = 1, 4 ) / 2199, 3802, 886, + $ 2881 / + DATA ( MM( 113, J ), J = 1, 4 ) / 1364, 2423, 3514, + $ 3637 / + DATA ( MM( 114, J ), J = 1, 4 ) / 1244, 2051, 1301, + $ 1465 / + DATA ( MM( 115, J ), J = 1, 4 ) / 2020, 2295, 3604, + $ 2829 / + DATA ( MM( 116, J ), J = 1, 4 ) / 3160, 1332, 1888, + $ 2161 / + DATA ( MM( 117, J ), J = 1, 4 ) / 2785, 1832, 1836, + $ 3365 / + DATA ( MM( 118, J ), J = 1, 4 ) / 2772, 2405, 1990, + $ 361 / + DATA ( MM( 119, J ), J = 1, 4 ) / 1217, 3638, 2058, + $ 2685 / + DATA ( MM( 120, J ), J = 1, 4 ) / 1822, 3661, 692, + $ 3745 / + DATA ( MM( 121, J ), J = 1, 4 ) / 1245, 327, 1194, + $ 2325 / + DATA ( MM( 122, J ), J = 1, 4 ) / 2252, 3660, 20, + $ 3609 / + DATA ( MM( 123, J ), J = 1, 4 ) / 3904, 716, 3285, + $ 3821 / + DATA ( MM( 124, J ), J = 1, 4 ) / 2774, 1842, 2046, + $ 3537 / + DATA ( MM( 125, J ), J = 1, 4 ) / 997, 3987, 2107, + $ 517 / + DATA ( MM( 126, J ), J = 1, 4 ) / 2573, 1368, 3508, + $ 3017 / + DATA ( MM( 127, J ), J = 1, 4 ) / 1148, 1848, 3525, + $ 2141 / + DATA ( MM( 128, J ), J = 1, 4 ) / 545, 2366, 3801, + $ 1537 / +* .. +* .. Executable Statements .. +* + I1 = ISEED( 1 ) + I2 = ISEED( 2 ) + I3 = ISEED( 3 ) + I4 = ISEED( 4 ) +* + DO 10 I = 1, MIN( N, LV ) +* +* Multiply the seed by i-th power of the multiplier modulo 2**48 +* + IT4 = I4*MM( I, 4 ) + IT3 = IT4 / IPW2 + IT4 = IT4 - IPW2*IT3 + IT3 = IT3 + I3*MM( I, 4 ) + I4*MM( I, 3 ) + IT2 = IT3 / IPW2 + IT3 = IT3 - IPW2*IT2 + IT2 = IT2 + I2*MM( I, 4 ) + I3*MM( I, 3 ) + I4*MM( I, 2 ) + IT1 = IT2 / IPW2 + IT2 = IT2 - IPW2*IT1 + IT1 = IT1 + I1*MM( I, 4 ) + I2*MM( I, 3 ) + I3*MM( I, 2 ) + + $ I4*MM( I, 1 ) + IT1 = MOD( IT1, IPW2 ) +* +* Convert 48-bit integer to a real number in the interval (0,1) +* + X( I ) = R*( DBLE( IT1 )+R*( DBLE( IT2 )+R*( DBLE( IT3 )+R* + $ DBLE( IT4 ) ) ) ) + 10 CONTINUE +* +* Return final value of seed +* + ISEED( 1 ) = IT1 + ISEED( 2 ) = IT2 + ISEED( 3 ) = IT3 + ISEED( 4 ) = IT4 + RETURN +* +* End of DLARUV +* + END diff --git a/costa/native/external/lapack/dlarz.f b/costa/native/external/lapack/dlarz.f new file mode 100644 index 000000000..ab0e776dd --- /dev/null +++ b/costa/native/external/lapack/dlarz.f @@ -0,0 +1,153 @@ + SUBROUTINE DLARZ( SIDE, M, N, L, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, L, LDC, M, N + DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLARZ applies a real elementary reflector H to a real M-by-N +* matrix C, from either the left or the right. H is represented in the +* form +* +* H = I - tau * v * v' +* +* where tau is a real scalar and v is a real vector. +* +* If tau = 0, then H is taken to be the unit matrix. +* +* +* H is a product of k elementary reflectors as returned by DTZRZF. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': form H * C +* = 'R': form C * H +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* L (input) INTEGER +* The number of entries of the vector V containing +* the meaningful part of the Householder vectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* V (input) DOUBLE PRECISION array, dimension (1+(L-1)*abs(INCV)) +* The vector v in the representation of H as returned by +* DTZRZF. V is not used if TAU = 0. +* +* INCV (input) INTEGER +* The increment between elements of v. INCV <> 0. +* +* TAU (input) DOUBLE PRECISION +* The value tau in the representation of H. +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by the matrix H * C if SIDE = 'L', +* or C * H if SIDE = 'R'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) DOUBLE PRECISION array, dimension +* (N) if SIDE = 'L' +* or (M) if SIDE = 'R' +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGEMV, DGER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C +* + IF( TAU.NE.ZERO ) THEN +* +* w( 1:n ) = C( 1, 1:n ) +* + CALL DCOPY( N, C, LDC, WORK, 1 ) +* +* w( 1:n ) = w( 1:n ) + C( m-l+1:m, 1:n )' * v( 1:l ) +* + CALL DGEMV( 'Transpose', L, N, ONE, C( M-L+1, 1 ), LDC, V, + $ INCV, ONE, WORK, 1 ) +* +* C( 1, 1:n ) = C( 1, 1:n ) - tau * w( 1:n ) +* + CALL DAXPY( N, -TAU, WORK, 1, C, LDC ) +* +* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... +* tau * v( 1:l ) * w( 1:n )' +* + CALL DGER( L, N, -TAU, V, INCV, WORK, 1, C( M-L+1, 1 ), + $ LDC ) + END IF +* + ELSE +* +* Form C * H +* + IF( TAU.NE.ZERO ) THEN +* +* w( 1:m ) = C( 1:m, 1 ) +* + CALL DCOPY( M, C, 1, WORK, 1 ) +* +* w( 1:m ) = w( 1:m ) + C( 1:m, n-l+1:n, 1:n ) * v( 1:l ) +* + CALL DGEMV( 'No transpose', M, L, ONE, C( 1, N-L+1 ), LDC, + $ V, INCV, ONE, WORK, 1 ) +* +* C( 1:m, 1 ) = C( 1:m, 1 ) - tau * w( 1:m ) +* + CALL DAXPY( M, -TAU, WORK, 1, C, 1 ) +* +* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... +* tau * w( 1:m ) * v( 1:l )' +* + CALL DGER( M, L, -TAU, WORK, 1, V, INCV, C( 1, N-L+1 ), + $ LDC ) +* + END IF +* + END IF +* + RETURN +* +* End of DLARZ +* + END diff --git a/costa/native/external/lapack/dlarzb.f b/costa/native/external/lapack/dlarzb.f new file mode 100644 index 000000000..6ffd9cc1e --- /dev/null +++ b/costa/native/external/lapack/dlarzb.f @@ -0,0 +1,221 @@ + SUBROUTINE DLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, + $ LDV, T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* December 1, 1999 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, L, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* Purpose +* ======= +* +* DLARZB applies a real block reflector H or its transpose H**T to +* a real distributed M-by-N C from the left or the right. +* +* Currently, only STOREV = 'R' and DIRECT = 'B' are supported. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply H or H' from the Left +* = 'R': apply H or H' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply H (No transpose) +* = 'C': apply H' (Transpose) +* +* DIRECT (input) CHARACTER*1 +* Indicates how H is formed from a product of elementary +* reflectors +* = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) +* = 'B': H = H(k) . . . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Indicates how the vectors which define the elementary +* reflectors are stored: +* = 'C': Columnwise (not supported yet) +* = 'R': Rowwise +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* K (input) INTEGER +* The order of the matrix T (= the number of elementary +* reflectors whose product defines the block reflector). +* +* L (input) INTEGER +* The number of columns of the matrix V containing the +* meaningful part of the Householder reflectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* V (input) DOUBLE PRECISION array, dimension (LDV,NV). +* If STOREV = 'C', NV = K; if STOREV = 'R', NV = L. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C', LDV >= L; if STOREV = 'R', LDV >= K. +* +* T (input) DOUBLE PRECISION array, dimension (LDT,K) +* The triangular K-by-K matrix T in the representation of the +* block reflector. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by H*C or H'*C or C*H or C*H'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LDWORK,K) +* +* LDWORK (input) INTEGER +* The leading dimension of the array WORK. +* If SIDE = 'L', LDWORK >= max(1,N); +* if SIDE = 'R', LDWORK >= max(1,M). +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, INFO, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DTRMM, XERBLA +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* +* Check for currently supported options +* + INFO = 0 + IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLARZB', -INFO ) + RETURN + END IF +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C +* +* W( 1:n, 1:k ) = C( 1:k, 1:n )' +* + DO 10 J = 1, K + CALL DCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W( 1:n, 1:k ) = W( 1:n, 1:k ) + ... +* C( m-l+1:m, 1:n )' * V( 1:k, 1:l )' +* + IF( L.GT.0 ) + $ CALL DGEMM( 'Transpose', 'Transpose', N, K, L, ONE, + $ C( M-L+1, 1 ), LDC, V, LDV, ONE, WORK, LDWORK ) +* +* W( 1:n, 1:k ) = W( 1:n, 1:k ) * T' or W( 1:m, 1:k ) * T +* + CALL DTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, ONE, T, + $ LDT, WORK, LDWORK ) +* +* C( 1:k, 1:n ) = C( 1:k, 1:n ) - W( 1:n, 1:k )' +* + DO 30 J = 1, N + DO 20 I = 1, K + C( I, J ) = C( I, J ) - WORK( J, I ) + 20 CONTINUE + 30 CONTINUE +* +* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... +* V( 1:k, 1:l )' * W( 1:n, 1:k )' +* + IF( L.GT.0 ) + $ CALL DGEMM( 'Transpose', 'Transpose', L, N, K, -ONE, V, LDV, + $ WORK, LDWORK, ONE, C( M-L+1, 1 ), LDC ) +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' +* +* W( 1:m, 1:k ) = C( 1:m, 1:k ) +* + DO 40 J = 1, K + CALL DCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W( 1:m, 1:k ) = W( 1:m, 1:k ) + ... +* C( 1:m, n-l+1:n ) * V( 1:k, 1:l )' +* + IF( L.GT.0 ) + $ CALL DGEMM( 'No transpose', 'Transpose', M, K, L, ONE, + $ C( 1, N-L+1 ), LDC, V, LDV, ONE, WORK, LDWORK ) +* +* W( 1:m, 1:k ) = W( 1:m, 1:k ) * T or W( 1:m, 1:k ) * T' +* + CALL DTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, ONE, T, + $ LDT, WORK, LDWORK ) +* +* C( 1:m, 1:k ) = C( 1:m, 1:k ) - W( 1:m, 1:k ) +* + DO 60 J = 1, K + DO 50 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 50 CONTINUE + 60 CONTINUE +* +* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... +* W( 1:m, 1:k ) * V( 1:k, 1:l ) +* + IF( L.GT.0 ) + $ CALL DGEMM( 'No transpose', 'No transpose', M, L, K, -ONE, + $ WORK, LDWORK, V, LDV, ONE, C( 1, N-L+1 ), LDC ) +* + END IF +* + RETURN +* +* End of DLARZB +* + END diff --git a/costa/native/external/lapack/dlarzt.f b/costa/native/external/lapack/dlarzt.f new file mode 100644 index 000000000..06a12d6d0 --- /dev/null +++ b/costa/native/external/lapack/dlarzt.f @@ -0,0 +1,185 @@ + SUBROUTINE DLARZT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* Purpose +* ======= +* +* DLARZT forms the triangular factor T of a real block reflector +* H of order > n, which is defined as a product of k elementary +* reflectors. +* +* If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +* +* If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +* +* If STOREV = 'C', the vector which defines the elementary reflector +* H(i) is stored in the i-th column of the array V, and +* +* H = I - V * T * V' +* +* If STOREV = 'R', the vector which defines the elementary reflector +* H(i) is stored in the i-th row of the array V, and +* +* H = I - V' * T * V +* +* Currently, only STOREV = 'R' and DIRECT = 'B' are supported. +* +* Arguments +* ========= +* +* DIRECT (input) CHARACTER*1 +* Specifies the order in which the elementary reflectors are +* multiplied to form the block reflector: +* = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) +* = 'B': H = H(k) . . . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Specifies how the vectors which define the elementary +* reflectors are stored (see also Further Details): +* = 'C': columnwise (not supported yet) +* = 'R': rowwise +* +* N (input) INTEGER +* The order of the block reflector H. N >= 0. +* +* K (input) INTEGER +* The order of the triangular factor T (= the number of +* elementary reflectors). K >= 1. +* +* V (input/output) DOUBLE PRECISION array, dimension +* (LDV,K) if STOREV = 'C' +* (LDV,N) if STOREV = 'R' +* The matrix V. See further details. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +* +* TAU (input) DOUBLE PRECISION array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i). +* +* T (output) DOUBLE PRECISION array, dimension (LDT,K) +* The k by k triangular factor T of the block reflector. +* If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +* lower triangular. The rest of the array is not used. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* The shape of the matrix V and the storage of the vectors which define +* the H(i) is best illustrated by the following example with n = 5 and +* k = 3. The elements equal to 1 are not stored; the corresponding +* array elements are modified but restored on exit. The rest of the +* array is not used. +* +* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +* +* ______V_____ +* ( v1 v2 v3 ) / \ +* ( v1 v2 v3 ) ( v1 v1 v1 v1 v1 . . . . 1 ) +* V = ( v1 v2 v3 ) ( v2 v2 v2 v2 v2 . . . 1 ) +* ( v1 v2 v3 ) ( v3 v3 v3 v3 v3 . . 1 ) +* ( v1 v2 v3 ) +* . . . +* . . . +* 1 . . +* 1 . +* 1 +* +* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +* +* ______V_____ +* 1 / \ +* . 1 ( 1 . . . . v1 v1 v1 v1 v1 ) +* . . 1 ( . 1 . . . v2 v2 v2 v2 v2 ) +* . . . ( . . 1 . . v3 v3 v3 v3 v3 ) +* . . . +* ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* V = ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DTRMV, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Check for currently supported options +* + INFO = 0 + IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLARZT', -INFO ) + RETURN + END IF +* + DO 20 I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO 10 J = I, K + T( J, I ) = ZERO + 10 CONTINUE + ELSE +* +* general case +* + IF( I.LT.K ) THEN +* +* T(i+1:k,i) = - tau(i) * V(i+1:k,1:n) * V(i,1:n)' +* + CALL DGEMV( 'No transpose', K-I, N, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO, + $ T( I+1, I ), 1 ) +* +* T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL DTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + END IF + T( I, I ) = TAU( I ) + END IF + 20 CONTINUE + RETURN +* +* End of DLARZT +* + END diff --git a/costa/native/external/lapack/dlas2.f b/costa/native/external/lapack/dlas2.f new file mode 100644 index 000000000..55652c087 --- /dev/null +++ b/costa/native/external/lapack/dlas2.f @@ -0,0 +1,122 @@ + SUBROUTINE DLAS2( F, G, H, SSMIN, SSMAX ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + DOUBLE PRECISION F, G, H, SSMAX, SSMIN +* .. +* +* Purpose +* ======= +* +* DLAS2 computes the singular values of the 2-by-2 matrix +* [ F G ] +* [ 0 H ]. +* On return, SSMIN is the smaller singular value and SSMAX is the +* larger singular value. +* +* Arguments +* ========= +* +* F (input) DOUBLE PRECISION +* The (1,1) element of the 2-by-2 matrix. +* +* G (input) DOUBLE PRECISION +* The (1,2) element of the 2-by-2 matrix. +* +* H (input) DOUBLE PRECISION +* The (2,2) element of the 2-by-2 matrix. +* +* SSMIN (output) DOUBLE PRECISION +* The smaller singular value. +* +* SSMAX (output) DOUBLE PRECISION +* The larger singular value. +* +* Further Details +* =============== +* +* Barring over/underflow, all output quantities are correct to within +* a few units in the last place (ulps), even in the absence of a guard +* digit in addition/subtraction. +* +* In IEEE arithmetic, the code works correctly if one matrix element is +* infinite. +* +* Overflow will not occur unless the largest singular value itself +* overflows, or is within a few ulps of overflow. (On machines with +* partial overflow, like the Cray, overflow may occur if the largest +* singular value is within a factor of 2 of overflow.) +* +* Underflow is harmless if underflow is gradual. Otherwise, results +* may correspond to a matrix modified by perturbations of size near +* the underflow threshold. +* +* ==================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION AS, AT, AU, C, FA, FHMN, FHMX, GA, HA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + FA = ABS( F ) + GA = ABS( G ) + HA = ABS( H ) + FHMN = MIN( FA, HA ) + FHMX = MAX( FA, HA ) + IF( FHMN.EQ.ZERO ) THEN + SSMIN = ZERO + IF( FHMX.EQ.ZERO ) THEN + SSMAX = GA + ELSE + SSMAX = MAX( FHMX, GA )*SQRT( ONE+ + $ ( MIN( FHMX, GA ) / MAX( FHMX, GA ) )**2 ) + END IF + ELSE + IF( GA.LT.FHMX ) THEN + AS = ONE + FHMN / FHMX + AT = ( FHMX-FHMN ) / FHMX + AU = ( GA / FHMX )**2 + C = TWO / ( SQRT( AS*AS+AU )+SQRT( AT*AT+AU ) ) + SSMIN = FHMN*C + SSMAX = FHMX / C + ELSE + AU = FHMX / GA + IF( AU.EQ.ZERO ) THEN +* +* Avoid possible harmful underflow if exponent range +* asymmetric (true SSMIN may not underflow even if +* AU underflows) +* + SSMIN = ( FHMN*FHMX ) / GA + SSMAX = GA + ELSE + AS = ONE + FHMN / FHMX + AT = ( FHMX-FHMN ) / FHMX + C = ONE / ( SQRT( ONE+( AS*AU )**2 )+ + $ SQRT( ONE+( AT*AU )**2 ) ) + SSMIN = ( FHMN*C )*AU + SSMIN = SSMIN + SSMIN + SSMAX = GA / ( C+C ) + END IF + END IF + END IF + RETURN +* +* End of DLAS2 +* + END diff --git a/costa/native/external/lapack/dlascl.f b/costa/native/external/lapack/dlascl.f new file mode 100644 index 000000000..4c05d4d79 --- /dev/null +++ b/costa/native/external/lapack/dlascl.f @@ -0,0 +1,268 @@ + SUBROUTINE DLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER TYPE + INTEGER INFO, KL, KU, LDA, M, N + DOUBLE PRECISION CFROM, CTO +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* DLASCL multiplies the M by N real matrix A by the real scalar +* CTO/CFROM. This is done without over/underflow as long as the final +* result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that +* A may be full, upper triangular, lower triangular, upper Hessenberg, +* or banded. +* +* Arguments +* ========= +* +* TYPE (input) CHARACTER*1 +* TYPE indices the storage type of the input matrix. +* = 'G': A is a full matrix. +* = 'L': A is a lower triangular matrix. +* = 'U': A is an upper triangular matrix. +* = 'H': A is an upper Hessenberg matrix. +* = 'B': A is a symmetric band matrix with lower bandwidth KL +* and upper bandwidth KU and with the only the lower +* half stored. +* = 'Q': A is a symmetric band matrix with lower bandwidth KL +* and upper bandwidth KU and with the only the upper +* half stored. +* = 'Z': A is a band matrix with lower bandwidth KL and upper +* bandwidth KU. +* +* KL (input) INTEGER +* The lower bandwidth of A. Referenced only if TYPE = 'B', +* 'Q' or 'Z'. +* +* KU (input) INTEGER +* The upper bandwidth of A. Referenced only if TYPE = 'B', +* 'Q' or 'Z'. +* +* CFROM (input) DOUBLE PRECISION +* CTO (input) DOUBLE PRECISION +* The matrix A is multiplied by CTO/CFROM. A(I,J) is computed +* without over/underflow if the final result CTO*A(I,J)/CFROM +* can be represented without over/underflow. CFROM must be +* nonzero. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,M) +* The matrix to be multiplied by CTO/CFROM. See TYPE for the +* storage type. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* INFO (output) INTEGER +* 0 - successful exit +* <0 - if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER I, ITYPE, J, K1, K2, K3, K4 + DOUBLE PRECISION BIGNUM, CFROM1, CFROMC, CTO1, CTOC, MUL, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 +* + IF( LSAME( TYPE, 'G' ) ) THEN + ITYPE = 0 + ELSE IF( LSAME( TYPE, 'L' ) ) THEN + ITYPE = 1 + ELSE IF( LSAME( TYPE, 'U' ) ) THEN + ITYPE = 2 + ELSE IF( LSAME( TYPE, 'H' ) ) THEN + ITYPE = 3 + ELSE IF( LSAME( TYPE, 'B' ) ) THEN + ITYPE = 4 + ELSE IF( LSAME( TYPE, 'Q' ) ) THEN + ITYPE = 5 + ELSE IF( LSAME( TYPE, 'Z' ) ) THEN + ITYPE = 6 + ELSE + ITYPE = -1 + END IF +* + IF( ITYPE.EQ.-1 ) THEN + INFO = -1 + ELSE IF( CFROM.EQ.ZERO ) THEN + INFO = -4 + ELSE IF( M.LT.0 ) THEN + INFO = -6 + ELSE IF( N.LT.0 .OR. ( ITYPE.EQ.4 .AND. N.NE.M ) .OR. + $ ( ITYPE.EQ.5 .AND. N.NE.M ) ) THEN + INFO = -7 + ELSE IF( ITYPE.LE.3 .AND. LDA.LT.MAX( 1, M ) ) THEN + INFO = -9 + ELSE IF( ITYPE.GE.4 ) THEN + IF( KL.LT.0 .OR. KL.GT.MAX( M-1, 0 ) ) THEN + INFO = -2 + ELSE IF( KU.LT.0 .OR. KU.GT.MAX( N-1, 0 ) .OR. + $ ( ( ITYPE.EQ.4 .OR. ITYPE.EQ.5 ) .AND. KL.NE.KU ) ) + $ THEN + INFO = -3 + ELSE IF( ( ITYPE.EQ.4 .AND. LDA.LT.KL+1 ) .OR. + $ ( ITYPE.EQ.5 .AND. LDA.LT.KU+1 ) .OR. + $ ( ITYPE.EQ.6 .AND. LDA.LT.2*KL+KU+1 ) ) THEN + INFO = -9 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASCL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* + CFROMC = CFROM + CTOC = CTO +* + 10 CONTINUE + CFROM1 = CFROMC*SMLNUM + CTO1 = CTOC / BIGNUM + IF( ABS( CFROM1 ).GT.ABS( CTOC ) .AND. CTOC.NE.ZERO ) THEN + MUL = SMLNUM + DONE = .FALSE. + CFROMC = CFROM1 + ELSE IF( ABS( CTO1 ).GT.ABS( CFROMC ) ) THEN + MUL = BIGNUM + DONE = .FALSE. + CTOC = CTO1 + ELSE + MUL = CTOC / CFROMC + DONE = .TRUE. + END IF +* + IF( ITYPE.EQ.0 ) THEN +* +* Full matrix +* + DO 30 J = 1, N + DO 20 I = 1, M + A( I, J ) = A( I, J )*MUL + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( ITYPE.EQ.1 ) THEN +* +* Lower triangular matrix +* + DO 50 J = 1, N + DO 40 I = J, M + A( I, J ) = A( I, J )*MUL + 40 CONTINUE + 50 CONTINUE +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Upper triangular matrix +* + DO 70 J = 1, N + DO 60 I = 1, MIN( J, M ) + A( I, J ) = A( I, J )*MUL + 60 CONTINUE + 70 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* Upper Hessenberg matrix +* + DO 90 J = 1, N + DO 80 I = 1, MIN( J+1, M ) + A( I, J ) = A( I, J )*MUL + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Lower half of a symmetric band matrix +* + K3 = KL + 1 + K4 = N + 1 + DO 110 J = 1, N + DO 100 I = 1, MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 100 CONTINUE + 110 CONTINUE +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Upper half of a symmetric band matrix +* + K1 = KU + 2 + K3 = KU + 1 + DO 130 J = 1, N + DO 120 I = MAX( K1-J, 1 ), K3 + A( I, J ) = A( I, J )*MUL + 120 CONTINUE + 130 CONTINUE +* + ELSE IF( ITYPE.EQ.6 ) THEN +* +* Band matrix +* + K1 = KL + KU + 2 + K2 = KL + 1 + K3 = 2*KL + KU + 1 + K4 = KL + KU + 1 + M + DO 150 J = 1, N + DO 140 I = MAX( K1-J, K2 ), MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 140 CONTINUE + 150 CONTINUE +* + END IF +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of DLASCL +* + END diff --git a/costa/native/external/lapack/dlasd0.f b/costa/native/external/lapack/dlasd0.f new file mode 100644 index 000000000..f7de695ff --- /dev/null +++ b/costa/native/external/lapack/dlasd0.f @@ -0,0 +1,231 @@ + SUBROUTINE DLASD0( N, SQRE, D, E, U, LDU, VT, LDVT, SMLSIZ, IWORK, + $ WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDU, LDVT, N, SMLSIZ, SQRE +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), U( LDU, * ), VT( LDVT, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* Using a divide and conquer approach, DLASD0 computes the singular +* value decomposition (SVD) of a real upper bidiagonal N-by-M +* matrix B with diagonal D and offdiagonal E, where M = N + SQRE. +* The algorithm computes orthogonal matrices U and VT such that +* B = U * S * VT. The singular values S are overwritten on D. +* +* A related subroutine, DLASDA, computes only the singular values, +* and optionally, the singular vectors in compact form. +* +* Arguments +* ========= +* +* N (input) INTEGER +* On entry, the row dimension of the upper bidiagonal matrix. +* This is also the dimension of the main diagonal array D. +* +* SQRE (input) INTEGER +* Specifies the column dimension of the bidiagonal matrix. +* = 0: The bidiagonal matrix has column dimension M = N; +* = 1: The bidiagonal matrix has column dimension M = N+1; +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry D contains the main diagonal of the bidiagonal +* matrix. +* On exit D, if INFO = 0, contains its singular values. +* +* E (input) DOUBLE PRECISION array, dimension (M-1) +* Contains the subdiagonal entries of the bidiagonal matrix. +* On exit, E has been destroyed. +* +* U (output) DOUBLE PRECISION array, dimension at least (LDQ, N) +* On exit, U contains the left singular vectors. +* +* LDU (input) INTEGER +* On entry, leading dimension of U. +* +* VT (output) DOUBLE PRECISION array, dimension at least (LDVT, M) +* On exit, VT' contains the right singular vectors. +* +* LDVT (input) INTEGER +* On entry, leading dimension of VT. +* +* SMLSIZ (input) INTEGER +* On entry, maximum size of the subproblems at the +* bottom of the computation tree. +* +* IWORK INTEGER work array. +* Dimension must be at least (8 * N) +* +* WORK DOUBLE PRECISION work array. +* Dimension must be at least (3 * M**2 + 2 * M) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an singular value did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, I1, IC, IDXQ, IDXQC, IM1, INODE, ITEMP, IWK, + $ J, LF, LL, LVL, M, NCC, ND, NDB1, NDIML, NDIMR, + $ NL, NLF, NLP1, NLVL, NR, NRF, NRP1, SQREI + DOUBLE PRECISION ALPHA, BETA +* .. +* .. External Subroutines .. + EXTERNAL DLASD1, DLASDQ, DLASDT, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -2 + END IF +* + M = N + SQRE +* + IF( LDU.LT.N ) THEN + INFO = -6 + ELSE IF( LDVT.LT.M ) THEN + INFO = -8 + ELSE IF( SMLSIZ.LT.3 ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASD0', -INFO ) + RETURN + END IF +* +* If the input matrix is too small, call DLASDQ to find the SVD. +* + IF( N.LE.SMLSIZ ) THEN + CALL DLASDQ( 'U', SQRE, N, M, N, 0, D, E, VT, LDVT, U, LDU, U, + $ LDU, WORK, INFO ) + RETURN + END IF +* +* Set up the computation tree. +* + INODE = 1 + NDIML = INODE + N + NDIMR = NDIML + N + IDXQ = NDIMR + N + IWK = IDXQ + N + CALL DLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ), + $ IWORK( NDIMR ), SMLSIZ ) +* +* For the nodes on bottom level of the tree, solve +* their subproblems by DLASDQ. +* + NDB1 = ( ND+1 ) / 2 + NCC = 0 + DO 30 I = NDB1, ND +* +* IC : center row of each node +* NL : number of rows of left subproblem +* NR : number of rows of right subproblem +* NLF: starting row of the left subproblem +* NRF: starting row of the right subproblem +* + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NLP1 = NL + 1 + NR = IWORK( NDIMR+I1 ) + NRP1 = NR + 1 + NLF = IC - NL + NRF = IC + 1 + SQREI = 1 + CALL DLASDQ( 'U', SQREI, NL, NLP1, NL, NCC, D( NLF ), E( NLF ), + $ VT( NLF, NLF ), LDVT, U( NLF, NLF ), LDU, + $ U( NLF, NLF ), LDU, WORK, INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + ITEMP = IDXQ + NLF - 2 + DO 10 J = 1, NL + IWORK( ITEMP+J ) = J + 10 CONTINUE + IF( I.EQ.ND ) THEN + SQREI = SQRE + ELSE + SQREI = 1 + END IF + NRP1 = NR + SQREI + CALL DLASDQ( 'U', SQREI, NR, NRP1, NR, NCC, D( NRF ), E( NRF ), + $ VT( NRF, NRF ), LDVT, U( NRF, NRF ), LDU, + $ U( NRF, NRF ), LDU, WORK, INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + ITEMP = IDXQ + IC + DO 20 J = 1, NR + IWORK( ITEMP+J-1 ) = J + 20 CONTINUE + 30 CONTINUE +* +* Now conquer each subproblem bottom-up. +* + DO 50 LVL = NLVL, 1, -1 +* +* Find the first node LF and last node LL on the +* current level LVL. +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 40 I = LF, LL + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + IF( ( SQRE.EQ.0 ) .AND. ( I.EQ.LL ) ) THEN + SQREI = SQRE + ELSE + SQREI = 1 + END IF + IDXQC = IDXQ + NLF - 1 + ALPHA = D( IC ) + BETA = E( IC ) + CALL DLASD1( NL, NR, SQREI, D( NLF ), ALPHA, BETA, + $ U( NLF, NLF ), LDU, VT( NLF, NLF ), LDVT, + $ IWORK( IDXQC ), IWORK( IWK ), WORK, INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of DLASD0 +* + END diff --git a/costa/native/external/lapack/dlasd1.f b/costa/native/external/lapack/dlasd1.f new file mode 100644 index 000000000..e3c338275 --- /dev/null +++ b/costa/native/external/lapack/dlasd1.f @@ -0,0 +1,233 @@ + SUBROUTINE DLASD1( NL, NR, SQRE, D, ALPHA, BETA, U, LDU, VT, LDVT, + $ IDXQ, IWORK, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDU, LDVT, NL, NR, SQRE + DOUBLE PRECISION ALPHA, BETA +* .. +* .. Array Arguments .. + INTEGER IDXQ( * ), IWORK( * ) + DOUBLE PRECISION D( * ), U( LDU, * ), VT( LDVT, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B, +* where N = NL + NR + 1 and M = N + SQRE. DLASD1 is called from DLASD0. +* +* A related subroutine DLASD7 handles the case in which the singular +* values (and the singular vectors in factored form) are desired. +* +* DLASD1 computes the SVD as follows: +* +* ( D1(in) 0 0 0 ) +* B = U(in) * ( Z1' a Z2' b ) * VT(in) +* ( 0 0 D2(in) 0 ) +* +* = U(out) * ( D(out) 0) * VT(out) +* +* where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M +* with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros +* elsewhere; and the entry b is empty if SQRE = 0. +* +* The left singular vectors of the original matrix are stored in U, and +* the transpose of the right singular vectors are stored in VT, and the +* singular values are in D. The algorithm consists of three stages: +* +* The first stage consists of deflating the size of the problem +* when there are multiple singular values or when there are zeros in +* the Z vector. For each such occurence the dimension of the +* secular equation problem is reduced by one. This stage is +* performed by the routine DLASD2. +* +* The second stage consists of calculating the updated +* singular values. This is done by finding the square roots of the +* roots of the secular equation via the routine DLASD4 (as called +* by DLASD3). This routine also calculates the singular vectors of +* the current problem. +* +* The final stage consists of computing the updated singular vectors +* directly using the updated singular values. The singular vectors +* for the current problem are multiplied with the singular vectors +* from the overall problem. +* +* Arguments +* ========= +* +* NL (input) INTEGER +* The row dimension of the upper block. NL >= 1. +* +* NR (input) INTEGER +* The row dimension of the lower block. NR >= 1. +* +* SQRE (input) INTEGER +* = 0: the lower block is an NR-by-NR square matrix. +* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +* +* The bidiagonal matrix has row dimension N = NL + NR + 1, +* and column dimension M = N + SQRE. +* +* D (input/output) DOUBLE PRECISION array, +* dimension (N = NL+NR+1). +* On entry D(1:NL,1:NL) contains the singular values of the +* upper block; and D(NL+2:N) contains the singular values of +* the lower block. On exit D(1:N) contains the singular values +* of the modified matrix. +* +* ALPHA (input) DOUBLE PRECISION +* Contains the diagonal element associated with the added row. +* +* BETA (input) DOUBLE PRECISION +* Contains the off-diagonal element associated with the added +* row. +* +* U (input/output) DOUBLE PRECISION array, dimension(LDU,N) +* On entry U(1:NL, 1:NL) contains the left singular vectors of +* the upper block; U(NL+2:N, NL+2:N) contains the left singular +* vectors of the lower block. On exit U contains the left +* singular vectors of the bidiagonal matrix. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= max( 1, N ). +* +* VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M) +* where M = N + SQRE. +* On entry VT(1:NL+1, 1:NL+1)' contains the right singular +* vectors of the upper block; VT(NL+2:M, NL+2:M)' contains +* the right singular vectors of the lower block. On exit +* VT' contains the right singular vectors of the +* bidiagonal matrix. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. LDVT >= max( 1, M ). +* +* IDXQ (output) INTEGER array, dimension(N) +* This contains the permutation which will reintegrate the +* subproblem just solved back into sorted order, i.e. +* D( IDXQ( I = 1, N ) ) will be in ascending order. +* +* IWORK (workspace) INTEGER array, dimension( 4 * N ) +* +* WORK (workspace) DOUBLE PRECISION array, dimension( 3*M**2 + 2*M ) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an singular value did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. +* + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER COLTYP, I, IDX, IDXC, IDXP, IQ, ISIGMA, IU2, + $ IVT2, IZ, K, LDQ, LDU2, LDVT2, M, N, N1, N2 + DOUBLE PRECISION ORGNRM +* .. +* .. External Subroutines .. + EXTERNAL DLAMRG, DLASCL, DLASD2, DLASD3, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( NL.LT.1 ) THEN + INFO = -1 + ELSE IF( NR.LT.1 ) THEN + INFO = -2 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASD1', -INFO ) + RETURN + END IF +* + N = NL + NR + 1 + M = N + SQRE +* +* The following values are for bookkeeping purposes only. They are +* integer pointers which indicate the portion of the workspace +* used by a particular array in DLASD2 and DLASD3. +* + LDU2 = N + LDVT2 = M +* + IZ = 1 + ISIGMA = IZ + M + IU2 = ISIGMA + N + IVT2 = IU2 + LDU2*N + IQ = IVT2 + LDVT2*M +* + IDX = 1 + IDXC = IDX + N + COLTYP = IDXC + N + IDXP = COLTYP + N +* +* Scale. +* + ORGNRM = MAX( ABS( ALPHA ), ABS( BETA ) ) + D( NL+1 ) = ZERO + DO 10 I = 1, N + IF( ABS( D( I ) ).GT.ORGNRM ) THEN + ORGNRM = ABS( D( I ) ) + END IF + 10 CONTINUE + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO ) + ALPHA = ALPHA / ORGNRM + BETA = BETA / ORGNRM +* +* Deflate singular values. +* + CALL DLASD2( NL, NR, SQRE, K, D, WORK( IZ ), ALPHA, BETA, U, LDU, + $ VT, LDVT, WORK( ISIGMA ), WORK( IU2 ), LDU2, + $ WORK( IVT2 ), LDVT2, IWORK( IDXP ), IWORK( IDX ), + $ IWORK( IDXC ), IDXQ, IWORK( COLTYP ), INFO ) +* +* Solve Secular Equation and update singular vectors. +* + LDQ = K + CALL DLASD3( NL, NR, SQRE, K, D, WORK( IQ ), LDQ, WORK( ISIGMA ), + $ U, LDU, WORK( IU2 ), LDU2, VT, LDVT, WORK( IVT2 ), + $ LDVT2, IWORK( IDXC ), IWORK( COLTYP ), WORK( IZ ), + $ INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF +* +* Unscale. +* + CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) +* +* Prepare the IDXQ sorting permutation. +* + N1 = K + N2 = N - K + CALL DLAMRG( N1, N2, D, 1, -1, IDXQ ) +* + RETURN +* +* End of DLASD1 +* + END diff --git a/costa/native/external/lapack/dlasd2.f b/costa/native/external/lapack/dlasd2.f new file mode 100644 index 000000000..9109402c7 --- /dev/null +++ b/costa/native/external/lapack/dlasd2.f @@ -0,0 +1,513 @@ + SUBROUTINE DLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT, + $ LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX, + $ IDXC, IDXQ, COLTYP, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE + DOUBLE PRECISION ALPHA, BETA +* .. +* .. Array Arguments .. + INTEGER COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ), + $ IDXQ( * ) + DOUBLE PRECISION D( * ), DSIGMA( * ), U( LDU, * ), + $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ), + $ Z( * ) +* .. +* +* Purpose +* ======= +* +* DLASD2 merges the two sets of singular values together into a single +* sorted set. Then it tries to deflate the size of the problem. +* There are two ways in which deflation can occur: when two or more +* singular values are close together or if there is a tiny entry in the +* Z vector. For each such occurrence the order of the related secular +* equation problem is reduced by one. +* +* DLASD2 is called from DLASD1. +* +* Arguments +* ========= +* +* NL (input) INTEGER +* The row dimension of the upper block. NL >= 1. +* +* NR (input) INTEGER +* The row dimension of the lower block. NR >= 1. +* +* SQRE (input) INTEGER +* = 0: the lower block is an NR-by-NR square matrix. +* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +* +* The bidiagonal matrix has N = NL + NR + 1 rows and +* M = N + SQRE >= N columns. +* +* K (output) INTEGER +* Contains the dimension of the non-deflated matrix, +* This is the order of the related secular equation. 1 <= K <=N. +* +* D (input/output) DOUBLE PRECISION array, dimension(N) +* On entry D contains the singular values of the two submatrices +* to be combined. On exit D contains the trailing (N-K) updated +* singular values (those which were deflated) sorted into +* increasing order. +* +* ALPHA (input) DOUBLE PRECISION +* Contains the diagonal element associated with the added row. +* +* BETA (input) DOUBLE PRECISION +* Contains the off-diagonal element associated with the added +* row. +* +* U (input/output) DOUBLE PRECISION array, dimension(LDU,N) +* On entry U contains the left singular vectors of two +* submatrices in the two square blocks with corners at (1,1), +* (NL, NL), and (NL+2, NL+2), (N,N). +* On exit U contains the trailing (N-K) updated left singular +* vectors (those which were deflated) in its last N-K columns. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= N. +* +* Z (output) DOUBLE PRECISION array, dimension(N) +* On exit Z contains the updating row vector in the secular +* equation. +* +* DSIGMA (output) DOUBLE PRECISION array, dimension (N) +* Contains a copy of the diagonal elements (K-1 singular values +* and one zero) in the secular equation. +* +* U2 (output) DOUBLE PRECISION array, dimension(LDU2,N) +* Contains a copy of the first K-1 left singular vectors which +* will be used by DLASD3 in a matrix multiply (DGEMM) to solve +* for the new left singular vectors. U2 is arranged into four +* blocks. The first block contains a column with 1 at NL+1 and +* zero everywhere else; the second block contains non-zero +* entries only at and above NL; the third contains non-zero +* entries only below NL+1; and the fourth is dense. +* +* LDU2 (input) INTEGER +* The leading dimension of the array U2. LDU2 >= N. +* +* VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M) +* On entry VT' contains the right singular vectors of two +* submatrices in the two square blocks with corners at (1,1), +* (NL+1, NL+1), and (NL+2, NL+2), (M,M). +* On exit VT' contains the trailing (N-K) updated right singular +* vectors (those which were deflated) in its last N-K columns. +* In case SQRE =1, the last row of VT spans the right null +* space. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. LDVT >= M. +* +* VT2 (output) DOUBLE PRECISION array, dimension(LDVT2,N) +* VT2' contains a copy of the first K right singular vectors +* which will be used by DLASD3 in a matrix multiply (DGEMM) to +* solve for the new right singular vectors. VT2 is arranged into +* three blocks. The first block contains a row that corresponds +* to the special 0 diagonal element in SIGMA; the second block +* contains non-zeros only at and before NL +1; the third block +* contains non-zeros only at and after NL +2. +* +* LDVT2 (input) INTEGER +* The leading dimension of the array VT2. LDVT2 >= M. +* +* IDXP (workspace) INTEGER array, dimension(N) +* This will contain the permutation used to place deflated +* values of D at the end of the array. On output IDXP(2:K) +* points to the nondeflated D-values and IDXP(K+1:N) +* points to the deflated singular values. +* +* IDX (workspace) INTEGER array, dimension(N) +* This will contain the permutation used to sort the contents of +* D into ascending order. +* +* IDXC (output) INTEGER array, dimension(N) +* This will contain the permutation used to arrange the columns +* of the deflated U matrix into three groups: the first group +* contains non-zero entries only at and above NL, the second +* contains non-zero entries only below NL+2, and the third is +* dense. +* +* COLTYP (workspace/output) INTEGER array, dimension(N) +* As workspace, this will contain a label which will indicate +* which of the following types a column in the U2 matrix or a +* row in the VT2 matrix is: +* 1 : non-zero in the upper half only +* 2 : non-zero in the lower half only +* 3 : dense +* 4 : deflated +* +* On exit, it is an array of dimension 4, with COLTYP(I) being +* the dimension of the I-th type columns. +* +* IDXQ (input) INTEGER array, dimension(N) +* This contains the permutation which separately sorts the two +* sub-problems in D into ascending order. Note that entries in +* the first hlaf of this permutation must first be moved one +* position backward; and entries in the second half +* must first have NL+1 added to their values. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, EIGHT + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0, + $ EIGHT = 8.0D+0 ) +* .. +* .. Local Arrays .. + INTEGER CTOT( 4 ), PSM( 4 ) +* .. +* .. Local Scalars .. + INTEGER CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M, + $ N, NLP1, NLP2 + DOUBLE PRECISION C, EPS, HLFTOL, S, TAU, TOL, Z1 +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLAPY2 + EXTERNAL DLAMCH, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLACPY, DLAMRG, DLASET, DROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( NL.LT.1 ) THEN + INFO = -1 + ELSE IF( NR.LT.1 ) THEN + INFO = -2 + ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN + INFO = -3 + END IF +* + N = NL + NR + 1 + M = N + SQRE +* + IF( LDU.LT.N ) THEN + INFO = -10 + ELSE IF( LDVT.LT.M ) THEN + INFO = -12 + ELSE IF( LDU2.LT.N ) THEN + INFO = -15 + ELSE IF( LDVT2.LT.M ) THEN + INFO = -17 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASD2', -INFO ) + RETURN + END IF +* + NLP1 = NL + 1 + NLP2 = NL + 2 +* +* Generate the first part of the vector Z; and move the singular +* values in the first part of D one position backward. +* + Z1 = ALPHA*VT( NLP1, NLP1 ) + Z( 1 ) = Z1 + DO 10 I = NL, 1, -1 + Z( I+1 ) = ALPHA*VT( I, NLP1 ) + D( I+1 ) = D( I ) + IDXQ( I+1 ) = IDXQ( I ) + 1 + 10 CONTINUE +* +* Generate the second part of the vector Z. +* + DO 20 I = NLP2, M + Z( I ) = BETA*VT( I, NLP2 ) + 20 CONTINUE +* +* Initialize some reference arrays. +* + DO 30 I = 2, NLP1 + COLTYP( I ) = 1 + 30 CONTINUE + DO 40 I = NLP2, N + COLTYP( I ) = 2 + 40 CONTINUE +* +* Sort the singular values into increasing order +* + DO 50 I = NLP2, N + IDXQ( I ) = IDXQ( I ) + NLP1 + 50 CONTINUE +* +* DSIGMA, IDXC, IDXC, and the first column of U2 +* are used as storage space. +* + DO 60 I = 2, N + DSIGMA( I ) = D( IDXQ( I ) ) + U2( I, 1 ) = Z( IDXQ( I ) ) + IDXC( I ) = COLTYP( IDXQ( I ) ) + 60 CONTINUE +* + CALL DLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) ) +* + DO 70 I = 2, N + IDXI = 1 + IDX( I ) + D( I ) = DSIGMA( IDXI ) + Z( I ) = U2( IDXI, 1 ) + COLTYP( I ) = IDXC( IDXI ) + 70 CONTINUE +* +* Calculate the allowable deflation tolerance +* + EPS = DLAMCH( 'Epsilon' ) + TOL = MAX( ABS( ALPHA ), ABS( BETA ) ) + TOL = EIGHT*EPS*MAX( ABS( D( N ) ), TOL ) +* +* There are 2 kinds of deflation -- first a value in the z-vector +* is small, second two (or more) singular values are very close +* together (their difference is small). +* +* If the value in the z-vector is small, we simply permute the +* array so that the corresponding singular value is moved to the +* end. +* +* If two values in the D-vector are close, we perform a two-sided +* rotation designed to make one of the corresponding z-vector +* entries zero, and then permute the array so that the deflated +* singular value is moved to the end. +* +* If there are multiple singular values then the problem deflates. +* Here the number of equal singular values are found. As each equal +* singular value is found, an elementary reflector is computed to +* rotate the corresponding singular subspace so that the +* corresponding components of Z are zero in this new basis. +* + K = 1 + K2 = N + 1 + DO 80 J = 2, N + IF( ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + IDXP( K2 ) = J + COLTYP( J ) = 4 + IF( J.EQ.N ) + $ GO TO 120 + ELSE + JPREV = J + GO TO 90 + END IF + 80 CONTINUE + 90 CONTINUE + J = JPREV + 100 CONTINUE + J = J + 1 + IF( J.GT.N ) + $ GO TO 110 + IF( ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + IDXP( K2 ) = J + COLTYP( J ) = 4 + ELSE +* +* Check if singular values are close enough to allow deflation. +* + IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN +* +* Deflation is possible. +* + S = Z( JPREV ) + C = Z( J ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = DLAPY2( C, S ) + C = C / TAU + S = -S / TAU + Z( J ) = TAU + Z( JPREV ) = ZERO +* +* Apply back the Givens rotation to the left and right +* singular vector matrices. +* + IDXJP = IDXQ( IDX( JPREV )+1 ) + IDXJ = IDXQ( IDX( J )+1 ) + IF( IDXJP.LE.NLP1 ) THEN + IDXJP = IDXJP - 1 + END IF + IF( IDXJ.LE.NLP1 ) THEN + IDXJ = IDXJ - 1 + END IF + CALL DROT( N, U( 1, IDXJP ), 1, U( 1, IDXJ ), 1, C, S ) + CALL DROT( M, VT( IDXJP, 1 ), LDVT, VT( IDXJ, 1 ), LDVT, C, + $ S ) + IF( COLTYP( J ).NE.COLTYP( JPREV ) ) THEN + COLTYP( J ) = 3 + END IF + COLTYP( JPREV ) = 4 + K2 = K2 - 1 + IDXP( K2 ) = JPREV + JPREV = J + ELSE + K = K + 1 + U2( K, 1 ) = Z( JPREV ) + DSIGMA( K ) = D( JPREV ) + IDXP( K ) = JPREV + JPREV = J + END IF + END IF + GO TO 100 + 110 CONTINUE +* +* Record the last singular value. +* + K = K + 1 + U2( K, 1 ) = Z( JPREV ) + DSIGMA( K ) = D( JPREV ) + IDXP( K ) = JPREV +* + 120 CONTINUE +* +* Count up the total number of the various types of columns, then +* form a permutation which positions the four column types into +* four groups of uniform structure (although one or more of these +* groups may be empty). +* + DO 130 J = 1, 4 + CTOT( J ) = 0 + 130 CONTINUE + DO 140 J = 2, N + CT = COLTYP( J ) + CTOT( CT ) = CTOT( CT ) + 1 + 140 CONTINUE +* +* PSM(*) = Position in SubMatrix (of types 1 through 4) +* + PSM( 1 ) = 2 + PSM( 2 ) = 2 + CTOT( 1 ) + PSM( 3 ) = PSM( 2 ) + CTOT( 2 ) + PSM( 4 ) = PSM( 3 ) + CTOT( 3 ) +* +* Fill out the IDXC array so that the permutation which it induces +* will place all type-1 columns first, all type-2 columns next, +* then all type-3's, and finally all type-4's, starting from the +* second column. This applies similarly to the rows of VT. +* + DO 150 J = 2, N + JP = IDXP( J ) + CT = COLTYP( JP ) + IDXC( PSM( CT ) ) = J + PSM( CT ) = PSM( CT ) + 1 + 150 CONTINUE +* +* Sort the singular values and corresponding singular vectors into +* DSIGMA, U2, and VT2 respectively. The singular values/vectors +* which were not deflated go into the first K slots of DSIGMA, U2, +* and VT2 respectively, while those which were deflated go into the +* last N - K slots, except that the first column/row will be treated +* separately. +* + DO 160 J = 2, N + JP = IDXP( J ) + DSIGMA( J ) = D( JP ) + IDXJ = IDXQ( IDX( IDXP( IDXC( J ) ) )+1 ) + IF( IDXJ.LE.NLP1 ) THEN + IDXJ = IDXJ - 1 + END IF + CALL DCOPY( N, U( 1, IDXJ ), 1, U2( 1, J ), 1 ) + CALL DCOPY( M, VT( IDXJ, 1 ), LDVT, VT2( J, 1 ), LDVT2 ) + 160 CONTINUE +* +* Determine DSIGMA(1), DSIGMA(2) and Z(1) +* + DSIGMA( 1 ) = ZERO + HLFTOL = TOL / TWO + IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL ) + $ DSIGMA( 2 ) = HLFTOL + IF( M.GT.N ) THEN + Z( 1 ) = DLAPY2( Z1, Z( M ) ) + IF( Z( 1 ).LE.TOL ) THEN + C = ONE + S = ZERO + Z( 1 ) = TOL + ELSE + C = Z1 / Z( 1 ) + S = Z( M ) / Z( 1 ) + END IF + ELSE + IF( ABS( Z1 ).LE.TOL ) THEN + Z( 1 ) = TOL + ELSE + Z( 1 ) = Z1 + END IF + END IF +* +* Move the rest of the updating row to Z. +* + CALL DCOPY( K-1, U2( 2, 1 ), 1, Z( 2 ), 1 ) +* +* Determine the first column of U2, the first row of VT2 and the +* last row of VT. +* + CALL DLASET( 'A', N, 1, ZERO, ZERO, U2, LDU2 ) + U2( NLP1, 1 ) = ONE + IF( M.GT.N ) THEN + DO 170 I = 1, NLP1 + VT( M, I ) = -S*VT( NLP1, I ) + VT2( 1, I ) = C*VT( NLP1, I ) + 170 CONTINUE + DO 180 I = NLP2, M + VT2( 1, I ) = S*VT( M, I ) + VT( M, I ) = C*VT( M, I ) + 180 CONTINUE + ELSE + CALL DCOPY( M, VT( NLP1, 1 ), LDVT, VT2( 1, 1 ), LDVT2 ) + END IF + IF( M.GT.N ) THEN + CALL DCOPY( M, VT( M, 1 ), LDVT, VT2( M, 1 ), LDVT2 ) + END IF +* +* The deflated singular values and their corresponding vectors go +* into the back of D, U, and V respectively. +* + IF( N.GT.K ) THEN + CALL DCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 ) + CALL DLACPY( 'A', N, N-K, U2( 1, K+1 ), LDU2, U( 1, K+1 ), + $ LDU ) + CALL DLACPY( 'A', N-K, M, VT2( K+1, 1 ), LDVT2, VT( K+1, 1 ), + $ LDVT ) + END IF +* +* Copy CTOT into COLTYP for referencing in DLASD3. +* + DO 190 J = 1, 4 + COLTYP( J ) = CTOT( J ) + 190 CONTINUE +* + RETURN +* +* End of DLASD2 +* + END diff --git a/costa/native/external/lapack/dlasd3.f b/costa/native/external/lapack/dlasd3.f new file mode 100644 index 000000000..e92b076ab --- /dev/null +++ b/costa/native/external/lapack/dlasd3.f @@ -0,0 +1,359 @@ + SUBROUTINE DLASD3( NL, NR, SQRE, K, D, Q, LDQ, DSIGMA, U, LDU, U2, + $ LDU2, VT, LDVT, VT2, LDVT2, IDXC, CTOT, Z, + $ INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDQ, LDU, LDU2, LDVT, LDVT2, NL, NR, + $ SQRE +* .. +* .. Array Arguments .. + INTEGER CTOT( * ), IDXC( * ) + DOUBLE PRECISION D( * ), DSIGMA( * ), Q( LDQ, * ), U( LDU, * ), + $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ), + $ Z( * ) +* .. +* +* Purpose +* ======= +* +* DLASD3 finds all the square roots of the roots of the secular +* equation, as defined by the values in D and Z. It makes the +* appropriate calls to DLASD4 and then updates the singular +* vectors by matrix multiplication. +* +* This code makes very mild assumptions about floating point +* arithmetic. It will work on machines with a guard digit in +* add/subtract, or on those binary machines without guard digits +* which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. +* It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* DLASD3 is called from DLASD1. +* +* Arguments +* ========= +* +* NL (input) INTEGER +* The row dimension of the upper block. NL >= 1. +* +* NR (input) INTEGER +* The row dimension of the lower block. NR >= 1. +* +* SQRE (input) INTEGER +* = 0: the lower block is an NR-by-NR square matrix. +* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +* +* The bidiagonal matrix has N = NL + NR + 1 rows and +* M = N + SQRE >= N columns. +* +* K (input) INTEGER +* The size of the secular equation, 1 =< K = < N. +* +* D (output) DOUBLE PRECISION array, dimension(K) +* On exit the square roots of the roots of the secular equation, +* in ascending order. +* +* Q (workspace) DOUBLE PRECISION array, +* dimension at least (LDQ,K). +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= K. +* +* DSIGMA (input) DOUBLE PRECISION array, dimension(K) +* The first K elements of this array contain the old roots +* of the deflated updating problem. These are the poles +* of the secular equation. +* +* U (input) DOUBLE PRECISION array, dimension (LDU, N) +* The last N - K columns of this matrix contain the deflated +* left singular vectors. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= N. +* +* U2 (input) DOUBLE PRECISION array, dimension (LDU2, N) +* The first K columns of this matrix contain the non-deflated +* left singular vectors for the split problem. +* +* LDU2 (input) INTEGER +* The leading dimension of the array U2. LDU2 >= N. +* +* VT (input) DOUBLE PRECISION array, dimension (LDVT, M) +* The last M - K columns of VT' contain the deflated +* right singular vectors. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. LDVT >= N. +* +* VT2 (input) DOUBLE PRECISION array, dimension (LDVT2, N) +* The first K columns of VT2' contain the non-deflated +* right singular vectors for the split problem. +* +* LDVT2 (input) INTEGER +* The leading dimension of the array VT2. LDVT2 >= N. +* +* IDXC (input) INTEGER array, dimension ( N ) +* The permutation used to arrange the columns of U (and rows of +* VT) into three groups: the first group contains non-zero +* entries only at and above (or before) NL +1; the second +* contains non-zero entries only at and below (or after) NL+2; +* and the third is dense. The first column of U and the row of +* VT are treated separately, however. +* +* The rows of the singular vectors found by DLASD4 +* must be likewise permuted before the matrix multiplies can +* take place. +* +* CTOT (input) INTEGER array, dimension ( 4 ) +* A count of the total number of the various types of columns +* in U (or rows in VT), as described in IDXC. The fourth column +* type is any column which has been deflated. +* +* Z (input) DOUBLE PRECISION array, dimension (K) +* The first K elements of this array contain the components +* of the deflation-adjusted updating row vector. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an singular value did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO, NEGONE + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0, + $ NEGONE = -1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER CTEMP, I, J, JC, KTEMP, M, N, NLP1, NLP2, NRP1 + DOUBLE PRECISION RHO, TEMP +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3, DNRM2 + EXTERNAL DLAMC3, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DLACPY, DLASCL, DLASD4, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( NL.LT.1 ) THEN + INFO = -1 + ELSE IF( NR.LT.1 ) THEN + INFO = -2 + ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN + INFO = -3 + END IF +* + N = NL + NR + 1 + M = N + SQRE + NLP1 = NL + 1 + NLP2 = NL + 2 +* + IF( ( K.LT.1 ) .OR. ( K.GT.N ) ) THEN + INFO = -4 + ELSE IF( LDQ.LT.K ) THEN + INFO = -7 + ELSE IF( LDU.LT.N ) THEN + INFO = -10 + ELSE IF( LDU2.LT.N ) THEN + INFO = -12 + ELSE IF( LDVT.LT.M ) THEN + INFO = -14 + ELSE IF( LDVT2.LT.M ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASD3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.1 ) THEN + D( 1 ) = ABS( Z( 1 ) ) + CALL DCOPY( M, VT2( 1, 1 ), LDVT2, VT( 1, 1 ), LDVT ) + IF( Z( 1 ).GT.ZERO ) THEN + CALL DCOPY( N, U2( 1, 1 ), 1, U( 1, 1 ), 1 ) + ELSE + DO 10 I = 1, N + U( I, 1 ) = -U2( I, 1 ) + 10 CONTINUE + END IF + RETURN + END IF +* +* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can +* be computed with high relative accuracy (barring over/underflow). +* This is a problem on machines without a guard digit in +* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). +* The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), +* which on any of these machines zeros out the bottommost +* bit of DSIGMA(I) if it is 1; this makes the subsequent +* subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation +* occurs. On binary machines with a guard digit (almost all +* machines) it does not change DSIGMA(I) at all. On hexadecimal +* and decimal machines with a guard digit, it slightly +* changes the bottommost bits of DSIGMA(I). It does not account +* for hexadecimal or decimal machines without guard digits +* (we know of none). We use a subroutine call to compute +* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating +* this code. +* + DO 20 I = 1, K + DSIGMA( I ) = DLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I ) + 20 CONTINUE +* +* Keep a copy of Z. +* + CALL DCOPY( K, Z, 1, Q, 1 ) +* +* Normalize Z. +* + RHO = DNRM2( K, Z, 1 ) + CALL DLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO ) + RHO = RHO*RHO +* +* Find the new singular values. +* + DO 30 J = 1, K + CALL DLASD4( K, J, DSIGMA, Z, U( 1, J ), RHO, D( J ), + $ VT( 1, J ), INFO ) +* +* If the zero finder fails, the computation is terminated. +* + IF( INFO.NE.0 ) THEN + RETURN + END IF + 30 CONTINUE +* +* Compute updated Z. +* + DO 60 I = 1, K + Z( I ) = U( I, K )*VT( I, K ) + DO 40 J = 1, I - 1 + Z( I ) = Z( I )*( U( I, J )*VT( I, J ) / + $ ( DSIGMA( I )-DSIGMA( J ) ) / + $ ( DSIGMA( I )+DSIGMA( J ) ) ) + 40 CONTINUE + DO 50 J = I, K - 1 + Z( I ) = Z( I )*( U( I, J )*VT( I, J ) / + $ ( DSIGMA( I )-DSIGMA( J+1 ) ) / + $ ( DSIGMA( I )+DSIGMA( J+1 ) ) ) + 50 CONTINUE + Z( I ) = SIGN( SQRT( ABS( Z( I ) ) ), Q( I, 1 ) ) + 60 CONTINUE +* +* Compute left singular vectors of the modified diagonal matrix, +* and store related information for the right singular vectors. +* + DO 90 I = 1, K + VT( 1, I ) = Z( 1 ) / U( 1, I ) / VT( 1, I ) + U( 1, I ) = NEGONE + DO 70 J = 2, K + VT( J, I ) = Z( J ) / U( J, I ) / VT( J, I ) + U( J, I ) = DSIGMA( J )*VT( J, I ) + 70 CONTINUE + TEMP = DNRM2( K, U( 1, I ), 1 ) + Q( 1, I ) = U( 1, I ) / TEMP + DO 80 J = 2, K + JC = IDXC( J ) + Q( J, I ) = U( JC, I ) / TEMP + 80 CONTINUE + 90 CONTINUE +* +* Update the left singular vector matrix. +* + IF( K.EQ.2 ) THEN + CALL DGEMM( 'N', 'N', N, K, K, ONE, U2, LDU2, Q, LDQ, ZERO, U, + $ LDU ) + GO TO 100 + END IF + IF( CTOT( 1 ).GT.0 ) THEN + CALL DGEMM( 'N', 'N', NL, K, CTOT( 1 ), ONE, U2( 1, 2 ), LDU2, + $ Q( 2, 1 ), LDQ, ZERO, U( 1, 1 ), LDU ) + IF( CTOT( 3 ).GT.0 ) THEN + KTEMP = 2 + CTOT( 1 ) + CTOT( 2 ) + CALL DGEMM( 'N', 'N', NL, K, CTOT( 3 ), ONE, U2( 1, KTEMP ), + $ LDU2, Q( KTEMP, 1 ), LDQ, ONE, U( 1, 1 ), LDU ) + END IF + ELSE IF( CTOT( 3 ).GT.0 ) THEN + KTEMP = 2 + CTOT( 1 ) + CTOT( 2 ) + CALL DGEMM( 'N', 'N', NL, K, CTOT( 3 ), ONE, U2( 1, KTEMP ), + $ LDU2, Q( KTEMP, 1 ), LDQ, ZERO, U( 1, 1 ), LDU ) + ELSE + CALL DLACPY( 'F', NL, K, U2, LDU2, U, LDU ) + END IF + CALL DCOPY( K, Q( 1, 1 ), LDQ, U( NLP1, 1 ), LDU ) + KTEMP = 2 + CTOT( 1 ) + CTEMP = CTOT( 2 ) + CTOT( 3 ) + CALL DGEMM( 'N', 'N', NR, K, CTEMP, ONE, U2( NLP2, KTEMP ), LDU2, + $ Q( KTEMP, 1 ), LDQ, ZERO, U( NLP2, 1 ), LDU ) +* +* Generate the right singular vectors. +* + 100 CONTINUE + DO 120 I = 1, K + TEMP = DNRM2( K, VT( 1, I ), 1 ) + Q( I, 1 ) = VT( 1, I ) / TEMP + DO 110 J = 2, K + JC = IDXC( J ) + Q( I, J ) = VT( JC, I ) / TEMP + 110 CONTINUE + 120 CONTINUE +* +* Update the right singular vector matrix. +* + IF( K.EQ.2 ) THEN + CALL DGEMM( 'N', 'N', K, M, K, ONE, Q, LDQ, VT2, LDVT2, ZERO, + $ VT, LDVT ) + RETURN + END IF + KTEMP = 1 + CTOT( 1 ) + CALL DGEMM( 'N', 'N', K, NLP1, KTEMP, ONE, Q( 1, 1 ), LDQ, + $ VT2( 1, 1 ), LDVT2, ZERO, VT( 1, 1 ), LDVT ) + KTEMP = 2 + CTOT( 1 ) + CTOT( 2 ) + IF( KTEMP.LE.LDVT2 ) + $ CALL DGEMM( 'N', 'N', K, NLP1, CTOT( 3 ), ONE, Q( 1, KTEMP ), + $ LDQ, VT2( KTEMP, 1 ), LDVT2, ONE, VT( 1, 1 ), + $ LDVT ) +* + KTEMP = CTOT( 1 ) + 1 + NRP1 = NR + SQRE + IF( KTEMP.GT.1 ) THEN + DO 130 I = 1, K + Q( I, KTEMP ) = Q( I, 1 ) + 130 CONTINUE + DO 140 I = NLP2, M + VT2( KTEMP, I ) = VT2( 1, I ) + 140 CONTINUE + END IF + CTEMP = 1 + CTOT( 2 ) + CTOT( 3 ) + CALL DGEMM( 'N', 'N', K, NRP1, CTEMP, ONE, Q( 1, KTEMP ), LDQ, + $ VT2( KTEMP, NLP2 ), LDVT2, ZERO, VT( 1, NLP2 ), LDVT ) +* + RETURN +* +* End of DLASD3 +* + END diff --git a/costa/native/external/lapack/dlasd4.f b/costa/native/external/lapack/dlasd4.f new file mode 100644 index 000000000..fb10ac7ba --- /dev/null +++ b/costa/native/external/lapack/dlasd4.f @@ -0,0 +1,891 @@ + SUBROUTINE DLASD4( N, I, D, Z, DELTA, RHO, SIGMA, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER I, INFO, N + DOUBLE PRECISION RHO, SIGMA +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), DELTA( * ), WORK( * ), Z( * ) +* .. +* +* Purpose +* ======= +* +* This subroutine computes the square root of the I-th updated +* eigenvalue of a positive symmetric rank-one modification to +* a positive diagonal matrix whose entries are given as the squares +* of the corresponding entries in the array d, and that +* +* 0 <= D(i) < D(j) for i < j +* +* and that RHO > 0. This is arranged by the calling routine, and is +* no loss in generality. The rank-one modified system is thus +* +* diag( D ) * diag( D ) + RHO * Z * Z_transpose. +* +* where we assume the Euclidean norm of Z is 1. +* +* The method consists of approximating the rational functions in the +* secular equation by simpler interpolating rational functions. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The length of all arrays. +* +* I (input) INTEGER +* The index of the eigenvalue to be computed. 1 <= I <= N. +* +* D (input) DOUBLE PRECISION array, dimension ( N ) +* The original eigenvalues. It is assumed that they are in +* order, 0 <= D(I) < D(J) for I < J. +* +* Z (input) DOUBLE PRECISION array, dimension ( N ) +* The components of the updating vector. +* +* DELTA (output) DOUBLE PRECISION array, dimension ( N ) +* If N .ne. 1, DELTA contains (D(j) - sigma_I) in its j-th +* component. If N = 1, then DELTA(1) = 1. The vector DELTA +* contains the information necessary to construct the +* (singular) eigenvectors. +* +* RHO (input) DOUBLE PRECISION +* The scalar in the symmetric updating formula. +* +* SIGMA (output) DOUBLE PRECISION +* The computed lambda_I, the I-th updated eigenvalue. +* +* WORK (workspace) DOUBLE PRECISION array, dimension ( N ) +* If N .ne. 1, WORK contains (D(j) + sigma_I) in its j-th +* component. If N = 1, then WORK( 1 ) = 1. +* +* INFO (output) INTEGER +* = 0: successful exit +* > 0: if INFO = 1, the updating process failed. +* +* Internal Parameters +* =================== +* +* Logical variable ORGATI (origin-at-i?) is used for distinguishing +* whether D(i) or D(i+1) is treated as the origin. +* +* ORGATI = .true. origin at i +* ORGATI = .false. origin at i+1 +* +* Logical variable SWTCH3 (switch-for-3-poles?) is for noting +* if we are working with THREE poles! +* +* MAXIT is the maximum number of iterations allowed for each +* eigenvalue. +* +* Further Details +* =============== +* +* Based on contributions by +* Ren-Cang Li, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXIT + PARAMETER ( MAXIT = 20 ) + DOUBLE PRECISION ZERO, ONE, TWO, THREE, FOUR, EIGHT, TEN + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0, + $ THREE = 3.0D+0, FOUR = 4.0D+0, EIGHT = 8.0D+0, + $ TEN = 10.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ORGATI, SWTCH, SWTCH3 + INTEGER II, IIM1, IIP1, IP1, ITER, J, NITER + DOUBLE PRECISION A, B, C, DELSQ, DELSQ2, DPHI, DPSI, DTIIM, + $ DTIIP, DTIPSQ, DTISQ, DTNSQ, DTNSQ1, DW, EPS, + $ ERRETM, ETA, PHI, PREW, PSI, RHOINV, SG2LB, + $ SG2UB, TAU, TEMP, TEMP1, TEMP2, W +* .. +* .. Local Arrays .. + DOUBLE PRECISION DD( 3 ), ZZ( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL DLAED6, DLASD5 +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Since this routine is called in an inner loop, we do no argument +* checking. +* +* Quick return for N=1 and 2. +* + INFO = 0 + IF( N.EQ.1 ) THEN +* +* Presumably, I=1 upon entry +* + SIGMA = SQRT( D( 1 )*D( 1 )+RHO*Z( 1 )*Z( 1 ) ) + DELTA( 1 ) = ONE + WORK( 1 ) = ONE + RETURN + END IF + IF( N.EQ.2 ) THEN + CALL DLASD5( I, D, Z, DELTA, RHO, SIGMA, WORK ) + RETURN + END IF +* +* Compute machine epsilon +* + EPS = DLAMCH( 'Epsilon' ) + RHOINV = ONE / RHO +* +* The case I = N +* + IF( I.EQ.N ) THEN +* +* Initialize some basic variables +* + II = N - 1 + NITER = 1 +* +* Calculate initial guess +* + TEMP = RHO / TWO +* +* If ||Z||_2 is not one, then TEMP should be set to +* RHO * ||Z||_2^2 / TWO +* + TEMP1 = TEMP / ( D( N )+SQRT( D( N )*D( N )+TEMP ) ) + DO 10 J = 1, N + WORK( J ) = D( J ) + D( N ) + TEMP1 + DELTA( J ) = ( D( J )-D( N ) ) - TEMP1 + 10 CONTINUE +* + PSI = ZERO + DO 20 J = 1, N - 2 + PSI = PSI + Z( J )*Z( J ) / ( DELTA( J )*WORK( J ) ) + 20 CONTINUE +* + C = RHOINV + PSI + W = C + Z( II )*Z( II ) / ( DELTA( II )*WORK( II ) ) + + $ Z( N )*Z( N ) / ( DELTA( N )*WORK( N ) ) +* + IF( W.LE.ZERO ) THEN + TEMP1 = SQRT( D( N )*D( N )+RHO ) + TEMP = Z( N-1 )*Z( N-1 ) / ( ( D( N-1 )+TEMP1 )* + $ ( D( N )-D( N-1 )+RHO / ( D( N )+TEMP1 ) ) ) + + $ Z( N )*Z( N ) / RHO +* +* The following TAU is to approximate +* SIGMA_n^2 - D( N )*D( N ) +* + IF( C.LE.TEMP ) THEN + TAU = RHO + ELSE + DELSQ = ( D( N )-D( N-1 ) )*( D( N )+D( N-1 ) ) + A = -C*DELSQ + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N ) + B = Z( N )*Z( N )*DELSQ + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A ) + ELSE + TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C ) + END IF + END IF +* +* It can be proved that +* D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU <= D(N)^2+RHO +* + ELSE + DELSQ = ( D( N )-D( N-1 ) )*( D( N )+D( N-1 ) ) + A = -C*DELSQ + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N ) + B = Z( N )*Z( N )*DELSQ +* +* The following TAU is to approximate +* SIGMA_n^2 - D( N )*D( N ) +* + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A ) + ELSE + TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C ) + END IF +* +* It can be proved that +* D(N)^2 < D(N)^2+TAU < SIGMA(N)^2 < D(N)^2+RHO/2 +* + END IF +* +* The following ETA is to approximate SIGMA_n - D( N ) +* + ETA = TAU / ( D( N )+SQRT( D( N )*D( N )+TAU ) ) +* + SIGMA = D( N ) + ETA + DO 30 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - ETA + WORK( J ) = D( J ) + D( I ) + ETA + 30 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 40 J = 1, II + TEMP = Z( J ) / ( DELTA( J )*WORK( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 40 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / ( DELTA( N )*WORK( N ) ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + GO TO 240 + END IF +* +* Calculate the new step +* + NITER = NITER + 1 + DTNSQ1 = WORK( N-1 )*DELTA( N-1 ) + DTNSQ = WORK( N )*DELTA( N ) + C = W - DTNSQ1*DPSI - DTNSQ*DPHI + A = ( DTNSQ+DTNSQ1 )*W - DTNSQ*DTNSQ1*( DPSI+DPHI ) + B = DTNSQ*DTNSQ1*W + IF( C.LT.ZERO ) + $ C = ABS( C ) + IF( C.EQ.ZERO ) THEN + ETA = RHO - SIGMA*SIGMA + ELSE IF( A.GE.ZERO ) THEN + ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GT.ZERO ) + $ ETA = -W / ( DPSI+DPHI ) + TEMP = ETA - DTNSQ + IF( TEMP.GT.RHO ) + $ ETA = RHO + DTNSQ +* + TAU = TAU + ETA + ETA = ETA / ( SIGMA+SQRT( ETA+SIGMA*SIGMA ) ) + DO 50 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + WORK( J ) = WORK( J ) + ETA + 50 CONTINUE +* + SIGMA = SIGMA + ETA +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 60 J = 1, II + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 60 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / ( WORK( N )*DELTA( N ) ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI +* +* Main loop to update the values of the array DELTA +* + ITER = NITER + 1 +* + DO 90 NITER = ITER, MAXIT +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + GO TO 240 + END IF +* +* Calculate the new step +* + DTNSQ1 = WORK( N-1 )*DELTA( N-1 ) + DTNSQ = WORK( N )*DELTA( N ) + C = W - DTNSQ1*DPSI - DTNSQ*DPHI + A = ( DTNSQ+DTNSQ1 )*W - DTNSQ1*DTNSQ*( DPSI+DPHI ) + B = DTNSQ1*DTNSQ*W + IF( A.GE.ZERO ) THEN + ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GT.ZERO ) + $ ETA = -W / ( DPSI+DPHI ) + TEMP = ETA - DTNSQ + IF( TEMP.LE.ZERO ) + $ ETA = ETA / TWO +* + TAU = TAU + ETA + ETA = ETA / ( SIGMA+SQRT( ETA+SIGMA*SIGMA ) ) + DO 70 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + WORK( J ) = WORK( J ) + ETA + 70 CONTINUE +* + SIGMA = SIGMA + ETA +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 80 J = 1, II + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 80 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / ( WORK( N )*DELTA( N ) ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI + 90 CONTINUE +* +* Return with INFO = 1, NITER = MAXIT and not converged +* + INFO = 1 + GO TO 240 +* +* End for the case I = N +* + ELSE +* +* The case for I < N +* + NITER = 1 + IP1 = I + 1 +* +* Calculate initial guess +* + DELSQ = ( D( IP1 )-D( I ) )*( D( IP1 )+D( I ) ) + DELSQ2 = DELSQ / TWO + TEMP = DELSQ2 / ( D( I )+SQRT( D( I )*D( I )+DELSQ2 ) ) + DO 100 J = 1, N + WORK( J ) = D( J ) + D( I ) + TEMP + DELTA( J ) = ( D( J )-D( I ) ) - TEMP + 100 CONTINUE +* + PSI = ZERO + DO 110 J = 1, I - 1 + PSI = PSI + Z( J )*Z( J ) / ( WORK( J )*DELTA( J ) ) + 110 CONTINUE +* + PHI = ZERO + DO 120 J = N, I + 2, -1 + PHI = PHI + Z( J )*Z( J ) / ( WORK( J )*DELTA( J ) ) + 120 CONTINUE + C = RHOINV + PSI + PHI + W = C + Z( I )*Z( I ) / ( WORK( I )*DELTA( I ) ) + + $ Z( IP1 )*Z( IP1 ) / ( WORK( IP1 )*DELTA( IP1 ) ) +* + IF( W.GT.ZERO ) THEN +* +* d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 +* +* We choose d(i) as origin. +* + ORGATI = .TRUE. + SG2LB = ZERO + SG2UB = DELSQ2 + A = C*DELSQ + Z( I )*Z( I ) + Z( IP1 )*Z( IP1 ) + B = Z( I )*Z( I )*DELSQ + IF( A.GT.ZERO ) THEN + TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + ELSE + TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + END IF +* +* TAU now is an estimation of SIGMA^2 - D( I )^2. The +* following, however, is the corresponding estimation of +* SIGMA - D( I ). +* + ETA = TAU / ( D( I )+SQRT( D( I )*D( I )+TAU ) ) + ELSE +* +* (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 +* +* We choose d(i+1) as origin. +* + ORGATI = .FALSE. + SG2LB = -DELSQ2 + SG2UB = ZERO + A = C*DELSQ - Z( I )*Z( I ) - Z( IP1 )*Z( IP1 ) + B = Z( IP1 )*Z( IP1 )*DELSQ + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( A-SQRT( ABS( A*A+FOUR*B*C ) ) ) + ELSE + TAU = -( A+SQRT( ABS( A*A+FOUR*B*C ) ) ) / ( TWO*C ) + END IF +* +* TAU now is an estimation of SIGMA^2 - D( IP1 )^2. The +* following, however, is the corresponding estimation of +* SIGMA - D( IP1 ). +* + ETA = TAU / ( D( IP1 )+SQRT( ABS( D( IP1 )*D( IP1 )+ + $ TAU ) ) ) + END IF +* + IF( ORGATI ) THEN + II = I + SIGMA = D( I ) + ETA + DO 130 J = 1, N + WORK( J ) = D( J ) + D( I ) + ETA + DELTA( J ) = ( D( J )-D( I ) ) - ETA + 130 CONTINUE + ELSE + II = I + 1 + SIGMA = D( IP1 ) + ETA + DO 140 J = 1, N + WORK( J ) = D( J ) + D( IP1 ) + ETA + DELTA( J ) = ( D( J )-D( IP1 ) ) - ETA + 140 CONTINUE + END IF + IIM1 = II - 1 + IIP1 = II + 1 +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 150 J = 1, IIM1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 150 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 160 J = N, IIP1, -1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 160 CONTINUE +* + W = RHOINV + PHI + PSI +* +* W is the value of the secular function with +* its ii-th element removed. +* + SWTCH3 = .FALSE. + IF( ORGATI ) THEN + IF( W.LT.ZERO ) + $ SWTCH3 = .TRUE. + ELSE + IF( W.GT.ZERO ) + $ SWTCH3 = .TRUE. + END IF + IF( II.EQ.1 .OR. II.EQ.N ) + $ SWTCH3 = .FALSE. +* + TEMP = Z( II ) / ( WORK( II )*DELTA( II ) ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = W + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU )*DW +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + GO TO 240 + END IF +* + IF( W.LE.ZERO ) THEN + SG2LB = MAX( SG2LB, TAU ) + ELSE + SG2UB = MIN( SG2UB, TAU ) + END IF +* +* Calculate the new step +* + NITER = NITER + 1 + IF( .NOT.SWTCH3 ) THEN + DTIPSQ = WORK( IP1 )*DELTA( IP1 ) + DTISQ = WORK( I )*DELTA( I ) + IF( ORGATI ) THEN + C = W - DTIPSQ*DW + DELSQ*( Z( I ) / DTISQ )**2 + ELSE + C = W - DTISQ*DW - DELSQ*( Z( IP1 ) / DTIPSQ )**2 + END IF + A = ( DTIPSQ+DTISQ )*W - DTIPSQ*DTISQ*DW + B = DTIPSQ*DTISQ*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DTIPSQ*DTIPSQ*( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + DTISQ*DTISQ*( DPSI+DPHI ) + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + ELSE +* +* Interpolation using THREE most relevant poles +* + DTIIM = WORK( IIM1 )*DELTA( IIM1 ) + DTIIP = WORK( IIP1 )*DELTA( IIP1 ) + TEMP = RHOINV + PSI + PHI + IF( ORGATI ) THEN + TEMP1 = Z( IIM1 ) / DTIIM + TEMP1 = TEMP1*TEMP1 + C = ( TEMP - DTIIP*( DPSI+DPHI ) ) - + $ ( D( IIM1 )-D( IIP1 ) )*( D( IIM1 )+D( IIP1 ) )*TEMP1 + ZZ( 1 ) = Z( IIM1 )*Z( IIM1 ) + IF( DPSI.LT.TEMP1 ) THEN + ZZ( 3 ) = DTIIP*DTIIP*DPHI + ELSE + ZZ( 3 ) = DTIIP*DTIIP*( ( DPSI-TEMP1 )+DPHI ) + END IF + ELSE + TEMP1 = Z( IIP1 ) / DTIIP + TEMP1 = TEMP1*TEMP1 + C = ( TEMP - DTIIM*( DPSI+DPHI ) ) - + $ ( D( IIP1 )-D( IIM1 ) )*( D( IIM1 )+D( IIP1 ) )*TEMP1 + IF( DPHI.LT.TEMP1 ) THEN + ZZ( 1 ) = DTIIM*DTIIM*DPSI + ELSE + ZZ( 1 ) = DTIIM*DTIIM*( DPSI+( DPHI-TEMP1 ) ) + END IF + ZZ( 3 ) = Z( IIP1 )*Z( IIP1 ) + END IF + ZZ( 2 ) = Z( II )*Z( II ) + DD( 1 ) = DTIIM + DD( 2 ) = DELTA( II )*WORK( II ) + DD( 3 ) = DTIIP + CALL DLAED6( NITER, ORGATI, C, DD, ZZ, W, ETA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 240 + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GE.ZERO ) + $ ETA = -W / DW + IF( ORGATI ) THEN + TEMP1 = WORK( I )*DELTA( I ) + TEMP = ETA - TEMP1 + ELSE + TEMP1 = WORK( IP1 )*DELTA( IP1 ) + TEMP = ETA - TEMP1 + END IF + IF( TEMP.GT.SG2UB .OR. TEMP.LT.SG2LB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( SG2UB-TAU ) / TWO + ELSE + ETA = ( SG2LB-TAU ) / TWO + END IF + END IF +* + TAU = TAU + ETA + ETA = ETA / ( SIGMA+SQRT( SIGMA*SIGMA+ETA ) ) +* + PREW = W +* + SIGMA = SIGMA + ETA + DO 170 J = 1, N + WORK( J ) = WORK( J ) + ETA + DELTA( J ) = DELTA( J ) - ETA + 170 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 180 J = 1, IIM1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 180 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 190 J = N, IIP1, -1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 190 CONTINUE +* + TEMP = Z( II ) / ( WORK( II )*DELTA( II ) ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = RHOINV + PHI + PSI + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU )*DW +* + IF( W.LE.ZERO ) THEN + SG2LB = MAX( SG2LB, TAU ) + ELSE + SG2UB = MIN( SG2UB, TAU ) + END IF +* + SWTCH = .FALSE. + IF( ORGATI ) THEN + IF( -W.GT.ABS( PREW ) / TEN ) + $ SWTCH = .TRUE. + ELSE + IF( W.GT.ABS( PREW ) / TEN ) + $ SWTCH = .TRUE. + END IF +* +* Main loop to update the values of the array DELTA and WORK +* + ITER = NITER + 1 +* + DO 230 NITER = ITER, MAXIT +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + GO TO 240 + END IF +* +* Calculate the new step +* + IF( .NOT.SWTCH3 ) THEN + DTIPSQ = WORK( IP1 )*DELTA( IP1 ) + DTISQ = WORK( I )*DELTA( I ) + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + C = W - DTIPSQ*DW + DELSQ*( Z( I ) / DTISQ )**2 + ELSE + C = W - DTISQ*DW - DELSQ*( Z( IP1 ) / DTIPSQ )**2 + END IF + ELSE + TEMP = Z( II ) / ( WORK( II )*DELTA( II ) ) + IF( ORGATI ) THEN + DPSI = DPSI + TEMP*TEMP + ELSE + DPHI = DPHI + TEMP*TEMP + END IF + C = W - DTISQ*DPSI - DTIPSQ*DPHI + END IF + A = ( DTIPSQ+DTISQ )*W - DTIPSQ*DTISQ*DW + B = DTIPSQ*DTISQ*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DTIPSQ*DTIPSQ* + $ ( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + + $ DTISQ*DTISQ*( DPSI+DPHI ) + END IF + ELSE + A = DTISQ*DTISQ*DPSI + DTIPSQ*DTIPSQ*DPHI + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + ELSE +* +* Interpolation using THREE most relevant poles +* + DTIIM = WORK( IIM1 )*DELTA( IIM1 ) + DTIIP = WORK( IIP1 )*DELTA( IIP1 ) + TEMP = RHOINV + PSI + PHI + IF( SWTCH ) THEN + C = TEMP - DTIIM*DPSI - DTIIP*DPHI + ZZ( 1 ) = DTIIM*DTIIM*DPSI + ZZ( 3 ) = DTIIP*DTIIP*DPHI + ELSE + IF( ORGATI ) THEN + TEMP1 = Z( IIM1 ) / DTIIM + TEMP1 = TEMP1*TEMP1 + TEMP2 = ( D( IIM1 )-D( IIP1 ) )* + $ ( D( IIM1 )+D( IIP1 ) )*TEMP1 + C = TEMP - DTIIP*( DPSI+DPHI ) - TEMP2 + ZZ( 1 ) = Z( IIM1 )*Z( IIM1 ) + IF( DPSI.LT.TEMP1 ) THEN + ZZ( 3 ) = DTIIP*DTIIP*DPHI + ELSE + ZZ( 3 ) = DTIIP*DTIIP*( ( DPSI-TEMP1 )+DPHI ) + END IF + ELSE + TEMP1 = Z( IIP1 ) / DTIIP + TEMP1 = TEMP1*TEMP1 + TEMP2 = ( D( IIP1 )-D( IIM1 ) )* + $ ( D( IIM1 )+D( IIP1 ) )*TEMP1 + C = TEMP - DTIIM*( DPSI+DPHI ) - TEMP2 + IF( DPHI.LT.TEMP1 ) THEN + ZZ( 1 ) = DTIIM*DTIIM*DPSI + ELSE + ZZ( 1 ) = DTIIM*DTIIM*( DPSI+( DPHI-TEMP1 ) ) + END IF + ZZ( 3 ) = Z( IIP1 )*Z( IIP1 ) + END IF + END IF + DD( 1 ) = DTIIM + DD( 2 ) = DELTA( II )*WORK( II ) + DD( 3 ) = DTIIP + CALL DLAED6( NITER, ORGATI, C, DD, ZZ, W, ETA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 240 + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GE.ZERO ) + $ ETA = -W / DW + IF( ORGATI ) THEN + TEMP1 = WORK( I )*DELTA( I ) + TEMP = ETA - TEMP1 + ELSE + TEMP1 = WORK( IP1 )*DELTA( IP1 ) + TEMP = ETA - TEMP1 + END IF + IF( TEMP.GT.SG2UB .OR. TEMP.LT.SG2LB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( SG2UB-TAU ) / TWO + ELSE + ETA = ( SG2LB-TAU ) / TWO + END IF + END IF +* + TAU = TAU + ETA + ETA = ETA / ( SIGMA+SQRT( SIGMA*SIGMA+ETA ) ) +* + SIGMA = SIGMA + ETA + DO 200 J = 1, N + WORK( J ) = WORK( J ) + ETA + DELTA( J ) = DELTA( J ) - ETA + 200 CONTINUE +* + PREW = W +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 210 J = 1, IIM1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 210 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 220 J = N, IIP1, -1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 220 CONTINUE +* + TEMP = Z( II ) / ( WORK( II )*DELTA( II ) ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = RHOINV + PHI + PSI + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU )*DW + IF( W*PREW.GT.ZERO .AND. ABS( W ).GT.ABS( PREW ) / TEN ) + $ SWTCH = .NOT.SWTCH +* + IF( W.LE.ZERO ) THEN + SG2LB = MAX( SG2LB, TAU ) + ELSE + SG2UB = MIN( SG2UB, TAU ) + END IF +* + 230 CONTINUE +* +* Return with INFO = 1, NITER = MAXIT and not converged +* + INFO = 1 +* + END IF +* + 240 CONTINUE + RETURN +* +* End of DLASD4 +* + END diff --git a/costa/native/external/lapack/dlasd5.f b/costa/native/external/lapack/dlasd5.f new file mode 100644 index 000000000..aa33f8c7b --- /dev/null +++ b/costa/native/external/lapack/dlasd5.f @@ -0,0 +1,164 @@ + SUBROUTINE DLASD5( I, D, Z, DELTA, RHO, DSIGMA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER I + DOUBLE PRECISION DSIGMA, RHO +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( 2 ), DELTA( 2 ), WORK( 2 ), Z( 2 ) +* .. +* +* Purpose +* ======= +* +* This subroutine computes the square root of the I-th eigenvalue +* of a positive symmetric rank-one modification of a 2-by-2 diagonal +* matrix +* +* diag( D ) * diag( D ) + RHO * Z * transpose(Z) . +* +* The diagonal entries in the array D are assumed to satisfy +* +* 0 <= D(i) < D(j) for i < j . +* +* We also assume RHO > 0 and that the Euclidean norm of the vector +* Z is one. +* +* Arguments +* ========= +* +* I (input) INTEGER +* The index of the eigenvalue to be computed. I = 1 or I = 2. +* +* D (input) DOUBLE PRECISION array, dimension ( 2 ) +* The original eigenvalues. We assume 0 <= D(1) < D(2). +* +* Z (input) DOUBLE PRECISION array, dimension ( 2 ) +* The components of the updating vector. +* +* DELTA (output) DOUBLE PRECISION array, dimension ( 2 ) +* Contains (D(j) - lambda_I) in its j-th component. +* The vector DELTA contains the information necessary +* to construct the eigenvectors. +* +* RHO (input) DOUBLE PRECISION +* The scalar in the symmetric updating formula. +* +* DSIGMA (output) DOUBLE PRECISION +* The computed lambda_I, the I-th updated eigenvalue. +* +* WORK (workspace) DOUBLE PRECISION array, dimension ( 2 ) +* WORK contains (D(j) + sigma_I) in its j-th component. +* +* Further Details +* =============== +* +* Based on contributions by +* Ren-Cang Li, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, THREE, FOUR + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0, + $ THREE = 3.0D+0, FOUR = 4.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION B, C, DEL, DELSQ, TAU, W +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + DEL = D( 2 ) - D( 1 ) + DELSQ = DEL*( D( 2 )+D( 1 ) ) + IF( I.EQ.1 ) THEN + W = ONE + FOUR*RHO*( Z( 2 )*Z( 2 ) / ( D( 1 )+THREE*D( 2 ) )- + $ Z( 1 )*Z( 1 ) / ( THREE*D( 1 )+D( 2 ) ) ) / DEL + IF( W.GT.ZERO ) THEN + B = DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 1 )*Z( 1 )*DELSQ +* +* B > ZERO, always +* +* The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 ) +* + TAU = TWO*C / ( B+SQRT( ABS( B*B-FOUR*C ) ) ) +* +* The following TAU is DSIGMA - D( 1 ) +* + TAU = TAU / ( D( 1 )+SQRT( D( 1 )*D( 1 )+TAU ) ) + DSIGMA = D( 1 ) + TAU + DELTA( 1 ) = -TAU + DELTA( 2 ) = DEL - TAU + WORK( 1 ) = TWO*D( 1 ) + TAU + WORK( 2 ) = ( D( 1 )+TAU ) + D( 2 ) +* DELTA( 1 ) = -Z( 1 ) / TAU +* DELTA( 2 ) = Z( 2 ) / ( DEL-TAU ) + ELSE + B = -DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 2 )*Z( 2 )*DELSQ +* +* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) +* + IF( B.GT.ZERO ) THEN + TAU = -TWO*C / ( B+SQRT( B*B+FOUR*C ) ) + ELSE + TAU = ( B-SQRT( B*B+FOUR*C ) ) / TWO + END IF +* +* The following TAU is DSIGMA - D( 2 ) +* + TAU = TAU / ( D( 2 )+SQRT( ABS( D( 2 )*D( 2 )+TAU ) ) ) + DSIGMA = D( 2 ) + TAU + DELTA( 1 ) = -( DEL+TAU ) + DELTA( 2 ) = -TAU + WORK( 1 ) = D( 1 ) + TAU + D( 2 ) + WORK( 2 ) = TWO*D( 2 ) + TAU +* DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) +* DELTA( 2 ) = -Z( 2 ) / TAU + END IF +* TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) +* DELTA( 1 ) = DELTA( 1 ) / TEMP +* DELTA( 2 ) = DELTA( 2 ) / TEMP + ELSE +* +* Now I=2 +* + B = -DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 2 )*Z( 2 )*DELSQ +* +* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) +* + IF( B.GT.ZERO ) THEN + TAU = ( B+SQRT( B*B+FOUR*C ) ) / TWO + ELSE + TAU = TWO*C / ( -B+SQRT( B*B+FOUR*C ) ) + END IF +* +* The following TAU is DSIGMA - D( 2 ) +* + TAU = TAU / ( D( 2 )+SQRT( D( 2 )*D( 2 )+TAU ) ) + DSIGMA = D( 2 ) + TAU + DELTA( 1 ) = -( DEL+TAU ) + DELTA( 2 ) = -TAU + WORK( 1 ) = D( 1 ) + TAU + D( 2 ) + WORK( 2 ) = TWO*D( 2 ) + TAU +* DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) +* DELTA( 2 ) = -Z( 2 ) / TAU +* TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) +* DELTA( 1 ) = DELTA( 1 ) / TEMP +* DELTA( 2 ) = DELTA( 2 ) / TEMP + END IF + RETURN +* +* End of DLASD5 +* + END diff --git a/costa/native/external/lapack/dlasd6.f b/costa/native/external/lapack/dlasd6.f new file mode 100644 index 000000000..2001ae036 --- /dev/null +++ b/costa/native/external/lapack/dlasd6.f @@ -0,0 +1,306 @@ + SUBROUTINE DLASD6( ICOMPQ, NL, NR, SQRE, D, VF, VL, ALPHA, BETA, + $ IDXQ, PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, + $ LDGNUM, POLES, DIFL, DIFR, Z, K, C, S, WORK, + $ IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL, + $ NR, SQRE + DOUBLE PRECISION ALPHA, BETA, C, S +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), IDXQ( * ), IWORK( * ), + $ PERM( * ) + DOUBLE PRECISION D( * ), DIFL( * ), DIFR( * ), + $ GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ), + $ VF( * ), VL( * ), WORK( * ), Z( * ) +* .. +* +* Purpose +* ======= +* +* DLASD6 computes the SVD of an updated upper bidiagonal matrix B +* obtained by merging two smaller ones by appending a row. This +* routine is used only for the problem which requires all singular +* values and optionally singular vector matrices in factored form. +* B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. +* A related subroutine, DLASD1, handles the case in which all singular +* values and singular vectors of the bidiagonal matrix are desired. +* +* DLASD6 computes the SVD as follows: +* +* ( D1(in) 0 0 0 ) +* B = U(in) * ( Z1' a Z2' b ) * VT(in) +* ( 0 0 D2(in) 0 ) +* +* = U(out) * ( D(out) 0) * VT(out) +* +* where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M +* with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros +* elsewhere; and the entry b is empty if SQRE = 0. +* +* The singular values of B can be computed using D1, D2, the first +* components of all the right singular vectors of the lower block, and +* the last components of all the right singular vectors of the upper +* block. These components are stored and updated in VF and VL, +* respectively, in DLASD6. Hence U and VT are not explicitly +* referenced. +* +* The singular values are stored in D. The algorithm consists of two +* stages: +* +* The first stage consists of deflating the size of the problem +* when there are multiple singular values or if there is a zero +* in the Z vector. For each such occurence the dimension of the +* secular equation problem is reduced by one. This stage is +* performed by the routine DLASD7. +* +* The second stage consists of calculating the updated +* singular values. This is done by finding the roots of the +* secular equation via the routine DLASD4 (as called by DLASD8). +* This routine also updates VF and VL and computes the distances +* between the updated singular values and the old singular +* values. +* +* DLASD6 is called from DLASDA. +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether singular vectors are to be computed in +* factored form: +* = 0: Compute singular values only. +* = 1: Compute singular vectors in factored form as well. +* +* NL (input) INTEGER +* The row dimension of the upper block. NL >= 1. +* +* NR (input) INTEGER +* The row dimension of the lower block. NR >= 1. +* +* SQRE (input) INTEGER +* = 0: the lower block is an NR-by-NR square matrix. +* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +* +* The bidiagonal matrix has row dimension N = NL + NR + 1, +* and column dimension M = N + SQRE. +* +* D (input/output) DOUBLE PRECISION array, dimension ( NL+NR+1 ). +* On entry D(1:NL,1:NL) contains the singular values of the +* upper block, and D(NL+2:N) contains the singular values +* of the lower block. On exit D(1:N) contains the singular +* values of the modified matrix. +* +* VF (input/output) DOUBLE PRECISION array, dimension ( M ) +* On entry, VF(1:NL+1) contains the first components of all +* right singular vectors of the upper block; and VF(NL+2:M) +* contains the first components of all right singular vectors +* of the lower block. On exit, VF contains the first components +* of all right singular vectors of the bidiagonal matrix. +* +* VL (input/output) DOUBLE PRECISION array, dimension ( M ) +* On entry, VL(1:NL+1) contains the last components of all +* right singular vectors of the upper block; and VL(NL+2:M) +* contains the last components of all right singular vectors of +* the lower block. On exit, VL contains the last components of +* all right singular vectors of the bidiagonal matrix. +* +* ALPHA (input) DOUBLE PRECISION +* Contains the diagonal element associated with the added row. +* +* BETA (input) DOUBLE PRECISION +* Contains the off-diagonal element associated with the added +* row. +* +* IDXQ (output) INTEGER array, dimension ( N ) +* This contains the permutation which will reintegrate the +* subproblem just solved back into sorted order, i.e. +* D( IDXQ( I = 1, N ) ) will be in ascending order. +* +* PERM (output) INTEGER array, dimension ( N ) +* The permutations (from deflation and sorting) to be applied +* to each block. Not referenced if ICOMPQ = 0. +* +* GIVPTR (output) INTEGER +* The number of Givens rotations which took place in this +* subproblem. Not referenced if ICOMPQ = 0. +* +* GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) +* Each pair of numbers indicates a pair of columns to take place +* in a Givens rotation. Not referenced if ICOMPQ = 0. +* +* LDGCOL (input) INTEGER +* leading dimension of GIVCOL, must be at least N. +* +* GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) +* Each number indicates the C or S value to be used in the +* corresponding Givens rotation. Not referenced if ICOMPQ = 0. +* +* LDGNUM (input) INTEGER +* The leading dimension of GIVNUM and POLES, must be at least N. +* +* POLES (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) +* On exit, POLES(1,*) is an array containing the new singular +* values obtained from solving the secular equation, and +* POLES(2,*) is an array containing the poles in the secular +* equation. Not referenced if ICOMPQ = 0. +* +* DIFL (output) DOUBLE PRECISION array, dimension ( N ) +* On exit, DIFL(I) is the distance between I-th updated +* (undeflated) singular value and the I-th (undeflated) old +* singular value. +* +* DIFR (output) DOUBLE PRECISION array, +* dimension ( LDGNUM, 2 ) if ICOMPQ = 1 and +* dimension ( N ) if ICOMPQ = 0. +* On exit, DIFR(I, 1) is the distance between I-th updated +* (undeflated) singular value and the I+1-th (undeflated) old +* singular value. +* +* If ICOMPQ = 1, DIFR(1:K,2) is an array containing the +* normalizing factors for the right singular vector matrix. +* +* See DLASD8 for details on DIFL and DIFR. +* +* Z (output) DOUBLE PRECISION array, dimension ( M ) +* The first elements of this array contain the components +* of the deflation-adjusted updating row vector. +* +* K (output) INTEGER +* Contains the dimension of the non-deflated matrix, +* This is the order of the related secular equation. 1 <= K <=N. +* +* C (output) DOUBLE PRECISION +* C contains garbage if SQRE =0 and the C-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* S (output) DOUBLE PRECISION +* S contains garbage if SQRE =0 and the S-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* WORK (workspace) DOUBLE PRECISION array, dimension ( 4 * M ) +* +* IWORK (workspace) INTEGER array, dimension ( 3 * N ) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an singular value did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IDX, IDXC, IDXP, ISIGMA, IVFW, IVLW, IW, M, + $ N, N1, N2 + DOUBLE PRECISION ORGNRM +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLAMRG, DLASCL, DLASD7, DLASD8, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + N = NL + NR + 1 + M = N + SQRE +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( NL.LT.1 ) THEN + INFO = -2 + ELSE IF( NR.LT.1 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -14 + ELSE IF( LDGNUM.LT.N ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASD6', -INFO ) + RETURN + END IF +* +* The following values are for bookkeeping purposes only. They are +* integer pointers which indicate the portion of the workspace +* used by a particular array in DLASD7 and DLASD8. +* + ISIGMA = 1 + IW = ISIGMA + N + IVFW = IW + M + IVLW = IVFW + M +* + IDX = 1 + IDXC = IDX + N + IDXP = IDXC + N +* +* Scale. +* + ORGNRM = MAX( ABS( ALPHA ), ABS( BETA ) ) + D( NL+1 ) = ZERO + DO 10 I = 1, N + IF( ABS( D( I ) ).GT.ORGNRM ) THEN + ORGNRM = ABS( D( I ) ) + END IF + 10 CONTINUE + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO ) + ALPHA = ALPHA / ORGNRM + BETA = BETA / ORGNRM +* +* Sort and Deflate singular values. +* + CALL DLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, WORK( IW ), VF, + $ WORK( IVFW ), VL, WORK( IVLW ), ALPHA, BETA, + $ WORK( ISIGMA ), IWORK( IDX ), IWORK( IDXP ), IDXQ, + $ PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, C, S, + $ INFO ) +* +* Solve Secular Equation, compute DIFL, DIFR, and update VF, VL. +* + CALL DLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDGNUM, + $ WORK( ISIGMA ), WORK( IW ), INFO ) +* +* Save the poles if ICOMPQ = 1. +* + IF( ICOMPQ.EQ.1 ) THEN + CALL DCOPY( K, D, 1, POLES( 1, 1 ), 1 ) + CALL DCOPY( K, WORK( ISIGMA ), 1, POLES( 1, 2 ), 1 ) + END IF +* +* Unscale. +* + CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) +* +* Prepare the IDXQ sorting permutation. +* + N1 = K + N2 = N - K + CALL DLAMRG( N1, N2, D, 1, -1, IDXQ ) +* + RETURN +* +* End of DLASD6 +* + END diff --git a/costa/native/external/lapack/dlasd7.f b/costa/native/external/lapack/dlasd7.f new file mode 100644 index 000000000..c96ff5da8 --- /dev/null +++ b/costa/native/external/lapack/dlasd7.f @@ -0,0 +1,445 @@ + SUBROUTINE DLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, ZW, VF, VFW, VL, + $ VLW, ALPHA, BETA, DSIGMA, IDX, IDXP, IDXQ, + $ PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, + $ C, S, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL, + $ NR, SQRE + DOUBLE PRECISION ALPHA, BETA, C, S +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), IDX( * ), IDXP( * ), + $ IDXQ( * ), PERM( * ) + DOUBLE PRECISION D( * ), DSIGMA( * ), GIVNUM( LDGNUM, * ), + $ VF( * ), VFW( * ), VL( * ), VLW( * ), Z( * ), + $ ZW( * ) +* .. +* +* Purpose +* ======= +* +* DLASD7 merges the two sets of singular values together into a single +* sorted set. Then it tries to deflate the size of the problem. There +* are two ways in which deflation can occur: when two or more singular +* values are close together or if there is a tiny entry in the Z +* vector. For each such occurrence the order of the related +* secular equation problem is reduced by one. +* +* DLASD7 is called from DLASD6. +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether singular vectors are to be computed +* in compact form, as follows: +* = 0: Compute singular values only. +* = 1: Compute singular vectors of upper +* bidiagonal matrix in compact form. +* +* NL (input) INTEGER +* The row dimension of the upper block. NL >= 1. +* +* NR (input) INTEGER +* The row dimension of the lower block. NR >= 1. +* +* SQRE (input) INTEGER +* = 0: the lower block is an NR-by-NR square matrix. +* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +* +* The bidiagonal matrix has +* N = NL + NR + 1 rows and +* M = N + SQRE >= N columns. +* +* K (output) INTEGER +* Contains the dimension of the non-deflated matrix, this is +* the order of the related secular equation. 1 <= K <=N. +* +* D (input/output) DOUBLE PRECISION array, dimension ( N ) +* On entry D contains the singular values of the two submatrices +* to be combined. On exit D contains the trailing (N-K) updated +* singular values (those which were deflated) sorted into +* increasing order. +* +* Z (output) DOUBLE PRECISION array, dimension ( M ) +* On exit Z contains the updating row vector in the secular +* equation. +* +* ZW (workspace) DOUBLE PRECISION array, dimension ( M ) +* Workspace for Z. +* +* VF (input/output) DOUBLE PRECISION array, dimension ( M ) +* On entry, VF(1:NL+1) contains the first components of all +* right singular vectors of the upper block; and VF(NL+2:M) +* contains the first components of all right singular vectors +* of the lower block. On exit, VF contains the first components +* of all right singular vectors of the bidiagonal matrix. +* +* VFW (workspace) DOUBLE PRECISION array, dimension ( M ) +* Workspace for VF. +* +* VL (input/output) DOUBLE PRECISION array, dimension ( M ) +* On entry, VL(1:NL+1) contains the last components of all +* right singular vectors of the upper block; and VL(NL+2:M) +* contains the last components of all right singular vectors +* of the lower block. On exit, VL contains the last components +* of all right singular vectors of the bidiagonal matrix. +* +* VLW (workspace) DOUBLE PRECISION array, dimension ( M ) +* Workspace for VL. +* +* ALPHA (input) DOUBLE PRECISION +* Contains the diagonal element associated with the added row. +* +* BETA (input) DOUBLE PRECISION +* Contains the off-diagonal element associated with the added +* row. +* +* DSIGMA (output) DOUBLE PRECISION array, dimension ( N ) +* Contains a copy of the diagonal elements (K-1 singular values +* and one zero) in the secular equation. +* +* IDX (workspace) INTEGER array, dimension ( N ) +* This will contain the permutation used to sort the contents of +* D into ascending order. +* +* IDXP (workspace) INTEGER array, dimension ( N ) +* This will contain the permutation used to place deflated +* values of D at the end of the array. On output IDXP(2:K) +* points to the nondeflated D-values and IDXP(K+1:N) +* points to the deflated singular values. +* +* IDXQ (input) INTEGER array, dimension ( N ) +* This contains the permutation which separately sorts the two +* sub-problems in D into ascending order. Note that entries in +* the first half of this permutation must first be moved one +* position backward; and entries in the second half +* must first have NL+1 added to their values. +* +* PERM (output) INTEGER array, dimension ( N ) +* The permutations (from deflation and sorting) to be applied +* to each singular block. Not referenced if ICOMPQ = 0. +* +* GIVPTR (output) INTEGER +* The number of Givens rotations which took place in this +* subproblem. Not referenced if ICOMPQ = 0. +* +* GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) +* Each pair of numbers indicates a pair of columns to take place +* in a Givens rotation. Not referenced if ICOMPQ = 0. +* +* LDGCOL (input) INTEGER +* The leading dimension of GIVCOL, must be at least N. +* +* GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) +* Each number indicates the C or S value to be used in the +* corresponding Givens rotation. Not referenced if ICOMPQ = 0. +* +* LDGNUM (input) INTEGER +* The leading dimension of GIVNUM, must be at least N. +* +* C (output) DOUBLE PRECISION +* C contains garbage if SQRE =0 and the C-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* S (output) DOUBLE PRECISION +* S contains garbage if SQRE =0 and the S-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, EIGHT + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0, + $ EIGHT = 8.0D+0 ) +* .. +* .. Local Scalars .. +* + INTEGER I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M, N, + $ NLP1, NLP2 + DOUBLE PRECISION EPS, HLFTOL, TAU, TOL, Z1 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLAMRG, DROT, XERBLA +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLAPY2 + EXTERNAL DLAMCH, DLAPY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + N = NL + NR + 1 + M = N + SQRE +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( NL.LT.1 ) THEN + INFO = -2 + ELSE IF( NR.LT.1 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -22 + ELSE IF( LDGNUM.LT.N ) THEN + INFO = -24 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASD7', -INFO ) + RETURN + END IF +* + NLP1 = NL + 1 + NLP2 = NL + 2 + IF( ICOMPQ.EQ.1 ) THEN + GIVPTR = 0 + END IF +* +* Generate the first part of the vector Z and move the singular +* values in the first part of D one position backward. +* + Z1 = ALPHA*VL( NLP1 ) + VL( NLP1 ) = ZERO + TAU = VF( NLP1 ) + DO 10 I = NL, 1, -1 + Z( I+1 ) = ALPHA*VL( I ) + VL( I ) = ZERO + VF( I+1 ) = VF( I ) + D( I+1 ) = D( I ) + IDXQ( I+1 ) = IDXQ( I ) + 1 + 10 CONTINUE + VF( 1 ) = TAU +* +* Generate the second part of the vector Z. +* + DO 20 I = NLP2, M + Z( I ) = BETA*VF( I ) + VF( I ) = ZERO + 20 CONTINUE +* +* Sort the singular values into increasing order +* + DO 30 I = NLP2, N + IDXQ( I ) = IDXQ( I ) + NLP1 + 30 CONTINUE +* +* DSIGMA, IDXC, IDXC, and ZW are used as storage space. +* + DO 40 I = 2, N + DSIGMA( I ) = D( IDXQ( I ) ) + ZW( I ) = Z( IDXQ( I ) ) + VFW( I ) = VF( IDXQ( I ) ) + VLW( I ) = VL( IDXQ( I ) ) + 40 CONTINUE +* + CALL DLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) ) +* + DO 50 I = 2, N + IDXI = 1 + IDX( I ) + D( I ) = DSIGMA( IDXI ) + Z( I ) = ZW( IDXI ) + VF( I ) = VFW( IDXI ) + VL( I ) = VLW( IDXI ) + 50 CONTINUE +* +* Calculate the allowable deflation tolerence +* + EPS = DLAMCH( 'Epsilon' ) + TOL = MAX( ABS( ALPHA ), ABS( BETA ) ) + TOL = EIGHT*EIGHT*EPS*MAX( ABS( D( N ) ), TOL ) +* +* There are 2 kinds of deflation -- first a value in the z-vector +* is small, second two (or more) singular values are very close +* together (their difference is small). +* +* If the value in the z-vector is small, we simply permute the +* array so that the corresponding singular value is moved to the +* end. +* +* If two values in the D-vector are close, we perform a two-sided +* rotation designed to make one of the corresponding z-vector +* entries zero, and then permute the array so that the deflated +* singular value is moved to the end. +* +* If there are multiple singular values then the problem deflates. +* Here the number of equal singular values are found. As each equal +* singular value is found, an elementary reflector is computed to +* rotate the corresponding singular subspace so that the +* corresponding components of Z are zero in this new basis. +* + K = 1 + K2 = N + 1 + DO 60 J = 2, N + IF( ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + IDXP( K2 ) = J + IF( J.EQ.N ) + $ GO TO 100 + ELSE + JPREV = J + GO TO 70 + END IF + 60 CONTINUE + 70 CONTINUE + J = JPREV + 80 CONTINUE + J = J + 1 + IF( J.GT.N ) + $ GO TO 90 + IF( ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + IDXP( K2 ) = J + ELSE +* +* Check if singular values are close enough to allow deflation. +* + IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN +* +* Deflation is possible. +* + S = Z( JPREV ) + C = Z( J ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = DLAPY2( C, S ) + Z( J ) = TAU + Z( JPREV ) = ZERO + C = C / TAU + S = -S / TAU +* +* Record the appropriate Givens rotation +* + IF( ICOMPQ.EQ.1 ) THEN + GIVPTR = GIVPTR + 1 + IDXJP = IDXQ( IDX( JPREV )+1 ) + IDXJ = IDXQ( IDX( J )+1 ) + IF( IDXJP.LE.NLP1 ) THEN + IDXJP = IDXJP - 1 + END IF + IF( IDXJ.LE.NLP1 ) THEN + IDXJ = IDXJ - 1 + END IF + GIVCOL( GIVPTR, 2 ) = IDXJP + GIVCOL( GIVPTR, 1 ) = IDXJ + GIVNUM( GIVPTR, 2 ) = C + GIVNUM( GIVPTR, 1 ) = S + END IF + CALL DROT( 1, VF( JPREV ), 1, VF( J ), 1, C, S ) + CALL DROT( 1, VL( JPREV ), 1, VL( J ), 1, C, S ) + K2 = K2 - 1 + IDXP( K2 ) = JPREV + JPREV = J + ELSE + K = K + 1 + ZW( K ) = Z( JPREV ) + DSIGMA( K ) = D( JPREV ) + IDXP( K ) = JPREV + JPREV = J + END IF + END IF + GO TO 80 + 90 CONTINUE +* +* Record the last singular value. +* + K = K + 1 + ZW( K ) = Z( JPREV ) + DSIGMA( K ) = D( JPREV ) + IDXP( K ) = JPREV +* + 100 CONTINUE +* +* Sort the singular values into DSIGMA. The singular values which +* were not deflated go into the first K slots of DSIGMA, except +* that DSIGMA(1) is treated separately. +* + DO 110 J = 2, N + JP = IDXP( J ) + DSIGMA( J ) = D( JP ) + VFW( J ) = VF( JP ) + VLW( J ) = VL( JP ) + 110 CONTINUE + IF( ICOMPQ.EQ.1 ) THEN + DO 120 J = 2, N + JP = IDXP( J ) + PERM( J ) = IDXQ( IDX( JP )+1 ) + IF( PERM( J ).LE.NLP1 ) THEN + PERM( J ) = PERM( J ) - 1 + END IF + 120 CONTINUE + END IF +* +* The deflated singular values go back into the last N - K slots of +* D. +* + CALL DCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 ) +* +* Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and +* VL(M). +* + DSIGMA( 1 ) = ZERO + HLFTOL = TOL / TWO + IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL ) + $ DSIGMA( 2 ) = HLFTOL + IF( M.GT.N ) THEN + Z( 1 ) = DLAPY2( Z1, Z( M ) ) + IF( Z( 1 ).LE.TOL ) THEN + C = ONE + S = ZERO + Z( 1 ) = TOL + ELSE + C = Z1 / Z( 1 ) + S = -Z( M ) / Z( 1 ) + END IF + CALL DROT( 1, VF( M ), 1, VF( 1 ), 1, C, S ) + CALL DROT( 1, VL( M ), 1, VL( 1 ), 1, C, S ) + ELSE + IF( ABS( Z1 ).LE.TOL ) THEN + Z( 1 ) = TOL + ELSE + Z( 1 ) = Z1 + END IF + END IF +* +* Restore Z, VF, and VL. +* + CALL DCOPY( K-1, ZW( 2 ), 1, Z( 2 ), 1 ) + CALL DCOPY( N-1, VFW( 2 ), 1, VF( 2 ), 1 ) + CALL DCOPY( N-1, VLW( 2 ), 1, VL( 2 ), 1 ) +* + RETURN +* +* End of DLASD7 +* + END diff --git a/costa/native/external/lapack/dlasd8.f b/costa/native/external/lapack/dlasd8.f new file mode 100644 index 000000000..9d1035865 --- /dev/null +++ b/costa/native/external/lapack/dlasd8.f @@ -0,0 +1,254 @@ + SUBROUTINE DLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDDIFR, + $ DSIGMA, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, K, LDDIFR +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), DIFL( * ), DIFR( LDDIFR, * ), + $ DSIGMA( * ), VF( * ), VL( * ), WORK( * ), + $ Z( * ) +* .. +* +* Purpose +* ======= +* +* DLASD8 finds the square roots of the roots of the secular equation, +* as defined by the values in DSIGMA and Z. It makes the appropriate +* calls to DLASD4, and stores, for each element in D, the distance +* to its two nearest poles (elements in DSIGMA). It also updates +* the arrays VF and VL, the first and last components of all the +* right singular vectors of the original bidiagonal matrix. +* +* DLASD8 is called from DLASD6. +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether singular vectors are to be computed in +* factored form in the calling routine: +* = 0: Compute singular values only. +* = 1: Compute singular vectors in factored form as well. +* +* K (input) INTEGER +* The number of terms in the rational function to be solved +* by DLASD4. K >= 1. +* +* D (output) DOUBLE PRECISION array, dimension ( K ) +* On output, D contains the updated singular values. +* +* Z (input) DOUBLE PRECISION array, dimension ( K ) +* The first K elements of this array contain the components +* of the deflation-adjusted updating row vector. +* +* VF (input/output) DOUBLE PRECISION array, dimension ( K ) +* On entry, VF contains information passed through DBEDE8. +* On exit, VF contains the first K components of the first +* components of all right singular vectors of the bidiagonal +* matrix. +* +* VL (input/output) DOUBLE PRECISION array, dimension ( K ) +* On entry, VL contains information passed through DBEDE8. +* On exit, VL contains the first K components of the last +* components of all right singular vectors of the bidiagonal +* matrix. +* +* DIFL (output) DOUBLE PRECISION array, dimension ( K ) +* On exit, DIFL(I) = D(I) - DSIGMA(I). +* +* DIFR (output) DOUBLE PRECISION array, +* dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and +* dimension ( K ) if ICOMPQ = 0. +* On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not +* defined and will not be referenced. +* +* If ICOMPQ = 1, DIFR(1:K,2) is an array containing the +* normalizing factors for the right singular vector matrix. +* +* LDDIFR (input) INTEGER +* The leading dimension of DIFR, must be at least K. +* +* DSIGMA (input) DOUBLE PRECISION array, dimension ( K ) +* The first K elements of this array contain the old roots +* of the deflated updating problem. These are the poles +* of the secular equation. +* +* WORK (workspace) DOUBLE PRECISION array, dimension at least 3 * K +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an singular value did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IWK1, IWK2, IWK2I, IWK3, IWK3I, J + DOUBLE PRECISION DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, RHO, TEMP +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLASCL, DLASD4, DLASET, XERBLA +* .. +* .. External Functions .. + DOUBLE PRECISION DDOT, DLAMC3, DNRM2 + EXTERNAL DDOT, DLAMC3, DNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( K.LT.1 ) THEN + INFO = -2 + ELSE IF( LDDIFR.LT.K ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASD8', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.1 ) THEN + D( 1 ) = ABS( Z( 1 ) ) + DIFL( 1 ) = D( 1 ) + IF( ICOMPQ.EQ.1 ) THEN + DIFL( 2 ) = ONE + DIFR( 1, 2 ) = ONE + END IF + RETURN + END IF +* +* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can +* be computed with high relative accuracy (barring over/underflow). +* This is a problem on machines without a guard digit in +* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). +* The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), +* which on any of these machines zeros out the bottommost +* bit of DSIGMA(I) if it is 1; this makes the subsequent +* subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation +* occurs. On binary machines with a guard digit (almost all +* machines) it does not change DSIGMA(I) at all. On hexadecimal +* and decimal machines with a guard digit, it slightly +* changes the bottommost bits of DSIGMA(I). It does not account +* for hexadecimal or decimal machines without guard digits +* (we know of none). We use a subroutine call to compute +* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating +* this code. +* + DO 10 I = 1, K + DSIGMA( I ) = DLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I ) + 10 CONTINUE +* +* Book keeping. +* + IWK1 = 1 + IWK2 = IWK1 + K + IWK3 = IWK2 + K + IWK2I = IWK2 - 1 + IWK3I = IWK3 - 1 +* +* Normalize Z. +* + RHO = DNRM2( K, Z, 1 ) + CALL DLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO ) + RHO = RHO*RHO +* +* Initialize WORK(IWK3). +* + CALL DLASET( 'A', K, 1, ONE, ONE, WORK( IWK3 ), K ) +* +* Compute the updated singular values, the arrays DIFL, DIFR, +* and the updated Z. +* + DO 40 J = 1, K + CALL DLASD4( K, J, DSIGMA, Z, WORK( IWK1 ), RHO, D( J ), + $ WORK( IWK2 ), INFO ) +* +* If the root finder fails, the computation is terminated. +* + IF( INFO.NE.0 ) THEN + RETURN + END IF + WORK( IWK3I+J ) = WORK( IWK3I+J )*WORK( J )*WORK( IWK2I+J ) + DIFL( J ) = -WORK( J ) + DIFR( J, 1 ) = -WORK( J+1 ) + DO 20 I = 1, J - 1 + WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )* + $ WORK( IWK2I+I ) / ( DSIGMA( I )- + $ DSIGMA( J ) ) / ( DSIGMA( I )+ + $ DSIGMA( J ) ) + 20 CONTINUE + DO 30 I = J + 1, K + WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )* + $ WORK( IWK2I+I ) / ( DSIGMA( I )- + $ DSIGMA( J ) ) / ( DSIGMA( I )+ + $ DSIGMA( J ) ) + 30 CONTINUE + 40 CONTINUE +* +* Compute updated Z. +* + DO 50 I = 1, K + Z( I ) = SIGN( SQRT( ABS( WORK( IWK3I+I ) ) ), Z( I ) ) + 50 CONTINUE +* +* Update VF and VL. +* + DO 80 J = 1, K + DIFLJ = DIFL( J ) + DJ = D( J ) + DSIGJ = -DSIGMA( J ) + IF( J.LT.K ) THEN + DIFRJ = -DIFR( J, 1 ) + DSIGJP = -DSIGMA( J+1 ) + END IF + WORK( J ) = -Z( J ) / DIFLJ / ( DSIGMA( J )+DJ ) + DO 60 I = 1, J - 1 + WORK( I ) = Z( I ) / ( DLAMC3( DSIGMA( I ), DSIGJ )-DIFLJ ) + $ / ( DSIGMA( I )+DJ ) + 60 CONTINUE + DO 70 I = J + 1, K + WORK( I ) = Z( I ) / ( DLAMC3( DSIGMA( I ), DSIGJP )+DIFRJ ) + $ / ( DSIGMA( I )+DJ ) + 70 CONTINUE + TEMP = DNRM2( K, WORK, 1 ) + WORK( IWK2I+J ) = DDOT( K, WORK, 1, VF, 1 ) / TEMP + WORK( IWK3I+J ) = DDOT( K, WORK, 1, VL, 1 ) / TEMP + IF( ICOMPQ.EQ.1 ) THEN + DIFR( J, 2 ) = TEMP + END IF + 80 CONTINUE +* + CALL DCOPY( K, WORK( IWK2 ), 1, VF, 1 ) + CALL DCOPY( K, WORK( IWK3 ), 1, VL, 1 ) +* + RETURN +* +* End of DLASD8 +* + END diff --git a/costa/native/external/lapack/dlasd9.f b/costa/native/external/lapack/dlasd9.f new file mode 100644 index 000000000..45fe9cb60 --- /dev/null +++ b/costa/native/external/lapack/dlasd9.f @@ -0,0 +1,256 @@ + SUBROUTINE DLASD9( ICOMPQ, LDU, K, D, Z, VF, VL, DIFL, DIFR, + $ DSIGMA, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, K, LDU +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), DIFL( * ), DIFR( LDU, * ), DSIGMA( * ), + $ VF( * ), VL( * ), WORK( * ), Z( * ) +* .. +* +* Purpose +* ======= +* +* DLASD9 finds the square roots of the roots of the secular equation, +* as defined by the values in DSIGMA and Z. It makes the +* appropriate calls to DLASD4, and stores, for each element in D, +* the distance to its two nearest poles (elements in DSIGMA). It also +* updates the arrays VF and VL, the first and last components of all +* the right singular vectors of the original bidiagonal matrix. +* +* DLASD9 is called from DLASD7. +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether singular vectors are to be computed in +* factored form in the calling routine: +* +* ICOMPQ = 0 Compute singular values only. +* +* ICOMPQ = 1 Compute singular vector matrices in +* factored form also. +* K (input) INTEGER +* The number of terms in the rational function to be solved by +* DLASD4. K >= 1. +* +* D (output) DOUBLE PRECISION array, dimension(K) +* D(I) contains the updated singular values. +* +* DSIGMA (input) DOUBLE PRECISION array, dimension(K) +* The first K elements of this array contain the old roots +* of the deflated updating problem. These are the poles +* of the secular equation. +* +* Z (input) DOUBLE PRECISION array, dimension (K) +* The first K elements of this array contain the components +* of the deflation-adjusted updating row vector. +* +* VF (input/output) DOUBLE PRECISION array, dimension(K) +* On entry, VF contains information passed through SBEDE8.f +* On exit, VF contains the first K components of the first +* components of all right singular vectors of the bidiagonal +* matrix. +* +* VL (input/output) DOUBLE PRECISION array, dimension(K) +* On entry, VL contains information passed through SBEDE8.f +* On exit, VL contains the first K components of the last +* components of all right singular vectors of the bidiagonal +* matrix. +* +* DIFL (output) DOUBLE PRECISION array, dimension (K). +* On exit, DIFL(I) = D(I) - DSIGMA(I). +* +* DIFR (output) DOUBLE PRECISION array, +* dimension (LDU, 2) if ICOMPQ =1 and +* dimension (K) if ICOMPQ = 0. +* On exit, DIFR(I, 1) = D(I) - DSIGMA(I+1), DIFR(K, 1) is not +* defined and will not be referenced. +* +* If ICOMPQ = 1, DIFR(1:K, 2) is an array containing the +* normalizing factors for the right singular vector matrix. +* +* WORK (workspace) DOUBLE PRECISION array, +* dimension at least (3 * K) +* Workspace. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an singular value did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I, IWK1, IWK2, IWK2I, IWK3, IWK3I, J + DOUBLE PRECISION DIFLJ, DIFRJ, DJ, DJP1, DSIGJ, DSIGJP, RHO, + $ TEMP +* .. +* .. External Functions .. + DOUBLE PRECISION DDOT, DLAMC3, DNRM2 + EXTERNAL DDOT, DLAMC3, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLASCL, DLASD4, DLASET, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( K.LT.1 ) THEN + INFO = -3 + ELSE IF( LDU.LT.K ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASD9', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.1 ) THEN + D( 1 ) = ABS( Z( 1 ) ) + DIFL( 1 ) = D( 1 ) + IF( ICOMPQ.EQ.1 ) THEN + DIFL( 2 ) = ONE + DIFR( 1, 2 ) = ONE + END IF + RETURN + END IF +* +* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can +* be computed with high relative accuracy (barring over/underflow). +* This is a problem on machines without a guard digit in +* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). +* The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), +* which on any of these machines zeros out the bottommost +* bit of DSIGMA(I) if it is 1; this makes the subsequent +* subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation +* occurs. On binary machines with a guard digit (almost all +* machines) it does not change DSIGMA(I) at all. On hexadecimal +* and decimal machines with a guard digit, it slightly +* changes the bottommost bits of DSIGMA(I). It does not account +* for hexadecimal or decimal machines without guard digits +* (we know of none). We use a subroutine call to compute +* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating +* this code. +* + DO 10 I = 1, K + DSIGMA( I ) = DLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I ) + 10 CONTINUE +* +* Book keeping. +* + IWK1 = 1 + IWK2 = IWK1 + K + IWK3 = IWK2 + K + IWK2I = IWK2 - 1 + IWK3I = IWK3 - 1 +* +* Normalize Z. +* + RHO = DNRM2( K, Z, 1 ) + CALL DLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO ) + RHO = RHO*RHO +* +* Initialize WORK(IWK3). +* + CALL DLASET( 'A', K, 1, ONE, ONE, WORK( IWK3 ), K ) +* +* Compute the updated singular values, the arrays DIFL, DIFR, +* and the updated Z. +* + DO 40 J = 1, K + CALL DLASD4( K, J, DSIGMA, Z, WORK( IWK1 ), RHO, D( J ), + $ WORK( IWK2 ), INFO ) +* +* If the root finder fails, the computation is terminated. +* + IF( INFO.NE.0 ) THEN + RETURN + END IF + WORK( IWK3I+J ) = WORK( IWK3I+J )*WORK( J )*WORK( IWK2I+J ) + DIFL( J ) = -WORK( J ) + DIFR( J, 1 ) = -WORK( J+1 ) + DO 20 I = 1, J - 1 + WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )* + $ WORK( IWK2I+I ) / ( DSIGMA( I )- + $ DSIGMA( J ) ) / ( DSIGMA( I )+ + $ DSIGMA( J ) ) + 20 CONTINUE + DO 30 I = J + 1, K + WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )* + $ WORK( IWK2I+I ) / ( DSIGMA( I )- + $ DSIGMA( J ) ) / ( DSIGMA( I )+ + $ DSIGMA( J ) ) + 30 CONTINUE + 40 CONTINUE +* +* Compute updated Z. +* + DO 50 I = 1, K + Z( I ) = SIGN( SQRT( ABS( WORK( IWK3I+I ) ) ), Z( I ) ) + 50 CONTINUE +* +* Update VF and VL. +* + DO 80 J = 1, K + DIFLJ = DIFL( J ) + DJ = D( J ) + DSIGJ = -DSIGMA( J ) + IF( J.LT.K ) THEN + DIFRJ = -DIFR( J, 1 ) + DJP1 = D( J+1 ) + DSIGJP = -DSIGMA( J+1 ) + END IF + WORK( J ) = -Z( J ) / DIFLJ / ( DSIGMA( J )+DJ ) + DO 60 I = 1, J - 1 + WORK( I ) = Z( I ) / ( DLAMC3( DSIGMA( I ), DSIGJ )-DIFLJ ) + $ / ( DSIGMA( I )+DJ ) + 60 CONTINUE + DO 70 I = J + 1, K + WORK( I ) = Z( I ) / ( DLAMC3( DSIGMA( I ), DSIGJP )+DIFRJ ) + $ / ( DSIGMA( I )+DJ ) + 70 CONTINUE + TEMP = DNRM2( K, WORK, 1 ) + WORK( IWK2I+J ) = DDOT( K, WORK, 1, VF, 1 ) / TEMP + WORK( IWK3I+J ) = DDOT( K, WORK, 1, VL, 1 ) / TEMP + IF( ICOMPQ.EQ.1 ) THEN + DIFR( J, 2 ) = TEMP + END IF + 80 CONTINUE +* + CALL DCOPY( K, WORK( IWK2 ), 1, VF, 1 ) + CALL DCOPY( K, WORK( IWK3 ), 1, VL, 1 ) +* + RETURN +* +* End of DLASD9 +* + END diff --git a/costa/native/external/lapack/dlasda.f b/costa/native/external/lapack/dlasda.f new file mode 100644 index 000000000..372516e30 --- /dev/null +++ b/costa/native/external/lapack/dlasda.f @@ -0,0 +1,391 @@ + SUBROUTINE DLASDA( ICOMPQ, SMLSIZ, N, SQRE, D, E, U, LDU, VT, K, + $ DIFL, DIFR, Z, POLES, GIVPTR, GIVCOL, LDGCOL, + $ PERM, GIVNUM, C, S, WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, LDGCOL, LDU, N, SMLSIZ, SQRE +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ), + $ K( * ), PERM( LDGCOL, * ) + DOUBLE PRECISION C( * ), D( * ), DIFL( LDU, * ), DIFR( LDU, * ), + $ E( * ), GIVNUM( LDU, * ), POLES( LDU, * ), + $ S( * ), U( LDU, * ), VT( LDU, * ), WORK( * ), + $ Z( LDU, * ) +* .. +* +* Purpose +* ======= +* +* Using a divide and conquer approach, DLASDA computes the singular +* value decomposition (SVD) of a real upper bidiagonal N-by-M matrix +* B with diagonal D and offdiagonal E, where M = N + SQRE. The +* algorithm computes the singular values in the SVD B = U * S * VT. +* The orthogonal matrices U and VT are optionally computed in +* compact form. +* +* A related subroutine, DLASD0, computes the singular values and +* the singular vectors in explicit form. +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether singular vectors are to be computed +* in compact form, as follows +* = 0: Compute singular values only. +* = 1: Compute singular vectors of upper bidiagonal +* matrix in compact form. +* +* SMLSIZ (input) INTEGER +* The maximum size of the subproblems at the bottom of the +* computation tree. +* +* N (input) INTEGER +* The row dimension of the upper bidiagonal matrix. This is +* also the dimension of the main diagonal array D. +* +* SQRE (input) INTEGER +* Specifies the column dimension of the bidiagonal matrix. +* = 0: The bidiagonal matrix has column dimension M = N; +* = 1: The bidiagonal matrix has column dimension M = N + 1. +* +* D (input/output) DOUBLE PRECISION array, dimension ( N ) +* On entry D contains the main diagonal of the bidiagonal +* matrix. On exit D, if INFO = 0, contains its singular values. +* +* E (input) DOUBLE PRECISION array, dimension ( M-1 ) +* Contains the subdiagonal entries of the bidiagonal matrix. +* On exit, E has been destroyed. +* +* U (output) DOUBLE PRECISION array, +* dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced +* if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left +* singular vector matrices of all subproblems at the bottom +* level. +* +* LDU (input) INTEGER, LDU = > N. +* The leading dimension of arrays U, VT, DIFL, DIFR, POLES, +* GIVNUM, and Z. +* +* VT (output) DOUBLE PRECISION array, +* dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced +* if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right +* singular vector matrices of all subproblems at the bottom +* level. +* +* K (output) INTEGER array, +* dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0. +* If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th +* secular equation on the computation tree. +* +* DIFL (output) DOUBLE PRECISION array, dimension ( LDU, NLVL ), +* where NLVL = floor(log_2 (N/SMLSIZ))). +* +* DIFR (output) DOUBLE PRECISION array, +* dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and +* dimension ( N ) if ICOMPQ = 0. +* If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1) +* record distances between singular values on the I-th +* level and singular values on the (I -1)-th level, and +* DIFR(1:N, 2 * I ) contains the normalizing factors for +* the right singular vector matrix. See DLASD8 for details. +* +* Z (output) DOUBLE PRECISION array, +* dimension ( LDU, NLVL ) if ICOMPQ = 1 and +* dimension ( N ) if ICOMPQ = 0. +* The first K elements of Z(1, I) contain the components of +* the deflation-adjusted updating row vector for subproblems +* on the I-th level. +* +* POLES (output) DOUBLE PRECISION array, +* dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced +* if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and +* POLES(1, 2*I) contain the new and old singular values +* involved in the secular equations on the I-th level. +* +* GIVPTR (output) INTEGER array, +* dimension ( N ) if ICOMPQ = 1, and not referenced if +* ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records +* the number of Givens rotations performed on the I-th +* problem on the computation tree. +* +* GIVCOL (output) INTEGER array, +* dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not +* referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, +* GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations +* of Givens rotations performed on the I-th level on the +* computation tree. +* +* LDGCOL (input) INTEGER, LDGCOL = > N. +* The leading dimension of arrays GIVCOL and PERM. +* +* PERM (output) INTEGER array, +* dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced +* if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records +* permutations done on the I-th level of the computation tree. +* +* GIVNUM (output) DOUBLE PRECISION array, +* dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not +* referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, +* GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S- +* values of Givens rotations performed on the I-th level on +* the computation tree. +* +* C (output) DOUBLE PRECISION array, +* dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. +* If ICOMPQ = 1 and the I-th subproblem is not square, on exit, +* C( I ) contains the C-value of a Givens rotation related to +* the right null space of the I-th subproblem. +* +* S (output) DOUBLE PRECISION array, dimension ( N ) if +* ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 +* and the I-th subproblem is not square, on exit, S( I ) +* contains the S-value of a Givens rotation related to +* the right null space of the I-th subproblem. +* +* WORK (workspace) DOUBLE PRECISION array, dimension +* (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). +* +* IWORK (workspace) INTEGER array. +* Dimension must be at least (7 * N). +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an singular value did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, IC, IDXQ, IDXQI, IM1, INODE, ITEMP, IWK, + $ J, LF, LL, LVL, LVL2, M, NCC, ND, NDB1, NDIML, + $ NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, NRU, + $ NWORK1, NWORK2, SMLSZP, SQREI, VF, VFI, VL, VLI + DOUBLE PRECISION ALPHA, BETA +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLASD6, DLASDQ, DLASDT, DLASET, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( SMLSIZ.LT.3 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + ELSE IF( LDU.LT.( N+SQRE ) ) THEN + INFO = -8 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -17 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASDA', -INFO ) + RETURN + END IF +* + M = N + SQRE +* +* If the input matrix is too small, call DLASDQ to find the SVD. +* + IF( N.LE.SMLSIZ ) THEN + IF( ICOMPQ.EQ.0 ) THEN + CALL DLASDQ( 'U', SQRE, N, 0, 0, 0, D, E, VT, LDU, U, LDU, + $ U, LDU, WORK, INFO ) + ELSE + CALL DLASDQ( 'U', SQRE, N, M, N, 0, D, E, VT, LDU, U, LDU, + $ U, LDU, WORK, INFO ) + END IF + RETURN + END IF +* +* Book-keeping and set up the computation tree. +* + INODE = 1 + NDIML = INODE + N + NDIMR = NDIML + N + IDXQ = NDIMR + N + IWK = IDXQ + N +* + NCC = 0 + NRU = 0 +* + SMLSZP = SMLSIZ + 1 + VF = 1 + VL = VF + M + NWORK1 = VL + M + NWORK2 = NWORK1 + SMLSZP*SMLSZP +* + CALL DLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ), + $ IWORK( NDIMR ), SMLSIZ ) +* +* for the nodes on bottom level of the tree, solve +* their subproblems by DLASDQ. +* + NDB1 = ( ND+1 ) / 2 + DO 30 I = NDB1, ND +* +* IC : center row of each node +* NL : number of rows of left subproblem +* NR : number of rows of right subproblem +* NLF: starting row of the left subproblem +* NRF: starting row of the right subproblem +* + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NLP1 = NL + 1 + NR = IWORK( NDIMR+I1 ) + NLF = IC - NL + NRF = IC + 1 + IDXQI = IDXQ + NLF - 2 + VFI = VF + NLF - 1 + VLI = VL + NLF - 1 + SQREI = 1 + IF( ICOMPQ.EQ.0 ) THEN + CALL DLASET( 'A', NLP1, NLP1, ZERO, ONE, WORK( NWORK1 ), + $ SMLSZP ) + CALL DLASDQ( 'U', SQREI, NL, NLP1, NRU, NCC, D( NLF ), + $ E( NLF ), WORK( NWORK1 ), SMLSZP, + $ WORK( NWORK2 ), NL, WORK( NWORK2 ), NL, + $ WORK( NWORK2 ), INFO ) + ITEMP = NWORK1 + NL*SMLSZP + CALL DCOPY( NLP1, WORK( NWORK1 ), 1, WORK( VFI ), 1 ) + CALL DCOPY( NLP1, WORK( ITEMP ), 1, WORK( VLI ), 1 ) + ELSE + CALL DLASET( 'A', NL, NL, ZERO, ONE, U( NLF, 1 ), LDU ) + CALL DLASET( 'A', NLP1, NLP1, ZERO, ONE, VT( NLF, 1 ), LDU ) + CALL DLASDQ( 'U', SQREI, NL, NLP1, NL, NCC, D( NLF ), + $ E( NLF ), VT( NLF, 1 ), LDU, U( NLF, 1 ), LDU, + $ U( NLF, 1 ), LDU, WORK( NWORK1 ), INFO ) + CALL DCOPY( NLP1, VT( NLF, 1 ), 1, WORK( VFI ), 1 ) + CALL DCOPY( NLP1, VT( NLF, NLP1 ), 1, WORK( VLI ), 1 ) + END IF + IF( INFO.NE.0 ) THEN + RETURN + END IF + DO 10 J = 1, NL + IWORK( IDXQI+J ) = J + 10 CONTINUE + IF( ( I.EQ.ND ) .AND. ( SQRE.EQ.0 ) ) THEN + SQREI = 0 + ELSE + SQREI = 1 + END IF + IDXQI = IDXQI + NLP1 + VFI = VFI + NLP1 + VLI = VLI + NLP1 + NRP1 = NR + SQREI + IF( ICOMPQ.EQ.0 ) THEN + CALL DLASET( 'A', NRP1, NRP1, ZERO, ONE, WORK( NWORK1 ), + $ SMLSZP ) + CALL DLASDQ( 'U', SQREI, NR, NRP1, NRU, NCC, D( NRF ), + $ E( NRF ), WORK( NWORK1 ), SMLSZP, + $ WORK( NWORK2 ), NR, WORK( NWORK2 ), NR, + $ WORK( NWORK2 ), INFO ) + ITEMP = NWORK1 + ( NRP1-1 )*SMLSZP + CALL DCOPY( NRP1, WORK( NWORK1 ), 1, WORK( VFI ), 1 ) + CALL DCOPY( NRP1, WORK( ITEMP ), 1, WORK( VLI ), 1 ) + ELSE + CALL DLASET( 'A', NR, NR, ZERO, ONE, U( NRF, 1 ), LDU ) + CALL DLASET( 'A', NRP1, NRP1, ZERO, ONE, VT( NRF, 1 ), LDU ) + CALL DLASDQ( 'U', SQREI, NR, NRP1, NR, NCC, D( NRF ), + $ E( NRF ), VT( NRF, 1 ), LDU, U( NRF, 1 ), LDU, + $ U( NRF, 1 ), LDU, WORK( NWORK1 ), INFO ) + CALL DCOPY( NRP1, VT( NRF, 1 ), 1, WORK( VFI ), 1 ) + CALL DCOPY( NRP1, VT( NRF, NRP1 ), 1, WORK( VLI ), 1 ) + END IF + IF( INFO.NE.0 ) THEN + RETURN + END IF + DO 20 J = 1, NR + IWORK( IDXQI+J ) = J + 20 CONTINUE + 30 CONTINUE +* +* Now conquer each subproblem bottom-up. +* + J = 2**NLVL + DO 50 LVL = NLVL, 1, -1 + LVL2 = LVL*2 - 1 +* +* Find the first node LF and last node LL on +* the current level LVL. +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 40 I = LF, LL + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + IF( I.EQ.LL ) THEN + SQREI = SQRE + ELSE + SQREI = 1 + END IF + VFI = VF + NLF - 1 + VLI = VL + NLF - 1 + IDXQI = IDXQ + NLF - 1 + ALPHA = D( IC ) + BETA = E( IC ) + IF( ICOMPQ.EQ.0 ) THEN + CALL DLASD6( ICOMPQ, NL, NR, SQREI, D( NLF ), + $ WORK( VFI ), WORK( VLI ), ALPHA, BETA, + $ IWORK( IDXQI ), PERM, GIVPTR( 1 ), GIVCOL, + $ LDGCOL, GIVNUM, LDU, POLES, DIFL, DIFR, Z, + $ K( 1 ), C( 1 ), S( 1 ), WORK( NWORK1 ), + $ IWORK( IWK ), INFO ) + ELSE + J = J - 1 + CALL DLASD6( ICOMPQ, NL, NR, SQREI, D( NLF ), + $ WORK( VFI ), WORK( VLI ), ALPHA, BETA, + $ IWORK( IDXQI ), PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, + $ POLES( NLF, LVL2 ), DIFL( NLF, LVL ), + $ DIFR( NLF, LVL2 ), Z( NLF, LVL ), K( J ), + $ C( J ), S( J ), WORK( NWORK1 ), + $ IWORK( IWK ), INFO ) + END IF + IF( INFO.NE.0 ) THEN + RETURN + END IF + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of DLASDA +* + END diff --git a/costa/native/external/lapack/dlasdq.f b/costa/native/external/lapack/dlasdq.f new file mode 100644 index 000000000..9bc3134f3 --- /dev/null +++ b/costa/native/external/lapack/dlasdq.f @@ -0,0 +1,317 @@ + SUBROUTINE DLASDQ( UPLO, SQRE, N, NCVT, NRU, NCC, D, E, VT, LDVT, + $ U, LDU, C, LDC, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU, SQRE +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), D( * ), E( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLASDQ computes the singular value decomposition (SVD) of a real +* (upper or lower) bidiagonal matrix with diagonal D and offdiagonal +* E, accumulating the transformations if desired. Letting B denote +* the input bidiagonal matrix, the algorithm computes orthogonal +* matrices Q and P such that B = Q * S * P' (P' denotes the transpose +* of P). The singular values S are overwritten on D. +* +* The input matrix U is changed to U * Q if desired. +* The input matrix VT is changed to P' * VT if desired. +* The input matrix C is changed to Q' * C if desired. +* +* See "Computing Small Singular Values of Bidiagonal Matrices With +* Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, +* LAPACK Working Note #3, for a detailed description of the algorithm. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* On entry, UPLO specifies whether the input bidiagonal matrix +* is upper or lower bidiagonal, and wether it is square are +* not. +* UPLO = 'U' or 'u' B is upper bidiagonal. +* UPLO = 'L' or 'l' B is lower bidiagonal. +* +* SQRE (input) INTEGER +* = 0: then the input matrix is N-by-N. +* = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and +* (N+1)-by-N if UPLU = 'L'. +* +* The bidiagonal matrix has +* N = NL + NR + 1 rows and +* M = N + SQRE >= N columns. +* +* N (input) INTEGER +* On entry, N specifies the number of rows and columns +* in the matrix. N must be at least 0. +* +* NCVT (input) INTEGER +* On entry, NCVT specifies the number of columns of +* the matrix VT. NCVT must be at least 0. +* +* NRU (input) INTEGER +* On entry, NRU specifies the number of rows of +* the matrix U. NRU must be at least 0. +* +* NCC (input) INTEGER +* On entry, NCC specifies the number of columns of +* the matrix C. NCC must be at least 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, D contains the diagonal entries of the +* bidiagonal matrix whose SVD is desired. On normal exit, +* D contains the singular values in ascending order. +* +* E (input/output) DOUBLE PRECISION array. +* dimension is (N-1) if SQRE = 0 and N if SQRE = 1. +* On entry, the entries of E contain the offdiagonal entries +* of the bidiagonal matrix whose SVD is desired. On normal +* exit, E will contain 0. If the algorithm does not converge, +* D and E will contain the diagonal and superdiagonal entries +* of a bidiagonal matrix orthogonally equivalent to the one +* given as input. +* +* VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) +* On entry, contains a matrix which on exit has been +* premultiplied by P', dimension N-by-NCVT if SQRE = 0 +* and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0). +* +* LDVT (input) INTEGER +* On entry, LDVT specifies the leading dimension of VT as +* declared in the calling (sub) program. LDVT must be at +* least 1. If NCVT is nonzero LDVT must also be at least N. +* +* U (input/output) DOUBLE PRECISION array, dimension (LDU, N) +* On entry, contains a matrix which on exit has been +* postmultiplied by Q, dimension NRU-by-N if SQRE = 0 +* and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0). +* +* LDU (input) INTEGER +* On entry, LDU specifies the leading dimension of U as +* declared in the calling (sub) program. LDU must be at +* least max( 1, NRU ) . +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) +* On entry, contains an N-by-NCC matrix which on exit +* has been premultiplied by Q' dimension N-by-NCC if SQRE = 0 +* and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0). +* +* LDC (input) INTEGER +* On entry, LDC specifies the leading dimension of C as +* declared in the calling (sub) program. LDC must be at +* least 1. If NCC is nonzero, LDC must also be at least N. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) +* Workspace. Only referenced if one of NCVT, NRU, or NCC is +* nonzero, and if N is at least 2. +* +* INFO (output) INTEGER +* On exit, a value of 0 indicates a successful exit. +* If INFO < 0, argument number -INFO is illegal. +* If INFO > 0, the algorithm did not converge, and INFO +* specifies how many superdiagonals did not converge. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ROTATE + INTEGER I, ISUB, IUPLO, J, NP1, SQRE1 + DOUBLE PRECISION CS, R, SMIN, SN +* .. +* .. External Subroutines .. + EXTERNAL DBDSQR, DLARTG, DLASR, DSWAP, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IUPLO = 0 + IF( LSAME( UPLO, 'U' ) ) + $ IUPLO = 1 + IF( LSAME( UPLO, 'L' ) ) + $ IUPLO = 2 + IF( IUPLO.EQ.0 ) THEN + INFO = -1 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NCVT.LT.0 ) THEN + INFO = -4 + ELSE IF( NRU.LT.0 ) THEN + INFO = -5 + ELSE IF( NCC.LT.0 ) THEN + INFO = -6 + ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR. + $ ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN + INFO = -10 + ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN + INFO = -12 + ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR. + $ ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASDQ', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN +* +* ROTATE is true if any singular vectors desired, false otherwise +* + ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 ) + NP1 = N + 1 + SQRE1 = SQRE +* +* If matrix non-square upper bidiagonal, rotate to be lower +* bidiagonal. The rotations are on the right. +* + IF( ( IUPLO.EQ.1 ) .AND. ( SQRE1.EQ.1 ) ) THEN + DO 10 I = 1, N - 1 + CALL DLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( ROTATE ) THEN + WORK( I ) = CS + WORK( N+I ) = SN + END IF + 10 CONTINUE + CALL DLARTG( D( N ), E( N ), CS, SN, R ) + D( N ) = R + E( N ) = ZERO + IF( ROTATE ) THEN + WORK( N ) = CS + WORK( N+N ) = SN + END IF + IUPLO = 2 + SQRE1 = 0 +* +* Update singular vectors if desired. +* + IF( NCVT.GT.0 ) + $ CALL DLASR( 'L', 'V', 'F', NP1, NCVT, WORK( 1 ), + $ WORK( NP1 ), VT, LDVT ) + END IF +* +* If matrix lower bidiagonal, rotate to be upper bidiagonal +* by applying Givens rotations on the left. +* + IF( IUPLO.EQ.2 ) THEN + DO 20 I = 1, N - 1 + CALL DLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( ROTATE ) THEN + WORK( I ) = CS + WORK( N+I ) = SN + END IF + 20 CONTINUE +* +* If matrix (N+1)-by-N lower bidiagonal, one additional +* rotation is needed. +* + IF( SQRE1.EQ.1 ) THEN + CALL DLARTG( D( N ), E( N ), CS, SN, R ) + D( N ) = R + IF( ROTATE ) THEN + WORK( N ) = CS + WORK( N+N ) = SN + END IF + END IF +* +* Update singular vectors if desired. +* + IF( NRU.GT.0 ) THEN + IF( SQRE1.EQ.0 ) THEN + CALL DLASR( 'R', 'V', 'F', NRU, N, WORK( 1 ), + $ WORK( NP1 ), U, LDU ) + ELSE + CALL DLASR( 'R', 'V', 'F', NRU, NP1, WORK( 1 ), + $ WORK( NP1 ), U, LDU ) + END IF + END IF + IF( NCC.GT.0 ) THEN + IF( SQRE1.EQ.0 ) THEN + CALL DLASR( 'L', 'V', 'F', N, NCC, WORK( 1 ), + $ WORK( NP1 ), C, LDC ) + ELSE + CALL DLASR( 'L', 'V', 'F', NP1, NCC, WORK( 1 ), + $ WORK( NP1 ), C, LDC ) + END IF + END IF + END IF +* +* Call DBDSQR to compute the SVD of the reduced real +* N-by-N upper bidiagonal matrix. +* + CALL DBDSQR( 'U', N, NCVT, NRU, NCC, D, E, VT, LDVT, U, LDU, C, + $ LDC, WORK, INFO ) +* +* Sort the singular values into ascending order (insertion sort on +* singular values, but only one transposition per singular vector) +* + DO 40 I = 1, N +* +* Scan for smallest D(I). +* + ISUB = I + SMIN = D( I ) + DO 30 J = I + 1, N + IF( D( J ).LT.SMIN ) THEN + ISUB = J + SMIN = D( J ) + END IF + 30 CONTINUE + IF( ISUB.NE.I ) THEN +* +* Swap singular values and vectors. +* + D( ISUB ) = D( I ) + D( I ) = SMIN + IF( NCVT.GT.0 ) + $ CALL DSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( I, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL DSWAP( NRU, U( 1, ISUB ), 1, U( 1, I ), 1 ) + IF( NCC.GT.0 ) + $ CALL DSWAP( NCC, C( ISUB, 1 ), LDC, C( I, 1 ), LDC ) + END IF + 40 CONTINUE +* + RETURN +* +* End of DLASDQ +* + END diff --git a/costa/native/external/lapack/dlasdt.f b/costa/native/external/lapack/dlasdt.f new file mode 100644 index 000000000..bb7842d07 --- /dev/null +++ b/costa/native/external/lapack/dlasdt.f @@ -0,0 +1,106 @@ + SUBROUTINE DLASDT( N, LVL, ND, INODE, NDIML, NDIMR, MSUB ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER LVL, MSUB, N, ND +* .. +* .. Array Arguments .. + INTEGER INODE( * ), NDIML( * ), NDIMR( * ) +* .. +* +* Purpose +* ======= +* +* DLASDT creates a tree of subproblems for bidiagonal divide and +* conquer. +* +* Arguments +* ========= +* +* N (input) INTEGER +* On entry, the number of diagonal elements of the +* bidiagonal matrix. +* +* LVL (output) INTEGER +* On exit, the number of levels on the computation tree. +* +* ND (output) INTEGER +* On exit, the number of nodes on the tree. +* +* INODE (output) INTEGER array, dimension ( N ) +* On exit, centers of subproblems. +* +* NDIML (output) INTEGER array, dimension ( N ) +* On exit, row dimensions of left children. +* +* NDIMR (output) INTEGER array, dimension ( N ) +* On exit, row dimensions of right children. +* +* MSUB (input) INTEGER. +* On entry, the maximum row dimension each subproblem at the +* bottom of the tree can be of. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IL, IR, LLST, MAXN, NCRNT, NLVL + DOUBLE PRECISION TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, INT, LOG, MAX +* .. +* .. Executable Statements .. +* +* Find the number of levels on the tree. +* + MAXN = MAX( 1, N ) + TEMP = LOG( DBLE( MAXN ) / DBLE( MSUB+1 ) ) / LOG( TWO ) + LVL = INT( TEMP ) + 1 +* + I = N / 2 + INODE( 1 ) = I + 1 + NDIML( 1 ) = I + NDIMR( 1 ) = N - I - 1 + IL = 0 + IR = 1 + LLST = 1 + DO 20 NLVL = 1, LVL - 1 +* +* Constructing the tree at (NLVL+1)-st level. The number of +* nodes created on this level is LLST * 2. +* + DO 10 I = 0, LLST - 1 + IL = IL + 2 + IR = IR + 2 + NCRNT = LLST + I + NDIML( IL ) = NDIML( NCRNT ) / 2 + NDIMR( IL ) = NDIML( NCRNT ) - NDIML( IL ) - 1 + INODE( IL ) = INODE( NCRNT ) - NDIMR( IL ) - 1 + NDIML( IR ) = NDIMR( NCRNT ) / 2 + NDIMR( IR ) = NDIMR( NCRNT ) - NDIML( IR ) - 1 + INODE( IR ) = INODE( NCRNT ) + NDIML( IR ) + 1 + 10 CONTINUE + LLST = LLST*2 + 20 CONTINUE + ND = LLST*2 - 1 +* + RETURN +* +* End of DLASDT +* + END diff --git a/costa/native/external/lapack/dlaset.f b/costa/native/external/lapack/dlaset.f new file mode 100644 index 000000000..14af572d4 --- /dev/null +++ b/costa/native/external/lapack/dlaset.f @@ -0,0 +1,115 @@ + SUBROUTINE DLASET( UPLO, M, N, ALPHA, BETA, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, M, N + DOUBLE PRECISION ALPHA, BETA +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* DLASET initializes an m-by-n matrix A to BETA on the diagonal and +* ALPHA on the offdiagonals. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies the part of the matrix A to be set. +* = 'U': Upper triangular part is set; the strictly lower +* triangular part of A is not changed. +* = 'L': Lower triangular part is set; the strictly upper +* triangular part of A is not changed. +* Otherwise: All of the matrix A is set. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* ALPHA (input) DOUBLE PRECISION +* The constant to which the offdiagonal elements are to be set. +* +* BETA (input) DOUBLE PRECISION +* The constant to which the diagonal elements are to be set. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On exit, the leading m-by-n submatrix of A is set as follows: +* +* if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, +* if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, +* otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, +* +* and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Set the strictly upper triangular or trapezoidal part of the +* array to ALPHA. +* + DO 20 J = 2, N + DO 10 I = 1, MIN( J-1, M ) + A( I, J ) = ALPHA + 10 CONTINUE + 20 CONTINUE +* + ELSE IF( LSAME( UPLO, 'L' ) ) THEN +* +* Set the strictly lower triangular or trapezoidal part of the +* array to ALPHA. +* + DO 40 J = 1, MIN( M, N ) + DO 30 I = J + 1, M + A( I, J ) = ALPHA + 30 CONTINUE + 40 CONTINUE +* + ELSE +* +* Set the leading m-by-n submatrix to ALPHA. +* + DO 60 J = 1, N + DO 50 I = 1, M + A( I, J ) = ALPHA + 50 CONTINUE + 60 CONTINUE + END IF +* +* Set the first min(M,N) diagonal elements to BETA. +* + DO 70 I = 1, MIN( M, N ) + A( I, I ) = BETA + 70 CONTINUE +* + RETURN +* +* End of DLASET +* + END diff --git a/costa/native/external/lapack/dlasq1.f b/costa/native/external/lapack/dlasq1.f new file mode 100644 index 000000000..0bbe3f0ed --- /dev/null +++ b/costa/native/external/lapack/dlasq1.f @@ -0,0 +1,149 @@ + SUBROUTINE DLASQ1( N, D, E, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLASQ1 computes the singular values of a real N-by-N bidiagonal +* matrix with diagonal D and off-diagonal E. The singular values +* are computed to high relative accuracy, in the absence of +* denormalization, underflow and overflow. The algorithm was first +* presented in +* +* "Accurate singular values and differential qd algorithms" by K. V. +* Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, +* 1994, +* +* and the present implementation is described in "An implementation of +* the dqds Algorithm (Positive Case)", LAPACK Working Note. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of rows and columns in the matrix. N >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, D contains the diagonal elements of the +* bidiagonal matrix whose SVD is desired. On normal exit, +* D contains the singular values in decreasing order. +* +* E (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, elements E(1:N-1) contain the off-diagonal elements +* of the bidiagonal matrix whose SVD is desired. +* On exit, E is overwritten. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: the algorithm failed +* = 1, a split was marked by a positive value in E +* = 2, current block of Z not diagonalized after 30*N +* iterations (in inner while loop) +* = 3, termination criterion of outer while loop not met +* (program created more than N unreduced blocks) +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO + DOUBLE PRECISION EPS, SCALE, SAFMIN, SIGMN, SIGMX +* .. +* .. External Subroutines .. + EXTERNAL DLAS2, DLASQ2, DLASRT, XERBLA +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -2 + CALL XERBLA( 'DLASQ1', -INFO ) + RETURN + ELSE IF( N.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + D( 1 ) = ABS( D( 1 ) ) + RETURN + ELSE IF( N.EQ.2 ) THEN + CALL DLAS2( D( 1 ), E( 1 ), D( 2 ), SIGMN, SIGMX ) + D( 1 ) = SIGMX + D( 2 ) = SIGMN + RETURN + END IF +* +* Estimate the largest singular value. +* + SIGMX = ZERO + DO 10 I = 1, N - 1 + D( I ) = ABS( D( I ) ) + SIGMX = MAX( SIGMX, ABS( E( I ) ) ) + 10 CONTINUE + D( N ) = ABS( D( N ) ) +* +* Early return if SIGMX is zero (matrix is already diagonal). +* + IF( SIGMX.EQ.ZERO ) THEN + CALL DLASRT( 'D', N, D, IINFO ) + RETURN + END IF +* + DO 20 I = 1, N + SIGMX = MAX( SIGMX, D( I ) ) + 20 CONTINUE +* +* Copy D and E into WORK (in the Z format) and scale (squaring the +* input data makes scaling by a power of the radix pointless). +* + EPS = DLAMCH( 'Precision' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SCALE = SQRT( EPS / SAFMIN ) + CALL DCOPY( N, D, 1, WORK( 1 ), 2 ) + CALL DCOPY( N-1, E, 1, WORK( 2 ), 2 ) + CALL DLASCL( 'G', 0, 0, SIGMX, SCALE, 2*N-1, 1, WORK, 2*N-1, + $ IINFO ) +* +* Compute the q's and e's. +* + DO 30 I = 1, 2*N - 1 + WORK( I ) = WORK( I )**2 + 30 CONTINUE + WORK( 2*N ) = ZERO +* + CALL DLASQ2( N, WORK, INFO ) +* + IF( INFO.EQ.0 ) THEN + DO 40 I = 1, N + D( I ) = SQRT( WORK( I ) ) + 40 CONTINUE + CALL DLASCL( 'G', 0, 0, SCALE, SIGMX, N, 1, D, N, IINFO ) + END IF +* + RETURN +* +* End of DLASQ1 +* + END diff --git a/costa/native/external/lapack/dlasq2.f b/costa/native/external/lapack/dlasq2.f new file mode 100644 index 000000000..271111a82 --- /dev/null +++ b/costa/native/external/lapack/dlasq2.f @@ -0,0 +1,436 @@ + SUBROUTINE DLASQ2( N, Z, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION Z( * ) +* .. +* +* Purpose +* ======= +* +* DLASQ2 computes all the eigenvalues of the symmetric positive +* definite tridiagonal matrix associated with the qd array Z to high +* relative accuracy are computed to high relative accuracy, in the +* absence of denormalization, underflow and overflow. +* +* To see the relation of Z to the tridiagonal matrix, let L be a +* unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and +* let U be an upper bidiagonal matrix with 1's above and diagonal +* Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the +* symmetric tridiagonal to which it is similar. +* +* Note : DLASQ2 defines a logical variable, IEEE, which is true +* on machines which follow ieee-754 floating-point standard in their +* handling of infinities and NaNs, and false otherwise. This variable +* is passed to DLASQ3. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of rows and columns in the matrix. N >= 0. +* +* Z (workspace) DOUBLE PRECISION array, dimension ( 4*N ) +* On entry Z holds the qd array. On exit, entries 1 to N hold +* the eigenvalues in decreasing order, Z( 2*N+1 ) holds the +* trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If +* N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) +* holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of +* shifts that failed. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if the i-th argument is a scalar and had an illegal +* value, then INFO = -i, if the i-th argument is an +* array and the j-entry had an illegal value, then +* INFO = -(i*100+j) +* > 0: the algorithm failed +* = 1, a split was marked by a positive value in E +* = 2, current block of Z not diagonalized after 30*N +* iterations (in inner while loop) +* = 3, termination criterion of outer while loop not met +* (program created more than N unreduced blocks) +* +* Further Details +* =============== +* Local Variables: I0:N0 defines a current unreduced segment of Z. +* The shifts are accumulated in SIGMA. Iteration count is in ITER. +* Ping-pong is controlled by PP (alternates between 0 and 1). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION CBIAS + PARAMETER ( CBIAS = 1.50D0 ) + DOUBLE PRECISION ZERO, HALF, ONE, TWO, FOUR, HUNDRD + PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0, + $ TWO = 2.0D0, FOUR = 4.0D0, HUNDRD = 100.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL IEEE + INTEGER I0, I4, IINFO, IPN4, ITER, IWHILA, IWHILB, K, + $ N0, NBIG, NDIV, NFAIL, PP, SPLT + DOUBLE PRECISION D, DESIG, DMIN, E, EMAX, EMIN, EPS, OLDEMN, + $ QMAX, QMIN, S, SAFMIN, SIGMA, T, TEMP, TOL, + $ TOL2, TRACE, ZMAX +* .. +* .. External Subroutines .. + EXTERNAL DLASQ3, DLASRT, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH, ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* (in case DLASQ2 is not called by DLASQ1) +* + INFO = 0 + EPS = DLAMCH( 'Precision' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + TOL = EPS*HUNDRD + TOL2 = TOL**2 +* + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'DLASQ2', 1 ) + RETURN + ELSE IF( N.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN +* +* 1-by-1 case. +* + IF( Z( 1 ).LT.ZERO ) THEN + INFO = -201 + CALL XERBLA( 'DLASQ2', 2 ) + END IF + RETURN + ELSE IF( N.EQ.2 ) THEN +* +* 2-by-2 case. +* + IF( Z( 2 ).LT.ZERO .OR. Z( 3 ).LT.ZERO ) THEN + INFO = -2 + CALL XERBLA( 'DLASQ2', 2 ) + RETURN + ELSE IF( Z( 3 ).GT.Z( 1 ) ) THEN + D = Z( 3 ) + Z( 3 ) = Z( 1 ) + Z( 1 ) = D + END IF + Z( 5 ) = Z( 1 ) + Z( 2 ) + Z( 3 ) + IF( Z( 2 ).GT.Z( 3 )*TOL2 ) THEN + T = HALF*( ( Z( 1 )-Z( 3 ) )+Z( 2 ) ) + S = Z( 3 )*( Z( 2 ) / T ) + IF( S.LE.T ) THEN + S = Z( 3 )*( Z( 2 ) / ( T*( ONE+SQRT( ONE+S / T ) ) ) ) + ELSE + S = Z( 3 )*( Z( 2 ) / ( T+SQRT( T )*SQRT( T+S ) ) ) + END IF + T = Z( 1 ) + ( S+Z( 2 ) ) + Z( 3 ) = Z( 3 )*( Z( 1 ) / T ) + Z( 1 ) = T + END IF + Z( 2 ) = Z( 3 ) + Z( 6 ) = Z( 2 ) + Z( 1 ) + RETURN + END IF +* +* Check for negative data and compute sums of q's and e's. +* + Z( 2*N ) = ZERO + EMIN = Z( 2 ) + QMAX = ZERO + ZMAX = ZERO + D = ZERO + E = ZERO +* + DO 10 K = 1, 2*( N-1 ), 2 + IF( Z( K ).LT.ZERO ) THEN + INFO = -( 200+K ) + CALL XERBLA( 'DLASQ2', 2 ) + RETURN + ELSE IF( Z( K+1 ).LT.ZERO ) THEN + INFO = -( 200+K+1 ) + CALL XERBLA( 'DLASQ2', 2 ) + RETURN + END IF + D = D + Z( K ) + E = E + Z( K+1 ) + QMAX = MAX( QMAX, Z( K ) ) + EMIN = MIN( EMIN, Z( K+1 ) ) + ZMAX = MAX( QMAX, ZMAX, Z( K+1 ) ) + 10 CONTINUE + IF( Z( 2*N-1 ).LT.ZERO ) THEN + INFO = -( 200+2*N-1 ) + CALL XERBLA( 'DLASQ2', 2 ) + RETURN + END IF + D = D + Z( 2*N-1 ) + QMAX = MAX( QMAX, Z( 2*N-1 ) ) + ZMAX = MAX( QMAX, ZMAX ) +* +* Check for diagonality. +* + IF( E.EQ.ZERO ) THEN + DO 20 K = 2, N + Z( K ) = Z( 2*K-1 ) + 20 CONTINUE + CALL DLASRT( 'D', N, Z, IINFO ) + Z( 2*N-1 ) = D + RETURN + END IF +* + TRACE = D + E +* +* Check for zero data. +* + IF( TRACE.EQ.ZERO ) THEN + Z( 2*N-1 ) = ZERO + RETURN + END IF +* +* Check whether the machine is IEEE conformable. +* + IEEE = ILAENV( 10, 'DLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 .AND. + $ ILAENV( 11, 'DLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 +* +* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). +* + DO 30 K = 2*N, 2, -2 + Z( 2*K ) = ZERO + Z( 2*K-1 ) = Z( K ) + Z( 2*K-2 ) = ZERO + Z( 2*K-3 ) = Z( K-1 ) + 30 CONTINUE +* + I0 = 1 + N0 = N +* +* Reverse the qd-array, if warranted. +* + IF( CBIAS*Z( 4*I0-3 ).LT.Z( 4*N0-3 ) ) THEN + IPN4 = 4*( I0+N0 ) + DO 40 I4 = 4*I0, 2*( I0+N0-1 ), 4 + TEMP = Z( I4-3 ) + Z( I4-3 ) = Z( IPN4-I4-3 ) + Z( IPN4-I4-3 ) = TEMP + TEMP = Z( I4-1 ) + Z( I4-1 ) = Z( IPN4-I4-5 ) + Z( IPN4-I4-5 ) = TEMP + 40 CONTINUE + END IF +* +* Initial split checking via dqd and Li's test. +* + PP = 0 +* + DO 80 K = 1, 2 +* + D = Z( 4*N0+PP-3 ) + DO 50 I4 = 4*( N0-1 ) + PP, 4*I0 + PP, -4 + IF( Z( I4-1 ).LE.TOL2*D ) THEN + Z( I4-1 ) = -ZERO + D = Z( I4-3 ) + ELSE + D = Z( I4-3 )*( D / ( D+Z( I4-1 ) ) ) + END IF + 50 CONTINUE +* +* dqd maps Z to ZZ plus Li's test. +* + EMIN = Z( 4*I0+PP+1 ) + D = Z( 4*I0+PP-3 ) + DO 60 I4 = 4*I0 + PP, 4*( N0-1 ) + PP, 4 + Z( I4-2*PP-2 ) = D + Z( I4-1 ) + IF( Z( I4-1 ).LE.TOL2*D ) THEN + Z( I4-1 ) = -ZERO + Z( I4-2*PP-2 ) = D + Z( I4-2*PP ) = ZERO + D = Z( I4+1 ) + ELSE IF( SAFMIN*Z( I4+1 ).LT.Z( I4-2*PP-2 ) .AND. + $ SAFMIN*Z( I4-2*PP-2 ).LT.Z( I4+1 ) ) THEN + TEMP = Z( I4+1 ) / Z( I4-2*PP-2 ) + Z( I4-2*PP ) = Z( I4-1 )*TEMP + D = D*TEMP + ELSE + Z( I4-2*PP ) = Z( I4+1 )*( Z( I4-1 ) / Z( I4-2*PP-2 ) ) + D = Z( I4+1 )*( D / Z( I4-2*PP-2 ) ) + END IF + EMIN = MIN( EMIN, Z( I4-2*PP ) ) + 60 CONTINUE + Z( 4*N0-PP-2 ) = D +* +* Now find qmax. +* + QMAX = Z( 4*I0-PP-2 ) + DO 70 I4 = 4*I0 - PP + 2, 4*N0 - PP - 2, 4 + QMAX = MAX( QMAX, Z( I4 ) ) + 70 CONTINUE +* +* Prepare for the next iteration on K. +* + PP = 1 - PP + 80 CONTINUE +* + ITER = 2 + NFAIL = 0 + NDIV = 2*( N0-I0 ) +* + DO 140 IWHILA = 1, N + 1 + IF( N0.LT.1 ) + $ GO TO 150 +* +* While array unfinished do +* +* E(N0) holds the value of SIGMA when submatrix in I0:N0 +* splits from the rest of the array, but is negated. +* + DESIG = ZERO + IF( N0.EQ.N ) THEN + SIGMA = ZERO + ELSE + SIGMA = -Z( 4*N0-1 ) + END IF + IF( SIGMA.LT.ZERO ) THEN + INFO = 1 + RETURN + END IF +* +* Find last unreduced submatrix's top index I0, find QMAX and +* EMIN. Find Gershgorin-type bound if Q's much greater than E's. +* + EMAX = ZERO + IF( N0.GT.I0 ) THEN + EMIN = ABS( Z( 4*N0-5 ) ) + ELSE + EMIN = ZERO + END IF + QMIN = Z( 4*N0-3 ) + QMAX = QMIN + DO 90 I4 = 4*N0, 8, -4 + IF( Z( I4-5 ).LE.ZERO ) + $ GO TO 100 + IF( QMIN.GE.FOUR*EMAX ) THEN + QMIN = MIN( QMIN, Z( I4-3 ) ) + EMAX = MAX( EMAX, Z( I4-5 ) ) + END IF + QMAX = MAX( QMAX, Z( I4-7 )+Z( I4-5 ) ) + EMIN = MIN( EMIN, Z( I4-5 ) ) + 90 CONTINUE + I4 = 4 +* + 100 CONTINUE + I0 = I4 / 4 +* +* Store EMIN for passing to DLASQ3. +* + Z( 4*N0-1 ) = EMIN +* +* Put -(initial shift) into DMIN. +* + DMIN = -MAX( ZERO, QMIN-TWO*SQRT( QMIN )*SQRT( EMAX ) ) +* +* Now I0:N0 is unreduced. PP = 0 for ping, PP = 1 for pong. +* + PP = 0 +* + NBIG = 30*( N0-I0+1 ) + DO 120 IWHILB = 1, NBIG + IF( I0.GT.N0 ) + $ GO TO 130 +* +* While submatrix unfinished take a good dqds step. +* + CALL DLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL, + $ ITER, NDIV, IEEE ) +* + PP = 1 - PP +* +* When EMIN is very small check for splits. +* + IF( PP.EQ.0 .AND. N0-I0.GE.3 ) THEN + IF( Z( 4*N0 ).LE.TOL2*QMAX .OR. + $ Z( 4*N0-1 ).LE.TOL2*SIGMA ) THEN + SPLT = I0 - 1 + QMAX = Z( 4*I0-3 ) + EMIN = Z( 4*I0-1 ) + OLDEMN = Z( 4*I0 ) + DO 110 I4 = 4*I0, 4*( N0-3 ), 4 + IF( Z( I4 ).LE.TOL2*Z( I4-3 ) .OR. + $ Z( I4-1 ).LE.TOL2*SIGMA ) THEN + Z( I4-1 ) = -SIGMA + SPLT = I4 / 4 + QMAX = ZERO + EMIN = Z( I4+3 ) + OLDEMN = Z( I4+4 ) + ELSE + QMAX = MAX( QMAX, Z( I4+1 ) ) + EMIN = MIN( EMIN, Z( I4-1 ) ) + OLDEMN = MIN( OLDEMN, Z( I4 ) ) + END IF + 110 CONTINUE + Z( 4*N0-1 ) = EMIN + Z( 4*N0 ) = OLDEMN + I0 = SPLT + 1 + END IF + END IF +* + 120 CONTINUE +* + INFO = 2 + RETURN +* +* end IWHILB +* + 130 CONTINUE +* + 140 CONTINUE +* + INFO = 3 + RETURN +* +* end IWHILA +* + 150 CONTINUE +* +* Move q's to the front. +* + DO 160 K = 2, N + Z( K ) = Z( 4*K-3 ) + 160 CONTINUE +* +* Sort and compute sum of eigenvalues. +* + CALL DLASRT( 'D', N, Z, IINFO ) +* + E = ZERO + DO 170 K = N, 1, -1 + E = E + Z( K ) + 170 CONTINUE +* +* Store trace, sum(eigenvalues) and information on performance. +* + Z( 2*N+1 ) = TRACE + Z( 2*N+2 ) = E + Z( 2*N+3 ) = DBLE( ITER ) + Z( 2*N+4 ) = DBLE( NDIV ) / DBLE( N**2 ) + Z( 2*N+5 ) = HUNDRD*NFAIL / DBLE( ITER ) + RETURN +* +* End of DLASQ2 +* + END diff --git a/costa/native/external/lapack/dlasq3.f b/costa/native/external/lapack/dlasq3.f new file mode 100644 index 000000000..dd783616f --- /dev/null +++ b/costa/native/external/lapack/dlasq3.f @@ -0,0 +1,298 @@ + SUBROUTINE DLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL, + $ ITER, NDIV, IEEE ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* May 17, 2000 +* +* .. Scalar Arguments .. + LOGICAL IEEE + INTEGER I0, ITER, N0, NDIV, NFAIL, PP + DOUBLE PRECISION DESIG, DMIN, QMAX, SIGMA +* .. +* .. Array Arguments .. + DOUBLE PRECISION Z( * ) +* .. +* +* Purpose +* ======= +* +* DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. +* In case of failure it changes shifts, and tries again until output +* is positive. +* +* Arguments +* ========= +* +* I0 (input) INTEGER +* First index. +* +* N0 (input) INTEGER +* Last index. +* +* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) +* Z holds the qd array. +* +* PP (input) INTEGER +* PP=0 for ping, PP=1 for pong. +* +* DMIN (output) DOUBLE PRECISION +* Minimum value of d. +* +* SIGMA (output) DOUBLE PRECISION +* Sum of shifts used in current segment. +* +* DESIG (input/output) DOUBLE PRECISION +* Lower order part of SIGMA +* +* QMAX (input) DOUBLE PRECISION +* Maximum value of q. +* +* NFAIL (output) INTEGER +* Number of times shift was too big. +* +* ITER (output) INTEGER +* Number of iterations. +* +* NDIV (output) INTEGER +* Number of divisions. +* +* TTYPE (output) INTEGER +* Shift type. +* +* IEEE (input) LOGICAL +* Flag for IEEE or non IEEE arithmetic (passed to DLASQ5). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION CBIAS + PARAMETER ( CBIAS = 1.50D0 ) + DOUBLE PRECISION ZERO, QURTR, HALF, ONE, TWO, HUNDRD + PARAMETER ( ZERO = 0.0D0, QURTR = 0.250D0, HALF = 0.5D0, + $ ONE = 1.0D0, TWO = 2.0D0, HUNDRD = 100.0D0 ) +* .. +* .. Local Scalars .. + INTEGER IPN4, J4, N0IN, NN, TTYPE + DOUBLE PRECISION DMIN1, DMIN2, DN, DN1, DN2, EPS, S, SAFMIN, T, + $ TAU, TEMP, TOL, TOL2 +* .. +* .. External Subroutines .. + EXTERNAL DLASQ4, DLASQ5, DLASQ6 +* .. +* .. External Function .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MIN, SQRT +* .. +* .. Save statement .. + SAVE TTYPE + SAVE DMIN1, DMIN2, DN, DN1, DN2, TAU +* .. +* .. Data statement .. + DATA TTYPE / 0 / + DATA DMIN1 / ZERO /, DMIN2 / ZERO /, DN / ZERO /, + $ DN1 / ZERO /, DN2 / ZERO /, TAU / ZERO / +* .. +* .. Executable Statements .. +* + N0IN = N0 + EPS = DLAMCH( 'Precision' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + TOL = EPS*HUNDRD + TOL2 = TOL**2 +* +* Check for deflation. +* + 10 CONTINUE +* + IF( N0.LT.I0 ) + $ RETURN + IF( N0.EQ.I0 ) + $ GO TO 20 + NN = 4*N0 + PP + IF( N0.EQ.( I0+1 ) ) + $ GO TO 40 +* +* Check whether E(N0-1) is negligible, 1 eigenvalue. +* + IF( Z( NN-5 ).GT.TOL2*( SIGMA+Z( NN-3 ) ) .AND. + $ Z( NN-2*PP-4 ).GT.TOL2*Z( NN-7 ) ) + $ GO TO 30 +* + 20 CONTINUE +* + Z( 4*N0-3 ) = Z( 4*N0+PP-3 ) + SIGMA + N0 = N0 - 1 + GO TO 10 +* +* Check whether E(N0-2) is negligible, 2 eigenvalues. +* + 30 CONTINUE +* + IF( Z( NN-9 ).GT.TOL2*SIGMA .AND. + $ Z( NN-2*PP-8 ).GT.TOL2*Z( NN-11 ) ) + $ GO TO 50 +* + 40 CONTINUE +* + IF( Z( NN-3 ).GT.Z( NN-7 ) ) THEN + S = Z( NN-3 ) + Z( NN-3 ) = Z( NN-7 ) + Z( NN-7 ) = S + END IF + IF( Z( NN-5 ).GT.Z( NN-3 )*TOL2 ) THEN + T = HALF*( ( Z( NN-7 )-Z( NN-3 ) )+Z( NN-5 ) ) + S = Z( NN-3 )*( Z( NN-5 ) / T ) + IF( S.LE.T ) THEN + S = Z( NN-3 )*( Z( NN-5 ) / + $ ( T*( ONE+SQRT( ONE+S / T ) ) ) ) + ELSE + S = Z( NN-3 )*( Z( NN-5 ) / ( T+SQRT( T )*SQRT( T+S ) ) ) + END IF + T = Z( NN-7 ) + ( S+Z( NN-5 ) ) + Z( NN-3 ) = Z( NN-3 )*( Z( NN-7 ) / T ) + Z( NN-7 ) = T + END IF + Z( 4*N0-7 ) = Z( NN-7 ) + SIGMA + Z( 4*N0-3 ) = Z( NN-3 ) + SIGMA + N0 = N0 - 2 + GO TO 10 +* + 50 CONTINUE +* +* Reverse the qd-array, if warranted. +* + IF( DMIN.LE.ZERO .OR. N0.LT.N0IN ) THEN + IF( CBIAS*Z( 4*I0+PP-3 ).LT.Z( 4*N0+PP-3 ) ) THEN + IPN4 = 4*( I0+N0 ) + DO 60 J4 = 4*I0, 2*( I0+N0-1 ), 4 + TEMP = Z( J4-3 ) + Z( J4-3 ) = Z( IPN4-J4-3 ) + Z( IPN4-J4-3 ) = TEMP + TEMP = Z( J4-2 ) + Z( J4-2 ) = Z( IPN4-J4-2 ) + Z( IPN4-J4-2 ) = TEMP + TEMP = Z( J4-1 ) + Z( J4-1 ) = Z( IPN4-J4-5 ) + Z( IPN4-J4-5 ) = TEMP + TEMP = Z( J4 ) + Z( J4 ) = Z( IPN4-J4-4 ) + Z( IPN4-J4-4 ) = TEMP + 60 CONTINUE + IF( N0-I0.LE.4 ) THEN + Z( 4*N0+PP-1 ) = Z( 4*I0+PP-1 ) + Z( 4*N0-PP ) = Z( 4*I0-PP ) + END IF + DMIN2 = MIN( DMIN2, Z( 4*N0+PP-1 ) ) + Z( 4*N0+PP-1 ) = MIN( Z( 4*N0+PP-1 ), Z( 4*I0+PP-1 ), + $ Z( 4*I0+PP+3 ) ) + Z( 4*N0-PP ) = MIN( Z( 4*N0-PP ), Z( 4*I0-PP ), + $ Z( 4*I0-PP+4 ) ) + QMAX = MAX( QMAX, Z( 4*I0+PP-3 ), Z( 4*I0+PP+1 ) ) + DMIN = -ZERO + END IF + END IF +* + 70 CONTINUE +* + IF( DMIN.LT.ZERO .OR. SAFMIN*QMAX.LT.MIN( Z( 4*N0+PP-1 ), + $ Z( 4*N0+PP-9 ), DMIN2+Z( 4*N0-PP ) ) ) THEN +* +* Choose a shift. +* + CALL DLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, DN1, + $ DN2, TAU, TTYPE ) +* +* Call dqds until DMIN > 0. +* + 80 CONTINUE +* + CALL DLASQ5( I0, N0, Z, PP, TAU, DMIN, DMIN1, DMIN2, DN, + $ DN1, DN2, IEEE ) +* + NDIV = NDIV + ( N0-I0+2 ) + ITER = ITER + 1 +* +* Check status. +* + IF( DMIN.GE.ZERO .AND. DMIN1.GT.ZERO ) THEN +* +* Success. +* + GO TO 100 +* + ELSE IF( DMIN.LT.ZERO .AND. DMIN1.GT.ZERO .AND. + $ Z( 4*( N0-1 )-PP ).LT.TOL*( SIGMA+DN1 ) .AND. + $ ABS( DN ).LT.TOL*SIGMA ) THEN +* +* Convergence hidden by negative DN. +* + Z( 4*( N0-1 )-PP+2 ) = ZERO + DMIN = ZERO + GO TO 100 + ELSE IF( DMIN.LT.ZERO ) THEN +* +* TAU too big. Select new TAU and try again. +* + NFAIL = NFAIL + 1 + IF( TTYPE.LT.-22 ) THEN +* +* Failed twice. Play it safe. +* + TAU = ZERO + ELSE IF( DMIN1.GT.ZERO ) THEN +* +* Late failure. Gives excellent shift. +* + TAU = ( TAU+DMIN )*( ONE-TWO*EPS ) + TTYPE = TTYPE - 11 + ELSE +* +* Early failure. Divide by 4. +* + TAU = QURTR*TAU + TTYPE = TTYPE - 12 + END IF + GO TO 80 + ELSE IF( DMIN.NE.DMIN ) THEN +* +* NaN. +* + TAU = ZERO + GO TO 80 + ELSE +* +* Possible underflow. Play it safe. +* + GO TO 90 + END IF + END IF +* +* Risk of underflow. +* + 90 CONTINUE + CALL DLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN, DN1, DN2 ) + NDIV = NDIV + ( N0-I0+2 ) + ITER = ITER + 1 + TAU = ZERO +* + 100 CONTINUE + IF( TAU.LT.SIGMA ) THEN + DESIG = DESIG + TAU + T = SIGMA + DESIG + DESIG = DESIG - ( T-SIGMA ) + ELSE + T = SIGMA + TAU + DESIG = SIGMA - ( T-TAU ) + DESIG + END IF + SIGMA = T +* + RETURN +* +* End of DLASQ3 +* + END diff --git a/costa/native/external/lapack/dlasq4.f b/costa/native/external/lapack/dlasq4.f new file mode 100644 index 000000000..5702b8a3e --- /dev/null +++ b/costa/native/external/lapack/dlasq4.f @@ -0,0 +1,330 @@ + SUBROUTINE DLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, + $ DN1, DN2, TAU, TTYPE ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER I0, N0, N0IN, PP, TTYPE + DOUBLE PRECISION DMIN, DMIN1, DMIN2, DN, DN1, DN2, TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION Z( * ) +* .. +* +* Purpose +* ======= +* +* DLASQ4 computes an approximation TAU to the smallest eigenvalue +* using values of d from the previous transform. +* +* I0 (input) INTEGER +* First index. +* +* N0 (input) INTEGER +* Last index. +* +* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) +* Z holds the qd array. +* +* PP (input) INTEGER +* PP=0 for ping, PP=1 for pong. +* +* NOIN (input) INTEGER +* The value of N0 at start of EIGTEST. +* +* DMIN (input) DOUBLE PRECISION +* Minimum value of d. +* +* DMIN1 (input) DOUBLE PRECISION +* Minimum value of d, excluding D( N0 ). +* +* DMIN2 (input) DOUBLE PRECISION +* Minimum value of d, excluding D( N0 ) and D( N0-1 ). +* +* DN (input) DOUBLE PRECISION +* d(N) +* +* DN1 (input) DOUBLE PRECISION +* d(N-1) +* +* DN2 (input) DOUBLE PRECISION +* d(N-2) +* +* TAU (output) DOUBLE PRECISION +* This is the shift. +* +* TTYPE (output) INTEGER +* Shift type. +* +* Further Details +* =============== +* CNST1 = 9/16 +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION CNST1, CNST2, CNST3 + PARAMETER ( CNST1 = 0.5630D0, CNST2 = 1.010D0, + $ CNST3 = 1.050D0 ) + DOUBLE PRECISION QURTR, THIRD, HALF, ZERO, ONE, TWO, HUNDRD + PARAMETER ( QURTR = 0.250D0, THIRD = 0.3330D0, + $ HALF = 0.50D0, ZERO = 0.0D0, ONE = 1.0D0, + $ TWO = 2.0D0, HUNDRD = 100.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I4, NN, NP + DOUBLE PRECISION A2, B1, B2, G, GAM, GAP1, GAP2, S +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Save statement .. + SAVE G +* .. +* .. Data statement .. + DATA G / ZERO / +* .. +* .. Executable Statements .. +* +* A negative DMIN forces the shift to take that absolute value +* TTYPE records the type of shift. +* + IF( DMIN.LE.ZERO ) THEN + TAU = -DMIN + TTYPE = -1 + RETURN + END IF +* + NN = 4*N0 + PP + IF( N0IN.EQ.N0 ) THEN +* +* No eigenvalues deflated. +* + IF( DMIN.EQ.DN .OR. DMIN.EQ.DN1 ) THEN +* + B1 = SQRT( Z( NN-3 ) )*SQRT( Z( NN-5 ) ) + B2 = SQRT( Z( NN-7 ) )*SQRT( Z( NN-9 ) ) + A2 = Z( NN-7 ) + Z( NN-5 ) +* +* Cases 2 and 3. +* + IF( DMIN.EQ.DN .AND. DMIN1.EQ.DN1 ) THEN + GAP2 = DMIN2 - A2 - DMIN2*QURTR + IF( GAP2.GT.ZERO .AND. GAP2.GT.B2 ) THEN + GAP1 = A2 - DN - ( B2 / GAP2 )*B2 + ELSE + GAP1 = A2 - DN - ( B1+B2 ) + END IF + IF( GAP1.GT.ZERO .AND. GAP1.GT.B1 ) THEN + S = MAX( DN-( B1 / GAP1 )*B1, HALF*DMIN ) + TTYPE = -2 + ELSE + S = ZERO + IF( DN.GT.B1 ) + $ S = DN - B1 + IF( A2.GT.( B1+B2 ) ) + $ S = MIN( S, A2-( B1+B2 ) ) + S = MAX( S, THIRD*DMIN ) + TTYPE = -3 + END IF + ELSE +* +* Case 4. +* + TTYPE = -4 + S = QURTR*DMIN + IF( DMIN.EQ.DN ) THEN + GAM = DN + A2 = ZERO + IF( Z( NN-5 ) .GT. Z( NN-7 ) ) + $ RETURN + B2 = Z( NN-5 ) / Z( NN-7 ) + NP = NN - 9 + ELSE + NP = NN - 2*PP + B2 = Z( NP-2 ) + GAM = DN1 + IF( Z( NP-4 ) .GT. Z( NP-2 ) ) + $ RETURN + A2 = Z( NP-4 ) / Z( NP-2 ) + IF( Z( NN-9 ) .GT. Z( NN-11 ) ) + $ RETURN + B2 = Z( NN-9 ) / Z( NN-11 ) + NP = NN - 13 + END IF +* +* Approximate contribution to norm squared from I < NN-1. +* + A2 = A2 + B2 + DO 10 I4 = NP, 4*I0 - 1 + PP, -4 + IF( B2.EQ.ZERO ) + $ GO TO 20 + B1 = B2 + IF( Z( I4 ) .GT. Z( I4-2 ) ) + $ RETURN + B2 = B2*( Z( I4 ) / Z( I4-2 ) ) + A2 = A2 + B2 + IF( HUNDRD*MAX( B2, B1 ).LT.A2 .OR. CNST1.LT.A2 ) + $ GO TO 20 + 10 CONTINUE + 20 CONTINUE + A2 = CNST3*A2 +* +* Rayleigh quotient residual bound. +* + IF( A2.LT.CNST1 ) + $ S = GAM*( ONE-SQRT( A2 ) ) / ( ONE+A2 ) + END IF + ELSE IF( DMIN.EQ.DN2 ) THEN +* +* Case 5. +* + TTYPE = -5 + S = QURTR*DMIN +* +* Compute contribution to norm squared from I > NN-2. +* + NP = NN - 2*PP + B1 = Z( NP-2 ) + B2 = Z( NP-6 ) + GAM = DN2 + IF( Z( NP-8 ).GT.B2 .OR. Z( NP-4 ).GT.B1 ) + $ RETURN + A2 = ( Z( NP-8 ) / B2 )*( ONE+Z( NP-4 ) / B1 ) +* +* Approximate contribution to norm squared from I < NN-2. +* + IF( N0-I0.GT.2 ) THEN + B2 = Z( NN-13 ) / Z( NN-15 ) + A2 = A2 + B2 + DO 30 I4 = NN - 17, 4*I0 - 1 + PP, -4 + IF( B2.EQ.ZERO ) + $ GO TO 40 + B1 = B2 + IF( Z( I4 ) .GT. Z( I4-2 ) ) + $ RETURN + B2 = B2*( Z( I4 ) / Z( I4-2 ) ) + A2 = A2 + B2 + IF( HUNDRD*MAX( B2, B1 ).LT.A2 .OR. CNST1.LT.A2 ) + $ GO TO 40 + 30 CONTINUE + 40 CONTINUE + A2 = CNST3*A2 + END IF +* + IF( A2.LT.CNST1 ) + $ S = GAM*( ONE-SQRT( A2 ) ) / ( ONE+A2 ) + ELSE +* +* Case 6, no information to guide us. +* + IF( TTYPE.EQ.-6 ) THEN + G = G + THIRD*( ONE-G ) + ELSE IF( TTYPE.EQ.-18 ) THEN + G = QURTR*THIRD + ELSE + G = QURTR + END IF + S = G*DMIN + TTYPE = -6 + END IF +* + ELSE IF( N0IN.EQ.( N0+1 ) ) THEN +* +* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. +* + IF( DMIN1.EQ.DN1 .AND. DMIN2.EQ.DN2 ) THEN +* +* Cases 7 and 8. +* + TTYPE = -7 + S = THIRD*DMIN1 + IF( Z( NN-5 ).GT.Z( NN-7 ) ) + $ RETURN + B1 = Z( NN-5 ) / Z( NN-7 ) + B2 = B1 + IF( B2.EQ.ZERO ) + $ GO TO 60 + DO 50 I4 = 4*N0 - 9 + PP, 4*I0 - 1 + PP, -4 + A2 = B1 + IF( Z( I4 ).GT.Z( I4-2 ) ) + $ RETURN + B1 = B1*( Z( I4 ) / Z( I4-2 ) ) + B2 = B2 + B1 + IF( HUNDRD*MAX( B1, A2 ).LT.B2 ) + $ GO TO 60 + 50 CONTINUE + 60 CONTINUE + B2 = SQRT( CNST3*B2 ) + A2 = DMIN1 / ( ONE+B2**2 ) + GAP2 = HALF*DMIN2 - A2 + IF( GAP2.GT.ZERO .AND. GAP2.GT.B2*A2 ) THEN + S = MAX( S, A2*( ONE-CNST2*A2*( B2 / GAP2 )*B2 ) ) + ELSE + S = MAX( S, A2*( ONE-CNST2*B2 ) ) + TTYPE = -8 + END IF + ELSE +* +* Case 9. +* + S = QURTR*DMIN1 + IF( DMIN1.EQ.DN1 ) + $ S = HALF*DMIN1 + TTYPE = -9 + END IF +* + ELSE IF( N0IN.EQ.( N0+2 ) ) THEN +* +* Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. +* +* Cases 10 and 11. +* + IF( DMIN2.EQ.DN2 .AND. TWO*Z( NN-5 ).LT.Z( NN-7 ) ) THEN + TTYPE = -10 + S = THIRD*DMIN2 + IF( Z( NN-5 ).GT.Z( NN-7 ) ) + $ RETURN + B1 = Z( NN-5 ) / Z( NN-7 ) + B2 = B1 + IF( B2.EQ.ZERO ) + $ GO TO 80 + DO 70 I4 = 4*N0 - 9 + PP, 4*I0 - 1 + PP, -4 + IF( Z( I4 ).GT.Z( I4-2 ) ) + $ RETURN + B1 = B1*( Z( I4 ) / Z( I4-2 ) ) + B2 = B2 + B1 + IF( HUNDRD*B1.LT.B2 ) + $ GO TO 80 + 70 CONTINUE + 80 CONTINUE + B2 = SQRT( CNST3*B2 ) + A2 = DMIN2 / ( ONE+B2**2 ) + GAP2 = Z( NN-7 ) + Z( NN-9 ) - + $ SQRT( Z( NN-11 ) )*SQRT( Z( NN-9 ) ) - A2 + IF( GAP2.GT.ZERO .AND. GAP2.GT.B2*A2 ) THEN + S = MAX( S, A2*( ONE-CNST2*A2*( B2 / GAP2 )*B2 ) ) + ELSE + S = MAX( S, A2*( ONE-CNST2*B2 ) ) + END IF + ELSE + S = QURTR*DMIN2 + TTYPE = -11 + END IF + ELSE IF( N0IN.GT.( N0+2 ) ) THEN +* +* Case 12, more than two eigenvalues deflated. No information. +* + S = ZERO + TTYPE = -12 + END IF +* + TAU = S + RETURN +* +* End of DLASQ4 +* + END diff --git a/costa/native/external/lapack/dlasq5.f b/costa/native/external/lapack/dlasq5.f new file mode 100644 index 000000000..068b897fe --- /dev/null +++ b/costa/native/external/lapack/dlasq5.f @@ -0,0 +1,196 @@ + SUBROUTINE DLASQ5( I0, N0, Z, PP, TAU, DMIN, DMIN1, DMIN2, DN, + $ DNM1, DNM2, IEEE ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* May 17, 2000 +* +* .. Scalar Arguments .. + LOGICAL IEEE + INTEGER I0, N0, PP + DOUBLE PRECISION DMIN, DMIN1, DMIN2, DN, DNM1, DNM2, TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION Z( * ) +* .. +* +* Purpose +* ======= +* +* DLASQ5 computes one dqds transform in ping-pong form, one +* version for IEEE machines another for non IEEE machines. +* +* Arguments +* ========= +* +* I0 (input) INTEGER +* First index. +* +* N0 (input) INTEGER +* Last index. +* +* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) +* Z holds the qd array. EMIN is stored in Z(4*N0) to avoid +* an extra argument. +* +* PP (input) INTEGER +* PP=0 for ping, PP=1 for pong. +* +* TAU (input) DOUBLE PRECISION +* This is the shift. +* +* DMIN (output) DOUBLE PRECISION +* Minimum value of d. +* +* DMIN1 (output) DOUBLE PRECISION +* Minimum value of d, excluding D( N0 ). +* +* DMIN2 (output) DOUBLE PRECISION +* Minimum value of d, excluding D( N0 ) and D( N0-1 ). +* +* DN (output) DOUBLE PRECISION +* d(N0), the last value of d. +* +* DNM1 (output) DOUBLE PRECISION +* d(N0-1). +* +* DNM2 (output) DOUBLE PRECISION +* d(N0-2). +* +* IEEE (input) LOGICAL +* Flag for IEEE or non IEEE arithmetic. +* +* ===================================================================== +* +* .. Parameter .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER J4, J4P2 + DOUBLE PRECISION D, EMIN, TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( ( N0-I0-1 ).LE.0 ) + $ RETURN +* + J4 = 4*I0 + PP - 3 + EMIN = Z( J4+4 ) + D = Z( J4 ) - TAU + DMIN = D + DMIN1 = -Z( J4 ) +* + IF( IEEE ) THEN +* +* Code for IEEE arithmetic. +* + IF( PP.EQ.0 ) THEN + DO 10 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + TEMP = Z( J4+1 ) / Z( J4-2 ) + D = D*TEMP - TAU + DMIN = MIN( DMIN, D ) + Z( J4 ) = Z( J4-1 )*TEMP + EMIN = MIN( Z( J4 ), EMIN ) + 10 CONTINUE + ELSE + DO 20 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + TEMP = Z( J4+2 ) / Z( J4-3 ) + D = D*TEMP - TAU + DMIN = MIN( DMIN, D ) + Z( J4-1 ) = Z( J4 )*TEMP + EMIN = MIN( Z( J4-1 ), EMIN ) + 20 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU + DMIN = MIN( DMIN, DN ) +* + ELSE +* +* Code for non IEEE arithmetic. +* + IF( PP.EQ.0 ) THEN + DO 30 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + IF( D.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4+1 )*( Z( J4-1 ) / Z( J4-2 ) ) + D = Z( J4+1 )*( D / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4 ) ) + 30 CONTINUE + ELSE + DO 40 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + IF( D.LT.ZERO ) THEN + RETURN + ELSE + Z( J4-1 ) = Z( J4+2 )*( Z( J4 ) / Z( J4-3 ) ) + D = Z( J4+2 )*( D / Z( J4-3 ) ) - TAU + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4-1 ) ) + 40 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + IF( DNM2.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + IF( DNM1.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, DN ) +* + END IF +* + Z( J4+2 ) = DN + Z( 4*N0-PP ) = EMIN + RETURN +* +* End of DLASQ5 +* + END diff --git a/costa/native/external/lapack/dlasq6.f b/costa/native/external/lapack/dlasq6.f new file mode 100644 index 000000000..480157c91 --- /dev/null +++ b/costa/native/external/lapack/dlasq6.f @@ -0,0 +1,176 @@ + SUBROUTINE DLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN, + $ DNM1, DNM2 ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER I0, N0, PP + DOUBLE PRECISION DMIN, DMIN1, DMIN2, DN, DNM1, DNM2 +* .. +* .. Array Arguments .. + DOUBLE PRECISION Z( * ) +* .. +* +* Purpose +* ======= +* +* DLASQ6 computes one dqd (shift equal to zero) transform in +* ping-pong form, with protection against underflow and overflow. +* +* Arguments +* ========= +* +* I0 (input) INTEGER +* First index. +* +* N0 (input) INTEGER +* Last index. +* +* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) +* Z holds the qd array. EMIN is stored in Z(4*N0) to avoid +* an extra argument. +* +* PP (input) INTEGER +* PP=0 for ping, PP=1 for pong. +* +* DMIN (output) DOUBLE PRECISION +* Minimum value of d. +* +* DMIN1 (output) DOUBLE PRECISION +* Minimum value of d, excluding D( N0 ). +* +* DMIN2 (output) DOUBLE PRECISION +* Minimum value of d, excluding D( N0 ) and D( N0-1 ). +* +* DN (output) DOUBLE PRECISION +* d(N0), the last value of d. +* +* DNM1 (output) DOUBLE PRECISION +* d(N0-1). +* +* DNM2 (output) DOUBLE PRECISION +* d(N0-2). +* +* ===================================================================== +* +* .. Parameter .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER J4, J4P2 + DOUBLE PRECISION D, EMIN, SAFMIN, TEMP +* .. +* .. External Function .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( ( N0-I0-1 ).LE.0 ) + $ RETURN +* + SAFMIN = DLAMCH( 'Safe minimum' ) + J4 = 4*I0 + PP - 3 + EMIN = Z( J4+4 ) + D = Z( J4 ) + DMIN = D +* + IF( PP.EQ.0 ) THEN + DO 10 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + IF( Z( J4-2 ).EQ.ZERO ) THEN + Z( J4 ) = ZERO + D = Z( J4+1 ) + DMIN = D + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4+1 ).LT.Z( J4-2 ) .AND. + $ SAFMIN*Z( J4-2 ).LT.Z( J4+1 ) ) THEN + TEMP = Z( J4+1 ) / Z( J4-2 ) + Z( J4 ) = Z( J4-1 )*TEMP + D = D*TEMP + ELSE + Z( J4 ) = Z( J4+1 )*( Z( J4-1 ) / Z( J4-2 ) ) + D = Z( J4+1 )*( D / Z( J4-2 ) ) + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4 ) ) + 10 CONTINUE + ELSE + DO 20 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + IF( Z( J4-3 ).EQ.ZERO ) THEN + Z( J4-1 ) = ZERO + D = Z( J4+2 ) + DMIN = D + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4+2 ).LT.Z( J4-3 ) .AND. + $ SAFMIN*Z( J4-3 ).LT.Z( J4+2 ) ) THEN + TEMP = Z( J4+2 ) / Z( J4-3 ) + Z( J4-1 ) = Z( J4 )*TEMP + D = D*TEMP + ELSE + Z( J4-1 ) = Z( J4+2 )*( Z( J4 ) / Z( J4-3 ) ) + D = Z( J4+2 )*( D / Z( J4-3 ) ) + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4-1 ) ) + 20 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + IF( Z( J4-2 ).EQ.ZERO ) THEN + Z( J4 ) = ZERO + DNM1 = Z( J4P2+2 ) + DMIN = DNM1 + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4P2+2 ).LT.Z( J4-2 ) .AND. + $ SAFMIN*Z( J4-2 ).LT.Z( J4P2+2 ) ) THEN + TEMP = Z( J4P2+2 ) / Z( J4-2 ) + Z( J4 ) = Z( J4P2 )*TEMP + DNM1 = DNM2*TEMP + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) + END IF + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + IF( Z( J4-2 ).EQ.ZERO ) THEN + Z( J4 ) = ZERO + DN = Z( J4P2+2 ) + DMIN = DN + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4P2+2 ).LT.Z( J4-2 ) .AND. + $ SAFMIN*Z( J4-2 ).LT.Z( J4P2+2 ) ) THEN + TEMP = Z( J4P2+2 ) / Z( J4-2 ) + Z( J4 ) = Z( J4P2 )*TEMP + DN = DNM1*TEMP + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) + END IF + DMIN = MIN( DMIN, DN ) +* + Z( J4+2 ) = DN + Z( 4*N0-PP ) = EMIN + RETURN +* +* End of DLASQ6 +* + END diff --git a/costa/native/external/lapack/dlasr.f b/costa/native/external/lapack/dlasr.f new file mode 100644 index 000000000..4b2cab754 --- /dev/null +++ b/costa/native/external/lapack/dlasr.f @@ -0,0 +1,325 @@ + SUBROUTINE DLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, PIVOT, SIDE + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( * ), S( * ) +* .. +* +* Purpose +* ======= +* +* DLASR performs the transformation +* +* A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) +* +* A := A*P', when SIDE = 'R' or 'r' ( Right-hand side ) +* +* where A is an m by n real matrix and P is an orthogonal matrix, +* consisting of a sequence of plane rotations determined by the +* parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l' +* and z = n when SIDE = 'R' or 'r' ): +* +* When DIRECT = 'F' or 'f' ( Forward sequence ) then +* +* P = P( z - 1 )*...*P( 2 )*P( 1 ), +* +* and when DIRECT = 'B' or 'b' ( Backward sequence ) then +* +* P = P( 1 )*P( 2 )*...*P( z - 1 ), +* +* where P( k ) is a plane rotation matrix for the following planes: +* +* when PIVOT = 'V' or 'v' ( Variable pivot ), +* the plane ( k, k + 1 ) +* +* when PIVOT = 'T' or 't' ( Top pivot ), +* the plane ( 1, k + 1 ) +* +* when PIVOT = 'B' or 'b' ( Bottom pivot ), +* the plane ( k, z ) +* +* c( k ) and s( k ) must contain the cosine and sine that define the +* matrix P( k ). The two by two plane rotation part of the matrix +* P( k ), R( k ), is assumed to be of the form +* +* R( k ) = ( c( k ) s( k ) ). +* ( -s( k ) c( k ) ) +* +* This version vectorises across rows of the array A when SIDE = 'L'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* Specifies whether the plane rotation matrix P is applied to +* A on the left or the right. +* = 'L': Left, compute A := P*A +* = 'R': Right, compute A:= A*P' +* +* DIRECT (input) CHARACTER*1 +* Specifies whether P is a forward or backward sequence of +* plane rotations. +* = 'F': Forward, P = P( z - 1 )*...*P( 2 )*P( 1 ) +* = 'B': Backward, P = P( 1 )*P( 2 )*...*P( z - 1 ) +* +* PIVOT (input) CHARACTER*1 +* Specifies the plane for which P(k) is a plane rotation +* matrix. +* = 'V': Variable pivot, the plane (k,k+1) +* = 'T': Top pivot, the plane (1,k+1) +* = 'B': Bottom pivot, the plane (k,z) +* +* M (input) INTEGER +* The number of rows of the matrix A. If m <= 1, an immediate +* return is effected. +* +* N (input) INTEGER +* The number of columns of the matrix A. If n <= 1, an +* immediate return is effected. +* +* C, S (input) DOUBLE PRECISION arrays, dimension +* (M-1) if SIDE = 'L' +* (N-1) if SIDE = 'R' +* c(k) and s(k) contain the cosine and sine that define the +* matrix P(k). The two by two plane rotation part of the +* matrix P(k), R(k), is assumed to be of the form +* R( k ) = ( c( k ) s( k ) ). +* ( -s( k ) c( k ) ) +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* The m by n matrix A. On exit, A is overwritten by P*A if +* SIDE = 'R' or by A*P' if SIDE = 'L'. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J + DOUBLE PRECISION CTEMP, STEMP, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.( LSAME( SIDE, 'L' ) .OR. LSAME( SIDE, 'R' ) ) ) THEN + INFO = 1 + ELSE IF( .NOT.( LSAME( PIVOT, 'V' ) .OR. LSAME( PIVOT, + $ 'T' ) .OR. LSAME( PIVOT, 'B' ) ) ) THEN + INFO = 2 + ELSE IF( .NOT.( LSAME( DIRECT, 'F' ) .OR. LSAME( DIRECT, 'B' ) ) ) + $ THEN + INFO = 3 + ELSE IF( M.LT.0 ) THEN + INFO = 4 + ELSE IF( N.LT.0 ) THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = 9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASR ', INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form P * A +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 20 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 10 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 40 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 30 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 30 CONTINUE + END IF + 40 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 60 J = 2, M + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 50 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 80 J = M, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 70 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 70 CONTINUE + END IF + 80 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 100 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 90 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 90 CONTINUE + END IF + 100 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 120 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 110 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 110 CONTINUE + END IF + 120 CONTINUE + END IF + END IF + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form A * P' +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 140 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 130 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 130 CONTINUE + END IF + 140 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 160 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 150 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 150 CONTINUE + END IF + 160 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 180 J = 2, N + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 170 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 170 CONTINUE + END IF + 180 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 200 J = N, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 190 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 190 CONTINUE + END IF + 200 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 220 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 210 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 210 CONTINUE + END IF + 220 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 240 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 230 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 230 CONTINUE + END IF + 240 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DLASR +* + END diff --git a/costa/native/external/lapack/dlasrt.f b/costa/native/external/lapack/dlasrt.f new file mode 100644 index 000000000..e776034bc --- /dev/null +++ b/costa/native/external/lapack/dlasrt.f @@ -0,0 +1,244 @@ + SUBROUTINE DLASRT( ID, N, D, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER ID + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ) +* .. +* +* Purpose +* ======= +* +* Sort the numbers in D in increasing order (if ID = 'I') or +* in decreasing order (if ID = 'D' ). +* +* Use Quick Sort, reverting to Insertion sort on arrays of +* size <= 20. Dimension of STACK limits N to about 2**32. +* +* Arguments +* ========= +* +* ID (input) CHARACTER*1 +* = 'I': sort D in increasing order; +* = 'D': sort D in decreasing order. +* +* N (input) INTEGER +* The length of the array D. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the array to be sorted. +* On exit, D has been sorted into increasing order +* (D(1) <= ... <= D(N) ) or into decreasing order +* (D(1) >= ... >= D(N) ), depending on ID. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER SELECT + PARAMETER ( SELECT = 20 ) +* .. +* .. Local Scalars .. + INTEGER DIR, ENDD, I, J, START, STKPNT + DOUBLE PRECISION D1, D2, D3, DMNMX, TMP +* .. +* .. Local Arrays .. + INTEGER STACK( 2, 32 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input paramters. +* + INFO = 0 + DIR = -1 + IF( LSAME( ID, 'D' ) ) THEN + DIR = 0 + ELSE IF( LSAME( ID, 'I' ) ) THEN + DIR = 1 + END IF + IF( DIR.EQ.-1 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASRT', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + STKPNT = 1 + STACK( 1, 1 ) = 1 + STACK( 2, 1 ) = N + 10 CONTINUE + START = STACK( 1, STKPNT ) + ENDD = STACK( 2, STKPNT ) + STKPNT = STKPNT - 1 + IF( ENDD-START.LE.SELECT .AND. ENDD-START.GT.0 ) THEN +* +* Do Insertion sort on D( START:ENDD ) +* + IF( DIR.EQ.0 ) THEN +* +* Sort into decreasing order +* + DO 30 I = START + 1, ENDD + DO 20 J = I, START + 1, -1 + IF( D( J ).GT.D( J-1 ) ) THEN + DMNMX = D( J ) + D( J ) = D( J-1 ) + D( J-1 ) = DMNMX + ELSE + GO TO 30 + END IF + 20 CONTINUE + 30 CONTINUE +* + ELSE +* +* Sort into increasing order +* + DO 50 I = START + 1, ENDD + DO 40 J = I, START + 1, -1 + IF( D( J ).LT.D( J-1 ) ) THEN + DMNMX = D( J ) + D( J ) = D( J-1 ) + D( J-1 ) = DMNMX + ELSE + GO TO 50 + END IF + 40 CONTINUE + 50 CONTINUE +* + END IF +* + ELSE IF( ENDD-START.GT.SELECT ) THEN +* +* Partition D( START:ENDD ) and stack parts, largest one first +* +* Choose partition entry as median of 3 +* + D1 = D( START ) + D2 = D( ENDD ) + I = ( START+ENDD ) / 2 + D3 = D( I ) + IF( D1.LT.D2 ) THEN + IF( D3.LT.D1 ) THEN + DMNMX = D1 + ELSE IF( D3.LT.D2 ) THEN + DMNMX = D3 + ELSE + DMNMX = D2 + END IF + ELSE + IF( D3.LT.D2 ) THEN + DMNMX = D2 + ELSE IF( D3.LT.D1 ) THEN + DMNMX = D3 + ELSE + DMNMX = D1 + END IF + END IF +* + IF( DIR.EQ.0 ) THEN +* +* Sort into decreasing order +* + I = START - 1 + J = ENDD + 1 + 60 CONTINUE + 70 CONTINUE + J = J - 1 + IF( D( J ).LT.DMNMX ) + $ GO TO 70 + 80 CONTINUE + I = I + 1 + IF( D( I ).GT.DMNMX ) + $ GO TO 80 + IF( I.LT.J ) THEN + TMP = D( I ) + D( I ) = D( J ) + D( J ) = TMP + GO TO 60 + END IF + IF( J-START.GT.ENDD-J-1 ) THEN + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + ELSE + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + END IF + ELSE +* +* Sort into increasing order +* + I = START - 1 + J = ENDD + 1 + 90 CONTINUE + 100 CONTINUE + J = J - 1 + IF( D( J ).GT.DMNMX ) + $ GO TO 100 + 110 CONTINUE + I = I + 1 + IF( D( I ).LT.DMNMX ) + $ GO TO 110 + IF( I.LT.J ) THEN + TMP = D( I ) + D( I ) = D( J ) + D( J ) = TMP + GO TO 90 + END IF + IF( J-START.GT.ENDD-J-1 ) THEN + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + ELSE + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + END IF + END IF + END IF + IF( STKPNT.GT.0 ) + $ GO TO 10 + RETURN +* +* End of DLASRT +* + END diff --git a/costa/native/external/lapack/dlassq.f b/costa/native/external/lapack/dlassq.f new file mode 100644 index 000000000..b378c0635 --- /dev/null +++ b/costa/native/external/lapack/dlassq.f @@ -0,0 +1,89 @@ + SUBROUTINE DLASSQ( N, X, INCX, SCALE, SUMSQ ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INCX, N + DOUBLE PRECISION SCALE, SUMSQ +* .. +* .. Array Arguments .. + DOUBLE PRECISION X( * ) +* .. +* +* Purpose +* ======= +* +* DLASSQ returns the values scl and smsq such that +* +* ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, +* +* where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is +* assumed to be non-negative and scl returns the value +* +* scl = max( scale, abs( x( i ) ) ). +* +* scale and sumsq must be supplied in SCALE and SUMSQ and +* scl and smsq are overwritten on SCALE and SUMSQ respectively. +* +* The routine makes only one pass through the vector x. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of elements to be used from the vector X. +* +* X (input) DOUBLE PRECISION array, dimension (N) +* The vector for which a scaled sum of squares is computed. +* x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. +* +* INCX (input) INTEGER +* The increment between successive values of the vector X. +* INCX > 0. +* +* SCALE (input/output) DOUBLE PRECISION +* On entry, the value scale in the equation above. +* On exit, SCALE is overwritten with scl , the scaling factor +* for the sum of squares. +* +* SUMSQ (input/output) DOUBLE PRECISION +* On entry, the value sumsq in the equation above. +* On exit, SUMSQ is overwritten with smsq , the basic sum of +* squares from which scl has been factored out. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER IX + DOUBLE PRECISION ABSXI +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + IF( N.GT.0 ) THEN + DO 10 IX = 1, 1 + ( N-1 )*INCX, INCX + IF( X( IX ).NE.ZERO ) THEN + ABSXI = ABS( X( IX ) ) + IF( SCALE.LT.ABSXI ) THEN + SUMSQ = 1 + SUMSQ*( SCALE / ABSXI )**2 + SCALE = ABSXI + ELSE + SUMSQ = SUMSQ + ( ABSXI / SCALE )**2 + END IF + END IF + 10 CONTINUE + END IF + RETURN +* +* End of DLASSQ +* + END diff --git a/costa/native/external/lapack/dlasv2.f b/costa/native/external/lapack/dlasv2.f new file mode 100644 index 000000000..5d936863d --- /dev/null +++ b/costa/native/external/lapack/dlasv2.f @@ -0,0 +1,250 @@ + SUBROUTINE DLASV2( F, G, H, SSMIN, SSMAX, SNR, CSR, SNL, CSL ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + DOUBLE PRECISION CSL, CSR, F, G, H, SNL, SNR, SSMAX, SSMIN +* .. +* +* Purpose +* ======= +* +* DLASV2 computes the singular value decomposition of a 2-by-2 +* triangular matrix +* [ F G ] +* [ 0 H ]. +* On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the +* smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and +* right singular vectors for abs(SSMAX), giving the decomposition +* +* [ CSL SNL ] [ F G ] [ CSR -SNR ] = [ SSMAX 0 ] +* [-SNL CSL ] [ 0 H ] [ SNR CSR ] [ 0 SSMIN ]. +* +* Arguments +* ========= +* +* F (input) DOUBLE PRECISION +* The (1,1) element of the 2-by-2 matrix. +* +* G (input) DOUBLE PRECISION +* The (1,2) element of the 2-by-2 matrix. +* +* H (input) DOUBLE PRECISION +* The (2,2) element of the 2-by-2 matrix. +* +* SSMIN (output) DOUBLE PRECISION +* abs(SSMIN) is the smaller singular value. +* +* SSMAX (output) DOUBLE PRECISION +* abs(SSMAX) is the larger singular value. +* +* SNL (output) DOUBLE PRECISION +* CSL (output) DOUBLE PRECISION +* The vector (CSL, SNL) is a unit left singular vector for the +* singular value abs(SSMAX). +* +* SNR (output) DOUBLE PRECISION +* CSR (output) DOUBLE PRECISION +* The vector (CSR, SNR) is a unit right singular vector for the +* singular value abs(SSMAX). +* +* Further Details +* =============== +* +* Any input parameter may be aliased with any output parameter. +* +* Barring over/underflow and assuming a guard digit in subtraction, all +* output quantities are correct to within a few units in the last +* place (ulps). +* +* In IEEE arithmetic, the code works correctly if one matrix element is +* infinite. +* +* Overflow will not occur unless the largest singular value itself +* overflows or is within a few ulps of overflow. (On machines with +* partial overflow, like the Cray, overflow may occur if the largest +* singular value is within a factor of 2 of overflow.) +* +* Underflow is harmless if underflow is gradual. Otherwise, results +* may correspond to a matrix modified by perturbations of size near +* the underflow threshold. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION HALF + PARAMETER ( HALF = 0.5D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D0 ) + DOUBLE PRECISION FOUR + PARAMETER ( FOUR = 4.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL GASMAL, SWAP + INTEGER PMAX + DOUBLE PRECISION A, CLT, CRT, D, FA, FT, GA, GT, HA, HT, L, M, + $ MM, R, S, SLT, SRT, T, TEMP, TSIGN, TT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Executable Statements .. +* + FT = F + FA = ABS( FT ) + HT = H + HA = ABS( H ) +* +* PMAX points to the maximum absolute element of matrix +* PMAX = 1 if F largest in absolute values +* PMAX = 2 if G largest in absolute values +* PMAX = 3 if H largest in absolute values +* + PMAX = 1 + SWAP = ( HA.GT.FA ) + IF( SWAP ) THEN + PMAX = 3 + TEMP = FT + FT = HT + HT = TEMP + TEMP = FA + FA = HA + HA = TEMP +* +* Now FA .ge. HA +* + END IF + GT = G + GA = ABS( GT ) + IF( GA.EQ.ZERO ) THEN +* +* Diagonal matrix +* + SSMIN = HA + SSMAX = FA + CLT = ONE + CRT = ONE + SLT = ZERO + SRT = ZERO + ELSE + GASMAL = .TRUE. + IF( GA.GT.FA ) THEN + PMAX = 2 + IF( ( FA / GA ).LT.DLAMCH( 'EPS' ) ) THEN +* +* Case of very large GA +* + GASMAL = .FALSE. + SSMAX = GA + IF( HA.GT.ONE ) THEN + SSMIN = FA / ( GA / HA ) + ELSE + SSMIN = ( FA / GA )*HA + END IF + CLT = ONE + SLT = HT / GT + SRT = ONE + CRT = FT / GT + END IF + END IF + IF( GASMAL ) THEN +* +* Normal case +* + D = FA - HA + IF( D.EQ.FA ) THEN +* +* Copes with infinite F or H +* + L = ONE + ELSE + L = D / FA + END IF +* +* Note that 0 .le. L .le. 1 +* + M = GT / FT +* +* Note that abs(M) .le. 1/macheps +* + T = TWO - L +* +* Note that T .ge. 1 +* + MM = M*M + TT = T*T + S = SQRT( TT+MM ) +* +* Note that 1 .le. S .le. 1 + 1/macheps +* + IF( L.EQ.ZERO ) THEN + R = ABS( M ) + ELSE + R = SQRT( L*L+MM ) + END IF +* +* Note that 0 .le. R .le. 1 + 1/macheps +* + A = HALF*( S+R ) +* +* Note that 1 .le. A .le. 1 + abs(M) +* + SSMIN = HA / A + SSMAX = FA*A + IF( MM.EQ.ZERO ) THEN +* +* Note that M is very tiny +* + IF( L.EQ.ZERO ) THEN + T = SIGN( TWO, FT )*SIGN( ONE, GT ) + ELSE + T = GT / SIGN( D, FT ) + M / T + END IF + ELSE + T = ( M / ( S+T )+M / ( R+L ) )*( ONE+A ) + END IF + L = SQRT( T*T+FOUR ) + CRT = TWO / L + SRT = T / L + CLT = ( CRT+SRT*M ) / A + SLT = ( HT / FT )*SRT / A + END IF + END IF + IF( SWAP ) THEN + CSL = SRT + SNL = CRT + CSR = SLT + SNR = CLT + ELSE + CSL = CLT + SNL = SLT + CSR = CRT + SNR = SRT + END IF +* +* Correct signs of SSMAX and SSMIN +* + IF( PMAX.EQ.1 ) + $ TSIGN = SIGN( ONE, CSR )*SIGN( ONE, CSL )*SIGN( ONE, F ) + IF( PMAX.EQ.2 ) + $ TSIGN = SIGN( ONE, SNR )*SIGN( ONE, CSL )*SIGN( ONE, G ) + IF( PMAX.EQ.3 ) + $ TSIGN = SIGN( ONE, SNR )*SIGN( ONE, SNL )*SIGN( ONE, H ) + SSMAX = SIGN( SSMAX, TSIGN ) + SSMIN = SIGN( SSMIN, TSIGN*SIGN( ONE, F )*SIGN( ONE, H ) ) + RETURN +* +* End of DLASV2 +* + END diff --git a/costa/native/external/lapack/dlaswp.f b/costa/native/external/lapack/dlaswp.f new file mode 100644 index 000000000..99c0dda27 --- /dev/null +++ b/costa/native/external/lapack/dlaswp.f @@ -0,0 +1,120 @@ + SUBROUTINE DLASWP( N, A, LDA, K1, K2, IPIV, INCX ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INCX, K1, K2, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* DLASWP performs a series of row interchanges on the matrix A. +* One row interchange is initiated for each of rows K1 through K2 of A. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of columns of the matrix A. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the matrix of column dimension N to which the row +* interchanges will be applied. +* On exit, the permuted matrix. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* +* K1 (input) INTEGER +* The first element of IPIV for which a row interchange will +* be done. +* +* K2 (input) INTEGER +* The last element of IPIV for which a row interchange will +* be done. +* +* IPIV (input) INTEGER array, dimension (M*abs(INCX)) +* The vector of pivot indices. Only the elements in positions +* K1 through K2 of IPIV are accessed. +* IPIV(K) = L implies rows K and L are to be interchanged. +* +* INCX (input) INTEGER +* The increment between successive values of IPIV. If IPIV +* is negative, the pivots are applied in reverse order. +* +* Further Details +* =============== +* +* Modified by +* R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, I1, I2, INC, IP, IX, IX0, J, K, N32 + DOUBLE PRECISION TEMP +* .. +* .. Executable Statements .. +* +* Interchange row I with row IPIV(I) for each of rows K1 through K2. +* + IF( INCX.GT.0 ) THEN + IX0 = K1 + I1 = K1 + I2 = K2 + INC = 1 + ELSE IF( INCX.LT.0 ) THEN + IX0 = 1 + ( 1-K2 )*INCX + I1 = K2 + I2 = K1 + INC = -1 + ELSE + RETURN + END IF +* + N32 = ( N / 32 )*32 + IF( N32.NE.0 ) THEN + DO 30 J = 1, N32, 32 + IX = IX0 + DO 20 I = I1, I2, INC + IP = IPIV( IX ) + IF( IP.NE.I ) THEN + DO 10 K = J, J + 31 + TEMP = A( I, K ) + A( I, K ) = A( IP, K ) + A( IP, K ) = TEMP + 10 CONTINUE + END IF + IX = IX + INCX + 20 CONTINUE + 30 CONTINUE + END IF + IF( N32.NE.N ) THEN + N32 = N32 + 1 + IX = IX0 + DO 50 I = I1, I2, INC + IP = IPIV( IX ) + IF( IP.NE.I ) THEN + DO 40 K = N32, N + TEMP = A( I, K ) + A( I, K ) = A( IP, K ) + A( IP, K ) = TEMP + 40 CONTINUE + END IF + IX = IX + INCX + 50 CONTINUE + END IF +* + RETURN +* +* End of DLASWP +* + END diff --git a/costa/native/external/lapack/dlasy2.f b/costa/native/external/lapack/dlasy2.f new file mode 100644 index 000000000..a3c2369c3 --- /dev/null +++ b/costa/native/external/lapack/dlasy2.f @@ -0,0 +1,382 @@ + SUBROUTINE DLASY2( LTRANL, LTRANR, ISGN, N1, N2, TL, LDTL, TR, + $ LDTR, B, LDB, SCALE, X, LDX, XNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + LOGICAL LTRANL, LTRANR + INTEGER INFO, ISGN, LDB, LDTL, LDTR, LDX, N1, N2 + DOUBLE PRECISION SCALE, XNORM +* .. +* .. Array Arguments .. + DOUBLE PRECISION B( LDB, * ), TL( LDTL, * ), TR( LDTR, * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DLASY2 solves for the N1 by N2 matrix X, 1 <= N1,N2 <= 2, in +* +* op(TL)*X + ISGN*X*op(TR) = SCALE*B, +* +* where TL is N1 by N1, TR is N2 by N2, B is N1 by N2, and ISGN = 1 or +* -1. op(T) = T or T', where T' denotes the transpose of T. +* +* Arguments +* ========= +* +* LTRANL (input) LOGICAL +* On entry, LTRANL specifies the op(TL): +* = .FALSE., op(TL) = TL, +* = .TRUE., op(TL) = TL'. +* +* LTRANR (input) LOGICAL +* On entry, LTRANR specifies the op(TR): +* = .FALSE., op(TR) = TR, +* = .TRUE., op(TR) = TR'. +* +* ISGN (input) INTEGER +* On entry, ISGN specifies the sign of the equation +* as described before. ISGN may only be 1 or -1. +* +* N1 (input) INTEGER +* On entry, N1 specifies the order of matrix TL. +* N1 may only be 0, 1 or 2. +* +* N2 (input) INTEGER +* On entry, N2 specifies the order of matrix TR. +* N2 may only be 0, 1 or 2. +* +* TL (input) DOUBLE PRECISION array, dimension (LDTL,2) +* On entry, TL contains an N1 by N1 matrix. +* +* LDTL (input) INTEGER +* The leading dimension of the matrix TL. LDTL >= max(1,N1). +* +* TR (input) DOUBLE PRECISION array, dimension (LDTR,2) +* On entry, TR contains an N2 by N2 matrix. +* +* LDTR (input) INTEGER +* The leading dimension of the matrix TR. LDTR >= max(1,N2). +* +* B (input) DOUBLE PRECISION array, dimension (LDB,2) +* On entry, the N1 by N2 matrix B contains the right-hand +* side of the equation. +* +* LDB (input) INTEGER +* The leading dimension of the matrix B. LDB >= max(1,N1). +* +* SCALE (output) DOUBLE PRECISION +* On exit, SCALE contains the scale factor. SCALE is chosen +* less than or equal to 1 to prevent the solution overflowing. +* +* X (output) DOUBLE PRECISION array, dimension (LDX,2) +* On exit, X contains the N1 by N2 solution. +* +* LDX (input) INTEGER +* The leading dimension of the matrix X. LDX >= max(1,N1). +* +* XNORM (output) DOUBLE PRECISION +* On exit, XNORM is the infinity-norm of the solution. +* +* INFO (output) INTEGER +* On exit, INFO is set to +* 0: successful exit. +* 1: TL and TR have too close eigenvalues, so TL or +* TR is perturbed to get a nonsingular equation. +* NOTE: In the interests of speed, this routine does not +* check the inputs for errors. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION TWO, HALF, EIGHT + PARAMETER ( TWO = 2.0D+0, HALF = 0.5D+0, EIGHT = 8.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL BSWAP, XSWAP + INTEGER I, IP, IPIV, IPSV, J, JP, JPSV, K + DOUBLE PRECISION BET, EPS, GAM, L21, SGN, SMIN, SMLNUM, TAU1, + $ TEMP, U11, U12, U22, XMAX +* .. +* .. Local Arrays .. + LOGICAL BSWPIV( 4 ), XSWPIV( 4 ) + INTEGER JPIV( 4 ), LOCL21( 4 ), LOCU12( 4 ), + $ LOCU22( 4 ) + DOUBLE PRECISION BTMP( 4 ), T16( 4, 4 ), TMP( 4 ), X2( 2 ) +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL IDAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Data statements .. + DATA LOCU12 / 3, 4, 1, 2 / , LOCL21 / 2, 1, 4, 3 / , + $ LOCU22 / 4, 3, 2, 1 / + DATA XSWPIV / .FALSE., .FALSE., .TRUE., .TRUE. / + DATA BSWPIV / .FALSE., .TRUE., .FALSE., .TRUE. / +* .. +* .. Executable Statements .. +* +* Do not check the input parameters for errors +* + INFO = 0 +* +* Quick return if possible +* + IF( N1.EQ.0 .OR. N2.EQ.0 ) + $ RETURN +* +* Set constants to control overflow +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + SGN = ISGN +* + K = N1 + N1 + N2 - 2 + GO TO ( 10, 20, 30, 50 )K +* +* 1 by 1: TL11*X + SGN*X*TR11 = B11 +* + 10 CONTINUE + TAU1 = TL( 1, 1 ) + SGN*TR( 1, 1 ) + BET = ABS( TAU1 ) + IF( BET.LE.SMLNUM ) THEN + TAU1 = SMLNUM + BET = SMLNUM + INFO = 1 + END IF +* + SCALE = ONE + GAM = ABS( B( 1, 1 ) ) + IF( SMLNUM*GAM.GT.BET ) + $ SCALE = ONE / GAM +* + X( 1, 1 ) = ( B( 1, 1 )*SCALE ) / TAU1 + XNORM = ABS( X( 1, 1 ) ) + RETURN +* +* 1 by 2: +* TL11*[X11 X12] + ISGN*[X11 X12]*op[TR11 TR12] = [B11 B12] +* [TR21 TR22] +* + 20 CONTINUE +* + SMIN = MAX( EPS*MAX( ABS( TL( 1, 1 ) ), ABS( TR( 1, 1 ) ), + $ ABS( TR( 1, 2 ) ), ABS( TR( 2, 1 ) ), ABS( TR( 2, 2 ) ) ), + $ SMLNUM ) + TMP( 1 ) = TL( 1, 1 ) + SGN*TR( 1, 1 ) + TMP( 4 ) = TL( 1, 1 ) + SGN*TR( 2, 2 ) + IF( LTRANR ) THEN + TMP( 2 ) = SGN*TR( 2, 1 ) + TMP( 3 ) = SGN*TR( 1, 2 ) + ELSE + TMP( 2 ) = SGN*TR( 1, 2 ) + TMP( 3 ) = SGN*TR( 2, 1 ) + END IF + BTMP( 1 ) = B( 1, 1 ) + BTMP( 2 ) = B( 1, 2 ) + GO TO 40 +* +* 2 by 1: +* op[TL11 TL12]*[X11] + ISGN* [X11]*TR11 = [B11] +* [TL21 TL22] [X21] [X21] [B21] +* + 30 CONTINUE + SMIN = MAX( EPS*MAX( ABS( TR( 1, 1 ) ), ABS( TL( 1, 1 ) ), + $ ABS( TL( 1, 2 ) ), ABS( TL( 2, 1 ) ), ABS( TL( 2, 2 ) ) ), + $ SMLNUM ) + TMP( 1 ) = TL( 1, 1 ) + SGN*TR( 1, 1 ) + TMP( 4 ) = TL( 2, 2 ) + SGN*TR( 1, 1 ) + IF( LTRANL ) THEN + TMP( 2 ) = TL( 1, 2 ) + TMP( 3 ) = TL( 2, 1 ) + ELSE + TMP( 2 ) = TL( 2, 1 ) + TMP( 3 ) = TL( 1, 2 ) + END IF + BTMP( 1 ) = B( 1, 1 ) + BTMP( 2 ) = B( 2, 1 ) + 40 CONTINUE +* +* Solve 2 by 2 system using complete pivoting. +* Set pivots less than SMIN to SMIN. +* + IPIV = IDAMAX( 4, TMP, 1 ) + U11 = TMP( IPIV ) + IF( ABS( U11 ).LE.SMIN ) THEN + INFO = 1 + U11 = SMIN + END IF + U12 = TMP( LOCU12( IPIV ) ) + L21 = TMP( LOCL21( IPIV ) ) / U11 + U22 = TMP( LOCU22( IPIV ) ) - U12*L21 + XSWAP = XSWPIV( IPIV ) + BSWAP = BSWPIV( IPIV ) + IF( ABS( U22 ).LE.SMIN ) THEN + INFO = 1 + U22 = SMIN + END IF + IF( BSWAP ) THEN + TEMP = BTMP( 2 ) + BTMP( 2 ) = BTMP( 1 ) - L21*TEMP + BTMP( 1 ) = TEMP + ELSE + BTMP( 2 ) = BTMP( 2 ) - L21*BTMP( 1 ) + END IF + SCALE = ONE + IF( ( TWO*SMLNUM )*ABS( BTMP( 2 ) ).GT.ABS( U22 ) .OR. + $ ( TWO*SMLNUM )*ABS( BTMP( 1 ) ).GT.ABS( U11 ) ) THEN + SCALE = HALF / MAX( ABS( BTMP( 1 ) ), ABS( BTMP( 2 ) ) ) + BTMP( 1 ) = BTMP( 1 )*SCALE + BTMP( 2 ) = BTMP( 2 )*SCALE + END IF + X2( 2 ) = BTMP( 2 ) / U22 + X2( 1 ) = BTMP( 1 ) / U11 - ( U12 / U11 )*X2( 2 ) + IF( XSWAP ) THEN + TEMP = X2( 2 ) + X2( 2 ) = X2( 1 ) + X2( 1 ) = TEMP + END IF + X( 1, 1 ) = X2( 1 ) + IF( N1.EQ.1 ) THEN + X( 1, 2 ) = X2( 2 ) + XNORM = ABS( X( 1, 1 ) ) + ABS( X( 1, 2 ) ) + ELSE + X( 2, 1 ) = X2( 2 ) + XNORM = MAX( ABS( X( 1, 1 ) ), ABS( X( 2, 1 ) ) ) + END IF + RETURN +* +* 2 by 2: +* op[TL11 TL12]*[X11 X12] +ISGN* [X11 X12]*op[TR11 TR12] = [B11 B12] +* [TL21 TL22] [X21 X22] [X21 X22] [TR21 TR22] [B21 B22] +* +* Solve equivalent 4 by 4 system using complete pivoting. +* Set pivots less than SMIN to SMIN. +* + 50 CONTINUE + SMIN = MAX( ABS( TR( 1, 1 ) ), ABS( TR( 1, 2 ) ), + $ ABS( TR( 2, 1 ) ), ABS( TR( 2, 2 ) ) ) + SMIN = MAX( SMIN, ABS( TL( 1, 1 ) ), ABS( TL( 1, 2 ) ), + $ ABS( TL( 2, 1 ) ), ABS( TL( 2, 2 ) ) ) + SMIN = MAX( EPS*SMIN, SMLNUM ) + BTMP( 1 ) = ZERO + CALL DCOPY( 16, BTMP, 0, T16, 1 ) + T16( 1, 1 ) = TL( 1, 1 ) + SGN*TR( 1, 1 ) + T16( 2, 2 ) = TL( 2, 2 ) + SGN*TR( 1, 1 ) + T16( 3, 3 ) = TL( 1, 1 ) + SGN*TR( 2, 2 ) + T16( 4, 4 ) = TL( 2, 2 ) + SGN*TR( 2, 2 ) + IF( LTRANL ) THEN + T16( 1, 2 ) = TL( 2, 1 ) + T16( 2, 1 ) = TL( 1, 2 ) + T16( 3, 4 ) = TL( 2, 1 ) + T16( 4, 3 ) = TL( 1, 2 ) + ELSE + T16( 1, 2 ) = TL( 1, 2 ) + T16( 2, 1 ) = TL( 2, 1 ) + T16( 3, 4 ) = TL( 1, 2 ) + T16( 4, 3 ) = TL( 2, 1 ) + END IF + IF( LTRANR ) THEN + T16( 1, 3 ) = SGN*TR( 1, 2 ) + T16( 2, 4 ) = SGN*TR( 1, 2 ) + T16( 3, 1 ) = SGN*TR( 2, 1 ) + T16( 4, 2 ) = SGN*TR( 2, 1 ) + ELSE + T16( 1, 3 ) = SGN*TR( 2, 1 ) + T16( 2, 4 ) = SGN*TR( 2, 1 ) + T16( 3, 1 ) = SGN*TR( 1, 2 ) + T16( 4, 2 ) = SGN*TR( 1, 2 ) + END IF + BTMP( 1 ) = B( 1, 1 ) + BTMP( 2 ) = B( 2, 1 ) + BTMP( 3 ) = B( 1, 2 ) + BTMP( 4 ) = B( 2, 2 ) +* +* Perform elimination +* + DO 100 I = 1, 3 + XMAX = ZERO + DO 70 IP = I, 4 + DO 60 JP = I, 4 + IF( ABS( T16( IP, JP ) ).GE.XMAX ) THEN + XMAX = ABS( T16( IP, JP ) ) + IPSV = IP + JPSV = JP + END IF + 60 CONTINUE + 70 CONTINUE + IF( IPSV.NE.I ) THEN + CALL DSWAP( 4, T16( IPSV, 1 ), 4, T16( I, 1 ), 4 ) + TEMP = BTMP( I ) + BTMP( I ) = BTMP( IPSV ) + BTMP( IPSV ) = TEMP + END IF + IF( JPSV.NE.I ) + $ CALL DSWAP( 4, T16( 1, JPSV ), 1, T16( 1, I ), 1 ) + JPIV( I ) = JPSV + IF( ABS( T16( I, I ) ).LT.SMIN ) THEN + INFO = 1 + T16( I, I ) = SMIN + END IF + DO 90 J = I + 1, 4 + T16( J, I ) = T16( J, I ) / T16( I, I ) + BTMP( J ) = BTMP( J ) - T16( J, I )*BTMP( I ) + DO 80 K = I + 1, 4 + T16( J, K ) = T16( J, K ) - T16( J, I )*T16( I, K ) + 80 CONTINUE + 90 CONTINUE + 100 CONTINUE + IF( ABS( T16( 4, 4 ) ).LT.SMIN ) + $ T16( 4, 4 ) = SMIN + SCALE = ONE + IF( ( EIGHT*SMLNUM )*ABS( BTMP( 1 ) ).GT.ABS( T16( 1, 1 ) ) .OR. + $ ( EIGHT*SMLNUM )*ABS( BTMP( 2 ) ).GT.ABS( T16( 2, 2 ) ) .OR. + $ ( EIGHT*SMLNUM )*ABS( BTMP( 3 ) ).GT.ABS( T16( 3, 3 ) ) .OR. + $ ( EIGHT*SMLNUM )*ABS( BTMP( 4 ) ).GT.ABS( T16( 4, 4 ) ) ) THEN + SCALE = ( ONE / EIGHT ) / MAX( ABS( BTMP( 1 ) ), + $ ABS( BTMP( 2 ) ), ABS( BTMP( 3 ) ), ABS( BTMP( 4 ) ) ) + BTMP( 1 ) = BTMP( 1 )*SCALE + BTMP( 2 ) = BTMP( 2 )*SCALE + BTMP( 3 ) = BTMP( 3 )*SCALE + BTMP( 4 ) = BTMP( 4 )*SCALE + END IF + DO 120 I = 1, 4 + K = 5 - I + TEMP = ONE / T16( K, K ) + TMP( K ) = BTMP( K )*TEMP + DO 110 J = K + 1, 4 + TMP( K ) = TMP( K ) - ( TEMP*T16( K, J ) )*TMP( J ) + 110 CONTINUE + 120 CONTINUE + DO 130 I = 1, 3 + IF( JPIV( 4-I ).NE.4-I ) THEN + TEMP = TMP( 4-I ) + TMP( 4-I ) = TMP( JPIV( 4-I ) ) + TMP( JPIV( 4-I ) ) = TEMP + END IF + 130 CONTINUE + X( 1, 1 ) = TMP( 1 ) + X( 2, 1 ) = TMP( 2 ) + X( 1, 2 ) = TMP( 3 ) + X( 2, 2 ) = TMP( 4 ) + XNORM = MAX( ABS( TMP( 1 ) )+ABS( TMP( 3 ) ), + $ ABS( TMP( 2 ) )+ABS( TMP( 4 ) ) ) + RETURN +* +* End of DLASY2 +* + END diff --git a/costa/native/external/lapack/dlasyf.f b/costa/native/external/lapack/dlasyf.f new file mode 100644 index 000000000..e2dcb3a41 --- /dev/null +++ b/costa/native/external/lapack/dlasyf.f @@ -0,0 +1,588 @@ + SUBROUTINE DLASYF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), W( LDW, * ) +* .. +* +* Purpose +* ======= +* +* DLASYF computes a partial factorization of a real symmetric matrix A +* using the Bunch-Kaufman diagonal pivoting method. The partial +* factorization has the form: +* +* A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +* ( 0 U22 ) ( 0 D ) ( U12' U22' ) +* +* A = ( L11 0 ) ( D 0 ) ( L11' L21' ) if UPLO = 'L' +* ( L21 I ) ( 0 A22 ) ( 0 I ) +* +* where the order of D is at most NB. The actual order is returned in +* the argument KB, and is either NB or NB-1, or N if N <= NB. +* +* DLASYF is an auxiliary routine called by DSYTRF. It uses blocked code +* (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or +* A22 (if UPLO = 'L'). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NB (input) INTEGER +* The maximum number of columns of the matrix A that should be +* factored. NB should be at least 2 to allow for 2-by-2 pivot +* blocks. +* +* KB (output) INTEGER +* The number of columns of A that were actually factored. +* KB is either NB-1 or NB, or N if N <= NB. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* On exit, A contains details of the partial factorization. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If UPLO = 'U', only the last KB elements of IPIV are set; +* if UPLO = 'L', only the first KB elements are set. +* +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* W (workspace) DOUBLE PRECISION array, dimension (LDW,NB) +* +* LDW (input) INTEGER +* The leading dimension of the array W. LDW >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* > 0: if INFO = k, D(k,k) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER IMAX, J, JB, JJ, JMAX, JP, K, KK, KKW, KP, + $ KSTEP, KW + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, D11, D21, D22, R1, + $ ROWMAX, T +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + EXTERNAL LSAME, IDAMAX +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DGEMV, DSCAL, DSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* +* KW is the column of W which corresponds to column K of A +* + K = N + 10 CONTINUE + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* +* Copy column K of A to column KW of W and update it +* + CALL DCOPY( K, A( 1, K ), 1, W( 1, KW ), 1 ) + IF( K.LT.N ) + $ CALL DGEMV( 'No transpose', K, N-K, -ONE, A( 1, K+1 ), LDA, + $ W( K, KW+1 ), LDW, ONE, W( 1, KW ), 1 ) +* + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( W( K, KW ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = IDAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = ABS( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* Copy column IMAX to column KW-1 of W and update it +* + CALL DCOPY( IMAX, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 ) + CALL DCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) + IF( K.LT.N ) + $ CALL DGEMV( 'No transpose', K, N-K, -ONE, A( 1, K+1 ), + $ LDA, W( IMAX, KW+1 ), LDW, ONE, + $ W( 1, KW-1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = IMAX + IDAMAX( K-IMAX, W( IMAX+1, KW-1 ), 1 ) + ROWMAX = ABS( W( JMAX, KW-1 ) ) + IF( IMAX.GT.1 ) THEN + JMAX = IDAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( W( JMAX, KW-1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( W( IMAX, KW-1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW +* + CALL DCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + KKW = NB + KK - N +* +* Updated column KP is already stored in column KKW of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL DCOPY( K-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + CALL DCOPY( KP, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last KK columns of A and W +* + CALL DSWAP( N-KK+1, A( KK, KK ), LDA, A( KP, KK ), LDA ) + CALL DSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column KW of W now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Store U(k) in column k of A +* + CALL DCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + R1 = ONE / A( K, K ) + CALL DSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns KW and KW-1 of W now +* hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* + IF( K.GT.2 ) THEN +* +* Store U(k) and U(k-1) in columns k and k-1 of A +* + D21 = W( K-1, KW ) + D11 = W( K, KW ) / D21 + D22 = W( K-1, KW-1 ) / D21 + T = ONE / ( D11*D22-ONE ) + D21 = T / D21 + DO 20 J = 1, K - 2 + A( J, K-1 ) = D21*( D11*W( J, KW-1 )-W( J, KW ) ) + A( J, K ) = D21*( D22*W( J, KW )-W( J, KW-1 ) ) + 20 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = W( K-1, KW ) + A( K, K ) = W( K, KW ) + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12' = A11 - U12*W' +* +* computing blocks of NB columns at a time +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + CALL DGEMV( 'No transpose', JJ-J+1, N-K, -ONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, ONE, + $ A( J, JJ ), 1 ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + CALL DGEMM( 'No transpose', 'Transpose', J-1, JB, N-K, -ONE, + $ A( 1, K+1 ), LDA, W( J, KW+1 ), LDW, ONE, + $ A( 1, J ), LDA ) + 50 CONTINUE +* +* Put U12 in standard form by partially undoing the interchanges +* in columns k+1:n +* + J = K + 1 + 60 CONTINUE + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP + J = J + 1 + END IF + J = J + 1 + IF( JP.NE.JJ .AND. J.LE.N ) + $ CALL DSWAP( N-J+1, A( JP, J ), LDA, A( JJ, J ), LDA ) + IF( J.LE.N ) + $ GO TO 60 +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* +* Copy column K of A to column K of W and update it +* + CALL DCOPY( N-K+1, A( K, K ), 1, W( K, K ), 1 ) + CALL DGEMV( 'No transpose', N-K+1, K-1, -ONE, A( K, 1 ), LDA, + $ W( K, 1 ), LDW, ONE, W( K, K ), 1 ) +* + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( W( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + IDAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = ABS( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* Copy column IMAX to column K+1 of W and update it +* + CALL DCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1 ) + CALL DCOPY( N-IMAX+1, A( IMAX, IMAX ), 1, W( IMAX, K+1 ), + $ 1 ) + CALL DGEMV( 'No transpose', N-K+1, K-1, -ONE, A( K, 1 ), + $ LDA, W( IMAX, 1 ), LDW, ONE, W( K, K+1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = K - 1 + IDAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = ABS( W( JMAX, K+1 ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + IDAMAX( N-IMAX, W( IMAX+1, K+1 ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( W( JMAX, K+1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( W( IMAX, K+1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K +* + CALL DCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 +* +* Updated column KP is already stored in column KK of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL DCOPY( KP-K-1, A( K+1, KK ), 1, A( KP, K+1 ), LDA ) + CALL DCOPY( N-KP+1, A( KP, KK ), 1, A( KP, KP ), 1 ) +* +* Interchange rows KK and KP in first KK columns of A and W +* + CALL DSWAP( KK, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL DSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* +* Store L(k) in column k of A +* + CALL DCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN + R1 = ONE / A( K, K ) + CALL DSCAL( N-K, R1, A( K+1, K ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Store L(k) and L(k+1) in columns k and k+1 of A +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / D21 + T = ONE / ( D11*D22-ONE ) + D21 = T / D21 + DO 80 J = K + 2, N + A( J, K ) = D21*( D11*W( J, K )-W( J, K+1 ) ) + A( J, K+1 ) = D21*( D22*W( J, K+1 )-W( J, K ) ) + 80 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = W( K+1, K ) + A( K+1, K+1 ) = W( K+1, K+1 ) + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21' = A22 - L21*W' +* +* computing blocks of NB columns at a time +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + CALL DGEMV( 'No transpose', J+JB-JJ, K-1, -ONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, ONE, + $ A( JJ, JJ ), 1 ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL DGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -ONE, A( J+JB, 1 ), LDA, W( J, 1 ), LDW, + $ ONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Put L21 in standard form by partially undoing the interchanges +* in columns 1:k-1 +* + J = K - 1 + 120 CONTINUE + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP + J = J - 1 + END IF + J = J - 1 + IF( JP.NE.JJ .AND. J.GE.1 ) + $ CALL DSWAP( J, A( JP, 1 ), LDA, A( JJ, 1 ), LDA ) + IF( J.GE.1 ) + $ GO TO 120 +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF + RETURN +* +* End of DLASYF +* + END diff --git a/costa/native/external/lapack/dlatbs.f b/costa/native/external/lapack/dlatbs.f new file mode 100644 index 000000000..fe7dd11cb --- /dev/null +++ b/costa/native/external/lapack/dlatbs.f @@ -0,0 +1,724 @@ + SUBROUTINE DLATBS( UPLO, TRANS, DIAG, NORMIN, N, KD, AB, LDAB, X, + $ SCALE, CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, KD, LDAB, N + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), CNORM( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DLATBS solves one of the triangular systems +* +* A *x = s*b or A'*x = s*b +* +* with scaling to prevent overflow, where A is an upper or lower +* triangular band matrix. Here A' denotes the transpose of A, x and b +* are n-element vectors, and s is a scaling factor, usually less than +* or equal to 1, chosen so that the components of x will be less than +* the overflow threshold. If the unscaled problem will not cause +* overflow, the Level 2 BLAS routine DTBSV is called. If the matrix A +* is singular (A(j,j) = 0 for some j), then s is set to 0 and a +* non-trivial solution to A*x = 0 is returned. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* TRANS (input) CHARACTER*1 +* Specifies the operation applied to A. +* = 'N': Solve A * x = s*b (No transpose) +* = 'T': Solve A'* x = s*b (Transpose) +* = 'C': Solve A'* x = s*b (Conjugate transpose = Transpose) +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* NORMIN (input) CHARACTER*1 +* Specifies whether CNORM has been set or not. +* = 'Y': CNORM contains the column norms on entry +* = 'N': CNORM is not set on entry. On exit, the norms will +* be computed and stored in CNORM. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of subdiagonals or superdiagonals in the +* triangular matrix A. KD >= 0. +* +* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) +* The upper or lower triangular band matrix A, stored in the +* first KD+1 rows of the array. The j-th column of A is stored +* in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* X (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the right hand side b of the triangular system. +* On exit, X is overwritten by the solution vector x. +* +* SCALE (output) DOUBLE PRECISION +* The scaling factor s for the triangular system +* A * x = s*b or A'* x = s*b. +* If SCALE = 0, the matrix A is singular or badly scaled, and +* the vector x is an exact or approximate solution to A*x = 0. +* +* CNORM (input or output) DOUBLE PRECISION array, dimension (N) +* +* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +* contains the norm of the off-diagonal part of the j-th column +* of A. If TRANS = 'N', CNORM(j) must be greater than or equal +* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +* must be greater than or equal to the 1-norm. +* +* If NORMIN = 'N', CNORM is an output argument and CNORM(j) +* returns the 1-norm of the offdiagonal part of the j-th column +* of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* Further Details +* ======= ======= +* +* A rough bound on x is computed; if that is less than overflow, DTBSV +* is called, otherwise, specific code is used which checks for possible +* overflow or divide-by-zero at every operation. +* +* A columnwise scheme is used for solving A*x = b. The basic algorithm +* if A is lower triangular is +* +* x[1:n] := b[1:n] +* for j = 1, ..., n +* x(j) := x(j) / A(j,j) +* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +* end +* +* Define bounds on the components of x after j iterations of the loop: +* M(j) = bound on x[1:j] +* G(j) = bound on x[j+1:n] +* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +* +* Then for iteration j+1 we have +* M(j+1) <= G(j) / | A(j+1,j+1) | +* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +* +* where CNORM(j+1) is greater than or equal to the infinity-norm of +* column j+1 of A, not counting the diagonal. Hence +* +* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +* 1<=i<=j +* and +* +* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +* 1<=i< j +* +* Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTBSV if the +* reciprocal of the largest M(j), j=1,..,n, is larger than +* max(underflow, 1/overflow). +* +* The bound on x(j) is also used to determine when a step in the +* columnwise method can be performed without fear of overflow. If +* the computed bound is greater than a large constant, x is scaled to +* prevent overflow, but if the bound overflows, x is set to 0, x(j) to +* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +* +* Similarly, a row-wise scheme is used to solve A'*x = b. The basic +* algorithm for A upper triangular is +* +* for j = 1, ..., n +* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +* end +* +* We simultaneously compute two bounds +* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +* M(j) = bound on x(i), 1<=i<=j +* +* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +* Then the bound on x(j) is +* +* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +* +* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +* 1<=i<=j +* +* and we can safely call DTBSV if 1/M(n) and 1/G(n) are both greater +* than max(underflow, 1/overflow). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, J, JFIRST, JINC, JLAST, JLEN, MAIND + DOUBLE PRECISION BIGNUM, GROW, REC, SMLNUM, SUMJ, TJJ, TJJS, + $ TMAX, TSCAL, USCAL, XBND, XJ, XMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DASUM, DDOT, DLAMCH + EXTERNAL LSAME, IDAMAX, DASUM, DDOT, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DSCAL, DTBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( KD.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLATBS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + DO 10 J = 1, N + JLEN = MIN( KD, J-1 ) + CNORM( J ) = DASUM( JLEN, AB( KD+1-JLEN, J ), 1 ) + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + DO 20 J = 1, N + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) THEN + CNORM( J ) = DASUM( JLEN, AB( 2, J ), 1 ) + ELSE + CNORM( J ) = ZERO + END IF + 20 CONTINUE + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM. +* + IMAX = IDAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM ) THEN + TSCAL = ONE + ELSE + TSCAL = ONE / ( SMLNUM*TMAX ) + CALL DSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine DTBSV can be used. +* + J = IDAMAX( N, X, 1 ) + XMAX = ABS( X( J ) ) + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + MAIND = KD + 1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + MAIND = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 50 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + DO 30 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* M(j) = G(j-1) / abs(A(j,j)) +* + TJJ = ABS( AB( MAIND, J ) ) + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + 30 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 40 CONTINUE + END IF + 50 CONTINUE +* + ELSE +* +* Compute the growth in A' * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + MAIND = KD + 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + MAIND = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 80 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + DO 60 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + TJJ = ABS( AB( MAIND, J ) ) + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + 60 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 70 CONTINUE + END IF + 80 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL DTBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = BIGNUM / XMAX + CALL DSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + DO 110 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 100 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 90 I = 1, N + X( I ) = ZERO + 90 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 100 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL DSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) - +* x(j)* A(max(1,j-kd):j-1,j) +* + JLEN = MIN( KD, J-1 ) + CALL DAXPY( JLEN, -X( J )*TSCAL, + $ AB( KD+1-JLEN, J ), 1, X( J-JLEN ), 1 ) + I = IDAMAX( J-1, X, 1 ) + XMAX = ABS( X( I ) ) + END IF + ELSE IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:min(j+kd,n)) := x(j+1:min(j+kd,n)) - +* x(j) * A(j+1:min(j+kd,n),j) +* + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) + $ CALL DAXPY( JLEN, -X( J )*TSCAL, AB( 2, J ), 1, + $ X( J+1 ), 1 ) + I = J + IDAMAX( N-J, X( J+1 ), 1 ) + XMAX = ABS( X( I ) ) + END IF + 110 CONTINUE +* + ELSE +* +* Solve A' * x = b +* + DO 160 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = ABS( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = USCAL / TJJS + END IF + IF( REC.LT.ONE ) THEN + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + SUMJ = ZERO + IF( USCAL.EQ.ONE ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call DDOT to perform the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + SUMJ = DDOT( JLEN, AB( KD+1-JLEN, J ), 1, + $ X( J-JLEN ), 1 ) + ELSE + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) + $ SUMJ = DDOT( JLEN, AB( 2, J ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + DO 120 I = 1, JLEN + SUMJ = SUMJ + ( AB( KD+I-JLEN, J )*USCAL )* + $ X( J-JLEN-1+I ) + 120 CONTINUE + ELSE + JLEN = MIN( KD, N-J ) + DO 130 I = 1, JLEN + SUMJ = SUMJ + ( AB( I+1, J )*USCAL )*X( J+I ) + 130 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.TSCAL ) THEN +* +* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - SUMJ + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 150 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A'*x = 0. +* + DO 140 I = 1, N + X( I ) = ZERO + 140 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 150 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - sumj if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = X( J ) / TJJS - SUMJ + END IF + XMAX = MAX( XMAX, ABS( X( J ) ) ) + 160 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL DSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of DLATBS +* + END diff --git a/costa/native/external/lapack/dlatdf.f b/costa/native/external/lapack/dlatdf.f new file mode 100644 index 000000000..728fd3a4b --- /dev/null +++ b/costa/native/external/lapack/dlatdf.f @@ -0,0 +1,238 @@ + SUBROUTINE DLATDF( IJOB, N, Z, LDZ, RHS, RDSUM, RDSCAL, IPIV, + $ JPIV ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER IJOB, LDZ, N + DOUBLE PRECISION RDSCAL, RDSUM +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + DOUBLE PRECISION RHS( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DLATDF uses the LU factorization of the n-by-n matrix Z computed by +* DGETC2 and computes a contribution to the reciprocal Dif-estimate +* by solving Z * x = b for x, and choosing the r.h.s. b such that +* the norm of x is as large as possible. On entry RHS = b holds the +* contribution from earlier solved sub-systems, and on return RHS = x. +* +* The factorization of Z returned by DGETC2 has the form Z = P*L*U*Q, +* where P and Q are permutation matrices. L is lower triangular with +* unit diagonal elements and U is upper triangular. +* +* Arguments +* ========= +* +* IJOB (input) INTEGER +* IJOB = 2: First compute an approximative null-vector e +* of Z using DGECON, e is normalized and solve for +* Zx = +-e - f with the sign giving the greater value +* of 2-norm(x). About 5 times as expensive as Default. +* IJOB .ne. 2: Local look ahead strategy where all entries of +* the r.h.s. b is choosen as either +1 or -1 (Default). +* +* N (input) INTEGER +* The number of columns of the matrix Z. +* +* Z (input) DOUBLE PRECISION array, dimension (LDZ, N) +* On entry, the LU part of the factorization of the n-by-n +* matrix Z computed by DGETC2: Z = P * L * U * Q +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDA >= max(1, N). +* +* RHS (input/output) DOUBLE PRECISION array, dimension N. +* On entry, RHS contains contributions from other subsystems. +* On exit, RHS contains the solution of the subsystem with +* entries acoording to the value of IJOB (see above). +* +* RDSUM (input/output) DOUBLE PRECISION +* On entry, the sum of squares of computed contributions to +* the Dif-estimate under computation by DTGSYL, where the +* scaling factor RDSCAL (see below) has been factored out. +* On exit, the corresponding sum of squares updated with the +* contributions from the current sub-system. +* If TRANS = 'T' RDSUM is not touched. +* NOTE: RDSUM only makes sense when DTGSY2 is called by STGSYL. +* +* RDSCAL (input/output) DOUBLE PRECISION +* On entry, scaling factor used to prevent overflow in RDSUM. +* On exit, RDSCAL is updated w.r.t. the current contributions +* in RDSUM. +* If TRANS = 'T', RDSCAL is not touched. +* NOTE: RDSCAL only makes sense when DTGSY2 is called by +* DTGSYL. +* +* IPIV (input) INTEGER array, dimension (N). +* The pivot indices; for 1 <= i <= N, row i of the +* matrix has been interchanged with row IPIV(i). +* +* JPIV (input) INTEGER array, dimension (N). +* The pivot indices; for 1 <= j <= N, column j of the +* matrix has been interchanged with column JPIV(j). +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* This routine is a further developed implementation of algorithm +* BSOLVE in [1] using complete pivoting in the LU factorization. +* +* [1] Bo Kagstrom and Lars Westin, +* Generalized Schur Methods with Condition Estimators for +* Solving the Generalized Sylvester Equation, IEEE Transactions +* on Automatic Control, Vol. 34, No. 7, July 1989, pp 745-751. +* +* [2] Peter Poromaa, +* On Efficient and Robust Estimators for the Separation +* between two Regular Matrix Pairs with Applications in +* Condition Estimation. Report IMINF-95.05, Departement of +* Computing Science, Umea University, S-901 87 Umea, Sweden, 1995. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXDIM + PARAMETER ( MAXDIM = 8 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J, K + DOUBLE PRECISION BM, BP, PMONE, SMINU, SPLUS, TEMP +* .. +* .. Local Arrays .. + INTEGER IWORK( MAXDIM ) + DOUBLE PRECISION WORK( 4*MAXDIM ), XM( MAXDIM ), XP( MAXDIM ) +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGECON, DGESC2, DLASSQ, DLASWP, + $ DSCAL +* .. +* .. External Functions .. + DOUBLE PRECISION DASUM, DDOT + EXTERNAL DASUM, DDOT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( IJOB.NE.2 ) THEN +* +* Apply permutations IPIV to RHS +* + CALL DLASWP( 1, RHS, LDZ, 1, N-1, IPIV, 1 ) +* +* Solve for L-part choosing RHS either to +1 or -1. +* + PMONE = -ONE +* + DO 10 J = 1, N - 1 + BP = RHS( J ) + ONE + BM = RHS( J ) - ONE + SPLUS = ONE +* +* Look-ahead for L-part RHS(1:N-1) = + or -1, SPLUS and +* SMIN computed more efficiently than in BSOLVE [1]. +* + SPLUS = SPLUS + DDOT( N-J, Z( J+1, J ), 1, Z( J+1, J ), 1 ) + SMINU = DDOT( N-J, Z( J+1, J ), 1, RHS( J+1 ), 1 ) + SPLUS = SPLUS*RHS( J ) + IF( SPLUS.GT.SMINU ) THEN + RHS( J ) = BP + ELSE IF( SMINU.GT.SPLUS ) THEN + RHS( J ) = BM + ELSE +* +* In this case the updating sums are equal and we can +* choose RHS(J) +1 or -1. The first time this happens +* we choose -1, thereafter +1. This is a simple way to +* get good estimates of matrices like Byers well-known +* example (see [1]). (Not done in BSOLVE.) +* + RHS( J ) = RHS( J ) + PMONE + PMONE = ONE + END IF +* +* Compute the remaining r.h.s. +* + TEMP = -RHS( J ) + CALL DAXPY( N-J, TEMP, Z( J+1, J ), 1, RHS( J+1 ), 1 ) +* + 10 CONTINUE +* +* Solve for U-part, look-ahead for RHS(N) = +-1. This is not done +* in BSOLVE and will hopefully give us a better estimate because +* any ill-conditioning of the original matrix is transfered to U +* and not to L. U(N, N) is an approximation to sigma_min(LU). +* + CALL DCOPY( N-1, RHS, 1, XP, 1 ) + XP( N ) = RHS( N ) + ONE + RHS( N ) = RHS( N ) - ONE + SPLUS = ZERO + SMINU = ZERO + DO 30 I = N, 1, -1 + TEMP = ONE / Z( I, I ) + XP( I ) = XP( I )*TEMP + RHS( I ) = RHS( I )*TEMP + DO 20 K = I + 1, N + XP( I ) = XP( I ) - XP( K )*( Z( I, K )*TEMP ) + RHS( I ) = RHS( I ) - RHS( K )*( Z( I, K )*TEMP ) + 20 CONTINUE + SPLUS = SPLUS + ABS( XP( I ) ) + SMINU = SMINU + ABS( RHS( I ) ) + 30 CONTINUE + IF( SPLUS.GT.SMINU ) + $ CALL DCOPY( N, XP, 1, RHS, 1 ) +* +* Apply the permutations JPIV to the computed solution (RHS) +* + CALL DLASWP( 1, RHS, LDZ, 1, N-1, JPIV, -1 ) +* +* Compute the sum of squares +* + CALL DLASSQ( N, RHS, 1, RDSCAL, RDSUM ) +* + ELSE +* +* IJOB = 2, Compute approximate nullvector XM of Z +* + CALL DGECON( 'I', N, Z, LDZ, ONE, TEMP, WORK, IWORK, INFO ) + CALL DCOPY( N, WORK( N+1 ), 1, XM, 1 ) +* +* Compute RHS +* + CALL DLASWP( 1, XM, LDZ, 1, N-1, IPIV, -1 ) + TEMP = ONE / SQRT( DDOT( N, XM, 1, XM, 1 ) ) + CALL DSCAL( N, TEMP, XM, 1 ) + CALL DCOPY( N, XM, 1, XP, 1 ) + CALL DAXPY( N, ONE, RHS, 1, XP, 1 ) + CALL DAXPY( N, -ONE, XM, 1, RHS, 1 ) + CALL DGESC2( N, Z, LDZ, RHS, IPIV, JPIV, TEMP ) + CALL DGESC2( N, Z, LDZ, XP, IPIV, JPIV, TEMP ) + IF( DASUM( N, XP, 1 ).GT.DASUM( N, RHS, 1 ) ) + $ CALL DCOPY( N, XP, 1, RHS, 1 ) +* +* Compute the sum of squares +* + CALL DLASSQ( N, RHS, 1, RDSCAL, RDSUM ) +* + END IF +* + RETURN +* +* End of DLATDF +* + END diff --git a/costa/native/external/lapack/dlatps.f b/costa/native/external/lapack/dlatps.f new file mode 100644 index 000000000..32b837bfa --- /dev/null +++ b/costa/native/external/lapack/dlatps.f @@ -0,0 +1,713 @@ + SUBROUTINE DLATPS( UPLO, TRANS, DIAG, NORMIN, N, AP, X, SCALE, + $ CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, N + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), CNORM( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DLATPS solves one of the triangular systems +* +* A *x = s*b or A'*x = s*b +* +* with scaling to prevent overflow, where A is an upper or lower +* triangular matrix stored in packed form. Here A' denotes the +* transpose of A, x and b are n-element vectors, and s is a scaling +* factor, usually less than or equal to 1, chosen so that the +* components of x will be less than the overflow threshold. If the +* unscaled problem will not cause overflow, the Level 2 BLAS routine +* DTPSV is called. If the matrix A is singular (A(j,j) = 0 for some j), +* then s is set to 0 and a non-trivial solution to A*x = 0 is returned. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* TRANS (input) CHARACTER*1 +* Specifies the operation applied to A. +* = 'N': Solve A * x = s*b (No transpose) +* = 'T': Solve A'* x = s*b (Transpose) +* = 'C': Solve A'* x = s*b (Conjugate transpose = Transpose) +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* NORMIN (input) CHARACTER*1 +* Specifies whether CNORM has been set or not. +* = 'Y': CNORM contains the column norms on entry +* = 'N': CNORM is not set on entry. On exit, the norms will +* be computed and stored in CNORM. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* The upper or lower triangular matrix A, packed columnwise in +* a linear array. The j-th column of A is stored in the array +* AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* X (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the right hand side b of the triangular system. +* On exit, X is overwritten by the solution vector x. +* +* SCALE (output) DOUBLE PRECISION +* The scaling factor s for the triangular system +* A * x = s*b or A'* x = s*b. +* If SCALE = 0, the matrix A is singular or badly scaled, and +* the vector x is an exact or approximate solution to A*x = 0. +* +* CNORM (input or output) DOUBLE PRECISION array, dimension (N) +* +* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +* contains the norm of the off-diagonal part of the j-th column +* of A. If TRANS = 'N', CNORM(j) must be greater than or equal +* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +* must be greater than or equal to the 1-norm. +* +* If NORMIN = 'N', CNORM is an output argument and CNORM(j) +* returns the 1-norm of the offdiagonal part of the j-th column +* of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* Further Details +* ======= ======= +* +* A rough bound on x is computed; if that is less than overflow, DTPSV +* is called, otherwise, specific code is used which checks for possible +* overflow or divide-by-zero at every operation. +* +* A columnwise scheme is used for solving A*x = b. The basic algorithm +* if A is lower triangular is +* +* x[1:n] := b[1:n] +* for j = 1, ..., n +* x(j) := x(j) / A(j,j) +* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +* end +* +* Define bounds on the components of x after j iterations of the loop: +* M(j) = bound on x[1:j] +* G(j) = bound on x[j+1:n] +* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +* +* Then for iteration j+1 we have +* M(j+1) <= G(j) / | A(j+1,j+1) | +* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +* +* where CNORM(j+1) is greater than or equal to the infinity-norm of +* column j+1 of A, not counting the diagonal. Hence +* +* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +* 1<=i<=j +* and +* +* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +* 1<=i< j +* +* Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTPSV if the +* reciprocal of the largest M(j), j=1,..,n, is larger than +* max(underflow, 1/overflow). +* +* The bound on x(j) is also used to determine when a step in the +* columnwise method can be performed without fear of overflow. If +* the computed bound is greater than a large constant, x is scaled to +* prevent overflow, but if the bound overflows, x is set to 0, x(j) to +* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +* +* Similarly, a row-wise scheme is used to solve A'*x = b. The basic +* algorithm for A upper triangular is +* +* for j = 1, ..., n +* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +* end +* +* We simultaneously compute two bounds +* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +* M(j) = bound on x(i), 1<=i<=j +* +* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +* Then the bound on x(j) is +* +* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +* +* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +* 1<=i<=j +* +* and we can safely call DTPSV if 1/M(n) and 1/G(n) are both greater +* than max(underflow, 1/overflow). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, IP, J, JFIRST, JINC, JLAST, JLEN + DOUBLE PRECISION BIGNUM, GROW, REC, SMLNUM, SUMJ, TJJ, TJJS, + $ TMAX, TSCAL, USCAL, XBND, XJ, XMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DASUM, DDOT, DLAMCH + EXTERNAL LSAME, IDAMAX, DASUM, DDOT, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DSCAL, DTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLATPS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + IP = 1 + DO 10 J = 1, N + CNORM( J ) = DASUM( J-1, AP( IP ), 1 ) + IP = IP + J + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + IP = 1 + DO 20 J = 1, N - 1 + CNORM( J ) = DASUM( N-J, AP( IP+1 ), 1 ) + IP = IP + N - J + 1 + 20 CONTINUE + CNORM( N ) = ZERO + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM. +* + IMAX = IDAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM ) THEN + TSCAL = ONE + ELSE + TSCAL = ONE / ( SMLNUM*TMAX ) + CALL DSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine DTPSV can be used. +* + J = IDAMAX( N, X, 1 ) + XMAX = ABS( X( J ) ) + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 50 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = N + DO 30 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* M(j) = G(j-1) / abs(A(j,j)) +* + TJJ = ABS( AP( IP ) ) + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + IP = IP + JINC*JLEN + JLEN = JLEN - 1 + 30 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 40 CONTINUE + END IF + 50 CONTINUE +* + ELSE +* +* Compute the growth in A' * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 80 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = 1 + DO 60 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + TJJ = ABS( AP( IP ) ) + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + JLEN = JLEN + 1 + IP = IP + JINC*JLEN + 60 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 70 CONTINUE + END IF + 80 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL DTPSV( UPLO, TRANS, DIAG, N, AP, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = BIGNUM / XMAX + CALL DSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + IP = JFIRST*( JFIRST+1 ) / 2 + DO 110 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 100 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 90 I = 1, N + X( I ) = ZERO + 90 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 100 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL DSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +* + CALL DAXPY( J-1, -X( J )*TSCAL, AP( IP-J+1 ), 1, X, + $ 1 ) + I = IDAMAX( J-1, X, 1 ) + XMAX = ABS( X( I ) ) + END IF + IP = IP - J + ELSE + IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +* + CALL DAXPY( N-J, -X( J )*TSCAL, AP( IP+1 ), 1, + $ X( J+1 ), 1 ) + I = J + IDAMAX( N-J, X( J+1 ), 1 ) + XMAX = ABS( X( I ) ) + END IF + IP = IP + N - J + 1 + END IF + 110 CONTINUE +* + ELSE +* +* Solve A' * x = b +* + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = 1 + DO 160 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = ABS( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = USCAL / TJJS + END IF + IF( REC.LT.ONE ) THEN + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + SUMJ = ZERO + IF( USCAL.EQ.ONE ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call DDOT to perform the dot product. +* + IF( UPPER ) THEN + SUMJ = DDOT( J-1, AP( IP-J+1 ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + SUMJ = DDOT( N-J, AP( IP+1 ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 120 I = 1, J - 1 + SUMJ = SUMJ + ( AP( IP-J+I )*USCAL )*X( I ) + 120 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 130 I = 1, N - J + SUMJ = SUMJ + ( AP( IP+I )*USCAL )*X( J+I ) + 130 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.TSCAL ) THEN +* +* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - SUMJ + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 150 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A'*x = 0. +* + DO 140 I = 1, N + X( I ) = ZERO + 140 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 150 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - sumj if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = X( J ) / TJJS - SUMJ + END IF + XMAX = MAX( XMAX, ABS( X( J ) ) ) + JLEN = JLEN + 1 + IP = IP + JINC*JLEN + 160 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL DSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of DLATPS +* + END diff --git a/costa/native/external/lapack/dlatrd.f b/costa/native/external/lapack/dlatrd.f new file mode 100644 index 000000000..bf613b544 --- /dev/null +++ b/costa/native/external/lapack/dlatrd.f @@ -0,0 +1,259 @@ + SUBROUTINE DLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDW, N, NB +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), E( * ), TAU( * ), W( LDW, * ) +* .. +* +* Purpose +* ======= +* +* DLATRD reduces NB rows and columns of a real symmetric matrix A to +* symmetric tridiagonal form by an orthogonal similarity +* transformation Q' * A * Q, and returns the matrices V and W which are +* needed to apply the transformation to the unreduced part of A. +* +* If UPLO = 'U', DLATRD reduces the last NB rows and columns of a +* matrix, of which the upper triangle is supplied; +* if UPLO = 'L', DLATRD reduces the first NB rows and columns of a +* matrix, of which the lower triangle is supplied. +* +* This is an auxiliary routine called by DSYTRD. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. +* +* NB (input) INTEGER +* The number of rows and columns to be reduced. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* On exit: +* if UPLO = 'U', the last NB columns have been reduced to +* tridiagonal form, with the diagonal elements overwriting +* the diagonal elements of A; the elements above the diagonal +* with the array TAU, represent the orthogonal matrix Q as a +* product of elementary reflectors; +* if UPLO = 'L', the first NB columns have been reduced to +* tridiagonal form, with the diagonal elements overwriting +* the diagonal elements of A; the elements below the diagonal +* with the array TAU, represent the orthogonal matrix Q as a +* product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= (1,N). +* +* E (output) DOUBLE PRECISION array, dimension (N-1) +* If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal +* elements of the last NB columns of the reduced matrix; +* if UPLO = 'L', E(1:nb) contains the subdiagonal elements of +* the first NB columns of the reduced matrix. +* +* TAU (output) DOUBLE PRECISION array, dimension (N-1) +* The scalar factors of the elementary reflectors, stored in +* TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. +* See Further Details. +* +* W (output) DOUBLE PRECISION array, dimension (LDW,NB) +* The n-by-nb matrix W required to update the unreduced part +* of A. +* +* LDW (input) INTEGER +* The leading dimension of the array W. LDW >= max(1,N). +* +* Further Details +* =============== +* +* If UPLO = 'U', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(n) H(n-1) . . . H(n-nb+1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), +* and tau in TAU(i-1). +* +* If UPLO = 'L', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(1) H(2) . . . H(nb). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), +* and tau in TAU(i). +* +* The elements of the vectors v together form the n-by-nb matrix V +* which is needed, with W, to apply the transformation to the unreduced +* part of the matrix, using a symmetric rank-2k update of the form: +* A := A - V*W' - W*V'. +* +* The contents of A on exit are illustrated by the following examples +* with n = 5 and nb = 2: +* +* if UPLO = 'U': if UPLO = 'L': +* +* ( a a a v4 v5 ) ( d ) +* ( a a v4 v5 ) ( 1 d ) +* ( a 1 v5 ) ( v1 1 a ) +* ( d 1 ) ( v1 v2 a a ) +* ( d ) ( v1 v2 a a a ) +* +* where d denotes a diagonal element of the reduced matrix, a denotes +* an element of the original matrix that is unchanged, and vi denotes +* an element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, HALF + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, HALF = 0.5D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IW + DOUBLE PRECISION ALPHA +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DGEMV, DLARFG, DSCAL, DSYMV +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Reduce last NB columns of upper triangle +* + DO 10 I = N, N - NB + 1, -1 + IW = I - N + NB + IF( I.LT.N ) THEN +* +* Update A(1:i,i) +* + CALL DGEMV( 'No transpose', I, N-I, -ONE, A( 1, I+1 ), + $ LDA, W( I, IW+1 ), LDW, ONE, A( 1, I ), 1 ) + CALL DGEMV( 'No transpose', I, N-I, -ONE, W( 1, IW+1 ), + $ LDW, A( I, I+1 ), LDA, ONE, A( 1, I ), 1 ) + END IF + IF( I.GT.1 ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(1:i-2,i) +* + CALL DLARFG( I-1, A( I-1, I ), A( 1, I ), 1, TAU( I-1 ) ) + E( I-1 ) = A( I-1, I ) + A( I-1, I ) = ONE +* +* Compute W(1:i-1,i) +* + CALL DSYMV( 'Upper', I-1, ONE, A, LDA, A( 1, I ), 1, + $ ZERO, W( 1, IW ), 1 ) + IF( I.LT.N ) THEN + CALL DGEMV( 'Transpose', I-1, N-I, ONE, W( 1, IW+1 ), + $ LDW, A( 1, I ), 1, ZERO, W( I+1, IW ), 1 ) + CALL DGEMV( 'No transpose', I-1, N-I, -ONE, + $ A( 1, I+1 ), LDA, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + CALL DGEMV( 'Transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( 1, I ), 1, ZERO, W( I+1, IW ), 1 ) + CALL DGEMV( 'No transpose', I-1, N-I, -ONE, + $ W( 1, IW+1 ), LDW, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + END IF + CALL DSCAL( I-1, TAU( I-1 ), W( 1, IW ), 1 ) + ALPHA = -HALF*TAU( I-1 )*DDOT( I-1, W( 1, IW ), 1, + $ A( 1, I ), 1 ) + CALL DAXPY( I-1, ALPHA, A( 1, I ), 1, W( 1, IW ), 1 ) + END IF +* + 10 CONTINUE + ELSE +* +* Reduce first NB columns of lower triangle +* + DO 20 I = 1, NB +* +* Update A(i:n,i) +* + CALL DGEMV( 'No transpose', N-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, W( I, 1 ), LDW, ONE, A( I, I ), 1 ) + CALL DGEMV( 'No transpose', N-I+1, I-1, -ONE, W( I, 1 ), + $ LDW, A( I, 1 ), LDA, ONE, A( I, I ), 1 ) + IF( I.LT.N ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:n,i) +* + CALL DLARFG( N-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1, + $ TAU( I ) ) + E( I ) = A( I+1, I ) + A( I+1, I ) = ONE +* +* Compute W(i+1:n,i) +* + CALL DSYMV( 'Lower', N-I, ONE, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, W( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', N-I, I-1, ONE, W( I+1, 1 ), LDW, + $ A( I+1, I ), 1, ZERO, W( 1, I ), 1 ) + CALL DGEMV( 'No transpose', N-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', N-I, I-1, ONE, A( I+1, 1 ), LDA, + $ A( I+1, I ), 1, ZERO, W( 1, I ), 1 ) + CALL DGEMV( 'No transpose', N-I, I-1, -ONE, W( I+1, 1 ), + $ LDW, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL DSCAL( N-I, TAU( I ), W( I+1, I ), 1 ) + ALPHA = -HALF*TAU( I )*DDOT( N-I, W( I+1, I ), 1, + $ A( I+1, I ), 1 ) + CALL DAXPY( N-I, ALPHA, A( I+1, I ), 1, W( I+1, I ), 1 ) + END IF +* + 20 CONTINUE + END IF +* + RETURN +* +* End of DLATRD +* + END diff --git a/costa/native/external/lapack/dlatrs.f b/costa/native/external/lapack/dlatrs.f new file mode 100644 index 000000000..591c966d2 --- /dev/null +++ b/costa/native/external/lapack/dlatrs.f @@ -0,0 +1,702 @@ + SUBROUTINE DLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE, + $ CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), CNORM( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* DLATRS solves one of the triangular systems +* +* A *x = s*b or A'*x = s*b +* +* with scaling to prevent overflow. Here A is an upper or lower +* triangular matrix, A' denotes the transpose of A, x and b are +* n-element vectors, and s is a scaling factor, usually less than +* or equal to 1, chosen so that the components of x will be less than +* the overflow threshold. If the unscaled problem will not cause +* overflow, the Level 2 BLAS routine DTRSV is called. If the matrix A +* is singular (A(j,j) = 0 for some j), then s is set to 0 and a +* non-trivial solution to A*x = 0 is returned. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* TRANS (input) CHARACTER*1 +* Specifies the operation applied to A. +* = 'N': Solve A * x = s*b (No transpose) +* = 'T': Solve A'* x = s*b (Transpose) +* = 'C': Solve A'* x = s*b (Conjugate transpose = Transpose) +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* NORMIN (input) CHARACTER*1 +* Specifies whether CNORM has been set or not. +* = 'Y': CNORM contains the column norms on entry +* = 'N': CNORM is not set on entry. On exit, the norms will +* be computed and stored in CNORM. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading n by n +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. If UPLO = 'L', the leading n by n lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max (1,N). +* +* X (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the right hand side b of the triangular system. +* On exit, X is overwritten by the solution vector x. +* +* SCALE (output) DOUBLE PRECISION +* The scaling factor s for the triangular system +* A * x = s*b or A'* x = s*b. +* If SCALE = 0, the matrix A is singular or badly scaled, and +* the vector x is an exact or approximate solution to A*x = 0. +* +* CNORM (input or output) DOUBLE PRECISION array, dimension (N) +* +* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +* contains the norm of the off-diagonal part of the j-th column +* of A. If TRANS = 'N', CNORM(j) must be greater than or equal +* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +* must be greater than or equal to the 1-norm. +* +* If NORMIN = 'N', CNORM is an output argument and CNORM(j) +* returns the 1-norm of the offdiagonal part of the j-th column +* of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* Further Details +* ======= ======= +* +* A rough bound on x is computed; if that is less than overflow, DTRSV +* is called, otherwise, specific code is used which checks for possible +* overflow or divide-by-zero at every operation. +* +* A columnwise scheme is used for solving A*x = b. The basic algorithm +* if A is lower triangular is +* +* x[1:n] := b[1:n] +* for j = 1, ..., n +* x(j) := x(j) / A(j,j) +* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +* end +* +* Define bounds on the components of x after j iterations of the loop: +* M(j) = bound on x[1:j] +* G(j) = bound on x[j+1:n] +* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +* +* Then for iteration j+1 we have +* M(j+1) <= G(j) / | A(j+1,j+1) | +* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +* +* where CNORM(j+1) is greater than or equal to the infinity-norm of +* column j+1 of A, not counting the diagonal. Hence +* +* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +* 1<=i<=j +* and +* +* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +* 1<=i< j +* +* Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTRSV if the +* reciprocal of the largest M(j), j=1,..,n, is larger than +* max(underflow, 1/overflow). +* +* The bound on x(j) is also used to determine when a step in the +* columnwise method can be performed without fear of overflow. If +* the computed bound is greater than a large constant, x is scaled to +* prevent overflow, but if the bound overflows, x is set to 0, x(j) to +* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +* +* Similarly, a row-wise scheme is used to solve A'*x = b. The basic +* algorithm for A upper triangular is +* +* for j = 1, ..., n +* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +* end +* +* We simultaneously compute two bounds +* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +* M(j) = bound on x(i), 1<=i<=j +* +* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +* Then the bound on x(j) is +* +* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +* +* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +* 1<=i<=j +* +* and we can safely call DTRSV if 1/M(n) and 1/G(n) are both greater +* than max(underflow, 1/overflow). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, J, JFIRST, JINC, JLAST + DOUBLE PRECISION BIGNUM, GROW, REC, SMLNUM, SUMJ, TJJ, TJJS, + $ TMAX, TSCAL, USCAL, XBND, XJ, XMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DASUM, DDOT, DLAMCH + EXTERNAL LSAME, IDAMAX, DASUM, DDOT, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DSCAL, DTRSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLATRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + DO 10 J = 1, N + CNORM( J ) = DASUM( J-1, A( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + DO 20 J = 1, N - 1 + CNORM( J ) = DASUM( N-J, A( J+1, J ), 1 ) + 20 CONTINUE + CNORM( N ) = ZERO + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM. +* + IMAX = IDAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM ) THEN + TSCAL = ONE + ELSE + TSCAL = ONE / ( SMLNUM*TMAX ) + CALL DSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine DTRSV can be used. +* + J = IDAMAX( N, X, 1 ) + XMAX = ABS( X( J ) ) + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 50 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + DO 30 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* M(j) = G(j-1) / abs(A(j,j)) +* + TJJ = ABS( A( J, J ) ) + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + 30 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 40 CONTINUE + END IF + 50 CONTINUE +* + ELSE +* +* Compute the growth in A' * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 80 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + DO 60 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + TJJ = ABS( A( J, J ) ) + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + 60 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 70 CONTINUE + END IF + 80 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL DTRSV( UPLO, TRANS, DIAG, N, A, LDA, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = BIGNUM / XMAX + CALL DSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + DO 110 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 100 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 90 I = 1, N + X( I ) = ZERO + 90 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 100 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL DSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +* + CALL DAXPY( J-1, -X( J )*TSCAL, A( 1, J ), 1, X, + $ 1 ) + I = IDAMAX( J-1, X, 1 ) + XMAX = ABS( X( I ) ) + END IF + ELSE + IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +* + CALL DAXPY( N-J, -X( J )*TSCAL, A( J+1, J ), 1, + $ X( J+1 ), 1 ) + I = J + IDAMAX( N-J, X( J+1 ), 1 ) + XMAX = ABS( X( I ) ) + END IF + END IF + 110 CONTINUE +* + ELSE +* +* Solve A' * x = b +* + DO 160 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = ABS( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = USCAL / TJJS + END IF + IF( REC.LT.ONE ) THEN + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + SUMJ = ZERO + IF( USCAL.EQ.ONE ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call DDOT to perform the dot product. +* + IF( UPPER ) THEN + SUMJ = DDOT( J-1, A( 1, J ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + SUMJ = DDOT( N-J, A( J+1, J ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 120 I = 1, J - 1 + SUMJ = SUMJ + ( A( I, J )*USCAL )*X( I ) + 120 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 130 I = J + 1, N + SUMJ = SUMJ + ( A( I, J )*USCAL )*X( I ) + 130 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.TSCAL ) THEN +* +* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - SUMJ + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 150 + END IF +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A'*x = 0. +* + DO 140 I = 1, N + X( I ) = ZERO + 140 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 150 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - sumj if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = X( J ) / TJJS - SUMJ + END IF + XMAX = MAX( XMAX, ABS( X( J ) ) ) + 160 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL DSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of DLATRS +* + END diff --git a/costa/native/external/lapack/dlatrz.f b/costa/native/external/lapack/dlatrz.f new file mode 100644 index 000000000..ac72ecb0d --- /dev/null +++ b/costa/native/external/lapack/dlatrz.f @@ -0,0 +1,128 @@ + SUBROUTINE DLATRZ( M, N, L, A, LDA, TAU, WORK ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER L, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DLATRZ factors the M-by-(M+L) real upper trapezoidal matrix +* [ A1 A2 ] = [ A(1:M,1:M) A(1:M,N-L+1:N) ] as ( R 0 ) * Z, by means +* of orthogonal transformations. Z is an (M+L)-by-(M+L) orthogonal +* matrix and, R and A1 are M-by-M upper triangular matrices. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* L (input) INTEGER +* The number of columns of the matrix A containing the +* meaningful part of the Householder vectors. N-M >= L >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the leading M-by-N upper trapezoidal part of the +* array A must contain the matrix to be factorized. +* On exit, the leading M-by-M upper triangular part of A +* contains the upper triangular matrix R, and elements N-L+1 to +* N of the first M rows of A, with the array TAU, represent the +* orthogonal matrix Z as a product of M elementary reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) DOUBLE PRECISION array, dimension (M) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (M) +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* The factorization is obtained by Householder's method. The kth +* transformation matrix, Z( k ), which is used to introduce zeros into +* the ( m - k + 1 )th row of A, is given in the form +* +* Z( k ) = ( I 0 ), +* ( 0 T( k ) ) +* +* where +* +* T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), +* ( 0 ) +* ( z( k ) ) +* +* tau is a scalar and z( k ) is an l element vector. tau and z( k ) +* are chosen to annihilate the elements of the kth row of A2. +* +* The scalar tau is returned in the kth element of TAU and the vector +* u( k ) in the kth row of A2, such that the elements of z( k ) are +* in a( k, l + 1 ), ..., a( k, n ). The elements of R are returned in +* the upper triangular part of A1. +* +* Z is given by +* +* Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. External Subroutines .. + EXTERNAL DLARFG, DLARZ +* .. +* .. Executable Statements .. +* +* Test the input arguments +* +* Quick return if possible +* + IF( M.EQ.0 ) THEN + RETURN + ELSE IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + RETURN + END IF +* + DO 20 I = M, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* [ A(i,i) A(i,n-l+1:n) ] +* + CALL DLARFG( L+1, A( I, I ), A( I, N-L+1 ), LDA, TAU( I ) ) +* +* Apply H(i) to A(1:i-1,i:n) from the right +* + CALL DLARZ( 'Right', I-1, N-I+1, L, A( I, N-L+1 ), LDA, + $ TAU( I ), A( 1, I ), LDA, WORK ) +* + 20 CONTINUE +* + RETURN +* +* End of DLATRZ +* + END diff --git a/costa/native/external/lapack/dlatzm.f b/costa/native/external/lapack/dlatzm.f new file mode 100644 index 000000000..7eaea60d3 --- /dev/null +++ b/costa/native/external/lapack/dlatzm.f @@ -0,0 +1,143 @@ + SUBROUTINE DLATZM( SIDE, M, N, V, INCV, TAU, C1, C2, LDC, WORK ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION C1( LDC, * ), C2( LDC, * ), V( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine DORMRZ. +* +* DLATZM applies a Householder matrix generated by DTZRQF to a matrix. +* +* Let P = I - tau*u*u', u = ( 1 ), +* ( v ) +* where v is an (m-1) vector if SIDE = 'L', or a (n-1) vector if +* SIDE = 'R'. +* +* If SIDE equals 'L', let +* C = [ C1 ] 1 +* [ C2 ] m-1 +* n +* Then C is overwritten by P*C. +* +* If SIDE equals 'R', let +* C = [ C1, C2 ] m +* 1 n-1 +* Then C is overwritten by C*P. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': form P * C +* = 'R': form C * P +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* V (input) DOUBLE PRECISION array, dimension +* (1 + (M-1)*abs(INCV)) if SIDE = 'L' +* (1 + (N-1)*abs(INCV)) if SIDE = 'R' +* The vector v in the representation of P. V is not used +* if TAU = 0. +* +* INCV (input) INTEGER +* The increment between elements of v. INCV <> 0 +* +* TAU (input) DOUBLE PRECISION +* The value tau in the representation of P. +* +* C1 (input/output) DOUBLE PRECISION array, dimension +* (LDC,N) if SIDE = 'L' +* (M,1) if SIDE = 'R' +* On entry, the n-vector C1 if SIDE = 'L', or the m-vector C1 +* if SIDE = 'R'. +* +* On exit, the first row of P*C if SIDE = 'L', or the first +* column of C*P if SIDE = 'R'. +* +* C2 (input/output) DOUBLE PRECISION array, dimension +* (LDC, N) if SIDE = 'L' +* (LDC, N-1) if SIDE = 'R' +* On entry, the (m - 1) x n matrix C2 if SIDE = 'L', or the +* m x (n - 1) matrix C2 if SIDE = 'R'. +* +* On exit, rows 2:m of P*C if SIDE = 'L', or columns 2:m of C*P +* if SIDE = 'R'. +* +* LDC (input) INTEGER +* The leading dimension of the arrays C1 and C2. LDC >= (1,M). +* +* WORK (workspace) DOUBLE PRECISION array, dimension +* (N) if SIDE = 'L' +* (M) if SIDE = 'R' +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGEMV, DGER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( ( MIN( M, N ).EQ.0 ) .OR. ( TAU.EQ.ZERO ) ) + $ RETURN +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* w := C1 + v' * C2 +* + CALL DCOPY( N, C1, LDC, WORK, 1 ) + CALL DGEMV( 'Transpose', M-1, N, ONE, C2, LDC, V, INCV, ONE, + $ WORK, 1 ) +* +* [ C1 ] := [ C1 ] - tau* [ 1 ] * w' +* [ C2 ] [ C2 ] [ v ] +* + CALL DAXPY( N, -TAU, WORK, 1, C1, LDC ) + CALL DGER( M-1, N, -TAU, V, INCV, WORK, 1, C2, LDC ) +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* w := C1 + C2 * v +* + CALL DCOPY( M, C1, 1, WORK, 1 ) + CALL DGEMV( 'No transpose', M, N-1, ONE, C2, LDC, V, INCV, ONE, + $ WORK, 1 ) +* +* [ C1, C2 ] := [ C1, C2 ] - tau* w * [ 1 , v'] +* + CALL DAXPY( M, -TAU, WORK, 1, C1, 1 ) + CALL DGER( M, N-1, -TAU, WORK, 1, V, INCV, C2, LDC ) + END IF +* + RETURN +* +* End of DLATZM +* + END diff --git a/costa/native/external/lapack/dlauu2.f b/costa/native/external/lapack/dlauu2.f new file mode 100644 index 000000000..65a47ee6c --- /dev/null +++ b/costa/native/external/lapack/dlauu2.f @@ -0,0 +1,136 @@ + SUBROUTINE DLAUU2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* DLAUU2 computes the product U * U' or L' * L, where the triangular +* factor U or L is stored in the upper or lower triangular part of +* the array A. +* +* If UPLO = 'U' or 'u' then the upper triangle of the result is stored, +* overwriting the factor U in A. +* If UPLO = 'L' or 'l' then the lower triangle of the result is stored, +* overwriting the factor L in A. +* +* This is the unblocked form of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the triangular factor stored in the array A +* is upper or lower triangular: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the triangular factor U or L. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the triangular factor U or L. +* On exit, if UPLO = 'U', the upper triangle of A is +* overwritten with the upper triangle of the product U * U'; +* if UPLO = 'L', the lower triangle of A is overwritten with +* the lower triangle of the product L' * L. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + DOUBLE PRECISION AII +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAUU2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the product U * U'. +* + DO 10 I = 1, N + AII = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = DDOT( N-I+1, A( I, I ), LDA, A( I, I ), LDA ) + CALL DGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( I, I+1 ), LDA, AII, A( 1, I ), 1 ) + ELSE + CALL DSCAL( I, AII, A( 1, I ), 1 ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the product L' * L. +* + DO 20 I = 1, N + AII = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = DDOT( N-I+1, A( I, I ), 1, A( I, I ), 1 ) + CALL DGEMV( 'Transpose', N-I, I-1, ONE, A( I+1, 1 ), LDA, + $ A( I+1, I ), 1, AII, A( I, 1 ), LDA ) + ELSE + CALL DSCAL( I, AII, A( I, 1 ), LDA ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of DLAUU2 +* + END diff --git a/costa/native/external/lapack/dlauum.f b/costa/native/external/lapack/dlauum.f new file mode 100644 index 000000000..581c84080 --- /dev/null +++ b/costa/native/external/lapack/dlauum.f @@ -0,0 +1,156 @@ + SUBROUTINE DLAUUM( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* DLAUUM computes the product U * U' or L' * L, where the triangular +* factor U or L is stored in the upper or lower triangular part of +* the array A. +* +* If UPLO = 'U' or 'u' then the upper triangle of the result is stored, +* overwriting the factor U in A. +* If UPLO = 'L' or 'l' then the lower triangle of the result is stored, +* overwriting the factor L in A. +* +* This is the blocked form of the algorithm, calling Level 3 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the triangular factor stored in the array A +* is upper or lower triangular: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the triangular factor U or L. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the triangular factor U or L. +* On exit, if UPLO = 'U', the upper triangle of A is +* overwritten with the upper triangle of the product U * U'; +* if UPLO = 'L', the lower triangle of A is overwritten with +* the lower triangle of the product L' * L. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DLAUU2, DSYRK, DTRMM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAUUM', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'DLAUUM', UPLO, N, -1, -1, -1 ) +* + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL DLAUU2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code +* + IF( UPPER ) THEN +* +* Compute the product U * U'. +* + DO 10 I = 1, N, NB + IB = MIN( NB, N-I+1 ) + CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Non-unit', + $ I-1, IB, ONE, A( I, I ), LDA, A( 1, I ), + $ LDA ) + CALL DLAUU2( 'Upper', IB, A( I, I ), LDA, INFO ) + IF( I+IB.LE.N ) THEN + CALL DGEMM( 'No transpose', 'Transpose', I-1, IB, + $ N-I-IB+1, ONE, A( 1, I+IB ), LDA, + $ A( I, I+IB ), LDA, ONE, A( 1, I ), LDA ) + CALL DSYRK( 'Upper', 'No transpose', IB, N-I-IB+1, + $ ONE, A( I, I+IB ), LDA, ONE, A( I, I ), + $ LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute the product L' * L. +* + DO 20 I = 1, N, NB + IB = MIN( NB, N-I+1 ) + CALL DTRMM( 'Left', 'Lower', 'Transpose', 'Non-unit', IB, + $ I-1, ONE, A( I, I ), LDA, A( I, 1 ), LDA ) + CALL DLAUU2( 'Lower', IB, A( I, I ), LDA, INFO ) + IF( I+IB.LE.N ) THEN + CALL DGEMM( 'Transpose', 'No transpose', IB, I-1, + $ N-I-IB+1, ONE, A( I+IB, I ), LDA, + $ A( I+IB, 1 ), LDA, ONE, A( I, 1 ), LDA ) + CALL DSYRK( 'Lower', 'Transpose', IB, N-I-IB+1, ONE, + $ A( I+IB, I ), LDA, ONE, A( I, I ), LDA ) + END IF + 20 CONTINUE + END IF + END IF +* + RETURN +* +* End of DLAUUM +* + END diff --git a/costa/native/external/lapack/dopgtr.f b/costa/native/external/lapack/dopgtr.f new file mode 100644 index 000000000..32c7d59a2 --- /dev/null +++ b/costa/native/external/lapack/dopgtr.f @@ -0,0 +1,161 @@ + SUBROUTINE DOPGTR( UPLO, N, AP, TAU, Q, LDQ, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDQ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), Q( LDQ, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DOPGTR generates a real orthogonal matrix Q which is defined as the +* product of n-1 elementary reflectors H(i) of order n, as returned by +* DSPTRD using packed storage: +* +* if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), +* +* if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular packed storage used in previous +* call to DSPTRD; +* = 'L': Lower triangular packed storage used in previous +* call to DSPTRD. +* +* N (input) INTEGER +* The order of the matrix Q. N >= 0. +* +* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* The vectors which define the elementary reflectors, as +* returned by DSPTRD. +* +* TAU (input) DOUBLE PRECISION array, dimension (N-1) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DSPTRD. +* +* Q (output) DOUBLE PRECISION array, dimension (LDQ,N) +* The N-by-N orthogonal matrix Q. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (N-1) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IINFO, IJ, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DORG2L, DORG2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DOPGTR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Q was determined by a call to DSPTRD with UPLO = 'U' +* +* Unpack the vectors which define the elementary reflectors and +* set the last row and column of Q equal to those of the unit +* matrix +* + IJ = 2 + DO 20 J = 1, N - 1 + DO 10 I = 1, J - 1 + Q( I, J ) = AP( IJ ) + IJ = IJ + 1 + 10 CONTINUE + IJ = IJ + 2 + Q( N, J ) = ZERO + 20 CONTINUE + DO 30 I = 1, N - 1 + Q( I, N ) = ZERO + 30 CONTINUE + Q( N, N ) = ONE +* +* Generate Q(1:n-1,1:n-1) +* + CALL DORG2L( N-1, N-1, N-1, Q, LDQ, TAU, WORK, IINFO ) +* + ELSE +* +* Q was determined by a call to DSPTRD with UPLO = 'L'. +* +* Unpack the vectors which define the elementary reflectors and +* set the first row and column of Q equal to those of the unit +* matrix +* + Q( 1, 1 ) = ONE + DO 40 I = 2, N + Q( I, 1 ) = ZERO + 40 CONTINUE + IJ = 3 + DO 60 J = 2, N + Q( 1, J ) = ZERO + DO 50 I = J + 1, N + Q( I, J ) = AP( IJ ) + IJ = IJ + 1 + 50 CONTINUE + IJ = IJ + 2 + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Generate Q(2:n,2:n) +* + CALL DORG2R( N-1, N-1, N-1, Q( 2, 2 ), LDQ, TAU, WORK, + $ IINFO ) + END IF + END IF + RETURN +* +* End of DOPGTR +* + END diff --git a/costa/native/external/lapack/dopmtr.f b/costa/native/external/lapack/dopmtr.f new file mode 100644 index 000000000..3d9d426b9 --- /dev/null +++ b/costa/native/external/lapack/dopmtr.f @@ -0,0 +1,258 @@ + SUBROUTINE DOPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, UPLO + INTEGER INFO, LDC, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DOPMTR overwrites the general real M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'T': Q**T * C C * Q**T +* +* where Q is a real orthogonal matrix of order nq, with nq = m if +* SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +* nq-1 elementary reflectors, as returned by DSPTRD using packed +* storage: +* +* if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); +* +* if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**T from the Left; +* = 'R': apply Q or Q**T from the Right. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular packed storage used in previous +* call to DSPTRD; +* = 'L': Lower triangular packed storage used in previous +* call to DSPTRD. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'T': Transpose, apply Q**T. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* AP (input) DOUBLE PRECISION array, dimension +* (M*(M+1)/2) if SIDE = 'L' +* (N*(N+1)/2) if SIDE = 'R' +* The vectors which define the elementary reflectors, as +* returned by DSPTRD. AP is modified by the routine but +* restored on exit. +* +* TAU (input) DOUBLE PRECISION array, dimension (M-1) if SIDE = 'L' +* or (N-1) if SIDE = 'R' +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DSPTRD. +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) DOUBLE PRECISION array, dimension +* (N) if SIDE = 'L' +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL FORWRD, LEFT, NOTRAN, UPPER + INTEGER I, I1, I2, I3, IC, II, JC, MI, NI, NQ + DOUBLE PRECISION AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + UPPER = LSAME( UPLO, 'U' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DOPMTR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Q was determined by a call to DSPTRD with UPLO = 'U' +* + FORWRD = ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) +* + IF( FORWRD ) THEN + I1 = 1 + I2 = NQ - 1 + I3 = 1 + II = 2 + ELSE + I1 = NQ - 1 + I2 = 1 + I3 = -1 + II = NQ*( NQ+1 ) / 2 - 1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(1:i,1:n) +* + MI = I + ELSE +* +* H(i) is applied to C(1:m,1:i) +* + NI = I + END IF +* +* Apply H(i) +* + AII = AP( II ) + AP( II ) = ONE + CALL DLARF( SIDE, MI, NI, AP( II-I+1 ), 1, TAU( I ), C, LDC, + $ WORK ) + AP( II ) = AII +* + IF( FORWRD ) THEN + II = II + I + 2 + ELSE + II = II - I - 1 + END IF + 10 CONTINUE + ELSE +* +* Q was determined by a call to DSPTRD with UPLO = 'L'. +* + FORWRD = ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) +* + IF( FORWRD ) THEN + I1 = 1 + I2 = NQ - 1 + I3 = 1 + II = 2 + ELSE + I1 = NQ - 1 + I2 = 1 + I3 = -1 + II = NQ*( NQ+1 ) / 2 - 1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 20 I = I1, I2, I3 + AII = AP( II ) + AP( II ) = ONE + IF( LEFT ) THEN +* +* H(i) is applied to C(i+1:m,1:n) +* + MI = M - I + IC = I + 1 + ELSE +* +* H(i) is applied to C(1:m,i+1:n) +* + NI = N - I + JC = I + 1 + END IF +* +* Apply H(i) +* + CALL DLARF( SIDE, MI, NI, AP( II ), 1, TAU( I ), + $ C( IC, JC ), LDC, WORK ) + AP( II ) = AII +* + IF( FORWRD ) THEN + II = II + NQ - I + 1 + ELSE + II = II - NQ + I - 2 + END IF + 20 CONTINUE + END IF + RETURN +* +* End of DOPMTR +* + END diff --git a/costa/native/external/lapack/dorg2l.f b/costa/native/external/lapack/dorg2l.f new file mode 100644 index 000000000..5b86f6710 --- /dev/null +++ b/costa/native/external/lapack/dorg2l.f @@ -0,0 +1,128 @@ + SUBROUTINE DORG2L( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORG2L generates an m by n real matrix Q with orthonormal columns, +* which is defined as the last n columns of a product of k elementary +* reflectors of order m +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by DGEQLF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the (n-k+i)-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by DGEQLF in the last k columns of its array +* argument A. +* On exit, the m by n matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) DOUBLE PRECISION array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DGEQLF. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, II, J, L +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORG2L', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns 1:n-k to columns of the unit matrix +* + DO 20 J = 1, N - K + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( M-N+J, J ) = ONE + 20 CONTINUE +* + DO 40 I = 1, K + II = N - K + I +* +* Apply H(i) to A(1:m-k+i,1:n-k+i) from the left +* + A( M-N+II, II ) = ONE + CALL DLARF( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), A, + $ LDA, WORK ) + CALL DSCAL( M-N+II-1, -TAU( I ), A( 1, II ), 1 ) + A( M-N+II, II ) = ONE - TAU( I ) +* +* Set A(m-k+i+1:m,n-k+i) to zero +* + DO 30 L = M - N + II + 1, M + A( L, II ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of DORG2L +* + END diff --git a/costa/native/external/lapack/dorg2r.f b/costa/native/external/lapack/dorg2r.f new file mode 100644 index 000000000..c15982e7a --- /dev/null +++ b/costa/native/external/lapack/dorg2r.f @@ -0,0 +1,130 @@ + SUBROUTINE DORG2R( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORG2R generates an m by n real matrix Q with orthonormal columns, +* which is defined as the first n columns of a product of k elementary +* reflectors of order m +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by DGEQRF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the i-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by DGEQRF in the first k columns of its array +* argument A. +* On exit, the m-by-n matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) DOUBLE PRECISION array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DGEQRF. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORG2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns k+1:n to columns of the unit matrix +* + DO 20 J = K + 1, N + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( J, J ) = ONE + 20 CONTINUE +* + DO 40 I = K, 1, -1 +* +* Apply H(i) to A(i:m,i:n) from the left +* + IF( I.LT.N ) THEN + A( I, I ) = ONE + CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) + END IF + IF( I.LT.M ) + $ CALL DSCAL( M-I, -TAU( I ), A( I+1, I ), 1 ) + A( I, I ) = ONE - TAU( I ) +* +* Set A(1:i-1,i) to zero +* + DO 30 L = 1, I - 1 + A( L, I ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of DORG2R +* + END diff --git a/costa/native/external/lapack/dorgbr.f b/costa/native/external/lapack/dorgbr.f new file mode 100644 index 000000000..84dbe1757 --- /dev/null +++ b/costa/native/external/lapack/dorgbr.f @@ -0,0 +1,245 @@ + SUBROUTINE DORGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER VECT + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORGBR generates one of the real orthogonal matrices Q or P**T +* determined by DGEBRD when reducing a real matrix A to bidiagonal +* form: A = Q * B * P**T. Q and P**T are defined as products of +* elementary reflectors H(i) or G(i) respectively. +* +* If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q +* is of order M: +* if m >= k, Q = H(1) H(2) . . . H(k) and DORGBR returns the first n +* columns of Q, where m >= n >= k; +* if m < k, Q = H(1) H(2) . . . H(m-1) and DORGBR returns Q as an +* M-by-M matrix. +* +* If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T +* is of order N: +* if k < n, P**T = G(k) . . . G(2) G(1) and DORGBR returns the first m +* rows of P**T, where n >= m >= k; +* if k >= n, P**T = G(n-1) . . . G(2) G(1) and DORGBR returns P**T as +* an N-by-N matrix. +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* Specifies whether the matrix Q or the matrix P**T is +* required, as defined in the transformation applied by DGEBRD: +* = 'Q': generate Q; +* = 'P': generate P**T. +* +* M (input) INTEGER +* The number of rows of the matrix Q or P**T to be returned. +* M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q or P**T to be returned. +* N >= 0. +* If VECT = 'Q', M >= N >= min(M,K); +* if VECT = 'P', N >= M >= min(N,K). +* +* K (input) INTEGER +* If VECT = 'Q', the number of columns in the original M-by-K +* matrix reduced by DGEBRD. +* If VECT = 'P', the number of rows in the original K-by-N +* matrix reduced by DGEBRD. +* K >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the vectors which define the elementary reflectors, +* as returned by DGEBRD. +* On exit, the M-by-N matrix Q or P**T. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (input) DOUBLE PRECISION array, dimension +* (min(M,K)) if VECT = 'Q' +* (min(N,K)) if VECT = 'P' +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i) or G(i), which determines Q or P**T, as +* returned by DGEBRD in its array argument TAUQ or TAUP. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,min(M,N)). +* For optimum performance LWORK >= min(M,N)*NB, where NB +* is the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTQ + INTEGER I, IINFO, J, LWKOPT, MN, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DORGLQ, DORGQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + WANTQ = LSAME( VECT, 'Q' ) + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 .OR. ( WANTQ .AND. ( N.GT.M .OR. N.LT.MIN( M, + $ K ) ) ) .OR. ( .NOT.WANTQ .AND. ( M.GT.N .OR. M.LT. + $ MIN( N, K ) ) ) ) THEN + INFO = -3 + ELSE IF( K.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LWORK.LT.MAX( 1, MN ) .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( WANTQ ) THEN + NB = ILAENV( 1, 'DORGQR', ' ', M, N, K, -1 ) + ELSE + NB = ILAENV( 1, 'DORGLQ', ' ', M, N, K, -1 ) + END IF + LWKOPT = MAX( 1, MN )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGBR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( WANTQ ) THEN +* +* Form Q, determined by a call to DGEBRD to reduce an m-by-k +* matrix +* + IF( M.GE.K ) THEN +* +* If m >= k, assume m >= n >= k +* + CALL DORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* If m < k, assume m = n +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q +* to those of the unit matrix +* + DO 20 J = M, 2, -1 + A( 1, J ) = ZERO + DO 10 I = J + 1, M + A( I, J ) = A( I, J-1 ) + 10 CONTINUE + 20 CONTINUE + A( 1, 1 ) = ONE + DO 30 I = 2, M + A( I, 1 ) = ZERO + 30 CONTINUE + IF( M.GT.1 ) THEN +* +* Form Q(2:m,2:m) +* + CALL DORGQR( M-1, M-1, M-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + ELSE +* +* Form P', determined by a call to DGEBRD to reduce a k-by-n +* matrix +* + IF( K.LT.N ) THEN +* +* If k < n, assume k <= m <= n +* + CALL DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* If k >= n, assume m = n +* +* Shift the vectors which define the elementary reflectors one +* row downward, and set the first row and column of P' to +* those of the unit matrix +* + A( 1, 1 ) = ONE + DO 40 I = 2, N + A( I, 1 ) = ZERO + 40 CONTINUE + DO 60 J = 2, N + DO 50 I = J - 1, 2, -1 + A( I, J ) = A( I-1, J ) + 50 CONTINUE + A( 1, J ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Form P'(2:n,2:n) +* + CALL DORGLQ( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORGBR +* + END diff --git a/costa/native/external/lapack/dorghr.f b/costa/native/external/lapack/dorghr.f new file mode 100644 index 000000000..5d6881856 --- /dev/null +++ b/costa/native/external/lapack/dorghr.f @@ -0,0 +1,165 @@ + SUBROUTINE DORGHR( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORGHR generates a real orthogonal matrix Q which is defined as the +* product of IHI-ILO elementary reflectors of order N, as returned by +* DGEHRD: +* +* Q = H(ilo) H(ilo+1) . . . H(ihi-1). +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix Q. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* ILO and IHI must have the same values as in the previous call +* of DGEHRD. Q is equal to the unit matrix except in the +* submatrix Q(ilo+1:ihi,ilo+1:ihi). +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the vectors which define the elementary reflectors, +* as returned by DGEHRD. +* On exit, the N-by-N orthogonal matrix Q. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (input) DOUBLE PRECISION array, dimension (N-1) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DGEHRD. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= IHI-ILO. +* For optimum performance LWORK >= (IHI-ILO)*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LWKOPT, NB, NH +* .. +* .. External Subroutines .. + EXTERNAL DORGQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NH = IHI - ILO + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, NH ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'DORGQR', ' ', NH, NH, NH, -1 ) + LWKOPT = MAX( 1, NH )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGHR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first ilo and the last n-ihi +* rows and columns to those of the unit matrix +* + DO 40 J = IHI, ILO + 1, -1 + DO 10 I = 1, J - 1 + A( I, J ) = ZERO + 10 CONTINUE + DO 20 I = J + 1, IHI + A( I, J ) = A( I, J-1 ) + 20 CONTINUE + DO 30 I = IHI + 1, N + A( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + DO 60 J = 1, ILO + DO 50 I = 1, N + A( I, J ) = ZERO + 50 CONTINUE + A( J, J ) = ONE + 60 CONTINUE + DO 80 J = IHI + 1, N + DO 70 I = 1, N + A( I, J ) = ZERO + 70 CONTINUE + A( J, J ) = ONE + 80 CONTINUE +* + IF( NH.GT.0 ) THEN +* +* Generate Q(ilo+1:ihi,ilo+1:ihi) +* + CALL DORGQR( NH, NH, NH, A( ILO+1, ILO+1 ), LDA, TAU( ILO ), + $ WORK, LWORK, IINFO ) + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORGHR +* + END diff --git a/costa/native/external/lapack/dorgl2.f b/costa/native/external/lapack/dorgl2.f new file mode 100644 index 000000000..3dab19cd0 --- /dev/null +++ b/costa/native/external/lapack/dorgl2.f @@ -0,0 +1,134 @@ + SUBROUTINE DORGL2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORGL2 generates an m by n real matrix Q with orthonormal rows, +* which is defined as the first m rows of a product of k elementary +* reflectors of order n +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by DGELQF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. N >= M. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. M >= K >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the i-th row must contain the vector which defines +* the elementary reflector H(i), for i = 1,2,...,k, as returned +* by DGELQF in the first k rows of its array argument A. +* On exit, the m-by-n matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) DOUBLE PRECISION array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DGELQF. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (M) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGL2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) + $ RETURN +* + IF( K.LT.M ) THEN +* +* Initialise rows k+1:m to rows of the unit matrix +* + DO 20 J = 1, N + DO 10 L = K + 1, M + A( L, J ) = ZERO + 10 CONTINUE + IF( J.GT.K .AND. J.LE.M ) + $ A( J, J ) = ONE + 20 CONTINUE + END IF +* + DO 40 I = K, 1, -1 +* +* Apply H(i) to A(i:m,i:n) from the right +* + IF( I.LT.N ) THEN + IF( I.LT.M ) THEN + A( I, I ) = ONE + CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ TAU( I ), A( I+1, I ), LDA, WORK ) + END IF + CALL DSCAL( N-I, -TAU( I ), A( I, I+1 ), LDA ) + END IF + A( I, I ) = ONE - TAU( I ) +* +* Set A(i,1:i-1) to zero +* + DO 30 L = 1, I - 1 + A( I, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of DORGL2 +* + END diff --git a/costa/native/external/lapack/dorglq.f b/costa/native/external/lapack/dorglq.f new file mode 100644 index 000000000..b40018b6a --- /dev/null +++ b/costa/native/external/lapack/dorglq.f @@ -0,0 +1,216 @@ + SUBROUTINE DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORGLQ generates an M-by-N real matrix Q with orthonormal rows, +* which is defined as the first M rows of a product of K elementary +* reflectors of order N +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by DGELQF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. N >= M. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. M >= K >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the i-th row must contain the vector which defines +* the elementary reflector H(i), for i = 1,2,...,k, as returned +* by DGELQF in the first k rows of its array argument A. +* On exit, the M-by-N matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) DOUBLE PRECISION array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DGELQF. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL DLARFB, DLARFT, DORGL2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'DORGLQ', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, M )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DORGLQ', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORGLQ', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk rows are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(kk+1:m,1:kk) to zero. +* + DO 20 J = 1, KK + DO 10 I = KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.M ) + $ CALL DORGL2( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.M ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H' to A(i+ib:m,i:n) from the right +* + CALL DLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise', + $ M-I-IB+1, N-I+1, IB, A( I, I ), LDA, WORK, + $ LDWORK, A( I+IB, I ), LDA, WORK( IB+1 ), + $ LDWORK ) + END IF +* +* Apply H' to columns i:n of current block +* + CALL DORGL2( IB, N-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set columns 1:i-1 of current block to zero +* + DO 40 J = 1, I - 1 + DO 30 L = I, I + IB - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of DORGLQ +* + END diff --git a/costa/native/external/lapack/dorgql.f b/costa/native/external/lapack/dorgql.f new file mode 100644 index 000000000..6a997c902 --- /dev/null +++ b/costa/native/external/lapack/dorgql.f @@ -0,0 +1,214 @@ + SUBROUTINE DORGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORGQL generates an M-by-N real matrix Q with orthonormal columns, +* which is defined as the last N columns of a product of K elementary +* reflectors of order M +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by DGEQLF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the (n-k+i)-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by DGEQLF in the last k columns of its array +* argument A. +* On exit, the M-by-N matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) DOUBLE PRECISION array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DGEQLF. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KK, L, LDWORK, LWKOPT, + $ NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL DLARFB, DLARFT, DORG2L, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'DORGQL', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, N )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGQL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DORGQL', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORGQL', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the first block. +* The last kk columns are handled by the block method. +* + KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB ) +* +* Set A(m-kk+1:m,1:n-kk) to zero. +* + DO 20 J = 1, N - KK + DO 10 I = M - KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the first or only block. +* + CALL DORG2L( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = K - KK + 1, K, NB + IB = MIN( NB, K-I+1 ) + IF( N-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL DLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left +* + CALL DLARFB( 'Left', 'No transpose', 'Backward', + $ 'Columnwise', M-K+I+IB-1, N-K+I-1, IB, + $ A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows 1:m-k+i+ib-1 of current block +* + CALL DORG2L( M-K+I+IB-1, IB, IB, A( 1, N-K+I ), LDA, + $ TAU( I ), WORK, IINFO ) +* +* Set rows m-k+i+ib:m of current block to zero +* + DO 40 J = N - K + I, N - K + I + IB - 1 + DO 30 L = M - K + I + IB, M + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of DORGQL +* + END diff --git a/costa/native/external/lapack/dorgqr.f b/costa/native/external/lapack/dorgqr.f new file mode 100644 index 000000000..171dfb527 --- /dev/null +++ b/costa/native/external/lapack/dorgqr.f @@ -0,0 +1,217 @@ + SUBROUTINE DORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORGQR generates an M-by-N real matrix Q with orthonormal columns, +* which is defined as the first N columns of a product of K elementary +* reflectors of order M +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by DGEQRF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the i-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by DGEQRF in the first k columns of its array +* argument A. +* On exit, the M-by-N matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) DOUBLE PRECISION array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DGEQRF. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL DLARFB, DLARFT, DORG2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'DORGQR', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, N )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DORGQR', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORGQR', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk columns are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(1:kk,kk+1:n) to zero. +* + DO 20 J = KK + 1, N + DO 10 I = 1, KK + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.N ) + $ CALL DORG2R( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i:m,i+ib:n) from the left +* + CALL DLARFB( 'Left', 'No transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows i:m of current block +* + CALL DORG2R( M-I+1, IB, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set rows 1:i-1 of current block to zero +* + DO 40 J = I, I + IB - 1 + DO 30 L = 1, I - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of DORGQR +* + END diff --git a/costa/native/external/lapack/dorgr2.f b/costa/native/external/lapack/dorgr2.f new file mode 100644 index 000000000..ea88d346b --- /dev/null +++ b/costa/native/external/lapack/dorgr2.f @@ -0,0 +1,132 @@ + SUBROUTINE DORGR2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORGR2 generates an m by n real matrix Q with orthonormal rows, +* which is defined as the last m rows of a product of k elementary +* reflectors of order n +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by DGERQF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. N >= M. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. M >= K >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the (m-k+i)-th row must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by DGERQF in the last k rows of its array argument +* A. +* On exit, the m by n matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) DOUBLE PRECISION array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DGERQF. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (M) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, II, J, L +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGR2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) + $ RETURN +* + IF( K.LT.M ) THEN +* +* Initialise rows 1:m-k to rows of the unit matrix +* + DO 20 J = 1, N + DO 10 L = 1, M - K + A( L, J ) = ZERO + 10 CONTINUE + IF( J.GT.N-M .AND. J.LE.N-K ) + $ A( M-N+J, J ) = ONE + 20 CONTINUE + END IF +* + DO 40 I = 1, K + II = M - K + I +* +* Apply H(i) to A(1:m-k+i,1:n-k+i) from the right +* + A( II, N-M+II ) = ONE + CALL DLARF( 'Right', II-1, N-M+II, A( II, 1 ), LDA, TAU( I ), + $ A, LDA, WORK ) + CALL DSCAL( N-M+II-1, -TAU( I ), A( II, 1 ), LDA ) + A( II, N-M+II ) = ONE - TAU( I ) +* +* Set A(m-k+i,n-k+i+1:n) to zero +* + DO 30 L = N - M + II + 1, N + A( II, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of DORGR2 +* + END diff --git a/costa/native/external/lapack/dorgrq.f b/costa/native/external/lapack/dorgrq.f new file mode 100644 index 000000000..ba0a60052 --- /dev/null +++ b/costa/native/external/lapack/dorgrq.f @@ -0,0 +1,214 @@ + SUBROUTINE DORGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORGRQ generates an M-by-N real matrix Q with orthonormal rows, +* which is defined as the last M rows of a product of K elementary +* reflectors of order N +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by DGERQF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. N >= M. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. M >= K >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the (m-k+i)-th row must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by DGERQF in the last k rows of its array argument +* A. +* On exit, the M-by-N matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) DOUBLE PRECISION array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DGERQF. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, II, IINFO, IWS, J, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL DLARFB, DLARFT, DORGR2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'DORGRQ', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, M )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGRQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DORGRQ', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORGRQ', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the first block. +* The last kk rows are handled by the block method. +* + KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB ) +* +* Set A(1:m-kk,n-kk+1:n) to zero. +* + DO 20 J = N - KK + 1, N + DO 10 I = 1, M - KK + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the first or only block. +* + CALL DORGR2( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = K - KK + 1, K, NB + IB = MIN( NB, K-I+1 ) + II = M - K + I + IF( II.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL DLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, + $ A( II, 1 ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H' to A(1:m-k+i-1,1:n-k+i+ib-1) from the right +* + CALL DLARFB( 'Right', 'Transpose', 'Backward', 'Rowwise', + $ II-1, N-K+I+IB-1, IB, A( II, 1 ), LDA, WORK, + $ LDWORK, A, LDA, WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H' to columns 1:n-k+i+ib-1 of current block +* + CALL DORGR2( IB, N-K+I+IB-1, IB, A( II, 1 ), LDA, TAU( I ), + $ WORK, IINFO ) +* +* Set columns n-k+i+ib:n of current block to zero +* + DO 40 L = N - K + I + IB, N + DO 30 J = II, II + IB - 1 + A( J, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of DORGRQ +* + END diff --git a/costa/native/external/lapack/dorgtr.f b/costa/native/external/lapack/dorgtr.f new file mode 100644 index 000000000..8723e01bf --- /dev/null +++ b/costa/native/external/lapack/dorgtr.f @@ -0,0 +1,184 @@ + SUBROUTINE DORGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORGTR generates a real orthogonal matrix Q which is defined as the +* product of n-1 elementary reflectors of order N, as returned by +* DSYTRD: +* +* if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), +* +* if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A contains elementary reflectors +* from DSYTRD; +* = 'L': Lower triangle of A contains elementary reflectors +* from DSYTRD. +* +* N (input) INTEGER +* The order of the matrix Q. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the vectors which define the elementary reflectors, +* as returned by DSYTRD. +* On exit, the N-by-N orthogonal matrix Q. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (input) DOUBLE PRECISION array, dimension (N-1) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DSYTRD. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N-1). +* For optimum performance LWORK >= (N-1)*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, J, LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DORGQL, DORGQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( UPPER ) THEN + NB = ILAENV( 1, 'DORGQL', ' ', N-1, N-1, N-1, -1 ) + ELSE + NB = ILAENV( 1, 'DORGQR', ' ', N-1, N-1, N-1, -1 ) + END IF + LWKOPT = MAX( 1, N-1 )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGTR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( UPPER ) THEN +* +* Q was determined by a call to DSYTRD with UPLO = 'U' +* +* Shift the vectors which define the elementary reflectors one +* column to the left, and set the last row and column of Q to +* those of the unit matrix +* + DO 20 J = 1, N - 1 + DO 10 I = 1, J - 1 + A( I, J ) = A( I, J+1 ) + 10 CONTINUE + A( N, J ) = ZERO + 20 CONTINUE + DO 30 I = 1, N - 1 + A( I, N ) = ZERO + 30 CONTINUE + A( N, N ) = ONE +* +* Generate Q(1:n-1,1:n-1) +* + CALL DORGQL( N-1, N-1, N-1, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* Q was determined by a call to DSYTRD with UPLO = 'L'. +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q to +* those of the unit matrix +* + DO 50 J = N, 2, -1 + A( 1, J ) = ZERO + DO 40 I = J + 1, N + A( I, J ) = A( I, J-1 ) + 40 CONTINUE + 50 CONTINUE + A( 1, 1 ) = ONE + DO 60 I = 2, N + A( I, 1 ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Generate Q(2:n,2:n) +* + CALL DORGQR( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORGTR +* + END diff --git a/costa/native/external/lapack/dorm2l.f b/costa/native/external/lapack/dorm2l.f new file mode 100644 index 000000000..02b3538fd --- /dev/null +++ b/costa/native/external/lapack/dorm2l.f @@ -0,0 +1,194 @@ + SUBROUTINE DORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORM2L overwrites the general real m by n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'T', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'T', +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by DGEQLF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'T': apply Q' (Transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,K) +* The i-th column must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* DGEQLF in the last k columns of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If SIDE = 'L', LDA >= max(1,M); +* if SIDE = 'R', LDA >= max(1,N). +* +* TAU (input) DOUBLE PRECISION array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DGEQLF. +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) +* On entry, the m by n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) DOUBLE PRECISION array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, MI, NI, NQ + DOUBLE PRECISION AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORM2L', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) + $ THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(1:m-k+i,1:n) +* + MI = M - K + I + ELSE +* +* H(i) is applied to C(1:m,1:n-k+i) +* + NI = N - K + I + END IF +* +* Apply H(i) +* + AII = A( NQ-K+I, I ) + A( NQ-K+I, I ) = ONE + CALL DLARF( SIDE, MI, NI, A( 1, I ), 1, TAU( I ), C, LDC, + $ WORK ) + A( NQ-K+I, I ) = AII + 10 CONTINUE + RETURN +* +* End of DORM2L +* + END diff --git a/costa/native/external/lapack/dorm2r.f b/costa/native/external/lapack/dorm2r.f new file mode 100644 index 000000000..d5531bc37 --- /dev/null +++ b/costa/native/external/lapack/dorm2r.f @@ -0,0 +1,198 @@ + SUBROUTINE DORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORM2R overwrites the general real m by n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'T', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'T', +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'T': apply Q' (Transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,K) +* The i-th column must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* DGEQRF in the first k columns of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If SIDE = 'L', LDA >= max(1,M); +* if SIDE = 'R', LDA >= max(1,N). +* +* TAU (input) DOUBLE PRECISION array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DGEQRF. +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) +* On entry, the m by n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) DOUBLE PRECISION array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ + DOUBLE PRECISION AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORM2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. ( .NOT.LEFT .AND. NOTRAN ) ) + $ THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) +* + AII = A( I, I ) + A( I, I ) = ONE + CALL DLARF( SIDE, MI, NI, A( I, I ), 1, TAU( I ), C( IC, JC ), + $ LDC, WORK ) + A( I, I ) = AII + 10 CONTINUE + RETURN +* +* End of DORM2R +* + END diff --git a/costa/native/external/lapack/dormbr.f b/costa/native/external/lapack/dormbr.f new file mode 100644 index 000000000..40ffe480e --- /dev/null +++ b/costa/native/external/lapack/dormbr.f @@ -0,0 +1,282 @@ + SUBROUTINE DORMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, + $ LDC, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, VECT + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* If VECT = 'Q', DORMBR overwrites the general real M-by-N matrix C +* with +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'T': Q**T * C C * Q**T +* +* If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C +* with +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': P * C C * P +* TRANS = 'T': P**T * C C * P**T +* +* Here Q and P**T are the orthogonal matrices determined by DGEBRD when +* reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and +* P**T are defined as products of elementary reflectors H(i) and G(i) +* respectively. +* +* Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the +* order of the orthogonal matrix Q or P**T that is applied. +* +* If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: +* if nq >= k, Q = H(1) H(2) . . . H(k); +* if nq < k, Q = H(1) H(2) . . . H(nq-1). +* +* If VECT = 'P', A is assumed to have been a K-by-NQ matrix: +* if k < nq, P = G(1) G(2) . . . G(k); +* if k >= nq, P = G(1) G(2) . . . G(nq-1). +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* = 'Q': apply Q or Q**T; +* = 'P': apply P or P**T. +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q, Q**T, P or P**T from the Left; +* = 'R': apply Q, Q**T, P or P**T from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q or P; +* = 'T': Transpose, apply Q**T or P**T. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* If VECT = 'Q', the number of columns in the original +* matrix reduced by DGEBRD. +* If VECT = 'P', the number of rows in the original +* matrix reduced by DGEBRD. +* K >= 0. +* +* A (input) DOUBLE PRECISION array, dimension +* (LDA,min(nq,K)) if VECT = 'Q' +* (LDA,nq) if VECT = 'P' +* The vectors which define the elementary reflectors H(i) and +* G(i), whose products determine the matrices Q and P, as +* returned by DGEBRD. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If VECT = 'Q', LDA >= max(1,nq); +* if VECT = 'P', LDA >= max(1,min(nq,K)). +* +* TAU (input) DOUBLE PRECISION array, dimension (min(nq,K)) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i) or G(i) which determines Q or P, as returned +* by DGEBRD in the array argument TAUQ or TAUP. +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q +* or P*C or P**T*C or C*P or C*P**T. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL APPLYQ, LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DORMLQ, DORMQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + APPLYQ = LSAME( VECT, 'Q' ) + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q or P and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.APPLYQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( K.LT.0 ) THEN + INFO = -6 + ELSE IF( ( APPLYQ .AND. LDA.LT.MAX( 1, NQ ) ) .OR. + $ ( .NOT.APPLYQ .AND. LDA.LT.MAX( 1, MIN( NQ, K ) ) ) ) + $ THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( APPLYQ ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'DORMQR', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'DORMQR', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + ELSE + IF( LEFT ) THEN + NB = ILAENV( 1, 'DORMLQ', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'DORMLQ', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMBR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + WORK( 1 ) = 1 + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + IF( APPLYQ ) THEN +* +* Apply Q +* + IF( NQ.GE.K ) THEN +* +* Q was determined by a call to DGEBRD with nq >= k +* + CALL DORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, IINFO ) + ELSE IF( NQ.GT.1 ) THEN +* +* Q was determined by a call to DGEBRD with nq < k +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + I1 = 2 + I2 = 1 + ELSE + MI = M + NI = N - 1 + I1 = 1 + I2 = 2 + END IF + CALL DORMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU, + $ C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + ELSE +* +* Apply P +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF + IF( NQ.GT.K ) THEN +* +* P was determined by a call to DGEBRD with nq > k +* + CALL DORMLQ( SIDE, TRANST, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, IINFO ) + ELSE IF( NQ.GT.1 ) THEN +* +* P was determined by a call to DGEBRD with nq <= k +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + I1 = 2 + I2 = 1 + ELSE + MI = M + NI = N - 1 + I1 = 1 + I2 = 2 + END IF + CALL DORMLQ( SIDE, TRANST, MI, NI, NQ-1, A( 1, 2 ), LDA, + $ TAU, C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORMBR +* + END diff --git a/costa/native/external/lapack/dormhr.f b/costa/native/external/lapack/dormhr.f new file mode 100644 index 000000000..ce3657145 --- /dev/null +++ b/costa/native/external/lapack/dormhr.f @@ -0,0 +1,202 @@ + SUBROUTINE DORMHR( SIDE, TRANS, M, N, ILO, IHI, A, LDA, TAU, C, + $ LDC, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER IHI, ILO, INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORMHR overwrites the general real M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'T': Q**T * C C * Q**T +* +* where Q is a real orthogonal matrix of order nq, with nq = m if +* SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +* IHI-ILO elementary reflectors, as returned by DGEHRD: +* +* Q = H(ilo) H(ilo+1) . . . H(ihi-1). +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**T from the Left; +* = 'R': apply Q or Q**T from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'T': Transpose, apply Q**T. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* ILO and IHI must have the same values as in the previous call +* of DGEHRD. Q is equal to the unit matrix except in the +* submatrix Q(ilo+1:ihi,ilo+1:ihi). +* If SIDE = 'L', then 1 <= ILO <= IHI <= M, if M > 0, and +* ILO = 1 and IHI = 0, if M = 0; +* if SIDE = 'R', then 1 <= ILO <= IHI <= N, if N > 0, and +* ILO = 1 and IHI = 0, if N = 0. +* +* A (input) DOUBLE PRECISION array, dimension +* (LDA,M) if SIDE = 'L' +* (LDA,N) if SIDE = 'R' +* The vectors which define the elementary reflectors, as +* returned by DGEHRD. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. +* +* TAU (input) DOUBLE PRECISION array, dimension +* (M-1) if SIDE = 'L' +* (N-1) if SIDE = 'R' +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DGEHRD. +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, LQUERY + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NH, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DORMQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NH = IHI - ILO + LEFT = LSAME( SIDE, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'T' ) ) + $ THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, NQ ) ) THEN + INFO = -5 + ELSE IF( IHI.LT.MIN( ILO, NQ ) .OR. IHI.GT.NQ ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'DORMQR', SIDE // TRANS, NH, N, NH, -1 ) + ELSE + NB = ILAENV( 1, 'DORMQR', SIDE // TRANS, M, NH, NH, -1 ) + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMHR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. NH.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( LEFT ) THEN + MI = NH + NI = N + I1 = ILO + 1 + I2 = 1 + ELSE + MI = M + NI = NH + I1 = 1 + I2 = ILO + 1 + END IF +* + CALL DORMQR( SIDE, TRANS, MI, NI, NH, A( ILO+1, ILO ), LDA, + $ TAU( ILO ), C( I1, I2 ), LDC, WORK, LWORK, IINFO ) +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORMHR +* + END diff --git a/costa/native/external/lapack/dorml2.f b/costa/native/external/lapack/dorml2.f new file mode 100644 index 000000000..d07f31bf9 --- /dev/null +++ b/costa/native/external/lapack/dorml2.f @@ -0,0 +1,198 @@ + SUBROUTINE DORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORML2 overwrites the general real m by n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'T', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'T', +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by DGELQF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'T': apply Q' (Transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) DOUBLE PRECISION array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* DGELQF in the first k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) DOUBLE PRECISION array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DGELQF. +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) +* On entry, the m by n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) DOUBLE PRECISION array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ + DOUBLE PRECISION AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORML2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) + $ THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) +* + AII = A( I, I ) + A( I, I ) = ONE + CALL DLARF( SIDE, MI, NI, A( I, I ), LDA, TAU( I ), + $ C( IC, JC ), LDC, WORK ) + A( I, I ) = AII + 10 CONTINUE + RETURN +* +* End of DORML2 +* + END diff --git a/costa/native/external/lapack/dormlq.f b/costa/native/external/lapack/dormlq.f new file mode 100644 index 000000000..e5242a568 --- /dev/null +++ b/costa/native/external/lapack/dormlq.f @@ -0,0 +1,268 @@ + SUBROUTINE DORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORMLQ overwrites the general real M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'T': Q**T * C C * Q**T +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**T from the Left; +* = 'R': apply Q or Q**T from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'T': Transpose, apply Q**T. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) DOUBLE PRECISION array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* DGELQF in the first k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) DOUBLE PRECISION array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DGELQF. +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + DOUBLE PRECISION T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLARFB, DLARFT, DORML2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'DORMLQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORMLQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL DORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL DLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H' +* + CALL DLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB, + $ A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, WORK, + $ LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORMLQ +* + END diff --git a/costa/native/external/lapack/dormql.f b/costa/native/external/lapack/dormql.f new file mode 100644 index 000000000..f6a00c2af --- /dev/null +++ b/costa/native/external/lapack/dormql.f @@ -0,0 +1,257 @@ + SUBROUTINE DORMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORMQL overwrites the general real M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'T': Q**T * C C * Q**T +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by DGEQLF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**T from the Left; +* = 'R': apply Q or Q**T from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'T': Transpose, apply Q**T. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,K) +* The i-th column must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* DGEQLF in the last k columns of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If SIDE = 'L', LDA >= max(1,M); +* if SIDE = 'R', LDA >= max(1,N). +* +* TAU (input) DOUBLE PRECISION array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DGEQLF. +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT, + $ MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + DOUBLE PRECISION T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLARFB, DLARFT, DORM2L, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'DORMQL', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMQL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORMQL', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL DORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL DLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB, + $ A( 1, I ), LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H' is applied to C(1:m-k+i+ib-1,1:n) +* + MI = M - K + I + IB - 1 + ELSE +* +* H or H' is applied to C(1:m,1:n-k+i+ib-1) +* + NI = N - K + I + IB - 1 + END IF +* +* Apply H or H' +* + CALL DLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI, + $ IB, A( 1, I ), LDA, T, LDT, C, LDC, WORK, + $ LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORMQL +* + END diff --git a/costa/native/external/lapack/dormqr.f b/costa/native/external/lapack/dormqr.f new file mode 100644 index 000000000..57315ae60 --- /dev/null +++ b/costa/native/external/lapack/dormqr.f @@ -0,0 +1,261 @@ + SUBROUTINE DORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORMQR overwrites the general real M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'T': Q**T * C C * Q**T +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**T from the Left; +* = 'R': apply Q or Q**T from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'T': Transpose, apply Q**T. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,K) +* The i-th column must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* DGEQRF in the first k columns of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If SIDE = 'L', LDA >= max(1,M); +* if SIDE = 'R', LDA >= max(1,N). +* +* TAU (input) DOUBLE PRECISION array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DGEQRF. +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + DOUBLE PRECISION T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLARFB, DLARFT, DORM2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'DORMQR', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORMQR', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL DORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL DLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H' +* + CALL DLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI, + $ IB, A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, + $ WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORMQR +* + END diff --git a/costa/native/external/lapack/dormr2.f b/costa/native/external/lapack/dormr2.f new file mode 100644 index 000000000..13beef049 --- /dev/null +++ b/costa/native/external/lapack/dormr2.f @@ -0,0 +1,194 @@ + SUBROUTINE DORMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORMR2 overwrites the general real m by n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'T', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'T', +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by DGERQF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'T': apply Q' (Transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) DOUBLE PRECISION array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* DGERQF in the last k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) DOUBLE PRECISION array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DGERQF. +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) +* On entry, the m by n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) DOUBLE PRECISION array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, MI, NI, NQ + DOUBLE PRECISION AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMR2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. ( .NOT.LEFT .AND. NOTRAN ) ) + $ THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(1:m-k+i,1:n) +* + MI = M - K + I + ELSE +* +* H(i) is applied to C(1:m,1:n-k+i) +* + NI = N - K + I + END IF +* +* Apply H(i) +* + AII = A( I, NQ-K+I ) + A( I, NQ-K+I ) = ONE + CALL DLARF( SIDE, MI, NI, A( I, 1 ), LDA, TAU( I ), C, LDC, + $ WORK ) + A( I, NQ-K+I ) = AII + 10 CONTINUE + RETURN +* +* End of DORMR2 +* + END diff --git a/costa/native/external/lapack/dormr3.f b/costa/native/external/lapack/dormr3.f new file mode 100644 index 000000000..82ff9a872 --- /dev/null +++ b/costa/native/external/lapack/dormr3.f @@ -0,0 +1,207 @@ + SUBROUTINE DORMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, L, LDA, LDC, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORMR3 overwrites the general real m by n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'T', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'T', +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by DTZRZF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'T': apply Q' (Transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* L (input) INTEGER +* The number of columns of the matrix A containing +* the meaningful part of the Householder reflectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* A (input) DOUBLE PRECISION array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* DTZRZF in the last k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) DOUBLE PRECISION array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DTZRZF. +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) +* On entry, the m-by-n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) DOUBLE PRECISION array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JA, JC, MI, NI, NQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLARZ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR. + $ ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMR3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JA = M - L + 1 + JC = 1 + ELSE + MI = M + JA = N - L + 1 + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) or H(i)' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) or H(i)' +* + CALL DLARZ( SIDE, MI, NI, L, A( I, JA ), LDA, TAU( I ), + $ C( IC, JC ), LDC, WORK ) +* + 10 CONTINUE +* + RETURN +* +* End of DORMR3 +* + END diff --git a/costa/native/external/lapack/dormrq.f b/costa/native/external/lapack/dormrq.f new file mode 100644 index 000000000..56972ff55 --- /dev/null +++ b/costa/native/external/lapack/dormrq.f @@ -0,0 +1,264 @@ + SUBROUTINE DORMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORMRQ overwrites the general real M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'T': Q**T * C C * Q**T +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by DGERQF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**T from the Left; +* = 'R': apply Q or Q**T from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'T': Transpose, apply Q**T. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) DOUBLE PRECISION array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* DGERQF in the last k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) DOUBLE PRECISION array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DGERQF. +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT, + $ MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + DOUBLE PRECISION T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLARFB, DLARFT, DORMR2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'DORMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMRQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL DORMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL DLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB, + $ A( I, 1 ), LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H' is applied to C(1:m-k+i+ib-1,1:n) +* + MI = M - K + I + IB - 1 + ELSE +* +* H or H' is applied to C(1:m,1:n-k+i+ib-1) +* + NI = N - K + I + IB - 1 + END IF +* +* Apply H or H' +* + CALL DLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, + $ IB, A( I, 1 ), LDA, T, LDT, C, LDC, WORK, + $ LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORMRQ +* + END diff --git a/costa/native/external/lapack/dormrz.f b/costa/native/external/lapack/dormrz.f new file mode 100644 index 000000000..b1f329cba --- /dev/null +++ b/costa/native/external/lapack/dormrz.f @@ -0,0 +1,288 @@ + SUBROUTINE DORMRZ( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, L, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORMRZ overwrites the general real M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'T': Q**T * C C * Q**T +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by DTZRZF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**T from the Left; +* = 'R': apply Q or Q**T from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'T': Transpose, apply Q**T. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* L (input) INTEGER +* The number of columns of the matrix A containing +* the meaningful part of the Householder reflectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* A (input) DOUBLE PRECISION array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* DTZRZF in the last k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) DOUBLE PRECISION array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DTZRZF. +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JA, JC, + $ LDWORK, LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + DOUBLE PRECISION T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLARZB, DLARZT, DORMR3, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR. + $ ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'DORMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMRZ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL DORMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + JA = M - L + 1 + ELSE + MI = M + IC = 1 + JA = N - L + 1 + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL DLARZT( 'Backward', 'Rowwise', L, IB, A( I, JA ), LDA, + $ TAU( I ), T, LDT ) +* + IF( LEFT ) THEN +* +* H or H' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H' +* + CALL DLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, + $ IB, L, A( I, JA ), LDA, T, LDT, C( IC, JC ), + $ LDC, WORK, LDWORK ) + 10 CONTINUE +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DORMRZ +* + END diff --git a/costa/native/external/lapack/dormtr.f b/costa/native/external/lapack/dormtr.f new file mode 100644 index 000000000..995754bdf --- /dev/null +++ b/costa/native/external/lapack/dormtr.f @@ -0,0 +1,223 @@ + SUBROUTINE DORMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, UPLO + INTEGER INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DORMTR overwrites the general real M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'T': Q**T * C C * Q**T +* +* where Q is a real orthogonal matrix of order nq, with nq = m if +* SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +* nq-1 elementary reflectors, as returned by DSYTRD: +* +* if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); +* +* if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**T from the Left; +* = 'R': apply Q or Q**T from the Right. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A contains elementary reflectors +* from DSYTRD; +* = 'L': Lower triangle of A contains elementary reflectors +* from DSYTRD. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'T': Transpose, apply Q**T. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* A (input) DOUBLE PRECISION array, dimension +* (LDA,M) if SIDE = 'L' +* (LDA,N) if SIDE = 'R' +* The vectors which define the elementary reflectors, as +* returned by DSYTRD. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. +* +* TAU (input) DOUBLE PRECISION array, dimension +* (M-1) if SIDE = 'L' +* (N-1) if SIDE = 'R' +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by DSYTRD. +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, UPPER + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DORMQL, DORMQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'T' ) ) + $ THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( UPPER ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'DORMQL', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'DORMQL', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + ELSE + IF( LEFT ) THEN + NB = ILAENV( 1, 'DORMQR', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'DORMQR', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMTR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. NQ.EQ.1 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + ELSE + MI = M + NI = N - 1 + END IF +* + IF( UPPER ) THEN +* +* Q was determined by a call to DSYTRD with UPLO = 'U' +* + CALL DORMQL( SIDE, TRANS, MI, NI, NQ-1, A( 1, 2 ), LDA, TAU, C, + $ LDC, WORK, LWORK, IINFO ) + ELSE +* +* Q was determined by a call to DSYTRD with UPLO = 'L' +* + IF( LEFT ) THEN + I1 = 2 + I2 = 1 + ELSE + I1 = 1 + I2 = 2 + END IF + CALL DORMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU, + $ C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORMTR +* + END diff --git a/costa/native/external/lapack/dpbcon.f b/costa/native/external/lapack/dpbcon.f new file mode 100644 index 000000000..d8ce1ef2c --- /dev/null +++ b/costa/native/external/lapack/dpbcon.f @@ -0,0 +1,188 @@ + SUBROUTINE DPBCON( UPLO, N, KD, AB, LDAB, ANORM, RCOND, WORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DPBCON estimates the reciprocal of the condition number (in the +* 1-norm) of a real symmetric positive definite band matrix using the +* Cholesky factorization A = U**T*U or A = L*L**T computed by DPBTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular factor stored in AB; +* = 'L': Lower triangular factor stored in AB. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**T*U or A = L*L**T of the band matrix A, stored in the +* first KD+1 rows of the array. The j-th column of U or L is +* stored in the j-th column of the array AB as follows: +* if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; +* if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* ANORM (input) DOUBLE PRECISION +* The 1-norm (or infinity-norm) of the symmetric band matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + DOUBLE PRECISION AINVNM, SCALE, SCALEL, SCALEU, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IDAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLACON, DLATBS, DRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U'). +* + CALL DLATBS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEL, WORK( 2*N+1 ), + $ INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL DLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEU, WORK( 2*N+1 ), + $ INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL DLATBS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEL, WORK( 2*N+1 ), + $ INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L'). +* + CALL DLATBS( 'Lower', 'Transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEU, WORK( 2*N+1 ), + $ INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = IDAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL DRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE +* + RETURN +* +* End of DPBCON +* + END diff --git a/costa/native/external/lapack/dpbequ.f b/costa/native/external/lapack/dpbequ.f new file mode 100644 index 000000000..b82fbf543 --- /dev/null +++ b/costa/native/external/lapack/dpbequ.f @@ -0,0 +1,167 @@ + SUBROUTINE DPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), S( * ) +* .. +* +* Purpose +* ======= +* +* DPBEQU computes row and column scalings intended to equilibrate a +* symmetric positive definite band matrix A and reduce its condition +* number (with respect to the two-norm). S contains the scale factors, +* S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with +* elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This +* choice of S puts the condition number of B within a factor N of the +* smallest possible condition number over all possible diagonal +* scalings. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular of A is stored; +* = 'L': Lower triangular of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) +* The upper or lower triangle of the symmetric band matrix A, +* stored in the first KD+1 rows of the array. The j-th column +* of A is stored in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array A. LDAB >= KD+1. +* +* S (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, S contains the scale factors for A. +* +* SCOND (output) DOUBLE PRECISION +* If INFO = 0, S contains the ratio of the smallest S(i) to +* the largest S(i). If SCOND >= 0.1 and AMAX is neither too +* large nor too small, it is not worth scaling by S. +* +* AMAX (output) DOUBLE PRECISION +* Absolute value of largest matrix element. If AMAX is very +* close to overflow or very close to underflow, the matrix +* should be scaled. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the i-th diagonal element is nonpositive. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, J + DOUBLE PRECISION SMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPBEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* + IF( UPPER ) THEN + J = KD + 1 + ELSE + J = 1 + END IF +* +* Initialize SMIN and AMAX. +* + S( 1 ) = AB( J, 1 ) + SMIN = S( 1 ) + AMAX = S( 1 ) +* +* Find the minimum and maximum diagonal elements. +* + DO 10 I = 2, N + S( I ) = AB( J, I ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 20 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 20 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 30 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 30 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of DPBEQU +* + END diff --git a/costa/native/external/lapack/dpbrfs.f b/costa/native/external/lapack/dpbrfs.f new file mode 100644 index 000000000..62d747282 --- /dev/null +++ b/costa/native/external/lapack/dpbrfs.f @@ -0,0 +1,337 @@ + SUBROUTINE DPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, + $ LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DPBRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is symmetric positive definite +* and banded, and provides error bounds and backward error estimates +* for the solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) +* The upper or lower triangle of the symmetric band matrix A, +* stored in the first KD+1 rows of the array. The j-th column +* of A is stored in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* AFB (input) DOUBLE PRECISION array, dimension (LDAFB,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**T*U or A = L*L**T of the band matrix A as computed by +* DPBTRF, in the same storage format as A (see AB). +* +* LDAFB (input) INTEGER +* The leading dimension of the array AFB. LDAFB >= KD+1. +* +* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by DPBTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, L, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DLACON, DPBTRS, DSBMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDAFB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = MIN( N+1, 2*KD+2 ) + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL DSBMV( UPLO, N, KD, -ONE, AB, LDAB, X( 1, J ), 1, ONE, + $ WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + L = KD + 1 - K + DO 40 I = MAX( 1, K-KD ), K - 1 + WORK( I ) = WORK( I ) + ABS( AB( L+I, K ) )*XK + S = S + ABS( AB( L+I, K ) )*ABS( X( I, J ) ) + 40 CONTINUE + WORK( K ) = WORK( K ) + ABS( AB( KD+1, K ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + WORK( K ) = WORK( K ) + ABS( AB( 1, K ) )*XK + L = 1 - K + DO 60 I = K + 1, MIN( N, K+KD ) + WORK( I ) = WORK( I ) + ABS( AB( L+I, K ) )*XK + S = S + ABS( AB( L+I, K ) )*ABS( X( I, J ) ) + 60 CONTINUE + WORK( K ) = WORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL DPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK( N+1 ), N, + $ INFO ) + CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use DLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL DLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL DPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK( N+1 ), N, + $ INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( N+I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( N+I )*WORK( I ) + 120 CONTINUE + CALL DPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK( N+1 ), N, + $ INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of DPBRFS +* + END diff --git a/costa/native/external/lapack/dpbstf.f b/costa/native/external/lapack/dpbstf.f new file mode 100644 index 000000000..f88ffefa1 --- /dev/null +++ b/costa/native/external/lapack/dpbstf.f @@ -0,0 +1,251 @@ + SUBROUTINE DPBSTF( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* DPBSTF computes a split Cholesky factorization of a real +* symmetric positive definite band matrix A. +* +* This routine is designed to be used in conjunction with DSBGST. +* +* The factorization has the form A = S**T*S where S is a band matrix +* of the same bandwidth as A and the following structure: +* +* S = ( U ) +* ( M L ) +* +* where U is upper triangular of order m = (n+kd)/2, and L is lower +* triangular of order n-m. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first kd+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, if INFO = 0, the factor S from the split Cholesky +* factorization A = S**T*S. See Further Details. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the factorization could not be completed, +* because the updated element a(i,i) was negative; the +* matrix A is not positive definite. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 7, KD = 2: +* +* S = ( s11 s12 s13 ) +* ( s22 s23 s24 ) +* ( s33 s34 ) +* ( s44 ) +* ( s53 s54 s55 ) +* ( s64 s65 s66 ) +* ( s75 s76 s77 ) +* +* If UPLO = 'U', the array AB holds: +* +* on entry: on exit: +* +* * * a13 a24 a35 a46 a57 * * s13 s24 s53 s64 s75 +* * a12 a23 a34 a45 a56 a67 * s12 s23 s34 s54 s65 s76 +* a11 a22 a33 a44 a55 a66 a77 s11 s22 s33 s44 s55 s66 s77 +* +* If UPLO = 'L', the array AB holds: +* +* on entry: on exit: +* +* a11 a22 a33 a44 a55 a66 a77 s11 s22 s33 s44 s55 s66 s77 +* a21 a32 a43 a54 a65 a76 * s12 s23 s34 s54 s65 s76 * +* a31 a42 a53 a64 a64 * * s13 s24 s53 s64 s75 * * +* +* Array elements marked * are not used by the routine. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, KLD, KM, M + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPBSTF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + KLD = MAX( 1, LDAB-1 ) +* +* Set the splitting point m. +* + M = ( N+KD ) / 2 +* + IF( UPPER ) THEN +* +* Factorize A(m+1:n,m+1:n) as L**T*L, and update A(1:m,1:m). +* + DO 10 J = N, M + 1, -1 +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = AB( KD+1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 50 + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ + KM = MIN( J-1, KD ) +* +* Compute elements j-km:j-1 of the j-th column and update the +* the leading submatrix within the band. +* + CALL DSCAL( KM, ONE / AJJ, AB( KD+1-KM, J ), 1 ) + CALL DSYR( 'Upper', KM, -ONE, AB( KD+1-KM, J ), 1, + $ AB( KD+1, J-KM ), KLD ) + 10 CONTINUE +* +* Factorize the updated submatrix A(1:m,1:m) as U**T*U. +* + DO 20 J = 1, M +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = AB( KD+1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 50 + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ + KM = MIN( KD, M-J ) +* +* Compute elements j+1:j+km of the j-th row and update the +* trailing submatrix within the band. +* + IF( KM.GT.0 ) THEN + CALL DSCAL( KM, ONE / AJJ, AB( KD, J+1 ), KLD ) + CALL DSYR( 'Upper', KM, -ONE, AB( KD, J+1 ), KLD, + $ AB( KD+1, J+1 ), KLD ) + END IF + 20 CONTINUE + ELSE +* +* Factorize A(m+1:n,m+1:n) as L**T*L, and update A(1:m,1:m). +* + DO 30 J = N, M + 1, -1 +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = AB( 1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 50 + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ + KM = MIN( J-1, KD ) +* +* Compute elements j-km:j-1 of the j-th row and update the +* trailing submatrix within the band. +* + CALL DSCAL( KM, ONE / AJJ, AB( KM+1, J-KM ), KLD ) + CALL DSYR( 'Lower', KM, -ONE, AB( KM+1, J-KM ), KLD, + $ AB( 1, J-KM ), KLD ) + 30 CONTINUE +* +* Factorize the updated submatrix A(1:m,1:m) as U**T*U. +* + DO 40 J = 1, M +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = AB( 1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 50 + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ + KM = MIN( KD, M-J ) +* +* Compute elements j+1:j+km of the j-th column and update the +* trailing submatrix within the band. +* + IF( KM.GT.0 ) THEN + CALL DSCAL( KM, ONE / AJJ, AB( 2, J ), 1 ) + CALL DSYR( 'Lower', KM, -ONE, AB( 2, J ), 1, + $ AB( 1, J+1 ), KLD ) + END IF + 40 CONTINUE + END IF + RETURN +* + 50 CONTINUE + INFO = J + RETURN +* +* End of DPBSTF +* + END diff --git a/costa/native/external/lapack/dpbsv.f b/costa/native/external/lapack/dpbsv.f new file mode 100644 index 000000000..79ddd4387 --- /dev/null +++ b/costa/native/external/lapack/dpbsv.f @@ -0,0 +1,152 @@ + SUBROUTINE DPBSV( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DPBSV computes the solution to a real system of linear equations +* A * X = B, +* where A is an N-by-N symmetric positive definite band matrix and X +* and B are N-by-NRHS matrices. +* +* The Cholesky decomposition is used to factor A as +* A = U**T * U, if UPLO = 'U', or +* A = L * L**T, if UPLO = 'L', +* where U is an upper triangular band matrix, and L is a lower +* triangular band matrix, with the same number of superdiagonals or +* subdiagonals as A. The factored form of A is then used to solve the +* system of equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(N,j+KD). +* See below for further details. +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U**T*U or A = L*L**T of the band +* matrix A, in the same storage format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i of A is not +* positive definite, so the factorization could not be +* completed, and the solution has not been computed. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 6, KD = 2, and UPLO = 'U': +* +* On entry: On exit: +* +* * * a13 a24 a35 a46 * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* +* Similarly, if UPLO = 'L' the format of A is as follows: +* +* On entry: On exit: +* +* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +* a31 a42 a53 a64 * * l31 l42 l53 l64 * * +* +* Array elements marked * are not used by the routine. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DPBTRF, DPBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPBSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + CALL DPBTRF( UPLO, N, KD, AB, LDAB, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL DPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* + END IF + RETURN +* +* End of DPBSV +* + END diff --git a/costa/native/external/lapack/dpbsvx.f b/costa/native/external/lapack/dpbsvx.f new file mode 100644 index 000000000..66406ce70 --- /dev/null +++ b/costa/native/external/lapack/dpbsvx.f @@ -0,0 +1,424 @@ + SUBROUTINE DPBSVX( FACT, UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, + $ EQUED, S, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), S( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DPBSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to +* compute the solution to a real system of linear equations +* A * X = B, +* where A is an N-by-N symmetric positive definite band matrix and X +* and B are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'E', real scaling factors are computed to equilibrate +* the system: +* diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +* Whether or not the system will be equilibrated depends on the +* scaling of the matrix A, but if equilibration is used, A is +* overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +* +* 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +* factor the matrix A (after equilibration if FACT = 'E') as +* A = U**T * U, if UPLO = 'U', or +* A = L * L**T, if UPLO = 'L', +* where U is an upper triangular band matrix, and L is a lower +* triangular band matrix. +* +* 3. If the leading i-by-i principal minor is not positive definite, +* then the routine returns with INFO = i. Otherwise, the factored +* form of A is used to estimate the condition number of the matrix +* A. If the reciprocal of the condition number is less than machine +* precision, INFO = N+1 is returned as a warning, but the routine +* still goes on to solve for X and compute error bounds as +* described below. +* +* 4. The system of equations is solved for X using the factored form +* of A. +* +* 5. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* 6. If equilibration was used, the matrix X is premultiplied by +* diag(S) so that it solves the original system before +* equilibration. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of the matrix A is +* supplied on entry, and if not, whether the matrix A should be +* equilibrated before it is factored. +* = 'F': On entry, AFB contains the factored form of A. +* If EQUED = 'Y', the matrix A has been equilibrated +* with scaling factors given by S. AB and AFB will not +* be modified. +* = 'N': The matrix A will be copied to AFB and factored. +* = 'E': The matrix A will be equilibrated if necessary, then +* copied to AFB and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right-hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array, except +* if FACT = 'F' and EQUED = 'Y', then A must contain the +* equilibrated matrix diag(S)*A*diag(S). The j-th column of A +* is stored in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(N,j+KD). +* See below for further details. +* +* On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +* diag(S)*A*diag(S). +* +* LDAB (input) INTEGER +* The leading dimension of the array A. LDAB >= KD+1. +* +* AFB (input or output) DOUBLE PRECISION array, dimension (LDAFB,N) +* If FACT = 'F', then AFB is an input argument and on entry +* contains the triangular factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T of the band matrix +* A, in the same storage format as A (see AB). If EQUED = 'Y', +* then AFB is the factored form of the equilibrated matrix A. +* +* If FACT = 'N', then AFB is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T. +* +* If FACT = 'E', then AFB is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T of the equilibrated +* matrix A (see the description of A for the form of the +* equilibrated matrix). +* +* LDAFB (input) INTEGER +* The leading dimension of the array AFB. LDAFB >= KD+1. +* +* EQUED (input or output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration (always true if FACT = 'N'). +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* EQUED is an input argument if FACT = 'F'; otherwise, it is an +* output argument. +* +* S (input or output) DOUBLE PRECISION array, dimension (N) +* The scale factors for A; not accessed if EQUED = 'N'. S is +* an input argument if FACT = 'F'; otherwise, S is an output +* argument. If FACT = 'F' and EQUED = 'Y', each element of S +* must be positive. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +* B is overwritten by diag(S) * B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +* the original system of equations. Note that if EQUED = 'Y', +* A and B are modified on exit, and the solution to the +* equilibrated system is inv(diag(S))*X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) DOUBLE PRECISION +* The estimate of the reciprocal condition number of the matrix +* A after equilibration (if done). If RCOND is less than the +* machine precision (in particular, if RCOND = 0), the matrix +* is singular to working precision. This condition is +* indicated by a return code of INFO > 0. +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: the leading minor of order i of A is +* not positive definite, so the factorization +* could not be completed, and the solution has not +* been computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 6, KD = 2, and UPLO = 'U': +* +* Two-dimensional storage of the symmetric matrix A: +* +* a11 a12 a13 +* a22 a23 a24 +* a33 a34 a35 +* a44 a45 a46 +* a55 a56 +* (aij=conjg(aji)) a66 +* +* Band storage of the upper triangle of A: +* +* * * a13 a24 a35 a46 +* * a12 a23 a34 a45 a56 +* a11 a22 a33 a44 a55 a66 +* +* Similarly, if UPLO = 'L' the format of A is as follows: +* +* a11 a22 a33 a44 a55 a66 +* a21 a32 a43 a54 a65 * +* a31 a42 a53 a64 * * +* +* Array elements marked * are not used by the routine. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU, UPPER + INTEGER I, INFEQU, J, J1, J2 + DOUBLE PRECISION AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANSB + EXTERNAL LSAME, DLAMCH, DLANSB +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLACPY, DLAQSB, DPBCON, DPBEQU, DPBRFS, + $ DPBTRF, DPBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + UPPER = LSAME( UPLO, 'U' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( LDAFB.LT.KD+1 ) THEN + INFO = -9 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPBSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL DPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL DLAQSB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + IF( UPPER ) THEN + DO 40 J = 1, N + J1 = MAX( J-KD, 1 ) + CALL DCOPY( J-J1+1, AB( KD+1-J+J1, J ), 1, + $ AFB( KD+1-J+J1, J ), 1 ) + 40 CONTINUE + ELSE + DO 50 J = 1, N + J2 = MIN( J+KD, N ) + CALL DCOPY( J2-J+1, AB( 1, J ), 1, AFB( 1, J ), 1 ) + 50 CONTINUE + END IF +* + CALL DPBTRF( UPLO, N, KD, AFB, LDAFB, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = DLANSB( '1', UPLO, N, KD, AB, LDAB, WORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL DPBCON( UPLO, N, KD, AFB, LDAFB, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution matrix X. +* + CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL DPBTRS( UPLO, N, KD, NRHS, AFB, LDAFB, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL DPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, LDB, X, + $ LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 70 J = 1, NRHS + DO 60 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 60 CONTINUE + 70 CONTINUE + DO 80 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 80 CONTINUE + END IF +* + RETURN +* +* End of DPBSVX +* + END diff --git a/costa/native/external/lapack/dpbtf2.f b/costa/native/external/lapack/dpbtf2.f new file mode 100644 index 000000000..6f211f31e --- /dev/null +++ b/costa/native/external/lapack/dpbtf2.f @@ -0,0 +1,195 @@ + SUBROUTINE DPBTF2( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* DPBTF2 computes the Cholesky factorization of a real symmetric +* positive definite band matrix A. +* +* The factorization has the form +* A = U' * U , if UPLO = 'U', or +* A = L * L', if UPLO = 'L', +* where U is an upper triangular matrix, U' is the transpose of U, and +* L is lower triangular. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of super-diagonals of the matrix A if UPLO = 'U', +* or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U'*U or A = L*L' of the band +* matrix A, in the same storage format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, the leading minor of order k is not +* positive definite, and the factorization could not be +* completed. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 6, KD = 2, and UPLO = 'U': +* +* On entry: On exit: +* +* * * a13 a24 a35 a46 * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* +* Similarly, if UPLO = 'L' the format of A is as follows: +* +* On entry: On exit: +* +* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +* a31 a42 a53 a64 * * l31 l42 l53 l64 * * +* +* Array elements marked * are not used by the routine. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, KLD, KN + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPBTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + KLD = MAX( 1, LDAB-1 ) +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = AB( KD+1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 30 + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ +* +* Compute elements J+1:J+KN of row J and update the +* trailing submatrix within the band. +* + KN = MIN( KD, N-J ) + IF( KN.GT.0 ) THEN + CALL DSCAL( KN, ONE / AJJ, AB( KD, J+1 ), KLD ) + CALL DSYR( 'Upper', KN, -ONE, AB( KD, J+1 ), KLD, + $ AB( KD+1, J+1 ), KLD ) + END IF + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = AB( 1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 30 + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ +* +* Compute elements J+1:J+KN of column J and update the +* trailing submatrix within the band. +* + KN = MIN( KD, N-J ) + IF( KN.GT.0 ) THEN + CALL DSCAL( KN, ONE / AJJ, AB( 2, J ), 1 ) + CALL DSYR( 'Lower', KN, -ONE, AB( 2, J ), 1, + $ AB( 1, J+1 ), KLD ) + END IF + 20 CONTINUE + END IF + RETURN +* + 30 CONTINUE + INFO = J + RETURN +* +* End of DPBTF2 +* + END diff --git a/costa/native/external/lapack/dpbtrf.f b/costa/native/external/lapack/dpbtrf.f new file mode 100644 index 000000000..6974eb798 --- /dev/null +++ b/costa/native/external/lapack/dpbtrf.f @@ -0,0 +1,365 @@ + SUBROUTINE DPBTRF( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* DPBTRF computes the Cholesky factorization of a real symmetric +* positive definite band matrix A. +* +* The factorization has the form +* A = U**T * U, if UPLO = 'U', or +* A = L * L**T, if UPLO = 'L', +* where U is an upper triangular matrix and L is lower triangular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U**T*U or A = L*L**T of the band +* matrix A, in the same storage format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i is not +* positive definite, and the factorization could not be +* completed. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 6, KD = 2, and UPLO = 'U': +* +* On entry: On exit: +* +* * * a13 a24 a35 a46 * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* +* Similarly, if UPLO = 'L' the format of A is as follows: +* +* On entry: On exit: +* +* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +* a31 a42 a53 a64 * * l31 l42 l53 l64 * * +* +* Array elements marked * are not used by the routine. +* +* Contributed by +* Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989 +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) + INTEGER NBMAX, LDWORK + PARAMETER ( NBMAX = 32, LDWORK = NBMAX+1 ) +* .. +* .. Local Scalars .. + INTEGER I, I2, I3, IB, II, J, JJ, NB +* .. +* .. Local Arrays .. + DOUBLE PRECISION WORK( LDWORK, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DPBTF2, DPOTF2, DSYRK, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.LSAME( UPLO, 'U' ) ) .AND. + $ ( .NOT.LSAME( UPLO, 'L' ) ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPBTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment +* + NB = ILAENV( 1, 'DPBTRF', UPLO, N, KD, -1, -1 ) +* +* The block size must not exceed the semi-bandwidth KD, and must not +* exceed the limit set by the size of the local array WORK. +* + NB = MIN( NB, NBMAX ) +* + IF( NB.LE.1 .OR. NB.GT.KD ) THEN +* +* Use unblocked code +* + CALL DPBTF2( UPLO, N, KD, AB, LDAB, INFO ) + ELSE +* +* Use blocked code +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Compute the Cholesky factorization of a symmetric band +* matrix, given the upper triangle of the matrix in band +* storage. +* +* Zero the upper triangle of the work array. +* + DO 20 J = 1, NB + DO 10 I = 1, J - 1 + WORK( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Process the band matrix one diagonal block at a time. +* + DO 70 I = 1, N, NB + IB = MIN( NB, N-I+1 ) +* +* Factorize the diagonal block +* + CALL DPOTF2( UPLO, IB, AB( KD+1, I ), LDAB-1, II ) + IF( II.NE.0 ) THEN + INFO = I + II - 1 + GO TO 150 + END IF + IF( I+IB.LE.N ) THEN +* +* Update the relevant part of the trailing submatrix. +* If A11 denotes the diagonal block which has just been +* factorized, then we need to update the remaining +* blocks in the diagram: +* +* A11 A12 A13 +* A22 A23 +* A33 +* +* The numbers of rows and columns in the partitioning +* are IB, I2, I3 respectively. The blocks A12, A22 and +* A23 are empty if IB = KD. The upper triangle of A13 +* lies outside the band. +* + I2 = MIN( KD-IB, N-I-IB+1 ) + I3 = MIN( IB, N-I-KD+1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A12 +* + CALL DTRSM( 'Left', 'Upper', 'Transpose', + $ 'Non-unit', IB, I2, ONE, AB( KD+1, I ), + $ LDAB-1, AB( KD+1-IB, I+IB ), LDAB-1 ) +* +* Update A22 +* + CALL DSYRK( 'Upper', 'Transpose', I2, IB, -ONE, + $ AB( KD+1-IB, I+IB ), LDAB-1, ONE, + $ AB( KD+1, I+IB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Copy the lower triangle of A13 into the work array. +* + DO 40 JJ = 1, I3 + DO 30 II = JJ, IB + WORK( II, JJ ) = AB( II-JJ+1, JJ+I+KD-1 ) + 30 CONTINUE + 40 CONTINUE +* +* Update A13 (in the work array). +* + CALL DTRSM( 'Left', 'Upper', 'Transpose', + $ 'Non-unit', IB, I3, ONE, AB( KD+1, I ), + $ LDAB-1, WORK, LDWORK ) +* +* Update A23 +* + IF( I2.GT.0 ) + $ CALL DGEMM( 'Transpose', 'No Transpose', I2, I3, + $ IB, -ONE, AB( KD+1-IB, I+IB ), + $ LDAB-1, WORK, LDWORK, ONE, + $ AB( 1+IB, I+KD ), LDAB-1 ) +* +* Update A33 +* + CALL DSYRK( 'Upper', 'Transpose', I3, IB, -ONE, + $ WORK, LDWORK, ONE, AB( KD+1, I+KD ), + $ LDAB-1 ) +* +* Copy the lower triangle of A13 back into place. +* + DO 60 JJ = 1, I3 + DO 50 II = JJ, IB + AB( II-JJ+1, JJ+I+KD-1 ) = WORK( II, JJ ) + 50 CONTINUE + 60 CONTINUE + END IF + END IF + 70 CONTINUE + ELSE +* +* Compute the Cholesky factorization of a symmetric band +* matrix, given the lower triangle of the matrix in band +* storage. +* +* Zero the lower triangle of the work array. +* + DO 90 J = 1, NB + DO 80 I = J + 1, NB + WORK( I, J ) = ZERO + 80 CONTINUE + 90 CONTINUE +* +* Process the band matrix one diagonal block at a time. +* + DO 140 I = 1, N, NB + IB = MIN( NB, N-I+1 ) +* +* Factorize the diagonal block +* + CALL DPOTF2( UPLO, IB, AB( 1, I ), LDAB-1, II ) + IF( II.NE.0 ) THEN + INFO = I + II - 1 + GO TO 150 + END IF + IF( I+IB.LE.N ) THEN +* +* Update the relevant part of the trailing submatrix. +* If A11 denotes the diagonal block which has just been +* factorized, then we need to update the remaining +* blocks in the diagram: +* +* A11 +* A21 A22 +* A31 A32 A33 +* +* The numbers of rows and columns in the partitioning +* are IB, I2, I3 respectively. The blocks A21, A22 and +* A32 are empty if IB = KD. The lower triangle of A31 +* lies outside the band. +* + I2 = MIN( KD-IB, N-I-IB+1 ) + I3 = MIN( IB, N-I-KD+1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A21 +* + CALL DTRSM( 'Right', 'Lower', 'Transpose', + $ 'Non-unit', I2, IB, ONE, AB( 1, I ), + $ LDAB-1, AB( 1+IB, I ), LDAB-1 ) +* +* Update A22 +* + CALL DSYRK( 'Lower', 'No Transpose', I2, IB, -ONE, + $ AB( 1+IB, I ), LDAB-1, ONE, + $ AB( 1, I+IB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Copy the upper triangle of A31 into the work array. +* + DO 110 JJ = 1, IB + DO 100 II = 1, MIN( JJ, I3 ) + WORK( II, JJ ) = AB( KD+1-JJ+II, JJ+I-1 ) + 100 CONTINUE + 110 CONTINUE +* +* Update A31 (in the work array). +* + CALL DTRSM( 'Right', 'Lower', 'Transpose', + $ 'Non-unit', I3, IB, ONE, AB( 1, I ), + $ LDAB-1, WORK, LDWORK ) +* +* Update A32 +* + IF( I2.GT.0 ) + $ CALL DGEMM( 'No transpose', 'Transpose', I3, I2, + $ IB, -ONE, WORK, LDWORK, + $ AB( 1+IB, I ), LDAB-1, ONE, + $ AB( 1+KD-IB, I+IB ), LDAB-1 ) +* +* Update A33 +* + CALL DSYRK( 'Lower', 'No Transpose', I3, IB, -ONE, + $ WORK, LDWORK, ONE, AB( 1, I+KD ), + $ LDAB-1 ) +* +* Copy the upper triangle of A31 back into place. +* + DO 130 JJ = 1, IB + DO 120 II = 1, MIN( JJ, I3 ) + AB( KD+1-JJ+II, JJ+I-1 ) = WORK( II, JJ ) + 120 CONTINUE + 130 CONTINUE + END IF + END IF + 140 CONTINUE + END IF + END IF + RETURN +* + 150 CONTINUE + RETURN +* +* End of DPBTRF +* + END diff --git a/costa/native/external/lapack/dpbtrs.f b/costa/native/external/lapack/dpbtrs.f new file mode 100644 index 000000000..d8bedd242 --- /dev/null +++ b/costa/native/external/lapack/dpbtrs.f @@ -0,0 +1,146 @@ + SUBROUTINE DPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DPBTRS solves a system of linear equations A*X = B with a symmetric +* positive definite band matrix A using the Cholesky factorization +* A = U**T*U or A = L*L**T computed by DPBTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular factor stored in AB; +* = 'L': Lower triangular factor stored in AB. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**T*U or A = L*L**T of the band matrix A, stored in the +* first KD+1 rows of the array. The j-th column of U or L is +* stored in the j-th column of the array AB as follows: +* if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; +* if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DTBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U'*U. +* + DO 10 J = 1, NRHS +* +* Solve U'*X = B, overwriting B with X. +* + CALL DTBSV( 'Upper', 'Transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) +* +* Solve U*X = B, overwriting B with X. +* + CALL DTBSV( 'Upper', 'No transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* Solve A*X = B where A = L*L'. +* + DO 20 J = 1, NRHS +* +* Solve L*X = B, overwriting B with X. +* + CALL DTBSV( 'Lower', 'No transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) +* +* Solve L'*X = B, overwriting B with X. +* + CALL DTBSV( 'Lower', 'Transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) + 20 CONTINUE + END IF +* + RETURN +* +* End of DPBTRS +* + END diff --git a/costa/native/external/lapack/dpocon.f b/costa/native/external/lapack/dpocon.f new file mode 100644 index 000000000..cd36e9234 --- /dev/null +++ b/costa/native/external/lapack/dpocon.f @@ -0,0 +1,173 @@ + SUBROUTINE DPOCON( UPLO, N, A, LDA, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DPOCON estimates the reciprocal of the condition number (in the +* 1-norm) of a real symmetric positive definite matrix using the +* Cholesky factorization A = U**T*U or A = L*L**T computed by DPOTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**T*U or A = L*L**T, as computed by DPOTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* ANORM (input) DOUBLE PRECISION +* The 1-norm (or infinity-norm) of the symmetric matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + DOUBLE PRECISION AINVNM, SCALE, SCALEL, SCALEU, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IDAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLACON, DLATRS, DRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of inv(A). +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U'). +* + CALL DLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, A, + $ LDA, WORK, SCALEL, WORK( 2*N+1 ), INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL DLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SCALEU, WORK( 2*N+1 ), INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL DLATRS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SCALEL, WORK( 2*N+1 ), INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L'). +* + CALL DLATRS( 'Lower', 'Transpose', 'Non-unit', NORMIN, N, A, + $ LDA, WORK, SCALEU, WORK( 2*N+1 ), INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = IDAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL DRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of DPOCON +* + END diff --git a/costa/native/external/lapack/dpoequ.f b/costa/native/external/lapack/dpoequ.f new file mode 100644 index 000000000..c59d7e3bc --- /dev/null +++ b/costa/native/external/lapack/dpoequ.f @@ -0,0 +1,137 @@ + SUBROUTINE DPOEQU( N, A, LDA, S, SCOND, AMAX, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), S( * ) +* .. +* +* Purpose +* ======= +* +* DPOEQU computes row and column scalings intended to equilibrate a +* symmetric positive definite matrix A and reduce its condition number +* (with respect to the two-norm). S contains the scale factors, +* S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with +* elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This +* choice of S puts the condition number of B within a factor N of the +* smallest possible condition number over all possible diagonal +* scalings. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The N-by-N symmetric positive definite matrix whose scaling +* factors are to be computed. Only the diagonal elements of A +* are referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* S (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, S contains the scale factors for A. +* +* SCOND (output) DOUBLE PRECISION +* If INFO = 0, S contains the ratio of the smallest S(i) to +* the largest S(i). If SCOND >= 0.1 and AMAX is neither too +* large nor too small, it is not worth scaling by S. +* +* AMAX (output) DOUBLE PRECISION +* Absolute value of largest matrix element. If AMAX is very +* close to overflow or very close to underflow, the matrix +* should be scaled. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the i-th diagonal element is nonpositive. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION SMIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* +* Find the minimum and maximum diagonal elements. +* + S( 1 ) = A( 1, 1 ) + SMIN = S( 1 ) + AMAX = S( 1 ) + DO 10 I = 2, N + S( I ) = A( I, I ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 20 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 20 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 30 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 30 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of DPOEQU +* + END diff --git a/costa/native/external/lapack/dporfs.f b/costa/native/external/lapack/dporfs.f new file mode 100644 index 000000000..d38b57df1 --- /dev/null +++ b/costa/native/external/lapack/dporfs.f @@ -0,0 +1,327 @@ + SUBROUTINE DPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, + $ LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DPORFS improves the computed solution to a system of linear +* equations when the coefficient matrix is symmetric positive definite, +* and provides error bounds and backward error estimates for the +* solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The symmetric matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**T*U or A = L*L**T, as computed by DPOTRF. +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by DPOTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DLACON, DPOTRS, DSYMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPORFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL DSYMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, + $ WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + DO 40 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 40 CONTINUE + WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK + DO 60 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 60 CONTINUE + WORK( K ) = WORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL DPOTRS( UPLO, N, 1, AF, LDAF, WORK( N+1 ), N, INFO ) + CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use DLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL DLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL DPOTRS( UPLO, N, 1, AF, LDAF, WORK( N+1 ), N, INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 120 CONTINUE + CALL DPOTRS( UPLO, N, 1, AF, LDAF, WORK( N+1 ), N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of DPORFS +* + END diff --git a/costa/native/external/lapack/dposv.f b/costa/native/external/lapack/dposv.f new file mode 100644 index 000000000..c3b59c8ca --- /dev/null +++ b/costa/native/external/lapack/dposv.f @@ -0,0 +1,122 @@ + SUBROUTINE DPOSV( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DPOSV computes the solution to a real system of linear equations +* A * X = B, +* where A is an N-by-N symmetric positive definite matrix and X and B +* are N-by-NRHS matrices. +* +* The Cholesky decomposition is used to factor A as +* A = U**T* U, if UPLO = 'U', or +* A = L * L**T, if UPLO = 'L', +* where U is an upper triangular matrix and L is a lower triangular +* matrix. The factored form of A is then used to solve the system of +* equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i of A is not +* positive definite, so the factorization could not be +* completed, and the solution has not been computed. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DPOTRF, DPOTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + CALL DPOTRF( UPLO, N, A, LDA, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL DPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* + END IF + RETURN +* +* End of DPOSV +* + END diff --git a/costa/native/external/lapack/dposvx.f b/costa/native/external/lapack/dposvx.f new file mode 100644 index 000000000..3e3be142a --- /dev/null +++ b/costa/native/external/lapack/dposvx.f @@ -0,0 +1,379 @@ + SUBROUTINE DPOSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED, + $ S, B, LDB, X, LDX, RCOND, FERR, BERR, WORK, + $ IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), S( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DPOSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to +* compute the solution to a real system of linear equations +* A * X = B, +* where A is an N-by-N symmetric positive definite matrix and X and B +* are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'E', real scaling factors are computed to equilibrate +* the system: +* diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +* Whether or not the system will be equilibrated depends on the +* scaling of the matrix A, but if equilibration is used, A is +* overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +* +* 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +* factor the matrix A (after equilibration if FACT = 'E') as +* A = U**T* U, if UPLO = 'U', or +* A = L * L**T, if UPLO = 'L', +* where U is an upper triangular matrix and L is a lower triangular +* matrix. +* +* 3. If the leading i-by-i principal minor is not positive definite, +* then the routine returns with INFO = i. Otherwise, the factored +* form of A is used to estimate the condition number of the matrix +* A. If the reciprocal of the condition number is less than machine +* precision, INFO = N+1 is returned as a warning, but the routine +* still goes on to solve for X and compute error bounds as +* described below. +* +* 4. The system of equations is solved for X using the factored form +* of A. +* +* 5. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* 6. If equilibration was used, the matrix X is premultiplied by +* diag(S) so that it solves the original system before +* equilibration. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of the matrix A is +* supplied on entry, and if not, whether the matrix A should be +* equilibrated before it is factored. +* = 'F': On entry, AF contains the factored form of A. +* If EQUED = 'Y', the matrix A has been equilibrated +* with scaling factors given by S. A and AF will not +* be modified. +* = 'N': The matrix A will be copied to AF and factored. +* = 'E': The matrix A will be equilibrated if necessary, then +* copied to AF and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the symmetric matrix A, except if FACT = 'F' and +* EQUED = 'Y', then A must contain the equilibrated matrix +* diag(S)*A*diag(S). If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. A is not modified if +* FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. +* +* On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +* diag(S)*A*diag(S). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input or output) DOUBLE PRECISION array, dimension (LDAF,N) +* If FACT = 'F', then AF is an input argument and on entry +* contains the triangular factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T, in the same storage +* format as A. If EQUED .ne. 'N', then AF is the factored form +* of the equilibrated matrix diag(S)*A*diag(S). +* +* If FACT = 'N', then AF is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T of the original +* matrix A. +* +* If FACT = 'E', then AF is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T of the equilibrated +* matrix A (see the description of A for the form of the +* equilibrated matrix). +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* EQUED (input or output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration (always true if FACT = 'N'). +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* EQUED is an input argument if FACT = 'F'; otherwise, it is an +* output argument. +* +* S (input or output) DOUBLE PRECISION array, dimension (N) +* The scale factors for A; not accessed if EQUED = 'N'. S is +* an input argument if FACT = 'F'; otherwise, S is an output +* argument. If FACT = 'F' and EQUED = 'Y', each element of S +* must be positive. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +* B is overwritten by diag(S) * B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +* the original system of equations. Note that if EQUED = 'Y', +* A and B are modified on exit, and the solution to the +* equilibrated system is inv(diag(S))*X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) DOUBLE PRECISION +* The estimate of the reciprocal condition number of the matrix +* A after equilibration (if done). If RCOND is less than the +* machine precision (in particular, if RCOND = 0), the matrix +* is singular to working precision. This condition is +* indicated by a return code of INFO > 0. +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: the leading minor of order i of A is +* not positive definite, so the factorization +* could not be completed, and the solution has not +* been computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER I, INFEQU, J + DOUBLE PRECISION AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANSY + EXTERNAL LSAME, DLAMCH, DLANSY +* .. +* .. External Subroutines .. + EXTERNAL DLACPY, DLAQSY, DPOCON, DPOEQU, DPORFS, DPOTRF, + $ DPOTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -9 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -10 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL DPOEQU( N, A, LDA, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL DLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + CALL DLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL DPOTRF( UPLO, N, AF, LDAF, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = DLANSY( '1', UPLO, N, A, LDA, WORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL DPOCON( UPLO, N, AF, LDAF, ANORM, RCOND, WORK, IWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution matrix X. +* + CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL DPOTRS( UPLO, N, NRHS, AF, LDAF, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL DPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, LDX, + $ FERR, BERR, WORK, IWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 50 J = 1, NRHS + DO 40 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 40 CONTINUE + 50 CONTINUE + DO 60 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 60 CONTINUE + END IF +* + RETURN +* +* End of DPOSVX +* + END diff --git a/costa/native/external/lapack/dpotf2.f b/costa/native/external/lapack/dpotf2.f new file mode 100644 index 000000000..f9e0de06e --- /dev/null +++ b/costa/native/external/lapack/dpotf2.f @@ -0,0 +1,168 @@ + SUBROUTINE DPOTF2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* DPOTF2 computes the Cholesky factorization of a real symmetric +* positive definite matrix A. +* +* The factorization has the form +* A = U' * U , if UPLO = 'U', or +* A = L * L', if UPLO = 'L', +* where U is an upper triangular matrix and L is lower triangular. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* n by n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n by n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the factor U or L from the Cholesky +* factorization A = U'*U or A = L*L'. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, the leading minor of order k is not +* positive definite, and the factorization could not be +* completed. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = A( J, J ) - DDOT( J-1, A( 1, J ), 1, A( 1, J ), 1 ) + IF( AJJ.LE.ZERO ) THEN + A( J, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of row J. +* + IF( J.LT.N ) THEN + CALL DGEMV( 'Transpose', J-1, N-J, -ONE, A( 1, J+1 ), + $ LDA, A( 1, J ), 1, ONE, A( J, J+1 ), LDA ) + CALL DSCAL( N-J, ONE / AJJ, A( J, J+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = A( J, J ) - DDOT( J-1, A( J, 1 ), LDA, A( J, 1 ), + $ LDA ) + IF( AJJ.LE.ZERO ) THEN + A( J, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of column J. +* + IF( J.LT.N ) THEN + CALL DGEMV( 'No transpose', N-J, J-1, -ONE, A( J+1, 1 ), + $ LDA, A( J, 1 ), LDA, ONE, A( J+1, J ), 1 ) + CALL DSCAL( N-J, ONE / AJJ, A( J+1, J ), 1 ) + END IF + 20 CONTINUE + END IF + GO TO 40 +* + 30 CONTINUE + INFO = J +* + 40 CONTINUE + RETURN +* +* End of DPOTF2 +* + END diff --git a/costa/native/external/lapack/dpotrf.f b/costa/native/external/lapack/dpotrf.f new file mode 100644 index 000000000..c4b0cb459 --- /dev/null +++ b/costa/native/external/lapack/dpotrf.f @@ -0,0 +1,184 @@ + SUBROUTINE DPOTRF( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* DPOTRF computes the Cholesky factorization of a real symmetric +* positive definite matrix A. +* +* The factorization has the form +* A = U**T * U, if UPLO = 'U', or +* A = L * L**T, if UPLO = 'L', +* where U is an upper triangular matrix and L is lower triangular. +* +* This is the block version of the algorithm, calling Level 3 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i is not +* positive definite, and the factorization could not be +* completed. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DPOTF2, DSYRK, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'DPOTRF', UPLO, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + CALL DPOTF2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code. +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + CALL DSYRK( 'Upper', 'Transpose', JB, J-1, -ONE, + $ A( 1, J ), LDA, ONE, A( J, J ), LDA ) + CALL DPOTF2( 'Upper', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( J+JB.LE.N ) THEN +* +* Compute the current block row. +* + CALL DGEMM( 'Transpose', 'No transpose', JB, N-J-JB+1, + $ J-1, -ONE, A( 1, J ), LDA, A( 1, J+JB ), + $ LDA, ONE, A( J, J+JB ), LDA ) + CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', + $ JB, N-J-JB+1, ONE, A( J, J ), LDA, + $ A( J, J+JB ), LDA ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + CALL DSYRK( 'Lower', 'No transpose', JB, J-1, -ONE, + $ A( J, 1 ), LDA, ONE, A( J, J ), LDA ) + CALL DPOTF2( 'Lower', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( J+JB.LE.N ) THEN +* +* Compute the current block column. +* + CALL DGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ J-1, -ONE, A( J+JB, 1 ), LDA, A( J, 1 ), + $ LDA, ONE, A( J+JB, J ), LDA ) + CALL DTRSM( 'Right', 'Lower', 'Transpose', 'Non-unit', + $ N-J-JB+1, JB, ONE, A( J, J ), LDA, + $ A( J+JB, J ), LDA ) + END IF + 20 CONTINUE + END IF + END IF + GO TO 40 +* + 30 CONTINUE + INFO = INFO + J - 1 +* + 40 CONTINUE + RETURN +* +* End of DPOTRF +* + END diff --git a/costa/native/external/lapack/dpotri.f b/costa/native/external/lapack/dpotri.f new file mode 100644 index 000000000..b3f956fc0 --- /dev/null +++ b/costa/native/external/lapack/dpotri.f @@ -0,0 +1,97 @@ + SUBROUTINE DPOTRI( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* DPOTRI computes the inverse of a real symmetric positive definite +* matrix A using the Cholesky factorization A = U**T*U or A = L*L**T +* computed by DPOTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the triangular factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T, as computed by +* DPOTRF. +* On exit, the upper or lower triangle of the (symmetric) +* inverse of A, overwriting the input factor U or L. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the (i,i) element of the factor U or L is +* zero, and the inverse could not be computed. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLAUUM, DTRTRI, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Invert the triangular Cholesky factor U or L. +* + CALL DTRTRI( UPLO, 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* +* Form inv(U)*inv(U)' or inv(L)'*inv(L). +* + CALL DLAUUM( UPLO, N, A, LDA, INFO ) +* + RETURN +* +* End of DPOTRI +* + END diff --git a/costa/native/external/lapack/dpotrs.f b/costa/native/external/lapack/dpotrs.f new file mode 100644 index 000000000..ae3ab2f31 --- /dev/null +++ b/costa/native/external/lapack/dpotrs.f @@ -0,0 +1,133 @@ + SUBROUTINE DPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DPOTRS solves a system of linear equations A*X = B with a symmetric +* positive definite matrix A using the Cholesky factorization +* A = U**T*U or A = L*L**T computed by DPOTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**T*U or A = L*L**T, as computed by DPOTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U'*U. +* +* Solve U'*X = B, overwriting B with X. +* + CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) +* +* Solve U*X = B, overwriting B with X. +* + CALL DTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) + ELSE +* +* Solve A*X = B where A = L*L'. +* +* Solve L*X = B, overwriting B with X. +* + CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) +* +* Solve L'*X = B, overwriting B with X. +* + CALL DTRSM( 'Left', 'Lower', 'Transpose', 'Non-unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) + END IF +* + RETURN +* +* End of DPOTRS +* + END diff --git a/costa/native/external/lapack/dppcon.f b/costa/native/external/lapack/dppcon.f new file mode 100644 index 000000000..0a3ba04d3 --- /dev/null +++ b/costa/native/external/lapack/dppcon.f @@ -0,0 +1,172 @@ + SUBROUTINE DPPCON( UPLO, N, AP, ANORM, RCOND, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DPPCON estimates the reciprocal of the condition number (in the +* 1-norm) of a real symmetric positive definite packed matrix using +* the Cholesky factorization A = U**T*U or A = L*L**T computed by +* DPPTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* The triangular factor U or L from the Cholesky factorization +* A = U**T*U or A = L*L**T, packed columnwise in a linear +* array. The j-th column of U or L is stored in the array AP +* as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +* +* ANORM (input) DOUBLE PRECISION +* The 1-norm (or infinity-norm) of the symmetric matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + DOUBLE PRECISION AINVNM, SCALE, SCALEL, SCALEU, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IDAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLACON, DLATPS, DRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U'). +* + CALL DLATPS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEL, WORK( 2*N+1 ), INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL DLATPS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEU, WORK( 2*N+1 ), INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL DLATPS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEL, WORK( 2*N+1 ), INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L'). +* + CALL DLATPS( 'Lower', 'Transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEU, WORK( 2*N+1 ), INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = IDAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL DRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of DPPCON +* + END diff --git a/costa/native/external/lapack/dppequ.f b/costa/native/external/lapack/dppequ.f new file mode 100644 index 000000000..ea6ca6bbe --- /dev/null +++ b/costa/native/external/lapack/dppequ.f @@ -0,0 +1,169 @@ + SUBROUTINE DPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), S( * ) +* .. +* +* Purpose +* ======= +* +* DPPEQU computes row and column scalings intended to equilibrate a +* symmetric positive definite matrix A in packed storage and reduce +* its condition number (with respect to the two-norm). S contains the +* scale factors, S(i)=1/sqrt(A(i,i)), chosen so that the scaled matrix +* B with elements B(i,j)=S(i)*A(i,j)*S(j) has ones on the diagonal. +* This choice of S puts the condition number of B within a factor N of +* the smallest possible condition number over all possible diagonal +* scalings. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* The upper or lower triangle of the symmetric matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* S (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, S contains the scale factors for A. +* +* SCOND (output) DOUBLE PRECISION +* If INFO = 0, S contains the ratio of the smallest S(i) to +* the largest S(i). If SCOND >= 0.1 and AMAX is neither too +* large nor too small, it is not worth scaling by S. +* +* AMAX (output) DOUBLE PRECISION +* Absolute value of largest matrix element. If AMAX is very +* close to overflow or very close to underflow, the matrix +* should be scaled. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the i-th diagonal element is nonpositive. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, JJ + DOUBLE PRECISION SMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPPEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* +* Initialize SMIN and AMAX. +* + S( 1 ) = AP( 1 ) + SMIN = S( 1 ) + AMAX = S( 1 ) +* + IF( UPPER ) THEN +* +* UPLO = 'U': Upper triangle of A is stored. +* Find the minimum and maximum diagonal elements. +* + JJ = 1 + DO 10 I = 2, N + JJ = JJ + I + S( I ) = AP( JJ ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + ELSE +* +* UPLO = 'L': Lower triangle of A is stored. +* Find the minimum and maximum diagonal elements. +* + JJ = 1 + DO 20 I = 2, N + JJ = JJ + N - I + 2 + S( I ) = AP( JJ ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 20 CONTINUE + END IF +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 30 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 30 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 40 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 40 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of DPPEQU +* + END diff --git a/costa/native/external/lapack/dpprfs.f b/costa/native/external/lapack/dpprfs.f new file mode 100644 index 000000000..6a0fc5044 --- /dev/null +++ b/costa/native/external/lapack/dpprfs.f @@ -0,0 +1,324 @@ + SUBROUTINE DPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, + $ BERR, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AFP( * ), AP( * ), B( LDB, * ), BERR( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DPPRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is symmetric positive definite +* and packed, and provides error bounds and backward error estimates +* for the solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* The upper or lower triangle of the symmetric matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* AFP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* The triangular factor U or L from the Cholesky factorization +* A = U**T*U or A = L*L**T, as computed by DPPTRF/ZPPTRF, +* packed columnwise in a linear array in the same format as A +* (see AP). +* +* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by DPPTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IK, J, K, KASE, KK, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DLACON, DPPTRS, DSPMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL DSPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK( N+1 ), + $ 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + KK = 1 + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + IK = KK + DO 40 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK + S = S + ABS( AP( IK ) )*ABS( X( I, J ) ) + IK = IK + 1 + 40 CONTINUE + WORK( K ) = WORK( K ) + ABS( AP( KK+K-1 ) )*XK + S + KK = KK + K + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + WORK( K ) = WORK( K ) + ABS( AP( KK ) )*XK + IK = KK + 1 + DO 60 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK + S = S + ABS( AP( IK ) )*ABS( X( I, J ) ) + IK = IK + 1 + 60 CONTINUE + WORK( K ) = WORK( K ) + S + KK = KK + ( N-K+1 ) + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL DPPTRS( UPLO, N, 1, AFP, WORK( N+1 ), N, INFO ) + CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use DLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL DLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL DPPTRS( UPLO, N, 1, AFP, WORK( N+1 ), N, INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 120 CONTINUE + CALL DPPTRS( UPLO, N, 1, AFP, WORK( N+1 ), N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of DPPRFS +* + END diff --git a/costa/native/external/lapack/dppsv.f b/costa/native/external/lapack/dppsv.f new file mode 100644 index 000000000..2e99b8cd8 --- /dev/null +++ b/costa/native/external/lapack/dppsv.f @@ -0,0 +1,134 @@ + SUBROUTINE DPPSV( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DPPSV computes the solution to a real system of linear equations +* A * X = B, +* where A is an N-by-N symmetric positive definite matrix stored in +* packed format and X and B are N-by-NRHS matrices. +* +* The Cholesky decomposition is used to factor A as +* A = U**T* U, if UPLO = 'U', or +* A = L * L**T, if UPLO = 'L', +* where U is an upper triangular matrix and L is a lower triangular +* matrix. The factored form of A is then used to solve the system of +* equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* +* On exit, if INFO = 0, the factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T, in the same storage +* format as A. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i of A is not +* positive definite, so the factorization could not be +* completed, and the solution has not been computed. +* +* Further Details +* =============== +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the symmetric matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = conjg(aji)) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DPPTRF, DPPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPPSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + CALL DPPTRF( UPLO, N, AP, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL DPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO ) +* + END IF + RETURN +* +* End of DPPSV +* + END diff --git a/costa/native/external/lapack/dppsvx.f b/costa/native/external/lapack/dppsvx.f new file mode 100644 index 000000000..60b9ae7dd --- /dev/null +++ b/costa/native/external/lapack/dppsvx.f @@ -0,0 +1,383 @@ + SUBROUTINE DPPSVX( FACT, UPLO, N, NRHS, AP, AFP, EQUED, S, B, LDB, + $ X, LDX, RCOND, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AFP( * ), AP( * ), B( LDB, * ), BERR( * ), + $ FERR( * ), S( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DPPSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to +* compute the solution to a real system of linear equations +* A * X = B, +* where A is an N-by-N symmetric positive definite matrix stored in +* packed format and X and B are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'E', real scaling factors are computed to equilibrate +* the system: +* diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +* Whether or not the system will be equilibrated depends on the +* scaling of the matrix A, but if equilibration is used, A is +* overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +* +* 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +* factor the matrix A (after equilibration if FACT = 'E') as +* A = U**T* U, if UPLO = 'U', or +* A = L * L**T, if UPLO = 'L', +* where U is an upper triangular matrix and L is a lower triangular +* matrix. +* +* 3. If the leading i-by-i principal minor is not positive definite, +* then the routine returns with INFO = i. Otherwise, the factored +* form of A is used to estimate the condition number of the matrix +* A. If the reciprocal of the condition number is less than machine +* precision, INFO = N+1 is returned as a warning, but the routine +* still goes on to solve for X and compute error bounds as +* described below. +* +* 4. The system of equations is solved for X using the factored form +* of A. +* +* 5. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* 6. If equilibration was used, the matrix X is premultiplied by +* diag(S) so that it solves the original system before +* equilibration. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of the matrix A is +* supplied on entry, and if not, whether the matrix A should be +* equilibrated before it is factored. +* = 'F': On entry, AFP contains the factored form of A. +* If EQUED = 'Y', the matrix A has been equilibrated +* with scaling factors given by S. AP and AFP will not +* be modified. +* = 'N': The matrix A will be copied to AFP and factored. +* = 'E': The matrix A will be equilibrated if necessary, then +* copied to AFP and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array, except if FACT = 'F' +* and EQUED = 'Y', then A must contain the equilibrated matrix +* diag(S)*A*diag(S). The j-th column of A is stored in the +* array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. A is not modified if +* FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. +* +* On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +* diag(S)*A*diag(S). +* +* AFP (input or output) DOUBLE PRECISION array, dimension +* (N*(N+1)/2) +* If FACT = 'F', then AFP is an input argument and on entry +* contains the triangular factor U or L from the Cholesky +* factorization A = U'*U or A = L*L', in the same storage +* format as A. If EQUED .ne. 'N', then AFP is the factored +* form of the equilibrated matrix A. +* +* If FACT = 'N', then AFP is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U'*U or A = L*L' of the original matrix A. +* +* If FACT = 'E', then AFP is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U'*U or A = L*L' of the equilibrated +* matrix A (see the description of AP for the form of the +* equilibrated matrix). +* +* EQUED (input or output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration (always true if FACT = 'N'). +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* EQUED is an input argument if FACT = 'F'; otherwise, it is an +* output argument. +* +* S (input or output) DOUBLE PRECISION array, dimension (N) +* The scale factors for A; not accessed if EQUED = 'N'. S is +* an input argument if FACT = 'F'; otherwise, S is an output +* argument. If FACT = 'F' and EQUED = 'Y', each element of S +* must be positive. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +* B is overwritten by diag(S) * B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +* the original system of equations. Note that if EQUED = 'Y', +* A and B are modified on exit, and the solution to the +* equilibrated system is inv(diag(S))*X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) DOUBLE PRECISION +* The estimate of the reciprocal condition number of the matrix +* A after equilibration (if done). If RCOND is less than the +* machine precision (in particular, if RCOND = 0), the matrix +* is singular to working precision. This condition is +* indicated by a return code of INFO > 0. +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: the leading minor of order i of A is +* not positive definite, so the factorization +* could not be completed, and the solution has not +* been computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* Further Details +* =============== +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the symmetric matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = conjg(aji)) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER I, INFEQU, J + DOUBLE PRECISION AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANSP + EXTERNAL LSAME, DLAMCH, DLANSP +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLACPY, DLAQSP, DPPCON, DPPEQU, DPPRFS, + $ DPPTRF, DPPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -7 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -8 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPPSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL DPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL DLAQSP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + CALL DCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 ) + CALL DPPTRF( UPLO, N, AFP, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = DLANSP( 'I', UPLO, N, AP, WORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL DPPCON( UPLO, N, AFP, ANORM, RCOND, WORK, IWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution matrix X. +* + CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL DPPTRS( UPLO, N, NRHS, AFP, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL DPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, BERR, + $ WORK, IWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 50 J = 1, NRHS + DO 40 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 40 CONTINUE + 50 CONTINUE + DO 60 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 60 CONTINUE + END IF +* + RETURN +* +* End of DPPSVX +* + END diff --git a/costa/native/external/lapack/dpptrf.f b/costa/native/external/lapack/dpptrf.f new file mode 100644 index 000000000..79fb4d762 --- /dev/null +++ b/costa/native/external/lapack/dpptrf.f @@ -0,0 +1,178 @@ + SUBROUTINE DPPTRF( UPLO, N, AP, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ) +* .. +* +* Purpose +* ======= +* +* DPPTRF computes the Cholesky factorization of a real symmetric +* positive definite matrix A stored in packed format. +* +* The factorization has the form +* A = U**T * U, if UPLO = 'U', or +* A = L * L**T, if UPLO = 'L', +* where U is an upper triangular matrix and L is lower triangular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U**T*U or A = L*L**T, in the same +* storage format as A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i is not +* positive definite, and the factorization could not be +* completed. +* +* Further Details +* ======= ======= +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the symmetric matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = aji) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JC, JJ + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSPR, DTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPPTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + JJ = 0 + DO 10 J = 1, N + JC = JJ + 1 + JJ = JJ + J +* +* Compute elements 1:J-1 of column J. +* + IF( J.GT.1 ) + $ CALL DTPSV( 'Upper', 'Transpose', 'Non-unit', J-1, AP, + $ AP( JC ), 1 ) +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = AP( JJ ) - DDOT( J-1, AP( JC ), 1, AP( JC ), 1 ) + IF( AJJ.LE.ZERO ) THEN + AP( JJ ) = AJJ + GO TO 30 + END IF + AP( JJ ) = SQRT( AJJ ) + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + JJ = 1 + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = AP( JJ ) + IF( AJJ.LE.ZERO ) THEN + AP( JJ ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + AP( JJ ) = AJJ +* +* Compute elements J+1:N of column J and update the trailing +* submatrix. +* + IF( J.LT.N ) THEN + CALL DSCAL( N-J, ONE / AJJ, AP( JJ+1 ), 1 ) + CALL DSPR( 'Lower', N-J, -ONE, AP( JJ+1 ), 1, + $ AP( JJ+N-J+1 ) ) + JJ = JJ + N - J + 1 + END IF + 20 CONTINUE + END IF + GO TO 40 +* + 30 CONTINUE + INFO = J +* + 40 CONTINUE + RETURN +* +* End of DPPTRF +* + END diff --git a/costa/native/external/lapack/dpptri.f b/costa/native/external/lapack/dpptri.f new file mode 100644 index 000000000..f05bef4af --- /dev/null +++ b/costa/native/external/lapack/dpptri.f @@ -0,0 +1,129 @@ + SUBROUTINE DPPTRI( UPLO, N, AP, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ) +* .. +* +* Purpose +* ======= +* +* DPPTRI computes the inverse of a real symmetric positive definite +* matrix A using the Cholesky factorization A = U**T*U or A = L*L**T +* computed by DPPTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular factor is stored in AP; +* = 'L': Lower triangular factor is stored in AP. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* On entry, the triangular factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T, packed columnwise as +* a linear array. The j-th column of U or L is stored in the +* array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +* +* On exit, the upper or lower triangle of the (symmetric) +* inverse of A, overwriting the input factor U or L. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the (i,i) element of the factor U or L is +* zero, and the inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JC, JJ, JJN + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSPR, DTPMV, DTPTRI, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPPTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Invert the triangular Cholesky factor U or L. +* + CALL DTPTRI( UPLO, 'Non-unit', N, AP, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the product inv(U) * inv(U)'. +* + JJ = 0 + DO 10 J = 1, N + JC = JJ + 1 + JJ = JJ + J + IF( J.GT.1 ) + $ CALL DSPR( 'Upper', J-1, ONE, AP( JC ), 1, AP ) + AJJ = AP( JJ ) + CALL DSCAL( J, AJJ, AP( JC ), 1 ) + 10 CONTINUE +* + ELSE +* +* Compute the product inv(L)' * inv(L). +* + JJ = 1 + DO 20 J = 1, N + JJN = JJ + N - J + 1 + AP( JJ ) = DDOT( N-J+1, AP( JJ ), 1, AP( JJ ), 1 ) + IF( J.LT.N ) + $ CALL DTPMV( 'Lower', 'Transpose', 'Non-unit', N-J, + $ AP( JJN ), AP( JJ+1 ), 1 ) + JJ = JJN + 20 CONTINUE + END IF +* + RETURN +* +* End of DPPTRI +* + END diff --git a/costa/native/external/lapack/dpptrs.f b/costa/native/external/lapack/dpptrs.f new file mode 100644 index 000000000..bcc8390ad --- /dev/null +++ b/costa/native/external/lapack/dpptrs.f @@ -0,0 +1,135 @@ + SUBROUTINE DPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DPPTRS solves a system of linear equations A*X = B with a symmetric +* positive definite matrix A in packed storage using the Cholesky +* factorization A = U**T*U or A = L*L**T computed by DPPTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* The triangular factor U or L from the Cholesky factorization +* A = U**T*U or A = L*L**T, packed columnwise in a linear +* array. The j-th column of U or L is stored in the array AP +* as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U'*U. +* + DO 10 I = 1, NRHS +* +* Solve U'*X = B, overwriting B with X. +* + CALL DTPSV( 'Upper', 'Transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) +* +* Solve U*X = B, overwriting B with X. +* + CALL DTPSV( 'Upper', 'No transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) + 10 CONTINUE + ELSE +* +* Solve A*X = B where A = L*L'. +* + DO 20 I = 1, NRHS +* +* Solve L*Y = B, overwriting B with X. +* + CALL DTPSV( 'Lower', 'No transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) +* +* Solve L'*X = Y, overwriting B with X. +* + CALL DTPSV( 'Lower', 'Transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) + 20 CONTINUE + END IF +* + RETURN +* +* End of DPPTRS +* + END diff --git a/costa/native/external/lapack/dptcon.f b/costa/native/external/lapack/dptcon.f new file mode 100644 index 000000000..291a4f758 --- /dev/null +++ b/costa/native/external/lapack/dptcon.f @@ -0,0 +1,150 @@ + SUBROUTINE DPTCON( N, D, E, ANORM, RCOND, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DPTCON computes the reciprocal of the condition number (in the +* 1-norm) of a real symmetric positive definite tridiagonal matrix +* using the factorization A = L*D*L**T or A = U**T*D*U computed by +* DPTTRF. +* +* Norm(inv(A)) is computed by a direct method, and the reciprocal of +* the condition number is computed as +* RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the diagonal matrix D from the +* factorization of A, as computed by DPTTRF. +* +* E (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) off-diagonal elements of the unit bidiagonal factor +* U or L from the factorization of A, as computed by DPTTRF. +* +* ANORM (input) DOUBLE PRECISION +* The 1-norm of the original matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is the +* 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The method used is described in Nicholas J. Higham, "Efficient +* Algorithms for Computing the Condition Number of a Tridiagonal +* Matrix", SIAM J. Sci. Stat. Comput., Vol. 7, No. 1, January 1986. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IX + DOUBLE PRECISION AINVNM +* .. +* .. External Functions .. + INTEGER IDAMAX + EXTERNAL IDAMAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPTCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* +* Check that D(1:N) is positive. +* + DO 10 I = 1, N + IF( D( I ).LE.ZERO ) + $ RETURN + 10 CONTINUE +* +* Solve M(A) * x = e, where M(A) = (m(i,j)) is given by +* +* m(i,j) = abs(A(i,j)), i = j, +* m(i,j) = -abs(A(i,j)), i .ne. j, +* +* and e = [ 1, 1, ..., 1 ]'. Note M(A) = M(L)*D*M(L)'. +* +* Solve M(L) * x = e. +* + WORK( 1 ) = ONE + DO 20 I = 2, N + WORK( I ) = ONE + WORK( I-1 )*ABS( E( I-1 ) ) + 20 CONTINUE +* +* Solve D * M(L)' * x = b. +* + WORK( N ) = WORK( N ) / D( N ) + DO 30 I = N - 1, 1, -1 + WORK( I ) = WORK( I ) / D( I ) + WORK( I+1 )*ABS( E( I ) ) + 30 CONTINUE +* +* Compute AINVNM = max(x(i)), 1<=i<=n. +* + IX = IDAMAX( N, WORK, 1 ) + AINVNM = ABS( WORK( IX ) ) +* +* Compute the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of DPTCON +* + END diff --git a/costa/native/external/lapack/dpteqr.f b/costa/native/external/lapack/dpteqr.f new file mode 100644 index 000000000..c73c1e6bb --- /dev/null +++ b/costa/native/external/lapack/dpteqr.f @@ -0,0 +1,190 @@ + SUBROUTINE DPTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DPTEQR computes all eigenvalues and, optionally, eigenvectors of a +* symmetric positive definite tridiagonal matrix by first factoring the +* matrix using DPTTRF, and then calling DBDSQR to compute the singular +* values of the bidiagonal factor. +* +* This routine computes the eigenvalues of the positive definite +* tridiagonal matrix to high relative accuracy. This means that if the +* eigenvalues range over many orders of magnitude in size, then the +* small eigenvalues and corresponding eigenvectors will be computed +* more accurately than, for example, with the standard QR method. +* +* The eigenvectors of a full or band symmetric positive definite matrix +* can also be found if DSYTRD, DSPTRD, or DSBTRD has been used to +* reduce this matrix to tridiagonal form. (The reduction to tridiagonal +* form, however, may preclude the possibility of obtaining high +* relative accuracy in the small eigenvalues of the original matrix, if +* these eigenvalues range over many orders of magnitude.) +* +* Arguments +* ========= +* +* COMPZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only. +* = 'V': Compute eigenvectors of original symmetric +* matrix also. Array Z contains the orthogonal +* matrix used to reduce the original matrix to +* tridiagonal form. +* = 'I': Compute eigenvectors of tridiagonal matrix also. +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal +* matrix. +* On normal exit, D contains the eigenvalues, in descending +* order. +* +* E (input/output) DOUBLE PRECISION array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix. +* On exit, E has been destroyed. +* +* Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) +* On entry, if COMPZ = 'V', the orthogonal matrix used in the +* reduction to tridiagonal form. +* On exit, if COMPZ = 'V', the orthonormal eigenvectors of the +* original symmetric matrix; +* if COMPZ = 'I', the orthonormal eigenvectors of the +* tridiagonal matrix. +* If INFO > 0 on exit, Z contains the eigenvectors associated +* with only the stored eigenvalues. +* If COMPZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* COMPZ = 'V' or 'I', LDZ >= max(1,N). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, and i is: +* <= N the Cholesky factorization of the matrix could +* not be performed because the i-th principal minor +* was not positive definite. +* > N the SVD algorithm failed to converge; +* if INFO = N+i, i off-diagonal elements of the +* bidiagonal factor did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DBDSQR, DLASET, DPTTRF, XERBLA +* .. +* .. Local Arrays .. + DOUBLE PRECISION C( 1, 1 ), VT( 1, 1 ) +* .. +* .. Local Scalars .. + INTEGER I, ICOMPZ, NRU +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPTEQR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ICOMPZ.GT.0 ) + $ Z( 1, 1 ) = ONE + RETURN + END IF + IF( ICOMPZ.EQ.2 ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* +* Call DPTTRF to factor the matrix. +* + CALL DPTTRF( N, D, E, INFO ) + IF( INFO.NE.0 ) + $ RETURN + DO 10 I = 1, N + D( I ) = SQRT( D( I ) ) + 10 CONTINUE + DO 20 I = 1, N - 1 + E( I ) = E( I )*D( I ) + 20 CONTINUE +* +* Call DBDSQR to compute the singular values/vectors of the +* bidiagonal factor. +* + IF( ICOMPZ.GT.0 ) THEN + NRU = N + ELSE + NRU = 0 + END IF + CALL DBDSQR( 'Lower', N, 0, NRU, 0, D, E, VT, 1, Z, LDZ, C, 1, + $ WORK, INFO ) +* +* Square the singular values. +* + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + D( I ) = D( I )*D( I ) + 30 CONTINUE + ELSE + INFO = N + INFO + END IF +* + RETURN +* +* End of DPTEQR +* + END diff --git a/costa/native/external/lapack/dptrfs.f b/costa/native/external/lapack/dptrfs.f new file mode 100644 index 000000000..21b555347 --- /dev/null +++ b/costa/native/external/lapack/dptrfs.f @@ -0,0 +1,302 @@ + SUBROUTINE DPTRFS( N, NRHS, D, E, DF, EF, B, LDB, X, LDX, FERR, + $ BERR, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION B( LDB, * ), BERR( * ), D( * ), DF( * ), + $ E( * ), EF( * ), FERR( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DPTRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is symmetric positive definite +* and tridiagonal, and provides error bounds and backward error +* estimates for the solution. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the tridiagonal matrix A. +* +* E (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) subdiagonal elements of the tridiagonal matrix A. +* +* DF (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the diagonal matrix D from the +* factorization computed by DPTTRF. +* +* EF (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) subdiagonal elements of the unit bidiagonal factor +* L from the factorization computed by DPTTRF. +* +* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by DPTTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER COUNT, I, IX, J, NZ + DOUBLE PRECISION BI, CX, DX, EPS, EX, LSTRES, S, SAFE1, SAFE2, + $ SAFMIN +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DPTTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL IDAMAX, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPTRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = 4 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 90 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X. Also compute +* abs(A)*abs(x) + abs(b) for use in the backward error bound. +* + IF( N.EQ.1 ) THEN + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + WORK( N+1 ) = BI - DX + WORK( 1 ) = ABS( BI ) + ABS( DX ) + ELSE + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + EX = E( 1 )*X( 2, J ) + WORK( N+1 ) = BI - DX - EX + WORK( 1 ) = ABS( BI ) + ABS( DX ) + ABS( EX ) + DO 30 I = 2, N - 1 + BI = B( I, J ) + CX = E( I-1 )*X( I-1, J ) + DX = D( I )*X( I, J ) + EX = E( I )*X( I+1, J ) + WORK( N+I ) = BI - CX - DX - EX + WORK( I ) = ABS( BI ) + ABS( CX ) + ABS( DX ) + ABS( EX ) + 30 CONTINUE + BI = B( N, J ) + CX = E( N-1 )*X( N-1, J ) + DX = D( N )*X( N, J ) + WORK( N+N ) = BI - CX - DX + WORK( N ) = ABS( BI ) + ABS( CX ) + ABS( DX ) + END IF +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + S = ZERO + DO 40 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 40 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL DPTTRS( N, 1, DF, EF, WORK( N+1 ), N, INFO ) + CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* + DO 50 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 50 CONTINUE + IX = IDAMAX( N, WORK, 1 ) + FERR( J ) = WORK( IX ) +* +* Estimate the norm of inv(A). +* +* Solve M(A) * x = e, where M(A) = (m(i,j)) is given by +* +* m(i,j) = abs(A(i,j)), i = j, +* m(i,j) = -abs(A(i,j)), i .ne. j, +* +* and e = [ 1, 1, ..., 1 ]'. Note M(A) = M(L)*D*M(L)'. +* +* Solve M(L) * x = e. +* + WORK( 1 ) = ONE + DO 60 I = 2, N + WORK( I ) = ONE + WORK( I-1 )*ABS( EF( I-1 ) ) + 60 CONTINUE +* +* Solve D * M(L)' * x = b. +* + WORK( N ) = WORK( N ) / DF( N ) + DO 70 I = N - 1, 1, -1 + WORK( I ) = WORK( I ) / DF( I ) + WORK( I+1 )*ABS( EF( I ) ) + 70 CONTINUE +* +* Compute norm(inv(A)) = max(x(i)), 1<=i<=n. +* + IX = IDAMAX( N, WORK, 1 ) + FERR( J ) = FERR( J )*ABS( WORK( IX ) ) +* +* Normalize error. +* + LSTRES = ZERO + DO 80 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 80 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 90 CONTINUE +* + RETURN +* +* End of DPTRFS +* + END diff --git a/costa/native/external/lapack/dptsv.f b/costa/native/external/lapack/dptsv.f new file mode 100644 index 000000000..8ec8d7773 --- /dev/null +++ b/costa/native/external/lapack/dptsv.f @@ -0,0 +1,100 @@ + SUBROUTINE DPTSV( N, NRHS, D, E, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 25, 1997 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION B( LDB, * ), D( * ), E( * ) +* .. +* +* Purpose +* ======= +* +* DPTSV computes the solution to a real system of linear equations +* A*X = B, where A is an N-by-N symmetric positive definite tridiagonal +* matrix, and X and B are N-by-NRHS matrices. +* +* A is factored as A = L*D*L**T, and the factored form of A is then +* used to solve the system of equations. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* A. On exit, the n diagonal elements of the diagonal matrix +* D from the factorization A = L*D*L**T. +* +* E (input/output) DOUBLE PRECISION array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix A. On exit, the (n-1) subdiagonal elements of the +* unit bidiagonal factor L from the L*D*L**T factorization of +* A. (E can also be regarded as the superdiagonal of the unit +* bidiagonal factor U from the U**T*D*U factorization of A.) +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i is not +* positive definite, and the solution has not been +* computed. The factorization has not been completed +* unless i = N. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL DPTTRF, DPTTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPTSV ', -INFO ) + RETURN + END IF +* +* Compute the L*D*L' (or U'*D*U) factorization of A. +* + CALL DPTTRF( N, D, E, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL DPTTRS( N, NRHS, D, E, B, LDB, INFO ) + END IF + RETURN +* +* End of DPTSV +* + END diff --git a/costa/native/external/lapack/dptsvx.f b/costa/native/external/lapack/dptsvx.f new file mode 100644 index 000000000..c6415e79a --- /dev/null +++ b/costa/native/external/lapack/dptsvx.f @@ -0,0 +1,235 @@ + SUBROUTINE DPTSVX( FACT, N, NRHS, D, E, DF, EF, B, LDB, X, LDX, + $ RCOND, FERR, BERR, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER FACT + INTEGER INFO, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION B( LDB, * ), BERR( * ), D( * ), DF( * ), + $ E( * ), EF( * ), FERR( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DPTSVX uses the factorization A = L*D*L**T to compute the solution +* to a real system of linear equations A*X = B, where A is an N-by-N +* symmetric positive definite tridiagonal matrix and X and B are +* N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'N', the matrix A is factored as A = L*D*L**T, where L +* is a unit lower bidiagonal matrix and D is diagonal. The +* factorization can also be regarded as having the form +* A = U**T*D*U. +* +* 2. If the leading i-by-i principal minor is not positive definite, +* then the routine returns with INFO = i. Otherwise, the factored +* form of A is used to estimate the condition number of the matrix +* A. If the reciprocal of the condition number is less than machine +* precision, INFO = N+1 is returned as a warning, but the routine +* still goes on to solve for X and compute error bounds as +* described below. +* +* 3. The system of equations is solved for X using the factored form +* of A. +* +* 4. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of A has been +* supplied on entry. +* = 'F': On entry, DF and EF contain the factored form of A. +* D, E, DF, and EF will not be modified. +* = 'N': The matrix A will be copied to DF and EF and +* factored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the tridiagonal matrix A. +* +* E (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) subdiagonal elements of the tridiagonal matrix A. +* +* DF (input or output) DOUBLE PRECISION array, dimension (N) +* If FACT = 'F', then DF is an input argument and on entry +* contains the n diagonal elements of the diagonal matrix D +* from the L*D*L**T factorization of A. +* If FACT = 'N', then DF is an output argument and on exit +* contains the n diagonal elements of the diagonal matrix D +* from the L*D*L**T factorization of A. +* +* EF (input or output) DOUBLE PRECISION array, dimension (N-1) +* If FACT = 'F', then EF is an input argument and on entry +* contains the (n-1) subdiagonal elements of the unit +* bidiagonal factor L from the L*D*L**T factorization of A. +* If FACT = 'N', then EF is an output argument and on exit +* contains the (n-1) subdiagonal elements of the unit +* bidiagonal factor L from the L*D*L**T factorization of A. +* +* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) +* The N-by-NRHS right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) +* If INFO = 0 of INFO = N+1, the N-by-NRHS solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal condition number of the matrix A. If RCOND +* is less than the machine precision (in particular, if +* RCOND = 0), the matrix is singular to working precision. +* This condition is indicated by a return code of INFO > 0. +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in any +* element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: the leading minor of order i of A is +* not positive definite, so the factorization +* could not be completed, and the solution has not +* been computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT + DOUBLE PRECISION ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL LSAME, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLACPY, DPTCON, DPTRFS, DPTTRF, DPTTRS, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPTSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the L*D*L' (or U'*D*U) factorization of A. +* + CALL DCOPY( N, D, 1, DF, 1 ) + IF( N.GT.1 ) + $ CALL DCOPY( N-1, E, 1, EF, 1 ) + CALL DPTTRF( N, DF, EF, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = DLANST( '1', N, D, E ) +* +* Compute the reciprocal of the condition number of A. +* + CALL DPTCON( N, DF, EF, ANORM, RCOND, WORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution vectors X. +* + CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL DPTTRS( N, NRHS, DF, EF, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL DPTRFS( N, NRHS, D, E, DF, EF, B, LDB, X, LDX, FERR, BERR, + $ WORK, INFO ) +* + RETURN +* +* End of DPTSVX +* + END diff --git a/costa/native/external/lapack/dpttrf.f b/costa/native/external/lapack/dpttrf.f new file mode 100644 index 000000000..cb814a3d5 --- /dev/null +++ b/costa/native/external/lapack/dpttrf.f @@ -0,0 +1,153 @@ + SUBROUTINE DPTTRF( N, D, E, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) +* .. +* +* Purpose +* ======= +* +* DPTTRF computes the L*D*L' factorization of a real symmetric +* positive definite tridiagonal matrix A. The factorization may also +* be regarded as having the form A = U'*D*U. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* A. On exit, the n diagonal elements of the diagonal matrix +* D from the L*D*L' factorization of A. +* +* E (input/output) DOUBLE PRECISION array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix A. On exit, the (n-1) subdiagonal elements of the +* unit bidiagonal factor L from the L*D*L' factorization of A. +* E can also be regarded as the superdiagonal of the unit +* bidiagonal factor U from the U'*D*U factorization of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, the leading minor of order k is not +* positive definite; if k < N, the factorization could not +* be completed, while if k = N, the factorization was +* completed, but D(N) = 0. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, I4 + DOUBLE PRECISION EI +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'DPTTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the L*D*L' (or U'*D*U) factorization of A. +* + I4 = MOD( N-1, 4 ) + DO 10 I = 1, I4 + IF( D( I ).LE.ZERO ) THEN + INFO = I + GO TO 30 + END IF + EI = E( I ) + E( I ) = EI / D( I ) + D( I+1 ) = D( I+1 ) - E( I )*EI + 10 CONTINUE +* + DO 20 I = I4 + 1, N - 4, 4 +* +* Drop out of the loop if d(i) <= 0: the matrix is not positive +* definite. +* + IF( D( I ).LE.ZERO ) THEN + INFO = I + GO TO 30 + END IF +* +* Solve for e(i) and d(i+1). +* + EI = E( I ) + E( I ) = EI / D( I ) + D( I+1 ) = D( I+1 ) - E( I )*EI +* + IF( D( I+1 ).LE.ZERO ) THEN + INFO = I + 1 + GO TO 30 + END IF +* +* Solve for e(i+1) and d(i+2). +* + EI = E( I+1 ) + E( I+1 ) = EI / D( I+1 ) + D( I+2 ) = D( I+2 ) - E( I+1 )*EI +* + IF( D( I+2 ).LE.ZERO ) THEN + INFO = I + 2 + GO TO 30 + END IF +* +* Solve for e(i+2) and d(i+3). +* + EI = E( I+2 ) + E( I+2 ) = EI / D( I+2 ) + D( I+3 ) = D( I+3 ) - E( I+2 )*EI +* + IF( D( I+3 ).LE.ZERO ) THEN + INFO = I + 3 + GO TO 30 + END IF +* +* Solve for e(i+3) and d(i+4). +* + EI = E( I+3 ) + E( I+3 ) = EI / D( I+3 ) + D( I+4 ) = D( I+4 ) - E( I+3 )*EI + 20 CONTINUE +* +* Check d(n) for positive definiteness. +* + IF( D( N ).LE.ZERO ) + $ INFO = N +* + 30 CONTINUE + RETURN +* +* End of DPTTRF +* + END diff --git a/costa/native/external/lapack/dpttrs.f b/costa/native/external/lapack/dpttrs.f new file mode 100644 index 000000000..d25d20c7a --- /dev/null +++ b/costa/native/external/lapack/dpttrs.f @@ -0,0 +1,115 @@ + SUBROUTINE DPTTRS( N, NRHS, D, E, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION B( LDB, * ), D( * ), E( * ) +* .. +* +* Purpose +* ======= +* +* DPTTRS solves a tridiagonal system of the form +* A * X = B +* using the L*D*L' factorization of A computed by DPTTRF. D is a +* diagonal matrix specified in the vector D, L is a unit bidiagonal +* matrix whose subdiagonal is specified in the vector E, and X and B +* are N by NRHS matrices. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the tridiagonal matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the diagonal matrix D from the +* L*D*L' factorization of A. +* +* E (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) subdiagonal elements of the unit bidiagonal factor +* L from the L*D*L' factorization of A. E can also be regarded +* as the superdiagonal of the unit bidiagonal factor U from the +* factorization A = U'*D*U. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the right hand side vectors B for the system of +* linear equations. +* On exit, the solution vectors, X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER J, JB, NB +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DPTTS2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPTTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Determine the number of right-hand sides to solve at a time. +* + IF( NRHS.EQ.1 ) THEN + NB = 1 + ELSE + NB = MAX( 1, ILAENV( 1, 'DPTTRS', ' ', N, NRHS, -1, -1 ) ) + END IF +* + IF( NB.GE.NRHS ) THEN + CALL DPTTS2( N, NRHS, D, E, B, LDB ) + ELSE + DO 10 J = 1, NRHS, NB + JB = MIN( NRHS-J+1, NB ) + CALL DPTTS2( N, JB, D, E, B( 1, J ), LDB ) + 10 CONTINUE + END IF +* + RETURN +* +* End of DPTTRS +* + END diff --git a/costa/native/external/lapack/dptts2.f b/costa/native/external/lapack/dptts2.f new file mode 100644 index 000000000..b56ad7828 --- /dev/null +++ b/costa/native/external/lapack/dptts2.f @@ -0,0 +1,94 @@ + SUBROUTINE DPTTS2( N, NRHS, D, E, B, LDB ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION B( LDB, * ), D( * ), E( * ) +* .. +* +* Purpose +* ======= +* +* DPTTS2 solves a tridiagonal system of the form +* A * X = B +* using the L*D*L' factorization of A computed by DPTTRF. D is a +* diagonal matrix specified in the vector D, L is a unit bidiagonal +* matrix whose subdiagonal is specified in the vector E, and X and B +* are N by NRHS matrices. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the tridiagonal matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the diagonal matrix D from the +* L*D*L' factorization of A. +* +* E (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) subdiagonal elements of the unit bidiagonal factor +* L from the L*D*L' factorization of A. E can also be regarded +* as the superdiagonal of the unit bidiagonal factor U from the +* factorization A = U'*D*U. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the right hand side vectors B for the system of +* linear equations. +* On exit, the solution vectors, X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Subroutines .. + EXTERNAL DSCAL +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + IF( N.EQ.1 ) + $ CALL DSCAL( NRHS, 1.D0 / D( 1 ), B, LDB ) + RETURN + END IF +* +* Solve A * X = B using the factorization A = L*D*L', +* overwriting each right hand side vector with its solution. +* + DO 30 J = 1, NRHS +* +* Solve L * x = b. +* + DO 10 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 ) + 10 CONTINUE +* +* Solve D * L' * x = b. +* + B( N, J ) = B( N, J ) / D( N ) + DO 20 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) / D( I ) - B( I+1, J )*E( I ) + 20 CONTINUE + 30 CONTINUE +* + RETURN +* +* End of DPTTS2 +* + END diff --git a/costa/native/external/lapack/drscl.f b/costa/native/external/lapack/drscl.f new file mode 100644 index 000000000..d268c231f --- /dev/null +++ b/costa/native/external/lapack/drscl.f @@ -0,0 +1,115 @@ + SUBROUTINE DRSCL( N, SA, SX, INCX ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INCX, N + DOUBLE PRECISION SA +* .. +* .. Array Arguments .. + DOUBLE PRECISION SX( * ) +* .. +* +* Purpose +* ======= +* +* DRSCL multiplies an n-element real vector x by the real scalar 1/a. +* This is done without overflow or underflow as long as +* the final result x/a does not overflow or underflow. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of components of the vector x. +* +* SA (input) DOUBLE PRECISION +* The scalar a which is used to divide each component of x. +* SA must be >= 0, or the subroutine will divide by zero. +* +* SX (input/output) DOUBLE PRECISION array, dimension +* (1+(N-1)*abs(INCX)) +* The n-element vector x. +* +* INCX (input) INTEGER +* The increment between successive values of the vector SX. +* > 0: SX(1) = X(1) and SX(1+(i-1)*INCX) = x(i), 1< i<= n +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + DOUBLE PRECISION BIGNUM, CDEN, CDEN1, CNUM, CNUM1, MUL, SMLNUM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Initialize the denominator to SA and the numerator to 1. +* + CDEN = SA + CNUM = ONE +* + 10 CONTINUE + CDEN1 = CDEN*SMLNUM + CNUM1 = CNUM / BIGNUM + IF( ABS( CDEN1 ).GT.ABS( CNUM ) .AND. CNUM.NE.ZERO ) THEN +* +* Pre-multiply X by SMLNUM if CDEN is large compared to CNUM. +* + MUL = SMLNUM + DONE = .FALSE. + CDEN = CDEN1 + ELSE IF( ABS( CNUM1 ).GT.ABS( CDEN ) ) THEN +* +* Pre-multiply X by BIGNUM if CDEN is small compared to CNUM. +* + MUL = BIGNUM + DONE = .FALSE. + CNUM = CNUM1 + ELSE +* +* Multiply X by CNUM / CDEN and return. +* + MUL = CNUM / CDEN + DONE = .TRUE. + END IF +* +* Scale the vector X by MUL +* + CALL DSCAL( N, MUL, SX, INCX ) +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of DRSCL +* + END diff --git a/costa/native/external/lapack/dsbev.f b/costa/native/external/lapack/dsbev.f new file mode 100644 index 000000000..919764536 --- /dev/null +++ b/costa/native/external/lapack/dsbev.f @@ -0,0 +1,206 @@ + SUBROUTINE DSBEV( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSBEV computes all the eigenvalues and, optionally, eigenvectors of +* a real symmetric band matrix A. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) DOUBLE PRECISION array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, AB is overwritten by values generated during the +* reduction to tridiagonal form. If UPLO = 'U', the first +* superdiagonal and the diagonal of the tridiagonal matrix T +* are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +* the diagonal and first subdiagonal of T are returned in the +* first two rows of AB. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD + 1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +* eigenvectors of the matrix A, with the i-th column of Z +* holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (max(1,3*N-2)) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, WANTZ + INTEGER IINFO, IMAX, INDE, INDWRK, ISCALE + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANSB + EXTERNAL LSAME, DLAMCH, DLANSB +* .. +* .. External Subroutines .. + EXTERNAL DLASCL, DSBTRD, DSCAL, DSTEQR, DSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSBEV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( LOWER ) THEN + W( 1 ) = AB( 1, 1 ) + ELSE + W( 1 ) = AB( KD+1, 1 ) + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = DLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL DLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL DLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call DSBTRD to reduce symmetric band matrix to tridiagonal form. +* + INDE = 1 + INDWRK = INDE + N + CALL DSBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, WORK( INDE ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call SSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL DSTEQR( JOBZ, N, W, WORK( INDE ), Z, LDZ, WORK( INDWRK ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + RETURN +* +* End of DSBEV +* + END diff --git a/costa/native/external/lapack/dsbevd.f b/costa/native/external/lapack/dsbevd.f new file mode 100644 index 000000000..efa1bb0e6 --- /dev/null +++ b/costa/native/external/lapack/dsbevd.f @@ -0,0 +1,265 @@ + SUBROUTINE DSBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, + $ LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSBEVD computes all the eigenvalues and, optionally, eigenvectors of +* a real symmetric band matrix A. If eigenvectors are desired, it uses +* a divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) DOUBLE PRECISION array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, AB is overwritten by values generated during the +* reduction to tridiagonal form. If UPLO = 'U', the first +* superdiagonal and the diagonal of the tridiagonal matrix T +* are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +* the diagonal and first subdiagonal of T are returned in the +* first two rows of AB. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD + 1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +* eigenvectors of the matrix A, with the i-th column of Z +* holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) DOUBLE PRECISION array, +* dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* IF N <= 1, LWORK must be at least 1. +* If JOBZ = 'N' and N > 2, LWORK must be at least 2*N. +* If JOBZ = 'V' and N > 2, LWORK must be at least +* ( 1 + 5*N + 2*N**2 ). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array LIWORK. +* If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +* If JOBZ = 'V' and N > 2, LIWORK must be at least 3 + 5*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, INDE, INDWK2, INDWRK, ISCALE, LIWMIN, + $ LLWRK2, LWMIN + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANSB + EXTERNAL LSAME, DLAMCH, DLANSB +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DLACPY, DLASCL, DSBTRD, DSCAL, DSTEDC, + $ DSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 5*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + END IF + END IF + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSBEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AB( 1, 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = DLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL DLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL DLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call DSBTRD to reduce symmetric band matrix to tridiagonal form. +* + INDE = 1 + INDWRK = INDE + N + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 + CALL DSBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, WORK( INDE ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call SSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL DSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO ) + CALL DGEMM( 'N', 'N', N, N, N, ONE, Z, LDZ, WORK( INDWRK ), N, + $ ZERO, WORK( INDWK2 ), N ) + CALL DLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL DSCAL( N, ONE / SIGMA, W, 1 ) +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of DSBEVD +* + END diff --git a/costa/native/external/lapack/dsbevx.f b/costa/native/external/lapack/dsbevx.f new file mode 100644 index 000000000..f832f1a60 --- /dev/null +++ b/costa/native/external/lapack/dsbevx.f @@ -0,0 +1,411 @@ + SUBROUTINE DSBEVX( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, LDQ, VL, + $ VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, IWORK, + $ IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), Q( LDQ, * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSBEVX computes selected eigenvalues and, optionally, eigenvectors +* of a real symmetric band matrix A. Eigenvalues and eigenvectors can +* be selected by specifying either a range of values or a range of +* indices for the desired eigenvalues. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found; +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found; +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) DOUBLE PRECISION array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, AB is overwritten by values generated during the +* reduction to tridiagonal form. If UPLO = 'U', the first +* superdiagonal and the diagonal of the tridiagonal matrix T +* are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +* the diagonal and first subdiagonal of T are returned in the +* first two rows of AB. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD + 1. +* +* Q (output) DOUBLE PRECISION array, dimension (LDQ, N) +* If JOBZ = 'V', the N-by-N orthogonal matrix used in the +* reduction to tridiagonal form. +* If JOBZ = 'N', the array Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. If JOBZ = 'V', then +* LDQ >= max(1,N). +* +* VL (input) DOUBLE PRECISION +* VU (input) DOUBLE PRECISION +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) DOUBLE PRECISION +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing AB to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*DLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*DLAMCH('S'). +* +* See "Computing Small Singular Values of Bidiagonal Matrices +* with Guaranteed High Relative Accuracy," by Demmel and +* Kahan, LAPACK Working Note #3. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* The first M elements contain the selected eigenvalues in +* ascending order. +* +* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M)) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If an eigenvector fails to converge, then that column of Z +* contains the latest approximation to the eigenvector, and the +* index of the eigenvector is returned in IFAIL. +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (7*N) +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, then i eigenvectors failed to converge. +* Their indices are stored in array IFAIL. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWO, INDWRK, ISCALE, ITMP1, J, JJ, + $ NSPLIT + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANSB + EXTERNAL LSAME, DLAMCH, DLANSB +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMV, DLACPY, DLASCL, DSBTRD, DSCAL, + $ DSTEBZ, DSTEIN, DSTEQR, DSTERF, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LOWER = LSAME( UPLO, 'L' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( WANTZ .AND. LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -11 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -13 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -18 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSBEVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + M = 1 + IF( LOWER ) THEN + TMP1 = AB( 1, 1 ) + ELSE + TMP1 = AB( KD+1, 1 ) + END IF + IF( VALEIG ) THEN + IF( .NOT.( VL.LT.TMP1 .AND. VU.GE.TMP1 ) ) + $ M = 0 + END IF + IF( M.EQ.1 ) THEN + W( 1 ) = TMP1 + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + END IF + ANRM = DLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL DLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL DLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call DSBTRD to reduce symmetric band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDWRK = INDE + N + CALL DSBTRD( JOBZ, UPLO, N, KD, AB, LDAB, WORK( INDD ), + $ WORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call DSTERF or SSTEQR. If this fails for some +* eigenvalue, then try DSTEBZ. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ ( ABSTOL.LE.ZERO ) ) THEN + CALL DCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL DLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL DSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by DSTEIN. +* + DO 20 J = 1, M + CALL DCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL DGEMV( 'N', N, N, ONE, Q, LDQ, WORK, 1, ZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* + RETURN +* +* End of DSBEVX +* + END diff --git a/costa/native/external/lapack/dsbgst.f b/costa/native/external/lapack/dsbgst.f new file mode 100644 index 000000000..3d975aea3 --- /dev/null +++ b/costa/native/external/lapack/dsbgst.f @@ -0,0 +1,1346 @@ + SUBROUTINE DSBGST( VECT, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, X, + $ LDX, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO, VECT + INTEGER INFO, KA, KB, LDAB, LDBB, LDX, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), BB( LDBB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DSBGST reduces a real symmetric-definite banded generalized +* eigenproblem A*x = lambda*B*x to standard form C*y = lambda*y, +* such that C has the same bandwidth as A. +* +* B must have been previously factorized as S**T*S by DPBSTF, using a +* split Cholesky factorization. A is overwritten by C = X**T*A*X, where +* X = S**(-1)*Q and Q is an orthogonal matrix chosen to preserve the +* bandwidth of A. +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* = 'N': do not form the transformation matrix X; +* = 'V': form X. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* KA (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KA >= 0. +* +* KB (input) INTEGER +* The number of superdiagonals of the matrix B if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KA >= KB >= 0. +* +* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first ka+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +* +* On exit, the transformed matrix X**T*A*X, stored in the same +* format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KA+1. +* +* BB (input) DOUBLE PRECISION array, dimension (LDBB,N) +* The banded factor S from the split Cholesky factorization of +* B, as returned by DPBSTF, stored in the first KB+1 rows of +* the array. +* +* LDBB (input) INTEGER +* The leading dimension of the array BB. LDBB >= KB+1. +* +* X (output) DOUBLE PRECISION array, dimension (LDX,N) +* If VECT = 'V', the n-by-n matrix X. +* If VECT = 'N', the array X is not referenced. +* +* LDX (input) INTEGER +* The leading dimension of the array X. +* LDX >= max(1,N) if VECT = 'V'; LDX >= 1 otherwise. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPDATE, UPPER, WANTX + INTEGER I, I0, I1, I2, INCA, J, J1, J1T, J2, J2T, K, + $ KA1, KB1, KBT, L, M, NR, NRT, NX + DOUBLE PRECISION BII, RA, RA1, T +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGER, DLAR2V, DLARGV, DLARTG, DLARTV, DLASET, + $ DROT, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTX = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + KA1 = KA + 1 + KB1 = KB + 1 + INFO = 0 + IF( .NOT.WANTX .AND. .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDX.LT.1 .OR. WANTX .AND. LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSBGST', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + INCA = LDAB*KA1 +* +* Initialize X to the unit matrix, if needed +* + IF( WANTX ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, X, LDX ) +* +* Set M to the splitting point m. It must be the same value as is +* used in DPBSTF. The chosen value allows the arrays WORK and RWORK +* to be of dimension (N). +* + M = ( N+KB ) / 2 +* +* The routine works in two phases, corresponding to the two halves +* of the split Cholesky factorization of B as S**T*S where +* +* S = ( U ) +* ( M L ) +* +* with U upper triangular of order m, and L lower triangular of +* order n-m. S has the same bandwidth as B. +* +* S is treated as a product of elementary matrices: +* +* S = S(m)*S(m-1)*...*S(2)*S(1)*S(m+1)*S(m+2)*...*S(n-1)*S(n) +* +* where S(i) is determined by the i-th row of S. +* +* In phase 1, the index i takes the values n, n-1, ... , m+1; +* in phase 2, it takes the values 1, 2, ... , m. +* +* For each value of i, the current matrix A is updated by forming +* inv(S(i))**T*A*inv(S(i)). This creates a triangular bulge outside +* the band of A. The bulge is then pushed down toward the bottom of +* A in phase 1, and up toward the top of A in phase 2, by applying +* plane rotations. +* +* There are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1 +* of them are linearly independent, so annihilating a bulge requires +* only 2*kb-1 plane rotations. The rotations are divided into a 1st +* set of kb-1 rotations, and a 2nd set of kb rotations. +* +* Wherever possible, rotations are generated and applied in vector +* operations of length NR between the indices J1 and J2 (sometimes +* replaced by modified values NRT, J1T or J2T). +* +* The cosines and sines of the rotations are stored in the array +* WORK. The cosines of the 1st set of rotations are stored in +* elements n+2:n+m-kb-1 and the sines of the 1st set in elements +* 2:m-kb-1; the cosines of the 2nd set are stored in elements +* n+m-kb+1:2*n and the sines of the second set in elements m-kb+1:n. +* +* The bulges are not formed explicitly; nonzero elements outside the +* band are created only when they are required for generating new +* rotations; they are stored in the array WORK, in positions where +* they are later overwritten by the sines of the rotations which +* annihilate them. +* +* **************************** Phase 1 ***************************** +* +* The logical structure of this phase is: +* +* UPDATE = .TRUE. +* DO I = N, M + 1, -1 +* use S(i) to update A and create a new bulge +* apply rotations to push all bulges KA positions downward +* END DO +* UPDATE = .FALSE. +* DO I = M + KA + 1, N - 1 +* apply rotations to push all bulges KA positions downward +* END DO +* +* To avoid duplicating code, the two loops are merged. +* + UPDATE = .TRUE. + I = N + 1 + 10 CONTINUE + IF( UPDATE ) THEN + I = I - 1 + KBT = MIN( KB, I-1 ) + I0 = I - 1 + I1 = MIN( N, I+KA ) + I2 = I - KBT + KA1 + IF( I.LT.M+1 ) THEN + UPDATE = .FALSE. + I = I + 1 + I0 = M + IF( KA.EQ.0 ) + $ GO TO 480 + GO TO 10 + END IF + ELSE + I = I + KA + IF( I.GT.N-1 ) + $ GO TO 480 + END IF +* + IF( UPPER ) THEN +* +* Transform A, working with the upper triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**T * A * inv(S(i)) +* + BII = BB( KB1, I ) + DO 20 J = I, I1 + AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII + 20 CONTINUE + DO 30 J = MAX( 1, I-KA ), I + AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII + 30 CONTINUE + DO 60 K = I - KBT, I - 1 + DO 40 J = I - KBT, K + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ BB( J-I+KB1, I )*AB( K-I+KA1, I ) - + $ BB( K-I+KB1, I )*AB( J-I+KA1, I ) + + $ AB( KA1, I )*BB( J-I+KB1, I )* + $ BB( K-I+KB1, I ) + 40 CONTINUE + DO 50 J = MAX( 1, I-KA ), I - KBT - 1 + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ BB( K-I+KB1, I )*AB( J-I+KA1, I ) + 50 CONTINUE + 60 CONTINUE + DO 80 J = I, I1 + DO 70 K = MAX( J-KA, I-KBT ), I - 1 + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ BB( K-I+KB1, I )*AB( I-J+KA1, J ) + 70 CONTINUE + 80 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL DSCAL( N-M, ONE / BII, X( M+1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL DGER( N-M, KBT, -ONE, X( M+1, I ), 1, + $ BB( KB1-KBT, I ), 1, X( M+1, I-KBT ), LDX ) + END IF +* +* store a(i,i1) in RA1 for use in next loop over K +* + RA1 = AB( I-I1+KA1, I1 ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions down toward the bottom of the +* band +* + DO 130 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN +* +* generate rotation to annihilate a(i,i-k+ka+1) +* + CALL DLARTG( AB( K+1, I-K+KA ), RA1, + $ WORK( N+I-K+KA-M ), WORK( I-K+KA-M ), + $ RA ) +* +* create nonzero element a(i-k,i-k+ka+1) outside the +* band and store it in WORK(i-k) +* + T = -BB( KB1-K, I )*RA1 + WORK( I-K ) = WORK( N+I-K+KA-M )*T - + $ WORK( I-K+KA-M )*AB( 1, I-K+KA ) + AB( 1, I-K+KA ) = WORK( I-K+KA-M )*T + + $ WORK( N+I-K+KA-M )*AB( 1, I-K+KA ) + RA1 = RA + END IF + END IF + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MAX( J2, I+2*KA-K+1 ) + ELSE + J2T = J2 + END IF + NRT = ( N-J2T+KA ) / KA1 + DO 90 J = J2T, J1, KA1 +* +* create nonzero element a(j-ka,j+1) outside the band +* and store it in WORK(j-m) +* + WORK( J-M ) = WORK( J-M )*AB( 1, J+1 ) + AB( 1, J+1 ) = WORK( N+J-M )*AB( 1, J+1 ) + 90 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL DLARGV( NRT, AB( 1, J2T ), INCA, WORK( J2T-M ), KA1, + $ WORK( N+J2T-M ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the right +* + DO 100 L = 1, KA - 1 + CALL DLARTV( NR, AB( KA1-L, J2 ), INCA, + $ AB( KA-L, J2+1 ), INCA, WORK( N+J2-M ), + $ WORK( J2-M ), KA1 ) + 100 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL DLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ), + $ AB( KA, J2+1 ), INCA, WORK( N+J2-M ), + $ WORK( J2-M ), KA1 ) +* + END IF +* +* start applying rotations in 1st set from the left +* + DO 110 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, + $ WORK( N+J2-M ), WORK( J2-M ), KA1 ) + 110 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 120 J = J2, J1, KA1 + CALL DROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ WORK( N+J-M ), WORK( J-M ) ) + 120 CONTINUE + END IF + 130 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.LE.N .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i-kbt,i-kbt+ka+1) outside the +* band and store it in WORK(i-kbt) +* + WORK( I-KBT ) = -BB( KB1-KBT, I )*RA1 + END IF + END IF +* + DO 170 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1 + ELSE + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + END IF +* +* finish applying rotations in 2nd set from the left +* + DO 140 L = KB - K, 1, -1 + NRT = ( N-J2+KA+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( L, J2-L+1 ), INCA, + $ AB( L+1, J2-L+1 ), INCA, WORK( N+J2-KA ), + $ WORK( J2-KA ), KA1 ) + 140 CONTINUE + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + DO 150 J = J1, J2, -KA1 + WORK( J ) = WORK( J-KA ) + WORK( N+J ) = WORK( N+J-KA ) + 150 CONTINUE + DO 160 J = J2, J1, KA1 +* +* create nonzero element a(j-ka,j+1) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( 1, J+1 ) + AB( 1, J+1 ) = WORK( N+J )*AB( 1, J+1 ) + 160 CONTINUE + IF( UPDATE ) THEN + IF( I-K.LT.N-KA .AND. K.LE.KBT ) + $ WORK( I-K+KA ) = WORK( I-K ) + END IF + 170 CONTINUE +* + DO 210 K = KB, 1, -1 + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL DLARGV( NR, AB( 1, J2 ), INCA, WORK( J2 ), KA1, + $ WORK( N+J2 ), KA1 ) +* +* apply rotations in 2nd set from the right +* + DO 180 L = 1, KA - 1 + CALL DLARTV( NR, AB( KA1-L, J2 ), INCA, + $ AB( KA-L, J2+1 ), INCA, WORK( N+J2 ), + $ WORK( J2 ), KA1 ) + 180 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL DLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ), + $ AB( KA, J2+1 ), INCA, WORK( N+J2 ), + $ WORK( J2 ), KA1 ) +* + END IF +* +* start applying rotations in 2nd set from the left +* + DO 190 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, WORK( N+J2 ), + $ WORK( J2 ), KA1 ) + 190 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 200 J = J2, J1, KA1 + CALL DROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ WORK( N+J ), WORK( J ) ) + 200 CONTINUE + END IF + 210 CONTINUE +* + DO 230 K = 1, KB - 1 + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 +* +* finish applying rotations in 1st set from the left +* + DO 220 L = KB - K, 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, + $ WORK( N+J2-M ), WORK( J2-M ), KA1 ) + 220 CONTINUE + 230 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 240 J = N - 1, I - KB + 2*KA + 1, -1 + WORK( N+J-M ) = WORK( N+J-KA-M ) + WORK( J-M ) = WORK( J-KA-M ) + 240 CONTINUE + END IF +* + ELSE +* +* Transform A, working with the lower triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**T * A * inv(S(i)) +* + BII = BB( 1, I ) + DO 250 J = I, I1 + AB( J-I+1, I ) = AB( J-I+1, I ) / BII + 250 CONTINUE + DO 260 J = MAX( 1, I-KA ), I + AB( I-J+1, J ) = AB( I-J+1, J ) / BII + 260 CONTINUE + DO 290 K = I - KBT, I - 1 + DO 270 J = I - KBT, K + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ BB( I-J+1, J )*AB( I-K+1, K ) - + $ BB( I-K+1, K )*AB( I-J+1, J ) + + $ AB( 1, I )*BB( I-J+1, J )* + $ BB( I-K+1, K ) + 270 CONTINUE + DO 280 J = MAX( 1, I-KA ), I - KBT - 1 + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ BB( I-K+1, K )*AB( I-J+1, J ) + 280 CONTINUE + 290 CONTINUE + DO 310 J = I, I1 + DO 300 K = MAX( J-KA, I-KBT ), I - 1 + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ BB( I-K+1, K )*AB( J-I+1, I ) + 300 CONTINUE + 310 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL DSCAL( N-M, ONE / BII, X( M+1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL DGER( N-M, KBT, -ONE, X( M+1, I ), 1, + $ BB( KBT+1, I-KBT ), LDBB-1, + $ X( M+1, I-KBT ), LDX ) + END IF +* +* store a(i1,i) in RA1 for use in next loop over K +* + RA1 = AB( I1-I+1, I ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions down toward the bottom of the +* band +* + DO 360 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN +* +* generate rotation to annihilate a(i-k+ka+1,i) +* + CALL DLARTG( AB( KA1-K, I ), RA1, WORK( N+I-K+KA-M ), + $ WORK( I-K+KA-M ), RA ) +* +* create nonzero element a(i-k+ka+1,i-k) outside the +* band and store it in WORK(i-k) +* + T = -BB( K+1, I-K )*RA1 + WORK( I-K ) = WORK( N+I-K+KA-M )*T - + $ WORK( I-K+KA-M )*AB( KA1, I-K ) + AB( KA1, I-K ) = WORK( I-K+KA-M )*T + + $ WORK( N+I-K+KA-M )*AB( KA1, I-K ) + RA1 = RA + END IF + END IF + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MAX( J2, I+2*KA-K+1 ) + ELSE + J2T = J2 + END IF + NRT = ( N-J2T+KA ) / KA1 + DO 320 J = J2T, J1, KA1 +* +* create nonzero element a(j+1,j-ka) outside the band +* and store it in WORK(j-m) +* + WORK( J-M ) = WORK( J-M )*AB( KA1, J-KA+1 ) + AB( KA1, J-KA+1 ) = WORK( N+J-M )*AB( KA1, J-KA+1 ) + 320 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL DLARGV( NRT, AB( KA1, J2T-KA ), INCA, WORK( J2T-M ), + $ KA1, WORK( N+J2T-M ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the left +* + DO 330 L = 1, KA - 1 + CALL DLARTV( NR, AB( L+1, J2-L ), INCA, + $ AB( L+2, J2-L ), INCA, WORK( N+J2-M ), + $ WORK( J2-M ), KA1 ) + 330 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL DLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ), + $ INCA, WORK( N+J2-M ), WORK( J2-M ), KA1 ) +* + END IF +* +* start applying rotations in 1st set from the right +* + DO 340 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, WORK( N+J2-M ), + $ WORK( J2-M ), KA1 ) + 340 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 350 J = J2, J1, KA1 + CALL DROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ WORK( N+J-M ), WORK( J-M ) ) + 350 CONTINUE + END IF + 360 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.LE.N .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i-kbt+ka+1,i-kbt) outside the +* band and store it in WORK(i-kbt) +* + WORK( I-KBT ) = -BB( KBT+1, I-KBT )*RA1 + END IF + END IF +* + DO 400 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1 + ELSE + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + END IF +* +* finish applying rotations in 2nd set from the right +* + DO 370 L = KB - K, 1, -1 + NRT = ( N-J2+KA+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( KA1-L+1, J2-KA ), INCA, + $ AB( KA1-L, J2-KA+1 ), INCA, + $ WORK( N+J2-KA ), WORK( J2-KA ), KA1 ) + 370 CONTINUE + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + DO 380 J = J1, J2, -KA1 + WORK( J ) = WORK( J-KA ) + WORK( N+J ) = WORK( N+J-KA ) + 380 CONTINUE + DO 390 J = J2, J1, KA1 +* +* create nonzero element a(j+1,j-ka) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( KA1, J-KA+1 ) + AB( KA1, J-KA+1 ) = WORK( N+J )*AB( KA1, J-KA+1 ) + 390 CONTINUE + IF( UPDATE ) THEN + IF( I-K.LT.N-KA .AND. K.LE.KBT ) + $ WORK( I-K+KA ) = WORK( I-K ) + END IF + 400 CONTINUE +* + DO 440 K = KB, 1, -1 + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL DLARGV( NR, AB( KA1, J2-KA ), INCA, WORK( J2 ), KA1, + $ WORK( N+J2 ), KA1 ) +* +* apply rotations in 2nd set from the left +* + DO 410 L = 1, KA - 1 + CALL DLARTV( NR, AB( L+1, J2-L ), INCA, + $ AB( L+2, J2-L ), INCA, WORK( N+J2 ), + $ WORK( J2 ), KA1 ) + 410 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL DLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ), + $ INCA, WORK( N+J2 ), WORK( J2 ), KA1 ) +* + END IF +* +* start applying rotations in 2nd set from the right +* + DO 420 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, WORK( N+J2 ), + $ WORK( J2 ), KA1 ) + 420 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 430 J = J2, J1, KA1 + CALL DROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ WORK( N+J ), WORK( J ) ) + 430 CONTINUE + END IF + 440 CONTINUE +* + DO 460 K = 1, KB - 1 + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 +* +* finish applying rotations in 1st set from the right +* + DO 450 L = KB - K, 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, WORK( N+J2-M ), + $ WORK( J2-M ), KA1 ) + 450 CONTINUE + 460 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 470 J = N - 1, I - KB + 2*KA + 1, -1 + WORK( N+J-M ) = WORK( N+J-KA-M ) + WORK( J-M ) = WORK( J-KA-M ) + 470 CONTINUE + END IF +* + END IF +* + GO TO 10 +* + 480 CONTINUE +* +* **************************** Phase 2 ***************************** +* +* The logical structure of this phase is: +* +* UPDATE = .TRUE. +* DO I = 1, M +* use S(i) to update A and create a new bulge +* apply rotations to push all bulges KA positions upward +* END DO +* UPDATE = .FALSE. +* DO I = M - KA - 1, 2, -1 +* apply rotations to push all bulges KA positions upward +* END DO +* +* To avoid duplicating code, the two loops are merged. +* + UPDATE = .TRUE. + I = 0 + 490 CONTINUE + IF( UPDATE ) THEN + I = I + 1 + KBT = MIN( KB, M-I ) + I0 = I + 1 + I1 = MAX( 1, I-KA ) + I2 = I + KBT - KA1 + IF( I.GT.M ) THEN + UPDATE = .FALSE. + I = I - 1 + I0 = M + 1 + IF( KA.EQ.0 ) + $ RETURN + GO TO 490 + END IF + ELSE + I = I - KA + IF( I.LT.2 ) + $ RETURN + END IF +* + IF( I.LT.M-KBT ) THEN + NX = M + ELSE + NX = N + END IF +* + IF( UPPER ) THEN +* +* Transform A, working with the upper triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**T * A * inv(S(i)) +* + BII = BB( KB1, I ) + DO 500 J = I1, I + AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII + 500 CONTINUE + DO 510 J = I, MIN( N, I+KA ) + AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII + 510 CONTINUE + DO 540 K = I + 1, I + KBT + DO 520 J = K, I + KBT + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ BB( I-J+KB1, J )*AB( I-K+KA1, K ) - + $ BB( I-K+KB1, K )*AB( I-J+KA1, J ) + + $ AB( KA1, I )*BB( I-J+KB1, J )* + $ BB( I-K+KB1, K ) + 520 CONTINUE + DO 530 J = I + KBT + 1, MIN( N, I+KA ) + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ BB( I-K+KB1, K )*AB( I-J+KA1, J ) + 530 CONTINUE + 540 CONTINUE + DO 560 J = I1, I + DO 550 K = I + 1, MIN( J+KA, I+KBT ) + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ BB( I-K+KB1, K )*AB( J-I+KA1, I ) + 550 CONTINUE + 560 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL DSCAL( NX, ONE / BII, X( 1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL DGER( NX, KBT, -ONE, X( 1, I ), 1, BB( KB, I+1 ), + $ LDBB-1, X( 1, I+1 ), LDX ) + END IF +* +* store a(i1,i) in RA1 for use in next loop over K +* + RA1 = AB( I1-I+KA1, I ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions up toward the top of the band +* + DO 610 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN +* +* generate rotation to annihilate a(i+k-ka-1,i) +* + CALL DLARTG( AB( K+1, I ), RA1, WORK( N+I+K-KA ), + $ WORK( I+K-KA ), RA ) +* +* create nonzero element a(i+k-ka-1,i+k) outside the +* band and store it in WORK(m-kb+i+k) +* + T = -BB( KB1-K, I+K )*RA1 + WORK( M-KB+I+K ) = WORK( N+I+K-KA )*T - + $ WORK( I+K-KA )*AB( 1, I+K ) + AB( 1, I+K ) = WORK( I+K-KA )*T + + $ WORK( N+I+K-KA )*AB( 1, I+K ) + RA1 = RA + END IF + END IF + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MIN( J2, I-2*KA+K-1 ) + ELSE + J2T = J2 + END IF + NRT = ( J2T+KA-1 ) / KA1 + DO 570 J = J1, J2T, KA1 +* +* create nonzero element a(j-1,j+ka) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( 1, J+KA-1 ) + AB( 1, J+KA-1 ) = WORK( N+J )*AB( 1, J+KA-1 ) + 570 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL DLARGV( NRT, AB( 1, J1+KA ), INCA, WORK( J1 ), KA1, + $ WORK( N+J1 ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the left +* + DO 580 L = 1, KA - 1 + CALL DLARTV( NR, AB( KA1-L, J1+L ), INCA, + $ AB( KA-L, J1+L ), INCA, WORK( N+J1 ), + $ WORK( J1 ), KA1 ) + 580 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL DLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ), + $ AB( KA, J1 ), INCA, WORK( N+J1 ), + $ WORK( J1 ), KA1 ) +* + END IF +* +* start applying rotations in 1st set from the right +* + DO 590 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, WORK( N+J1T ), + $ WORK( J1T ), KA1 ) + 590 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 600 J = J1, J2, KA1 + CALL DROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ WORK( N+J ), WORK( J ) ) + 600 CONTINUE + END IF + 610 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.GT.0 .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i+kbt-ka-1,i+kbt) outside the +* band and store it in WORK(m-kb+i+kbt) +* + WORK( M-KB+I+KBT ) = -BB( KB1-KBT, I+KBT )*RA1 + END IF + END IF +* + DO 650 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1 + ELSE + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + END IF +* +* finish applying rotations in 2nd set from the right +* + DO 620 L = KB - K, 1, -1 + NRT = ( J2+KA+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( L, J1T+KA ), INCA, + $ AB( L+1, J1T+KA-1 ), INCA, + $ WORK( N+M-KB+J1T+KA ), + $ WORK( M-KB+J1T+KA ), KA1 ) + 620 CONTINUE + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + DO 630 J = J1, J2, KA1 + WORK( M-KB+J ) = WORK( M-KB+J+KA ) + WORK( N+M-KB+J ) = WORK( N+M-KB+J+KA ) + 630 CONTINUE + DO 640 J = J1, J2, KA1 +* +* create nonzero element a(j-1,j+ka) outside the band +* and store it in WORK(m-kb+j) +* + WORK( M-KB+J ) = WORK( M-KB+J )*AB( 1, J+KA-1 ) + AB( 1, J+KA-1 ) = WORK( N+M-KB+J )*AB( 1, J+KA-1 ) + 640 CONTINUE + IF( UPDATE ) THEN + IF( I+K.GT.KA1 .AND. K.LE.KBT ) + $ WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K ) + END IF + 650 CONTINUE +* + DO 690 K = KB, 1, -1 + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL DLARGV( NR, AB( 1, J1+KA ), INCA, WORK( M-KB+J1 ), + $ KA1, WORK( N+M-KB+J1 ), KA1 ) +* +* apply rotations in 2nd set from the left +* + DO 660 L = 1, KA - 1 + CALL DLARTV( NR, AB( KA1-L, J1+L ), INCA, + $ AB( KA-L, J1+L ), INCA, + $ WORK( N+M-KB+J1 ), WORK( M-KB+J1 ), KA1 ) + 660 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL DLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ), + $ AB( KA, J1 ), INCA, WORK( N+M-KB+J1 ), + $ WORK( M-KB+J1 ), KA1 ) +* + END IF +* +* start applying rotations in 2nd set from the right +* + DO 670 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, + $ WORK( N+M-KB+J1T ), WORK( M-KB+J1T ), + $ KA1 ) + 670 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 680 J = J1, J2, KA1 + CALL DROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ WORK( N+M-KB+J ), WORK( M-KB+J ) ) + 680 CONTINUE + END IF + 690 CONTINUE +* + DO 710 K = 1, KB - 1 + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 +* +* finish applying rotations in 1st set from the right +* + DO 700 L = KB - K, 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, WORK( N+J1T ), + $ WORK( J1T ), KA1 ) + 700 CONTINUE + 710 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 720 J = 2, MIN( I+KB, M ) - 2*KA - 1 + WORK( N+J ) = WORK( N+J+KA ) + WORK( J ) = WORK( J+KA ) + 720 CONTINUE + END IF +* + ELSE +* +* Transform A, working with the lower triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**T * A * inv(S(i)) +* + BII = BB( 1, I ) + DO 730 J = I1, I + AB( I-J+1, J ) = AB( I-J+1, J ) / BII + 730 CONTINUE + DO 740 J = I, MIN( N, I+KA ) + AB( J-I+1, I ) = AB( J-I+1, I ) / BII + 740 CONTINUE + DO 770 K = I + 1, I + KBT + DO 750 J = K, I + KBT + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ BB( J-I+1, I )*AB( K-I+1, I ) - + $ BB( K-I+1, I )*AB( J-I+1, I ) + + $ AB( 1, I )*BB( J-I+1, I )* + $ BB( K-I+1, I ) + 750 CONTINUE + DO 760 J = I + KBT + 1, MIN( N, I+KA ) + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ BB( K-I+1, I )*AB( J-I+1, I ) + 760 CONTINUE + 770 CONTINUE + DO 790 J = I1, I + DO 780 K = I + 1, MIN( J+KA, I+KBT ) + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ BB( K-I+1, I )*AB( I-J+1, J ) + 780 CONTINUE + 790 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL DSCAL( NX, ONE / BII, X( 1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL DGER( NX, KBT, -ONE, X( 1, I ), 1, BB( 2, I ), 1, + $ X( 1, I+1 ), LDX ) + END IF +* +* store a(i,i1) in RA1 for use in next loop over K +* + RA1 = AB( I-I1+1, I1 ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions up toward the top of the band +* + DO 840 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN +* +* generate rotation to annihilate a(i,i+k-ka-1) +* + CALL DLARTG( AB( KA1-K, I+K-KA ), RA1, + $ WORK( N+I+K-KA ), WORK( I+K-KA ), RA ) +* +* create nonzero element a(i+k,i+k-ka-1) outside the +* band and store it in WORK(m-kb+i+k) +* + T = -BB( K+1, I )*RA1 + WORK( M-KB+I+K ) = WORK( N+I+K-KA )*T - + $ WORK( I+K-KA )*AB( KA1, I+K-KA ) + AB( KA1, I+K-KA ) = WORK( I+K-KA )*T + + $ WORK( N+I+K-KA )*AB( KA1, I+K-KA ) + RA1 = RA + END IF + END IF + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MIN( J2, I-2*KA+K-1 ) + ELSE + J2T = J2 + END IF + NRT = ( J2T+KA-1 ) / KA1 + DO 800 J = J1, J2T, KA1 +* +* create nonzero element a(j+ka,j-1) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( KA1, J-1 ) + AB( KA1, J-1 ) = WORK( N+J )*AB( KA1, J-1 ) + 800 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL DLARGV( NRT, AB( KA1, J1 ), INCA, WORK( J1 ), KA1, + $ WORK( N+J1 ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the right +* + DO 810 L = 1, KA - 1 + CALL DLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ), + $ INCA, WORK( N+J1 ), WORK( J1 ), KA1 ) + 810 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL DLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ), + $ AB( 2, J1-1 ), INCA, WORK( N+J1 ), + $ WORK( J1 ), KA1 ) +* + END IF +* +* start applying rotations in 1st set from the left +* + DO 820 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ WORK( N+J1T ), WORK( J1T ), KA1 ) + 820 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 830 J = J1, J2, KA1 + CALL DROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ WORK( N+J ), WORK( J ) ) + 830 CONTINUE + END IF + 840 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.GT.0 .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i+kbt,i+kbt-ka-1) outside the +* band and store it in WORK(m-kb+i+kbt) +* + WORK( M-KB+I+KBT ) = -BB( KBT+1, I )*RA1 + END IF + END IF +* + DO 880 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1 + ELSE + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + END IF +* +* finish applying rotations in 2nd set from the left +* + DO 850 L = KB - K, 1, -1 + NRT = ( J2+KA+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( KA1-L+1, J1T+L-1 ), INCA, + $ AB( KA1-L, J1T+L-1 ), INCA, + $ WORK( N+M-KB+J1T+KA ), + $ WORK( M-KB+J1T+KA ), KA1 ) + 850 CONTINUE + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + DO 860 J = J1, J2, KA1 + WORK( M-KB+J ) = WORK( M-KB+J+KA ) + WORK( N+M-KB+J ) = WORK( N+M-KB+J+KA ) + 860 CONTINUE + DO 870 J = J1, J2, KA1 +* +* create nonzero element a(j+ka,j-1) outside the band +* and store it in WORK(m-kb+j) +* + WORK( M-KB+J ) = WORK( M-KB+J )*AB( KA1, J-1 ) + AB( KA1, J-1 ) = WORK( N+M-KB+J )*AB( KA1, J-1 ) + 870 CONTINUE + IF( UPDATE ) THEN + IF( I+K.GT.KA1 .AND. K.LE.KBT ) + $ WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K ) + END IF + 880 CONTINUE +* + DO 920 K = KB, 1, -1 + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL DLARGV( NR, AB( KA1, J1 ), INCA, WORK( M-KB+J1 ), + $ KA1, WORK( N+M-KB+J1 ), KA1 ) +* +* apply rotations in 2nd set from the right +* + DO 890 L = 1, KA - 1 + CALL DLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ), + $ INCA, WORK( N+M-KB+J1 ), WORK( M-KB+J1 ), + $ KA1 ) + 890 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL DLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ), + $ AB( 2, J1-1 ), INCA, WORK( N+M-KB+J1 ), + $ WORK( M-KB+J1 ), KA1 ) +* + END IF +* +* start applying rotations in 2nd set from the left +* + DO 900 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ WORK( N+M-KB+J1T ), WORK( M-KB+J1T ), + $ KA1 ) + 900 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 910 J = J1, J2, KA1 + CALL DROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ WORK( N+M-KB+J ), WORK( M-KB+J ) ) + 910 CONTINUE + END IF + 920 CONTINUE +* + DO 940 K = 1, KB - 1 + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 +* +* finish applying rotations in 1st set from the left +* + DO 930 L = KB - K, 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ WORK( N+J1T ), WORK( J1T ), KA1 ) + 930 CONTINUE + 940 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 950 J = 2, MIN( I+KB, M ) - 2*KA - 1 + WORK( N+J ) = WORK( N+J+KA ) + WORK( J ) = WORK( J+KA ) + 950 CONTINUE + END IF +* + END IF +* + GO TO 490 +* +* End of DSBGST +* + END diff --git a/costa/native/external/lapack/dsbgv.f b/costa/native/external/lapack/dsbgv.f new file mode 100644 index 000000000..79f114af1 --- /dev/null +++ b/costa/native/external/lapack/dsbgv.f @@ -0,0 +1,189 @@ + SUBROUTINE DSBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, + $ LDZ, WORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), BB( LDBB, * ), W( * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSBGV computes all the eigenvalues, and optionally, the eigenvectors +* of a real generalized symmetric-definite banded eigenproblem, of +* the form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric +* and banded, and B is also positive definite. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* KA (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KA >= 0. +* +* KB (input) INTEGER +* The number of superdiagonals of the matrix B if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KB >= 0. +* +* AB (input/output) DOUBLE PRECISION array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first ka+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +* +* On exit, the contents of AB are destroyed. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KA+1. +* +* BB (input/output) DOUBLE PRECISION array, dimension (LDBB, N) +* On entry, the upper or lower triangle of the symmetric band +* matrix B, stored in the first kb+1 rows of the array. The +* j-th column of B is stored in the j-th column of the array BB +* as follows: +* if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +* if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +* +* On exit, the factor S from the split Cholesky factorization +* B = S**T*S, as returned by DPBSTF. +* +* LDBB (input) INTEGER +* The leading dimension of the array BB. LDBB >= KB+1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +* eigenvectors, with the i-th column of Z holding the +* eigenvector associated with W(i). The eigenvectors are +* normalized so that Z**T*B*Z = I. +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= N. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is: +* <= N: the algorithm failed to converge: +* i off-diagonal elements of an intermediate +* tridiagonal form did not converge to zero; +* > N: if INFO = N + i, for 1 <= i <= N, then DPBSTF +* returned INFO = i: B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, WANTZ + CHARACTER VECT + INTEGER IINFO, INDE, INDWRK +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DPBSTF, DSBGST, DSBTRD, DSTEQR, DSTERF, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSBGV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a split Cholesky factorization of B. +* + CALL DPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + INDE = 1 + INDWRK = INDE + N + CALL DSBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ, + $ WORK( INDWRK ), IINFO ) +* +* Reduce to tridiagonal form. +* + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL DSBTRD( VECT, UPLO, N, KA, AB, LDAB, W, WORK( INDE ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call SSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL DSTEQR( JOBZ, N, W, WORK( INDE ), Z, LDZ, WORK( INDWRK ), + $ INFO ) + END IF + RETURN +* +* End of DSBGV +* + END diff --git a/costa/native/external/lapack/dsbgvd.f b/costa/native/external/lapack/dsbgvd.f new file mode 100644 index 000000000..bc140a86f --- /dev/null +++ b/costa/native/external/lapack/dsbgvd.f @@ -0,0 +1,270 @@ + SUBROUTINE DSBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, + $ Z, LDZ, WORK, LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), BB( LDBB, * ), W( * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSBGVD computes all the eigenvalues, and optionally, the eigenvectors +* of a real generalized symmetric-definite banded eigenproblem, of the +* form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric and +* banded, and B is also positive definite. If eigenvectors are +* desired, it uses a divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* KA (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KA >= 0. +* +* KB (input) INTEGER +* The number of superdiagonals of the matrix B if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KB >= 0. +* +* AB (input/output) DOUBLE PRECISION array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first ka+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +* +* On exit, the contents of AB are destroyed. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KA+1. +* +* BB (input/output) DOUBLE PRECISION array, dimension (LDBB, N) +* On entry, the upper or lower triangle of the symmetric band +* matrix B, stored in the first kb+1 rows of the array. The +* j-th column of B is stored in the j-th column of the array BB +* as follows: +* if UPLO = 'U', BB(ka+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +* if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +* +* On exit, the factor S from the split Cholesky factorization +* B = S**T*S, as returned by DPBSTF. +* +* LDBB (input) INTEGER +* The leading dimension of the array BB. LDBB >= KB+1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +* eigenvectors, with the i-th column of Z holding the +* eigenvector associated with W(i). The eigenvectors are +* normalized so Z**T*B*Z = I. +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If N <= 1, LWORK >= 1. +* If JOBZ = 'N' and N > 1, LWORK >= 3*N. +* If JOBZ = 'V' and N > 1, LWORK >= 1 + 5*N + 2*N**2. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if LIWORK > 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If JOBZ = 'N' or N <= 1, LIWORK >= 1. +* If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is: +* <= N: the algorithm failed to converge: +* i off-diagonal elements of an intermediate +* tridiagonal form did not converge to zero; +* > N: if INFO = N + i, for 1 <= i <= N, then DPBSTF +* returned INFO = i: B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER VECT + INTEGER IINFO, INDE, INDWK2, INDWRK, LIWMIN, LLWRK2, + $ LWMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DLACPY, DPBSTF, DSBGST, DSBTRD, DSTEDC, + $ DSTERF, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 5*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + END IF + END IF +* + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -12 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -16 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSBGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a split Cholesky factorization of B. +* + CALL DPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + INDE = 1 + INDWRK = INDE + N + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 + CALL DSBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ, + $ WORK( INDWRK ), IINFO ) +* +* Reduce to tridiagonal form. +* + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL DSBTRD( VECT, UPLO, N, KA, AB, LDAB, W, WORK( INDE ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call SSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL DSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO ) + CALL DGEMM( 'N', 'N', N, N, N, ONE, Z, LDZ, WORK( INDWRK ), N, + $ ZERO, WORK( INDWK2 ), N ) + CALL DLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of DSBGVD +* + END diff --git a/costa/native/external/lapack/dsbgvx.f b/costa/native/external/lapack/dsbgvx.f new file mode 100644 index 000000000..600e6d809 --- /dev/null +++ b/costa/native/external/lapack/dsbgvx.f @@ -0,0 +1,371 @@ + SUBROUTINE DSBGVX( JOBZ, RANGE, UPLO, N, KA, KB, AB, LDAB, BB, + $ LDBB, Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, + $ LDZ, WORK, IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KA, KB, LDAB, LDBB, LDQ, LDZ, M, + $ N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), BB( LDBB, * ), Q( LDQ, * ), + $ W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSBGVX computes selected eigenvalues, and optionally, eigenvectors +* of a real generalized symmetric-definite banded eigenproblem, of +* the form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric +* and banded, and B is also positive definite. Eigenvalues and +* eigenvectors can be selected by specifying either all eigenvalues, +* a range of values or a range of indices for the desired eigenvalues. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* KA (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KA >= 0. +* +* KB (input) INTEGER +* The number of superdiagonals of the matrix B if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KB >= 0. +* +* AB (input/output) DOUBLE PRECISION array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first ka+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +* +* On exit, the contents of AB are destroyed. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KA+1. +* +* BB (input/output) DOUBLE PRECISION array, dimension (LDBB, N) +* On entry, the upper or lower triangle of the symmetric band +* matrix B, stored in the first kb+1 rows of the array. The +* j-th column of B is stored in the j-th column of the array BB +* as follows: +* if UPLO = 'U', BB(ka+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +* if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +* +* On exit, the factor S from the split Cholesky factorization +* B = S**T*S, as returned by DPBSTF. +* +* LDBB (input) INTEGER +* The leading dimension of the array BB. LDBB >= KB+1. +* +* Q (output) DOUBLE PRECISION array, dimension (LDQ, N) +* If JOBZ = 'V', the n-by-n matrix used in the reduction of +* A*x = (lambda)*B*x to standard form, i.e. C*x = (lambda)*x, +* and consequently C to tridiagonal form. +* If JOBZ = 'N', the array Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. If JOBZ = 'N', +* LDQ >= 1. If JOBZ = 'V', LDQ >= max(1,N). +* +* VL (input) DOUBLE PRECISION +* VU (input) DOUBLE PRECISION +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) DOUBLE PRECISION +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing A to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*DLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*DLAMCH('S'). +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +* eigenvectors, with the i-th column of Z holding the +* eigenvector associated with W(i). The eigenvectors are +* normalized so Z**T*B*Z = I. +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (7N) +* +* IWORK (workspace/output) INTEGER array, dimension (5N) +* +* IFAIL (input) INTEGER array, dimension (M) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvalues that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0 : successful exit +* < 0 : if INFO = -i, the i-th argument had an illegal value +* <= N: if INFO = i, then i eigenvectors failed to converge. +* Their indices are stored in IFAIL. +* > N : DPBSTF returned an error code; i.e., +* if INFO = N + i, for 1 <= i <= N, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, UPPER, VALEIG, WANTZ + CHARACTER ORDER, VECT + INTEGER I, IINFO, INDD, INDE, INDEE, INDIBL, INDISP, + $ INDIWO, INDWRK, ITMP1, J, JJ, NSPLIT + DOUBLE PRECISION TMP1 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMV, DLACPY, DPBSTF, DSBGST, DSBTRD, + $ DSTEBZ, DSTEIN, DSTEQR, DSTERF, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KA.LT.0 ) THEN + INFO = -5 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -8 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -10 + ELSE IF( LDQ.LT.1 ) THEN + INFO = -12 + ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN + INFO = -14 + ELSE IF( INDEIG .AND. IL.LT.1 ) THEN + INFO = -15 + ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN + INFO = -16 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -21 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSBGVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Form a split Cholesky factorization of B. +* + CALL DPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + CALL DSBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Q, LDQ, + $ WORK, IINFO ) +* +* Reduce symmetric band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDWRK = INDE + N + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL DSBTRD( VECT, UPLO, N, KA, AB, LDAB, WORK( INDD ), + $ WORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call DSTERF or SSTEQR. If this fails for some +* eigenvalue, then try DSTEBZ. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ ( ABSTOL.LE.ZERO ) ) THEN + CALL DCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL DLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL DSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, +* call DSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL DSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply transformation matrix used in reduction to tridiagonal +* form to eigenvectors returned by DSTEIN. +* + DO 20 J = 1, M + CALL DCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL DGEMV( 'N', N, N, ONE, Q, LDQ, WORK, 1, ZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* + 30 CONTINUE +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* + RETURN +* +* End of DSBGVX +* + END diff --git a/costa/native/external/lapack/dsbtrd.f b/costa/native/external/lapack/dsbtrd.f new file mode 100644 index 000000000..94b6e67ca --- /dev/null +++ b/costa/native/external/lapack/dsbtrd.f @@ -0,0 +1,553 @@ + SUBROUTINE DSBTRD( VECT, UPLO, N, KD, AB, LDAB, D, E, Q, LDQ, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO, VECT + INTEGER INFO, KD, LDAB, LDQ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), D( * ), E( * ), Q( LDQ, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* DSBTRD reduces a real symmetric band matrix A to symmetric +* tridiagonal form T by an orthogonal similarity transformation: +* Q**T * A * Q = T. +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* = 'N': do not form Q; +* = 'V': form Q; +* = 'U': update a matrix X, by forming X*Q. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* On exit, the diagonal elements of AB are overwritten by the +* diagonal elements of the tridiagonal matrix T; if KD > 0, the +* elements on the first superdiagonal (if UPLO = 'U') or the +* first subdiagonal (if UPLO = 'L') are overwritten by the +* off-diagonal elements of T; the rest of AB is overwritten by +* values generated during the reduction. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* D (output) DOUBLE PRECISION array, dimension (N) +* The diagonal elements of the tridiagonal matrix T. +* +* E (output) DOUBLE PRECISION array, dimension (N-1) +* The off-diagonal elements of the tridiagonal matrix T: +* E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. +* +* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) +* On entry, if VECT = 'U', then Q must contain an N-by-N +* matrix X; if VECT = 'N' or 'V', then Q need not be set. +* +* On exit: +* if VECT = 'V', Q contains the N-by-N orthogonal matrix Q; +* if VECT = 'U', Q contains the product X*Q; +* if VECT = 'N', the array Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. +* LDQ >= 1, and LDQ >= N if VECT = 'V' or 'U'. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Modified by Linda Kaufman, Bell Labs. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL INITQ, UPPER, WANTQ + INTEGER I, I2, IBL, INCA, INCX, IQAEND, IQB, IQEND, J, + $ J1, J1END, J1INC, J2, JEND, JIN, JINC, K, KD1, + $ KDM1, KDN, L, LAST, LEND, NQ, NR, NRT + DOUBLE PRECISION TEMP +* .. +* .. External Subroutines .. + EXTERNAL DLAR2V, DLARGV, DLARTG, DLARTV, DLASET, DROT, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INITQ = LSAME( VECT, 'V' ) + WANTQ = INITQ .OR. LSAME( VECT, 'U' ) + UPPER = LSAME( UPLO, 'U' ) + KD1 = KD + 1 + KDM1 = KD - 1 + INCX = LDAB - 1 + IQEND = 1 +* + INFO = 0 + IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD1 ) THEN + INFO = -6 + ELSE IF( LDQ.LT.MAX( 1, N ) .AND. WANTQ ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSBTRD', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Initialize Q to the unit matrix, if needed +* + IF( INITQ ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) +* +* Wherever possible, plane rotations are generated and applied in +* vector operations of length NR over the index set J1:J2:KD1. +* +* The cosines and sines of the plane rotations are stored in the +* arrays D and WORK. +* + INCA = KD1*LDAB + KDN = MIN( N-1, KD ) + IF( UPPER ) THEN +* + IF( KD.GT.1 ) THEN +* +* Reduce to tridiagonal form, working with upper triangle +* + NR = 0 + J1 = KDN + 2 + J2 = 1 +* + DO 90 I = 1, N - 2 +* +* Reduce i-th row of matrix to tridiagonal form +* + DO 80 K = KDN + 1, 2, -1 + J1 = J1 + KDN + J2 = J2 + KDN +* + IF( NR.GT.0 ) THEN +* +* generate plane rotations to annihilate nonzero +* elements which have been created outside the band +* + CALL DLARGV( NR, AB( 1, J1-1 ), INCA, WORK( J1 ), + $ KD1, D( J1 ), KD1 ) +* +* apply rotations from the right +* +* +* Dependent on the the number of diagonals either +* DLARTV or DROT is used +* + IF( NR.GE.2*KD-1 ) THEN + DO 10 L = 1, KD - 1 + CALL DLARTV( NR, AB( L+1, J1-1 ), INCA, + $ AB( L, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) + 10 CONTINUE +* + ELSE + JEND = J1 + ( NR-1 )*KD1 + DO 20 JINC = J1, JEND, KD1 + CALL DROT( KDM1, AB( 2, JINC-1 ), 1, + $ AB( 1, JINC ), 1, D( JINC ), + $ WORK( JINC ) ) + 20 CONTINUE + END IF + END IF +* +* + IF( K.GT.2 ) THEN + IF( K.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i,i+k-1) +* within the band +* + CALL DLARTG( AB( KD-K+3, I+K-2 ), + $ AB( KD-K+2, I+K-1 ), D( I+K-1 ), + $ WORK( I+K-1 ), TEMP ) + AB( KD-K+3, I+K-2 ) = TEMP +* +* apply rotation from the right +* + CALL DROT( K-3, AB( KD-K+4, I+K-2 ), 1, + $ AB( KD-K+3, I+K-1 ), 1, D( I+K-1 ), + $ WORK( I+K-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KDN - 1 + END IF +* +* apply plane rotations from both sides to diagonal +* blocks +* + IF( NR.GT.0 ) + $ CALL DLAR2V( NR, AB( KD1, J1-1 ), AB( KD1, J1 ), + $ AB( KD, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) +* +* apply plane rotations from the left +* + IF( NR.GT.0 ) THEN + IF( 2*KD-1.LT.NR ) THEN +* +* Dependent on the the number of diagonals either +* DLARTV or DROT is used +* + DO 30 L = 1, KD - 1 + IF( J2+L.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( KD-L, J1+L ), INCA, + $ AB( KD-L+1, J1+L ), INCA, + $ D( J1 ), WORK( J1 ), KD1 ) + 30 CONTINUE + ELSE + J1END = J1 + KD1*( NR-2 ) + IF( J1END.GE.J1 ) THEN + DO 40 JIN = J1, J1END, KD1 + CALL DROT( KD-1, AB( KD-1, JIN+1 ), INCX, + $ AB( KD, JIN+1 ), INCX, + $ D( JIN ), WORK( JIN ) ) + 40 CONTINUE + END IF + LEND = MIN( KDM1, N-J2 ) + LAST = J1END + KD1 + IF( LEND.GT.0 ) + $ CALL DROT( LEND, AB( KD-1, LAST+1 ), INCX, + $ AB( KD, LAST+1 ), INCX, D( LAST ), + $ WORK( LAST ) ) + END IF + END IF +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + IF( INITQ ) THEN +* +* take advantage of the fact that Q was +* initially the Identity matrix +* + IQEND = MAX( IQEND, J2 ) + I2 = MAX( 0, K-3 ) + IQAEND = 1 + I*KD + IF( K.EQ.2 ) + $ IQAEND = IQAEND + KD + IQAEND = MIN( IQAEND, IQEND ) + DO 50 J = J1, J2, KD1 + IBL = I - I2 / KDM1 + I2 = I2 + 1 + IQB = MAX( 1, J-IBL ) + NQ = 1 + IQAEND - IQB + IQAEND = MIN( IQAEND+KD, IQEND ) + CALL DROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ), + $ 1, D( J ), WORK( J ) ) + 50 CONTINUE + ELSE +* + DO 60 J = J1, J2, KD1 + CALL DROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ D( J ), WORK( J ) ) + 60 CONTINUE + END IF +* + END IF +* + IF( J2+KDN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KDN - 1 + END IF +* + DO 70 J = J1, J2, KD1 +* +* create nonzero element a(j-1,j+kd) outside the band +* and store it in WORK +* + WORK( J+KD ) = WORK( J )*AB( 1, J+KD ) + AB( 1, J+KD ) = D( J )*AB( 1, J+KD ) + 70 CONTINUE + 80 CONTINUE + 90 CONTINUE + END IF +* + IF( KD.GT.0 ) THEN +* +* copy off-diagonal elements to E +* + DO 100 I = 1, N - 1 + E( I ) = AB( KD, I+1 ) + 100 CONTINUE + ELSE +* +* set E to zero if original matrix was diagonal +* + DO 110 I = 1, N - 1 + E( I ) = ZERO + 110 CONTINUE + END IF +* +* copy diagonal elements to D +* + DO 120 I = 1, N + D( I ) = AB( KD1, I ) + 120 CONTINUE +* + ELSE +* + IF( KD.GT.1 ) THEN +* +* Reduce to tridiagonal form, working with lower triangle +* + NR = 0 + J1 = KDN + 2 + J2 = 1 +* + DO 210 I = 1, N - 2 +* +* Reduce i-th column of matrix to tridiagonal form +* + DO 200 K = KDN + 1, 2, -1 + J1 = J1 + KDN + J2 = J2 + KDN +* + IF( NR.GT.0 ) THEN +* +* generate plane rotations to annihilate nonzero +* elements which have been created outside the band +* + CALL DLARGV( NR, AB( KD1, J1-KD1 ), INCA, + $ WORK( J1 ), KD1, D( J1 ), KD1 ) +* +* apply plane rotations from one side +* +* +* Dependent on the the number of diagonals either +* DLARTV or DROT is used +* + IF( NR.GT.2*KD-1 ) THEN + DO 130 L = 1, KD - 1 + CALL DLARTV( NR, AB( KD1-L, J1-KD1+L ), INCA, + $ AB( KD1-L+1, J1-KD1+L ), INCA, + $ D( J1 ), WORK( J1 ), KD1 ) + 130 CONTINUE + ELSE + JEND = J1 + KD1*( NR-1 ) + DO 140 JINC = J1, JEND, KD1 + CALL DROT( KDM1, AB( KD, JINC-KD ), INCX, + $ AB( KD1, JINC-KD ), INCX, + $ D( JINC ), WORK( JINC ) ) + 140 CONTINUE + END IF +* + END IF +* + IF( K.GT.2 ) THEN + IF( K.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i+k-1,i) +* within the band +* + CALL DLARTG( AB( K-1, I ), AB( K, I ), + $ D( I+K-1 ), WORK( I+K-1 ), TEMP ) + AB( K-1, I ) = TEMP +* +* apply rotation from the left +* + CALL DROT( K-3, AB( K-2, I+1 ), LDAB-1, + $ AB( K-1, I+1 ), LDAB-1, D( I+K-1 ), + $ WORK( I+K-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KDN - 1 + END IF +* +* apply plane rotations from both sides to diagonal +* blocks +* + IF( NR.GT.0 ) + $ CALL DLAR2V( NR, AB( 1, J1-1 ), AB( 1, J1 ), + $ AB( 2, J1-1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) +* +* apply plane rotations from the right +* +* +* Dependent on the the number of diagonals either +* DLARTV or DROT is used +* + IF( NR.GT.0 ) THEN + IF( NR.GT.2*KD-1 ) THEN + DO 150 L = 1, KD - 1 + IF( J2+L.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( L+2, J1-1 ), INCA, + $ AB( L+1, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) + 150 CONTINUE + ELSE + J1END = J1 + KD1*( NR-2 ) + IF( J1END.GE.J1 ) THEN + DO 160 J1INC = J1, J1END, KD1 + CALL DROT( KDM1, AB( 3, J1INC-1 ), 1, + $ AB( 2, J1INC ), 1, D( J1INC ), + $ WORK( J1INC ) ) + 160 CONTINUE + END IF + LEND = MIN( KDM1, N-J2 ) + LAST = J1END + KD1 + IF( LEND.GT.0 ) + $ CALL DROT( LEND, AB( 3, LAST-1 ), 1, + $ AB( 2, LAST ), 1, D( LAST ), + $ WORK( LAST ) ) + END IF + END IF +* +* +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + IF( INITQ ) THEN +* +* take advantage of the fact that Q was +* initially the Identity matrix +* + IQEND = MAX( IQEND, J2 ) + I2 = MAX( 0, K-3 ) + IQAEND = 1 + I*KD + IF( K.EQ.2 ) + $ IQAEND = IQAEND + KD + IQAEND = MIN( IQAEND, IQEND ) + DO 170 J = J1, J2, KD1 + IBL = I - I2 / KDM1 + I2 = I2 + 1 + IQB = MAX( 1, J-IBL ) + NQ = 1 + IQAEND - IQB + IQAEND = MIN( IQAEND+KD, IQEND ) + CALL DROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ), + $ 1, D( J ), WORK( J ) ) + 170 CONTINUE + ELSE +* + DO 180 J = J1, J2, KD1 + CALL DROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ D( J ), WORK( J ) ) + 180 CONTINUE + END IF + END IF +* + IF( J2+KDN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KDN - 1 + END IF +* + DO 190 J = J1, J2, KD1 +* +* create nonzero element a(j+kd,j-1) outside the +* band and store it in WORK +* + WORK( J+KD ) = WORK( J )*AB( KD1, J ) + AB( KD1, J ) = D( J )*AB( KD1, J ) + 190 CONTINUE + 200 CONTINUE + 210 CONTINUE + END IF +* + IF( KD.GT.0 ) THEN +* +* copy off-diagonal elements to E +* + DO 220 I = 1, N - 1 + E( I ) = AB( 2, I ) + 220 CONTINUE + ELSE +* +* set E to zero if original matrix was diagonal +* + DO 230 I = 1, N - 1 + E( I ) = ZERO + 230 CONTINUE + END IF +* +* copy diagonal elements to D +* + DO 240 I = 1, N + D( I ) = AB( 1, I ) + 240 CONTINUE + END IF +* + RETURN +* +* End of DSBTRD +* + END diff --git a/costa/native/external/lapack/dsecnd.f b/costa/native/external/lapack/dsecnd.f new file mode 100644 index 000000000..b9c68c15f --- /dev/null +++ b/costa/native/external/lapack/dsecnd.f @@ -0,0 +1,34 @@ + DOUBLE PRECISION FUNCTION DSECND( ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* Purpose +* ======= +* +* DSECND returns the user time for a process in seconds. +* This version gets the time from the system function ETIME. +* +* ===================================================================== +* +* .. Local Scalars .. + REAL T1 +* .. +* .. Local Arrays .. + REAL TARRAY( 2 ) +* .. +* .. External Functions .. + REAL ETIME +C EXTERNAL ETIME +* .. +* .. Executable Statements .. +* + T1 = ETIME( TARRAY ) + DSECND = TARRAY( 1 ) + RETURN +* +* End of DSECND +* + END diff --git a/costa/native/external/lapack/dspcon.f b/costa/native/external/lapack/dspcon.f new file mode 100644 index 000000000..7c6ad1231 --- /dev/null +++ b/costa/native/external/lapack/dspcon.f @@ -0,0 +1,158 @@ + SUBROUTINE DSPCON( UPLO, N, AP, IPIV, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION AP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DSPCON estimates the reciprocal of the condition number (in the +* 1-norm) of a real symmetric packed matrix A using the factorization +* A = U*D*U**T or A = L*D*L**T computed by DSPTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by DSPTRF, stored as a +* packed triangular matrix. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by DSPTRF. +* +* ANORM (input) DOUBLE PRECISION +* The 1-norm of the original matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IP, KASE + DOUBLE PRECISION AINVNM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLACON, DSPTRS, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + IP = N*( N+1 ) / 2 + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP - I + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + IP = 1 + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP + N - I + 1 + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L') or inv(U*D*U'). +* + CALL DSPTRS( UPLO, N, 1, AP, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of DSPCON +* + END diff --git a/costa/native/external/lapack/dspev.f b/costa/native/external/lapack/dspev.f new file mode 100644 index 000000000..36da8870a --- /dev/null +++ b/costa/native/external/lapack/dspev.f @@ -0,0 +1,188 @@ + SUBROUTINE DSPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSPEV computes all the eigenvalues and, optionally, eigenvectors of a +* real symmetric matrix A in packed storage. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, AP is overwritten by values generated during the +* reduction to tridiagonal form. If UPLO = 'U', the diagonal +* and first superdiagonal of the tridiagonal matrix T overwrite +* the corresponding elements of A, and if UPLO = 'L', the +* diagonal and first subdiagonal of T overwrite the +* corresponding elements of A. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +* eigenvectors of the matrix A, with the i-th column of Z +* holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANSP + EXTERNAL LSAME, DLAMCH, DLANSP +* .. +* .. External Subroutines .. + EXTERNAL DOPGTR, DSCAL, DSPTRD, DSTEQR, DSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( UPLO, 'U' ) .OR. LSAME( UPLO, 'L' ) ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -7 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPEV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AP( 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = DLANSP( 'M', UPLO, N, AP, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL DSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + END IF +* +* Call DSPTRD to reduce symmetric packed matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + CALL DSPTRD( UPLO, N, AP, W, WORK( INDE ), WORK( INDTAU ), IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* DOPGTR to generate the orthogonal matrix, then call DSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE + INDWRK = INDTAU + N + CALL DOPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + CALL DSTEQR( JOBZ, N, W, WORK( INDE ), Z, LDZ, WORK( INDTAU ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + RETURN +* +* End of DSPEV +* + END diff --git a/costa/native/external/lapack/dspevd.f b/costa/native/external/lapack/dspevd.f new file mode 100644 index 000000000..217929e25 --- /dev/null +++ b/costa/native/external/lapack/dspevd.f @@ -0,0 +1,249 @@ + SUBROUTINE DSPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, + $ IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AP( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSPEVD computes all the eigenvalues and, optionally, eigenvectors +* of a real symmetric matrix A in packed storage. If eigenvectors are +* desired, it uses a divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, AP is overwritten by values generated during the +* reduction to tridiagonal form. If UPLO = 'U', the diagonal +* and first superdiagonal of the tridiagonal matrix T overwrite +* the corresponding elements of A, and if UPLO = 'L', the +* diagonal and first subdiagonal of T overwrite the +* corresponding elements of A. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +* eigenvectors of the matrix A, with the i-th column of Z +* holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) DOUBLE PRECISION array, +* dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If N <= 1, LWORK must be at least 1. +* If JOBZ = 'N' and N > 1, LWORK must be at least 2*N. +* If JOBZ = 'V' and N > 1, LWORK must be at least +* 1 + 6*N + N**2. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +* If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTZ + INTEGER IINFO, INDE, INDTAU, INDWRK, ISCALE, LIWMIN, + $ LLWORK, LWMIN + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANSP + EXTERNAL LSAME, DLAMCH, DLANSP +* .. +* .. External Subroutines .. + EXTERNAL DOPMTR, DSCAL, DSPTRD, DSTEDC, DSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 6*N + N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + END IF + END IF + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( UPLO, 'U' ) .OR. LSAME( UPLO, 'L' ) ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -9 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AP( 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = DLANSP( 'M', UPLO, N, AP, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL DSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + END IF +* +* Call DSPTRD to reduce symmetric packed matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + CALL DSPTRD( UPLO, N, AP, W, WORK( INDE ), WORK( INDTAU ), IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the +* tridiagonal matrix, then call DOPMTR to multiply it by the +* Householder transformations represented in AP. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + CALL DSTEDC( 'I', N, W, WORK( INDE ), Z, LDZ, WORK( INDWRK ), + $ LLWORK, IWORK, LIWORK, INFO ) + CALL DOPMTR( 'L', UPLO, 'N', N, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL DSCAL( N, ONE / SIGMA, W, 1 ) +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of DSPEVD +* + END diff --git a/costa/native/external/lapack/dspevx.f b/costa/native/external/lapack/dspevx.f new file mode 100644 index 000000000..d1ad6e9e9 --- /dev/null +++ b/costa/native/external/lapack/dspevx.f @@ -0,0 +1,377 @@ + SUBROUTINE DSPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, WORK, IWORK, IFAIL, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDZ, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION AP( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSPEVX computes selected eigenvalues and, optionally, eigenvectors +* of a real symmetric matrix A in packed storage. Eigenvalues/vectors +* can be selected by specifying either a range of values or a range of +* indices for the desired eigenvalues. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found; +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found; +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, AP is overwritten by values generated during the +* reduction to tridiagonal form. If UPLO = 'U', the diagonal +* and first superdiagonal of the tridiagonal matrix T overwrite +* the corresponding elements of A, and if UPLO = 'L', the +* diagonal and first subdiagonal of T overwrite the +* corresponding elements of A. +* +* VL (input) DOUBLE PRECISION +* VU (input) DOUBLE PRECISION +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) DOUBLE PRECISION +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing AP to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*DLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*DLAMCH('S'). +* +* See "Computing Small Singular Values of Bidiagonal Matrices +* with Guaranteed High Relative Accuracy," by Demmel and +* Kahan, LAPACK Working Note #3. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the selected eigenvalues in ascending order. +* +* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M)) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If an eigenvector fails to converge, then that column of Z +* contains the latest approximation to the eigenvector, and the +* index of the eigenvector is returned in IFAIL. +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (8*N) +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, then i eigenvectors failed to converge. +* Their indices are stored in array IFAIL. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWO, INDTAU, INDWRK, ISCALE, ITMP1, + $ J, JJ, NSPLIT + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANSP + EXTERNAL LSAME, DLAMCH, DLANSP +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DOPGTR, DOPMTR, DSCAL, DSPTRD, DSTEBZ, + $ DSTEIN, DSTEQR, DSTERF, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) ) + $ THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -7 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -9 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -14 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPEVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = AP( 1 ) + ELSE + IF( VL.LT.AP( 1 ) .AND. VU.GE.AP( 1 ) ) THEN + M = 1 + W( 1 ) = AP( 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + END IF + ANRM = DLANSP( 'M', UPLO, N, AP, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL DSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call DSPTRD to reduce symmetric packed matrix to tridiagonal form. +* + INDTAU = 1 + INDE = INDTAU + N + INDD = INDE + N + INDWRK = INDD + N + CALL DSPTRD( UPLO, N, AP, WORK( INDD ), WORK( INDE ), + $ WORK( INDTAU ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call DSTERF or DOPGTR and SSTEQR. If this fails +* for some eigenvalue, then try DSTEBZ. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ ( ABSTOL.LE.ZERO ) ) THEN + CALL DCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL DOPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 20 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL DSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by DSTEIN. +* + CALL DOPMTR( 'L', UPLO, 'N', N, M, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 20 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 40 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 30 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 30 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 40 CONTINUE + END IF +* + RETURN +* +* End of DSPEVX +* + END diff --git a/costa/native/external/lapack/dspgst.f b/costa/native/external/lapack/dspgst.f new file mode 100644 index 000000000..b6e00f25b --- /dev/null +++ b/costa/native/external/lapack/dspgst.f @@ -0,0 +1,209 @@ + SUBROUTINE DSPGST( ITYPE, UPLO, N, AP, BP, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), BP( * ) +* .. +* +* Purpose +* ======= +* +* DSPGST reduces a real symmetric-definite generalized eigenproblem +* to standard form, using packed storage. +* +* If ITYPE = 1, the problem is A*x = lambda*B*x, +* and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) +* +* If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +* B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. +* +* B must have been previously factorized as U**T*U or L*L**T by DPPTRF. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); +* = 2 or 3: compute U*A*U**T or L**T*A*L. +* +* UPLO (input) CHARACTER +* = 'U': Upper triangle of A is stored and B is factored as +* U**T*U; +* = 'L': Lower triangle of A is stored and B is factored as +* L*L**T. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, if INFO = 0, the transformed matrix, stored in the +* same format as A. +* +* BP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* The triangular factor from the Cholesky factorization of B, +* stored in the same format as A, as returned by DPPTRF. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, HALF + PARAMETER ( ONE = 1.0D0, HALF = 0.5D0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, J1, J1J1, JJ, K, K1, K1K1, KK + DOUBLE PRECISION AJJ, AKK, BJJ, BKK, CT +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DSCAL, DSPMV, DSPR2, DTPMV, DTPSV, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPGST', -INFO ) + RETURN + END IF +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U')*A*inv(U) +* +* J1 and JJ are the indices of A(1,j) and A(j,j) +* + JJ = 0 + DO 10 J = 1, N + J1 = JJ + 1 + JJ = JJ + J +* +* Compute the j-th column of the upper triangle of A +* + BJJ = BP( JJ ) + CALL DTPSV( UPLO, 'Transpose', 'Nonunit', J, BP, + $ AP( J1 ), 1 ) + CALL DSPMV( UPLO, J-1, -ONE, AP, BP( J1 ), 1, ONE, + $ AP( J1 ), 1 ) + CALL DSCAL( J-1, ONE / BJJ, AP( J1 ), 1 ) + AP( JJ ) = ( AP( JJ )-DDOT( J-1, AP( J1 ), 1, BP( J1 ), + $ 1 ) ) / BJJ + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L') +* +* KK and K1K1 are the indices of A(k,k) and A(k+1,k+1) +* + KK = 1 + DO 20 K = 1, N + K1K1 = KK + N - K + 1 +* +* Update the lower triangle of A(k:n,k:n) +* + AKK = AP( KK ) + BKK = BP( KK ) + AKK = AKK / BKK**2 + AP( KK ) = AKK + IF( K.LT.N ) THEN + CALL DSCAL( N-K, ONE / BKK, AP( KK+1 ), 1 ) + CT = -HALF*AKK + CALL DAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 ) + CALL DSPR2( UPLO, N-K, -ONE, AP( KK+1 ), 1, + $ BP( KK+1 ), 1, AP( K1K1 ) ) + CALL DAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 ) + CALL DTPSV( UPLO, 'No transpose', 'Non-unit', N-K, + $ BP( K1K1 ), AP( KK+1 ), 1 ) + END IF + KK = K1K1 + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U' +* +* K1 and KK are the indices of A(1,k) and A(k,k) +* + KK = 0 + DO 30 K = 1, N + K1 = KK + 1 + KK = KK + K +* +* Update the upper triangle of A(1:k,1:k) +* + AKK = AP( KK ) + BKK = BP( KK ) + CALL DTPMV( UPLO, 'No transpose', 'Non-unit', K-1, BP, + $ AP( K1 ), 1 ) + CT = HALF*AKK + CALL DAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 ) + CALL DSPR2( UPLO, K-1, ONE, AP( K1 ), 1, BP( K1 ), 1, + $ AP ) + CALL DAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 ) + CALL DSCAL( K-1, BKK, AP( K1 ), 1 ) + AP( KK ) = AKK*BKK**2 + 30 CONTINUE + ELSE +* +* Compute L'*A*L +* +* JJ and J1J1 are the indices of A(j,j) and A(j+1,j+1) +* + JJ = 1 + DO 40 J = 1, N + J1J1 = JJ + N - J + 1 +* +* Compute the j-th column of the lower triangle of A +* + AJJ = AP( JJ ) + BJJ = BP( JJ ) + AP( JJ ) = AJJ*BJJ + DDOT( N-J, AP( JJ+1 ), 1, + $ BP( JJ+1 ), 1 ) + CALL DSCAL( N-J, BJJ, AP( JJ+1 ), 1 ) + CALL DSPMV( UPLO, N-J, ONE, AP( J1J1 ), BP( JJ+1 ), 1, + $ ONE, AP( JJ+1 ), 1 ) + CALL DTPMV( UPLO, 'Transpose', 'Non-unit', N-J+1, + $ BP( JJ ), AP( JJ ), 1 ) + JJ = J1J1 + 40 CONTINUE + END IF + END IF + RETURN +* +* End of DSPGST +* + END diff --git a/costa/native/external/lapack/dspgv.f b/costa/native/external/lapack/dspgv.f new file mode 100644 index 000000000..672af289e --- /dev/null +++ b/costa/native/external/lapack/dspgv.f @@ -0,0 +1,196 @@ + SUBROUTINE DSPGV( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), BP( * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSPGV computes all the eigenvalues and, optionally, the eigenvectors +* of a real generalized symmetric-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +* Here A and B are assumed to be symmetric, stored in packed format, +* and B is also positive definite. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* AP (input/output) DOUBLE PRECISION array, dimension +* (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the contents of AP are destroyed. +* +* BP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* B, packed columnwise in a linear array. The j-th column of B +* is stored in the array BP as follows: +* if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +* if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +* +* On exit, the triangular factor U or L from the Cholesky +* factorization B = U**T*U or B = L*L**T, in the same storage +* format as B. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +* eigenvectors. The eigenvectors are normalized as follows: +* if ITYPE = 1 or 2, Z**T*B*Z = I; +* if ITYPE = 3, Z**T*inv(B)*Z = I. +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: DPPTRF or DSPEV returned an error code: +* <= N: if INFO = i, DSPEV failed to converge; +* i off-diagonal elements of an intermediate +* tridiagonal form did not converge to zero. +* > N: if INFO = n + i, for 1 <= i <= n, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, WANTZ + CHARACTER TRANS + INTEGER J, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DPPTRF, DSPEV, DSPGST, DTPMV, DTPSV, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPGV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL DPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL DSPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL DSPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + DO 10 J = 1, NEIG + CALL DTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, NEIG + CALL DTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF + RETURN +* +* End of DSPGV +* + END diff --git a/costa/native/external/lapack/dspgvd.f b/costa/native/external/lapack/dspgvd.f new file mode 100644 index 000000000..049c1015e --- /dev/null +++ b/costa/native/external/lapack/dspgvd.f @@ -0,0 +1,281 @@ + SUBROUTINE DSPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, + $ LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AP( * ), BP( * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSPGVD computes all the eigenvalues, and optionally, the eigenvectors +* of a real generalized symmetric-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +* B are assumed to be symmetric, stored in packed format, and B is also +* positive definite. +* If eigenvectors are desired, it uses a divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the contents of AP are destroyed. +* +* BP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* B, packed columnwise in a linear array. The j-th column of B +* is stored in the array BP as follows: +* if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +* if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +* +* On exit, the triangular factor U or L from the Cholesky +* factorization B = U**T*U or B = L*L**T, in the same storage +* format as B. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +* eigenvectors. The eigenvectors are normalized as follows: +* if ITYPE = 1 or 2, Z**T*B*Z = I; +* if ITYPE = 3, Z**T*inv(B)*Z = I. +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If N <= 1, LWORK >= 1. +* If JOBZ = 'N' and N > 1, LWORK >= 2*N. +* If JOBZ = 'V' and N > 1, LWORK >= 1 + 6*N + 2*N**2. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If JOBZ = 'N' or N <= 1, LIWORK >= 1. +* If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: DPPTRF or DSPEVD returned an error code: +* <= N: if INFO = i, DSPEVD failed to converge; +* i off-diagonal elements of an intermediate +* tridiagonal form did not converge to zero; +* > N: if INFO = N + i, for 1 <= i <= N, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER J, LGN, LIWMIN, LWMIN, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DPPTRF, DSPEVD, DSPGST, DTPMV, DTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, INT, LOG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LGN = 0 + LIWMIN = 1 + LWMIN = 1 + ELSE + LGN = INT( LOG( DBLE( N ) ) / LOG( TWO ) ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 5*N + 2*N*LGN + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + END IF + END IF +* + IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDZ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of BP. +* + CALL DPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL DSPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL DSPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) + LWMIN = MAX( DBLE( LWMIN ), DBLE( WORK( 1 ) ) ) + LIWMIN = MAX( DBLE( LIWMIN ), DBLE( IWORK( 1 ) ) ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + DO 10 J = 1, NEIG + CALL DTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, NEIG + CALL DTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of DSPGVD +* + END diff --git a/costa/native/external/lapack/dspgvx.f b/costa/native/external/lapack/dspgvx.f new file mode 100644 index 000000000..4d2c0d16d --- /dev/null +++ b/costa/native/external/lapack/dspgvx.f @@ -0,0 +1,286 @@ + SUBROUTINE DSPGVX( ITYPE, JOBZ, RANGE, UPLO, N, AP, BP, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, WORK, IWORK, + $ IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, ITYPE, IU, LDZ, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION AP( * ), BP( * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSPGVX computes selected eigenvalues, and optionally, eigenvectors +* of a real generalized symmetric-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A +* and B are assumed to be symmetric, stored in packed storage, and B +* is also positive definite. Eigenvalues and eigenvectors can be +* selected by specifying either a range of values or a range of indices +* for the desired eigenvalues. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A and B are stored; +* = 'L': Lower triangle of A and B are stored. +* +* N (input) INTEGER +* The order of the matrix pencil (A,B). N >= 0. +* +* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the contents of AP are destroyed. +* +* BP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* B, packed columnwise in a linear array. The j-th column of B +* is stored in the array BP as follows: +* if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +* if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +* +* On exit, the triangular factor U or L from the Cholesky +* factorization B = U**T*U or B = L*L**T, in the same storage +* format as B. +* +* VL (input) DOUBLE PRECISION +* VU (input) DOUBLE PRECISION +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) DOUBLE PRECISION +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing A to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*DLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*DLAMCH('S'). +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* On normal exit, the first M elements contain the selected +* eigenvalues in ascending order. +* +* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M)) +* If JOBZ = 'N', then Z is not referenced. +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* The eigenvectors are normalized as follows: +* if ITYPE = 1 or 2, Z**T*B*Z = I; +* if ITYPE = 3, Z**T*inv(B)*Z = I. +* +* If an eigenvector fails to converge, then that column of Z +* contains the latest approximation to the eigenvector, and the +* index of the eigenvector is returned in IFAIL. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (8*N) +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: DPPTRF or DSPEVX returned an error code: +* <= N: if INFO = i, DSPEVX failed to converge; +* i eigenvectors failed to converge. Their indices +* are stored in array IFAIL. +* > N: if INFO = N + i, for 1 <= i <= N, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, UPPER, VALEIG, WANTZ + CHARACTER TRANS + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DPPTRF, DSPEVX, DSPGST, DTPMV, DTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + UPPER = LSAME( UPLO, 'U' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -3 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN + INFO = -9 + ELSE IF( INDEIG .AND. IL.LT.1 ) THEN + INFO = -10 + ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN + INFO = -11 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -16 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPGVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Form a Cholesky factorization of B. +* + CALL DPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL DSPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL DSPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, ABSTOL, M, + $ W, Z, LDZ, WORK, IWORK, IFAIL, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + IF( INFO.GT.0 ) + $ M = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + DO 10 J = 1, M + CALL DTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, M + CALL DTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSPGVX +* + END diff --git a/costa/native/external/lapack/dsprfs.f b/costa/native/external/lapack/dsprfs.f new file mode 100644 index 000000000..47aaaefd8 --- /dev/null +++ b/costa/native/external/lapack/dsprfs.f @@ -0,0 +1,331 @@ + SUBROUTINE DSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, + $ FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION AFP( * ), AP( * ), B( LDB, * ), BERR( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DSPRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is symmetric indefinite +* and packed, and provides error bounds and backward error estimates +* for the solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* The upper or lower triangle of the symmetric matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* AFP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* The factored form of the matrix A. AFP contains the block +* diagonal matrix D and the multipliers used to obtain the +* factor U or L from the factorization A = U*D*U**T or +* A = L*D*L**T as computed by DSPTRF, stored as a packed +* triangular matrix. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by DSPTRF. +* +* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by DSPTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IK, J, K, KASE, KK, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DLACON, DSPMV, DSPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL DSPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK( N+1 ), + $ 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + KK = 1 + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + IK = KK + DO 40 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK + S = S + ABS( AP( IK ) )*ABS( X( I, J ) ) + IK = IK + 1 + 40 CONTINUE + WORK( K ) = WORK( K ) + ABS( AP( KK+K-1 ) )*XK + S + KK = KK + K + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + WORK( K ) = WORK( K ) + ABS( AP( KK ) )*XK + IK = KK + 1 + DO 60 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK + S = S + ABS( AP( IK ) )*ABS( X( I, J ) ) + IK = IK + 1 + 60 CONTINUE + WORK( K ) = WORK( K ) + S + KK = KK + ( N-K+1 ) + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL DSPTRS( UPLO, N, 1, AFP, IPIV, WORK( N+1 ), N, INFO ) + CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use DLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL DLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL DSPTRS( UPLO, N, 1, AFP, IPIV, WORK( N+1 ), N, + $ INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 120 CONTINUE + CALL DSPTRS( UPLO, N, 1, AFP, IPIV, WORK( N+1 ), N, + $ INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of DSPRFS +* + END diff --git a/costa/native/external/lapack/dspsv.f b/costa/native/external/lapack/dspsv.f new file mode 100644 index 000000000..412684b32 --- /dev/null +++ b/costa/native/external/lapack/dspsv.f @@ -0,0 +1,149 @@ + SUBROUTINE DSPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DSPSV computes the solution to a real system of linear equations +* A * X = B, +* where A is an N-by-N symmetric matrix stored in packed format and X +* and B are N-by-NRHS matrices. +* +* The diagonal pivoting method is used to factor A as +* A = U * D * U**T, if UPLO = 'U', or +* A = L * D * L**T, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, D is symmetric and block diagonal with 1-by-1 +* and 2-by-2 diagonal blocks. The factored form of A is then used to +* solve the system of equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**T or A = L*D*L**T as computed by DSPTRF, stored as +* a packed triangular matrix in the same storage format as A. +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D, as +* determined by DSPTRF. If IPIV(k) > 0, then rows and columns +* k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +* diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +* then rows and columns k-1 and -IPIV(k) were interchanged and +* D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +* IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +* -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +* diagonal block. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, so the solution could not be +* computed. +* +* Further Details +* =============== +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the symmetric matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = aji) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSPTRF, DSPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPSV ', -INFO ) + RETURN + END IF +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL DSPTRF( UPLO, N, AP, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL DSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* + END IF + RETURN +* +* End of DSPSV +* + END diff --git a/costa/native/external/lapack/dspsvx.f b/costa/native/external/lapack/dspsvx.f new file mode 100644 index 000000000..a414be28a --- /dev/null +++ b/costa/native/external/lapack/dspsvx.f @@ -0,0 +1,279 @@ + SUBROUTINE DSPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, + $ LDX, RCOND, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION AFP( * ), AP( * ), B( LDB, * ), BERR( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DSPSVX uses the diagonal pivoting factorization A = U*D*U**T or +* A = L*D*L**T to compute the solution to a real system of linear +* equations A * X = B, where A is an N-by-N symmetric matrix stored +* in packed format and X and B are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'N', the diagonal pivoting method is used to factor A as +* A = U * D * U**T, if UPLO = 'U', or +* A = L * D * L**T, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices and D is symmetric and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. +* +* 2. If some D(i,i)=0, so that D is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 3. The system of equations is solved for X using the factored form +* of A. +* +* 4. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of A has been +* supplied on entry. +* = 'F': On entry, AFP and IPIV contain the factored form of +* A. AP, AFP and IPIV will not be modified. +* = 'N': The matrix A will be copied to AFP and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* The upper or lower triangle of the symmetric matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* +* AFP (input or output) DOUBLE PRECISION array, dimension +* (N*(N+1)/2) +* If FACT = 'F', then AFP is an input argument and on entry +* contains the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**T or A = L*D*L**T as computed by DSPTRF, stored as +* a packed triangular matrix in the same storage format as A. +* +* If FACT = 'N', then AFP is an output argument and on exit +* contains the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**T or A = L*D*L**T as computed by DSPTRF, stored as +* a packed triangular matrix in the same storage format as A. +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains details of the interchanges and the block structure +* of D, as determined by DSPTRF. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains details of the interchanges and the block structure +* of D, as determined by DSPTRF. +* +* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) +* The N-by-NRHS right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) DOUBLE PRECISION +* The estimate of the reciprocal condition number of the matrix +* A. If RCOND is less than the machine precision (in +* particular, if RCOND = 0), the matrix is singular to working +* precision. This condition is indicated by a return code of +* INFO > 0. +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: D(i,i) is exactly zero. The factorization +* has been completed but the factor D is exactly +* singular, so the solution and error bounds could +* not be computed. RCOND = 0 is returned. +* = N+1: D is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* Further Details +* =============== +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the symmetric matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = aji) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT + DOUBLE PRECISION ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANSP + EXTERNAL LSAME, DLAMCH, DLANSP +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLACPY, DSPCON, DSPRFS, DSPTRF, DSPTRS, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL DCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 ) + CALL DSPTRF( UPLO, N, AFP, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = DLANSP( 'I', UPLO, N, AP, WORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL DSPCON( UPLO, N, AFP, IPIV, ANORM, RCOND, WORK, IWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution vectors X. +* + CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL DSPTRS( UPLO, N, NRHS, AFP, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL DSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, FERR, + $ BERR, WORK, IWORK, INFO ) +* + RETURN +* +* End of DSPSVX +* + END diff --git a/costa/native/external/lapack/dsptrd.f b/costa/native/external/lapack/dsptrd.f new file mode 100644 index 000000000..148296107 --- /dev/null +++ b/costa/native/external/lapack/dsptrd.f @@ -0,0 +1,229 @@ + SUBROUTINE DSPTRD( UPLO, N, AP, D, E, TAU, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), D( * ), E( * ), TAU( * ) +* .. +* +* Purpose +* ======= +* +* DSPTRD reduces a real symmetric matrix A stored in packed form to +* symmetric tridiagonal form T by an orthogonal similarity +* transformation: Q**T * A * Q = T. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* On exit, if UPLO = 'U', the diagonal and first superdiagonal +* of A are overwritten by the corresponding elements of the +* tridiagonal matrix T, and the elements above the first +* superdiagonal, with the array TAU, represent the orthogonal +* matrix Q as a product of elementary reflectors; if UPLO +* = 'L', the diagonal and first subdiagonal of A are over- +* written by the corresponding elements of the tridiagonal +* matrix T, and the elements below the first subdiagonal, with +* the array TAU, represent the orthogonal matrix Q as a product +* of elementary reflectors. See Further Details. +* +* D (output) DOUBLE PRECISION array, dimension (N) +* The diagonal elements of the tridiagonal matrix T: +* D(i) = A(i,i). +* +* E (output) DOUBLE PRECISION array, dimension (N-1) +* The off-diagonal elements of the tridiagonal matrix T: +* E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +* +* TAU (output) DOUBLE PRECISION array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* If UPLO = 'U', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(n-1) . . . H(2) H(1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in AP, +* overwriting A(1:i-1,i+1), and tau is stored in TAU(i). +* +* If UPLO = 'L', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(1) H(2) . . . H(n-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in AP, +* overwriting A(i+2:n,i), and tau is stored in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO, HALF + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0, + $ HALF = 1.0D0 / 2.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, I1, I1I1, II + DOUBLE PRECISION ALPHA, TAUI +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DLARFG, DSPMV, DSPR2, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPTRD', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A. +* I1 is the index in AP of A(1,I+1). +* + I1 = N*( N-1 ) / 2 + 1 + DO 10 I = N - 1, 1, -1 +* +* Generate elementary reflector H(i) = I - tau * v * v' +* to annihilate A(1:i-1,i+1) +* + CALL DLARFG( I, AP( I1+I-1 ), AP( I1 ), 1, TAUI ) + E( I ) = AP( I1+I-1 ) +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(1:i,1:i) +* + AP( I1+I-1 ) = ONE +* +* Compute y := tau * A * v storing y in TAU(1:i) +* + CALL DSPMV( UPLO, I, TAUI, AP, AP( I1 ), 1, ZERO, TAU, + $ 1 ) +* +* Compute w := y - 1/2 * tau * (y'*v) * v +* + ALPHA = -HALF*TAUI*DDOT( I, TAU, 1, AP( I1 ), 1 ) + CALL DAXPY( I, ALPHA, AP( I1 ), 1, TAU, 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w' - w * v' +* + CALL DSPR2( UPLO, I, -ONE, AP( I1 ), 1, TAU, 1, AP ) +* + AP( I1+I-1 ) = E( I ) + END IF + D( I+1 ) = AP( I1+I ) + TAU( I ) = TAUI + I1 = I1 - I + 10 CONTINUE + D( 1 ) = AP( 1 ) + ELSE +* +* Reduce the lower triangle of A. II is the index in AP of +* A(i,i) and I1I1 is the index of A(i+1,i+1). +* + II = 1 + DO 20 I = 1, N - 1 + I1I1 = II + N - I + 1 +* +* Generate elementary reflector H(i) = I - tau * v * v' +* to annihilate A(i+2:n,i) +* + CALL DLARFG( N-I, AP( II+1 ), AP( II+2 ), 1, TAUI ) + E( I ) = AP( II+1 ) +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(i+1:n,i+1:n) +* + AP( II+1 ) = ONE +* +* Compute y := tau * A * v storing y in TAU(i:n-1) +* + CALL DSPMV( UPLO, N-I, TAUI, AP( I1I1 ), AP( II+1 ), 1, + $ ZERO, TAU( I ), 1 ) +* +* Compute w := y - 1/2 * tau * (y'*v) * v +* + ALPHA = -HALF*TAUI*DDOT( N-I, TAU( I ), 1, AP( II+1 ), + $ 1 ) + CALL DAXPY( N-I, ALPHA, AP( II+1 ), 1, TAU( I ), 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w' - w * v' +* + CALL DSPR2( UPLO, N-I, -ONE, AP( II+1 ), 1, TAU( I ), 1, + $ AP( I1I1 ) ) +* + AP( II+1 ) = E( I ) + END IF + D( I ) = AP( II ) + TAU( I ) = TAUI + II = I1I1 + 20 CONTINUE + D( N ) = AP( II ) + END IF +* + RETURN +* +* End of DSPTRD +* + END diff --git a/costa/native/external/lapack/dsptrf.f b/costa/native/external/lapack/dsptrf.f new file mode 100644 index 000000000..cce2df387 --- /dev/null +++ b/costa/native/external/lapack/dsptrf.f @@ -0,0 +1,548 @@ + SUBROUTINE DSPTRF( UPLO, N, AP, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION AP( * ) +* .. +* +* Purpose +* ======= +* +* DSPTRF computes the factorization of a real symmetric matrix A stored +* in packed format using the Bunch-Kaufman diagonal pivoting method: +* +* A = U*D*U**T or A = L*D*L**T +* +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is symmetric and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L, stored as a packed triangular +* matrix overwriting A (see below for further details). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, and division by zero will occur if it +* is used to solve a system of equations. +* +* Further Details +* =============== +* +* 5-96 - Based on modifications by J. Lewis, Boeing Computer Services +* Company +* +* If UPLO = 'U', then A = U*D*U', where +* U = P(n)*U(n)* ... *P(k)U(k)* ..., +* i.e., U is a product of terms P(k)*U(k), where k decreases from n to +* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I v 0 ) k-s +* U(k) = ( 0 I 0 ) s +* ( 0 0 I ) n-k +* k-s s n-k +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +* and A(k,k), and v overwrites A(1:k-2,k-1:k). +* +* If UPLO = 'L', then A = L*D*L', where +* L = P(1)*L(1)* ... *P(k)*L(k)* ..., +* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I 0 0 ) k-1 +* L(k) = ( 0 I 0 ) s +* ( 0 v I ) n-k-s+1 +* k-1 s n-k-s+1 +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KC, KK, KNC, KP, KPC, + $ KSTEP, KX, NPP + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, D11, D12, D21, D22, R1, + $ ROWMAX, T, WK, WKM1, WKP1 +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + EXTERNAL LSAME, IDAMAX +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSPR, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPTRF', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U' using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + KC = ( N-1 )*N / 2 + 1 + 10 CONTINUE + KNC = KC +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( AP( KC+K-1 ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = IDAMAX( K-1, AP( KC ), 1 ) + COLMAX = ABS( AP( KC+IMAX-1 ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + ROWMAX = ZERO + JMAX = IMAX + KX = IMAX*( IMAX+1 ) / 2 + IMAX + DO 20 J = IMAX + 1, K + IF( ABS( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = ABS( AP( KX ) ) + JMAX = J + END IF + KX = KX + J + 20 CONTINUE + KPC = ( IMAX-1 )*IMAX / 2 + 1 + IF( IMAX.GT.1 ) THEN + JMAX = IDAMAX( IMAX-1, AP( KPC ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( AP( KPC+JMAX-1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( AP( KPC+IMAX-1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL DSWAP( KP-1, AP( KNC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 30 J = KP + 1, KK - 1 + KX = KX + J - 1 + T = AP( KNC+J-1 ) + AP( KNC+J-1 ) = AP( KX ) + AP( KX ) = T + 30 CONTINUE + T = AP( KNC+KK-1 ) + AP( KNC+KK-1 ) = AP( KPC+KP-1 ) + AP( KPC+KP-1 ) = T + IF( KSTEP.EQ.2 ) THEN + T = AP( KC+K-2 ) + AP( KC+K-2 ) = AP( KC+KP-1 ) + AP( KC+KP-1 ) = T + END IF + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)' +* + R1 = ONE / AP( KC+K-1 ) + CALL DSPR( UPLO, K-1, -R1, AP( KC ), 1, AP ) +* +* Store U(k) in column k +* + CALL DSCAL( K-1, R1, AP( KC ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )' +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )' +* + IF( K.GT.2 ) THEN +* + D12 = AP( K-1+( K-1 )*K / 2 ) + D22 = AP( K-1+( K-2 )*( K-1 ) / 2 ) / D12 + D11 = AP( K+( K-1 )*K / 2 ) / D12 + T = ONE / ( D11*D22-ONE ) + D12 = T / D12 +* + DO 50 J = K - 2, 1, -1 + WKM1 = D12*( D11*AP( J+( K-2 )*( K-1 ) / 2 )- + $ AP( J+( K-1 )*K / 2 ) ) + WK = D12*( D22*AP( J+( K-1 )*K / 2 )- + $ AP( J+( K-2 )*( K-1 ) / 2 ) ) + DO 40 I = J, 1, -1 + AP( I+( J-1 )*J / 2 ) = AP( I+( J-1 )*J / 2 ) - + $ AP( I+( K-1 )*K / 2 )*WK - + $ AP( I+( K-2 )*( K-1 ) / 2 )*WKM1 + 40 CONTINUE + AP( J+( K-1 )*K / 2 ) = WK + AP( J+( K-2 )*( K-1 ) / 2 ) = WKM1 + 50 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + KC = KNC - K + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L' using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + KC = 1 + NPP = N*( N+1 ) / 2 + 60 CONTINUE + KNC = KC +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( AP( KC ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + IDAMAX( N-K, AP( KC+1 ), 1 ) + COLMAX = ABS( AP( KC+IMAX-K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + ROWMAX = ZERO + KX = KC + IMAX - K + DO 70 J = K, IMAX - 1 + IF( ABS( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = ABS( AP( KX ) ) + JMAX = J + END IF + KX = KX + N - J + 70 CONTINUE + KPC = NPP - ( N-IMAX+1 )*( N-IMAX+2 ) / 2 + 1 + IF( IMAX.LT.N ) THEN + JMAX = IMAX + IDAMAX( N-IMAX, AP( KPC+1 ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( AP( KPC+JMAX-IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( AP( KPC ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC + N - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL DSWAP( N-KP, AP( KNC+KP-KK+1 ), 1, AP( KPC+1 ), + $ 1 ) + KX = KNC + KP - KK + DO 80 J = KK + 1, KP - 1 + KX = KX + N - J + 1 + T = AP( KNC+J-KK ) + AP( KNC+J-KK ) = AP( KX ) + AP( KX ) = T + 80 CONTINUE + T = AP( KNC ) + AP( KNC ) = AP( KPC ) + AP( KPC ) = T + IF( KSTEP.EQ.2 ) THEN + T = AP( KC+1 ) + AP( KC+1 ) = AP( KC+KP-K ) + AP( KC+KP-K ) = T + END IF + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)' +* + R1 = ONE / AP( KC ) + CALL DSPR( UPLO, N-K, -R1, AP( KC+1 ), 1, + $ AP( KC+N-K+1 ) ) +* +* Store L(k) in column K +* + CALL DSCAL( N-K, R1, AP( KC+1 ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k): columns K and K+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )' +* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )' +* + D21 = AP( K+1+( K-1 )*( 2*N-K ) / 2 ) + D11 = AP( K+1+K*( 2*N-K-1 ) / 2 ) / D21 + D22 = AP( K+( K-1 )*( 2*N-K ) / 2 ) / D21 + T = ONE / ( D11*D22-ONE ) + D21 = T / D21 +* + DO 100 J = K + 2, N + WK = D21*( D11*AP( J+( K-1 )*( 2*N-K ) / 2 )- + $ AP( J+K*( 2*N-K-1 ) / 2 ) ) + WKP1 = D21*( D22*AP( J+K*( 2*N-K-1 ) / 2 )- + $ AP( J+( K-1 )*( 2*N-K ) / 2 ) ) +* + DO 90 I = J, N + AP( I+( J-1 )*( 2*N-J ) / 2 ) = AP( I+( J-1 )* + $ ( 2*N-J ) / 2 ) - AP( I+( K-1 )*( 2*N-K ) / + $ 2 )*WK - AP( I+K*( 2*N-K-1 ) / 2 )*WKP1 + 90 CONTINUE +* + AP( J+( K-1 )*( 2*N-K ) / 2 ) = WK + AP( J+K*( 2*N-K-1 ) / 2 ) = WKP1 +* + 100 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + KC = KNC + N - K + 2 + GO TO 60 +* + END IF +* + 110 CONTINUE + RETURN +* +* End of DSPTRF +* + END diff --git a/costa/native/external/lapack/dsptri.f b/costa/native/external/lapack/dsptri.f new file mode 100644 index 000000000..211dbaa83 --- /dev/null +++ b/costa/native/external/lapack/dsptri.f @@ -0,0 +1,335 @@ + SUBROUTINE DSPTRI( UPLO, N, AP, IPIV, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION AP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DSPTRI computes the inverse of a real symmetric indefinite matrix +* A in packed storage using the factorization A = U*D*U**T or +* A = L*D*L**T computed by DSPTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* On entry, the block diagonal matrix D and the multipliers +* used to obtain the factor U or L as computed by DSPTRF, +* stored as a packed triangular matrix. +* +* On exit, if INFO = 0, the (symmetric) inverse of the original +* matrix, stored as a packed triangular matrix. The j-th column +* of inv(A) is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j; +* if UPLO = 'L', +* AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by DSPTRF. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +* inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KCNEXT, KP, KPC, KSTEP, KX, NPP + DOUBLE PRECISION AK, AKKP1, AKP1, D, T, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSPMV, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + KP = N*( N+1 ) / 2 + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP - INFO + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + KP = 1 + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP + N - INFO + 1 + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + KCNEXT = KC + K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC+K-1 ) = ONE / AP( KC+K-1 ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL DCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL DSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ), + $ 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ DDOT( K-1, WORK, 1, AP( KC ), 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( AP( KCNEXT+K-1 ) ) + AK = AP( KC+K-1 ) / T + AKP1 = AP( KCNEXT+K ) / T + AKKP1 = AP( KCNEXT+K-1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KC+K-1 ) = AKP1 / D + AP( KCNEXT+K ) = AK / D + AP( KCNEXT+K-1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL DCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL DSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ), + $ 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ DDOT( K-1, WORK, 1, AP( KC ), 1 ) + AP( KCNEXT+K-1 ) = AP( KCNEXT+K-1 ) - + $ DDOT( K-1, AP( KC ), 1, AP( KCNEXT ), + $ 1 ) + CALL DCOPY( K-1, AP( KCNEXT ), 1, WORK, 1 ) + CALL DSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, + $ AP( KCNEXT ), 1 ) + AP( KCNEXT+K ) = AP( KCNEXT+K ) - + $ DDOT( K-1, WORK, 1, AP( KCNEXT ), 1 ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT + K + 1 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + KPC = ( KP-1 )*KP / 2 + 1 + CALL DSWAP( KP-1, AP( KC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 40 J = KP + 1, K - 1 + KX = KX + J - 1 + TEMP = AP( KC+J-1 ) + AP( KC+J-1 ) = AP( KX ) + AP( KX ) = TEMP + 40 CONTINUE + TEMP = AP( KC+K-1 ) + AP( KC+K-1 ) = AP( KPC+KP-1 ) + AP( KPC+KP-1 ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC+K+K-1 ) + AP( KC+K+K-1 ) = AP( KC+K+KP-1 ) + AP( KC+K+KP-1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + KC = KCNEXT + GO TO 30 + 50 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + NPP = N*( N+1 ) / 2 + K = N + KC = NPP + 60 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 80 +* + KCNEXT = KC - ( N-K+2 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC ) = ONE / AP( KC ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL DCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL DSPMV( UPLO, N-K, -ONE, AP( KC+N-K+1 ), WORK, 1, + $ ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - DDOT( N-K, WORK, 1, AP( KC+1 ), 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( AP( KCNEXT+1 ) ) + AK = AP( KCNEXT ) / T + AKP1 = AP( KC ) / T + AKKP1 = AP( KCNEXT+1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KCNEXT ) = AKP1 / D + AP( KC ) = AK / D + AP( KCNEXT+1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL DCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL DSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1, + $ ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - DDOT( N-K, WORK, 1, AP( KC+1 ), 1 ) + AP( KCNEXT+1 ) = AP( KCNEXT+1 ) - + $ DDOT( N-K, AP( KC+1 ), 1, + $ AP( KCNEXT+2 ), 1 ) + CALL DCOPY( N-K, AP( KCNEXT+2 ), 1, WORK, 1 ) + CALL DSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1, + $ ZERO, AP( KCNEXT+2 ), 1 ) + AP( KCNEXT ) = AP( KCNEXT ) - + $ DDOT( N-K, WORK, 1, AP( KCNEXT+2 ), 1 ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT - ( N-K+3 ) + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + KPC = NPP - ( N-KP+1 )*( N-KP+2 ) / 2 + 1 + IF( KP.LT.N ) + $ CALL DSWAP( N-KP, AP( KC+KP-K+1 ), 1, AP( KPC+1 ), 1 ) + KX = KC + KP - K + DO 70 J = K + 1, KP - 1 + KX = KX + N - J + 1 + TEMP = AP( KC+J-K ) + AP( KC+J-K ) = AP( KX ) + AP( KX ) = TEMP + 70 CONTINUE + TEMP = AP( KC ) + AP( KC ) = AP( KPC ) + AP( KPC ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC-N+K-1 ) + AP( KC-N+K-1 ) = AP( KC-N+KP-1 ) + AP( KC-N+KP-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + KC = KCNEXT + GO TO 60 + 80 CONTINUE + END IF +* + RETURN +* +* End of DSPTRI +* + END diff --git a/costa/native/external/lapack/dsptrs.f b/costa/native/external/lapack/dsptrs.f new file mode 100644 index 000000000..d71634200 --- /dev/null +++ b/costa/native/external/lapack/dsptrs.f @@ -0,0 +1,378 @@ + SUBROUTINE DSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DSPTRS solves a system of linear equations A*X = B with a real +* symmetric matrix A stored in packed format using the factorization +* A = U*D*U**T or A = L*D*L**T computed by DSPTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by DSPTRF, stored as a +* packed triangular matrix. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by DSPTRF. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KP + DOUBLE PRECISION AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DGER, DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U'. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + KC = KC - K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL DGER( K-1, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL DSCAL( NRHS, ONE / AP( KC+K-1 ), B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL DSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL DGER( K-2, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL DGER( K-2, NRHS, -ONE, AP( KC-( K-1 ) ), 1, + $ B( K-1, 1 ), LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+K-2 ) + AKM1 = AP( KC-1 ) / AKM1K + AK = AP( KC+K-1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / AKM1K + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + KC = KC - K + 1 + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U'*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U'(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL DGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ), + $ 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + K + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U'(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + CALL DGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ), + $ 1, ONE, B( K, 1 ), LDB ) + CALL DGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, + $ AP( KC+K ), 1, ONE, B( K+1, 1 ), LDB ) +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + 2*K + 1 + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L'. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL DGER( N-K, NRHS, -ONE, AP( KC+1 ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL DSCAL( NRHS, ONE / AP( KC ), B( K, 1 ), LDB ) + KC = KC + N - K + 1 + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL DSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL DGER( N-K-1, NRHS, -ONE, AP( KC+2 ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL DGER( N-K-1, NRHS, -ONE, AP( KC+N-K+2 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+1 ) + AKM1 = AP( KC ) / AKM1K + AK = AP( KC+N-K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / AKM1K + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + KC = KC + 2*( N-K ) + 1 + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L'*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + KC = KC - ( N-K+1 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L'(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL DGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L'(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL DGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB ) + CALL DGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC-( N-K ) ), 1, ONE, B( K-1, 1 ), + $ LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC - ( N-K+2 ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of DSPTRS +* + END diff --git a/costa/native/external/lapack/dstebz.f b/costa/native/external/lapack/dstebz.f new file mode 100644 index 000000000..349972e85 --- /dev/null +++ b/costa/native/external/lapack/dstebz.f @@ -0,0 +1,652 @@ + SUBROUTINE DSTEBZ( RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL, D, E, + $ M, NSPLIT, W, IBLOCK, ISPLIT, WORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER ORDER, RANGE + INTEGER IL, INFO, IU, M, N, NSPLIT + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), ISPLIT( * ), IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DSTEBZ computes the eigenvalues of a symmetric tridiagonal +* matrix T. The user may ask for all eigenvalues, all eigenvalues +* in the half-open interval (VL, VU], or the IL-th through IU-th +* eigenvalues. +* +* To avoid overflow, the matrix must be scaled so that its +* largest element is no greater than overflow**(1/2) * +* underflow**(1/4) in absolute value, and for greatest +* accuracy, it should not be much smaller than that. +* +* See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal +* Matrix", Report CS41, Computer Science Dept., Stanford +* University, July 21, 1966. +* +* Arguments +* ========= +* +* RANGE (input) CHARACTER +* = 'A': ("All") all eigenvalues will be found. +* = 'V': ("Value") all eigenvalues in the half-open interval +* (VL, VU] will be found. +* = 'I': ("Index") the IL-th through IU-th eigenvalues (of the +* entire matrix) will be found. +* +* ORDER (input) CHARACTER +* = 'B': ("By Block") the eigenvalues will be grouped by +* split-off block (see IBLOCK, ISPLIT) and +* ordered from smallest to largest within +* the block. +* = 'E': ("Entire matrix") +* the eigenvalues for the entire matrix +* will be ordered from smallest to +* largest. +* +* N (input) INTEGER +* The order of the tridiagonal matrix T. N >= 0. +* +* VL (input) DOUBLE PRECISION +* VU (input) DOUBLE PRECISION +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. Eigenvalues less than or equal +* to VL, or greater than VU, will not be returned. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) DOUBLE PRECISION +* The absolute tolerance for the eigenvalues. An eigenvalue +* (or cluster) is considered to be located if it has been +* determined to lie in an interval whose width is ABSTOL or +* less. If ABSTOL is less than or equal to zero, then ULP*|T| +* will be used, where |T| means the 1-norm of T. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*DLAMCH('S'), not zero. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the tridiagonal matrix T. +* +* E (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) off-diagonal elements of the tridiagonal matrix T. +* +* M (output) INTEGER +* The actual number of eigenvalues found. 0 <= M <= N. +* (See also the description of INFO=2,3.) +* +* NSPLIT (output) INTEGER +* The number of diagonal blocks in the matrix T. +* 1 <= NSPLIT <= N. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* On exit, the first M elements of W will contain the +* eigenvalues. (DSTEBZ may use the remaining N-M elements as +* workspace.) +* +* IBLOCK (output) INTEGER array, dimension (N) +* At each row/column j where E(j) is zero or small, the +* matrix T is considered to split into a block diagonal +* matrix. On exit, if INFO = 0, IBLOCK(i) specifies to which +* block (from 1 to the number of blocks) the eigenvalue W(i) +* belongs. (DSTEBZ may use the remaining N-M elements as +* workspace.) +* +* ISPLIT (output) INTEGER array, dimension (N) +* The splitting points, at which T breaks up into submatrices. +* The first submatrix consists of rows/columns 1 to ISPLIT(1), +* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), +* etc., and the NSPLIT-th consists of rows/columns +* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. +* (Only the first NSPLIT elements will actually be used, but +* since the user cannot know a priori what value NSPLIT will +* have, N words must be reserved for ISPLIT.) +* +* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) +* +* IWORK (workspace) INTEGER array, dimension (3*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: some or all of the eigenvalues failed to converge or +* were not computed: +* =1 or 3: Bisection failed to converge for some +* eigenvalues; these eigenvalues are flagged by a +* negative block number. The effect is that the +* eigenvalues may not be as accurate as the +* absolute and relative tolerances. This is +* generally caused by unexpectedly inaccurate +* arithmetic. +* =2 or 3: RANGE='I' only: Not all of the eigenvalues +* IL:IU were found. +* Effect: M < IU+1-IL +* Cause: non-monotonic arithmetic, causing the +* Sturm sequence to be non-monotonic. +* Cure: recalculate, using RANGE='A', and pick +* out eigenvalues IL:IU. In some cases, +* increasing the PARAMETER "FUDGE" may +* make things work. +* = 4: RANGE='I', and the Gershgorin interval +* initially used was too small. No eigenvalues +* were computed. +* Probable cause: your machine has sloppy +* floating-point arithmetic. +* Cure: Increase the PARAMETER "FUDGE", +* recompile, and try again. +* +* Internal Parameters +* =================== +* +* RELFAC DOUBLE PRECISION, default = 2.0e0 +* The relative tolerance. An interval (a,b] lies within +* "relative tolerance" if b-a < RELFAC*ulp*max(|a|,|b|), +* where "ulp" is the machine precision (distance from 1 to +* the next larger floating point number.) +* +* FUDGE DOUBLE PRECISION, default = 2 +* A "fudge factor" to widen the Gershgorin intervals. Ideally, +* a value of 1 should work, but on machines with sloppy +* arithmetic, this needs to be larger. The default for +* publicly released versions should be large enough to handle +* the worst machine around. Note that this has no effect +* on accuracy of the solution. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, HALF + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ HALF = 1.0D0 / TWO ) + DOUBLE PRECISION FUDGE, RELFAC + PARAMETER ( FUDGE = 2.0D0, RELFAC = 2.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL NCNVRG, TOOFEW + INTEGER IB, IBEGIN, IDISCL, IDISCU, IE, IEND, IINFO, + $ IM, IN, IOFF, IORDER, IOUT, IRANGE, ITMAX, + $ ITMP1, IW, IWOFF, J, JB, JDISC, JE, NB, NWL, + $ NWU + DOUBLE PRECISION ATOLI, BNORM, GL, GU, PIVMIN, RTOLI, SAFEMN, + $ TMP1, TMP2, TNORM, ULP, WKILL, WL, WLU, WU, WUL +* .. +* .. Local Arrays .. + INTEGER IDUMMA( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, ILAENV, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLAEBZ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Decode RANGE +* + IF( LSAME( RANGE, 'A' ) ) THEN + IRANGE = 1 + ELSE IF( LSAME( RANGE, 'V' ) ) THEN + IRANGE = 2 + ELSE IF( LSAME( RANGE, 'I' ) ) THEN + IRANGE = 3 + ELSE + IRANGE = 0 + END IF +* +* Decode ORDER +* + IF( LSAME( ORDER, 'B' ) ) THEN + IORDER = 2 + ELSE IF( LSAME( ORDER, 'E' ) ) THEN + IORDER = 1 + ELSE + IORDER = 0 + END IF +* +* Check for Errors +* + IF( IRANGE.LE.0 ) THEN + INFO = -1 + ELSE IF( IORDER.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( IRANGE.EQ.2 ) THEN + IF( VL.GE.VU ) + $ INFO = -5 + ELSE IF( IRANGE.EQ.3 .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) + $ THEN + INFO = -6 + ELSE IF( IRANGE.EQ.3 .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) + $ THEN + INFO = -7 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSTEBZ', -INFO ) + RETURN + END IF +* +* Initialize error flags +* + INFO = 0 + NCNVRG = .FALSE. + TOOFEW = .FALSE. +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* +* Simplifications: +* + IF( IRANGE.EQ.3 .AND. IL.EQ.1 .AND. IU.EQ.N ) + $ IRANGE = 1 +* +* Get machine constants +* NB is the minimum vector length for vector bisection, or 0 +* if only scalar is to be done. +* + SAFEMN = DLAMCH( 'S' ) + ULP = DLAMCH( 'P' ) + RTOLI = ULP*RELFAC + NB = ILAENV( 1, 'DSTEBZ', ' ', N, -1, -1, -1 ) + IF( NB.LE.1 ) + $ NB = 0 +* +* Special Case when N=1 +* + IF( N.EQ.1 ) THEN + NSPLIT = 1 + ISPLIT( 1 ) = 1 + IF( IRANGE.EQ.2 .AND. ( VL.GE.D( 1 ) .OR. VU.LT.D( 1 ) ) ) THEN + M = 0 + ELSE + W( 1 ) = D( 1 ) + IBLOCK( 1 ) = 1 + M = 1 + END IF + RETURN + END IF +* +* Compute Splitting Points +* + NSPLIT = 1 + WORK( N ) = ZERO + PIVMIN = ONE +* +*DIR$ NOVECTOR + DO 10 J = 2, N + TMP1 = E( J-1 )**2 + IF( ABS( D( J )*D( J-1 ) )*ULP**2+SAFEMN.GT.TMP1 ) THEN + ISPLIT( NSPLIT ) = J - 1 + NSPLIT = NSPLIT + 1 + WORK( J-1 ) = ZERO + ELSE + WORK( J-1 ) = TMP1 + PIVMIN = MAX( PIVMIN, TMP1 ) + END IF + 10 CONTINUE + ISPLIT( NSPLIT ) = N + PIVMIN = PIVMIN*SAFEMN +* +* Compute Interval and ATOLI +* + IF( IRANGE.EQ.3 ) THEN +* +* RANGE='I': Compute the interval containing eigenvalues +* IL through IU. +* +* Compute Gershgorin interval for entire (split) matrix +* and use it as the initial interval +* + GU = D( 1 ) + GL = D( 1 ) + TMP1 = ZERO +* + DO 20 J = 1, N - 1 + TMP2 = SQRT( WORK( J ) ) + GU = MAX( GU, D( J )+TMP1+TMP2 ) + GL = MIN( GL, D( J )-TMP1-TMP2 ) + TMP1 = TMP2 + 20 CONTINUE +* + GU = MAX( GU, D( N )+TMP1 ) + GL = MIN( GL, D( N )-TMP1 ) + TNORM = MAX( ABS( GL ), ABS( GU ) ) + GL = GL - FUDGE*TNORM*ULP*N - FUDGE*TWO*PIVMIN + GU = GU + FUDGE*TNORM*ULP*N + FUDGE*PIVMIN +* +* Compute Iteration parameters +* + ITMAX = INT( ( LOG( TNORM+PIVMIN )-LOG( PIVMIN ) ) / + $ LOG( TWO ) ) + 2 + IF( ABSTOL.LE.ZERO ) THEN + ATOLI = ULP*TNORM + ELSE + ATOLI = ABSTOL + END IF +* + WORK( N+1 ) = GL + WORK( N+2 ) = GL + WORK( N+3 ) = GU + WORK( N+4 ) = GU + WORK( N+5 ) = GL + WORK( N+6 ) = GU + IWORK( 1 ) = -1 + IWORK( 2 ) = -1 + IWORK( 3 ) = N + 1 + IWORK( 4 ) = N + 1 + IWORK( 5 ) = IL - 1 + IWORK( 6 ) = IU +* + CALL DLAEBZ( 3, ITMAX, N, 2, 2, NB, ATOLI, RTOLI, PIVMIN, D, E, + $ WORK, IWORK( 5 ), WORK( N+1 ), WORK( N+5 ), IOUT, + $ IWORK, W, IBLOCK, IINFO ) +* + IF( IWORK( 6 ).EQ.IU ) THEN + WL = WORK( N+1 ) + WLU = WORK( N+3 ) + NWL = IWORK( 1 ) + WU = WORK( N+4 ) + WUL = WORK( N+2 ) + NWU = IWORK( 4 ) + ELSE + WL = WORK( N+2 ) + WLU = WORK( N+4 ) + NWL = IWORK( 2 ) + WU = WORK( N+3 ) + WUL = WORK( N+1 ) + NWU = IWORK( 3 ) + END IF +* + IF( NWL.LT.0 .OR. NWL.GE.N .OR. NWU.LT.1 .OR. NWU.GT.N ) THEN + INFO = 4 + RETURN + END IF + ELSE +* +* RANGE='A' or 'V' -- Set ATOLI +* + TNORM = MAX( ABS( D( 1 ) )+ABS( E( 1 ) ), + $ ABS( D( N ) )+ABS( E( N-1 ) ) ) +* + DO 30 J = 2, N - 1 + TNORM = MAX( TNORM, ABS( D( J ) )+ABS( E( J-1 ) )+ + $ ABS( E( J ) ) ) + 30 CONTINUE +* + IF( ABSTOL.LE.ZERO ) THEN + ATOLI = ULP*TNORM + ELSE + ATOLI = ABSTOL + END IF +* + IF( IRANGE.EQ.2 ) THEN + WL = VL + WU = VU + ELSE + WL = ZERO + WU = ZERO + END IF + END IF +* +* Find Eigenvalues -- Loop Over Blocks and recompute NWL and NWU. +* NWL accumulates the number of eigenvalues .le. WL, +* NWU accumulates the number of eigenvalues .le. WU +* + M = 0 + IEND = 0 + INFO = 0 + NWL = 0 + NWU = 0 +* + DO 70 JB = 1, NSPLIT + IOFF = IEND + IBEGIN = IOFF + 1 + IEND = ISPLIT( JB ) + IN = IEND - IOFF +* + IF( IN.EQ.1 ) THEN +* +* Special Case -- IN=1 +* + IF( IRANGE.EQ.1 .OR. WL.GE.D( IBEGIN )-PIVMIN ) + $ NWL = NWL + 1 + IF( IRANGE.EQ.1 .OR. WU.GE.D( IBEGIN )-PIVMIN ) + $ NWU = NWU + 1 + IF( IRANGE.EQ.1 .OR. ( WL.LT.D( IBEGIN )-PIVMIN .AND. WU.GE. + $ D( IBEGIN )-PIVMIN ) ) THEN + M = M + 1 + W( M ) = D( IBEGIN ) + IBLOCK( M ) = JB + END IF + ELSE +* +* General Case -- IN > 1 +* +* Compute Gershgorin Interval +* and use it as the initial interval +* + GU = D( IBEGIN ) + GL = D( IBEGIN ) + TMP1 = ZERO +* + DO 40 J = IBEGIN, IEND - 1 + TMP2 = ABS( E( J ) ) + GU = MAX( GU, D( J )+TMP1+TMP2 ) + GL = MIN( GL, D( J )-TMP1-TMP2 ) + TMP1 = TMP2 + 40 CONTINUE +* + GU = MAX( GU, D( IEND )+TMP1 ) + GL = MIN( GL, D( IEND )-TMP1 ) + BNORM = MAX( ABS( GL ), ABS( GU ) ) + GL = GL - FUDGE*BNORM*ULP*IN - FUDGE*PIVMIN + GU = GU + FUDGE*BNORM*ULP*IN + FUDGE*PIVMIN +* +* Compute ATOLI for the current submatrix +* + IF( ABSTOL.LE.ZERO ) THEN + ATOLI = ULP*MAX( ABS( GL ), ABS( GU ) ) + ELSE + ATOLI = ABSTOL + END IF +* + IF( IRANGE.GT.1 ) THEN + IF( GU.LT.WL ) THEN + NWL = NWL + IN + NWU = NWU + IN + GO TO 70 + END IF + GL = MAX( GL, WL ) + GU = MIN( GU, WU ) + IF( GL.GE.GU ) + $ GO TO 70 + END IF +* +* Set Up Initial Interval +* + WORK( N+1 ) = GL + WORK( N+IN+1 ) = GU + CALL DLAEBZ( 1, 0, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN, + $ D( IBEGIN ), E( IBEGIN ), WORK( IBEGIN ), + $ IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IM, + $ IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO ) +* + NWL = NWL + IWORK( 1 ) + NWU = NWU + IWORK( IN+1 ) + IWOFF = M - IWORK( 1 ) +* +* Compute Eigenvalues +* + ITMAX = INT( ( LOG( GU-GL+PIVMIN )-LOG( PIVMIN ) ) / + $ LOG( TWO ) ) + 2 + CALL DLAEBZ( 2, ITMAX, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN, + $ D( IBEGIN ), E( IBEGIN ), WORK( IBEGIN ), + $ IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IOUT, + $ IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO ) +* +* Copy Eigenvalues Into W and IBLOCK +* Use -JB for block number for unconverged eigenvalues. +* + DO 60 J = 1, IOUT + TMP1 = HALF*( WORK( J+N )+WORK( J+IN+N ) ) +* +* Flag non-convergence. +* + IF( J.GT.IOUT-IINFO ) THEN + NCNVRG = .TRUE. + IB = -JB + ELSE + IB = JB + END IF + DO 50 JE = IWORK( J ) + 1 + IWOFF, + $ IWORK( J+IN ) + IWOFF + W( JE ) = TMP1 + IBLOCK( JE ) = IB + 50 CONTINUE + 60 CONTINUE +* + M = M + IM + END IF + 70 CONTINUE +* +* If RANGE='I', then (WL,WU) contains eigenvalues NWL+1,...,NWU +* If NWL+1 < IL or NWU > IU, discard extra eigenvalues. +* + IF( IRANGE.EQ.3 ) THEN + IM = 0 + IDISCL = IL - 1 - NWL + IDISCU = NWU - IU +* + IF( IDISCL.GT.0 .OR. IDISCU.GT.0 ) THEN + DO 80 JE = 1, M + IF( W( JE ).LE.WLU .AND. IDISCL.GT.0 ) THEN + IDISCL = IDISCL - 1 + ELSE IF( W( JE ).GE.WUL .AND. IDISCU.GT.0 ) THEN + IDISCU = IDISCU - 1 + ELSE + IM = IM + 1 + W( IM ) = W( JE ) + IBLOCK( IM ) = IBLOCK( JE ) + END IF + 80 CONTINUE + M = IM + END IF + IF( IDISCL.GT.0 .OR. IDISCU.GT.0 ) THEN +* +* Code to deal with effects of bad arithmetic: +* Some low eigenvalues to be discarded are not in (WL,WLU], +* or high eigenvalues to be discarded are not in (WUL,WU] +* so just kill off the smallest IDISCL/largest IDISCU +* eigenvalues, by simply finding the smallest/largest +* eigenvalue(s). +* +* (If N(w) is monotone non-decreasing, this should never +* happen.) +* + IF( IDISCL.GT.0 ) THEN + WKILL = WU + DO 100 JDISC = 1, IDISCL + IW = 0 + DO 90 JE = 1, M + IF( IBLOCK( JE ).NE.0 .AND. + $ ( W( JE ).LT.WKILL .OR. IW.EQ.0 ) ) THEN + IW = JE + WKILL = W( JE ) + END IF + 90 CONTINUE + IBLOCK( IW ) = 0 + 100 CONTINUE + END IF + IF( IDISCU.GT.0 ) THEN +* + WKILL = WL + DO 120 JDISC = 1, IDISCU + IW = 0 + DO 110 JE = 1, M + IF( IBLOCK( JE ).NE.0 .AND. + $ ( W( JE ).GT.WKILL .OR. IW.EQ.0 ) ) THEN + IW = JE + WKILL = W( JE ) + END IF + 110 CONTINUE + IBLOCK( IW ) = 0 + 120 CONTINUE + END IF + IM = 0 + DO 130 JE = 1, M + IF( IBLOCK( JE ).NE.0 ) THEN + IM = IM + 1 + W( IM ) = W( JE ) + IBLOCK( IM ) = IBLOCK( JE ) + END IF + 130 CONTINUE + M = IM + END IF + IF( IDISCL.LT.0 .OR. IDISCU.LT.0 ) THEN + TOOFEW = .TRUE. + END IF + END IF +* +* If ORDER='B', do nothing -- the eigenvalues are already sorted +* by block. +* If ORDER='E', sort the eigenvalues from smallest to largest +* + IF( IORDER.EQ.1 .AND. NSPLIT.GT.1 ) THEN + DO 150 JE = 1, M - 1 + IE = 0 + TMP1 = W( JE ) + DO 140 J = JE + 1, M + IF( W( J ).LT.TMP1 ) THEN + IE = J + TMP1 = W( J ) + END IF + 140 CONTINUE +* + IF( IE.NE.0 ) THEN + ITMP1 = IBLOCK( IE ) + W( IE ) = W( JE ) + IBLOCK( IE ) = IBLOCK( JE ) + W( JE ) = TMP1 + IBLOCK( JE ) = ITMP1 + END IF + 150 CONTINUE + END IF +* + INFO = 0 + IF( NCNVRG ) + $ INFO = INFO + 1 + IF( TOOFEW ) + $ INFO = INFO + 2 + RETURN +* +* End of DSTEBZ +* + END diff --git a/costa/native/external/lapack/dstedc.f b/costa/native/external/lapack/dstedc.f new file mode 100644 index 000000000..e83f640ad --- /dev/null +++ b/costa/native/external/lapack/dstedc.f @@ -0,0 +1,397 @@ + SUBROUTINE DSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSTEDC computes all eigenvalues and, optionally, eigenvectors of a +* symmetric tridiagonal matrix using the divide and conquer method. +* The eigenvectors of a full or band real symmetric matrix can also be +* found if DSYTRD or DSPTRD or DSBTRD has been used to reduce this +* matrix to tridiagonal form. +* +* This code makes very mild assumptions about floating point +* arithmetic. It will work on machines with a guard digit in +* add/subtract, or on those binary machines without guard digits +* which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. +* It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. See DLAED3 for details. +* +* Arguments +* ========= +* +* COMPZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only. +* = 'I': Compute eigenvectors of tridiagonal matrix also. +* = 'V': Compute eigenvectors of original dense symmetric +* matrix also. On entry, Z contains the orthogonal +* matrix used to reduce the original matrix to +* tridiagonal form. +* +* N (input) INTEGER +* The dimension of the symmetric tridiagonal matrix. N >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the diagonal elements of the tridiagonal matrix. +* On exit, if INFO = 0, the eigenvalues in ascending order. +* +* E (input/output) DOUBLE PRECISION array, dimension (N-1) +* On entry, the subdiagonal elements of the tridiagonal matrix. +* On exit, E has been destroyed. +* +* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) +* On entry, if COMPZ = 'V', then Z contains the orthogonal +* matrix used in the reduction to tridiagonal form. +* On exit, if INFO = 0, then if COMPZ = 'V', Z contains the +* orthonormal eigenvectors of the original symmetric matrix, +* and if COMPZ = 'I', Z contains the orthonormal eigenvectors +* of the symmetric tridiagonal matrix. +* If COMPZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1. +* If eigenvectors are desired, then LDZ >= max(1,N). +* +* WORK (workspace/output) DOUBLE PRECISION array, +* dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If COMPZ = 'N' or N <= 1 then LWORK must be at least 1. +* If COMPZ = 'V' and N > 1 then LWORK must be at least +* ( 1 + 3*N + 2*N*lg N + 3*N**2 ), +* where lg( N ) = smallest integer k such +* that 2**k >= N. +* If COMPZ = 'I' and N > 1 then LWORK must be at least +* ( 1 + 4*N + N**2 ). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1. +* If COMPZ = 'V' and N > 1 then LIWORK must be at least +* ( 6 + 6*N + 5*N*lg N ). +* If COMPZ = 'I' and N > 1 then LIWORK must be at least +* ( 3 + 5*N ). +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: The algorithm failed to compute an eigenvalue while +* working on the submatrix lying in rows and columns +* INFO/(N+1) through mod(INFO,N+1). +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* Modified by Francoise Tisseur, University of Tennessee. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER DTRTRW, END, I, ICOMPZ, II, J, K, LGN, LIWMIN, + $ LWMIN, M, SMLSIZ, START, STOREZ + DOUBLE PRECISION EPS, ORGNRM, P, TINY +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL LSAME, ILAENV, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DLACPY, DLAED0, DLASCL, DLASET, DLASRT, + $ DSTEQR, DSTERF, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, LOG, MAX, MOD, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( N.LE.1 .OR. ICOMPZ.LE.0 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE + LGN = INT( LOG( DBLE( N ) ) / LOG( TWO ) ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( ICOMPZ.EQ.1 ) THEN + LWMIN = 1 + 3*N + 2*N*LGN + 3*N**2 + LIWMIN = 6 + 6*N + 5*N*LGN + ELSE IF( ICOMPZ.EQ.2 ) THEN + LWMIN = 1 + 4*N + N**2 + LIWMIN = 3 + 5*N + END IF + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSTEDC', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( N.EQ.1 ) THEN + IF( ICOMPZ.NE.0 ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* + SMLSIZ = ILAENV( 9, 'DSTEDC', ' ', 0, 0, 0, 0 ) +* +* If the following conditional clause is removed, then the routine +* will use the Divide and Conquer routine to compute only the +* eigenvalues, which requires (3N + 3N**2) real workspace and +* (2 + 5N + 2N lg(N)) integer workspace. +* Since on many architectures DSTERF is much faster than any other +* algorithm for finding eigenvalues only, it is used here +* as the default. +* +* If COMPZ = 'N', use DSTERF to compute the eigenvalues. +* + IF( ICOMPZ.EQ.0 ) THEN + CALL DSTERF( N, D, E, INFO ) + RETURN + END IF +* +* If N is smaller than the minimum divide size (SMLSIZ+1), then +* solve the problem with another solver. +* + IF( N.LE.SMLSIZ ) THEN + IF( ICOMPZ.EQ.0 ) THEN + CALL DSTERF( N, D, E, INFO ) + RETURN + ELSE IF( ICOMPZ.EQ.2 ) THEN + CALL DSTEQR( 'I', N, D, E, Z, LDZ, WORK, INFO ) + RETURN + ELSE + CALL DSTEQR( 'V', N, D, E, Z, LDZ, WORK, INFO ) + RETURN + END IF + END IF +* +* If COMPZ = 'V', the Z matrix must be stored elsewhere for later +* use. +* + IF( ICOMPZ.EQ.1 ) THEN + STOREZ = 1 + N*N + ELSE + STOREZ = 1 + END IF +* + IF( ICOMPZ.EQ.2 ) THEN + CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) + END IF +* +* Scale. +* + ORGNRM = DLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) + $ RETURN +* + EPS = DLAMCH( 'Epsilon' ) +* + START = 1 +* +* while ( START <= N ) +* + 10 CONTINUE + IF( START.LE.N ) THEN +* +* Let END be the position of the next subdiagonal entry such that +* E( END ) <= TINY or END = N if no such subdiagonal exists. The +* matrix identified by the elements between START and END +* constitutes an independent sub-problem. +* + END = START + 20 CONTINUE + IF( END.LT.N ) THEN + TINY = EPS*SQRT( ABS( D( END ) ) )*SQRT( ABS( D( END+1 ) ) ) + IF( ABS( E( END ) ).GT.TINY ) THEN + END = END + 1 + GO TO 20 + END IF + END IF +* +* (Sub) Problem determined. Compute its size and solve it. +* + M = END - START + 1 + IF( M.EQ.1 ) THEN + START = END + 1 + GO TO 10 + END IF + IF( M.GT.SMLSIZ ) THEN + INFO = SMLSIZ +* +* Scale. +* + ORGNRM = DLANST( 'M', M, D( START ), E( START ) ) + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, M, 1, D( START ), M, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, M-1, 1, E( START ), + $ M-1, INFO ) +* + IF( ICOMPZ.EQ.1 ) THEN + DTRTRW = 1 + ELSE + DTRTRW = START + END IF + CALL DLAED0( ICOMPZ, N, M, D( START ), E( START ), + $ Z( DTRTRW, START ), LDZ, WORK( 1 ), N, + $ WORK( STOREZ ), IWORK, INFO ) + IF( INFO.NE.0 ) THEN + INFO = ( INFO / ( M+1 )+START-1 )*( N+1 ) + + $ MOD( INFO, ( M+1 ) ) + START - 1 + RETURN + END IF +* +* Scale back. +* + CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, M, 1, D( START ), M, + $ INFO ) +* + ELSE + IF( ICOMPZ.EQ.1 ) THEN +* +* Since QR won't update a Z matrix which is larger than the +* length of D, we must solve the sub-problem in a workspace and +* then multiply back into Z. +* + CALL DSTEQR( 'I', M, D( START ), E( START ), WORK, M, + $ WORK( M*M+1 ), INFO ) + CALL DLACPY( 'A', N, M, Z( 1, START ), LDZ, + $ WORK( STOREZ ), N ) + CALL DGEMM( 'N', 'N', N, M, M, ONE, WORK( STOREZ ), LDZ, + $ WORK, M, ZERO, Z( 1, START ), LDZ ) + ELSE IF( ICOMPZ.EQ.2 ) THEN + CALL DSTEQR( 'I', M, D( START ), E( START ), + $ Z( START, START ), LDZ, WORK, INFO ) + ELSE + CALL DSTERF( M, D( START ), E( START ), INFO ) + END IF + IF( INFO.NE.0 ) THEN + INFO = START*( N+1 ) + END + RETURN + END IF + END IF +* + START = END + 1 + GO TO 10 + END IF +* +* endwhile +* +* If the problem split any number of times, then the eigenvalues +* will not be properly ordered. Here we permute the eigenvalues +* (and the associated eigenvectors) into ascending order. +* + IF( M.NE.N ) THEN + IF( ICOMPZ.EQ.0 ) THEN +* +* Use Quick Sort +* + CALL DLASRT( 'I', N, D, INFO ) +* + ELSE +* +* Use Selection Sort to minimize swaps of eigenvectors +* + DO 40 II = 2, N + I = II - 1 + K = I + P = D( I ) + DO 30 J = II, N + IF( D( J ).LT.P ) THEN + K = J + P = D( J ) + END IF + 30 CONTINUE + IF( K.NE.I ) THEN + D( K ) = D( I ) + D( I ) = P + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 ) + END IF + 40 CONTINUE + END IF + END IF +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of DSTEDC +* + END diff --git a/costa/native/external/lapack/dstegr.f b/costa/native/external/lapack/dstegr.f new file mode 100644 index 000000000..2f2cd78b5 --- /dev/null +++ b/costa/native/external/lapack/dstegr.f @@ -0,0 +1,400 @@ + SUBROUTINE DSTEGR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL, + $ M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE + INTEGER IL, INFO, IU, LDZ, LIWORK, LWORK, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSTEGR computes selected eigenvalues and, optionally, eigenvectors +* of a real symmetric tridiagonal matrix T. Eigenvalues and +* eigenvectors can be selected by specifying either a range of values +* or a range of indices for the desired eigenvalues. The eigenvalues +* are computed by the dqds algorithm, while orthogonal eigenvectors are +* computed from various ``good'' L D L^T representations (also known as +* Relatively Robust Representations). Gram-Schmidt orthogonalization is +* avoided as far as possible. More specifically, the various steps of +* the algorithm are as follows. For the i-th unreduced block of T, +* (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T +* is a relatively robust representation, +* (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high +* relative accuracy by the dqds algorithm, +* (c) If there is a cluster of close eigenvalues, "choose" sigma_i +* close to the cluster, and go to step (a), +* (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, +* compute the corresponding eigenvector by forming a +* rank-revealing twisted factorization. +* The desired accuracy of the output can be specified by the input +* parameter ABSTOL. +* +* For more details, see "A new O(n^2) algorithm for the symmetric +* tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, +* Computer Science Division Technical Report No. UCB/CSD-97-971, +* UC Berkeley, May 1997. +* +* Note 1 : Currently DSTEGR is only set up to find ALL the n +* eigenvalues and eigenvectors of T in O(n^2) time +* Note 2 : Currently the routine DSTEIN is called when an appropriate +* sigma_i cannot be chosen in step (c) above. DSTEIN invokes modified +* Gram-Schmidt when eigenvalues are close. +* Note 3 : DSTEGR works only on machines which follow ieee-754 +* floating-point standard in their handling of infinities and NaNs. +* Normal execution of DSTEGR may create NaNs and infinities and hence +* may abort due to a floating point exception in environments which +* do not conform to the ieee standard. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +********** Only RANGE = 'A' is currently supported ********************* +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* T. On exit, D is overwritten. +* +* E (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix T in elements 1 to N-1 of E; E(N) need not be set. +* On exit, E is overwritten. +* +* VL (input) DOUBLE PRECISION +* VU (input) DOUBLE PRECISION +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) DOUBLE PRECISION +* The absolute error tolerance for the +* eigenvalues/eigenvectors. IF JOBZ = 'V', the eigenvalues and +* eigenvectors output have residual norms bounded by ABSTOL, +* and the dot products between different eigenvectors are +* bounded by ABSTOL. If ABSTOL is less than N*EPS*|T|, then +* N*EPS*|T| will be used in its place, where EPS is the +* machine precision and |T| is the 1-norm of the tridiagonal +* matrix. The eigenvalues are computed to an accuracy of +* EPS*|T| irrespective of ABSTOL. If high relative accuracy +* is important, set ABSTOL to DLAMCH( 'Safe minimum' ). +* See Barlow and Demmel "Computing Accurate Eigensystems of +* Scaled Diagonally Dominant Matrices", LAPACK Working Note #7 +* for a discussion of which matrices define their eigenvalues +* to high relative accuracy. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* The first M elements contain the selected eigenvalues in +* ascending order. +* +* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix T +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* ISUPPZ (output) INTEGER ARRAY, dimension ( 2*max(1,M) ) +* The support of the eigenvectors in Z, i.e., the indices +* indicating the nonzero elements in Z. The i-th eigenvector +* is nonzero only in elements ISUPPZ( 2*i-1 ) through +* ISUPPZ( 2*i ). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal +* (and minimal) LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,18*N) +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. LIWORK >= max(1,10*N) +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = 1, internal error in DLARRE, +* if INFO = 2, internal error in DLARRV. +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ + INTEGER I, IBEGIN, IEND, IINDBL, IINDWK, IINFO, IINSPL, + $ INDGRS, INDWOF, INDWRK, ITMP, J, JJ, LIWMIN, + $ LWMIN, NSPLIT + DOUBLE PRECISION BIGNUM, EPS, RMAX, RMIN, SAFMIN, SCALE, SMLNUM, + $ THRESH, TMP, TNRM, TOL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL LSAME, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DLARRE, DLARRV, DLASET, DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) ) + LWMIN = 18*N + LIWMIN = 10*N +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 +* +* The following two lines need to be removed once the +* RANGE = 'V' and RANGE = 'I' options are provided. +* + ELSE IF( VALEIG .OR. INDEIG ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN + INFO = -7 + ELSE IF( INDEIG .AND. IL.LT.1 ) THEN + INFO = -8 +* The following change should be made in DSTEVX also, otherwise +* IL can be specified as N+1 and IU as N. +* ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN + ELSE IF( INDEIG .AND. ( IU.LT.IL .OR. IU.GT.N ) ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -14 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -17 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSTEGR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = D( 1 ) + ELSE + IF( VL.LT.D( 1 ) .AND. VU.GE.D( 1 ) ) THEN + M = 1 + W( 1 ) = D( 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + SCALE = ONE + TNRM = DLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + SCALE = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + SCALE = RMAX / TNRM + END IF + IF( SCALE.NE.ONE ) THEN + CALL DSCAL( N, SCALE, D, 1 ) + CALL DSCAL( N-1, SCALE, E, 1 ) + TNRM = TNRM*SCALE + END IF + INDGRS = 1 + INDWOF = 2*N + 1 + INDWRK = 3*N + 1 +* + IINSPL = 1 + IINDBL = N + 1 + IINDWK = 2*N + 1 +* + CALL DLASET( 'Full', N, N, ZERO, ZERO, Z, LDZ ) +* +* Compute the desired eigenvalues of the tridiagonal after splitting +* into smaller subblocks if the corresponding of-diagonal elements +* are small +* + THRESH = EPS*TNRM + CALL DLARRE( N, D, E, THRESH, NSPLIT, IWORK( IINSPL ), M, W, + $ WORK( INDWOF ), WORK( INDGRS ), WORK( INDWRK ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 1 + RETURN + END IF +* + IF( WANTZ ) THEN +* +* Compute the desired eigenvectors corresponding to the computed +* eigenvalues +* + TOL = MAX( ABSTOL, DBLE( N )*THRESH ) + IBEGIN = 1 + DO 20 I = 1, NSPLIT + IEND = IWORK( IINSPL+I-1 ) + DO 10 J = IBEGIN, IEND + IWORK( IINDBL+J-1 ) = I + 10 CONTINUE + IBEGIN = IEND + 1 + 20 CONTINUE +* + CALL DLARRV( N, D, E, IWORK( IINSPL ), M, W, IWORK( IINDBL ), + $ WORK( INDGRS ), TOL, Z, LDZ, ISUPPZ, + $ WORK( INDWRK ), IWORK( IINDWK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 2 + RETURN + END IF +* + END IF +* + IBEGIN = 1 + DO 40 I = 1, NSPLIT + IEND = IWORK( IINSPL+I-1 ) + DO 30 J = IBEGIN, IEND + W( J ) = W( J ) + WORK( INDWOF+I-1 ) + 30 CONTINUE + IBEGIN = IEND + 1 + 40 CONTINUE +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( SCALE.NE.ONE ) THEN + CALL DSCAL( M, ONE / SCALE, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( NSPLIT.GT.1 ) THEN + DO 60 J = 1, M - 1 + I = 0 + TMP = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP ) THEN + I = JJ + TMP = W( JJ ) + END IF + 50 CONTINUE + IF( I.NE.0 ) THEN + W( I ) = W( J ) + W( J ) = TMP + IF( WANTZ ) THEN + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + ITMP = ISUPPZ( 2*I-1 ) + ISUPPZ( 2*I-1 ) = ISUPPZ( 2*J-1 ) + ISUPPZ( 2*J-1 ) = ITMP + ITMP = ISUPPZ( 2*I ) + ISUPPZ( 2*I ) = ISUPPZ( 2*J ) + ISUPPZ( 2*J ) = ITMP + END IF + END IF + 60 CONTINUE + END IF +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of DSTEGR +* + END diff --git a/costa/native/external/lapack/dstein.f b/costa/native/external/lapack/dstein.f new file mode 100644 index 000000000..48b5fd285 --- /dev/null +++ b/costa/native/external/lapack/dstein.f @@ -0,0 +1,362 @@ + SUBROUTINE DSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, Z, LDZ, WORK, + $ IWORK, IFAIL, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDZ, M, N +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), IFAIL( * ), ISPLIT( * ), + $ IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSTEIN computes the eigenvectors of a real symmetric tridiagonal +* matrix T corresponding to specified eigenvalues, using inverse +* iteration. +* +* The maximum number of iterations allowed for each eigenvector is +* specified by an internal parameter MAXITS (currently set to 5). +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the tridiagonal matrix T. +* +* E (input) DOUBLE PRECISION array, dimension (N) +* The (n-1) subdiagonal elements of the tridiagonal matrix +* T, in elements 1 to N-1. E(N) need not be set. +* +* M (input) INTEGER +* The number of eigenvectors to be found. 0 <= M <= N. +* +* W (input) DOUBLE PRECISION array, dimension (N) +* The first M elements of W contain the eigenvalues for +* which eigenvectors are to be computed. The eigenvalues +* should be grouped by split-off block and ordered from +* smallest to largest within the block. ( The output array +* W from DSTEBZ with ORDER = 'B' is expected here. ) +* +* IBLOCK (input) INTEGER array, dimension (N) +* The submatrix indices associated with the corresponding +* eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to +* the first submatrix from the top, =2 if W(i) belongs to +* the second submatrix, etc. ( The output array IBLOCK +* from DSTEBZ is expected here. ) +* +* ISPLIT (input) INTEGER array, dimension (N) +* The splitting points, at which T breaks up into submatrices. +* The first submatrix consists of rows/columns 1 to +* ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 +* through ISPLIT( 2 ), etc. +* ( The output array ISPLIT from DSTEBZ is expected here. ) +* +* Z (output) DOUBLE PRECISION array, dimension (LDZ, M) +* The computed eigenvectors. The eigenvector associated +* with the eigenvalue W(i) is stored in the i-th column of +* Z. Any vector which fails to converge is set to its current +* iterate after MAXITS iterations. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= max(1,N). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (5*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* IFAIL (output) INTEGER array, dimension (M) +* On normal exit, all elements of IFAIL are zero. +* If one or more eigenvectors fail to converge after +* MAXITS iterations, then their indices are stored in +* array IFAIL. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, then i eigenvectors failed to converge +* in MAXITS iterations. Their indices are stored in +* array IFAIL. +* +* Internal Parameters +* =================== +* +* MAXITS INTEGER, default = 5 +* The maximum number of iterations performed. +* +* EXTRA INTEGER, default = 2 +* The number of iterations performed after norm growth +* criterion is satisfied, should be at least 1. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TEN, ODM3, ODM1 + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TEN = 1.0D+1, + $ ODM3 = 1.0D-3, ODM1 = 1.0D-1 ) + INTEGER MAXITS, EXTRA + PARAMETER ( MAXITS = 5, EXTRA = 2 ) +* .. +* .. Local Scalars .. + INTEGER B1, BLKSIZ, BN, GPIND, I, IINFO, INDRV1, + $ INDRV2, INDRV3, INDRV4, INDRV5, ITS, J, J1, + $ JBLK, JMAX, NBLK, NRMCHK + DOUBLE PRECISION DTPCRT, EPS, EPS1, NRM, ONENRM, ORTOL, PERTOL, + $ SCL, SEP, TOL, XJ, XJM, ZTR +* .. +* .. Local Arrays .. + INTEGER ISEED( 4 ) +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DASUM, DDOT, DLAMCH, DNRM2 + EXTERNAL IDAMAX, DASUM, DDOT, DLAMCH, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DLAGTF, DLAGTS, DLARNV, DSCAL, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + DO 10 I = 1, M + IFAIL( I ) = 0 + 10 CONTINUE +* + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 .OR. M.GT.N ) THEN + INFO = -4 + ELSE IF( LDZ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + DO 20 J = 2, M + IF( IBLOCK( J ).LT.IBLOCK( J-1 ) ) THEN + INFO = -6 + GO TO 30 + END IF + IF( IBLOCK( J ).EQ.IBLOCK( J-1 ) .AND. W( J ).LT.W( J-1 ) ) + $ THEN + INFO = -5 + GO TO 30 + END IF + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSTEIN', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + EPS = DLAMCH( 'Precision' ) +* +* Initialize seed for random number generator DLARNV. +* + DO 40 I = 1, 4 + ISEED( I ) = 1 + 40 CONTINUE +* +* Initialize pointers. +* + INDRV1 = 0 + INDRV2 = INDRV1 + N + INDRV3 = INDRV2 + N + INDRV4 = INDRV3 + N + INDRV5 = INDRV4 + N +* +* Compute eigenvectors of matrix blocks. +* + J1 = 1 + DO 160 NBLK = 1, IBLOCK( M ) +* +* Find starting and ending indices of block nblk. +* + IF( NBLK.EQ.1 ) THEN + B1 = 1 + ELSE + B1 = ISPLIT( NBLK-1 ) + 1 + END IF + BN = ISPLIT( NBLK ) + BLKSIZ = BN - B1 + 1 + IF( BLKSIZ.EQ.1 ) + $ GO TO 60 + GPIND = B1 +* +* Compute reorthogonalization criterion and stopping criterion. +* + ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) ) + ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) ) + DO 50 I = B1 + 1, BN - 1 + ONENRM = MAX( ONENRM, ABS( D( I ) )+ABS( E( I-1 ) )+ + $ ABS( E( I ) ) ) + 50 CONTINUE + ORTOL = ODM3*ONENRM +* + DTPCRT = SQRT( ODM1 / BLKSIZ ) +* +* Loop through eigenvalues of block nblk. +* + 60 CONTINUE + JBLK = 0 + DO 150 J = J1, M + IF( IBLOCK( J ).NE.NBLK ) THEN + J1 = J + GO TO 160 + END IF + JBLK = JBLK + 1 + XJ = W( J ) +* +* Skip all the work if the block size is one. +* + IF( BLKSIZ.EQ.1 ) THEN + WORK( INDRV1+1 ) = ONE + GO TO 120 + END IF +* +* If eigenvalues j and j-1 are too close, add a relatively +* small perturbation. +* + IF( JBLK.GT.1 ) THEN + EPS1 = ABS( EPS*XJ ) + PERTOL = TEN*EPS1 + SEP = XJ - XJM + IF( SEP.LT.PERTOL ) + $ XJ = XJM + PERTOL + END IF +* + ITS = 0 + NRMCHK = 0 +* +* Get random starting vector. +* + CALL DLARNV( 2, ISEED, BLKSIZ, WORK( INDRV1+1 ) ) +* +* Copy the matrix T so it won't be destroyed in factorization. +* + CALL DCOPY( BLKSIZ, D( B1 ), 1, WORK( INDRV4+1 ), 1 ) + CALL DCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV2+2 ), 1 ) + CALL DCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV3+1 ), 1 ) +* +* Compute LU factors with partial pivoting ( PT = LU ) +* + TOL = ZERO + CALL DLAGTF( BLKSIZ, WORK( INDRV4+1 ), XJ, WORK( INDRV2+2 ), + $ WORK( INDRV3+1 ), TOL, WORK( INDRV5+1 ), IWORK, + $ IINFO ) +* +* Update iteration count. +* + 70 CONTINUE + ITS = ITS + 1 + IF( ITS.GT.MAXITS ) + $ GO TO 100 +* +* Normalize and scale the righthand side vector Pb. +* + SCL = BLKSIZ*ONENRM*MAX( EPS, + $ ABS( WORK( INDRV4+BLKSIZ ) ) ) / + $ DASUM( BLKSIZ, WORK( INDRV1+1 ), 1 ) + CALL DSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 ) +* +* Solve the system LU = Pb. +* + CALL DLAGTS( -1, BLKSIZ, WORK( INDRV4+1 ), WORK( INDRV2+2 ), + $ WORK( INDRV3+1 ), WORK( INDRV5+1 ), IWORK, + $ WORK( INDRV1+1 ), TOL, IINFO ) +* +* Reorthogonalize by modified Gram-Schmidt if eigenvalues are +* close enough. +* + IF( JBLK.EQ.1 ) + $ GO TO 90 + IF( ABS( XJ-XJM ).GT.ORTOL ) + $ GPIND = J + IF( GPIND.NE.J ) THEN + DO 80 I = GPIND, J - 1 + ZTR = -DDOT( BLKSIZ, WORK( INDRV1+1 ), 1, Z( B1, I ), + $ 1 ) + CALL DAXPY( BLKSIZ, ZTR, Z( B1, I ), 1, + $ WORK( INDRV1+1 ), 1 ) + 80 CONTINUE + END IF +* +* Check the infinity norm of the iterate. +* + 90 CONTINUE + JMAX = IDAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) + NRM = ABS( WORK( INDRV1+JMAX ) ) +* +* Continue for additional iterations after norm reaches +* stopping criterion. +* + IF( NRM.LT.DTPCRT ) + $ GO TO 70 + NRMCHK = NRMCHK + 1 + IF( NRMCHK.LT.EXTRA+1 ) + $ GO TO 70 +* + GO TO 110 +* +* If stopping criterion was not satisfied, update info and +* store eigenvector number in array ifail. +* + 100 CONTINUE + INFO = INFO + 1 + IFAIL( INFO ) = J +* +* Accept iterate as jth eigenvector. +* + 110 CONTINUE + SCL = ONE / DNRM2( BLKSIZ, WORK( INDRV1+1 ), 1 ) + JMAX = IDAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) + IF( WORK( INDRV1+JMAX ).LT.ZERO ) + $ SCL = -SCL + CALL DSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 ) + 120 CONTINUE + DO 130 I = 1, N + Z( I, J ) = ZERO + 130 CONTINUE + DO 140 I = 1, BLKSIZ + Z( B1+I-1, J ) = WORK( INDRV1+I ) + 140 CONTINUE +* +* Save the shift to check eigenvalue spacing at next +* iteration. +* + XJM = XJ +* + 150 CONTINUE + 160 CONTINUE +* + RETURN +* +* End of DSTEIN +* + END diff --git a/costa/native/external/lapack/dsteqr.f b/costa/native/external/lapack/dsteqr.f new file mode 100644 index 000000000..20faca125 --- /dev/null +++ b/costa/native/external/lapack/dsteqr.f @@ -0,0 +1,501 @@ + SUBROUTINE DSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSTEQR computes all eigenvalues and, optionally, eigenvectors of a +* symmetric tridiagonal matrix using the implicit QL or QR method. +* The eigenvectors of a full or band symmetric matrix can also be found +* if DSYTRD or DSPTRD or DSBTRD has been used to reduce this matrix to +* tridiagonal form. +* +* Arguments +* ========= +* +* COMPZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only. +* = 'V': Compute eigenvalues and eigenvectors of the original +* symmetric matrix. On entry, Z must contain the +* orthogonal matrix used to reduce the original matrix +* to tridiagonal form. +* = 'I': Compute eigenvalues and eigenvectors of the +* tridiagonal matrix. Z is initialized to the identity +* matrix. +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the diagonal elements of the tridiagonal matrix. +* On exit, if INFO = 0, the eigenvalues in ascending order. +* +* E (input/output) DOUBLE PRECISION array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix. +* On exit, E has been destroyed. +* +* Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) +* On entry, if COMPZ = 'V', then Z contains the orthogonal +* matrix used in the reduction to tridiagonal form. +* On exit, if INFO = 0, then if COMPZ = 'V', Z contains the +* orthonormal eigenvectors of the original symmetric matrix, +* and if COMPZ = 'I', Z contains the orthonormal eigenvectors +* of the symmetric tridiagonal matrix. +* If COMPZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* eigenvectors are desired, then LDZ >= max(1,N). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) +* If COMPZ = 'N', then WORK is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: the algorithm has failed to find all the eigenvalues in +* a total of 30*N iterations; if INFO = i, then i +* elements of E have not converged to zero; on exit, D +* and E contain the elements of a symmetric tridiagonal +* matrix which is orthogonally similar to the original +* matrix. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, THREE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ THREE = 3.0D0 ) + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) +* .. +* .. Local Scalars .. + INTEGER I, ICOMPZ, II, ISCALE, J, JTOT, K, L, L1, LEND, + $ LENDM1, LENDP1, LENDSV, LM1, LSV, M, MM, MM1, + $ NM1, NMAXIT + DOUBLE PRECISION ANORM, B, C, EPS, EPS2, F, G, P, R, RT1, RT2, + $ S, SAFMAX, SAFMIN, SSFMAX, SSFMIN, TST +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANST, DLAPY2 + EXTERNAL LSAME, DLAMCH, DLANST, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL DLAE2, DLAEV2, DLARTG, DLASCL, DLASET, DLASR, + $ DLASRT, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSTEQR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ICOMPZ.EQ.2 ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Determine the unit roundoff and over/underflow thresholds. +* + EPS = DLAMCH( 'E' ) + EPS2 = EPS**2 + SAFMIN = DLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + SSFMAX = SQRT( SAFMAX ) / THREE + SSFMIN = SQRT( SAFMIN ) / EPS2 +* +* Compute the eigenvalues and eigenvectors of the tridiagonal +* matrix. +* + IF( ICOMPZ.EQ.2 ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* + NMAXIT = N*MAXIT + JTOT = 0 +* +* Determine where the matrix splits and choose QL or QR iteration +* for each block, according to whether top or bottom diagonal +* element is smaller. +* + L1 = 1 + NM1 = N - 1 +* + 10 CONTINUE + IF( L1.GT.N ) + $ GO TO 160 + IF( L1.GT.1 ) + $ E( L1-1 ) = ZERO + IF( L1.LE.NM1 ) THEN + DO 20 M = L1, NM1 + TST = ABS( E( M ) ) + IF( TST.EQ.ZERO ) + $ GO TO 30 + IF( TST.LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+ + $ 1 ) ) ) )*EPS ) THEN + E( M ) = ZERO + GO TO 30 + END IF + 20 CONTINUE + END IF + M = N +* + 30 CONTINUE + L = L1 + LSV = L + LEND = M + LENDSV = LEND + L1 = M + 1 + IF( LEND.EQ.L ) + $ GO TO 10 +* +* Scale submatrix in rows and columns L to LEND +* + ANORM = DLANST( 'I', LEND-L+1, D( L ), E( L ) ) + ISCALE = 0 + IF( ANORM.EQ.ZERO ) + $ GO TO 10 + IF( ANORM.GT.SSFMAX ) THEN + ISCALE = 1 + CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, + $ INFO ) + ELSE IF( ANORM.LT.SSFMIN ) THEN + ISCALE = 2 + CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, + $ INFO ) + END IF +* +* Choose between QL and QR iteration +* + IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN + LEND = LSV + L = LENDSV + END IF +* + IF( LEND.GT.L ) THEN +* +* QL Iteration +* +* Look for small subdiagonal element. +* + 40 CONTINUE + IF( L.NE.LEND ) THEN + LENDM1 = LEND - 1 + DO 50 M = L, LENDM1 + TST = ABS( E( M ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M+1 ) )+ + $ SAFMIN )GO TO 60 + 50 CONTINUE + END IF +* + M = LEND +* + 60 CONTINUE + IF( M.LT.LEND ) + $ E( M ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 80 +* +* If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 +* to compute its eigensystem. +* + IF( M.EQ.L+1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL DLAEV2( D( L ), E( L ), D( L+1 ), RT1, RT2, C, S ) + WORK( L ) = C + WORK( N-1+L ) = S + CALL DLASR( 'R', 'V', 'B', N, 2, WORK( L ), + $ WORK( N-1+L ), Z( 1, L ), LDZ ) + ELSE + CALL DLAE2( D( L ), E( L ), D( L+1 ), RT1, RT2 ) + END IF + D( L ) = RT1 + D( L+1 ) = RT2 + E( L ) = ZERO + L = L + 2 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L+1 )-P ) / ( TWO*E( L ) ) + R = DLAPY2( G, ONE ) + G = D( M ) - P + ( E( L ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + MM1 = M - 1 + DO 70 I = MM1, L, -1 + F = S*E( I ) + B = C*E( I ) + CALL DLARTG( G, F, C, S, R ) + IF( I.NE.M-1 ) + $ E( I+1 ) = R + G = D( I+1 ) - P + R = ( D( I )-G )*S + TWO*C*B + P = S*R + D( I+1 ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = -S + END IF +* + 70 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = M - L + 1 + CALL DLASR( 'R', 'V', 'B', N, MM, WORK( L ), WORK( N-1+L ), + $ Z( 1, L ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( L ) = G + GO TO 40 +* +* Eigenvalue found. +* + 80 CONTINUE + D( L ) = P +* + L = L + 1 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 +* + ELSE +* +* QR Iteration +* +* Look for small superdiagonal element. +* + 90 CONTINUE + IF( L.NE.LEND ) THEN + LENDP1 = LEND + 1 + DO 100 M = L, LENDP1, -1 + TST = ABS( E( M-1 ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M-1 ) )+ + $ SAFMIN )GO TO 110 + 100 CONTINUE + END IF +* + M = LEND +* + 110 CONTINUE + IF( M.GT.LEND ) + $ E( M-1 ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 130 +* +* If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 +* to compute its eigensystem. +* + IF( M.EQ.L-1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL DLAEV2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2, C, S ) + WORK( M ) = C + WORK( N-1+M ) = S + CALL DLASR( 'R', 'V', 'F', N, 2, WORK( M ), + $ WORK( N-1+M ), Z( 1, L-1 ), LDZ ) + ELSE + CALL DLAE2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2 ) + END IF + D( L-1 ) = RT1 + D( L ) = RT2 + E( L-1 ) = ZERO + L = L - 2 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L-1 )-P ) / ( TWO*E( L-1 ) ) + R = DLAPY2( G, ONE ) + G = D( M ) - P + ( E( L-1 ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + LM1 = L - 1 + DO 120 I = M, LM1 + F = S*E( I ) + B = C*E( I ) + CALL DLARTG( G, F, C, S, R ) + IF( I.NE.M ) + $ E( I-1 ) = R + G = D( I ) - P + R = ( D( I+1 )-G )*S + TWO*C*B + P = S*R + D( I ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = S + END IF +* + 120 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = L - M + 1 + CALL DLASR( 'R', 'V', 'F', N, MM, WORK( M ), WORK( N-1+M ), + $ Z( 1, M ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( LM1 ) = G + GO TO 90 +* +* Eigenvalue found. +* + 130 CONTINUE + D( L ) = P +* + L = L - 1 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 +* + END IF +* +* Undo scaling if necessary +* + 140 CONTINUE + IF( ISCALE.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + ELSE IF( ISCALE.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + END IF +* +* Check for no convergence to an eigenvalue after a total +* of N*MAXIT iterations. +* + IF( JTOT.LT.NMAXIT ) + $ GO TO 10 + DO 150 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 150 CONTINUE + GO TO 190 +* +* Order eigenvalues and eigenvectors. +* + 160 CONTINUE + IF( ICOMPZ.EQ.0 ) THEN +* +* Use Quick Sort +* + CALL DLASRT( 'I', N, D, INFO ) +* + ELSE +* +* Use Selection Sort to minimize swaps of eigenvectors +* + DO 180 II = 2, N + I = II - 1 + K = I + P = D( I ) + DO 170 J = II, N + IF( D( J ).LT.P ) THEN + K = J + P = D( J ) + END IF + 170 CONTINUE + IF( K.NE.I ) THEN + D( K ) = D( I ) + D( I ) = P + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 ) + END IF + 180 CONTINUE + END IF +* + 190 CONTINUE + RETURN +* +* End of DSTEQR +* + END diff --git a/costa/native/external/lapack/dsterf.f b/costa/native/external/lapack/dsterf.f new file mode 100644 index 000000000..91a32bd26 --- /dev/null +++ b/costa/native/external/lapack/dsterf.f @@ -0,0 +1,365 @@ + SUBROUTINE DSTERF( N, D, E, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) +* .. +* +* Purpose +* ======= +* +* DSTERF computes all eigenvalues of a symmetric tridiagonal matrix +* using the Pal-Walker-Kahan variant of the QL or QR algorithm. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix. +* On exit, if INFO = 0, the eigenvalues in ascending order. +* +* E (input/output) DOUBLE PRECISION array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix. +* On exit, E has been destroyed. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: the algorithm failed to find all of the eigenvalues in +* a total of 30*N iterations; if INFO = i, then i +* elements of E have not converged to zero. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, THREE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ THREE = 3.0D0 ) + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) +* .. +* .. Local Scalars .. + INTEGER I, ISCALE, JTOT, L, L1, LEND, LENDSV, LSV, M, + $ NMAXIT + DOUBLE PRECISION ALPHA, ANORM, BB, C, EPS, EPS2, GAMMA, OLDC, + $ OLDGAM, P, R, RT1, RT2, RTE, S, SAFMAX, SAFMIN, + $ SIGMA, SSFMAX, SSFMIN +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLANST, DLAPY2 + EXTERNAL DLAMCH, DLANST, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL DLAE2, DLASCL, DLASRT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'DSTERF', -INFO ) + RETURN + END IF + IF( N.LE.1 ) + $ RETURN +* +* Determine the unit roundoff for this environment. +* + EPS = DLAMCH( 'E' ) + EPS2 = EPS**2 + SAFMIN = DLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + SSFMAX = SQRT( SAFMAX ) / THREE + SSFMIN = SQRT( SAFMIN ) / EPS2 +* +* Compute the eigenvalues of the tridiagonal matrix. +* + NMAXIT = N*MAXIT + SIGMA = ZERO + JTOT = 0 +* +* Determine where the matrix splits and choose QL or QR iteration +* for each block, according to whether top or bottom diagonal +* element is smaller. +* + L1 = 1 +* + 10 CONTINUE + IF( L1.GT.N ) + $ GO TO 170 + IF( L1.GT.1 ) + $ E( L1-1 ) = ZERO + DO 20 M = L1, N - 1 + IF( ABS( E( M ) ).LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+ + $ 1 ) ) ) )*EPS ) THEN + E( M ) = ZERO + GO TO 30 + END IF + 20 CONTINUE + M = N +* + 30 CONTINUE + L = L1 + LSV = L + LEND = M + LENDSV = LEND + L1 = M + 1 + IF( LEND.EQ.L ) + $ GO TO 10 +* +* Scale submatrix in rows and columns L to LEND +* + ANORM = DLANST( 'I', LEND-L+1, D( L ), E( L ) ) + ISCALE = 0 + IF( ANORM.GT.SSFMAX ) THEN + ISCALE = 1 + CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, + $ INFO ) + ELSE IF( ANORM.LT.SSFMIN ) THEN + ISCALE = 2 + CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, + $ INFO ) + END IF +* + DO 40 I = L, LEND - 1 + E( I ) = E( I )**2 + 40 CONTINUE +* +* Choose between QL and QR iteration +* + IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN + LEND = LSV + L = LENDSV + END IF +* + IF( LEND.GE.L ) THEN +* +* QL Iteration +* +* Look for small subdiagonal element. +* + 50 CONTINUE + IF( L.NE.LEND ) THEN + DO 60 M = L, LEND - 1 + IF( ABS( E( M ) ).LE.EPS2*ABS( D( M )*D( M+1 ) ) ) + $ GO TO 70 + 60 CONTINUE + END IF + M = LEND +* + 70 CONTINUE + IF( M.LT.LEND ) + $ E( M ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 90 +* +* If remaining matrix is 2 by 2, use DLAE2 to compute its +* eigenvalues. +* + IF( M.EQ.L+1 ) THEN + RTE = SQRT( E( L ) ) + CALL DLAE2( D( L ), RTE, D( L+1 ), RT1, RT2 ) + D( L ) = RT1 + D( L+1 ) = RT2 + E( L ) = ZERO + L = L + 2 + IF( L.LE.LEND ) + $ GO TO 50 + GO TO 150 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 150 + JTOT = JTOT + 1 +* +* Form shift. +* + RTE = SQRT( E( L ) ) + SIGMA = ( D( L+1 )-P ) / ( TWO*RTE ) + R = DLAPY2( SIGMA, ONE ) + SIGMA = P - ( RTE / ( SIGMA+SIGN( R, SIGMA ) ) ) +* + C = ONE + S = ZERO + GAMMA = D( M ) - SIGMA + P = GAMMA*GAMMA +* +* Inner loop +* + DO 80 I = M - 1, L, -1 + BB = E( I ) + R = P + BB + IF( I.NE.M-1 ) + $ E( I+1 ) = S*R + OLDC = C + C = P / R + S = BB / R + OLDGAM = GAMMA + ALPHA = D( I ) + GAMMA = C*( ALPHA-SIGMA ) - S*OLDGAM + D( I+1 ) = OLDGAM + ( ALPHA-GAMMA ) + IF( C.NE.ZERO ) THEN + P = ( GAMMA*GAMMA ) / C + ELSE + P = OLDC*BB + END IF + 80 CONTINUE +* + E( L ) = S*P + D( L ) = SIGMA + GAMMA + GO TO 50 +* +* Eigenvalue found. +* + 90 CONTINUE + D( L ) = P +* + L = L + 1 + IF( L.LE.LEND ) + $ GO TO 50 + GO TO 150 +* + ELSE +* +* QR Iteration +* +* Look for small superdiagonal element. +* + 100 CONTINUE + DO 110 M = L, LEND + 1, -1 + IF( ABS( E( M-1 ) ).LE.EPS2*ABS( D( M )*D( M-1 ) ) ) + $ GO TO 120 + 110 CONTINUE + M = LEND +* + 120 CONTINUE + IF( M.GT.LEND ) + $ E( M-1 ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 140 +* +* If remaining matrix is 2 by 2, use DLAE2 to compute its +* eigenvalues. +* + IF( M.EQ.L-1 ) THEN + RTE = SQRT( E( L-1 ) ) + CALL DLAE2( D( L ), RTE, D( L-1 ), RT1, RT2 ) + D( L ) = RT1 + D( L-1 ) = RT2 + E( L-1 ) = ZERO + L = L - 2 + IF( L.GE.LEND ) + $ GO TO 100 + GO TO 150 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 150 + JTOT = JTOT + 1 +* +* Form shift. +* + RTE = SQRT( E( L-1 ) ) + SIGMA = ( D( L-1 )-P ) / ( TWO*RTE ) + R = DLAPY2( SIGMA, ONE ) + SIGMA = P - ( RTE / ( SIGMA+SIGN( R, SIGMA ) ) ) +* + C = ONE + S = ZERO + GAMMA = D( M ) - SIGMA + P = GAMMA*GAMMA +* +* Inner loop +* + DO 130 I = M, L - 1 + BB = E( I ) + R = P + BB + IF( I.NE.M ) + $ E( I-1 ) = S*R + OLDC = C + C = P / R + S = BB / R + OLDGAM = GAMMA + ALPHA = D( I+1 ) + GAMMA = C*( ALPHA-SIGMA ) - S*OLDGAM + D( I ) = OLDGAM + ( ALPHA-GAMMA ) + IF( C.NE.ZERO ) THEN + P = ( GAMMA*GAMMA ) / C + ELSE + P = OLDC*BB + END IF + 130 CONTINUE +* + E( L-1 ) = S*P + D( L ) = SIGMA + GAMMA + GO TO 100 +* +* Eigenvalue found. +* + 140 CONTINUE + D( L ) = P +* + L = L - 1 + IF( L.GE.LEND ) + $ GO TO 100 + GO TO 150 +* + END IF +* +* Undo scaling if necessary +* + 150 CONTINUE + IF( ISCALE.EQ.1 ) + $ CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + IF( ISCALE.EQ.2 ) + $ CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) +* +* Check for no convergence to an eigenvalue after a total +* of N*MAXIT iterations. +* + IF( JTOT.LT.NMAXIT ) + $ GO TO 10 + DO 160 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 160 CONTINUE + GO TO 180 +* +* Sort eigenvalues in increasing order. +* + 170 CONTINUE + CALL DLASRT( 'I', N, D, INFO ) +* + 180 CONTINUE + RETURN +* +* End of DSTERF +* + END diff --git a/costa/native/external/lapack/dstev.f b/costa/native/external/lapack/dstev.f new file mode 100644 index 000000000..5b7ed867f --- /dev/null +++ b/costa/native/external/lapack/dstev.f @@ -0,0 +1,165 @@ + SUBROUTINE DSTEV( JOBZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOBZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSTEV computes all eigenvalues and, optionally, eigenvectors of a +* real symmetric tridiagonal matrix A. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* A. +* On exit, if INFO = 0, the eigenvalues in ascending order. +* +* E (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix A, stored in elements 1 to N-1 of E; E(N) need not +* be set, but is used by the routine. +* On exit, the contents of E are destroyed. +* +* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +* eigenvectors of the matrix A, with the i-th column of Z +* holding the eigenvector associated with D(i). +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) +* If JOBZ = 'N', WORK is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of E did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL WANTZ + INTEGER IMAX, ISCALE + DOUBLE PRECISION BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM, + $ TNRM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL LSAME, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTEQR, DSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -6 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSTEV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + TNRM = DLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / TNRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL DSCAL( N, SIGMA, D, 1 ) + CALL DSCAL( N-1, SIGMA, E( 1 ), 1 ) + END IF +* +* For eigenvalues only, call DSTERF. For eigenvalues and +* eigenvectors, call DSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, D, E, INFO ) + ELSE + CALL DSTEQR( 'I', N, D, E, Z, LDZ, WORK, INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, D, 1 ) + END IF +* + RETURN +* +* End of DSTEV +* + END diff --git a/costa/native/external/lapack/dstevd.f b/costa/native/external/lapack/dstevd.f new file mode 100644 index 000000000..1edcfb1b2 --- /dev/null +++ b/costa/native/external/lapack/dstevd.f @@ -0,0 +1,217 @@ + SUBROUTINE DSTEVD( JOBZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ + INTEGER INFO, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSTEVD computes all eigenvalues and, optionally, eigenvectors of a +* real symmetric tridiagonal matrix. If eigenvectors are desired, it +* uses a divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* A. +* On exit, if INFO = 0, the eigenvalues in ascending order. +* +* E (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix A, stored in elements 1 to N-1 of E; E(N) need not +* be set, but is used by the routine. +* On exit, the contents of E are destroyed. +* +* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +* eigenvectors of the matrix A, with the i-th column of Z +* holding the eigenvector associated with D(i). +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) DOUBLE PRECISION array, +* dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If JOBZ = 'N' or N <= 1 then LWORK must be at least 1. +* If JOBZ = 'V' and N > 1 then LWORK must be at least +* ( 1 + 4*N + N**2 ). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If JOBZ = 'N' or N <= 1 then LIWORK must be at least 1. +* If JOBZ = 'V' and N > 1 then LIWORK must be at least 3+5*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of E did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTZ + INTEGER ISCALE, LIWMIN, LWMIN + DOUBLE PRECISION BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM, + $ TNRM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL LSAME, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTEDC, DSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + LIWMIN = 1 + LWMIN = 1 + IF( N.GT.1 .AND. WANTZ ) THEN + LWMIN = 1 + 4*N + N**2 + LIWMIN = 3 + 5*N + END IF +* + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -6 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSTEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + TNRM = DLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / TNRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL DSCAL( N, SIGMA, D, 1 ) + CALL DSCAL( N-1, SIGMA, E( 1 ), 1 ) + END IF +* +* For eigenvalues only, call DSTERF. For eigenvalues and +* eigenvectors, call DSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, D, E, INFO ) + ELSE + CALL DSTEDC( 'I', N, D, E, Z, LDZ, WORK, LWORK, IWORK, LIWORK, + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL DSCAL( N, ONE / SIGMA, D, 1 ) +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of DSTEVD +* + END diff --git a/costa/native/external/lapack/dstevr.f b/costa/native/external/lapack/dstevr.f new file mode 100644 index 000000000..30cd7d391 --- /dev/null +++ b/costa/native/external/lapack/dstevr.f @@ -0,0 +1,434 @@ + SUBROUTINE DSTEVR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL, + $ M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 20, 2000 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE + INTEGER IL, INFO, IU, LDZ, LIWORK, LWORK, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSTEVR computes selected eigenvalues and, optionally, eigenvectors +* of a real symmetric tridiagonal matrix T. Eigenvalues and +* eigenvectors can be selected by specifying either a range of values +* or a range of indices for the desired eigenvalues. +* +* Whenever possible, DSTEVR calls SSTEGR to compute the +* eigenspectrum using Relatively Robust Representations. DSTEGR +* computes eigenvalues by the dqds algorithm, while orthogonal +* eigenvectors are computed from various "good" L D L^T representations +* (also known as Relatively Robust Representations). Gram-Schmidt +* orthogonalization is avoided as far as possible. More specifically, +* the various steps of the algorithm are as follows. For the i-th +* unreduced block of T, +* (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T +* is a relatively robust representation, +* (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high +* relative accuracy by the dqds algorithm, +* (c) If there is a cluster of close eigenvalues, "choose" sigma_i +* close to the cluster, and go to step (a), +* (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, +* compute the corresponding eigenvector by forming a +* rank-revealing twisted factorization. +* The desired accuracy of the output can be specified by the input +* parameter ABSTOL. +* +* For more details, see "A new O(n^2) algorithm for the symmetric +* tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, +* Computer Science Division Technical Report No. UCB//CSD-97-971, +* UC Berkeley, May 1997. +* +* +* Note 1 : DSTEVR calls SSTEGR when the full spectrum is requested +* on machines which conform to the ieee-754 floating point standard. +* DSTEVR calls SSTEBZ and SSTEIN on non-ieee machines and +* when partial spectrum requests are made. +* +* Normal execution of DSTEGR may create NaNs and infinities and +* hence may abort due to a floating point exception in environments +* which do not handle NaNs and infinities in the ieee standard default +* manner. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +********** For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and +********** DSTEIN are called +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* A. +* On exit, D may be multiplied by a constant factor chosen +* to avoid over/underflow in computing the eigenvalues. +* +* E (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix A in elements 1 to N-1 of E; E(N) need not be set. +* On exit, E may be multiplied by a constant factor chosen +* to avoid over/underflow in computing the eigenvalues. +* +* VL (input) DOUBLE PRECISION +* VU (input) DOUBLE PRECISION +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) DOUBLE PRECISION +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing A to tridiagonal form. +* +* See "Computing Small Singular Values of Bidiagonal Matrices +* with Guaranteed High Relative Accuracy," by Demmel and +* Kahan, LAPACK Working Note #3. +* +* If high relative accuracy is important, set ABSTOL to +* DLAMCH( 'Safe minimum' ). Doing so will guarantee that +* eigenvalues are computed to high relative accuracy when +* possible in future releases. The current code does not +* make any guarantees about high relative accuracy, but +* future releases will. See J. Barlow and J. Demmel, +* "Computing Accurate Eigensystems of Scaled Diagonally +* Dominant Matrices", LAPACK Working Note #7, for a discussion +* of which matrices define their eigenvalues to high relative +* accuracy. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* The first M elements contain the selected eigenvalues in +* ascending order. +* +* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* ISUPPZ (output) INTEGER array, dimension ( 2*max(1,M) ) +* The support of the eigenvectors in Z, i.e., the indices +* indicating the nonzero elements in Z. The i-th eigenvector +* is nonzero only in elements ISUPPZ( 2*i-1 ) through +* ISUPPZ( 2*i ). +********** Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal (and +* minimal) LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 20*N. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal (and +* minimal) LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. LIWORK >= 10*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: Internal error +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* Ken Stanley, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IEEEOK, IMAX, INDIBL, INDIFL, INDISP, + $ INDIWO, ISCALE, ITMP1, J, JJ, LIWMIN, LWMIN, + $ NSPLIT + DOUBLE PRECISION BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM, + $ TMP1, TNRM, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL LSAME, ILAENV, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTEGR, DSTEIN, DSTERF, + $ DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* +* Test the input parameters. +* + IEEEOK = ILAENV( 10, 'DSTEVR', 'N', 1, 2, 3, 4 ) +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) ) + LWMIN = 20*N + LIWMIN = 10*N +* +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -7 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -9 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -14 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -17 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSTEVR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = D( 1 ) + ELSE + IF( VL.LT.D( 1 ) .AND. VU.GE.D( 1 ) ) THEN + M = 1 + W( 1 ) = D( 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + VLL = VL + VUU = VU +* + TNRM = DLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / TNRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL DSCAL( N, SIGMA, D, 1 ) + CALL DSCAL( N-1, SIGMA, E( 1 ), 1 ) + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* If all eigenvalues are desired, then +* call DSTERF or SSTEGR. If this fails for some eigenvalue, then +* try DSTEBZ. +* +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ IEEEOK.EQ.1 ) THEN + CALL DCOPY( N-1, E( 1 ), 1, WORK( 1 ), 1 ) + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N, D, 1, W, 1 ) + CALL DSTERF( N, W, WORK, INFO ) + ELSE + CALL DCOPY( N, D, 1, WORK( N+1 ), 1 ) + CALL DSTEGR( JOBZ, 'A', N, WORK( N+1 ), WORK, VL, VU, IL, + $ IU, ABSTOL, M, W, Z, LDZ, ISUPPZ, + $ WORK( 2*N+1 ), LWORK-2*N, IWORK, LIWORK, INFO ) +* + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 10 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIFL = INDISP + N + INDIWO = INDIFL + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTOL, D, E, M, + $ NSPLIT, W, IWORK( INDIBL ), IWORK( INDISP ), WORK, + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL DSTEIN( N, D, E, M, W, IWORK( INDIBL ), IWORK( INDISP ), + $ Z, LDZ, WORK, IWORK( INDIWO ), IWORK( INDIFL ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 10 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 30 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 20 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 20 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( I ) + W( I ) = W( J ) + IWORK( I ) = IWORK( J ) + W( J ) = TMP1 + IWORK( J ) = ITMP1 + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + END IF + 30 CONTINUE + END IF +* +* Causes problems with tests 19 & 20: +* IF (wantz .and. INDEIG ) Z( 1,1) = Z(1,1) / 1.002 + .002 +* +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of DSTEVR +* + END diff --git a/costa/native/external/lapack/dstevx.f b/costa/native/external/lapack/dstevx.f new file mode 100644 index 000000000..6f9366e55 --- /dev/null +++ b/costa/native/external/lapack/dstevx.f @@ -0,0 +1,346 @@ + SUBROUTINE DSTEVX( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL, + $ M, W, Z, LDZ, WORK, IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE + INTEGER IL, INFO, IU, LDZ, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSTEVX computes selected eigenvalues and, optionally, eigenvectors +* of a real symmetric tridiagonal matrix A. Eigenvalues and +* eigenvectors can be selected by specifying either a range of values +* or a range of indices for the desired eigenvalues. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* A. +* On exit, D may be multiplied by a constant factor chosen +* to avoid over/underflow in computing the eigenvalues. +* +* E (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix A in elements 1 to N-1 of E; E(N) need not be set. +* On exit, E may be multiplied by a constant factor chosen +* to avoid over/underflow in computing the eigenvalues. +* +* VL (input) DOUBLE PRECISION +* VU (input) DOUBLE PRECISION +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) DOUBLE PRECISION +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less +* than or equal to zero, then EPS*|T| will be used in +* its place, where |T| is the 1-norm of the tridiagonal +* matrix. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*DLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*DLAMCH('S'). +* +* See "Computing Small Singular Values of Bidiagonal Matrices +* with Guaranteed High Relative Accuracy," by Demmel and +* Kahan, LAPACK Working Note #3. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* The first M elements contain the selected eigenvalues in +* ascending order. +* +* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If an eigenvector fails to converge (INFO > 0), then that +* column of Z contains the latest approximation to the +* eigenvector, and the index of the eigenvector is returned +* in IFAIL. If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (5*N) +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, then i eigenvectors failed to converge. +* Their indices are stored in array IFAIL. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IMAX, INDIBL, INDISP, INDIWO, INDWRK, + $ ISCALE, ITMP1, J, JJ, NSPLIT + DOUBLE PRECISION BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM, + $ TMP1, TNRM, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL LSAME, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTEIN, DSTEQR, DSTERF, + $ DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -7 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -9 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -14 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSTEVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = D( 1 ) + ELSE + IF( VL.LT.D( 1 ) .AND. VU.GE.D( 1 ) ) THEN + M = 1 + W( 1 ) = D( 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + IF( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + END IF + TNRM = DLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / TNRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL DSCAL( N, SIGMA, D, 1 ) + CALL DSCAL( N-1, SIGMA, E( 1 ), 1 ) + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* If all eigenvalues are desired and ABSTOL is less than zero, then +* call DSTERF or SSTEQR. If this fails for some eigenvalue, then +* try DSTEBZ. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ ( ABSTOL.LE.ZERO ) ) THEN + CALL DCOPY( N, D, 1, W, 1 ) + CALL DCOPY( N-1, E( 1 ), 1, WORK( 1 ), 1 ) + INDWRK = N + 1 + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK, INFO ) + ELSE + CALL DSTEQR( 'I', N, W, WORK, Z, LDZ, WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 20 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDWRK = 1 + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTOL, D, E, M, + $ NSPLIT, W, IWORK( INDIBL ), IWORK( INDISP ), + $ WORK( INDWRK ), IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL DSTEIN( N, D, E, M, W, IWORK( INDIBL ), IWORK( INDISP ), + $ Z, LDZ, WORK( INDWRK ), IWORK( INDIWO ), IFAIL, + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 20 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 40 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 30 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 30 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 40 CONTINUE + END IF +* + RETURN +* +* End of DSTEVX +* + END diff --git a/costa/native/external/lapack/dsycon.f b/costa/native/external/lapack/dsycon.f new file mode 100644 index 000000000..47471312f --- /dev/null +++ b/costa/native/external/lapack/dsycon.f @@ -0,0 +1,161 @@ + SUBROUTINE DSYCON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DSYCON estimates the reciprocal of the condition number (in the +* 1-norm) of a real symmetric matrix A using the factorization +* A = U*D*U**T or A = L*D*L**T computed by DSYTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by DSYTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by DSYTRF. +* +* ANORM (input) DOUBLE PRECISION +* The 1-norm of the original matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + DOUBLE PRECISION AINVNM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLACON, DSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L') or inv(U*D*U'). +* + CALL DSYTRS( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of DSYCON +* + END diff --git a/costa/native/external/lapack/dsyev.f b/costa/native/external/lapack/dsyev.f new file mode 100644 index 000000000..acdb440fc --- /dev/null +++ b/costa/native/external/lapack/dsyev.f @@ -0,0 +1,213 @@ + SUBROUTINE DSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DSYEV computes all eigenvalues and, optionally, eigenvectors of a +* real symmetric matrix A. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) +* On entry, the symmetric matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* On exit, if JOBZ = 'V', then if INFO = 0, A contains the +* orthonormal eigenvectors of the matrix A. +* If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +* or the upper triangle (if UPLO='U') of A, including the +* diagonal, is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,3*N-1). +* For optimal efficiency, LWORK >= (NB+2)*N, +* where NB is the blocksize for DSYTRD returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE, + $ LLWORK, LOPT, LWKOPT, NB + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANSY + EXTERNAL LSAME, ILAENV, DLAMCH, DLANSY +* .. +* .. External Subroutines .. + EXTERNAL DLASCL, DORGTR, DSCAL, DSTEQR, DSTERF, DSYTRD, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, 3*N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( 1, ( NB+2 )*N ) + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + WORK( 1 ) = 3 + IF( WANTZ ) + $ A( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL DLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call DSYTRD to reduce symmetric matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + CALL DSYTRD( UPLO, N, A, LDA, W, WORK( INDE ), WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + LOPT = 2*N + WORK( INDWRK ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* DORGTR to generate the orthogonal matrix, then call DSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL DORGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ), + $ LLWORK, IINFO ) + CALL DSTEQR( JOBZ, N, W, WORK( INDE ), A, LDA, WORK( INDTAU ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DSYEV +* + END diff --git a/costa/native/external/lapack/dsyevd.f b/costa/native/external/lapack/dsyevd.f new file mode 100644 index 000000000..62b0c5fc4 --- /dev/null +++ b/costa/native/external/lapack/dsyevd.f @@ -0,0 +1,265 @@ + SUBROUTINE DSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DSYEVD computes all eigenvalues and, optionally, eigenvectors of a +* real symmetric matrix A. If eigenvectors are desired, it uses a +* divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Because of large use of BLAS of level 3, DSYEVD needs N**2 more +* workspace than DSYEVX. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) +* On entry, the symmetric matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* On exit, if JOBZ = 'V', then if INFO = 0, A contains the +* orthonormal eigenvectors of the matrix A. +* If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +* or the upper triangle (if UPLO='U') of A, including the +* diagonal, is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* WORK (workspace/output) DOUBLE PRECISION array, +* dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If N <= 1, LWORK must be at least 1. +* If JOBZ = 'N' and N > 1, LWORK must be at least 2*N+1. +* If JOBZ = 'V' and N > 1, LWORK must be at least +* 1 + 6*N + 2*N**2. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If N <= 1, LIWORK must be at least 1. +* If JOBZ = 'N' and N > 1, LIWORK must be at least 1. +* If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* Modified by Francoise Tisseur, University of Tennessee. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. +* + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, INDE, INDTAU, INDWK2, INDWRK, ISCALE, + $ LIOPT, LIWMIN, LLWORK, LLWRK2, LOPT, LWMIN + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANSY + EXTERNAL LSAME, DLAMCH, DLANSY +* .. +* .. External Subroutines .. + EXTERNAL DLACPY, DLASCL, DORMTR, DSCAL, DSTEDC, DSTERF, + $ DSYTRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + LOPT = LWMIN + LIOPT = LIWMIN + ELSE + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 6*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + 1 + END IF + LOPT = LWMIN + LIOPT = LIWMIN + END IF + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LOPT + IWORK( 1 ) = LIOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + IF( WANTZ ) + $ A( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL DLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call DSYTRD to reduce symmetric matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 +* + CALL DSYTRD( UPLO, N, A, LDA, W, WORK( INDE ), WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + LOPT = 2*N + WORK( INDWRK ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the +* tridiagonal matrix, then call DORMTR to multiply it by the +* Householder transformations stored in A. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL DSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO ) + CALL DORMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ), + $ WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO ) + CALL DLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA ) + LOPT = MAX( LOPT, 1+6*N+2*N**2 ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL DSCAL( N, ONE / SIGMA, W, 1 ) +* + WORK( 1 ) = LOPT + IWORK( 1 ) = LIOPT +* + RETURN +* +* End of DSYEVD +* + END diff --git a/costa/native/external/lapack/dsyevr.f b/costa/native/external/lapack/dsyevr.f new file mode 100644 index 000000000..b308aecd1 --- /dev/null +++ b/costa/native/external/lapack/dsyevr.f @@ -0,0 +1,497 @@ + SUBROUTINE DSYEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, + $ IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 20, 2000 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LWORK, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSYEVR computes selected eigenvalues and, optionally, eigenvectors +* of a real symmetric matrix T. Eigenvalues and eigenvectors can be +* selected by specifying either a range of values or a range of +* indices for the desired eigenvalues. +* +* Whenever possible, DSYEVR calls DSTEGR to compute the +* eigenspectrum using Relatively Robust Representations. DSTEGR +* computes eigenvalues by the dqds algorithm, while orthogonal +* eigenvectors are computed from various "good" L D L^T representations +* (also known as Relatively Robust Representations). Gram-Schmidt +* orthogonalization is avoided as far as possible. More specifically, +* the various steps of the algorithm are as follows. For the i-th +* unreduced block of T, +* (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T +* is a relatively robust representation, +* (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high +* relative accuracy by the dqds algorithm, +* (c) If there is a cluster of close eigenvalues, "choose" sigma_i +* close to the cluster, and go to step (a), +* (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, +* compute the corresponding eigenvector by forming a +* rank-revealing twisted factorization. +* The desired accuracy of the output can be specified by the input +* parameter ABSTOL. +* +* For more details, see "A new O(n^2) algorithm for the symmetric +* tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, +* Computer Science Division Technical Report No. UCB//CSD-97-971, +* UC Berkeley, May 1997. +* +* +* Note 1 : DSYEVR calls DSTEGR when the full spectrum is requested +* on machines which conform to the ieee-754 floating point standard. +* DSYEVR calls DSTEBZ and SSTEIN on non-ieee machines and +* when partial spectrum requests are made. +* +* Normal execution of DSTEGR may create NaNs and infinities and +* hence may abort due to a floating point exception in environments +* which do not handle NaNs and infinities in the ieee standard default +* manner. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +********** For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and +********** DSTEIN are called +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) +* On entry, the symmetric matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* On exit, the lower triangle (if UPLO='L') or the upper +* triangle (if UPLO='U') of A, including the diagonal, is +* destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* VL (input) DOUBLE PRECISION +* VU (input) DOUBLE PRECISION +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) DOUBLE PRECISION +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing A to tridiagonal form. +* +* See "Computing Small Singular Values of Bidiagonal Matrices +* with Guaranteed High Relative Accuracy," by Demmel and +* Kahan, LAPACK Working Note #3. +* +* If high relative accuracy is important, set ABSTOL to +* DLAMCH( 'Safe minimum' ). Doing so will guarantee that +* eigenvalues are computed to high relative accuracy when +* possible in future releases. The current code does not +* make any guarantees about high relative accuracy, but +* furutre releases will. See J. Barlow and J. Demmel, +* "Computing Accurate Eigensystems of Scaled Diagonally +* Dominant Matrices", LAPACK Working Note #7, for a discussion +* of which matrices define their eigenvalues to high relative +* accuracy. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* The first M elements contain the selected eigenvalues in +* ascending order. +* +* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M)) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* ISUPPZ (output) INTEGER array, dimension ( 2*max(1,M) ) +* The support of the eigenvectors in Z, i.e., the indices +* indicating the nonzero elements in Z. The i-th eigenvector +* is nonzero only in elements ISUPPZ( 2*i-1 ) through +* ISUPPZ( 2*i ). +********** Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,26*N). +* For optimal efficiency, LWORK >= (NB+6)*N, +* where NB is the max of the blocksize for DSYTRD and DORMTR +* returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. LIWORK >= max(1,10*N). +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: Internal error +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* Ken Stanley, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IEEEOK, IINFO, IMAX, INDD, INDDD, INDE, + $ INDEE, INDIBL, INDIFL, INDISP, INDIWO, INDTAU, + $ INDWK, INDWKN, ISCALE, ITMP1, J, JJ, LIWMIN, + $ LLWORK, LLWRKN, LWKOPT, LWMIN, NB, NSPLIT + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANSY + EXTERNAL LSAME, ILAENV, DLAMCH, DLANSY +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DORMTR, DSCAL, DSTEBZ, DSTEGR, DSTEIN, + $ DSTERF, DSWAP, DSYTRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IEEEOK = ILAENV( 10, 'DSYEVR', 'N', 1, 2, 3, 4 ) +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) ) +* + LWMIN = MAX( 1, 26*N ) + LIWMIN = MAX( 1, 10*N ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 ) + NB = MAX( NB, ILAENV( 1, 'ZUNMTR', UPLO, N, -1, -1, -1 ) ) + LWKOPT = MAX( ( NB+1 )*N, LWMIN ) + WORK( 1 ) = LWKOPT + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYEVR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + WORK( 1 ) = 7 + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + ELSE + IF( VL.LT.A( 1, 1 ) .AND. VU.GE.A( 1, 1 ) ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + VLL = VL + VUU = VU + ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL DSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL DSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call DSYTRD to reduce symmetric matrix to tridiagonal form. +* + INDTAU = 1 + INDE = INDTAU + N + INDD = INDE + N + INDEE = INDD + N + INDDD = INDEE + N + INDIFL = INDDD + N + INDWK = INDIFL + N + LLWORK = LWORK - INDWK + 1 + CALL DSYTRD( UPLO, N, A, LDA, WORK( INDD ), WORK( INDE ), + $ WORK( INDTAU ), WORK( INDWK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired +* then call DSTERF or SSTEGR and DORMTR. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ IEEEOK.EQ.1 ) THEN + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N, WORK( INDD ), 1, W, 1 ) + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DCOPY( N, WORK( INDD ), 1, WORK( INDDD ), 1 ) +* + CALL DSTEGR( JOBZ, 'A', N, WORK( INDDD ), WORK( INDEE ), + $ VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, ISUPPZ, + $ WORK( INDWK ), LWORK, IWORK, LIWORK, INFO ) +* +* +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by DSTEIN. +* + IF( WANTZ .AND. INFO.EQ.0 ) THEN + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL DORMTR( 'L', UPLO, 'N', N, M, A, LDA, + $ WORK( INDTAU ), Z, LDZ, WORK( INDWKN ), + $ LLWRKN, IINFO ) + END IF + END IF +* +* + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. +* Also call DSTEBZ and SSTEIN if SSTEGR fails. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIFL = 1 + INDIBL = INDIFL + N + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL DSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWK ), IWORK( INDIWO ), IWORK( INDIFL ), + $ INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by DSTEIN. +* + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL DORMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWKOPT + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of DSYEVR +* + END diff --git a/costa/native/external/lapack/dsyevx.f b/costa/native/external/lapack/dsyevx.f new file mode 100644 index 000000000..e8fea917a --- /dev/null +++ b/costa/native/external/lapack/dsyevx.f @@ -0,0 +1,421 @@ + SUBROUTINE DSYEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, WORK, LWORK, IWORK, + $ IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSYEVX computes selected eigenvalues and, optionally, eigenvectors +* of a real symmetric matrix A. Eigenvalues and eigenvectors can be +* selected by specifying either a range of values or a range of indices +* for the desired eigenvalues. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) +* On entry, the symmetric matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* On exit, the lower triangle (if UPLO='L') or the upper +* triangle (if UPLO='U') of A, including the diagonal, is +* destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* VL (input) DOUBLE PRECISION +* VU (input) DOUBLE PRECISION +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) DOUBLE PRECISION +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing A to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*DLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*DLAMCH('S'). +* +* See "Computing Small Singular Values of Bidiagonal Matrices +* with Guaranteed High Relative Accuracy," by Demmel and +* Kahan, LAPACK Working Note #3. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* On normal exit, the first M elements contain the selected +* eigenvalues in ascending order. +* +* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M)) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If an eigenvector fails to converge, then that column of Z +* contains the latest approximation to the eigenvector, and the +* index of the eigenvector is returned in IFAIL. +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,8*N). +* For optimal efficiency, LWORK >= (NB+3)*N, +* where NB is the max of the blocksize for DSYTRD and DORMTR +* returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, then i eigenvectors failed to converge. +* Their indices are stored in array IFAIL. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWO, INDTAU, INDWKN, INDWRK, ISCALE, + $ ITMP1, J, JJ, LLWORK, LLWRKN, LOPT, LWKOPT, NB, + $ NSPLIT + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANSY + EXTERNAL LSAME, ILAENV, DLAMCH, DLANSY +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLACPY, DORGTR, DORMTR, DSCAL, DSTEBZ, + $ DSTEIN, DSTEQR, DSTERF, DSWAP, DSYTRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + ELSE IF( LWORK.LT.MAX( 1, 8*N ) .AND. .NOT.LQUERY ) THEN + INFO = -17 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 ) + NB = MAX( NB, ILAENV( 1, 'DORMTR', UPLO, N, -1, -1, -1 ) ) + LWKOPT = ( NB+3 )*N + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + WORK( 1 ) = 7 + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + ELSE + IF( VL.LT.A( 1, 1 ) .AND. VU.GE.A( 1, 1 ) ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + VLL = VL + VUU = VU + ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL DSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL DSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call DSYTRD to reduce symmetric matrix to tridiagonal form. +* + INDTAU = 1 + INDE = INDTAU + N + INDD = INDE + N + INDWRK = INDD + N + LLWORK = LWORK - INDWRK + 1 + CALL DSYTRD( UPLO, N, A, LDA, WORK( INDD ), WORK( INDE ), + $ WORK( INDTAU ), WORK( INDWRK ), LLWORK, IINFO ) + LOPT = 3*N + WORK( INDWRK ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal to +* zero, then call DSTERF or DORGTR and SSTEQR. If this fails for +* some eigenvalue, then try DSTEBZ. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ ( ABSTOL.LE.ZERO ) ) THEN + CALL DCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL DLACPY( 'A', N, N, A, LDA, Z, LDZ ) + CALL DORGTR( UPLO, N, Z, LDZ, WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + IFAIL( I ) = 0 + 30 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 40 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL DSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by DSTEIN. +* + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL DORMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 40 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 60 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 50 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 60 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DSYEVX +* + END diff --git a/costa/native/external/lapack/dsygs2.f b/costa/native/external/lapack/dsygs2.f new file mode 100644 index 000000000..7120376c7 --- /dev/null +++ b/costa/native/external/lapack/dsygs2.f @@ -0,0 +1,212 @@ + SUBROUTINE DSYGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DSYGS2 reduces a real symmetric-definite generalized eigenproblem +* to standard form. +* +* If ITYPE = 1, the problem is A*x = lambda*B*x, +* and A is overwritten by inv(U')*A*inv(U) or inv(L)*A*inv(L') +* +* If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +* B*A*x = lambda*x, and A is overwritten by U*A*U` or L'*A*L. +* +* B must have been previously factorized as U'*U or L*L' by DPOTRF. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* = 1: compute inv(U')*A*inv(U) or inv(L)*A*inv(L'); +* = 2 or 3: compute U*A*U' or L'*A*L. +* +* UPLO (input) CHARACTER +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored, and how B has been factorized. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* n by n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n by n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the transformed matrix, stored in the +* same format as A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input) DOUBLE PRECISION array, dimension (LDB,N) +* The triangular factor from the Cholesky factorization of B, +* as returned by DPOTRF. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, HALF + PARAMETER ( ONE = 1.0D0, HALF = 0.5D0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K + DOUBLE PRECISION AKK, BKK, CT +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DSCAL, DSYR2, DTRMV, DTRSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYGS2', -INFO ) + RETURN + END IF +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U')*A*inv(U) +* + DO 10 K = 1, N +* +* Update the upper triangle of A(k:n,k:n) +* + AKK = A( K, K ) + BKK = B( K, K ) + AKK = AKK / BKK**2 + A( K, K ) = AKK + IF( K.LT.N ) THEN + CALL DSCAL( N-K, ONE / BKK, A( K, K+1 ), LDA ) + CT = -HALF*AKK + CALL DAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ), + $ LDA ) + CALL DSYR2( UPLO, N-K, -ONE, A( K, K+1 ), LDA, + $ B( K, K+1 ), LDB, A( K+1, K+1 ), LDA ) + CALL DAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ), + $ LDA ) + CALL DTRSV( UPLO, 'Transpose', 'Non-unit', N-K, + $ B( K+1, K+1 ), LDB, A( K, K+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L') +* + DO 20 K = 1, N +* +* Update the lower triangle of A(k:n,k:n) +* + AKK = A( K, K ) + BKK = B( K, K ) + AKK = AKK / BKK**2 + A( K, K ) = AKK + IF( K.LT.N ) THEN + CALL DSCAL( N-K, ONE / BKK, A( K+1, K ), 1 ) + CT = -HALF*AKK + CALL DAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 ) + CALL DSYR2( UPLO, N-K, -ONE, A( K+1, K ), 1, + $ B( K+1, K ), 1, A( K+1, K+1 ), LDA ) + CALL DAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 ) + CALL DTRSV( UPLO, 'No transpose', 'Non-unit', N-K, + $ B( K+1, K+1 ), LDB, A( K+1, K ), 1 ) + END IF + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U' +* + DO 30 K = 1, N +* +* Update the upper triangle of A(1:k,1:k) +* + AKK = A( K, K ) + BKK = B( K, K ) + CALL DTRMV( UPLO, 'No transpose', 'Non-unit', K-1, B, + $ LDB, A( 1, K ), 1 ) + CT = HALF*AKK + CALL DAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 ) + CALL DSYR2( UPLO, K-1, ONE, A( 1, K ), 1, B( 1, K ), 1, + $ A, LDA ) + CALL DAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 ) + CALL DSCAL( K-1, BKK, A( 1, K ), 1 ) + A( K, K ) = AKK*BKK**2 + 30 CONTINUE + ELSE +* +* Compute L'*A*L +* + DO 40 K = 1, N +* +* Update the lower triangle of A(1:k,1:k) +* + AKK = A( K, K ) + BKK = B( K, K ) + CALL DTRMV( UPLO, 'Transpose', 'Non-unit', K-1, B, LDB, + $ A( K, 1 ), LDA ) + CT = HALF*AKK + CALL DAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA ) + CALL DSYR2( UPLO, K-1, ONE, A( K, 1 ), LDA, B( K, 1 ), + $ LDB, A, LDA ) + CALL DAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA ) + CALL DSCAL( K-1, BKK, A( K, 1 ), LDA ) + A( K, K ) = AKK*BKK**2 + 40 CONTINUE + END IF + END IF + RETURN +* +* End of DSYGS2 +* + END diff --git a/costa/native/external/lapack/dsygst.f b/costa/native/external/lapack/dsygst.f new file mode 100644 index 000000000..7a42cd71b --- /dev/null +++ b/costa/native/external/lapack/dsygst.f @@ -0,0 +1,250 @@ + SUBROUTINE DSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DSYGST reduces a real symmetric-definite generalized eigenproblem +* to standard form. +* +* If ITYPE = 1, the problem is A*x = lambda*B*x, +* and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) +* +* If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +* B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. +* +* B must have been previously factorized as U**T*U or L*L**T by DPOTRF. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); +* = 2 or 3: compute U*A*U**T or L**T*A*L. +* +* UPLO (input) CHARACTER +* = 'U': Upper triangle of A is stored and B is factored as +* U**T*U; +* = 'L': Lower triangle of A is stored and B is factored as +* L*L**T. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the transformed matrix, stored in the +* same format as A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input) DOUBLE PRECISION array, dimension (LDB,N) +* The triangular factor from the Cholesky factorization of B, +* as returned by DPOTRF. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, HALF + PARAMETER ( ONE = 1.0D0, HALF = 0.5D0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K, KB, NB +* .. +* .. External Subroutines .. + EXTERNAL DSYGS2, DSYMM, DSYR2K, DTRMM, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYGST', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'DSYGST', UPLO, N, -1, -1, -1 ) +* + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL DSYGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + ELSE +* +* Use blocked code +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U')*A*inv(U) +* + DO 10 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the upper triangle of A(k:n,k:n) +* + CALL DSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + IF( K+KB.LE.N ) THEN + CALL DTRSM( 'Left', UPLO, 'Transpose', 'Non-unit', + $ KB, N-K-KB+1, ONE, B( K, K ), LDB, + $ A( K, K+KB ), LDA ) + CALL DSYMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, + $ A( K, K ), LDA, B( K, K+KB ), LDB, ONE, + $ A( K, K+KB ), LDA ) + CALL DSYR2K( UPLO, 'Transpose', N-K-KB+1, KB, -ONE, + $ A( K, K+KB ), LDA, B( K, K+KB ), LDB, + $ ONE, A( K+KB, K+KB ), LDA ) + CALL DSYMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, + $ A( K, K ), LDA, B( K, K+KB ), LDB, ONE, + $ A( K, K+KB ), LDA ) + CALL DTRSM( 'Right', UPLO, 'No transpose', + $ 'Non-unit', KB, N-K-KB+1, ONE, + $ B( K+KB, K+KB ), LDB, A( K, K+KB ), + $ LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L') +* + DO 20 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the lower triangle of A(k:n,k:n) +* + CALL DSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + IF( K+KB.LE.N ) THEN + CALL DTRSM( 'Right', UPLO, 'Transpose', 'Non-unit', + $ N-K-KB+1, KB, ONE, B( K, K ), LDB, + $ A( K+KB, K ), LDA ) + CALL DSYMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, + $ A( K, K ), LDA, B( K+KB, K ), LDB, ONE, + $ A( K+KB, K ), LDA ) + CALL DSYR2K( UPLO, 'No transpose', N-K-KB+1, KB, + $ -ONE, A( K+KB, K ), LDA, B( K+KB, K ), + $ LDB, ONE, A( K+KB, K+KB ), LDA ) + CALL DSYMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, + $ A( K, K ), LDA, B( K+KB, K ), LDB, ONE, + $ A( K+KB, K ), LDA ) + CALL DTRSM( 'Left', UPLO, 'No transpose', + $ 'Non-unit', N-K-KB+1, KB, ONE, + $ B( K+KB, K+KB ), LDB, A( K+KB, K ), + $ LDA ) + END IF + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U' +* + DO 30 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) +* + CALL DTRMM( 'Left', UPLO, 'No transpose', 'Non-unit', + $ K-1, KB, ONE, B, LDB, A( 1, K ), LDA ) + CALL DSYMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ), + $ LDA, B( 1, K ), LDB, ONE, A( 1, K ), LDA ) + CALL DSYR2K( UPLO, 'No transpose', K-1, KB, ONE, + $ A( 1, K ), LDA, B( 1, K ), LDB, ONE, A, + $ LDA ) + CALL DSYMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ), + $ LDA, B( 1, K ), LDB, ONE, A( 1, K ), LDA ) + CALL DTRMM( 'Right', UPLO, 'Transpose', 'Non-unit', + $ K-1, KB, ONE, B( K, K ), LDB, A( 1, K ), + $ LDA ) + CALL DSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + 30 CONTINUE + ELSE +* +* Compute L'*A*L +* + DO 40 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) +* + CALL DTRMM( 'Right', UPLO, 'No transpose', 'Non-unit', + $ KB, K-1, ONE, B, LDB, A( K, 1 ), LDA ) + CALL DSYMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ), + $ LDA, B( K, 1 ), LDB, ONE, A( K, 1 ), LDA ) + CALL DSYR2K( UPLO, 'Transpose', K-1, KB, ONE, + $ A( K, 1 ), LDA, B( K, 1 ), LDB, ONE, A, + $ LDA ) + CALL DSYMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ), + $ LDA, B( K, 1 ), LDB, ONE, A( K, 1 ), LDA ) + CALL DTRMM( 'Left', UPLO, 'Transpose', 'Non-unit', KB, + $ K-1, ONE, B( K, K ), LDB, A( K, 1 ), LDA ) + CALL DSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + 40 CONTINUE + END IF + END IF + END IF + RETURN +* +* End of DSYGST +* + END diff --git a/costa/native/external/lapack/dsygv.f b/costa/native/external/lapack/dsygv.f new file mode 100644 index 000000000..73c316cb8 --- /dev/null +++ b/costa/native/external/lapack/dsygv.f @@ -0,0 +1,227 @@ + SUBROUTINE DSYGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DSYGV computes all the eigenvalues, and optionally, the eigenvectors +* of a real generalized symmetric-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +* Here A and B are assumed to be symmetric and B is also +* positive definite. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) +* On entry, the symmetric matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* +* On exit, if JOBZ = 'V', then if INFO = 0, A contains the +* matrix Z of eigenvectors. The eigenvectors are normalized +* as follows: +* if ITYPE = 1 or 2, Z**T*B*Z = I; +* if ITYPE = 3, Z**T*inv(B)*Z = I. +* If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +* or the lower triangle (if UPLO='L') of A, including the +* diagonal, is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) +* On entry, the symmetric positive definite matrix B. +* If UPLO = 'U', the leading N-by-N upper triangular part of B +* contains the upper triangular part of the matrix B. +* If UPLO = 'L', the leading N-by-N lower triangular part of B +* contains the lower triangular part of the matrix B. +* +* On exit, if INFO <= N, the part of B containing the matrix is +* overwritten by the triangular factor U or L from the Cholesky +* factorization B = U**T*U or B = L*L**T. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,3*N-1). +* For optimal efficiency, LWORK >= (NB+2)*N, +* where NB is the blocksize for DSYTRD returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: DPOTRF or DSYEV returned an error code: +* <= N: if INFO = i, DSYEV failed to converge; +* i off-diagonal elements of an intermediate +* tridiagonal form did not converge to zero; +* > N: if INFO = N + i, for 1 <= i <= N, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER LWKOPT, NB, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DPOTRF, DSYEV, DSYGST, DTRMM, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, 3*N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 ) + LWKOPT = ( NB+2 )*N + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYGV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL DPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL DSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL DSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + CALL DTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + CALL DTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of DSYGV +* + END diff --git a/costa/native/external/lapack/dsygvd.f b/costa/native/external/lapack/dsygvd.f new file mode 100644 index 000000000..7c250b4a1 --- /dev/null +++ b/costa/native/external/lapack/dsygvd.f @@ -0,0 +1,277 @@ + SUBROUTINE DSYGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, + $ LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DSYGVD computes all the eigenvalues, and optionally, the eigenvectors +* of a real generalized symmetric-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +* B are assumed to be symmetric and B is also positive definite. +* If eigenvectors are desired, it uses a divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) +* On entry, the symmetric matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* +* On exit, if JOBZ = 'V', then if INFO = 0, A contains the +* matrix Z of eigenvectors. The eigenvectors are normalized +* as follows: +* if ITYPE = 1 or 2, Z**T*B*Z = I; +* if ITYPE = 3, Z**T*inv(B)*Z = I. +* If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +* or the lower triangle (if UPLO='L') of A, including the +* diagonal, is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) +* On entry, the symmetric matrix B. If UPLO = 'U', the +* leading N-by-N upper triangular part of B contains the +* upper triangular part of the matrix B. If UPLO = 'L', +* the leading N-by-N lower triangular part of B contains +* the lower triangular part of the matrix B. +* +* On exit, if INFO <= N, the part of B containing the matrix is +* overwritten by the triangular factor U or L from the Cholesky +* factorization B = U**T*U or B = L*L**T. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If N <= 1, LWORK >= 1. +* If JOBZ = 'N' and N > 1, LWORK >= 2*N+1. +* If JOBZ = 'V' and N > 1, LWORK >= 1 + 6*N + 2*N**2. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If N <= 1, LIWORK >= 1. +* If JOBZ = 'N' and N > 1, LIWORK >= 1. +* If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: DPOTRF or DSYEVD returned an error code: +* <= N: if INFO = i, DSYEVD failed to converge; +* i off-diagonal elements of an intermediate +* tridiagonal form did not converge to zero; +* > N: if INFO = N + i, for 1 <= i <= N, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER LIOPT, LIWMIN, LOPT, LWMIN, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DPOTRF, DSYEVD, DSYGST, DTRMM, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + LOPT = LWMIN + LIOPT = LIWMIN + ELSE + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 6*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + 1 + END IF + LOPT = LWMIN + LIOPT = LIWMIN + END IF + IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LOPT + IWORK( 1 ) = LIOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL DPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL DSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL DSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, IWORK, LIWORK, + $ INFO ) + LOPT = MAX( DBLE( LOPT ), DBLE( WORK( 1 ) ) ) + LIOPT = MAX( DBLE( LIOPT ), DBLE( IWORK( 1 ) ) ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + CALL DTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + CALL DTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LOPT + IWORK( 1 ) = LIOPT +* + RETURN +* +* End of DSYGVD +* + END diff --git a/costa/native/external/lapack/dsygvx.f b/costa/native/external/lapack/dsygvx.f new file mode 100644 index 000000000..0815bf584 --- /dev/null +++ b/costa/native/external/lapack/dsygvx.f @@ -0,0 +1,326 @@ + SUBROUTINE DSYGVX( ITYPE, JOBZ, RANGE, UPLO, N, A, LDA, B, LDB, + $ VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, + $ LWORK, IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, ITYPE, IU, LDA, LDB, LDZ, LWORK, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DSYGVX computes selected eigenvalues, and optionally, eigenvectors +* of a real generalized symmetric-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A +* and B are assumed to be symmetric and B is also positive definite. +* Eigenvalues and eigenvectors can be selected by specifying either a +* range of values or a range of indices for the desired eigenvalues. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A and B are stored; +* = 'L': Lower triangle of A and B are stored. +* +* N (input) INTEGER +* The order of the matrix pencil (A,B). N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) +* On entry, the symmetric matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* +* On exit, the lower triangle (if UPLO='L') or the upper +* triangle (if UPLO='U') of A, including the diagonal, is +* destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDA, N) +* On entry, the symmetric matrix B. If UPLO = 'U', the +* leading N-by-N upper triangular part of B contains the +* upper triangular part of the matrix B. If UPLO = 'L', +* the leading N-by-N lower triangular part of B contains +* the lower triangular part of the matrix B. +* +* On exit, if INFO <= N, the part of B containing the matrix is +* overwritten by the triangular factor U or L from the Cholesky +* factorization B = U**T*U or B = L*L**T. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* VL (input) DOUBLE PRECISION +* VU (input) DOUBLE PRECISION +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) DOUBLE PRECISION +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing A to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*DLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*DLAMCH('S'). +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* On normal exit, the first M elements contain the selected +* eigenvalues in ascending order. +* +* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M)) +* If JOBZ = 'N', then Z is not referenced. +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* The eigenvectors are normalized as follows: +* if ITYPE = 1 or 2, Z**T*B*Z = I; +* if ITYPE = 3, Z**T*inv(B)*Z = I. +* +* If an eigenvector fails to converge, then that column of Z +* contains the latest approximation to the eigenvector, and the +* index of the eigenvector is returned in IFAIL. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,8*N). +* For optimal efficiency, LWORK >= (NB+3)*N, +* where NB is the blocksize for DSYTRD returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: DPOTRF or DSYEVX returned an error code: +* <= N: if INFO = i, DSYEVX failed to converge; +* i eigenvectors failed to converge. Their indices +* are stored in array IFAIL. +* > N: if INFO = N + i, for 1 <= i <= N, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ + CHARACTER TRANS + INTEGER LOPT, LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DPOTRF, DSYEVX, DSYGST, DTRMM, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + UPPER = LSAME( UPLO, 'U' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -3 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( VALEIG .AND. N.GT.0 ) THEN + IF( VU.LE.VL ) + $ INFO = -11 + ELSE IF( INDEIG .AND. IL.LT.1 ) THEN + INFO = -12 + ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN + INFO = -13 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -18 + ELSE IF( LWORK.LT.MAX( 1, 8*N ) .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 ) + LWKOPT = ( NB+3 )*N + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYGVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Form a Cholesky factorization of B. +* + CALL DPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL DSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL DSYEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, ABSTOL, + $ M, W, Z, LDZ, WORK, LWORK, IWORK, IFAIL, INFO ) + LOPT = WORK( 1 ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + IF( INFO.GT.0 ) + $ M = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + CALL DTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, M, ONE, B, + $ LDB, Z, LDZ ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + CALL DTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, M, ONE, B, + $ LDB, Z, LDZ ) + END IF + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DSYGVX +* + END diff --git a/costa/native/external/lapack/dsyrfs.f b/costa/native/external/lapack/dsyrfs.f new file mode 100644 index 000000000..64c745599 --- /dev/null +++ b/costa/native/external/lapack/dsyrfs.f @@ -0,0 +1,335 @@ + SUBROUTINE DSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, + $ X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DSYRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is symmetric indefinite, and +* provides error bounds and backward error estimates for the solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The symmetric matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) +* The factored form of the matrix A. AF contains the block +* diagonal matrix D and the multipliers used to obtain the +* factor U or L from the factorization A = U*D*U**T or +* A = L*D*L**T as computed by DSYTRF. +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by DSYTRF. +* +* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by DSYTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DLACON, DSYMV, DSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL DSYMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, + $ WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + DO 40 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 40 CONTINUE + WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK + DO 60 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 60 CONTINUE + WORK( K ) = WORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL DSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N, + $ INFO ) + CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use DLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL DLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL DSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N, + $ INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 120 CONTINUE + CALL DSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N, + $ INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of DSYRFS +* + END diff --git a/costa/native/external/lapack/dsysv.f b/costa/native/external/lapack/dsysv.f new file mode 100644 index 000000000..d2f42ea8e --- /dev/null +++ b/costa/native/external/lapack/dsysv.f @@ -0,0 +1,171 @@ + SUBROUTINE DSYSV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DSYSV computes the solution to a real system of linear equations +* A * X = B, +* where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +* matrices. +* +* The diagonal pivoting method is used to factor A as +* A = U * D * U**T, if UPLO = 'U', or +* A = L * D * L**T, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is symmetric and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then +* used to solve the system of equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the block diagonal matrix D and the +* multipliers used to obtain the factor U or L from the +* factorization A = U*D*U**T or A = L*D*L**T as computed by +* DSYTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D, as +* determined by DSYTRF. If IPIV(k) > 0, then rows and columns +* k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +* diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +* then rows and columns k-1 and -IPIV(k) were interchanged and +* D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +* IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +* -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +* diagonal block. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of WORK. LWORK >= 1, and for best performance +* LWORK >= N*NB, where NB is the optimal blocksize for +* DSYTRF. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, so the solution could not be computed. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DSYTRF, DSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'DSYTRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYSV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL DSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL DSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DSYSV +* + END diff --git a/costa/native/external/lapack/dsysvx.f b/costa/native/external/lapack/dsysvx.f new file mode 100644 index 000000000..dbd51f84e --- /dev/null +++ b/costa/native/external/lapack/dsysvx.f @@ -0,0 +1,297 @@ + SUBROUTINE DSYSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, + $ LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK, + $ IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DSYSVX uses the diagonal pivoting factorization to compute the +* solution to a real system of linear equations A * X = B, +* where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +* matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'N', the diagonal pivoting method is used to factor A. +* The form of the factorization is +* A = U * D * U**T, if UPLO = 'U', or +* A = L * D * L**T, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is symmetric and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. +* +* 2. If some D(i,i)=0, so that D is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 3. The system of equations is solved for X using the factored form +* of A. +* +* 4. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of A has been +* supplied on entry. +* = 'F': On entry, AF and IPIV contain the factored form of +* A. AF and IPIV will not be modified. +* = 'N': The matrix A will be copied to AF and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The symmetric matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input or output) DOUBLE PRECISION array, dimension (LDAF,N) +* If FACT = 'F', then AF is an input argument and on entry +* contains the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**T or A = L*D*L**T as computed by DSYTRF. +* +* If FACT = 'N', then AF is an output argument and on exit +* returns the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**T or A = L*D*L**T. +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains details of the interchanges and the block structure +* of D, as determined by DSYTRF. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains details of the interchanges and the block structure +* of D, as determined by DSYTRF. +* +* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) +* The N-by-NRHS right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) DOUBLE PRECISION +* The estimate of the reciprocal condition number of the matrix +* A. If RCOND is less than the machine precision (in +* particular, if RCOND = 0), the matrix is singular to working +* precision. This condition is indicated by a return code of +* INFO > 0. +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of WORK. LWORK >= 3*N, and for best performance +* LWORK >= N*NB, where NB is the optimal blocksize for +* DSYTRF. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: D(i,i) is exactly zero. The factorization +* has been completed but the factor D is exactly +* singular, so the solution and error bounds could +* not be computed. RCOND = 0 is returned. +* = N+1: D is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOFACT + INTEGER LWKOPT, NB + DOUBLE PRECISION ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANSY + EXTERNAL LSAME, ILAENV, DLAMCH, DLANSY +* .. +* .. External Subroutines .. + EXTERNAL DLACPY, DSYCON, DSYRFS, DSYTRF, DSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.MAX( 1, 3*N ) .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'DSYTRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYSVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL DLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL DSYTRF( UPLO, N, AF, LDAF, IPIV, WORK, LWORK, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = DLANSY( 'I', UPLO, N, A, LDA, WORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL DSYCON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution vectors X. +* + CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL DSYTRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL DSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X, + $ LDX, FERR, BERR, WORK, IWORK, INFO ) +* + RETURN +* +* End of DSYSVX +* + END diff --git a/costa/native/external/lapack/dsytd2.f b/costa/native/external/lapack/dsytd2.f new file mode 100644 index 000000000..a0a086191 --- /dev/null +++ b/costa/native/external/lapack/dsytd2.f @@ -0,0 +1,249 @@ + SUBROUTINE DSYTD2( UPLO, N, A, LDA, D, E, TAU, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAU( * ) +* .. +* +* Purpose +* ======= +* +* DSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal +* form T by an orthogonal similarity transformation: Q' * A * Q = T. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* On exit, if UPLO = 'U', the diagonal and first superdiagonal +* of A are overwritten by the corresponding elements of the +* tridiagonal matrix T, and the elements above the first +* superdiagonal, with the array TAU, represent the orthogonal +* matrix Q as a product of elementary reflectors; if UPLO +* = 'L', the diagonal and first subdiagonal of A are over- +* written by the corresponding elements of the tridiagonal +* matrix T, and the elements below the first subdiagonal, with +* the array TAU, represent the orthogonal matrix Q as a product +* of elementary reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* D (output) DOUBLE PRECISION array, dimension (N) +* The diagonal elements of the tridiagonal matrix T: +* D(i) = A(i,i). +* +* E (output) DOUBLE PRECISION array, dimension (N-1) +* The off-diagonal elements of the tridiagonal matrix T: +* E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +* +* TAU (output) DOUBLE PRECISION array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* If UPLO = 'U', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(n-1) . . . H(2) H(1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +* A(1:i-1,i+1), and tau in TAU(i). +* +* If UPLO = 'L', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(1) H(2) . . . H(n-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +* and tau in TAU(i). +* +* The contents of A on exit are illustrated by the following examples +* with n = 5: +* +* if UPLO = 'U': if UPLO = 'L': +* +* ( d e v2 v3 v4 ) ( d ) +* ( d e v3 v4 ) ( e d ) +* ( d e v4 ) ( v1 e d ) +* ( d e ) ( v1 v2 e d ) +* ( d ) ( v1 v2 v3 e d ) +* +* where d and e denote diagonal and off-diagonal elements of T, and vi +* denotes an element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO, HALF + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0, + $ HALF = 1.0D0 / 2.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + DOUBLE PRECISION ALPHA, TAUI +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DLARFG, DSYMV, DSYR2, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTD2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A +* + DO 10 I = N - 1, 1, -1 +* +* Generate elementary reflector H(i) = I - tau * v * v' +* to annihilate A(1:i-1,i+1) +* + CALL DLARFG( I, A( I, I+1 ), A( 1, I+1 ), 1, TAUI ) + E( I ) = A( I, I+1 ) +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(1:i,1:i) +* + A( I, I+1 ) = ONE +* +* Compute x := tau * A * v storing x in TAU(1:i) +* + CALL DSYMV( UPLO, I, TAUI, A, LDA, A( 1, I+1 ), 1, ZERO, + $ TAU, 1 ) +* +* Compute w := x - 1/2 * tau * (x'*v) * v +* + ALPHA = -HALF*TAUI*DDOT( I, TAU, 1, A( 1, I+1 ), 1 ) + CALL DAXPY( I, ALPHA, A( 1, I+1 ), 1, TAU, 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w' - w * v' +* + CALL DSYR2( UPLO, I, -ONE, A( 1, I+1 ), 1, TAU, 1, A, + $ LDA ) +* + A( I, I+1 ) = E( I ) + END IF + D( I+1 ) = A( I+1, I+1 ) + TAU( I ) = TAUI + 10 CONTINUE + D( 1 ) = A( 1, 1 ) + ELSE +* +* Reduce the lower triangle of A +* + DO 20 I = 1, N - 1 +* +* Generate elementary reflector H(i) = I - tau * v * v' +* to annihilate A(i+2:n,i) +* + CALL DLARFG( N-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1, + $ TAUI ) + E( I ) = A( I+1, I ) +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(i+1:n,i+1:n) +* + A( I+1, I ) = ONE +* +* Compute x := tau * A * v storing y in TAU(i:n-1) +* + CALL DSYMV( UPLO, N-I, TAUI, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, TAU( I ), 1 ) +* +* Compute w := x - 1/2 * tau * (x'*v) * v +* + ALPHA = -HALF*TAUI*DDOT( N-I, TAU( I ), 1, A( I+1, I ), + $ 1 ) + CALL DAXPY( N-I, ALPHA, A( I+1, I ), 1, TAU( I ), 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w' - w * v' +* + CALL DSYR2( UPLO, N-I, -ONE, A( I+1, I ), 1, TAU( I ), 1, + $ A( I+1, I+1 ), LDA ) +* + A( I+1, I ) = E( I ) + END IF + D( I ) = A( I, I ) + TAU( I ) = TAUI + 20 CONTINUE + D( N ) = A( N, N ) + END IF +* + RETURN +* +* End of DSYTD2 +* + END diff --git a/costa/native/external/lapack/dsytf2.f b/costa/native/external/lapack/dsytf2.f new file mode 100644 index 000000000..1a9d69ee2 --- /dev/null +++ b/costa/native/external/lapack/dsytf2.f @@ -0,0 +1,511 @@ + SUBROUTINE DSYTF2( UPLO, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* DSYTF2 computes the factorization of a real symmetric matrix A using +* the Bunch-Kaufman diagonal pivoting method: +* +* A = U*D*U' or A = L*D*L' +* +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, U' is the transpose of U, and D is symmetric and +* block diagonal with 1-by-1 and 2-by-2 diagonal blocks. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L (see below for further details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, D(k,k) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, and division by zero will occur if it +* is used to solve a system of equations. +* +* Further Details +* =============== +* +* 1-96 - Based on modifications by J. Lewis, Boeing Computer Services +* Company +* +* If UPLO = 'U', then A = U*D*U', where +* U = P(n)*U(n)* ... *P(k)U(k)* ..., +* i.e., U is a product of terms P(k)*U(k), where k decreases from n to +* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I v 0 ) k-s +* U(k) = ( 0 I 0 ) s +* ( 0 0 I ) n-k +* k-s s n-k +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +* and A(k,k), and v overwrites A(1:k-2,k-1:k). +* +* If UPLO = 'L', then A = L*D*L', where +* L = P(1)*L(1)* ... *P(k)*L(k)* ..., +* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I 0 0 ) k-1 +* L(k) = ( 0 I 0 ) s +* ( 0 v I ) n-k-s+1 +* k-1 s n-k-s+1 +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KK, KP, KSTEP + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, D11, D12, D21, D22, R1, + $ ROWMAX, T, WK, WKM1, WKP1 +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + EXTERNAL LSAME, IDAMAX +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSWAP, DSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTF2', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U' using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 70 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = IDAMAX( K-1, A( 1, K ), 1 ) + COLMAX = ABS( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = IMAX + IDAMAX( K-IMAX, A( IMAX, IMAX+1 ), LDA ) + ROWMAX = ABS( A( IMAX, JMAX ) ) + IF( IMAX.GT.1 ) THEN + JMAX = IDAMAX( IMAX-1, A( 1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL DSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) + CALL DSWAP( KK-KP-1, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)' +* + R1 = ONE / A( K, K ) + CALL DSYR( UPLO, K-1, -R1, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL DSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )' +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )' +* + IF( K.GT.2 ) THEN +* + D12 = A( K-1, K ) + D22 = A( K-1, K-1 ) / D12 + D11 = A( K, K ) / D12 + T = ONE / ( D11*D22-ONE ) + D12 = T / D12 +* + DO 30 J = K - 2, 1, -1 + WKM1 = D12*( D11*A( J, K-1 )-A( J, K ) ) + WK = D12*( D22*A( J, K )-A( J, K-1 ) ) + DO 20 I = J, 1, -1 + A( I, J ) = A( I, J ) - A( I, K )*WK - + $ A( I, K-1 )*WKM1 + 20 CONTINUE + A( J, K ) = WK + A( J, K-1 ) = WKM1 + 30 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L' using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 70 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + IDAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = ABS( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = K - 1 + IDAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = ABS( A( IMAX, JMAX ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + IDAMAX( N-IMAX, A( IMAX+1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL DSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) + CALL DSWAP( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)' +* + D11 = ONE / A( K, K ) + CALL DSYR( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column K +* + CALL DSCAL( N-K, D11, A( K+1, K ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k) +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( (A(k) A(k+1))*D(k)**(-1) ) * (A(k) A(k+1))' +* +* where L(k) and L(k+1) are the k-th and (k+1)-th +* columns of L +* + D21 = A( K+1, K ) + D11 = A( K+1, K+1 ) / D21 + D22 = A( K, K ) / D21 + T = ONE / ( D11*D22-ONE ) + D21 = T / D21 +* + DO 60 J = K + 2, N +* + WK = D21*( D11*A( J, K )-A( J, K+1 ) ) + WKP1 = D21*( D22*A( J, K+1 )-A( J, K ) ) +* + DO 50 I = J, N + A( I, J ) = A( I, J ) - A( I, K )*WK - + $ A( I, K+1 )*WKP1 + 50 CONTINUE +* + A( J, K ) = WK + A( J, K+1 ) = WKP1 +* + 60 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 40 +* + END IF +* + 70 CONTINUE +* + RETURN +* +* End of DSYTF2 +* + END diff --git a/costa/native/external/lapack/dsytrd.f b/costa/native/external/lapack/dsytrd.f new file mode 100644 index 000000000..583675c3f --- /dev/null +++ b/costa/native/external/lapack/dsytrd.f @@ -0,0 +1,295 @@ + SUBROUTINE DSYTRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* DSYTRD reduces a real symmetric matrix A to real symmetric +* tridiagonal form T by an orthogonal similarity transformation: +* Q**T * A * Q = T. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* On exit, if UPLO = 'U', the diagonal and first superdiagonal +* of A are overwritten by the corresponding elements of the +* tridiagonal matrix T, and the elements above the first +* superdiagonal, with the array TAU, represent the orthogonal +* matrix Q as a product of elementary reflectors; if UPLO +* = 'L', the diagonal and first subdiagonal of A are over- +* written by the corresponding elements of the tridiagonal +* matrix T, and the elements below the first subdiagonal, with +* the array TAU, represent the orthogonal matrix Q as a product +* of elementary reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* D (output) DOUBLE PRECISION array, dimension (N) +* The diagonal elements of the tridiagonal matrix T: +* D(i) = A(i,i). +* +* E (output) DOUBLE PRECISION array, dimension (N-1) +* The off-diagonal elements of the tridiagonal matrix T: +* E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +* +* TAU (output) DOUBLE PRECISION array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1. +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* If UPLO = 'U', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(n-1) . . . H(2) H(1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +* A(1:i-1,i+1), and tau in TAU(i). +* +* If UPLO = 'L', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(1) H(2) . . . H(n-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +* and tau in TAU(i). +* +* The contents of A on exit are illustrated by the following examples +* with n = 5: +* +* if UPLO = 'U': if UPLO = 'L': +* +* ( d e v2 v3 v4 ) ( d ) +* ( d e v3 v4 ) ( e d ) +* ( d e v4 ) ( v1 e d ) +* ( d e ) ( v1 v2 e d ) +* ( d ) ( v1 v2 v3 e d ) +* +* where d and e denote diagonal and off-diagonal elements of T, and vi +* denotes an element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, IWS, J, KK, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL DLATRD, DSYR2K, DSYTD2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. +* + NB = ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NX = N + IWS = 1 + IF( NB.GT.1 .AND. NB.LT.N ) THEN +* +* Determine when to cross over from blocked to unblocked code +* (last block is always handled by unblocked code). +* + NX = MAX( NB, ILAENV( 3, 'DSYTRD', UPLO, N, -1, -1, -1 ) ) + IF( NX.LT.N ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code by setting NX = N. +* + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = ILAENV( 2, 'DSYTRD', UPLO, N, -1, -1, -1 ) + IF( NB.LT.NBMIN ) + $ NX = N + END IF + ELSE + NX = N + END IF + ELSE + NB = 1 + END IF +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A. +* Columns 1:kk are handled by the unblocked method. +* + KK = N - ( ( N-NX+NB-1 ) / NB )*NB + DO 20 I = N - NB + 1, KK + 1, -NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL DLATRD( UPLO, I+NB-1, NB, A, LDA, E, TAU, WORK, + $ LDWORK ) +* +* Update the unreduced submatrix A(1:i-1,1:i-1), using an +* update of the form: A := A - V*W' - W*V' +* + CALL DSYR2K( UPLO, 'No transpose', I-1, NB, -ONE, A( 1, I ), + $ LDA, WORK, LDWORK, ONE, A, LDA ) +* +* Copy superdiagonal elements back into A, and diagonal +* elements into D +* + DO 10 J = I, I + NB - 1 + A( J-1, J ) = E( J-1 ) + D( J ) = A( J, J ) + 10 CONTINUE + 20 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL DSYTD2( UPLO, KK, A, LDA, D, E, TAU, IINFO ) + ELSE +* +* Reduce the lower triangle of A +* + DO 40 I = 1, N - NX, NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL DLATRD( UPLO, N-I+1, NB, A( I, I ), LDA, E( I ), + $ TAU( I ), WORK, LDWORK ) +* +* Update the unreduced submatrix A(i+ib:n,i+ib:n), using +* an update of the form: A := A - V*W' - W*V' +* + CALL DSYR2K( UPLO, 'No transpose', N-I-NB+1, NB, -ONE, + $ A( I+NB, I ), LDA, WORK( NB+1 ), LDWORK, ONE, + $ A( I+NB, I+NB ), LDA ) +* +* Copy subdiagonal elements back into A, and diagonal +* elements into D +* + DO 30 J = I, I + NB - 1 + A( J+1, J ) = E( J ) + D( J ) = A( J, J ) + 30 CONTINUE + 40 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL DSYTD2( UPLO, N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAU( I ), IINFO ) + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of DSYTRD +* + END diff --git a/costa/native/external/lapack/dsytrf.f b/costa/native/external/lapack/dsytrf.f new file mode 100644 index 000000000..d1199f2bf --- /dev/null +++ b/costa/native/external/lapack/dsytrf.f @@ -0,0 +1,288 @@ + SUBROUTINE DSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DSYTRF computes the factorization of a real symmetric matrix A using +* the Bunch-Kaufman diagonal pivoting method. The form of the +* factorization is +* +* A = U*D*U**T or A = L*D*L**T +* +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is symmetric and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. +* +* This is the blocked version of the algorithm, calling Level 3 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L (see below for further details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of WORK. LWORK >=1. For best performance +* LWORK >= N*NB, where NB is the block size returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, and division by zero will occur if it +* is used to solve a system of equations. +* +* Further Details +* =============== +* +* If UPLO = 'U', then A = U*D*U', where +* U = P(n)*U(n)* ... *P(k)U(k)* ..., +* i.e., U is a product of terms P(k)*U(k), where k decreases from n to +* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I v 0 ) k-s +* U(k) = ( 0 I 0 ) s +* ( 0 0 I ) n-k +* k-s s n-k +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +* and A(k,k), and v overwrites A(1:k-2,k-1:k). +* +* If UPLO = 'L', then A = L*D*L', where +* L = P(1)*L(1)* ... *P(k)*L(k)* ..., +* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I 0 0 ) k-1 +* L(k) = ( 0 I 0 ) s +* ( 0 v I ) n-k-s+1 +* k-1 s n-k-s+1 +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLASYF, DSYTF2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'DSYTRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'DSYTRF', UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U' using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by DLASYF; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 40 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL DLASYF( UPLO, K, NB, KB, A, LDA, IPIV, WORK, LDWORK, + $ IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL DSYTF2( UPLO, K, A, LDA, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L' using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by DLASYF; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL DLASYF( UPLO, N-K+1, NB, KB, A( K, K ), LDA, IPIV( K ), + $ WORK, LDWORK, IINFO ) + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL DSYTF2( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), IINFO ) + KB = N - K + 1 + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO 30 J = K, K + KB - 1 + IF( IPIV( J ).GT.0 ) THEN + IPIV( J ) = IPIV( J ) + K - 1 + ELSE + IPIV( J ) = IPIV( J ) - K + 1 + END IF + 30 CONTINUE +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* + END IF +* + 40 CONTINUE + WORK( 1 ) = LWKOPT + RETURN +* +* End of DSYTRF +* + END diff --git a/costa/native/external/lapack/dsytri.f b/costa/native/external/lapack/dsytri.f new file mode 100644 index 000000000..87a3332c2 --- /dev/null +++ b/costa/native/external/lapack/dsytri.f @@ -0,0 +1,313 @@ + SUBROUTINE DSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DSYTRI computes the inverse of a real symmetric indefinite matrix +* A using the factorization A = U*D*U**T or A = L*D*L**T computed by +* DSYTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the block diagonal matrix D and the multipliers +* used to obtain the factor U or L as computed by DSYTRF. +* +* On exit, if INFO = 0, the (symmetric) inverse of the original +* matrix. If UPLO = 'U', the upper triangular part of the +* inverse is formed and the part of A below the diagonal is not +* referenced; if UPLO = 'L' the lower triangular part of the +* inverse is formed and the part of A above the diagonal is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by DSYTRF. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +* inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K, KP, KSTEP + DOUBLE PRECISION AK, AKKP1, AKP1, D, T, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSWAP, DSYMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL DCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL DSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - DDOT( K-1, WORK, 1, A( 1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K+1 ) ) + AK = A( K, K ) / T + AKP1 = A( K+1, K+1 ) / T + AKKP1 = A( K, K+1 ) / T + D = T*( AK*AKP1-ONE ) + A( K, K ) = AKP1 / D + A( K+1, K+1 ) = AK / D + A( K, K+1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL DCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL DSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - DDOT( K-1, WORK, 1, A( 1, K ), + $ 1 ) + A( K, K+1 ) = A( K, K+1 ) - + $ DDOT( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 ) + CALL DCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 ) + CALL DSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K+1 ), 1 ) + A( K+1, K+1 ) = A( K+1, K+1 ) - + $ DDOT( K-1, WORK, 1, A( 1, K+1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + CALL DSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + CALL DSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K+1 ) + A( K, K+1 ) = A( KP, K+1 ) + A( KP, K+1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + GO TO 30 + 40 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 50 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 60 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL DCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL DSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - DDOT( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K-1 ) ) + AK = A( K-1, K-1 ) / T + AKP1 = A( K, K ) / T + AKKP1 = A( K, K-1 ) / T + D = T*( AK*AKP1-ONE ) + A( K-1, K-1 ) = AKP1 / D + A( K, K ) = AK / D + A( K, K-1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL DCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL DSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - DDOT( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + A( K, K-1 ) = A( K, K-1 ) - + $ DDOT( N-K, A( K+1, K ), 1, A( K+1, K-1 ), + $ 1 ) + CALL DCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 ) + CALL DSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K-1 ), 1 ) + A( K-1, K-1 ) = A( K-1, K-1 ) - + $ DDOT( N-K, WORK, 1, A( K+1, K-1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + IF( KP.LT.N ) + $ CALL DSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + CALL DSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K-1 ) + A( K, K-1 ) = A( KP, K-1 ) + A( KP, K-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + GO TO 50 + 60 CONTINUE + END IF +* + RETURN +* +* End of DSYTRI +* + END diff --git a/costa/native/external/lapack/dsytrs.f b/costa/native/external/lapack/dsytrs.f new file mode 100644 index 000000000..ba70be4a4 --- /dev/null +++ b/costa/native/external/lapack/dsytrs.f @@ -0,0 +1,370 @@ + SUBROUTINE DSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DSYTRS solves a system of linear equations A*X = B with a real +* symmetric matrix A using the factorization A = U*D*U**T or +* A = L*D*L**T computed by DSYTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by DSYTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by DSYTRF. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP + DOUBLE PRECISION AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DGER, DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U'. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL DGER( K-1, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL DSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL DSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL DGER( K-2, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL DGER( K-2, NRHS, -ONE, A( 1, K-1 ), 1, B( K-1, 1 ), + $ LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K-1, K ) + AKM1 = A( K-1, K-1 ) / AKM1K + AK = A( K, K ) / AKM1K + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / AKM1K + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U'*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U'(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL DGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ), + $ 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U'(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + CALL DGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ), + $ 1, ONE, B( K, 1 ), LDB ) + CALL DGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, + $ A( 1, K+1 ), 1, ONE, B( K+1, 1 ), LDB ) +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L'. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL DGER( N-K, NRHS, -ONE, A( K+1, K ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL DSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL DSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL DGER( N-K-1, NRHS, -ONE, A( K+2, K ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL DGER( N-K-1, NRHS, -ONE, A( K+2, K+1 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K+1, K ) + AKM1 = A( K, K ) / AKM1K + AK = A( K+1, K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / AKM1K + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L'*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L'(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL DGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L'(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL DGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL DGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K-1 ), 1, ONE, B( K-1, 1 ), + $ LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of DSYTRS +* + END diff --git a/costa/native/external/lapack/dtbcon.f b/costa/native/external/lapack/dtbcon.f new file mode 100644 index 000000000..178dbbb71 --- /dev/null +++ b/costa/native/external/lapack/dtbcon.f @@ -0,0 +1,198 @@ + SUBROUTINE DTBCON( NORM, UPLO, DIAG, N, KD, AB, LDAB, RCOND, WORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, KD, LDAB, N + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DTBCON estimates the reciprocal of the condition number of a +* triangular band matrix A, in either the 1-norm or the infinity-norm. +* +* The norm of A is computed and an estimate is obtained for +* norm(inv(A)), then the reciprocal of the condition number is +* computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals or subdiagonals of the +* triangular band matrix A. KD >= 0. +* +* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) +* The upper or lower triangular band matrix A, stored in the +* first kd+1 rows of the array. The j-th column of A is stored +* in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* If DIAG = 'U', the diagonal elements of A are not referenced +* and are assumed to be 1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + DOUBLE PRECISION AINVNM, ANORM, SCALE, SMLNUM, XNORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DLANTB + EXTERNAL LSAME, IDAMAX, DLAMCH, DLANTB +* .. +* .. External Subroutines .. + EXTERNAL DLACON, DLATBS, DRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = DLAMCH( 'Safe minimum' )*DBLE( MAX( 1, N ) ) +* +* Compute the norm of the triangular matrix A. +* + ANORM = DLANTB( NORM, UPLO, DIAG, N, KD, AB, LDAB, WORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL DLATBS( UPLO, 'No transpose', DIAG, NORMIN, N, KD, + $ AB, LDAB, WORK, SCALE, WORK( 2*N+1 ), INFO ) + ELSE +* +* Multiply by inv(A'). +* + CALL DLATBS( UPLO, 'Transpose', DIAG, NORMIN, N, KD, AB, + $ LDAB, WORK, SCALE, WORK( 2*N+1 ), INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = IDAMAX( N, WORK, 1 ) + XNORM = ABS( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL DRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of DTBCON +* + END diff --git a/costa/native/external/lapack/dtbrfs.f b/costa/native/external/lapack/dtbrfs.f new file mode 100644 index 000000000..e16407cac --- /dev/null +++ b/costa/native/external/lapack/dtbrfs.f @@ -0,0 +1,381 @@ + SUBROUTINE DTBRFS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, + $ LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, KD, LDAB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), B( LDB, * ), BERR( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DTBRFS provides error bounds and backward error estimates for the +* solution to a system of linear equations with a triangular band +* coefficient matrix. +* +* The solution matrix X must be computed by DTBTRS or some other +* means before entering this routine. DTBRFS does not do iterative +* refinement because doing so cannot improve the backward error. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose = Transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals or subdiagonals of the +* triangular band matrix A. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) +* The upper or lower triangular band matrix A, stored in the +* first kd+1 rows of the array. The j-th column of A is stored +* in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* If DIAG = 'U', the diagonal elements of A are not referenced +* and are assumed to be 1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input) DOUBLE PRECISION array, dimension (LDX,NRHS) +* The solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANST + INTEGER I, J, K, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DLACON, DTBMV, DTBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = KD + 2 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A or A', depending on TRANS. +* + CALL DCOPY( N, X( 1, J ), 1, WORK( N+1 ), 1 ) + CALL DTBMV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, WORK( N+1 ), + $ 1 ) + CALL DAXPY( N, -ONE, B( 1, J ), 1, WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = ABS( X( K, J ) ) + DO 30 I = MAX( 1, K-KD ), K + WORK( I ) = WORK( I ) + + $ ABS( AB( KD+1+I-K, K ) )*XK + 30 CONTINUE + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = ABS( X( K, J ) ) + DO 50 I = MAX( 1, K-KD ), K - 1 + WORK( I ) = WORK( I ) + + $ ABS( AB( KD+1+I-K, K ) )*XK + 50 CONTINUE + WORK( K ) = WORK( K ) + XK + 60 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = ABS( X( K, J ) ) + DO 70 I = K, MIN( N, K+KD ) + WORK( I ) = WORK( I ) + ABS( AB( 1+I-K, K ) )*XK + 70 CONTINUE + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = ABS( X( K, J ) ) + DO 90 I = K + 1, MIN( N, K+KD ) + WORK( I ) = WORK( I ) + ABS( AB( 1+I-K, K ) )*XK + 90 CONTINUE + WORK( K ) = WORK( K ) + XK + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A')*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = MAX( 1, K-KD ), K + S = S + ABS( AB( KD+1+I-K, K ) )* + $ ABS( X( I, J ) ) + 110 CONTINUE + WORK( K ) = WORK( K ) + S + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = ABS( X( K, J ) ) + DO 130 I = MAX( 1, K-KD ), K - 1 + S = S + ABS( AB( KD+1+I-K, K ) )* + $ ABS( X( I, J ) ) + 130 CONTINUE + WORK( K ) = WORK( K ) + S + 140 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, MIN( N, K+KD ) + S = S + ABS( AB( 1+I-K, K ) )*ABS( X( I, J ) ) + 150 CONTINUE + WORK( K ) = WORK( K ) + S + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = ABS( X( K, J ) ) + DO 170 I = K + 1, MIN( N, K+KD ) + S = S + ABS( AB( 1+I-K, K ) )*ABS( X( I, J ) ) + 170 CONTINUE + WORK( K ) = WORK( K ) + S + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use DLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL DLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)'). +* + CALL DTBSV( UPLO, TRANST, DIAG, N, KD, AB, LDAB, + $ WORK( N+1 ), 1 ) + DO 220 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 230 CONTINUE + CALL DTBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, + $ WORK( N+1 ), 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of DTBRFS +* + END diff --git a/costa/native/external/lapack/dtbtrs.f b/costa/native/external/lapack/dtbtrs.f new file mode 100644 index 000000000..1409649ff --- /dev/null +++ b/costa/native/external/lapack/dtbtrs.f @@ -0,0 +1,163 @@ + SUBROUTINE DTBTRS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, + $ LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DTBTRS solves a triangular system of the form +* +* A * X = B or A**T * X = B, +* +* where A is a triangular band matrix of order N, and B is an +* N-by NRHS matrix. A check is made to verify that A is nonsingular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose = Transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals or subdiagonals of the +* triangular band matrix A. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) +* The upper or lower triangular band matrix A, stored in the +* first kd+1 rows of AB. The j-th column of A is stored +* in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* If DIAG = 'U', the diagonal elements of A are not referenced +* and are assumed to be 1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, if INFO = 0, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the i-th diagonal element of A is zero, +* indicating that the matrix is singular and the +* solutions X have not been computed. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DTBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOUNIT = LSAME( DIAG, 'N' ) + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + DO 10 INFO = 1, N + IF( AB( KD+1, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE + DO 20 INFO = 1, N + IF( AB( 1, INFO ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF + END IF + INFO = 0 +* +* Solve A * X = B or A' * X = B. +* + DO 30 J = 1, NRHS + CALL DTBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, B( 1, J ), 1 ) + 30 CONTINUE +* + RETURN +* +* End of DTBTRS +* + END diff --git a/costa/native/external/lapack/dtgevc.f b/costa/native/external/lapack/dtgevc.f new file mode 100644 index 000000000..180aa066d --- /dev/null +++ b/costa/native/external/lapack/dtgevc.f @@ -0,0 +1,1146 @@ + SUBROUTINE DTGEVC( SIDE, HOWMNY, SELECT, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, MM, M, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDA, LDB, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* +* Purpose +* ======= +* +* DTGEVC computes some or all of the right and/or left generalized +* eigenvectors of a pair of real upper triangular matrices (A,B). +* +* The right generalized eigenvector x and the left generalized +* eigenvector y of (A,B) corresponding to a generalized eigenvalue +* w are defined by: +* +* (A - wB) * x = 0 and y**H * (A - wB) = 0 +* +* where y**H denotes the conjugate tranpose of y. +* +* If an eigenvalue w is determined by zero diagonal elements of both A +* and B, a unit vector is returned as the corresponding eigenvector. +* +* If all eigenvectors are requested, the routine may either return +* the matrices X and/or Y of right or left eigenvectors of (A,B), or +* the products Z*X and/or Q*Y, where Z and Q are input orthogonal +* matrices. If (A,B) was obtained from the generalized real-Schur +* factorization of an original pair of matrices +* (A0,B0) = (Q*A*Z**H,Q*B*Z**H), +* then Z*X and Q*Y are the matrices of right or left eigenvectors of +* A. +* +* A must be block upper triangular, with 1-by-1 and 2-by-2 diagonal +* blocks. Corresponding to each 2-by-2 diagonal block is a complex +* conjugate pair of eigenvalues and eigenvectors; only one +* eigenvector of the pair is computed, namely the one corresponding +* to the eigenvalue with positive imaginary part. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'R': compute right eigenvectors only; +* = 'L': compute left eigenvectors only; +* = 'B': compute both right and left eigenvectors. +* +* HOWMNY (input) CHARACTER*1 +* = 'A': compute all right and/or left eigenvectors; +* = 'B': compute all right and/or left eigenvectors, and +* backtransform them using the input matrices supplied +* in VR and/or VL; +* = 'S': compute selected right and/or left eigenvectors, +* specified by the logical array SELECT. +* +* SELECT (input) LOGICAL array, dimension (N) +* If HOWMNY='S', SELECT specifies the eigenvectors to be +* computed. +* If HOWMNY='A' or 'B', SELECT is not referenced. +* To select the real eigenvector corresponding to the real +* eigenvalue w(j), SELECT(j) must be set to .TRUE. To select +* the complex eigenvector corresponding to a complex conjugate +* pair w(j) and w(j+1), either SELECT(j) or SELECT(j+1) must +* be set to .TRUE.. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The upper quasi-triangular matrix A. +* +* LDA (input) INTEGER +* The leading dimension of array A. LDA >= max(1, N). +* +* B (input) DOUBLE PRECISION array, dimension (LDB,N) +* The upper triangular matrix B. If A has a 2-by-2 diagonal +* block, then the corresponding 2-by-2 block of B must be +* diagonal with positive elements. +* +* LDB (input) INTEGER +* The leading dimension of array B. LDB >= max(1,N). +* +* VL (input/output) DOUBLE PRECISION array, dimension (LDVL,MM) +* On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +* contain an N-by-N matrix Q (usually the orthogonal matrix Q +* of left Schur vectors returned by DHGEQZ). +* On exit, if SIDE = 'L' or 'B', VL contains: +* if HOWMNY = 'A', the matrix Y of left eigenvectors of (A,B); +* if HOWMNY = 'B', the matrix Q*Y; +* if HOWMNY = 'S', the left eigenvectors of (A,B) specified by +* SELECT, stored consecutively in the columns of +* VL, in the same order as their eigenvalues. +* If SIDE = 'R', VL is not referenced. +* +* A complex eigenvector corresponding to a complex eigenvalue +* is stored in two consecutive columns, the first holding the +* real part, and the second the imaginary part. +* +* LDVL (input) INTEGER +* The leading dimension of array VL. +* LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise. +* +* VR (input/output) DOUBLE PRECISION array, dimension (LDVR,MM) +* On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +* contain an N-by-N matrix Q (usually the orthogonal matrix Z +* of right Schur vectors returned by DHGEQZ). +* On exit, if SIDE = 'R' or 'B', VR contains: +* if HOWMNY = 'A', the matrix X of right eigenvectors of (A,B); +* if HOWMNY = 'B', the matrix Z*X; +* if HOWMNY = 'S', the right eigenvectors of (A,B) specified by +* SELECT, stored consecutively in the columns of +* VR, in the same order as their eigenvalues. +* If SIDE = 'L', VR is not referenced. +* +* A complex eigenvector corresponding to a complex eigenvalue +* is stored in two consecutive columns, the first holding the +* real part and the second the imaginary part. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. +* LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise. +* +* MM (input) INTEGER +* The number of columns in the arrays VL and/or VR. MM >= M. +* +* M (output) INTEGER +* The number of columns in the arrays VL and/or VR actually +* used to store the eigenvectors. If HOWMNY = 'A' or 'B', M +* is set to N. Each selected real eigenvector occupies one +* column and each selected complex eigenvector occupies two +* columns. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (6*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: the 2-by-2 block (INFO:INFO+1) does not have a complex +* eigenvalue. +* +* Further Details +* =============== +* +* Allocation of workspace: +* ---------- -- --------- +* +* WORK( j ) = 1-norm of j-th column of A, above the diagonal +* WORK( N+j ) = 1-norm of j-th column of B, above the diagonal +* WORK( 2*N+1:3*N ) = real part of eigenvector +* WORK( 3*N+1:4*N ) = imaginary part of eigenvector +* WORK( 4*N+1:5*N ) = real part of back-transformed eigenvector +* WORK( 5*N+1:6*N ) = imaginary part of back-transformed eigenvector +* +* Rowwise vs. columnwise solution methods: +* ------- -- ---------- -------- ------- +* +* Finding a generalized eigenvector consists basically of solving the +* singular triangular system +* +* (A - w B) x = 0 (for right) or: (A - w B)**H y = 0 (for left) +* +* Consider finding the i-th right eigenvector (assume all eigenvalues +* are real). The equation to be solved is: +* n i +* 0 = sum C(j,k) v(k) = sum C(j,k) v(k) for j = i,. . .,1 +* k=j k=j +* +* where C = (A - w B) (The components v(i+1:n) are 0.) +* +* The "rowwise" method is: +* +* (1) v(i) := 1 +* for j = i-1,. . .,1: +* i +* (2) compute s = - sum C(j,k) v(k) and +* k=j+1 +* +* (3) v(j) := s / C(j,j) +* +* Step 2 is sometimes called the "dot product" step, since it is an +* inner product between the j-th row and the portion of the eigenvector +* that has been computed so far. +* +* The "columnwise" method consists basically in doing the sums +* for all the rows in parallel. As each v(j) is computed, the +* contribution of v(j) times the j-th column of C is added to the +* partial sums. Since FORTRAN arrays are stored columnwise, this has +* the advantage that at each step, the elements of C that are accessed +* are adjacent to one another, whereas with the rowwise method, the +* elements accessed at a step are spaced LDA (and LDB) words apart. +* +* When finding left eigenvectors, the matrix in question is the +* transpose of the one in storage, so the rowwise method then +* actually accesses columns of A and B at each step, and so is the +* preferred method. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, SAFETY + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, + $ SAFETY = 1.0D+2 ) +* .. +* .. Local Scalars .. + LOGICAL COMPL, COMPR, IL2BY2, ILABAD, ILALL, ILBACK, + $ ILBBAD, ILCOMP, ILCPLX, LSA, LSB + INTEGER I, IBEG, IEIG, IEND, IHWMNY, IINFO, IM, ISIDE, + $ J, JA, JC, JE, JR, JW, NA, NW + DOUBLE PRECISION ACOEF, ACOEFA, ANORM, ASCALE, BCOEFA, BCOEFI, + $ BCOEFR, BIG, BIGNUM, BNORM, BSCALE, CIM2A, + $ CIM2B, CIMAGA, CIMAGB, CRE2A, CRE2B, CREALA, + $ CREALB, DMIN, SAFMIN, SALFAR, SBETA, SCALE, + $ SMALL, TEMP, TEMP2, TEMP2I, TEMP2R, ULP, XMAX, + $ XSCALE +* .. +* .. Local Arrays .. + DOUBLE PRECISION BDIAG( 2 ), SUM( 2, 2 ), SUMA( 2, 2 ), + $ SUMB( 2, 2 ) +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DLACPY, DLAG2, DLALN2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and Test the input parameters +* + IF( LSAME( HOWMNY, 'A' ) ) THEN + IHWMNY = 1 + ILALL = .TRUE. + ILBACK = .FALSE. + ELSE IF( LSAME( HOWMNY, 'S' ) ) THEN + IHWMNY = 2 + ILALL = .FALSE. + ILBACK = .FALSE. + ELSE IF( LSAME( HOWMNY, 'B' ) .OR. LSAME( HOWMNY, 'T' ) ) THEN + IHWMNY = 3 + ILALL = .TRUE. + ILBACK = .TRUE. + ELSE + IHWMNY = -1 + ILALL = .TRUE. + END IF +* + IF( LSAME( SIDE, 'R' ) ) THEN + ISIDE = 1 + COMPL = .FALSE. + COMPR = .TRUE. + ELSE IF( LSAME( SIDE, 'L' ) ) THEN + ISIDE = 2 + COMPL = .TRUE. + COMPR = .FALSE. + ELSE IF( LSAME( SIDE, 'B' ) ) THEN + ISIDE = 3 + COMPL = .TRUE. + COMPR = .TRUE. + ELSE + ISIDE = -1 + END IF +* + INFO = 0 + IF( ISIDE.LT.0 ) THEN + INFO = -1 + ELSE IF( IHWMNY.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTGEVC', -INFO ) + RETURN + END IF +* +* Count the number of eigenvectors to be computed +* + IF( .NOT.ILALL ) THEN + IM = 0 + ILCPLX = .FALSE. + DO 10 J = 1, N + IF( ILCPLX ) THEN + ILCPLX = .FALSE. + GO TO 10 + END IF + IF( J.LT.N ) THEN + IF( A( J+1, J ).NE.ZERO ) + $ ILCPLX = .TRUE. + END IF + IF( ILCPLX ) THEN + IF( SELECT( J ) .OR. SELECT( J+1 ) ) + $ IM = IM + 2 + ELSE + IF( SELECT( J ) ) + $ IM = IM + 1 + END IF + 10 CONTINUE + ELSE + IM = N + END IF +* +* Check 2-by-2 diagonal blocks of A, B +* + ILABAD = .FALSE. + ILBBAD = .FALSE. + DO 20 J = 1, N - 1 + IF( A( J+1, J ).NE.ZERO ) THEN + IF( B( J, J ).EQ.ZERO .OR. B( J+1, J+1 ).EQ.ZERO .OR. + $ B( J, J+1 ).NE.ZERO )ILBBAD = .TRUE. + IF( J.LT.N-1 ) THEN + IF( A( J+2, J+1 ).NE.ZERO ) + $ ILABAD = .TRUE. + END IF + END IF + 20 CONTINUE +* + IF( ILABAD ) THEN + INFO = -5 + ELSE IF( ILBBAD ) THEN + INFO = -7 + ELSE IF( COMPL .AND. LDVL.LT.N .OR. LDVL.LT.1 ) THEN + INFO = -10 + ELSE IF( COMPR .AND. LDVR.LT.N .OR. LDVR.LT.1 ) THEN + INFO = -12 + ELSE IF( MM.LT.IM ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTGEVC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = IM + IF( N.EQ.0 ) + $ RETURN +* +* Machine Constants +* + SAFMIN = DLAMCH( 'Safe minimum' ) + BIG = ONE / SAFMIN + CALL DLABAD( SAFMIN, BIG ) + ULP = DLAMCH( 'Epsilon' )*DLAMCH( 'Base' ) + SMALL = SAFMIN*N / ULP + BIG = ONE / SMALL + BIGNUM = ONE / ( SAFMIN*N ) +* +* Compute the 1-norm of each column of the strictly upper triangular +* part (i.e., excluding all elements belonging to the diagonal +* blocks) of A and B to check for possible overflow in the +* triangular solver. +* + ANORM = ABS( A( 1, 1 ) ) + IF( N.GT.1 ) + $ ANORM = ANORM + ABS( A( 2, 1 ) ) + BNORM = ABS( B( 1, 1 ) ) + WORK( 1 ) = ZERO + WORK( N+1 ) = ZERO +* + DO 50 J = 2, N + TEMP = ZERO + TEMP2 = ZERO + IF( A( J, J-1 ).EQ.ZERO ) THEN + IEND = J - 1 + ELSE + IEND = J - 2 + END IF + DO 30 I = 1, IEND + TEMP = TEMP + ABS( A( I, J ) ) + TEMP2 = TEMP2 + ABS( B( I, J ) ) + 30 CONTINUE + WORK( J ) = TEMP + WORK( N+J ) = TEMP2 + DO 40 I = IEND + 1, MIN( J+1, N ) + TEMP = TEMP + ABS( A( I, J ) ) + TEMP2 = TEMP2 + ABS( B( I, J ) ) + 40 CONTINUE + ANORM = MAX( ANORM, TEMP ) + BNORM = MAX( BNORM, TEMP2 ) + 50 CONTINUE +* + ASCALE = ONE / MAX( ANORM, SAFMIN ) + BSCALE = ONE / MAX( BNORM, SAFMIN ) +* +* Left eigenvectors +* + IF( COMPL ) THEN + IEIG = 0 +* +* Main loop over eigenvalues +* + ILCPLX = .FALSE. + DO 220 JE = 1, N +* +* Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or +* (b) this would be the second of a complex pair. +* Check for complex eigenvalue, so as to be sure of which +* entry(-ies) of SELECT to look at. +* + IF( ILCPLX ) THEN + ILCPLX = .FALSE. + GO TO 220 + END IF + NW = 1 + IF( JE.LT.N ) THEN + IF( A( JE+1, JE ).NE.ZERO ) THEN + ILCPLX = .TRUE. + NW = 2 + END IF + END IF + IF( ILALL ) THEN + ILCOMP = .TRUE. + ELSE IF( ILCPLX ) THEN + ILCOMP = SELECT( JE ) .OR. SELECT( JE+1 ) + ELSE + ILCOMP = SELECT( JE ) + END IF + IF( .NOT.ILCOMP ) + $ GO TO 220 +* +* Decide if (a) singular pencil, (b) real eigenvalue, or +* (c) complex eigenvalue. +* + IF( .NOT.ILCPLX ) THEN + IF( ABS( A( JE, JE ) ).LE.SAFMIN .AND. + $ ABS( B( JE, JE ) ).LE.SAFMIN ) THEN +* +* Singular matrix pencil -- return unit eigenvector +* + IEIG = IEIG + 1 + DO 60 JR = 1, N + VL( JR, IEIG ) = ZERO + 60 CONTINUE + VL( IEIG, IEIG ) = ONE + GO TO 220 + END IF + END IF +* +* Clear vector +* + DO 70 JR = 1, NW*N + WORK( 2*N+JR ) = ZERO + 70 CONTINUE +* T +* Compute coefficients in ( a A - b B ) y = 0 +* a is ACOEF +* b is BCOEFR + i*BCOEFI +* + IF( .NOT.ILCPLX ) THEN +* +* Real eigenvalue +* + TEMP = ONE / MAX( ABS( A( JE, JE ) )*ASCALE, + $ ABS( B( JE, JE ) )*BSCALE, SAFMIN ) + SALFAR = ( TEMP*A( JE, JE ) )*ASCALE + SBETA = ( TEMP*B( JE, JE ) )*BSCALE + ACOEF = SBETA*ASCALE + BCOEFR = SALFAR*BSCALE + BCOEFI = ZERO +* +* Scale to avoid underflow +* + SCALE = ONE + LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEF ).LT.SMALL + LSB = ABS( SALFAR ).GE.SAFMIN .AND. ABS( BCOEFR ).LT. + $ SMALL + IF( LSA ) + $ SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG ) + IF( LSB ) + $ SCALE = MAX( SCALE, ( SMALL / ABS( SALFAR ) )* + $ MIN( BNORM, BIG ) ) + IF( LSA .OR. LSB ) THEN + SCALE = MIN( SCALE, ONE / + $ ( SAFMIN*MAX( ONE, ABS( ACOEF ), + $ ABS( BCOEFR ) ) ) ) + IF( LSA ) THEN + ACOEF = ASCALE*( SCALE*SBETA ) + ELSE + ACOEF = SCALE*ACOEF + END IF + IF( LSB ) THEN + BCOEFR = BSCALE*( SCALE*SALFAR ) + ELSE + BCOEFR = SCALE*BCOEFR + END IF + END IF + ACOEFA = ABS( ACOEF ) + BCOEFA = ABS( BCOEFR ) +* +* First component is 1 +* + WORK( 2*N+JE ) = ONE + XMAX = ONE + ELSE +* +* Complex eigenvalue +* + CALL DLAG2( A( JE, JE ), LDA, B( JE, JE ), LDB, + $ SAFMIN*SAFETY, ACOEF, TEMP, BCOEFR, TEMP2, + $ BCOEFI ) + BCOEFI = -BCOEFI + IF( BCOEFI.EQ.ZERO ) THEN + INFO = JE + RETURN + END IF +* +* Scale to avoid over/underflow +* + ACOEFA = ABS( ACOEF ) + BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI ) + SCALE = ONE + IF( ACOEFA*ULP.LT.SAFMIN .AND. ACOEFA.GE.SAFMIN ) + $ SCALE = ( SAFMIN / ULP ) / ACOEFA + IF( BCOEFA*ULP.LT.SAFMIN .AND. BCOEFA.GE.SAFMIN ) + $ SCALE = MAX( SCALE, ( SAFMIN / ULP ) / BCOEFA ) + IF( SAFMIN*ACOEFA.GT.ASCALE ) + $ SCALE = ASCALE / ( SAFMIN*ACOEFA ) + IF( SAFMIN*BCOEFA.GT.BSCALE ) + $ SCALE = MIN( SCALE, BSCALE / ( SAFMIN*BCOEFA ) ) + IF( SCALE.NE.ONE ) THEN + ACOEF = SCALE*ACOEF + ACOEFA = ABS( ACOEF ) + BCOEFR = SCALE*BCOEFR + BCOEFI = SCALE*BCOEFI + BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI ) + END IF +* +* Compute first two components of eigenvector +* + TEMP = ACOEF*A( JE+1, JE ) + TEMP2R = ACOEF*A( JE, JE ) - BCOEFR*B( JE, JE ) + TEMP2I = -BCOEFI*B( JE, JE ) + IF( ABS( TEMP ).GT.ABS( TEMP2R )+ABS( TEMP2I ) ) THEN + WORK( 2*N+JE ) = ONE + WORK( 3*N+JE ) = ZERO + WORK( 2*N+JE+1 ) = -TEMP2R / TEMP + WORK( 3*N+JE+1 ) = -TEMP2I / TEMP + ELSE + WORK( 2*N+JE+1 ) = ONE + WORK( 3*N+JE+1 ) = ZERO + TEMP = ACOEF*A( JE, JE+1 ) + WORK( 2*N+JE ) = ( BCOEFR*B( JE+1, JE+1 )-ACOEF* + $ A( JE+1, JE+1 ) ) / TEMP + WORK( 3*N+JE ) = BCOEFI*B( JE+1, JE+1 ) / TEMP + END IF + XMAX = MAX( ABS( WORK( 2*N+JE ) )+ABS( WORK( 3*N+JE ) ), + $ ABS( WORK( 2*N+JE+1 ) )+ABS( WORK( 3*N+JE+1 ) ) ) + END IF +* + DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN ) +* +* T +* Triangular solve of (a A - b B) y = 0 +* +* T +* (rowwise in (a A - b B) , or columnwise in (a A - b B) ) +* + IL2BY2 = .FALSE. +* + DO 160 J = JE + NW, N + IF( IL2BY2 ) THEN + IL2BY2 = .FALSE. + GO TO 160 + END IF +* + NA = 1 + BDIAG( 1 ) = B( J, J ) + IF( J.LT.N ) THEN + IF( A( J+1, J ).NE.ZERO ) THEN + IL2BY2 = .TRUE. + BDIAG( 2 ) = B( J+1, J+1 ) + NA = 2 + END IF + END IF +* +* Check whether scaling is necessary for dot products +* + XSCALE = ONE / MAX( ONE, XMAX ) + TEMP = MAX( WORK( J ), WORK( N+J ), + $ ACOEFA*WORK( J )+BCOEFA*WORK( N+J ) ) + IF( IL2BY2 ) + $ TEMP = MAX( TEMP, WORK( J+1 ), WORK( N+J+1 ), + $ ACOEFA*WORK( J+1 )+BCOEFA*WORK( N+J+1 ) ) + IF( TEMP.GT.BIGNUM*XSCALE ) THEN + DO 90 JW = 0, NW - 1 + DO 80 JR = JE, J - 1 + WORK( ( JW+2 )*N+JR ) = XSCALE* + $ WORK( ( JW+2 )*N+JR ) + 80 CONTINUE + 90 CONTINUE + XMAX = XMAX*XSCALE + END IF +* +* Compute dot products +* +* j-1 +* SUM = sum conjg( a*A(k,j) - b*B(k,j) )*x(k) +* k=je +* +* To reduce the op count, this is done as +* +* _ j-1 _ j-1 +* a*conjg( sum A(k,j)*x(k) ) - b*conjg( sum B(k,j)*x(k) ) +* k=je k=je +* +* which may cause underflow problems if A or B are close +* to underflow. (E.g., less than SMALL.) +* +* +* A series of compiler directives to defeat vectorization +* for the next loop +* +*$PL$ CMCHAR=' ' +CDIR$ NEXTSCALAR +C$DIR SCALAR +CDIR$ NEXT SCALAR +CVD$L NOVECTOR +CDEC$ NOVECTOR +CVD$ NOVECTOR +*VDIR NOVECTOR +*VOCL LOOP,SCALAR +CIBM PREFER SCALAR +*$PL$ CMCHAR='*' +* + DO 120 JW = 1, NW +* +*$PL$ CMCHAR=' ' +CDIR$ NEXTSCALAR +C$DIR SCALAR +CDIR$ NEXT SCALAR +CVD$L NOVECTOR +CDEC$ NOVECTOR +CVD$ NOVECTOR +*VDIR NOVECTOR +*VOCL LOOP,SCALAR +CIBM PREFER SCALAR +*$PL$ CMCHAR='*' +* + DO 110 JA = 1, NA + SUMA( JA, JW ) = ZERO + SUMB( JA, JW ) = ZERO +* + DO 100 JR = JE, J - 1 + SUMA( JA, JW ) = SUMA( JA, JW ) + + $ A( JR, J+JA-1 )* + $ WORK( ( JW+1 )*N+JR ) + SUMB( JA, JW ) = SUMB( JA, JW ) + + $ B( JR, J+JA-1 )* + $ WORK( ( JW+1 )*N+JR ) + 100 CONTINUE + 110 CONTINUE + 120 CONTINUE +* +*$PL$ CMCHAR=' ' +CDIR$ NEXTSCALAR +C$DIR SCALAR +CDIR$ NEXT SCALAR +CVD$L NOVECTOR +CDEC$ NOVECTOR +CVD$ NOVECTOR +*VDIR NOVECTOR +*VOCL LOOP,SCALAR +CIBM PREFER SCALAR +*$PL$ CMCHAR='*' +* + DO 130 JA = 1, NA + IF( ILCPLX ) THEN + SUM( JA, 1 ) = -ACOEF*SUMA( JA, 1 ) + + $ BCOEFR*SUMB( JA, 1 ) - + $ BCOEFI*SUMB( JA, 2 ) + SUM( JA, 2 ) = -ACOEF*SUMA( JA, 2 ) + + $ BCOEFR*SUMB( JA, 2 ) + + $ BCOEFI*SUMB( JA, 1 ) + ELSE + SUM( JA, 1 ) = -ACOEF*SUMA( JA, 1 ) + + $ BCOEFR*SUMB( JA, 1 ) + END IF + 130 CONTINUE +* +* T +* Solve ( a A - b B ) y = SUM(,) +* with scaling and perturbation of the denominator +* + CALL DLALN2( .TRUE., NA, NW, DMIN, ACOEF, A( J, J ), LDA, + $ BDIAG( 1 ), BDIAG( 2 ), SUM, 2, BCOEFR, + $ BCOEFI, WORK( 2*N+J ), N, SCALE, TEMP, + $ IINFO ) + IF( SCALE.LT.ONE ) THEN + DO 150 JW = 0, NW - 1 + DO 140 JR = JE, J - 1 + WORK( ( JW+2 )*N+JR ) = SCALE* + $ WORK( ( JW+2 )*N+JR ) + 140 CONTINUE + 150 CONTINUE + XMAX = SCALE*XMAX + END IF + XMAX = MAX( XMAX, TEMP ) + 160 CONTINUE +* +* Copy eigenvector to VL, back transforming if +* HOWMNY='B'. +* + IEIG = IEIG + 1 + IF( ILBACK ) THEN + DO 170 JW = 0, NW - 1 + CALL DGEMV( 'N', N, N+1-JE, ONE, VL( 1, JE ), LDVL, + $ WORK( ( JW+2 )*N+JE ), 1, ZERO, + $ WORK( ( JW+4 )*N+1 ), 1 ) + 170 CONTINUE + CALL DLACPY( ' ', N, NW, WORK( 4*N+1 ), N, VL( 1, JE ), + $ LDVL ) + IBEG = 1 + ELSE + CALL DLACPY( ' ', N, NW, WORK( 2*N+1 ), N, VL( 1, IEIG ), + $ LDVL ) + IBEG = JE + END IF +* +* Scale eigenvector +* + XMAX = ZERO + IF( ILCPLX ) THEN + DO 180 J = IBEG, N + XMAX = MAX( XMAX, ABS( VL( J, IEIG ) )+ + $ ABS( VL( J, IEIG+1 ) ) ) + 180 CONTINUE + ELSE + DO 190 J = IBEG, N + XMAX = MAX( XMAX, ABS( VL( J, IEIG ) ) ) + 190 CONTINUE + END IF +* + IF( XMAX.GT.SAFMIN ) THEN + XSCALE = ONE / XMAX +* + DO 210 JW = 0, NW - 1 + DO 200 JR = IBEG, N + VL( JR, IEIG+JW ) = XSCALE*VL( JR, IEIG+JW ) + 200 CONTINUE + 210 CONTINUE + END IF + IEIG = IEIG + NW - 1 +* + 220 CONTINUE + END IF +* +* Right eigenvectors +* + IF( COMPR ) THEN + IEIG = IM + 1 +* +* Main loop over eigenvalues +* + ILCPLX = .FALSE. + DO 500 JE = N, 1, -1 +* +* Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or +* (b) this would be the second of a complex pair. +* Check for complex eigenvalue, so as to be sure of which +* entry(-ies) of SELECT to look at -- if complex, SELECT(JE) +* or SELECT(JE-1). +* If this is a complex pair, the 2-by-2 diagonal block +* corresponding to the eigenvalue is in rows/columns JE-1:JE +* + IF( ILCPLX ) THEN + ILCPLX = .FALSE. + GO TO 500 + END IF + NW = 1 + IF( JE.GT.1 ) THEN + IF( A( JE, JE-1 ).NE.ZERO ) THEN + ILCPLX = .TRUE. + NW = 2 + END IF + END IF + IF( ILALL ) THEN + ILCOMP = .TRUE. + ELSE IF( ILCPLX ) THEN + ILCOMP = SELECT( JE ) .OR. SELECT( JE-1 ) + ELSE + ILCOMP = SELECT( JE ) + END IF + IF( .NOT.ILCOMP ) + $ GO TO 500 +* +* Decide if (a) singular pencil, (b) real eigenvalue, or +* (c) complex eigenvalue. +* + IF( .NOT.ILCPLX ) THEN + IF( ABS( A( JE, JE ) ).LE.SAFMIN .AND. + $ ABS( B( JE, JE ) ).LE.SAFMIN ) THEN +* +* Singular matrix pencil -- unit eigenvector +* + IEIG = IEIG - 1 + DO 230 JR = 1, N + VR( JR, IEIG ) = ZERO + 230 CONTINUE + VR( IEIG, IEIG ) = ONE + GO TO 500 + END IF + END IF +* +* Clear vector +* + DO 250 JW = 0, NW - 1 + DO 240 JR = 1, N + WORK( ( JW+2 )*N+JR ) = ZERO + 240 CONTINUE + 250 CONTINUE +* +* Compute coefficients in ( a A - b B ) x = 0 +* a is ACOEF +* b is BCOEFR + i*BCOEFI +* + IF( .NOT.ILCPLX ) THEN +* +* Real eigenvalue +* + TEMP = ONE / MAX( ABS( A( JE, JE ) )*ASCALE, + $ ABS( B( JE, JE ) )*BSCALE, SAFMIN ) + SALFAR = ( TEMP*A( JE, JE ) )*ASCALE + SBETA = ( TEMP*B( JE, JE ) )*BSCALE + ACOEF = SBETA*ASCALE + BCOEFR = SALFAR*BSCALE + BCOEFI = ZERO +* +* Scale to avoid underflow +* + SCALE = ONE + LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEF ).LT.SMALL + LSB = ABS( SALFAR ).GE.SAFMIN .AND. ABS( BCOEFR ).LT. + $ SMALL + IF( LSA ) + $ SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG ) + IF( LSB ) + $ SCALE = MAX( SCALE, ( SMALL / ABS( SALFAR ) )* + $ MIN( BNORM, BIG ) ) + IF( LSA .OR. LSB ) THEN + SCALE = MIN( SCALE, ONE / + $ ( SAFMIN*MAX( ONE, ABS( ACOEF ), + $ ABS( BCOEFR ) ) ) ) + IF( LSA ) THEN + ACOEF = ASCALE*( SCALE*SBETA ) + ELSE + ACOEF = SCALE*ACOEF + END IF + IF( LSB ) THEN + BCOEFR = BSCALE*( SCALE*SALFAR ) + ELSE + BCOEFR = SCALE*BCOEFR + END IF + END IF + ACOEFA = ABS( ACOEF ) + BCOEFA = ABS( BCOEFR ) +* +* First component is 1 +* + WORK( 2*N+JE ) = ONE + XMAX = ONE +* +* Compute contribution from column JE of A and B to sum +* (See "Further Details", above.) +* + DO 260 JR = 1, JE - 1 + WORK( 2*N+JR ) = BCOEFR*B( JR, JE ) - + $ ACOEF*A( JR, JE ) + 260 CONTINUE + ELSE +* +* Complex eigenvalue +* + CALL DLAG2( A( JE-1, JE-1 ), LDA, B( JE-1, JE-1 ), LDB, + $ SAFMIN*SAFETY, ACOEF, TEMP, BCOEFR, TEMP2, + $ BCOEFI ) + IF( BCOEFI.EQ.ZERO ) THEN + INFO = JE - 1 + RETURN + END IF +* +* Scale to avoid over/underflow +* + ACOEFA = ABS( ACOEF ) + BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI ) + SCALE = ONE + IF( ACOEFA*ULP.LT.SAFMIN .AND. ACOEFA.GE.SAFMIN ) + $ SCALE = ( SAFMIN / ULP ) / ACOEFA + IF( BCOEFA*ULP.LT.SAFMIN .AND. BCOEFA.GE.SAFMIN ) + $ SCALE = MAX( SCALE, ( SAFMIN / ULP ) / BCOEFA ) + IF( SAFMIN*ACOEFA.GT.ASCALE ) + $ SCALE = ASCALE / ( SAFMIN*ACOEFA ) + IF( SAFMIN*BCOEFA.GT.BSCALE ) + $ SCALE = MIN( SCALE, BSCALE / ( SAFMIN*BCOEFA ) ) + IF( SCALE.NE.ONE ) THEN + ACOEF = SCALE*ACOEF + ACOEFA = ABS( ACOEF ) + BCOEFR = SCALE*BCOEFR + BCOEFI = SCALE*BCOEFI + BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI ) + END IF +* +* Compute first two components of eigenvector +* and contribution to sums +* + TEMP = ACOEF*A( JE, JE-1 ) + TEMP2R = ACOEF*A( JE, JE ) - BCOEFR*B( JE, JE ) + TEMP2I = -BCOEFI*B( JE, JE ) + IF( ABS( TEMP ).GE.ABS( TEMP2R )+ABS( TEMP2I ) ) THEN + WORK( 2*N+JE ) = ONE + WORK( 3*N+JE ) = ZERO + WORK( 2*N+JE-1 ) = -TEMP2R / TEMP + WORK( 3*N+JE-1 ) = -TEMP2I / TEMP + ELSE + WORK( 2*N+JE-1 ) = ONE + WORK( 3*N+JE-1 ) = ZERO + TEMP = ACOEF*A( JE-1, JE ) + WORK( 2*N+JE ) = ( BCOEFR*B( JE-1, JE-1 )-ACOEF* + $ A( JE-1, JE-1 ) ) / TEMP + WORK( 3*N+JE ) = BCOEFI*B( JE-1, JE-1 ) / TEMP + END IF +* + XMAX = MAX( ABS( WORK( 2*N+JE ) )+ABS( WORK( 3*N+JE ) ), + $ ABS( WORK( 2*N+JE-1 ) )+ABS( WORK( 3*N+JE-1 ) ) ) +* +* Compute contribution from columns JE and JE-1 +* of A and B to the sums. +* + CREALA = ACOEF*WORK( 2*N+JE-1 ) + CIMAGA = ACOEF*WORK( 3*N+JE-1 ) + CREALB = BCOEFR*WORK( 2*N+JE-1 ) - + $ BCOEFI*WORK( 3*N+JE-1 ) + CIMAGB = BCOEFI*WORK( 2*N+JE-1 ) + + $ BCOEFR*WORK( 3*N+JE-1 ) + CRE2A = ACOEF*WORK( 2*N+JE ) + CIM2A = ACOEF*WORK( 3*N+JE ) + CRE2B = BCOEFR*WORK( 2*N+JE ) - BCOEFI*WORK( 3*N+JE ) + CIM2B = BCOEFI*WORK( 2*N+JE ) + BCOEFR*WORK( 3*N+JE ) + DO 270 JR = 1, JE - 2 + WORK( 2*N+JR ) = -CREALA*A( JR, JE-1 ) + + $ CREALB*B( JR, JE-1 ) - + $ CRE2A*A( JR, JE ) + CRE2B*B( JR, JE ) + WORK( 3*N+JR ) = -CIMAGA*A( JR, JE-1 ) + + $ CIMAGB*B( JR, JE-1 ) - + $ CIM2A*A( JR, JE ) + CIM2B*B( JR, JE ) + 270 CONTINUE + END IF +* + DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN ) +* +* Columnwise triangular solve of (a A - b B) x = 0 +* + IL2BY2 = .FALSE. + DO 370 J = JE - NW, 1, -1 +* +* If a 2-by-2 block, is in position j-1:j, wait until +* next iteration to process it (when it will be j:j+1) +* + IF( .NOT.IL2BY2 .AND. J.GT.1 ) THEN + IF( A( J, J-1 ).NE.ZERO ) THEN + IL2BY2 = .TRUE. + GO TO 370 + END IF + END IF + BDIAG( 1 ) = B( J, J ) + IF( IL2BY2 ) THEN + NA = 2 + BDIAG( 2 ) = B( J+1, J+1 ) + ELSE + NA = 1 + END IF +* +* Compute x(j) (and x(j+1), if 2-by-2 block) +* + CALL DLALN2( .FALSE., NA, NW, DMIN, ACOEF, A( J, J ), + $ LDA, BDIAG( 1 ), BDIAG( 2 ), WORK( 2*N+J ), + $ N, BCOEFR, BCOEFI, SUM, 2, SCALE, TEMP, + $ IINFO ) + IF( SCALE.LT.ONE ) THEN +* + DO 290 JW = 0, NW - 1 + DO 280 JR = 1, JE + WORK( ( JW+2 )*N+JR ) = SCALE* + $ WORK( ( JW+2 )*N+JR ) + 280 CONTINUE + 290 CONTINUE + END IF + XMAX = MAX( SCALE*XMAX, TEMP ) +* + DO 310 JW = 1, NW + DO 300 JA = 1, NA + WORK( ( JW+1 )*N+J+JA-1 ) = SUM( JA, JW ) + 300 CONTINUE + 310 CONTINUE +* +* w = w + x(j)*(a A(*,j) - b B(*,j) ) with scaling +* + IF( J.GT.1 ) THEN +* +* Check whether scaling is necessary for sum. +* + XSCALE = ONE / MAX( ONE, XMAX ) + TEMP = ACOEFA*WORK( J ) + BCOEFA*WORK( N+J ) + IF( IL2BY2 ) + $ TEMP = MAX( TEMP, ACOEFA*WORK( J+1 )+BCOEFA* + $ WORK( N+J+1 ) ) + TEMP = MAX( TEMP, ACOEFA, BCOEFA ) + IF( TEMP.GT.BIGNUM*XSCALE ) THEN +* + DO 330 JW = 0, NW - 1 + DO 320 JR = 1, JE + WORK( ( JW+2 )*N+JR ) = XSCALE* + $ WORK( ( JW+2 )*N+JR ) + 320 CONTINUE + 330 CONTINUE + XMAX = XMAX*XSCALE + END IF +* +* Compute the contributions of the off-diagonals of +* column j (and j+1, if 2-by-2 block) of A and B to the +* sums. +* +* + DO 360 JA = 1, NA + IF( ILCPLX ) THEN + CREALA = ACOEF*WORK( 2*N+J+JA-1 ) + CIMAGA = ACOEF*WORK( 3*N+J+JA-1 ) + CREALB = BCOEFR*WORK( 2*N+J+JA-1 ) - + $ BCOEFI*WORK( 3*N+J+JA-1 ) + CIMAGB = BCOEFI*WORK( 2*N+J+JA-1 ) + + $ BCOEFR*WORK( 3*N+J+JA-1 ) + DO 340 JR = 1, J - 1 + WORK( 2*N+JR ) = WORK( 2*N+JR ) - + $ CREALA*A( JR, J+JA-1 ) + + $ CREALB*B( JR, J+JA-1 ) + WORK( 3*N+JR ) = WORK( 3*N+JR ) - + $ CIMAGA*A( JR, J+JA-1 ) + + $ CIMAGB*B( JR, J+JA-1 ) + 340 CONTINUE + ELSE + CREALA = ACOEF*WORK( 2*N+J+JA-1 ) + CREALB = BCOEFR*WORK( 2*N+J+JA-1 ) + DO 350 JR = 1, J - 1 + WORK( 2*N+JR ) = WORK( 2*N+JR ) - + $ CREALA*A( JR, J+JA-1 ) + + $ CREALB*B( JR, J+JA-1 ) + 350 CONTINUE + END IF + 360 CONTINUE + END IF +* + IL2BY2 = .FALSE. + 370 CONTINUE +* +* Copy eigenvector to VR, back transforming if +* HOWMNY='B'. +* + IEIG = IEIG - NW + IF( ILBACK ) THEN +* + DO 410 JW = 0, NW - 1 + DO 380 JR = 1, N + WORK( ( JW+4 )*N+JR ) = WORK( ( JW+2 )*N+1 )* + $ VR( JR, 1 ) + 380 CONTINUE +* +* A series of compiler directives to defeat +* vectorization for the next loop +* +* + DO 400 JC = 2, JE + DO 390 JR = 1, N + WORK( ( JW+4 )*N+JR ) = WORK( ( JW+4 )*N+JR ) + + $ WORK( ( JW+2 )*N+JC )*VR( JR, JC ) + 390 CONTINUE + 400 CONTINUE + 410 CONTINUE +* + DO 430 JW = 0, NW - 1 + DO 420 JR = 1, N + VR( JR, IEIG+JW ) = WORK( ( JW+4 )*N+JR ) + 420 CONTINUE + 430 CONTINUE +* + IEND = N + ELSE + DO 450 JW = 0, NW - 1 + DO 440 JR = 1, N + VR( JR, IEIG+JW ) = WORK( ( JW+2 )*N+JR ) + 440 CONTINUE + 450 CONTINUE +* + IEND = JE + END IF +* +* Scale eigenvector +* + XMAX = ZERO + IF( ILCPLX ) THEN + DO 460 J = 1, IEND + XMAX = MAX( XMAX, ABS( VR( J, IEIG ) )+ + $ ABS( VR( J, IEIG+1 ) ) ) + 460 CONTINUE + ELSE + DO 470 J = 1, IEND + XMAX = MAX( XMAX, ABS( VR( J, IEIG ) ) ) + 470 CONTINUE + END IF +* + IF( XMAX.GT.SAFMIN ) THEN + XSCALE = ONE / XMAX + DO 490 JW = 0, NW - 1 + DO 480 JR = 1, IEND + VR( JR, IEIG+JW ) = XSCALE*VR( JR, IEIG+JW ) + 480 CONTINUE + 490 CONTINUE + END IF + 500 CONTINUE + END IF +* + RETURN +* +* End of DTGEVC +* + END diff --git a/costa/native/external/lapack/dtgex2.f b/costa/native/external/lapack/dtgex2.f new file mode 100644 index 000000000..81b085e25 --- /dev/null +++ b/costa/native/external/lapack/dtgex2.f @@ -0,0 +1,582 @@ + SUBROUTINE DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, J1, N1, N2, WORK, LWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER INFO, J1, LDA, LDB, LDQ, LDZ, LWORK, N, N1, N2 +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DTGEX2 swaps adjacent diagonal blocks (A11, B11) and (A22, B22) +* of size 1-by-1 or 2-by-2 in an upper (quasi) triangular matrix pair +* (A, B) by an orthogonal equivalence transformation. +* +* (A, B) must be in generalized real Schur canonical form (as returned +* by DGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 +* diagonal blocks. B is upper triangular. +* +* Optionally, the matrices Q and Z of generalized Schur vectors are +* updated. +* +* Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)' +* Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)' +* +* +* Arguments +* ========= +* +* WANTQ (input) LOGICAL +* .TRUE. : update the left transformation matrix Q; +* .FALSE.: do not update Q. +* +* WANTZ (input) LOGICAL +* .TRUE. : update the right transformation matrix Z; +* .FALSE.: do not update Z. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) DOUBLE PRECISION arrays, dimensions (LDA,N) +* On entry, the matrix A in the pair (A, B). +* On exit, the updated matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) DOUBLE PRECISION arrays, dimensions (LDB,N) +* On entry, the matrix B in the pair (A, B). +* On exit, the updated matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* Q (input/output) DOUBLE PRECISION array, dimension (LDZ,N) +* On entry, if WANTQ = .TRUE., the orthogonal matrix Q. +* On exit, the updated matrix Q. +* Not referenced if WANTQ = .FALSE.. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= 1. +* If WANTQ = .TRUE., LDQ >= N. +* +* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) +* On entry, if WANTZ =.TRUE., the orthogonal matrix Z. +* On exit, the updated matrix Z. +* Not referenced if WANTZ = .FALSE.. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1. +* If WANTZ = .TRUE., LDZ >= N. +* +* J1 (input) INTEGER +* The index to the first block (A11, B11). 1 <= J1 <= N. +* +* N1 (input) INTEGER +* The order of the first block (A11, B11). N1 = 0, 1 or 2. +* +* N2 (input) INTEGER +* The order of the second block (A22, B22). N2 = 0, 1 or 2. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK). +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* LWORK >= MAX( N*(N2+N1), (N2+N1)*(N2+N1)*2 ) +* +* INFO (output) INTEGER +* =0: Successful exit +* >0: If INFO = 1, the transformed matrix (A, B) would be +* too far from generalized Schur form; the blocks are +* not swapped and (A, B) and (Q, Z) are unchanged. +* The problem of swapping is too ill-conditioned. +* <0: If INFO = -16: LWORK is too small. Appropriate value +* for LWORK is returned in WORK(1). +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* In the current code both weak and strong stability tests are +* performed. The user can omit the strong stability test by changing +* the internal logical parameter WANDS to .FALSE.. See ref. [2] for +* details. +* +* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +* M.S. Moonen et al (eds), Linear Algebra for Large Scale and +* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +* +* [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +* Eigenvalues of a Regular Matrix Pair (A, B) and Condition +* Estimation: Theory, Algorithms and Software, +* Report UMINF - 94.04, Department of Computing Science, Umea +* University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working +* Note 87. To appear in Numerical Algorithms, 1996. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION TEN + PARAMETER ( TEN = 1.0D+01 ) + INTEGER LDST + PARAMETER ( LDST = 4 ) + LOGICAL WANDS + PARAMETER ( WANDS = .TRUE. ) +* .. +* .. Local Scalars .. + LOGICAL DTRONG, WEAK + INTEGER I, IDUM, LINFO, M + DOUBLE PRECISION BQRA21, BRQA21, DDUM, DNORM, DSCALE, DSUM, EPS, + $ F, G, SA, SB, SCALE, SMLNUM, SS, THRESH, WS +* .. +* .. Local Arrays .. + INTEGER IWORK( LDST ) + DOUBLE PRECISION AI( 2 ), AR( 2 ), BE( 2 ), IR( LDST, LDST ), + $ IRCOP( LDST, LDST ), LI( LDST, LDST ), + $ LICOP( LDST, LDST ), S( LDST, LDST ), + $ SCPY( LDST, LDST ), T( LDST, LDST ), + $ TAUL( LDST ), TAUR( LDST ), TCPY( LDST, LDST ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DGEQR2, DGERQ2, DLACPY, DLAGV2, + $ DLARTG, DLASSQ, DORG2R, DORGR2, DORM2R, DORMR2, + $ DROT, DSCAL, DTGSY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.1 .OR. N1.LE.0 .OR. N2.LE.0 ) + $ RETURN + IF( N1.GT.N .OR. ( J1+N1 ).GT.N ) + $ RETURN + M = N1 + N2 + IF( LWORK.LT.MAX( N*M, M*M*2 ) ) THEN + INFO = -16 + WORK( 1 ) = MAX( N*M, M*M*2 ) + RETURN + END IF +* + WEAK = .FALSE. + DTRONG = .FALSE. +* +* Make a local copy of selected block +* + CALL DCOPY( LDST*LDST, ZERO, 0, LI, 1 ) + CALL DCOPY( LDST*LDST, ZERO, 0, IR, 1 ) + CALL DLACPY( 'Full', M, M, A( J1, J1 ), LDA, S, LDST ) + CALL DLACPY( 'Full', M, M, B( J1, J1 ), LDB, T, LDST ) +* +* Compute threshold for testing acceptance of swapping. +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + DSCALE = ZERO + DSUM = ONE + CALL DLACPY( 'Full', M, M, S, LDST, WORK, M ) + CALL DLASSQ( M*M, WORK, 1, DSCALE, DSUM ) + CALL DLACPY( 'Full', M, M, T, LDST, WORK, M ) + CALL DLASSQ( M*M, WORK, 1, DSCALE, DSUM ) + DNORM = DSCALE*SQRT( DSUM ) + THRESH = MAX( TEN*EPS*DNORM, SMLNUM ) +* + IF( M.EQ.2 ) THEN +* +* CASE 1: Swap 1-by-1 and 1-by-1 blocks. +* +* Compute orthogonal QL and RQ that swap 1-by-1 and 1-by-1 blocks +* using Givens rotations and perform the swap tentatively. +* + F = S( 2, 2 )*T( 1, 1 ) - T( 2, 2 )*S( 1, 1 ) + G = S( 2, 2 )*T( 1, 2 ) - T( 2, 2 )*S( 1, 2 ) + SB = ABS( T( 2, 2 ) ) + SA = ABS( S( 2, 2 ) ) + CALL DLARTG( F, G, IR( 1, 2 ), IR( 1, 1 ), DDUM ) + IR( 2, 1 ) = -IR( 1, 2 ) + IR( 2, 2 ) = IR( 1, 1 ) + CALL DROT( 2, S( 1, 1 ), 1, S( 1, 2 ), 1, IR( 1, 1 ), + $ IR( 2, 1 ) ) + CALL DROT( 2, T( 1, 1 ), 1, T( 1, 2 ), 1, IR( 1, 1 ), + $ IR( 2, 1 ) ) + IF( SA.GE.SB ) THEN + CALL DLARTG( S( 1, 1 ), S( 2, 1 ), LI( 1, 1 ), LI( 2, 1 ), + $ DDUM ) + ELSE + CALL DLARTG( T( 1, 1 ), T( 2, 1 ), LI( 1, 1 ), LI( 2, 1 ), + $ DDUM ) + END IF + CALL DROT( 2, S( 1, 1 ), LDST, S( 2, 1 ), LDST, LI( 1, 1 ), + $ LI( 2, 1 ) ) + CALL DROT( 2, T( 1, 1 ), LDST, T( 2, 1 ), LDST, LI( 1, 1 ), + $ LI( 2, 1 ) ) + LI( 2, 2 ) = LI( 1, 1 ) + LI( 1, 2 ) = -LI( 2, 1 ) +* +* Weak stability test: +* |S21| + |T21| <= O(EPS * F-norm((S, T))) +* + WS = ABS( S( 2, 1 ) ) + ABS( T( 2, 1 ) ) + WEAK = WS.LE.THRESH + IF( .NOT.WEAK ) + $ GO TO 70 +* + IF( WANDS ) THEN +* +* Strong stability test: +* F-norm((A-QL'*S*QR, B-QL'*T*QR)) <= O(EPS*F-norm((A,B))) +* + CALL DLACPY( 'Full', M, M, A( J1, J1 ), LDA, WORK( M*M+1 ), + $ M ) + CALL DGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, S, LDST, ZERO, + $ WORK, M ) + CALL DGEMM( 'N', 'T', M, M, M, -ONE, WORK, M, IR, LDST, ONE, + $ WORK( M*M+1 ), M ) + DSCALE = ZERO + DSUM = ONE + CALL DLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM ) +* + CALL DLACPY( 'Full', M, M, B( J1, J1 ), LDB, WORK( M*M+1 ), + $ M ) + CALL DGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, T, LDST, ZERO, + $ WORK, M ) + CALL DGEMM( 'N', 'T', M, M, M, -ONE, WORK, M, IR, LDST, ONE, + $ WORK( M*M+1 ), M ) + CALL DLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM ) + SS = DSCALE*SQRT( DSUM ) + DTRONG = SS.LE.THRESH + IF( .NOT.DTRONG ) + $ GO TO 70 + END IF +* +* Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and +* (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). +* + CALL DROT( J1+1, A( 1, J1 ), 1, A( 1, J1+1 ), 1, IR( 1, 1 ), + $ IR( 2, 1 ) ) + CALL DROT( J1+1, B( 1, J1 ), 1, B( 1, J1+1 ), 1, IR( 1, 1 ), + $ IR( 2, 1 ) ) + CALL DROT( N-J1+1, A( J1, J1 ), LDA, A( J1+1, J1 ), LDA, + $ LI( 1, 1 ), LI( 2, 1 ) ) + CALL DROT( N-J1+1, B( J1, J1 ), LDB, B( J1+1, J1 ), LDB, + $ LI( 1, 1 ), LI( 2, 1 ) ) +* +* Set N1-by-N2 (2,1) - blocks to ZERO. +* + A( J1+1, J1 ) = ZERO + B( J1+1, J1 ) = ZERO +* +* Accumulate transformations into Q and Z if requested. +* + IF( WANTZ ) + $ CALL DROT( N, Z( 1, J1 ), 1, Z( 1, J1+1 ), 1, IR( 1, 1 ), + $ IR( 2, 1 ) ) + IF( WANTQ ) + $ CALL DROT( N, Q( 1, J1 ), 1, Q( 1, J1+1 ), 1, LI( 1, 1 ), + $ LI( 2, 1 ) ) +* +* Exit with INFO = 0 if swap was successfully performed. +* + RETURN +* + ELSE +* +* CASE 2: Swap 1-by-1 and 2-by-2 blocks, or 2-by-2 +* and 2-by-2 blocks. +* +* Solve the generalized Sylvester equation +* S11 * R - L * S22 = SCALE * S12 +* T11 * R - L * T22 = SCALE * T12 +* for R and L. Solutions in LI and IR. +* + CALL DLACPY( 'Full', N1, N2, T( 1, N1+1 ), LDST, LI, LDST ) + CALL DLACPY( 'Full', N1, N2, S( 1, N1+1 ), LDST, + $ IR( N2+1, N1+1 ), LDST ) + CALL DTGSY2( 'N', 0, N1, N2, S, LDST, S( N1+1, N1+1 ), LDST, + $ IR( N2+1, N1+1 ), LDST, T, LDST, T( N1+1, N1+1 ), + $ LDST, LI, LDST, SCALE, DSUM, DSCALE, IWORK, IDUM, + $ LINFO ) +* +* Compute orthogonal matrix QL: +* +* QL' * LI = [ TL ] +* [ 0 ] +* where +* LI = [ -L ] +* [ SCALE * identity(N2) ] +* + DO 10 I = 1, N2 + CALL DSCAL( N1, -ONE, LI( 1, I ), 1 ) + LI( N1+I, I ) = SCALE + 10 CONTINUE + CALL DGEQR2( M, N2, LI, LDST, TAUL, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 + CALL DORG2R( M, M, N2, LI, LDST, TAUL, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 +* +* Compute orthogonal matrix RQ: +* +* IR * RQ' = [ 0 TR], +* +* where IR = [ SCALE * identity(N1), R ] +* + DO 20 I = 1, N1 + IR( N2+I, I ) = SCALE + 20 CONTINUE + CALL DGERQ2( N1, M, IR( N2+1, 1 ), LDST, TAUR, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 + CALL DORGR2( M, M, N1, IR, LDST, TAUR, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 +* +* Perform the swapping tentatively: +* + CALL DGEMM( 'T', 'N', M, M, M, ONE, LI, LDST, S, LDST, ZERO, + $ WORK, M ) + CALL DGEMM( 'N', 'T', M, M, M, ONE, WORK, M, IR, LDST, ZERO, S, + $ LDST ) + CALL DGEMM( 'T', 'N', M, M, M, ONE, LI, LDST, T, LDST, ZERO, + $ WORK, M ) + CALL DGEMM( 'N', 'T', M, M, M, ONE, WORK, M, IR, LDST, ZERO, T, + $ LDST ) + CALL DLACPY( 'F', M, M, S, LDST, SCPY, LDST ) + CALL DLACPY( 'F', M, M, T, LDST, TCPY, LDST ) + CALL DLACPY( 'F', M, M, IR, LDST, IRCOP, LDST ) + CALL DLACPY( 'F', M, M, LI, LDST, LICOP, LDST ) +* +* Triangularize the B-part by an RQ factorization. +* Apply transformation (from left) to A-part, giving S. +* + CALL DGERQ2( M, M, T, LDST, TAUR, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 + CALL DORMR2( 'R', 'T', M, M, M, T, LDST, TAUR, S, LDST, WORK, + $ LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 + CALL DORMR2( 'L', 'N', M, M, M, T, LDST, TAUR, IR, LDST, WORK, + $ LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 +* +* Compute F-norm(S21) in BRQA21. (T21 is 0.) +* + DSCALE = ZERO + DSUM = ONE + DO 30 I = 1, N2 + CALL DLASSQ( N1, S( N2+1, I ), 1, DSCALE, DSUM ) + 30 CONTINUE + BRQA21 = DSCALE*SQRT( DSUM ) +* +* Triangularize the B-part by a QR factorization. +* Apply transformation (from right) to A-part, giving S. +* + CALL DGEQR2( M, M, TCPY, LDST, TAUL, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 + CALL DORM2R( 'L', 'T', M, M, M, TCPY, LDST, TAUL, SCPY, LDST, + $ WORK, INFO ) + CALL DORM2R( 'R', 'N', M, M, M, TCPY, LDST, TAUL, LICOP, LDST, + $ WORK, INFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 +* +* Compute F-norm(S21) in BQRA21. (T21 is 0.) +* + DSCALE = ZERO + DSUM = ONE + DO 40 I = 1, N2 + CALL DLASSQ( N1, SCPY( N2+1, I ), 1, DSCALE, DSUM ) + 40 CONTINUE + BQRA21 = DSCALE*SQRT( DSUM ) +* +* Decide which method to use. +* Weak stability test: +* F-norm(S21) <= O(EPS * F-norm((S, T))) +* + IF( BQRA21.LE.BRQA21 .AND. BQRA21.LE.THRESH ) THEN + CALL DLACPY( 'F', M, M, SCPY, LDST, S, LDST ) + CALL DLACPY( 'F', M, M, TCPY, LDST, T, LDST ) + CALL DLACPY( 'F', M, M, IRCOP, LDST, IR, LDST ) + CALL DLACPY( 'F', M, M, LICOP, LDST, LI, LDST ) + ELSE IF( BRQA21.GE.THRESH ) THEN + GO TO 70 + END IF +* +* Set lower triangle of B-part to zero +* + DO 50 I = 2, M + CALL DCOPY( M-I+1, ZERO, 0, T( I, I-1 ), 1 ) + 50 CONTINUE +* + IF( WANDS ) THEN +* +* Strong stability test: +* F-norm((A-QL*S*QR', B-QL*T*QR')) <= O(EPS*F-norm((A,B))) +* + CALL DLACPY( 'Full', M, M, A( J1, J1 ), LDA, WORK( M*M+1 ), + $ M ) + CALL DGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, S, LDST, ZERO, + $ WORK, M ) + CALL DGEMM( 'N', 'N', M, M, M, -ONE, WORK, M, IR, LDST, ONE, + $ WORK( M*M+1 ), M ) + DSCALE = ZERO + DSUM = ONE + CALL DLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM ) +* + CALL DLACPY( 'Full', M, M, B( J1, J1 ), LDB, WORK( M*M+1 ), + $ M ) + CALL DGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, T, LDST, ZERO, + $ WORK, M ) + CALL DGEMM( 'N', 'N', M, M, M, -ONE, WORK, M, IR, LDST, ONE, + $ WORK( M*M+1 ), M ) + CALL DLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM ) + SS = DSCALE*SQRT( DSUM ) + DTRONG = ( SS.LE.THRESH ) + IF( .NOT.DTRONG ) + $ GO TO 70 +* + END IF +* +* If the swap is accepted ("weakly" and "strongly"), apply the +* transformations and set N1-by-N2 (2,1)-block to zero. +* + DO 60 I = 1, N2 + CALL DCOPY( N1, ZERO, 0, S( N2+1, I ), 1 ) + 60 CONTINUE +* +* copy back M-by-M diagonal block starting at index J1 of (A, B) +* + CALL DLACPY( 'F', M, M, S, LDST, A( J1, J1 ), LDA ) + CALL DLACPY( 'F', M, M, T, LDST, B( J1, J1 ), LDB ) + CALL DCOPY( LDST*LDST, ZERO, 0, T, 1 ) +* +* Standardize existing 2-by-2 blocks. +* + CALL DCOPY( M*M, ZERO, 0, WORK, 1 ) + WORK( 1 ) = ONE + T( 1, 1 ) = ONE + IDUM = LWORK - M*M - 2 + IF( N2.GT.1 ) THEN + CALL DLAGV2( A( J1, J1 ), LDA, B( J1, J1 ), LDB, AR, AI, BE, + $ WORK( 1 ), WORK( 2 ), T( 1, 1 ), T( 2, 1 ) ) + WORK( M+1 ) = -WORK( 2 ) + WORK( M+2 ) = WORK( 1 ) + T( N2, N2 ) = T( 1, 1 ) + T( 1, 2 ) = -T( 2, 1 ) + END IF + WORK( M*M ) = ONE + T( M, M ) = ONE +* + IF( N1.GT.1 ) THEN + CALL DLAGV2( A( J1+N2, J1+N2 ), LDA, B( J1+N2, J1+N2 ), LDB, + $ TAUR, TAUL, WORK( M*M+1 ), WORK( N2*M+N2+1 ), + $ WORK( N2*M+N2+2 ), T( N2+1, N2+1 ), + $ T( M, M-1 ) ) + WORK( M*M ) = WORK( N2*M+N2+1 ) + WORK( M*M-1 ) = -WORK( N2*M+N2+2 ) + T( M, M ) = T( N2+1, N2+1 ) + T( M-1, M ) = -T( M, M-1 ) + END IF + CALL DGEMM( 'T', 'N', N2, N1, N2, ONE, WORK, M, A( J1, J1+N2 ), + $ LDA, ZERO, WORK( M*M+1 ), N2 ) + CALL DLACPY( 'Full', N2, N1, WORK( M*M+1 ), N2, A( J1, J1+N2 ), + $ LDA ) + CALL DGEMM( 'T', 'N', N2, N1, N2, ONE, WORK, M, B( J1, J1+N2 ), + $ LDB, ZERO, WORK( M*M+1 ), N2 ) + CALL DLACPY( 'Full', N2, N1, WORK( M*M+1 ), N2, B( J1, J1+N2 ), + $ LDB ) + CALL DGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, WORK, M, ZERO, + $ WORK( M*M+1 ), M ) + CALL DLACPY( 'Full', M, M, WORK( M*M+1 ), M, LI, LDST ) + CALL DGEMM( 'N', 'N', N2, N1, N1, ONE, A( J1, J1+N2 ), LDA, + $ T( N2+1, N2+1 ), LDST, ZERO, WORK, N2 ) + CALL DLACPY( 'Full', N2, N1, WORK, N2, A( J1, J1+N2 ), LDA ) + CALL DGEMM( 'N', 'N', N2, N1, N1, ONE, B( J1, J1+N2 ), LDA, + $ T( N2+1, N2+1 ), LDST, ZERO, WORK, N2 ) + CALL DLACPY( 'Full', N2, N1, WORK, N2, B( J1, J1+N2 ), LDB ) + CALL DGEMM( 'T', 'N', M, M, M, ONE, IR, LDST, T, LDST, ZERO, + $ WORK, M ) + CALL DLACPY( 'Full', M, M, WORK, M, IR, LDST ) +* +* Accumulate transformations into Q and Z if requested. +* + IF( WANTQ ) THEN + CALL DGEMM( 'N', 'N', N, M, M, ONE, Q( 1, J1 ), LDQ, LI, + $ LDST, ZERO, WORK, N ) + CALL DLACPY( 'Full', N, M, WORK, N, Q( 1, J1 ), LDQ ) +* + END IF +* + IF( WANTZ ) THEN + CALL DGEMM( 'N', 'N', N, M, M, ONE, Z( 1, J1 ), LDZ, IR, + $ LDST, ZERO, WORK, N ) + CALL DLACPY( 'Full', N, M, WORK, N, Z( 1, J1 ), LDZ ) +* + END IF +* +* Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and +* (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). +* + I = J1 + M + IF( I.LE.N ) THEN + CALL DGEMM( 'T', 'N', M, N-I+1, M, ONE, LI, LDST, + $ A( J1, I ), LDA, ZERO, WORK, M ) + CALL DLACPY( 'Full', M, N-I+1, WORK, M, A( J1, I ), LDA ) + CALL DGEMM( 'T', 'N', M, N-I+1, M, ONE, LI, LDST, + $ B( J1, I ), LDA, ZERO, WORK, M ) + CALL DLACPY( 'Full', M, N-I+1, WORK, M, B( J1, I ), LDA ) + END IF + I = J1 - 1 + IF( I.GT.0 ) THEN + CALL DGEMM( 'N', 'N', I, M, M, ONE, A( 1, J1 ), LDA, IR, + $ LDST, ZERO, WORK, I ) + CALL DLACPY( 'Full', I, M, WORK, I, A( 1, J1 ), LDA ) + CALL DGEMM( 'N', 'N', I, M, M, ONE, B( 1, J1 ), LDB, IR, + $ LDST, ZERO, WORK, I ) + CALL DLACPY( 'Full', I, M, WORK, I, B( 1, J1 ), LDB ) + END IF +* +* Exit with INFO = 0 if swap was successfully performed. +* + RETURN +* + END IF +* +* Exit with INFO = 1 if swap was rejected. +* + 70 CONTINUE +* + INFO = 1 + RETURN +* +* End of DTGEX2 +* + END diff --git a/costa/native/external/lapack/dtgexc.f b/costa/native/external/lapack/dtgexc.f new file mode 100644 index 000000000..169f0467e --- /dev/null +++ b/costa/native/external/lapack/dtgexc.f @@ -0,0 +1,434 @@ + SUBROUTINE DTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, IFST, ILST, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER IFST, ILST, INFO, LDA, LDB, LDQ, LDZ, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DTGEXC reorders the generalized real Schur decomposition of a real +* matrix pair (A,B) using an orthogonal equivalence transformation +* +* (A, B) = Q * (A, B) * Z', +* +* so that the diagonal block of (A, B) with row index IFST is moved +* to row ILST. +* +* (A, B) must be in generalized real Schur canonical form (as returned +* by DGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 +* diagonal blocks. B is upper triangular. +* +* Optionally, the matrices Q and Z of generalized Schur vectors are +* updated. +* +* Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)' +* Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)' +* +* +* Arguments +* ========= +* +* WANTQ (input) LOGICAL +* .TRUE. : update the left transformation matrix Q; +* .FALSE.: do not update Q. +* +* WANTZ (input) LOGICAL +* .TRUE. : update the right transformation matrix Z; +* .FALSE.: do not update Z. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the matrix A in generalized real Schur canonical +* form. +* On exit, the updated matrix A, again in generalized +* real Schur canonical form. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,N) +* On entry, the matrix B in generalized real Schur canonical +* form (A,B). +* On exit, the updated matrix B, again in generalized +* real Schur canonical form (A,B). +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* Q (input/output) DOUBLE PRECISION array, dimension (LDZ,N) +* On entry, if WANTQ = .TRUE., the orthogonal matrix Q. +* On exit, the updated matrix Q. +* If WANTQ = .FALSE., Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= 1. +* If WANTQ = .TRUE., LDQ >= N. +* +* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) +* On entry, if WANTZ = .TRUE., the orthogonal matrix Z. +* On exit, the updated matrix Z. +* If WANTZ = .FALSE., Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1. +* If WANTZ = .TRUE., LDZ >= N. +* +* IFST (input/output) INTEGER +* ILST (input/output) INTEGER +* Specify the reordering of the diagonal blocks of (A, B). +* The block with row index IFST is moved to row ILST, by a +* sequence of swapping between adjacent blocks. +* On exit, if IFST pointed on entry to the second row of +* a 2-by-2 block, it is changed to point to the first row; +* ILST always points to the first row of the block in its +* final position (which may differ from its input value by +* +1 or -1). 1 <= IFST, ILST <= N. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 4*N + 16. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* =0: successful exit. +* <0: if INFO = -i, the i-th argument had an illegal value. +* =1: The transformed matrix pair (A, B) would be too far +* from generalized Schur form; the problem is ill- +* conditioned. (A, B) may have been partially reordered, +* and ILST points to the first row of the current +* position of the block being moved. +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +* M.S. Moonen et al (eds), Linear Algebra for Large Scale and +* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER HERE, LWMIN, NBF, NBL, NBNEXT +* .. +* .. External Subroutines .. + EXTERNAL DTGEX2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode and test input arguments. +* + INFO = 0 + LWMIN = MAX( 1, 4*N+16 ) + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. ( LDQ.LT.MAX( 1, N ) ) ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. WANTZ .AND. ( LDZ.LT.MAX( 1, N ) ) ) THEN + INFO = -11 + ELSE IF( IFST.LT.1 .OR. IFST.GT.N ) THEN + INFO = -12 + ELSE IF( ILST.LT.1 .OR. ILST.GT.N ) THEN + INFO = -13 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTGEXC', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* +* Determine the first row of the specified block and find out +* if it is 1-by-1 or 2-by-2. +* + IF( IFST.GT.1 ) THEN + IF( A( IFST, IFST-1 ).NE.ZERO ) + $ IFST = IFST - 1 + END IF + NBF = 1 + IF( IFST.LT.N ) THEN + IF( A( IFST+1, IFST ).NE.ZERO ) + $ NBF = 2 + END IF +* +* Determine the first row of the final block +* and find out if it is 1-by-1 or 2-by-2. +* + IF( ILST.GT.1 ) THEN + IF( A( ILST, ILST-1 ).NE.ZERO ) + $ ILST = ILST - 1 + END IF + NBL = 1 + IF( ILST.LT.N ) THEN + IF( A( ILST+1, ILST ).NE.ZERO ) + $ NBL = 2 + END IF + IF( IFST.EQ.ILST ) + $ RETURN +* + IF( IFST.LT.ILST ) THEN +* +* Update ILST. +* + IF( NBF.EQ.2 .AND. NBL.EQ.1 ) + $ ILST = ILST - 1 + IF( NBF.EQ.1 .AND. NBL.EQ.2 ) + $ ILST = ILST + 1 +* + HERE = IFST +* + 10 CONTINUE +* +* Swap with next one below. +* + IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN +* +* Current block either 1-by-1 or 2-by-2. +* + NBNEXT = 1 + IF( HERE+NBF+1.LE.N ) THEN + IF( A( HERE+NBF+1, HERE+NBF ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE, NBF, NBNEXT, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + NBNEXT +* +* Test if 2-by-2 block breaks into two 1-by-1 blocks. +* + IF( NBF.EQ.2 ) THEN + IF( A( HERE+1, HERE ).EQ.ZERO ) + $ NBF = 3 + END IF +* + ELSE +* +* Current block consists of two 1-by-1 blocks, each of which +* must be swapped individually. +* + NBNEXT = 1 + IF( HERE+3.LE.N ) THEN + IF( A( HERE+3, HERE+2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE+1, 1, NBNEXT, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + IF( NBNEXT.EQ.1 ) THEN +* +* Swap two 1-by-1 blocks. +* + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE, 1, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 1 +* + ELSE +* +* Recompute NBNEXT in case of 2-by-2 split. +* + IF( A( HERE+2, HERE+1 ).EQ.ZERO ) + $ NBNEXT = 1 + IF( NBNEXT.EQ.2 ) THEN +* +* 2-by-2 block did not split. +* + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE, 1, NBNEXT, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 2 + ELSE +* +* 2-by-2 block did split. +* + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 1 + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 1 + END IF +* + END IF + END IF + IF( HERE.LT.ILST ) + $ GO TO 10 + ELSE + HERE = IFST +* + 20 CONTINUE +* +* Swap with next one below. +* + IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN +* +* Current block either 1-by-1 or 2-by-2. +* + NBNEXT = 1 + IF( HERE.GE.3 ) THEN + IF( A( HERE-1, HERE-2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE-NBNEXT, NBNEXT, NBF, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - NBNEXT +* +* Test if 2-by-2 block breaks into two 1-by-1 blocks. +* + IF( NBF.EQ.2 ) THEN + IF( A( HERE+1, HERE ).EQ.ZERO ) + $ NBF = 3 + END IF +* + ELSE +* +* Current block consists of two 1-by-1 blocks, each of which +* must be swapped individually. +* + NBNEXT = 1 + IF( HERE.GE.3 ) THEN + IF( A( HERE-1, HERE-2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE-NBNEXT, NBNEXT, 1, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + IF( NBNEXT.EQ.1 ) THEN +* +* Swap two 1-by-1 blocks. +* + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE, NBNEXT, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 1 + ELSE +* +* Recompute NBNEXT in case of 2-by-2 split. +* + IF( A( HERE, HERE-1 ).EQ.ZERO ) + $ NBNEXT = 1 + IF( NBNEXT.EQ.2 ) THEN +* +* 2-by-2 block did not split. +* + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE-1, 2, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 2 + ELSE +* +* 2-by-2 block did split. +* + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 1 + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 1 + END IF + END IF + END IF + IF( HERE.GT.ILST ) + $ GO TO 20 + END IF + ILST = HERE + WORK( 1 ) = LWMIN + RETURN +* +* End of DTGEXC +* + END diff --git a/costa/native/external/lapack/dtgsen.f b/costa/native/external/lapack/dtgsen.f new file mode 100644 index 000000000..417adfe93 --- /dev/null +++ b/costa/native/external/lapack/dtgsen.f @@ -0,0 +1,718 @@ + SUBROUTINE DTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, Q, LDQ, Z, LDZ, M, PL, + $ PR, DIF, WORK, LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER IJOB, INFO, LDA, LDB, LDQ, LDZ, LIWORK, LWORK, + $ M, N + DOUBLE PRECISION PL, PR +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), DIF( * ), Q( LDQ, * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* DTGSEN reorders the generalized real Schur decomposition of a real +* matrix pair (A, B) (in terms of an orthonormal equivalence trans- +* formation Q' * (A, B) * Z), so that a selected cluster of eigenvalues +* appears in the leading diagonal blocks of the upper quasi-triangular +* matrix A and the upper triangular B. The leading columns of Q and +* Z form orthonormal bases of the corresponding left and right eigen- +* spaces (deflating subspaces). (A, B) must be in generalized real +* Schur canonical form (as returned by DGGES), i.e. A is block upper +* triangular with 1-by-1 and 2-by-2 diagonal blocks. B is upper +* triangular. +* +* DTGSEN also computes the generalized eigenvalues +* +* w(j) = (ALPHAR(j) + i*ALPHAI(j))/BETA(j) +* +* of the reordered matrix pair (A, B). +* +* Optionally, DTGSEN computes the estimates of reciprocal condition +* numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11), +* (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s) +* between the matrix pairs (A11, B11) and (A22,B22) that correspond to +* the selected cluster and the eigenvalues outside the cluster, resp., +* and norms of "projections" onto left and right eigenspaces w.r.t. +* the selected cluster in the (1,1)-block. +* +* Arguments +* ========= +* +* IJOB (input) INTEGER +* Specifies whether condition numbers are required for the +* cluster of eigenvalues (PL and PR) or the deflating subspaces +* (Difu and Difl): +* =0: Only reorder w.r.t. SELECT. No extras. +* =1: Reciprocal of norms of "projections" onto left and right +* eigenspaces w.r.t. the selected cluster (PL and PR). +* =2: Upper bounds on Difu and Difl. F-norm-based estimate +* (DIF(1:2)). +* =3: Estimate of Difu and Difl. 1-norm-based estimate +* (DIF(1:2)). +* About 5 times as expensive as IJOB = 2. +* =4: Compute PL, PR and DIF (i.e. 0, 1 and 2 above): Economic +* version to get it all. +* =5: Compute PL, PR and DIF (i.e. 0, 1 and 3 above) +* +* WANTQ (input) LOGICAL +* .TRUE. : update the left transformation matrix Q; +* .FALSE.: do not update Q. +* +* WANTZ (input) LOGICAL +* .TRUE. : update the right transformation matrix Z; +* .FALSE.: do not update Z. +* +* SELECT (input) LOGICAL array, dimension (N) +* SELECT specifies the eigenvalues in the selected cluster. +* To select a real eigenvalue w(j), SELECT(j) must be set to +* .TRUE.. To select a complex conjugate pair of eigenvalues +* w(j) and w(j+1), corresponding to a 2-by-2 diagonal block, +* either SELECT(j) or SELECT(j+1) or both must be set to +* .TRUE.; a complex conjugate pair of eigenvalues must be +* either both included in the cluster or both excluded. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension(LDA,N) +* On entry, the upper quasi-triangular matrix A, with (A, B) in +* generalized real Schur canonical form. +* On exit, A is overwritten by the reordered matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) DOUBLE PRECISION array, dimension(LDB,N) +* On entry, the upper triangular matrix B, with (A, B) in +* generalized real Schur canonical form. +* On exit, B is overwritten by the reordered matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* ALPHAR (output) DOUBLE PRECISION array, dimension (N) +* ALPHAI (output) DOUBLE PRECISION array, dimension (N) +* BETA (output) DOUBLE PRECISION array, dimension (N) +* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +* be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i +* and BETA(j),j=1,...,N are the diagonals of the complex Schur +* form (S,T) that would result if the 2-by-2 diagonal blocks of +* the real generalized Schur form of (A,B) were further reduced +* to triangular form using complex unitary transformations. +* If ALPHAI(j) is zero, then the j-th eigenvalue is real; if +* positive, then the j-th and (j+1)-st eigenvalues are a +* complex conjugate pair, with ALPHAI(j+1) negative. +* +* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) +* On entry, if WANTQ = .TRUE., Q is an N-by-N matrix. +* On exit, Q has been postmultiplied by the left orthogonal +* transformation matrix which reorder (A, B); The leading M +* columns of Q form orthonormal bases for the specified pair of +* left eigenspaces (deflating subspaces). +* If WANTQ = .FALSE., Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= 1; +* and if WANTQ = .TRUE., LDQ >= N. +* +* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) +* On entry, if WANTZ = .TRUE., Z is an N-by-N matrix. +* On exit, Z has been postmultiplied by the left orthogonal +* transformation matrix which reorder (A, B); The leading M +* columns of Z form orthonormal bases for the specified pair of +* left eigenspaces (deflating subspaces). +* If WANTZ = .FALSE., Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1; +* If WANTZ = .TRUE., LDZ >= N. +* +* M (output) INTEGER +* The dimension of the specified pair of left and right eigen- +* spaces (deflating subspaces). 0 <= M <= N. +* +* PL, PR (output) DOUBLE PRECISION +* If IJOB = 1, 4 or 5, PL, PR are lower bounds on the +* reciprocal of the norm of "projections" onto left and right +* eigenspaces with respect to the selected cluster. +* 0 < PL, PR <= 1. +* If M = 0 or M = N, PL = PR = 1. +* If IJOB = 0, 2 or 3, PL and PR are not referenced. +* +* DIF (output) DOUBLE PRECISION array, dimension (2). +* If IJOB >= 2, DIF(1:2) store the estimates of Difu and Difl. +* If IJOB = 2 or 4, DIF(1:2) are F-norm-based upper bounds on +* Difu and Difl. If IJOB = 3 or 5, DIF(1:2) are 1-norm-based +* estimates of Difu and Difl. +* If M = 0 or N, DIF(1:2) = F-norm([A, B]). +* If IJOB = 0 or 1, DIF is not referenced. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* IF IJOB = 0, WORK is not referenced. Otherwise, +* on exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 4*N+16. +* If IJOB = 1, 2 or 4, LWORK >= MAX(4*N+16, 2*M*(N-M)). +* If IJOB = 3 or 5, LWORK >= MAX(4*N+16, 4*M*(N-M)). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* IF IJOB = 0, IWORK is not referenced. Otherwise, +* on exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. LIWORK >= 1. +* If IJOB = 1, 2 or 4, LIWORK >= N+6. +* If IJOB = 3 or 5, LIWORK >= MAX(2*M*(N-M), N+6). +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* =0: Successful exit. +* <0: If INFO = -i, the i-th argument had an illegal value. +* =1: Reordering of (A, B) failed because the transformed +* matrix pair (A, B) would be too far from generalized +* Schur form; the problem is very ill-conditioned. +* (A, B) may have been partially reordered. +* If requested, 0 is returned in DIF(*), PL and PR. +* +* Further Details +* =============== +* +* DTGSEN first collects the selected eigenvalues by computing +* orthogonal U and W that move them to the top left corner of (A, B). +* In other words, the selected eigenvalues are the eigenvalues of +* (A11, B11) in: +* +* U'*(A, B)*W = (A11 A12) (B11 B12) n1 +* ( 0 A22),( 0 B22) n2 +* n1 n2 n1 n2 +* +* where N = n1+n2 and U' means the transpose of U. The first n1 columns +* of U and W span the specified pair of left and right eigenspaces +* (deflating subspaces) of (A, B). +* +* If (A, B) has been obtained from the generalized real Schur +* decomposition of a matrix pair (C, D) = Q*(A, B)*Z', then the +* reordered generalized real Schur form of (C, D) is given by +* +* (C, D) = (Q*U)*(U'*(A, B)*W)*(Z*W)', +* +* and the first n1 columns of Q*U and Z*W span the corresponding +* deflating subspaces of (C, D) (Q and Z store Q*U and Z*W, resp.). +* +* Note that if the selected eigenvalue is sufficiently ill-conditioned, +* then its value may differ significantly from its value before +* reordering. +* +* The reciprocal condition numbers of the left and right eigenspaces +* spanned by the first n1 columns of U and W (or Q*U and Z*W) may +* be returned in DIF(1:2), corresponding to Difu and Difl, resp. +* +* The Difu and Difl are defined as: +* +* Difu[(A11, B11), (A22, B22)] = sigma-min( Zu ) +* and +* Difl[(A11, B11), (A22, B22)] = Difu[(A22, B22), (A11, B11)], +* +* where sigma-min(Zu) is the smallest singular value of the +* (2*n1*n2)-by-(2*n1*n2) matrix +* +* Zu = [ kron(In2, A11) -kron(A22', In1) ] +* [ kron(In2, B11) -kron(B22', In1) ]. +* +* Here, Inx is the identity matrix of size nx and A22' is the +* transpose of A22. kron(X, Y) is the Kronecker product between +* the matrices X and Y. +* +* When DIF(2) is small, small changes in (A, B) can cause large changes +* in the deflating subspace. An approximate (asymptotic) bound on the +* maximum angular error in the computed deflating subspaces is +* +* EPS * norm((A, B)) / DIF(2), +* +* where EPS is the machine precision. +* +* The reciprocal norm of the projectors on the left and right +* eigenspaces associated with (A11, B11) may be returned in PL and PR. +* They are computed as follows. First we compute L and R so that +* P*(A, B)*Q is block diagonal, where +* +* P = ( I -L ) n1 Q = ( I R ) n1 +* ( 0 I ) n2 and ( 0 I ) n2 +* n1 n2 n1 n2 +* +* and (L, R) is the solution to the generalized Sylvester equation +* +* A11*R - L*A22 = -A12 +* B11*R - L*B22 = -B12 +* +* Then PL = (F-norm(L)**2+1)**(-1/2) and PR = (F-norm(R)**2+1)**(-1/2). +* An approximate (asymptotic) bound on the average absolute error of +* the selected eigenvalues is +* +* EPS * norm((A, B)) / PL. +* +* There are also global error bounds which valid for perturbations up +* to a certain restriction: A lower bound (x) on the smallest +* F-norm(E,F) for which an eigenvalue of (A11, B11) may move and +* coalesce with an eigenvalue of (A22, B22) under perturbation (E,F), +* (i.e. (A + E, B + F), is +* +* x = min(Difu,Difl)/((1/(PL*PL)+1/(PR*PR))**(1/2)+2*max(1/PL,1/PR)). +* +* An approximate bound on x can be computed from DIF(1:2), PL and PR. +* +* If y = ( F-norm(E,F) / x) <= 1, the angles between the perturbed +* (L', R') and unperturbed (L, R) left and right deflating subspaces +* associated with the selected cluster in the (1,1)-blocks can be +* bounded as +* +* max-angle(L, L') <= arctan( y * PL / (1 - y * (1 - PL * PL)**(1/2)) +* max-angle(R, R') <= arctan( y * PR / (1 - y * (1 - PR * PR)**(1/2)) +* +* See LAPACK User's Guide section 4.11 or the following references +* for more information. +* +* Note that if the default method for computing the Frobenius-norm- +* based estimate DIF is not wanted (see DLATDF), then the parameter +* IDIFJB (see below) should be changed from 3 to 4 (routine DLATDF +* (IJOB = 2 will be used)). See DTGSYL for more details. +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* References +* ========== +* +* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +* M.S. Moonen et al (eds), Linear Algebra for Large Scale and +* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +* +* [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +* Eigenvalues of a Regular Matrix Pair (A, B) and Condition +* Estimation: Theory, Algorithms and Software, +* Report UMINF - 94.04, Department of Computing Science, Umea +* University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working +* Note 87. To appear in Numerical Algorithms, 1996. +* +* [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +* for Solving the Generalized Sylvester Equation and Estimating the +* Separation between Regular Matrix Pairs, Report UMINF - 93.23, +* Department of Computing Science, Umea University, S-901 87 Umea, +* Sweden, December 1993, Revised April 1994, Also as LAPACK Working +* Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1, +* 1996. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IDIFJB + PARAMETER ( IDIFJB = 3 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, PAIR, SWAP, WANTD, WANTD1, WANTD2, + $ WANTP + INTEGER I, IERR, IJB, K, KASE, KK, KS, LIWMIN, LWMIN, + $ MN2, N1, N2 + DOUBLE PRECISION DSCALE, DSUM, EPS, RDSCAL, SMLNUM +* .. +* .. External Subroutines .. + EXTERNAL DLACON, DLACPY, DLAG2, DLASSQ, DTGEXC, DTGSYL, + $ XERBLA +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + IF( IJOB.LT.0 .OR. IJOB.GT.5 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -14 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -16 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTGSEN', -INFO ) + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + IERR = 0 +* + WANTP = IJOB.EQ.1 .OR. IJOB.GE.4 + WANTD1 = IJOB.EQ.2 .OR. IJOB.EQ.4 + WANTD2 = IJOB.EQ.3 .OR. IJOB.EQ.5 + WANTD = WANTD1 .OR. WANTD2 +* +* Set M to the dimension of the specified pair of deflating +* subspaces. +* + M = 0 + PAIR = .FALSE. + DO 10 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE + IF( K.LT.N ) THEN + IF( A( K+1, K ).EQ.ZERO ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( K ) .OR. SELECT( K+1 ) ) + $ M = M + 2 + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE +* + IF( IJOB.EQ.1 .OR. IJOB.EQ.2 .OR. IJOB.EQ.4 ) THEN + LWMIN = MAX( 1, 4*N+16, 2*M*( N-M ) ) + LIWMIN = MAX( 1, N+6 ) + ELSE IF( IJOB.EQ.3 .OR. IJOB.EQ.5 ) THEN + LWMIN = MAX( 1, 4*N+16, 4*M*( N-M ) ) + LIWMIN = MAX( 1, 2*M*( N-M ), N+6 ) + ELSE + LWMIN = MAX( 1, 4*N+16 ) + LIWMIN = 1 + END IF +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -22 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -24 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTGSEN', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.N .OR. M.EQ.0 ) THEN + IF( WANTP ) THEN + PL = ONE + PR = ONE + END IF + IF( WANTD ) THEN + DSCALE = ZERO + DSUM = ONE + DO 20 I = 1, N + CALL DLASSQ( N, A( 1, I ), 1, DSCALE, DSUM ) + CALL DLASSQ( N, B( 1, I ), 1, DSCALE, DSUM ) + 20 CONTINUE + DIF( 1 ) = DSCALE*SQRT( DSUM ) + DIF( 2 ) = DIF( 1 ) + END IF + GO TO 60 + END IF +* +* Collect the selected blocks at the top-left corner of (A, B). +* + KS = 0 + PAIR = .FALSE. + DO 30 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE +* + SWAP = SELECT( K ) + IF( K.LT.N ) THEN + IF( A( K+1, K ).NE.ZERO ) THEN + PAIR = .TRUE. + SWAP = SWAP .OR. SELECT( K+1 ) + END IF + END IF +* + IF( SWAP ) THEN + KS = KS + 1 +* +* Swap the K-th block to position KS. +* Perform the reordering of diagonal blocks in (A, B) +* by orthogonal transformation matrices and update +* Q and Z accordingly (if requested): +* + KK = K + IF( K.NE.KS ) + $ CALL DTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, KK, KS, WORK, LWORK, IERR ) +* + IF( IERR.GT.0 ) THEN +* +* Swap is rejected: exit. +* + INFO = 1 + IF( WANTP ) THEN + PL = ZERO + PR = ZERO + END IF + IF( WANTD ) THEN + DIF( 1 ) = ZERO + DIF( 2 ) = ZERO + END IF + GO TO 60 + END IF +* + IF( PAIR ) + $ KS = KS + 1 + END IF + END IF + 30 CONTINUE + IF( WANTP ) THEN +* +* Solve generalized Sylvester equation for R and L +* and compute PL and PR. +* + N1 = M + N2 = N - M + I = N1 + 1 + IJB = 0 + CALL DLACPY( 'Full', N1, N2, A( 1, I ), LDA, WORK, N1 ) + CALL DLACPY( 'Full', N1, N2, B( 1, I ), LDB, WORK( N1*N2+1 ), + $ N1 ) + CALL DTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, + $ N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), N1, + $ DSCALE, DIF( 1 ), WORK( N1*N2*2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) +* +* Estimate the reciprocal of norms of "projections" onto left +* and right eigenspaces. +* + RDSCAL = ZERO + DSUM = ONE + CALL DLASSQ( N1*N2, WORK, 1, RDSCAL, DSUM ) + PL = RDSCAL*SQRT( DSUM ) + IF( PL.EQ.ZERO ) THEN + PL = ONE + ELSE + PL = DSCALE / ( SQRT( DSCALE*DSCALE / PL+PL )*SQRT( PL ) ) + END IF + RDSCAL = ZERO + DSUM = ONE + CALL DLASSQ( N1*N2, WORK( N1*N2+1 ), 1, RDSCAL, DSUM ) + PR = RDSCAL*SQRT( DSUM ) + IF( PR.EQ.ZERO ) THEN + PR = ONE + ELSE + PR = DSCALE / ( SQRT( DSCALE*DSCALE / PR+PR )*SQRT( PR ) ) + END IF + END IF +* + IF( WANTD ) THEN +* +* Compute estimates of Difu and Difl. +* + IF( WANTD1 ) THEN + N1 = M + N2 = N - M + I = N1 + 1 + IJB = IDIFJB +* +* Frobenius norm-based Difu-estimate. +* + CALL DTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, + $ N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), + $ N1, DSCALE, DIF( 1 ), WORK( 2*N1*N2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) +* +* Frobenius norm-based Difl-estimate. +* + CALL DTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, WORK, + $ N2, B( I, I ), LDB, B, LDB, WORK( N1*N2+1 ), + $ N2, DSCALE, DIF( 2 ), WORK( 2*N1*N2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) + ELSE +* +* +* Compute 1-norm-based estimates of Difu and Difl using +* reversed communication with DLACON. In each step a +* generalized Sylvester equation or a transposed variant +* is solved. +* + KASE = 0 + N1 = M + N2 = N - M + I = N1 + 1 + IJB = 0 + MN2 = 2*N1*N2 +* +* 1-norm-based estimate of Difu. +* + 40 CONTINUE + CALL DLACON( MN2, WORK( MN2+1 ), WORK, IWORK, DIF( 1 ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve generalized Sylvester equation. +* + CALL DTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, + $ WORK, N1, B, LDB, B( I, I ), LDB, + $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), + $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + ELSE +* +* Solve the transposed variant. +* + CALL DTGSYL( 'T', IJB, N1, N2, A, LDA, A( I, I ), LDA, + $ WORK, N1, B, LDB, B( I, I ), LDB, + $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), + $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + END IF + GO TO 40 + END IF + DIF( 1 ) = DSCALE / DIF( 1 ) +* +* 1-norm-based estimate of Difl. +* + 50 CONTINUE + CALL DLACON( MN2, WORK( MN2+1 ), WORK, IWORK, DIF( 2 ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve generalized Sylvester equation. +* + CALL DTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, + $ WORK, N2, B( I, I ), LDB, B, LDB, + $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), + $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + ELSE +* +* Solve the transposed variant. +* + CALL DTGSYL( 'T', IJB, N2, N1, A( I, I ), LDA, A, LDA, + $ WORK, N2, B( I, I ), LDB, B, LDB, + $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), + $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + END IF + GO TO 50 + END IF + DIF( 2 ) = DSCALE / DIF( 2 ) +* + END IF + END IF +* + 60 CONTINUE +* +* Compute generalized eigenvalues of reordered pair (A, B) and +* normalize the generalized Schur form. +* + PAIR = .FALSE. + DO 80 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE +* + IF( K.LT.N ) THEN + IF( A( K+1, K ).NE.ZERO ) THEN + PAIR = .TRUE. + END IF + END IF +* + IF( PAIR ) THEN +* +* Compute the eigenvalue(s) at position K. +* + WORK( 1 ) = A( K, K ) + WORK( 2 ) = A( K+1, K ) + WORK( 3 ) = A( K, K+1 ) + WORK( 4 ) = A( K+1, K+1 ) + WORK( 5 ) = B( K, K ) + WORK( 6 ) = B( K+1, K ) + WORK( 7 ) = B( K, K+1 ) + WORK( 8 ) = B( K+1, K+1 ) + CALL DLAG2( WORK, 2, WORK( 5 ), 2, SMLNUM*EPS, BETA( K ), + $ BETA( K+1 ), ALPHAR( K ), ALPHAR( K+1 ), + $ ALPHAI( K ) ) + ALPHAI( K+1 ) = -ALPHAI( K ) +* + ELSE +* + IF( SIGN( ONE, B( K, K ) ).LT.ZERO ) THEN +* +* If B(K,K) is negative, make it positive +* + DO 70 I = 1, N + A( K, I ) = -A( K, I ) + B( K, I ) = -B( K, I ) + Q( I, K ) = -Q( I, K ) + 70 CONTINUE + END IF +* + ALPHAR( K ) = A( K, K ) + ALPHAI( K ) = ZERO + BETA( K ) = B( K, K ) +* + END IF + END IF + 80 CONTINUE +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of DTGSEN +* + END diff --git a/costa/native/external/lapack/dtgsja.f b/costa/native/external/lapack/dtgsja.f new file mode 100644 index 000000000..2518d7a6b --- /dev/null +++ b/costa/native/external/lapack/dtgsja.f @@ -0,0 +1,516 @@ + SUBROUTINE DTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, + $ LDB, TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, + $ Q, LDQ, WORK, NCYCLE, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, + $ NCYCLE, P + DOUBLE PRECISION TOLA, TOLB +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), Q( LDQ, * ), U( LDU, * ), + $ V( LDV, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DTGSJA computes the generalized singular value decomposition (GSVD) +* of two real upper triangular (or trapezoidal) matrices A and B. +* +* On entry, it is assumed that matrices A and B have the following +* forms, which may be obtained by the preprocessing subroutine DGGSVP +* from a general M-by-N matrix A and P-by-N matrix B: +* +* N-K-L K L +* A = K ( 0 A12 A13 ) if M-K-L >= 0; +* L ( 0 0 A23 ) +* M-K-L ( 0 0 0 ) +* +* N-K-L K L +* A = K ( 0 A12 A13 ) if M-K-L < 0; +* M-K ( 0 0 A23 ) +* +* N-K-L K L +* B = L ( 0 0 B13 ) +* P-L ( 0 0 0 ) +* +* where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular +* upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, +* otherwise A23 is (M-K)-by-L upper trapezoidal. +* +* On exit, +* +* U'*A*Q = D1*( 0 R ), V'*B*Q = D2*( 0 R ), +* +* where U, V and Q are orthogonal matrices, Z' denotes the transpose +* of Z, R is a nonsingular upper triangular matrix, and D1 and D2 are +* ``diagonal'' matrices, which are of the following structures: +* +* If M-K-L >= 0, +* +* K L +* D1 = K ( I 0 ) +* L ( 0 C ) +* M-K-L ( 0 0 ) +* +* K L +* D2 = L ( 0 S ) +* P-L ( 0 0 ) +* +* N-K-L K L +* ( 0 R ) = K ( 0 R11 R12 ) K +* L ( 0 0 R22 ) L +* +* where +* +* C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), +* S = diag( BETA(K+1), ... , BETA(K+L) ), +* C**2 + S**2 = I. +* +* R is stored in A(1:K+L,N-K-L+1:N) on exit. +* +* If M-K-L < 0, +* +* K M-K K+L-M +* D1 = K ( I 0 0 ) +* M-K ( 0 C 0 ) +* +* K M-K K+L-M +* D2 = M-K ( 0 S 0 ) +* K+L-M ( 0 0 I ) +* P-L ( 0 0 0 ) +* +* N-K-L K M-K K+L-M +* ( 0 R ) = K ( 0 R11 R12 R13 ) +* M-K ( 0 0 R22 R23 ) +* K+L-M ( 0 0 0 R33 ) +* +* where +* C = diag( ALPHA(K+1), ... , ALPHA(M) ), +* S = diag( BETA(K+1), ... , BETA(M) ), +* C**2 + S**2 = I. +* +* R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored +* ( 0 R22 R23 ) +* in B(M-K+1:L,N+M-K-L+1:N) on exit. +* +* The computation of the orthogonal transformation matrices U, V or Q +* is optional. These matrices may either be formed explicitly, or they +* may be postmultiplied into input matrices U1, V1, or Q1. +* +* Arguments +* ========= +* +* JOBU (input) CHARACTER*1 +* = 'U': U must contain an orthogonal matrix U1 on entry, and +* the product U1*U is returned; +* = 'I': U is initialized to the unit matrix, and the +* orthogonal matrix U is returned; +* = 'N': U is not computed. +* +* JOBV (input) CHARACTER*1 +* = 'V': V must contain an orthogonal matrix V1 on entry, and +* the product V1*V is returned; +* = 'I': V is initialized to the unit matrix, and the +* orthogonal matrix V is returned; +* = 'N': V is not computed. +* +* JOBQ (input) CHARACTER*1 +* = 'Q': Q must contain an orthogonal matrix Q1 on entry, and +* the product Q1*Q is returned; +* = 'I': Q is initialized to the unit matrix, and the +* orthogonal matrix Q is returned; +* = 'N': Q is not computed. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* P (input) INTEGER +* The number of rows of the matrix B. P >= 0. +* +* N (input) INTEGER +* The number of columns of the matrices A and B. N >= 0. +* +* K (input) INTEGER +* L (input) INTEGER +* K and L specify the subblocks in the input matrices A and B: +* A23 = A(K+1:MIN(K+L,M),N-L+1:N) and B13 = B(1:L,N-L+1:N) +* of A and B, whose GSVD is going to be computed by DTGSJA. +* See Further details. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A(N-K+1:N,1:MIN(K+L,M) ) contains the triangular +* matrix R or part of R. See Purpose for details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,N) +* On entry, the P-by-N matrix B. +* On exit, if necessary, B(M-K+1:L,N+M-K-L+1:N) contains +* a part of R. See Purpose for details. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,P). +* +* TOLA (input) DOUBLE PRECISION +* TOLB (input) DOUBLE PRECISION +* TOLA and TOLB are the convergence criteria for the Jacobi- +* Kogbetliantz iteration procedure. Generally, they are the +* same as used in the preprocessing step, say +* TOLA = max(M,N)*norm(A)*MAZHEPS, +* TOLB = max(P,N)*norm(B)*MAZHEPS. +* +* ALPHA (output) DOUBLE PRECISION array, dimension (N) +* BETA (output) DOUBLE PRECISION array, dimension (N) +* On exit, ALPHA and BETA contain the generalized singular +* value pairs of A and B; +* ALPHA(1:K) = 1, +* BETA(1:K) = 0, +* and if M-K-L >= 0, +* ALPHA(K+1:K+L) = diag(C), +* BETA(K+1:K+L) = diag(S), +* or if M-K-L < 0, +* ALPHA(K+1:M)= C, ALPHA(M+1:K+L)= 0 +* BETA(K+1:M) = S, BETA(M+1:K+L) = 1. +* Furthermore, if K+L < N, +* ALPHA(K+L+1:N) = 0 and +* BETA(K+L+1:N) = 0. +* +* U (input/output) DOUBLE PRECISION array, dimension (LDU,M) +* On entry, if JOBU = 'U', U must contain a matrix U1 (usually +* the orthogonal matrix returned by DGGSVP). +* On exit, +* if JOBU = 'I', U contains the orthogonal matrix U; +* if JOBU = 'U', U contains the product U1*U. +* If JOBU = 'N', U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= max(1,M) if +* JOBU = 'U'; LDU >= 1 otherwise. +* +* V (input/output) DOUBLE PRECISION array, dimension (LDV,P) +* On entry, if JOBV = 'V', V must contain a matrix V1 (usually +* the orthogonal matrix returned by DGGSVP). +* On exit, +* if JOBV = 'I', V contains the orthogonal matrix V; +* if JOBV = 'V', V contains the product V1*V. +* If JOBV = 'N', V is not referenced. +* +* LDV (input) INTEGER +* The leading dimension of the array V. LDV >= max(1,P) if +* JOBV = 'V'; LDV >= 1 otherwise. +* +* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) +* On entry, if JOBQ = 'Q', Q must contain a matrix Q1 (usually +* the orthogonal matrix returned by DGGSVP). +* On exit, +* if JOBQ = 'I', Q contains the orthogonal matrix Q; +* if JOBQ = 'Q', Q contains the product Q1*Q. +* If JOBQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N) if +* JOBQ = 'Q'; LDQ >= 1 otherwise. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) +* +* NCYCLE (output) INTEGER +* The number of cycles required for convergence. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* = 1: the procedure does not converge after MAXIT cycles. +* +* Internal Parameters +* =================== +* +* MAXIT INTEGER +* MAXIT specifies the total loops that the iterative procedure +* may take. If after MAXIT cycles, the routine fails to +* converge, we return INFO = 1. +* +* Further Details +* =============== +* +* DTGSJA essentially uses a variant of Kogbetliantz algorithm to reduce +* min(L,M-K)-by-L triangular (or trapezoidal) matrix A23 and L-by-L +* matrix B13 to the form: +* +* U1'*A13*Q1 = C1*R1; V1'*B13*Q1 = S1*R1, +* +* where U1, V1 and Q1 are orthogonal matrix, and Z' is the transpose +* of Z. C1 and S1 are diagonal matrices satisfying +* +* C1**2 + S1**2 = I, +* +* and R1 is an L-by-L nonsingular upper triangular matrix. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXIT + PARAMETER ( MAXIT = 40 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. +* + LOGICAL INITQ, INITU, INITV, UPPER, WANTQ, WANTU, WANTV + INTEGER I, J, KCYCLE + DOUBLE PRECISION A1, A2, A3, B1, B2, B3, CSQ, CSU, CSV, ERROR, + $ GAMMA, RWK, SNQ, SNU, SNV, SSMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLAGS2, DLAPLL, DLARTG, DLASET, DROT, + $ DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + INITU = LSAME( JOBU, 'I' ) + WANTU = INITU .OR. LSAME( JOBU, 'U' ) +* + INITV = LSAME( JOBV, 'I' ) + WANTV = INITV .OR. LSAME( JOBV, 'V' ) +* + INITQ = LSAME( JOBQ, 'I' ) + WANTQ = INITQ .OR. LSAME( JOBQ, 'Q' ) +* + INFO = 0 + IF( .NOT.( INITU .OR. WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( INITV .OR. WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( INITQ .OR. WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( P.LT.0 ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -12 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -18 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -20 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -22 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTGSJA', -INFO ) + RETURN + END IF +* +* Initialize U, V and Q, if necessary +* + IF( INITU ) + $ CALL DLASET( 'Full', M, M, ZERO, ONE, U, LDU ) + IF( INITV ) + $ CALL DLASET( 'Full', P, P, ZERO, ONE, V, LDV ) + IF( INITQ ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) +* +* Loop until convergence +* + UPPER = .FALSE. + DO 40 KCYCLE = 1, MAXIT +* + UPPER = .NOT.UPPER +* + DO 20 I = 1, L - 1 + DO 10 J = I + 1, L +* + A1 = ZERO + A2 = ZERO + A3 = ZERO + IF( K+I.LE.M ) + $ A1 = A( K+I, N-L+I ) + IF( K+J.LE.M ) + $ A3 = A( K+J, N-L+J ) +* + B1 = B( I, N-L+I ) + B3 = B( J, N-L+J ) +* + IF( UPPER ) THEN + IF( K+I.LE.M ) + $ A2 = A( K+I, N-L+J ) + B2 = B( I, N-L+J ) + ELSE + IF( K+J.LE.M ) + $ A2 = A( K+J, N-L+I ) + B2 = B( J, N-L+I ) + END IF +* + CALL DLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, + $ CSV, SNV, CSQ, SNQ ) +* +* Update (K+I)-th and (K+J)-th rows of matrix A: U'*A +* + IF( K+J.LE.M ) + $ CALL DROT( L, A( K+J, N-L+1 ), LDA, A( K+I, N-L+1 ), + $ LDA, CSU, SNU ) +* +* Update I-th and J-th rows of matrix B: V'*B +* + CALL DROT( L, B( J, N-L+1 ), LDB, B( I, N-L+1 ), LDB, + $ CSV, SNV ) +* +* Update (N-L+I)-th and (N-L+J)-th columns of matrices +* A and B: A*Q and B*Q +* + CALL DROT( MIN( K+L, M ), A( 1, N-L+J ), 1, + $ A( 1, N-L+I ), 1, CSQ, SNQ ) +* + CALL DROT( L, B( 1, N-L+J ), 1, B( 1, N-L+I ), 1, CSQ, + $ SNQ ) +* + IF( UPPER ) THEN + IF( K+I.LE.M ) + $ A( K+I, N-L+J ) = ZERO + B( I, N-L+J ) = ZERO + ELSE + IF( K+J.LE.M ) + $ A( K+J, N-L+I ) = ZERO + B( J, N-L+I ) = ZERO + END IF +* +* Update orthogonal matrices U, V, Q, if desired. +* + IF( WANTU .AND. K+J.LE.M ) + $ CALL DROT( M, U( 1, K+J ), 1, U( 1, K+I ), 1, CSU, + $ SNU ) +* + IF( WANTV ) + $ CALL DROT( P, V( 1, J ), 1, V( 1, I ), 1, CSV, SNV ) +* + IF( WANTQ ) + $ CALL DROT( N, Q( 1, N-L+J ), 1, Q( 1, N-L+I ), 1, CSQ, + $ SNQ ) +* + 10 CONTINUE + 20 CONTINUE +* + IF( .NOT.UPPER ) THEN +* +* The matrices A13 and B13 were lower triangular at the start +* of the cycle, and are now upper triangular. +* +* Convergence test: test the parallelism of the corresponding +* rows of A and B. +* + ERROR = ZERO + DO 30 I = 1, MIN( L, M-K ) + CALL DCOPY( L-I+1, A( K+I, N-L+I ), LDA, WORK, 1 ) + CALL DCOPY( L-I+1, B( I, N-L+I ), LDB, WORK( L+1 ), 1 ) + CALL DLAPLL( L-I+1, WORK, 1, WORK( L+1 ), 1, SSMIN ) + ERROR = MAX( ERROR, SSMIN ) + 30 CONTINUE +* + IF( ABS( ERROR ).LE.MIN( TOLA, TOLB ) ) + $ GO TO 50 + END IF +* +* End of cycle loop +* + 40 CONTINUE +* +* The algorithm has not converged after MAXIT cycles. +* + INFO = 1 + GO TO 100 +* + 50 CONTINUE +* +* If ERROR <= MIN(TOLA,TOLB), then the algorithm has converged. +* Compute the generalized singular value pairs (ALPHA, BETA), and +* set the triangular matrix R to array A. +* + DO 60 I = 1, K + ALPHA( I ) = ONE + BETA( I ) = ZERO + 60 CONTINUE +* + DO 70 I = 1, MIN( L, M-K ) +* + A1 = A( K+I, N-L+I ) + B1 = B( I, N-L+I ) +* + IF( A1.NE.ZERO ) THEN + GAMMA = B1 / A1 +* +* change sign if necessary +* + IF( GAMMA.LT.ZERO ) THEN + CALL DSCAL( L-I+1, -ONE, B( I, N-L+I ), LDB ) + IF( WANTV ) + $ CALL DSCAL( P, -ONE, V( 1, I ), 1 ) + END IF +* + CALL DLARTG( ABS( GAMMA ), ONE, BETA( K+I ), ALPHA( K+I ), + $ RWK ) +* + IF( ALPHA( K+I ).GE.BETA( K+I ) ) THEN + CALL DSCAL( L-I+1, ONE / ALPHA( K+I ), A( K+I, N-L+I ), + $ LDA ) + ELSE + CALL DSCAL( L-I+1, ONE / BETA( K+I ), B( I, N-L+I ), + $ LDB ) + CALL DCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ), + $ LDA ) + END IF +* + ELSE +* + ALPHA( K+I ) = ZERO + BETA( K+I ) = ONE + CALL DCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ), + $ LDA ) +* + END IF +* + 70 CONTINUE +* +* Post-assignment +* + DO 80 I = M + 1, K + L + ALPHA( I ) = ZERO + BETA( I ) = ONE + 80 CONTINUE +* + IF( K+L.LT.N ) THEN + DO 90 I = K + L + 1, N + ALPHA( I ) = ZERO + BETA( I ) = ZERO + 90 CONTINUE + END IF +* + 100 CONTINUE + NCYCLE = KCYCLE + RETURN +* +* End of DTGSJA +* + END diff --git a/costa/native/external/lapack/dtgsna.f b/costa/native/external/lapack/dtgsna.f new file mode 100644 index 000000000..0b6abb01a --- /dev/null +++ b/costa/native/external/lapack/dtgsna.f @@ -0,0 +1,585 @@ + SUBROUTINE DTGSNA( JOB, HOWMNY, SELECT, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, S, DIF, MM, M, WORK, LWORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, JOB + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), DIF( * ), S( * ), + $ VL( LDVL, * ), VR( LDVR, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DTGSNA estimates reciprocal condition numbers for specified +* eigenvalues and/or eigenvectors of a matrix pair (A, B) in +* generalized real Schur canonical form (or of any matrix pair +* (Q*A*Z', Q*B*Z') with orthogonal matrices Q and Z, where +* Z' denotes the transpose of Z. +* +* (A, B) must be in generalized real Schur form (as returned by DGGES), +* i.e. A is block upper triangular with 1-by-1 and 2-by-2 diagonal +* blocks. B is upper triangular. +* +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies whether condition numbers are required for +* eigenvalues (S) or eigenvectors (DIF): +* = 'E': for eigenvalues only (S); +* = 'V': for eigenvectors only (DIF); +* = 'B': for both eigenvalues and eigenvectors (S and DIF). +* +* HOWMNY (input) CHARACTER*1 +* = 'A': compute condition numbers for all eigenpairs; +* = 'S': compute condition numbers for selected eigenpairs +* specified by the array SELECT. +* +* SELECT (input) LOGICAL array, dimension (N) +* If HOWMNY = 'S', SELECT specifies the eigenpairs for which +* condition numbers are required. To select condition numbers +* for the eigenpair corresponding to a real eigenvalue w(j), +* SELECT(j) must be set to .TRUE.. To select condition numbers +* corresponding to a complex conjugate pair of eigenvalues w(j) +* and w(j+1), either SELECT(j) or SELECT(j+1) or both, must be +* set to .TRUE.. +* If HOWMNY = 'A', SELECT is not referenced. +* +* N (input) INTEGER +* The order of the square matrix pair (A, B). N >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The upper quasi-triangular matrix A in the pair (A,B). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input) DOUBLE PRECISION array, dimension (LDB,N) +* The upper triangular matrix B in the pair (A,B). +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* VL (input) DOUBLE PRECISION array, dimension (LDVL,M) +* If JOB = 'E' or 'B', VL must contain left eigenvectors of +* (A, B), corresponding to the eigenpairs specified by HOWMNY +* and SELECT. The eigenvectors must be stored in consecutive +* columns of VL, as returned by DTGEVC. +* If JOB = 'V', VL is not referenced. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. LDVL >= 1. +* If JOB = 'E' or 'B', LDVL >= N. +* +* VR (input) DOUBLE PRECISION array, dimension (LDVR,M) +* If JOB = 'E' or 'B', VR must contain right eigenvectors of +* (A, B), corresponding to the eigenpairs specified by HOWMNY +* and SELECT. The eigenvectors must be stored in consecutive +* columns ov VR, as returned by DTGEVC. +* If JOB = 'V', VR is not referenced. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. LDVR >= 1. +* If JOB = 'E' or 'B', LDVR >= N. +* +* S (output) DOUBLE PRECISION array, dimension (MM) +* If JOB = 'E' or 'B', the reciprocal condition numbers of the +* selected eigenvalues, stored in consecutive elements of the +* array. For a complex conjugate pair of eigenvalues two +* consecutive elements of S are set to the same value. Thus +* S(j), DIF(j), and the j-th columns of VL and VR all +* correspond to the same eigenpair (but not in general the +* j-th eigenpair, unless all eigenpairs are selected). +* If JOB = 'V', S is not referenced. +* +* DIF (output) DOUBLE PRECISION array, dimension (MM) +* If JOB = 'V' or 'B', the estimated reciprocal condition +* numbers of the selected eigenvectors, stored in consecutive +* elements of the array. For a complex eigenvector two +* consecutive elements of DIF are set to the same value. If +* the eigenvalues cannot be reordered to compute DIF(j), DIF(j) +* is set to 0; this can only occur when the true value would be +* very small anyway. +* If JOB = 'E', DIF is not referenced. +* +* MM (input) INTEGER +* The number of elements in the arrays S and DIF. MM >= M. +* +* M (output) INTEGER +* The number of elements of the arrays S and DIF used to store +* the specified condition numbers; for each selected real +* eigenvalue one element is used, and for each selected complex +* conjugate pair of eigenvalues, two elements are used. +* If HOWMNY = 'A', M is set to N. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* If JOB = 'E', WORK is not referenced. Otherwise, +* on exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= N. +* If JOB = 'V' or 'B' LWORK >= 2*N*(N+2)+16. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace) INTEGER array, dimension (N + 6) +* If JOB = 'E', IWORK is not referenced. +* +* INFO (output) INTEGER +* =0: Successful exit +* <0: If INFO = -i, the i-th argument had an illegal value +* +* +* Further Details +* =============== +* +* The reciprocal of the condition number of a generalized eigenvalue +* w = (a, b) is defined as +* +* S(w) = (|u'Av|**2 + |u'Bv|**2)**(1/2) / (norm(u)*norm(v)) +* +* where u and v are the left and right eigenvectors of (A, B) +* corresponding to w; |z| denotes the absolute value of the complex +* number, and norm(u) denotes the 2-norm of the vector u. +* The pair (a, b) corresponds to an eigenvalue w = a/b (= u'Av/u'Bv) +* of the matrix pair (A, B). If both a and b equal zero, then (A B) is +* singular and S(I) = -1 is returned. +* +* An approximate error bound on the chordal distance between the i-th +* computed generalized eigenvalue w and the corresponding exact +* eigenvalue lambda is +* +* chord(w, lambda) <= EPS * norm(A, B) / S(I) +* +* where EPS is the machine precision. +* +* The reciprocal of the condition number DIF(i) of right eigenvector u +* and left eigenvector v corresponding to the generalized eigenvalue w +* is defined as follows: +* +* a) If the i-th eigenvalue w = (a,b) is real +* +* Suppose U and V are orthogonal transformations such that +* +* U'*(A, B)*V = (S, T) = ( a * ) ( b * ) 1 +* ( 0 S22 ),( 0 T22 ) n-1 +* 1 n-1 1 n-1 +* +* Then the reciprocal condition number DIF(i) is +* +* Difl((a, b), (S22, T22)) = sigma-min( Zl ), +* +* where sigma-min(Zl) denotes the smallest singular value of the +* 2(n-1)-by-2(n-1) matrix +* +* Zl = [ kron(a, In-1) -kron(1, S22) ] +* [ kron(b, In-1) -kron(1, T22) ] . +* +* Here In-1 is the identity matrix of size n-1. kron(X, Y) is the +* Kronecker product between the matrices X and Y. +* +* Note that if the default method for computing DIF(i) is wanted +* (see DLATDF), then the parameter DIFDRI (see below) should be +* changed from 3 to 4 (routine DLATDF(IJOB = 2 will be used)). +* See DTGSYL for more details. +* +* b) If the i-th and (i+1)-th eigenvalues are complex conjugate pair, +* +* Suppose U and V are orthogonal transformations such that +* +* U'*(A, B)*V = (S, T) = ( S11 * ) ( T11 * ) 2 +* ( 0 S22 ),( 0 T22) n-2 +* 2 n-2 2 n-2 +* +* and (S11, T11) corresponds to the complex conjugate eigenvalue +* pair (w, conjg(w)). There exist unitary matrices U1 and V1 such +* that +* +* U1'*S11*V1 = ( s11 s12 ) and U1'*T11*V1 = ( t11 t12 ) +* ( 0 s22 ) ( 0 t22 ) +* +* where the generalized eigenvalues w = s11/t11 and +* conjg(w) = s22/t22. +* +* Then the reciprocal condition number DIF(i) is bounded by +* +* min( d1, max( 1, |real(s11)/real(s22)| )*d2 ) +* +* where, d1 = Difl((s11, t11), (s22, t22)) = sigma-min(Z1), where +* Z1 is the complex 2-by-2 matrix +* +* Z1 = [ s11 -s22 ] +* [ t11 -t22 ], +* +* This is done by computing (using real arithmetic) the +* roots of the characteristical polynomial det(Z1' * Z1 - lambda I), +* where Z1' denotes the conjugate transpose of Z1 and det(X) denotes +* the determinant of X. +* +* and d2 is an upper bound on Difl((S11, T11), (S22, T22)), i.e. an +* upper bound on sigma-min(Z2), where Z2 is (2n-2)-by-(2n-2) +* +* Z2 = [ kron(S11', In-2) -kron(I2, S22) ] +* [ kron(T11', In-2) -kron(I2, T22) ] +* +* Note that if the default method for computing DIF is wanted (see +* DLATDF), then the parameter DIFDRI (see below) should be changed +* from 3 to 4 (routine DLATDF(IJOB = 2 will be used)). See DTGSYL +* for more details. +* +* For each eigenvalue/vector specified by SELECT, DIF stores a +* Frobenius norm-based estimate of Difl. +* +* An approximate error bound for the i-th computed eigenvector VL(i) or +* VR(i) is given by +* +* EPS * norm(A, B) / DIF(i). +* +* See ref. [2-3] for more details and further references. +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* References +* ========== +* +* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +* M.S. Moonen et al (eds), Linear Algebra for Large Scale and +* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +* +* [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +* Eigenvalues of a Regular Matrix Pair (A, B) and Condition +* Estimation: Theory, Algorithms and Software, +* Report UMINF - 94.04, Department of Computing Science, Umea +* University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working +* Note 87. To appear in Numerical Algorithms, 1996. +* +* [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +* for Solving the Generalized Sylvester Equation and Estimating the +* Separation between Regular Matrix Pairs, Report UMINF - 93.23, +* Department of Computing Science, Umea University, S-901 87 Umea, +* Sweden, December 1993, Revised April 1994, Also as LAPACK Working +* Note 75. To appear in ACM Trans. on Math. Software, Vol 22, +* No 1, 1996. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER DIFDRI + PARAMETER ( DIFDRI = 3 ) + DOUBLE PRECISION ZERO, ONE, TWO, FOUR + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0, + $ FOUR = 4.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, PAIR, SOMCON, WANTBH, WANTDF, WANTS + INTEGER I, IERR, IFST, ILST, IZ, K, KS, LWMIN, N1, N2 + DOUBLE PRECISION ALPHAI, ALPHAR, ALPRQT, BETA, C1, C2, COND, + $ EPS, LNRM, RNRM, ROOT1, ROOT2, SCALE, SMLNUM, + $ TMPII, TMPIR, TMPRI, TMPRR, UHAV, UHAVI, UHBV, + $ UHBVI +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUMMY( 1 ), DUMMY1( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT, DLAMCH, DLAPY2, DNRM2 + EXTERNAL LSAME, DDOT, DLAMCH, DLAPY2, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DLACPY, DLAG2, DTGEXC, DTGSYL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTDF = LSAME( JOB, 'V' ) .OR. WANTBH +* + SOMCON = LSAME( HOWMNY, 'S' ) +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) +* + IF( LSAME( JOB, 'V' ) .OR. LSAME( JOB, 'B' ) ) THEN + LWMIN = MAX( 1, 2*N*( N+2 )+16 ) + ELSE + LWMIN = 1 + END IF +* + IF( .NOT.WANTS .AND. .NOT.WANTDF ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( WANTS .AND. LDVL.LT.N ) THEN + INFO = -10 + ELSE IF( WANTS .AND. LDVR.LT.N ) THEN + INFO = -12 + ELSE +* +* Set M to the number of eigenpairs for which condition numbers +* are required, and test MM. +* + IF( SOMCON ) THEN + M = 0 + PAIR = .FALSE. + DO 10 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE + IF( K.LT.N ) THEN + IF( A( K+1, K ).EQ.ZERO ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( K ) .OR. SELECT( K+1 ) ) + $ M = M + 2 + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE + ELSE + M = N + END IF +* + IF( MM.LT.M ) THEN + INFO = -15 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 +* ELSE IF( WANTDF .AND. LWORK.LT.2*N*( N+2 )+16 ) THEN +* INFO = -18 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTGSNA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + KS = 0 + PAIR = .FALSE. +* + DO 20 K = 1, N +* +* Determine whether A(k,k) begins a 1-by-1 or 2-by-2 block. +* + IF( PAIR ) THEN + PAIR = .FALSE. + GO TO 20 + ELSE + IF( K.LT.N ) + $ PAIR = A( K+1, K ).NE.ZERO + END IF +* +* Determine whether condition numbers are required for the k-th +* eigenpair. +* + IF( SOMCON ) THEN + IF( PAIR ) THEN + IF( .NOT.SELECT( K ) .AND. .NOT.SELECT( K+1 ) ) + $ GO TO 20 + ELSE + IF( .NOT.SELECT( K ) ) + $ GO TO 20 + END IF + END IF +* + KS = KS + 1 +* + IF( WANTS ) THEN +* +* Compute the reciprocal condition number of the k-th +* eigenvalue. +* + IF( PAIR ) THEN +* +* Complex eigenvalue pair. +* + RNRM = DLAPY2( DNRM2( N, VR( 1, KS ), 1 ), + $ DNRM2( N, VR( 1, KS+1 ), 1 ) ) + LNRM = DLAPY2( DNRM2( N, VL( 1, KS ), 1 ), + $ DNRM2( N, VL( 1, KS+1 ), 1 ) ) + CALL DGEMV( 'N', N, N, ONE, A, LDA, VR( 1, KS ), 1, ZERO, + $ WORK, 1 ) + TMPRR = DDOT( N, WORK, 1, VL( 1, KS ), 1 ) + TMPRI = DDOT( N, WORK, 1, VL( 1, KS+1 ), 1 ) + CALL DGEMV( 'N', N, N, ONE, A, LDA, VR( 1, KS+1 ), 1, + $ ZERO, WORK, 1 ) + TMPII = DDOT( N, WORK, 1, VL( 1, KS+1 ), 1 ) + TMPIR = DDOT( N, WORK, 1, VL( 1, KS ), 1 ) + UHAV = TMPRR + TMPII + UHAVI = TMPIR - TMPRI + CALL DGEMV( 'N', N, N, ONE, B, LDB, VR( 1, KS ), 1, ZERO, + $ WORK, 1 ) + TMPRR = DDOT( N, WORK, 1, VL( 1, KS ), 1 ) + TMPRI = DDOT( N, WORK, 1, VL( 1, KS+1 ), 1 ) + CALL DGEMV( 'N', N, N, ONE, B, LDB, VR( 1, KS+1 ), 1, + $ ZERO, WORK, 1 ) + TMPII = DDOT( N, WORK, 1, VL( 1, KS+1 ), 1 ) + TMPIR = DDOT( N, WORK, 1, VL( 1, KS ), 1 ) + UHBV = TMPRR + TMPII + UHBVI = TMPIR - TMPRI + UHAV = DLAPY2( UHAV, UHAVI ) + UHBV = DLAPY2( UHBV, UHBVI ) + COND = DLAPY2( UHAV, UHBV ) + S( KS ) = COND / ( RNRM*LNRM ) + S( KS+1 ) = S( KS ) +* + ELSE +* +* Real eigenvalue. +* + RNRM = DNRM2( N, VR( 1, KS ), 1 ) + LNRM = DNRM2( N, VL( 1, KS ), 1 ) + CALL DGEMV( 'N', N, N, ONE, A, LDA, VR( 1, KS ), 1, ZERO, + $ WORK, 1 ) + UHAV = DDOT( N, WORK, 1, VL( 1, KS ), 1 ) + CALL DGEMV( 'N', N, N, ONE, B, LDB, VR( 1, KS ), 1, ZERO, + $ WORK, 1 ) + UHBV = DDOT( N, WORK, 1, VL( 1, KS ), 1 ) + COND = DLAPY2( UHAV, UHBV ) + IF( COND.EQ.ZERO ) THEN + S( KS ) = -ONE + ELSE + S( KS ) = COND / ( RNRM*LNRM ) + END IF + END IF + END IF +* + IF( WANTDF ) THEN + IF( N.EQ.1 ) THEN + DIF( KS ) = DLAPY2( A( 1, 1 ), B( 1, 1 ) ) + GO TO 20 + END IF +* +* Estimate the reciprocal condition number of the k-th +* eigenvectors. + IF( PAIR ) THEN +* +* Copy the 2-by 2 pencil beginning at (A(k,k), B(k, k)). +* Compute the eigenvalue(s) at position K. +* + WORK( 1 ) = A( K, K ) + WORK( 2 ) = A( K+1, K ) + WORK( 3 ) = A( K, K+1 ) + WORK( 4 ) = A( K+1, K+1 ) + WORK( 5 ) = B( K, K ) + WORK( 6 ) = B( K+1, K ) + WORK( 7 ) = B( K, K+1 ) + WORK( 8 ) = B( K+1, K+1 ) + CALL DLAG2( WORK, 2, WORK( 5 ), 2, SMLNUM*EPS, BETA, + $ DUMMY1( 1 ), ALPHAR, DUMMY( 1 ), ALPHAI ) + ALPRQT = ONE + C1 = TWO*( ALPHAR*ALPHAR+ALPHAI*ALPHAI+BETA*BETA ) + C2 = FOUR*BETA*BETA*ALPHAI*ALPHAI + ROOT1 = C1 + SQRT( C1*C1-4.0D0*C2 ) + ROOT2 = C2 / ROOT1 + ROOT1 = ROOT1 / TWO + COND = MIN( SQRT( ROOT1 ), SQRT( ROOT2 ) ) + END IF +* +* Copy the matrix (A, B) to the array WORK and swap the +* diagonal block beginning at A(k,k) to the (1,1) position. +* + CALL DLACPY( 'Full', N, N, A, LDA, WORK, N ) + CALL DLACPY( 'Full', N, N, B, LDB, WORK( N*N+1 ), N ) + IFST = K + ILST = 1 +* + CALL DTGEXC( .FALSE., .FALSE., N, WORK, N, WORK( N*N+1 ), N, + $ DUMMY, 1, DUMMY1, 1, IFST, ILST, + $ WORK( N*N*2+1 ), LWORK-2*N*N, IERR ) +* + IF( IERR.GT.0 ) THEN +* +* Ill-conditioned problem - swap rejected. +* + DIF( KS ) = ZERO + ELSE +* +* Reordering successful, solve generalized Sylvester +* equation for R and L, +* A22 * R - L * A11 = A12 +* B22 * R - L * B11 = B12, +* and compute estimate of Difl((A11,B11), (A22, B22)). +* + N1 = 1 + IF( WORK( 2 ).NE.ZERO ) + $ N1 = 2 + N2 = N - N1 + IF( N2.EQ.0 ) THEN + DIF( KS ) = COND + ELSE + I = N*N + 1 + IZ = 2*N*N + 1 + CALL DTGSYL( 'N', DIFDRI, N2, N1, WORK( N*N1+N1+1 ), + $ N, WORK, N, WORK( N1+1 ), N, + $ WORK( N*N1+N1+I ), N, WORK( I ), N, + $ WORK( N1+I ), N, SCALE, DIF( KS ), + $ WORK( IZ+1 ), LWORK-2*N*N, IWORK, IERR ) +* + IF( PAIR ) + $ DIF( KS ) = MIN( MAX( ONE, ALPRQT )*DIF( KS ), + $ COND ) + END IF + END IF + IF( PAIR ) + $ DIF( KS+1 ) = DIF( KS ) + END IF + IF( PAIR ) + $ KS = KS + 1 +* + 20 CONTINUE + WORK( 1 ) = LWMIN + RETURN +* +* End of DTGSNA +* + END diff --git a/costa/native/external/lapack/dtgsy2.f b/costa/native/external/lapack/dtgsy2.f new file mode 100644 index 000000000..114e753af --- /dev/null +++ b/costa/native/external/lapack/dtgsy2.f @@ -0,0 +1,950 @@ + SUBROUTINE DTGSY2( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, RDSUM, RDSCAL, + $ IWORK, PQ, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, M, N, + $ PQ + DOUBLE PRECISION RDSCAL, RDSUM, SCALE +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ D( LDD, * ), E( LDE, * ), F( LDF, * ) +* .. +* +* Purpose +* ======= +* +* DTGSY2 solves the generalized Sylvester equation: +* +* A * R - L * B = scale * C (1) +* D * R - L * E = scale * F, +* +* using Level 1 and 2 BLAS. where R and L are unknown M-by-N matrices, +* (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M, +* N-by-N and M-by-N, respectively, with real entries. (A, D) and (B, E) +* must be in generalized Schur canonical form, i.e. A, B are upper +* quasi triangular and D, E are upper triangular. The solution (R, L) +* overwrites (C, F). 0 <= SCALE <= 1 is an output scaling factor +* chosen to avoid overflow. +* +* In matrix notation solving equation (1) corresponds to solve +* Z*x = scale*b, where Z is defined as +* +* Z = [ kron(In, A) -kron(B', Im) ] (2) +* [ kron(In, D) -kron(E', Im) ], +* +* Ik is the identity matrix of size k and X' is the transpose of X. +* kron(X, Y) is the Kronecker product between the matrices X and Y. +* In the process of solving (1), we solve a number of such systems +* where Dim(In), Dim(In) = 1 or 2. +* +* If TRANS = 'T', solve the transposed system Z'*y = scale*b for y, +* which is equivalent to solve for R and L in +* +* A' * R + D' * L = scale * C (3) +* R * B' + L * E' = scale * -F +* +* This case is used to compute an estimate of Dif[(A, D), (B, E)] = +* sigma_min(Z) using reverse communicaton with DLACON. +* +* DTGSY2 also (IJOB >= 1) contributes to the computation in STGSYL +* of an upper bound on the separation between to matrix pairs. Then +* the input (A, D), (B, E) are sub-pencils of the matrix pair in +* DTGSYL. See STGSYL for details. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER +* = 'N', solve the generalized Sylvester equation (1). +* = 'T': solve the 'transposed' system (3). +* +* IJOB (input) INTEGER +* Specifies what kind of functionality to be performed. +* = 0: solve (1) only. +* = 1: A contribution from this subsystem to a Frobenius +* norm-based estimate of the separation between two matrix +* pairs is computed. (look ahead strategy is used). +* = 2: A contribution from this subsystem to a Frobenius +* norm-based estimate of the separation between two matrix +* pairs is computed. (DGECON on sub-systems is used.) +* Not referenced if TRANS = 'T'. +* +* M (input) INTEGER +* On entry, M specifies the order of A and D, and the row +* dimension of C, F, R and L. +* +* N (input) INTEGER +* On entry, N specifies the order of B and E, and the column +* dimension of C, F, R and L. +* +* A (input) DOUBLE PRECISION array, dimension (LDA, M) +* On entry, A contains an upper quasi triangular matrix. +* +* LDA (input) INTEGER +* The leading dimension of the matrix A. LDA >= max(1, M). +* +* B (input) DOUBLE PRECISION array, dimension (LDB, N) +* On entry, B contains an upper quasi triangular matrix. +* +* LDB (input) INTEGER +* The leading dimension of the matrix B. LDB >= max(1, N). +* +* C (input/ output) DOUBLE PRECISION array, dimension (LDC, N) +* On entry, C contains the right-hand-side of the first matrix +* equation in (1). +* On exit, if IJOB = 0, C has been overwritten by the +* solution R. +* +* LDC (input) INTEGER +* The leading dimension of the matrix C. LDC >= max(1, M). +* +* D (input) DOUBLE PRECISION array, dimension (LDD, M) +* On entry, D contains an upper triangular matrix. +* +* LDD (input) INTEGER +* The leading dimension of the matrix D. LDD >= max(1, M). +* +* E (input) DOUBLE PRECISION array, dimension (LDE, N) +* On entry, E contains an upper triangular matrix. +* +* LDE (input) INTEGER +* The leading dimension of the matrix E. LDE >= max(1, N). +* +* F (input/ output) DOUBLE PRECISION array, dimension (LDF, N) +* On entry, F contains the right-hand-side of the second matrix +* equation in (1). +* On exit, if IJOB = 0, F has been overwritten by the +* solution L. +* +* LDF (input) INTEGER +* The leading dimension of the matrix F. LDF >= max(1, M). +* +* SCALE (output) DOUBLE PRECISION +* On exit, 0 <= SCALE <= 1. If 0 < SCALE < 1, the solutions +* R and L (C and F on entry) will hold the solutions to a +* slightly perturbed system but the input matrices A, B, D and +* E have not been changed. If SCALE = 0, R and L will hold the +* solutions to the homogeneous system with C = F = 0. Normally, +* SCALE = 1. +* +* RDSUM (input/output) DOUBLE PRECISION +* On entry, the sum of squares of computed contributions to +* the Dif-estimate under computation by DTGSYL, where the +* scaling factor RDSCAL (see below) has been factored out. +* On exit, the corresponding sum of squares updated with the +* contributions from the current sub-system. +* If TRANS = 'T' RDSUM is not touched. +* NOTE: RDSUM only makes sense when DTGSY2 is called by STGSYL. +* +* RDSCAL (input/output) DOUBLE PRECISION +* On entry, scaling factor used to prevent overflow in RDSUM. +* On exit, RDSCAL is updated w.r.t. the current contributions +* in RDSUM. +* If TRANS = 'T', RDSCAL is not touched. +* NOTE: RDSCAL only makes sense when DTGSY2 is called by +* DTGSYL. +* +* IWORK (workspace) INTEGER array, dimension (M+N+2) +* +* PQ (output) INTEGER +* On exit, the number of subsystems (of size 2-by-2, 4-by-4 and +* 8-by-8) solved by this routine. +* +* INFO (output) INTEGER +* On exit, if INFO is set to +* =0: Successful exit +* <0: If INFO = -i, the i-th argument had an illegal value. +* >0: The matrix pairs (A, D) and (B, E) have common or very +* close eigenvalues. +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER LDZ + PARAMETER ( LDZ = 8 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + INTEGER I, IE, IERR, II, IS, ISP1, J, JE, JJ, JS, JSP1, + $ K, MB, NB, P, Q, ZDIM + DOUBLE PRECISION ALPHA, SCALOC +* .. +* .. Local Arrays .. + INTEGER IPIV( LDZ ), JPIV( LDZ ) + DOUBLE PRECISION RHS( LDZ ), Z( LDZ, LDZ ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGEMM, DGEMV, DGER, DGESC2, + $ DGETC2, DLATDF, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode and test input parameters +* + INFO = 0 + IERR = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -1 + ELSE IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.2 ) ) THEN + INFO = -2 + ELSE IF( M.LE.0 ) THEN + INFO = -3 + ELSE IF( N.LE.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDD.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDE.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDF.LT.MAX( 1, M ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTGSY2', -INFO ) + RETURN + END IF +* +* Determine block structure of A +* + PQ = 0 + P = 0 + I = 1 + 10 CONTINUE + IF( I.GT.M ) + $ GO TO 20 + P = P + 1 + IWORK( P ) = I + IF( I.EQ.M ) + $ GO TO 20 + IF( A( I+1, I ).NE.ZERO ) THEN + I = I + 2 + ELSE + I = I + 1 + END IF + GO TO 10 + 20 CONTINUE + IWORK( P+1 ) = M + 1 +* +* Determine block structure of B +* + Q = P + 1 + J = 1 + 30 CONTINUE + IF( J.GT.N ) + $ GO TO 40 + Q = Q + 1 + IWORK( Q ) = J + IF( J.EQ.N ) + $ GO TO 40 + IF( B( J+1, J ).NE.ZERO ) THEN + J = J + 2 + ELSE + J = J + 1 + END IF + GO TO 30 + 40 CONTINUE + IWORK( Q+1 ) = N + 1 + PQ = P*( Q-P-1 ) +* + IF( NOTRAN ) THEN +* +* Solve (I, J) - subsystem +* A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) +* D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) +* for I = P, P - 1, ..., 1; J = 1, 2, ..., Q +* + SCALE = ONE + SCALOC = ONE + DO 120 J = P + 2, Q + JS = IWORK( J ) + JSP1 = JS + 1 + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + DO 110 I = P, 1, -1 +* + IS = IWORK( I ) + ISP1 = IS + 1 + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + ZDIM = MB*NB*2 +* + IF( ( MB.EQ.1 ) .AND. ( NB.EQ.1 ) ) THEN +* +* Build a 2-by-2 system Z * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = D( IS, IS ) + Z( 1, 2 ) = -B( JS, JS ) + Z( 2, 2 ) = -E( JS, JS ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = F( IS, JS ) +* +* Solve Z * x = RHS +* + CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR +* + IF( IJOB.EQ.0 ) THEN + CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, + $ SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 50 K = 1, N + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 50 CONTINUE + SCALE = SCALE*SCALOC + END IF + ELSE + CALL DLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM, + $ RDSCAL, IPIV, JPIV ) + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + F( IS, JS ) = RHS( 2 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( I.GT.1 ) THEN + ALPHA = -RHS( 1 ) + CALL DAXPY( IS-1, ALPHA, A( 1, IS ), 1, C( 1, JS ), + $ 1 ) + CALL DAXPY( IS-1, ALPHA, D( 1, IS ), 1, F( 1, JS ), + $ 1 ) + END IF + IF( J.LT.Q ) THEN + CALL DAXPY( N-JE, RHS( 2 ), B( JS, JE+1 ), LDB, + $ C( IS, JE+1 ), LDC ) + CALL DAXPY( N-JE, RHS( 2 ), E( JS, JE+1 ), LDE, + $ F( IS, JE+1 ), LDF ) + END IF +* + ELSE IF( ( MB.EQ.1 ) .AND. ( NB.EQ.2 ) ) THEN +* +* Build a 4-by-4 system Z * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = ZERO + Z( 3, 1 ) = D( IS, IS ) + Z( 4, 1 ) = ZERO +* + Z( 1, 2 ) = ZERO + Z( 2, 2 ) = A( IS, IS ) + Z( 3, 2 ) = ZERO + Z( 4, 2 ) = D( IS, IS ) +* + Z( 1, 3 ) = -B( JS, JS ) + Z( 2, 3 ) = -B( JS, JSP1 ) + Z( 3, 3 ) = -E( JS, JS ) + Z( 4, 3 ) = -E( JS, JSP1 ) +* + Z( 1, 4 ) = -B( JSP1, JS ) + Z( 2, 4 ) = -B( JSP1, JSP1 ) + Z( 3, 4 ) = ZERO + Z( 4, 4 ) = -E( JSP1, JSP1 ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = C( IS, JSP1 ) + RHS( 3 ) = F( IS, JS ) + RHS( 4 ) = F( IS, JSP1 ) +* +* Solve Z * x = RHS +* + CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR +* + IF( IJOB.EQ.0 ) THEN + CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, + $ SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 60 K = 1, N + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 60 CONTINUE + SCALE = SCALE*SCALOC + END IF + ELSE + CALL DLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM, + $ RDSCAL, IPIV, JPIV ) + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + C( IS, JSP1 ) = RHS( 2 ) + F( IS, JS ) = RHS( 3 ) + F( IS, JSP1 ) = RHS( 4 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( I.GT.1 ) THEN + CALL DGER( IS-1, NB, -ONE, A( 1, IS ), 1, RHS( 1 ), + $ 1, C( 1, JS ), LDC ) + CALL DGER( IS-1, NB, -ONE, D( 1, IS ), 1, RHS( 1 ), + $ 1, F( 1, JS ), LDF ) + END IF + IF( J.LT.Q ) THEN + CALL DAXPY( N-JE, RHS( 3 ), B( JS, JE+1 ), LDB, + $ C( IS, JE+1 ), LDC ) + CALL DAXPY( N-JE, RHS( 3 ), E( JS, JE+1 ), LDE, + $ F( IS, JE+1 ), LDF ) + CALL DAXPY( N-JE, RHS( 4 ), B( JSP1, JE+1 ), LDB, + $ C( IS, JE+1 ), LDC ) + CALL DAXPY( N-JE, RHS( 4 ), E( JSP1, JE+1 ), LDE, + $ F( IS, JE+1 ), LDF ) + END IF +* + ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.1 ) ) THEN +* +* Build a 4-by-4 system Z * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = A( ISP1, IS ) + Z( 3, 1 ) = D( IS, IS ) + Z( 4, 1 ) = ZERO +* + Z( 1, 2 ) = A( IS, ISP1 ) + Z( 2, 2 ) = A( ISP1, ISP1 ) + Z( 3, 2 ) = D( IS, ISP1 ) + Z( 4, 2 ) = D( ISP1, ISP1 ) +* + Z( 1, 3 ) = -B( JS, JS ) + Z( 2, 3 ) = ZERO + Z( 3, 3 ) = -E( JS, JS ) + Z( 4, 3 ) = ZERO +* + Z( 1, 4 ) = ZERO + Z( 2, 4 ) = -B( JS, JS ) + Z( 3, 4 ) = ZERO + Z( 4, 4 ) = -E( JS, JS ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = C( ISP1, JS ) + RHS( 3 ) = F( IS, JS ) + RHS( 4 ) = F( ISP1, JS ) +* +* Solve Z * x = RHS +* + CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR + IF( IJOB.EQ.0 ) THEN + CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, + $ SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 70 K = 1, N + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 70 CONTINUE + SCALE = SCALE*SCALOC + END IF + ELSE + CALL DLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM, + $ RDSCAL, IPIV, JPIV ) + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + C( ISP1, JS ) = RHS( 2 ) + F( IS, JS ) = RHS( 3 ) + F( ISP1, JS ) = RHS( 4 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( I.GT.1 ) THEN + CALL DGEMV( 'N', IS-1, MB, -ONE, A( 1, IS ), LDA, + $ RHS( 1 ), 1, ONE, C( 1, JS ), 1 ) + CALL DGEMV( 'N', IS-1, MB, -ONE, D( 1, IS ), LDD, + $ RHS( 1 ), 1, ONE, F( 1, JS ), 1 ) + END IF + IF( J.LT.Q ) THEN + CALL DGER( MB, N-JE, ONE, RHS( 3 ), 1, + $ B( JS, JE+1 ), LDB, C( IS, JE+1 ), LDC ) + CALL DGER( MB, N-JE, ONE, RHS( 3 ), 1, + $ E( JS, JE+1 ), LDB, F( IS, JE+1 ), LDC ) + END IF +* + ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.2 ) ) THEN +* +* Build an 8-by-8 system Z * x = RHS +* + CALL DCOPY( LDZ*LDZ, ZERO, 0, Z, 1 ) +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = A( ISP1, IS ) + Z( 5, 1 ) = D( IS, IS ) +* + Z( 1, 2 ) = A( IS, ISP1 ) + Z( 2, 2 ) = A( ISP1, ISP1 ) + Z( 5, 2 ) = D( IS, ISP1 ) + Z( 6, 2 ) = D( ISP1, ISP1 ) +* + Z( 3, 3 ) = A( IS, IS ) + Z( 4, 3 ) = A( ISP1, IS ) + Z( 7, 3 ) = D( IS, IS ) +* + Z( 3, 4 ) = A( IS, ISP1 ) + Z( 4, 4 ) = A( ISP1, ISP1 ) + Z( 7, 4 ) = D( IS, ISP1 ) + Z( 8, 4 ) = D( ISP1, ISP1 ) +* + Z( 1, 5 ) = -B( JS, JS ) + Z( 3, 5 ) = -B( JS, JSP1 ) + Z( 5, 5 ) = -E( JS, JS ) + Z( 7, 5 ) = -E( JS, JSP1 ) +* + Z( 2, 6 ) = -B( JS, JS ) + Z( 4, 6 ) = -B( JS, JSP1 ) + Z( 6, 6 ) = -E( JS, JS ) + Z( 8, 6 ) = -E( JS, JSP1 ) +* + Z( 1, 7 ) = -B( JSP1, JS ) + Z( 3, 7 ) = -B( JSP1, JSP1 ) + Z( 7, 7 ) = -E( JSP1, JSP1 ) +* + Z( 2, 8 ) = -B( JSP1, JS ) + Z( 4, 8 ) = -B( JSP1, JSP1 ) + Z( 8, 8 ) = -E( JSP1, JSP1 ) +* +* Set up right hand side(s) +* + K = 1 + II = MB*NB + 1 + DO 80 JJ = 0, NB - 1 + CALL DCOPY( MB, C( IS, JS+JJ ), 1, RHS( K ), 1 ) + CALL DCOPY( MB, F( IS, JS+JJ ), 1, RHS( II ), 1 ) + K = K + MB + II = II + MB + 80 CONTINUE +* +* Solve Z * x = RHS +* + CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR + IF( IJOB.EQ.0 ) THEN + CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, + $ SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 90 K = 1, N + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 90 CONTINUE + SCALE = SCALE*SCALOC + END IF + ELSE + CALL DLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM, + $ RDSCAL, IPIV, JPIV ) + END IF +* +* Unpack solution vector(s) +* + K = 1 + II = MB*NB + 1 + DO 100 JJ = 0, NB - 1 + CALL DCOPY( MB, RHS( K ), 1, C( IS, JS+JJ ), 1 ) + CALL DCOPY( MB, RHS( II ), 1, F( IS, JS+JJ ), 1 ) + K = K + MB + II = II + MB + 100 CONTINUE +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( I.GT.1 ) THEN + CALL DGEMM( 'N', 'N', IS-1, NB, MB, -ONE, + $ A( 1, IS ), LDA, RHS( 1 ), MB, ONE, + $ C( 1, JS ), LDC ) + CALL DGEMM( 'N', 'N', IS-1, NB, MB, -ONE, + $ D( 1, IS ), LDD, RHS( 1 ), MB, ONE, + $ F( 1, JS ), LDF ) + END IF + IF( J.LT.Q ) THEN + K = MB*NB + 1 + CALL DGEMM( 'N', 'N', MB, N-JE, NB, ONE, RHS( K ), + $ MB, B( JS, JE+1 ), LDB, ONE, + $ C( IS, JE+1 ), LDC ) + CALL DGEMM( 'N', 'N', MB, N-JE, NB, ONE, RHS( K ), + $ MB, E( JS, JE+1 ), LDE, ONE, + $ F( IS, JE+1 ), LDF ) + END IF +* + END IF +* + 110 CONTINUE + 120 CONTINUE + ELSE +* +* Solve (I, J) - subsystem +* A(I, I)' * R(I, J) + D(I, I)' * L(J, J) = C(I, J) +* R(I, I) * B(J, J) + L(I, J) * E(J, J) = -F(I, J) +* for I = 1, 2, ..., P, J = Q, Q - 1, ..., 1 +* + SCALE = ONE + SCALOC = ONE + DO 200 I = 1, P +* + IS = IWORK( I ) + ISP1 = IS + 1 + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + DO 190 J = Q, P + 2, -1 +* + JS = IWORK( J ) + JSP1 = JS + 1 + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + ZDIM = MB*NB*2 + IF( ( MB.EQ.1 ) .AND. ( NB.EQ.1 ) ) THEN +* +* Build a 2-by-2 system Z' * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = -B( JS, JS ) + Z( 1, 2 ) = D( IS, IS ) + Z( 2, 2 ) = -E( JS, JS ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = F( IS, JS ) +* +* Solve Z' * x = RHS +* + CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR +* + CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 130 K = 1, N + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 130 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + F( IS, JS ) = RHS( 2 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( J.GT.P+2 ) THEN + ALPHA = RHS( 1 ) + CALL DAXPY( JS-1, ALPHA, B( 1, JS ), 1, F( IS, 1 ), + $ LDF ) + ALPHA = RHS( 2 ) + CALL DAXPY( JS-1, ALPHA, E( 1, JS ), 1, F( IS, 1 ), + $ LDF ) + END IF + IF( I.LT.P ) THEN + ALPHA = -RHS( 1 ) + CALL DAXPY( M-IE, ALPHA, A( IS, IE+1 ), LDA, + $ C( IE+1, JS ), 1 ) + ALPHA = -RHS( 2 ) + CALL DAXPY( M-IE, ALPHA, D( IS, IE+1 ), LDD, + $ C( IE+1, JS ), 1 ) + END IF +* + ELSE IF( ( MB.EQ.1 ) .AND. ( NB.EQ.2 ) ) THEN +* +* Build a 4-by-4 system Z' * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = ZERO + Z( 3, 1 ) = -B( JS, JS ) + Z( 4, 1 ) = -B( JSP1, JS ) +* + Z( 1, 2 ) = ZERO + Z( 2, 2 ) = A( IS, IS ) + Z( 3, 2 ) = -B( JS, JSP1 ) + Z( 4, 2 ) = -B( JSP1, JSP1 ) +* + Z( 1, 3 ) = D( IS, IS ) + Z( 2, 3 ) = ZERO + Z( 3, 3 ) = -E( JS, JS ) + Z( 4, 3 ) = ZERO +* + Z( 1, 4 ) = ZERO + Z( 2, 4 ) = D( IS, IS ) + Z( 3, 4 ) = -E( JS, JSP1 ) + Z( 4, 4 ) = -E( JSP1, JSP1 ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = C( IS, JSP1 ) + RHS( 3 ) = F( IS, JS ) + RHS( 4 ) = F( IS, JSP1 ) +* +* Solve Z' * x = RHS +* + CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR + CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 140 K = 1, N + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 140 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + C( IS, JSP1 ) = RHS( 2 ) + F( IS, JS ) = RHS( 3 ) + F( IS, JSP1 ) = RHS( 4 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( J.GT.P+2 ) THEN + CALL DAXPY( JS-1, RHS( 1 ), B( 1, JS ), 1, + $ F( IS, 1 ), LDF ) + CALL DAXPY( JS-1, RHS( 2 ), B( 1, JSP1 ), 1, + $ F( IS, 1 ), LDF ) + CALL DAXPY( JS-1, RHS( 3 ), E( 1, JS ), 1, + $ F( IS, 1 ), LDF ) + CALL DAXPY( JS-1, RHS( 4 ), E( 1, JSP1 ), 1, + $ F( IS, 1 ), LDF ) + END IF + IF( I.LT.P ) THEN + CALL DGER( M-IE, NB, -ONE, A( IS, IE+1 ), LDA, + $ RHS( 1 ), 1, C( IE+1, JS ), LDC ) + CALL DGER( M-IE, NB, -ONE, D( IS, IE+1 ), LDD, + $ RHS( 3 ), 1, C( IE+1, JS ), LDC ) + END IF +* + ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.1 ) ) THEN +* +* Build a 4-by-4 system Z' * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = A( IS, ISP1 ) + Z( 3, 1 ) = -B( JS, JS ) + Z( 4, 1 ) = ZERO +* + Z( 1, 2 ) = A( ISP1, IS ) + Z( 2, 2 ) = A( ISP1, ISP1 ) + Z( 3, 2 ) = ZERO + Z( 4, 2 ) = -B( JS, JS ) +* + Z( 1, 3 ) = D( IS, IS ) + Z( 2, 3 ) = D( IS, ISP1 ) + Z( 3, 3 ) = -E( JS, JS ) + Z( 4, 3 ) = ZERO +* + Z( 1, 4 ) = ZERO + Z( 2, 4 ) = D( ISP1, ISP1 ) + Z( 3, 4 ) = ZERO + Z( 4, 4 ) = -E( JS, JS ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = C( ISP1, JS ) + RHS( 3 ) = F( IS, JS ) + RHS( 4 ) = F( ISP1, JS ) +* +* Solve Z' * x = RHS +* + CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR +* + CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 150 K = 1, N + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 150 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + C( ISP1, JS ) = RHS( 2 ) + F( IS, JS ) = RHS( 3 ) + F( ISP1, JS ) = RHS( 4 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( J.GT.P+2 ) THEN + CALL DGER( MB, JS-1, ONE, RHS( 1 ), 1, B( 1, JS ), + $ 1, F( IS, 1 ), LDF ) + CALL DGER( MB, JS-1, ONE, RHS( 3 ), 1, E( 1, JS ), + $ 1, F( IS, 1 ), LDF ) + END IF + IF( I.LT.P ) THEN + CALL DGEMV( 'T', MB, M-IE, -ONE, A( IS, IE+1 ), + $ LDA, RHS( 1 ), 1, ONE, C( IE+1, JS ), + $ 1 ) + CALL DGEMV( 'T', MB, M-IE, -ONE, D( IS, IE+1 ), + $ LDD, RHS( 3 ), 1, ONE, C( IE+1, JS ), + $ 1 ) + END IF +* + ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.2 ) ) THEN +* +* Build an 8-by-8 system Z' * x = RHS +* + CALL DCOPY( LDZ*LDZ, ZERO, 0, Z, 1 ) +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = A( IS, ISP1 ) + Z( 5, 1 ) = -B( JS, JS ) + Z( 7, 1 ) = -B( JSP1, JS ) +* + Z( 1, 2 ) = A( ISP1, IS ) + Z( 2, 2 ) = A( ISP1, ISP1 ) + Z( 6, 2 ) = -B( JS, JS ) + Z( 8, 2 ) = -B( JSP1, JS ) +* + Z( 3, 3 ) = A( IS, IS ) + Z( 4, 3 ) = A( IS, ISP1 ) + Z( 5, 3 ) = -B( JS, JSP1 ) + Z( 7, 3 ) = -B( JSP1, JSP1 ) +* + Z( 3, 4 ) = A( ISP1, IS ) + Z( 4, 4 ) = A( ISP1, ISP1 ) + Z( 6, 4 ) = -B( JS, JSP1 ) + Z( 8, 4 ) = -B( JSP1, JSP1 ) +* + Z( 1, 5 ) = D( IS, IS ) + Z( 2, 5 ) = D( IS, ISP1 ) + Z( 5, 5 ) = -E( JS, JS ) +* + Z( 2, 6 ) = D( ISP1, ISP1 ) + Z( 6, 6 ) = -E( JS, JS ) +* + Z( 3, 7 ) = D( IS, IS ) + Z( 4, 7 ) = D( IS, ISP1 ) + Z( 5, 7 ) = -E( JS, JSP1 ) + Z( 7, 7 ) = -E( JSP1, JSP1 ) +* + Z( 4, 8 ) = D( ISP1, ISP1 ) + Z( 6, 8 ) = -E( JS, JSP1 ) + Z( 8, 8 ) = -E( JSP1, JSP1 ) +* +* Set up right hand side(s) +* + K = 1 + II = MB*NB + 1 + DO 160 JJ = 0, NB - 1 + CALL DCOPY( MB, C( IS, JS+JJ ), 1, RHS( K ), 1 ) + CALL DCOPY( MB, F( IS, JS+JJ ), 1, RHS( II ), 1 ) + K = K + MB + II = II + MB + 160 CONTINUE +* +* +* Solve Z' * x = RHS +* + CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR +* + CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 170 K = 1, N + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 170 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Unpack solution vector(s) +* + K = 1 + II = MB*NB + 1 + DO 180 JJ = 0, NB - 1 + CALL DCOPY( MB, RHS( K ), 1, C( IS, JS+JJ ), 1 ) + CALL DCOPY( MB, RHS( II ), 1, F( IS, JS+JJ ), 1 ) + K = K + MB + II = II + MB + 180 CONTINUE +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( J.GT.P+2 ) THEN + CALL DGEMM( 'N', 'T', MB, JS-1, NB, ONE, + $ C( IS, JS ), LDC, B( 1, JS ), LDB, ONE, + $ F( IS, 1 ), LDF ) + CALL DGEMM( 'N', 'T', MB, JS-1, NB, ONE, + $ F( IS, JS ), LDF, E( 1, JS ), LDE, ONE, + $ F( IS, 1 ), LDF ) + END IF + IF( I.LT.P ) THEN + CALL DGEMM( 'T', 'N', M-IE, NB, MB, -ONE, + $ A( IS, IE+1 ), LDA, C( IS, JS ), LDC, + $ ONE, C( IE+1, JS ), LDC ) + CALL DGEMM( 'T', 'N', M-IE, NB, MB, -ONE, + $ D( IS, IE+1 ), LDD, F( IS, JS ), LDF, + $ ONE, C( IE+1, JS ), LDC ) + END IF +* + END IF +* + 190 CONTINUE + 200 CONTINUE +* + END IF + RETURN +* +* End of DTGSY2 +* + END diff --git a/costa/native/external/lapack/dtgsyl.f b/costa/native/external/lapack/dtgsyl.f new file mode 100644 index 000000000..ff00f18f1 --- /dev/null +++ b/costa/native/external/lapack/dtgsyl.f @@ -0,0 +1,534 @@ + SUBROUTINE DTGSYL( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, DIF, WORK, LWORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, + $ LWORK, M, N + DOUBLE PRECISION DIF, SCALE +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ D( LDD, * ), E( LDE, * ), F( LDF, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* DTGSYL solves the generalized Sylvester equation: +* +* A * R - L * B = scale * C (1) +* D * R - L * E = scale * F +* +* where R and L are unknown m-by-n matrices, (A, D), (B, E) and +* (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n, +* respectively, with real entries. (A, D) and (B, E) must be in +* generalized (real) Schur canonical form, i.e. A, B are upper quasi +* triangular and D, E are upper triangular. +* +* The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output +* scaling factor chosen to avoid overflow. +* +* In matrix notation (1) is equivalent to solve Zx = scale b, where +* Z is defined as +* +* Z = [ kron(In, A) -kron(B', Im) ] (2) +* [ kron(In, D) -kron(E', Im) ]. +* +* Here Ik is the identity matrix of size k and X' is the transpose of +* X. kron(X, Y) is the Kronecker product between the matrices X and Y. +* +* If TRANS = 'T', DTGSYL solves the transposed system Z'*y = scale*b, +* which is equivalent to solve for R and L in +* +* A' * R + D' * L = scale * C (3) +* R * B' + L * E' = scale * (-F) +* +* This case (TRANS = 'T') is used to compute an one-norm-based estimate +* of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D) +* and (B,E), using DLACON. +* +* If IJOB >= 1, DTGSYL computes a Frobenius norm-based estimate +* of Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the +* reciprocal of the smallest singular value of Z. See [1-2] for more +* information. +* +* This is a level 3 BLAS algorithm. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* = 'N', solve the generalized Sylvester equation (1). +* = 'T', solve the 'transposed' system (3). +* +* IJOB (input) INTEGER +* Specifies what kind of functionality to be performed. +* =0: solve (1) only. +* =1: The functionality of 0 and 3. +* =2: The functionality of 0 and 4. +* =3: Only an estimate of Dif[(A,D), (B,E)] is computed. +* (look ahead strategy IJOB = 1 is used). +* =4: Only an estimate of Dif[(A,D), (B,E)] is computed. +* ( DGECON on sub-systems is used ). +* Not referenced if TRANS = 'T'. +* +* M (input) INTEGER +* The order of the matrices A and D, and the row dimension of +* the matrices C, F, R and L. +* +* N (input) INTEGER +* The order of the matrices B and E, and the column dimension +* of the matrices C, F, R and L. +* +* A (input) DOUBLE PRECISION array, dimension (LDA, M) +* The upper quasi triangular matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1, M). +* +* B (input) DOUBLE PRECISION array, dimension (LDB, N) +* The upper quasi triangular matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1, N). +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC, N) +* On entry, C contains the right-hand-side of the first matrix +* equation in (1) or (3). +* On exit, if IJOB = 0, 1 or 2, C has been overwritten by +* the solution R. If IJOB = 3 or 4 and TRANS = 'N', C holds R, +* the solution achieved during the computation of the +* Dif-estimate. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1, M). +* +* D (input) DOUBLE PRECISION array, dimension (LDD, M) +* The upper triangular matrix D. +* +* LDD (input) INTEGER +* The leading dimension of the array D. LDD >= max(1, M). +* +* E (input) DOUBLE PRECISION array, dimension (LDE, N) +* The upper triangular matrix E. +* +* LDE (input) INTEGER +* The leading dimension of the array E. LDE >= max(1, N). +* +* F (input/output) DOUBLE PRECISION array, dimension (LDF, N) +* On entry, F contains the right-hand-side of the second matrix +* equation in (1) or (3). +* On exit, if IJOB = 0, 1 or 2, F has been overwritten by +* the solution L. If IJOB = 3 or 4 and TRANS = 'N', F holds L, +* the solution achieved during the computation of the +* Dif-estimate. +* +* LDF (input) INTEGER +* The leading dimension of the array F. LDF >= max(1, M). +* +* DIF (output) DOUBLE PRECISION +* On exit DIF is the reciprocal of a lower bound of the +* reciprocal of the Dif-function, i.e. DIF is an upper bound of +* Dif[(A,D), (B,E)] = sigma_min(Z), where Z as in (2). +* IF IJOB = 0 or TRANS = 'T', DIF is not touched. +* +* SCALE (output) DOUBLE PRECISION +* On exit SCALE is the scaling factor in (1) or (3). +* If 0 < SCALE < 1, C and F hold the solutions R and L, resp., +* to a slightly perturbed system but the input matrices A, B, D +* and E have not been changed. If SCALE = 0, C and F hold the +* solutions R and L, respectively, to the homogeneous system +* with C = F = 0. Normally, SCALE = 1. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* If IJOB = 0, WORK is not referenced. Otherwise, +* on exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK > = 1. +* If IJOB = 1 or 2 and TRANS = 'N', LWORK >= 2*M*N. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace) INTEGER array, dimension (M+N+6) +* +* INFO (output) INTEGER +* =0: successful exit +* <0: If INFO = -i, the i-th argument had an illegal value. +* >0: (A, D) and (B, E) have common or close eigenvalues. +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* [1] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +* for Solving the Generalized Sylvester Equation and Estimating the +* Separation between Regular Matrix Pairs, Report UMINF - 93.23, +* Department of Computing Science, Umea University, S-901 87 Umea, +* Sweden, December 1993, Revised April 1994, Also as LAPACK Working +* Note 75. To appear in ACM Trans. on Math. Software, Vol 22, +* No 1, 1996. +* +* [2] B. Kagstrom, A Perturbation Analysis of the Generalized Sylvester +* Equation (AR - LB, DR - LE ) = (C, F), SIAM J. Matrix Anal. +* Appl., 15(4):1045-1060, 1994 +* +* [3] B. Kagstrom and L. Westin, Generalized Schur Methods with +* Condition Estimators for Solving the Generalized Sylvester +* Equation, IEEE Transactions on Automatic Control, Vol. 34, No. 7, +* July 1989, pp 745-751. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOTRAN + INTEGER I, IE, IFUNC, IROUND, IS, ISOLVE, J, JE, JS, K, + $ LINFO, LWMIN, MB, NB, P, PPQQ, PQ, Q + DOUBLE PRECISION DSCALE, DSUM, SCALE2, SCALOC +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DLACPY, DSCAL, DTGSY2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test input parameters +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( ( IJOB.EQ.1 .OR. IJOB.EQ.2 ) .AND. NOTRAN ) THEN + LWMIN = MAX( 1, 2*M*N ) + ELSE + LWMIN = 1 + END IF +* + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -1 + ELSE IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.4 ) ) THEN + INFO = -2 + ELSE IF( M.LE.0 ) THEN + INFO = -3 + ELSE IF( N.LE.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDD.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDE.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDF.LT.MAX( 1, M ) ) THEN + INFO = -16 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTGSYL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Determine optimal block sizes MB and NB +* + MB = ILAENV( 2, 'DTGSYL', TRANS, M, N, -1, -1 ) + NB = ILAENV( 5, 'DTGSYL', TRANS, M, N, -1, -1 ) +* + ISOLVE = 1 + IFUNC = 0 + IF( IJOB.GE.3 .AND. NOTRAN ) THEN + IFUNC = IJOB - 2 + DO 10 J = 1, N + CALL DCOPY( M, ZERO, 0, C( 1, J ), 1 ) + CALL DCOPY( M, ZERO, 0, F( 1, J ), 1 ) + 10 CONTINUE + ELSE IF( IJOB.GE.1 .AND. NOTRAN ) THEN + ISOLVE = 2 + END IF +* + IF( ( MB.LE.1 .AND. NB.LE.1 ) .OR. ( MB.GE.M .AND. NB.GE.N ) ) + $ THEN +* + DO 30 IROUND = 1, ISOLVE +* +* Use unblocked Level 2 solver +* + DSCALE = ZERO + DSUM = ONE + PQ = 0 + CALL DTGSY2( TRANS, IFUNC, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, DSUM, DSCALE, + $ IWORK, PQ, INFO ) + IF( DSCALE.NE.ZERO ) THEN + IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN + DIF = SQRT( DBLE( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) ) + ELSE + DIF = SQRT( DBLE( PQ ) ) / ( DSCALE*SQRT( DSUM ) ) + END IF + END IF +* + IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN + IFUNC = IJOB + SCALE2 = SCALE + CALL DLACPY( 'F', M, N, C, LDC, WORK, M ) + CALL DLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M ) + DO 20 J = 1, N + CALL DCOPY( M, ZERO, 0, C( 1, J ), 1 ) + CALL DCOPY( M, ZERO, 0, F( 1, J ), 1 ) + 20 CONTINUE + ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN + CALL DLACPY( 'F', M, N, WORK, M, C, LDC ) + CALL DLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF ) + SCALE = SCALE2 + END IF + 30 CONTINUE +* + RETURN + END IF +* +* Determine block structure of A +* + P = 0 + I = 1 + 40 CONTINUE + IF( I.GT.M ) + $ GO TO 50 + P = P + 1 + IWORK( P ) = I + I = I + MB + IF( I.GE.M ) + $ GO TO 50 + IF( A( I, I-1 ).NE.ZERO ) + $ I = I + 1 + GO TO 40 + 50 CONTINUE +* + IWORK( P+1 ) = M + 1 + IF( IWORK( P ).EQ.IWORK( P+1 ) ) + $ P = P - 1 +* +* Determine block structure of B +* + Q = P + 1 + J = 1 + 60 CONTINUE + IF( J.GT.N ) + $ GO TO 70 + Q = Q + 1 + IWORK( Q ) = J + J = J + NB + IF( J.GE.N ) + $ GO TO 70 + IF( B( J, J-1 ).NE.ZERO ) + $ J = J + 1 + GO TO 60 + 70 CONTINUE +* + IWORK( Q+1 ) = N + 1 + IF( IWORK( Q ).EQ.IWORK( Q+1 ) ) + $ Q = Q - 1 +* + IF( NOTRAN ) THEN +* + DO 150 IROUND = 1, ISOLVE +* +* Solve (I, J)-subsystem +* A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) +* D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) +* for I = P, P - 1,..., 1; J = 1, 2,..., Q +* + DSCALE = ZERO + DSUM = ONE + PQ = 0 + SCALE = ONE + DO 130 J = P + 2, Q + JS = IWORK( J ) + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + DO 120 I = P, 1, -1 + IS = IWORK( I ) + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + PPQQ = 0 + CALL DTGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA, + $ B( JS, JS ), LDB, C( IS, JS ), LDC, + $ D( IS, IS ), LDD, E( JS, JS ), LDE, + $ F( IS, JS ), LDF, SCALOC, DSUM, DSCALE, + $ IWORK( Q+2 ), PPQQ, LINFO ) + IF( LINFO.GT.0 ) + $ INFO = LINFO +* + PQ = PQ + PPQQ + IF( SCALOC.NE.ONE ) THEN + DO 80 K = 1, JS - 1 + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 80 CONTINUE + DO 90 K = JS, JE + CALL DSCAL( IS-1, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( IS-1, SCALOC, F( 1, K ), 1 ) + 90 CONTINUE + DO 100 K = JS, JE + CALL DSCAL( M-IE, SCALOC, C( IE+1, K ), 1 ) + CALL DSCAL( M-IE, SCALOC, F( IE+1, K ), 1 ) + 100 CONTINUE + DO 110 K = JE + 1, N + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 110 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( I.GT.1 ) THEN + CALL DGEMM( 'N', 'N', IS-1, NB, MB, -ONE, + $ A( 1, IS ), LDA, C( IS, JS ), LDC, ONE, + $ C( 1, JS ), LDC ) + CALL DGEMM( 'N', 'N', IS-1, NB, MB, -ONE, + $ D( 1, IS ), LDD, C( IS, JS ), LDC, ONE, + $ F( 1, JS ), LDF ) + END IF + IF( J.LT.Q ) THEN + CALL DGEMM( 'N', 'N', MB, N-JE, NB, ONE, + $ F( IS, JS ), LDF, B( JS, JE+1 ), LDB, + $ ONE, C( IS, JE+1 ), LDC ) + CALL DGEMM( 'N', 'N', MB, N-JE, NB, ONE, + $ F( IS, JS ), LDF, E( JS, JE+1 ), LDE, + $ ONE, F( IS, JE+1 ), LDF ) + END IF + 120 CONTINUE + 130 CONTINUE + IF( DSCALE.NE.ZERO ) THEN + IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN + DIF = SQRT( DBLE( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) ) + ELSE + DIF = SQRT( DBLE( PQ ) ) / ( DSCALE*SQRT( DSUM ) ) + END IF + END IF + IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN + IFUNC = IJOB + SCALE2 = SCALE + CALL DLACPY( 'F', M, N, C, LDC, WORK, M ) + CALL DLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M ) + DO 140 J = 1, N + CALL DCOPY( M, ZERO, 0, C( 1, J ), 1 ) + CALL DCOPY( M, ZERO, 0, F( 1, J ), 1 ) + 140 CONTINUE + ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN + CALL DLACPY( 'F', M, N, WORK, M, C, LDC ) + CALL DLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF ) + SCALE = SCALE2 + END IF + 150 CONTINUE +* + ELSE +* +* Solve transposed (I, J)-subsystem +* A(I, I)' * R(I, J) + D(I, I)' * L(I, J) = C(I, J) +* R(I, J) * B(J, J)' + L(I, J) * E(J, J)' = -F(I, J) +* for I = 1,2,..., P; J = Q, Q-1,..., 1 +* + SCALE = ONE + DO 210 I = 1, P + IS = IWORK( I ) + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + DO 200 J = Q, P + 2, -1 + JS = IWORK( J ) + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + CALL DTGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA, + $ B( JS, JS ), LDB, C( IS, JS ), LDC, + $ D( IS, IS ), LDD, E( JS, JS ), LDE, + $ F( IS, JS ), LDF, SCALOC, DSUM, DSCALE, + $ IWORK( Q+2 ), PPQQ, LINFO ) + IF( LINFO.GT.0 ) + $ INFO = LINFO + IF( SCALOC.NE.ONE ) THEN + DO 160 K = 1, JS - 1 + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 160 CONTINUE + DO 170 K = JS, JE + CALL DSCAL( IS-1, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( IS-1, SCALOC, F( 1, K ), 1 ) + 170 CONTINUE + DO 180 K = JS, JE + CALL DSCAL( M-IE, SCALOC, C( IE+1, K ), 1 ) + CALL DSCAL( M-IE, SCALOC, F( IE+1, K ), 1 ) + 180 CONTINUE + DO 190 K = JE + 1, N + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 190 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Substitute R(I, J) and L(I, J) into remaining equation. +* + IF( J.GT.P+2 ) THEN + CALL DGEMM( 'N', 'T', MB, JS-1, NB, ONE, C( IS, JS ), + $ LDC, B( 1, JS ), LDB, ONE, F( IS, 1 ), + $ LDF ) + CALL DGEMM( 'N', 'T', MB, JS-1, NB, ONE, F( IS, JS ), + $ LDF, E( 1, JS ), LDE, ONE, F( IS, 1 ), + $ LDF ) + END IF + IF( I.LT.P ) THEN + CALL DGEMM( 'T', 'N', M-IE, NB, MB, -ONE, + $ A( IS, IE+1 ), LDA, C( IS, JS ), LDC, ONE, + $ C( IE+1, JS ), LDC ) + CALL DGEMM( 'T', 'N', M-IE, NB, MB, -ONE, + $ D( IS, IE+1 ), LDD, F( IS, JS ), LDF, ONE, + $ C( IE+1, JS ), LDC ) + END IF + 200 CONTINUE + 210 CONTINUE +* + END IF +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of DTGSYL +* + END diff --git a/costa/native/external/lapack/dtpcon.f b/costa/native/external/lapack/dtpcon.f new file mode 100644 index 000000000..2d66707db --- /dev/null +++ b/costa/native/external/lapack/dtpcon.f @@ -0,0 +1,187 @@ + SUBROUTINE DTPCON( NORM, UPLO, DIAG, N, AP, RCOND, WORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, N + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DTPCON estimates the reciprocal of the condition number of a packed +* triangular matrix A, in either the 1-norm or the infinity-norm. +* +* The norm of A is computed and an estimate is obtained for +* norm(inv(A)), then the reciprocal of the condition number is +* computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* The upper or lower triangular matrix A, packed columnwise in +* a linear array. The j-th column of A is stored in the array +* AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* If DIAG = 'U', the diagonal elements of A are not referenced +* and are assumed to be 1. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + DOUBLE PRECISION AINVNM, ANORM, SCALE, SMLNUM, XNORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DLANTP + EXTERNAL LSAME, IDAMAX, DLAMCH, DLANTP +* .. +* .. External Subroutines .. + EXTERNAL DLACON, DLATPS, DRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = DLAMCH( 'Safe minimum' )*DBLE( MAX( 1, N ) ) +* +* Compute the norm of the triangular matrix A. +* + ANORM = DLANTP( NORM, UPLO, DIAG, N, AP, WORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL DLATPS( UPLO, 'No transpose', DIAG, NORMIN, N, AP, + $ WORK, SCALE, WORK( 2*N+1 ), INFO ) + ELSE +* +* Multiply by inv(A'). +* + CALL DLATPS( UPLO, 'Transpose', DIAG, NORMIN, N, AP, + $ WORK, SCALE, WORK( 2*N+1 ), INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = IDAMAX( N, WORK, 1 ) + XNORM = ABS( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL DRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of DTPCON +* + END diff --git a/costa/native/external/lapack/dtprfs.f b/costa/native/external/lapack/dtprfs.f new file mode 100644 index 000000000..da514cac7 --- /dev/null +++ b/costa/native/external/lapack/dtprfs.f @@ -0,0 +1,375 @@ + SUBROUTINE DTPRFS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, X, LDX, + $ FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AP( * ), B( LDB, * ), BERR( * ), FERR( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DTPRFS provides error bounds and backward error estimates for the +* solution to a system of linear equations with a triangular packed +* coefficient matrix. +* +* The solution matrix X must be computed by DTPTRS or some other +* means before entering this routine. DTPRFS does not do iterative +* refinement because doing so cannot improve the backward error. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose = Transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* The upper or lower triangular matrix A, packed columnwise in +* a linear array. The j-th column of A is stored in the array +* AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* If DIAG = 'U', the diagonal elements of A are not referenced +* and are assumed to be 1. +* +* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input) DOUBLE PRECISION array, dimension (LDX,NRHS) +* The solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANST + INTEGER I, J, K, KASE, KC, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DLACON, DTPMV, DTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A or A', depending on TRANS. +* + CALL DCOPY( N, X( 1, J ), 1, WORK( N+1 ), 1 ) + CALL DTPMV( UPLO, TRANS, DIAG, N, AP, WORK( N+1 ), 1 ) + CALL DAXPY( N, -ONE, B( 1, J ), 1, WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + KC = 1 + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = ABS( X( K, J ) ) + DO 30 I = 1, K + WORK( I ) = WORK( I ) + ABS( AP( KC+I-1 ) )*XK + 30 CONTINUE + KC = KC + K + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = ABS( X( K, J ) ) + DO 50 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( AP( KC+I-1 ) )*XK + 50 CONTINUE + WORK( K ) = WORK( K ) + XK + KC = KC + K + 60 CONTINUE + END IF + ELSE + KC = 1 + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = ABS( X( K, J ) ) + DO 70 I = K, N + WORK( I ) = WORK( I ) + ABS( AP( KC+I-K ) )*XK + 70 CONTINUE + KC = KC + N - K + 1 + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = ABS( X( K, J ) ) + DO 90 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( AP( KC+I-K ) )*XK + 90 CONTINUE + WORK( K ) = WORK( K ) + XK + KC = KC + N - K + 1 + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A')*abs(X) + abs(B). +* + IF( UPPER ) THEN + KC = 1 + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = 1, K + S = S + ABS( AP( KC+I-1 ) )*ABS( X( I, J ) ) + 110 CONTINUE + WORK( K ) = WORK( K ) + S + KC = KC + K + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = ABS( X( K, J ) ) + DO 130 I = 1, K - 1 + S = S + ABS( AP( KC+I-1 ) )*ABS( X( I, J ) ) + 130 CONTINUE + WORK( K ) = WORK( K ) + S + KC = KC + K + 140 CONTINUE + END IF + ELSE + KC = 1 + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, N + S = S + ABS( AP( KC+I-K ) )*ABS( X( I, J ) ) + 150 CONTINUE + WORK( K ) = WORK( K ) + S + KC = KC + N - K + 1 + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = ABS( X( K, J ) ) + DO 170 I = K + 1, N + S = S + ABS( AP( KC+I-K ) )*ABS( X( I, J ) ) + 170 CONTINUE + WORK( K ) = WORK( K ) + S + KC = KC + N - K + 1 + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use DLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL DLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)'). +* + CALL DTPSV( UPLO, TRANST, DIAG, N, AP, WORK( N+1 ), 1 ) + DO 220 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 230 CONTINUE + CALL DTPSV( UPLO, TRANS, DIAG, N, AP, WORK( N+1 ), 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of DTPRFS +* + END diff --git a/costa/native/external/lapack/dtptri.f b/costa/native/external/lapack/dtptri.f new file mode 100644 index 000000000..230620d2d --- /dev/null +++ b/costa/native/external/lapack/dtptri.f @@ -0,0 +1,176 @@ + SUBROUTINE DTPTRI( UPLO, DIAG, N, AP, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ) +* .. +* +* Purpose +* ======= +* +* DTPTRI computes the inverse of a real upper or lower triangular +* matrix A stored in packed format. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangular matrix A, stored +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*((2*n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* On exit, the (triangular) inverse of the original matrix, in +* the same packed storage format. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, A(i,i) is exactly zero. The triangular +* matrix is singular and its inverse can not be computed. +* +* Further Details +* =============== +* +* A triangular matrix A can be transferred to packed storage using one +* of the following program segments: +* +* UPLO = 'U': UPLO = 'L': +* +* JC = 1 JC = 1 +* DO 2 J = 1, N DO 2 J = 1, N +* DO 1 I = 1, J DO 1 I = J, N +* AP(JC+I-1) = A(I,J) AP(JC+I-J) = A(I,J) +* 1 CONTINUE 1 CONTINUE +* JC = JC + J JC = JC + N - J + 1 +* 2 CONTINUE 2 CONTINUE +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JC, JCLAST, JJ + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DTPMV, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTPTRI', -INFO ) + RETURN + END IF +* +* Check for singularity if non-unit. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + JJ = 0 + DO 10 INFO = 1, N + JJ = JJ + INFO + IF( AP( JJ ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE + JJ = 1 + DO 20 INFO = 1, N + IF( AP( JJ ).EQ.ZERO ) + $ RETURN + JJ = JJ + N - INFO + 1 + 20 CONTINUE + END IF + INFO = 0 + END IF +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix. +* + JC = 1 + DO 30 J = 1, N + IF( NOUNIT ) THEN + AP( JC+J-1 ) = ONE / AP( JC+J-1 ) + AJJ = -AP( JC+J-1 ) + ELSE + AJJ = -ONE + END IF +* +* Compute elements 1:j-1 of j-th column. +* + CALL DTPMV( 'Upper', 'No transpose', DIAG, J-1, AP, + $ AP( JC ), 1 ) + CALL DSCAL( J-1, AJJ, AP( JC ), 1 ) + JC = JC + J + 30 CONTINUE +* + ELSE +* +* Compute inverse of lower triangular matrix. +* + JC = N*( N+1 ) / 2 + DO 40 J = N, 1, -1 + IF( NOUNIT ) THEN + AP( JC ) = ONE / AP( JC ) + AJJ = -AP( JC ) + ELSE + AJJ = -ONE + END IF + IF( J.LT.N ) THEN +* +* Compute elements j+1:n of j-th column. +* + CALL DTPMV( 'Lower', 'No transpose', DIAG, N-J, + $ AP( JCLAST ), AP( JC+1 ), 1 ) + CALL DSCAL( N-J, AJJ, AP( JC+1 ), 1 ) + END IF + JCLAST = JC + JC = JC - N + J - 2 + 40 CONTINUE + END IF +* + RETURN +* +* End of DTPTRI +* + END diff --git a/costa/native/external/lapack/dtptrs.f b/costa/native/external/lapack/dtptrs.f new file mode 100644 index 000000000..886901528 --- /dev/null +++ b/costa/native/external/lapack/dtptrs.f @@ -0,0 +1,154 @@ + SUBROUTINE DTPTRS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DTPTRS solves a triangular system of the form +* +* A * X = B or A**T * X = B, +* +* where A is a triangular matrix of order N stored in packed format, +* and B is an N-by-NRHS matrix. A check is made to verify that A is +* nonsingular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose = Transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) +* The upper or lower triangular matrix A, packed columnwise in +* a linear array. The j-th column of A is stored in the array +* AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, if INFO = 0, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the i-th diagonal element of A is zero, +* indicating that the matrix is singular and the +* solutions X have not been computed. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JC +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + JC = 1 + DO 10 INFO = 1, N + IF( AP( JC+INFO-1 ).EQ.ZERO ) + $ RETURN + JC = JC + INFO + 10 CONTINUE + ELSE + JC = 1 + DO 20 INFO = 1, N + IF( AP( JC ).EQ.ZERO ) + $ RETURN + JC = JC + N - INFO + 1 + 20 CONTINUE + END IF + END IF + INFO = 0 +* +* Solve A * x = b or A' * x = b. +* + DO 30 J = 1, NRHS + CALL DTPSV( UPLO, TRANS, DIAG, N, AP, B( 1, J ), 1 ) + 30 CONTINUE +* + RETURN +* +* End of DTPTRS +* + END diff --git a/costa/native/external/lapack/dtrcon.f b/costa/native/external/lapack/dtrcon.f new file mode 100644 index 000000000..8da58c760 --- /dev/null +++ b/costa/native/external/lapack/dtrcon.f @@ -0,0 +1,193 @@ + SUBROUTINE DTRCON( NORM, UPLO, DIAG, N, A, LDA, RCOND, WORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DTRCON estimates the reciprocal of the condition number of a +* triangular matrix A, in either the 1-norm or the infinity-norm. +* +* The norm of A is computed and an estimate is obtained for +* norm(inv(A)), then the reciprocal of the condition number is +* computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + DOUBLE PRECISION AINVNM, ANORM, SCALE, SMLNUM, XNORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DLANTR + EXTERNAL LSAME, IDAMAX, DLAMCH, DLANTR +* .. +* .. External Subroutines .. + EXTERNAL DLACON, DLATRS, DRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTRCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = DLAMCH( 'Safe minimum' )*DBLE( MAX( 1, N ) ) +* +* Compute the norm of the triangular matrix A. +* + ANORM = DLANTR( NORM, UPLO, DIAG, N, N, A, LDA, WORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL DLATRS( UPLO, 'No transpose', DIAG, NORMIN, N, A, + $ LDA, WORK, SCALE, WORK( 2*N+1 ), INFO ) + ELSE +* +* Multiply by inv(A'). +* + CALL DLATRS( UPLO, 'Transpose', DIAG, NORMIN, N, A, LDA, + $ WORK, SCALE, WORK( 2*N+1 ), INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = IDAMAX( N, WORK, 1 ) + XNORM = ABS( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL DRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of DTRCON +* + END diff --git a/costa/native/external/lapack/dtrevc.f b/costa/native/external/lapack/dtrevc.f new file mode 100644 index 000000000..b30f1aab0 --- /dev/null +++ b/costa/native/external/lapack/dtrevc.f @@ -0,0 +1,1005 @@ + SUBROUTINE DTREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, + $ LDVR, MM, M, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDT, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + DOUBLE PRECISION T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* DTREVC computes some or all of the right and/or left eigenvectors of +* a real upper quasi-triangular matrix T. +* +* The right eigenvector x and the left eigenvector y of T corresponding +* to an eigenvalue w are defined by: +* +* T*x = w*x, y'*T = w*y' +* +* where y' denotes the conjugate transpose of the vector y. +* +* If all eigenvectors are requested, the routine may either return the +* matrices X and/or Y of right or left eigenvectors of T, or the +* products Q*X and/or Q*Y, where Q is an input orthogonal +* matrix. If T was obtained from the real-Schur factorization of an +* original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of +* right or left eigenvectors of A. +* +* T must be in Schur canonical form (as returned by DHSEQR), that is, +* block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each +* 2-by-2 diagonal block has its diagonal elements equal and its +* off-diagonal elements of opposite sign. Corresponding to each 2-by-2 +* diagonal block is a complex conjugate pair of eigenvalues and +* eigenvectors; only one eigenvector of the pair is computed, namely +* the one corresponding to the eigenvalue with positive imaginary part. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'R': compute right eigenvectors only; +* = 'L': compute left eigenvectors only; +* = 'B': compute both right and left eigenvectors. +* +* HOWMNY (input) CHARACTER*1 +* = 'A': compute all right and/or left eigenvectors; +* = 'B': compute all right and/or left eigenvectors, +* and backtransform them using the input matrices +* supplied in VR and/or VL; +* = 'S': compute selected right and/or left eigenvectors, +* specified by the logical array SELECT. +* +* SELECT (input/output) LOGICAL array, dimension (N) +* If HOWMNY = 'S', SELECT specifies the eigenvectors to be +* computed. +* If HOWMNY = 'A' or 'B', SELECT is not referenced. +* To select the real eigenvector corresponding to a real +* eigenvalue w(j), SELECT(j) must be set to .TRUE.. To select +* the complex eigenvector corresponding to a complex conjugate +* pair w(j) and w(j+1), either SELECT(j) or SELECT(j+1) must be +* set to .TRUE.; then on exit SELECT(j) is .TRUE. and +* SELECT(j+1) is .FALSE.. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input) DOUBLE PRECISION array, dimension (LDT,N) +* The upper quasi-triangular matrix T in Schur canonical form. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* VL (input/output) DOUBLE PRECISION array, dimension (LDVL,MM) +* On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +* contain an N-by-N matrix Q (usually the orthogonal matrix Q +* of Schur vectors returned by DHSEQR). +* On exit, if SIDE = 'L' or 'B', VL contains: +* if HOWMNY = 'A', the matrix Y of left eigenvectors of T; +* VL has the same quasi-lower triangular form +* as T'. If T(i,i) is a real eigenvalue, then +* the i-th column VL(i) of VL is its +* corresponding eigenvector. If T(i:i+1,i:i+1) +* is a 2-by-2 block whose eigenvalues are +* complex-conjugate eigenvalues of T, then +* VL(i)+sqrt(-1)*VL(i+1) is the complex +* eigenvector corresponding to the eigenvalue +* with positive real part. +* if HOWMNY = 'B', the matrix Q*Y; +* if HOWMNY = 'S', the left eigenvectors of T specified by +* SELECT, stored consecutively in the columns +* of VL, in the same order as their +* eigenvalues. +* A complex eigenvector corresponding to a complex eigenvalue +* is stored in two consecutive columns, the first holding the +* real part, and the second the imaginary part. +* If SIDE = 'R', VL is not referenced. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. LDVL >= max(1,N) if +* SIDE = 'L' or 'B'; LDVL >= 1 otherwise. +* +* VR (input/output) DOUBLE PRECISION array, dimension (LDVR,MM) +* On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +* contain an N-by-N matrix Q (usually the orthogonal matrix Q +* of Schur vectors returned by DHSEQR). +* On exit, if SIDE = 'R' or 'B', VR contains: +* if HOWMNY = 'A', the matrix X of right eigenvectors of T; +* VR has the same quasi-upper triangular form +* as T. If T(i,i) is a real eigenvalue, then +* the i-th column VR(i) of VR is its +* corresponding eigenvector. If T(i:i+1,i:i+1) +* is a 2-by-2 block whose eigenvalues are +* complex-conjugate eigenvalues of T, then +* VR(i)+sqrt(-1)*VR(i+1) is the complex +* eigenvector corresponding to the eigenvalue +* with positive real part. +* if HOWMNY = 'B', the matrix Q*X; +* if HOWMNY = 'S', the right eigenvectors of T specified by +* SELECT, stored consecutively in the columns +* of VR, in the same order as their +* eigenvalues. +* A complex eigenvector corresponding to a complex eigenvalue +* is stored in two consecutive columns, the first holding the +* real part and the second the imaginary part. +* If SIDE = 'L', VR is not referenced. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. LDVR >= max(1,N) if +* SIDE = 'R' or 'B'; LDVR >= 1 otherwise. +* +* MM (input) INTEGER +* The number of columns in the arrays VL and/or VR. MM >= M. +* +* M (output) INTEGER +* The number of columns in the arrays VL and/or VR actually +* used to store the eigenvectors. +* If HOWMNY = 'A' or 'B', M is set to N. +* Each selected real eigenvector occupies one column and each +* selected complex eigenvector occupies two columns. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The algorithm used in this program is basically backward (forward) +* substitution, with scaling to make the the code robust against +* possible overflow. +* +* Each eigenvector is normalized so that the element of largest +* magnitude has magnitude 1; here the magnitude of a complex number +* (x,y) is taken to be |x| + |y|. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLV, BOTHV, LEFTV, OVER, PAIR, RIGHTV, SOMEV + INTEGER I, IERR, II, IP, IS, J, J1, J2, JNXT, K, KI, N2 + DOUBLE PRECISION BETA, BIGNUM, EMAX, OVFL, REC, REMAX, SCALE, + $ SMIN, SMLNUM, ULP, UNFL, VCRIT, VMAX, WI, WR, + $ XNORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DDOT, DLAMCH + EXTERNAL LSAME, IDAMAX, DDOT, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGEMV, DLALN2, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Local Arrays .. + DOUBLE PRECISION X( 2, 2 ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + BOTHV = LSAME( SIDE, 'B' ) + RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV + LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV +* + ALLV = LSAME( HOWMNY, 'A' ) + OVER = LSAME( HOWMNY, 'B' ) + SOMEV = LSAME( HOWMNY, 'S' ) +* + INFO = 0 + IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -1 + ELSE IF( .NOT.ALLV .AND. .NOT.OVER .AND. .NOT.SOMEV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN + INFO = -10 + ELSE +* +* Set M to the number of columns required to store the selected +* eigenvectors, standardize the array SELECT if necessary, and +* test MM. +* + IF( SOMEV ) THEN + M = 0 + PAIR = .FALSE. + DO 10 J = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + SELECT( J ) = .FALSE. + ELSE + IF( J.LT.N ) THEN + IF( T( J+1, J ).EQ.ZERO ) THEN + IF( SELECT( J ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( J ) .OR. SELECT( J+1 ) ) THEN + SELECT( J ) = .TRUE. + M = M + 2 + END IF + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE + ELSE + M = N + END IF +* + IF( MM.LT.M ) THEN + INFO = -11 + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTREVC', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* Set the constants to control overflow. +* + UNFL = DLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + CALL DLABAD( UNFL, OVFL ) + ULP = DLAMCH( 'Precision' ) + SMLNUM = UNFL*( N / ULP ) + BIGNUM = ( ONE-ULP ) / SMLNUM +* +* Compute 1-norm of each column of strictly upper triangular +* part of T to control overflow in triangular solver. +* + WORK( 1 ) = ZERO + DO 30 J = 2, N + WORK( J ) = ZERO + DO 20 I = 1, J - 1 + WORK( J ) = WORK( J ) + ABS( T( I, J ) ) + 20 CONTINUE + 30 CONTINUE +* +* Index IP is used to specify the real or complex eigenvalue: +* IP = 0, real eigenvalue, +* 1, first of conjugate complex pair: (wr,wi) +* -1, second of conjugate complex pair: (wr,wi) +* + N2 = 2*N +* + IF( RIGHTV ) THEN +* +* Compute right eigenvectors. +* + IP = 0 + IS = M + DO 140 KI = N, 1, -1 +* + IF( IP.EQ.1 ) + $ GO TO 130 + IF( KI.EQ.1 ) + $ GO TO 40 + IF( T( KI, KI-1 ).EQ.ZERO ) + $ GO TO 40 + IP = -1 +* + 40 CONTINUE + IF( SOMEV ) THEN + IF( IP.EQ.0 ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 130 + ELSE + IF( .NOT.SELECT( KI-1 ) ) + $ GO TO 130 + END IF + END IF +* +* Compute the KI-th eigenvalue (WR,WI). +* + WR = T( KI, KI ) + WI = ZERO + IF( IP.NE.0 ) + $ WI = SQRT( ABS( T( KI, KI-1 ) ) )* + $ SQRT( ABS( T( KI-1, KI ) ) ) + SMIN = MAX( ULP*( ABS( WR )+ABS( WI ) ), SMLNUM ) +* + IF( IP.EQ.0 ) THEN +* +* Real right eigenvector +* + WORK( KI+N ) = ONE +* +* Form right-hand side +* + DO 50 K = 1, KI - 1 + WORK( K+N ) = -T( K, KI ) + 50 CONTINUE +* +* Solve the upper quasi-triangular system: +* (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK. +* + JNXT = KI - 1 + DO 60 J = KI - 1, 1, -1 + IF( J.GT.JNXT ) + $ GO TO 60 + J1 = J + J2 = J + JNXT = J - 1 + IF( J.GT.1 ) THEN + IF( T( J, J-1 ).NE.ZERO ) THEN + J1 = J - 1 + JNXT = J - 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* + CALL DLALN2( .FALSE., 1, 1, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ ZERO, X, 2, SCALE, XNORM, IERR ) +* +* Scale X(1,1) to avoid overflow when updating +* the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + IF( WORK( J ).GT.BIGNUM / XNORM ) THEN + X( 1, 1 ) = X( 1, 1 ) / XNORM + SCALE = SCALE / XNORM + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL DSCAL( KI, SCALE, WORK( 1+N ), 1 ) + WORK( J+N ) = X( 1, 1 ) +* +* Update right-hand side +* + CALL DAXPY( J-1, -X( 1, 1 ), T( 1, J ), 1, + $ WORK( 1+N ), 1 ) +* + ELSE +* +* 2-by-2 diagonal block +* + CALL DLALN2( .FALSE., 2, 1, SMIN, ONE, + $ T( J-1, J-1 ), LDT, ONE, ONE, + $ WORK( J-1+N ), N, WR, ZERO, X, 2, + $ SCALE, XNORM, IERR ) +* +* Scale X(1,1) and X(2,1) to avoid overflow when +* updating the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + BETA = MAX( WORK( J-1 ), WORK( J ) ) + IF( BETA.GT.BIGNUM / XNORM ) THEN + X( 1, 1 ) = X( 1, 1 ) / XNORM + X( 2, 1 ) = X( 2, 1 ) / XNORM + SCALE = SCALE / XNORM + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL DSCAL( KI, SCALE, WORK( 1+N ), 1 ) + WORK( J-1+N ) = X( 1, 1 ) + WORK( J+N ) = X( 2, 1 ) +* +* Update right-hand side +* + CALL DAXPY( J-2, -X( 1, 1 ), T( 1, J-1 ), 1, + $ WORK( 1+N ), 1 ) + CALL DAXPY( J-2, -X( 2, 1 ), T( 1, J ), 1, + $ WORK( 1+N ), 1 ) + END IF + 60 CONTINUE +* +* Copy the vector x or Q*x to VR and normalize. +* + IF( .NOT.OVER ) THEN + CALL DCOPY( KI, WORK( 1+N ), 1, VR( 1, IS ), 1 ) +* + II = IDAMAX( KI, VR( 1, IS ), 1 ) + REMAX = ONE / ABS( VR( II, IS ) ) + CALL DSCAL( KI, REMAX, VR( 1, IS ), 1 ) +* + DO 70 K = KI + 1, N + VR( K, IS ) = ZERO + 70 CONTINUE + ELSE + IF( KI.GT.1 ) + $ CALL DGEMV( 'N', N, KI-1, ONE, VR, LDVR, + $ WORK( 1+N ), 1, WORK( KI+N ), + $ VR( 1, KI ), 1 ) +* + II = IDAMAX( N, VR( 1, KI ), 1 ) + REMAX = ONE / ABS( VR( II, KI ) ) + CALL DSCAL( N, REMAX, VR( 1, KI ), 1 ) + END IF +* + ELSE +* +* Complex right eigenvector. +* +* Initial solve +* [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0. +* [ (T(KI,KI-1) T(KI,KI) ) ] +* + IF( ABS( T( KI-1, KI ) ).GE.ABS( T( KI, KI-1 ) ) ) THEN + WORK( KI-1+N ) = ONE + WORK( KI+N2 ) = WI / T( KI-1, KI ) + ELSE + WORK( KI-1+N ) = -WI / T( KI, KI-1 ) + WORK( KI+N2 ) = ONE + END IF + WORK( KI+N ) = ZERO + WORK( KI-1+N2 ) = ZERO +* +* Form right-hand side +* + DO 80 K = 1, KI - 2 + WORK( K+N ) = -WORK( KI-1+N )*T( K, KI-1 ) + WORK( K+N2 ) = -WORK( KI+N2 )*T( K, KI ) + 80 CONTINUE +* +* Solve upper quasi-triangular system: +* (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2) +* + JNXT = KI - 2 + DO 90 J = KI - 2, 1, -1 + IF( J.GT.JNXT ) + $ GO TO 90 + J1 = J + J2 = J + JNXT = J - 1 + IF( J.GT.1 ) THEN + IF( T( J, J-1 ).NE.ZERO ) THEN + J1 = J - 1 + JNXT = J - 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* + CALL DLALN2( .FALSE., 1, 2, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, WI, + $ X, 2, SCALE, XNORM, IERR ) +* +* Scale X(1,1) and X(1,2) to avoid overflow when +* updating the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + IF( WORK( J ).GT.BIGNUM / XNORM ) THEN + X( 1, 1 ) = X( 1, 1 ) / XNORM + X( 1, 2 ) = X( 1, 2 ) / XNORM + SCALE = SCALE / XNORM + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL DSCAL( KI, SCALE, WORK( 1+N ), 1 ) + CALL DSCAL( KI, SCALE, WORK( 1+N2 ), 1 ) + END IF + WORK( J+N ) = X( 1, 1 ) + WORK( J+N2 ) = X( 1, 2 ) +* +* Update the right-hand side +* + CALL DAXPY( J-1, -X( 1, 1 ), T( 1, J ), 1, + $ WORK( 1+N ), 1 ) + CALL DAXPY( J-1, -X( 1, 2 ), T( 1, J ), 1, + $ WORK( 1+N2 ), 1 ) +* + ELSE +* +* 2-by-2 diagonal block +* + CALL DLALN2( .FALSE., 2, 2, SMIN, ONE, + $ T( J-1, J-1 ), LDT, ONE, ONE, + $ WORK( J-1+N ), N, WR, WI, X, 2, SCALE, + $ XNORM, IERR ) +* +* Scale X to avoid overflow when updating +* the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + BETA = MAX( WORK( J-1 ), WORK( J ) ) + IF( BETA.GT.BIGNUM / XNORM ) THEN + REC = ONE / XNORM + X( 1, 1 ) = X( 1, 1 )*REC + X( 1, 2 ) = X( 1, 2 )*REC + X( 2, 1 ) = X( 2, 1 )*REC + X( 2, 2 ) = X( 2, 2 )*REC + SCALE = SCALE*REC + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL DSCAL( KI, SCALE, WORK( 1+N ), 1 ) + CALL DSCAL( KI, SCALE, WORK( 1+N2 ), 1 ) + END IF + WORK( J-1+N ) = X( 1, 1 ) + WORK( J+N ) = X( 2, 1 ) + WORK( J-1+N2 ) = X( 1, 2 ) + WORK( J+N2 ) = X( 2, 2 ) +* +* Update the right-hand side +* + CALL DAXPY( J-2, -X( 1, 1 ), T( 1, J-1 ), 1, + $ WORK( 1+N ), 1 ) + CALL DAXPY( J-2, -X( 2, 1 ), T( 1, J ), 1, + $ WORK( 1+N ), 1 ) + CALL DAXPY( J-2, -X( 1, 2 ), T( 1, J-1 ), 1, + $ WORK( 1+N2 ), 1 ) + CALL DAXPY( J-2, -X( 2, 2 ), T( 1, J ), 1, + $ WORK( 1+N2 ), 1 ) + END IF + 90 CONTINUE +* +* Copy the vector x or Q*x to VR and normalize. +* + IF( .NOT.OVER ) THEN + CALL DCOPY( KI, WORK( 1+N ), 1, VR( 1, IS-1 ), 1 ) + CALL DCOPY( KI, WORK( 1+N2 ), 1, VR( 1, IS ), 1 ) +* + EMAX = ZERO + DO 100 K = 1, KI + EMAX = MAX( EMAX, ABS( VR( K, IS-1 ) )+ + $ ABS( VR( K, IS ) ) ) + 100 CONTINUE +* + REMAX = ONE / EMAX + CALL DSCAL( KI, REMAX, VR( 1, IS-1 ), 1 ) + CALL DSCAL( KI, REMAX, VR( 1, IS ), 1 ) +* + DO 110 K = KI + 1, N + VR( K, IS-1 ) = ZERO + VR( K, IS ) = ZERO + 110 CONTINUE +* + ELSE +* + IF( KI.GT.2 ) THEN + CALL DGEMV( 'N', N, KI-2, ONE, VR, LDVR, + $ WORK( 1+N ), 1, WORK( KI-1+N ), + $ VR( 1, KI-1 ), 1 ) + CALL DGEMV( 'N', N, KI-2, ONE, VR, LDVR, + $ WORK( 1+N2 ), 1, WORK( KI+N2 ), + $ VR( 1, KI ), 1 ) + ELSE + CALL DSCAL( N, WORK( KI-1+N ), VR( 1, KI-1 ), 1 ) + CALL DSCAL( N, WORK( KI+N2 ), VR( 1, KI ), 1 ) + END IF +* + EMAX = ZERO + DO 120 K = 1, N + EMAX = MAX( EMAX, ABS( VR( K, KI-1 ) )+ + $ ABS( VR( K, KI ) ) ) + 120 CONTINUE + REMAX = ONE / EMAX + CALL DSCAL( N, REMAX, VR( 1, KI-1 ), 1 ) + CALL DSCAL( N, REMAX, VR( 1, KI ), 1 ) + END IF + END IF +* + IS = IS - 1 + IF( IP.NE.0 ) + $ IS = IS - 1 + 130 CONTINUE + IF( IP.EQ.1 ) + $ IP = 0 + IF( IP.EQ.-1 ) + $ IP = 1 + 140 CONTINUE + END IF +* + IF( LEFTV ) THEN +* +* Compute left eigenvectors. +* + IP = 0 + IS = 1 + DO 260 KI = 1, N +* + IF( IP.EQ.-1 ) + $ GO TO 250 + IF( KI.EQ.N ) + $ GO TO 150 + IF( T( KI+1, KI ).EQ.ZERO ) + $ GO TO 150 + IP = 1 +* + 150 CONTINUE + IF( SOMEV ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 250 + END IF +* +* Compute the KI-th eigenvalue (WR,WI). +* + WR = T( KI, KI ) + WI = ZERO + IF( IP.NE.0 ) + $ WI = SQRT( ABS( T( KI, KI+1 ) ) )* + $ SQRT( ABS( T( KI+1, KI ) ) ) + SMIN = MAX( ULP*( ABS( WR )+ABS( WI ) ), SMLNUM ) +* + IF( IP.EQ.0 ) THEN +* +* Real left eigenvector. +* + WORK( KI+N ) = ONE +* +* Form right-hand side +* + DO 160 K = KI + 1, N + WORK( K+N ) = -T( KI, K ) + 160 CONTINUE +* +* Solve the quasi-triangular system: +* (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK +* + VMAX = ONE + VCRIT = BIGNUM +* + JNXT = KI + 1 + DO 170 J = KI + 1, N + IF( J.LT.JNXT ) + $ GO TO 170 + J1 = J + J2 = J + JNXT = J + 1 + IF( J.LT.N ) THEN + IF( T( J+1, J ).NE.ZERO ) THEN + J2 = J + 1 + JNXT = J + 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* +* Scale if necessary to avoid overflow when forming +* the right-hand side. +* + IF( WORK( J ).GT.VCRIT ) THEN + REC = ONE / VMAX + CALL DSCAL( N-KI+1, REC, WORK( KI+N ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+N ) = WORK( J+N ) - + $ DDOT( J-KI-1, T( KI+1, J ), 1, + $ WORK( KI+1+N ), 1 ) +* +* Solve (T(J,J)-WR)'*X = WORK +* + CALL DLALN2( .FALSE., 1, 1, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ ZERO, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL DSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 ) + WORK( J+N ) = X( 1, 1 ) + VMAX = MAX( ABS( WORK( J+N ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + ELSE +* +* 2-by-2 diagonal block +* +* Scale if necessary to avoid overflow when forming +* the right-hand side. +* + BETA = MAX( WORK( J ), WORK( J+1 ) ) + IF( BETA.GT.VCRIT ) THEN + REC = ONE / VMAX + CALL DSCAL( N-KI+1, REC, WORK( KI+N ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+N ) = WORK( J+N ) - + $ DDOT( J-KI-1, T( KI+1, J ), 1, + $ WORK( KI+1+N ), 1 ) +* + WORK( J+1+N ) = WORK( J+1+N ) - + $ DDOT( J-KI-1, T( KI+1, J+1 ), 1, + $ WORK( KI+1+N ), 1 ) +* +* Solve +* [T(J,J)-WR T(J,J+1) ]'* X = SCALE*( WORK1 ) +* [T(J+1,J) T(J+1,J+1)-WR] ( WORK2 ) +* + CALL DLALN2( .TRUE., 2, 1, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ ZERO, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL DSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 ) + WORK( J+N ) = X( 1, 1 ) + WORK( J+1+N ) = X( 2, 1 ) +* + VMAX = MAX( ABS( WORK( J+N ) ), + $ ABS( WORK( J+1+N ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + END IF + 170 CONTINUE +* +* Copy the vector x or Q*x to VL and normalize. +* + IF( .NOT.OVER ) THEN + CALL DCOPY( N-KI+1, WORK( KI+N ), 1, VL( KI, IS ), 1 ) +* + II = IDAMAX( N-KI+1, VL( KI, IS ), 1 ) + KI - 1 + REMAX = ONE / ABS( VL( II, IS ) ) + CALL DSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 ) +* + DO 180 K = 1, KI - 1 + VL( K, IS ) = ZERO + 180 CONTINUE +* + ELSE +* + IF( KI.LT.N ) + $ CALL DGEMV( 'N', N, N-KI, ONE, VL( 1, KI+1 ), LDVL, + $ WORK( KI+1+N ), 1, WORK( KI+N ), + $ VL( 1, KI ), 1 ) +* + II = IDAMAX( N, VL( 1, KI ), 1 ) + REMAX = ONE / ABS( VL( II, KI ) ) + CALL DSCAL( N, REMAX, VL( 1, KI ), 1 ) +* + END IF +* + ELSE +* +* Complex left eigenvector. +* +* Initial solve: +* ((T(KI,KI) T(KI,KI+1) )' - (WR - I* WI))*X = 0. +* ((T(KI+1,KI) T(KI+1,KI+1)) ) +* + IF( ABS( T( KI, KI+1 ) ).GE.ABS( T( KI+1, KI ) ) ) THEN + WORK( KI+N ) = WI / T( KI, KI+1 ) + WORK( KI+1+N2 ) = ONE + ELSE + WORK( KI+N ) = ONE + WORK( KI+1+N2 ) = -WI / T( KI+1, KI ) + END IF + WORK( KI+1+N ) = ZERO + WORK( KI+N2 ) = ZERO +* +* Form right-hand side +* + DO 190 K = KI + 2, N + WORK( K+N ) = -WORK( KI+N )*T( KI, K ) + WORK( K+N2 ) = -WORK( KI+1+N2 )*T( KI+1, K ) + 190 CONTINUE +* +* Solve complex quasi-triangular system: +* ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2 +* + VMAX = ONE + VCRIT = BIGNUM +* + JNXT = KI + 2 + DO 200 J = KI + 2, N + IF( J.LT.JNXT ) + $ GO TO 200 + J1 = J + J2 = J + JNXT = J + 1 + IF( J.LT.N ) THEN + IF( T( J+1, J ).NE.ZERO ) THEN + J2 = J + 1 + JNXT = J + 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* +* Scale if necessary to avoid overflow when +* forming the right-hand side elements. +* + IF( WORK( J ).GT.VCRIT ) THEN + REC = ONE / VMAX + CALL DSCAL( N-KI+1, REC, WORK( KI+N ), 1 ) + CALL DSCAL( N-KI+1, REC, WORK( KI+N2 ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+N ) = WORK( J+N ) - + $ DDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+N ), 1 ) + WORK( J+N2 ) = WORK( J+N2 ) - + $ DDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+N2 ), 1 ) +* +* Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 +* + CALL DLALN2( .FALSE., 1, 2, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ -WI, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL DSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 ) + CALL DSCAL( N-KI+1, SCALE, WORK( KI+N2 ), 1 ) + END IF + WORK( J+N ) = X( 1, 1 ) + WORK( J+N2 ) = X( 1, 2 ) + VMAX = MAX( ABS( WORK( J+N ) ), + $ ABS( WORK( J+N2 ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + ELSE +* +* 2-by-2 diagonal block +* +* Scale if necessary to avoid overflow when forming +* the right-hand side elements. +* + BETA = MAX( WORK( J ), WORK( J+1 ) ) + IF( BETA.GT.VCRIT ) THEN + REC = ONE / VMAX + CALL DSCAL( N-KI+1, REC, WORK( KI+N ), 1 ) + CALL DSCAL( N-KI+1, REC, WORK( KI+N2 ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+N ) = WORK( J+N ) - + $ DDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+N ), 1 ) +* + WORK( J+N2 ) = WORK( J+N2 ) - + $ DDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+N2 ), 1 ) +* + WORK( J+1+N ) = WORK( J+1+N ) - + $ DDOT( J-KI-2, T( KI+2, J+1 ), 1, + $ WORK( KI+2+N ), 1 ) +* + WORK( J+1+N2 ) = WORK( J+1+N2 ) - + $ DDOT( J-KI-2, T( KI+2, J+1 ), 1, + $ WORK( KI+2+N2 ), 1 ) +* +* Solve 2-by-2 complex linear equation +* ([T(j,j) T(j,j+1) ]'-(wr-i*wi)*I)*X = SCALE*B +* ([T(j+1,j) T(j+1,j+1)] ) +* + CALL DLALN2( .TRUE., 2, 2, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ -WI, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL DSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 ) + CALL DSCAL( N-KI+1, SCALE, WORK( KI+N2 ), 1 ) + END IF + WORK( J+N ) = X( 1, 1 ) + WORK( J+N2 ) = X( 1, 2 ) + WORK( J+1+N ) = X( 2, 1 ) + WORK( J+1+N2 ) = X( 2, 2 ) + VMAX = MAX( ABS( X( 1, 1 ) ), ABS( X( 1, 2 ) ), + $ ABS( X( 2, 1 ) ), ABS( X( 2, 2 ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + END IF + 200 CONTINUE +* +* Copy the vector x or Q*x to VL and normalize. +* + 210 CONTINUE + IF( .NOT.OVER ) THEN + CALL DCOPY( N-KI+1, WORK( KI+N ), 1, VL( KI, IS ), 1 ) + CALL DCOPY( N-KI+1, WORK( KI+N2 ), 1, VL( KI, IS+1 ), + $ 1 ) +* + EMAX = ZERO + DO 220 K = KI, N + EMAX = MAX( EMAX, ABS( VL( K, IS ) )+ + $ ABS( VL( K, IS+1 ) ) ) + 220 CONTINUE + REMAX = ONE / EMAX + CALL DSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 ) + CALL DSCAL( N-KI+1, REMAX, VL( KI, IS+1 ), 1 ) +* + DO 230 K = 1, KI - 1 + VL( K, IS ) = ZERO + VL( K, IS+1 ) = ZERO + 230 CONTINUE + ELSE + IF( KI.LT.N-1 ) THEN + CALL DGEMV( 'N', N, N-KI-1, ONE, VL( 1, KI+2 ), + $ LDVL, WORK( KI+2+N ), 1, WORK( KI+N ), + $ VL( 1, KI ), 1 ) + CALL DGEMV( 'N', N, N-KI-1, ONE, VL( 1, KI+2 ), + $ LDVL, WORK( KI+2+N2 ), 1, + $ WORK( KI+1+N2 ), VL( 1, KI+1 ), 1 ) + ELSE + CALL DSCAL( N, WORK( KI+N ), VL( 1, KI ), 1 ) + CALL DSCAL( N, WORK( KI+1+N2 ), VL( 1, KI+1 ), 1 ) + END IF +* + EMAX = ZERO + DO 240 K = 1, N + EMAX = MAX( EMAX, ABS( VL( K, KI ) )+ + $ ABS( VL( K, KI+1 ) ) ) + 240 CONTINUE + REMAX = ONE / EMAX + CALL DSCAL( N, REMAX, VL( 1, KI ), 1 ) + CALL DSCAL( N, REMAX, VL( 1, KI+1 ), 1 ) +* + END IF +* + END IF +* + IS = IS + 1 + IF( IP.NE.0 ) + $ IS = IS + 1 + 250 CONTINUE + IF( IP.EQ.-1 ) + $ IP = 0 + IF( IP.EQ.1 ) + $ IP = -1 +* + 260 CONTINUE +* + END IF +* + RETURN +* +* End of DTREVC +* + END diff --git a/costa/native/external/lapack/dtrexc.f b/costa/native/external/lapack/dtrexc.f new file mode 100644 index 000000000..aadadf095 --- /dev/null +++ b/costa/native/external/lapack/dtrexc.f @@ -0,0 +1,346 @@ + SUBROUTINE DTREXC( COMPQ, N, T, LDT, Q, LDQ, IFST, ILST, WORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER COMPQ + INTEGER IFST, ILST, INFO, LDQ, LDT, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION Q( LDQ, * ), T( LDT, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DTREXC reorders the real Schur factorization of a real matrix +* A = Q*T*Q**T, so that the diagonal block of T with row index IFST is +* moved to row ILST. +* +* The real Schur form T is reordered by an orthogonal similarity +* transformation Z**T*T*Z, and optionally the matrix Q of Schur vectors +* is updated by postmultiplying it with Z. +* +* T must be in Schur canonical form (as returned by DHSEQR), that is, +* block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each +* 2-by-2 diagonal block has its diagonal elements equal and its +* off-diagonal elements of opposite sign. +* +* Arguments +* ========= +* +* COMPQ (input) CHARACTER*1 +* = 'V': update the matrix Q of Schur vectors; +* = 'N': do not update Q. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input/output) DOUBLE PRECISION array, dimension (LDT,N) +* On entry, the upper quasi-triangular matrix T, in Schur +* Schur canonical form. +* On exit, the reordered upper quasi-triangular matrix, again +* in Schur canonical form. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) +* On entry, if COMPQ = 'V', the matrix Q of Schur vectors. +* On exit, if COMPQ = 'V', Q has been postmultiplied by the +* orthogonal transformation matrix Z which reorders T. +* If COMPQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* IFST (input/output) INTEGER +* ILST (input/output) INTEGER +* Specify the reordering of the diagonal blocks of T. +* The block with row index IFST is moved to row ILST, by a +* sequence of transpositions between adjacent blocks. +* On exit, if IFST pointed on entry to the second row of a +* 2-by-2 block, it is changed to point to the first row; ILST +* always points to the first row of the block in its final +* position (which may differ from its input value by +1 or -1). +* 1 <= IFST <= N; 1 <= ILST <= N. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* = 1: two adjacent blocks were too close to swap (the problem +* is very ill-conditioned); T may have been partially +* reordered, and ILST points to the first row of the +* current position of the block being moved. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL WANTQ + INTEGER HERE, NBF, NBL, NBNEXT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLAEXC, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode and test the input arguments. +* + INFO = 0 + WANTQ = LSAME( COMPQ, 'V' ) + IF( .NOT.WANTQ .AND. .NOT.LSAME( COMPQ, 'N' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.MAX( 1, N ) ) ) THEN + INFO = -6 + ELSE IF( IFST.LT.1 .OR. IFST.GT.N ) THEN + INFO = -7 + ELSE IF( ILST.LT.1 .OR. ILST.GT.N ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTREXC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* +* Determine the first row of specified block +* and find out it is 1 by 1 or 2 by 2. +* + IF( IFST.GT.1 ) THEN + IF( T( IFST, IFST-1 ).NE.ZERO ) + $ IFST = IFST - 1 + END IF + NBF = 1 + IF( IFST.LT.N ) THEN + IF( T( IFST+1, IFST ).NE.ZERO ) + $ NBF = 2 + END IF +* +* Determine the first row of the final block +* and find out it is 1 by 1 or 2 by 2. +* + IF( ILST.GT.1 ) THEN + IF( T( ILST, ILST-1 ).NE.ZERO ) + $ ILST = ILST - 1 + END IF + NBL = 1 + IF( ILST.LT.N ) THEN + IF( T( ILST+1, ILST ).NE.ZERO ) + $ NBL = 2 + END IF +* + IF( IFST.EQ.ILST ) + $ RETURN +* + IF( IFST.LT.ILST ) THEN +* +* Update ILST +* + IF( NBF.EQ.2 .AND. NBL.EQ.1 ) + $ ILST = ILST - 1 + IF( NBF.EQ.1 .AND. NBL.EQ.2 ) + $ ILST = ILST + 1 +* + HERE = IFST +* + 10 CONTINUE +* +* Swap block with next one below +* + IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN +* +* Current block either 1 by 1 or 2 by 2 +* + NBNEXT = 1 + IF( HERE+NBF+1.LE.N ) THEN + IF( T( HERE+NBF+1, HERE+NBF ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, NBF, NBNEXT, + $ WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + NBNEXT +* +* Test if 2 by 2 block breaks into two 1 by 1 blocks +* + IF( NBF.EQ.2 ) THEN + IF( T( HERE+1, HERE ).EQ.ZERO ) + $ NBF = 3 + END IF +* + ELSE +* +* Current block consists of two 1 by 1 blocks each of which +* must be swapped individually +* + NBNEXT = 1 + IF( HERE+3.LE.N ) THEN + IF( T( HERE+3, HERE+2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE+1, 1, NBNEXT, + $ WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + IF( NBNEXT.EQ.1 ) THEN +* +* Swap two 1 by 1 blocks, no problems possible +* + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, NBNEXT, + $ WORK, INFO ) + HERE = HERE + 1 + ELSE +* +* Recompute NBNEXT in case 2 by 2 split +* + IF( T( HERE+2, HERE+1 ).EQ.ZERO ) + $ NBNEXT = 1 + IF( NBNEXT.EQ.2 ) THEN +* +* 2 by 2 Block did not split +* + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, + $ NBNEXT, WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 2 + ELSE +* +* 2 by 2 Block did split +* + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, 1, + $ WORK, INFO ) + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE+1, 1, 1, + $ WORK, INFO ) + HERE = HERE + 2 + END IF + END IF + END IF + IF( HERE.LT.ILST ) + $ GO TO 10 +* + ELSE +* + HERE = IFST + 20 CONTINUE +* +* Swap block with next one above +* + IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN +* +* Current block either 1 by 1 or 2 by 2 +* + NBNEXT = 1 + IF( HERE.GE.3 ) THEN + IF( T( HERE-1, HERE-2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-NBNEXT, NBNEXT, + $ NBF, WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - NBNEXT +* +* Test if 2 by 2 block breaks into two 1 by 1 blocks +* + IF( NBF.EQ.2 ) THEN + IF( T( HERE+1, HERE ).EQ.ZERO ) + $ NBF = 3 + END IF +* + ELSE +* +* Current block consists of two 1 by 1 blocks each of which +* must be swapped individually +* + NBNEXT = 1 + IF( HERE.GE.3 ) THEN + IF( T( HERE-1, HERE-2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-NBNEXT, NBNEXT, + $ 1, WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + IF( NBNEXT.EQ.1 ) THEN +* +* Swap two 1 by 1 blocks, no problems possible +* + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, NBNEXT, 1, + $ WORK, INFO ) + HERE = HERE - 1 + ELSE +* +* Recompute NBNEXT in case 2 by 2 split +* + IF( T( HERE, HERE-1 ).EQ.ZERO ) + $ NBNEXT = 1 + IF( NBNEXT.EQ.2 ) THEN +* +* 2 by 2 Block did not split +* + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-1, 2, 1, + $ WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 2 + ELSE +* +* 2 by 2 Block did split +* + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, 1, + $ WORK, INFO ) + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-1, 1, 1, + $ WORK, INFO ) + HERE = HERE - 2 + END IF + END IF + END IF + IF( HERE.GT.ILST ) + $ GO TO 20 + END IF + ILST = HERE +* + RETURN +* +* End of DTREXC +* + END diff --git a/costa/native/external/lapack/dtrrfs.f b/costa/native/external/lapack/dtrrfs.f new file mode 100644 index 000000000..909298471 --- /dev/null +++ b/costa/native/external/lapack/dtrrfs.f @@ -0,0 +1,371 @@ + SUBROUTINE DTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, X, + $ LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDA, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), BERR( * ), FERR( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* DTRRFS provides error bounds and backward error estimates for the +* solution to a system of linear equations with a triangular +* coefficient matrix. +* +* The solution matrix X must be computed by DTRTRS or some other +* means before entering this routine. DTRRFS does not do iterative +* refinement because doing so cannot improve the backward error. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose = Transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input) DOUBLE PRECISION array, dimension (LDX,NRHS) +* The solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANST + INTEGER I, J, K, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DLACON, DTRMV, DTRSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTRRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A or A', depending on TRANS. +* + CALL DCOPY( N, X( 1, J ), 1, WORK( N+1 ), 1 ) + CALL DTRMV( UPLO, TRANS, DIAG, N, A, LDA, WORK( N+1 ), 1 ) + CALL DAXPY( N, -ONE, B( 1, J ), 1, WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = ABS( X( K, J ) ) + DO 30 I = 1, K + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + 30 CONTINUE + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = ABS( X( K, J ) ) + DO 50 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + 50 CONTINUE + WORK( K ) = WORK( K ) + XK + 60 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = ABS( X( K, J ) ) + DO 70 I = K, N + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + 70 CONTINUE + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = ABS( X( K, J ) ) + DO 90 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + 90 CONTINUE + WORK( K ) = WORK( K ) + XK + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A')*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = 1, K + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 110 CONTINUE + WORK( K ) = WORK( K ) + S + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = ABS( X( K, J ) ) + DO 130 I = 1, K - 1 + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 130 CONTINUE + WORK( K ) = WORK( K ) + S + 140 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, N + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 150 CONTINUE + WORK( K ) = WORK( K ) + S + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = ABS( X( K, J ) ) + DO 170 I = K + 1, N + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 170 CONTINUE + WORK( K ) = WORK( K ) + S + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use DLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL DLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)'). +* + CALL DTRSV( UPLO, TRANST, DIAG, N, A, LDA, WORK( N+1 ), + $ 1 ) + DO 220 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 230 CONTINUE + CALL DTRSV( UPLO, TRANS, DIAG, N, A, LDA, WORK( N+1 ), + $ 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of DTRRFS +* + END diff --git a/costa/native/external/lapack/dtrsen.f b/costa/native/external/lapack/dtrsen.f new file mode 100644 index 000000000..65b7f765d --- /dev/null +++ b/costa/native/external/lapack/dtrsen.f @@ -0,0 +1,457 @@ + SUBROUTINE DTRSEN( JOB, COMPQ, SELECT, N, T, LDT, Q, LDQ, WR, WI, + $ M, S, SEP, WORK, LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, JOB + INTEGER INFO, LDQ, LDT, LIWORK, LWORK, M, N + DOUBLE PRECISION S, SEP +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + DOUBLE PRECISION Q( LDQ, * ), T( LDT, * ), WI( * ), WORK( * ), + $ WR( * ) +* .. +* +* Purpose +* ======= +* +* DTRSEN reorders the real Schur factorization of a real matrix +* A = Q*T*Q**T, so that a selected cluster of eigenvalues appears in +* the leading diagonal blocks of the upper quasi-triangular matrix T, +* and the leading columns of Q form an orthonormal basis of the +* corresponding right invariant subspace. +* +* Optionally the routine computes the reciprocal condition numbers of +* the cluster of eigenvalues and/or the invariant subspace. +* +* T must be in Schur canonical form (as returned by DHSEQR), that is, +* block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each +* 2-by-2 diagonal block has its diagonal elemnts equal and its +* off-diagonal elements of opposite sign. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies whether condition numbers are required for the +* cluster of eigenvalues (S) or the invariant subspace (SEP): +* = 'N': none; +* = 'E': for eigenvalues only (S); +* = 'V': for invariant subspace only (SEP); +* = 'B': for both eigenvalues and invariant subspace (S and +* SEP). +* +* COMPQ (input) CHARACTER*1 +* = 'V': update the matrix Q of Schur vectors; +* = 'N': do not update Q. +* +* SELECT (input) LOGICAL array, dimension (N) +* SELECT specifies the eigenvalues in the selected cluster. To +* select a real eigenvalue w(j), SELECT(j) must be set to +* .TRUE.. To select a complex conjugate pair of eigenvalues +* w(j) and w(j+1), corresponding to a 2-by-2 diagonal block, +* either SELECT(j) or SELECT(j+1) or both must be set to +* .TRUE.; a complex conjugate pair of eigenvalues must be +* either both included in the cluster or both excluded. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input/output) DOUBLE PRECISION array, dimension (LDT,N) +* On entry, the upper quasi-triangular matrix T, in Schur +* canonical form. +* On exit, T is overwritten by the reordered matrix T, again in +* Schur canonical form, with the selected eigenvalues in the +* leading diagonal blocks. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) +* On entry, if COMPQ = 'V', the matrix Q of Schur vectors. +* On exit, if COMPQ = 'V', Q has been postmultiplied by the +* orthogonal transformation matrix which reorders T; the +* leading M columns of Q form an orthonormal basis for the +* specified invariant subspace. +* If COMPQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. +* LDQ >= 1; and if COMPQ = 'V', LDQ >= N. +* +* WR (output) DOUBLE PRECISION array, dimension (N) +* WI (output) DOUBLE PRECISION array, dimension (N) +* The real and imaginary parts, respectively, of the reordered +* eigenvalues of T. The eigenvalues are stored in the same +* order as on the diagonal of T, with WR(i) = T(i,i) and, if +* T(i:i+1,i:i+1) is a 2-by-2 diagonal block, WI(i) > 0 and +* WI(i+1) = -WI(i). Note that if a complex eigenvalue is +* sufficiently ill-conditioned, then its value may differ +* significantly from its value before reordering. +* +* M (output) INTEGER +* The dimension of the specified invariant subspace. +* 0 < = M <= N. +* +* S (output) DOUBLE PRECISION +* If JOB = 'E' or 'B', S is a lower bound on the reciprocal +* condition number for the selected cluster of eigenvalues. +* S cannot underestimate the true reciprocal condition number +* by more than a factor of sqrt(N). If M = 0 or N, S = 1. +* If JOB = 'N' or 'V', S is not referenced. +* +* SEP (output) DOUBLE PRECISION +* If JOB = 'V' or 'B', SEP is the estimated reciprocal +* condition number of the specified invariant subspace. If +* M = 0 or N, SEP = norm(T). +* If JOB = 'N' or 'E', SEP is not referenced. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If JOB = 'N', LWORK >= max(1,N); +* if JOB = 'E', LWORK >= M*(N-M); +* if JOB = 'V' or 'B', LWORK >= 2*M*(N-M). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace) INTEGER array, dimension (LIWORK) +* IF JOB = 'N' or 'E', IWORK is not referenced. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If JOB = 'N' or 'E', LIWORK >= 1; +* if JOB = 'V' or 'B', LIWORK >= M*(N-M). +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* = 1: reordering of T failed because some eigenvalues are too +* close to separate (the problem is very ill-conditioned); +* T may have been partially reordered, and WR and WI +* contain the eigenvalues in the same order as in T; S and +* SEP (if requested) are set to zero. +* +* Further Details +* =============== +* +* DTRSEN first collects the selected eigenvalues by computing an +* orthogonal transformation Z to move them to the top left corner of T. +* In other words, the selected eigenvalues are the eigenvalues of T11 +* in: +* +* Z'*T*Z = ( T11 T12 ) n1 +* ( 0 T22 ) n2 +* n1 n2 +* +* where N = n1+n2 and Z' means the transpose of Z. The first n1 columns +* of Z span the specified invariant subspace of T. +* +* If T has been obtained from the real Schur factorization of a matrix +* A = Q*T*Q', then the reordered real Schur factorization of A is given +* by A = (Q*Z)*(Z'*T*Z)*(Q*Z)', and the first n1 columns of Q*Z span +* the corresponding invariant subspace of A. +* +* The reciprocal condition number of the average of the eigenvalues of +* T11 may be returned in S. S lies between 0 (very badly conditioned) +* and 1 (very well conditioned). It is computed as follows. First we +* compute R so that +* +* P = ( I R ) n1 +* ( 0 0 ) n2 +* n1 n2 +* +* is the projector on the invariant subspace associated with T11. +* R is the solution of the Sylvester equation: +* +* T11*R - R*T22 = T12. +* +* Let F-norm(M) denote the Frobenius-norm of M and 2-norm(M) denote +* the two-norm of M. Then S is computed as the lower bound +* +* (1 + F-norm(R)**2)**(-1/2) +* +* on the reciprocal of 2-norm(P), the true reciprocal condition number. +* S cannot underestimate 1 / 2-norm(P) by more than a factor of +* sqrt(N). +* +* An approximate error bound for the computed average of the +* eigenvalues of T11 is +* +* EPS * norm(T) / S +* +* where EPS is the machine precision. +* +* The reciprocal condition number of the right invariant subspace +* spanned by the first n1 columns of Z (or of Q*Z) is returned in SEP. +* SEP is defined as the separation of T11 and T22: +* +* sep( T11, T22 ) = sigma-min( C ) +* +* where sigma-min(C) is the smallest singular value of the +* n1*n2-by-n1*n2 matrix +* +* C = kprod( I(n2), T11 ) - kprod( transpose(T22), I(n1) ) +* +* I(m) is an m by m identity matrix, and kprod denotes the Kronecker +* product. We estimate sigma-min(C) by the reciprocal of an estimate of +* the 1-norm of inverse(C). The true reciprocal 1-norm of inverse(C) +* cannot differ from sigma-min(C) by more than a factor of sqrt(n1*n2). +* +* When SEP is small, small changes in T can cause large changes in +* the invariant subspace. An approximate bound on the maximum angular +* error in the computed right invariant subspace is +* +* EPS * norm(T) / SEP +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, PAIR, SWAP, WANTBH, WANTQ, WANTS, + $ WANTSP + INTEGER IERR, K, KASE, KK, KS, LIWMIN, LWMIN, N1, N2, + $ NN + DOUBLE PRECISION EST, RNORM, SCALE +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLANGE + EXTERNAL LSAME, DLANGE +* .. +* .. External Subroutines .. + EXTERNAL DLACON, DLACPY, DTREXC, DTRSYL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH + WANTQ = LSAME( COMPQ, 'V' ) +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.WANTS .AND. .NOT.WANTSP ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -8 + ELSE +* +* Set M to the dimension of the specified invariant subspace, +* and test LWORK and LIWORK. +* + M = 0 + PAIR = .FALSE. + DO 10 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE + IF( K.LT.N ) THEN + IF( T( K+1, K ).EQ.ZERO ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( K ) .OR. SELECT( K+1 ) ) + $ M = M + 2 + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE +* + N1 = M + N2 = N - M + NN = N1*N2 +* + IF( WANTSP ) THEN + LWMIN = MAX( 1, 2*NN ) + LIWMIN = MAX( 1, NN ) + ELSE IF( LSAME( JOB, 'N' ) ) THEN + LWMIN = MAX( 1, N ) + LIWMIN = 1 + ELSE IF( LSAME( JOB, 'E' ) ) THEN + LWMIN = MAX( 1, NN ) + LIWMIN = 1 + END IF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -17 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTRSEN', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.N .OR. M.EQ.0 ) THEN + IF( WANTS ) + $ S = ONE + IF( WANTSP ) + $ SEP = DLANGE( '1', N, N, T, LDT, WORK ) + GO TO 40 + END IF +* +* Collect the selected blocks at the top-left corner of T. +* + KS = 0 + PAIR = .FALSE. + DO 20 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE + SWAP = SELECT( K ) + IF( K.LT.N ) THEN + IF( T( K+1, K ).NE.ZERO ) THEN + PAIR = .TRUE. + SWAP = SWAP .OR. SELECT( K+1 ) + END IF + END IF + IF( SWAP ) THEN + KS = KS + 1 +* +* Swap the K-th block to position KS. +* + IERR = 0 + KK = K + IF( K.NE.KS ) + $ CALL DTREXC( COMPQ, N, T, LDT, Q, LDQ, KK, KS, WORK, + $ IERR ) + IF( IERR.EQ.1 .OR. IERR.EQ.2 ) THEN +* +* Blocks too close to swap: exit. +* + INFO = 1 + IF( WANTS ) + $ S = ZERO + IF( WANTSP ) + $ SEP = ZERO + GO TO 40 + END IF + IF( PAIR ) + $ KS = KS + 1 + END IF + END IF + 20 CONTINUE +* + IF( WANTS ) THEN +* +* Solve Sylvester equation for R: +* +* T11*R - R*T22 = scale*T12 +* + CALL DLACPY( 'F', N1, N2, T( 1, N1+1 ), LDT, WORK, N1 ) + CALL DTRSYL( 'N', 'N', -1, N1, N2, T, LDT, T( N1+1, N1+1 ), + $ LDT, WORK, N1, SCALE, IERR ) +* +* Estimate the reciprocal of the condition number of the cluster +* of eigenvalues. +* + RNORM = DLANGE( 'F', N1, N2, WORK, N1, WORK ) + IF( RNORM.EQ.ZERO ) THEN + S = ONE + ELSE + S = SCALE / ( SQRT( SCALE*SCALE / RNORM+RNORM )* + $ SQRT( RNORM ) ) + END IF + END IF +* + IF( WANTSP ) THEN +* +* Estimate sep(T11,T22). +* + EST = ZERO + KASE = 0 + 30 CONTINUE + CALL DLACON( NN, WORK( NN+1 ), WORK, IWORK, EST, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve T11*R - R*T22 = scale*X. +* + CALL DTRSYL( 'N', 'N', -1, N1, N2, T, LDT, + $ T( N1+1, N1+1 ), LDT, WORK, N1, SCALE, + $ IERR ) + ELSE +* +* Solve T11'*R - R*T22' = scale*X. +* + CALL DTRSYL( 'T', 'T', -1, N1, N2, T, LDT, + $ T( N1+1, N1+1 ), LDT, WORK, N1, SCALE, + $ IERR ) + END IF + GO TO 30 + END IF +* + SEP = SCALE / EST + END IF +* + 40 CONTINUE +* +* Store the output eigenvalues in WR and WI. +* + DO 50 K = 1, N + WR( K ) = T( K, K ) + WI( K ) = ZERO + 50 CONTINUE + DO 60 K = 1, N - 1 + IF( T( K+1, K ).NE.ZERO ) THEN + WI( K ) = SQRT( ABS( T( K, K+1 ) ) )* + $ SQRT( ABS( T( K+1, K ) ) ) + WI( K+1 ) = -WI( K ) + END IF + 60 CONTINUE +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of DTRSEN +* + END diff --git a/costa/native/external/lapack/dtrsna.f b/costa/native/external/lapack/dtrsna.f new file mode 100644 index 000000000..11b4ced40 --- /dev/null +++ b/costa/native/external/lapack/dtrsna.f @@ -0,0 +1,493 @@ + SUBROUTINE DTRSNA( JOB, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, + $ LDVR, S, SEP, MM, M, WORK, LDWORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, JOB + INTEGER INFO, LDT, LDVL, LDVR, LDWORK, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + DOUBLE PRECISION S( * ), SEP( * ), T( LDT, * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( LDWORK, * ) +* .. +* +* Purpose +* ======= +* +* DTRSNA estimates reciprocal condition numbers for specified +* eigenvalues and/or right eigenvectors of a real upper +* quasi-triangular matrix T (or of any matrix Q*T*Q**T with Q +* orthogonal). +* +* T must be in Schur canonical form (as returned by DHSEQR), that is, +* block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each +* 2-by-2 diagonal block has its diagonal elements equal and its +* off-diagonal elements of opposite sign. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies whether condition numbers are required for +* eigenvalues (S) or eigenvectors (SEP): +* = 'E': for eigenvalues only (S); +* = 'V': for eigenvectors only (SEP); +* = 'B': for both eigenvalues and eigenvectors (S and SEP). +* +* HOWMNY (input) CHARACTER*1 +* = 'A': compute condition numbers for all eigenpairs; +* = 'S': compute condition numbers for selected eigenpairs +* specified by the array SELECT. +* +* SELECT (input) LOGICAL array, dimension (N) +* If HOWMNY = 'S', SELECT specifies the eigenpairs for which +* condition numbers are required. To select condition numbers +* for the eigenpair corresponding to a real eigenvalue w(j), +* SELECT(j) must be set to .TRUE.. To select condition numbers +* corresponding to a complex conjugate pair of eigenvalues w(j) +* and w(j+1), either SELECT(j) or SELECT(j+1) or both, must be +* set to .TRUE.. +* If HOWMNY = 'A', SELECT is not referenced. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input) DOUBLE PRECISION array, dimension (LDT,N) +* The upper quasi-triangular matrix T, in Schur canonical form. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* VL (input) DOUBLE PRECISION array, dimension (LDVL,M) +* If JOB = 'E' or 'B', VL must contain left eigenvectors of T +* (or of any Q*T*Q**T with Q orthogonal), corresponding to the +* eigenpairs specified by HOWMNY and SELECT. The eigenvectors +* must be stored in consecutive columns of VL, as returned by +* DHSEIN or DTREVC. +* If JOB = 'V', VL is not referenced. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. +* LDVL >= 1; and if JOB = 'E' or 'B', LDVL >= N. +* +* VR (input) DOUBLE PRECISION array, dimension (LDVR,M) +* If JOB = 'E' or 'B', VR must contain right eigenvectors of T +* (or of any Q*T*Q**T with Q orthogonal), corresponding to the +* eigenpairs specified by HOWMNY and SELECT. The eigenvectors +* must be stored in consecutive columns of VR, as returned by +* DHSEIN or DTREVC. +* If JOB = 'V', VR is not referenced. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. +* LDVR >= 1; and if JOB = 'E' or 'B', LDVR >= N. +* +* S (output) DOUBLE PRECISION array, dimension (MM) +* If JOB = 'E' or 'B', the reciprocal condition numbers of the +* selected eigenvalues, stored in consecutive elements of the +* array. For a complex conjugate pair of eigenvalues two +* consecutive elements of S are set to the same value. Thus +* S(j), SEP(j), and the j-th columns of VL and VR all +* correspond to the same eigenpair (but not in general the +* j-th eigenpair, unless all eigenpairs are selected). +* If JOB = 'V', S is not referenced. +* +* SEP (output) DOUBLE PRECISION array, dimension (MM) +* If JOB = 'V' or 'B', the estimated reciprocal condition +* numbers of the selected eigenvectors, stored in consecutive +* elements of the array. For a complex eigenvector two +* consecutive elements of SEP are set to the same value. If +* the eigenvalues cannot be reordered to compute SEP(j), SEP(j) +* is set to 0; this can only occur when the true value would be +* very small anyway. +* If JOB = 'E', SEP is not referenced. +* +* MM (input) INTEGER +* The number of elements in the arrays S (if JOB = 'E' or 'B') +* and/or SEP (if JOB = 'V' or 'B'). MM >= M. +* +* M (output) INTEGER +* The number of elements of the arrays S and/or SEP actually +* used to store the estimated condition numbers. +* If HOWMNY = 'A', M is set to N. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LDWORK,N+1) +* If JOB = 'E', WORK is not referenced. +* +* LDWORK (input) INTEGER +* The leading dimension of the array WORK. +* LDWORK >= 1; and if JOB = 'V' or 'B', LDWORK >= N. +* +* IWORK (workspace) INTEGER array, dimension (N) +* If JOB = 'E', IWORK is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The reciprocal of the condition number of an eigenvalue lambda is +* defined as +* +* S(lambda) = |v'*u| / (norm(u)*norm(v)) +* +* where u and v are the right and left eigenvectors of T corresponding +* to lambda; v' denotes the conjugate-transpose of v, and norm(u) +* denotes the Euclidean norm. These reciprocal condition numbers always +* lie between zero (very badly conditioned) and one (very well +* conditioned). If n = 1, S(lambda) is defined to be 1. +* +* An approximate error bound for a computed eigenvalue W(i) is given by +* +* EPS * norm(T) / S(i) +* +* where EPS is the machine precision. +* +* The reciprocal of the condition number of the right eigenvector u +* corresponding to lambda is defined as follows. Suppose +* +* T = ( lambda c ) +* ( 0 T22 ) +* +* Then the reciprocal condition number is +* +* SEP( lambda, T22 ) = sigma-min( T22 - lambda*I ) +* +* where sigma-min denotes the smallest singular value. We approximate +* the smallest singular value by the reciprocal of an estimate of the +* one-norm of the inverse of T22 - lambda*I. If n = 1, SEP(1) is +* defined to be abs(T(1,1)). +* +* An approximate error bound for a computed right eigenvector VR(i) +* is given by +* +* EPS * norm(T) / SEP(i) +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL PAIR, SOMCON, WANTBH, WANTS, WANTSP + INTEGER I, IERR, IFST, ILST, J, K, KASE, KS, N2, NN + DOUBLE PRECISION BIGNUM, COND, CS, DELTA, DUMM, EPS, EST, LNRM, + $ MU, PROD, PROD1, PROD2, RNRM, SCALE, SMLNUM, SN +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUMMY( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT, DLAMCH, DLAPY2, DNRM2 + EXTERNAL LSAME, DDOT, DLAMCH, DLAPY2, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DLACON, DLACPY, DLAQTR, DTREXC, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH +* + SOMCON = LSAME( HOWMNY, 'S' ) +* + INFO = 0 + IF( .NOT.WANTS .AND. .NOT.WANTSP ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVL.LT.1 .OR. ( WANTS .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( WANTS .AND. LDVR.LT.N ) ) THEN + INFO = -10 + ELSE +* +* Set M to the number of eigenpairs for which condition numbers +* are required, and test MM. +* + IF( SOMCON ) THEN + M = 0 + PAIR = .FALSE. + DO 10 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE + IF( K.LT.N ) THEN + IF( T( K+1, K ).EQ.ZERO ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( K ) .OR. SELECT( K+1 ) ) + $ M = M + 2 + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE + ELSE + M = N + END IF +* + IF( MM.LT.M ) THEN + INFO = -13 + ELSE IF( LDWORK.LT.1 .OR. ( WANTSP .AND. LDWORK.LT.N ) ) THEN + INFO = -16 + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTRSNA', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( SOMCON ) THEN + IF( .NOT.SELECT( 1 ) ) + $ RETURN + END IF + IF( WANTS ) + $ S( 1 ) = ONE + IF( WANTSP ) + $ SEP( 1 ) = ABS( T( 1, 1 ) ) + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* + KS = 0 + PAIR = .FALSE. + DO 60 K = 1, N +* +* Determine whether T(k,k) begins a 1-by-1 or 2-by-2 block. +* + IF( PAIR ) THEN + PAIR = .FALSE. + GO TO 60 + ELSE + IF( K.LT.N ) + $ PAIR = T( K+1, K ).NE.ZERO + END IF +* +* Determine whether condition numbers are required for the k-th +* eigenpair. +* + IF( SOMCON ) THEN + IF( PAIR ) THEN + IF( .NOT.SELECT( K ) .AND. .NOT.SELECT( K+1 ) ) + $ GO TO 60 + ELSE + IF( .NOT.SELECT( K ) ) + $ GO TO 60 + END IF + END IF +* + KS = KS + 1 +* + IF( WANTS ) THEN +* +* Compute the reciprocal condition number of the k-th +* eigenvalue. +* + IF( .NOT.PAIR ) THEN +* +* Real eigenvalue. +* + PROD = DDOT( N, VR( 1, KS ), 1, VL( 1, KS ), 1 ) + RNRM = DNRM2( N, VR( 1, KS ), 1 ) + LNRM = DNRM2( N, VL( 1, KS ), 1 ) + S( KS ) = ABS( PROD ) / ( RNRM*LNRM ) + ELSE +* +* Complex eigenvalue. +* + PROD1 = DDOT( N, VR( 1, KS ), 1, VL( 1, KS ), 1 ) + PROD1 = PROD1 + DDOT( N, VR( 1, KS+1 ), 1, VL( 1, KS+1 ), + $ 1 ) + PROD2 = DDOT( N, VL( 1, KS ), 1, VR( 1, KS+1 ), 1 ) + PROD2 = PROD2 - DDOT( N, VL( 1, KS+1 ), 1, VR( 1, KS ), + $ 1 ) + RNRM = DLAPY2( DNRM2( N, VR( 1, KS ), 1 ), + $ DNRM2( N, VR( 1, KS+1 ), 1 ) ) + LNRM = DLAPY2( DNRM2( N, VL( 1, KS ), 1 ), + $ DNRM2( N, VL( 1, KS+1 ), 1 ) ) + COND = DLAPY2( PROD1, PROD2 ) / ( RNRM*LNRM ) + S( KS ) = COND + S( KS+1 ) = COND + END IF + END IF +* + IF( WANTSP ) THEN +* +* Estimate the reciprocal condition number of the k-th +* eigenvector. +* +* Copy the matrix T to the array WORK and swap the diagonal +* block beginning at T(k,k) to the (1,1) position. +* + CALL DLACPY( 'Full', N, N, T, LDT, WORK, LDWORK ) + IFST = K + ILST = 1 + CALL DTREXC( 'No Q', N, WORK, LDWORK, DUMMY, 1, IFST, ILST, + $ WORK( 1, N+1 ), IERR ) +* + IF( IERR.EQ.1 .OR. IERR.EQ.2 ) THEN +* +* Could not swap because blocks not well separated +* + SCALE = ONE + EST = BIGNUM + ELSE +* +* Reordering successful +* + IF( WORK( 2, 1 ).EQ.ZERO ) THEN +* +* Form C = T22 - lambda*I in WORK(2:N,2:N). +* + DO 20 I = 2, N + WORK( I, I ) = WORK( I, I ) - WORK( 1, 1 ) + 20 CONTINUE + N2 = 1 + NN = N - 1 + ELSE +* +* Triangularize the 2 by 2 block by unitary +* transformation U = [ cs i*ss ] +* [ i*ss cs ]. +* such that the (1,1) position of WORK is complex +* eigenvalue lambda with positive imaginary part. (2,2) +* position of WORK is the complex eigenvalue lambda +* with negative imaginary part. +* + MU = SQRT( ABS( WORK( 1, 2 ) ) )* + $ SQRT( ABS( WORK( 2, 1 ) ) ) + DELTA = DLAPY2( MU, WORK( 2, 1 ) ) + CS = MU / DELTA + SN = -WORK( 2, 1 ) / DELTA +* +* Form +* +* C' = WORK(2:N,2:N) + i*[rwork(1) ..... rwork(n-1) ] +* [ mu ] +* [ .. ] +* [ .. ] +* [ mu ] +* where C' is conjugate transpose of complex matrix C, +* and RWORK is stored starting in the N+1-st column of +* WORK. +* + DO 30 J = 3, N + WORK( 2, J ) = CS*WORK( 2, J ) + WORK( J, J ) = WORK( J, J ) - WORK( 1, 1 ) + 30 CONTINUE + WORK( 2, 2 ) = ZERO +* + WORK( 1, N+1 ) = TWO*MU + DO 40 I = 2, N - 1 + WORK( I, N+1 ) = SN*WORK( 1, I+1 ) + 40 CONTINUE + N2 = 2 + NN = 2*( N-1 ) + END IF +* +* Estimate norm(inv(C')) +* + EST = ZERO + KASE = 0 + 50 CONTINUE + CALL DLACON( NN, WORK( 1, N+2 ), WORK( 1, N+4 ), IWORK, + $ EST, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN + IF( N2.EQ.1 ) THEN +* +* Real eigenvalue: solve C'*x = scale*c. +* + CALL DLAQTR( .TRUE., .TRUE., N-1, WORK( 2, 2 ), + $ LDWORK, DUMMY, DUMM, SCALE, + $ WORK( 1, N+4 ), WORK( 1, N+6 ), + $ IERR ) + ELSE +* +* Complex eigenvalue: solve +* C'*(p+iq) = scale*(c+id) in real arithmetic. +* + CALL DLAQTR( .TRUE., .FALSE., N-1, WORK( 2, 2 ), + $ LDWORK, WORK( 1, N+1 ), MU, SCALE, + $ WORK( 1, N+4 ), WORK( 1, N+6 ), + $ IERR ) + END IF + ELSE + IF( N2.EQ.1 ) THEN +* +* Real eigenvalue: solve C*x = scale*c. +* + CALL DLAQTR( .FALSE., .TRUE., N-1, WORK( 2, 2 ), + $ LDWORK, DUMMY, DUMM, SCALE, + $ WORK( 1, N+4 ), WORK( 1, N+6 ), + $ IERR ) + ELSE +* +* Complex eigenvalue: solve +* C*(p+iq) = scale*(c+id) in real arithmetic. +* + CALL DLAQTR( .FALSE., .FALSE., N-1, + $ WORK( 2, 2 ), LDWORK, + $ WORK( 1, N+1 ), MU, SCALE, + $ WORK( 1, N+4 ), WORK( 1, N+6 ), + $ IERR ) +* + END IF + END IF +* + GO TO 50 + END IF + END IF +* + SEP( KS ) = SCALE / MAX( EST, SMLNUM ) + IF( PAIR ) + $ SEP( KS+1 ) = SEP( KS ) + END IF +* + IF( PAIR ) + $ KS = KS + 1 +* + 60 CONTINUE + RETURN +* +* End of DTRSNA +* + END diff --git a/costa/native/external/lapack/dtrsyl.f b/costa/native/external/lapack/dtrsyl.f new file mode 100644 index 000000000..5da5c3c17 --- /dev/null +++ b/costa/native/external/lapack/dtrsyl.f @@ -0,0 +1,914 @@ + SUBROUTINE DTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, + $ LDC, SCALE, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER TRANA, TRANB + INTEGER INFO, ISGN, LDA, LDB, LDC, M, N + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* DTRSYL solves the real Sylvester matrix equation: +* +* op(A)*X + X*op(B) = scale*C or +* op(A)*X - X*op(B) = scale*C, +* +* where op(A) = A or A**T, and A and B are both upper quasi- +* triangular. A is M-by-M and B is N-by-N; the right hand side C and +* the solution X are M-by-N; and scale is an output scale factor, set +* <= 1 to avoid overflow in X. +* +* A and B must be in Schur canonical form (as returned by DHSEQR), that +* is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; +* each 2-by-2 diagonal block has its diagonal elements equal and its +* off-diagonal elements of opposite sign. +* +* Arguments +* ========= +* +* TRANA (input) CHARACTER*1 +* Specifies the option op(A): +* = 'N': op(A) = A (No transpose) +* = 'T': op(A) = A**T (Transpose) +* = 'C': op(A) = A**H (Conjugate transpose = Transpose) +* +* TRANB (input) CHARACTER*1 +* Specifies the option op(B): +* = 'N': op(B) = B (No transpose) +* = 'T': op(B) = B**T (Transpose) +* = 'C': op(B) = B**H (Conjugate transpose = Transpose) +* +* ISGN (input) INTEGER +* Specifies the sign in the equation: +* = +1: solve op(A)*X + X*op(B) = scale*C +* = -1: solve op(A)*X - X*op(B) = scale*C +* +* M (input) INTEGER +* The order of the matrix A, and the number of rows in the +* matrices X and C. M >= 0. +* +* N (input) INTEGER +* The order of the matrix B, and the number of columns in the +* matrices X and C. N >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,M) +* The upper quasi-triangular matrix A, in Schur canonical form. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input) DOUBLE PRECISION array, dimension (LDB,N) +* The upper quasi-triangular matrix B, in Schur canonical form. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) +* On entry, the M-by-N right hand side matrix C. +* On exit, C is overwritten by the solution matrix X. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M) +* +* SCALE (output) DOUBLE PRECISION +* The scale factor, scale, set <= 1 to avoid overflow in X. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* = 1: A and B have common or very close eigenvalues; perturbed +* values were used to solve the equation (but the matrices +* A and B are unchanged). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRNA, NOTRNB + INTEGER IERR, J, K, K1, K2, KNEXT, L, L1, L2, LNEXT + DOUBLE PRECISION A11, BIGNUM, DA11, DB, EPS, SCALOC, SGN, SMIN, + $ SMLNUM, SUML, SUMR, XNORM +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUM( 1 ), VEC( 2, 2 ), X( 2, 2 ) +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT, DLAMCH, DLANGE + EXTERNAL LSAME, DDOT, DLAMCH, DLANGE +* .. +* .. External Subroutines .. + EXTERNAL DLALN2, DLASY2, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and Test input parameters +* + NOTRNA = LSAME( TRANA, 'N' ) + NOTRNB = LSAME( TRANB, 'N' ) +* + INFO = 0 + IF( .NOT.NOTRNA .AND. .NOT.LSAME( TRANA, 'T' ) .AND. .NOT. + $ LSAME( TRANA, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRNB .AND. .NOT.LSAME( TRANB, 'T' ) .AND. .NOT. + $ LSAME( TRANB, 'C' ) ) THEN + INFO = -2 + ELSE IF( ISGN.NE.1 .AND. ISGN.NE.-1 ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTRSYL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Set constants to control overflow +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM*DBLE( M*N ) / EPS + BIGNUM = ONE / SMLNUM +* + SMIN = MAX( SMLNUM, EPS*DLANGE( 'M', M, M, A, LDA, DUM ), + $ EPS*DLANGE( 'M', N, N, B, LDB, DUM ) ) +* + SCALE = ONE + SGN = ISGN +* + IF( NOTRNA .AND. NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* bottom-left corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* M L-1 +* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)]. +* I=K+1 J=1 +* +* Start column loop (index = L) +* L1 (L2) : column index of the first (first) row of X(K,L). +* + LNEXT = 1 + DO 60 L = 1, N + IF( L.LT.LNEXT ) + $ GO TO 60 + IF( L.EQ.N ) THEN + L1 = L + L2 = L + ELSE + IF( B( L+1, L ).NE.ZERO ) THEN + L1 = L + L2 = L + 1 + LNEXT = L + 2 + ELSE + L1 = L + L2 = L + LNEXT = L + 1 + END IF + END IF +* +* Start row loop (index = K) +* K1 (K2): row index of the first (last) row of X(K,L). +* + KNEXT = M + DO 50 K = M, 1, -1 + IF( K.GT.KNEXT ) + $ GO TO 50 + IF( K.EQ.1 ) THEN + K1 = K + K2 = K + ELSE + IF( A( K, K-1 ).NE.ZERO ) THEN + K1 = K - 1 + K2 = K + KNEXT = K - 2 + ELSE + K1 = K + K2 = K + KNEXT = K - 1 + END IF + END IF +* + IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN + SUML = DDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L1 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) + SCALOC = ONE +* + A11 = A( K1, K1 ) + SGN*B( L1, L1 ) + DA11 = ABS( A11 ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( VEC( 1, 1 ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11 +* + IF( SCALOC.NE.ONE ) THEN + DO 10 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 10 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) +* + ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN +* + SUML = DDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = DDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + CALL DLALN2( .FALSE., 2, 1, SMIN, ONE, A( K1, K1 ), + $ LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 20 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 20 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K2, L1 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN +* + SUML = DDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L1 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) ) +* + SUML = DDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L2 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) ) +* + CALL DLALN2( .TRUE., 2, 1, SMIN, ONE, B( L1, L1 ), + $ LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 30 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 30 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN +* + SUML = DDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L2 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = DDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L2 ), 1 ) + SUMR = DDOT( L1-1, C( K2, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR ) +* + CALL DLASY2( .FALSE., .FALSE., ISGN, 2, 2, + $ A( K1, K1 ), LDA, B( L1, L1 ), LDB, VEC, + $ 2, SCALOC, X, 2, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 40 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 40 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 1, 2 ) + C( K2, L1 ) = X( 2, 1 ) + C( K2, L2 ) = X( 2, 2 ) + END IF +* + 50 CONTINUE +* + 60 CONTINUE +* + ELSE IF( .NOT.NOTRNA .AND. NOTRNB ) THEN +* +* Solve A' *X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* upper-left corner column by column by +* +* A(K,K)'*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* K-1 L-1 +* R(K,L) = SUM [A(I,K)'*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)] +* I=1 J=1 +* +* Start column loop (index = L) +* L1 (L2): column index of the first (last) row of X(K,L) +* + LNEXT = 1 + DO 120 L = 1, N + IF( L.LT.LNEXT ) + $ GO TO 120 + IF( L.EQ.N ) THEN + L1 = L + L2 = L + ELSE + IF( B( L+1, L ).NE.ZERO ) THEN + L1 = L + L2 = L + 1 + LNEXT = L + 2 + ELSE + L1 = L + L2 = L + LNEXT = L + 1 + END IF + END IF +* +* Start row loop (index = K) +* K1 (K2): row index of the first (last) row of X(K,L) +* + KNEXT = 1 + DO 110 K = 1, M + IF( K.LT.KNEXT ) + $ GO TO 110 + IF( K.EQ.M ) THEN + K1 = K + K2 = K + ELSE + IF( A( K+1, K ).NE.ZERO ) THEN + K1 = K + K2 = K + 1 + KNEXT = K + 2 + ELSE + K1 = K + K2 = K + KNEXT = K + 1 + END IF + END IF +* + IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) + SCALOC = ONE +* + A11 = A( K1, K1 ) + SGN*B( L1, L1 ) + DA11 = ABS( A11 ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( VEC( 1, 1 ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11 +* + IF( SCALOC.NE.ONE ) THEN + DO 70 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 70 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) +* + ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN +* + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + CALL DLALN2( .TRUE., 2, 1, SMIN, ONE, A( K1, K1 ), + $ LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 80 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 80 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K2, L1 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN +* + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) ) +* + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) ) +* + CALL DLALN2( .TRUE., 2, 1, SMIN, ONE, B( L1, L1 ), + $ LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 90 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 90 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN +* + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( K1-1, A( 1, K2 ), 1, C( 1, L2 ), 1 ) + SUMR = DDOT( L1-1, C( K2, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR ) +* + CALL DLASY2( .TRUE., .FALSE., ISGN, 2, 2, A( K1, K1 ), + $ LDA, B( L1, L1 ), LDB, VEC, 2, SCALOC, X, + $ 2, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 100 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 100 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 1, 2 ) + C( K2, L1 ) = X( 2, 1 ) + C( K2, L2 ) = X( 2, 2 ) + END IF +* + 110 CONTINUE + 120 CONTINUE +* + ELSE IF( .NOT.NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A'*X + ISGN*X*B' = scale*C. +* +* The (K,L)th block of X is determined starting from +* top-right corner column by column by +* +* A(K,K)'*X(K,L) + ISGN*X(K,L)*B(L,L)' = C(K,L) - R(K,L) +* +* Where +* K-1 N +* R(K,L) = SUM [A(I,K)'*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)']. +* I=1 J=L+1 +* +* Start column loop (index = L) +* L1 (L2): column index of the first (last) row of X(K,L) +* + LNEXT = N + DO 180 L = N, 1, -1 + IF( L.GT.LNEXT ) + $ GO TO 180 + IF( L.EQ.1 ) THEN + L1 = L + L2 = L + ELSE + IF( B( L, L-1 ).NE.ZERO ) THEN + L1 = L - 1 + L2 = L + LNEXT = L - 2 + ELSE + L1 = L + L2 = L + LNEXT = L - 1 + END IF + END IF +* +* Start row loop (index = K) +* K1 (K2): row index of the first (last) row of X(K,L) +* + KNEXT = 1 + DO 170 K = 1, M + IF( K.LT.KNEXT ) + $ GO TO 170 + IF( K.EQ.M ) THEN + K1 = K + K2 = K + ELSE + IF( A( K+1, K ).NE.ZERO ) THEN + K1 = K + K2 = K + 1 + KNEXT = K + 2 + ELSE + K1 = K + K2 = K + KNEXT = K + 1 + END IF + END IF +* + IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( N-L1, C( K1, MIN( L1+1, N ) ), LDC, + $ B( L1, MIN( L1+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) + SCALOC = ONE +* + A11 = A( K1, K1 ) + SGN*B( L1, L1 ) + DA11 = ABS( A11 ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( VEC( 1, 1 ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11 +* + IF( SCALOC.NE.ONE ) THEN + DO 130 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 130 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) +* + ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN +* + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + CALL DLALN2( .TRUE., 2, 1, SMIN, ONE, A( K1, K1 ), + $ LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 140 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 140 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K2, L1 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN +* + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) ) +* + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 ) + SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) ) +* + CALL DLALN2( .FALSE., 2, 1, SMIN, ONE, B( L1, L1 ), + $ LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 150 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 150 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN +* + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 ) + SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( K1-1, A( 1, K2 ), 1, C( 1, L2 ), 1 ) + SUMR = DDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR ) +* + CALL DLASY2( .TRUE., .TRUE., ISGN, 2, 2, A( K1, K1 ), + $ LDA, B( L1, L1 ), LDB, VEC, 2, SCALOC, X, + $ 2, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 160 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 160 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 1, 2 ) + C( K2, L1 ) = X( 2, 1 ) + C( K2, L2 ) = X( 2, 2 ) + END IF +* + 170 CONTINUE + 180 CONTINUE +* + ELSE IF( NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B' = scale*C. +* +* The (K,L)th block of X is determined starting from +* bottom-right corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)' = C(K,L) - R(K,L) +* +* Where +* M N +* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)']. +* I=K+1 J=L+1 +* +* Start column loop (index = L) +* L1 (L2): column index of the first (last) row of X(K,L) +* + LNEXT = N + DO 240 L = N, 1, -1 + IF( L.GT.LNEXT ) + $ GO TO 240 + IF( L.EQ.1 ) THEN + L1 = L + L2 = L + ELSE + IF( B( L, L-1 ).NE.ZERO ) THEN + L1 = L - 1 + L2 = L + LNEXT = L - 2 + ELSE + L1 = L + L2 = L + LNEXT = L - 1 + END IF + END IF +* +* Start row loop (index = K) +* K1 (K2): row index of the first (last) row of X(K,L) +* + KNEXT = M + DO 230 K = M, 1, -1 + IF( K.GT.KNEXT ) + $ GO TO 230 + IF( K.EQ.1 ) THEN + K1 = K + K2 = K + ELSE + IF( A( K, K-1 ).NE.ZERO ) THEN + K1 = K - 1 + K2 = K + KNEXT = K - 2 + ELSE + K1 = K + K2 = K + KNEXT = K - 1 + END IF + END IF +* + IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN + SUML = DDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L1 ), 1 ) + SUMR = DDOT( N-L1, C( K1, MIN( L1+1, N ) ), LDC, + $ B( L1, MIN( L1+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) + SCALOC = ONE +* + A11 = A( K1, K1 ) + SGN*B( L1, L1 ) + DA11 = ABS( A11 ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( VEC( 1, 1 ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11 +* + IF( SCALOC.NE.ONE ) THEN + DO 190 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 190 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) +* + ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN +* + SUML = DDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = DDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + CALL DLALN2( .FALSE., 2, 1, SMIN, ONE, A( K1, K1 ), + $ LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 200 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 200 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K2, L1 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN +* + SUML = DDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L1 ), 1 ) + SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) ) +* + SUML = DDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L2 ), 1 ) + SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) ) +* + CALL DLALN2( .FALSE., 2, 1, SMIN, ONE, B( L1, L1 ), + $ LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 210 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 210 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN +* + SUML = DDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L2 ), 1 ) + SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = DDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L2 ), 1 ) + SUMR = DDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR ) +* + CALL DLASY2( .FALSE., .TRUE., ISGN, 2, 2, A( K1, K1 ), + $ LDA, B( L1, L1 ), LDB, VEC, 2, SCALOC, X, + $ 2, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 220 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 220 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 1, 2 ) + C( K2, L1 ) = X( 2, 1 ) + C( K2, L2 ) = X( 2, 2 ) + END IF +* + 230 CONTINUE + 240 CONTINUE +* + END IF +* + RETURN +* +* End of DTRSYL +* + END diff --git a/costa/native/external/lapack/dtrti2.f b/costa/native/external/lapack/dtrti2.f new file mode 100644 index 000000000..1432ea152 --- /dev/null +++ b/costa/native/external/lapack/dtrti2.f @@ -0,0 +1,147 @@ + SUBROUTINE DTRTI2( UPLO, DIAG, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* DTRTI2 computes the inverse of a real upper or lower triangular +* matrix. +* +* This is the Level 2 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the triangular matrix A. If UPLO = 'U', the +* leading n by n upper triangular part of the array A contains +* the upper triangular matrix, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n by n lower triangular part of the array A contains +* the lower triangular matrix, and the strictly upper +* triangular part of A is not referenced. If DIAG = 'U', the +* diagonal elements of A are also not referenced and are +* assumed to be 1. +* +* On exit, the (triangular) inverse of the original matrix, in +* the same storage format. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DTRMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTRTI2', -INFO ) + RETURN + END IF +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix. +* + DO 10 J = 1, N + IF( NOUNIT ) THEN + A( J, J ) = ONE / A( J, J ) + AJJ = -A( J, J ) + ELSE + AJJ = -ONE + END IF +* +* Compute elements 1:j-1 of j-th column. +* + CALL DTRMV( 'Upper', 'No transpose', DIAG, J-1, A, LDA, + $ A( 1, J ), 1 ) + CALL DSCAL( J-1, AJJ, A( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* Compute inverse of lower triangular matrix. +* + DO 20 J = N, 1, -1 + IF( NOUNIT ) THEN + A( J, J ) = ONE / A( J, J ) + AJJ = -A( J, J ) + ELSE + AJJ = -ONE + END IF + IF( J.LT.N ) THEN +* +* Compute elements j+1:n of j-th column. +* + CALL DTRMV( 'Lower', 'No transpose', DIAG, N-J, + $ A( J+1, J+1 ), LDA, A( J+1, J ), 1 ) + CALL DSCAL( N-J, AJJ, A( J+1, J ), 1 ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of DTRTI2 +* + END diff --git a/costa/native/external/lapack/dtrtri.f b/costa/native/external/lapack/dtrtri.f new file mode 100644 index 000000000..70f05ed4e --- /dev/null +++ b/costa/native/external/lapack/dtrtri.f @@ -0,0 +1,177 @@ + SUBROUTINE DTRTRI( UPLO, DIAG, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* DTRTRI computes the inverse of a real upper or lower triangular +* matrix A. +* +* This is the Level 3 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the triangular matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of the array A contains +* the upper triangular matrix, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of the array A contains +* the lower triangular matrix, and the strictly upper +* triangular part of A is not referenced. If DIAG = 'U', the +* diagonal elements of A are also not referenced and are +* assumed to be 1. +* On exit, the (triangular) inverse of the original matrix, in +* the same storage format. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, A(i,i) is exactly zero. The triangular +* matrix is singular and its inverse can not be computed. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JB, NB, NN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DTRMM, DTRSM, DTRTI2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTRTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity if non-unit. +* + IF( NOUNIT ) THEN + DO 10 INFO = 1, N + IF( A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + INFO = 0 + END IF +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'DTRTRI', UPLO // DIAG, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL DTRTI2( UPLO, DIAG, N, A, LDA, INFO ) + ELSE +* +* Use blocked code +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix +* + DO 20 J = 1, N, NB + JB = MIN( NB, N-J+1 ) +* +* Compute rows 1:j-1 of current block column +* + CALL DTRMM( 'Left', 'Upper', 'No transpose', DIAG, J-1, + $ JB, ONE, A, LDA, A( 1, J ), LDA ) + CALL DTRSM( 'Right', 'Upper', 'No transpose', DIAG, J-1, + $ JB, -ONE, A( J, J ), LDA, A( 1, J ), LDA ) +* +* Compute inverse of current diagonal block +* + CALL DTRTI2( 'Upper', DIAG, JB, A( J, J ), LDA, INFO ) + 20 CONTINUE + ELSE +* +* Compute inverse of lower triangular matrix +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 30 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) + IF( J+JB.LE.N ) THEN +* +* Compute rows j+jb:n of current block column +* + CALL DTRMM( 'Left', 'Lower', 'No transpose', DIAG, + $ N-J-JB+1, JB, ONE, A( J+JB, J+JB ), LDA, + $ A( J+JB, J ), LDA ) + CALL DTRSM( 'Right', 'Lower', 'No transpose', DIAG, + $ N-J-JB+1, JB, -ONE, A( J, J ), LDA, + $ A( J+JB, J ), LDA ) + END IF +* +* Compute inverse of current diagonal block +* + CALL DTRTI2( 'Lower', DIAG, JB, A( J, J ), LDA, INFO ) + 30 CONTINUE + END IF + END IF +* + RETURN +* +* End of DTRTRI +* + END diff --git a/costa/native/external/lapack/dtrtrs.f b/costa/native/external/lapack/dtrtrs.f new file mode 100644 index 000000000..c1b4c5c4c --- /dev/null +++ b/costa/native/external/lapack/dtrtrs.f @@ -0,0 +1,148 @@ + SUBROUTINE DTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* DTRTRS solves a triangular system of the form +* +* A * X = B or A**T * X = B, +* +* where A is a triangular matrix of order N, and B is an N-by-NRHS +* matrix. A check is made to verify that A is nonsingular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose = Transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, if INFO = 0, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the i-th diagonal element of A is zero, +* indicating that the matrix is singular and the solutions +* X have not been computed. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTRTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + DO 10 INFO = 1, N + IF( A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + END IF + INFO = 0 +* +* Solve A * x = b or A' * x = b. +* + CALL DTRSM( 'Left', UPLO, TRANS, DIAG, N, NRHS, ONE, A, LDA, B, + $ LDB ) +* + RETURN +* +* End of DTRTRS +* + END diff --git a/costa/native/external/lapack/dtzrqf.f b/costa/native/external/lapack/dtzrqf.f new file mode 100644 index 000000000..e13bf32e2 --- /dev/null +++ b/costa/native/external/lapack/dtzrqf.f @@ -0,0 +1,165 @@ + SUBROUTINE DTZRQF( M, N, A, LDA, TAU, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine DTZRZF. +* +* DTZRQF reduces the M-by-N ( M<=N ) real upper trapezoidal matrix A +* to upper triangular form by means of orthogonal transformations. +* +* The upper trapezoidal matrix A is factored as +* +* A = ( R 0 ) * Z, +* +* where Z is an N-by-N orthogonal matrix and R is an M-by-M upper +* triangular matrix. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= M. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the leading M-by-N upper trapezoidal part of the +* array A must contain the matrix to be factorized. +* On exit, the leading M-by-M upper triangular part of A +* contains the upper triangular matrix R, and elements M+1 to +* N of the first M rows of A, with the array TAU, represent the +* orthogonal matrix Z as a product of M elementary reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) DOUBLE PRECISION array, dimension (M) +* The scalar factors of the elementary reflectors. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The factorization is obtained by Householder's method. The kth +* transformation matrix, Z( k ), which is used to introduce zeros into +* the ( m - k + 1 )th row of A, is given in the form +* +* Z( k ) = ( I 0 ), +* ( 0 T( k ) ) +* +* where +* +* T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), +* ( 0 ) +* ( z( k ) ) +* +* tau is a scalar and z( k ) is an ( n - m ) element vector. +* tau and z( k ) are chosen to annihilate the elements of the kth row +* of X. +* +* The scalar tau is returned in the kth element of TAU and the vector +* u( k ) in the kth row of A, such that the elements of z( k ) are +* in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in +* the upper triangular part of A. +* +* Z is given by +* +* Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K, M1 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGEMV, DGER, DLARFG, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTZRQF', -INFO ) + RETURN + END IF +* +* Perform the factorization. +* + IF( M.EQ.0 ) + $ RETURN + IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + ELSE + M1 = MIN( M+1, N ) + DO 20 K = M, 1, -1 +* +* Use a Householder reflection to zero the kth row of A. +* First set up the reflection. +* + CALL DLARFG( N-M+1, A( K, K ), A( K, M1 ), LDA, TAU( K ) ) +* + IF( ( TAU( K ).NE.ZERO ) .AND. ( K.GT.1 ) ) THEN +* +* We now perform the operation A := A*P( k ). +* +* Use the first ( k - 1 ) elements of TAU to store a( k ), +* where a( k ) consists of the first ( k - 1 ) elements of +* the kth column of A. Also let B denote the first +* ( k - 1 ) rows of the last ( n - m ) columns of A. +* + CALL DCOPY( K-1, A( 1, K ), 1, TAU, 1 ) +* +* Form w = a( k ) + B*z( k ) in TAU. +* + CALL DGEMV( 'No transpose', K-1, N-M, ONE, A( 1, M1 ), + $ LDA, A( K, M1 ), LDA, ONE, TAU, 1 ) +* +* Now form a( k ) := a( k ) - tau*w +* and B := B - tau*w*z( k )'. +* + CALL DAXPY( K-1, -TAU( K ), TAU, 1, A( 1, K ), 1 ) + CALL DGER( K-1, N-M, -TAU( K ), TAU, 1, A( K, M1 ), LDA, + $ A( 1, M1 ), LDA ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of DTZRQF +* + END diff --git a/costa/native/external/lapack/dtzrzf.f b/costa/native/external/lapack/dtzrzf.f new file mode 100644 index 000000000..895c4833f --- /dev/null +++ b/costa/native/external/lapack/dtzrzf.f @@ -0,0 +1,241 @@ + SUBROUTINE DTZRZF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DTZRZF reduces the M-by-N ( M<=N ) real upper trapezoidal matrix A +* to upper triangular form by means of orthogonal transformations. +* +* The upper trapezoidal matrix A is factored as +* +* A = ( R 0 ) * Z, +* +* where Z is an N-by-N orthogonal matrix and R is an M-by-M upper +* triangular matrix. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the leading M-by-N upper trapezoidal part of the +* array A must contain the matrix to be factorized. +* On exit, the leading M-by-M upper triangular part of A +* contains the upper triangular matrix R, and elements M+1 to +* N of the first M rows of A, with the array TAU, represent the +* orthogonal matrix Z as a product of M elementary reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) DOUBLE PRECISION array, dimension (M) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* The factorization is obtained by Householder's method. The kth +* transformation matrix, Z( k ), which is used to introduce zeros into +* the ( m - k + 1 )th row of A, is given in the form +* +* Z( k ) = ( I 0 ), +* ( 0 T( k ) ) +* +* where +* +* T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), +* ( 0 ) +* ( z( k ) ) +* +* tau is a scalar and z( k ) is an ( n - m ) element vector. +* tau and z( k ) are chosen to annihilate the elements of the kth row +* of X. +* +* The scalar tau is returned in the kth element of TAU and the vector +* u( k ) in the kth row of A, such that the elements of z( k ) are +* in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in +* the upper triangular part of A. +* +* Z is given by +* +* Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IWS, KI, KK, LDWORK, LWKOPT, M1, MU, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL DLARZB, DLARZT, DLATRZ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. +* + NB = ILAENV( 1, 'DGERQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTZRZF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + ELSE IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = M + IF( NB.GT.1 .AND. NB.LT.M ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DGERQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.M ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DGERQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.M .AND. NX.LT.M ) THEN +* +* Use blocked code initially. +* The last kk rows are handled by the block method. +* + M1 = MIN( M+1, N ) + KI = ( ( M-NX-1 ) / NB )*NB + KK = MIN( M, KI+NB ) +* + DO 20 I = M - KK + KI + 1, M - KK + 1, -NB + IB = MIN( M-I+1, NB ) +* +* Compute the TZ factorization of the current block +* A(i:i+ib-1,i:n) +* + CALL DLATRZ( IB, N-I+1, N-M, A( I, I ), LDA, TAU( I ), + $ WORK ) + IF( I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL DLARZT( 'Backward', 'Rowwise', N-M, IB, A( I, M1 ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:i-1,i:n) from the right +* + CALL DLARZB( 'Right', 'No transpose', 'Backward', + $ 'Rowwise', I-1, N-I+1, IB, N-M, A( I, M1 ), + $ LDA, WORK, LDWORK, A( 1, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 20 CONTINUE + MU = I + NB - 1 + ELSE + MU = M + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 ) + $ CALL DLATRZ( MU, N, N-M, A, LDA, TAU, WORK ) +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DTZRZF +* + END diff --git a/costa/native/external/lapack/dzsum1.f b/costa/native/external/lapack/dzsum1.f new file mode 100644 index 000000000..341165cef --- /dev/null +++ b/costa/native/external/lapack/dzsum1.f @@ -0,0 +1,82 @@ + DOUBLE PRECISION FUNCTION DZSUM1( N, CX, INCX ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + INTEGER INCX, N +* .. +* .. Array Arguments .. + COMPLEX*16 CX( * ) +* .. +* +* Purpose +* ======= +* +* DZSUM1 takes the sum of the absolute values of a complex +* vector and returns a double precision result. +* +* Based on DZASUM from the Level 1 BLAS. +* The change is to use the 'genuine' absolute value. +* +* Contributed by Nick Higham for use with ZLACON. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of elements in the vector CX. +* +* CX (input) COMPLEX*16 array, dimension (N) +* The vector whose elements will be summed. +* +* INCX (input) INTEGER +* The spacing between successive values of CX. INCX > 0. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, NINCX + DOUBLE PRECISION STEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + DZSUM1 = 0.0D0 + STEMP = 0.0D0 + IF( N.LE.0 ) + $ RETURN + IF( INCX.EQ.1 ) + $ GO TO 20 +* +* CODE FOR INCREMENT NOT EQUAL TO 1 +* + NINCX = N*INCX + DO 10 I = 1, NINCX, INCX +* +* NEXT LINE MODIFIED. +* + STEMP = STEMP + ABS( CX( I ) ) + 10 CONTINUE + DZSUM1 = STEMP + RETURN +* +* CODE FOR INCREMENT EQUAL TO 1 +* + 20 CONTINUE + DO 30 I = 1, N +* +* NEXT LINE MODIFIED. +* + STEMP = STEMP + ABS( CX( I ) ) + 30 CONTINUE + DZSUM1 = STEMP + RETURN +* +* End of DZSUM1 +* + END diff --git a/costa/native/external/lapack/icmax1.f b/costa/native/external/lapack/icmax1.f new file mode 100644 index 000000000..c96f977bf --- /dev/null +++ b/costa/native/external/lapack/icmax1.f @@ -0,0 +1,96 @@ + INTEGER FUNCTION ICMAX1( N, CX, INCX ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INCX, N +* .. +* .. Array Arguments .. + COMPLEX CX( * ) +* .. +* +* Purpose +* ======= +* +* ICMAX1 finds the index of the element whose real part has maximum +* absolute value. +* +* Based on ICAMAX from Level 1 BLAS. +* The change is to use the 'genuine' absolute value. +* +* Contributed by Nick Higham for use with CLACON. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of elements in the vector CX. +* +* CX (input) COMPLEX array, dimension (N) +* The vector whose elements will be summed. +* +* INCX (input) INTEGER +* The spacing between successive values of CX. INCX >= 1. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IX + REAL SMAX + COMPLEX ZDUM +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. +* +* NEXT LINE IS THE ONLY MODIFICATION. + CABS1( ZDUM ) = ABS( ZDUM ) +* .. +* .. Executable Statements .. +* + ICMAX1 = 0 + IF( N.LT.1 ) + $ RETURN + ICMAX1 = 1 + IF( N.EQ.1 ) + $ RETURN + IF( INCX.EQ.1 ) + $ GO TO 30 +* +* CODE FOR INCREMENT NOT EQUAL TO 1 +* + IX = 1 + SMAX = CABS1( CX( 1 ) ) + IX = IX + INCX + DO 20 I = 2, N + IF( CABS1( CX( IX ) ).LE.SMAX ) + $ GO TO 10 + ICMAX1 = I + SMAX = CABS1( CX( IX ) ) + 10 CONTINUE + IX = IX + INCX + 20 CONTINUE + RETURN +* +* CODE FOR INCREMENT EQUAL TO 1 +* + 30 CONTINUE + SMAX = CABS1( CX( 1 ) ) + DO 40 I = 2, N + IF( CABS1( CX( I ) ).LE.SMAX ) + $ GO TO 40 + ICMAX1 = I + SMAX = CABS1( CX( I ) ) + 40 CONTINUE + RETURN +* +* End of ICMAX1 +* + END diff --git a/costa/native/external/lapack/ieeeck.f b/costa/native/external/lapack/ieeeck.f new file mode 100644 index 000000000..3c09fe95e --- /dev/null +++ b/costa/native/external/lapack/ieeeck.f @@ -0,0 +1,148 @@ + INTEGER FUNCTION IEEECK( ISPEC, ZERO, ONE ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1998 +* +* .. Scalar Arguments .. + INTEGER ISPEC + REAL ONE, ZERO +* .. +* +* Purpose +* ======= +* +* IEEECK is called from the ILAENV to verify that Infinity and +* possibly NaN arithmetic is safe (i.e. will not trap). +* +* Arguments +* ========= +* +* ISPEC (input) INTEGER +* Specifies whether to test just for inifinity arithmetic +* or whether to test for infinity and NaN arithmetic. +* = 0: Verify infinity arithmetic only. +* = 1: Verify infinity and NaN arithmetic. +* +* ZERO (input) REAL +* Must contain the value 0.0 +* This is passed to prevent the compiler from optimizing +* away this code. +* +* ONE (input) REAL +* Must contain the value 1.0 +* This is passed to prevent the compiler from optimizing +* away this code. +* +* RETURN VALUE: INTEGER +* = 0: Arithmetic failed to produce the correct answers +* = 1: Arithmetic produced the correct answers +* +* .. Local Scalars .. + REAL NAN1, NAN2, NAN3, NAN4, NAN5, NAN6, NEGINF, + $ NEGZRO, NEWZRO, POSINF +* .. +* .. Executable Statements .. + IEEECK = 1 +* + POSINF = ONE / ZERO + IF( POSINF.LE.ONE ) THEN + IEEECK = 0 + RETURN + END IF +* + NEGINF = -ONE / ZERO + IF( NEGINF.GE.ZERO ) THEN + IEEECK = 0 + RETURN + END IF +* + NEGZRO = ONE / ( NEGINF+ONE ) + IF( NEGZRO.NE.ZERO ) THEN + IEEECK = 0 + RETURN + END IF +* + NEGINF = ONE / NEGZRO + IF( NEGINF.GE.ZERO ) THEN + IEEECK = 0 + RETURN + END IF +* + NEWZRO = NEGZRO + ZERO + IF( NEWZRO.NE.ZERO ) THEN + IEEECK = 0 + RETURN + END IF +* + POSINF = ONE / NEWZRO + IF( POSINF.LE.ONE ) THEN + IEEECK = 0 + RETURN + END IF +* + NEGINF = NEGINF*POSINF + IF( NEGINF.GE.ZERO ) THEN + IEEECK = 0 + RETURN + END IF +* + POSINF = POSINF*POSINF + IF( POSINF.LE.ONE ) THEN + IEEECK = 0 + RETURN + END IF +* +* +* +* +* Return if we were only asked to check infinity arithmetic +* + IF( ISPEC.EQ.0 ) + $ RETURN +* + NAN1 = POSINF + NEGINF +* + NAN2 = POSINF / NEGINF +* + NAN3 = POSINF / POSINF +* + NAN4 = POSINF*ZERO +* + NAN5 = NEGINF*NEGZRO +* + NAN6 = NAN5*0.0 +* + IF( NAN1.EQ.NAN1 ) THEN + IEEECK = 0 + RETURN + END IF +* + IF( NAN2.EQ.NAN2 ) THEN + IEEECK = 0 + RETURN + END IF +* + IF( NAN3.EQ.NAN3 ) THEN + IEEECK = 0 + RETURN + END IF +* + IF( NAN4.EQ.NAN4 ) THEN + IEEECK = 0 + RETURN + END IF +* + IF( NAN5.EQ.NAN5 ) THEN + IEEECK = 0 + RETURN + END IF +* + IF( NAN6.EQ.NAN6 ) THEN + IEEECK = 0 + RETURN + END IF +* + RETURN + END diff --git a/costa/native/external/lapack/ilaenv.f b/costa/native/external/lapack/ilaenv.f new file mode 100644 index 000000000..7263d6010 --- /dev/null +++ b/costa/native/external/lapack/ilaenv.f @@ -0,0 +1,547 @@ + INTEGER FUNCTION ILAENV( ISPEC, NAME, OPTS, N1, N2, N3, + $ N4 ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER*( * ) NAME, OPTS + INTEGER ISPEC, N1, N2, N3, N4 +* .. +* +* Purpose +* ======= +* +* ILAENV is called from the LAPACK routines to choose problem-dependent +* parameters for the local environment. See ISPEC for a description of +* the parameters. +* +* This version provides a set of parameters which should give good, +* but not optimal, performance on many of the currently available +* computers. Users are encouraged to modify this subroutine to set +* the tuning parameters for their particular machine using the option +* and problem size information in the arguments. +* +* This routine will not function correctly if it is converted to all +* lower case. Converting it to all upper case is allowed. +* +* Arguments +* ========= +* +* ISPEC (input) INTEGER +* Specifies the parameter to be returned as the value of +* ILAENV. +* = 1: the optimal blocksize; if this value is 1, an unblocked +* algorithm will give the best performance. +* = 2: the minimum block size for which the block routine +* should be used; if the usable block size is less than +* this value, an unblocked routine should be used. +* = 3: the crossover point (in a block routine, for N less +* than this value, an unblocked routine should be used) +* = 4: the number of shifts, used in the nonsymmetric +* eigenvalue routines +* = 5: the minimum column dimension for blocking to be used; +* rectangular blocks must have dimension at least k by m, +* where k is given by ILAENV(2,...) and m by ILAENV(5,...) +* = 6: the crossover point for the SVD (when reducing an m by n +* matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds +* this value, a QR factorization is used first to reduce +* the matrix to a triangular form.) +* = 7: the number of processors +* = 8: the crossover point for the multishift QR and QZ methods +* for nonsymmetric eigenvalue problems. +* = 9: maximum size of the subproblems at the bottom of the +* computation tree in the divide-and-conquer algorithm +* (used by xGELSD and xGESDD) +* =10: ieee NaN arithmetic can be trusted not to trap +* =11: infinity arithmetic can be trusted not to trap +* +* NAME (input) CHARACTER*(*) +* The name of the calling subroutine, in either upper case or +* lower case. +* +* OPTS (input) CHARACTER*(*) +* The character options to the subroutine NAME, concatenated +* into a single character string. For example, UPLO = 'U', +* TRANS = 'T', and DIAG = 'N' for a triangular routine would +* be specified as OPTS = 'UTN'. +* +* N1 (input) INTEGER +* N2 (input) INTEGER +* N3 (input) INTEGER +* N4 (input) INTEGER +* Problem dimensions for the subroutine NAME; these may not all +* be required. +* +* (ILAENV) (output) INTEGER +* >= 0: the value of the parameter specified by ISPEC +* < 0: if ILAENV = -k, the k-th argument had an illegal value. +* +* Further Details +* =============== +* +* The following conventions have been used when calling ILAENV from the +* LAPACK routines: +* 1) OPTS is a concatenation of all of the character options to +* subroutine NAME, in the same order that they appear in the +* argument list for NAME, even if they are not used in determining +* the value of the parameter specified by ISPEC. +* 2) The problem dimensions N1, N2, N3, N4 are specified in the order +* that they appear in the argument list for NAME. N1 is used +* first, N2 second, and so on, and unused problem dimensions are +* passed a value of -1. +* 3) The parameter value returned by ILAENV is checked for validity in +* the calling subroutine. For example, ILAENV is used to retrieve +* the optimal blocksize for STRTRI as follows: +* +* NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) +* IF( NB.LE.1 ) NB = MAX( 1, N ) +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL CNAME, SNAME + CHARACTER*1 C1 + CHARACTER*2 C2, C4 + CHARACTER*3 C3 + CHARACTER*6 SUBNAM + INTEGER I, IC, IZ, NB, NBMIN, NX +* .. +* .. Intrinsic Functions .. + INTRINSIC CHAR, ICHAR, INT, MIN, REAL +* .. +* .. External Functions .. + INTEGER IEEECK + EXTERNAL IEEECK +* .. +* .. Executable Statements .. +* + GO TO ( 100, 100, 100, 400, 500, 600, 700, 800, 900, 1000, + $ 1100 ) ISPEC +* +* Invalid value for ISPEC +* + ILAENV = -1 + RETURN +* + 100 CONTINUE +* +* Convert NAME to upper case if the first character is lower case. +* + ILAENV = 1 + SUBNAM = NAME + IC = ICHAR( SUBNAM( 1:1 ) ) + IZ = ICHAR( 'Z' ) + IF( IZ.EQ.90 .OR. IZ.EQ.122 ) THEN +* +* ASCII character set +* + IF( IC.GE.97 .AND. IC.LE.122 ) THEN + SUBNAM( 1:1 ) = CHAR( IC-32 ) + DO 10 I = 2, 6 + IC = ICHAR( SUBNAM( I:I ) ) + IF( IC.GE.97 .AND. IC.LE.122 ) + $ SUBNAM( I:I ) = CHAR( IC-32 ) + 10 CONTINUE + END IF +* + ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN +* +* EBCDIC character set +* + IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR. + $ ( IC.GE.145 .AND. IC.LE.153 ) .OR. + $ ( IC.GE.162 .AND. IC.LE.169 ) ) THEN + SUBNAM( 1:1 ) = CHAR( IC+64 ) + DO 20 I = 2, 6 + IC = ICHAR( SUBNAM( I:I ) ) + IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR. + $ ( IC.GE.145 .AND. IC.LE.153 ) .OR. + $ ( IC.GE.162 .AND. IC.LE.169 ) ) + $ SUBNAM( I:I ) = CHAR( IC+64 ) + 20 CONTINUE + END IF +* + ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN +* +* Prime machines: ASCII+128 +* + IF( IC.GE.225 .AND. IC.LE.250 ) THEN + SUBNAM( 1:1 ) = CHAR( IC-32 ) + DO 30 I = 2, 6 + IC = ICHAR( SUBNAM( I:I ) ) + IF( IC.GE.225 .AND. IC.LE.250 ) + $ SUBNAM( I:I ) = CHAR( IC-32 ) + 30 CONTINUE + END IF + END IF +* + C1 = SUBNAM( 1:1 ) + SNAME = C1.EQ.'S' .OR. C1.EQ.'D' + CNAME = C1.EQ.'C' .OR. C1.EQ.'Z' + IF( .NOT.( CNAME .OR. SNAME ) ) + $ RETURN + C2 = SUBNAM( 2:3 ) + C3 = SUBNAM( 4:6 ) + C4 = C3( 2:3 ) +* + GO TO ( 110, 200, 300 ) ISPEC +* + 110 CONTINUE +* +* ISPEC = 1: block size +* +* In these examples, separate code is provided for setting NB for +* real and complex. We assume that NB will take the same value in +* single or double precision. +* + NB = 1 +* + IF( C2.EQ.'GE' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + ELSE IF( C3.EQ.'QRF' .OR. C3.EQ.'RQF' .OR. C3.EQ.'LQF' .OR. + $ C3.EQ.'QLF' ) THEN + IF( SNAME ) THEN + NB = 32 + ELSE + NB = 32 + END IF + ELSE IF( C3.EQ.'HRD' ) THEN + IF( SNAME ) THEN + NB = 32 + ELSE + NB = 32 + END IF + ELSE IF( C3.EQ.'BRD' ) THEN + IF( SNAME ) THEN + NB = 32 + ELSE + NB = 32 + END IF + ELSE IF( C3.EQ.'TRI' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + END IF + ELSE IF( C2.EQ.'PO' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + END IF + ELSE IF( C2.EQ.'SY' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + ELSE IF( SNAME .AND. C3.EQ.'TRD' ) THEN + NB = 32 + ELSE IF( SNAME .AND. C3.EQ.'GST' ) THEN + NB = 64 + END IF + ELSE IF( CNAME .AND. C2.EQ.'HE' ) THEN + IF( C3.EQ.'TRF' ) THEN + NB = 64 + ELSE IF( C3.EQ.'TRD' ) THEN + NB = 32 + ELSE IF( C3.EQ.'GST' ) THEN + NB = 64 + END IF + ELSE IF( SNAME .AND. C2.EQ.'OR' ) THEN + IF( C3( 1:1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. + $ C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. + $ C4.EQ.'BR' ) THEN + NB = 32 + END IF + ELSE IF( C3( 1:1 ).EQ.'M' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. + $ C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. + $ C4.EQ.'BR' ) THEN + NB = 32 + END IF + END IF + ELSE IF( CNAME .AND. C2.EQ.'UN' ) THEN + IF( C3( 1:1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. + $ C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. + $ C4.EQ.'BR' ) THEN + NB = 32 + END IF + ELSE IF( C3( 1:1 ).EQ.'M' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. + $ C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. + $ C4.EQ.'BR' ) THEN + NB = 32 + END IF + END IF + ELSE IF( C2.EQ.'GB' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + IF( N4.LE.64 ) THEN + NB = 1 + ELSE + NB = 32 + END IF + ELSE + IF( N4.LE.64 ) THEN + NB = 1 + ELSE + NB = 32 + END IF + END IF + END IF + ELSE IF( C2.EQ.'PB' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + IF( N2.LE.64 ) THEN + NB = 1 + ELSE + NB = 32 + END IF + ELSE + IF( N2.LE.64 ) THEN + NB = 1 + ELSE + NB = 32 + END IF + END IF + END IF + ELSE IF( C2.EQ.'TR' ) THEN + IF( C3.EQ.'TRI' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + END IF + ELSE IF( C2.EQ.'LA' ) THEN + IF( C3.EQ.'UUM' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + END IF + ELSE IF( SNAME .AND. C2.EQ.'ST' ) THEN + IF( C3.EQ.'EBZ' ) THEN + NB = 1 + END IF + END IF + ILAENV = NB + RETURN +* + 200 CONTINUE +* +* ISPEC = 2: minimum block size +* + NBMIN = 2 + IF( C2.EQ.'GE' ) THEN + IF( C3.EQ.'QRF' .OR. C3.EQ.'RQF' .OR. C3.EQ.'LQF' .OR. + $ C3.EQ.'QLF' ) THEN + IF( SNAME ) THEN + NBMIN = 2 + ELSE + NBMIN = 2 + END IF + ELSE IF( C3.EQ.'HRD' ) THEN + IF( SNAME ) THEN + NBMIN = 2 + ELSE + NBMIN = 2 + END IF + ELSE IF( C3.EQ.'BRD' ) THEN + IF( SNAME ) THEN + NBMIN = 2 + ELSE + NBMIN = 2 + END IF + ELSE IF( C3.EQ.'TRI' ) THEN + IF( SNAME ) THEN + NBMIN = 2 + ELSE + NBMIN = 2 + END IF + END IF + ELSE IF( C2.EQ.'SY' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + NBMIN = 8 + ELSE + NBMIN = 8 + END IF + ELSE IF( SNAME .AND. C3.EQ.'TRD' ) THEN + NBMIN = 2 + END IF + ELSE IF( CNAME .AND. C2.EQ.'HE' ) THEN + IF( C3.EQ.'TRD' ) THEN + NBMIN = 2 + END IF + ELSE IF( SNAME .AND. C2.EQ.'OR' ) THEN + IF( C3( 1:1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. + $ C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. + $ C4.EQ.'BR' ) THEN + NBMIN = 2 + END IF + ELSE IF( C3( 1:1 ).EQ.'M' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. + $ C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. + $ C4.EQ.'BR' ) THEN + NBMIN = 2 + END IF + END IF + ELSE IF( CNAME .AND. C2.EQ.'UN' ) THEN + IF( C3( 1:1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. + $ C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. + $ C4.EQ.'BR' ) THEN + NBMIN = 2 + END IF + ELSE IF( C3( 1:1 ).EQ.'M' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. + $ C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. + $ C4.EQ.'BR' ) THEN + NBMIN = 2 + END IF + END IF + END IF + ILAENV = NBMIN + RETURN +* + 300 CONTINUE +* +* ISPEC = 3: crossover point +* + NX = 0 + IF( C2.EQ.'GE' ) THEN + IF( C3.EQ.'QRF' .OR. C3.EQ.'RQF' .OR. C3.EQ.'LQF' .OR. + $ C3.EQ.'QLF' ) THEN + IF( SNAME ) THEN + NX = 128 + ELSE + NX = 128 + END IF + ELSE IF( C3.EQ.'HRD' ) THEN + IF( SNAME ) THEN + NX = 128 + ELSE + NX = 128 + END IF + ELSE IF( C3.EQ.'BRD' ) THEN + IF( SNAME ) THEN + NX = 128 + ELSE + NX = 128 + END IF + END IF + ELSE IF( C2.EQ.'SY' ) THEN + IF( SNAME .AND. C3.EQ.'TRD' ) THEN + NX = 32 + END IF + ELSE IF( CNAME .AND. C2.EQ.'HE' ) THEN + IF( C3.EQ.'TRD' ) THEN + NX = 32 + END IF + ELSE IF( SNAME .AND. C2.EQ.'OR' ) THEN + IF( C3( 1:1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. + $ C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. + $ C4.EQ.'BR' ) THEN + NX = 128 + END IF + END IF + ELSE IF( CNAME .AND. C2.EQ.'UN' ) THEN + IF( C3( 1:1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. + $ C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. + $ C4.EQ.'BR' ) THEN + NX = 128 + END IF + END IF + END IF + ILAENV = NX + RETURN +* + 400 CONTINUE +* +* ISPEC = 4: number of shifts (used by xHSEQR) +* + ILAENV = 6 + RETURN +* + 500 CONTINUE +* +* ISPEC = 5: minimum column dimension (not used) +* + ILAENV = 2 + RETURN +* + 600 CONTINUE +* +* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) +* + ILAENV = INT( REAL( MIN( N1, N2 ) )*1.6E0 ) + RETURN +* + 700 CONTINUE +* +* ISPEC = 7: number of processors (not used) +* + ILAENV = 1 + RETURN +* + 800 CONTINUE +* +* ISPEC = 8: crossover point for multishift (used by xHSEQR) +* + ILAENV = 50 + RETURN +* + 900 CONTINUE +* +* ISPEC = 9: maximum size of the subproblems at the bottom of the +* computation tree in the divide-and-conquer algorithm +* (used by xGELSD and xGESDD) +* + ILAENV = 25 + RETURN +* + 1000 CONTINUE +* +* ISPEC = 10: ieee NaN arithmetic can be trusted not to trap +* +C ILAENV = 0 + ILAENV = 1 + IF( ILAENV.EQ.1 ) THEN + ILAENV = IEEECK( 0, 0.0, 1.0 ) + END IF + RETURN +* + 1100 CONTINUE +* +* ISPEC = 11: infinity arithmetic can be trusted not to trap +* +C ILAENV = 0 + ILAENV = 1 + IF( ILAENV.EQ.1 ) THEN + ILAENV = IEEECK( 1, 0.0, 1.0 ) + END IF + RETURN +* +* End of ILAENV +* + END diff --git a/costa/native/external/lapack/izmax1.f b/costa/native/external/lapack/izmax1.f new file mode 100644 index 000000000..a3488ea4c --- /dev/null +++ b/costa/native/external/lapack/izmax1.f @@ -0,0 +1,96 @@ + INTEGER FUNCTION IZMAX1( N, CX, INCX ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INCX, N +* .. +* .. Array Arguments .. + COMPLEX*16 CX( * ) +* .. +* +* Purpose +* ======= +* +* IZMAX1 finds the index of the element whose real part has maximum +* absolute value. +* +* Based on IZAMAX from Level 1 BLAS. +* The change is to use the 'genuine' absolute value. +* +* Contributed by Nick Higham for use with ZLACON. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of elements in the vector CX. +* +* CX (input) COMPLEX*16 array, dimension (N) +* The vector whose elements will be summed. +* +* INCX (input) INTEGER +* The spacing between successive values of CX. INCX >= 1. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IX + DOUBLE PRECISION SMAX + COMPLEX*16 ZDUM +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. +* +* NEXT LINE IS THE ONLY MODIFICATION. + CABS1( ZDUM ) = ABS( ZDUM ) +* .. +* .. Executable Statements .. +* + IZMAX1 = 0 + IF( N.LT.1 ) + $ RETURN + IZMAX1 = 1 + IF( N.EQ.1 ) + $ RETURN + IF( INCX.EQ.1 ) + $ GO TO 30 +* +* CODE FOR INCREMENT NOT EQUAL TO 1 +* + IX = 1 + SMAX = CABS1( CX( 1 ) ) + IX = IX + INCX + DO 20 I = 2, N + IF( CABS1( CX( IX ) ).LE.SMAX ) + $ GO TO 10 + IZMAX1 = I + SMAX = CABS1( CX( IX ) ) + 10 CONTINUE + IX = IX + INCX + 20 CONTINUE + RETURN +* +* CODE FOR INCREMENT EQUAL TO 1 +* + 30 CONTINUE + SMAX = CABS1( CX( 1 ) ) + DO 40 I = 2, N + IF( CABS1( CX( I ) ).LE.SMAX ) + $ GO TO 40 + IZMAX1 = I + SMAX = CABS1( CX( I ) ) + 40 CONTINUE + RETURN +* +* End of IZMAX1 +* + END diff --git a/costa/native/external/lapack/lsame.f b/costa/native/external/lapack/lsame.f new file mode 100644 index 000000000..bf25d86f2 --- /dev/null +++ b/costa/native/external/lapack/lsame.f @@ -0,0 +1,87 @@ + LOGICAL FUNCTION LSAME( CA, CB ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER CA, CB +* .. +* +* Purpose +* ======= +* +* LSAME returns .TRUE. if CA is the same letter as CB regardless of +* case. +* +* Arguments +* ========= +* +* CA (input) CHARACTER*1 +* CB (input) CHARACTER*1 +* CA and CB specify the single characters to be compared. +* +* ===================================================================== +* +* .. Intrinsic Functions .. + INTRINSIC ICHAR +* .. +* .. Local Scalars .. + INTEGER INTA, INTB, ZCODE +* .. +* .. Executable Statements .. +* +* Test if the characters are equal +* + LSAME = CA.EQ.CB + IF( LSAME ) + $ RETURN +* +* Now test for equivalence if both characters are alphabetic. +* + ZCODE = ICHAR( 'Z' ) +* +* Use 'Z' rather than 'A' so that ASCII can be detected on Prime +* machines, on which ICHAR returns a value with bit 8 set. +* ICHAR('A') on Prime machines returns 193 which is the same as +* ICHAR('A') on an EBCDIC machine. +* + INTA = ICHAR( CA ) + INTB = ICHAR( CB ) +* + IF( ZCODE.EQ.90 .OR. ZCODE.EQ.122 ) THEN +* +* ASCII is assumed - ZCODE is the ASCII code of either lower or +* upper case 'Z'. +* + IF( INTA.GE.97 .AND. INTA.LE.122 ) INTA = INTA - 32 + IF( INTB.GE.97 .AND. INTB.LE.122 ) INTB = INTB - 32 +* + ELSE IF( ZCODE.EQ.233 .OR. ZCODE.EQ.169 ) THEN +* +* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or +* upper case 'Z'. +* + IF( INTA.GE.129 .AND. INTA.LE.137 .OR. + $ INTA.GE.145 .AND. INTA.LE.153 .OR. + $ INTA.GE.162 .AND. INTA.LE.169 ) INTA = INTA + 64 + IF( INTB.GE.129 .AND. INTB.LE.137 .OR. + $ INTB.GE.145 .AND. INTB.LE.153 .OR. + $ INTB.GE.162 .AND. INTB.LE.169 ) INTB = INTB + 64 +* + ELSE IF( ZCODE.EQ.218 .OR. ZCODE.EQ.250 ) THEN +* +* ASCII is assumed, on Prime machines - ZCODE is the ASCII code +* plus 128 of either lower or upper case 'Z'. +* + IF( INTA.GE.225 .AND. INTA.LE.250 ) INTA = INTA - 32 + IF( INTB.GE.225 .AND. INTB.LE.250 ) INTB = INTB - 32 + END IF + LSAME = INTA.EQ.INTB +* +* RETURN +* +* End of LSAME +* + END diff --git a/costa/native/external/lapack/lsamen.f b/costa/native/external/lapack/lsamen.f new file mode 100644 index 000000000..4ba41e6d3 --- /dev/null +++ b/costa/native/external/lapack/lsamen.f @@ -0,0 +1,68 @@ + LOGICAL FUNCTION LSAMEN( N, CA, CB ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER*( * ) CA, CB + INTEGER N +* .. +* +* Purpose +* ======= +* +* LSAMEN tests if the first N letters of CA are the same as the +* first N letters of CB, regardless of case. +* LSAMEN returns .TRUE. if CA and CB are equivalent except for case +* and .FALSE. otherwise. LSAMEN also returns .FALSE. if LEN( CA ) +* or LEN( CB ) is less than N. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of characters in CA and CB to be compared. +* +* CA (input) CHARACTER*(*) +* CB (input) CHARACTER*(*) +* CA and CB specify two character strings of length at least N. +* Only the first N characters of each string will be accessed. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC LEN +* .. +* .. Executable Statements .. +* + LSAMEN = .FALSE. + IF( LEN( CA ).LT.N .OR. LEN( CB ).LT.N ) + $ GO TO 20 +* +* Do for each character in the two strings. +* + DO 10 I = 1, N +* +* Test if the characters are equal using LSAME. +* + IF( .NOT.LSAME( CA( I: I ), CB( I: I ) ) ) + $ GO TO 20 +* + 10 CONTINUE + LSAMEN = .TRUE. +* + 20 CONTINUE + RETURN +* +* End of LSAMEN +* + END diff --git a/costa/native/external/lapack/sbdsdc.f b/costa/native/external/lapack/sbdsdc.f new file mode 100644 index 000000000..625bfeb40 --- /dev/null +++ b/costa/native/external/lapack/sbdsdc.f @@ -0,0 +1,427 @@ + SUBROUTINE SBDSDC( UPLO, COMPQ, N, D, E, U, LDU, VT, LDVT, Q, IQ, + $ WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* December 1, 1999 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, UPLO + INTEGER INFO, LDU, LDVT, N +* .. +* .. Array Arguments .. + INTEGER IQ( * ), IWORK( * ) + REAL D( * ), E( * ), Q( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SBDSDC computes the singular value decomposition (SVD) of a real +* N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, +* using a divide and conquer method, where S is a diagonal matrix +* with non-negative diagonal elements (the singular values of B), and +* U and VT are orthogonal matrices of left and right singular vectors, +* respectively. SBDSDC can be used to compute all singular values, +* and optionally, singular vectors or singular vectors in compact form. +* +* This code makes very mild assumptions about floating point +* arithmetic. It will work on machines with a guard digit in +* add/subtract, or on those binary machines without guard digits +* which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. +* It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. See SLASD3 for details. +* +* The code currently call SLASDQ if singular values only are desired. +* However, it can be slightly modified to compute singular values +* using the divide and conquer method. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': B is upper bidiagonal. +* = 'L': B is lower bidiagonal. +* +* COMPQ (input) CHARACTER*1 +* Specifies whether singular vectors are to be computed +* as follows: +* = 'N': Compute singular values only; +* = 'P': Compute singular values and compute singular +* vectors in compact form; +* = 'I': Compute singular values and singular vectors. +* +* N (input) INTEGER +* The order of the matrix B. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the bidiagonal matrix B. +* On exit, if INFO=0, the singular values of B. +* +* E (input/output) REAL array, dimension (N) +* On entry, the elements of E contain the offdiagonal +* elements of the bidiagonal matrix whose SVD is desired. +* On exit, E has been destroyed. +* +* U (output) REAL array, dimension (LDU,N) +* If COMPQ = 'I', then: +* On exit, if INFO = 0, U contains the left singular vectors +* of the bidiagonal matrix. +* For other values of COMPQ, U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= 1. +* If singular vectors are desired, then LDU >= max( 1, N ). +* +* VT (output) REAL array, dimension (LDVT,N) +* If COMPQ = 'I', then: +* On exit, if INFO = 0, VT' contains the right singular +* vectors of the bidiagonal matrix. +* For other values of COMPQ, VT is not referenced. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. LDVT >= 1. +* If singular vectors are desired, then LDVT >= max( 1, N ). +* +* Q (output) REAL array, dimension (LDQ) +* If COMPQ = 'P', then: +* On exit, if INFO = 0, Q and IQ contain the left +* and right singular vectors in a compact form, +* requiring O(N log N) space instead of 2*N**2. +* In particular, Q contains all the REAL data in +* LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) +* words of memory, where SMLSIZ is returned by ILAENV and +* is equal to the maximum size of the subproblems at the +* bottom of the computation tree (usually about 25). +* For other values of COMPQ, Q is not referenced. +* +* IQ (output) INTEGER array, dimension (LDIQ) +* If COMPQ = 'P', then: +* On exit, if INFO = 0, Q and IQ contain the left +* and right singular vectors in a compact form, +* requiring O(N log N) space instead of 2*N**2. +* In particular, IQ contains all INTEGER data in +* LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) +* words of memory, where SMLSIZ is returned by ILAENV and +* is equal to the maximum size of the subproblems at the +* bottom of the computation tree (usually about 25). +* For other values of COMPQ, IQ is not referenced. +* +* WORK (workspace) REAL array, dimension (LWORK) +* If COMPQ = 'N' then LWORK >= (4 * N). +* If COMPQ = 'P' then LWORK >= (6 * N). +* If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). +* +* IWORK (workspace) INTEGER array, dimension (8*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: The algorithm failed to compute an singular value. +* The update process of divide and conquer failed. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER DIFL, DIFR, GIVCOL, GIVNUM, GIVPTR, I, IC, + $ ICOMPQ, IERR, II, IS, IU, IUPLO, IVT, J, K, KK, + $ MLVL, NM1, NSIZE, PERM, POLES, QSTART, SMLSIZ, + $ SMLSZP, SQRE, START, WSTART, Z + REAL CS, EPS, ORGNRM, P, R, SN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANST + EXTERNAL SLAMCH, SLANST, ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLARTG, SLASCL, SLASD0, SLASDA, SLASDQ, + $ SLASET, SLASR, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, ABS, INT, LOG, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IUPLO = 0 + IF( LSAME( UPLO, 'U' ) ) + $ IUPLO = 1 + IF( LSAME( UPLO, 'L' ) ) + $ IUPLO = 2 + IF( LSAME( COMPQ, 'N' ) ) THEN + ICOMPQ = 0 + ELSE IF( LSAME( COMPQ, 'P' ) ) THEN + ICOMPQ = 1 + ELSE IF( LSAME( COMPQ, 'I' ) ) THEN + ICOMPQ = 2 + ELSE + ICOMPQ = -1 + END IF + IF( IUPLO.EQ.0 ) THEN + INFO = -1 + ELSE IF( ICOMPQ.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ( LDU.LT.1 ) .OR. ( ( ICOMPQ.EQ.2 ) .AND. ( LDU.LT. + $ N ) ) ) THEN + INFO = -7 + ELSE IF( ( LDVT.LT.1 ) .OR. ( ( ICOMPQ.EQ.2 ) .AND. ( LDVT.LT. + $ N ) ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SBDSDC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + SMLSIZ = ILAENV( 9, 'SBDSDC', ' ', 0, 0, 0, 0 ) + IF( N.EQ.1 ) THEN + IF( ICOMPQ.EQ.1 ) THEN + Q( 1 ) = SIGN( ONE, D( 1 ) ) + Q( 1+SMLSIZ*N ) = ONE + ELSE IF( ICOMPQ.EQ.2 ) THEN + U( 1, 1 ) = SIGN( ONE, D( 1 ) ) + VT( 1, 1 ) = ONE + END IF + D( 1 ) = ABS( D( 1 ) ) + RETURN + END IF + NM1 = N - 1 +* +* If matrix lower bidiagonal, rotate to be upper bidiagonal +* by applying Givens rotations on the left +* + WSTART = 1 + QSTART = 3 + IF( ICOMPQ.EQ.1 ) THEN + CALL SCOPY( N, D, 1, Q( 1 ), 1 ) + CALL SCOPY( N-1, E, 1, Q( N+1 ), 1 ) + END IF + IF( IUPLO.EQ.2 ) THEN + QSTART = 5 + WSTART = 2*N - 1 + DO 10 I = 1, N - 1 + CALL SLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( ICOMPQ.EQ.1 ) THEN + Q( I+2*N ) = CS + Q( I+3*N ) = SN + ELSE IF( ICOMPQ.EQ.2 ) THEN + WORK( I ) = CS + WORK( NM1+I ) = -SN + END IF + 10 CONTINUE + END IF +* +* If ICOMPQ = 0, use SLASDQ to compute the singular values. +* + IF( ICOMPQ.EQ.0 ) THEN + CALL SLASDQ( 'U', 0, N, 0, 0, 0, D, E, VT, LDVT, U, LDU, U, + $ LDU, WORK( WSTART ), INFO ) + GO TO 40 + END IF +* +* If N is smaller than the minimum divide size SMLSIZ, then solve +* the problem with another solver. +* + IF( N.LE.SMLSIZ ) THEN + IF( ICOMPQ.EQ.2 ) THEN + CALL SLASET( 'A', N, N, ZERO, ONE, U, LDU ) + CALL SLASET( 'A', N, N, ZERO, ONE, VT, LDVT ) + CALL SLASDQ( 'U', 0, N, N, N, 0, D, E, VT, LDVT, U, LDU, U, + $ LDU, WORK( WSTART ), INFO ) + ELSE IF( ICOMPQ.EQ.1 ) THEN + IU = 1 + IVT = IU + N + CALL SLASET( 'A', N, N, ZERO, ONE, Q( IU+( QSTART-1 )*N ), + $ N ) + CALL SLASET( 'A', N, N, ZERO, ONE, Q( IVT+( QSTART-1 )*N ), + $ N ) + CALL SLASDQ( 'U', 0, N, N, N, 0, D, E, + $ Q( IVT+( QSTART-1 )*N ), N, + $ Q( IU+( QSTART-1 )*N ), N, + $ Q( IU+( QSTART-1 )*N ), N, WORK( WSTART ), + $ INFO ) + END IF + GO TO 40 + END IF +* + IF( ICOMPQ.EQ.2 ) THEN + CALL SLASET( 'A', N, N, ZERO, ONE, U, LDU ) + CALL SLASET( 'A', N, N, ZERO, ONE, VT, LDVT ) + END IF +* +* Scale. +* + ORGNRM = SLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) + $ RETURN + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, IERR ) + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, IERR ) +* + EPS = SLAMCH( 'Epsilon' ) +* + MLVL = INT( LOG( REAL( N ) / REAL( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1 + SMLSZP = SMLSIZ + 1 +* + IF( ICOMPQ.EQ.1 ) THEN + IU = 1 + IVT = 1 + SMLSIZ + DIFL = IVT + SMLSZP + DIFR = DIFL + MLVL + Z = DIFR + MLVL*2 + IC = Z + MLVL + IS = IC + 1 + POLES = IS + 1 + GIVNUM = POLES + 2*MLVL +* + K = 1 + GIVPTR = 2 + PERM = 3 + GIVCOL = PERM + MLVL + END IF +* + DO 20 I = 1, N + IF( ABS( D( I ) ).LT.EPS ) THEN + D( I ) = SIGN( EPS, D( I ) ) + END IF + 20 CONTINUE +* + START = 1 + SQRE = 0 +* + DO 30 I = 1, NM1 + IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN +* +* Subproblem found. First determine its size and then +* apply divide and conquer on it. +* + IF( I.LT.NM1 ) THEN +* +* A subproblem with E(I) small for I < NM1. +* + NSIZE = I - START + 1 + ELSE IF( ABS( E( I ) ).GE.EPS ) THEN +* +* A subproblem with E(NM1) not too small but I = NM1. +* + NSIZE = N - START + 1 + ELSE +* +* A subproblem with E(NM1) small. This implies an +* 1-by-1 subproblem at D(N). Solve this 1-by-1 problem +* first. +* + NSIZE = I - START + 1 + IF( ICOMPQ.EQ.2 ) THEN + U( N, N ) = SIGN( ONE, D( N ) ) + VT( N, N ) = ONE + ELSE IF( ICOMPQ.EQ.1 ) THEN + Q( N+( QSTART-1 )*N ) = SIGN( ONE, D( N ) ) + Q( N+( SMLSIZ+QSTART-1 )*N ) = ONE + END IF + D( N ) = ABS( D( N ) ) + END IF + IF( ICOMPQ.EQ.2 ) THEN + CALL SLASD0( NSIZE, SQRE, D( START ), E( START ), + $ U( START, START ), LDU, VT( START, START ), + $ LDVT, SMLSIZ, IWORK, WORK( WSTART ), INFO ) + ELSE + CALL SLASDA( ICOMPQ, SMLSIZ, NSIZE, SQRE, D( START ), + $ E( START ), Q( START+( IU+QSTART-2 )*N ), N, + $ Q( START+( IVT+QSTART-2 )*N ), + $ IQ( START+K*N ), Q( START+( DIFL+QSTART-2 )* + $ N ), Q( START+( DIFR+QSTART-2 )*N ), + $ Q( START+( Z+QSTART-2 )*N ), + $ Q( START+( POLES+QSTART-2 )*N ), + $ IQ( START+GIVPTR*N ), IQ( START+GIVCOL*N ), + $ N, IQ( START+PERM*N ), + $ Q( START+( GIVNUM+QSTART-2 )*N ), + $ Q( START+( IC+QSTART-2 )*N ), + $ Q( START+( IS+QSTART-2 )*N ), + $ WORK( WSTART ), IWORK, INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + END IF + START = I + 1 + END IF + 30 CONTINUE +* +* Unscale +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, IERR ) + 40 CONTINUE +* +* Use Selection Sort to minimize swaps of singular vectors +* + DO 60 II = 2, N + I = II - 1 + KK = I + P = D( I ) + DO 50 J = II, N + IF( D( J ).GT.P ) THEN + KK = J + P = D( J ) + END IF + 50 CONTINUE + IF( KK.NE.I ) THEN + D( KK ) = D( I ) + D( I ) = P + IF( ICOMPQ.EQ.1 ) THEN + IQ( I ) = KK + ELSE IF( ICOMPQ.EQ.2 ) THEN + CALL SSWAP( N, U( 1, I ), 1, U( 1, KK ), 1 ) + CALL SSWAP( N, VT( I, 1 ), LDVT, VT( KK, 1 ), LDVT ) + END IF + ELSE IF( ICOMPQ.EQ.1 ) THEN + IQ( I ) = I + END IF + 60 CONTINUE +* +* If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO +* + IF( ICOMPQ.EQ.1 ) THEN + IF( IUPLO.EQ.1 ) THEN + IQ( N ) = 1 + ELSE + IQ( N ) = 0 + END IF + END IF +* +* If B is lower bidiagonal, update U by those Givens rotations +* which rotated B to be upper bidiagonal +* + IF( ( IUPLO.EQ.2 ) .AND. ( ICOMPQ.EQ.2 ) ) + $ CALL SLASR( 'L', 'V', 'B', N, N, WORK( 1 ), WORK( N ), U, LDU ) +* + RETURN +* +* End of SBDSDC +* + END diff --git a/costa/native/external/lapack/sbdsqr.f b/costa/native/external/lapack/sbdsqr.f new file mode 100644 index 000000000..d075d84f2 --- /dev/null +++ b/costa/native/external/lapack/sbdsqr.f @@ -0,0 +1,733 @@ + SUBROUTINE SBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, + $ LDU, C, LDC, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU +* .. +* .. Array Arguments .. + REAL C( LDC, * ), D( * ), E( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SBDSQR computes the singular value decomposition (SVD) of a real +* N-by-N (upper or lower) bidiagonal matrix B: B = Q * S * P' (P' +* denotes the transpose of P), where S is a diagonal matrix with +* non-negative diagonal elements (the singular values of B), and Q +* and P are orthogonal matrices. +* +* The routine computes S, and optionally computes U * Q, P' * VT, +* or Q' * C, for given real input matrices U, VT, and C. +* +* See "Computing Small Singular Values of Bidiagonal Matrices With +* Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, +* LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, +* no. 5, pp. 873-912, Sept 1990) and +* "Accurate singular values and differential qd algorithms," by +* B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics +* Department, University of California at Berkeley, July 1992 +* for a detailed description of the algorithm. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': B is upper bidiagonal; +* = 'L': B is lower bidiagonal. +* +* N (input) INTEGER +* The order of the matrix B. N >= 0. +* +* NCVT (input) INTEGER +* The number of columns of the matrix VT. NCVT >= 0. +* +* NRU (input) INTEGER +* The number of rows of the matrix U. NRU >= 0. +* +* NCC (input) INTEGER +* The number of columns of the matrix C. NCC >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the bidiagonal matrix B. +* On exit, if INFO=0, the singular values of B in decreasing +* order. +* +* E (input/output) REAL array, dimension (N) +* On entry, the elements of E contain the +* offdiagonal elements of the bidiagonal matrix whose SVD +* is desired. On normal exit (INFO = 0), E is destroyed. +* If the algorithm does not converge (INFO > 0), D and E +* will contain the diagonal and superdiagonal elements of a +* bidiagonal matrix orthogonally equivalent to the one given +* as input. E(N) is used for workspace. +* +* VT (input/output) REAL array, dimension (LDVT, NCVT) +* On entry, an N-by-NCVT matrix VT. +* On exit, VT is overwritten by P' * VT. +* VT is not referenced if NCVT = 0. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. +* LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. +* +* U (input/output) REAL array, dimension (LDU, N) +* On entry, an NRU-by-N matrix U. +* On exit, U is overwritten by U * Q. +* U is not referenced if NRU = 0. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= max(1,NRU). +* +* C (input/output) REAL array, dimension (LDC, NCC) +* On entry, an N-by-NCC matrix C. +* On exit, C is overwritten by Q' * C. +* C is not referenced if NCC = 0. +* +* LDC (input) INTEGER +* The leading dimension of the array C. +* LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. +* +* WORK (workspace) REAL array, dimension (4*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: If INFO = -i, the i-th argument had an illegal value +* > 0: the algorithm did not converge; D and E contain the +* elements of a bidiagonal matrix which is orthogonally +* similar to the input matrix B; if INFO = i, i +* elements of E have not converged to zero. +* +* Internal Parameters +* =================== +* +* TOLMUL REAL, default = max(10,min(100,EPS**(-1/8))) +* TOLMUL controls the convergence criterion of the QR loop. +* If it is positive, TOLMUL*EPS is the desired relative +* precision in the computed singular values. +* If it is negative, abs(TOLMUL*EPS*sigma_max) is the +* desired absolute accuracy in the computed singular +* values (corresponds to relative accuracy +* abs(TOLMUL*EPS) in the largest singular value. +* abs(TOLMUL) should be between 1 and 1/EPS, and preferably +* between 10 (for fast convergence) and .1/EPS +* (for there to be some accuracy in the results). +* Default is to lose at either one eighth or 2 of the +* available decimal digits in each computed singular value +* (whichever is smaller). +* +* MAXITR INTEGER, default = 6 +* MAXITR controls the maximum number of passes of the +* algorithm through its inner loop. The algorithms stops +* (and so fails to converge) if the number of passes +* through the inner loop exceeds MAXITR*N**2. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + REAL NEGONE + PARAMETER ( NEGONE = -1.0E0 ) + REAL HNDRTH + PARAMETER ( HNDRTH = 0.01E0 ) + REAL TEN + PARAMETER ( TEN = 10.0E0 ) + REAL HNDRD + PARAMETER ( HNDRD = 100.0E0 ) + REAL MEIGTH + PARAMETER ( MEIGTH = -0.125E0 ) + INTEGER MAXITR + PARAMETER ( MAXITR = 6 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, ROTATE + INTEGER I, IDIR, ISUB, ITER, J, LL, LLL, M, MAXIT, NM1, + $ NM12, NM13, OLDLL, OLDM + REAL ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU, + $ OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL, + $ SINR, SLL, SMAX, SMIN, SMINL, SMINLO, SMINOA, + $ SN, THRESH, TOL, TOLMUL, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLARTG, SLAS2, SLASQ1, SLASR, SLASV2, SROT, + $ SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LOWER ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NCVT.LT.0 ) THEN + INFO = -3 + ELSE IF( NRU.LT.0 ) THEN + INFO = -4 + ELSE IF( NCC.LT.0 ) THEN + INFO = -5 + ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR. + $ ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN + INFO = -9 + ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN + INFO = -11 + ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR. + $ ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SBDSQR', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN + IF( N.EQ.1 ) + $ GO TO 160 +* +* ROTATE is true if any singular vectors desired, false otherwise +* + ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 ) +* +* If no singular vectors desired, use qd algorithm +* + IF( .NOT.ROTATE ) THEN + CALL SLASQ1( N, D, E, WORK, INFO ) + RETURN + END IF +* + NM1 = N - 1 + NM12 = NM1 + NM1 + NM13 = NM12 + NM1 + IDIR = 0 +* +* Get machine constants +* + EPS = SLAMCH( 'Epsilon' ) + UNFL = SLAMCH( 'Safe minimum' ) +* +* If matrix lower bidiagonal, rotate to be upper bidiagonal +* by applying Givens rotations on the left +* + IF( LOWER ) THEN + DO 10 I = 1, N - 1 + CALL SLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + WORK( I ) = CS + WORK( NM1+I ) = SN + 10 CONTINUE +* +* Update singular vectors if desired +* + IF( NRU.GT.0 ) + $ CALL SLASR( 'R', 'V', 'F', NRU, N, WORK( 1 ), WORK( N ), U, + $ LDU ) + IF( NCC.GT.0 ) + $ CALL SLASR( 'L', 'V', 'F', N, NCC, WORK( 1 ), WORK( N ), C, + $ LDC ) + END IF +* +* Compute singular values to relative accuracy TOL +* (By setting TOL to be negative, algorithm will compute +* singular values to absolute accuracy ABS(TOL)*norm(input matrix)) +* + TOLMUL = MAX( TEN, MIN( HNDRD, EPS**MEIGTH ) ) + TOL = TOLMUL*EPS +* +* Compute approximate maximum, minimum singular values +* + SMAX = ZERO + DO 20 I = 1, N + SMAX = MAX( SMAX, ABS( D( I ) ) ) + 20 CONTINUE + DO 30 I = 1, N - 1 + SMAX = MAX( SMAX, ABS( E( I ) ) ) + 30 CONTINUE + SMINL = ZERO + IF( TOL.GE.ZERO ) THEN +* +* Relative accuracy desired +* + SMINOA = ABS( D( 1 ) ) + IF( SMINOA.EQ.ZERO ) + $ GO TO 50 + MU = SMINOA + DO 40 I = 2, N + MU = ABS( D( I ) )*( MU / ( MU+ABS( E( I-1 ) ) ) ) + SMINOA = MIN( SMINOA, MU ) + IF( SMINOA.EQ.ZERO ) + $ GO TO 50 + 40 CONTINUE + 50 CONTINUE + SMINOA = SMINOA / SQRT( REAL( N ) ) + THRESH = MAX( TOL*SMINOA, MAXITR*N*N*UNFL ) + ELSE +* +* Absolute accuracy desired +* + THRESH = MAX( ABS( TOL )*SMAX, MAXITR*N*N*UNFL ) + END IF +* +* Prepare for main iteration loop for the singular values +* (MAXIT is the maximum number of passes through the inner +* loop permitted before nonconvergence signalled.) +* + MAXIT = MAXITR*N*N + ITER = 0 + OLDLL = -1 + OLDM = -1 +* +* M points to last element of unconverged part of matrix +* + M = N +* +* Begin main iteration loop +* + 60 CONTINUE +* +* Check for convergence or exceeding iteration count +* + IF( M.LE.1 ) + $ GO TO 160 + IF( ITER.GT.MAXIT ) + $ GO TO 200 +* +* Find diagonal block of matrix to work on +* + IF( TOL.LT.ZERO .AND. ABS( D( M ) ).LE.THRESH ) + $ D( M ) = ZERO + SMAX = ABS( D( M ) ) + SMIN = SMAX + DO 70 LLL = 1, M - 1 + LL = M - LLL + ABSS = ABS( D( LL ) ) + ABSE = ABS( E( LL ) ) + IF( TOL.LT.ZERO .AND. ABSS.LE.THRESH ) + $ D( LL ) = ZERO + IF( ABSE.LE.THRESH ) + $ GO TO 80 + SMIN = MIN( SMIN, ABSS ) + SMAX = MAX( SMAX, ABSS, ABSE ) + 70 CONTINUE + LL = 0 + GO TO 90 + 80 CONTINUE + E( LL ) = ZERO +* +* Matrix splits since E(LL) = 0 +* + IF( LL.EQ.M-1 ) THEN +* +* Convergence of bottom singular value, return to top of loop +* + M = M - 1 + GO TO 60 + END IF + 90 CONTINUE + LL = LL + 1 +* +* E(LL) through E(M-1) are nonzero, E(LL-1) is zero +* + IF( LL.EQ.M-1 ) THEN +* +* 2 by 2 block, handle separately +* + CALL SLASV2( D( M-1 ), E( M-1 ), D( M ), SIGMN, SIGMX, SINR, + $ COSR, SINL, COSL ) + D( M-1 ) = SIGMX + E( M-1 ) = ZERO + D( M ) = SIGMN +* +* Compute singular vectors, if desired +* + IF( NCVT.GT.0 ) + $ CALL SROT( NCVT, VT( M-1, 1 ), LDVT, VT( M, 1 ), LDVT, COSR, + $ SINR ) + IF( NRU.GT.0 ) + $ CALL SROT( NRU, U( 1, M-1 ), 1, U( 1, M ), 1, COSL, SINL ) + IF( NCC.GT.0 ) + $ CALL SROT( NCC, C( M-1, 1 ), LDC, C( M, 1 ), LDC, COSL, + $ SINL ) + M = M - 2 + GO TO 60 + END IF +* +* If working on new submatrix, choose shift direction +* (from larger end diagonal element towards smaller) +* + IF( LL.GT.OLDM .OR. M.LT.OLDLL ) THEN + IF( ABS( D( LL ) ).GE.ABS( D( M ) ) ) THEN +* +* Chase bulge from top (big end) to bottom (small end) +* + IDIR = 1 + ELSE +* +* Chase bulge from bottom (big end) to top (small end) +* + IDIR = 2 + END IF + END IF +* +* Apply convergence tests +* + IF( IDIR.EQ.1 ) THEN +* +* Run convergence test in forward direction +* First apply standard test to bottom of matrix +* + IF( ABS( E( M-1 ) ).LE.ABS( TOL )*ABS( D( M ) ) .OR. + $ ( TOL.LT.ZERO .AND. ABS( E( M-1 ) ).LE.THRESH ) ) THEN + E( M-1 ) = ZERO + GO TO 60 + END IF +* + IF( TOL.GE.ZERO ) THEN +* +* If relative accuracy desired, +* apply convergence criterion forward +* + MU = ABS( D( LL ) ) + SMINL = MU + DO 100 LLL = LL, M - 1 + IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN + E( LLL ) = ZERO + GO TO 60 + END IF + SMINLO = SMINL + MU = ABS( D( LLL+1 ) )*( MU / ( MU+ABS( E( LLL ) ) ) ) + SMINL = MIN( SMINL, MU ) + 100 CONTINUE + END IF +* + ELSE +* +* Run convergence test in backward direction +* First apply standard test to top of matrix +* + IF( ABS( E( LL ) ).LE.ABS( TOL )*ABS( D( LL ) ) .OR. + $ ( TOL.LT.ZERO .AND. ABS( E( LL ) ).LE.THRESH ) ) THEN + E( LL ) = ZERO + GO TO 60 + END IF +* + IF( TOL.GE.ZERO ) THEN +* +* If relative accuracy desired, +* apply convergence criterion backward +* + MU = ABS( D( M ) ) + SMINL = MU + DO 110 LLL = M - 1, LL, -1 + IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN + E( LLL ) = ZERO + GO TO 60 + END IF + SMINLO = SMINL + MU = ABS( D( LLL ) )*( MU / ( MU+ABS( E( LLL ) ) ) ) + SMINL = MIN( SMINL, MU ) + 110 CONTINUE + END IF + END IF + OLDLL = LL + OLDM = M +* +* Compute shift. First, test if shifting would ruin relative +* accuracy, and if so set the shift to zero. +* + IF( TOL.GE.ZERO .AND. N*TOL*( SMINL / SMAX ).LE. + $ MAX( EPS, HNDRTH*TOL ) ) THEN +* +* Use a zero shift to avoid loss of relative accuracy +* + SHIFT = ZERO + ELSE +* +* Compute the shift from 2-by-2 block at end of matrix +* + IF( IDIR.EQ.1 ) THEN + SLL = ABS( D( LL ) ) + CALL SLAS2( D( M-1 ), E( M-1 ), D( M ), SHIFT, R ) + ELSE + SLL = ABS( D( M ) ) + CALL SLAS2( D( LL ), E( LL ), D( LL+1 ), SHIFT, R ) + END IF +* +* Test if shift negligible, and if so set to zero +* + IF( SLL.GT.ZERO ) THEN + IF( ( SHIFT / SLL )**2.LT.EPS ) + $ SHIFT = ZERO + END IF + END IF +* +* Increment iteration count +* + ITER = ITER + M - LL +* +* If SHIFT = 0, do simplified QR iteration +* + IF( SHIFT.EQ.ZERO ) THEN + IF( IDIR.EQ.1 ) THEN +* +* Chase bulge from top to bottom +* Save cosines and sines for later singular vector updates +* + CS = ONE + OLDCS = ONE + DO 120 I = LL, M - 1 + CALL SLARTG( D( I )*CS, E( I ), CS, SN, R ) + IF( I.GT.LL ) + $ E( I-1 ) = OLDSN*R + CALL SLARTG( OLDCS*R, D( I+1 )*SN, OLDCS, OLDSN, D( I ) ) + WORK( I-LL+1 ) = CS + WORK( I-LL+1+NM1 ) = SN + WORK( I-LL+1+NM12 ) = OLDCS + WORK( I-LL+1+NM13 ) = OLDSN + 120 CONTINUE + H = D( M )*CS + D( M ) = H*OLDCS + E( M-1 ) = H*OLDSN +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL SLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ), + $ WORK( N ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL SLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ), + $ WORK( NM13+1 ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL SLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ), + $ WORK( NM13+1 ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( M-1 ) ).LE.THRESH ) + $ E( M-1 ) = ZERO +* + ELSE +* +* Chase bulge from bottom to top +* Save cosines and sines for later singular vector updates +* + CS = ONE + OLDCS = ONE + DO 130 I = M, LL + 1, -1 + CALL SLARTG( D( I )*CS, E( I-1 ), CS, SN, R ) + IF( I.LT.M ) + $ E( I ) = OLDSN*R + CALL SLARTG( OLDCS*R, D( I-1 )*SN, OLDCS, OLDSN, D( I ) ) + WORK( I-LL ) = CS + WORK( I-LL+NM1 ) = -SN + WORK( I-LL+NM12 ) = OLDCS + WORK( I-LL+NM13 ) = -OLDSN + 130 CONTINUE + H = D( LL )*CS + D( LL ) = H*OLDCS + E( LL ) = H*OLDSN +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL SLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ), + $ WORK( NM13+1 ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL SLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ), + $ WORK( N ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL SLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ), + $ WORK( N ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( LL ) ).LE.THRESH ) + $ E( LL ) = ZERO + END IF + ELSE +* +* Use nonzero shift +* + IF( IDIR.EQ.1 ) THEN +* +* Chase bulge from top to bottom +* Save cosines and sines for later singular vector updates +* + F = ( ABS( D( LL ) )-SHIFT )* + $ ( SIGN( ONE, D( LL ) )+SHIFT / D( LL ) ) + G = E( LL ) + DO 140 I = LL, M - 1 + CALL SLARTG( F, G, COSR, SINR, R ) + IF( I.GT.LL ) + $ E( I-1 ) = R + F = COSR*D( I ) + SINR*E( I ) + E( I ) = COSR*E( I ) - SINR*D( I ) + G = SINR*D( I+1 ) + D( I+1 ) = COSR*D( I+1 ) + CALL SLARTG( F, G, COSL, SINL, R ) + D( I ) = R + F = COSL*E( I ) + SINL*D( I+1 ) + D( I+1 ) = COSL*D( I+1 ) - SINL*E( I ) + IF( I.LT.M-1 ) THEN + G = SINL*E( I+1 ) + E( I+1 ) = COSL*E( I+1 ) + END IF + WORK( I-LL+1 ) = COSR + WORK( I-LL+1+NM1 ) = SINR + WORK( I-LL+1+NM12 ) = COSL + WORK( I-LL+1+NM13 ) = SINL + 140 CONTINUE + E( M-1 ) = F +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL SLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ), + $ WORK( N ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL SLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ), + $ WORK( NM13+1 ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL SLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ), + $ WORK( NM13+1 ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( M-1 ) ).LE.THRESH ) + $ E( M-1 ) = ZERO +* + ELSE +* +* Chase bulge from bottom to top +* Save cosines and sines for later singular vector updates +* + F = ( ABS( D( M ) )-SHIFT )*( SIGN( ONE, D( M ) )+SHIFT / + $ D( M ) ) + G = E( M-1 ) + DO 150 I = M, LL + 1, -1 + CALL SLARTG( F, G, COSR, SINR, R ) + IF( I.LT.M ) + $ E( I ) = R + F = COSR*D( I ) + SINR*E( I-1 ) + E( I-1 ) = COSR*E( I-1 ) - SINR*D( I ) + G = SINR*D( I-1 ) + D( I-1 ) = COSR*D( I-1 ) + CALL SLARTG( F, G, COSL, SINL, R ) + D( I ) = R + F = COSL*E( I-1 ) + SINL*D( I-1 ) + D( I-1 ) = COSL*D( I-1 ) - SINL*E( I-1 ) + IF( I.GT.LL+1 ) THEN + G = SINL*E( I-2 ) + E( I-2 ) = COSL*E( I-2 ) + END IF + WORK( I-LL ) = COSR + WORK( I-LL+NM1 ) = -SINR + WORK( I-LL+NM12 ) = COSL + WORK( I-LL+NM13 ) = -SINL + 150 CONTINUE + E( LL ) = F +* +* Test convergence +* + IF( ABS( E( LL ) ).LE.THRESH ) + $ E( LL ) = ZERO +* +* Update singular vectors if desired +* + IF( NCVT.GT.0 ) + $ CALL SLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ), + $ WORK( NM13+1 ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL SLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ), + $ WORK( N ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL SLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ), + $ WORK( N ), C( LL, 1 ), LDC ) + END IF + END IF +* +* QR iteration finished, go back and check convergence +* + GO TO 60 +* +* All singular values converged, so make them positive +* + 160 CONTINUE + DO 170 I = 1, N + IF( D( I ).LT.ZERO ) THEN + D( I ) = -D( I ) +* +* Change sign of singular vectors, if desired +* + IF( NCVT.GT.0 ) + $ CALL SSCAL( NCVT, NEGONE, VT( I, 1 ), LDVT ) + END IF + 170 CONTINUE +* +* Sort the singular values into decreasing order (insertion sort on +* singular values, but only one transposition per singular vector) +* + DO 190 I = 1, N - 1 +* +* Scan for smallest D(I) +* + ISUB = 1 + SMIN = D( 1 ) + DO 180 J = 2, N + 1 - I + IF( D( J ).LE.SMIN ) THEN + ISUB = J + SMIN = D( J ) + END IF + 180 CONTINUE + IF( ISUB.NE.N+1-I ) THEN +* +* Swap singular values and vectors +* + D( ISUB ) = D( N+1-I ) + D( N+1-I ) = SMIN + IF( NCVT.GT.0 ) + $ CALL SSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( N+1-I, 1 ), + $ LDVT ) + IF( NRU.GT.0 ) + $ CALL SSWAP( NRU, U( 1, ISUB ), 1, U( 1, N+1-I ), 1 ) + IF( NCC.GT.0 ) + $ CALL SSWAP( NCC, C( ISUB, 1 ), LDC, C( N+1-I, 1 ), LDC ) + END IF + 190 CONTINUE + GO TO 220 +* +* Maximum number of iterations exceeded, failure to converge +* + 200 CONTINUE + INFO = 0 + DO 210 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 210 CONTINUE + 220 CONTINUE + RETURN +* +* End of SBDSQR +* + END diff --git a/costa/native/external/lapack/scsum1.f b/costa/native/external/lapack/scsum1.f new file mode 100644 index 000000000..03adac2a1 --- /dev/null +++ b/costa/native/external/lapack/scsum1.f @@ -0,0 +1,82 @@ + REAL FUNCTION SCSUM1( N, CX, INCX ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + INTEGER INCX, N +* .. +* .. Array Arguments .. + COMPLEX CX( * ) +* .. +* +* Purpose +* ======= +* +* SCSUM1 takes the sum of the absolute values of a complex +* vector and returns a single precision result. +* +* Based on SCASUM from the Level 1 BLAS. +* The change is to use the 'genuine' absolute value. +* +* Contributed by Nick Higham for use with CLACON. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of elements in the vector CX. +* +* CX (input) COMPLEX array, dimension (N) +* The vector whose elements will be summed. +* +* INCX (input) INTEGER +* The spacing between successive values of CX. INCX > 0. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, NINCX + REAL STEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + SCSUM1 = 0.0E0 + STEMP = 0.0E0 + IF( N.LE.0 ) + $ RETURN + IF( INCX.EQ.1 ) + $ GO TO 20 +* +* CODE FOR INCREMENT NOT EQUAL TO 1 +* + NINCX = N*INCX + DO 10 I = 1, NINCX, INCX +* +* NEXT LINE MODIFIED. +* + STEMP = STEMP + ABS( CX( I ) ) + 10 CONTINUE + SCSUM1 = STEMP + RETURN +* +* CODE FOR INCREMENT EQUAL TO 1 +* + 20 CONTINUE + DO 30 I = 1, N +* +* NEXT LINE MODIFIED. +* + STEMP = STEMP + ABS( CX( I ) ) + 30 CONTINUE + SCSUM1 = STEMP + RETURN +* +* End of SCSUM1 +* + END diff --git a/costa/native/external/lapack/sdisna.f b/costa/native/external/lapack/sdisna.f new file mode 100644 index 000000000..438282e2a --- /dev/null +++ b/costa/native/external/lapack/sdisna.f @@ -0,0 +1,180 @@ + SUBROUTINE SDISNA( JOB, M, N, D, SEP, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER INFO, M, N +* .. +* .. Array Arguments .. + REAL D( * ), SEP( * ) +* .. +* +* Purpose +* ======= +* +* SDISNA computes the reciprocal condition numbers for the eigenvectors +* of a real symmetric or complex Hermitian matrix or for the left or +* right singular vectors of a general m-by-n matrix. The reciprocal +* condition number is the 'gap' between the corresponding eigenvalue or +* singular value and the nearest other one. +* +* The bound on the error, measured by angle in radians, in the I-th +* computed vector is given by +* +* SLAMCH( 'E' ) * ( ANORM / SEP( I ) ) +* +* where ANORM = 2-norm(A) = max( abs( D(j) ) ). SEP(I) is not allowed +* to be smaller than SLAMCH( 'E' )*ANORM in order to limit the size of +* the error bound. +* +* SDISNA may also be used to compute error bounds for eigenvectors of +* the generalized symmetric definite eigenproblem. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies for which problem the reciprocal condition numbers +* should be computed: +* = 'E': the eigenvectors of a symmetric/Hermitian matrix; +* = 'L': the left singular vectors of a general matrix; +* = 'R': the right singular vectors of a general matrix. +* +* M (input) INTEGER +* The number of rows of the matrix. M >= 0. +* +* N (input) INTEGER +* If JOB = 'L' or 'R', the number of columns of the matrix, +* in which case N >= 0. Ignored if JOB = 'E'. +* +* D (input) REAL array, dimension (M) if JOB = 'E' +* dimension (min(M,N)) if JOB = 'L' or 'R' +* The eigenvalues (if JOB = 'E') or singular values (if JOB = +* 'L' or 'R') of the matrix, in either increasing or decreasing +* order. If singular values, they must be non-negative. +* +* SEP (output) REAL array, dimension (M) if JOB = 'E' +* dimension (min(M,N)) if JOB = 'L' or 'R' +* The reciprocal condition numbers of the vectors. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL DECR, EIGEN, INCR, LEFT, RIGHT, SING + INTEGER I, K + REAL ANORM, EPS, NEWGAP, OLDGAP, SAFMIN, THRESH +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + EIGEN = LSAME( JOB, 'E' ) + LEFT = LSAME( JOB, 'L' ) + RIGHT = LSAME( JOB, 'R' ) + SING = LEFT .OR. RIGHT + IF( EIGEN ) THEN + K = M + ELSE IF( SING ) THEN + K = MIN( M, N ) + END IF + IF( .NOT.EIGEN .AND. .NOT.SING ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( K.LT.0 ) THEN + INFO = -3 + ELSE + INCR = .TRUE. + DECR = .TRUE. + DO 10 I = 1, K - 1 + IF( INCR ) + $ INCR = INCR .AND. D( I ).LE.D( I+1 ) + IF( DECR ) + $ DECR = DECR .AND. D( I ).GE.D( I+1 ) + 10 CONTINUE + IF( SING .AND. K.GT.0 ) THEN + IF( INCR ) + $ INCR = INCR .AND. ZERO.LE.D( 1 ) + IF( DECR ) + $ DECR = DECR .AND. D( K ).GE.ZERO + END IF + IF( .NOT.( INCR .OR. DECR ) ) + $ INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SDISNA', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) + $ RETURN +* +* Compute reciprocal condition numbers +* + IF( K.EQ.1 ) THEN + SEP( 1 ) = SLAMCH( 'O' ) + ELSE + OLDGAP = ABS( D( 2 )-D( 1 ) ) + SEP( 1 ) = OLDGAP + DO 20 I = 2, K - 1 + NEWGAP = ABS( D( I+1 )-D( I ) ) + SEP( I ) = MIN( OLDGAP, NEWGAP ) + OLDGAP = NEWGAP + 20 CONTINUE + SEP( K ) = OLDGAP + END IF + IF( SING ) THEN + IF( ( LEFT .AND. M.GT.N ) .OR. ( RIGHT .AND. M.LT.N ) ) THEN + IF( INCR ) + $ SEP( 1 ) = MIN( SEP( 1 ), D( 1 ) ) + IF( DECR ) + $ SEP( K ) = MIN( SEP( K ), D( K ) ) + END IF + END IF +* +* Ensure that reciprocal condition numbers are not less than +* threshold, in order to limit the size of the error bound +* + EPS = SLAMCH( 'E' ) + SAFMIN = SLAMCH( 'S' ) + ANORM = MAX( ABS( D( 1 ) ), ABS( D( K ) ) ) + IF( ANORM.EQ.ZERO ) THEN + THRESH = EPS + ELSE + THRESH = MAX( EPS*ANORM, SAFMIN ) + END IF + DO 30 I = 1, K + SEP( I ) = MAX( SEP( I ), THRESH ) + 30 CONTINUE +* + RETURN +* +* End of SDISNA +* + END diff --git a/costa/native/external/lapack/second.f b/costa/native/external/lapack/second.f new file mode 100644 index 000000000..0094216ab --- /dev/null +++ b/costa/native/external/lapack/second.f @@ -0,0 +1,34 @@ + REAL FUNCTION SECOND( ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* Purpose +* ======= +* +* SECOND returns the user time for a process in seconds. +* This version gets the time from the system function ETIME. +* +* ===================================================================== +* +* .. Local Scalars .. + REAL T1 +* .. +* .. Local Arrays .. + REAL TARRAY( 2 ) +* .. +* .. External Functions .. + REAL ETIME +C EXTERNAL ETIME +* .. +* .. Executable Statements .. +* + T1 = ETIME( TARRAY ) + SECOND = TARRAY( 1 ) + RETURN +* +* End of SECOND +* + END diff --git a/costa/native/external/lapack/sgbbrd.f b/costa/native/external/lapack/sgbbrd.f new file mode 100644 index 000000000..7bd467538 --- /dev/null +++ b/costa/native/external/lapack/sgbbrd.f @@ -0,0 +1,444 @@ + SUBROUTINE SGBBRD( VECT, M, N, NCC, KL, KU, AB, LDAB, D, E, Q, + $ LDQ, PT, LDPT, C, LDC, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER VECT + INTEGER INFO, KL, KU, LDAB, LDC, LDPT, LDQ, M, N, NCC +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), C( LDC, * ), D( * ), E( * ), + $ PT( LDPT, * ), Q( LDQ, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGBBRD reduces a real general m-by-n band matrix A to upper +* bidiagonal form B by an orthogonal transformation: Q' * A * P = B. +* +* The routine computes B, and optionally forms Q or P', or computes +* Q'*C for a given matrix C. +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* Specifies whether or not the matrices Q and P' are to be +* formed. +* = 'N': do not form Q or P'; +* = 'Q': form Q only; +* = 'P': form P' only; +* = 'B': form both. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NCC (input) INTEGER +* The number of columns of the matrix C. NCC >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals of the matrix A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals of the matrix A. KU >= 0. +* +* AB (input/output) REAL array, dimension (LDAB,N) +* On entry, the m-by-n band matrix A, stored in rows 1 to +* KL+KU+1. The j-th column of A is stored in the j-th column of +* the array AB as follows: +* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). +* On exit, A is overwritten by values generated during the +* reduction. +* +* LDAB (input) INTEGER +* The leading dimension of the array A. LDAB >= KL+KU+1. +* +* D (output) REAL array, dimension (min(M,N)) +* The diagonal elements of the bidiagonal matrix B. +* +* E (output) REAL array, dimension (min(M,N)-1) +* The superdiagonal elements of the bidiagonal matrix B. +* +* Q (output) REAL array, dimension (LDQ,M) +* If VECT = 'Q' or 'B', the m-by-m orthogonal matrix Q. +* If VECT = 'N' or 'P', the array Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. +* LDQ >= max(1,M) if VECT = 'Q' or 'B'; LDQ >= 1 otherwise. +* +* PT (output) REAL array, dimension (LDPT,N) +* If VECT = 'P' or 'B', the n-by-n orthogonal matrix P'. +* If VECT = 'N' or 'Q', the array PT is not referenced. +* +* LDPT (input) INTEGER +* The leading dimension of the array PT. +* LDPT >= max(1,N) if VECT = 'P' or 'B'; LDPT >= 1 otherwise. +* +* C (input/output) REAL array, dimension (LDC,NCC) +* On entry, an m-by-ncc matrix C. +* On exit, C is overwritten by Q'*C. +* C is not referenced if NCC = 0. +* +* LDC (input) INTEGER +* The leading dimension of the array C. +* LDC >= max(1,M) if NCC > 0; LDC >= 1 if NCC = 0. +* +* WORK (workspace) REAL array, dimension (2*max(M,N)) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL WANTB, WANTC, WANTPT, WANTQ + INTEGER I, INCA, J, J1, J2, KB, KB1, KK, KLM, KLU1, + $ KUN, L, MINMN, ML, ML0, MN, MU, MU0, NR, NRT + REAL RA, RB, RC, RS +* .. +* .. External Subroutines .. + EXTERNAL SLARGV, SLARTG, SLARTV, SLASET, SROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTB = LSAME( VECT, 'B' ) + WANTQ = LSAME( VECT, 'Q' ) .OR. WANTB + WANTPT = LSAME( VECT, 'P' ) .OR. WANTB + WANTC = NCC.GT.0 + KLU1 = KL + KU + 1 + INFO = 0 + IF( .NOT.WANTQ .AND. .NOT.WANTPT .AND. .NOT.LSAME( VECT, 'N' ) ) + $ THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NCC.LT.0 ) THEN + INFO = -4 + ELSE IF( KL.LT.0 ) THEN + INFO = -5 + ELSE IF( KU.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KLU1 ) THEN + INFO = -8 + ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. LDQ.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDPT.LT.1 .OR. WANTPT .AND. LDPT.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDC.LT.1 .OR. WANTC .AND. LDC.LT.MAX( 1, M ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBBRD', -INFO ) + RETURN + END IF +* +* Initialize Q and P' to the unit matrix, if needed +* + IF( WANTQ ) + $ CALL SLASET( 'Full', M, M, ZERO, ONE, Q, LDQ ) + IF( WANTPT ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, PT, LDPT ) +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + MINMN = MIN( M, N ) +* + IF( KL+KU.GT.1 ) THEN +* +* Reduce to upper bidiagonal form if KU > 0; if KU = 0, reduce +* first to lower bidiagonal form and then transform to upper +* bidiagonal +* + IF( KU.GT.0 ) THEN + ML0 = 1 + MU0 = 2 + ELSE + ML0 = 2 + MU0 = 1 + END IF +* +* Wherever possible, plane rotations are generated and applied in +* vector operations of length NR over the index set J1:J2:KLU1. +* +* The sines of the plane rotations are stored in WORK(1:max(m,n)) +* and the cosines in WORK(max(m,n)+1:2*max(m,n)). +* + MN = MAX( M, N ) + KLM = MIN( M-1, KL ) + KUN = MIN( N-1, KU ) + KB = KLM + KUN + KB1 = KB + 1 + INCA = KB1*LDAB + NR = 0 + J1 = KLM + 2 + J2 = 1 - KUN +* + DO 90 I = 1, MINMN +* +* Reduce i-th column and i-th row of matrix to bidiagonal form +* + ML = KLM + 1 + MU = KUN + 1 + DO 80 KK = 1, KB + J1 = J1 + KB + J2 = J2 + KB +* +* generate plane rotations to annihilate nonzero elements +* which have been created below the band +* + IF( NR.GT.0 ) + $ CALL SLARGV( NR, AB( KLU1, J1-KLM-1 ), INCA, + $ WORK( J1 ), KB1, WORK( MN+J1 ), KB1 ) +* +* apply plane rotations from the left +* + DO 10 L = 1, KB + IF( J2-KLM+L-1.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( KLU1-L, J1-KLM+L-1 ), INCA, + $ AB( KLU1-L+1, J1-KLM+L-1 ), INCA, + $ WORK( MN+J1 ), WORK( J1 ), KB1 ) + 10 CONTINUE +* + IF( ML.GT.ML0 ) THEN + IF( ML.LE.M-I+1 ) THEN +* +* generate plane rotation to annihilate a(i+ml-1,i) +* within the band, and apply rotation from the left +* + CALL SLARTG( AB( KU+ML-1, I ), AB( KU+ML, I ), + $ WORK( MN+I+ML-1 ), WORK( I+ML-1 ), + $ RA ) + AB( KU+ML-1, I ) = RA + IF( I.LT.N ) + $ CALL SROT( MIN( KU+ML-2, N-I ), + $ AB( KU+ML-2, I+1 ), LDAB-1, + $ AB( KU+ML-1, I+1 ), LDAB-1, + $ WORK( MN+I+ML-1 ), WORK( I+ML-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KB1 + END IF +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + DO 20 J = J1, J2, KB1 + CALL SROT( M, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ WORK( MN+J ), WORK( J ) ) + 20 CONTINUE + END IF +* + IF( WANTC ) THEN +* +* apply plane rotations to C +* + DO 30 J = J1, J2, KB1 + CALL SROT( NCC, C( J-1, 1 ), LDC, C( J, 1 ), LDC, + $ WORK( MN+J ), WORK( J ) ) + 30 CONTINUE + END IF +* + IF( J2+KUN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KB1 + END IF +* + DO 40 J = J1, J2, KB1 +* +* create nonzero element a(j-1,j+ku) above the band +* and store it in WORK(n+1:2*n) +* + WORK( J+KUN ) = WORK( J )*AB( 1, J+KUN ) + AB( 1, J+KUN ) = WORK( MN+J )*AB( 1, J+KUN ) + 40 CONTINUE +* +* generate plane rotations to annihilate nonzero elements +* which have been generated above the band +* + IF( NR.GT.0 ) + $ CALL SLARGV( NR, AB( 1, J1+KUN-1 ), INCA, + $ WORK( J1+KUN ), KB1, WORK( MN+J1+KUN ), + $ KB1 ) +* +* apply plane rotations from the right +* + DO 50 L = 1, KB + IF( J2+L-1.GT.M ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( L+1, J1+KUN-1 ), INCA, + $ AB( L, J1+KUN ), INCA, + $ WORK( MN+J1+KUN ), WORK( J1+KUN ), + $ KB1 ) + 50 CONTINUE +* + IF( ML.EQ.ML0 .AND. MU.GT.MU0 ) THEN + IF( MU.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i,i+mu-1) +* within the band, and apply rotation from the right +* + CALL SLARTG( AB( KU-MU+3, I+MU-2 ), + $ AB( KU-MU+2, I+MU-1 ), + $ WORK( MN+I+MU-1 ), WORK( I+MU-1 ), + $ RA ) + AB( KU-MU+3, I+MU-2 ) = RA + CALL SROT( MIN( KL+MU-2, M-I ), + $ AB( KU-MU+4, I+MU-2 ), 1, + $ AB( KU-MU+3, I+MU-1 ), 1, + $ WORK( MN+I+MU-1 ), WORK( I+MU-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KB1 + END IF +* + IF( WANTPT ) THEN +* +* accumulate product of plane rotations in P' +* + DO 60 J = J1, J2, KB1 + CALL SROT( N, PT( J+KUN-1, 1 ), LDPT, + $ PT( J+KUN, 1 ), LDPT, WORK( MN+J+KUN ), + $ WORK( J+KUN ) ) + 60 CONTINUE + END IF +* + IF( J2+KB.GT.M ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KB1 + END IF +* + DO 70 J = J1, J2, KB1 +* +* create nonzero element a(j+kl+ku,j+ku-1) below the +* band and store it in WORK(1:n) +* + WORK( J+KB ) = WORK( J+KUN )*AB( KLU1, J+KUN ) + AB( KLU1, J+KUN ) = WORK( MN+J+KUN )*AB( KLU1, J+KUN ) + 70 CONTINUE +* + IF( ML.GT.ML0 ) THEN + ML = ML - 1 + ELSE + MU = MU - 1 + END IF + 80 CONTINUE + 90 CONTINUE + END IF +* + IF( KU.EQ.0 .AND. KL.GT.0 ) THEN +* +* A has been reduced to lower bidiagonal form +* +* Transform lower bidiagonal form to upper bidiagonal by applying +* plane rotations from the left, storing diagonal elements in D +* and off-diagonal elements in E +* + DO 100 I = 1, MIN( M-1, N ) + CALL SLARTG( AB( 1, I ), AB( 2, I ), RC, RS, RA ) + D( I ) = RA + IF( I.LT.N ) THEN + E( I ) = RS*AB( 1, I+1 ) + AB( 1, I+1 ) = RC*AB( 1, I+1 ) + END IF + IF( WANTQ ) + $ CALL SROT( M, Q( 1, I ), 1, Q( 1, I+1 ), 1, RC, RS ) + IF( WANTC ) + $ CALL SROT( NCC, C( I, 1 ), LDC, C( I+1, 1 ), LDC, RC, + $ RS ) + 100 CONTINUE + IF( M.LE.N ) + $ D( M ) = AB( 1, M ) + ELSE IF( KU.GT.0 ) THEN +* +* A has been reduced to upper bidiagonal form +* + IF( M.LT.N ) THEN +* +* Annihilate a(m,m+1) by applying plane rotations from the +* right, storing diagonal elements in D and off-diagonal +* elements in E +* + RB = AB( KU, M+1 ) + DO 110 I = M, 1, -1 + CALL SLARTG( AB( KU+1, I ), RB, RC, RS, RA ) + D( I ) = RA + IF( I.GT.1 ) THEN + RB = -RS*AB( KU, I ) + E( I-1 ) = RC*AB( KU, I ) + END IF + IF( WANTPT ) + $ CALL SROT( N, PT( I, 1 ), LDPT, PT( M+1, 1 ), LDPT, + $ RC, RS ) + 110 CONTINUE + ELSE +* +* Copy off-diagonal elements to E and diagonal elements to D +* + DO 120 I = 1, MINMN - 1 + E( I ) = AB( KU, I+1 ) + 120 CONTINUE + DO 130 I = 1, MINMN + D( I ) = AB( KU+1, I ) + 130 CONTINUE + END IF + ELSE +* +* A is diagonal. Set elements of E to zero and copy diagonal +* elements to D. +* + DO 140 I = 1, MINMN - 1 + E( I ) = ZERO + 140 CONTINUE + DO 150 I = 1, MINMN + D( I ) = AB( 1, I ) + 150 CONTINUE + END IF + RETURN +* +* End of SGBBRD +* + END diff --git a/costa/native/external/lapack/sgbcon.f b/costa/native/external/lapack/sgbcon.f new file mode 100644 index 000000000..cfc387c39 --- /dev/null +++ b/costa/native/external/lapack/sgbcon.f @@ -0,0 +1,222 @@ + SUBROUTINE SGBCON( NORM, N, KL, KU, AB, LDAB, IPIV, ANORM, RCOND, + $ WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, KL, KU, LDAB, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL AB( LDAB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGBCON estimates the reciprocal of the condition number of a real +* general band matrix A, in either the 1-norm or the infinity-norm, +* using the LU factorization computed by SGBTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input) REAL array, dimension (LDAB,N) +* Details of the LU factorization of the band matrix A, as +* computed by SGBTRF. U is stored as an upper triangular band +* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +* the multipliers used during the factorization are stored in +* rows KL+KU+2 to 2*KL+KU+1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= N, row i of the matrix was +* interchanged with row IPIV(i). +* +* ANORM (input) REAL +* If NORM = '1' or 'O', the 1-norm of the original matrix A. +* If NORM = 'I', the infinity-norm of the original matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LNOTI, ONENRM + CHARACTER NORMIN + INTEGER IX, J, JP, KASE, KASE1, KD, LM + REAL AINVNM, SCALE, SMLNUM, T +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SDOT, SLAMCH + EXTERNAL LSAME, ISAMAX, SDOT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SLACON, SLATBS, SRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN + INFO = -6 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KD = KL + KU + 1 + LNOTI = KL.GT.0 + KASE = 0 + 10 CONTINUE + CALL SLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + IF( LNOTI ) THEN + DO 20 J = 1, N - 1 + LM = MIN( KL, N-J ) + JP = IPIV( J ) + T = WORK( JP ) + IF( JP.NE.J ) THEN + WORK( JP ) = WORK( J ) + WORK( J ) = T + END IF + CALL SAXPY( LM, -T, AB( KD+1, J ), 1, WORK( J+1 ), 1 ) + 20 CONTINUE + END IF +* +* Multiply by inv(U). +* + CALL SLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ KL+KU, AB, LDAB, WORK, SCALE, WORK( 2*N+1 ), + $ INFO ) + ELSE +* +* Multiply by inv(U'). +* + CALL SLATBS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, + $ KL+KU, AB, LDAB, WORK, SCALE, WORK( 2*N+1 ), + $ INFO ) +* +* Multiply by inv(L'). +* + IF( LNOTI ) THEN + DO 30 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + WORK( J ) = WORK( J ) - SDOT( LM, AB( KD+1, J ), 1, + $ WORK( J+1 ), 1 ) + JP = IPIV( J ) + IF( JP.NE.J ) THEN + T = WORK( JP ) + WORK( JP ) = WORK( J ) + WORK( J ) = T + END IF + 30 CONTINUE + END IF + END IF +* +* Divide X by 1/SCALE if doing so will not cause overflow. +* + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = ISAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 40 + CALL SRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 40 CONTINUE + RETURN +* +* End of SGBCON +* + END diff --git a/costa/native/external/lapack/sgbequ.f b/costa/native/external/lapack/sgbequ.f new file mode 100644 index 000000000..d354045e6 --- /dev/null +++ b/costa/native/external/lapack/sgbequ.f @@ -0,0 +1,240 @@ + SUBROUTINE SGBEQU( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N + REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), C( * ), R( * ) +* .. +* +* Purpose +* ======= +* +* SGBEQU computes row and column scalings intended to equilibrate an +* M-by-N band matrix A and reduce its condition number. R returns the +* row scale factors and C the column scale factors, chosen to try to +* make the largest element in each row and column of the matrix B with +* elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. +* +* R(i) and C(j) are restricted to be between SMLNUM = smallest safe +* number and BIGNUM = largest safe number. Use of these scaling +* factors is not guaranteed to reduce the condition number of A but +* works well in practice. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input) REAL array, dimension (LDAB,N) +* The band matrix A, stored in rows 1 to KL+KU+1. The j-th +* column of A is stored in the j-th column of the array AB as +* follows: +* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KL+KU+1. +* +* R (output) REAL array, dimension (M) +* If INFO = 0, or INFO > M, R contains the row scale factors +* for A. +* +* C (output) REAL array, dimension (N) +* If INFO = 0, C contains the column scale factors for A. +* +* ROWCND (output) REAL +* If INFO = 0 or INFO > M, ROWCND contains the ratio of the +* smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +* AMAX is neither too large nor too small, it is not worth +* scaling by R. +* +* COLCND (output) REAL +* If INFO = 0, COLCND contains the ratio of the smallest +* C(i) to the largest C(i). If COLCND >= 0.1, it is not +* worth scaling by C. +* +* AMAX (output) REAL +* Absolute value of largest matrix element. If AMAX is very +* close to overflow or very close to underflow, the matrix +* should be scaled. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= M: the i-th row of A is exactly zero +* > M: the (i-M)-th column of A is exactly zero +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, KD + REAL BIGNUM, RCMAX, RCMIN, SMLNUM +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + KD = KU + 1 + DO 30 J = 1, N + DO 20 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + R( I ) = MAX( R( I ), ABS( AB( KD+I-J, J ) ) ) + 20 CONTINUE + 30 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)) +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + KD = KU + 1 + DO 90 J = 1, N + DO 80 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + C( J ) = MAX( C( J ), ABS( AB( KD+I-J, J ) )*R( I ) ) + 80 CONTINUE + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)) +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of SGBEQU +* + END diff --git a/costa/native/external/lapack/sgbrfs.f b/costa/native/external/lapack/sgbrfs.f new file mode 100644 index 000000000..ff4d5737d --- /dev/null +++ b/costa/native/external/lapack/sgbrfs.f @@ -0,0 +1,351 @@ + SUBROUTINE SGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, + $ IPIV, B, LDB, X, LDX, FERR, BERR, WORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SGBRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is banded, and provides +* error bounds and backward error estimates for the solution. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose = Transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AB (input) REAL array, dimension (LDAB,N) +* The original band matrix A, stored in rows 1 to KL+KU+1. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KL+KU+1. +* +* AFB (input) REAL array, dimension (LDAFB,N) +* Details of the LU factorization of the band matrix A, as +* computed by SGBTRF. U is stored as an upper triangular band +* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +* the multipliers used during the factorization are stored in +* rows KL+KU+2 to 2*KL+KU+1. +* +* LDAFB (input) INTEGER +* The leading dimension of the array AFB. LDAFB >= 2*KL*KU+1. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from SGBTRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* B (input) REAL array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) REAL array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by SGBTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANST + INTEGER COUNT, I, J, K, KASE, KK, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGBMV, SGBTRS, SLACON, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -7 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -9 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = MIN( KL+KU+2, N+1 ) + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL SGBMV( TRANS, N, N, KL, KU, -ONE, AB, LDAB, X( 1, J ), 1, + $ ONE, WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(op(A))*abs(X) + abs(B). +* + IF( NOTRAN ) THEN + DO 50 K = 1, N + KK = KU + 1 - K + XK = ABS( X( K, J ) ) + DO 40 I = MAX( 1, K-KU ), MIN( N, K+KL ) + WORK( I ) = WORK( I ) + ABS( AB( KK+I, K ) )*XK + 40 CONTINUE + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + KK = KU + 1 - K + DO 60 I = MAX( 1, K-KU ), MIN( N, K+KL ) + S = S + ABS( AB( KK+I, K ) )*ABS( X( I, J ) ) + 60 CONTINUE + WORK( K ) = WORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL SGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK( N+1 ), N, INFO ) + CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use SLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL SLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**T). +* + CALL SGBTRS( TRANST, N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK( N+1 ), N, INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( N+I )*WORK( I ) + 110 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( N+I )*WORK( I ) + 120 CONTINUE + CALL SGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK( N+1 ), N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of SGBRFS +* + END diff --git a/costa/native/external/lapack/sgbsv.f b/costa/native/external/lapack/sgbsv.f new file mode 100644 index 000000000..17dd5ea0d --- /dev/null +++ b/costa/native/external/lapack/sgbsv.f @@ -0,0 +1,143 @@ + SUBROUTINE SGBSV( N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL AB( LDAB, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SGBSV computes the solution to a real system of linear equations +* A * X = B, where A is a band matrix of order N with KL subdiagonals +* and KU superdiagonals, and X and B are N-by-NRHS matrices. +* +* The LU decomposition with partial pivoting and row interchanges is +* used to factor A as A = L * U, where L is a product of permutation +* and unit lower triangular matrices with KL subdiagonals, and U is +* upper triangular with KL+KU superdiagonals. The factored form of A +* is then used to solve the system of equations A * X = B. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input/output) REAL array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows KL+1 to +* 2*KL+KU+1; rows 1 to KL of the array need not be set. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(KL+KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+KL) +* On exit, details of the factorization: U is stored as an +* upper triangular band matrix with KL+KU superdiagonals in +* rows 1 to KL+KU+1, and the multipliers used during the +* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +* See below for further details. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (output) INTEGER array, dimension (N) +* The pivot indices that define the permutation matrix P; +* row i of the matrix was interchanged with row IPIV(i). +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and the solution has not been computed. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* M = N = 6, KL = 2, KU = 1: +* +* On entry: On exit: +* +* * * * + + + * * * u14 u25 u36 +* * * + + + + * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +* a31 a42 a53 a64 * * m31 m42 m53 m64 * * +* +* Array elements marked * are not used by the routine; elements marked +* + need not be set on entry, but are required by the routine to store +* elements of U because of fill-in resulting from the row interchanges. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL SGBTRF, SGBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( KL.LT.0 ) THEN + INFO = -2 + ELSE IF( KU.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBSV ', -INFO ) + RETURN + END IF +* +* Compute the LU factorization of the band matrix A. +* + CALL SGBTRF( N, N, KL, KU, AB, LDAB, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL SGBTRS( 'No transpose', N, KL, KU, NRHS, AB, LDAB, IPIV, + $ B, LDB, INFO ) + END IF + RETURN +* +* End of SGBSV +* + END diff --git a/costa/native/external/lapack/sgbsvx.f b/costa/native/external/lapack/sgbsvx.f new file mode 100644 index 000000000..62037dc6f --- /dev/null +++ b/costa/native/external/lapack/sgbsvx.f @@ -0,0 +1,517 @@ + SUBROUTINE SGBSVX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, + $ LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, + $ RCOND, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ BERR( * ), C( * ), FERR( * ), R( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SGBSVX uses the LU factorization to compute the solution to a real +* system of linear equations A * X = B, A**T * X = B, or A**H * X = B, +* where A is a band matrix of order N with KL subdiagonals and KU +* superdiagonals, and X and B are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed by this subroutine: +* +* 1. If FACT = 'E', real scaling factors are computed to equilibrate +* the system: +* TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +* TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +* TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +* Whether or not the system will be equilibrated depends on the +* scaling of the matrix A, but if equilibration is used, A is +* overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +* or diag(C)*B (if TRANS = 'T' or 'C'). +* +* 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the +* matrix A (after equilibration if FACT = 'E') as +* A = L * U, +* where L is a product of permutation and unit lower triangular +* matrices with KL subdiagonals, and U is upper triangular with +* KL+KU superdiagonals. +* +* 3. If some U(i,i)=0, so that U is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 4. The system of equations is solved for X using the factored form +* of A. +* +* 5. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* 6. If equilibration was used, the matrix X is premultiplied by +* diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +* that it solves the original system before equilibration. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of the matrix A is +* supplied on entry, and if not, whether the matrix A should be +* equilibrated before it is factored. +* = 'F': On entry, AFB and IPIV contain the factored form of +* A. If EQUED is not 'N', the matrix A has been +* equilibrated with scaling factors given by R and C. +* AB, AFB, and IPIV are not modified. +* = 'N': The matrix A will be copied to AFB and factored. +* = 'E': The matrix A will be equilibrated if necessary, then +* copied to AFB and factored. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations. +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Transpose) +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AB (input/output) REAL array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +* +* If FACT = 'F' and EQUED is not 'N', then A must have been +* equilibrated by the scaling factors in R and/or C. AB is not +* modified if FACT = 'F' or 'N', or if FACT = 'E' and +* EQUED = 'N' on exit. +* +* On exit, if EQUED .ne. 'N', A is scaled as follows: +* EQUED = 'R': A := diag(R) * A +* EQUED = 'C': A := A * diag(C) +* EQUED = 'B': A := diag(R) * A * diag(C). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KL+KU+1. +* +* AFB (input or output) REAL array, dimension (LDAFB,N) +* If FACT = 'F', then AFB is an input argument and on entry +* contains details of the LU factorization of the band matrix +* A, as computed by SGBTRF. U is stored as an upper triangular +* band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, +* and the multipliers used during the factorization are stored +* in rows KL+KU+2 to 2*KL+KU+1. If EQUED .ne. 'N', then AFB is +* the factored form of the equilibrated matrix A. +* +* If FACT = 'N', then AFB is an output argument and on exit +* returns details of the LU factorization of A. +* +* If FACT = 'E', then AFB is an output argument and on exit +* returns details of the LU factorization of the equilibrated +* matrix A (see the description of AB for the form of the +* equilibrated matrix). +* +* LDAFB (input) INTEGER +* The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains the pivot indices from the factorization A = L*U +* as computed by SGBTRF; row i of the matrix was interchanged +* with row IPIV(i). +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains the pivot indices from the factorization A = L*U +* of the original matrix A. +* +* If FACT = 'E', then IPIV is an output argument and on exit +* contains the pivot indices from the factorization A = L*U +* of the equilibrated matrix A. +* +* EQUED (input or output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration (always true if FACT = 'N'). +* = 'R': Row equilibration, i.e., A has been premultiplied by +* diag(R). +* = 'C': Column equilibration, i.e., A has been postmultiplied +* by diag(C). +* = 'B': Both row and column equilibration, i.e., A has been +* replaced by diag(R) * A * diag(C). +* EQUED is an input argument if FACT = 'F'; otherwise, it is an +* output argument. +* +* R (input or output) REAL array, dimension (N) +* The row scale factors for A. If EQUED = 'R' or 'B', A is +* multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +* is not accessed. R is an input argument if FACT = 'F'; +* otherwise, R is an output argument. If FACT = 'F' and +* EQUED = 'R' or 'B', each element of R must be positive. +* +* C (input or output) REAL array, dimension (N) +* The column scale factors for A. If EQUED = 'C' or 'B', A is +* multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +* is not accessed. C is an input argument if FACT = 'F'; +* otherwise, C is an output argument. If FACT = 'F' and +* EQUED = 'C' or 'B', each element of C must be positive. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, +* if EQUED = 'N', B is not modified; +* if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +* diag(R)*B; +* if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +* overwritten by diag(C)*B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) REAL array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X +* to the original system of equations. Note that A and B are +* modified on exit if EQUED .ne. 'N', and the solution to the +* equilibrated system is inv(diag(C))*X if TRANS = 'N' and +* EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' +* and EQUED = 'R' or 'B'. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) REAL +* The estimate of the reciprocal condition number of the matrix +* A after equilibration (if done). If RCOND is less than the +* machine precision (in particular, if RCOND = 0), the matrix +* is singular to working precision. This condition is +* indicated by a return code of INFO > 0. +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace/output) REAL array, dimension (3*N) +* On exit, WORK(1) contains the reciprocal pivot growth +* factor norm(A)/norm(U). The "max absolute element" norm is +* used. If WORK(1) is much less than 1, then the stability +* of the LU factorization of the (equilibrated) matrix A +* could be poor. This also means that the solution X, condition +* estimator RCOND, and forward error bound FERR could be +* unreliable. If factorization fails with 0 0: if INFO = i, and i is +* <= N: U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, so the solution and error bounds +* could not be computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + CHARACTER NORM + INTEGER I, INFEQU, J, J1, J2 + REAL AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN, + $ ROWCND, RPVGRW, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANGB, SLANTB + EXTERNAL LSAME, SLAMCH, SLANGB, SLANTB +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGBCON, SGBEQU, SGBRFS, SGBTRF, SGBTRS, + $ SLACPY, SLAQGB, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KL.LT.0 ) THEN + INFO = -4 + ELSE IF( KU.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -8 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -10 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -12 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -13 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -14 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -16 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -18 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL SGBEQU( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL SLAQGB( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = R( I )*B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( COLEQU ) THEN + DO 60 J = 1, NRHS + DO 50 I = 1, N + B( I, J ) = C( I )*B( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of the band matrix A. +* + DO 70 J = 1, N + J1 = MAX( J-KU, 1 ) + J2 = MIN( J+KL, N ) + CALL SCOPY( J2-J1+1, AB( KU+1-J+J1, J ), 1, + $ AFB( KL+KU+1-J+J1, J ), 1 ) + 70 CONTINUE +* + CALL SGBTRF( N, N, KL, KU, AFB, LDAFB, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) THEN +* +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + ANORM = ZERO + DO 90 J = 1, INFO + DO 80 I = MAX( KU+2-J, 1 ), + $ MIN( N+KU+1-J, KL+KU+1 ) + ANORM = MAX( ANORM, ABS( AB( I, J ) ) ) + 80 CONTINUE + 90 CONTINUE + RPVGRW = SLANTB( 'M', 'U', 'N', INFO, + $ MIN( INFO-1, KL+KU ), AFB( MAX( 1, + $ KL+KU+2-INFO ), 1 ), LDAFB, WORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = ANORM / RPVGRW + END IF + WORK( 1 ) = RPVGRW + RCOND = ZERO + END IF + RETURN + END IF + END IF +* +* Compute the norm of the matrix A and the +* reciprocal pivot growth factor RPVGRW. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = SLANGB( NORM, N, KL, KU, AB, LDAB, WORK ) + RPVGRW = SLANTB( 'M', 'U', 'N', N, KL+KU, AFB, LDAFB, WORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = SLANGB( 'M', N, KL, KU, AB, LDAB, WORK ) / RPVGRW + END IF +* +* Compute the reciprocal of the condition number of A. +* + CALL SGBCON( NORM, N, KL, KU, AFB, LDAFB, IPIV, ANORM, RCOND, + $ WORK, IWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution matrix X. +* + CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL SGBTRS( TRANS, N, KL, KU, NRHS, AFB, LDAFB, IPIV, X, LDX, + $ INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL SGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, IPIV, + $ B, LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( NOTRAN ) THEN + IF( COLEQU ) THEN + DO 110 J = 1, NRHS + DO 100 I = 1, N + X( I, J ) = C( I )*X( I, J ) + 100 CONTINUE + 110 CONTINUE + DO 120 J = 1, NRHS + FERR( J ) = FERR( J ) / COLCND + 120 CONTINUE + END IF + ELSE IF( ROWEQU ) THEN + DO 140 J = 1, NRHS + DO 130 I = 1, N + X( I, J ) = R( I )*X( I, J ) + 130 CONTINUE + 140 CONTINUE + DO 150 J = 1, NRHS + FERR( J ) = FERR( J ) / ROWCND + 150 CONTINUE + END IF +* + WORK( 1 ) = RPVGRW + RETURN +* +* End of SGBSVX +* + END diff --git a/costa/native/external/lapack/sgbtf2.f b/costa/native/external/lapack/sgbtf2.f new file mode 100644 index 000000000..1c6da88db --- /dev/null +++ b/costa/native/external/lapack/sgbtf2.f @@ -0,0 +1,203 @@ + SUBROUTINE SGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* SGBTF2 computes an LU factorization of a real m-by-n band matrix A +* using partial pivoting with row interchanges. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input/output) REAL array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows KL+1 to +* 2*KL+KU+1; rows 1 to KL of the array need not be set. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +* +* On exit, details of the factorization: U is stored as an +* upper triangular band matrix with KL+KU superdiagonals in +* rows 1 to KL+KU+1, and the multipliers used during the +* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +* See below for further details. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* M = N = 6, KL = 2, KU = 1: +* +* On entry: On exit: +* +* * * * + + + * * * u14 u25 u36 +* * * + + + + * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +* a31 a42 a53 a64 * * m31 m42 m53 m64 * * +* +* Array elements marked * are not used by the routine; elements marked +* + need not be set on entry, but are required by the routine to store +* elements of U, because of fill-in resulting from the row +* interchanges. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, JP, JU, KM, KV +* .. +* .. External Functions .. + INTEGER ISAMAX + EXTERNAL ISAMAX +* .. +* .. External Subroutines .. + EXTERNAL SGER, SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* KV is the number of superdiagonals in the factor U, allowing for +* fill-in. +* + KV = KU + KL +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KV+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Gaussian elimination with partial pivoting +* +* Set fill-in elements in columns KU+2 to KV to zero. +* + DO 20 J = KU + 2, MIN( KV, N ) + DO 10 I = KV - J + 2, KL + AB( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* JU is the index of the last column affected by the current stage +* of the factorization. +* + JU = 1 +* + DO 40 J = 1, MIN( M, N ) +* +* Set fill-in elements in column J+KV to zero. +* + IF( J+KV.LE.N ) THEN + DO 30 I = 1, KL + AB( I, J+KV ) = ZERO + 30 CONTINUE + END IF +* +* Find pivot and test for singularity. KM is the number of +* subdiagonal elements in the current column. +* + KM = MIN( KL, M-J ) + JP = ISAMAX( KM+1, AB( KV+1, J ), 1 ) + IPIV( J ) = JP + J - 1 + IF( AB( KV+JP, J ).NE.ZERO ) THEN + JU = MAX( JU, MIN( J+KU+JP-1, N ) ) +* +* Apply interchange to columns J to JU. +* + IF( JP.NE.1 ) + $ CALL SSWAP( JU-J+1, AB( KV+JP, J ), LDAB-1, + $ AB( KV+1, J ), LDAB-1 ) +* + IF( KM.GT.0 ) THEN +* +* Compute multipliers. +* + CALL SSCAL( KM, ONE / AB( KV+1, J ), AB( KV+2, J ), 1 ) +* +* Update trailing submatrix within the band. +* + IF( JU.GT.J ) + $ CALL SGER( KM, JU-J, -ONE, AB( KV+2, J ), 1, + $ AB( KV, J+1 ), LDAB-1, AB( KV+1, J+1 ), + $ LDAB-1 ) + END IF + ELSE +* +* If pivot is zero, set INFO to the index of the pivot +* unless a zero pivot has already been found. +* + IF( INFO.EQ.0 ) + $ INFO = J + END IF + 40 CONTINUE + RETURN +* +* End of SGBTF2 +* + END diff --git a/costa/native/external/lapack/sgbtrf.f b/costa/native/external/lapack/sgbtrf.f new file mode 100644 index 000000000..45c628b05 --- /dev/null +++ b/costa/native/external/lapack/sgbtrf.f @@ -0,0 +1,442 @@ + SUBROUTINE SGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* SGBTRF computes an LU factorization of a real m-by-n band matrix A +* using partial pivoting with row interchanges. +* +* This is the blocked version of the algorithm, calling Level 3 BLAS. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input/output) REAL array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows KL+1 to +* 2*KL+KU+1; rows 1 to KL of the array need not be set. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +* +* On exit, details of the factorization: U is stored as an +* upper triangular band matrix with KL+KU superdiagonals in +* rows 1 to KL+KU+1, and the multipliers used during the +* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +* See below for further details. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* M = N = 6, KL = 2, KU = 1: +* +* On entry: On exit: +* +* * * * + + + * * * u14 u25 u36 +* * * + + + + * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +* a31 a42 a53 a64 * * m31 m42 m53 m64 * * +* +* Array elements marked * are not used by the routine; elements marked +* + need not be set on entry, but are required by the routine to store +* elements of U because of fill-in resulting from the row interchanges. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + INTEGER NBMAX, LDWORK + PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 ) +* .. +* .. Local Scalars .. + INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP, + $ JU, K2, KM, KV, NB, NW + REAL TEMP +* .. +* .. Local Arrays .. + REAL WORK13( LDWORK, NBMAX ), + $ WORK31( LDWORK, NBMAX ) +* .. +* .. External Functions .. + INTEGER ILAENV, ISAMAX + EXTERNAL ILAENV, ISAMAX +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGBTF2, SGEMM, SGER, SLASWP, SSCAL, + $ SSWAP, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* KV is the number of superdiagonals in the factor U, allowing for +* fill-in +* + KV = KU + KL +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KV+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment +* + NB = ILAENV( 1, 'SGBTRF', ' ', M, N, KL, KU ) +* +* The block size must not exceed the limit set by the size of the +* local arrays WORK13 and WORK31. +* + NB = MIN( NB, NBMAX ) +* + IF( NB.LE.1 .OR. NB.GT.KL ) THEN +* +* Use unblocked code +* + CALL SGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) + ELSE +* +* Use blocked code +* +* Zero the superdiagonal elements of the work array WORK13 +* + DO 20 J = 1, NB + DO 10 I = 1, J - 1 + WORK13( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Zero the subdiagonal elements of the work array WORK31 +* + DO 40 J = 1, NB + DO 30 I = J + 1, NB + WORK31( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Gaussian elimination with partial pivoting +* +* Set fill-in elements in columns KU+2 to KV to zero +* + DO 60 J = KU + 2, MIN( KV, N ) + DO 50 I = KV - J + 2, KL + AB( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE +* +* JU is the index of the last column affected by the current +* stage of the factorization +* + JU = 1 +* + DO 180 J = 1, MIN( M, N ), NB + JB = MIN( NB, MIN( M, N )-J+1 ) +* +* The active part of the matrix is partitioned +* +* A11 A12 A13 +* A21 A22 A23 +* A31 A32 A33 +* +* Here A11, A21 and A31 denote the current block of JB columns +* which is about to be factorized. The number of rows in the +* partitioning are JB, I2, I3 respectively, and the numbers +* of columns are JB, J2, J3. The superdiagonal elements of A13 +* and the subdiagonal elements of A31 lie outside the band. +* + I2 = MIN( KL-JB, M-J-JB+1 ) + I3 = MIN( JB, M-J-KL+1 ) +* +* J2 and J3 are computed after JU has been updated. +* +* Factorize the current block of JB columns +* + DO 80 JJ = J, J + JB - 1 +* +* Set fill-in elements in column JJ+KV to zero +* + IF( JJ+KV.LE.N ) THEN + DO 70 I = 1, KL + AB( I, JJ+KV ) = ZERO + 70 CONTINUE + END IF +* +* Find pivot and test for singularity. KM is the number of +* subdiagonal elements in the current column. +* + KM = MIN( KL, M-JJ ) + JP = ISAMAX( KM+1, AB( KV+1, JJ ), 1 ) + IPIV( JJ ) = JP + JJ - J + IF( AB( KV+JP, JJ ).NE.ZERO ) THEN + JU = MAX( JU, MIN( JJ+KU+JP-1, N ) ) + IF( JP.NE.1 ) THEN +* +* Apply interchange to columns J to J+JB-1 +* + IF( JP+JJ-1.LT.J+KL ) THEN +* + CALL SSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1, + $ AB( KV+JP+JJ-J, J ), LDAB-1 ) + ELSE +* +* The interchange affects columns J to JJ-1 of A31 +* which are stored in the work array WORK31 +* + CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) + CALL SSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1, + $ AB( KV+JP, JJ ), LDAB-1 ) + END IF + END IF +* +* Compute multipliers +* + CALL SSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ), + $ 1 ) +* +* Update trailing submatrix within the band and within +* the current block. JM is the index of the last column +* which needs to be updated. +* + JM = MIN( JU, J+JB-1 ) + IF( JM.GT.JJ ) + $ CALL SGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1, + $ AB( KV, JJ+1 ), LDAB-1, + $ AB( KV+1, JJ+1 ), LDAB-1 ) + ELSE +* +* If pivot is zero, set INFO to the index of the pivot +* unless a zero pivot has already been found. +* + IF( INFO.EQ.0 ) + $ INFO = JJ + END IF +* +* Copy current column of A31 into the work array WORK31 +* + NW = MIN( JJ-J+1, I3 ) + IF( NW.GT.0 ) + $ CALL SCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1, + $ WORK31( 1, JJ-J+1 ), 1 ) + 80 CONTINUE + IF( J+JB.LE.N ) THEN +* +* Apply the row interchanges to the other blocks. +* + J2 = MIN( JU-J+1, KV ) - JB + J3 = MAX( 0, JU-J-KV+1 ) +* +* Use SLASWP to apply the row interchanges to A12, A22, and +* A32. +* + CALL SLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB, + $ IPIV( J ), 1 ) +* +* Adjust the pivot indices. +* + DO 90 I = J, J + JB - 1 + IPIV( I ) = IPIV( I ) + J - 1 + 90 CONTINUE +* +* Apply the row interchanges to A13, A23, and A33 +* columnwise. +* + K2 = J - 1 + JB + J2 + DO 110 I = 1, J3 + JJ = K2 + I + DO 100 II = J + I - 1, J + JB - 1 + IP = IPIV( II ) + IF( IP.NE.II ) THEN + TEMP = AB( KV+1+II-JJ, JJ ) + AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ ) + AB( KV+1+IP-JJ, JJ ) = TEMP + END IF + 100 CONTINUE + 110 CONTINUE +* +* Update the relevant part of the trailing submatrix +* + IF( J2.GT.0 ) THEN +* +* Update A12 +* + CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, J2, ONE, AB( KV+1, J ), LDAB-1, + $ AB( KV+1-JB, J+JB ), LDAB-1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A22 +* + CALL SGEMM( 'No transpose', 'No transpose', I2, J2, + $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, + $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, + $ AB( KV+1, J+JB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Update A32 +* + CALL SGEMM( 'No transpose', 'No transpose', I3, J2, + $ JB, -ONE, WORK31, LDWORK, + $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, + $ AB( KV+KL+1-JB, J+JB ), LDAB-1 ) + END IF + END IF +* + IF( J3.GT.0 ) THEN +* +* Copy the lower triangle of A13 into the work array +* WORK13 +* + DO 130 JJ = 1, J3 + DO 120 II = JJ, JB + WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 ) + 120 CONTINUE + 130 CONTINUE +* +* Update A13 in the work array +* + CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, J3, ONE, AB( KV+1, J ), LDAB-1, + $ WORK13, LDWORK ) +* + IF( I2.GT.0 ) THEN +* +* Update A23 +* + CALL SGEMM( 'No transpose', 'No transpose', I2, J3, + $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, + $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ), + $ LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Update A33 +* + CALL SGEMM( 'No transpose', 'No transpose', I3, J3, + $ JB, -ONE, WORK31, LDWORK, WORK13, + $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 ) + END IF +* +* Copy the lower triangle of A13 back into place +* + DO 150 JJ = 1, J3 + DO 140 II = JJ, JB + AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ ) + 140 CONTINUE + 150 CONTINUE + END IF + ELSE +* +* Adjust the pivot indices. +* + DO 160 I = J, J + JB - 1 + IPIV( I ) = IPIV( I ) + J - 1 + 160 CONTINUE + END IF +* +* Partially undo the interchanges in the current block to +* restore the upper triangular form of A31 and copy the upper +* triangle of A31 back into place +* + DO 170 JJ = J + JB - 1, J, -1 + JP = IPIV( JJ ) - JJ + 1 + IF( JP.NE.1 ) THEN +* +* Apply interchange to columns J to JJ-1 +* + IF( JP+JJ-1.LT.J+KL ) THEN +* +* The interchange does not affect A31 +* + CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ AB( KV+JP+JJ-J, J ), LDAB-1 ) + ELSE +* +* The interchange does affect A31 +* + CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) + END IF + END IF +* +* Copy the current column of A31 back into place +* + NW = MIN( I3, JJ-J+1 ) + IF( NW.GT.0 ) + $ CALL SCOPY( NW, WORK31( 1, JJ-J+1 ), 1, + $ AB( KV+KL+1-JJ+J, JJ ), 1 ) + 170 CONTINUE + 180 CONTINUE + END IF +* + RETURN +* +* End of SGBTRF +* + END diff --git a/costa/native/external/lapack/sgbtrs.f b/costa/native/external/lapack/sgbtrs.f new file mode 100644 index 000000000..bb00ae32b --- /dev/null +++ b/costa/native/external/lapack/sgbtrs.f @@ -0,0 +1,187 @@ + SUBROUTINE SGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL AB( LDAB, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SGBTRS solves a system of linear equations +* A * X = B or A' * X = B +* with a general band matrix A using the LU factorization computed +* by SGBTRF. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations. +* = 'N': A * X = B (No transpose) +* = 'T': A'* X = B (Transpose) +* = 'C': A'* X = B (Conjugate transpose = Transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input) REAL array, dimension (LDAB,N) +* Details of the LU factorization of the band matrix A, as +* computed by SGBTRF. U is stored as an upper triangular band +* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +* the multipliers used during the factorization are stored in +* rows KL+KU+2 to 2*KL+KU+1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= N, row i of the matrix was +* interchanged with row IPIV(i). +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LNOTI, NOTRAN + INTEGER I, J, KD, L, LM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SGER, SSWAP, STBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.( 2*KL+KU+1 ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + KD = KU + KL + 1 + LNOTI = KL.GT.0 +* + IF( NOTRAN ) THEN +* +* Solve A*X = B. +* +* Solve L*X = B, overwriting B with X. +* +* L is represented as a product of permutations and unit lower +* triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1), +* where each transformation L(i) is a rank-one modification of +* the identity matrix. +* + IF( LNOTI ) THEN + DO 10 J = 1, N - 1 + LM = MIN( KL, N-J ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL SSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + CALL SGER( LM, NRHS, -ONE, AB( KD+1, J ), 1, B( J, 1 ), + $ LDB, B( J+1, 1 ), LDB ) + 10 CONTINUE + END IF +* + DO 20 I = 1, NRHS +* +* Solve U*X = B, overwriting B with X. +* + CALL STBSV( 'Upper', 'No transpose', 'Non-unit', N, KL+KU, + $ AB, LDAB, B( 1, I ), 1 ) + 20 CONTINUE +* + ELSE +* +* Solve A'*X = B. +* + DO 30 I = 1, NRHS +* +* Solve U'*X = B, overwriting B with X. +* + CALL STBSV( 'Upper', 'Transpose', 'Non-unit', N, KL+KU, AB, + $ LDAB, B( 1, I ), 1 ) + 30 CONTINUE +* +* Solve L'*X = B, overwriting B with X. +* + IF( LNOTI ) THEN + DO 40 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + CALL SGEMV( 'Transpose', LM, NRHS, -ONE, B( J+1, 1 ), + $ LDB, AB( KD+1, J ), 1, ONE, B( J, 1 ), LDB ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL SSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + 40 CONTINUE + END IF + END IF + RETURN +* +* End of SGBTRS +* + END diff --git a/costa/native/external/lapack/sgebak.f b/costa/native/external/lapack/sgebak.f new file mode 100644 index 000000000..e0e72e7a5 --- /dev/null +++ b/costa/native/external/lapack/sgebak.f @@ -0,0 +1,189 @@ + SUBROUTINE SGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOB, SIDE + INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. + REAL V( LDV, * ), SCALE( * ) +* .. +* +* Purpose +* ======= +* +* SGEBAK forms the right or left eigenvectors of a real general matrix +* by backward transformation on the computed eigenvectors of the +* balanced matrix output by SGEBAL. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the type of backward transformation required: +* = 'N', do nothing, return immediately; +* = 'P', do backward transformation for permutation only; +* = 'S', do backward transformation for scaling only; +* = 'B', do backward transformations for both permutation and +* scaling. +* JOB must be the same as the argument JOB supplied to SGEBAL. +* +* SIDE (input) CHARACTER*1 +* = 'R': V contains right eigenvectors; +* = 'L': V contains left eigenvectors. +* +* N (input) INTEGER +* The number of rows of the matrix V. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* The integers ILO and IHI determined by SGEBAL. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* SCALE (input) REAL array, dimension (N) +* Details of the permutation and scaling factors, as returned +* by SGEBAL. +* +* M (input) INTEGER +* The number of columns of the matrix V. M >= 0. +* +* V (input/output) REAL array, dimension (LDV,M) +* On entry, the matrix of right or left eigenvectors to be +* transformed, as returned by SHSEIN or STREVC. +* On exit, V is overwritten by the transformed eigenvectors. +* +* LDV (input) INTEGER +* The leading dimension of the array V. LDV >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFTV, RIGHTV + INTEGER I, II, K + REAL S +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode and Test the input parameters +* + RIGHTV = LSAME( SIDE, 'R' ) + LEFTV = LSAME( SIDE, 'L' ) +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -7 + ELSE IF( LDV.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEBAK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( M.EQ.0 ) + $ RETURN + IF( LSAME( JOB, 'N' ) ) + $ RETURN +* + IF( ILO.EQ.IHI ) + $ GO TO 30 +* +* Backward balance +* + IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN +* + IF( RIGHTV ) THEN + DO 10 I = ILO, IHI + S = SCALE( I ) + CALL SSCAL( M, S, V( I, 1 ), LDV ) + 10 CONTINUE + END IF +* + IF( LEFTV ) THEN + DO 20 I = ILO, IHI + S = ONE / SCALE( I ) + CALL SSCAL( M, S, V( I, 1 ), LDV ) + 20 CONTINUE + END IF +* + END IF +* +* Backward permutation +* +* For I = ILO-1 step -1 until 1, +* IHI+1 step 1 until N do -- +* + 30 CONTINUE + IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN + IF( RIGHTV ) THEN + DO 40 II = 1, N + I = II + IF( I.GE.ILO .AND. I.LE.IHI ) + $ GO TO 40 + IF( I.LT.ILO ) + $ I = ILO - II + K = SCALE( I ) + IF( K.EQ.I ) + $ GO TO 40 + CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 40 CONTINUE + END IF +* + IF( LEFTV ) THEN + DO 50 II = 1, N + I = II + IF( I.GE.ILO .AND. I.LE.IHI ) + $ GO TO 50 + IF( I.LT.ILO ) + $ I = ILO - II + K = SCALE( I ) + IF( K.EQ.I ) + $ GO TO 50 + CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 50 CONTINUE + END IF + END IF +* + RETURN +* +* End of SGEBAK +* + END diff --git a/costa/native/external/lapack/sgebal.f b/costa/native/external/lapack/sgebal.f new file mode 100644 index 000000000..e253abbb5 --- /dev/null +++ b/costa/native/external/lapack/sgebal.f @@ -0,0 +1,323 @@ + SUBROUTINE SGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), SCALE( * ) +* .. +* +* Purpose +* ======= +* +* SGEBAL balances a general real matrix A. This involves, first, +* permuting A by a similarity transformation to isolate eigenvalues +* in the first 1 to ILO-1 and last IHI+1 to N elements on the +* diagonal; and second, applying a diagonal similarity transformation +* to rows and columns ILO to IHI to make the rows and columns as +* close in norm as possible. Both steps are optional. +* +* Balancing may reduce the 1-norm of the matrix, and improve the +* accuracy of the computed eigenvalues and/or eigenvectors. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the operations to be performed on A: +* = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 +* for i = 1,...,N; +* = 'P': permute only; +* = 'S': scale only; +* = 'B': both permute and scale. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the input matrix A. +* On exit, A is overwritten by the balanced matrix. +* If JOB = 'N', A is not referenced. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* ILO (output) INTEGER +* IHI (output) INTEGER +* ILO and IHI are set to integers such that on exit +* A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. +* If JOB = 'N' or 'S', ILO = 1 and IHI = N. +* +* SCALE (output) REAL array, dimension (N) +* Details of the permutations and scaling factors applied to +* A. If P(j) is the index of the row and column interchanged +* with row and column j and D(j) is the scaling factor +* applied to row and column j, then +* SCALE(j) = P(j) for j = 1,...,ILO-1 +* = D(j) for j = ILO,...,IHI +* = P(j) for j = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The permutations consist of row and column interchanges which put +* the matrix in the form +* +* ( T1 X Y ) +* P A P = ( 0 B Z ) +* ( 0 0 T2 ) +* +* where T1 and T2 are upper triangular matrices whose eigenvalues lie +* along the diagonal. The column indices ILO and IHI mark the starting +* and ending columns of the submatrix B. Balancing consists of applying +* a diagonal similarity transformation inv(D) * B * D to make the +* 1-norms of each row of B and its corresponding column nearly equal. +* The output matrix is +* +* ( T1 X*D Y ) +* ( 0 inv(D)*B*D inv(D)*Z ). +* ( 0 0 T2 ) +* +* Information about the permutations P and the diagonal matrix D is +* returned in the vector SCALE. +* +* This subroutine is based on the EISPACK routine BALANC. +* +* Modified by Tzu-Yi Chen, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL SCLFAC + PARAMETER ( SCLFAC = 0.8E+1 ) + REAL FACTOR + PARAMETER ( FACTOR = 0.95E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOCONV + INTEGER I, ICA, IEXC, IRA, J, K, L, M + REAL C, CA, F, G, R, RA, S, SFMAX1, SFMAX2, SFMIN1, + $ SFMIN2 +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL LSAME, ISAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEBAL', -INFO ) + RETURN + END IF +* + K = 1 + L = N +* + IF( N.EQ.0 ) + $ GO TO 210 +* + IF( LSAME( JOB, 'N' ) ) THEN + DO 10 I = 1, N + SCALE( I ) = ONE + 10 CONTINUE + GO TO 210 + END IF +* + IF( LSAME( JOB, 'S' ) ) + $ GO TO 120 +* +* Permutation to isolate eigenvalues if possible +* + GO TO 50 +* +* Row and column exchange. +* + 20 CONTINUE + SCALE( M ) = J + IF( J.EQ.M ) + $ GO TO 30 +* + CALL SSWAP( L, A( 1, J ), 1, A( 1, M ), 1 ) + CALL SSWAP( N-K+1, A( J, K ), LDA, A( M, K ), LDA ) +* + 30 CONTINUE + GO TO ( 40, 80 )IEXC +* +* Search for rows isolating an eigenvalue and push them down. +* + 40 CONTINUE + IF( L.EQ.1 ) + $ GO TO 210 + L = L - 1 +* + 50 CONTINUE + DO 70 J = L, 1, -1 +* + DO 60 I = 1, L + IF( I.EQ.J ) + $ GO TO 60 + IF( A( J, I ).NE.ZERO ) + $ GO TO 70 + 60 CONTINUE +* + M = L + IEXC = 1 + GO TO 20 + 70 CONTINUE +* + GO TO 90 +* +* Search for columns isolating an eigenvalue and push them left. +* + 80 CONTINUE + K = K + 1 +* + 90 CONTINUE + DO 110 J = K, L +* + DO 100 I = K, L + IF( I.EQ.J ) + $ GO TO 100 + IF( A( I, J ).NE.ZERO ) + $ GO TO 110 + 100 CONTINUE +* + M = K + IEXC = 2 + GO TO 20 + 110 CONTINUE +* + 120 CONTINUE + DO 130 I = K, L + SCALE( I ) = ONE + 130 CONTINUE +* + IF( LSAME( JOB, 'P' ) ) + $ GO TO 210 +* +* Balance the submatrix in rows K to L. +* +* Iterative loop for norm reduction +* + SFMIN1 = SLAMCH( 'S' ) / SLAMCH( 'P' ) + SFMAX1 = ONE / SFMIN1 + SFMIN2 = SFMIN1*SCLFAC + SFMAX2 = ONE / SFMIN2 + 140 CONTINUE + NOCONV = .FALSE. +* + DO 200 I = K, L + C = ZERO + R = ZERO +* + DO 150 J = K, L + IF( J.EQ.I ) + $ GO TO 150 + C = C + ABS( A( J, I ) ) + R = R + ABS( A( I, J ) ) + 150 CONTINUE + ICA = ISAMAX( L, A( 1, I ), 1 ) + CA = ABS( A( ICA, I ) ) + IRA = ISAMAX( N-K+1, A( I, K ), LDA ) + RA = ABS( A( I, IRA+K-1 ) ) +* +* Guard against zero C or R due to underflow. +* + IF( C.EQ.ZERO .OR. R.EQ.ZERO ) + $ GO TO 200 + G = R / SCLFAC + F = ONE + S = C + R + 160 CONTINUE + IF( C.GE.G .OR. MAX( F, C, CA ).GE.SFMAX2 .OR. + $ MIN( R, G, RA ).LE.SFMIN2 )GO TO 170 + F = F*SCLFAC + C = C*SCLFAC + CA = CA*SCLFAC + R = R / SCLFAC + G = G / SCLFAC + RA = RA / SCLFAC + GO TO 160 +* + 170 CONTINUE + G = C / SCLFAC + 180 CONTINUE + IF( G.LT.R .OR. MAX( R, RA ).GE.SFMAX2 .OR. + $ MIN( F, C, G, CA ).LE.SFMIN2 )GO TO 190 + F = F / SCLFAC + C = C / SCLFAC + G = G / SCLFAC + CA = CA / SCLFAC + R = R*SCLFAC + RA = RA*SCLFAC + GO TO 180 +* +* Now balance. +* + 190 CONTINUE + IF( ( C+R ).GE.FACTOR*S ) + $ GO TO 200 + IF( F.LT.ONE .AND. SCALE( I ).LT.ONE ) THEN + IF( F*SCALE( I ).LE.SFMIN1 ) + $ GO TO 200 + END IF + IF( F.GT.ONE .AND. SCALE( I ).GT.ONE ) THEN + IF( SCALE( I ).GE.SFMAX1 / F ) + $ GO TO 200 + END IF + G = ONE / F + SCALE( I ) = SCALE( I )*F + NOCONV = .TRUE. +* + CALL SSCAL( N-K+1, G, A( I, K ), LDA ) + CALL SSCAL( L, F, A( 1, I ), 1 ) +* + 200 CONTINUE +* + IF( NOCONV ) + $ GO TO 140 +* + 210 CONTINUE + ILO = K + IHI = L +* + RETURN +* +* End of SGEBAL +* + END diff --git a/costa/native/external/lapack/sgebd2.f b/costa/native/external/lapack/sgebd2.f new file mode 100644 index 000000000..e81504f96 --- /dev/null +++ b/costa/native/external/lapack/sgebd2.f @@ -0,0 +1,238 @@ + SUBROUTINE SGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), D( * ), E( * ), TAUP( * ), + $ TAUQ( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGEBD2 reduces a real general m by n matrix A to upper or lower +* bidiagonal form B by an orthogonal transformation: Q' * A * P = B. +* +* If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows in the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns in the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the m by n general matrix to be reduced. +* On exit, +* if m >= n, the diagonal and the first superdiagonal are +* overwritten with the upper bidiagonal matrix B; the +* elements below the diagonal, with the array TAUQ, represent +* the orthogonal matrix Q as a product of elementary +* reflectors, and the elements above the first superdiagonal, +* with the array TAUP, represent the orthogonal matrix P as +* a product of elementary reflectors; +* if m < n, the diagonal and the first subdiagonal are +* overwritten with the lower bidiagonal matrix B; the +* elements below the first subdiagonal, with the array TAUQ, +* represent the orthogonal matrix Q as a product of +* elementary reflectors, and the elements above the diagonal, +* with the array TAUP, represent the orthogonal matrix P as +* a product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* D (output) REAL array, dimension (min(M,N)) +* The diagonal elements of the bidiagonal matrix B: +* D(i) = A(i,i). +* +* E (output) REAL array, dimension (min(M,N)-1) +* The off-diagonal elements of the bidiagonal matrix B: +* if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; +* if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. +* +* TAUQ (output) REAL array dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix Q. See Further Details. +* +* TAUP (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix P. See Further Details. +* +* WORK (workspace) REAL array, dimension (max(M,N)) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrices Q and P are represented as products of elementary +* reflectors: +* +* If m >= n, +* +* Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are real scalars, and v and u are real vectors; +* v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); +* u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); +* tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* If m < n, +* +* Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are real scalars, and v and u are real vectors; +* v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); +* u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); +* tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* The contents of A on exit are illustrated by the following examples: +* +* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +* +* ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +* ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +* ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +* ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +* ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +* ( v1 v2 v3 v4 v5 ) +* +* where d and e denote diagonal and off-diagonal elements of B, vi +* denotes an element of the vector defining H(i), and ui an element of +* the vector defining G(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.LT.0 ) THEN + CALL XERBLA( 'SGEBD2', -INFO ) + RETURN + END IF +* + IF( M.GE.N ) THEN +* +* Reduce to upper bidiagonal form +* + DO 10 I = 1, N +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + CALL SLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAUQ( I ) ) + D( I ) = A( I, I ) + A( I, I ) = ONE +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + CALL SLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAUQ( I ), + $ A( I, I+1 ), LDA, WORK ) + A( I, I ) = D( I ) +* + IF( I.LT.N ) THEN +* +* Generate elementary reflector G(i) to annihilate +* A(i,i+2:n) +* + CALL SLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ), + $ LDA, TAUP( I ) ) + E( I ) = A( I, I+1 ) + A( I, I+1 ) = ONE +* +* Apply G(i) to A(i+1:m,i+1:n) from the right +* + CALL SLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA, + $ TAUP( I ), A( I+1, I+1 ), LDA, WORK ) + A( I, I+1 ) = E( I ) + ELSE + TAUP( I ) = ZERO + END IF + 10 CONTINUE + ELSE +* +* Reduce to lower bidiagonal form +* + DO 20 I = 1, M +* +* Generate elementary reflector G(i) to annihilate A(i,i+1:n) +* + CALL SLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA, + $ TAUP( I ) ) + D( I ) = A( I, I ) + A( I, I ) = ONE +* +* Apply G(i) to A(i+1:m,i:n) from the right +* + CALL SLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAUP( I ), + $ A( MIN( I+1, M ), I ), LDA, WORK ) + A( I, I ) = D( I ) +* + IF( I.LT.M ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:m,i) +* + CALL SLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1, + $ TAUQ( I ) ) + E( I ) = A( I+1, I ) + A( I+1, I ) = ONE +* +* Apply H(i) to A(i+1:m,i+1:n) from the left +* + CALL SLARF( 'Left', M-I, N-I, A( I+1, I ), 1, TAUQ( I ), + $ A( I+1, I+1 ), LDA, WORK ) + A( I+1, I ) = E( I ) + ELSE + TAUQ( I ) = ZERO + END IF + 20 CONTINUE + END IF + RETURN +* +* End of SGEBD2 +* + END diff --git a/costa/native/external/lapack/sgebrd.f b/costa/native/external/lapack/sgebrd.f new file mode 100644 index 000000000..91cca0733 --- /dev/null +++ b/costa/native/external/lapack/sgebrd.f @@ -0,0 +1,269 @@ + SUBROUTINE SGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), D( * ), E( * ), TAUP( * ), + $ TAUQ( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGEBRD reduces a general real M-by-N matrix A to upper or lower +* bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. +* +* If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows in the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns in the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N general matrix to be reduced. +* On exit, +* if m >= n, the diagonal and the first superdiagonal are +* overwritten with the upper bidiagonal matrix B; the +* elements below the diagonal, with the array TAUQ, represent +* the orthogonal matrix Q as a product of elementary +* reflectors, and the elements above the first superdiagonal, +* with the array TAUP, represent the orthogonal matrix P as +* a product of elementary reflectors; +* if m < n, the diagonal and the first subdiagonal are +* overwritten with the lower bidiagonal matrix B; the +* elements below the first subdiagonal, with the array TAUQ, +* represent the orthogonal matrix Q as a product of +* elementary reflectors, and the elements above the diagonal, +* with the array TAUP, represent the orthogonal matrix P as +* a product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* D (output) REAL array, dimension (min(M,N)) +* The diagonal elements of the bidiagonal matrix B: +* D(i) = A(i,i). +* +* E (output) REAL array, dimension (min(M,N)-1) +* The off-diagonal elements of the bidiagonal matrix B: +* if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; +* if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. +* +* TAUQ (output) REAL array dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix Q. See Further Details. +* +* TAUP (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix P. See Further Details. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,M,N). +* For optimum performance LWORK >= (M+N)*NB, where NB +* is the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrices Q and P are represented as products of elementary +* reflectors: +* +* If m >= n, +* +* Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are real scalars, and v and u are real vectors; +* v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); +* u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); +* tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* If m < n, +* +* Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are real scalars, and v and u are real vectors; +* v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); +* u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); +* tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* The contents of A on exit are illustrated by the following examples: +* +* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +* +* ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +* ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +* ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +* ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +* ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +* ( v1 v2 v3 v4 v5 ) +* +* where d and e denote diagonal and off-diagonal elements of B, vi +* denotes an element of the vector defining H(i), and ui an element of +* the vector defining G(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB, + $ NBMIN, NX + REAL WS +* .. +* .. External Subroutines .. + EXTERNAL SGEBD2, SGEMM, SLABRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB = MAX( 1, ILAENV( 1, 'SGEBRD', ' ', M, N, -1, -1 ) ) + LWKOPT = ( M+N )*NB + WORK( 1 ) = REAL( LWKOPT ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M, N ) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + IF( INFO.LT.0 ) THEN + CALL XERBLA( 'SGEBRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + MINMN = MIN( M, N ) + IF( MINMN.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + WS = MAX( M, N ) + LDWRKX = M + LDWRKY = N +* + IF( NB.GT.1 .AND. NB.LT.MINMN ) THEN +* +* Set the crossover point NX. +* + NX = MAX( NB, ILAENV( 3, 'SGEBRD', ' ', M, N, -1, -1 ) ) +* +* Determine when to switch from blocked to unblocked code. +* + IF( NX.LT.MINMN ) THEN + WS = ( M+N )*NB + IF( LWORK.LT.WS ) THEN +* +* Not enough work space for the optimal NB, consider using +* a smaller block size. +* + NBMIN = ILAENV( 2, 'SGEBRD', ' ', M, N, -1, -1 ) + IF( LWORK.GE.( M+N )*NBMIN ) THEN + NB = LWORK / ( M+N ) + ELSE + NB = 1 + NX = MINMN + END IF + END IF + END IF + ELSE + NX = MINMN + END IF +* + DO 30 I = 1, MINMN - NX, NB +* +* Reduce rows and columns i:i+nb-1 to bidiagonal form and return +* the matrices X and Y which are needed to update the unreduced +* part of the matrix +* + CALL SLABRD( M-I+1, N-I+1, NB, A( I, I ), LDA, D( I ), E( I ), + $ TAUQ( I ), TAUP( I ), WORK, LDWRKX, + $ WORK( LDWRKX*NB+1 ), LDWRKY ) +* +* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update +* of the form A := A - V*Y' - X*U' +* + CALL SGEMM( 'No transpose', 'Transpose', M-I-NB+1, N-I-NB+1, + $ NB, -ONE, A( I+NB, I ), LDA, + $ WORK( LDWRKX*NB+NB+1 ), LDWRKY, ONE, + $ A( I+NB, I+NB ), LDA ) + CALL SGEMM( 'No transpose', 'No transpose', M-I-NB+1, N-I-NB+1, + $ NB, -ONE, WORK( NB+1 ), LDWRKX, A( I, I+NB ), LDA, + $ ONE, A( I+NB, I+NB ), LDA ) +* +* Copy diagonal and off-diagonal elements of B back into A +* + IF( M.GE.N ) THEN + DO 10 J = I, I + NB - 1 + A( J, J ) = D( J ) + A( J, J+1 ) = E( J ) + 10 CONTINUE + ELSE + DO 20 J = I, I + NB - 1 + A( J, J ) = D( J ) + A( J+1, J ) = E( J ) + 20 CONTINUE + END IF + 30 CONTINUE +* +* Use unblocked code to reduce the remainder of the matrix +* + CALL SGEBD2( M-I+1, N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAUQ( I ), TAUP( I ), WORK, IINFO ) + WORK( 1 ) = WS + RETURN +* +* End of SGEBRD +* + END diff --git a/costa/native/external/lapack/sgecon.f b/costa/native/external/lapack/sgecon.f new file mode 100644 index 000000000..2429fd259 --- /dev/null +++ b/costa/native/external/lapack/sgecon.f @@ -0,0 +1,181 @@ + SUBROUTINE SGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGECON estimates the reciprocal of the condition number of a general +* real matrix A, in either the 1-norm or the infinity-norm, using +* the LU factorization computed by SGETRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The factors L and U from the factorization A = P*L*U +* as computed by SGETRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* ANORM (input) REAL +* If NORM = '1' or 'O', the 1-norm of the original matrix A. +* If NORM = 'I', the infinity-norm of the original matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) REAL array, dimension (4*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ONENRM + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + REAL AINVNM, SCALE, SL, SMLNUM, SU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL LSAME, ISAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLACON, SLATRS, SRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGECON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL SLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + CALL SLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, N, A, + $ LDA, WORK, SL, WORK( 2*N+1 ), INFO ) +* +* Multiply by inv(U). +* + CALL SLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SU, WORK( 3*N+1 ), INFO ) + ELSE +* +* Multiply by inv(U'). +* + CALL SLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, A, + $ LDA, WORK, SU, WORK( 3*N+1 ), INFO ) +* +* Multiply by inv(L'). +* + CALL SLATRS( 'Lower', 'Transpose', 'Unit', NORMIN, N, A, + $ LDA, WORK, SL, WORK( 2*N+1 ), INFO ) + END IF +* +* Divide X by 1/(SL*SU) if doing so will not cause overflow. +* + SCALE = SL*SU + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = ISAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL SRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of SGECON +* + END diff --git a/costa/native/external/lapack/sgeequ.f b/costa/native/external/lapack/sgeequ.f new file mode 100644 index 000000000..9008e08de --- /dev/null +++ b/costa/native/external/lapack/sgeequ.f @@ -0,0 +1,226 @@ + SUBROUTINE SGEEQU( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N + REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( * ), R( * ) +* .. +* +* Purpose +* ======= +* +* SGEEQU computes row and column scalings intended to equilibrate an +* M-by-N matrix A and reduce its condition number. R returns the row +* scale factors and C the column scale factors, chosen to try to make +* the largest element in each row and column of the matrix B with +* elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. +* +* R(i) and C(j) are restricted to be between SMLNUM = smallest safe +* number and BIGNUM = largest safe number. Use of these scaling +* factors is not guaranteed to reduce the condition number of A but +* works well in practice. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The M-by-N matrix whose equilibration factors are +* to be computed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* R (output) REAL array, dimension (M) +* If INFO = 0 or INFO > M, R contains the row scale factors +* for A. +* +* C (output) REAL array, dimension (N) +* If INFO = 0, C contains the column scale factors for A. +* +* ROWCND (output) REAL +* If INFO = 0 or INFO > M, ROWCND contains the ratio of the +* smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +* AMAX is neither too large nor too small, it is not worth +* scaling by R. +* +* COLCND (output) REAL +* If INFO = 0, COLCND contains the ratio of the smallest +* C(i) to the largest C(i). If COLCND >= 0.1, it is not +* worth scaling by C. +* +* AMAX (output) REAL +* Absolute value of largest matrix element. If AMAX is very +* close to overflow or very close to underflow, the matrix +* should be scaled. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= M: the i-th row of A is exactly zero +* > M: the (i-M)-th column of A is exactly zero +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL BIGNUM, RCMAX, RCMIN, SMLNUM +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + DO 30 J = 1, N + DO 20 I = 1, M + R( I ) = MAX( R( I ), ABS( A( I, J ) ) ) + 20 CONTINUE + 30 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)) +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + DO 90 J = 1, N + DO 80 I = 1, M + C( J ) = MAX( C( J ), ABS( A( I, J ) )*R( I ) ) + 80 CONTINUE + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)) +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of SGEEQU +* + END diff --git a/costa/native/external/lapack/sgees.f b/costa/native/external/lapack/sgees.f new file mode 100644 index 000000000..0db56d7d0 --- /dev/null +++ b/costa/native/external/lapack/sgees.f @@ -0,0 +1,431 @@ + SUBROUTINE SGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, WR, WI, + $ VS, LDVS, WORK, LWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVS, SORT + INTEGER INFO, LDA, LDVS, LWORK, N, SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + REAL A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ), + $ WR( * ) +* .. +* .. Function Arguments .. + LOGICAL SELECT + EXTERNAL SELECT +* .. +* +* Purpose +* ======= +* +* SGEES computes for an N-by-N real nonsymmetric matrix A, the +* eigenvalues, the real Schur form T, and, optionally, the matrix of +* Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). +* +* Optionally, it also orders the eigenvalues on the diagonal of the +* real Schur form so that selected eigenvalues are at the top left. +* The leading columns of Z then form an orthonormal basis for the +* invariant subspace corresponding to the selected eigenvalues. +* +* A matrix is in real Schur form if it is upper quasi-triangular with +* 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in the +* form +* [ a b ] +* [ c a ] +* +* where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). +* +* Arguments +* ========= +* +* JOBVS (input) CHARACTER*1 +* = 'N': Schur vectors are not computed; +* = 'V': Schur vectors are computed. +* +* SORT (input) CHARACTER*1 +* Specifies whether or not to order the eigenvalues on the +* diagonal of the Schur form. +* = 'N': Eigenvalues are not ordered; +* = 'S': Eigenvalues are ordered (see SELECT). +* +* SELECT (input) LOGICAL FUNCTION of two REAL arguments +* SELECT must be declared EXTERNAL in the calling subroutine. +* If SORT = 'S', SELECT is used to select eigenvalues to sort +* to the top left of the Schur form. +* If SORT = 'N', SELECT is not referenced. +* An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if +* SELECT(WR(j),WI(j)) is true; i.e., if either one of a complex +* conjugate pair of eigenvalues is selected, then both complex +* eigenvalues are selected. +* Note that a selected complex eigenvalue may no longer +* satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since +* ordering may change the value of complex eigenvalues +* (especially if the eigenvalue is ill-conditioned); in this +* case INFO is set to N+2 (see INFO below). +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the N-by-N matrix A. +* On exit, A has been overwritten by its real Schur form T. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* SDIM (output) INTEGER +* If SORT = 'N', SDIM = 0. +* If SORT = 'S', SDIM = number of eigenvalues (after sorting) +* for which SELECT is true. (Complex conjugate +* pairs for which SELECT is true for either +* eigenvalue count as 2.) +* +* WR (output) REAL array, dimension (N) +* WI (output) REAL array, dimension (N) +* WR and WI contain the real and imaginary parts, +* respectively, of the computed eigenvalues in the same order +* that they appear on the diagonal of the output Schur form T. +* Complex conjugate pairs of eigenvalues will appear +* consecutively with the eigenvalue having the positive +* imaginary part first. +* +* VS (output) REAL array, dimension (LDVS,N) +* If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur +* vectors. +* If JOBVS = 'N', VS is not referenced. +* +* LDVS (input) INTEGER +* The leading dimension of the array VS. LDVS >= 1; if +* JOBVS = 'V', LDVS >= N. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) contains the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,3*N). +* For good performance, LWORK must generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* BWORK (workspace) LOGICAL array, dimension (N) +* Not referenced if SORT = 'N'. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, and i is +* <= N: the QR algorithm failed to compute all the +* eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI +* contain those eigenvalues which have converged; if +* JOBVS = 'V', VS contains the matrix which reduces A +* to its partially converged Schur form. +* = N+1: the eigenvalues could not be reordered because some +* eigenvalues were too close to separate (the problem +* is very ill-conditioned); +* = N+2: after reordering, roundoff changed values of some +* complex eigenvalues so that leading eigenvalues in +* the Schur form no longer satisfy SELECT=.TRUE. This +* could also be caused by underflow due to scaling. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, LASTSL, LQUERY, LST2SL, SCALEA, WANTST, + $ WANTVS + INTEGER HSWORK, I, I1, I2, IBAL, ICOND, IERR, IEVAL, + $ IHI, ILO, INXT, IP, ITAU, IWRK, K, MAXB, + $ MAXWRK, MINWRK + REAL ANRM, BIGNUM, CSCALE, EPS, S, SEP, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEBAK, SGEBAL, SGEHRD, SHSEQR, SLABAD, + $ SLACPY, SLASCL, SORGHR, SSWAP, STRSEN, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVS = LSAME( JOBVS, 'V' ) + WANTST = LSAME( SORT, 'S' ) + IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by SHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MAXWRK = 2*N + N*ILAENV( 1, 'SGEHRD', ' ', N, 1, N, 0 ) + MINWRK = MAX( 1, 3*N ) + IF( .NOT.WANTVS ) THEN + MAXB = MAX( ILAENV( 8, 'SHSEQR', 'SN', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'SHSEQR', 'SN', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, N+HSWORK, 1 ) + ELSE + MAXWRK = MAX( MAXWRK, 2*N+( N-1 )* + $ ILAENV( 1, 'SORGHR', ' ', N, 1, N, -1 ) ) + MAXB = MAX( ILAENV( 8, 'SHSEQR', 'EN', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'SHSEQR', 'EN', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, N+HSWORK, 1 ) + END IF + WORK( 1 ) = MAXWRK + END IF + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEES ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL SLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (Workspace: need N) +* + IBAL = 1 + CALL SGEBAL( 'P', N, A, LDA, ILO, IHI, WORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (Workspace: need 3*N, prefer 2*N+N*NB) +* + ITAU = N + IBAL + IWRK = N + ITAU + CALL SGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVS ) THEN +* +* Copy Householder vectors to VS +* + CALL SLACPY( 'L', N, N, A, LDA, VS, LDVS ) +* +* Generate orthogonal matrix in VS +* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* + CALL SORGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) + END IF +* + SDIM = 0 +* +* Perform QR iteration, accumulating Schur vectors in VS if desired +* (Workspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL SHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, WR, WI, VS, LDVS, + $ WORK( IWRK ), LWORK-IWRK+1, IEVAL ) + IF( IEVAL.GT.0 ) + $ INFO = IEVAL +* +* Sort eigenvalues if desired +* + IF( WANTST .AND. INFO.EQ.0 ) THEN + IF( SCALEA ) THEN + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WR, N, IERR ) + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WI, N, IERR ) + END IF + DO 10 I = 1, N + BWORK( I ) = SELECT( WR( I ), WI( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues and transform Schur vectors +* (Workspace: none needed) +* + CALL STRSEN( 'N', JOBVS, BWORK, N, A, LDA, VS, LDVS, WR, WI, + $ SDIM, S, SEP, WORK( IWRK ), LWORK-IWRK+1, IDUM, 1, + $ ICOND ) + IF( ICOND.GT.0 ) + $ INFO = N + ICOND + END IF +* + IF( WANTVS ) THEN +* +* Undo balancing +* (Workspace: need N) +* + CALL SGEBAK( 'P', 'R', N, ILO, IHI, WORK( IBAL ), N, VS, LDVS, + $ IERR ) + END IF +* + IF( SCALEA ) THEN +* +* Undo scaling for the Schur form of A +* + CALL SLASCL( 'H', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR ) + CALL SCOPY( N, A, LDA+1, WR, 1 ) + IF( CSCALE.EQ.SMLNUM ) THEN +* +* If scaling back towards underflow, adjust WI if an +* offdiagonal element of a 2-by-2 block in the Schur form +* underflows. +* + IF( IEVAL.GT.0 ) THEN + I1 = IEVAL + 1 + I2 = IHI - 1 + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, + $ MAX( ILO-1, 1 ), IERR ) + ELSE IF( WANTST ) THEN + I1 = 1 + I2 = N - 1 + ELSE + I1 = ILO + I2 = IHI - 1 + END IF + INXT = I1 - 1 + DO 20 I = I1, I2 + IF( I.LT.INXT ) + $ GO TO 20 + IF( WI( I ).EQ.ZERO ) THEN + INXT = I + 1 + ELSE + IF( A( I+1, I ).EQ.ZERO ) THEN + WI( I ) = ZERO + WI( I+1 ) = ZERO + ELSE IF( A( I+1, I ).NE.ZERO .AND. A( I, I+1 ).EQ. + $ ZERO ) THEN + WI( I ) = ZERO + WI( I+1 ) = ZERO + IF( I.GT.1 ) + $ CALL SSWAP( I-1, A( 1, I ), 1, A( 1, I+1 ), 1 ) + IF( N.GT.I+1 ) + $ CALL SSWAP( N-I-1, A( I, I+2 ), LDA, + $ A( I+1, I+2 ), LDA ) + CALL SSWAP( N, VS( 1, I ), 1, VS( 1, I+1 ), 1 ) + A( I, I+1 ) = A( I+1, I ) + A( I+1, I ) = ZERO + END IF + INXT = I + 2 + END IF + 20 CONTINUE + END IF +* +* Undo scaling for the imaginary part of the eigenvalues +* + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N-IEVAL, 1, + $ WI( IEVAL+1 ), MAX( N-IEVAL, 1 ), IERR ) + END IF +* + IF( WANTST .AND. INFO.EQ.0 ) THEN +* +* Check if reordering successful +* + LASTSL = .TRUE. + LST2SL = .TRUE. + SDIM = 0 + IP = 0 + DO 30 I = 1, N + CURSL = SELECT( WR( I ), WI( I ) ) + IF( WI( I ).EQ.ZERO ) THEN + IF( CURSL ) + $ SDIM = SDIM + 1 + IP = 0 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + ELSE + IF( IP.EQ.1 ) THEN +* +* Last eigenvalue of conjugate pair +* + CURSL = CURSL .OR. LASTSL + LASTSL = CURSL + IF( CURSL ) + $ SDIM = SDIM + 2 + IP = -1 + IF( CURSL .AND. .NOT.LST2SL ) + $ INFO = N + 2 + ELSE +* +* First eigenvalue of conjugate pair +* + IP = 1 + END IF + END IF + LST2SL = LASTSL + LASTSL = CURSL + 30 CONTINUE + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of SGEES +* + END diff --git a/costa/native/external/lapack/sgeesx.f b/costa/native/external/lapack/sgeesx.f new file mode 100644 index 000000000..2672bb2bb --- /dev/null +++ b/costa/native/external/lapack/sgeesx.f @@ -0,0 +1,502 @@ + SUBROUTINE SGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM, + $ WR, WI, VS, LDVS, RCONDE, RCONDV, WORK, LWORK, + $ IWORK, LIWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVS, SENSE, SORT + INTEGER INFO, LDA, LDVS, LIWORK, LWORK, N, SDIM + REAL RCONDE, RCONDV +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + INTEGER IWORK( * ) + REAL A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ), + $ WR( * ) +* .. +* .. Function Arguments .. + LOGICAL SELECT + EXTERNAL SELECT +* .. +* +* Purpose +* ======= +* +* SGEESX computes for an N-by-N real nonsymmetric matrix A, the +* eigenvalues, the real Schur form T, and, optionally, the matrix of +* Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). +* +* Optionally, it also orders the eigenvalues on the diagonal of the +* real Schur form so that selected eigenvalues are at the top left; +* computes a reciprocal condition number for the average of the +* selected eigenvalues (RCONDE); and computes a reciprocal condition +* number for the right invariant subspace corresponding to the +* selected eigenvalues (RCONDV). The leading columns of Z form an +* orthonormal basis for this invariant subspace. +* +* For further explanation of the reciprocal condition numbers RCONDE +* and RCONDV, see Section 4.10 of the LAPACK Users' Guide (where +* these quantities are called s and sep respectively). +* +* A real matrix is in real Schur form if it is upper quasi-triangular +* with 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in +* the form +* [ a b ] +* [ c a ] +* +* where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). +* +* Arguments +* ========= +* +* JOBVS (input) CHARACTER*1 +* = 'N': Schur vectors are not computed; +* = 'V': Schur vectors are computed. +* +* SORT (input) CHARACTER*1 +* Specifies whether or not to order the eigenvalues on the +* diagonal of the Schur form. +* = 'N': Eigenvalues are not ordered; +* = 'S': Eigenvalues are ordered (see SELECT). +* +* SELECT (input) LOGICAL FUNCTION of two REAL arguments +* SELECT must be declared EXTERNAL in the calling subroutine. +* If SORT = 'S', SELECT is used to select eigenvalues to sort +* to the top left of the Schur form. +* If SORT = 'N', SELECT is not referenced. +* An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if +* SELECT(WR(j),WI(j)) is true; i.e., if either one of a +* complex conjugate pair of eigenvalues is selected, then both +* are. Note that a selected complex eigenvalue may no longer +* satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since +* ordering may change the value of complex eigenvalues +* (especially if the eigenvalue is ill-conditioned); in this +* case INFO may be set to N+3 (see INFO below). +* +* SENSE (input) CHARACTER*1 +* Determines which reciprocal condition numbers are computed. +* = 'N': None are computed; +* = 'E': Computed for average of selected eigenvalues only; +* = 'V': Computed for selected right invariant subspace only; +* = 'B': Computed for both. +* If SENSE = 'E', 'V' or 'B', SORT must equal 'S'. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA, N) +* On entry, the N-by-N matrix A. +* On exit, A is overwritten by its real Schur form T. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* SDIM (output) INTEGER +* If SORT = 'N', SDIM = 0. +* If SORT = 'S', SDIM = number of eigenvalues (after sorting) +* for which SELECT is true. (Complex conjugate +* pairs for which SELECT is true for either +* eigenvalue count as 2.) +* +* WR (output) REAL array, dimension (N) +* WI (output) REAL array, dimension (N) +* WR and WI contain the real and imaginary parts, respectively, +* of the computed eigenvalues, in the same order that they +* appear on the diagonal of the output Schur form T. Complex +* conjugate pairs of eigenvalues appear consecutively with the +* eigenvalue having the positive imaginary part first. +* +* VS (output) REAL array, dimension (LDVS,N) +* If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur +* vectors. +* If JOBVS = 'N', VS is not referenced. +* +* LDVS (input) INTEGER +* The leading dimension of the array VS. LDVS >= 1, and if +* JOBVS = 'V', LDVS >= N. +* +* RCONDE (output) REAL +* If SENSE = 'E' or 'B', RCONDE contains the reciprocal +* condition number for the average of the selected eigenvalues. +* Not referenced if SENSE = 'N' or 'V'. +* +* RCONDV (output) REAL +* If SENSE = 'V' or 'B', RCONDV contains the reciprocal +* condition number for the selected right invariant subspace. +* Not referenced if SENSE = 'N' or 'E'. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,3*N). +* Also, if SENSE = 'E' or 'V' or 'B', +* LWORK >= N+2*SDIM*(N-SDIM), where SDIM is the number of +* selected eigenvalues computed by this routine. Note that +* N+2*SDIM*(N-SDIM) <= N+N*N/2. +* For good performance, LWORK must generally be larger. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* Not referenced if SENSE = 'N' or 'E'. +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* LIWORK >= 1; if SENSE = 'V' or 'B', LIWORK >= SDIM*(N-SDIM). +* +* BWORK (workspace) LOGICAL array, dimension (N) +* Not referenced if SORT = 'N'. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, and i is +* <= N: the QR algorithm failed to compute all the +* eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI +* contain those eigenvalues which have converged; if +* JOBVS = 'V', VS contains the transformation which +* reduces A to its partially converged Schur form. +* = N+1: the eigenvalues could not be reordered because some +* eigenvalues were too close to separate (the problem +* is very ill-conditioned); +* = N+2: after reordering, roundoff changed values of some +* complex eigenvalues so that leading eigenvalues in +* the Schur form no longer satisfy SELECT=.TRUE. This +* could also be caused by underflow due to scaling. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, LASTSL, LST2SL, SCALEA, WANTSB, + $ WANTSE, WANTSN, WANTST, WANTSV, WANTVS + INTEGER HSWORK, I, I1, I2, IBAL, ICOND, IERR, IEVAL, + $ IHI, ILO, INXT, IP, ITAU, IWRK, K, MAXB, + $ MAXWRK, MINWRK + REAL ANRM, BIGNUM, CSCALE, EPS, SMLNUM +* .. +* .. Local Arrays .. + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEBAK, SGEBAL, SGEHRD, SHSEQR, SLABAD, + $ SLACPY, SLASCL, SORGHR, SSWAP, STRSEN, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + WANTVS = LSAME( JOBVS, 'V' ) + WANTST = LSAME( SORT, 'S' ) + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) + IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR. + $ ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN + INFO = -12 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "RWorkspace:" describe the +* minimal amount of real workspace needed at that point in the +* code, as well as the preferred amount for good performance. +* IWorkspace refers to integer workspace. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by SHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case. +* If SENSE = 'E', 'V' or 'B', then the amount of workspace needed +* depends on SDIM, which is computed by the routine STRSEN later +* in the code.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. LWORK.GE.1 ) THEN + MAXWRK = 2*N + N*ILAENV( 1, 'SGEHRD', ' ', N, 1, N, 0 ) + MINWRK = MAX( 1, 3*N ) + IF( .NOT.WANTVS ) THEN + MAXB = MAX( ILAENV( 8, 'SHSEQR', 'SN', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'SHSEQR', 'SN', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, N+HSWORK, 1 ) + ELSE + MAXWRK = MAX( MAXWRK, 2*N+( N-1 )* + $ ILAENV( 1, 'SORGHR', ' ', N, 1, N, -1 ) ) + MAXB = MAX( ILAENV( 8, 'SHSEQR', 'SV', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'SHSEQR', 'SV', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, N+HSWORK, 1 ) + END IF + WORK( 1 ) = MAXWRK + END IF + IF( LWORK.LT.MINWRK ) THEN + INFO = -16 + END IF + IF( LIWORK.LT.1 ) THEN + INFO = -18 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEESX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL SLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (RWorkspace: need N) +* + IBAL = 1 + CALL SGEBAL( 'P', N, A, LDA, ILO, IHI, WORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (RWorkspace: need 3*N, prefer 2*N+N*NB) +* + ITAU = N + IBAL + IWRK = N + ITAU + CALL SGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVS ) THEN +* +* Copy Householder vectors to VS +* + CALL SLACPY( 'L', N, N, A, LDA, VS, LDVS ) +* +* Generate orthogonal matrix in VS +* (RWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* + CALL SORGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) + END IF +* + SDIM = 0 +* +* Perform QR iteration, accumulating Schur vectors in VS if desired +* (RWorkspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL SHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, WR, WI, VS, LDVS, + $ WORK( IWRK ), LWORK-IWRK+1, IEVAL ) + IF( IEVAL.GT.0 ) + $ INFO = IEVAL +* +* Sort eigenvalues if desired +* + IF( WANTST .AND. INFO.EQ.0 ) THEN + IF( SCALEA ) THEN + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WR, N, IERR ) + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WI, N, IERR ) + END IF + DO 10 I = 1, N + BWORK( I ) = SELECT( WR( I ), WI( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues, transform Schur vectors, and compute +* reciprocal condition numbers +* (RWorkspace: if SENSE is not 'N', need N+2*SDIM*(N-SDIM) +* otherwise, need N ) +* (IWorkspace: if SENSE is 'V' or 'B', need SDIM*(N-SDIM) +* otherwise, need 0 ) +* + CALL STRSEN( SENSE, JOBVS, BWORK, N, A, LDA, VS, LDVS, WR, WI, + $ SDIM, RCONDE, RCONDV, WORK( IWRK ), LWORK-IWRK+1, + $ IWORK, LIWORK, ICOND ) + IF( .NOT.WANTSN ) + $ MAXWRK = MAX( MAXWRK, N+2*SDIM*( N-SDIM ) ) + IF( ICOND.EQ.-15 ) THEN +* +* Not enough real workspace +* + INFO = -16 + ELSE IF( ICOND.EQ.-17 ) THEN +* +* Not enough integer workspace +* + INFO = -18 + ELSE IF( ICOND.GT.0 ) THEN +* +* STRSEN failed to reorder or to restore standard Schur form +* + INFO = ICOND + N + END IF + END IF +* + IF( WANTVS ) THEN +* +* Undo balancing +* (RWorkspace: need N) +* + CALL SGEBAK( 'P', 'R', N, ILO, IHI, WORK( IBAL ), N, VS, LDVS, + $ IERR ) + END IF +* + IF( SCALEA ) THEN +* +* Undo scaling for the Schur form of A +* + CALL SLASCL( 'H', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR ) + CALL SCOPY( N, A, LDA+1, WR, 1 ) + IF( ( WANTSV .OR. WANTSB ) .AND. INFO.EQ.0 ) THEN + DUM( 1 ) = RCONDV + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR ) + RCONDV = DUM( 1 ) + END IF + IF( CSCALE.EQ.SMLNUM ) THEN +* +* If scaling back towards underflow, adjust WI if an +* offdiagonal element of a 2-by-2 block in the Schur form +* underflows. +* + IF( IEVAL.GT.0 ) THEN + I1 = IEVAL + 1 + I2 = IHI - 1 + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, N, + $ IERR ) + ELSE IF( WANTST ) THEN + I1 = 1 + I2 = N - 1 + ELSE + I1 = ILO + I2 = IHI - 1 + END IF + INXT = I1 - 1 + DO 20 I = I1, I2 + IF( I.LT.INXT ) + $ GO TO 20 + IF( WI( I ).EQ.ZERO ) THEN + INXT = I + 1 + ELSE + IF( A( I+1, I ).EQ.ZERO ) THEN + WI( I ) = ZERO + WI( I+1 ) = ZERO + ELSE IF( A( I+1, I ).NE.ZERO .AND. A( I, I+1 ).EQ. + $ ZERO ) THEN + WI( I ) = ZERO + WI( I+1 ) = ZERO + IF( I.GT.1 ) + $ CALL SSWAP( I-1, A( 1, I ), 1, A( 1, I+1 ), 1 ) + IF( N.GT.I+1 ) + $ CALL SSWAP( N-I-1, A( I, I+2 ), LDA, + $ A( I+1, I+2 ), LDA ) + CALL SSWAP( N, VS( 1, I ), 1, VS( 1, I+1 ), 1 ) + A( I, I+1 ) = A( I+1, I ) + A( I+1, I ) = ZERO + END IF + INXT = I + 2 + END IF + 20 CONTINUE + END IF + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N-IEVAL, 1, + $ WI( IEVAL+1 ), MAX( N-IEVAL, 1 ), IERR ) + END IF +* + IF( WANTST .AND. INFO.EQ.0 ) THEN +* +* Check if reordering successful +* + LASTSL = .TRUE. + LST2SL = .TRUE. + SDIM = 0 + IP = 0 + DO 30 I = 1, N + CURSL = SELECT( WR( I ), WI( I ) ) + IF( WI( I ).EQ.ZERO ) THEN + IF( CURSL ) + $ SDIM = SDIM + 1 + IP = 0 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + ELSE + IF( IP.EQ.1 ) THEN +* +* Last eigenvalue of conjugate pair +* + CURSL = CURSL .OR. LASTSL + LASTSL = CURSL + IF( CURSL ) + $ SDIM = SDIM + 2 + IP = -1 + IF( CURSL .AND. .NOT.LST2SL ) + $ INFO = N + 2 + ELSE +* +* First eigenvalue of conjugate pair +* + IP = 1 + END IF + END IF + LST2SL = LASTSL + LASTSL = CURSL + 30 CONTINUE + END IF +* + WORK( 1 ) = MAXWRK + IF( WANTSV .OR. WANTSB ) THEN + IWORK( 1 ) = SDIM*(N-SDIM) + ELSE + IWORK( 1 ) = 1 + END IF +* + RETURN +* +* End of SGEESX +* + END diff --git a/costa/native/external/lapack/sgeev.f b/costa/native/external/lapack/sgeev.f new file mode 100644 index 000000000..203402ece --- /dev/null +++ b/costa/native/external/lapack/sgeev.f @@ -0,0 +1,410 @@ + SUBROUTINE SGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR, + $ LDVR, WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* December 8, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WI( * ), WORK( * ), WR( * ) +* .. +* +* Purpose +* ======= +* +* SGEEV computes for an N-by-N real nonsymmetric matrix A, the +* eigenvalues and, optionally, the left and/or right eigenvectors. +* +* The right eigenvector v(j) of A satisfies +* A * v(j) = lambda(j) * v(j) +* where lambda(j) is its eigenvalue. +* The left eigenvector u(j) of A satisfies +* u(j)**H * A = lambda(j) * u(j)**H +* where u(j)**H denotes the conjugate transpose of u(j). +* +* The computed eigenvectors are normalized to have Euclidean norm +* equal to 1 and largest component real. +* +* Arguments +* ========= +* +* JOBVL (input) CHARACTER*1 +* = 'N': left eigenvectors of A are not computed; +* = 'V': left eigenvectors of A are computed. +* +* JOBVR (input) CHARACTER*1 +* = 'N': right eigenvectors of A are not computed; +* = 'V': right eigenvectors of A are computed. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the N-by-N matrix A. +* On exit, A has been overwritten. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* WR (output) REAL array, dimension (N) +* WI (output) REAL array, dimension (N) +* WR and WI contain the real and imaginary parts, +* respectively, of the computed eigenvalues. Complex +* conjugate pairs of eigenvalues appear consecutively +* with the eigenvalue having the positive imaginary part +* first. +* +* VL (output) REAL array, dimension (LDVL,N) +* If JOBVL = 'V', the left eigenvectors u(j) are stored one +* after another in the columns of VL, in the same order +* as their eigenvalues. +* If JOBVL = 'N', VL is not referenced. +* If the j-th eigenvalue is real, then u(j) = VL(:,j), +* the j-th column of VL. +* If the j-th and (j+1)-st eigenvalues form a complex +* conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and +* u(j+1) = VL(:,j) - i*VL(:,j+1). +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. LDVL >= 1; if +* JOBVL = 'V', LDVL >= N. +* +* VR (output) REAL array, dimension (LDVR,N) +* If JOBVR = 'V', the right eigenvectors v(j) are stored one +* after another in the columns of VR, in the same order +* as their eigenvalues. +* If JOBVR = 'N', VR is not referenced. +* If the j-th eigenvalue is real, then v(j) = VR(:,j), +* the j-th column of VR. +* If the j-th and (j+1)-st eigenvalues form a complex +* conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and +* v(j+1) = VR(:,j) - i*VR(:,j+1). +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. LDVR >= 1; if +* JOBVR = 'V', LDVR >= N. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,3*N), and +* if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N. For good +* performance, LWORK must generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the QR algorithm failed to compute all the +* eigenvalues, and no eigenvectors have been computed; +* elements i+1:N of WR and WI contain eigenvalues which +* have converged. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTVL, WANTVR + CHARACTER SIDE + INTEGER HSWORK, I, IBAL, IERR, IHI, ILO, ITAU, IWRK, K, + $ MAXB, MAXWRK, MINWRK, NOUT + REAL ANRM, BIGNUM, CS, CSCALE, EPS, R, SCL, SMLNUM, + $ SN +* .. +* .. Local Arrays .. + LOGICAL SELECT( 1 ) + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SGEBAK, SGEBAL, SGEHRD, SHSEQR, SLABAD, SLACPY, + $ SLARTG, SLASCL, SORGHR, SROT, SSCAL, STREVC, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV, ISAMAX + REAL SLAMCH, SLANGE, SLAPY2, SNRM2 + EXTERNAL LSAME, ILAENV, ISAMAX, SLAMCH, SLANGE, SLAPY2, + $ SNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVL = LSAME( JOBVL, 'V' ) + WANTVR = LSAME( JOBVR, 'V' ) + IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN + INFO = -9 + ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by SHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MAXWRK = 2*N + N*ILAENV( 1, 'SGEHRD', ' ', N, 1, N, 0 ) + IF( ( .NOT.WANTVL ) .AND. ( .NOT.WANTVR ) ) THEN + MINWRK = MAX( 1, 3*N ) + MAXB = MAX( ILAENV( 8, 'SHSEQR', 'EN', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'SHSEQR', 'EN', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, N+1, N+HSWORK ) + ELSE + MINWRK = MAX( 1, 4*N ) + MAXWRK = MAX( MAXWRK, 2*N+( N-1 )* + $ ILAENV( 1, 'SORGHR', ' ', N, 1, N, -1 ) ) + MAXB = MAX( ILAENV( 8, 'SHSEQR', 'SV', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'SHSEQR', 'SV', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, N+1, N+HSWORK ) + MAXWRK = MAX( MAXWRK, 4*N ) + END IF + WORK( 1 ) = MAXWRK + END IF + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL SLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Balance the matrix +* (Workspace: need N) +* + IBAL = 1 + CALL SGEBAL( 'B', N, A, LDA, ILO, IHI, WORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (Workspace: need 3*N, prefer 2*N+N*NB) +* + ITAU = IBAL + N + IWRK = ITAU + N + CALL SGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVL ) THEN +* +* Want left eigenvectors +* Copy Householder vectors to VL +* + SIDE = 'L' + CALL SLACPY( 'L', N, N, A, LDA, VL, LDVL ) +* +* Generate orthogonal matrix in VL +* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* + CALL SORGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VL +* (Workspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL SHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VL, LDVL, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + IF( WANTVR ) THEN +* +* Want left and right eigenvectors +* Copy Schur vectors to VR +* + SIDE = 'B' + CALL SLACPY( 'F', N, N, VL, LDVL, VR, LDVR ) + END IF +* + ELSE IF( WANTVR ) THEN +* +* Want right eigenvectors +* Copy Householder vectors to VR +* + SIDE = 'R' + CALL SLACPY( 'L', N, N, A, LDA, VR, LDVR ) +* +* Generate orthogonal matrix in VR +* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* + CALL SORGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VR +* (Workspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL SHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + ELSE +* +* Compute eigenvalues only +* (Workspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL SHSEQR( 'E', 'N', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) + END IF +* +* If INFO > 0 from SHSEQR, then quit +* + IF( INFO.GT.0 ) + $ GO TO 50 +* + IF( WANTVL .OR. WANTVR ) THEN +* +* Compute left and/or right eigenvectors +* (Workspace: need 4*N) +* + CALL STREVC( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ N, NOUT, WORK( IWRK ), IERR ) + END IF +* + IF( WANTVL ) THEN +* +* Undo balancing of left eigenvectors +* (Workspace: need N) +* + CALL SGEBAK( 'B', 'L', N, ILO, IHI, WORK( IBAL ), N, VL, LDVL, + $ IERR ) +* +* Normalize left eigenvectors and make largest component real +* + DO 20 I = 1, N + IF( WI( I ).EQ.ZERO ) THEN + SCL = ONE / SNRM2( N, VL( 1, I ), 1 ) + CALL SSCAL( N, SCL, VL( 1, I ), 1 ) + ELSE IF( WI( I ).GT.ZERO ) THEN + SCL = ONE / SLAPY2( SNRM2( N, VL( 1, I ), 1 ), + $ SNRM2( N, VL( 1, I+1 ), 1 ) ) + CALL SSCAL( N, SCL, VL( 1, I ), 1 ) + CALL SSCAL( N, SCL, VL( 1, I+1 ), 1 ) + DO 10 K = 1, N + WORK( IWRK+K-1 ) = VL( K, I )**2 + VL( K, I+1 )**2 + 10 CONTINUE + K = ISAMAX( N, WORK( IWRK ), 1 ) + CALL SLARTG( VL( K, I ), VL( K, I+1 ), CS, SN, R ) + CALL SROT( N, VL( 1, I ), 1, VL( 1, I+1 ), 1, CS, SN ) + VL( K, I+1 ) = ZERO + END IF + 20 CONTINUE + END IF +* + IF( WANTVR ) THEN +* +* Undo balancing of right eigenvectors +* (Workspace: need N) +* + CALL SGEBAK( 'B', 'R', N, ILO, IHI, WORK( IBAL ), N, VR, LDVR, + $ IERR ) +* +* Normalize right eigenvectors and make largest component real +* + DO 40 I = 1, N + IF( WI( I ).EQ.ZERO ) THEN + SCL = ONE / SNRM2( N, VR( 1, I ), 1 ) + CALL SSCAL( N, SCL, VR( 1, I ), 1 ) + ELSE IF( WI( I ).GT.ZERO ) THEN + SCL = ONE / SLAPY2( SNRM2( N, VR( 1, I ), 1 ), + $ SNRM2( N, VR( 1, I+1 ), 1 ) ) + CALL SSCAL( N, SCL, VR( 1, I ), 1 ) + CALL SSCAL( N, SCL, VR( 1, I+1 ), 1 ) + DO 30 K = 1, N + WORK( IWRK+K-1 ) = VR( K, I )**2 + VR( K, I+1 )**2 + 30 CONTINUE + K = ISAMAX( N, WORK( IWRK ), 1 ) + CALL SLARTG( VR( K, I ), VR( K, I+1 ), CS, SN, R ) + CALL SROT( N, VR( 1, I ), 1, VR( 1, I+1 ), 1, CS, SN ) + VR( K, I+1 ) = ZERO + END IF + 40 CONTINUE + END IF +* +* Undo scaling if necessary +* + 50 CONTINUE + IF( SCALEA ) THEN + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WR( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WI( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + IF( INFO.GT.0 ) THEN + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WR, N, + $ IERR ) + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, N, + $ IERR ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of SGEEV +* + END diff --git a/costa/native/external/lapack/sgeevx.f b/costa/native/external/lapack/sgeevx.f new file mode 100644 index 000000000..ab5898c6e --- /dev/null +++ b/costa/native/external/lapack/sgeevx.f @@ -0,0 +1,543 @@ + SUBROUTINE SGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, WR, WI, + $ VL, LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, + $ RCONDE, RCONDV, WORK, LWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER BALANC, JOBVL, JOBVR, SENSE + INTEGER IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N + REAL ABNRM +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), RCONDE( * ), RCONDV( * ), + $ SCALE( * ), VL( LDVL, * ), VR( LDVR, * ), + $ WI( * ), WORK( * ), WR( * ) +* .. +* +* Purpose +* ======= +* +* SGEEVX computes for an N-by-N real nonsymmetric matrix A, the +* eigenvalues and, optionally, the left and/or right eigenvectors. +* +* Optionally also, it computes a balancing transformation to improve +* the conditioning of the eigenvalues and eigenvectors (ILO, IHI, +* SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues +* (RCONDE), and reciprocal condition numbers for the right +* eigenvectors (RCONDV). +* +* The right eigenvector v(j) of A satisfies +* A * v(j) = lambda(j) * v(j) +* where lambda(j) is its eigenvalue. +* The left eigenvector u(j) of A satisfies +* u(j)**H * A = lambda(j) * u(j)**H +* where u(j)**H denotes the conjugate transpose of u(j). +* +* The computed eigenvectors are normalized to have Euclidean norm +* equal to 1 and largest component real. +* +* Balancing a matrix means permuting the rows and columns to make it +* more nearly upper triangular, and applying a diagonal similarity +* transformation D * A * D**(-1), where D is a diagonal matrix, to +* make its rows and columns closer in norm and the condition numbers +* of its eigenvalues and eigenvectors smaller. The computed +* reciprocal condition numbers correspond to the balanced matrix. +* Permuting rows and columns will not change the condition numbers +* (in exact arithmetic) but diagonal scaling will. For further +* explanation of balancing, see section 4.10.2 of the LAPACK +* Users' Guide. +* +* Arguments +* ========= +* +* BALANC (input) CHARACTER*1 +* Indicates how the input matrix should be diagonally scaled +* and/or permuted to improve the conditioning of its +* eigenvalues. +* = 'N': Do not diagonally scale or permute; +* = 'P': Perform permutations to make the matrix more nearly +* upper triangular. Do not diagonally scale; +* = 'S': Diagonally scale the matrix, i.e. replace A by +* D*A*D**(-1), where D is a diagonal matrix chosen +* to make the rows and columns of A more equal in +* norm. Do not permute; +* = 'B': Both diagonally scale and permute A. +* +* Computed reciprocal condition numbers will be for the matrix +* after balancing and/or permuting. Permuting does not change +* condition numbers (in exact arithmetic), but balancing does. +* +* JOBVL (input) CHARACTER*1 +* = 'N': left eigenvectors of A are not computed; +* = 'V': left eigenvectors of A are computed. +* If SENSE = 'E' or 'B', JOBVL must = 'V'. +* +* JOBVR (input) CHARACTER*1 +* = 'N': right eigenvectors of A are not computed; +* = 'V': right eigenvectors of A are computed. +* If SENSE = 'E' or 'B', JOBVR must = 'V'. +* +* SENSE (input) CHARACTER*1 +* Determines which reciprocal condition numbers are computed. +* = 'N': None are computed; +* = 'E': Computed for eigenvalues only; +* = 'V': Computed for right eigenvectors only; +* = 'B': Computed for eigenvalues and right eigenvectors. +* +* If SENSE = 'E' or 'B', both left and right eigenvectors +* must also be computed (JOBVL = 'V' and JOBVR = 'V'). +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the N-by-N matrix A. +* On exit, A has been overwritten. If JOBVL = 'V' or +* JOBVR = 'V', A contains the real Schur form of the balanced +* version of the input matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* WR (output) REAL array, dimension (N) +* WI (output) REAL array, dimension (N) +* WR and WI contain the real and imaginary parts, +* respectively, of the computed eigenvalues. Complex +* conjugate pairs of eigenvalues will appear consecutively +* with the eigenvalue having the positive imaginary part +* first. +* +* VL (output) REAL array, dimension (LDVL,N) +* If JOBVL = 'V', the left eigenvectors u(j) are stored one +* after another in the columns of VL, in the same order +* as their eigenvalues. +* If JOBVL = 'N', VL is not referenced. +* If the j-th eigenvalue is real, then u(j) = VL(:,j), +* the j-th column of VL. +* If the j-th and (j+1)-st eigenvalues form a complex +* conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and +* u(j+1) = VL(:,j) - i*VL(:,j+1). +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. LDVL >= 1; if +* JOBVL = 'V', LDVL >= N. +* +* VR (output) REAL array, dimension (LDVR,N) +* If JOBVR = 'V', the right eigenvectors v(j) are stored one +* after another in the columns of VR, in the same order +* as their eigenvalues. +* If JOBVR = 'N', VR is not referenced. +* If the j-th eigenvalue is real, then v(j) = VR(:,j), +* the j-th column of VR. +* If the j-th and (j+1)-st eigenvalues form a complex +* conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and +* v(j+1) = VR(:,j) - i*VR(:,j+1). +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. LDVR >= 1, and if +* JOBVR = 'V', LDVR >= N. +* +* ILO,IHI (output) INTEGER +* ILO and IHI are integer values determined when A was +* balanced. The balanced A(i,j) = 0 if I > J and +* J = 1,...,ILO-1 or I = IHI+1,...,N. +* +* SCALE (output) REAL array, dimension (N) +* Details of the permutations and scaling factors applied +* when balancing A. If P(j) is the index of the row and column +* interchanged with row and column j, and D(j) is the scaling +* factor applied to row and column j, then +* SCALE(J) = P(J), for J = 1,...,ILO-1 +* = D(J), for J = ILO,...,IHI +* = P(J) for J = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* ABNRM (output) REAL +* The one-norm of the balanced matrix (the maximum +* of the sum of absolute values of elements of any column). +* +* RCONDE (output) REAL array, dimension (N) +* RCONDE(j) is the reciprocal condition number of the j-th +* eigenvalue. +* +* RCONDV (output) REAL array, dimension (N) +* RCONDV(j) is the reciprocal condition number of the j-th +* right eigenvector. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. If SENSE = 'N' or 'E', +* LWORK >= max(1,2*N), and if JOBVL = 'V' or JOBVR = 'V', +* LWORK >= 3*N. If SENSE = 'V' or 'B', LWORK >= N*(N+6). +* For good performance, LWORK must generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace) INTEGER array, dimension (2*N-2) +* If SENSE = 'N' or 'E', not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the QR algorithm failed to compute all the +* eigenvalues, and no eigenvectors or condition numbers +* have been computed; elements 1:ILO-1 and i+1:N of WR +* and WI contain eigenvalues which have converged. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTVL, WANTVR, WNTSNB, WNTSNE, + $ WNTSNN, WNTSNV + CHARACTER JOB, SIDE + INTEGER HSWORK, I, ICOND, IERR, ITAU, IWRK, K, MAXB, + $ MAXWRK, MINWRK, NOUT + REAL ANRM, BIGNUM, CS, CSCALE, EPS, R, SCL, SMLNUM, + $ SN +* .. +* .. Local Arrays .. + LOGICAL SELECT( 1 ) + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SGEBAK, SGEBAL, SGEHRD, SHSEQR, SLABAD, SLACPY, + $ SLARTG, SLASCL, SORGHR, SROT, SSCAL, STREVC, + $ STRSNA, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV, ISAMAX + REAL SLAMCH, SLANGE, SLAPY2, SNRM2 + EXTERNAL LSAME, ILAENV, ISAMAX, SLAMCH, SLANGE, SLAPY2, + $ SNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVL = LSAME( JOBVL, 'V' ) + WANTVR = LSAME( JOBVR, 'V' ) + WNTSNN = LSAME( SENSE, 'N' ) + WNTSNE = LSAME( SENSE, 'E' ) + WNTSNV = LSAME( SENSE, 'V' ) + WNTSNB = LSAME( SENSE, 'B' ) + IF( .NOT.( LSAME( BALANC, 'N' ) .OR. LSAME( BALANC, 'S' ) .OR. + $ LSAME( BALANC, 'P' ) .OR. LSAME( BALANC, 'B' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( .NOT.( WNTSNN .OR. WNTSNE .OR. WNTSNB .OR. WNTSNV ) .OR. + $ ( ( WNTSNE .OR. WNTSNB ) .AND. .NOT.( WANTVL .AND. + $ WANTVR ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN + INFO = -11 + ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN + INFO = -13 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by SHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MAXWRK = N + N*ILAENV( 1, 'SGEHRD', ' ', N, 1, N, 0 ) + IF( ( .NOT.WANTVL ) .AND. ( .NOT.WANTVR ) ) THEN + MINWRK = MAX( 1, 2*N ) + IF( .NOT.WNTSNN ) + $ MINWRK = MAX( MINWRK, N*N+6*N ) + MAXB = MAX( ILAENV( 8, 'SHSEQR', 'SN', N, 1, N, -1 ), 2 ) + IF( WNTSNN ) THEN + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'SHSEQR', 'EN', N, + $ 1, N, -1 ) ) ) + ELSE + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'SHSEQR', 'SN', N, + $ 1, N, -1 ) ) ) + END IF + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, 1, HSWORK ) + IF( .NOT.WNTSNN ) + $ MAXWRK = MAX( MAXWRK, N*N+6*N ) + ELSE + MINWRK = MAX( 1, 3*N ) + IF( ( .NOT.WNTSNN ) .AND. ( .NOT.WNTSNE ) ) + $ MINWRK = MAX( MINWRK, N*N+6*N ) + MAXB = MAX( ILAENV( 8, 'SHSEQR', 'SN', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'SHSEQR', 'EN', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, 1, HSWORK ) + MAXWRK = MAX( MAXWRK, N+( N-1 )* + $ ILAENV( 1, 'SORGHR', ' ', N, 1, N, -1 ) ) + IF( ( .NOT.WNTSNN ) .AND. ( .NOT.WNTSNE ) ) + $ MAXWRK = MAX( MAXWRK, N*N+6*N ) + MAXWRK = MAX( MAXWRK, 3*N, 1 ) + END IF + WORK( 1 ) = MAXWRK + END IF + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -21 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ICOND = 0 + ANRM = SLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL SLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Balance the matrix and compute ABNRM +* + CALL SGEBAL( BALANC, N, A, LDA, ILO, IHI, SCALE, IERR ) + ABNRM = SLANGE( '1', N, N, A, LDA, DUM ) + IF( SCALEA ) THEN + DUM( 1 ) = ABNRM + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR ) + ABNRM = DUM( 1 ) + END IF +* +* Reduce to upper Hessenberg form +* (Workspace: need 2*N, prefer N+N*NB) +* + ITAU = 1 + IWRK = ITAU + N + CALL SGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVL ) THEN +* +* Want left eigenvectors +* Copy Householder vectors to VL +* + SIDE = 'L' + CALL SLACPY( 'L', N, N, A, LDA, VL, LDVL ) +* +* Generate orthogonal matrix in VL +* (Workspace: need 2*N-1, prefer N+(N-1)*NB) +* + CALL SORGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VL +* (Workspace: need 1, prefer HSWORK (see comments) ) +* + IWRK = ITAU + CALL SHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VL, LDVL, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + IF( WANTVR ) THEN +* +* Want left and right eigenvectors +* Copy Schur vectors to VR +* + SIDE = 'B' + CALL SLACPY( 'F', N, N, VL, LDVL, VR, LDVR ) + END IF +* + ELSE IF( WANTVR ) THEN +* +* Want right eigenvectors +* Copy Householder vectors to VR +* + SIDE = 'R' + CALL SLACPY( 'L', N, N, A, LDA, VR, LDVR ) +* +* Generate orthogonal matrix in VR +* (Workspace: need 2*N-1, prefer N+(N-1)*NB) +* + CALL SORGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VR +* (Workspace: need 1, prefer HSWORK (see comments) ) +* + IWRK = ITAU + CALL SHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + ELSE +* +* Compute eigenvalues only +* If condition numbers desired, compute Schur form +* + IF( WNTSNN ) THEN + JOB = 'E' + ELSE + JOB = 'S' + END IF +* +* (Workspace: need 1, prefer HSWORK (see comments) ) +* + IWRK = ITAU + CALL SHSEQR( JOB, 'N', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) + END IF +* +* If INFO > 0 from SHSEQR, then quit +* + IF( INFO.GT.0 ) + $ GO TO 50 +* + IF( WANTVL .OR. WANTVR ) THEN +* +* Compute left and/or right eigenvectors +* (Workspace: need 3*N) +* + CALL STREVC( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ N, NOUT, WORK( IWRK ), IERR ) + END IF +* +* Compute condition numbers if desired +* (Workspace: need N*N+6*N unless SENSE = 'E') +* + IF( .NOT.WNTSNN ) THEN + CALL STRSNA( SENSE, 'A', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ RCONDE, RCONDV, N, NOUT, WORK( IWRK ), N, IWORK, + $ ICOND ) + END IF +* + IF( WANTVL ) THEN +* +* Undo balancing of left eigenvectors +* + CALL SGEBAK( BALANC, 'L', N, ILO, IHI, SCALE, N, VL, LDVL, + $ IERR ) +* +* Normalize left eigenvectors and make largest component real +* + DO 20 I = 1, N + IF( WI( I ).EQ.ZERO ) THEN + SCL = ONE / SNRM2( N, VL( 1, I ), 1 ) + CALL SSCAL( N, SCL, VL( 1, I ), 1 ) + ELSE IF( WI( I ).GT.ZERO ) THEN + SCL = ONE / SLAPY2( SNRM2( N, VL( 1, I ), 1 ), + $ SNRM2( N, VL( 1, I+1 ), 1 ) ) + CALL SSCAL( N, SCL, VL( 1, I ), 1 ) + CALL SSCAL( N, SCL, VL( 1, I+1 ), 1 ) + DO 10 K = 1, N + WORK( K ) = VL( K, I )**2 + VL( K, I+1 )**2 + 10 CONTINUE + K = ISAMAX( N, WORK, 1 ) + CALL SLARTG( VL( K, I ), VL( K, I+1 ), CS, SN, R ) + CALL SROT( N, VL( 1, I ), 1, VL( 1, I+1 ), 1, CS, SN ) + VL( K, I+1 ) = ZERO + END IF + 20 CONTINUE + END IF +* + IF( WANTVR ) THEN +* +* Undo balancing of right eigenvectors +* + CALL SGEBAK( BALANC, 'R', N, ILO, IHI, SCALE, N, VR, LDVR, + $ IERR ) +* +* Normalize right eigenvectors and make largest component real +* + DO 40 I = 1, N + IF( WI( I ).EQ.ZERO ) THEN + SCL = ONE / SNRM2( N, VR( 1, I ), 1 ) + CALL SSCAL( N, SCL, VR( 1, I ), 1 ) + ELSE IF( WI( I ).GT.ZERO ) THEN + SCL = ONE / SLAPY2( SNRM2( N, VR( 1, I ), 1 ), + $ SNRM2( N, VR( 1, I+1 ), 1 ) ) + CALL SSCAL( N, SCL, VR( 1, I ), 1 ) + CALL SSCAL( N, SCL, VR( 1, I+1 ), 1 ) + DO 30 K = 1, N + WORK( K ) = VR( K, I )**2 + VR( K, I+1 )**2 + 30 CONTINUE + K = ISAMAX( N, WORK, 1 ) + CALL SLARTG( VR( K, I ), VR( K, I+1 ), CS, SN, R ) + CALL SROT( N, VR( 1, I ), 1, VR( 1, I+1 ), 1, CS, SN ) + VR( K, I+1 ) = ZERO + END IF + 40 CONTINUE + END IF +* +* Undo scaling if necessary +* + 50 CONTINUE + IF( SCALEA ) THEN + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WR( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WI( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + IF( INFO.EQ.0 ) THEN + IF( ( WNTSNV .OR. WNTSNB ) .AND. ICOND.EQ.0 ) + $ CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, RCONDV, N, + $ IERR ) + ELSE + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WR, N, + $ IERR ) + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, N, + $ IERR ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of SGEEVX +* + END diff --git a/costa/native/external/lapack/sgegs.f b/costa/native/external/lapack/sgegs.f new file mode 100644 index 000000000..c5f9237b2 --- /dev/null +++ b/costa/native/external/lapack/sgegs.f @@ -0,0 +1,470 @@ + SUBROUTINE SGEGS( JOBVSL, JOBVSR, N, A, LDA, B, LDB, ALPHAR, + $ ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), VSL( LDVSL, * ), + $ VSR( LDVSR, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine SGGES. +* +* SGEGS computes for a pair of N-by-N real nonsymmetric matrices A, B: +* the generalized eigenvalues (alphar +/- alphai*i, beta), the real +* Schur form (A, B), and optionally left and/or right Schur vectors +* (VSL and VSR). +* +* (If only the generalized eigenvalues are needed, use the driver SGEGV +* instead.) +* +* A generalized eigenvalue for a pair of matrices (A,B) is, roughly +* speaking, a scalar w or a ratio alpha/beta = w, such that A - w*B +* is singular. It is usually represented as the pair (alpha,beta), +* as there is a reasonable interpretation for beta=0, and even for +* both being zero. A good beginning reference is the book, "Matrix +* Computations", by G. Golub & C. van Loan (Johns Hopkins U. Press) +* +* The (generalized) Schur form of a pair of matrices is the result of +* multiplying both matrices on the left by one orthogonal matrix and +* both on the right by another orthogonal matrix, these two orthogonal +* matrices being chosen so as to bring the pair of matrices into +* (real) Schur form. +* +* A pair of matrices A, B is in generalized real Schur form if B is +* upper triangular with non-negative diagonal and A is block upper +* triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond +* to real generalized eigenvalues, while 2-by-2 blocks of A will be +* "standardized" by making the corresponding elements of B have the +* form: +* [ a 0 ] +* [ 0 b ] +* +* and the pair of corresponding 2-by-2 blocks in A and B will +* have a complex conjugate pair of generalized eigenvalues. +* +* The left and right Schur vectors are the columns of VSL and VSR, +* respectively, where VSL and VSR are the orthogonal matrices +* which reduce A and B to Schur form: +* +* Schur form of (A,B) = ( (VSL)**T A (VSR), (VSL)**T B (VSR) ) +* +* Arguments +* ========= +* +* JOBVSL (input) CHARACTER*1 +* = 'N': do not compute the left Schur vectors; +* = 'V': compute the left Schur vectors. +* +* JOBVSR (input) CHARACTER*1 +* = 'N': do not compute the right Schur vectors; +* = 'V': compute the right Schur vectors. +* +* N (input) INTEGER +* The order of the matrices A, B, VSL, and VSR. N >= 0. +* +* A (input/output) REAL array, dimension (LDA, N) +* On entry, the first of the pair of matrices whose generalized +* eigenvalues and (optionally) Schur vectors are to be +* computed. +* On exit, the generalized Schur form of A. +* Note: to avoid overflow, the Frobenius norm of the matrix +* A should be less than the overflow threshold. +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) REAL array, dimension (LDB, N) +* On entry, the second of the pair of matrices whose +* generalized eigenvalues and (optionally) Schur vectors are +* to be computed. +* On exit, the generalized Schur form of B. +* Note: to avoid overflow, the Frobenius norm of the matrix +* B should be less than the overflow threshold. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* ALPHAR (output) REAL array, dimension (N) +* ALPHAI (output) REAL array, dimension (N) +* BETA (output) REAL array, dimension (N) +* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +* be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i, +* j=1,...,N and BETA(j),j=1,...,N are the diagonals of the +* complex Schur form (A,B) that would result if the 2-by-2 +* diagonal blocks of the real Schur form of (A,B) were further +* reduced to triangular form using 2-by-2 complex unitary +* transformations. If ALPHAI(j) is zero, then the j-th +* eigenvalue is real; if positive, then the j-th and (j+1)-st +* eigenvalues are a complex conjugate pair, with ALPHAI(j+1) +* negative. +* +* Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +* may easily over- or underflow, and BETA(j) may even be zero. +* Thus, the user should avoid naively computing the ratio +* alpha/beta. However, ALPHAR and ALPHAI will be always less +* than and usually comparable with norm(A) in magnitude, and +* BETA always less than and usually comparable with norm(B). +* +* VSL (output) REAL array, dimension (LDVSL,N) +* If JOBVSL = 'V', VSL will contain the left Schur vectors. +* (See "Purpose", above.) +* Not referenced if JOBVSL = 'N'. +* +* LDVSL (input) INTEGER +* The leading dimension of the matrix VSL. LDVSL >=1, and +* if JOBVSL = 'V', LDVSL >= N. +* +* VSR (output) REAL array, dimension (LDVSR,N) +* If JOBVSR = 'V', VSR will contain the right Schur vectors. +* (See "Purpose", above.) +* Not referenced if JOBVSR = 'N'. +* +* LDVSR (input) INTEGER +* The leading dimension of the matrix VSR. LDVSR >= 1, and +* if JOBVSR = 'V', LDVSR >= N. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,4*N). +* For good performance, LWORK must generally be larger. +* To compute the optimal value of LWORK, call ILAENV to get +* blocksizes (for SGEQRF, SORMQR, and SORGQR.) Then compute: +* NB -- MAX of the blocksizes for SGEQRF, SORMQR, and SORGQR +* The optimal LWORK is 2*N + N*(NB+1). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* = 1,...,N: +* The QZ iteration failed. (A,B) are not in Schur +* form, but ALPHAR(j), ALPHAI(j), and BETA(j) should +* be correct for j=INFO+1,...,N. +* > N: errors that usually indicate LAPACK problems: +* =N+1: error return from SGGBAL +* =N+2: error return from SGEQRF +* =N+3: error return from SORMQR +* =N+4: error return from SORGQR +* =N+5: error return from SGGHRD +* =N+6: error return from SHGEQZ (other than failed +* iteration) +* =N+7: error return from SGGBAK (computing VSL) +* =N+8: error return from SGGBAK (computing VSR) +* =N+9: error return from SLASCL (various places) +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILVSL, ILVSR, LQUERY + INTEGER ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT, + $ ILO, IRIGHT, IROWS, ITAU, IWORK, LOPT, LWKMIN, + $ LWKOPT, NB, NB1, NB2, NB3 + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SAFMIN, SMLNUM +* .. +* .. External Subroutines .. + EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLACPY, + $ SLASCL, SLASET, SORGQR, SORMQR, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL ILAENV, LSAME, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* +* Test the input arguments +* + LWKMIN = MAX( 4*N, 1 ) + LWKOPT = LWKMIN + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -12 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -14 + ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -16 + END IF +* + IF( INFO.EQ.0 ) THEN + NB1 = ILAENV( 1, 'SGEQRF', ' ', N, N, -1, -1 ) + NB2 = ILAENV( 1, 'SORMQR', ' ', N, N, N, -1 ) + NB3 = ILAENV( 1, 'SORGQR', ' ', N, N, N, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LOPT = 2*N+N*(NB+1) + WORK( 1 ) = LOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEGS ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'E' )*SLAMCH( 'B' ) + SAFMIN = SLAMCH( 'S' ) + SMLNUM = N*SAFMIN / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF +* + IF( ILASCL ) THEN + CALL SLASCL( 'G', -1, -1, ANRM, ANRMTO, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = SLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF +* + IF( ILBSCL ) THEN + CALL SLASCL( 'G', -1, -1, BNRM, BNRMTO, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* +* Permute the matrix to make it more nearly triangular +* Workspace layout: (2*N words -- "work..." not actually used) +* left_permutation, right_permutation, work... +* + ILEFT = 1 + IRIGHT = N + 1 + IWORK = IRIGHT + N + CALL SGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWORK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 1 + GO TO 10 + END IF +* +* Reduce B to triangular form, and initialize VSL and/or VSR +* Workspace layout: ("work..." must have at least N words) +* left_permutation, right_permutation, tau, work... +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = IWORK + IWORK = ITAU + IROWS + CALL SGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 2 + GO TO 10 + END IF +* + CALL SORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ), + $ LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 3 + GO TO 10 + END IF +* + IF( ILVSL ) THEN + CALL SLASET( 'Full', N, N, ZERO, ONE, VSL, LDVSL ) + CALL SLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + CALL SORGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK, + $ IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 4 + GO TO 10 + END IF + END IF +* + IF( ILVSR ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* + CALL SGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 5 + GO TO 10 + END IF +* +* Perform QZ algorithm, computing Schur vectors if desired +* Workspace layout: ("work..." must have at least 1 word) +* left_permutation, right_permutation, work... +* + IWORK = ITAU + CALL SHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN + INFO = IINFO + ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN + INFO = IINFO - N + ELSE + INFO = N + 6 + END IF + GO TO 10 + END IF +* +* Apply permutation to VSL and VSR +* + IF( ILVSL ) THEN + CALL SGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSL, LDVSL, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 7 + GO TO 10 + END IF + END IF + IF( ILVSR ) THEN + CALL SGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSR, LDVSR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 8 + GO TO 10 + END IF + END IF +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL SLASCL( 'H', -1, -1, ANRMTO, ANRM, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + CALL SLASCL( 'G', -1, -1, ANRMTO, ANRM, N, 1, ALPHAR, N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + CALL SLASCL( 'G', -1, -1, ANRMTO, ANRM, N, 1, ALPHAI, N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* + IF( ILBSCL ) THEN + CALL SLASCL( 'U', -1, -1, BNRMTO, BNRM, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + CALL SLASCL( 'G', -1, -1, BNRMTO, BNRM, N, 1, BETA, N, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* + 10 CONTINUE + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SGEGS +* + END diff --git a/costa/native/external/lapack/sgegv.f b/costa/native/external/lapack/sgegv.f new file mode 100644 index 000000000..ab5a4953a --- /dev/null +++ b/costa/native/external/lapack/sgegv.f @@ -0,0 +1,641 @@ + SUBROUTINE SGEGV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI, + $ BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine SGGEV. +* +* SGEGV computes for a pair of n-by-n real nonsymmetric matrices A and +* B, the generalized eigenvalues (alphar +/- alphai*i, beta), and +* optionally, the left and/or right generalized eigenvectors (VL and +* VR). +* +* A generalized eigenvalue for a pair of matrices (A,B) is, roughly +* speaking, a scalar w or a ratio alpha/beta = w, such that A - w*B +* is singular. It is usually represented as the pair (alpha,beta), +* as there is a reasonable interpretation for beta=0, and even for +* both being zero. A good beginning reference is the book, "Matrix +* Computations", by G. Golub & C. van Loan (Johns Hopkins U. Press) +* +* A right generalized eigenvector corresponding to a generalized +* eigenvalue w for a pair of matrices (A,B) is a vector r such +* that (A - w B) r = 0 . A left generalized eigenvector is a vector +* l such that l**H * (A - w B) = 0, where l**H is the +* conjugate-transpose of l. +* +* Note: this routine performs "full balancing" on A and B -- see +* "Further Details", below. +* +* Arguments +* ========= +* +* JOBVL (input) CHARACTER*1 +* = 'N': do not compute the left generalized eigenvectors; +* = 'V': compute the left generalized eigenvectors. +* +* JOBVR (input) CHARACTER*1 +* = 'N': do not compute the right generalized eigenvectors; +* = 'V': compute the right generalized eigenvectors. +* +* N (input) INTEGER +* The order of the matrices A, B, VL, and VR. N >= 0. +* +* A (input/output) REAL array, dimension (LDA, N) +* On entry, the first of the pair of matrices whose +* generalized eigenvalues and (optionally) generalized +* eigenvectors are to be computed. +* On exit, the contents will have been destroyed. (For a +* description of the contents of A on exit, see "Further +* Details", below.) +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) REAL array, dimension (LDB, N) +* On entry, the second of the pair of matrices whose +* generalized eigenvalues and (optionally) generalized +* eigenvectors are to be computed. +* On exit, the contents will have been destroyed. (For a +* description of the contents of B on exit, see "Further +* Details", below.) +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* ALPHAR (output) REAL array, dimension (N) +* ALPHAI (output) REAL array, dimension (N) +* BETA (output) REAL array, dimension (N) +* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +* be the generalized eigenvalues. If ALPHAI(j) is zero, then +* the j-th eigenvalue is real; if positive, then the j-th and +* (j+1)-st eigenvalues are a complex conjugate pair, with +* ALPHAI(j+1) negative. +* +* Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +* may easily over- or underflow, and BETA(j) may even be zero. +* Thus, the user should avoid naively computing the ratio +* alpha/beta. However, ALPHAR and ALPHAI will be always less +* than and usually comparable with norm(A) in magnitude, and +* BETA always less than and usually comparable with norm(B). +* +* VL (output) REAL array, dimension (LDVL,N) +* If JOBVL = 'V', the left generalized eigenvectors. (See +* "Purpose", above.) Real eigenvectors take one column, +* complex take two columns, the first for the real part and +* the second for the imaginary part. Complex eigenvectors +* correspond to an eigenvalue with positive imaginary part. +* Each eigenvector will be scaled so the largest component +* will have abs(real part) + abs(imag. part) = 1, *except* +* that for eigenvalues with alpha=beta=0, a zero vector will +* be returned as the corresponding eigenvector. +* Not referenced if JOBVL = 'N'. +* +* LDVL (input) INTEGER +* The leading dimension of the matrix VL. LDVL >= 1, and +* if JOBVL = 'V', LDVL >= N. +* +* VR (output) REAL array, dimension (LDVR,N) +* If JOBVR = 'V', the right generalized eigenvectors. (See +* "Purpose", above.) Real eigenvectors take one column, +* complex take two columns, the first for the real part and +* the second for the imaginary part. Complex eigenvectors +* correspond to an eigenvalue with positive imaginary part. +* Each eigenvector will be scaled so the largest component +* will have abs(real part) + abs(imag. part) = 1, *except* +* that for eigenvalues with alpha=beta=0, a zero vector will +* be returned as the corresponding eigenvector. +* Not referenced if JOBVR = 'N'. +* +* LDVR (input) INTEGER +* The leading dimension of the matrix VR. LDVR >= 1, and +* if JOBVR = 'V', LDVR >= N. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,8*N). +* For good performance, LWORK must generally be larger. +* To compute the optimal value of LWORK, call ILAENV to get +* blocksizes (for SGEQRF, SORMQR, and SORGQR.) Then compute: +* NB -- MAX of the blocksizes for SGEQRF, SORMQR, and SORGQR; +* The optimal LWORK is: +* 2*N + MAX( 6*N, N*(NB+1) ). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* = 1,...,N: +* The QZ iteration failed. No eigenvectors have been +* calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) +* should be correct for j=INFO+1,...,N. +* > N: errors that usually indicate LAPACK problems: +* =N+1: error return from SGGBAL +* =N+2: error return from SGEQRF +* =N+3: error return from SORMQR +* =N+4: error return from SORGQR +* =N+5: error return from SGGHRD +* =N+6: error return from SHGEQZ (other than failed +* iteration) +* =N+7: error return from STGEVC +* =N+8: error return from SGGBAK (computing VL) +* =N+9: error return from SGGBAK (computing VR) +* =N+10: error return from SLASCL (various calls) +* +* Further Details +* =============== +* +* Balancing +* --------- +* +* This driver calls SGGBAL to both permute and scale rows and columns +* of A and B. The permutations PL and PR are chosen so that PL*A*PR +* and PL*B*R will be upper triangular except for the diagonal blocks +* A(i:j,i:j) and B(i:j,i:j), with i and j as close together as +* possible. The diagonal scaling matrices DL and DR are chosen so +* that the pair DL*PL*A*PR*DR, DL*PL*B*PR*DR have elements close to +* one (except for the elements that start out zero.) +* +* After the eigenvalues and eigenvectors of the balanced matrices +* have been computed, SGGBAK transforms the eigenvectors back to what +* they would have been (in perfect arithmetic) if they had not been +* balanced. +* +* Contents of A and B on Exit +* -------- -- - --- - -- ---- +* +* If any eigenvectors are computed (either JOBVL='V' or JOBVR='V' or +* both), then on exit the arrays A and B will contain the real Schur +* form[*] of the "balanced" versions of A and B. If no eigenvectors +* are computed, then only the diagonal blocks will be correct. +* +* [*] See SHGEQZ, SGEGS, or read the book "Matrix Computations", +* by Golub & van Loan, pub. by Johns Hopkins U. Press. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL ILIMIT, ILV, ILVL, ILVR, LQUERY + CHARACTER CHTEMP + INTEGER ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT, ILO, + $ IN, IRIGHT, IROWS, ITAU, IWORK, JC, JR, LOPT, + $ LWKMIN, LWKOPT, NB, NB1, NB2, NB3 + REAL ABSAI, ABSAR, ABSB, ANRM, ANRM1, ANRM2, BNRM, + $ BNRM1, BNRM2, EPS, ONEPLS, SAFMAX, SAFMIN, + $ SALFAI, SALFAR, SBETA, SCALE, TEMP +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLACPY, + $ SLASCL, SLASET, SORGQR, SORMQR, STGEVC, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL ILAENV, LSAME, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, MAX +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* +* Test the input arguments +* + LWKMIN = MAX( 8*N, 1 ) + LWKOPT = LWKMIN + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -12 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -14 + ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -16 + END IF +* + IF( INFO.EQ.0 ) THEN + NB1 = ILAENV( 1, 'SGEQRF', ' ', N, N, -1, -1 ) + NB2 = ILAENV( 1, 'SORMQR', ' ', N, N, N, -1 ) + NB3 = ILAENV( 1, 'SORGQR', ' ', N, N, N, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LOPT = 2*N + MAX( 6*N, N*(NB+1) ) + WORK( 1 ) = LOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEGV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'E' )*SLAMCH( 'B' ) + SAFMIN = SLAMCH( 'S' ) + SAFMIN = SAFMIN + SAFMIN + SAFMAX = ONE / SAFMIN + ONEPLS = ONE + ( 4*EPS ) +* +* Scale A +* + ANRM = SLANGE( 'M', N, N, A, LDA, WORK ) + ANRM1 = ANRM + ANRM2 = ONE + IF( ANRM.LT.ONE ) THEN + IF( SAFMAX*ANRM.LT.ONE ) THEN + ANRM1 = SAFMIN + ANRM2 = SAFMAX*ANRM + END IF + END IF +* + IF( ANRM.GT.ZERO ) THEN + CALL SLASCL( 'G', -1, -1, ANRM, ONE, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 10 + RETURN + END IF + END IF +* +* Scale B +* + BNRM = SLANGE( 'M', N, N, B, LDB, WORK ) + BNRM1 = BNRM + BNRM2 = ONE + IF( BNRM.LT.ONE ) THEN + IF( SAFMAX*BNRM.LT.ONE ) THEN + BNRM1 = SAFMIN + BNRM2 = SAFMAX*BNRM + END IF + END IF +* + IF( BNRM.GT.ZERO ) THEN + CALL SLASCL( 'G', -1, -1, BNRM, ONE, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 10 + RETURN + END IF + END IF +* +* Permute the matrix to make it more nearly triangular +* Workspace layout: (8*N words -- "work" requires 6*N words) +* left_permutation, right_permutation, work... +* + ILEFT = 1 + IRIGHT = N + 1 + IWORK = IRIGHT + N + CALL SGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWORK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 1 + GO TO 120 + END IF +* +* Reduce B to triangular form, and initialize VL and/or VR +* Workspace layout: ("work..." must have at least N words) +* left_permutation, right_permutation, tau, work... +* + IROWS = IHI + 1 - ILO + IF( ILV ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = IWORK + IWORK = ITAU + IROWS + CALL SGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 2 + GO TO 120 + END IF +* + CALL SORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ), + $ LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 3 + GO TO 120 + END IF +* + IF( ILVL ) THEN + CALL SLASET( 'Full', N, N, ZERO, ONE, VL, LDVL ) + CALL SLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + CALL SORGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK, + $ IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 4 + GO TO 120 + END IF + END IF +* + IF( ILVR ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* + IF( ILV ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL SGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IINFO ) + ELSE + CALL SGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IINFO ) + END IF + IF( IINFO.NE.0 ) THEN + INFO = N + 5 + GO TO 120 + END IF +* +* Perform QZ algorithm +* Workspace layout: ("work..." must have at least 1 word) +* left_permutation, right_permutation, work... +* + IWORK = ITAU + IF( ILV ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF + CALL SHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN + INFO = IINFO + ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN + INFO = IINFO - N + ELSE + INFO = N + 6 + END IF + GO TO 120 + END IF +* + IF( ILV ) THEN +* +* Compute Eigenvectors (STGEVC requires 6*N words of workspace) +* + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF +* + CALL STGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, N, IN, WORK( IWORK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 7 + GO TO 120 + END IF +* +* Undo balancing on VL and VR, rescale +* + IF( ILVL ) THEN + CALL SGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VL, LDVL, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 8 + GO TO 120 + END IF + DO 50 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 50 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 10 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) ) ) + 10 CONTINUE + ELSE + DO 20 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) )+ + $ ABS( VL( JR, JC+1 ) ) ) + 20 CONTINUE + END IF + IF( TEMP.LT.SAFMIN ) + $ GO TO 50 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 30 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 30 CONTINUE + ELSE + DO 40 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + VL( JR, JC+1 ) = VL( JR, JC+1 )*TEMP + 40 CONTINUE + END IF + 50 CONTINUE + END IF + IF( ILVR ) THEN + CALL SGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VR, LDVR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + GO TO 120 + END IF + DO 100 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 100 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 60 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) ) ) + 60 CONTINUE + ELSE + DO 70 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) )+ + $ ABS( VR( JR, JC+1 ) ) ) + 70 CONTINUE + END IF + IF( TEMP.LT.SAFMIN ) + $ GO TO 100 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 80 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 80 CONTINUE + ELSE + DO 90 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + VR( JR, JC+1 ) = VR( JR, JC+1 )*TEMP + 90 CONTINUE + END IF + 100 CONTINUE + END IF +* +* End of eigenvector calculation +* + END IF +* +* Undo scaling in alpha, beta +* +* Note: this does not give the alpha and beta for the unscaled +* problem. +* +* Un-scaling is limited to avoid underflow in alpha and beta +* if they are significant. +* + DO 110 JC = 1, N + ABSAR = ABS( ALPHAR( JC ) ) + ABSAI = ABS( ALPHAI( JC ) ) + ABSB = ABS( BETA( JC ) ) + SALFAR = ANRM*ALPHAR( JC ) + SALFAI = ANRM*ALPHAI( JC ) + SBETA = BNRM*BETA( JC ) + ILIMIT = .FALSE. + SCALE = ONE +* +* Check for significant underflow in ALPHAI +* + IF( ABS( SALFAI ).LT.SAFMIN .AND. ABSAI.GE. + $ MAX( SAFMIN, EPS*ABSAR, EPS*ABSB ) ) THEN + ILIMIT = .TRUE. + SCALE = ( ONEPLS*SAFMIN / ANRM1 ) / + $ MAX( ONEPLS*SAFMIN, ANRM2*ABSAI ) +* + ELSE IF( SALFAI.EQ.ZERO ) THEN +* +* If insignificant underflow in ALPHAI, then make the +* conjugate eigenvalue real. +* + IF( ALPHAI( JC ).LT.ZERO .AND. JC.GT.1 ) THEN + ALPHAI( JC-1 ) = ZERO + ELSE IF( ALPHAI( JC ).GT.ZERO .AND. JC.LT.N ) THEN + ALPHAI( JC+1 ) = ZERO + END IF + END IF +* +* Check for significant underflow in ALPHAR +* + IF( ABS( SALFAR ).LT.SAFMIN .AND. ABSAR.GE. + $ MAX( SAFMIN, EPS*ABSAI, EPS*ABSB ) ) THEN + ILIMIT = .TRUE. + SCALE = MAX( SCALE, ( ONEPLS*SAFMIN / ANRM1 ) / + $ MAX( ONEPLS*SAFMIN, ANRM2*ABSAR ) ) + END IF +* +* Check for significant underflow in BETA +* + IF( ABS( SBETA ).LT.SAFMIN .AND. ABSB.GE. + $ MAX( SAFMIN, EPS*ABSAR, EPS*ABSAI ) ) THEN + ILIMIT = .TRUE. + SCALE = MAX( SCALE, ( ONEPLS*SAFMIN / BNRM1 ) / + $ MAX( ONEPLS*SAFMIN, BNRM2*ABSB ) ) + END IF +* +* Check for possible overflow when limiting scaling +* + IF( ILIMIT ) THEN + TEMP = ( SCALE*SAFMIN )*MAX( ABS( SALFAR ), ABS( SALFAI ), + $ ABS( SBETA ) ) + IF( TEMP.GT.ONE ) + $ SCALE = SCALE / TEMP + IF( SCALE.LT.ONE ) + $ ILIMIT = .FALSE. + END IF +* +* Recompute un-scaled ALPHAR, ALPHAI, BETA if necessary. +* + IF( ILIMIT ) THEN + SALFAR = ( SCALE*ALPHAR( JC ) )*ANRM + SALFAI = ( SCALE*ALPHAI( JC ) )*ANRM + SBETA = ( SCALE*BETA( JC ) )*BNRM + END IF + ALPHAR( JC ) = SALFAR + ALPHAI( JC ) = SALFAI + BETA( JC ) = SBETA + 110 CONTINUE +* + 120 CONTINUE + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SGEGV +* + END diff --git a/costa/native/external/lapack/sgehd2.f b/costa/native/external/lapack/sgehd2.f new file mode 100644 index 000000000..45c133e37 --- /dev/null +++ b/costa/native/external/lapack/sgehd2.f @@ -0,0 +1,150 @@ + SUBROUTINE SGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGEHD2 reduces a real general matrix A to upper Hessenberg form H by +* an orthogonal similarity transformation: Q' * A * Q = H . +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that A is already upper triangular in rows +* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +* set by a previous call to SGEBAL; otherwise they should be +* set to 1 and N respectively. See Further Details. +* 1 <= ILO <= IHI <= max(1,N). +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the n by n general matrix to be reduced. +* On exit, the upper triangle and the first subdiagonal of A +* are overwritten with the upper Hessenberg matrix H, and the +* elements below the first subdiagonal, with the array TAU, +* represent the orthogonal matrix Q as a product of elementary +* reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (output) REAL array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of (ihi-ilo) elementary +* reflectors +* +* Q = H(ilo) H(ilo+1) . . . H(ihi-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on +* exit in A(i+2:ihi,i), and tau in TAU(i). +* +* The contents of A are illustrated by the following example, with +* n = 7, ilo = 2 and ihi = 6: +* +* on entry, on exit, +* +* ( a a a a a a a ) ( a a h h h h a ) +* ( a a a a a a ) ( a h h h h a ) +* ( a a a a a a ) ( h h h h h h ) +* ( a a a a a a ) ( v2 h h h h h ) +* ( a a a a a a ) ( v2 v3 h h h h ) +* ( a a a a a a ) ( v2 v3 v4 h h h ) +* ( a ) ( a ) +* +* where a denotes an element of the original matrix A, h denotes a +* modified element of the upper Hessenberg matrix H, and vi denotes an +* element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL AII +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEHD2', -INFO ) + RETURN + END IF +* + DO 10 I = ILO, IHI - 1 +* +* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) +* + CALL SLARFG( IHI-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1, + $ TAU( I ) ) + AII = A( I+1, I ) + A( I+1, I ) = ONE +* +* Apply H(i) to A(1:ihi,i+1:ihi) from the right +* + CALL SLARF( 'Right', IHI, IHI-I, A( I+1, I ), 1, TAU( I ), + $ A( 1, I+1 ), LDA, WORK ) +* +* Apply H(i) to A(i+1:ihi,i+1:n) from the left +* + CALL SLARF( 'Left', IHI-I, N-I, A( I+1, I ), 1, TAU( I ), + $ A( I+1, I+1 ), LDA, WORK ) +* + A( I+1, I ) = AII + 10 CONTINUE +* + RETURN +* +* End of SGEHD2 +* + END diff --git a/costa/native/external/lapack/sgehrd.f b/costa/native/external/lapack/sgehrd.f new file mode 100644 index 000000000..9de584f5d --- /dev/null +++ b/costa/native/external/lapack/sgehrd.f @@ -0,0 +1,255 @@ + SUBROUTINE SGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGEHRD reduces a real general matrix A to upper Hessenberg form H by +* an orthogonal similarity transformation: Q' * A * Q = H . +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that A is already upper triangular in rows +* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +* set by a previous call to SGEBAL; otherwise they should be +* set to 1 and N respectively. See Further Details. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the N-by-N general matrix to be reduced. +* On exit, the upper triangle and the first subdiagonal of A +* are overwritten with the upper Hessenberg matrix H, and the +* elements below the first subdiagonal, with the array TAU, +* represent the orthogonal matrix Q as a product of elementary +* reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (output) REAL array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to +* zero. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of (ihi-ilo) elementary +* reflectors +* +* Q = H(ilo) H(ilo+1) . . . H(ihi-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on +* exit in A(i+2:ihi,i), and tau in TAU(i). +* +* The contents of A are illustrated by the following example, with +* n = 7, ilo = 2 and ihi = 6: +* +* on entry, on exit, +* +* ( a a a a a a a ) ( a a h h h h a ) +* ( a a a a a a ) ( a h h h h a ) +* ( a a a a a a ) ( h h h h h h ) +* ( a a a a a a ) ( v2 h h h h h ) +* ( a a a a a a ) ( v2 v3 h h h h ) +* ( a a a a a a ) ( v2 v3 v4 h h h ) +* ( a ) ( a ) +* +* where a denotes an element of the original matrix A, h denotes a +* modified element of the upper Hessenberg matrix H, and vi denotes an +* element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, LDWORK, LWKOPT, NB, NBMIN, + $ NH, NX + REAL EI +* .. +* .. Local Arrays .. + REAL T( LDT, NBMAX ) +* .. +* .. External Subroutines .. + EXTERNAL SGEHD2, SGEMM, SLAHRD, SLARFB, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB = MIN( NBMAX, ILAENV( 1, 'SGEHRD', ' ', N, ILO, IHI, -1 ) ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEHRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero +* + DO 10 I = 1, ILO - 1 + TAU( I ) = ZERO + 10 CONTINUE + DO 20 I = MAX( 1, IHI ), N - 1 + TAU( I ) = ZERO + 20 CONTINUE +* +* Quick return if possible +* + NH = IHI - ILO + 1 + IF( NH.LE.1 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine the block size. +* + NB = MIN( NBMAX, ILAENV( 1, 'SGEHRD', ' ', N, ILO, IHI, -1 ) ) + NBMIN = 2 + IWS = 1 + IF( NB.GT.1 .AND. NB.LT.NH ) THEN +* +* Determine when to cross over from blocked to unblocked code +* (last block is always handled by unblocked code). +* + NX = MAX( NB, ILAENV( 3, 'SGEHRD', ' ', N, ILO, IHI, -1 ) ) + IF( NX.LT.NH ) THEN +* +* Determine if workspace is large enough for blocked code. +* + IWS = N*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code. +* + NBMIN = MAX( 2, ILAENV( 2, 'SGEHRD', ' ', N, ILO, IHI, + $ -1 ) ) + IF( LWORK.GE.N*NBMIN ) THEN + NB = LWORK / N + ELSE + NB = 1 + END IF + END IF + END IF + END IF + LDWORK = N +* + IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN +* +* Use unblocked code below +* + I = ILO +* + ELSE +* +* Use blocked code +* + DO 30 I = ILO, IHI - 1 - NX, NB + IB = MIN( NB, IHI-I ) +* +* Reduce columns i:i+ib-1 to Hessenberg form, returning the +* matrices V and T of the block reflector H = I - V*T*V' +* which performs the reduction, and also the matrix Y = A*V*T +* + CALL SLAHRD( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT, + $ WORK, LDWORK ) +* +* Apply the block reflector H to A(1:ihi,i+ib:ihi) from the +* right, computing A := A - Y * V'. V(i+ib,ib-1) must be set +* to 1. +* + EI = A( I+IB, I+IB-1 ) + A( I+IB, I+IB-1 ) = ONE + CALL SGEMM( 'No transpose', 'Transpose', IHI, IHI-I-IB+1, + $ IB, -ONE, WORK, LDWORK, A( I+IB, I ), LDA, ONE, + $ A( 1, I+IB ), LDA ) + A( I+IB, I+IB-1 ) = EI +* +* Apply the block reflector H to A(i+1:ihi,i+ib:n) from the +* left +* + CALL SLARFB( 'Left', 'Transpose', 'Forward', 'Columnwise', + $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, T, LDT, + $ A( I+1, I+IB ), LDA, WORK, LDWORK ) + 30 CONTINUE + END IF +* +* Use unblocked code to reduce the rest of the matrix +* + CALL SGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO ) + WORK( 1 ) = IWS +* + RETURN +* +* End of SGEHRD +* + END diff --git a/costa/native/external/lapack/sgelq2.f b/costa/native/external/lapack/sgelq2.f new file mode 100644 index 000000000..63a1b32c1 --- /dev/null +++ b/costa/native/external/lapack/sgelq2.f @@ -0,0 +1,122 @@ + SUBROUTINE SGELQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGELQ2 computes an LQ factorization of a real m by n matrix A: +* A = L * Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the m by n matrix A. +* On exit, the elements on and below the diagonal of the array +* contain the m by min(m,n) lower trapezoidal matrix L (L is +* lower triangular if m <= n); the elements above the diagonal, +* with the array TAU, represent the orthogonal matrix Q as a +* product of elementary reflectors (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) REAL array, dimension (M) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(k) . . . H(2) H(1), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), +* and tau in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + REAL AII +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELQ2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i,i+1:n) +* + CALL SLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA, + $ TAU( I ) ) + IF( I.LT.M ) THEN +* +* Apply H(i) to A(i+1:m,i:n) from the right +* + AII = A( I, I ) + A( I, I ) = ONE + CALL SLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ), + $ A( I+1, I ), LDA, WORK ) + A( I, I ) = AII + END IF + 10 CONTINUE + RETURN +* +* End of SGELQ2 +* + END diff --git a/costa/native/external/lapack/sgelqf.f b/costa/native/external/lapack/sgelqf.f new file mode 100644 index 000000000..d2797a6e4 --- /dev/null +++ b/costa/native/external/lapack/sgelqf.f @@ -0,0 +1,196 @@ + SUBROUTINE SGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGELQF computes an LQ factorization of a real M-by-N matrix A: +* A = L * Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the elements on and below the diagonal of the array +* contain the m-by-min(m,n) lower trapezoidal matrix L (L is +* lower triangular if m <= n); the elements above the diagonal, +* with the array TAU, represent the orthogonal matrix Q as a +* product of elementary reflectors (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(k) . . . H(2) H(1), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), +* and tau in TAU(i). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SGELQ2, SLARFB, SLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SGELQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SGELQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the LQ factorization of the current block +* A(i:i+ib-1,i:n) +* + CALL SGELQ2( IB, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.M ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL SLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i+ib:m,i:n) from the right +* + CALL SLARFB( 'Right', 'No transpose', 'Forward', + $ 'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ), + $ LDA, WORK, LDWORK, A( I+IB, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) + $ CALL SGELQ2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of SGELQF +* + END diff --git a/costa/native/external/lapack/sgels.f b/costa/native/external/lapack/sgels.f new file mode 100644 index 000000000..a749a0348 --- /dev/null +++ b/costa/native/external/lapack/sgels.f @@ -0,0 +1,403 @@ + SUBROUTINE SGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGELS solves overdetermined or underdetermined real linear systems +* involving an M-by-N matrix A, or its transpose, using a QR or LQ +* factorization of A. It is assumed that A has full rank. +* +* The following options are provided: +* +* 1. If TRANS = 'N' and m >= n: find the least squares solution of +* an overdetermined system, i.e., solve the least squares problem +* minimize || B - A*X ||. +* +* 2. If TRANS = 'N' and m < n: find the minimum norm solution of +* an underdetermined system A * X = B. +* +* 3. If TRANS = 'T' and m >= n: find the minimum norm solution of +* an undetermined system A**T * X = B. +* +* 4. If TRANS = 'T' and m < n: find the least squares solution of +* an overdetermined system, i.e., solve the least squares problem +* minimize || B - A**T * X ||. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution +* matrix X. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER +* = 'N': the linear system involves A; +* = 'T': the linear system involves A**T. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of +* columns of the matrices B and X. NRHS >=0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, +* if M >= N, A is overwritten by details of its QR +* factorization as returned by SGEQRF; +* if M < N, A is overwritten by details of its LQ +* factorization as returned by SGELQF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the matrix B of right hand side vectors, stored +* columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +* if TRANS = 'T'. +* On exit, B is overwritten by the solution vectors, stored +* columnwise: +* if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +* squares solution vectors; the residual sum of squares for the +* solution in each column is given by the sum of squares of +* elements N+1 to M in that column; +* if TRANS = 'N' and m < n, rows 1 to N of B contain the +* minimum norm solution vectors; +* if TRANS = 'T' and m >= n, rows 1 to M of B contain the +* minimum norm solution vectors; +* if TRANS = 'T' and m < n, rows 1 to M of B contain the +* least squares solution vectors; the residual sum of squares +* for the solution in each column is given by the sum of +* squares of elements M+1 to N in that column. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= MAX(1,M,N). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* LWORK >= max( 1, MN + max( MN, NRHS ) ). +* For optimal performance, +* LWORK >= max( 1, MN + max( MN, NRHS )*NB ). +* where MN = min(M,N) and NB is the optimum block size. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TPSD + INTEGER BROW, I, IASCL, IBSCL, J, MN, NB, SCLLEN, WSIZE + REAL ANRM, BIGNUM, BNRM, SMLNUM +* .. +* .. Local Arrays .. + REAL RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SGELQF, SGEQRF, SLABAD, SLASCL, SLASET, SORMLQ, + $ SORMQR, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'T' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, MN + MAX( MN, NRHS ) ) .AND. + $ .NOT.LQUERY ) THEN + INFO = -10 + END IF +* +* Figure out optimal block size +* + IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN +* + TPSD = .TRUE. + IF( LSAME( TRANS, 'N' ) ) + $ TPSD = .FALSE. +* + IF( M.GE.N ) THEN + NB = ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + IF( TPSD ) THEN + NB = MAX( NB, ILAENV( 1, 'SORMQR', 'LN', M, NRHS, N, + $ -1 ) ) + ELSE + NB = MAX( NB, ILAENV( 1, 'SORMQR', 'LT', M, NRHS, N, + $ -1 ) ) + END IF + ELSE + NB = ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + IF( TPSD ) THEN + NB = MAX( NB, ILAENV( 1, 'SORMLQ', 'LT', N, NRHS, M, + $ -1 ) ) + ELSE + NB = MAX( NB, ILAENV( 1, 'SORMLQ', 'LN', N, NRHS, M, + $ -1 ) ) + END IF + END IF +* + WSIZE = MAX( 1, MN + MAX( MN, NRHS )*NB ) + WORK( 1 ) = REAL( WSIZE ) +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELS ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL SLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + GO TO 50 + END IF +* + BROW = M + IF( TPSD ) + $ BROW = N + BNRM = SLANGE( 'M', BROW, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF( M.GE.N ) THEN +* +* compute QR factorization of A +* + CALL SGEQRF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least N, optimally N*NB +* + IF( .NOT.TPSD ) THEN +* +* Least-Squares Problem min || A * X - B || +* +* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) +* + CALL SORMQR( 'Left', 'Transpose', M, NRHS, N, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* +* B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL STRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) +* + SCLLEN = N +* + ELSE +* +* Overdetermined system of equations A' * X = B +* +* B(1:N,1:NRHS) := inv(R') * B(1:N,1:NRHS) +* + CALL STRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) +* +* B(N+1:M,1:NRHS) = ZERO +* + DO 20 J = 1, NRHS + DO 10 I = N + 1, M + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) +* + CALL SORMQR( 'Left', 'No transpose', M, NRHS, N, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* + SCLLEN = M +* + END IF +* + ELSE +* +* Compute LQ factorization of A +* + CALL SGELQF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least M, optimally M*NB. +* + IF( .NOT.TPSD ) THEN +* +* underdetermined system of equations A * X = B +* +* B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL STRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', M, + $ NRHS, ONE, A, LDA, B, LDB ) +* +* B(M+1:N,1:NRHS) = 0 +* + DO 40 J = 1, NRHS + DO 30 I = M + 1, N + B( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Q(1:N,:)' * B(1:M,1:NRHS) +* + CALL SORMLQ( 'Left', 'Transpose', N, NRHS, M, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* + SCLLEN = N +* + ELSE +* +* overdetermined system min || A' * X - B || +* +* B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) +* + CALL SORMLQ( 'Left', 'No transpose', N, NRHS, M, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* +* B(1:M,1:NRHS) := inv(L') * B(1:M,1:NRHS) +* + CALL STRSM( 'Left', 'Lower', 'Transpose', 'Non-unit', M, + $ NRHS, ONE, A, LDA, B, LDB ) +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + 50 CONTINUE + WORK( 1 ) = REAL( WSIZE ) +* + RETURN +* +* End of SGELS +* + END diff --git a/costa/native/external/lapack/sgelsd.f b/costa/native/external/lapack/sgelsd.f new file mode 100644 index 000000000..12e9ed52b --- /dev/null +++ b/costa/native/external/lapack/sgelsd.f @@ -0,0 +1,530 @@ + SUBROUTINE SGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, + $ RANK, WORK, LWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGELSD computes the minimum-norm solution to a real linear least +* squares problem: +* minimize 2-norm(| b - A*x |) +* using the singular value decomposition (SVD) of A. A is an M-by-N +* matrix which may be rank-deficient. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution +* matrix X. +* +* The problem is solved in three steps: +* (1) Reduce the coefficient matrix A to bidiagonal form with +* Householder transformations, reducing the original problem +* into a "bidiagonal least squares problem" (BLS) +* (2) Solve the BLS using a divide and conquer approach. +* (3) Apply back all the Householder tranformations to solve +* the original least squares problem. +* +* The effective rank of A is determined by treating as zero those +* singular values which are less than RCOND times the largest singular +* value. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of A. M >= 0. +* +* N (input) INTEGER +* The number of columns of A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A has been destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the M-by-NRHS right hand side matrix B. +* On exit, B is overwritten by the N-by-NRHS solution +* matrix X. If m >= n and RANK = n, the residual +* sum-of-squares for the solution in the i-th column is given +* by the sum of squares of elements n+1:m in that column. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,max(M,N)). +* +* S (output) REAL array, dimension (min(M,N)) +* The singular values of A in decreasing order. +* The condition number of A in the 2-norm = S(1)/S(min(m,n)). +* +* RCOND (input) REAL +* RCOND is used to determine the effective rank of A. +* Singular values S(i) <= RCOND*S(1) are treated as zero. +* If RCOND < 0, machine precision is used instead. +* +* RANK (output) INTEGER +* The effective rank of A, i.e., the number of singular values +* which are greater than RCOND*S(1). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK must be at least 1. +* The exact minimum amount of workspace needed depends on M, +* N and NRHS. As long as LWORK is at least +* 12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2, +* if M is greater than or equal to N or +* 12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2, +* if M is less than N, the code will execute correctly. +* SMLSIZ is returned by ILAENV and is equal to the maximum +* size of the subproblems at the bottom of the computation +* tree (usually about 25), and +* NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) +* For good performance, LWORK should generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* +* IWORK (workspace) INTEGER array, dimension (LIWORK) +* LIWORK >= 3 * MINMN * NLVL + 11 * MINMN, +* where MINMN = MIN( M,N ). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: the algorithm for computing the SVD failed to converge; +* if INFO = i, i off-diagonal elements of an intermediate +* bidiagonal form did not converge to zero. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER IASCL, IBSCL, IE, IL, ITAU, ITAUP, ITAUQ, + $ LDWORK, MAXMN, MAXWRK, MINMN, MINWRK, MM, + $ MNTHR, NLVL, NWORK, SMLSIZ, WLALSD + REAL ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM +* .. +* .. External Subroutines .. + EXTERNAL SGEBRD, SGELQF, SGEQRF, SLABAD, SLACPY, SLALSD, + $ SLASCL, SLASET, SORMBR, SORMLQ, SORMQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL SLAMCH, SLANGE, ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, INT, LOG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + MNTHR = ILAENV( 6, 'SGELSD', ' ', M, N, NRHS, -1 ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN + INFO = -7 + END IF +* + SMLSIZ = ILAENV( 9, 'SGELSD', ' ', 0, 0, 0, 0 ) +* +* Compute workspace. +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + MINWRK = 1 + MINMN = MAX( 1, MINMN ) + NLVL = MAX( INT( LOG( REAL( MINMN ) / REAL( SMLSIZ+1 ) ) / + $ LOG( TWO ) ) + 1, 0 ) +* + IF( INFO.EQ.0 ) THEN + MAXWRK = 0 + MM = M + IF( M.GE.N .AND. M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns. +* + MM = N + MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'SGEQRF', ' ', M, N, + $ -1, -1 ) ) + MAXWRK = MAX( MAXWRK, N+NRHS* + $ ILAENV( 1, 'SORMQR', 'LT', M, NRHS, N, -1 ) ) + END IF + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined. +* + MAXWRK = MAX( MAXWRK, 3*N+( MM+N )* + $ ILAENV( 1, 'SGEBRD', ' ', MM, N, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*N+NRHS* + $ ILAENV( 1, 'SORMBR', 'QLT', MM, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*N+( N-1 )* + $ ILAENV( 1, 'SORMBR', 'PLN', N, NRHS, N, -1 ) ) + WLALSD = 9*N+2*N*SMLSIZ+8*N*NLVL+N*NRHS+(SMLSIZ+1)**2 + MAXWRK = MAX( MAXWRK, 3*N+WLALSD ) + MINWRK = MAX( 3*N+MM, 3*N+NRHS, 3*N+WLALSD ) + END IF + IF( N.GT.M ) THEN + WLALSD = 9*M+2*M*SMLSIZ+8*M*NLVL+M*NRHS+(SMLSIZ+1)**2 + IF( N.GE.MNTHR ) THEN +* +* Path 2a - underdetermined, with many more columns +* than rows. +* + MAXWRK = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + MAXWRK = MAX( MAXWRK, M*M+4*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M+4*M+NRHS* + $ ILAENV( 1, 'SORMBR', 'QLT', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M+4*M+( M-1 )* + $ ILAENV( 1, 'SORMBR', 'PLN', M, NRHS, M, -1 ) ) + IF( NRHS.GT.1 ) THEN + MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS ) + ELSE + MAXWRK = MAX( MAXWRK, M*M+2*M ) + END IF + MAXWRK = MAX( MAXWRK, M+NRHS* + $ ILAENV( 1, 'SORMLQ', 'LT', N, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M+4*M+WLALSD ) + ELSE +* +* Path 2 - remaining underdetermined cases. +* + MAXWRK = 3*M + ( N+M )*ILAENV( 1, 'SGEBRD', ' ', M, N, + $ -1, -1 ) + MAXWRK = MAX( MAXWRK, 3*M+NRHS* + $ ILAENV( 1, 'SORMBR', 'QLT', M, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*M+M* + $ ILAENV( 1, 'SORMBR', 'PLN', N, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*M+WLALSD ) + END IF + MINWRK = MAX( 3*M+NRHS, 3*M+M, 3*M+WLALSD ) + END IF + MINWRK = MIN( MINWRK, MAXWRK ) + WORK( 1 ) = MAXWRK + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELSD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + GO TO 10 + END IF +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters. +* + EPS = SLAMCH( 'P' ) + SFMIN = SLAMCH( 'S' ) + SMLNUM = SFMIN / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A if max entry outside range [SMLNUM,BIGNUM]. +* + ANRM = SLANGE( 'M', M, N, A, LDA, WORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM. +* + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM. +* + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + CALL SLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 ) + RANK = 0 + GO TO 10 + END IF +* +* Scale B if max entry outside range [SMLNUM,BIGNUM]. +* + BNRM = SLANGE( 'M', M, NRHS, B, LDB, WORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM. +* + CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM. +* + CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* If M < N make sure certain entries of B are zero. +* + IF( M.LT.N ) + $ CALL SLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB ) +* +* Overdetermined case. +* + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined. +* + MM = M + IF( M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns. +* + MM = N + ITAU = 1 + NWORK = ITAU + N +* +* Compute A=Q*R. +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Multiply B by transpose(Q). +* (Workspace: need N+NRHS, prefer N+NRHS*NB) +* + CALL SORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Zero out below R. +* + IF( N.GT.1 ) THEN + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) + END IF + END IF +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in A. +* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) +* + CALL SGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of R. +* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) +* + CALL SORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL SLALSD( 'U', SMLSIZ, N, NRHS, S, WORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of R. +* + CALL SORMBR( 'P', 'L', 'N', N, NRHS, N, A, LDA, WORK( ITAUP ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+ + $ MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN +* +* Path 2a - underdetermined, with many more columns than rows +* and sufficient workspace for an efficient algorithm. +* + LDWORK = M + IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ), + $ M*LDA+M+M*NRHS ) )LDWORK = LDA + ITAU = 1 + NWORK = M + 1 +* +* Compute A=L*Q. +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) + IL = NWORK +* +* Copy L to WORK(IL), zeroing out above its diagonal. +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ), + $ LDWORK ) + IE = IL + LDWORK*M + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL). +* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of L. +* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) +* + CALL SORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUQ ), B, LDB, WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL SLALSD( 'U', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of L. +* + CALL SORMBR( 'P', 'L', 'N', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUP ), B, LDB, WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Zero out below first M rows of B. +* + CALL SLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB ) + NWORK = ITAU + M +* +* Multiply transpose(Q) by B. +* (Workspace: need M+NRHS, prefer M+NRHS*NB) +* + CALL SORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + ELSE +* +* Path 2 - remaining underdetermined cases. +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize A. +* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +* + CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors. +* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) +* + CALL SORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL SLALSD( 'L', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of A. +* + CALL SORMBR( 'P', 'L', 'N', N, NRHS, M, A, LDA, WORK( ITAUP ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + END IF +* +* Undo scaling. +* + IF( IASCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 10 CONTINUE + WORK( 1 ) = MAXWRK + RETURN +* +* End of SGELSD +* + END diff --git a/costa/native/external/lapack/sgelss.f b/costa/native/external/lapack/sgelss.f new file mode 100644 index 000000000..d13ae96ae --- /dev/null +++ b/costa/native/external/lapack/sgelss.f @@ -0,0 +1,613 @@ + SUBROUTINE SGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, + $ WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + REAL RCOND +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGELSS computes the minimum norm solution to a real linear least +* squares problem: +* +* Minimize 2-norm(| b - A*x |). +* +* using the singular value decomposition (SVD) of A. A is an M-by-N +* matrix which may be rank-deficient. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix +* X. +* +* The effective rank of A is determined by treating as zero those +* singular values which are less than RCOND times the largest singular +* value. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the first min(m,n) rows of A are overwritten with +* its right singular vectors, stored rowwise. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the M-by-NRHS right hand side matrix B. +* On exit, B is overwritten by the N-by-NRHS solution +* matrix X. If m >= n and RANK = n, the residual +* sum-of-squares for the solution in the i-th column is given +* by the sum of squares of elements n+1:m in that column. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,max(M,N)). +* +* S (output) REAL array, dimension (min(M,N)) +* The singular values of A in decreasing order. +* The condition number of A in the 2-norm = S(1)/S(min(m,n)). +* +* RCOND (input) REAL +* RCOND is used to determine the effective rank of A. +* Singular values S(i) <= RCOND*S(1) are treated as zero. +* If RCOND < 0, machine precision is used instead. +* +* RANK (output) INTEGER +* The effective rank of A, i.e., the number of singular values +* which are greater than RCOND*S(1). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1, and also: +* LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS ) +* For good performance, LWORK should generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: the algorithm for computing the SVD failed to converge; +* if INFO = i, i off-diagonal elements of an intermediate +* bidiagonal form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER BDSPAC, BL, CHUNK, I, IASCL, IBSCL, IE, IL, + $ ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN, + $ MAXWRK, MINMN, MINWRK, MM, MNTHR + REAL ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR +* .. +* .. Local Arrays .. + REAL VDUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SBDSQR, SCOPY, SGEBRD, SGELQF, SGEMM, SGEMV, + $ SGEQRF, SLABAD, SLACPY, SLASCL, SLASET, SORGBR, + $ SORMBR, SORMLQ, SORMQR, SRSCL, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL ILAENV, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + MNTHR = ILAENV( 6, 'SGELSS', ' ', M, N, NRHS, -1 ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MAXWRK = 0 + MM = M + IF( M.GE.N .AND. M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns +* + MM = N + MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'SGEQRF', ' ', M, N, + $ -1, -1 ) ) + MAXWRK = MAX( MAXWRK, N+NRHS* + $ ILAENV( 1, 'SORMQR', 'LT', M, NRHS, N, -1 ) ) + END IF + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined +* +* Compute workspace needed for SBDSQR +* + BDSPAC = MAX( 1, 5*N ) + MAXWRK = MAX( MAXWRK, 3*N+( MM+N )* + $ ILAENV( 1, 'SGEBRD', ' ', MM, N, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*N+NRHS* + $ ILAENV( 1, 'SORMBR', 'QLT', MM, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*N+( N-1 )* + $ ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MAXWRK = MAX( MAXWRK, N*NRHS ) + MINWRK = MAX( 3*N+MM, 3*N+NRHS, BDSPAC ) + MAXWRK = MAX( MINWRK, MAXWRK ) + END IF + IF( N.GT.M ) THEN +* +* Compute workspace needed for SBDSQR +* + BDSPAC = MAX( 1, 5*M ) + MINWRK = MAX( 3*M+NRHS, 3*M+N, BDSPAC ) + IF( N.GE.MNTHR ) THEN +* +* Path 2a - underdetermined, with many more columns +* than rows +* + MAXWRK = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + MAXWRK = MAX( MAXWRK, M*M+4*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M+4*M+NRHS* + $ ILAENV( 1, 'SORMBR', 'QLT', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M+4*M+( M-1 )* + $ ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M+M+BDSPAC ) + IF( NRHS.GT.1 ) THEN + MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS ) + ELSE + MAXWRK = MAX( MAXWRK, M*M+2*M ) + END IF + MAXWRK = MAX( MAXWRK, M+NRHS* + $ ILAENV( 1, 'SORMLQ', 'LT', N, NRHS, M, -1 ) ) + ELSE +* +* Path 2 - underdetermined +* + MAXWRK = 3*M + ( N+M )*ILAENV( 1, 'SGEBRD', ' ', M, N, + $ -1, -1 ) + MAXWRK = MAX( MAXWRK, 3*M+NRHS* + $ ILAENV( 1, 'SORMBR', 'QLT', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*M+M* + $ ILAENV( 1, 'SORGBR', 'P', M, N, M, -1 ) ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MAXWRK = MAX( MAXWRK, N*NRHS ) + END IF + END IF + MAXWRK = MAX( MINWRK, MAXWRK ) + WORK( 1 ) = MAXWRK + END IF +* + MINWRK = MAX( MINWRK, 1 ) + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) + $ INFO = -12 + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELSS', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + EPS = SLAMCH( 'P' ) + SFMIN = SLAMCH( 'S' ) + SMLNUM = SFMIN / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', M, N, A, LDA, WORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + CALL SLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 ) + RANK = 0 + GO TO 70 + END IF +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = SLANGE( 'M', M, NRHS, B, LDB, WORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Overdetermined case +* + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined +* + MM = M + IF( M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns +* + MM = N + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Multiply B by transpose(Q) +* (Workspace: need N+NRHS, prefer N+NRHS*NB) +* + CALL SORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Zero out below R +* + IF( N.GT.1 ) + $ CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) + END IF +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in A +* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) +* + CALL SGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of R +* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) +* + CALL SORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors of R in A +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration +* multiply B by transpose of left singular vectors +* compute right singular vectors in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, N, 0, NRHS, S, WORK( IE ), A, LDA, VDUM, + $ 1, B, LDB, WORK( IWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 10 I = 1, N + IF( S( I ).GT.THR ) THEN + CALL SRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL SLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) + END IF + 10 CONTINUE +* +* Multiply B by right singular vectors +* (Workspace: need N, prefer N*NRHS) +* + IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN + CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, A, LDA, B, LDB, ZERO, + $ WORK, LDB ) + CALL SLACPY( 'G', N, NRHS, WORK, LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = LWORK / N + DO 20 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL SGEMM( 'T', 'N', N, BL, N, ONE, A, LDA, B( 1, I ), + $ LDB, ZERO, WORK, N ) + CALL SLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB ) + 20 CONTINUE + ELSE + CALL SGEMV( 'T', N, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 ) + CALL SCOPY( N, WORK, 1, B, 1 ) + END IF +* + ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+ + $ MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN +* +* Path 2a - underdetermined, with many more columns than rows +* and sufficient workspace for an efficient algorithm +* + LDWORK = M + IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ), + $ M*LDA+M+M*NRHS ) )LDWORK = LDA + ITAU = 1 + IWORK = M + 1 +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) + IL = IWORK +* +* Copy L to WORK(IL), zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ), + $ LDWORK ) + IE = IL + LDWORK*M + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL) +* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of L +* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) +* + CALL SORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUQ ), B, LDB, WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors of R in WORK(IL) +* (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, +* computing right singular vectors of L in WORK(IL) and +* multiplying B by transpose of left singular vectors +* (Workspace: need M*M+M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, 0, NRHS, S, WORK( IE ), WORK( IL ), + $ LDWORK, A, LDA, B, LDB, WORK( IWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 30 I = 1, M + IF( S( I ).GT.THR ) THEN + CALL SRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL SLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) + END IF + 30 CONTINUE + IWORK = IE +* +* Multiply B by right singular vectors of L in WORK(IL) +* (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS) +* + IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN + CALL SGEMM( 'T', 'N', M, NRHS, M, ONE, WORK( IL ), LDWORK, + $ B, LDB, ZERO, WORK( IWORK ), LDB ) + CALL SLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = ( LWORK-IWORK+1 ) / M + DO 40 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL SGEMM( 'T', 'N', M, BL, M, ONE, WORK( IL ), LDWORK, + $ B( 1, I ), LDB, ZERO, WORK( IWORK ), N ) + CALL SLACPY( 'G', M, BL, WORK( IWORK ), N, B( 1, I ), + $ LDB ) + 40 CONTINUE + ELSE + CALL SGEMV( 'T', M, M, ONE, WORK( IL ), LDWORK, B( 1, 1 ), + $ 1, ZERO, WORK( IWORK ), 1 ) + CALL SCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 ) + END IF +* +* Zero out below first M rows of B +* + CALL SLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB ) + IWORK = ITAU + M +* +* Multiply transpose(Q) by B +* (Workspace: need M+NRHS, prefer M+NRHS*NB) +* + CALL SORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* + ELSE +* +* Path 2 - remaining underdetermined cases +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +* + CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors +* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) +* + CALL SORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors in A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, +* computing right singular vectors of A in A and +* multiplying B by transpose of left singular vectors +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'L', M, N, 0, NRHS, S, WORK( IE ), A, LDA, VDUM, + $ 1, B, LDB, WORK( IWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 50 I = 1, M + IF( S( I ).GT.THR ) THEN + CALL SRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL SLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) + END IF + 50 CONTINUE +* +* Multiply B by right singular vectors of A +* (Workspace: need N, prefer N*NRHS) +* + IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN + CALL SGEMM( 'T', 'N', N, NRHS, M, ONE, A, LDA, B, LDB, ZERO, + $ WORK, LDB ) + CALL SLACPY( 'F', N, NRHS, WORK, LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = LWORK / N + DO 60 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL SGEMM( 'T', 'N', N, BL, M, ONE, A, LDA, B( 1, I ), + $ LDB, ZERO, WORK, N ) + CALL SLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB ) + 60 CONTINUE + ELSE + CALL SGEMV( 'T', M, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 ) + CALL SCOPY( N, WORK, 1, B, 1 ) + END IF + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 70 CONTINUE + WORK( 1 ) = MAXWRK + RETURN +* +* End of SGELSS +* + END diff --git a/costa/native/external/lapack/sgelsx.f b/costa/native/external/lapack/sgelsx.f new file mode 100644 index 000000000..b0016cf8c --- /dev/null +++ b/costa/native/external/lapack/sgelsx.f @@ -0,0 +1,350 @@ + SUBROUTINE SGELSX( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, + $ WORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, M, N, NRHS, RANK + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine SGELSY. +* +* SGELSX computes the minimum-norm solution to a real linear least +* squares problem: +* minimize || A * X - B || +* using a complete orthogonal factorization of A. A is an M-by-N +* matrix which may be rank-deficient. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution +* matrix X. +* +* The routine first computes a QR factorization with column pivoting: +* A * P = Q * [ R11 R12 ] +* [ 0 R22 ] +* with R11 defined as the largest leading submatrix whose estimated +* condition number is less than 1/RCOND. The order of R11, RANK, +* is the effective rank of A. +* +* Then, R22 is considered to be negligible, and R12 is annihilated +* by orthogonal transformations from the right, arriving at the +* complete orthogonal factorization: +* A * P = Q * [ T11 0 ] * Z +* [ 0 0 ] +* The minimum-norm solution is then +* X = P * Z' [ inv(T11)*Q1'*B ] +* [ 0 ] +* where Q1 consists of the first RANK columns of Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of +* columns of matrices B and X. NRHS >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A has been overwritten by details of its +* complete orthogonal factorization. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the M-by-NRHS right hand side matrix B. +* On exit, the N-by-NRHS solution matrix X. +* If m >= n and RANK = n, the residual sum-of-squares for +* the solution in the i-th column is given by the sum of +* squares of elements N+1:M in that column. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M,N). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(i) .ne. 0, the i-th column of A is an +* initial column, otherwise it is a free column. Before +* the QR factorization of A, all initial columns are +* permuted to the leading positions; only the remaining +* free columns are moved as a result of column pivoting +* during the factorization. +* On exit, if JPVT(i) = k, then the i-th column of A*P +* was the k-th column of A. +* +* RCOND (input) REAL +* RCOND is used to determine the effective rank of A, which +* is defined as the order of the largest leading triangular +* submatrix R11 in the QR factorization with pivoting of A, +* whose estimated condition number < 1/RCOND. +* +* RANK (output) INTEGER +* The effective rank of A, i.e., the order of the submatrix +* R11. This is the same as the order of the submatrix T11 +* in the complete orthogonal factorization of A. +* +* WORK (workspace) REAL array, dimension +* (max( min(M,N)+3*N, 2*min(M,N)+NRHS )), +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IMAX, IMIN + PARAMETER ( IMAX = 1, IMIN = 2 ) + REAL ZERO, ONE, DONE, NTDONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, DONE = ZERO, + $ NTDONE = ONE ) +* .. +* .. Local Scalars .. + INTEGER I, IASCL, IBSCL, ISMAX, ISMIN, J, K, MN + REAL ANRM, BIGNUM, BNRM, C1, C2, S1, S2, SMAX, + $ SMAXPR, SMIN, SMINPR, SMLNUM, T1, T2 +* .. +* .. External Functions .. + REAL SLAMCH, SLANGE + EXTERNAL SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SGEQPF, SLABAD, SLAIC1, SLASCL, SLASET, SLATZM, + $ SORM2R, STRSM, STZRQF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + MN = MIN( M, N ) + ISMIN = MN + 1 + ISMAX = 2*MN + 1 +* +* Test the input arguments. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -7 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELSX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max elements outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', M, N, A, LDA, WORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + RANK = 0 + GO TO 100 + END IF +* + BNRM = SLANGE( 'M', M, NRHS, B, LDB, WORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Compute QR factorization with column pivoting of A: +* A * P = Q * R +* + CALL SGEQPF( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ), INFO ) +* +* workspace 3*N. Details of Householder rotations stored +* in WORK(1:MN). +* +* Determine RANK using incremental condition estimation +* + WORK( ISMIN ) = ONE + WORK( ISMAX ) = ONE + SMAX = ABS( A( 1, 1 ) ) + SMIN = SMAX + IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN + RANK = 0 + CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + GO TO 100 + ELSE + RANK = 1 + END IF +* + 10 CONTINUE + IF( RANK.LT.MN ) THEN + I = RANK + 1 + CALL SLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ), + $ A( I, I ), SMINPR, S1, C1 ) + CALL SLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ), + $ A( I, I ), SMAXPR, S2, C2 ) +* + IF( SMAXPR*RCOND.LE.SMINPR ) THEN + DO 20 I = 1, RANK + WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 ) + WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 ) + 20 CONTINUE + WORK( ISMIN+RANK ) = C1 + WORK( ISMAX+RANK ) = C2 + SMIN = SMINPR + SMAX = SMAXPR + RANK = RANK + 1 + GO TO 10 + END IF + END IF +* +* Logically partition R = [ R11 R12 ] +* [ 0 R22 ] +* where R11 = R(1:RANK,1:RANK) +* +* [R11,R12] = [ T11, 0 ] * Y +* + IF( RANK.LT.N ) + $ CALL STZRQF( RANK, N, A, LDA, WORK( MN+1 ), INFO ) +* +* Details of Householder rotations stored in WORK(MN+1:2*MN) +* +* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) +* + CALL SORM2R( 'Left', 'Transpose', M, NRHS, MN, A, LDA, WORK( 1 ), + $ B, LDB, WORK( 2*MN+1 ), INFO ) +* +* workspace NRHS +* +* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) +* + CALL STRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK, + $ NRHS, ONE, A, LDA, B, LDB ) +* + DO 40 I = RANK + 1, N + DO 30 J = 1, NRHS + B( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS) +* + IF( RANK.LT.N ) THEN + DO 50 I = 1, RANK + CALL SLATZM( 'Left', N-RANK+1, NRHS, A( I, RANK+1 ), LDA, + $ WORK( MN+I ), B( I, 1 ), B( RANK+1, 1 ), LDB, + $ WORK( 2*MN+1 ) ) + 50 CONTINUE + END IF +* +* workspace NRHS +* +* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) +* + DO 90 J = 1, NRHS + DO 60 I = 1, N + WORK( 2*MN+I ) = NTDONE + 60 CONTINUE + DO 80 I = 1, N + IF( WORK( 2*MN+I ).EQ.NTDONE ) THEN + IF( JPVT( I ).NE.I ) THEN + K = I + T1 = B( K, J ) + T2 = B( JPVT( K ), J ) + 70 CONTINUE + B( JPVT( K ), J ) = T1 + WORK( 2*MN+K ) = DONE + T1 = T2 + K = JPVT( K ) + T2 = B( JPVT( K ), J ) + IF( JPVT( K ).NE.I ) + $ GO TO 70 + B( I, J ) = T1 + WORK( 2*MN+K ) = DONE + END IF + END IF + 80 CONTINUE + 90 CONTINUE +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 100 CONTINUE +* + RETURN +* +* End of SGELSX +* + END diff --git a/costa/native/external/lapack/sgelsy.f b/costa/native/external/lapack/sgelsy.f new file mode 100644 index 000000000..0a4883793 --- /dev/null +++ b/costa/native/external/lapack/sgelsy.f @@ -0,0 +1,379 @@ + SUBROUTINE SGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, + $ WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGELSY computes the minimum-norm solution to a real linear least +* squares problem: +* minimize || A * X - B || +* using a complete orthogonal factorization of A. A is an M-by-N +* matrix which may be rank-deficient. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution +* matrix X. +* +* The routine first computes a QR factorization with column pivoting: +* A * P = Q * [ R11 R12 ] +* [ 0 R22 ] +* with R11 defined as the largest leading submatrix whose estimated +* condition number is less than 1/RCOND. The order of R11, RANK, +* is the effective rank of A. +* +* Then, R22 is considered to be negligible, and R12 is annihilated +* by orthogonal transformations from the right, arriving at the +* complete orthogonal factorization: +* A * P = Q * [ T11 0 ] * Z +* [ 0 0 ] +* The minimum-norm solution is then +* X = P * Z' [ inv(T11)*Q1'*B ] +* [ 0 ] +* where Q1 consists of the first RANK columns of Q. +* +* This routine is basically identical to the original xGELSX except +* three differences: +* o The call to the subroutine xGEQPF has been substituted by the +* the call to the subroutine xGEQP3. This subroutine is a Blas-3 +* version of the QR factorization with column pivoting. +* o Matrix B (the right hand side) is updated with Blas-3. +* o The permutation of matrix B (the right hand side) is faster and +* more simple. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of +* columns of matrices B and X. NRHS >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A has been overwritten by details of its +* complete orthogonal factorization. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the M-by-NRHS right hand side matrix B. +* On exit, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M,N). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +* to the front of AP, otherwise column i is a free column. +* On exit, if JPVT(i) = k, then the i-th column of AP +* was the k-th column of A. +* +* RCOND (input) REAL +* RCOND is used to determine the effective rank of A, which +* is defined as the order of the largest leading triangular +* submatrix R11 in the QR factorization with pivoting of A, +* whose estimated condition number < 1/RCOND. +* +* RANK (output) INTEGER +* The effective rank of A, i.e., the order of the submatrix +* R11. This is the same as the order of the submatrix T11 +* in the complete orthogonal factorization of A. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* The unblocked strategy requires that: +* LWORK >= MAX( MN+3*N+1, 2*MN+NRHS ), +* where MN = min( M, N ). +* The block algorithm requires that: +* LWORK >= MAX( MN+2*N+NB*(N+1), 2*MN+NB*NRHS ), +* where NB is an upper bound on the blocksize returned +* by ILAENV for the routines SGEQP3, STZRZF, STZRQF, SORMQR, +* and SORMRZ. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: If INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IMAX, IMIN + PARAMETER ( IMAX = 1, IMIN = 2 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IASCL, IBSCL, ISMAX, ISMIN, J, LWKOPT, MN, + $ NB, NB1, NB2, NB3, NB4 + REAL ANRM, BIGNUM, BNRM, C1, C2, S1, S2, SMAX, + $ SMAXPR, SMIN, SMINPR, SMLNUM, WSIZE +* .. +* .. External Functions .. + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL ILAENV, SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEQP3, SLABAD, SLAIC1, SLASCL, SLASET, + $ SORMQR, SORMRZ, STRSM, STZRZF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL +* .. +* .. Executable Statements .. +* + MN = MIN( M, N ) + ISMIN = MN + 1 + ISMAX = 2*MN + 1 +* +* Test the input arguments. +* + INFO = 0 + NB1 = ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'SGERQF', ' ', M, N, -1, -1 ) + NB3 = ILAENV( 1, 'SORMQR', ' ', M, N, NRHS, -1 ) + NB4 = ILAENV( 1, 'SORMRQ', ' ', M, N, NRHS, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKOPT = MAX( 1, MN+2*N+NB*(N+1), 2*MN+NB*NRHS ) + WORK( 1 ) = REAL( LWKOPT ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.MAX( 1, MN+3*N+1, 2*MN+NRHS ) .AND. + $ .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELSY', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max entries outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', M, N, A, LDA, WORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + RANK = 0 + GO TO 70 + END IF +* + BNRM = SLANGE( 'M', M, NRHS, B, LDB, WORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Compute QR factorization with column pivoting of A: +* A * P = Q * R +* + CALL SGEQP3( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ), + $ LWORK-MN, INFO ) + WSIZE = MN + WORK( MN+1 ) +* +* workspace: MN+2*N+NB*(N+1). +* Details of Householder rotations stored in WORK(1:MN). +* +* Determine RANK using incremental condition estimation +* + WORK( ISMIN ) = ONE + WORK( ISMAX ) = ONE + SMAX = ABS( A( 1, 1 ) ) + SMIN = SMAX + IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN + RANK = 0 + CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + GO TO 70 + ELSE + RANK = 1 + END IF +* + 10 CONTINUE + IF( RANK.LT.MN ) THEN + I = RANK + 1 + CALL SLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ), + $ A( I, I ), SMINPR, S1, C1 ) + CALL SLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ), + $ A( I, I ), SMAXPR, S2, C2 ) +* + IF( SMAXPR*RCOND.LE.SMINPR ) THEN + DO 20 I = 1, RANK + WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 ) + WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 ) + 20 CONTINUE + WORK( ISMIN+RANK ) = C1 + WORK( ISMAX+RANK ) = C2 + SMIN = SMINPR + SMAX = SMAXPR + RANK = RANK + 1 + GO TO 10 + END IF + END IF +* +* workspace: 3*MN. +* +* Logically partition R = [ R11 R12 ] +* [ 0 R22 ] +* where R11 = R(1:RANK,1:RANK) +* +* [R11,R12] = [ T11, 0 ] * Y +* + IF( RANK.LT.N ) + $ CALL STZRZF( RANK, N, A, LDA, WORK( MN+1 ), WORK( 2*MN+1 ), + $ LWORK-2*MN, INFO ) +* +* workspace: 2*MN. +* Details of Householder rotations stored in WORK(MN+1:2*MN) +* +* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) +* + CALL SORMQR( 'Left', 'Transpose', M, NRHS, MN, A, LDA, WORK( 1 ), + $ B, LDB, WORK( 2*MN+1 ), LWORK-2*MN, INFO ) + WSIZE = MAX( WSIZE, 2*MN+WORK( 2*MN+1 ) ) +* +* workspace: 2*MN+NB*NRHS. +* +* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) +* + CALL STRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK, + $ NRHS, ONE, A, LDA, B, LDB ) +* + DO 40 J = 1, NRHS + DO 30 I = RANK + 1, N + B( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS) +* + IF( RANK.LT.N ) THEN + CALL SORMRZ( 'Left', 'Transpose', N, NRHS, RANK, N-RANK, A, + $ LDA, WORK( MN+1 ), B, LDB, WORK( 2*MN+1 ), + $ LWORK-2*MN, INFO ) + END IF +* +* workspace: 2*MN+NRHS. +* +* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) +* + DO 60 J = 1, NRHS + DO 50 I = 1, N + WORK( JPVT( I ) ) = B( I, J ) + 50 CONTINUE + CALL SCOPY( N, WORK( 1 ), 1, B( 1, J ), 1 ) + 60 CONTINUE +* +* workspace: N. +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 70 CONTINUE + WORK( 1 ) = REAL( LWKOPT ) +* + RETURN +* +* End of SGELSY +* + END diff --git a/costa/native/external/lapack/sgeql2.f b/costa/native/external/lapack/sgeql2.f new file mode 100644 index 000000000..6dd47937c --- /dev/null +++ b/costa/native/external/lapack/sgeql2.f @@ -0,0 +1,123 @@ + SUBROUTINE SGEQL2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGEQL2 computes a QL factorization of a real m by n matrix A: +* A = Q * L. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the m by n matrix A. +* On exit, if m >= n, the lower triangle of the subarray +* A(m-n+1:m,1:n) contains the n by n lower triangular matrix L; +* if m <= n, the elements on and below the (n-m)-th +* superdiagonal contain the m by n lower trapezoidal matrix L; +* the remaining elements, with the array TAU, represent the +* orthogonal matrix Q as a product of elementary reflectors +* (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(k) . . . H(2) H(1), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in +* A(1:m-k+i-1,n-k+i), and tau in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + REAL AII +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQL2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = K, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* A(1:m-k+i-1,n-k+i) +* + CALL SLARFG( M-K+I, A( M-K+I, N-K+I ), A( 1, N-K+I ), 1, + $ TAU( I ) ) +* +* Apply H(i) to A(1:m-k+i,1:n-k+i-1) from the left +* + AII = A( M-K+I, N-K+I ) + A( M-K+I, N-K+I ) = ONE + CALL SLARF( 'Left', M-K+I, N-K+I-1, A( 1, N-K+I ), 1, TAU( I ), + $ A, LDA, WORK ) + A( M-K+I, N-K+I ) = AII + 10 CONTINUE + RETURN +* +* End of SGEQL2 +* + END diff --git a/costa/native/external/lapack/sgeqlf.f b/costa/native/external/lapack/sgeqlf.f new file mode 100644 index 000000000..5078932ad --- /dev/null +++ b/costa/native/external/lapack/sgeqlf.f @@ -0,0 +1,205 @@ + SUBROUTINE SGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGEQLF computes a QL factorization of a real M-by-N matrix A: +* A = Q * L. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, +* if m >= n, the lower triangle of the subarray +* A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L; +* if m <= n, the elements on and below the (n-m)-th +* superdiagonal contain the M-by-N lower trapezoidal matrix L; +* the remaining elements, with the array TAU, represent the +* orthogonal matrix Q as a product of elementary reflectors +* (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(k) . . . H(2) H(1), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in +* A(1:m-k+i-1,n-k+i), and tau in TAU(i). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT, + $ MU, NB, NBMIN, NU, NX +* .. +* .. External Subroutines .. + EXTERNAL SGEQL2, SLARFB, SLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'SGEQLF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQLF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SGEQLF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SGEQLF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially. +* The last kk columns are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* + DO 10 I = K - KK + KI + 1, K - KK + 1, -NB + IB = MIN( K-I+1, NB ) +* +* Compute the QL factorization of the current block +* A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1) +* + CALL SGEQL2( M-K+I+IB-1, IB, A( 1, N-K+I ), LDA, TAU( I ), + $ WORK, IINFO ) + IF( N-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL SLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H' to A(1:m-k+i+ib-1,1:n-k+i-1) from the left +* + CALL SLARFB( 'Left', 'Transpose', 'Backward', + $ 'Columnwise', M-K+I+IB-1, N-K+I-1, IB, + $ A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + MU = M - K + I + NB - 1 + NU = N - K + I + NB - 1 + ELSE + MU = M + NU = N + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 .AND. NU.GT.0 ) + $ CALL SGEQL2( MU, NU, A, LDA, TAU, WORK, IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of SGEQLF +* + END diff --git a/costa/native/external/lapack/sgeqp3.f b/costa/native/external/lapack/sgeqp3.f new file mode 100644 index 000000000..a52a5d70a --- /dev/null +++ b/costa/native/external/lapack/sgeqp3.f @@ -0,0 +1,279 @@ + SUBROUTINE SGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGEQP3 computes a QR factorization with column pivoting of a +* matrix A: A*P = Q*R using Level 3 BLAS. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the upper triangle of the array contains the +* min(M,N)-by-N upper trapezoidal matrix R; the elements below +* the diagonal, together with the array TAU, represent the +* orthogonal matrix Q as a product of min(M,N) elementary +* reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(J).ne.0, the J-th column of A is permuted +* to the front of A*P (a leading column); if JPVT(J)=0, +* the J-th column of A is a free column. +* On exit, if JPVT(J)=K, then the J-th column of A*P was the +* the K-th column of A. +* +* TAU (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO=0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 3*N+1. +* For optimal performance LWORK >= 2*N+( N+1 )*NB, where NB +* is the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real/complex scalar, and v is a real/complex vector +* with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in +* A(i+1:m,i), and tau in TAU(i). +* +* Based on contributions by +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* X. Sun, Computer Science Dept., Duke University, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER INB, INBMIN, IXOVER + PARAMETER ( INB = 1, INBMIN = 2, IXOVER = 3 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER FJB, IWS, J, JB, LWKOPT, MINMN, MINWS, NA, NB, + $ NBMIN, NFXD, NX, SM, SMINMN, SN, TOPBMN +* .. +* .. External Subroutines .. + EXTERNAL SGEQRF, SLAQP2, SLAQPS, SORMQR, SSWAP, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + REAL SNRM2 + EXTERNAL ILAENV, SNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* + IWS = 3*N + 1 + MINMN = MIN( M, N ) +* +* Test input arguments +* ==================== +* + INFO = 0 + NB = ILAENV( INB, 'SGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = 2*N+( N+1 )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( ( LWORK.LT.IWS ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQP3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( MINMN.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Move initial columns up front. +* + NFXD = 1 + DO 10 J = 1, N + IF( JPVT( J ).NE.0 ) THEN + IF( J.NE.NFXD ) THEN + CALL SSWAP( M, A( 1, J ), 1, A( 1, NFXD ), 1 ) + JPVT( J ) = JPVT( NFXD ) + JPVT( NFXD ) = J + ELSE + JPVT( J ) = J + END IF + NFXD = NFXD + 1 + ELSE + JPVT( J ) = J + END IF + 10 CONTINUE + NFXD = NFXD - 1 +* +* Factorize fixed columns +* ======================= +* +* Compute the QR factorization of fixed columns and update +* remaining columns. +* + IF( NFXD.GT.0 ) THEN + NA = MIN( M, NFXD ) +*CC CALL SGEQR2( M, NA, A, LDA, TAU, WORK, INFO ) + CALL SGEQRF( M, NA, A, LDA, TAU, WORK, LWORK, INFO ) + IWS = MAX( IWS, INT( WORK( 1 ) ) ) + IF( NA.LT.N ) THEN +*CC CALL SORM2R( 'Left', 'Transpose', M, N-NA, NA, A, LDA, +*CC $ TAU, A( 1, NA+1 ), LDA, WORK, INFO ) + CALL SORMQR( 'Left', 'Transpose', M, N-NA, NA, A, LDA, TAU, + $ A( 1, NA+1 ), LDA, WORK, LWORK, INFO ) + IWS = MAX( IWS, INT( WORK( 1 ) ) ) + END IF + END IF +* +* Factorize free columns +* ====================== +* + IF( NFXD.LT.MINMN ) THEN +* + SM = M - NFXD + SN = N - NFXD + SMINMN = MINMN - NFXD +* +* Determine the block size. +* + NB = ILAENV( INB, 'SGEQRF', ' ', SM, SN, -1, -1 ) + NBMIN = 2 + NX = 0 +* + IF( ( NB.GT.1 ) .AND. ( NB.LT.SMINMN ) ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( IXOVER, 'SGEQRF', ' ', SM, SN, -1, + $ -1 ) ) +* +* + IF( NX.LT.SMINMN ) THEN +* +* Determine if workspace is large enough for blocked code. +* + MINWS = 2*SN + ( SN+1 )*NB + IWS = MAX( IWS, MINWS ) + IF( LWORK.LT.MINWS ) THEN +* +* Not enough workspace to use optimal NB: Reduce NB and +* determine the minimum value of NB. +* + NB = ( LWORK-2*SN ) / ( SN+1 ) + NBMIN = MAX( 2, ILAENV( INBMIN, 'SGEQRF', ' ', SM, SN, + $ -1, -1 ) ) +* +* + END IF + END IF + END IF +* +* Initialize partial column norms. The first N elements of work +* store the exact column norms. +* + DO 20 J = NFXD + 1, N + WORK( J ) = SNRM2( SM, A( NFXD+1, J ), 1 ) + WORK( N+J ) = WORK( J ) + 20 CONTINUE +* + IF( ( NB.GE.NBMIN ) .AND. ( NB.LT.SMINMN ) .AND. + $ ( NX.LT.SMINMN ) ) THEN +* +* Use blocked code initially. +* + J = NFXD + 1 +* +* Compute factorization: while loop. +* +* + TOPBMN = MINMN - NX + 30 CONTINUE + IF( J.LE.TOPBMN ) THEN + JB = MIN( NB, TOPBMN-J+1 ) +* +* Factorize JB columns among columns J:N. +* + CALL SLAQPS( M, N-J+1, J-1, JB, FJB, A( 1, J ), LDA, + $ JPVT( J ), TAU( J ), WORK( J ), WORK( N+J ), + $ WORK( 2*N+1 ), WORK( 2*N+JB+1 ), N-J+1 ) +* + J = J + FJB + GO TO 30 + END IF + ELSE + J = NFXD + 1 + END IF +* +* Use unblocked code to factor the last or only block. +* +* + IF( J.LE.MINMN ) + $ CALL SLAQP2( M, N-J+1, J-1, A( 1, J ), LDA, JPVT( J ), + $ TAU( J ), WORK( J ), WORK( N+J ), + $ WORK( 2*N+1 ) ) +* + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of SGEQP3 +* + END diff --git a/costa/native/external/lapack/sgeqpf.f b/costa/native/external/lapack/sgeqpf.f new file mode 100644 index 000000000..8156b0867 --- /dev/null +++ b/costa/native/external/lapack/sgeqpf.f @@ -0,0 +1,221 @@ + SUBROUTINE SGEQPF( M, N, A, LDA, JPVT, TAU, WORK, INFO ) +* +* -- LAPACK test routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine SGEQP3. +* +* SGEQPF computes a QR factorization with column pivoting of a +* real M-by-N matrix A: A*P = Q*R. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0 +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the upper triangle of the array contains the +* min(M,N)-by-N upper triangular matrix R; the elements +* below the diagonal, together with the array TAU, +* represent the orthogonal matrix Q as a product of +* min(m,n) elementary reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +* to the front of A*P (a leading column); if JPVT(i) = 0, +* the i-th column of A is a free column. +* On exit, if JPVT(i) = k, then the i-th column of A*P +* was the k-th column of A. +* +* TAU (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace) REAL array, dimension (3*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(n) +* +* Each H(i) has the form +* +* H = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i). +* +* The matrix P is represented in jpvt as follows: If +* jpvt(j) = i +* then the jth column of P is the ith canonical unit vector. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, ITEMP, J, MA, MN, PVT + REAL AII, TEMP, TEMP2 +* .. +* .. External Subroutines .. + EXTERNAL SGEQR2, SLARF, SLARFG, SORM2R, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SNRM2 + EXTERNAL ISAMAX, SNRM2 +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQPF', -INFO ) + RETURN + END IF +* + MN = MIN( M, N ) +* +* Move initial columns up front +* + ITEMP = 1 + DO 10 I = 1, N + IF( JPVT( I ).NE.0 ) THEN + IF( I.NE.ITEMP ) THEN + CALL SSWAP( M, A( 1, I ), 1, A( 1, ITEMP ), 1 ) + JPVT( I ) = JPVT( ITEMP ) + JPVT( ITEMP ) = I + ELSE + JPVT( I ) = I + END IF + ITEMP = ITEMP + 1 + ELSE + JPVT( I ) = I + END IF + 10 CONTINUE + ITEMP = ITEMP - 1 +* +* Compute the QR factorization and update remaining columns +* + IF( ITEMP.GT.0 ) THEN + MA = MIN( ITEMP, M ) + CALL SGEQR2( M, MA, A, LDA, TAU, WORK, INFO ) + IF( MA.LT.N ) THEN + CALL SORM2R( 'Left', 'Transpose', M, N-MA, MA, A, LDA, TAU, + $ A( 1, MA+1 ), LDA, WORK, INFO ) + END IF + END IF +* + IF( ITEMP.LT.MN ) THEN +* +* Initialize partial column norms. The first n elements of +* work store the exact column norms. +* + DO 20 I = ITEMP + 1, N + WORK( I ) = SNRM2( M-ITEMP, A( ITEMP+1, I ), 1 ) + WORK( N+I ) = WORK( I ) + 20 CONTINUE +* +* Compute factorization +* + DO 40 I = ITEMP + 1, MN +* +* Determine ith pivot column and swap if necessary +* + PVT = ( I-1 ) + ISAMAX( N-I+1, WORK( I ), 1 ) +* + IF( PVT.NE.I ) THEN + CALL SSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( I ) + JPVT( I ) = ITEMP + WORK( PVT ) = WORK( I ) + WORK( N+PVT ) = WORK( N+I ) + END IF +* +* Generate elementary reflector H(i) +* + IF( I.LT.M ) THEN + CALL SLARFG( M-I+1, A( I, I ), A( I+1, I ), 1, TAU( I ) ) + ELSE + CALL SLARFG( 1, A( M, M ), A( M, M ), 1, TAU( M ) ) + END IF +* + IF( I.LT.N ) THEN +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + AII = A( I, I ) + A( I, I ) = ONE + CALL SLARF( 'LEFT', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK( 2*N+1 ) ) + A( I, I ) = AII + END IF +* +* Update partial column norms +* + DO 30 J = I + 1, N + IF( WORK( J ).NE.ZERO ) THEN + TEMP = ONE - ( ABS( A( I, J ) ) / WORK( J ) )**2 + TEMP = MAX( TEMP, ZERO ) + TEMP2 = ONE + 0.05*TEMP*( WORK( J ) / WORK( N+J ) )**2 + IF( TEMP2.EQ.ONE ) THEN + IF( M-I.GT.0 ) THEN + WORK( J ) = SNRM2( M-I, A( I+1, J ), 1 ) + WORK( N+J ) = WORK( J ) + ELSE + WORK( J ) = ZERO + WORK( N+J ) = ZERO + END IF + ELSE + WORK( J ) = WORK( J )*SQRT( TEMP ) + END IF + END IF + 30 CONTINUE +* + 40 CONTINUE + END IF + RETURN +* +* End of SGEQPF +* + END diff --git a/costa/native/external/lapack/sgeqr2.f b/costa/native/external/lapack/sgeqr2.f new file mode 100644 index 000000000..4eae8cf99 --- /dev/null +++ b/costa/native/external/lapack/sgeqr2.f @@ -0,0 +1,122 @@ + SUBROUTINE SGEQR2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGEQR2 computes a QR factorization of a real m by n matrix A: +* A = Q * R. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the m by n matrix A. +* On exit, the elements on and above the diagonal of the array +* contain the min(m,n) by n upper trapezoidal matrix R (R is +* upper triangular if m >= n); the elements below the diagonal, +* with the array TAU, represent the orthogonal matrix Q as a +* product of elementary reflectors (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +* and tau in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + REAL AII +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQR2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + CALL SLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAU( I ) ) + IF( I.LT.N ) THEN +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + AII = A( I, I ) + A( I, I ) = ONE + CALL SLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) + A( I, I ) = AII + END IF + 10 CONTINUE + RETURN +* +* End of SGEQR2 +* + END diff --git a/costa/native/external/lapack/sgeqrf.f b/costa/native/external/lapack/sgeqrf.f new file mode 100644 index 000000000..39da1a2c3 --- /dev/null +++ b/costa/native/external/lapack/sgeqrf.f @@ -0,0 +1,197 @@ + SUBROUTINE SGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGEQRF computes a QR factorization of a real M-by-N matrix A: +* A = Q * R. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the elements on and above the diagonal of the array +* contain the min(M,N)-by-N upper trapezoidal matrix R (R is +* upper triangular if m >= n); the elements below the diagonal, +* with the array TAU, represent the orthogonal matrix Q as a +* product of min(m,n) elementary reflectors (see Further +* Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +* and tau in TAU(i). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SGEQR2, SLARFB, SLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SGEQRF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SGEQRF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the QR factorization of the current block +* A(i:m,i:i+ib-1) +* + CALL SGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL SLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H' to A(i:m,i+ib:n) from the left +* + CALL SLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) + $ CALL SGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of SGEQRF +* + END diff --git a/costa/native/external/lapack/sgerfs.f b/costa/native/external/lapack/sgerfs.f new file mode 100644 index 000000000..56eef6157 --- /dev/null +++ b/costa/native/external/lapack/sgerfs.f @@ -0,0 +1,332 @@ + SUBROUTINE SGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, + $ X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SGERFS improves the computed solution to a system of linear +* equations and provides error bounds and backward error estimates for +* the solution. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose = Transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The original N-by-N matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input) REAL array, dimension (LDAF,N) +* The factors L and U from the factorization A = P*L*U +* as computed by SGETRF. +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from SGETRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* B (input) REAL array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) REAL array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by SGETRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANST + INTEGER COUNT, I, J, K, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGEMV, SGETRS, SLACON, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGERFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL SGEMV( TRANS, N, N, -ONE, A, LDA, X( 1, J ), 1, ONE, + $ WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(op(A))*abs(X) + abs(B). +* + IF( NOTRAN ) THEN + DO 50 K = 1, N + XK = ABS( X( K, J ) ) + DO 40 I = 1, N + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + 40 CONTINUE + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + DO 60 I = 1, N + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 60 CONTINUE + WORK( K ) = WORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL SGETRS( TRANS, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N, + $ INFO ) + CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use SLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL SLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**T). +* + CALL SGETRS( TRANST, N, 1, AF, LDAF, IPIV, WORK( N+1 ), + $ N, INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 110 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 120 CONTINUE + CALL SGETRS( TRANS, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N, + $ INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of SGERFS +* + END diff --git a/costa/native/external/lapack/sgerq2.f b/costa/native/external/lapack/sgerq2.f new file mode 100644 index 000000000..db0fa748e --- /dev/null +++ b/costa/native/external/lapack/sgerq2.f @@ -0,0 +1,123 @@ + SUBROUTINE SGERQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGERQ2 computes an RQ factorization of a real m by n matrix A: +* A = R * Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the m by n matrix A. +* On exit, if m <= n, the upper triangle of the subarray +* A(1:m,n-m+1:n) contains the m by m upper triangular matrix R; +* if m >= n, the elements on and above the (m-n)-th subdiagonal +* contain the m by n upper trapezoidal matrix R; the remaining +* elements, with the array TAU, represent the orthogonal matrix +* Q as a product of elementary reflectors (see Further +* Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) REAL array, dimension (M) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in +* A(m-k+i,1:n-k+i-1), and tau in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + REAL AII +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGERQ2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = K, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* A(m-k+i,1:n-k+i-1) +* + CALL SLARFG( N-K+I, A( M-K+I, N-K+I ), A( M-K+I, 1 ), LDA, + $ TAU( I ) ) +* +* Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right +* + AII = A( M-K+I, N-K+I ) + A( M-K+I, N-K+I ) = ONE + CALL SLARF( 'Right', M-K+I-1, N-K+I, A( M-K+I, 1 ), LDA, + $ TAU( I ), A, LDA, WORK ) + A( M-K+I, N-K+I ) = AII + 10 CONTINUE + RETURN +* +* End of SGERQ2 +* + END diff --git a/costa/native/external/lapack/sgerqf.f b/costa/native/external/lapack/sgerqf.f new file mode 100644 index 000000000..e5163dbfa --- /dev/null +++ b/costa/native/external/lapack/sgerqf.f @@ -0,0 +1,205 @@ + SUBROUTINE SGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGERQF computes an RQ factorization of a real M-by-N matrix A: +* A = R * Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, +* if m <= n, the upper triangle of the subarray +* A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R; +* if m >= n, the elements on and above the (m-n)-th subdiagonal +* contain the M-by-N upper trapezoidal matrix R; +* the remaining elements, with the array TAU, represent the +* orthogonal matrix Q as a product of min(m,n) elementary +* reflectors (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in +* A(m-k+i,1:n-k+i-1), and tau in TAU(i). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT, + $ MU, NB, NBMIN, NU, NX +* .. +* .. External Subroutines .. + EXTERNAL SGERQ2, SLARFB, SLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'SGERQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGERQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SGERQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SGERQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially. +* The last kk rows are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* + DO 10 I = K - KK + KI + 1, K - KK + 1, -NB + IB = MIN( K-I+1, NB ) +* +* Compute the RQ factorization of the current block +* A(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1) +* + CALL SGERQ2( IB, N-K+I+IB-1, A( M-K+I, 1 ), LDA, TAU( I ), + $ WORK, IINFO ) + IF( M-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL SLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, + $ A( M-K+I, 1 ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right +* + CALL SLARFB( 'Right', 'No transpose', 'Backward', + $ 'Rowwise', M-K+I-1, N-K+I+IB-1, IB, + $ A( M-K+I, 1 ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + MU = M - K + I + NB - 1 + NU = N - K + I + NB - 1 + ELSE + MU = M + NU = N + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 .AND. NU.GT.0 ) + $ CALL SGERQ2( MU, NU, A, LDA, TAU, WORK, IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of SGERQF +* + END diff --git a/costa/native/external/lapack/sgesc2.f b/costa/native/external/lapack/sgesc2.f new file mode 100644 index 000000000..28af144aa --- /dev/null +++ b/costa/native/external/lapack/sgesc2.f @@ -0,0 +1,133 @@ + SUBROUTINE SGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER LDA, N + REAL SCALE +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + REAL A( LDA, * ), RHS( * ) +* .. +* +* Purpose +* ======= +* +* SGESC2 solves a system of linear equations +* +* A * X = scale* RHS +* +* with a general N-by-N matrix A using the LU factorization with +* complete pivoting computed by SGETC2. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. +* +* A (input) REAL array, dimension (LDA,N) +* On entry, the LU part of the factorization of the n-by-n +* matrix A computed by SGETC2: A = P * L * U * Q +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1, N). +* +* RHS (input/output) REAL array, dimension (N). +* On entry, the right hand side vector b. +* On exit, the solution vector X. +* +* IPIV (iput) INTEGER array, dimension (N). +* The pivot indices; for 1 <= i <= N, row i of the +* matrix has been interchanged with row IPIV(i). +* +* JPIV (iput) INTEGER array, dimension (N). +* The pivot indices; for 1 <= j <= N, column j of the +* matrix has been interchanged with column JPIV(j). +* +* SCALE (output) REAL +* On exit, SCALE contains the scale factor. SCALE is chosen +* 0 <= SCALE <= 1 to prevent owerflow in the solution. +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, TWO + PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL BIGNUM, EPS, SMLNUM, TEMP +* .. +* .. External Subroutines .. + EXTERNAL SLABAD, SLASWP, SSCAL +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL ISAMAX, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Set constant to control owerflow +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Apply permutations IPIV to RHS +* + CALL SLASWP( 1, RHS, LDA, 1, N-1, IPIV, 1 ) +* +* Solve for L part +* + DO 20 I = 1, N - 1 + DO 10 J = I + 1, N + RHS( J ) = RHS( J ) - A( J, I )*RHS( I ) + 10 CONTINUE + 20 CONTINUE +* +* Solve for U part +* + SCALE = ONE +* +* Check for scaling +* + I = ISAMAX( N, RHS, 1 ) + IF( TWO*SMLNUM*ABS( RHS( I ) ).GT.ABS( A( N, N ) ) ) THEN + TEMP = ( ONE / TWO ) / ABS( RHS( I ) ) + CALL SSCAL( N, TEMP, RHS( 1 ), 1 ) + SCALE = SCALE*TEMP + END IF +* + DO 40 I = N, 1, -1 + TEMP = ONE / A( I, I ) + RHS( I ) = RHS( I )*TEMP + DO 30 J = I + 1, N + RHS( I ) = RHS( I ) - RHS( J )*( A( I, J )*TEMP ) + 30 CONTINUE + 40 CONTINUE +* +* Apply permutations JPIV to the solution (RHS) +* + CALL SLASWP( 1, RHS, LDA, 1, N-1, JPIV, -1 ) + RETURN +* +* End of SGESC2 +* + END diff --git a/costa/native/external/lapack/sgesdd.f b/costa/native/external/lapack/sgesdd.f new file mode 100644 index 000000000..52f5d2273 --- /dev/null +++ b/costa/native/external/lapack/sgesdd.f @@ -0,0 +1,1335 @@ + SUBROUTINE SGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, + $ LWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ + INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), S( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGESDD computes the singular value decomposition (SVD) of a real +* M-by-N matrix A, optionally computing the left and right singular +* vectors. If singular vectors are desired, it uses a +* divide-and-conquer algorithm. +* +* The SVD is written +* +* A = U * SIGMA * transpose(V) +* +* where SIGMA is an M-by-N matrix which is zero except for its +* min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and +* V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA +* are the singular values of A; they are real and non-negative, and +* are returned in descending order. The first min(m,n) columns of +* U and V are the left and right singular vectors of A. +* +* Note that the routine returns VT = V**T, not V. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* Specifies options for computing all or part of the matrix U: +* = 'A': all M columns of U and all N rows of V**T are +* returned in the arrays U and VT; +* = 'S': the first min(M,N) columns of U and the first +* min(M,N) rows of V**T are returned in the arrays U +* and VT; +* = 'O': If M >= N, the first N columns of U are overwritten +* on the array A and all rows of V**T are returned in +* the array VT; +* otherwise, all columns of U are returned in the +* array U and the first M rows of V**T are overwritten +* in the array VT; +* = 'N': no columns of U or rows of V**T are computed. +* +* M (input) INTEGER +* The number of rows of the input matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the input matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, +* if JOBZ = 'O', A is overwritten with the first N columns +* of U (the left singular vectors, stored +* columnwise) if M >= N; +* A is overwritten with the first M rows +* of V**T (the right singular vectors, stored +* rowwise) otherwise. +* if JOBZ .ne. 'O', the contents of A are destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* S (output) REAL array, dimension (min(M,N)) +* The singular values of A, sorted so that S(i) >= S(i+1). +* +* U (output) REAL array, dimension (LDU,UCOL) +* UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; +* UCOL = min(M,N) if JOBZ = 'S'. +* If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M +* orthogonal matrix U; +* if JOBZ = 'S', U contains the first min(M,N) columns of U +* (the left singular vectors, stored columnwise); +* if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= 1; if +* JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. +* +* VT (output) REAL array, dimension (LDVT,N) +* If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the +* N-by-N orthogonal matrix V**T; +* if JOBZ = 'S', VT contains the first min(M,N) rows of +* V**T (the right singular vectors, stored rowwise); +* if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. LDVT >= 1; if +* JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; +* if JOBZ = 'S', LDVT >= min(M,N). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK; +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1. +* If JOBZ = 'N', +* LWORK >= 3*min(M,N) + max(max(M,N),6*min(M,N)). +* If JOBZ = 'O', +* LWORK >= 3*min(M,N)*min(M,N) + +* max(max(M,N),5*min(M,N)*min(M,N)+4*min(M,N)). +* If JOBZ = 'S' or 'A' +* LWORK >= 3*min(M,N)*min(M,N) + +* max(max(M,N),4*min(M,N)*min(M,N)+4*min(M,N)). +* For good performance, LWORK should generally be larger. +* If LWORK < 0 but other input arguments are legal, WORK(1) +* returns the optimal LWORK. +* +* IWORK (workspace) INTEGER array, dimension (8*min(M,N)) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: SBDSDC did not converge, updating process failed. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS + INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IL, + $ IR, ISCL, ITAU, ITAUP, ITAUQ, IU, IVT, LDWKVT, + $ LDWRKL, LDWRKR, LDWRKU, MAXWRK, MINMN, MINWRK, + $ MNTHR, NWORK, WRKBL + REAL ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SBDSDC, SGEBRD, SGELQF, SGEMM, SGEQRF, SLACPY, + $ SLASCL, SLASET, SORGBR, SORGLQ, SORGQR, SORMBR, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL ILAENV, LSAME, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + MNTHR = INT( MINMN*11.0E0 / 6.0E0 ) + WNTQA = LSAME( JOBZ, 'A' ) + WNTQS = LSAME( JOBZ, 'S' ) + WNTQAS = WNTQA .OR. WNTQS + WNTQO = LSAME( JOBZ, 'O' ) + WNTQN = LSAME( JOBZ, 'N' ) + MINWRK = 1 + MAXWRK = 1 + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.( WNTQA .OR. WNTQS .OR. WNTQO .OR. WNTQN ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDU.LT.1 .OR. ( WNTQAS .AND. LDU.LT.M ) .OR. + $ ( WNTQO .AND. M.LT.N .AND. LDU.LT.M ) ) THEN + INFO = -8 + ELSE IF( LDVT.LT.1 .OR. ( WNTQA .AND. LDVT.LT.N ) .OR. + $ ( WNTQS .AND. LDVT.LT.MINMN ) .OR. + $ ( WNTQO .AND. M.GE.N .AND. LDVT.LT.N ) ) THEN + INFO = -10 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 .AND. M.GT.0 .AND. N.GT.0 ) THEN + IF( M.GE.N ) THEN +* +* Compute space needed for SBDSDC +* + IF( WNTQN ) THEN + BDSPAC = 7*N + ELSE + BDSPAC = 3*N*N + 4*N + END IF + IF( M.GE.MNTHR ) THEN + IF( WNTQN ) THEN +* +* Path 1 (M much larger than N, JOBZ='N') +* + WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, + $ -1 ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + MAXWRK = MAX( WRKBL, BDSPAC+N ) + MINWRK = BDSPAC + N + ELSE IF( WNTQO ) THEN +* +* Path 2 (M much larger than N, JOBZ='O') +* + WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORMBR', 'QLN', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORMBR', 'PRT', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC+3*N ) + MAXWRK = WRKBL + 2*N*N + MINWRK = BDSPAC + 2*N*N + 3*N + ELSE IF( WNTQS ) THEN +* +* Path 3 (M much larger than N, JOBZ='S') +* + WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORMBR', 'QLN', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORMBR', 'PRT', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC+3*N ) + MAXWRK = WRKBL + N*N + MINWRK = BDSPAC + N*N + 3*N + ELSE IF( WNTQA ) THEN +* +* Path 4 (M much larger than N, JOBZ='A') +* + WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'SORGQR', ' ', M, + $ M, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORMBR', 'QLN', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORMBR', 'PRT', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC+3*N ) + MAXWRK = WRKBL + N*N + MINWRK = BDSPAC + N*N + 3*N + END IF + ELSE +* +* Path 5 (M at least N, but not much larger) +* + WRKBL = 3*N + ( M+N )*ILAENV( 1, 'SGEBRD', ' ', M, N, -1, + $ -1 ) + IF( WNTQN ) THEN + MAXWRK = MAX( WRKBL, BDSPAC+3*N ) + MINWRK = 3*N + MAX( M, BDSPAC ) + ELSE IF( WNTQO ) THEN + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORMBR', 'QLN', M, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORMBR', 'PRT', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC+3*N ) + MAXWRK = WRKBL + M*N + MINWRK = 3*N + MAX( M, N*N+BDSPAC ) + ELSE IF( WNTQS ) THEN + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORMBR', 'QLN', M, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORMBR', 'PRT', N, N, N, -1 ) ) + MAXWRK = MAX( WRKBL, BDSPAC+3*N ) + MINWRK = 3*N + MAX( M, BDSPAC ) + ELSE IF( WNTQA ) THEN + WRKBL = MAX( WRKBL, 3*N+M* + $ ILAENV( 1, 'SORMBR', 'QLN', M, M, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORMBR', 'PRT', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, BDSPAC+3*N ) + MINWRK = 3*N + MAX( M, BDSPAC ) + END IF + END IF + ELSE +* +* Compute space needed for SBDSDC +* + IF( WNTQN ) THEN + BDSPAC = 7*M + ELSE + BDSPAC = 3*M*M + 4*M + END IF + IF( N.GE.MNTHR ) THEN + IF( WNTQN ) THEN +* +* Path 1t (N much larger than M, JOBZ='N') +* + WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, + $ -1 ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + MAXWRK = MAX( WRKBL, BDSPAC+M ) + MINWRK = BDSPAC + M + ELSE IF( WNTQO ) THEN +* +* Path 2t (N much larger than M, JOBZ='O') +* + WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'SORMBR', 'QLN', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'SORMBR', 'PRT', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC+3*M ) + MAXWRK = WRKBL + 2*M*M + MINWRK = BDSPAC + 2*M*M + 3*M + ELSE IF( WNTQS ) THEN +* +* Path 3t (N much larger than M, JOBZ='S') +* + WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'SORMBR', 'QLN', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'SORMBR', 'PRT', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC+3*M ) + MAXWRK = WRKBL + M*M + MINWRK = BDSPAC + M*M + 3*M + ELSE IF( WNTQA ) THEN +* +* Path 4t (N much larger than M, JOBZ='A') +* + WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'SORGLQ', ' ', N, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'SORMBR', 'QLN', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'SORMBR', 'PRT', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC+3*M ) + MAXWRK = WRKBL + M*M + MINWRK = BDSPAC + M*M + 3*M + END IF + ELSE +* +* Path 5t (N greater than M, but not much larger) +* + WRKBL = 3*M + ( M+N )*ILAENV( 1, 'SGEBRD', ' ', M, N, -1, + $ -1 ) + IF( WNTQN ) THEN + MAXWRK = MAX( WRKBL, BDSPAC+3*M ) + MINWRK = 3*M + MAX( N, BDSPAC ) + ELSE IF( WNTQO ) THEN + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'SORMBR', 'QLN', M, M, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'SORMBR', 'PRT', M, N, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC+3*M ) + MAXWRK = WRKBL + M*N + MINWRK = 3*M + MAX( N, M*M+BDSPAC ) + ELSE IF( WNTQS ) THEN + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'SORMBR', 'QLN', M, M, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'SORMBR', 'PRT', M, N, M, -1 ) ) + MAXWRK = MAX( WRKBL, BDSPAC+3*M ) + MINWRK = 3*M + MAX( N, BDSPAC ) + ELSE IF( WNTQA ) THEN + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'SORMBR', 'QLN', M, M, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'SORMBR', 'PRT', N, N, M, -1 ) ) + MAXWRK = MAX( WRKBL, BDSPAC+3*M ) + MINWRK = 3*M + MAX( N, BDSPAC ) + END IF + END IF + END IF + WORK( 1 ) = MAXWRK + END IF +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGESDD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + IF( LWORK.GE.1 ) + $ WORK( 1 ) = ONE + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce using the QR +* decomposition (if sufficient workspace available) +* + IF( M.GE.MNTHR ) THEN +* + IF( WNTQN ) THEN +* +* Path 1 (M much larger than N, JOBZ='N') +* No singular vectors to be computed +* + ITAU = 1 + NWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Zero out below R +* + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + NWORK = IE + N +* +* Perform bidiagonal SVD, computing singular values only +* (Workspace: need N+BDSPAC) +* + CALL SBDSDC( 'U', 'N', N, S, WORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, WORK( NWORK ), IWORK, INFO ) +* + ELSE IF( WNTQO ) THEN +* +* Path 2 (M much larger than N, JOBZ = 'O') +* N left singular vectors to be overwritten on A and +* N right singular vectors to be computed in VT +* + IR = 1 +* +* WORK(IR) is LDWRKR by N +* + IF( LWORK.GE.LDA*N+N*N+3*N+BDSPAC ) THEN + LDWRKR = LDA + ELSE + LDWRKR = ( LWORK-N*N-3*N-BDSPAC ) / N + END IF + ITAU = IR + LDWRKR*N + NWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ), + $ LDWRKR ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in VT, copying result to WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* WORK(IU) is N by N +* + IU = NWORK + NWORK = IU + N*N +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in WORK(IU) and computing right +* singular vectors of bidiagonal matrix in VT +* (Workspace: need N+N*N+BDSPAC) +* + CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), N, + $ VT, LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite WORK(IU) by left singular vectors of R +* and VT by right singular vectors of R +* (Workspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) +* + CALL SORMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IU ), N, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL SORMBR( 'P', 'R', 'T', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in WORK(IR) and copying to A +* (Workspace: need 2*N*N, prefer N*N+M*N) +* + DO 10 I = 1, M, LDWRKR + CHUNK = MIN( M-I+1, LDWRKR ) + CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ), + $ LDA, WORK( IU ), N, ZERO, WORK( IR ), + $ LDWRKR ) + CALL SLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR, + $ A( I, 1 ), LDA ) + 10 CONTINUE +* + ELSE IF( WNTQS ) THEN +* +* Path 3 (M much larger than N, JOBZ='S') +* N left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IR = 1 +* +* WORK(IR) is N by N +* + LDWRKR = N + ITAU = IR + LDWRKR*N + NWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ), + $ LDWRKR ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagoal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* (Workspace: need N+BDSPAC) +* + CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite U by left singular vectors of R and VT +* by right singular vectors of R +* (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* + CALL SORMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* + CALL SORMBR( 'P', 'R', 'T', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in U +* (Workspace: need N*N) +* + CALL SLACPY( 'F', N, N, U, LDU, WORK( IR ), LDWRKR ) + CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA, WORK( IR ), + $ LDWRKR, ZERO, U, LDU ) +* + ELSE IF( WNTQA ) THEN +* +* Path 4 (M much larger than N, JOBZ='A') +* M left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IU = 1 +* +* WORK(IU) is N by N +* + LDWRKU = N + ITAU = IU + LDWRKU*N + NWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) + CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Produce R in A, zeroing out other entries +* + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in A +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in WORK(IU) and computing right +* singular vectors of bidiagonal matrix in VT +* (Workspace: need N+N*N+BDSPAC) +* + CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), N, + $ VT, LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite WORK(IU) by left singular vectors of R and VT +* by right singular vectors of R +* (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* + CALL SORMBR( 'Q', 'L', 'N', N, N, N, A, LDA, + $ WORK( ITAUQ ), WORK( IU ), LDWRKU, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + CALL SORMBR( 'P', 'R', 'T', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (Workspace: need N*N) +* + CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU, WORK( IU ), + $ LDWRKU, ZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL SLACPY( 'F', M, N, A, LDA, U, LDU ) +* + END IF +* + ELSE +* +* M .LT. MNTHR +* +* Path 5 (M at least N, but not much larger) +* Reduce to bidiagonal form without QR decomposition +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize A +* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) +* + CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IF( WNTQN ) THEN +* +* Perform bidiagonal SVD, only computing singular values +* (Workspace: need N+BDSPAC) +* + CALL SBDSDC( 'U', 'N', N, S, WORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, WORK( NWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN + IU = NWORK + IF( LWORK.GE.M*N+3*N+BDSPAC ) THEN +* +* WORK( IU ) is M by N +* + LDWRKU = M + NWORK = IU + LDWRKU*N + CALL SLASET( 'F', M, N, ZERO, ZERO, WORK( IU ), + $ LDWRKU ) + ELSE +* +* WORK( IU ) is N by N +* + LDWRKU = N + NWORK = IU + LDWRKU*N +* +* WORK(IR) is LDWRKR by N +* + IR = NWORK + LDWRKR = ( LWORK-N*N-3*N ) / N + END IF + NWORK = IU + LDWRKU*N +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in WORK(IU) and computing right +* singular vectors of bidiagonal matrix in VT +* (Workspace: need N+N*N+BDSPAC) +* + CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), + $ LDWRKU, VT, LDVT, DUM, IDUM, WORK( NWORK ), + $ IWORK, INFO ) +* +* Overwrite VT by right singular vectors of A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SORMBR( 'P', 'R', 'T', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* + IF( LWORK.GE.M*N+3*N+BDSPAC ) THEN +* +* Overwrite WORK(IU) by left singular vectors of A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SORMBR( 'Q', 'L', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), WORK( IU ), LDWRKU, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy left singular vectors of A from WORK(IU) to A +* + CALL SLACPY( 'F', M, N, WORK( IU ), LDWRKU, A, LDA ) + ELSE +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by left singular vectors of +* bidiagonal matrix in WORK(IU), storing result in +* WORK(IR) and copying to A +* (Workspace: need 2*N*N, prefer N*N+M*N) +* + DO 20 I = 1, M, LDWRKR + CHUNK = MIN( M-I+1, LDWRKR ) + CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ), + $ LDA, WORK( IU ), LDWRKU, ZERO, + $ WORK( IR ), LDWRKR ) + CALL SLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR, + $ A( I, 1 ), LDA ) + 20 CONTINUE + END IF +* + ELSE IF( WNTQS ) THEN +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* (Workspace: need N+BDSPAC) +* + CALL SLASET( 'F', M, N, ZERO, ZERO, U, LDU ) + CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite U by left singular vectors of A and VT +* by right singular vectors of A +* (Workspace: need 3*N, prefer 2*N+N*NB) +* + CALL SORMBR( 'Q', 'L', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL SORMBR( 'P', 'R', 'T', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + ELSE IF( WNTQA ) THEN +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* (Workspace: need N+BDSPAC) +* + CALL SLASET( 'F', M, M, ZERO, ZERO, U, LDU ) + CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Set the right corner of U to identity matrix +* + CALL SLASET( 'F', M-N, M-N, ZERO, ONE, U( N+1, N+1 ), + $ LDU ) +* +* Overwrite U by left singular vectors of A and VT +* by right singular vectors of A +* (Workspace: need N*N+2*N+M, prefer N*N+2*N+M*NB) +* + CALL SORMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL SORMBR( 'P', 'R', 'T', N, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + END IF +* + END IF +* + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce using the LQ decomposition (if +* sufficient workspace available) +* + IF( N.GE.MNTHR ) THEN +* + IF( WNTQN ) THEN +* +* Path 1t (N much larger than M, JOBZ='N') +* No singular vectors to be computed +* + ITAU = 1 + NWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Zero out above L +* + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA ) + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + NWORK = IE + M +* +* Perform bidiagonal SVD, computing singular values only +* (Workspace: need M+BDSPAC) +* + CALL SBDSDC( 'U', 'N', M, S, WORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, WORK( NWORK ), IWORK, INFO ) +* + ELSE IF( WNTQO ) THEN +* +* Path 2t (N much larger than M, JOBZ='O') +* M right singular vectors to be overwritten on A and +* M left singular vectors to be computed in U +* + IVT = 1 +* +* IVT is M by M +* + IL = IVT + M*M + IF( LWORK.GE.M*N+M*M+3*M+BDSPAC ) THEN +* +* WORK(IL) is M by N +* + LDWRKL = M + CHUNK = N + ELSE + LDWRKL = M + CHUNK = ( LWORK-M*M ) / M + END IF + ITAU = IL + LDWRKL*M + NWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy L to WORK(IL), zeroing about above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IL+LDWRKL ), LDWRKL ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL) +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IL ), LDWRKL, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U, and computing right singular +* vectors of bidiagonal matrix in WORK(IVT) +* (Workspace: need M+M*M+BDSPAC) +* + CALL SBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU, + $ WORK( IVT ), M, DUM, IDUM, WORK( NWORK ), + $ IWORK, INFO ) +* +* Overwrite U by left singular vectors of L and WORK(IVT) +* by right singular vectors of L +* (Workspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) +* + CALL SORMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL SORMBR( 'P', 'R', 'T', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUP ), WORK( IVT ), M, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply right singular vectors of L in WORK(IVT) by Q +* in A, storing result in WORK(IL) and copying to A +* (Workspace: need 2*M*M, prefer M*M+M*N) +* + DO 30 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IVT ), M, + $ A( 1, I ), LDA, ZERO, WORK( IL ), LDWRKL ) + CALL SLACPY( 'F', M, BLK, WORK( IL ), LDWRKL, + $ A( 1, I ), LDA ) + 30 CONTINUE +* + ELSE IF( WNTQS ) THEN +* +* Path 3t (N much larger than M, JOBZ='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IL = 1 +* +* WORK(IL) is M by M +* + LDWRKL = M + ITAU = IL + LDWRKL*M + NWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy L to WORK(IL), zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IL+LDWRKL ), LDWRKL ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IL ), LDWRKL, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* (Workspace: need M+BDSPAC) +* + CALL SBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite U by left singular vectors of L and VT +* by right singular vectors of L +* (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* + CALL SORMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL SORMBR( 'P', 'R', 'T', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Multiply right singular vectors of L in WORK(IL) by +* Q in A, storing result in VT +* (Workspace: need M*M) +* + CALL SLACPY( 'F', M, M, VT, LDVT, WORK( IL ), LDWRKL ) + CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IL ), LDWRKL, + $ A, LDA, ZERO, VT, LDVT ) +* + ELSE IF( WNTQA ) THEN +* +* Path 4t (N much larger than M, JOBZ='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IVT = 1 +* +* WORK(IVT) is M by M +* + LDWKVT = M + ITAU = IVT + LDWKVT*M + NWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Produce L in A, zeroing out other entries +* + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in A +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in WORK(IVT) +* (Workspace: need M+M*M+BDSPAC) +* + CALL SBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU, + $ WORK( IVT ), LDWKVT, DUM, IDUM, + $ WORK( NWORK ), IWORK, INFO ) +* +* Overwrite U by left singular vectors of L and WORK(IVT) +* by right singular vectors of L +* (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* + CALL SORMBR( 'Q', 'L', 'N', M, M, M, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL SORMBR( 'P', 'R', 'T', M, M, M, A, LDA, + $ WORK( ITAUP ), WORK( IVT ), LDWKVT, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply right singular vectors of L in WORK(IVT) by +* Q in VT, storing result in A +* (Workspace: need M*M) +* + CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IVT ), LDWKVT, + $ VT, LDVT, ZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + END IF +* + ELSE +* +* N .LT. MNTHR +* +* Path 5t (N greater than M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize A +* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +* + CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IF( WNTQN ) THEN +* +* Perform bidiagonal SVD, only computing singular values +* (Workspace: need M+BDSPAC) +* + CALL SBDSDC( 'L', 'N', M, S, WORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, WORK( NWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN + LDWKVT = M + IVT = NWORK + IF( LWORK.GE.M*N+3*M+BDSPAC ) THEN +* +* WORK( IVT ) is M by N +* + CALL SLASET( 'F', M, N, ZERO, ZERO, WORK( IVT ), + $ LDWKVT ) + NWORK = IVT + LDWKVT*N + ELSE +* +* WORK( IVT ) is M by M +* + NWORK = IVT + LDWKVT*M + IL = NWORK +* +* WORK(IL) is M by CHUNK +* + CHUNK = ( LWORK-M*M-3*M ) / M + END IF +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in WORK(IVT) +* (Workspace: need M*M+BDSPAC) +* + CALL SBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, + $ WORK( IVT ), LDWKVT, DUM, IDUM, + $ WORK( NWORK ), IWORK, INFO ) +* +* Overwrite U by left singular vectors of A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SORMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* + IF( LWORK.GE.M*N+3*M+BDSPAC ) THEN +* +* Overwrite WORK(IVT) by left singular vectors of A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SORMBR( 'P', 'R', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), WORK( IVT ), LDWKVT, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy right singular vectors of A from WORK(IVT) to A +* + CALL SLACPY( 'F', M, N, WORK( IVT ), LDWKVT, A, LDA ) + ELSE +* +* Generate P**T in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by right singular vectors of +* bidiagonal matrix in WORK(IVT), storing result in +* WORK(IL) and copying to A +* (Workspace: need 2*M*M, prefer M*M+M*N) +* + DO 40 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IVT ), + $ LDWKVT, A( 1, I ), LDA, ZERO, + $ WORK( IL ), M ) + CALL SLACPY( 'F', M, BLK, WORK( IL ), M, A( 1, I ), + $ LDA ) + 40 CONTINUE + END IF + ELSE IF( WNTQS ) THEN +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* (Workspace: need M+BDSPAC) +* + CALL SLASET( 'F', M, N, ZERO, ZERO, VT, LDVT ) + CALL SBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite U by left singular vectors of A and VT +* by right singular vectors of A +* (Workspace: need 3*M, prefer 2*M+M*NB) +* + CALL SORMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL SORMBR( 'P', 'R', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + ELSE IF( WNTQA ) THEN +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* (Workspace: need M+BDSPAC) +* + CALL SLASET( 'F', N, N, ZERO, ZERO, VT, LDVT ) + CALL SBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Set the right corner of VT to identity matrix +* + CALL SLASET( 'F', N-M, N-M, ZERO, ONE, VT( M+1, M+1 ), + $ LDVT ) +* +* Overwrite U by left singular vectors of A and VT +* by right singular vectors of A +* (Workspace: need 2*M+N, prefer 2*M+N*NB) +* + CALL SORMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL SORMBR( 'P', 'R', 'T', N, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + END IF +* + END IF +* + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( ANRM.LT.SMLNUM ) + $ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = REAL( MAXWRK ) +* + RETURN +* +* End of SGESDD +* + END diff --git a/costa/native/external/lapack/sgesv.f b/costa/native/external/lapack/sgesv.f new file mode 100644 index 000000000..7844aa38c --- /dev/null +++ b/costa/native/external/lapack/sgesv.f @@ -0,0 +1,108 @@ + SUBROUTINE SGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SGESV computes the solution to a real system of linear equations +* A * X = B, +* where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +* +* The LU decomposition with partial pivoting and row interchanges is +* used to factor A as +* A = P * L * U, +* where P is a permutation matrix, L is unit lower triangular, and U is +* upper triangular. The factored form of A is then used to solve the +* system of equations A * X = B. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the N-by-N coefficient matrix A. +* On exit, the factors L and U from the factorization +* A = P*L*U; the unit diagonal elements of L are not stored. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* The pivot indices that define the permutation matrix P; +* row i of the matrix was interchanged with row IPIV(i). +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS matrix of right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, so the solution could not be computed. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL SGETRF, SGETRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGESV ', -INFO ) + RETURN + END IF +* +* Compute the LU factorization of A. +* + CALL SGETRF( N, N, A, LDA, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL SGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, B, LDB, + $ INFO ) + END IF + RETURN +* +* End of SGESV +* + END diff --git a/costa/native/external/lapack/sgesvd.f b/costa/native/external/lapack/sgesvd.f new file mode 100644 index 000000000..77c69d659 --- /dev/null +++ b/costa/native/external/lapack/sgesvd.f @@ -0,0 +1,3417 @@ + SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, + $ WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBU, JOBVT + INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), S( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGESVD computes the singular value decomposition (SVD) of a real +* M-by-N matrix A, optionally computing the left and/or right singular +* vectors. The SVD is written +* +* A = U * SIGMA * transpose(V) +* +* where SIGMA is an M-by-N matrix which is zero except for its +* min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and +* V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA +* are the singular values of A; they are real and non-negative, and +* are returned in descending order. The first min(m,n) columns of +* U and V are the left and right singular vectors of A. +* +* Note that the routine returns V**T, not V. +* +* Arguments +* ========= +* +* JOBU (input) CHARACTER*1 +* Specifies options for computing all or part of the matrix U: +* = 'A': all M columns of U are returned in array U: +* = 'S': the first min(m,n) columns of U (the left singular +* vectors) are returned in the array U; +* = 'O': the first min(m,n) columns of U (the left singular +* vectors) are overwritten on the array A; +* = 'N': no columns of U (no left singular vectors) are +* computed. +* +* JOBVT (input) CHARACTER*1 +* Specifies options for computing all or part of the matrix +* V**T: +* = 'A': all N rows of V**T are returned in the array VT; +* = 'S': the first min(m,n) rows of V**T (the right singular +* vectors) are returned in the array VT; +* = 'O': the first min(m,n) rows of V**T (the right singular +* vectors) are overwritten on the array A; +* = 'N': no rows of V**T (no right singular vectors) are +* computed. +* +* JOBVT and JOBU cannot both be 'O'. +* +* M (input) INTEGER +* The number of rows of the input matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the input matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, +* if JOBU = 'O', A is overwritten with the first min(m,n) +* columns of U (the left singular vectors, +* stored columnwise); +* if JOBVT = 'O', A is overwritten with the first min(m,n) +* rows of V**T (the right singular vectors, +* stored rowwise); +* if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A +* are destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* S (output) REAL array, dimension (min(M,N)) +* The singular values of A, sorted so that S(i) >= S(i+1). +* +* U (output) REAL array, dimension (LDU,UCOL) +* (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. +* If JOBU = 'A', U contains the M-by-M orthogonal matrix U; +* if JOBU = 'S', U contains the first min(m,n) columns of U +* (the left singular vectors, stored columnwise); +* if JOBU = 'N' or 'O', U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= 1; if +* JOBU = 'S' or 'A', LDU >= M. +* +* VT (output) REAL array, dimension (LDVT,N) +* If JOBVT = 'A', VT contains the N-by-N orthogonal matrix +* V**T; +* if JOBVT = 'S', VT contains the first min(m,n) rows of +* V**T (the right singular vectors, stored rowwise); +* if JOBVT = 'N' or 'O', VT is not referenced. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. LDVT >= 1; if +* JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK; +* if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged +* superdiagonal elements of an upper bidiagonal matrix B +* whose diagonal is in S (not necessarily sorted). B +* satisfies A = U * B * VT, so it has the same singular values +* as A, and singular vectors related by U and VT. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1. +* LWORK >= MAX(3*MIN(M,N)+MAX(M,N),5*MIN(M,N)). +* For good performance, LWORK should generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if SBDSQR did not converge, INFO specifies how many +* superdiagonals of an intermediate bidiagonal form B +* did not converge to zero. See the description of WORK +* above for details. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS, + $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS + INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL, + $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU, + $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU, + $ NRVT, WRKBL + REAL ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SBDSQR, SGEBRD, SGELQF, SGEMM, SGEQRF, SLACPY, + $ SLASCL, SLASET, SORGBR, SORGLQ, SORGQR, SORMBR, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + MNTHR = ILAENV( 6, 'SGESVD', JOBU // JOBVT, M, N, 0, 0 ) + WNTUA = LSAME( JOBU, 'A' ) + WNTUS = LSAME( JOBU, 'S' ) + WNTUAS = WNTUA .OR. WNTUS + WNTUO = LSAME( JOBU, 'O' ) + WNTUN = LSAME( JOBU, 'N' ) + WNTVA = LSAME( JOBVT, 'A' ) + WNTVS = LSAME( JOBVT, 'S' ) + WNTVAS = WNTVA .OR. WNTVS + WNTVO = LSAME( JOBVT, 'O' ) + WNTVN = LSAME( JOBVT, 'N' ) + MINWRK = 1 + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR. + $ ( WNTVO .AND. WNTUO ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN + INFO = -9 + ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR. + $ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) .AND. M.GT.0 .AND. + $ N.GT.0 ) THEN + IF( M.GE.N ) THEN +* +* Compute space needed for SBDSQR +* + BDSPAC = 5*N + IF( M.GE.MNTHR ) THEN + IF( WNTUN ) THEN +* +* Path 1 (M much larger than N, JOBU='N') +* + MAXWRK = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, + $ -1 ) + MAXWRK = MAX( MAXWRK, 3*N+2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + IF( WNTVO .OR. WNTVAS ) + $ MAXWRK = MAX( MAXWRK, 3*N+( N-1 )* + $ ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 4*N, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTUO .AND. WNTVN ) THEN +* +* Path 2 (M much larger than N, JOBU='O', JOBVT='N') +* + WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N ) + MINWRK = MAX( 3*N+M, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTUO .AND. WNTVAS ) THEN +* +* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or +* 'A') +* + WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+( N-1 )* + $ ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N ) + MINWRK = MAX( 3*N+M, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTUS .AND. WNTVN ) THEN +* +* Path 4 (M much larger than N, JOBU='S', JOBVT='N') +* + WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTUS .AND. WNTVO ) THEN +* +* Path 5 (M much larger than N, JOBU='S', JOBVT='O') +* + WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+( N-1 )* + $ ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTUS .AND. WNTVAS ) THEN +* +* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or +* 'A') +* + WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+( N-1 )* + $ ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTUA .AND. WNTVN ) THEN +* +* Path 7 (M much larger than N, JOBU='A', JOBVT='N') +* + WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'SORGQR', ' ', M, + $ M, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTUA .AND. WNTVO ) THEN +* +* Path 8 (M much larger than N, JOBU='A', JOBVT='O') +* + WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'SORGQR', ' ', M, + $ M, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+( N-1 )* + $ ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTUA .AND. WNTVAS ) THEN +* +* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or +* 'A') +* + WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'SORGQR', ' ', M, + $ M, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+N* + $ ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 3*N+( N-1 )* + $ ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + END IF + ELSE +* +* Path 10 (M at least N, but not much larger) +* + MAXWRK = 3*N + ( M+N )*ILAENV( 1, 'SGEBRD', ' ', M, N, + $ -1, -1 ) + IF( WNTUS .OR. WNTUO ) + $ MAXWRK = MAX( MAXWRK, 3*N+N* + $ ILAENV( 1, 'SORGBR', 'Q', M, N, N, -1 ) ) + IF( WNTUA ) + $ MAXWRK = MAX( MAXWRK, 3*N+M* + $ ILAENV( 1, 'SORGBR', 'Q', M, M, N, -1 ) ) + IF( .NOT.WNTVN ) + $ MAXWRK = MAX( MAXWRK, 3*N+( N-1 )* + $ ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 3*N+M, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + END IF + ELSE +* +* Compute space needed for SBDSQR +* + BDSPAC = 5*M + IF( N.GE.MNTHR ) THEN + IF( WNTVN ) THEN +* +* Path 1t(N much larger than M, JOBVT='N') +* + MAXWRK = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, + $ -1 ) + MAXWRK = MAX( MAXWRK, 3*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + IF( WNTUO .OR. WNTUAS ) + $ MAXWRK = MAX( MAXWRK, 3*M+M* + $ ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 4*M, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTVO .AND. WNTUN ) THEN +* +* Path 2t(N much larger than M, JOBU='N', JOBVT='O') +* + WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M ) + MINWRK = MAX( 3*M+N, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTVO .AND. WNTUAS ) THEN +* +* Path 3t(N much larger than M, JOBU='S' or 'A', +* JOBVT='O') +* + WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M ) + MINWRK = MAX( 3*M+N, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTVS .AND. WNTUN ) THEN +* +* Path 4t(N much larger than M, JOBU='N', JOBVT='S') +* + WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTVS .AND. WNTUO ) THEN +* +* Path 5t(N much larger than M, JOBU='O', JOBVT='S') +* + WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTVS .AND. WNTUAS ) THEN +* +* Path 6t(N much larger than M, JOBU='S' or 'A', +* JOBVT='S') +* + WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTVA .AND. WNTUN ) THEN +* +* Path 7t(N much larger than M, JOBU='N', JOBVT='A') +* + WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'SORGLQ', ' ', N, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTVA .AND. WNTUO ) THEN +* +* Path 8t(N much larger than M, JOBU='O', JOBVT='A') +* + WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'SORGLQ', ' ', N, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTVA .AND. WNTUAS ) THEN +* +* Path 9t(N much larger than M, JOBU='S' or 'A', +* JOBVT='A') +* + WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'SORGLQ', ' ', N, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+( M-1 )* + $ ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 3*M+M* + $ ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + END IF + ELSE +* +* Path 10t(N greater than M, but not much larger) +* + MAXWRK = 3*M + ( M+N )*ILAENV( 1, 'SGEBRD', ' ', M, N, + $ -1, -1 ) + IF( WNTVS .OR. WNTVO ) + $ MAXWRK = MAX( MAXWRK, 3*M+M* + $ ILAENV( 1, 'SORGBR', 'P', M, N, M, -1 ) ) + IF( WNTVA ) + $ MAXWRK = MAX( MAXWRK, 3*M+N* + $ ILAENV( 1, 'SORGBR', 'P', N, N, M, -1 ) ) + IF( .NOT.WNTUN ) + $ MAXWRK = MAX( MAXWRK, 3*M+( M-1 )* + $ ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 3*M+N, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + END IF + END IF + WORK( 1 ) = MAXWRK + END IF +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGESVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + IF( LWORK.GE.1 ) + $ WORK( 1 ) = ONE + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce using the QR +* decomposition (if sufficient workspace available) +* + IF( M.GE.MNTHR ) THEN +* + IF( WNTUN ) THEN +* +* Path 1 (M much larger than N, JOBU='N') +* No left singular vectors to be computed +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Zero out below R +* + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + NCVT = 0 + IF( WNTVO .OR. WNTVAS ) THEN +* +* If right singular vectors desired, generate P'. +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + NCVT = N + END IF + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in A if desired +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA, + $ DUM, 1, DUM, 1, WORK( IWORK ), INFO ) +* +* If right singular vectors desired in VT, copy them there +* + IF( WNTVAS ) + $ CALL SLACPY( 'F', N, N, A, LDA, VT, LDVT ) +* + ELSE IF( WNTUO .AND. WNTVN ) THEN +* +* Path 2 (M much larger than N, JOBU='O', JOBVT='N') +* N left singular vectors to be overwritten on A and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN +* +* WORK(IU) is LDA by N, WORK(IR) is LDA by N +* + LDWRKU = LDA + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN +* +* WORK(IU) is LDA by N, WORK(IR) is N by N +* + LDWRKU = LDA + LDWRKR = N + ELSE +* +* WORK(IU) is LDWRKU by N, WORK(IR) is N by N +* + LDWRKU = ( LWORK-N*N-N ) / N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IR) and zero out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ), + $ LDWRKR ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing R +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (Workspace: need N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1, + $ WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + N +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in WORK(IU) and copying to A +* (Workspace: need N*N+2*N, prefer N*N+M*N+N) +* + DO 10 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ), + $ LDA, WORK( IR ), LDWRKR, ZERO, + $ WORK( IU ), LDWRKU ) + CALL SLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 10 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize A +* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) +* + CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing A +* (Workspace: need 4*N, prefer 3*N+N*NB) +* + CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1, + $ A, LDA, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUO .AND. WNTVAS ) THEN +* +* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') +* N left singular vectors to be overwritten on A and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + LDWRKR = N + ELSE +* +* WORK(IU) is LDWRKU by N and WORK(IR) is N by N +* + LDWRKU = ( LWORK-N*N-N ) / N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, VT( 2, 1 ), + $ LDVT ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT, copying result to WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR ) +* +* Generate left vectors bidiagonalizing R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in VT +* (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) and computing right +* singular vectors of R in VT +* (Workspace: need N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT, + $ WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + N +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in WORK(IU) and copying to A +* (Workspace: need N*N+2*N, prefer N*N+M*N+N) +* + DO 20 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ), + $ LDA, WORK( IR ), LDWRKR, ZERO, + $ WORK( IU ), LDWRKU ) + CALL SLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 20 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, VT( 2, 1 ), + $ LDVT ) +* +* Generate Q in A +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in A by left vectors bidiagonalizing R +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), A, LDA, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in VT +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT, + $ A, LDA, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUS ) THEN +* + IF( WNTVN ) THEN +* +* Path 4 (M much larger than N, JOBU='S', JOBVT='N') +* N left singular vectors to be computed in U and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IR) is LDA by N +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is N by N +* + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (Workspace: need N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, + $ 1, WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in U +* (Workspace: need N*N) +* + CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA, + $ WORK( IR ), LDWRKR, ZERO, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), + $ LDA ) +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left vectors bidiagonalizing R +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, + $ 1, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVO ) THEN +* +* Path 5 (M much larger than N, JOBU='S', JOBVT='O') +* N left singular vectors to be computed in U and +* N right singular vectors to be overwritten on A +* + IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = N + ELSE +* +* WORK(IU) is N by N and WORK(IR) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* + CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*N*N+4*N, +* prefer 2*N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*N*N+4*N-1, +* prefer 2*N*N+3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in WORK(IR) +* (Workspace: need 2*N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, WORK( IU ), + $ LDWRKU, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in U +* (Workspace: need N*N) +* + CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA, + $ WORK( IU ), LDWRKU, ZERO, U, LDU ) +* +* Copy right singular vectors of R to A +* (Workspace: need N*N) +* + CALL SLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), + $ LDA ) +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left vectors bidiagonalizing R +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in A +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A, + $ LDA, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVAS ) THEN +* +* Path 6 (M much larger than N, JOBU='S', JOBVT='S' +* or 'A') +* N left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is N by N +* + LDWRKU = N + END IF + ITAU = IU + LDWRKU*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to VT +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, + $ LDVT ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need N*N+4*N-1, +* prefer N*N+3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in VT +* (Workspace: need N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, + $ LDVT, WORK( IU ), LDWRKU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in U +* (Workspace: need N*N) +* + CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA, + $ WORK( IU ), LDWRKU, ZERO, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, VT( 2, 1 ), + $ LDVT ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in VT +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + ELSE IF( WNTUA ) THEN +* + IF( WNTVN ) THEN +* +* Path 7 (M much larger than N, JOBU='A', JOBVT='N') +* M left singular vectors to be computed in U and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IR) is LDA by N +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is N by N +* + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in U +* (Workspace: need N*N+N+M, prefer N*N+N+M*NB) +* + CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (Workspace: need N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, + $ 1, WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IR), storing result in A +* (Workspace: need N*N) +* + CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU, + $ WORK( IR ), LDWRKR, ZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL SLACPY( 'F', M, N, A, LDA, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N+M, prefer N+M*NB) +* + CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), + $ LDA ) +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in A +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, + $ 1, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVO ) THEN +* +* Path 8 (M much larger than N, JOBU='A', JOBVT='O') +* M left singular vectors to be computed in U and +* N right singular vectors to be overwritten on A +* + IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = N + ELSE +* +* WORK(IU) is N by N and WORK(IR) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) +* + CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*N*N+4*N, +* prefer 2*N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*N*N+4*N-1, +* prefer 2*N*N+3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in WORK(IR) +* (Workspace: need 2*N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, WORK( IU ), + $ LDWRKU, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (Workspace: need N*N) +* + CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU, + $ WORK( IU ), LDWRKU, ZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL SLACPY( 'F', M, N, A, LDA, U, LDU ) +* +* Copy right singular vectors of R from WORK(IR) to A +* + CALL SLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N+M, prefer N+M*NB) +* + CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), + $ LDA ) +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in A +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in A +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A, + $ LDA, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVAS ) THEN +* +* Path 9 (M much larger than N, JOBU='A', JOBVT='S' +* or 'A') +* M left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is N by N +* + LDWRKU = N + END IF + ITAU = IU + LDWRKU*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N*N+N+M, prefer N*N+N+M*NB) +* + CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to VT +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, + $ LDVT ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need N*N+4*N-1, +* prefer N*N+3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in VT +* (Workspace: need N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, + $ LDVT, WORK( IU ), LDWRKU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (Workspace: need N*N) +* + CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU, + $ WORK( IU ), LDWRKU, ZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL SLACPY( 'F', M, N, A, LDA, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N+M, prefer N+M*NB) +* + CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R from A to VT, zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, VT( 2, 1 ), + $ LDVT ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in VT +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* M .LT. MNTHR +* +* Path 10 (M at least N, but not much larger) +* Reduce to bidiagonal form without QR decomposition +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize A +* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) +* + CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUAS ) THEN +* +* If left singular vectors desired in U, copy result to U +* and generate left bidiagonalizing vectors in U +* (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB) +* + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) + IF( WNTUS ) + $ NCU = N + IF( WNTUA ) + $ NCU = M + CALL SORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVAS ) THEN +* +* If right singular vectors desired in VT, copy result to +* VT and generate right bidiagonalizing vectors in VT +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTUO ) THEN +* +* If left singular vectors desired in A, generate left +* bidiagonalizing vectors in A +* (Workspace: need 4*N, prefer 3*N+N*NB) +* + CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVO ) THEN +* +* If right singular vectors desired in A, generate right +* bidiagonalizing vectors in A +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IWORK = IE + N + IF( WNTUAS .OR. WNTUO ) + $ NRU = M + IF( WNTUN ) + $ NRU = 0 + IF( WNTVAS .OR. WNTVO ) + $ NCVT = N + IF( WNTVN ) + $ NCVT = 0 + IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA, + $ U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in A and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO ) + END IF +* + END IF +* + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce using the LQ decomposition (if +* sufficient workspace available) +* + IF( N.GE.MNTHR ) THEN +* + IF( WNTVN ) THEN +* +* Path 1t(N much larger than M, JOBVT='N') +* No right singular vectors to be computed +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Zero out above L +* + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA ) + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUO .OR. WNTUAS ) THEN +* +* If left singular vectors desired, generate Q +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IWORK = IE + M + NRU = 0 + IF( WNTUO .OR. WNTUAS ) + $ NRU = M +* +* Perform bidiagonal QR iteration, computing left singular +* vectors of A in A if desired +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A, + $ LDA, DUM, 1, WORK( IWORK ), INFO ) +* +* If left singular vectors desired in U, copy them there +* + IF( WNTUAS ) + $ CALL SLACPY( 'F', M, M, A, LDA, U, LDU ) +* + ELSE IF( WNTVO .AND. WNTUN ) THEN +* +* Path 2t(N much larger than M, JOBU='N', JOBVT='O') +* M right singular vectors to be overwritten on A and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is M by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = M + ELSE +* +* WORK(IU) is M by CHUNK and WORK(IR) is M by M +* + LDWRKU = M + CHUNK = ( LWORK-M*M-M ) / M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IR) and zero out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing L +* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + M +* +* Multiply right singular vectors of L in WORK(IR) by Q +* in A, storing result in WORK(IU) and copying to A +* (Workspace: need M*M+2*M, prefer M*M+M*N+M) +* + DO 30 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ), + $ LDWRKR, A( 1, I ), LDA, ZERO, + $ WORK( IU ), LDWRKU ) + CALL SLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, + $ A( 1, I ), LDA ) + 30 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +* + CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA, + $ DUM, 1, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTVO .AND. WNTUAS ) THEN +* +* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') +* M right singular vectors to be overwritten on A and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is M by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = M + ELSE +* +* WORK(IU) is M by CHUNK and WORK(IR) is M by M +* + LDWRKU = M + CHUNK = ( LWORK-M*M-M ) / M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing about above it +* + CALL SLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U, copying result to WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR ) +* +* Generate right vectors bidiagonalizing L in WORK(IR) +* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing L in U +* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U, and computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, U, LDU, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + M +* +* Multiply right singular vectors of L in WORK(IR) by Q +* in A, storing result in WORK(IU) and copying to A +* (Workspace: need M*M+2*M, prefer M*M+M*N+M)) +* + DO 40 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ), + $ LDWRKR, A( 1, I ), LDA, ZERO, + $ WORK( IU ), LDWRKU ) + CALL SLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, + $ A( 1, I ), LDA ) + 40 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) +* +* Generate Q in A +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in A +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU, + $ WORK( ITAUP ), A, LDA, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing L in U +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA, + $ U, LDU, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTVS ) THEN +* + IF( WNTUN ) THEN +* +* Path 4t(N much larger than M, JOBU='N', JOBVT='S') +* M right singular vectors to be computed in VT and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IR) is LDA by M +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is M by M +* + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IR), zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing L in +* WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IR) by +* Q in A, storing result in VT +* (Workspace: need M*M) +* + CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ), + $ LDWRKR, A, LDA, ZERO, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy result to VT +* + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT, + $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUO ) THEN +* +* Path 5t(N much larger than M, JOBU='O', JOBVT='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be overwritten on A +* + IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is LDA by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is M by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = M + ELSE +* +* WORK(IU) is M by M and WORK(IR) is M by M +* + LDWRKU = M + IR = IU + LDWRKU*M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out below it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* + CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*M*M+4*M, +* prefer 2*M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*M*M+4*M-1, +* prefer 2*M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in WORK(IR) and computing +* right singular vectors of L in WORK(IU) +* (Workspace: need 2*M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, WORK( IR ), + $ LDWRKR, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in A, storing result in VT +* (Workspace: need M*M) +* + CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, A, LDA, ZERO, VT, LDVT ) +* +* Copy left singular vectors of L to A +* (Workspace: need M*M) +* + CALL SLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors of L in A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, compute left +* singular vectors of A in A and compute right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUAS ) THEN +* +* Path 6t(N much larger than M, JOBU='S' or 'A', +* JOBVT='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is LDA by M +* + LDWRKU = M + END IF + ITAU = IU + LDWRKU*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, + $ LDU ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need M*M+4*M-1, +* prefer M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U and computing right +* singular vectors of L in WORK(IU) +* (Workspace: need M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, U, LDU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in A, storing result in VT +* (Workspace: need M*M) +* + CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, A, LDA, ZERO, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in U by Q +* in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + ELSE IF( WNTVA ) THEN +* + IF( WNTUN ) THEN +* +* Path 7t(N much larger than M, JOBU='N', JOBVT='A') +* N right singular vectors to be computed in VT and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IR) is LDA by M +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is M by M +* + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Copy L to WORK(IR), zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in VT +* (Workspace: need M*M+M+N, prefer M*M+M+N*NB) +* + CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (Workspace: need M*M+4*M-1, +* prefer M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IR) by +* Q in VT, storing result in A +* (Workspace: need M*M) +* + CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ), + $ LDWRKR, VT, LDVT, ZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M+N, prefer M+N*NB) +* + CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in A by Q +* in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT, + $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUO ) THEN +* +* Path 8t(N much larger than M, JOBU='O', JOBVT='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be overwritten on A +* + IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is LDA by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is M by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = M + ELSE +* +* WORK(IU) is M by M and WORK(IR) is M by M +* + LDWRKU = M + IR = IU + LDWRKU*M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) +* + CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*M*M+4*M, +* prefer 2*M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*M*M+4*M-1, +* prefer 2*M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in WORK(IR) and computing +* right singular vectors of L in WORK(IU) +* (Workspace: need 2*M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, WORK( IR ), + $ LDWRKR, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in VT, storing result in A +* (Workspace: need M*M) +* + CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, VT, LDVT, ZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* +* Copy left singular vectors of A from WORK(IR) to A +* + CALL SLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M+N, prefer M+N*NB) +* + CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in A by Q +* in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUAS ) THEN +* +* Path 9t(N much larger than M, JOBU='S' or 'A', +* JOBVT='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IU) is LDA by M +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is M by M +* + LDWRKU = M + END IF + ITAU = IU + LDWRKU*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M*M+M+N, prefer M*M+M+N*NB) +* + CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, + $ LDU ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U and computing right +* singular vectors of L in WORK(IU) +* (Workspace: need M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, U, LDU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in VT, storing result in A +* (Workspace: need M*M) +* + CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, VT, LDVT, ZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M+N, prefer M+N*NB) +* + CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in U by Q +* in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* N .LT. MNTHR +* +* Path 10t(N greater than M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +* + CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUAS ) THEN +* +* If left singular vectors desired in U, copy result to U +* and generate left bidiagonalizing vectors in U +* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) +* + CALL SLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL SORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVAS ) THEN +* +* If right singular vectors desired in VT, copy result to +* VT and generate right bidiagonalizing vectors in VT +* (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB) +* + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) + IF( WNTVA ) + $ NRVT = N + IF( WNTVS ) + $ NRVT = M + CALL SORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTUO ) THEN +* +* If left singular vectors desired in A, generate left +* bidiagonalizing vectors in A +* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) +* + CALL SORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVO ) THEN +* +* If right singular vectors desired in A, generate right +* bidiagonalizing vectors in A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IWORK = IE + M + IF( WNTUAS .OR. WNTUO ) + $ NRU = M + IF( WNTUN ) + $ NRU = 0 + IF( WNTVAS .OR. WNTVO ) + $ NCVT = N + IF( WNTVN ) + $ NCVT = 0 + IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA, + $ U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in A and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO ) + END IF +* + END IF +* + END IF +* +* If SBDSQR failed to converge, copy unconverged superdiagonals +* to WORK( 2:MINMN ) +* + IF( INFO.NE.0 ) THEN + IF( IE.GT.2 ) THEN + DO 50 I = 1, MINMN - 1 + WORK( I+1 ) = WORK( I+IE-1 ) + 50 CONTINUE + END IF + IF( IE.LT.2 ) THEN + DO 60 I = MINMN - 1, 1, -1 + WORK( I+1 ) = WORK( I+IE-1 ) + 60 CONTINUE + END IF + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM ) + $ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ), + $ MINMN, IERR ) + IF( ANRM.LT.SMLNUM ) + $ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM ) + $ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ), + $ MINMN, IERR ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of SGESVD +* + END diff --git a/costa/native/external/lapack/sgesvx.f b/costa/native/external/lapack/sgesvx.f new file mode 100644 index 000000000..59456add6 --- /dev/null +++ b/costa/native/external/lapack/sgesvx.f @@ -0,0 +1,482 @@ + SUBROUTINE SGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), C( * ), FERR( * ), R( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SGESVX uses the LU factorization to compute the solution to a real +* system of linear equations +* A * X = B, +* where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'E', real scaling factors are computed to equilibrate +* the system: +* TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +* TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +* TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +* Whether or not the system will be equilibrated depends on the +* scaling of the matrix A, but if equilibration is used, A is +* overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +* or diag(C)*B (if TRANS = 'T' or 'C'). +* +* 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the +* matrix A (after equilibration if FACT = 'E') as +* A = P * L * U, +* where P is a permutation matrix, L is a unit lower triangular +* matrix, and U is upper triangular. +* +* 3. If some U(i,i)=0, so that U is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 4. The system of equations is solved for X using the factored form +* of A. +* +* 5. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* 6. If equilibration was used, the matrix X is premultiplied by +* diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +* that it solves the original system before equilibration. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of the matrix A is +* supplied on entry, and if not, whether the matrix A should be +* equilibrated before it is factored. +* = 'F': On entry, AF and IPIV contain the factored form of A. +* If EQUED is not 'N', the matrix A has been +* equilibrated with scaling factors given by R and C. +* A, AF, and IPIV are not modified. +* = 'N': The matrix A will be copied to AF and factored. +* = 'E': The matrix A will be equilibrated if necessary, then +* copied to AF and factored. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Transpose) +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the N-by-N matrix A. If FACT = 'F' and EQUED is +* not 'N', then A must have been equilibrated by the scaling +* factors in R and/or C. A is not modified if FACT = 'F' or +* 'N', or if FACT = 'E' and EQUED = 'N' on exit. +* +* On exit, if EQUED .ne. 'N', A is scaled as follows: +* EQUED = 'R': A := diag(R) * A +* EQUED = 'C': A := A * diag(C) +* EQUED = 'B': A := diag(R) * A * diag(C). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input or output) REAL array, dimension (LDAF,N) +* If FACT = 'F', then AF is an input argument and on entry +* contains the factors L and U from the factorization +* A = P*L*U as computed by SGETRF. If EQUED .ne. 'N', then +* AF is the factored form of the equilibrated matrix A. +* +* If FACT = 'N', then AF is an output argument and on exit +* returns the factors L and U from the factorization A = P*L*U +* of the original matrix A. +* +* If FACT = 'E', then AF is an output argument and on exit +* returns the factors L and U from the factorization A = P*L*U +* of the equilibrated matrix A (see the description of A for +* the form of the equilibrated matrix). +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains the pivot indices from the factorization A = P*L*U +* as computed by SGETRF; row i of the matrix was interchanged +* with row IPIV(i). +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains the pivot indices from the factorization A = P*L*U +* of the original matrix A. +* +* If FACT = 'E', then IPIV is an output argument and on exit +* contains the pivot indices from the factorization A = P*L*U +* of the equilibrated matrix A. +* +* EQUED (input or output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration (always true if FACT = 'N'). +* = 'R': Row equilibration, i.e., A has been premultiplied by +* diag(R). +* = 'C': Column equilibration, i.e., A has been postmultiplied +* by diag(C). +* = 'B': Both row and column equilibration, i.e., A has been +* replaced by diag(R) * A * diag(C). +* EQUED is an input argument if FACT = 'F'; otherwise, it is an +* output argument. +* +* R (input or output) REAL array, dimension (N) +* The row scale factors for A. If EQUED = 'R' or 'B', A is +* multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +* is not accessed. R is an input argument if FACT = 'F'; +* otherwise, R is an output argument. If FACT = 'F' and +* EQUED = 'R' or 'B', each element of R must be positive. +* +* C (input or output) REAL array, dimension (N) +* The column scale factors for A. If EQUED = 'C' or 'B', A is +* multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +* is not accessed. C is an input argument if FACT = 'F'; +* otherwise, C is an output argument. If FACT = 'F' and +* EQUED = 'C' or 'B', each element of C must be positive. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, +* if EQUED = 'N', B is not modified; +* if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +* diag(R)*B; +* if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +* overwritten by diag(C)*B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) REAL array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X +* to the original system of equations. Note that A and B are +* modified on exit if EQUED .ne. 'N', and the solution to the +* equilibrated system is inv(diag(C))*X if TRANS = 'N' and +* EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' +* and EQUED = 'R' or 'B'. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) REAL +* The estimate of the reciprocal condition number of the matrix +* A after equilibration (if done). If RCOND is less than the +* machine precision (in particular, if RCOND = 0), the matrix +* is singular to working precision. This condition is +* indicated by a return code of INFO > 0. +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace/output) REAL array, dimension (4*N) +* On exit, WORK(1) contains the reciprocal pivot growth +* factor norm(A)/norm(U). The "max absolute element" norm is +* used. If WORK(1) is much less than 1, then the stability +* of the LU factorization of the (equilibrated) matrix A +* could be poor. This also means that the solution X, condition +* estimator RCOND, and forward error bound FERR could be +* unreliable. If factorization fails with 0 0: if INFO = i, and i is +* <= N: U(i,i) is exactly zero. The factorization has +* been completed, but the factor U is exactly +* singular, so the solution and error bounds +* could not be computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + CHARACTER NORM + INTEGER I, INFEQU, J + REAL AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN, + $ ROWCND, RPVGRW, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANGE, SLANTR + EXTERNAL LSAME, SLAMCH, SLANGE, SLANTR +* .. +* .. External Subroutines .. + EXTERNAL SGECON, SGEEQU, SGERFS, SGETRF, SGETRS, SLACPY, + $ SLAQGE, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -12 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGESVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL SGEEQU( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL SLAQGE( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = R( I )*B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( COLEQU ) THEN + DO 60 J = 1, NRHS + DO 50 I = 1, N + B( I, J ) = C( I )*B( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of A. +* + CALL SLACPY( 'Full', N, N, A, LDA, AF, LDAF ) + CALL SGETRF( N, N, AF, LDAF, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) THEN +* +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + RPVGRW = SLANTR( 'M', 'U', 'N', INFO, INFO, AF, LDAF, + $ WORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = SLANGE( 'M', N, INFO, A, LDA, WORK ) / RPVGRW + END IF + WORK( 1 ) = RPVGRW + RCOND = ZERO + END IF + RETURN + END IF + END IF +* +* Compute the norm of the matrix A and the +* reciprocal pivot growth factor RPVGRW. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = SLANGE( NORM, N, N, A, LDA, WORK ) + RPVGRW = SLANTR( 'M', 'U', 'N', N, N, AF, LDAF, WORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = SLANGE( 'M', N, N, A, LDA, WORK ) / RPVGRW + END IF +* +* Compute the reciprocal of the condition number of A. +* + CALL SGECON( NORM, N, AF, LDAF, ANORM, RCOND, WORK, IWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution matrix X. +* + CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL SGETRS( TRANS, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL SGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X, + $ LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( NOTRAN ) THEN + IF( COLEQU ) THEN + DO 80 J = 1, NRHS + DO 70 I = 1, N + X( I, J ) = C( I )*X( I, J ) + 70 CONTINUE + 80 CONTINUE + DO 90 J = 1, NRHS + FERR( J ) = FERR( J ) / COLCND + 90 CONTINUE + END IF + ELSE IF( ROWEQU ) THEN + DO 110 J = 1, NRHS + DO 100 I = 1, N + X( I, J ) = R( I )*X( I, J ) + 100 CONTINUE + 110 CONTINUE + DO 120 J = 1, NRHS + FERR( J ) = FERR( J ) / ROWCND + 120 CONTINUE + END IF +* + WORK( 1 ) = RPVGRW + RETURN +* +* End of SGESVX +* + END diff --git a/costa/native/external/lapack/sgetc2.f b/costa/native/external/lapack/sgetc2.f new file mode 100644 index 000000000..2456e97e4 --- /dev/null +++ b/costa/native/external/lapack/sgetc2.f @@ -0,0 +1,147 @@ + SUBROUTINE SGETC2( N, A, LDA, IPIV, JPIV, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* SGETC2 computes an LU factorization with complete pivoting of the +* n-by-n matrix A. The factorization has the form A = P * L * U * Q, +* where P and Q are permutation matrices, L is lower triangular with +* unit diagonal elements and U is upper triangular. +* +* This is the Level 2 BLAS algorithm. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA, N) +* On entry, the n-by-n matrix A to be factored. +* On exit, the factors L and U from the factorization +* A = P*L*U*Q; the unit diagonal elements of L are not stored. +* If U(k, k) appears to be less than SMIN, U(k, k) is given the +* value of SMIN, i.e., giving a nonsingular perturbed system. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension(N). +* The pivot indices; for 1 <= i <= N, row i of the +* matrix has been interchanged with row IPIV(i). +* +* JPIV (output) INTEGER array, dimension(N). +* The pivot indices; for 1 <= j <= N, column j of the +* matrix has been interchanged with column JPIV(j). +* +* INFO (output) INTEGER +* = 0: successful exit +* > 0: if INFO = k, U(k, k) is likely to produce owerflow if +* we try to solve for x in Ax = b. So U is perturbed to +* avoid the overflow. +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IP, IPV, J, JP, JPV + REAL BIGNUM, EPS, SMIN, SMLNUM, XMAX +* .. +* .. External Subroutines .. + EXTERNAL SGER, SLABAD, SSWAP +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Set constants to control overflow +* + INFO = 0 + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Factorize A using complete pivoting. +* Set pivots less than SMIN to SMIN. +* + DO 40 I = 1, N - 1 +* +* Find max element in matrix A +* + XMAX = ZERO + DO 20 IP = I, N + DO 10 JP = I, N + IF( ABS( A( IP, JP ) ).GE.XMAX ) THEN + XMAX = ABS( A( IP, JP ) ) + IPV = IP + JPV = JP + END IF + 10 CONTINUE + 20 CONTINUE + IF( I.EQ.1 ) + $ SMIN = MAX( EPS*XMAX, SMLNUM ) +* +* Swap rows +* + IF( IPV.NE.I ) + $ CALL SSWAP( N, A( IPV, 1 ), LDA, A( I, 1 ), LDA ) + IPIV( I ) = IPV +* +* Swap columns +* + IF( JPV.NE.I ) + $ CALL SSWAP( N, A( 1, JPV ), 1, A( 1, I ), 1 ) + JPIV( I ) = JPV +* +* Check for singularity +* + IF( ABS( A( I, I ) ).LT.SMIN ) THEN + INFO = I + A( I, I ) = SMIN + END IF + DO 30 J = I + 1, N + A( J, I ) = A( J, I ) / A( I, I ) + 30 CONTINUE + CALL SGER( N-I, N-I, -ONE, A( I+1, I ), 1, A( I, I+1 ), LDA, + $ A( I+1, I+1 ), LDA ) + 40 CONTINUE +* + IF( ABS( A( N, N ) ).LT.SMIN ) THEN + INFO = N + A( N, N ) = SMIN + END IF +* + RETURN +* +* End of SGETC2 +* + END diff --git a/costa/native/external/lapack/sgetf2.f b/costa/native/external/lapack/sgetf2.f new file mode 100644 index 000000000..ff641eef7 --- /dev/null +++ b/costa/native/external/lapack/sgetf2.f @@ -0,0 +1,135 @@ + SUBROUTINE SGETF2( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* SGETF2 computes an LU factorization of a general m-by-n matrix A +* using partial pivoting with row interchanges. +* +* The factorization has the form +* A = P * L * U +* where P is a permutation matrix, L is lower triangular with unit +* diagonal elements (lower trapezoidal if m > n), and U is upper +* triangular (upper trapezoidal if m < n). +* +* This is the right-looking Level 2 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the m by n matrix to be factored. +* On exit, the factors L and U from the factorization +* A = P*L*U; the unit diagonal elements of L are not stored. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, U(k,k) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER J, JP +* .. +* .. External Functions .. + INTEGER ISAMAX + EXTERNAL ISAMAX +* .. +* .. External Subroutines .. + EXTERNAL SGER, SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGETF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + DO 10 J = 1, MIN( M, N ) +* +* Find pivot and test for singularity. +* + JP = J - 1 + ISAMAX( M-J+1, A( J, J ), 1 ) + IPIV( J ) = JP + IF( A( JP, J ).NE.ZERO ) THEN +* +* Apply the interchange to columns 1:N. +* + IF( JP.NE.J ) + $ CALL SSWAP( N, A( J, 1 ), LDA, A( JP, 1 ), LDA ) +* +* Compute elements J+1:M of J-th column. +* + IF( J.LT.M ) + $ CALL SSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 ) +* + ELSE IF( INFO.EQ.0 ) THEN +* + INFO = J + END IF +* + IF( J.LT.MIN( M, N ) ) THEN +* +* Update trailing submatrix. +* + CALL SGER( M-J, N-J, -ONE, A( J+1, J ), 1, A( J, J+1 ), LDA, + $ A( J+1, J+1 ), LDA ) + END IF + 10 CONTINUE + RETURN +* +* End of SGETF2 +* + END diff --git a/costa/native/external/lapack/sgetrf.f b/costa/native/external/lapack/sgetrf.f new file mode 100644 index 000000000..577ef21d9 --- /dev/null +++ b/costa/native/external/lapack/sgetrf.f @@ -0,0 +1,160 @@ + SUBROUTINE SGETRF( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* SGETRF computes an LU factorization of a general M-by-N matrix A +* using partial pivoting with row interchanges. +* +* The factorization has the form +* A = P * L * U +* where P is a permutation matrix, L is lower triangular with unit +* diagonal elements (lower trapezoidal if m > n), and U is upper +* triangular (upper trapezoidal if m < n). +* +* This is the right-looking Level 3 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix to be factored. +* On exit, the factors L and U from the factorization +* A = P*L*U; the unit diagonal elements of L are not stored. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO, J, JB, NB +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SGETF2, SLASWP, STRSM, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'SGETRF', ' ', M, N, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN +* +* Use unblocked code. +* + CALL SGETF2( M, N, A, LDA, IPIV, INFO ) + ELSE +* +* Use blocked code. +* + DO 20 J = 1, MIN( M, N ), NB + JB = MIN( MIN( M, N )-J+1, NB ) +* +* Factor diagonal and subdiagonal blocks and test for exact +* singularity. +* + CALL SGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO ) +* +* Adjust INFO and the pivot indices. +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + J - 1 + DO 10 I = J, MIN( M, J+JB-1 ) + IPIV( I ) = J - 1 + IPIV( I ) + 10 CONTINUE +* +* Apply interchanges to columns 1:J-1. +* + CALL SLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 ) +* + IF( J+JB.LE.N ) THEN +* +* Apply interchanges to columns J+JB:N. +* + CALL SLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1, + $ IPIV, 1 ) +* +* Compute block row of U. +* + CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB, + $ N-J-JB+1, ONE, A( J, J ), LDA, A( J, J+JB ), + $ LDA ) + IF( J+JB.LE.M ) THEN +* +* Update trailing submatrix. +* + CALL SGEMM( 'No transpose', 'No transpose', M-J-JB+1, + $ N-J-JB+1, JB, -ONE, A( J+JB, J ), LDA, + $ A( J, J+JB ), LDA, ONE, A( J+JB, J+JB ), + $ LDA ) + END IF + END IF + 20 CONTINUE + END IF + RETURN +* +* End of SGETRF +* + END diff --git a/costa/native/external/lapack/sgetri.f b/costa/native/external/lapack/sgetri.f new file mode 100644 index 000000000..ec5932f16 --- /dev/null +++ b/costa/native/external/lapack/sgetri.f @@ -0,0 +1,193 @@ + SUBROUTINE SGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGETRI computes the inverse of a matrix using the LU factorization +* computed by SGETRF. +* +* This method inverts U and then computes inv(A) by solving the system +* inv(A)*L = inv(U) for inv(A). +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the factors L and U from the factorization +* A = P*L*U as computed by SGETRF. +* On exit, if INFO = 0, the inverse of the original matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from SGETRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO=0, then WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimal performance LWORK >= N*NB, where NB is +* the optimal blocksize returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is +* singular and its inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB, + $ NBMIN, NN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SGEMV, SSWAP, STRSM, STRTRI, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NB = ILAENV( 1, 'SGETRI', ' ', N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGETRI', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form inv(U). If INFO > 0 from STRTRI, then U is singular, +* and the inverse is not computed. +* + CALL STRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = MAX( LDWORK*NB, 1 ) + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SGETRI', ' ', N, -1, -1, -1 ) ) + END IF + ELSE + IWS = N + END IF +* +* Solve the equation inv(A)*L = inv(U) for inv(A). +* + IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + DO 20 J = N, 1, -1 +* +* Copy current column of L to WORK and replace with zeros. +* + DO 10 I = J + 1, N + WORK( I ) = A( I, J ) + A( I, J ) = ZERO + 10 CONTINUE +* +* Compute current column of inv(A). +* + IF( J.LT.N ) + $ CALL SGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ), + $ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 ) + 20 CONTINUE + ELSE +* +* Use blocked code. +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 50 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) +* +* Copy current block column of L to WORK and replace with +* zeros. +* + DO 40 JJ = J, J + JB - 1 + DO 30 I = JJ + 1, N + WORK( I+( JJ-J )*LDWORK ) = A( I, JJ ) + A( I, JJ ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Compute current block column of inv(A). +* + IF( J+JB.LE.N ) + $ CALL SGEMM( 'No transpose', 'No transpose', N, JB, + $ N-J-JB+1, -ONE, A( 1, J+JB ), LDA, + $ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA ) + CALL STRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, + $ ONE, WORK( J ), LDWORK, A( 1, J ), LDA ) + 50 CONTINUE + END IF +* +* Apply column interchanges. +* + DO 60 J = N - 1, 1, -1 + JP = IPIV( J ) + IF( JP.NE.J ) + $ CALL SSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 ) + 60 CONTINUE +* + WORK( 1 ) = IWS + RETURN +* +* End of SGETRI +* + END diff --git a/costa/native/external/lapack/sgetrs.f b/costa/native/external/lapack/sgetrs.f new file mode 100644 index 000000000..e04d8c20c --- /dev/null +++ b/costa/native/external/lapack/sgetrs.f @@ -0,0 +1,150 @@ + SUBROUTINE SGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SGETRS solves a system of linear equations +* A * X = B or A' * X = B +* with a general N-by-N matrix A using the LU factorization computed +* by SGETRF. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A'* X = B (Transpose) +* = 'C': A'* X = B (Conjugate transpose = Transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The factors L and U from the factorization A = P*L*U +* as computed by SGETRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from SGETRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLASWP, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGETRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( NOTRAN ) THEN +* +* Solve A * X = B. +* +* Apply row interchanges to the right hand sides. +* + CALL SLASWP( NRHS, B, LDB, 1, N, IPIV, 1 ) +* +* Solve L*X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) +* +* Solve U*X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) + ELSE +* +* Solve A' * X = B. +* +* Solve U'*X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) +* +* Solve L'*X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Lower', 'Transpose', 'Unit', N, NRHS, ONE, + $ A, LDA, B, LDB ) +* +* Apply row interchanges to the solution vectors. +* + CALL SLASWP( NRHS, B, LDB, 1, N, IPIV, -1 ) + END IF +* + RETURN +* +* End of SGETRS +* + END diff --git a/costa/native/external/lapack/sggbak.f b/costa/native/external/lapack/sggbak.f new file mode 100644 index 000000000..3d717752e --- /dev/null +++ b/costa/native/external/lapack/sggbak.f @@ -0,0 +1,216 @@ + SUBROUTINE SGGBAK( JOB, SIDE, N, ILO, IHI, LSCALE, RSCALE, M, V, + $ LDV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOB, SIDE + INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. + REAL LSCALE( * ), RSCALE( * ), V( LDV, * ) +* .. +* +* Purpose +* ======= +* +* SGGBAK forms the right or left eigenvectors of a real generalized +* eigenvalue problem A*x = lambda*B*x, by backward transformation on +* the computed eigenvectors of the balanced pair of matrices output by +* SGGBAL. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the type of backward transformation required: +* = 'N': do nothing, return immediately; +* = 'P': do backward transformation for permutation only; +* = 'S': do backward transformation for scaling only; +* = 'B': do backward transformations for both permutation and +* scaling. +* JOB must be the same as the argument JOB supplied to SGGBAL. +* +* SIDE (input) CHARACTER*1 +* = 'R': V contains right eigenvectors; +* = 'L': V contains left eigenvectors. +* +* N (input) INTEGER +* The number of rows of the matrix V. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* The integers ILO and IHI determined by SGGBAL. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* LSCALE (input) REAL array, dimension (N) +* Details of the permutations and/or scaling factors applied +* to the left side of A and B, as returned by SGGBAL. +* +* RSCALE (input) REAL array, dimension (N) +* Details of the permutations and/or scaling factors applied +* to the right side of A and B, as returned by SGGBAL. +* +* M (input) INTEGER +* The number of columns of the matrix V. M >= 0. +* +* V (input/output) REAL array, dimension (LDV,M) +* On entry, the matrix of right or left eigenvectors to be +* transformed, as returned by STGEVC. +* On exit, V is overwritten by the transformed eigenvectors. +* +* LDV (input) INTEGER +* The leading dimension of the matrix V. LDV >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* See R.C. Ward, Balancing the generalized eigenvalue problem, +* SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFTV, RIGHTV + INTEGER I, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + RIGHTV = LSAME( SIDE, 'R' ) + LEFTV = LSAME( SIDE, 'L' ) +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 ) THEN + INFO = -4 + ELSE IF( IHI.LT.ILO .OR. IHI.GT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -6 + ELSE IF( LDV.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGBAK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( M.EQ.0 ) + $ RETURN + IF( LSAME( JOB, 'N' ) ) + $ RETURN +* + IF( ILO.EQ.IHI ) + $ GO TO 30 +* +* Backward balance +* + IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN +* +* Backward transformation on right eigenvectors +* + IF( RIGHTV ) THEN + DO 10 I = ILO, IHI + CALL SSCAL( M, RSCALE( I ), V( I, 1 ), LDV ) + 10 CONTINUE + END IF +* +* Backward transformation on left eigenvectors +* + IF( LEFTV ) THEN + DO 20 I = ILO, IHI + CALL SSCAL( M, LSCALE( I ), V( I, 1 ), LDV ) + 20 CONTINUE + END IF + END IF +* +* Backward permutation +* + 30 CONTINUE + IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN +* +* Backward permutation on right eigenvectors +* + IF( RIGHTV ) THEN + IF( ILO.EQ.1 ) + $ GO TO 50 +* + DO 40 I = ILO - 1, 1, -1 + K = RSCALE( I ) + IF( K.EQ.I ) + $ GO TO 40 + CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 40 CONTINUE +* + 50 CONTINUE + IF( IHI.EQ.N ) + $ GO TO 70 + DO 60 I = IHI + 1, N + K = RSCALE( I ) + IF( K.EQ.I ) + $ GO TO 60 + CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 60 CONTINUE + END IF +* +* Backward permutation on left eigenvectors +* + 70 CONTINUE + IF( LEFTV ) THEN + IF( ILO.EQ.1 ) + $ GO TO 90 + DO 80 I = ILO - 1, 1, -1 + K = LSCALE( I ) + IF( K.EQ.I ) + $ GO TO 80 + CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 80 CONTINUE +* + 90 CONTINUE + IF( IHI.EQ.N ) + $ GO TO 110 + DO 100 I = IHI + 1, N + K = LSCALE( I ) + IF( K.EQ.I ) + $ GO TO 100 + CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 100 CONTINUE + END IF + END IF +* + 110 CONTINUE +* + RETURN +* +* End of SGGBAK +* + END diff --git a/costa/native/external/lapack/sggbal.f b/costa/native/external/lapack/sggbal.f new file mode 100644 index 000000000..4e0df23f2 --- /dev/null +++ b/costa/native/external/lapack/sggbal.f @@ -0,0 +1,461 @@ + SUBROUTINE SGGBAL( JOB, N, A, LDA, B, LDB, ILO, IHI, LSCALE, + $ RSCALE, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER IHI, ILO, INFO, LDA, LDB, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), LSCALE( * ), + $ RSCALE( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGGBAL balances a pair of general real matrices (A,B). This +* involves, first, permuting A and B by similarity transformations to +* isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N +* elements on the diagonal; and second, applying a diagonal similarity +* transformation to rows and columns ILO to IHI to make the rows +* and columns as close in norm as possible. Both steps are optional. +* +* Balancing may reduce the 1-norm of the matrices, and improve the +* accuracy of the computed eigenvalues and/or eigenvectors in the +* generalized eigenvalue problem A*x = lambda*B*x. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the operations to be performed on A and B: +* = 'N': none: simply set ILO = 1, IHI = N, LSCALE(I) = 1.0 +* and RSCALE(I) = 1.0 for i = 1,...,N. +* = 'P': permute only; +* = 'S': scale only; +* = 'B': both permute and scale. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the input matrix A. +* On exit, A is overwritten by the balanced matrix. +* If JOB = 'N', A is not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) REAL array, dimension (LDB,N) +* On entry, the input matrix B. +* On exit, B is overwritten by the balanced matrix. +* If JOB = 'N', B is not referenced. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* ILO (output) INTEGER +* IHI (output) INTEGER +* ILO and IHI are set to integers such that on exit +* A(i,j) = 0 and B(i,j) = 0 if i > j and +* j = 1,...,ILO-1 or i = IHI+1,...,N. +* If JOB = 'N' or 'S', ILO = 1 and IHI = N. +* +* LSCALE (output) REAL array, dimension (N) +* Details of the permutations and scaling factors applied +* to the left side of A and B. If P(j) is the index of the +* row interchanged with row j, and D(j) +* is the scaling factor applied to row j, then +* LSCALE(j) = P(j) for J = 1,...,ILO-1 +* = D(j) for J = ILO,...,IHI +* = P(j) for J = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* RSCALE (output) REAL array, dimension (N) +* Details of the permutations and scaling factors applied +* to the right side of A and B. If P(j) is the index of the +* column interchanged with column j, and D(j) +* is the scaling factor applied to column j, then +* LSCALE(j) = P(j) for J = 1,...,ILO-1 +* = D(j) for J = ILO,...,IHI +* = P(j) for J = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* WORK (workspace) REAL array, dimension (6*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* See R.C. WARD, Balancing the generalized eigenvalue problem, +* SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 ) + REAL THREE, SCLFAC + PARAMETER ( THREE = 3.0E+0, SCLFAC = 1.0E+1 ) +* .. +* .. Local Scalars .. + INTEGER I, ICAB, IFLOW, IP1, IR, IRAB, IT, J, JC, JP1, + $ K, KOUNT, L, LCAB, LM1, LRAB, LSFMAX, LSFMIN, + $ M, NR, NRP2 + REAL ALPHA, BASL, BETA, CAB, CMAX, COEF, COEF2, + $ COEF5, COR, EW, EWC, GAMMA, PGAMMA, RAB, SFMAX, + $ SFMIN, SUM, T, TA, TB, TC +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SDOT, SLAMCH + EXTERNAL LSAME, ISAMAX, SDOT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG10, MAX, MIN, REAL, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGBAL', -INFO ) + RETURN + END IF +* + K = 1 + L = N +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( JOB, 'N' ) ) THEN + ILO = 1 + IHI = N + DO 10 I = 1, N + LSCALE( I ) = ONE + RSCALE( I ) = ONE + 10 CONTINUE + RETURN + END IF +* + IF( K.EQ.L ) THEN + ILO = 1 + IHI = 1 + LSCALE( 1 ) = ONE + RSCALE( 1 ) = ONE + RETURN + END IF +* + IF( LSAME( JOB, 'S' ) ) + $ GO TO 190 +* + GO TO 30 +* +* Permute the matrices A and B to isolate the eigenvalues. +* +* Find row with one nonzero in columns 1 through L +* + 20 CONTINUE + L = LM1 + IF( L.NE.1 ) + $ GO TO 30 +* + RSCALE( 1 ) = 1 + LSCALE( 1 ) = 1 + GO TO 190 +* + 30 CONTINUE + LM1 = L - 1 + DO 80 I = L, 1, -1 + DO 40 J = 1, LM1 + JP1 = J + 1 + IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO ) + $ GO TO 50 + 40 CONTINUE + J = L + GO TO 70 +* + 50 CONTINUE + DO 60 J = JP1, L + IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO ) + $ GO TO 80 + 60 CONTINUE + J = JP1 - 1 +* + 70 CONTINUE + M = L + IFLOW = 1 + GO TO 160 + 80 CONTINUE + GO TO 100 +* +* Find column with one nonzero in rows K through N +* + 90 CONTINUE + K = K + 1 +* + 100 CONTINUE + DO 150 J = K, L + DO 110 I = K, LM1 + IP1 = I + 1 + IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO ) + $ GO TO 120 + 110 CONTINUE + I = L + GO TO 140 + 120 CONTINUE + DO 130 I = IP1, L + IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO ) + $ GO TO 150 + 130 CONTINUE + I = IP1 - 1 + 140 CONTINUE + M = K + IFLOW = 2 + GO TO 160 + 150 CONTINUE + GO TO 190 +* +* Permute rows M and I +* + 160 CONTINUE + LSCALE( M ) = I + IF( I.EQ.M ) + $ GO TO 170 + CALL SSWAP( N-K+1, A( I, K ), LDA, A( M, K ), LDA ) + CALL SSWAP( N-K+1, B( I, K ), LDB, B( M, K ), LDB ) +* +* Permute columns M and J +* + 170 CONTINUE + RSCALE( M ) = J + IF( J.EQ.M ) + $ GO TO 180 + CALL SSWAP( L, A( 1, J ), 1, A( 1, M ), 1 ) + CALL SSWAP( L, B( 1, J ), 1, B( 1, M ), 1 ) +* + 180 CONTINUE + GO TO ( 20, 90 )IFLOW +* + 190 CONTINUE + ILO = K + IHI = L +* + IF( ILO.EQ.IHI ) + $ RETURN +* + IF( LSAME( JOB, 'P' ) ) + $ RETURN +* +* Balance the submatrix in rows ILO to IHI. +* + NR = IHI - ILO + 1 + DO 200 I = ILO, IHI + RSCALE( I ) = ZERO + LSCALE( I ) = ZERO +* + WORK( I ) = ZERO + WORK( I+N ) = ZERO + WORK( I+2*N ) = ZERO + WORK( I+3*N ) = ZERO + WORK( I+4*N ) = ZERO + WORK( I+5*N ) = ZERO + 200 CONTINUE +* +* Compute right side vector in resulting linear equations +* + BASL = LOG10( SCLFAC ) + DO 240 I = ILO, IHI + DO 230 J = ILO, IHI + TB = B( I, J ) + TA = A( I, J ) + IF( TA.EQ.ZERO ) + $ GO TO 210 + TA = LOG10( ABS( TA ) ) / BASL + 210 CONTINUE + IF( TB.EQ.ZERO ) + $ GO TO 220 + TB = LOG10( ABS( TB ) ) / BASL + 220 CONTINUE + WORK( I+4*N ) = WORK( I+4*N ) - TA - TB + WORK( J+5*N ) = WORK( J+5*N ) - TA - TB + 230 CONTINUE + 240 CONTINUE +* + COEF = ONE / REAL( 2*NR ) + COEF2 = COEF*COEF + COEF5 = HALF*COEF2 + NRP2 = NR + 2 + BETA = ZERO + IT = 1 +* +* Start generalized conjugate gradient iteration +* + 250 CONTINUE +* + GAMMA = SDOT( NR, WORK( ILO+4*N ), 1, WORK( ILO+4*N ), 1 ) + + $ SDOT( NR, WORK( ILO+5*N ), 1, WORK( ILO+5*N ), 1 ) +* + EW = ZERO + EWC = ZERO + DO 260 I = ILO, IHI + EW = EW + WORK( I+4*N ) + EWC = EWC + WORK( I+5*N ) + 260 CONTINUE +* + GAMMA = COEF*GAMMA - COEF2*( EW**2+EWC**2 ) - COEF5*( EW-EWC )**2 + IF( GAMMA.EQ.ZERO ) + $ GO TO 350 + IF( IT.NE.1 ) + $ BETA = GAMMA / PGAMMA + T = COEF5*( EWC-THREE*EW ) + TC = COEF5*( EW-THREE*EWC ) +* + CALL SSCAL( NR, BETA, WORK( ILO ), 1 ) + CALL SSCAL( NR, BETA, WORK( ILO+N ), 1 ) +* + CALL SAXPY( NR, COEF, WORK( ILO+4*N ), 1, WORK( ILO+N ), 1 ) + CALL SAXPY( NR, COEF, WORK( ILO+5*N ), 1, WORK( ILO ), 1 ) +* + DO 270 I = ILO, IHI + WORK( I ) = WORK( I ) + TC + WORK( I+N ) = WORK( I+N ) + T + 270 CONTINUE +* +* Apply matrix to vector +* + DO 300 I = ILO, IHI + KOUNT = 0 + SUM = ZERO + DO 290 J = ILO, IHI + IF( A( I, J ).EQ.ZERO ) + $ GO TO 280 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( J ) + 280 CONTINUE + IF( B( I, J ).EQ.ZERO ) + $ GO TO 290 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( J ) + 290 CONTINUE + WORK( I+2*N ) = REAL( KOUNT )*WORK( I+N ) + SUM + 300 CONTINUE +* + DO 330 J = ILO, IHI + KOUNT = 0 + SUM = ZERO + DO 320 I = ILO, IHI + IF( A( I, J ).EQ.ZERO ) + $ GO TO 310 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( I+N ) + 310 CONTINUE + IF( B( I, J ).EQ.ZERO ) + $ GO TO 320 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( I+N ) + 320 CONTINUE + WORK( J+3*N ) = REAL( KOUNT )*WORK( J ) + SUM + 330 CONTINUE +* + SUM = SDOT( NR, WORK( ILO+N ), 1, WORK( ILO+2*N ), 1 ) + + $ SDOT( NR, WORK( ILO ), 1, WORK( ILO+3*N ), 1 ) + ALPHA = GAMMA / SUM +* +* Determine correction to current iteration +* + CMAX = ZERO + DO 340 I = ILO, IHI + COR = ALPHA*WORK( I+N ) + IF( ABS( COR ).GT.CMAX ) + $ CMAX = ABS( COR ) + LSCALE( I ) = LSCALE( I ) + COR + COR = ALPHA*WORK( I ) + IF( ABS( COR ).GT.CMAX ) + $ CMAX = ABS( COR ) + RSCALE( I ) = RSCALE( I ) + COR + 340 CONTINUE + IF( CMAX.LT.HALF ) + $ GO TO 350 +* + CALL SAXPY( NR, -ALPHA, WORK( ILO+2*N ), 1, WORK( ILO+4*N ), 1 ) + CALL SAXPY( NR, -ALPHA, WORK( ILO+3*N ), 1, WORK( ILO+5*N ), 1 ) +* + PGAMMA = GAMMA + IT = IT + 1 + IF( IT.LE.NRP2 ) + $ GO TO 250 +* +* End generalized conjugate gradient iteration +* + 350 CONTINUE + SFMIN = SLAMCH( 'S' ) + SFMAX = ONE / SFMIN + LSFMIN = INT( LOG10( SFMIN ) / BASL+ONE ) + LSFMAX = INT( LOG10( SFMAX ) / BASL ) + DO 360 I = ILO, IHI + IRAB = ISAMAX( N-ILO+1, A( I, ILO ), LDA ) + RAB = ABS( A( I, IRAB+ILO-1 ) ) + IRAB = ISAMAX( N-ILO+1, B( I, ILO ), LDA ) + RAB = MAX( RAB, ABS( B( I, IRAB+ILO-1 ) ) ) + LRAB = INT( LOG10( RAB+SFMIN ) / BASL+ONE ) + IR = LSCALE( I ) + SIGN( HALF, LSCALE( I ) ) + IR = MIN( MAX( IR, LSFMIN ), LSFMAX, LSFMAX-LRAB ) + LSCALE( I ) = SCLFAC**IR + ICAB = ISAMAX( IHI, A( 1, I ), 1 ) + CAB = ABS( A( ICAB, I ) ) + ICAB = ISAMAX( IHI, B( 1, I ), 1 ) + CAB = MAX( CAB, ABS( B( ICAB, I ) ) ) + LCAB = INT( LOG10( CAB+SFMIN ) / BASL+ONE ) + JC = RSCALE( I ) + SIGN( HALF, RSCALE( I ) ) + JC = MIN( MAX( JC, LSFMIN ), LSFMAX, LSFMAX-LCAB ) + RSCALE( I ) = SCLFAC**JC + 360 CONTINUE +* +* Row scaling of matrices A and B +* + DO 370 I = ILO, IHI + CALL SSCAL( N-ILO+1, LSCALE( I ), A( I, ILO ), LDA ) + CALL SSCAL( N-ILO+1, LSCALE( I ), B( I, ILO ), LDB ) + 370 CONTINUE +* +* Column scaling of matrices A and B +* + DO 380 J = ILO, IHI + CALL SSCAL( IHI, RSCALE( J ), A( 1, J ), 1 ) + CALL SSCAL( IHI, RSCALE( J ), B( 1, J ), 1 ) + 380 CONTINUE +* + RETURN +* +* End of SGGBAL +* + END diff --git a/costa/native/external/lapack/sgges.f b/costa/native/external/lapack/sgges.f new file mode 100644 index 000000000..727b9da90 --- /dev/null +++ b/costa/native/external/lapack/sgges.f @@ -0,0 +1,548 @@ + SUBROUTINE SGGES( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, LDB, + $ SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, + $ LDVSR, WORK, LWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR, SORT + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), VSL( LDVSL, * ), + $ VSR( LDVSR, * ), WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELCTG + EXTERNAL SELCTG +* .. +* +* Purpose +* ======= +* +* SGGES computes for a pair of N-by-N real nonsymmetric matrices (A,B), +* the generalized eigenvalues, the generalized real Schur form (S,T), +* optionally, the left and/or right matrices of Schur vectors (VSL and +* VSR). This gives the generalized Schur factorization +* +* (A,B) = ( (VSL)*S*(VSR)**T, (VSL)*T*(VSR)**T ) +* +* Optionally, it also orders the eigenvalues so that a selected cluster +* of eigenvalues appears in the leading diagonal blocks of the upper +* quasi-triangular matrix S and the upper triangular matrix T.The +* leading columns of VSL and VSR then form an orthonormal basis for the +* corresponding left and right eigenspaces (deflating subspaces). +* +* (If only the generalized eigenvalues are needed, use the driver +* SGGEV instead, which is faster.) +* +* A generalized eigenvalue for a pair of matrices (A,B) is a scalar w +* or a ratio alpha/beta = w, such that A - w*B is singular. It is +* usually represented as the pair (alpha,beta), as there is a +* reasonable interpretation for beta=0 or both being zero. +* +* A pair of matrices (S,T) is in generalized real Schur form if T is +* upper triangular with non-negative diagonal and S is block upper +* triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond +* to real generalized eigenvalues, while 2-by-2 blocks of S will be +* "standardized" by making the corresponding elements of T have the +* form: +* [ a 0 ] +* [ 0 b ] +* +* and the pair of corresponding 2-by-2 blocks in S and T will have a +* complex conjugate pair of generalized eigenvalues. +* +* +* Arguments +* ========= +* +* JOBVSL (input) CHARACTER*1 +* = 'N': do not compute the left Schur vectors; +* = 'V': compute the left Schur vectors. +* +* JOBVSR (input) CHARACTER*1 +* = 'N': do not compute the right Schur vectors; +* = 'V': compute the right Schur vectors. +* +* SORT (input) CHARACTER*1 +* Specifies whether or not to order the eigenvalues on the +* diagonal of the generalized Schur form. +* = 'N': Eigenvalues are not ordered; +* = 'S': Eigenvalues are ordered (see SELCTG); +* +* SELCTG (input) LOGICAL FUNCTION of three REAL arguments +* SELCTG must be declared EXTERNAL in the calling subroutine. +* If SORT = 'N', SELCTG is not referenced. +* If SORT = 'S', SELCTG is used to select eigenvalues to sort +* to the top left of the Schur form. +* An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if +* SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either +* one of a complex conjugate pair of eigenvalues is selected, +* then both complex eigenvalues are selected. +* +* Note that in the ill-conditioned case, a selected complex +* eigenvalue may no longer satisfy SELCTG(ALPHAR(j),ALPHAI(j), +* BETA(j)) = .TRUE. after ordering. INFO is to be set to N+2 +* in this case. +* +* N (input) INTEGER +* The order of the matrices A, B, VSL, and VSR. N >= 0. +* +* A (input/output) REAL array, dimension (LDA, N) +* On entry, the first of the pair of matrices. +* On exit, A has been overwritten by its generalized Schur +* form S. +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) REAL array, dimension (LDB, N) +* On entry, the second of the pair of matrices. +* On exit, B has been overwritten by its generalized Schur +* form T. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* SDIM (output) INTEGER +* If SORT = 'N', SDIM = 0. +* If SORT = 'S', SDIM = number of eigenvalues (after sorting) +* for which SELCTG is true. (Complex conjugate pairs for which +* SELCTG is true for either eigenvalue count as 2.) +* +* ALPHAR (output) REAL array, dimension (N) +* ALPHAI (output) REAL array, dimension (N) +* BETA (output) REAL array, dimension (N) +* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +* be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i, +* and BETA(j),j=1,...,N are the diagonals of the complex Schur +* form (S,T) that would result if the 2-by-2 diagonal blocks of +* the real Schur form of (A,B) were further reduced to +* triangular form using 2-by-2 complex unitary transformations. +* If ALPHAI(j) is zero, then the j-th eigenvalue is real; if +* positive, then the j-th and (j+1)-st eigenvalues are a +* complex conjugate pair, with ALPHAI(j+1) negative. +* +* Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +* may easily over- or underflow, and BETA(j) may even be zero. +* Thus, the user should avoid naively computing the ratio. +* However, ALPHAR and ALPHAI will be always less than and +* usually comparable with norm(A) in magnitude, and BETA always +* less than and usually comparable with norm(B). +* +* VSL (output) REAL array, dimension (LDVSL,N) +* If JOBVSL = 'V', VSL will contain the left Schur vectors. +* Not referenced if JOBVSL = 'N'. +* +* LDVSL (input) INTEGER +* The leading dimension of the matrix VSL. LDVSL >=1, and +* if JOBVSL = 'V', LDVSL >= N. +* +* VSR (output) REAL array, dimension (LDVSR,N) +* If JOBVSR = 'V', VSR will contain the right Schur vectors. +* Not referenced if JOBVSR = 'N'. +* +* LDVSR (input) INTEGER +* The leading dimension of the matrix VSR. LDVSR >= 1, and +* if JOBVSR = 'V', LDVSR >= N. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 8*N+16. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* BWORK (workspace) LOGICAL array, dimension (N) +* Not referenced if SORT = 'N'. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* = 1,...,N: +* The QZ iteration failed. (A,B) are not in Schur +* form, but ALPHAR(j), ALPHAI(j), and BETA(j) should +* be correct for j=INFO+1,...,N. +* > N: =N+1: other than QZ iteration failed in SHGEQZ. +* =N+2: after reordering, roundoff changed values of +* some complex eigenvalues so that leading +* eigenvalues in the Generalized Schur form no +* longer satisfy SELCTG=.TRUE. This could also +* be caused due to scaling. +* =N+3: reordering failed in STGSEN. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL, + $ LQUERY, LST2SL, WANTST + INTEGER I, ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, + $ ILO, IP, IRIGHT, IROWS, ITAU, IWRK, MAXWRK, + $ MINWRK + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PVSL, + $ PVSR, SAFMAX, SAFMIN, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + REAL DIF( 2 ) +* .. +* .. External Subroutines .. + EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLABAD, + $ SLACPY, SLASCL, SLASET, SORGQR, SORMQR, STGSEN, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* + WANTST = LSAME( SORT, 'S' ) +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -15 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -17 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MINWRK = 7*( N+1 ) + 16 + MAXWRK = 7*( N+1 ) + N*ILAENV( 1, 'SGEQRF', ' ', N, 1, N, 0 ) + + $ 16 + IF( ILVSL ) THEN + MAXWRK = MAX( MAXWRK, 7*( N+1 )+N* + $ ILAENV( 1, 'SORGQR', ' ', N, 1, N, -1 ) ) + END IF + WORK( 1 ) = MAXWRK + END IF +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) + $ INFO = -19 + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGES ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SAFMIN = SLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + CALL SLABAD( SAFMIN, SAFMAX ) + SMLNUM = SQRT( SAFMIN ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL SLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = SLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL SLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (Workspace: need 6*N + 2*N space for storing balancing factors) +* + ILEFT = 1 + IRIGHT = N + 1 + IWRK = IRIGHT + N + CALL SGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = IWRK + IWRK = ITAU + IROWS + CALL SGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* (Workspace: need N, prefer N*NB) +* + CALL SORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VSL +* (Workspace: need N, prefer N*NB) +* + IF( ILVSL ) THEN + CALL SLASET( 'Full', N, N, ZERO, ONE, VSL, LDVSL ) + CALL SLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + CALL SORGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VSR +* + IF( ILVSR ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + CALL SGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IERR ) +* +* Perform QZ algorithm, computing Schur vectors if desired +* (Workspace: need N) +* + IWRK = ITAU + CALL SHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 40 + END IF +* +* Sort eigenvalues ALPHA/BETA if desired +* (Workspace: need 4*N+16 ) +* + SDIM = 0 + IF( WANTST ) THEN +* +* Undo scaling on eigenvalues before SELCTGing +* + IF( ILASCL ) THEN + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, + $ IERR ) + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, + $ IERR ) + END IF + IF( ILBSCL ) + $ CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* +* Select eigenvalues +* + DO 10 I = 1, N + BWORK( I ) = SELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) ) + 10 CONTINUE +* + CALL STGSEN( 0, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, ALPHAR, + $ ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, SDIM, PVSL, + $ PVSR, DIF, WORK( IWRK ), LWORK-IWRK+1, IDUM, 1, + $ IERR ) + IF( IERR.EQ.1 ) + $ INFO = N + 3 +* + END IF +* +* Apply back-permutation to VSL and VSR +* (Workspace: none needed) +* + IF( ILVSL ) + $ CALL SGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSL, LDVSL, IERR ) +* + IF( ILVSR ) + $ CALL SGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSR, LDVSR, IERR ) +* +* Check if unscaling would cause over/underflow, if so, rescale +* (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of +* B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I) +* + IF( ILASCL )THEN + DO 50 I = 1, N + IF( ALPHAI( I ).NE.ZERO ) THEN + IF( ( ALPHAR( I )/SAFMAX ).GT.( ANRMTO/ANRM ) .OR. + $ ( SAFMIN/ALPHAR( I ) ).GT.( ANRM/ANRMTO ) ) THEN + WORK( 1 ) = ABS( A( I, I )/ALPHAR( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + ELSE IF( ( ALPHAI( I )/SAFMAX ).GT.( ANRMTO/ANRM ) .OR. + $ ( SAFMIN/ALPHAI( I ) ).GT.( ANRM/ANRMTO ) ) THEN + WORK( 1 ) = ABS( A( I, I+1 )/ALPHAI( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + END IF + END IF + 50 CONTINUE + END IF +* + IF( ILBSCL )THEN + DO 60 I = 1, N + IF( ALPHAI( I ).NE.ZERO ) THEN + IF( ( BETA( I )/SAFMAX ).GT.( BNRMTO/BNRM ) .OR. + $ ( SAFMIN/BETA( I ) ).GT.( BNRM/BNRMTO ) ) THEN + WORK( 1 ) = ABS(B( I, I )/BETA( I )) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + END IF + END IF + 60 CONTINUE + END IF +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL SLASCL( 'H', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR ) + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR ) + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL SLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR ) + CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + IF( WANTST ) THEN +* +* Check if reordering is correct +* + LASTSL = .TRUE. + LST2SL = .TRUE. + SDIM = 0 + IP = 0 + DO 30 I = 1, N + CURSL = SELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) ) + IF( ALPHAI( I ).EQ.ZERO ) THEN + IF( CURSL ) + $ SDIM = SDIM + 1 + IP = 0 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + ELSE + IF( IP.EQ.1 ) THEN +* +* Last eigenvalue of conjugate pair +* + CURSL = CURSL .OR. LASTSL + LASTSL = CURSL + IF( CURSL ) + $ SDIM = SDIM + 2 + IP = -1 + IF( CURSL .AND. .NOT.LST2SL ) + $ INFO = N + 2 + ELSE +* +* First eigenvalue of conjugate pair +* + IP = 1 + END IF + END IF + LST2SL = LASTSL + LASTSL = CURSL + 30 CONTINUE +* + END IF +* + 40 CONTINUE +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of SGGES +* + END diff --git a/costa/native/external/lapack/sggesx.f b/costa/native/external/lapack/sggesx.f new file mode 100644 index 000000000..e7f1a33ea --- /dev/null +++ b/costa/native/external/lapack/sggesx.f @@ -0,0 +1,640 @@ + SUBROUTINE SGGESX( JOBVSL, JOBVSR, SORT, SELCTG, SENSE, N, A, LDA, + $ B, LDB, SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, + $ VSR, LDVSR, RCONDE, RCONDV, WORK, LWORK, IWORK, + $ LIWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR, SENSE, SORT + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LIWORK, LWORK, N, + $ SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + INTEGER IWORK( * ) + REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), RCONDE( 2 ), + $ RCONDV( 2 ), VSL( LDVSL, * ), VSR( LDVSR, * ), + $ WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELCTG + EXTERNAL SELCTG +* .. +* +* Purpose +* ======= +* +* SGGESX computes for a pair of N-by-N real nonsymmetric matrices +* (A,B), the generalized eigenvalues, the real Schur form (S,T), and, +* optionally, the left and/or right matrices of Schur vectors (VSL and +* VSR). This gives the generalized Schur factorization +* +* (A,B) = ( (VSL) S (VSR)**T, (VSL) T (VSR)**T ) +* +* Optionally, it also orders the eigenvalues so that a selected cluster +* of eigenvalues appears in the leading diagonal blocks of the upper +* quasi-triangular matrix S and the upper triangular matrix T; computes +* a reciprocal condition number for the average of the selected +* eigenvalues (RCONDE); and computes a reciprocal condition number for +* the right and left deflating subspaces corresponding to the selected +* eigenvalues (RCONDV). The leading columns of VSL and VSR then form +* an orthonormal basis for the corresponding left and right eigenspaces +* (deflating subspaces). +* +* A generalized eigenvalue for a pair of matrices (A,B) is a scalar w +* or a ratio alpha/beta = w, such that A - w*B is singular. It is +* usually represented as the pair (alpha,beta), as there is a +* reasonable interpretation for beta=0 or for both being zero. +* +* A pair of matrices (S,T) is in generalized real Schur form if T is +* upper triangular with non-negative diagonal and S is block upper +* triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond +* to real generalized eigenvalues, while 2-by-2 blocks of S will be +* "standardized" by making the corresponding elements of T have the +* form: +* [ a 0 ] +* [ 0 b ] +* +* and the pair of corresponding 2-by-2 blocks in S and T will have a +* complex conjugate pair of generalized eigenvalues. +* +* +* Arguments +* ========= +* +* JOBVSL (input) CHARACTER*1 +* = 'N': do not compute the left Schur vectors; +* = 'V': compute the left Schur vectors. +* +* JOBVSR (input) CHARACTER*1 +* = 'N': do not compute the right Schur vectors; +* = 'V': compute the right Schur vectors. +* +* SORT (input) CHARACTER*1 +* Specifies whether or not to order the eigenvalues on the +* diagonal of the generalized Schur form. +* = 'N': Eigenvalues are not ordered; +* = 'S': Eigenvalues are ordered (see SELCTG). +* +* SELCTG (input) LOGICAL FUNCTION of three REAL arguments +* SELCTG must be declared EXTERNAL in the calling subroutine. +* If SORT = 'N', SELCTG is not referenced. +* If SORT = 'S', SELCTG is used to select eigenvalues to sort +* to the top left of the Schur form. +* An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if +* SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either +* one of a complex conjugate pair of eigenvalues is selected, +* then both complex eigenvalues are selected. +* Note that a selected complex eigenvalue may no longer satisfy +* SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) = .TRUE. after ordering, +* since ordering may change the value of complex eigenvalues +* (especially if the eigenvalue is ill-conditioned), in this +* case INFO is set to N+3. +* +* SENSE (input) CHARACTER +* Determines which reciprocal condition numbers are computed. +* = 'N' : None are computed; +* = 'E' : Computed for average of selected eigenvalues only; +* = 'V' : Computed for selected deflating subspaces only; +* = 'B' : Computed for both. +* If SENSE = 'E', 'V', or 'B', SORT must equal 'S'. +* +* N (input) INTEGER +* The order of the matrices A, B, VSL, and VSR. N >= 0. +* +* A (input/output) REAL array, dimension (LDA, N) +* On entry, the first of the pair of matrices. +* On exit, A has been overwritten by its generalized Schur +* form S. +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) REAL array, dimension (LDB, N) +* On entry, the second of the pair of matrices. +* On exit, B has been overwritten by its generalized Schur +* form T. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* SDIM (output) INTEGER +* If SORT = 'N', SDIM = 0. +* If SORT = 'S', SDIM = number of eigenvalues (after sorting) +* for which SELCTG is true. (Complex conjugate pairs for which +* SELCTG is true for either eigenvalue count as 2.) +* +* ALPHAR (output) REAL array, dimension (N) +* ALPHAI (output) REAL array, dimension (N) +* BETA (output) REAL array, dimension (N) +* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +* be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i +* and BETA(j),j=1,...,N are the diagonals of the complex Schur +* form (S,T) that would result if the 2-by-2 diagonal blocks of +* the real Schur form of (A,B) were further reduced to +* triangular form using 2-by-2 complex unitary transformations. +* If ALPHAI(j) is zero, then the j-th eigenvalue is real; if +* positive, then the j-th and (j+1)-st eigenvalues are a +* complex conjugate pair, with ALPHAI(j+1) negative. +* +* Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +* may easily over- or underflow, and BETA(j) may even be zero. +* Thus, the user should avoid naively computing the ratio. +* However, ALPHAR and ALPHAI will be always less than and +* usually comparable with norm(A) in magnitude, and BETA always +* less than and usually comparable with norm(B). +* +* VSL (output) REAL array, dimension (LDVSL,N) +* If JOBVSL = 'V', VSL will contain the left Schur vectors. +* Not referenced if JOBVSL = 'N'. +* +* LDVSL (input) INTEGER +* The leading dimension of the matrix VSL. LDVSL >=1, and +* if JOBVSL = 'V', LDVSL >= N. +* +* VSR (output) REAL array, dimension (LDVSR,N) +* If JOBVSR = 'V', VSR will contain the right Schur vectors. +* Not referenced if JOBVSR = 'N'. +* +* LDVSR (input) INTEGER +* The leading dimension of the matrix VSR. LDVSR >= 1, and +* if JOBVSR = 'V', LDVSR >= N. +* +* RCONDE (output) REAL array, dimension ( 2 ) +* If SENSE = 'E' or 'B', RCONDE(1) and RCONDE(2) contain the +* reciprocal condition numbers for the average of the selected +* eigenvalues. +* Not referenced if SENSE = 'N' or 'V'. +* +* RCONDV (output) REAL array, dimension ( 2 ) +* If SENSE = 'V' or 'B', RCONDV(1) and RCONDV(2) contain the +* reciprocal condition numbers for the selected deflating +* subspaces. +* Not referenced if SENSE = 'N' or 'E'. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 8*(N+1)+16. +* If SENSE = 'E', 'V', or 'B', +* LWORK >= MAX( 8*(N+1)+16, 2*SDIM*(N-SDIM) ). +* +* IWORK (workspace) INTEGER array, dimension (LIWORK) +* Not referenced if SENSE = 'N'. +* +* LIWORK (input) INTEGER +* The dimension of the array WORK. LIWORK >= N+6. +* +* BWORK (workspace) LOGICAL array, dimension (N) +* Not referenced if SORT = 'N'. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* = 1,...,N: +* The QZ iteration failed. (A,B) are not in Schur +* form, but ALPHAR(j), ALPHAI(j), and BETA(j) should +* be correct for j=INFO+1,...,N. +* > N: =N+1: other than QZ iteration failed in SHGEQZ +* =N+2: after reordering, roundoff changed values of +* some complex eigenvalues so that leading +* eigenvalues in the Generalized Schur form no +* longer satisfy SELCTG=.TRUE. This could also +* be caused due to scaling. +* =N+3: reordering failed in STGSEN. +* +* Further details +* =============== +* +* An approximate (asymptotic) bound on the average absolute error of +* the selected eigenvalues is +* +* EPS * norm((A, B)) / RCONDE( 1 ). +* +* An approximate (asymptotic) bound on the maximum angular error in +* the computed deflating subspaces is +* +* EPS * norm((A, B)) / RCONDV( 2 ). +* +* See LAPACK User's Guide, section 4.11 for more information. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL, + $ LST2SL, WANTSB, WANTSE, WANTSN, WANTST, WANTSV + INTEGER I, ICOLS, IERR, IHI, IJOB, IJOBVL, IJOBVR, + $ ILEFT, ILO, IP, IRIGHT, IROWS, ITAU, IWRK, + $ LIWMIN, MAXWRK, MINWRK + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PL, + $ PR, SAFMAX, SAFMIN, SMLNUM +* .. +* .. Local Arrays .. + REAL DIF( 2 ) +* .. +* .. External Subroutines .. + EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLABAD, + $ SLACPY, SLASCL, SLASET, SORGQR, SORMQR, STGSEN, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* + WANTST = LSAME( SORT, 'S' ) + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) + IF( WANTSN ) THEN + IJOB = 0 + IWORK( 1 ) = 1 + ELSE IF( WANTSE ) THEN + IJOB = 1 + ELSE IF( WANTSV ) THEN + IJOB = 2 + ELSE IF( WANTSB ) THEN + IJOB = 4 + END IF +* +* Test the input arguments +* + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR. + $ ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -16 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -18 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. LWORK.GE.1 ) THEN + MINWRK = 8*( N+1 ) + 16 + MAXWRK = 7*( N+1 ) + N*ILAENV( 1, 'SGEQRF', ' ', N, 1, N, 0 ) + + $ 16 + IF( ILVSL ) THEN + MAXWRK = MAX( MAXWRK, 8*( N+1 )+N* + $ ILAENV( 1, 'SORGQR', ' ', N, 1, N, -1 )+16 ) + END IF + WORK( 1 ) = MAXWRK + END IF + IF( .NOT.WANTSN ) THEN + LIWMIN = 1 + ELSE + LIWMIN = N + 6 + END IF + IWORK( 1 ) = LIWMIN +* + IF( INFO.EQ.0 .AND. LWORK.LT.MINWRK ) THEN + INFO = -22 + ELSE IF( INFO.EQ.0 .AND. IJOB.GE.1 ) THEN + IF( LIWORK.LT.LIWMIN ) + $ INFO = -24 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGESX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SAFMIN = SLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + CALL SLABAD( SAFMIN, SAFMAX ) + SMLNUM = SQRT( SAFMIN ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL SLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = SLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL SLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (Workspace: need 6*N + 2*N for permutation parameters) +* + ILEFT = 1 + IRIGHT = N + 1 + IWRK = IRIGHT + N + CALL SGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = IWRK + IWRK = ITAU + IROWS + CALL SGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* (Workspace: need N, prefer N*NB) +* + CALL SORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VSL +* (Workspace: need N, prefer N*NB) +* + IF( ILVSL ) THEN + CALL SLASET( 'Full', N, N, ZERO, ONE, VSL, LDVSL ) + CALL SLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + CALL SORGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VSR +* + IF( ILVSR ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + CALL SGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IERR ) +* + SDIM = 0 +* +* Perform QZ algorithm, computing Schur vectors if desired +* (Workspace: need N) +* + IWRK = ITAU + CALL SHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 50 + END IF +* +* Sort eigenvalues ALPHA/BETA and compute the reciprocal of +* condition number(s) +* (Workspace: If IJOB >= 1, need MAX( 8*(N+1), 2*SDIM*(N-SDIM) ) +* otherwise, need 8*(N+1) ) +* + IF( WANTST ) THEN +* +* Undo scaling on eigenvalues before SELCTGing +* + IF( ILASCL ) THEN + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, + $ IERR ) + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, + $ IERR ) + END IF + IF( ILBSCL ) + $ CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* +* Select eigenvalues +* + DO 10 I = 1, N + BWORK( I ) = SELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues, transform Generalized Schur vectors, and +* compute reciprocal condition numbers +* + CALL STGSEN( IJOB, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, + $ SDIM, PL, PR, DIF, WORK( IWRK ), LWORK-IWRK+1, + $ IWORK, LIWORK, IERR ) +* + IF( IJOB.GE.1 ) + $ MAXWRK = MAX( MAXWRK, 2*SDIM*( N-SDIM ) ) + IF( IERR.EQ.-22 ) THEN +* +* not enough real workspace +* + INFO = -22 + ELSE + RCONDE( 1 ) = PL + RCONDE( 2 ) = PR + RCONDV( 1 ) = DIF( 1 ) + RCONDV( 2 ) = DIF( 2 ) + IF( IERR.EQ.1 ) + $ INFO = N + 3 + END IF +* + END IF +* +* Apply permutation to VSL and VSR +* (Workspace: none needed) +* + IF( ILVSL ) + $ CALL SGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSL, LDVSL, IERR ) +* + IF( ILVSR ) + $ CALL SGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSR, LDVSR, IERR ) +* +* Check if unscaling would cause over/underflow, if so, rescale +* (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of +* B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I) +* + IF( ILASCL ) THEN + DO 20 I = 1, N + IF( ALPHAI( I ).NE.ZERO ) THEN + IF( ( ALPHAR( I ) / SAFMAX ).GT.( ANRMTO / ANRM ) .OR. + $ ( SAFMIN / ALPHAR( I ) ).GT.( ANRM / ANRMTO ) ) + $ THEN + WORK( 1 ) = ABS( A( I, I ) / ALPHAR( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + ELSE IF( ( ALPHAI( I ) / SAFMAX ).GT.( ANRMTO / ANRM ) + $ .OR. ( SAFMIN / ALPHAI( I ) ).GT.( ANRM / ANRMTO ) ) + $ THEN + WORK( 1 ) = ABS( A( I, I+1 ) / ALPHAI( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + END IF + END IF + 20 CONTINUE + END IF +* + IF( ILBSCL ) THEN + DO 25 I = 1, N + IF( ALPHAI( I ).NE.ZERO ) THEN + IF( ( BETA( I ) / SAFMAX ).GT.( BNRMTO / BNRM ) .OR. + $ ( SAFMIN / BETA( I ) ).GT.( BNRM / BNRMTO ) ) THEN + WORK( 1 ) = ABS( B( I, I ) / BETA( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + END IF + END IF + 25 CONTINUE + END IF +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL SLASCL( 'H', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR ) + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR ) + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL SLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR ) + CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + 30 CONTINUE +* + IF( WANTST ) THEN +* +* Check if reordering is correct +* + LASTSL = .TRUE. + LST2SL = .TRUE. + SDIM = 0 + IP = 0 + DO 40 I = 1, N + CURSL = SELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) ) + IF( ALPHAI( I ).EQ.ZERO ) THEN + IF( CURSL ) + $ SDIM = SDIM + 1 + IP = 0 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + ELSE + IF( IP.EQ.1 ) THEN +* +* Last eigenvalue of conjugate pair +* + CURSL = CURSL .OR. LASTSL + LASTSL = CURSL + IF( CURSL ) + $ SDIM = SDIM + 2 + IP = -1 + IF( CURSL .AND. .NOT.LST2SL ) + $ INFO = N + 2 + ELSE +* +* First eigenvalue of conjugate pair +* + IP = 1 + END IF + END IF + LST2SL = LASTSL + LASTSL = CURSL + 40 CONTINUE +* + END IF +* + 50 CONTINUE +* + WORK( 1 ) = MAXWRK + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of SGGESX +* + END diff --git a/costa/native/external/lapack/sggev.f b/costa/native/external/lapack/sggev.f new file mode 100644 index 000000000..3e75fc8a7 --- /dev/null +++ b/costa/native/external/lapack/sggev.f @@ -0,0 +1,482 @@ + SUBROUTINE SGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI, + $ BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGGEV computes for a pair of N-by-N real nonsymmetric matrices (A,B) +* the generalized eigenvalues, and optionally, the left and/or right +* generalized eigenvectors. +* +* A generalized eigenvalue for a pair of matrices (A,B) is a scalar +* lambda or a ratio alpha/beta = lambda, such that A - lambda*B is +* singular. It is usually represented as the pair (alpha,beta), as +* there is a reasonable interpretation for beta=0, and even for both +* being zero. +* +* The right eigenvector v(j) corresponding to the eigenvalue lambda(j) +* of (A,B) satisfies +* +* A * v(j) = lambda(j) * B * v(j). +* +* The left eigenvector u(j) corresponding to the eigenvalue lambda(j) +* of (A,B) satisfies +* +* u(j)**H * A = lambda(j) * u(j)**H * B . +* +* where u(j)**H is the conjugate-transpose of u(j). +* +* +* Arguments +* ========= +* +* JOBVL (input) CHARACTER*1 +* = 'N': do not compute the left generalized eigenvectors; +* = 'V': compute the left generalized eigenvectors. +* +* JOBVR (input) CHARACTER*1 +* = 'N': do not compute the right generalized eigenvectors; +* = 'V': compute the right generalized eigenvectors. +* +* N (input) INTEGER +* The order of the matrices A, B, VL, and VR. N >= 0. +* +* A (input/output) REAL array, dimension (LDA, N) +* On entry, the matrix A in the pair (A,B). +* On exit, A has been overwritten. +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) REAL array, dimension (LDB, N) +* On entry, the matrix B in the pair (A,B). +* On exit, B has been overwritten. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* ALPHAR (output) REAL array, dimension (N) +* ALPHAI (output) REAL array, dimension (N) +* BETA (output) REAL array, dimension (N) +* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +* be the generalized eigenvalues. If ALPHAI(j) is zero, then +* the j-th eigenvalue is real; if positive, then the j-th and +* (j+1)-st eigenvalues are a complex conjugate pair, with +* ALPHAI(j+1) negative. +* +* Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +* may easily over- or underflow, and BETA(j) may even be zero. +* Thus, the user should avoid naively computing the ratio +* alpha/beta. However, ALPHAR and ALPHAI will be always less +* than and usually comparable with norm(A) in magnitude, and +* BETA always less than and usually comparable with norm(B). +* +* VL (output) REAL array, dimension (LDVL,N) +* If JOBVL = 'V', the left eigenvectors u(j) are stored one +* after another in the columns of VL, in the same order as +* their eigenvalues. If the j-th eigenvalue is real, then +* u(j) = VL(:,j), the j-th column of VL. If the j-th and +* (j+1)-th eigenvalues form a complex conjugate pair, then +* u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1). +* Each eigenvector will be scaled so the largest component have +* abs(real part)+abs(imag. part)=1. +* Not referenced if JOBVL = 'N'. +* +* LDVL (input) INTEGER +* The leading dimension of the matrix VL. LDVL >= 1, and +* if JOBVL = 'V', LDVL >= N. +* +* VR (output) REAL array, dimension (LDVR,N) +* If JOBVR = 'V', the right eigenvectors v(j) are stored one +* after another in the columns of VR, in the same order as +* their eigenvalues. If the j-th eigenvalue is real, then +* v(j) = VR(:,j), the j-th column of VR. If the j-th and +* (j+1)-th eigenvalues form a complex conjugate pair, then +* v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1). +* Each eigenvector will be scaled so the largest component have +* abs(real part)+abs(imag. part)=1. +* Not referenced if JOBVR = 'N'. +* +* LDVR (input) INTEGER +* The leading dimension of the matrix VR. LDVR >= 1, and +* if JOBVR = 'V', LDVR >= N. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,8*N). +* For good performance, LWORK must generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* = 1,...,N: +* The QZ iteration failed. No eigenvectors have been +* calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) +* should be correct for j=INFO+1,...,N. +* > N: =N+1: other than QZ iteration failed in SHGEQZ. +* =N+2: error return from STGEVC. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY + CHARACTER CHTEMP + INTEGER ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, ILO, + $ IN, IRIGHT, IROWS, ITAU, IWRK, JC, JR, MAXWRK, + $ MINWRK + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SMLNUM, TEMP +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLABAD, + $ SLACPY, SLASCL, SLASET, SORGQR, SORMQR, STGEVC, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -12 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -14 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. The workspace is +* computed assuming ILO = 1 and IHI = N, the worst case.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MAXWRK = 7*N + N*ILAENV( 1, 'SGEQRF', ' ', N, 1, N, 0 ) + MINWRK = MAX( 1, 8*N ) + WORK( 1 ) = MAXWRK + END IF +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) + $ INFO = -16 +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL SLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = SLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL SLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrices A, B to isolate eigenvalues if possible +* (Workspace: need 6*N) +* + ILEFT = 1 + IRIGHT = N + 1 + IWRK = IRIGHT + N + CALL SGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + IF( ILV ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = IWRK + IWRK = ITAU + IROWS + CALL SGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* (Workspace: need N, prefer N*NB) +* + CALL SORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VL +* (Workspace: need N, prefer N*NB) +* + IF( ILVL ) THEN + CALL SLASET( 'Full', N, N, ZERO, ONE, VL, LDVL ) + CALL SLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + CALL SORGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VR +* + IF( ILVR ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + IF( ILV ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL SGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IERR ) + ELSE + CALL SGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR ) + END IF +* +* Perform QZ algorithm (Compute eigenvalues, and optionally, the +* Schur forms and Schur vectors) +* (Workspace: need N) +* + IWRK = ITAU + IF( ILV ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF + CALL SHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 110 + END IF +* +* Compute Eigenvectors +* (Workspace: need 6*N) +* + IF( ILV ) THEN + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF + CALL STGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, N, IN, WORK( IWRK ), IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 110 + END IF +* +* Undo balancing on VL and VR and normalization +* (Workspace: none needed) +* + IF( ILVL ) THEN + CALL SGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VL, LDVL, IERR ) + DO 50 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 50 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 10 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) ) ) + 10 CONTINUE + ELSE + DO 20 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) )+ + $ ABS( VL( JR, JC+1 ) ) ) + 20 CONTINUE + END IF + IF( TEMP.LT.SMLNUM ) + $ GO TO 50 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 30 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 30 CONTINUE + ELSE + DO 40 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + VL( JR, JC+1 ) = VL( JR, JC+1 )*TEMP + 40 CONTINUE + END IF + 50 CONTINUE + END IF + IF( ILVR ) THEN + CALL SGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VR, LDVR, IERR ) + DO 100 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 100 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 60 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) ) ) + 60 CONTINUE + ELSE + DO 70 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) )+ + $ ABS( VR( JR, JC+1 ) ) ) + 70 CONTINUE + END IF + IF( TEMP.LT.SMLNUM ) + $ GO TO 100 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 80 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 80 CONTINUE + ELSE + DO 90 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + VR( JR, JC+1 ) = VR( JR, JC+1 )*TEMP + 90 CONTINUE + END IF + 100 CONTINUE + END IF +* +* End of eigenvector calculation +* + END IF +* +* Undo scaling if necessary +* + IF( ILASCL ) THEN + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR ) + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + 110 CONTINUE +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of SGGEV +* + END diff --git a/costa/native/external/lapack/sggevx.f b/costa/native/external/lapack/sggevx.f new file mode 100644 index 000000000..e42999be8 --- /dev/null +++ b/costa/native/external/lapack/sggevx.f @@ -0,0 +1,698 @@ + SUBROUTINE SGGEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, ILO, + $ IHI, LSCALE, RSCALE, ABNRM, BBNRM, RCONDE, + $ RCONDV, WORK, LWORK, IWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER BALANC, JOBVL, JOBVR, SENSE + INTEGER IHI, ILO, INFO, LDA, LDB, LDVL, LDVR, LWORK, N + REAL ABNRM, BBNRM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + INTEGER IWORK( * ) + REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), LSCALE( * ), + $ RCONDE( * ), RCONDV( * ), RSCALE( * ), + $ VL( LDVL, * ), VR( LDVR, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGGEVX computes for a pair of N-by-N real nonsymmetric matrices (A,B) +* the generalized eigenvalues, and optionally, the left and/or right +* generalized eigenvectors. +* +* Optionally also, it computes a balancing transformation to improve +* the conditioning of the eigenvalues and eigenvectors (ILO, IHI, +* LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for +* the eigenvalues (RCONDE), and reciprocal condition numbers for the +* right eigenvectors (RCONDV). +* +* A generalized eigenvalue for a pair of matrices (A,B) is a scalar +* lambda or a ratio alpha/beta = lambda, such that A - lambda*B is +* singular. It is usually represented as the pair (alpha,beta), as +* there is a reasonable interpretation for beta=0, and even for both +* being zero. +* +* The right eigenvector v(j) corresponding to the eigenvalue lambda(j) +* of (A,B) satisfies +* +* A * v(j) = lambda(j) * B * v(j) . +* +* The left eigenvector u(j) corresponding to the eigenvalue lambda(j) +* of (A,B) satisfies +* +* u(j)**H * A = lambda(j) * u(j)**H * B. +* +* where u(j)**H is the conjugate-transpose of u(j). +* +* +* Arguments +* ========= +* +* BALANC (input) CHARACTER*1 +* Specifies the balance option to be performed. +* = 'N': do not diagonally scale or permute; +* = 'P': permute only; +* = 'S': scale only; +* = 'B': both permute and scale. +* Computed reciprocal condition numbers will be for the +* matrices after permuting and/or balancing. Permuting does +* not change condition numbers (in exact arithmetic), but +* balancing does. +* +* JOBVL (input) CHARACTER*1 +* = 'N': do not compute the left generalized eigenvectors; +* = 'V': compute the left generalized eigenvectors. +* +* JOBVR (input) CHARACTER*1 +* = 'N': do not compute the right generalized eigenvectors; +* = 'V': compute the right generalized eigenvectors. +* +* SENSE (input) CHARACTER*1 +* Determines which reciprocal condition numbers are computed. +* = 'N': none are computed; +* = 'E': computed for eigenvalues only; +* = 'V': computed for eigenvectors only; +* = 'B': computed for eigenvalues and eigenvectors. +* +* N (input) INTEGER +* The order of the matrices A, B, VL, and VR. N >= 0. +* +* A (input/output) REAL array, dimension (LDA, N) +* On entry, the matrix A in the pair (A,B). +* On exit, A has been overwritten. If JOBVL='V' or JOBVR='V' +* or both, then A contains the first part of the real Schur +* form of the "balanced" versions of the input A and B. +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) REAL array, dimension (LDB, N) +* On entry, the matrix B in the pair (A,B). +* On exit, B has been overwritten. If JOBVL='V' or JOBVR='V' +* or both, then B contains the second part of the real Schur +* form of the "balanced" versions of the input A and B. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* ALPHAR (output) REAL array, dimension (N) +* ALPHAI (output) REAL array, dimension (N) +* BETA (output) REAL array, dimension (N) +* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +* be the generalized eigenvalues. If ALPHAI(j) is zero, then +* the j-th eigenvalue is real; if positive, then the j-th and +* (j+1)-st eigenvalues are a complex conjugate pair, with +* ALPHAI(j+1) negative. +* +* Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +* may easily over- or underflow, and BETA(j) may even be zero. +* Thus, the user should avoid naively computing the ratio +* ALPHA/BETA. However, ALPHAR and ALPHAI will be always less +* than and usually comparable with norm(A) in magnitude, and +* BETA always less than and usually comparable with norm(B). +* +* VL (output) REAL array, dimension (LDVL,N) +* If JOBVL = 'V', the left eigenvectors u(j) are stored one +* after another in the columns of VL, in the same order as +* their eigenvalues. If the j-th eigenvalue is real, then +* u(j) = VL(:,j), the j-th column of VL. If the j-th and +* (j+1)-th eigenvalues form a complex conjugate pair, then +* u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1). +* Each eigenvector will be scaled so the largest component have +* abs(real part) + abs(imag. part) = 1. +* Not referenced if JOBVL = 'N'. +* +* LDVL (input) INTEGER +* The leading dimension of the matrix VL. LDVL >= 1, and +* if JOBVL = 'V', LDVL >= N. +* +* VR (output) REAL array, dimension (LDVR,N) +* If JOBVR = 'V', the right eigenvectors v(j) are stored one +* after another in the columns of VR, in the same order as +* their eigenvalues. If the j-th eigenvalue is real, then +* v(j) = VR(:,j), the j-th column of VR. If the j-th and +* (j+1)-th eigenvalues form a complex conjugate pair, then +* v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1). +* Each eigenvector will be scaled so the largest component have +* abs(real part) + abs(imag. part) = 1. +* Not referenced if JOBVR = 'N'. +* +* LDVR (input) INTEGER +* The leading dimension of the matrix VR. LDVR >= 1, and +* if JOBVR = 'V', LDVR >= N. +* +* ILO,IHI (output) INTEGER +* ILO and IHI are integer values such that on exit +* A(i,j) = 0 and B(i,j) = 0 if i > j and +* j = 1,...,ILO-1 or i = IHI+1,...,N. +* If BALANC = 'N' or 'S', ILO = 1 and IHI = N. +* +* LSCALE (output) REAL array, dimension (N) +* Details of the permutations and scaling factors applied +* to the left side of A and B. If PL(j) is the index of the +* row interchanged with row j, and DL(j) is the scaling +* factor applied to row j, then +* LSCALE(j) = PL(j) for j = 1,...,ILO-1 +* = DL(j) for j = ILO,...,IHI +* = PL(j) for j = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* RSCALE (output) REAL array, dimension (N) +* Details of the permutations and scaling factors applied +* to the right side of A and B. If PR(j) is the index of the +* column interchanged with column j, and DR(j) is the scaling +* factor applied to column j, then +* RSCALE(j) = PR(j) for j = 1,...,ILO-1 +* = DR(j) for j = ILO,...,IHI +* = PR(j) for j = IHI+1,...,N +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* ABNRM (output) REAL +* The one-norm of the balanced matrix A. +* +* BBNRM (output) REAL +* The one-norm of the balanced matrix B. +* +* RCONDE (output) REAL array, dimension (N) +* If SENSE = 'E' or 'B', the reciprocal condition numbers of +* the selected eigenvalues, stored in consecutive elements of +* the array. For a complex conjugate pair of eigenvalues two +* consecutive elements of RCONDE are set to the same value. +* Thus RCONDE(j), RCONDV(j), and the j-th columns of VL and VR +* all correspond to the same eigenpair (but not in general the +* j-th eigenpair, unless all eigenpairs are selected). +* If SENSE = 'V', RCONDE is not referenced. +* +* RCONDV (output) REAL array, dimension (N) +* If SENSE = 'V' or 'B', the estimated reciprocal condition +* numbers of the selected eigenvectors, stored in consecutive +* elements of the array. For a complex eigenvector two +* consecutive elements of RCONDV are set to the same value. If +* the eigenvalues cannot be reordered to compute RCONDV(j), +* RCONDV(j) is set to 0; this can only occur when the true +* value would be very small anyway. +* If SENSE = 'E', RCONDV is not referenced. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,6*N). +* If SENSE = 'E', LWORK >= 12*N. +* If SENSE = 'V' or 'B', LWORK >= 2*N*N+12*N+16. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace) INTEGER array, dimension (N+6) +* If SENSE = 'E', IWORK is not referenced. +* +* BWORK (workspace) LOGICAL array, dimension (N) +* If SENSE = 'N', BWORK is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* = 1,...,N: +* The QZ iteration failed. No eigenvectors have been +* calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) +* should be correct for j=INFO+1,...,N. +* > N: =N+1: other than QZ iteration failed in SHGEQZ. +* =N+2: error return from STGEVC. +* +* Further Details +* =============== +* +* Balancing a matrix pair (A,B) includes, first, permuting rows and +* columns to isolate eigenvalues, second, applying diagonal similarity +* transformation to the rows and columns to make the rows and columns +* as close in norm as possible. The computed reciprocal condition +* numbers correspond to the balanced matrix. Permuting rows and columns +* will not change the condition numbers (in exact arithmetic) but +* diagonal scaling will. For further explanation of balancing, see +* section 4.11.1.2 of LAPACK Users' Guide. +* +* An approximate error bound on the chordal distance between the i-th +* computed generalized eigenvalue w and the corresponding exact +* eigenvalue lambda is +* +* chord(w, lambda) <= EPS * norm(ABNRM, BBNRM) / RCONDE(I) +* +* An approximate error bound for the angle between the i-th computed +* eigenvector VL(i) or VR(i) is given by +* +* EPS * norm(ABNRM, BBNRM) / DIF(i). +* +* For further explanation of the reciprocal condition numbers RCONDE +* and RCONDV, see section 4.11 of LAPACK User's Guide. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY, PAIR, + $ WANTSB, WANTSE, WANTSN, WANTSV + CHARACTER CHTEMP + INTEGER I, ICOLS, IERR, IJOBVL, IJOBVR, IN, IROWS, + $ ITAU, IWRK, IWRK1, J, JC, JR, M, MAXWRK, + $ MINWRK, MM + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SMLNUM, TEMP +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLABAD, + $ SLACPY, SLASCL, SLASET, SORGQR, SORMQR, STGEVC, + $ STGSNA, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( BALANC, 'N' ) .OR. LSAME( BALANC, + $ 'S' ) .OR. LSAME( BALANC, 'P' ) .OR. LSAME( BALANC, 'B' ) ) ) + $ THEN + INFO = -1 + ELSE IF( IJOBVL.LE.0 ) THEN + INFO = -2 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -3 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSB .OR. WANTSV ) ) + $ THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -14 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -16 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. The workspace is +* computed assuming ILO = 1 and IHI = N, the worst case.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MAXWRK = 5*N + N*ILAENV( 1, 'SGEQRF', ' ', N, 1, N, 0 ) + MINWRK = MAX( 1, 6*N ) + IF( WANTSE ) THEN + MINWRK = MAX( 1, 12*N ) + ELSE IF( WANTSV .OR. WANTSB ) THEN + MINWRK = 2*N*N + 12*N + 16 + MAXWRK = MAX( MAXWRK, 2*N*N+12*N+16 ) + END IF + WORK( 1 ) = MAXWRK + END IF +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -26 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL SLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = SLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL SLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute and/or balance the matrix pair (A,B) +* (Workspace: need 6*N) +* + CALL SGGBAL( BALANC, N, A, LDA, B, LDB, ILO, IHI, LSCALE, RSCALE, + $ WORK, IERR ) +* +* Compute ABNRM and BBNRM +* + ABNRM = SLANGE( '1', N, N, A, LDA, WORK( 1 ) ) + IF( ILASCL ) THEN + WORK( 1 ) = ABNRM + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, 1, 1, WORK( 1 ), 1, + $ IERR ) + ABNRM = WORK( 1 ) + END IF +* + BBNRM = SLANGE( '1', N, N, B, LDB, WORK( 1 ) ) + IF( ILBSCL ) THEN + WORK( 1 ) = BBNRM + CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, 1, 1, WORK( 1 ), 1, + $ IERR ) + BBNRM = WORK( 1 ) + END IF +* +* Reduce B to triangular form (QR decomposition of B) +* (Workspace: need N, prefer N*NB ) +* + IROWS = IHI + 1 - ILO + IF( ILV .OR. .NOT.WANTSN ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = 1 + IWRK = ITAU + IROWS + CALL SGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to A +* (Workspace: need N, prefer N*NB) +* + CALL SORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VL and/or VR +* (Workspace: need N, prefer N*NB) +* + IF( ILVL ) THEN + CALL SLASET( 'Full', N, N, ZERO, ONE, VL, LDVL ) + CALL SLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + CALL SORGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* + IF( ILVR ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + IF( ILV .OR. .NOT.WANTSN ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL SGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IERR ) + ELSE + CALL SGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR ) + END IF +* +* Perform QZ algorithm (Compute eigenvalues, and optionally, the +* Schur forms and Schur vectors) +* (Workspace: need N) +* + IF( ILV .OR. .NOT.WANTSN ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF +* + CALL SHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, WORK, + $ LWORK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 130 + END IF +* +* Compute Eigenvectors and estimate condition numbers if desired +* (Workspace: STGEVC: need 6*N +* STGSNA: need 2*N*(N+2)+16 if SENSE = 'V' or 'B', +* need N otherwise ) +* + IF( ILV .OR. .NOT.WANTSN ) THEN + IF( ILV ) THEN + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF +* + CALL STGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, N, IN, WORK, IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 130 + END IF + END IF +* + IF( .NOT.WANTSN ) THEN +* +* compute eigenvectors (STGEVC) and estimate condition +* numbers (STGSNA). Note that the definition of the condition +* number is not invariant under transformation (u,v) to +* (Q*u, Z*v), where (u,v) are eigenvectors of the generalized +* Schur form (S,T), Q and Z are orthogonal matrices. In order +* to avoid using extra 2*N*N workspace, we have to recalculate +* eigenvectors and estimate one condition numbers at a time. +* + PAIR = .FALSE. + DO 20 I = 1, N +* + IF( PAIR ) THEN + PAIR = .FALSE. + GO TO 20 + END IF + MM = 1 + IF( I.LT.N ) THEN + IF( A( I+1, I ).NE.ZERO ) THEN + PAIR = .TRUE. + MM = 2 + END IF + END IF +* + DO 10 J = 1, N + BWORK( J ) = .FALSE. + 10 CONTINUE + IF( MM.EQ.1 ) THEN + BWORK( I ) = .TRUE. + ELSE IF( MM.EQ.2 ) THEN + BWORK( I ) = .TRUE. + BWORK( I+1 ) = .TRUE. + END IF +* + IWRK = MM*N + 1 + IWRK1 = IWRK + MM*N +* +* Compute a pair of left and right eigenvectors. +* (compute workspace: need up to 4*N + 6*N) +* + IF( WANTSE .OR. WANTSB ) THEN + CALL STGEVC( 'B', 'S', BWORK, N, A, LDA, B, LDB, + $ WORK( 1 ), N, WORK( IWRK ), N, MM, M, + $ WORK( IWRK1 ), IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 130 + END IF + END IF +* + CALL STGSNA( SENSE, 'S', BWORK, N, A, LDA, B, LDB, + $ WORK( 1 ), N, WORK( IWRK ), N, RCONDE( I ), + $ RCONDV( I ), MM, M, WORK( IWRK1 ), + $ LWORK-IWRK1+1, IWORK, IERR ) +* + 20 CONTINUE + END IF + END IF +* +* Undo balancing on VL and VR and normalization +* (Workspace: none needed) +* + IF( ILVL ) THEN + CALL SGGBAK( BALANC, 'L', N, ILO, IHI, LSCALE, RSCALE, N, VL, + $ LDVL, IERR ) +* + DO 70 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 70 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 30 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) ) ) + 30 CONTINUE + ELSE + DO 40 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) )+ + $ ABS( VL( JR, JC+1 ) ) ) + 40 CONTINUE + END IF + IF( TEMP.LT.SMLNUM ) + $ GO TO 70 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 50 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 50 CONTINUE + ELSE + DO 60 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + VL( JR, JC+1 ) = VL( JR, JC+1 )*TEMP + 60 CONTINUE + END IF + 70 CONTINUE + END IF + IF( ILVR ) THEN + CALL SGGBAK( BALANC, 'R', N, ILO, IHI, LSCALE, RSCALE, N, VR, + $ LDVR, IERR ) + DO 120 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 120 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 80 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) ) ) + 80 CONTINUE + ELSE + DO 90 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) )+ + $ ABS( VR( JR, JC+1 ) ) ) + 90 CONTINUE + END IF + IF( TEMP.LT.SMLNUM ) + $ GO TO 120 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 100 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 100 CONTINUE + ELSE + DO 110 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + VR( JR, JC+1 ) = VR( JR, JC+1 )*TEMP + 110 CONTINUE + END IF + 120 CONTINUE + END IF +* +* Undo scaling if necessary +* + IF( ILASCL ) THEN + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR ) + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + 130 CONTINUE + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of SGGEVX +* + END diff --git a/costa/native/external/lapack/sggglm.f b/costa/native/external/lapack/sggglm.f new file mode 100644 index 000000000..4abef5320 --- /dev/null +++ b/costa/native/external/lapack/sggglm.f @@ -0,0 +1,212 @@ + SUBROUTINE SGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), D( * ), WORK( * ), + $ X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* SGGGLM solves a general Gauss-Markov linear model (GLM) problem: +* +* minimize || y ||_2 subject to d = A*x + B*y +* x +* +* where A is an N-by-M matrix, B is an N-by-P matrix, and d is a +* given N-vector. It is assumed that M <= N <= M+P, and +* +* rank(A) = M and rank( A B ) = N. +* +* Under these assumptions, the constrained equation is always +* consistent, and there is a unique solution x and a minimal 2-norm +* solution y, which is obtained using a generalized QR factorization +* of A and B. +* +* In particular, if matrix B is square nonsingular, then the problem +* GLM is equivalent to the following weighted linear least squares +* problem +* +* minimize || inv(B)*(d-A*x) ||_2 +* x +* +* where inv(B) denotes the inverse of B. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of rows of the matrices A and B. N >= 0. +* +* M (input) INTEGER +* The number of columns of the matrix A. 0 <= M <= N. +* +* P (input) INTEGER +* The number of columns of the matrix B. P >= N-M. +* +* A (input/output) REAL array, dimension (LDA,M) +* On entry, the N-by-M matrix A. +* On exit, A is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) REAL array, dimension (LDB,P) +* On entry, the N-by-P matrix B. +* On exit, B is destroyed. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* D (input/output) REAL array, dimension (N) +* On entry, D is the left hand side of the GLM equation. +* On exit, D is destroyed. +* +* X (output) REAL array, dimension (M) +* Y (output) REAL array, dimension (P) +* On exit, X and Y are the solutions of the GLM problem. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N+M+P). +* For optimum performance, LWORK >= M+min(N,P)+max(N,P)*NB, +* where NB is an upper bound for the optimal blocksizes for +* SGEQRF, SGERQF, SORMQR and SORMRQ. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* =================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, LOPT, LWKOPT, NB, NB1, NB2, NB3, NB4, NP +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMV, SGGQRF, SORMQR, SORMRQ, STRSV, + $ XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NP = MIN( N, P ) + NB1 = ILAENV( 1, 'SGEQRF', ' ', N, M, -1, -1 ) + NB2 = ILAENV( 1, 'SGERQF', ' ', N, M, -1, -1 ) + NB3 = ILAENV( 1, 'SORMQR', ' ', N, M, P, -1 ) + NB4 = ILAENV( 1, 'SORMRQ', ' ', N, M, P, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKOPT = M + NP + MAX( N, P )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 .OR. M.GT.N ) THEN + INFO = -2 + ELSE IF( P.LT.0 .OR. P.LT.N-M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.MAX( 1, N+M+P ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGGLM', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the GQR factorization of matrices A and B: +* +* Q'*A = ( R11 ) M, Q'*B*Z' = ( T11 T12 ) M +* ( 0 ) N-M ( 0 T22 ) N-M +* M M+P-N N-M +* +* where R11 and T22 are upper triangular, and Q and Z are +* orthogonal. +* + CALL SGGQRF( N, M, P, A, LDA, WORK, B, LDB, WORK( M+1 ), + $ WORK( M+NP+1 ), LWORK-M-NP, INFO ) + LOPT = WORK( M+NP+1 ) +* +* Update left-hand-side vector d = Q'*d = ( d1 ) M +* ( d2 ) N-M +* + CALL SORMQR( 'Left', 'Transpose', N, 1, M, A, LDA, WORK, D, + $ MAX( 1, N ), WORK( M+NP+1 ), LWORK-M-NP, INFO ) + LOPT = MAX( LOPT, INT( WORK( M+NP+1 ) ) ) +* +* Solve T22*y2 = d2 for y2 +* + CALL STRSV( 'Upper', 'No transpose', 'Non unit', N-M, + $ B( M+1, M+P-N+1 ), LDB, D( M+1 ), 1 ) + CALL SCOPY( N-M, D( M+1 ), 1, Y( M+P-N+1 ), 1 ) +* +* Set y1 = 0 +* + DO 10 I = 1, M + P - N + Y( I ) = ZERO + 10 CONTINUE +* +* Update d1 = d1 - T12*y2 +* + CALL SGEMV( 'No transpose', M, N-M, -ONE, B( 1, M+P-N+1 ), LDB, + $ Y( M+P-N+1 ), 1, ONE, D, 1 ) +* +* Solve triangular system: R11*x = d1 +* + CALL STRSV( 'Upper', 'No Transpose', 'Non unit', M, A, LDA, D, 1 ) +* +* Copy D to X +* + CALL SCOPY( M, D, 1, X, 1 ) +* +* Backward transformation y = Z'*y +* + CALL SORMRQ( 'Left', 'Transpose', P, 1, NP, + $ B( MAX( 1, N-P+1 ), 1 ), LDB, WORK( M+1 ), Y, + $ MAX( 1, P ), WORK( M+NP+1 ), LWORK-M-NP, INFO ) + WORK( 1 ) = M + NP + MAX( LOPT, INT( WORK( M+NP+1 ) ) ) +* + RETURN +* +* End of SGGGLM +* + END diff --git a/costa/native/external/lapack/sgghrd.f b/costa/native/external/lapack/sgghrd.f new file mode 100644 index 000000000..dcba074e9 --- /dev/null +++ b/costa/native/external/lapack/sgghrd.f @@ -0,0 +1,253 @@ + SUBROUTINE SGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, + $ LDQ, Z, LDZ, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, COMPZ + INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SGGHRD reduces a pair of real matrices (A,B) to generalized upper +* Hessenberg form using orthogonal transformations, where A is a +* general matrix and B is upper triangular: Q' * A * Z = H and +* Q' * B * Z = T, where H is upper Hessenberg, T is upper triangular, +* and Q and Z are orthogonal, and ' means transpose. +* +* The orthogonal matrices Q and Z are determined as products of Givens +* rotations. They may either be formed explicitly, or they may be +* postmultiplied into input matrices Q1 and Z1, so that +* +* Q1 * A * Z1' = (Q1*Q) * H * (Z1*Z)' +* Q1 * B * Z1' = (Q1*Q) * T * (Z1*Z)' +* +* Arguments +* ========= +* +* COMPQ (input) CHARACTER*1 +* = 'N': do not compute Q; +* = 'I': Q is initialized to the unit matrix, and the +* orthogonal matrix Q is returned; +* = 'V': Q must contain an orthogonal matrix Q1 on entry, +* and the product Q1*Q is returned. +* +* COMPZ (input) CHARACTER*1 +* = 'N': do not compute Z; +* = 'I': Z is initialized to the unit matrix, and the +* orthogonal matrix Z is returned; +* = 'V': Z must contain an orthogonal matrix Z1 on entry, +* and the product Z1*Z is returned. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that A is already upper triangular in rows and +* columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally set +* by a previous call to SGGBAL; otherwise they should be set +* to 1 and N respectively. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* A (input/output) REAL array, dimension (LDA, N) +* On entry, the N-by-N general matrix to be reduced. +* On exit, the upper triangle and the first subdiagonal of A +* are overwritten with the upper Hessenberg matrix H, and the +* rest is set to zero. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) REAL array, dimension (LDB, N) +* On entry, the N-by-N upper triangular matrix B. +* On exit, the upper triangular matrix T = Q' B Z. The +* elements below the diagonal are set to zero. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* Q (input/output) REAL array, dimension (LDQ, N) +* If COMPQ='N': Q is not referenced. +* If COMPQ='I': on entry, Q need not be set, and on exit it +* contains the orthogonal matrix Q, where Q' +* is the product of the Givens transformations +* which are applied to A and B on the left. +* If COMPQ='V': on entry, Q must contain an orthogonal matrix +* Q1, and on exit this is overwritten by Q1*Q. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. +* LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. +* +* Z (input/output) REAL array, dimension (LDZ, N) +* If COMPZ='N': Z is not referenced. +* If COMPZ='I': on entry, Z need not be set, and on exit it +* contains the orthogonal matrix Z, which is +* the product of the Givens transformations +* which are applied to A and B on the right. +* If COMPZ='V': on entry, Z must contain an orthogonal matrix +* Z1, and on exit this is overwritten by Z1*Z. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. +* LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* This routine reduces A to Hessenberg and B to triangular form by +* an unblocked reduction, as described in _Matrix_Computations_, +* by Golub and Van Loan (Johns Hopkins Press.) +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ILQ, ILZ + INTEGER ICOMPQ, ICOMPZ, JCOL, JROW + REAL C, S, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLARTG, SLASET, SROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode COMPQ +* + IF( LSAME( COMPQ, 'N' ) ) THEN + ILQ = .FALSE. + ICOMPQ = 1 + ELSE IF( LSAME( COMPQ, 'V' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 2 + ELSE IF( LSAME( COMPQ, 'I' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 3 + ELSE + ICOMPQ = 0 + END IF +* +* Decode COMPZ +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ILZ = .FALSE. + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 2 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 3 + ELSE + ICOMPZ = 0 + END IF +* +* Test the input parameters. +* + INFO = 0 + IF( ICOMPQ.LE.0 ) THEN + INFO = -1 + ELSE IF( ICOMPZ.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 ) THEN + INFO = -4 + ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( ( ILQ .AND. LDQ.LT.N ) .OR. LDQ.LT.1 ) THEN + INFO = -11 + ELSE IF( ( ILZ .AND. LDZ.LT.N ) .OR. LDZ.LT.1 ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGHRD', -INFO ) + RETURN + END IF +* +* Initialize Q and Z if desired. +* + IF( ICOMPQ.EQ.3 ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) + IF( ICOMPZ.EQ.3 ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* +* Zero out lower triangle of B +* + DO 20 JCOL = 1, N - 1 + DO 10 JROW = JCOL + 1, N + B( JROW, JCOL ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Reduce A and B +* + DO 40 JCOL = ILO, IHI - 2 +* + DO 30 JROW = IHI, JCOL + 2, -1 +* +* Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL) +* + TEMP = A( JROW-1, JCOL ) + CALL SLARTG( TEMP, A( JROW, JCOL ), C, S, + $ A( JROW-1, JCOL ) ) + A( JROW, JCOL ) = ZERO + CALL SROT( N-JCOL, A( JROW-1, JCOL+1 ), LDA, + $ A( JROW, JCOL+1 ), LDA, C, S ) + CALL SROT( N+2-JROW, B( JROW-1, JROW-1 ), LDB, + $ B( JROW, JROW-1 ), LDB, C, S ) + IF( ILQ ) + $ CALL SROT( N, Q( 1, JROW-1 ), 1, Q( 1, JROW ), 1, C, S ) +* +* Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1) +* + TEMP = B( JROW, JROW ) + CALL SLARTG( TEMP, B( JROW, JROW-1 ), C, S, + $ B( JROW, JROW ) ) + B( JROW, JROW-1 ) = ZERO + CALL SROT( IHI, A( 1, JROW ), 1, A( 1, JROW-1 ), 1, C, S ) + CALL SROT( JROW-1, B( 1, JROW ), 1, B( 1, JROW-1 ), 1, C, + $ S ) + IF( ILZ ) + $ CALL SROT( N, Z( 1, JROW ), 1, Z( 1, JROW-1 ), 1, C, S ) + 30 CONTINUE + 40 CONTINUE +* + RETURN +* +* End of SGGHRD +* + END diff --git a/costa/native/external/lapack/sgglse.f b/costa/native/external/lapack/sgglse.f new file mode 100644 index 000000000..7f36a7525 --- /dev/null +++ b/costa/native/external/lapack/sgglse.f @@ -0,0 +1,217 @@ + SUBROUTINE SGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), C( * ), D( * ), + $ WORK( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* SGGLSE solves the linear equality-constrained least squares (LSE) +* problem: +* +* minimize || c - A*x ||_2 subject to B*x = d +* +* where A is an M-by-N matrix, B is a P-by-N matrix, c is a given +* M-vector, and d is a given P-vector. It is assumed that +* P <= N <= M+P, and +* +* rank(B) = P and rank( ( A ) ) = N. +* ( ( B ) ) +* +* These conditions ensure that the LSE problem has a unique solution, +* which is obtained using a GRQ factorization of the matrices B and A. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrices A and B. N >= 0. +* +* P (input) INTEGER +* The number of rows of the matrix B. 0 <= P <= N <= M+P. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) REAL array, dimension (LDB,N) +* On entry, the P-by-N matrix B. +* On exit, B is destroyed. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,P). +* +* C (input/output) REAL array, dimension (M) +* On entry, C contains the right hand side vector for the +* least squares part of the LSE problem. +* On exit, the residual sum of squares for the solution +* is given by the sum of squares of elements N-P+1 to M of +* vector C. +* +* D (input/output) REAL array, dimension (P) +* On entry, D contains the right hand side vector for the +* constrained equation. +* On exit, D is destroyed. +* +* X (output) REAL array, dimension (N) +* On exit, X is the solution of the LSE problem. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M+N+P). +* For optimum performance LWORK >= P+min(M,N)+max(M,N)*NB, +* where NB is an upper bound for the optimal blocksizes for +* SGEQRF, SGERQF, SORMQR and SORMRQ. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKOPT, MN, NB, NB1, NB2, NB3, NB4, NR +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGEMV, SGGRQF, SORMQR, SORMRQ, + $ STRMV, STRSV, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + MN = MIN( M, N ) + NB1 = ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'SGERQF', ' ', M, N, -1, -1 ) + NB3 = ILAENV( 1, 'SORMQR', ' ', M, N, P, -1 ) + NB4 = ILAENV( 1, 'SORMRQ', ' ', M, N, P, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKOPT = P + MN + MAX( M, N )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( P.LT.0 .OR. P.GT.N .OR. P.LT.N-M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.MAX( 1, M+N+P ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGLSE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the GRQ factorization of matrices B and A: +* +* B*Q' = ( 0 T12 ) P Z'*A*Q' = ( R11 R12 ) N-P +* N-P P ( 0 R22 ) M+P-N +* N-P P +* +* where T12 and R11 are upper triangular, and Q and Z are +* orthogonal. +* + CALL SGGRQF( P, M, N, B, LDB, WORK, A, LDA, WORK( P+1 ), + $ WORK( P+MN+1 ), LWORK-P-MN, INFO ) + LOPT = WORK( P+MN+1 ) +* +* Update c = Z'*c = ( c1 ) N-P +* ( c2 ) M+P-N +* + CALL SORMQR( 'Left', 'Transpose', M, 1, MN, A, LDA, WORK( P+1 ), + $ C, MAX( 1, M ), WORK( P+MN+1 ), LWORK-P-MN, INFO ) + LOPT = MAX( LOPT, INT( WORK( P+MN+1 ) ) ) +* +* Solve T12*x2 = d for x2 +* + CALL STRSV( 'Upper', 'No transpose', 'Non unit', P, B( 1, N-P+1 ), + $ LDB, D, 1 ) +* +* Update c1 +* + CALL SGEMV( 'No transpose', N-P, P, -ONE, A( 1, N-P+1 ), LDA, D, + $ 1, ONE, C, 1 ) +* +* Sovle R11*x1 = c1 for x1 +* + CALL STRSV( 'Upper', 'No transpose', 'Non unit', N-P, A, LDA, C, + $ 1 ) +* +* Put the solutions in X +* + CALL SCOPY( N-P, C, 1, X, 1 ) + CALL SCOPY( P, D, 1, X( N-P+1 ), 1 ) +* +* Compute the residual vector: +* + IF( M.LT.N ) THEN + NR = M + P - N + CALL SGEMV( 'No transpose', NR, N-M, -ONE, A( N-P+1, M+1 ), + $ LDA, D( NR+1 ), 1, ONE, C( N-P+1 ), 1 ) + ELSE + NR = P + END IF + CALL STRMV( 'Upper', 'No transpose', 'Non unit', NR, + $ A( N-P+1, N-P+1 ), LDA, D, 1 ) + CALL SAXPY( NR, -ONE, D, 1, C( N-P+1 ), 1 ) +* +* Backward transformation x = Q'*x +* + CALL SORMRQ( 'Left', 'Transpose', N, 1, P, B, LDB, WORK( 1 ), X, + $ N, WORK( P+MN+1 ), LWORK-P-MN, INFO ) + WORK( 1 ) = P + MN + MAX( LOPT, INT( WORK( P+MN+1 ) ) ) +* + RETURN +* +* End of SGGLSE +* + END diff --git a/costa/native/external/lapack/sggqrf.f b/costa/native/external/lapack/sggqrf.f new file mode 100644 index 000000000..bd26f715f --- /dev/null +++ b/costa/native/external/lapack/sggqrf.f @@ -0,0 +1,212 @@ + SUBROUTINE SGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGGQRF computes a generalized QR factorization of an N-by-M matrix A +* and an N-by-P matrix B: +* +* A = Q*R, B = Q*T*Z, +* +* where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal +* matrix, and R and T assume one of the forms: +* +* if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, +* ( 0 ) N-M N M-N +* M +* +* where R11 is upper triangular, and +* +* if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, +* P-N N ( T21 ) P +* P +* +* where T12 or T21 is upper triangular. +* +* In particular, if B is square and nonsingular, the GQR factorization +* of A and B implicitly gives the QR factorization of inv(B)*A: +* +* inv(B)*A = Z'*(inv(T)*R) +* +* where inv(B) denotes the inverse of the matrix B, and Z' denotes the +* transpose of the matrix Z. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of rows of the matrices A and B. N >= 0. +* +* M (input) INTEGER +* The number of columns of the matrix A. M >= 0. +* +* P (input) INTEGER +* The number of columns of the matrix B. P >= 0. +* +* A (input/output) REAL array, dimension (LDA,M) +* On entry, the N-by-M matrix A. +* On exit, the elements on and above the diagonal of the array +* contain the min(N,M)-by-M upper trapezoidal matrix R (R is +* upper triangular if N >= M); the elements below the diagonal, +* with the array TAUA, represent the orthogonal matrix Q as a +* product of min(N,M) elementary reflectors (see Further +* Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAUA (output) REAL array, dimension (min(N,M)) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix Q (see Further Details). +* +* B (input/output) REAL array, dimension (LDB,P) +* On entry, the N-by-P matrix B. +* On exit, if N <= P, the upper triangle of the subarray +* B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; +* if N > P, the elements on and above the (N-P)-th subdiagonal +* contain the N-by-P upper trapezoidal matrix T; the remaining +* elements, with the array TAUB, represent the orthogonal +* matrix Z as a product of elementary reflectors (see Further +* Details). +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* TAUB (output) REAL array, dimension (min(N,P)) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix Z (see Further Details). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N,M,P). +* For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3), +* where NB1 is the optimal blocksize for the QR factorization +* of an N-by-M matrix, NB2 is the optimal blocksize for the +* RQ factorization of an N-by-P matrix, and NB3 is the optimal +* blocksize for a call of SORMQR. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(n,m). +* +* Each H(i) has the form +* +* H(i) = I - taua * v * v' +* +* where taua is a real scalar, and v is a real vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), +* and taua in TAUA(i). +* To form Q explicitly, use LAPACK subroutine SORGQR. +* To use Q to update another matrix, use LAPACK subroutine SORMQR. +* +* The matrix Z is represented as a product of elementary reflectors +* +* Z = H(1) H(2) . . . H(k), where k = min(n,p). +* +* Each H(i) has the form +* +* H(i) = I - taub * v * v' +* +* where taub is a real scalar, and v is a real vector with +* v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in +* B(n-k+i,1:p-k+i-1), and taub in TAUB(i). +* To form Z explicitly, use LAPACK subroutine SORGRQ. +* To use Z to update another matrix, use LAPACK subroutine SORMRQ. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKOPT, NB, NB1, NB2, NB3 +* .. +* .. External Subroutines .. + EXTERNAL SGEQRF, SGERQF, SORMQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB1 = ILAENV( 1, 'SGEQRF', ' ', N, M, -1, -1 ) + NB2 = ILAENV( 1, 'SGERQF', ' ', N, P, -1, -1 ) + NB3 = ILAENV( 1, 'SORMQR', ' ', N, M, P, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LWKOPT = MAX( N, M, P )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( P.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, N, M, P ) .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* QR factorization of N-by-M matrix A: A = Q*R +* + CALL SGEQRF( N, M, A, LDA, TAUA, WORK, LWORK, INFO ) + LOPT = WORK( 1 ) +* +* Update B := Q'*B. +* + CALL SORMQR( 'Left', 'Transpose', N, P, MIN( N, M ), A, LDA, TAUA, + $ B, LDB, WORK, LWORK, INFO ) + LOPT = MAX( LOPT, INT( WORK( 1 ) ) ) +* +* RQ factorization of N-by-P matrix B: B = T*Z. +* + CALL SGERQF( N, P, B, LDB, TAUB, WORK, LWORK, INFO ) + WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) ) +* + RETURN +* +* End of SGGQRF +* + END diff --git a/costa/native/external/lapack/sggrqf.f b/costa/native/external/lapack/sggrqf.f new file mode 100644 index 000000000..7af19ecf6 --- /dev/null +++ b/costa/native/external/lapack/sggrqf.f @@ -0,0 +1,212 @@ + SUBROUTINE SGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGGRQF computes a generalized RQ factorization of an M-by-N matrix A +* and a P-by-N matrix B: +* +* A = R*Q, B = Z*T*Q, +* +* where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal +* matrix, and R and T assume one of the forms: +* +* if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, +* N-M M ( R21 ) N +* N +* +* where R12 or R21 is upper triangular, and +* +* if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, +* ( 0 ) P-N P N-P +* N +* +* where T11 is upper triangular. +* +* In particular, if B is square and nonsingular, the GRQ factorization +* of A and B implicitly gives the RQ factorization of A*inv(B): +* +* A*inv(B) = (R*inv(T))*Z' +* +* where inv(B) denotes the inverse of the matrix B, and Z' denotes the +* transpose of the matrix Z. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* P (input) INTEGER +* The number of rows of the matrix B. P >= 0. +* +* N (input) INTEGER +* The number of columns of the matrices A and B. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, if M <= N, the upper triangle of the subarray +* A(1:M,N-M+1:N) contains the M-by-M upper triangular matrix R; +* if M > N, the elements on and above the (M-N)-th subdiagonal +* contain the M-by-N upper trapezoidal matrix R; the remaining +* elements, with the array TAUA, represent the orthogonal +* matrix Q as a product of elementary reflectors (see Further +* Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAUA (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix Q (see Further Details). +* +* B (input/output) REAL array, dimension (LDB,N) +* On entry, the P-by-N matrix B. +* On exit, the elements on and above the diagonal of the array +* contain the min(P,N)-by-N upper trapezoidal matrix T (T is +* upper triangular if P >= N); the elements below the diagonal, +* with the array TAUB, represent the orthogonal matrix Z as a +* product of elementary reflectors (see Further Details). +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,P). +* +* TAUB (output) REAL array, dimension (min(P,N)) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix Z (see Further Details). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N,M,P). +* For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3), +* where NB1 is the optimal blocksize for the RQ factorization +* of an M-by-N matrix, NB2 is the optimal blocksize for the +* QR factorization of a P-by-N matrix, and NB3 is the optimal +* blocksize for a call of SORMRQ. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INF0= -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - taua * v * v' +* +* where taua is a real scalar, and v is a real vector with +* v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in +* A(m-k+i,1:n-k+i-1), and taua in TAUA(i). +* To form Q explicitly, use LAPACK subroutine SORGRQ. +* To use Q to update another matrix, use LAPACK subroutine SORMRQ. +* +* The matrix Z is represented as a product of elementary reflectors +* +* Z = H(1) H(2) . . . H(k), where k = min(p,n). +* +* Each H(i) has the form +* +* H(i) = I - taub * v * v' +* +* where taub is a real scalar, and v is a real vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in B(i+1:p,i), +* and taub in TAUB(i). +* To form Z explicitly, use LAPACK subroutine SORGQR. +* To use Z to update another matrix, use LAPACK subroutine SORMQR. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKOPT, NB, NB1, NB2, NB3 +* .. +* .. External Subroutines .. + EXTERNAL SGEQRF, SGERQF, SORMRQ, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB1 = ILAENV( 1, 'SGERQF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'SGEQRF', ' ', P, N, -1, -1 ) + NB3 = ILAENV( 1, 'SORMRQ', ' ', M, N, P, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LWKOPT = MAX( N, M, P)*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( P.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, M, P, N ) .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGRQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* RQ factorization of M-by-N matrix A: A = R*Q +* + CALL SGERQF( M, N, A, LDA, TAUA, WORK, LWORK, INFO ) + LOPT = WORK( 1 ) +* +* Update B := B*Q' +* + CALL SORMRQ( 'Right', 'Transpose', P, N, MIN( M, N ), + $ A( MAX( 1, M-N+1 ), 1 ), LDA, TAUA, B, LDB, WORK, + $ LWORK, INFO ) + LOPT = MAX( LOPT, INT( WORK( 1 ) ) ) +* +* QR factorization of P-by-N matrix B: B = Z*T +* + CALL SGEQRF( P, N, B, LDB, TAUB, WORK, LWORK, INFO ) + WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) ) +* + RETURN +* +* End of SGGRQF +* + END diff --git a/costa/native/external/lapack/sggsvd.f b/costa/native/external/lapack/sggsvd.f new file mode 100644 index 000000000..1e2612843 --- /dev/null +++ b/costa/native/external/lapack/sggsvd.f @@ -0,0 +1,336 @@ + SUBROUTINE SGGSVD( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, + $ LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, + $ IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), Q( LDQ, * ), U( LDU, * ), + $ V( LDV, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGGSVD computes the generalized singular value decomposition (GSVD) +* of an M-by-N real matrix A and P-by-N real matrix B: +* +* U'*A*Q = D1*( 0 R ), V'*B*Q = D2*( 0 R ) +* +* where U, V and Q are orthogonal matrices, and Z' is the transpose +* of Z. Let K+L = the effective numerical rank of the matrix (A',B')', +* then R is a K+L-by-K+L nonsingular upper triangular matrix, D1 and +* D2 are M-by-(K+L) and P-by-(K+L) "diagonal" matrices and of the +* following structures, respectively: +* +* If M-K-L >= 0, +* +* K L +* D1 = K ( I 0 ) +* L ( 0 C ) +* M-K-L ( 0 0 ) +* +* K L +* D2 = L ( 0 S ) +* P-L ( 0 0 ) +* +* N-K-L K L +* ( 0 R ) = K ( 0 R11 R12 ) +* L ( 0 0 R22 ) +* +* where +* +* C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), +* S = diag( BETA(K+1), ... , BETA(K+L) ), +* C**2 + S**2 = I. +* +* R is stored in A(1:K+L,N-K-L+1:N) on exit. +* +* If M-K-L < 0, +* +* K M-K K+L-M +* D1 = K ( I 0 0 ) +* M-K ( 0 C 0 ) +* +* K M-K K+L-M +* D2 = M-K ( 0 S 0 ) +* K+L-M ( 0 0 I ) +* P-L ( 0 0 0 ) +* +* N-K-L K M-K K+L-M +* ( 0 R ) = K ( 0 R11 R12 R13 ) +* M-K ( 0 0 R22 R23 ) +* K+L-M ( 0 0 0 R33 ) +* +* where +* +* C = diag( ALPHA(K+1), ... , ALPHA(M) ), +* S = diag( BETA(K+1), ... , BETA(M) ), +* C**2 + S**2 = I. +* +* (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored +* ( 0 R22 R23 ) +* in B(M-K+1:L,N+M-K-L+1:N) on exit. +* +* The routine computes C, S, R, and optionally the orthogonal +* transformation matrices U, V and Q. +* +* In particular, if B is an N-by-N nonsingular matrix, then the GSVD of +* A and B implicitly gives the SVD of A*inv(B): +* A*inv(B) = U*(D1*inv(D2))*V'. +* If ( A',B')' has orthonormal columns, then the GSVD of A and B is +* also equal to the CS decomposition of A and B. Furthermore, the GSVD +* can be used to derive the solution of the eigenvalue problem: +* A'*A x = lambda* B'*B x. +* In some literature, the GSVD of A and B is presented in the form +* U'*A*X = ( 0 D1 ), V'*B*X = ( 0 D2 ) +* where U and V are orthogonal and X is nonsingular, D1 and D2 are +* ``diagonal''. The former GSVD form can be converted to the latter +* form by taking the nonsingular matrix X as +* +* X = Q*( I 0 ) +* ( 0 inv(R) ). +* +* Arguments +* ========= +* +* JOBU (input) CHARACTER*1 +* = 'U': Orthogonal matrix U is computed; +* = 'N': U is not computed. +* +* JOBV (input) CHARACTER*1 +* = 'V': Orthogonal matrix V is computed; +* = 'N': V is not computed. +* +* JOBQ (input) CHARACTER*1 +* = 'Q': Orthogonal matrix Q is computed; +* = 'N': Q is not computed. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrices A and B. N >= 0. +* +* P (input) INTEGER +* The number of rows of the matrix B. P >= 0. +* +* K (output) INTEGER +* L (output) INTEGER +* On exit, K and L specify the dimension of the subblocks +* described in the Purpose section. +* K + L = effective numerical rank of (A',B')'. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A contains the triangular matrix R, or part of R. +* See Purpose for details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) REAL array, dimension (LDB,N) +* On entry, the P-by-N matrix B. +* On exit, B contains the triangular matrix R if M-K-L < 0. +* See Purpose for details. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDA >= max(1,P). +* +* ALPHA (output) REAL array, dimension (N) +* BETA (output) REAL array, dimension (N) +* On exit, ALPHA and BETA contain the generalized singular +* value pairs of A and B; +* ALPHA(1:K) = 1, +* BETA(1:K) = 0, +* and if M-K-L >= 0, +* ALPHA(K+1:K+L) = C, +* BETA(K+1:K+L) = S, +* or if M-K-L < 0, +* ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0 +* BETA(K+1:M) =S, BETA(M+1:K+L) =1 +* and +* ALPHA(K+L+1:N) = 0 +* BETA(K+L+1:N) = 0 +* +* U (output) REAL array, dimension (LDU,M) +* If JOBU = 'U', U contains the M-by-M orthogonal matrix U. +* If JOBU = 'N', U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= max(1,M) if +* JOBU = 'U'; LDU >= 1 otherwise. +* +* V (output) REAL array, dimension (LDV,P) +* If JOBV = 'V', V contains the P-by-P orthogonal matrix V. +* If JOBV = 'N', V is not referenced. +* +* LDV (input) INTEGER +* The leading dimension of the array V. LDV >= max(1,P) if +* JOBV = 'V'; LDV >= 1 otherwise. +* +* Q (output) REAL array, dimension (LDQ,N) +* If JOBQ = 'Q', Q contains the N-by-N orthogonal matrix Q. +* If JOBQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N) if +* JOBQ = 'Q'; LDQ >= 1 otherwise. +* +* WORK (workspace) REAL array, +* dimension (max(3*N,M,P)+N) +* +* IWORK (workspace/output) INTEGER array, dimension (N) +* On exit, IWORK stores the sorting information. More +* precisely, the following loop will sort ALPHA +* for I = K+1, min(M,K+L) +* swap ALPHA(I) and ALPHA(IWORK(I)) +* endfor +* such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N). +* +* INFO (output)INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, the Jacobi-type procedure failed to +* converge. For further details, see subroutine STGSJA. +* +* Internal Parameters +* =================== +* +* TOLA REAL +* TOLB REAL +* TOLA and TOLB are the thresholds to determine the effective +* rank of (A',B')'. Generally, they are set to +* TOLA = MAX(M,N)*norm(A)*MACHEPS, +* TOLB = MAX(P,N)*norm(B)*MACHEPS. +* The size of TOLA and TOLB may affect the size of backward +* errors of the decomposition. +* +* Further Details +* =============== +* +* 2-96 Based on modifications by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL WANTQ, WANTU, WANTV + INTEGER I, IBND, ISUB, J, NCYCLE + REAL ANORM, BNORM, SMAX, TEMP, TOLA, TOLB, ULP, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANGE + EXTERNAL LSAME, SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGGSVP, STGSJA, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( P.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -12 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGSVD', -INFO ) + RETURN + END IF +* +* Compute the Frobenius norm of matrices A and B +* + ANORM = SLANGE( '1', M, N, A, LDA, WORK ) + BNORM = SLANGE( '1', P, N, B, LDB, WORK ) +* +* Get machine precision and set up threshold for determining +* the effective numerical rank of the matrices A and B. +* + ULP = SLAMCH( 'Precision' ) + UNFL = SLAMCH( 'Safe Minimum' ) + TOLA = MAX( M, N )*MAX( ANORM, UNFL )*ULP + TOLB = MAX( P, N )*MAX( BNORM, UNFL )*ULP +* +* Preprocessing +* + CALL SGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, TOLA, + $ TOLB, K, L, U, LDU, V, LDV, Q, LDQ, IWORK, WORK, + $ WORK( N+1 ), INFO ) +* +* Compute the GSVD of two upper "triangular" matrices +* + CALL STGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, LDB, + $ TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, + $ WORK, NCYCLE, INFO ) +* +* Sort the singular values and store the pivot indices in IWORK +* Copy ALPHA to WORK, then sort ALPHA in WORK +* + CALL SCOPY( N, ALPHA, 1, WORK, 1 ) + IBND = MIN( L, M-K ) + DO 20 I = 1, IBND +* +* Scan for largest ALPHA(K+I) +* + ISUB = I + SMAX = WORK( K+I ) + DO 10 J = I + 1, IBND + TEMP = WORK( K+J ) + IF( TEMP.GT.SMAX ) THEN + ISUB = J + SMAX = TEMP + END IF + 10 CONTINUE + IF( ISUB.NE.I ) THEN + WORK( K+ISUB ) = WORK( K+I ) + WORK( K+I ) = SMAX + IWORK( K+I ) = K + ISUB + ELSE + IWORK( K+I ) = K + I + END IF + 20 CONTINUE +* + RETURN +* +* End of SGGSVD +* + END diff --git a/costa/native/external/lapack/sggsvp.f b/costa/native/external/lapack/sggsvp.f new file mode 100644 index 000000000..fcd021870 --- /dev/null +++ b/costa/native/external/lapack/sggsvp.f @@ -0,0 +1,394 @@ + SUBROUTINE SGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, + $ TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, + $ IWORK, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P + REAL TOLA, TOLB +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGGSVP computes orthogonal matrices U, V and Q such that +* +* N-K-L K L +* U'*A*Q = K ( 0 A12 A13 ) if M-K-L >= 0; +* L ( 0 0 A23 ) +* M-K-L ( 0 0 0 ) +* +* N-K-L K L +* = K ( 0 A12 A13 ) if M-K-L < 0; +* M-K ( 0 0 A23 ) +* +* N-K-L K L +* V'*B*Q = L ( 0 0 B13 ) +* P-L ( 0 0 0 ) +* +* where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular +* upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, +* otherwise A23 is (M-K)-by-L upper trapezoidal. K+L = the effective +* numerical rank of the (M+P)-by-N matrix (A',B')'. Z' denotes the +* transpose of Z. +* +* This decomposition is the preprocessing step for computing the +* Generalized Singular Value Decomposition (GSVD), see subroutine +* SGGSVD. +* +* Arguments +* ========= +* +* JOBU (input) CHARACTER*1 +* = 'U': Orthogonal matrix U is computed; +* = 'N': U is not computed. +* +* JOBV (input) CHARACTER*1 +* = 'V': Orthogonal matrix V is computed; +* = 'N': V is not computed. +* +* JOBQ (input) CHARACTER*1 +* = 'Q': Orthogonal matrix Q is computed; +* = 'N': Q is not computed. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* P (input) INTEGER +* The number of rows of the matrix B. P >= 0. +* +* N (input) INTEGER +* The number of columns of the matrices A and B. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A contains the triangular (or trapezoidal) matrix +* described in the Purpose section. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) REAL array, dimension (LDB,N) +* On entry, the P-by-N matrix B. +* On exit, B contains the triangular matrix described in +* the Purpose section. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,P). +* +* TOLA (input) REAL +* TOLB (input) REAL +* TOLA and TOLB are the thresholds to determine the effective +* numerical rank of matrix B and a subblock of A. Generally, +* they are set to +* TOLA = MAX(M,N)*norm(A)*MACHEPS, +* TOLB = MAX(P,N)*norm(B)*MACHEPS. +* The size of TOLA and TOLB may affect the size of backward +* errors of the decomposition. +* +* K (output) INTEGER +* L (output) INTEGER +* On exit, K and L specify the dimension of the subblocks +* described in Purpose. +* K + L = effective numerical rank of (A',B')'. +* +* U (output) REAL array, dimension (LDU,M) +* If JOBU = 'U', U contains the orthogonal matrix U. +* If JOBU = 'N', U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= max(1,M) if +* JOBU = 'U'; LDU >= 1 otherwise. +* +* V (output) REAL array, dimension (LDV,M) +* If JOBV = 'V', V contains the orthogonal matrix V. +* If JOBV = 'N', V is not referenced. +* +* LDV (input) INTEGER +* The leading dimension of the array V. LDV >= max(1,P) if +* JOBV = 'V'; LDV >= 1 otherwise. +* +* Q (output) REAL array, dimension (LDQ,N) +* If JOBQ = 'Q', Q contains the orthogonal matrix Q. +* If JOBQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N) if +* JOBQ = 'Q'; LDQ >= 1 otherwise. +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* TAU (workspace) REAL array, dimension (N) +* +* WORK (workspace) REAL array, dimension (max(3*N,M,P)) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* +* Further Details +* =============== +* +* The subroutine uses LAPACK subroutine SGEQPF for the QR factorization +* with column pivoting to detect the effective numerical rank of the +* a matrix. It may be replaced by a better rank determination strategy. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL FORWRD, WANTQ, WANTU, WANTV + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGEQPF, SGEQR2, SGERQ2, SLACPY, SLAPMT, SLASET, + $ SORG2R, SORM2R, SORMR2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) + FORWRD = .TRUE. +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( P.LT.0 ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -10 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGSVP', -INFO ) + RETURN + END IF +* +* QR with column pivoting of B: B*P = V*( S11 S12 ) +* ( 0 0 ) +* + DO 10 I = 1, N + IWORK( I ) = 0 + 10 CONTINUE + CALL SGEQPF( P, N, B, LDB, IWORK, TAU, WORK, INFO ) +* +* Update A := A*P +* + CALL SLAPMT( FORWRD, M, N, A, LDA, IWORK ) +* +* Determine the effective rank of matrix B. +* + L = 0 + DO 20 I = 1, MIN( P, N ) + IF( ABS( B( I, I ) ).GT.TOLB ) + $ L = L + 1 + 20 CONTINUE +* + IF( WANTV ) THEN +* +* Copy the details of V, and form V. +* + CALL SLASET( 'Full', P, P, ZERO, ZERO, V, LDV ) + IF( P.GT.1 ) + $ CALL SLACPY( 'Lower', P-1, N, B( 2, 1 ), LDB, V( 2, 1 ), + $ LDV ) + CALL SORG2R( P, P, MIN( P, N ), V, LDV, TAU, WORK, INFO ) + END IF +* +* Clean up B +* + DO 40 J = 1, L - 1 + DO 30 I = J + 1, L + B( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + IF( P.GT.L ) + $ CALL SLASET( 'Full', P-L, N, ZERO, ZERO, B( L+1, 1 ), LDB ) +* + IF( WANTQ ) THEN +* +* Set Q = I and Update Q := Q*P +* + CALL SLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) + CALL SLAPMT( FORWRD, N, N, Q, LDQ, IWORK ) + END IF +* + IF( P.GE.L .AND. N.NE.L ) THEN +* +* RQ factorization of (S11 S12): ( S11 S12 ) = ( 0 S12 )*Z +* + CALL SGERQ2( L, N, B, LDB, TAU, WORK, INFO ) +* +* Update A := A*Z' +* + CALL SORMR2( 'Right', 'Transpose', M, N, L, B, LDB, TAU, A, + $ LDA, WORK, INFO ) +* + IF( WANTQ ) THEN +* +* Update Q := Q*Z' +* + CALL SORMR2( 'Right', 'Transpose', N, N, L, B, LDB, TAU, Q, + $ LDQ, WORK, INFO ) + END IF +* +* Clean up B +* + CALL SLASET( 'Full', L, N-L, ZERO, ZERO, B, LDB ) + DO 60 J = N - L + 1, N + DO 50 I = J - N + L + 1, L + B( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE +* + END IF +* +* Let N-L L +* A = ( A11 A12 ) M, +* +* then the following does the complete QR decomposition of A11: +* +* A11 = U*( 0 T12 )*P1' +* ( 0 0 ) +* + DO 70 I = 1, N - L + IWORK( I ) = 0 + 70 CONTINUE + CALL SGEQPF( M, N-L, A, LDA, IWORK, TAU, WORK, INFO ) +* +* Determine the effective rank of A11 +* + K = 0 + DO 80 I = 1, MIN( M, N-L ) + IF( ABS( A( I, I ) ).GT.TOLA ) + $ K = K + 1 + 80 CONTINUE +* +* Update A12 := U'*A12, where A12 = A( 1:M, N-L+1:N ) +* + CALL SORM2R( 'Left', 'Transpose', M, L, MIN( M, N-L ), A, LDA, + $ TAU, A( 1, N-L+1 ), LDA, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Copy the details of U, and form U +* + CALL SLASET( 'Full', M, M, ZERO, ZERO, U, LDU ) + IF( M.GT.1 ) + $ CALL SLACPY( 'Lower', M-1, N-L, A( 2, 1 ), LDA, U( 2, 1 ), + $ LDU ) + CALL SORG2R( M, M, MIN( M, N-L ), U, LDU, TAU, WORK, INFO ) + END IF +* + IF( WANTQ ) THEN +* +* Update Q( 1:N, 1:N-L ) = Q( 1:N, 1:N-L )*P1 +* + CALL SLAPMT( FORWRD, N, N-L, Q, LDQ, IWORK ) + END IF +* +* Clean up A: set the strictly lower triangular part of +* A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0. +* + DO 100 J = 1, K - 1 + DO 90 I = J + 1, K + A( I, J ) = ZERO + 90 CONTINUE + 100 CONTINUE + IF( M.GT.K ) + $ CALL SLASET( 'Full', M-K, N-L, ZERO, ZERO, A( K+1, 1 ), LDA ) +* + IF( N-L.GT.K ) THEN +* +* RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1 +* + CALL SGERQ2( K, N-L, A, LDA, TAU, WORK, INFO ) +* + IF( WANTQ ) THEN +* +* Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1' +* + CALL SORMR2( 'Right', 'Transpose', N, N-L, K, A, LDA, TAU, + $ Q, LDQ, WORK, INFO ) + END IF +* +* Clean up A +* + CALL SLASET( 'Full', K, N-L-K, ZERO, ZERO, A, LDA ) + DO 120 J = N - L - K + 1, N - L + DO 110 I = J - N + L + K + 1, K + A( I, J ) = ZERO + 110 CONTINUE + 120 CONTINUE +* + END IF +* + IF( M.GT.K ) THEN +* +* QR factorization of A( K+1:M,N-L+1:N ) +* + CALL SGEQR2( M-K, L, A( K+1, N-L+1 ), LDA, TAU, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Update U(:,K+1:M) := U(:,K+1:M)*U1 +* + CALL SORM2R( 'Right', 'No transpose', M, M-K, MIN( M-K, L ), + $ A( K+1, N-L+1 ), LDA, TAU, U( 1, K+1 ), LDU, + $ WORK, INFO ) + END IF +* +* Clean up +* + DO 140 J = N - L + 1, N + DO 130 I = J - N + K + L + 1, M + A( I, J ) = ZERO + 130 CONTINUE + 140 CONTINUE +* + END IF +* + RETURN +* +* End of SGGSVP +* + END diff --git a/costa/native/external/lapack/sgtcon.f b/costa/native/external/lapack/sgtcon.f new file mode 100644 index 000000000..c719e6998 --- /dev/null +++ b/costa/native/external/lapack/sgtcon.f @@ -0,0 +1,166 @@ + SUBROUTINE SGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND, + $ WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL D( * ), DL( * ), DU( * ), DU2( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SGTCON estimates the reciprocal of the condition number of a real +* tridiagonal matrix A using the LU factorization as computed by +* SGTTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* DL (input) REAL array, dimension (N-1) +* The (n-1) multipliers that define the matrix L from the +* LU factorization of A as computed by SGTTRF. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the upper triangular matrix U from +* the LU factorization of A. +* +* DU (input) REAL array, dimension (N-1) +* The (n-1) elements of the first superdiagonal of U. +* +* DU2 (input) REAL array, dimension (N-2) +* The (n-2) elements of the second superdiagonal of U. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* ANORM (input) REAL +* If NORM = '1' or 'O', the 1-norm of the original matrix A. +* If NORM = 'I', the infinity-norm of the original matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) REAL array, dimension (2*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ONENRM + INTEGER I, KASE, KASE1 + REAL AINVNM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGTTRS, SLACON, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGTCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* +* Check that D(1:N) is non-zero. +* + DO 10 I = 1, N + IF( D( I ).EQ.ZERO ) + $ RETURN + 10 CONTINUE +* + AINVNM = ZERO + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 20 CONTINUE + CALL SLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(U)*inv(L). +* + CALL SGTTRS( 'No transpose', N, 1, DL, D, DU, DU2, IPIV, + $ WORK, N, INFO ) + ELSE +* +* Multiply by inv(L')*inv(U'). +* + CALL SGTTRS( 'Transpose', N, 1, DL, D, DU, DU2, IPIV, WORK, + $ N, INFO ) + END IF + GO TO 20 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of SGTCON +* + END diff --git a/costa/native/external/lapack/sgtrfs.f b/costa/native/external/lapack/sgtrfs.f new file mode 100644 index 000000000..531cf7dea --- /dev/null +++ b/costa/native/external/lapack/sgtrfs.f @@ -0,0 +1,357 @@ + SUBROUTINE SGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, + $ IPIV, B, LDB, X, LDX, FERR, BERR, WORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL B( LDB, * ), BERR( * ), D( * ), DF( * ), + $ DL( * ), DLF( * ), DU( * ), DU2( * ), DUF( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SGTRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is tridiagonal, and provides +* error bounds and backward error estimates for the solution. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose = Transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input) REAL array, dimension (N-1) +* The (n-1) subdiagonal elements of A. +* +* D (input) REAL array, dimension (N) +* The diagonal elements of A. +* +* DU (input) REAL array, dimension (N-1) +* The (n-1) superdiagonal elements of A. +* +* DLF (input) REAL array, dimension (N-1) +* The (n-1) multipliers that define the matrix L from the +* LU factorization of A as computed by SGTTRF. +* +* DF (input) REAL array, dimension (N) +* The n diagonal elements of the upper triangular matrix U from +* the LU factorization of A. +* +* DUF (input) REAL array, dimension (N-1) +* The (n-1) elements of the first superdiagonal of U. +* +* DU2 (input) REAL array, dimension (N-2) +* The (n-2) elements of the second superdiagonal of U. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* B (input) REAL array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) REAL array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by SGTTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANSN, TRANST + INTEGER COUNT, I, J, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGTTRS, SLACON, SLAGTM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGTRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'T' + ELSE + TRANSN = 'T' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = 4 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 110 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL SLAGTM( TRANS, N, 1, -ONE, DL, D, DU, X( 1, J ), LDX, ONE, + $ WORK( N+1 ), N ) +* +* Compute abs(op(A))*abs(x) + abs(b) for use in the backward +* error bound. +* + IF( NOTRAN ) THEN + IF( N.EQ.1 ) THEN + WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) ) + ELSE + WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) ) + + $ ABS( DU( 1 )*X( 2, J ) ) + DO 30 I = 2, N - 1 + WORK( I ) = ABS( B( I, J ) ) + + $ ABS( DL( I-1 )*X( I-1, J ) ) + + $ ABS( D( I )*X( I, J ) ) + + $ ABS( DU( I )*X( I+1, J ) ) + 30 CONTINUE + WORK( N ) = ABS( B( N, J ) ) + + $ ABS( DL( N-1 )*X( N-1, J ) ) + + $ ABS( D( N )*X( N, J ) ) + END IF + ELSE + IF( N.EQ.1 ) THEN + WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) ) + ELSE + WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) ) + + $ ABS( DL( 1 )*X( 2, J ) ) + DO 40 I = 2, N - 1 + WORK( I ) = ABS( B( I, J ) ) + + $ ABS( DU( I-1 )*X( I-1, J ) ) + + $ ABS( D( I )*X( I, J ) ) + + $ ABS( DL( I )*X( I+1, J ) ) + 40 CONTINUE + WORK( N ) = ABS( B( N, J ) ) + + $ ABS( DU( N-1 )*X( N-1, J ) ) + + $ ABS( D( N )*X( N, J ) ) + END IF + END IF +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + S = ZERO + DO 50 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 50 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL SGTTRS( TRANS, N, 1, DLF, DF, DUF, DU2, IPIV, + $ WORK( N+1 ), N, INFO ) + CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use SLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 60 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 60 CONTINUE +* + KASE = 0 + 70 CONTINUE + CALL SLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**T). +* + CALL SGTTRS( TRANST, N, 1, DLF, DF, DUF, DU2, IPIV, + $ WORK( N+1 ), N, INFO ) + DO 80 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 80 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 90 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 90 CONTINUE + CALL SGTTRS( TRANSN, N, 1, DLF, DF, DUF, DU2, IPIV, + $ WORK( N+1 ), N, INFO ) + END IF + GO TO 70 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 100 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 100 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 110 CONTINUE +* + RETURN +* +* End of SGTRFS +* + END diff --git a/costa/native/external/lapack/sgtsv.f b/costa/native/external/lapack/sgtsv.f new file mode 100644 index 000000000..7422e47d5 --- /dev/null +++ b/costa/native/external/lapack/sgtsv.f @@ -0,0 +1,263 @@ + SUBROUTINE SGTSV( N, NRHS, DL, D, DU, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL B( LDB, * ), D( * ), DL( * ), DU( * ) +* .. +* +* Purpose +* ======= +* +* SGTSV solves the equation +* +* A*X = B, +* +* where A is an n by n tridiagonal matrix, by Gaussian elimination with +* partial pivoting. +* +* Note that the equation A'*X = B may be solved by interchanging the +* order of the arguments DU and DL. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input/output) REAL array, dimension (N-1) +* On entry, DL must contain the (n-1) sub-diagonal elements of +* A. +* +* On exit, DL is overwritten by the (n-2) elements of the +* second super-diagonal of the upper triangular matrix U from +* the LU factorization of A, in DL(1), ..., DL(n-2). +* +* D (input/output) REAL array, dimension (N) +* On entry, D must contain the diagonal elements of A. +* +* On exit, D is overwritten by the n diagonal elements of U. +* +* DU (input/output) REAL array, dimension (N-1) +* On entry, DU must contain the (n-1) super-diagonal elements +* of A. +* +* On exit, DU is overwritten by the (n-1) elements of the first +* super-diagonal of U. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the N by NRHS matrix of right hand side matrix B. +* On exit, if INFO = 0, the N by NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero, and the solution +* has not been computed. The factorization has not been +* completed unless i = N. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL FACT, TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGTSV ', -INFO ) + RETURN + END IF +* + IF( N.EQ.0 ) + $ RETURN +* + IF( NRHS.EQ.1 ) THEN + DO 10 I = 1, N - 2 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN +* +* No row interchange required +* + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + B( I+1, 1 ) = B( I+1, 1 ) - FACT*B( I, 1 ) + ELSE + INFO = I + RETURN + END IF + DL( I ) = ZERO + ELSE +* +* Interchange rows I and I+1 +* + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + TEMP = D( I+1 ) + D( I+1 ) = DU( I ) - FACT*TEMP + DL( I ) = DU( I+1 ) + DU( I+1 ) = -FACT*DL( I ) + DU( I ) = TEMP + TEMP = B( I, 1 ) + B( I, 1 ) = B( I+1, 1 ) + B( I+1, 1 ) = TEMP - FACT*B( I+1, 1 ) + END IF + 10 CONTINUE + IF( N.GT.1 ) THEN + I = N - 1 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + B( I+1, 1 ) = B( I+1, 1 ) - FACT*B( I, 1 ) + ELSE + INFO = I + RETURN + END IF + ELSE + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + TEMP = D( I+1 ) + D( I+1 ) = DU( I ) - FACT*TEMP + DU( I ) = TEMP + TEMP = B( I, 1 ) + B( I, 1 ) = B( I+1, 1 ) + B( I+1, 1 ) = TEMP - FACT*B( I+1, 1 ) + END IF + END IF + IF( D( N ).EQ.ZERO ) THEN + INFO = N + RETURN + END IF + ELSE + DO 40 I = 1, N - 2 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN +* +* No row interchange required +* + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + DO 20 J = 1, NRHS + B( I+1, J ) = B( I+1, J ) - FACT*B( I, J ) + 20 CONTINUE + ELSE + INFO = I + RETURN + END IF + DL( I ) = ZERO + ELSE +* +* Interchange rows I and I+1 +* + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + TEMP = D( I+1 ) + D( I+1 ) = DU( I ) - FACT*TEMP + DL( I ) = DU( I+1 ) + DU( I+1 ) = -FACT*DL( I ) + DU( I ) = TEMP + DO 30 J = 1, NRHS + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - FACT*B( I+1, J ) + 30 CONTINUE + END IF + 40 CONTINUE + IF( N.GT.1 ) THEN + I = N - 1 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + DO 50 J = 1, NRHS + B( I+1, J ) = B( I+1, J ) - FACT*B( I, J ) + 50 CONTINUE + ELSE + INFO = I + RETURN + END IF + ELSE + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + TEMP = D( I+1 ) + D( I+1 ) = DU( I ) - FACT*TEMP + DU( I ) = TEMP + DO 60 J = 1, NRHS + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - FACT*B( I+1, J ) + 60 CONTINUE + END IF + END IF + IF( D( N ).EQ.ZERO ) THEN + INFO = N + RETURN + END IF + END IF +* +* Back solve with the matrix U from the factorization. +* + IF( NRHS.LE.2 ) THEN + J = 1 + 70 CONTINUE + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / D( N-1 ) + DO 80 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DL( I )* + $ B( I+2, J ) ) / D( I ) + 80 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 70 + END IF + ELSE + DO 100 J = 1, NRHS + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 90 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DL( I )* + $ B( I+2, J ) ) / D( I ) + 90 CONTINUE + 100 CONTINUE + END IF +* + RETURN +* +* End of SGTSV +* + END diff --git a/costa/native/external/lapack/sgtsvx.f b/costa/native/external/lapack/sgtsvx.f new file mode 100644 index 000000000..2188bd6a2 --- /dev/null +++ b/costa/native/external/lapack/sgtsvx.f @@ -0,0 +1,293 @@ + SUBROUTINE SGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, + $ DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER FACT, TRANS + INTEGER INFO, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL B( LDB, * ), BERR( * ), D( * ), DF( * ), + $ DL( * ), DLF( * ), DU( * ), DU2( * ), DUF( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SGTSVX uses the LU factorization to compute the solution to a real +* system of linear equations A * X = B or A**T * X = B, +* where A is a tridiagonal matrix of order N and X and B are N-by-NRHS +* matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'N', the LU decomposition is used to factor the matrix A +* as A = L * U, where L is a product of permutation and unit lower +* bidiagonal matrices and U is upper triangular with nonzeros in +* only the main diagonal and first two superdiagonals. +* +* 2. If some U(i,i)=0, so that U is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 3. The system of equations is solved for X using the factored form +* of A. +* +* 4. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of A has been +* supplied on entry. +* = 'F': DLF, DF, DUF, DU2, and IPIV contain the factored +* form of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV +* will not be modified. +* = 'N': The matrix will be copied to DLF, DF, and DUF +* and factored. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose = Transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input) REAL array, dimension (N-1) +* The (n-1) subdiagonal elements of A. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of A. +* +* DU (input) REAL array, dimension (N-1) +* The (n-1) superdiagonal elements of A. +* +* DLF (input or output) REAL array, dimension (N-1) +* If FACT = 'F', then DLF is an input argument and on entry +* contains the (n-1) multipliers that define the matrix L from +* the LU factorization of A as computed by SGTTRF. +* +* If FACT = 'N', then DLF is an output argument and on exit +* contains the (n-1) multipliers that define the matrix L from +* the LU factorization of A. +* +* DF (input or output) REAL array, dimension (N) +* If FACT = 'F', then DF is an input argument and on entry +* contains the n diagonal elements of the upper triangular +* matrix U from the LU factorization of A. +* +* If FACT = 'N', then DF is an output argument and on exit +* contains the n diagonal elements of the upper triangular +* matrix U from the LU factorization of A. +* +* DUF (input or output) REAL array, dimension (N-1) +* If FACT = 'F', then DUF is an input argument and on entry +* contains the (n-1) elements of the first superdiagonal of U. +* +* If FACT = 'N', then DUF is an output argument and on exit +* contains the (n-1) elements of the first superdiagonal of U. +* +* DU2 (input or output) REAL array, dimension (N-2) +* If FACT = 'F', then DU2 is an input argument and on entry +* contains the (n-2) elements of the second superdiagonal of +* U. +* +* If FACT = 'N', then DU2 is an output argument and on exit +* contains the (n-2) elements of the second superdiagonal of +* U. +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains the pivot indices from the LU factorization of A as +* computed by SGTTRF. +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains the pivot indices from the LU factorization of A; +* row i of the matrix was interchanged with row IPIV(i). +* IPIV(i) will always be either i or i+1; IPIV(i) = i indicates +* a row interchange was not required. +* +* B (input) REAL array, dimension (LDB,NRHS) +* The N-by-NRHS right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) REAL array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) REAL +* The estimate of the reciprocal condition number of the matrix +* A. If RCOND is less than the machine precision (in +* particular, if RCOND = 0), the matrix is singular to working +* precision. This condition is indicated by a return code of +* INFO > 0. +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: U(i,i) is exactly zero. The factorization +* has not been completed unless i = N, but the +* factor U is exactly singular, so the solution +* and error bounds could not be computed. +* RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT, NOTRAN + CHARACTER NORM + REAL ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANGT + EXTERNAL LSAME, SLAMCH, SLANGT +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGTCON, SGTRFS, SGTTRF, SGTTRS, SLACPY, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGTSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the LU factorization of A. +* + CALL SCOPY( N, D, 1, DF, 1 ) + IF( N.GT.1 ) THEN + CALL SCOPY( N-1, DL, 1, DLF, 1 ) + CALL SCOPY( N-1, DU, 1, DUF, 1 ) + END IF + CALL SGTTRF( N, DLF, DF, DUF, DU2, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = SLANGT( NORM, N, DL, D, DU ) +* +* Compute the reciprocal of the condition number of A. +* + CALL SGTCON( NORM, N, DLF, DF, DUF, DU2, IPIV, ANORM, RCOND, WORK, + $ IWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution vectors X. +* + CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL SGTTRS( TRANS, N, NRHS, DLF, DF, DUF, DU2, IPIV, X, LDX, + $ INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL SGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, IPIV, + $ B, LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* + RETURN +* +* End of SGTSVX +* + END diff --git a/costa/native/external/lapack/sgttrf.f b/costa/native/external/lapack/sgttrf.f new file mode 100644 index 000000000..c7732ba80 --- /dev/null +++ b/costa/native/external/lapack/sgttrf.f @@ -0,0 +1,169 @@ + SUBROUTINE SGTTRF( N, DL, D, DU, DU2, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* Purpose +* ======= +* +* SGTTRF computes an LU factorization of a real tridiagonal matrix A +* using elimination with partial pivoting and row interchanges. +* +* The factorization has the form +* A = L * U +* where L is a product of permutation and unit lower bidiagonal +* matrices and U is upper triangular with nonzeros in only the main +* diagonal and first two superdiagonals. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. +* +* DL (input/output) REAL array, dimension (N-1) +* On entry, DL must contain the (n-1) sub-diagonal elements of +* A. +* +* On exit, DL is overwritten by the (n-1) multipliers that +* define the matrix L from the LU factorization of A. +* +* D (input/output) REAL array, dimension (N) +* On entry, D must contain the diagonal elements of A. +* +* On exit, D is overwritten by the n diagonal elements of the +* upper triangular matrix U from the LU factorization of A. +* +* DU (input/output) REAL array, dimension (N-1) +* On entry, DU must contain the (n-1) super-diagonal elements +* of A. +* +* On exit, DU is overwritten by the (n-1) elements of the first +* super-diagonal of U. +* +* DU2 (output) REAL array, dimension (N-2) +* On exit, DU2 is overwritten by the (n-2) elements of the +* second super-diagonal of U. +* +* IPIV (output) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, U(k,k) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL FACT, TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'SGTTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Initialize IPIV(i) = i and DU2(I) = 0 +* + DO 10 I = 1, N + IPIV( I ) = I + 10 CONTINUE + DO 20 I = 1, N - 2 + DU2( I ) = ZERO + 20 CONTINUE +* + DO 30 I = 1, N - 2 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN +* +* No row interchange required, eliminate DL(I) +* + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + DL( I ) = FACT + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + END IF + ELSE +* +* Interchange rows I and I+1, eliminate DL(I) +* + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + DL( I ) = FACT + TEMP = DU( I ) + DU( I ) = D( I+1 ) + D( I+1 ) = TEMP - FACT*D( I+1 ) + DU2( I ) = DU( I+1 ) + DU( I+1 ) = -FACT*DU( I+1 ) + IPIV( I ) = I + 1 + END IF + 30 CONTINUE + IF( N.GT.1 ) THEN + I = N - 1 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + DL( I ) = FACT + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + END IF + ELSE + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + DL( I ) = FACT + TEMP = DU( I ) + DU( I ) = D( I+1 ) + D( I+1 ) = TEMP - FACT*D( I+1 ) + IPIV( I ) = I + 1 + END IF + END IF +* +* Check for a zero on the diagonal of U. +* + DO 40 I = 1, N + IF( D( I ).EQ.ZERO ) THEN + INFO = I + GO TO 50 + END IF + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of SGTTRF +* + END diff --git a/costa/native/external/lapack/sgttrs.f b/costa/native/external/lapack/sgttrs.f new file mode 100644 index 000000000..4d19cc570 --- /dev/null +++ b/costa/native/external/lapack/sgttrs.f @@ -0,0 +1,141 @@ + SUBROUTINE SGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* Purpose +* ======= +* +* SGTTRS solves one of the systems of equations +* A*X = B or A'*X = B, +* with a tridiagonal matrix A using the LU factorization computed +* by SGTTRF. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER +* Specifies the form of the system of equations. +* = 'N': A * X = B (No transpose) +* = 'T': A'* X = B (Transpose) +* = 'C': A'* X = B (Conjugate transpose = Transpose) +* +* N (input) INTEGER +* The order of the matrix A. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input) REAL array, dimension (N-1) +* The (n-1) multipliers that define the matrix L from the +* LU factorization of A. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the upper triangular matrix U from +* the LU factorization of A. +* +* DU (input) REAL array, dimension (N-1) +* The (n-1) elements of the first super-diagonal of U. +* +* DU2 (input) REAL array, dimension (N-2) +* The (n-2) elements of the second super-diagonal of U. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the matrix of right hand side vectors B. +* On exit, B is overwritten by the solution vectors X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL NOTRAN + INTEGER ITRANS, J, JB, NB +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SGTTS2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOTRAN = ( TRANS.EQ.'N' .OR. TRANS.EQ.'n' ) + IF( .NOT.NOTRAN .AND. .NOT.( TRANS.EQ.'T' .OR. TRANS.EQ. + $ 't' ) .AND. .NOT.( TRANS.EQ.'C' .OR. TRANS.EQ.'c' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGTTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Decode TRANS +* + IF( NOTRAN ) THEN + ITRANS = 0 + ELSE + ITRANS = 1 + END IF +* +* Determine the number of right-hand sides to solve at a time. +* + IF( NRHS.EQ.1 ) THEN + NB = 1 + ELSE + NB = MAX( 1, ILAENV( 1, 'SGTTRS', TRANS, N, NRHS, -1, -1 ) ) + END IF +* + IF( NB.GE.NRHS ) THEN + CALL SGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) + ELSE + DO 10 J = 1, NRHS, NB + JB = MIN( NRHS-J+1, NB ) + CALL SGTTS2( ITRANS, N, JB, DL, D, DU, DU2, IPIV, B( 1, J ), + $ LDB ) + 10 CONTINUE + END IF +* +* End of SGTTRS +* + END diff --git a/costa/native/external/lapack/sgtts2.f b/costa/native/external/lapack/sgtts2.f new file mode 100644 index 000000000..268fdb968 --- /dev/null +++ b/costa/native/external/lapack/sgtts2.f @@ -0,0 +1,197 @@ + SUBROUTINE SGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER ITRANS, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* Purpose +* ======= +* +* SGTTS2 solves one of the systems of equations +* A*X = B or A'*X = B, +* with a tridiagonal matrix A using the LU factorization computed +* by SGTTRF. +* +* Arguments +* ========= +* +* ITRANS (input) INTEGER +* Specifies the form of the system of equations. +* = 0: A * X = B (No transpose) +* = 1: A'* X = B (Transpose) +* = 2: A'* X = B (Conjugate transpose = Transpose) +* +* N (input) INTEGER +* The order of the matrix A. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input) REAL array, dimension (N-1) +* The (n-1) multipliers that define the matrix L from the +* LU factorization of A. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the upper triangular matrix U from +* the LU factorization of A. +* +* DU (input) REAL array, dimension (N-1) +* The (n-1) elements of the first super-diagonal of U. +* +* DU2 (input) REAL array, dimension (N-2) +* The (n-2) elements of the second super-diagonal of U. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the matrix of right hand side vectors B. +* On exit, B is overwritten by the solution vectors X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IP, J + REAL TEMP +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( ITRANS.EQ.0 ) THEN +* +* Solve A*X = B using the LU factorization of A, +* overwriting each right hand side vector with its solution. +* + IF( NRHS.LE.1 ) THEN + J = 1 + 10 CONTINUE +* +* Solve L*x = b. +* + DO 20 I = 1, N - 1 + IP = IPIV( I ) + TEMP = B( I+1-IP+I, J ) - DL( I )*B( IP, J ) + B( I, J ) = B( IP, J ) + B( I+1, J ) = TEMP + 20 CONTINUE +* +* Solve U*x = b. +* + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 30 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )* + $ B( I+2, J ) ) / D( I ) + 30 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 10 + END IF + ELSE + DO 60 J = 1, NRHS +* +* Solve L*x = b. +* + DO 40 I = 1, N - 1 + IF( IPIV( I ).EQ.I ) THEN + B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J ) + ELSE + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - DL( I )*B( I, J ) + END IF + 40 CONTINUE +* +* Solve U*x = b. +* + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 50 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )* + $ B( I+2, J ) ) / D( I ) + 50 CONTINUE + 60 CONTINUE + END IF + ELSE +* +* Solve A' * X = B. +* + IF( NRHS.LE.1 ) THEN +* +* Solve U'*x = b. +* + J = 1 + 70 CONTINUE + B( 1, J ) = B( 1, J ) / D( 1 ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) + DO 80 I = 3, N + B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )-DU2( I-2 )* + $ B( I-2, J ) ) / D( I ) + 80 CONTINUE +* +* Solve L'*x = b. +* + DO 90 I = N - 1, 1, -1 + IP = IPIV( I ) + TEMP = B( I, J ) - DL( I )*B( I+1, J ) + B( I, J ) = B( IP, J ) + B( IP, J ) = TEMP + 90 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 70 + END IF +* + ELSE + DO 120 J = 1, NRHS +* +* Solve U'*x = b. +* + B( 1, J ) = B( 1, J ) / D( 1 ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) + DO 100 I = 3, N + B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )- + $ DU2( I-2 )*B( I-2, J ) ) / D( I ) + 100 CONTINUE + DO 110 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - DL( I )*B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - DL( I )*TEMP + B( I, J ) = TEMP + END IF + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* +* End of SGTTS2 +* + END diff --git a/costa/native/external/lapack/shgeqz.f b/costa/native/external/lapack/shgeqz.f new file mode 100644 index 000000000..0f5a28363 --- /dev/null +++ b/costa/native/external/lapack/shgeqz.f @@ -0,0 +1,1243 @@ + SUBROUTINE SHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, Q, LDQ, Z, LDZ, WORK, + $ LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, COMPZ, JOB + INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), Q( LDQ, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SHGEQZ implements a single-/double-shift version of the QZ method for +* finding the generalized eigenvalues +* +* w(j)=(ALPHAR(j) + i*ALPHAI(j))/BETAR(j) of the equation +* +* det( A - w(i) B ) = 0 +* +* In addition, the pair A,B may be reduced to generalized Schur form: +* B is upper triangular, and A is block upper triangular, where the +* diagonal blocks are either 1-by-1 or 2-by-2, the 2-by-2 blocks having +* complex generalized eigenvalues (see the description of the argument +* JOB.) +* +* If JOB='S', then the pair (A,B) is simultaneously reduced to Schur +* form by applying one orthogonal tranformation (usually called Q) on +* the left and another (usually called Z) on the right. The 2-by-2 +* upper-triangular diagonal blocks of B corresponding to 2-by-2 blocks +* of A will be reduced to positive diagonal matrices. (I.e., +* if A(j+1,j) is non-zero, then B(j+1,j)=B(j,j+1)=0 and B(j,j) and +* B(j+1,j+1) will be positive.) +* +* If JOB='E', then at each iteration, the same transformations +* are computed, but they are only applied to those parts of A and B +* which are needed to compute ALPHAR, ALPHAI, and BETAR. +* +* If JOB='S' and COMPQ and COMPZ are 'V' or 'I', then the orthogonal +* transformations used to reduce (A,B) are accumulated into the arrays +* Q and Z s.t.: +* +* Q(in) A(in) Z(in)* = Q(out) A(out) Z(out)* +* Q(in) B(in) Z(in)* = Q(out) B(out) Z(out)* +* +* Ref: C.B. Moler & G.W. Stewart, "An Algorithm for Generalized Matrix +* Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), +* pp. 241--256. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* = 'E': compute only ALPHAR, ALPHAI, and BETA. A and B will +* not necessarily be put into generalized Schur form. +* = 'S': put A and B into generalized Schur form, as well +* as computing ALPHAR, ALPHAI, and BETA. +* +* COMPQ (input) CHARACTER*1 +* = 'N': do not modify Q. +* = 'V': multiply the array Q on the right by the transpose of +* the orthogonal tranformation that is applied to the +* left side of A and B to reduce them to Schur form. +* = 'I': like COMPQ='V', except that Q will be initialized to +* the identity first. +* +* COMPZ (input) CHARACTER*1 +* = 'N': do not modify Z. +* = 'V': multiply the array Z on the right by the orthogonal +* tranformation that is applied to the right side of +* A and B to reduce them to Schur form. +* = 'I': like COMPZ='V', except that Z will be initialized to +* the identity first. +* +* N (input) INTEGER +* The order of the matrices A, B, Q, and Z. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that A is already upper triangular in rows and +* columns 1:ILO-1 and IHI+1:N. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* A (input/output) REAL array, dimension (LDA, N) +* On entry, the N-by-N upper Hessenberg matrix A. Elements +* below the subdiagonal must be zero. +* If JOB='S', then on exit A and B will have been +* simultaneously reduced to generalized Schur form. +* If JOB='E', then on exit A will have been destroyed. +* The diagonal blocks will be correct, but the off-diagonal +* portion will be meaningless. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max( 1, N ). +* +* B (input/output) REAL array, dimension (LDB, N) +* On entry, the N-by-N upper triangular matrix B. Elements +* below the diagonal must be zero. 2-by-2 blocks in B +* corresponding to 2-by-2 blocks in A will be reduced to +* positive diagonal form. (I.e., if A(j+1,j) is non-zero, +* then B(j+1,j)=B(j,j+1)=0 and B(j,j) and B(j+1,j+1) will be +* positive.) +* If JOB='S', then on exit A and B will have been +* simultaneously reduced to Schur form. +* If JOB='E', then on exit B will have been destroyed. +* Elements corresponding to diagonal blocks of A will be +* correct, but the off-diagonal portion will be meaningless. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max( 1, N ). +* +* ALPHAR (output) REAL array, dimension (N) +* ALPHAR(1:N) will be set to real parts of the diagonal +* elements of A that would result from reducing A and B to +* Schur form and then further reducing them both to triangular +* form using unitary transformations s.t. the diagonal of B +* was non-negative real. Thus, if A(j,j) is in a 1-by-1 block +* (i.e., A(j+1,j)=A(j,j+1)=0), then ALPHAR(j)=A(j,j). +* Note that the (real or complex) values +* (ALPHAR(j) + i*ALPHAI(j))/BETA(j), j=1,...,N, are the +* generalized eigenvalues of the matrix pencil A - wB. +* +* ALPHAI (output) REAL array, dimension (N) +* ALPHAI(1:N) will be set to imaginary parts of the diagonal +* elements of A that would result from reducing A and B to +* Schur form and then further reducing them both to triangular +* form using unitary transformations s.t. the diagonal of B +* was non-negative real. Thus, if A(j,j) is in a 1-by-1 block +* (i.e., A(j+1,j)=A(j,j+1)=0), then ALPHAR(j)=0. +* Note that the (real or complex) values +* (ALPHAR(j) + i*ALPHAI(j))/BETA(j), j=1,...,N, are the +* generalized eigenvalues of the matrix pencil A - wB. +* +* BETA (output) REAL array, dimension (N) +* BETA(1:N) will be set to the (real) diagonal elements of B +* that would result from reducing A and B to Schur form and +* then further reducing them both to triangular form using +* unitary transformations s.t. the diagonal of B was +* non-negative real. Thus, if A(j,j) is in a 1-by-1 block +* (i.e., A(j+1,j)=A(j,j+1)=0), then BETA(j)=B(j,j). +* Note that the (real or complex) values +* (ALPHAR(j) + i*ALPHAI(j))/BETA(j), j=1,...,N, are the +* generalized eigenvalues of the matrix pencil A - wB. +* (Note that BETA(1:N) will always be non-negative, and no +* BETAI is necessary.) +* +* Q (input/output) REAL array, dimension (LDQ, N) +* If COMPQ='N', then Q will not be referenced. +* If COMPQ='V' or 'I', then the transpose of the orthogonal +* transformations which are applied to A and B on the left +* will be applied to the array Q on the right. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= 1. +* If COMPQ='V' or 'I', then LDQ >= N. +* +* Z (input/output) REAL array, dimension (LDZ, N) +* If COMPZ='N', then Z will not be referenced. +* If COMPZ='V' or 'I', then the orthogonal transformations +* which are applied to A and B on the right will be applied +* to the array Z on the right. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1. +* If COMPZ='V' or 'I', then LDZ >= N. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO >= 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* = 1,...,N: the QZ iteration did not converge. (A,B) is not +* in Schur form, but ALPHAR(i), ALPHAI(i), and +* BETA(i), i=INFO+1,...,N should be correct. +* = N+1,...,2*N: the shift calculation failed. (A,B) is not +* in Schur form, but ALPHAR(i), ALPHAI(i), and +* BETA(i), i=INFO-N+1,...,N should be correct. +* > 2*N: various "impossible" errors. +* +* Further Details +* =============== +* +* Iteration counters: +* +* JITER -- counts iterations. +* IITER -- counts iterations run since ILAST was last +* changed. This is therefore reset only when a 1-by-1 or +* 2-by-2 block deflates off the bottom. +* +* ===================================================================== +* +* .. Parameters .. +* $ SAFETY = 1.0E+0 ) + REAL HALF, ZERO, ONE, SAFETY + PARAMETER ( HALF = 0.5E+0, ZERO = 0.0E+0, ONE = 1.0E+0, + $ SAFETY = 1.0E+2 ) +* .. +* .. Local Scalars .. + LOGICAL ILAZR2, ILAZRO, ILPIVT, ILQ, ILSCHR, ILZ, + $ LQUERY + INTEGER ICOMPQ, ICOMPZ, IFIRST, IFRSTM, IITER, ILAST, + $ ILASTM, IN, ISCHUR, ISTART, J, JC, JCH, JITER, + $ JR, MAXIT + REAL A11, A12, A1I, A1R, A21, A22, A2I, A2R, AD11, + $ AD11L, AD12, AD12L, AD21, AD21L, AD22, AD22L, + $ AD32L, AN, ANORM, ASCALE, ATOL, B11, B1A, B1I, + $ B1R, B22, B2A, B2I, B2R, BN, BNORM, BSCALE, + $ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL, + $ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX, + $ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T, + $ TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L, + $ U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR, + $ WR2 +* .. +* .. Local Arrays .. + REAL V( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANHS, SLAPY2, SLAPY3 + EXTERNAL LSAME, SLAMCH, SLANHS, SLAPY2, SLAPY3 +* .. +* .. External Subroutines .. + EXTERNAL SLAG2, SLARFG, SLARTG, SLASET, SLASV2, SROT, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Decode JOB, COMPQ, COMPZ +* + IF( LSAME( JOB, 'E' ) ) THEN + ILSCHR = .FALSE. + ISCHUR = 1 + ELSE IF( LSAME( JOB, 'S' ) ) THEN + ILSCHR = .TRUE. + ISCHUR = 2 + ELSE + ISCHUR = 0 + END IF +* + IF( LSAME( COMPQ, 'N' ) ) THEN + ILQ = .FALSE. + ICOMPQ = 1 + ELSE IF( LSAME( COMPQ, 'V' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 2 + ELSE IF( LSAME( COMPQ, 'I' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 3 + ELSE + ICOMPQ = 0 + END IF +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ILZ = .FALSE. + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 2 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 3 + ELSE + ICOMPZ = 0 + END IF +* +* Check Argument Values +* + INFO = 0 + WORK( 1 ) = MAX( 1, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( ISCHUR.EQ.0 ) THEN + INFO = -1 + ELSE IF( ICOMPQ.EQ.0 ) THEN + INFO = -2 + ELSE IF( ICOMPZ.EQ.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( ILO.LT.1 ) THEN + INFO = -5 + ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN + INFO = -6 + ELSE IF( LDA.LT.N ) THEN + INFO = -8 + ELSE IF( LDB.LT.N ) THEN + INFO = -10 + ELSE IF( LDQ.LT.1 .OR. ( ILQ .AND. LDQ.LT.N ) ) THEN + INFO = -15 + ELSE IF( LDZ.LT.1 .OR. ( ILZ .AND. LDZ.LT.N ) ) THEN + INFO = -17 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SHGEQZ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + WORK( 1 ) = REAL( 1 ) + RETURN + END IF +* +* Initialize Q and Z +* + IF( ICOMPQ.EQ.3 ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) + IF( ICOMPZ.EQ.3 ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* +* Machine Constants +* + IN = IHI + 1 - ILO + SAFMIN = SLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + ULP = SLAMCH( 'E' )*SLAMCH( 'B' ) + ANORM = SLANHS( 'F', IN, A( ILO, ILO ), LDA, WORK ) + BNORM = SLANHS( 'F', IN, B( ILO, ILO ), LDB, WORK ) + ATOL = MAX( SAFMIN, ULP*ANORM ) + BTOL = MAX( SAFMIN, ULP*BNORM ) + ASCALE = ONE / MAX( SAFMIN, ANORM ) + BSCALE = ONE / MAX( SAFMIN, BNORM ) +* +* Set Eigenvalues IHI+1:N +* + DO 30 J = IHI + 1, N + IF( B( J, J ).LT.ZERO ) THEN + IF( ILSCHR ) THEN + DO 10 JR = 1, J + A( JR, J ) = -A( JR, J ) + B( JR, J ) = -B( JR, J ) + 10 CONTINUE + ELSE + A( J, J ) = -A( J, J ) + B( J, J ) = -B( J, J ) + END IF + IF( ILZ ) THEN + DO 20 JR = 1, N + Z( JR, J ) = -Z( JR, J ) + 20 CONTINUE + END IF + END IF + ALPHAR( J ) = A( J, J ) + ALPHAI( J ) = ZERO + BETA( J ) = B( J, J ) + 30 CONTINUE +* +* If IHI < ILO, skip QZ steps +* + IF( IHI.LT.ILO ) + $ GO TO 380 +* +* MAIN QZ ITERATION LOOP +* +* Initialize dynamic indices +* +* Eigenvalues ILAST+1:N have been found. +* Column operations modify rows IFRSTM:whatever. +* Row operations modify columns whatever:ILASTM. +* +* If only eigenvalues are being computed, then +* IFRSTM is the row of the last splitting row above row ILAST; +* this is always at least ILO. +* IITER counts iterations since the last eigenvalue was found, +* to tell when to use an extraordinary shift. +* MAXIT is the maximum number of QZ sweeps allowed. +* + ILAST = IHI + IF( ILSCHR ) THEN + IFRSTM = 1 + ILASTM = N + ELSE + IFRSTM = ILO + ILASTM = IHI + END IF + IITER = 0 + ESHIFT = ZERO + MAXIT = 30*( IHI-ILO+1 ) +* + DO 360 JITER = 1, MAXIT +* +* Split the matrix if possible. +* +* Two tests: +* 1: A(j,j-1)=0 or j=ILO +* 2: B(j,j)=0 +* + IF( ILAST.EQ.ILO ) THEN +* +* Special case: j=ILAST +* + GO TO 80 + ELSE + IF( ABS( A( ILAST, ILAST-1 ) ).LE.ATOL ) THEN + A( ILAST, ILAST-1 ) = ZERO + GO TO 80 + END IF + END IF +* + IF( ABS( B( ILAST, ILAST ) ).LE.BTOL ) THEN + B( ILAST, ILAST ) = ZERO + GO TO 70 + END IF +* +* General case: j unfl ) +* __ +* (sA - wB) ( CZ -SZ ) +* ( SZ CZ ) +* + C11R = S1*A11 - WR*B11 + C11I = -WI*B11 + C12 = S1*A12 + C21 = S1*A21 + C22R = S1*A22 - WR*B22 + C22I = -WI*B22 +* + IF( ABS( C11R )+ABS( C11I )+ABS( C12 ).GT.ABS( C21 )+ + $ ABS( C22R )+ABS( C22I ) ) THEN + T = SLAPY3( C12, C11R, C11I ) + CZ = C12 / T + SZR = -C11R / T + SZI = -C11I / T + ELSE + CZ = SLAPY2( C22R, C22I ) + IF( CZ.LE.SAFMIN ) THEN + CZ = ZERO + SZR = ONE + SZI = ZERO + ELSE + TEMPR = C22R / CZ + TEMPI = C22I / CZ + T = SLAPY2( CZ, C21 ) + CZ = CZ / T + SZR = -C21*TEMPR / T + SZI = C21*TEMPI / T + END IF + END IF +* +* Compute Givens rotation on left +* +* ( CQ SQ ) +* ( __ ) A or B +* ( -SQ CQ ) +* + AN = ABS( A11 ) + ABS( A12 ) + ABS( A21 ) + ABS( A22 ) + BN = ABS( B11 ) + ABS( B22 ) + WABS = ABS( WR ) + ABS( WI ) + IF( S1*AN.GT.WABS*BN ) THEN + CQ = CZ*B11 + SQR = SZR*B22 + SQI = -SZI*B22 + ELSE + A1R = CZ*A11 + SZR*A12 + A1I = SZI*A12 + A2R = CZ*A21 + SZR*A22 + A2I = SZI*A22 + CQ = SLAPY2( A1R, A1I ) + IF( CQ.LE.SAFMIN ) THEN + CQ = ZERO + SQR = ONE + SQI = ZERO + ELSE + TEMPR = A1R / CQ + TEMPI = A1I / CQ + SQR = TEMPR*A2R + TEMPI*A2I + SQI = TEMPI*A2R - TEMPR*A2I + END IF + END IF + T = SLAPY3( CQ, SQR, SQI ) + CQ = CQ / T + SQR = SQR / T + SQI = SQI / T +* +* Compute diagonal elements of QBZ +* + TEMPR = SQR*SZR - SQI*SZI + TEMPI = SQR*SZI + SQI*SZR + B1R = CQ*CZ*B11 + TEMPR*B22 + B1I = TEMPI*B22 + B1A = SLAPY2( B1R, B1I ) + B2R = CQ*CZ*B22 + TEMPR*B11 + B2I = -TEMPI*B11 + B2A = SLAPY2( B2R, B2I ) +* +* Normalize so beta > 0, and Im( alpha1 ) > 0 +* + BETA( ILAST-1 ) = B1A + BETA( ILAST ) = B2A + ALPHAR( ILAST-1 ) = ( WR*B1A )*S1INV + ALPHAI( ILAST-1 ) = ( WI*B1A )*S1INV + ALPHAR( ILAST ) = ( WR*B2A )*S1INV + ALPHAI( ILAST ) = -( WI*B2A )*S1INV +* +* Step 3: Go to next block -- exit if finished. +* + ILAST = IFIRST - 1 + IF( ILAST.LT.ILO ) + $ GO TO 380 +* +* Reset counters +* + IITER = 0 + ESHIFT = ZERO + IF( .NOT.ILSCHR ) THEN + ILASTM = ILAST + IF( IFRSTM.GT.ILAST ) + $ IFRSTM = ILO + END IF + GO TO 350 + ELSE +* +* Usual case: 3x3 or larger block, using Francis implicit +* double-shift +* +* 2 +* Eigenvalue equation is w - c w + d = 0, +* +* -1 2 -1 +* so compute 1st column of (A B ) - c A B + d +* using the formula in QZIT (from EISPACK) +* +* We assume that the block is at least 3x3 +* + AD11 = ( ASCALE*A( ILAST-1, ILAST-1 ) ) / + $ ( BSCALE*B( ILAST-1, ILAST-1 ) ) + AD21 = ( ASCALE*A( ILAST, ILAST-1 ) ) / + $ ( BSCALE*B( ILAST-1, ILAST-1 ) ) + AD12 = ( ASCALE*A( ILAST-1, ILAST ) ) / + $ ( BSCALE*B( ILAST, ILAST ) ) + AD22 = ( ASCALE*A( ILAST, ILAST ) ) / + $ ( BSCALE*B( ILAST, ILAST ) ) + U12 = B( ILAST-1, ILAST ) / B( ILAST, ILAST ) + AD11L = ( ASCALE*A( IFIRST, IFIRST ) ) / + $ ( BSCALE*B( IFIRST, IFIRST ) ) + AD21L = ( ASCALE*A( IFIRST+1, IFIRST ) ) / + $ ( BSCALE*B( IFIRST, IFIRST ) ) + AD12L = ( ASCALE*A( IFIRST, IFIRST+1 ) ) / + $ ( BSCALE*B( IFIRST+1, IFIRST+1 ) ) + AD22L = ( ASCALE*A( IFIRST+1, IFIRST+1 ) ) / + $ ( BSCALE*B( IFIRST+1, IFIRST+1 ) ) + AD32L = ( ASCALE*A( IFIRST+2, IFIRST+1 ) ) / + $ ( BSCALE*B( IFIRST+1, IFIRST+1 ) ) + U12L = B( IFIRST, IFIRST+1 ) / B( IFIRST+1, IFIRST+1 ) +* + V( 1 ) = ( AD11-AD11L )*( AD22-AD11L ) - AD12*AD21 + + $ AD21*U12*AD11L + ( AD12L-AD11L*U12L )*AD21L + V( 2 ) = ( ( AD22L-AD11L )-AD21L*U12L-( AD11-AD11L )- + $ ( AD22-AD11L )+AD21*U12 )*AD21L + V( 3 ) = AD32L*AD21L +* + ISTART = IFIRST +* + CALL SLARFG( 3, V( 1 ), V( 2 ), 1, TAU ) + V( 1 ) = ONE +* +* Sweep +* + DO 290 J = ISTART, ILAST - 2 +* +* All but last elements: use 3x3 Householder transforms. +* +* Zero (j-1)st column of A +* + IF( J.GT.ISTART ) THEN + V( 1 ) = A( J, J-1 ) + V( 2 ) = A( J+1, J-1 ) + V( 3 ) = A( J+2, J-1 ) +* + CALL SLARFG( 3, A( J, J-1 ), V( 2 ), 1, TAU ) + V( 1 ) = ONE + A( J+1, J-1 ) = ZERO + A( J+2, J-1 ) = ZERO + END IF +* + DO 230 JC = J, ILASTM + TEMP = TAU*( A( J, JC )+V( 2 )*A( J+1, JC )+V( 3 )* + $ A( J+2, JC ) ) + A( J, JC ) = A( J, JC ) - TEMP + A( J+1, JC ) = A( J+1, JC ) - TEMP*V( 2 ) + A( J+2, JC ) = A( J+2, JC ) - TEMP*V( 3 ) + TEMP2 = TAU*( B( J, JC )+V( 2 )*B( J+1, JC )+V( 3 )* + $ B( J+2, JC ) ) + B( J, JC ) = B( J, JC ) - TEMP2 + B( J+1, JC ) = B( J+1, JC ) - TEMP2*V( 2 ) + B( J+2, JC ) = B( J+2, JC ) - TEMP2*V( 3 ) + 230 CONTINUE + IF( ILQ ) THEN + DO 240 JR = 1, N + TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )* + $ Q( JR, J+2 ) ) + Q( JR, J ) = Q( JR, J ) - TEMP + Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 ) + Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 ) + 240 CONTINUE + END IF +* +* Zero j-th column of B (see SLAGBC for details) +* +* Swap rows to pivot +* + ILPIVT = .FALSE. + TEMP = MAX( ABS( B( J+1, J+1 ) ), ABS( B( J+1, J+2 ) ) ) + TEMP2 = MAX( ABS( B( J+2, J+1 ) ), ABS( B( J+2, J+2 ) ) ) + IF( MAX( TEMP, TEMP2 ).LT.SAFMIN ) THEN + SCALE = ZERO + U1 = ONE + U2 = ZERO + GO TO 250 + ELSE IF( TEMP.GE.TEMP2 ) THEN + W11 = B( J+1, J+1 ) + W21 = B( J+2, J+1 ) + W12 = B( J+1, J+2 ) + W22 = B( J+2, J+2 ) + U1 = B( J+1, J ) + U2 = B( J+2, J ) + ELSE + W21 = B( J+1, J+1 ) + W11 = B( J+2, J+1 ) + W22 = B( J+1, J+2 ) + W12 = B( J+2, J+2 ) + U2 = B( J+1, J ) + U1 = B( J+2, J ) + END IF +* +* Swap columns if nec. +* + IF( ABS( W12 ).GT.ABS( W11 ) ) THEN + ILPIVT = .TRUE. + TEMP = W12 + TEMP2 = W22 + W12 = W11 + W22 = W21 + W11 = TEMP + W21 = TEMP2 + END IF +* +* LU-factor +* + TEMP = W21 / W11 + U2 = U2 - TEMP*U1 + W22 = W22 - TEMP*W12 + W21 = ZERO +* +* Compute SCALE +* + SCALE = ONE + IF( ABS( W22 ).LT.SAFMIN ) THEN + SCALE = ZERO + U2 = ONE + U1 = -W12 / W11 + GO TO 250 + END IF + IF( ABS( W22 ).LT.ABS( U2 ) ) + $ SCALE = ABS( W22 / U2 ) + IF( ABS( W11 ).LT.ABS( U1 ) ) + $ SCALE = MIN( SCALE, ABS( W11 / U1 ) ) +* +* Solve +* + U2 = ( SCALE*U2 ) / W22 + U1 = ( SCALE*U1-W12*U2 ) / W11 +* + 250 CONTINUE + IF( ILPIVT ) THEN + TEMP = U2 + U2 = U1 + U1 = TEMP + END IF +* +* Compute Householder Vector +* + T = SQRT( SCALE**2+U1**2+U2**2 ) + TAU = ONE + SCALE / T + VS = -ONE / ( SCALE+T ) + V( 1 ) = ONE + V( 2 ) = VS*U1 + V( 3 ) = VS*U2 +* +* Apply transformations from the right. +* + DO 260 JR = IFRSTM, MIN( J+3, ILAST ) + TEMP = TAU*( A( JR, J )+V( 2 )*A( JR, J+1 )+V( 3 )* + $ A( JR, J+2 ) ) + A( JR, J ) = A( JR, J ) - TEMP + A( JR, J+1 ) = A( JR, J+1 ) - TEMP*V( 2 ) + A( JR, J+2 ) = A( JR, J+2 ) - TEMP*V( 3 ) + 260 CONTINUE + DO 270 JR = IFRSTM, J + 2 + TEMP = TAU*( B( JR, J )+V( 2 )*B( JR, J+1 )+V( 3 )* + $ B( JR, J+2 ) ) + B( JR, J ) = B( JR, J ) - TEMP + B( JR, J+1 ) = B( JR, J+1 ) - TEMP*V( 2 ) + B( JR, J+2 ) = B( JR, J+2 ) - TEMP*V( 3 ) + 270 CONTINUE + IF( ILZ ) THEN + DO 280 JR = 1, N + TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )* + $ Z( JR, J+2 ) ) + Z( JR, J ) = Z( JR, J ) - TEMP + Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 ) + Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 ) + 280 CONTINUE + END IF + B( J+1, J ) = ZERO + B( J+2, J ) = ZERO + 290 CONTINUE +* +* Last elements: Use Givens rotations +* +* Rotations from the left +* + J = ILAST - 1 + TEMP = A( J, J-1 ) + CALL SLARTG( TEMP, A( J+1, J-1 ), C, S, A( J, J-1 ) ) + A( J+1, J-1 ) = ZERO +* + DO 300 JC = J, ILASTM + TEMP = C*A( J, JC ) + S*A( J+1, JC ) + A( J+1, JC ) = -S*A( J, JC ) + C*A( J+1, JC ) + A( J, JC ) = TEMP + TEMP2 = C*B( J, JC ) + S*B( J+1, JC ) + B( J+1, JC ) = -S*B( J, JC ) + C*B( J+1, JC ) + B( J, JC ) = TEMP2 + 300 CONTINUE + IF( ILQ ) THEN + DO 310 JR = 1, N + TEMP = C*Q( JR, J ) + S*Q( JR, J+1 ) + Q( JR, J+1 ) = -S*Q( JR, J ) + C*Q( JR, J+1 ) + Q( JR, J ) = TEMP + 310 CONTINUE + END IF +* +* Rotations from the right. +* + TEMP = B( J+1, J+1 ) + CALL SLARTG( TEMP, B( J+1, J ), C, S, B( J+1, J+1 ) ) + B( J+1, J ) = ZERO +* + DO 320 JR = IFRSTM, ILAST + TEMP = C*A( JR, J+1 ) + S*A( JR, J ) + A( JR, J ) = -S*A( JR, J+1 ) + C*A( JR, J ) + A( JR, J+1 ) = TEMP + 320 CONTINUE + DO 330 JR = IFRSTM, ILAST - 1 + TEMP = C*B( JR, J+1 ) + S*B( JR, J ) + B( JR, J ) = -S*B( JR, J+1 ) + C*B( JR, J ) + B( JR, J+1 ) = TEMP + 330 CONTINUE + IF( ILZ ) THEN + DO 340 JR = 1, N + TEMP = C*Z( JR, J+1 ) + S*Z( JR, J ) + Z( JR, J ) = -S*Z( JR, J+1 ) + C*Z( JR, J ) + Z( JR, J+1 ) = TEMP + 340 CONTINUE + END IF +* +* End of Double-Shift code +* + END IF +* + GO TO 350 +* +* End of iteration loop +* + 350 CONTINUE + 360 CONTINUE +* +* Drop-through = non-convergence +* + 370 CONTINUE + INFO = ILAST + GO TO 420 +* +* Successful completion of all QZ steps +* + 380 CONTINUE +* +* Set Eigenvalues 1:ILO-1 +* + DO 410 J = 1, ILO - 1 + IF( B( J, J ).LT.ZERO ) THEN + IF( ILSCHR ) THEN + DO 390 JR = 1, J + A( JR, J ) = -A( JR, J ) + B( JR, J ) = -B( JR, J ) + 390 CONTINUE + ELSE + A( J, J ) = -A( J, J ) + B( J, J ) = -B( J, J ) + END IF + IF( ILZ ) THEN + DO 400 JR = 1, N + Z( JR, J ) = -Z( JR, J ) + 400 CONTINUE + END IF + END IF + ALPHAR( J ) = A( J, J ) + ALPHAI( J ) = ZERO + BETA( J ) = B( J, J ) + 410 CONTINUE +* +* Normal Termination +* + INFO = 0 +* +* Exit (other than argument error) -- return optimal workspace size +* + 420 CONTINUE + WORK( 1 ) = REAL( N ) + RETURN +* +* End of SHGEQZ +* + END diff --git a/costa/native/external/lapack/shsein.f b/costa/native/external/lapack/shsein.f new file mode 100644 index 000000000..2f57f5b2b --- /dev/null +++ b/costa/native/external/lapack/shsein.f @@ -0,0 +1,412 @@ + SUBROUTINE SHSEIN( SIDE, EIGSRC, INITV, SELECT, N, H, LDH, WR, WI, + $ VL, LDVL, VR, LDVR, MM, M, WORK, IFAILL, + $ IFAILR, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER EIGSRC, INITV, SIDE + INTEGER INFO, LDH, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IFAILL( * ), IFAILR( * ) + REAL H( LDH, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WI( * ), WORK( * ), WR( * ) +* .. +* +* Purpose +* ======= +* +* SHSEIN uses inverse iteration to find specified right and/or left +* eigenvectors of a real upper Hessenberg matrix H. +* +* The right eigenvector x and the left eigenvector y of the matrix H +* corresponding to an eigenvalue w are defined by: +* +* H * x = w * x, y**h * H = w * y**h +* +* where y**h denotes the conjugate transpose of the vector y. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'R': compute right eigenvectors only; +* = 'L': compute left eigenvectors only; +* = 'B': compute both right and left eigenvectors. +* +* EIGSRC (input) CHARACTER*1 +* Specifies the source of eigenvalues supplied in (WR,WI): +* = 'Q': the eigenvalues were found using SHSEQR; thus, if +* H has zero subdiagonal elements, and so is +* block-triangular, then the j-th eigenvalue can be +* assumed to be an eigenvalue of the block containing +* the j-th row/column. This property allows SHSEIN to +* perform inverse iteration on just one diagonal block. +* = 'N': no assumptions are made on the correspondence +* between eigenvalues and diagonal blocks. In this +* case, SHSEIN must always perform inverse iteration +* using the whole matrix H. +* +* INITV (input) CHARACTER*1 +* = 'N': no initial vectors are supplied; +* = 'U': user-supplied initial vectors are stored in the arrays +* VL and/or VR. +* +* SELECT (input/output) LOGICAL array, dimension (N) +* Specifies the eigenvectors to be computed. To select the +* real eigenvector corresponding to a real eigenvalue WR(j), +* SELECT(j) must be set to .TRUE.. To select the complex +* eigenvector corresponding to a complex eigenvalue +* (WR(j),WI(j)), with complex conjugate (WR(j+1),WI(j+1)), +* either SELECT(j) or SELECT(j+1) or both must be set to +* .TRUE.; then on exit SELECT(j) is .TRUE. and SELECT(j+1) is +* .FALSE.. +* +* N (input) INTEGER +* The order of the matrix H. N >= 0. +* +* H (input) REAL array, dimension (LDH,N) +* The upper Hessenberg matrix H. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH >= max(1,N). +* +* WR (input/output) REAL array, dimension (N) +* WI (input) REAL array, dimension (N) +* On entry, the real and imaginary parts of the eigenvalues of +* H; a complex conjugate pair of eigenvalues must be stored in +* consecutive elements of WR and WI. +* On exit, WR may have been altered since close eigenvalues +* are perturbed slightly in searching for independent +* eigenvectors. +* +* VL (input/output) REAL array, dimension (LDVL,MM) +* On entry, if INITV = 'U' and SIDE = 'L' or 'B', VL must +* contain starting vectors for the inverse iteration for the +* left eigenvectors; the starting vector for each eigenvector +* must be in the same column(s) in which the eigenvector will +* be stored. +* On exit, if SIDE = 'L' or 'B', the left eigenvectors +* specified by SELECT will be stored consecutively in the +* columns of VL, in the same order as their eigenvalues. A +* complex eigenvector corresponding to a complex eigenvalue is +* stored in two consecutive columns, the first holding the real +* part and the second the imaginary part. +* If SIDE = 'R', VL is not referenced. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. +* LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise. +* +* VR (input/output) REAL array, dimension (LDVR,MM) +* On entry, if INITV = 'U' and SIDE = 'R' or 'B', VR must +* contain starting vectors for the inverse iteration for the +* right eigenvectors; the starting vector for each eigenvector +* must be in the same column(s) in which the eigenvector will +* be stored. +* On exit, if SIDE = 'R' or 'B', the right eigenvectors +* specified by SELECT will be stored consecutively in the +* columns of VR, in the same order as their eigenvalues. A +* complex eigenvector corresponding to a complex eigenvalue is +* stored in two consecutive columns, the first holding the real +* part and the second the imaginary part. +* If SIDE = 'L', VR is not referenced. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. +* LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise. +* +* MM (input) INTEGER +* The number of columns in the arrays VL and/or VR. MM >= M. +* +* M (output) INTEGER +* The number of columns in the arrays VL and/or VR required to +* store the eigenvectors; each selected real eigenvector +* occupies one column and each selected complex eigenvector +* occupies two columns. +* +* WORK (workspace) REAL array, dimension ((N+2)*N) +* +* IFAILL (output) INTEGER array, dimension (MM) +* If SIDE = 'L' or 'B', IFAILL(i) = j > 0 if the left +* eigenvector in the i-th column of VL (corresponding to the +* eigenvalue w(j)) failed to converge; IFAILL(i) = 0 if the +* eigenvector converged satisfactorily. If the i-th and (i+1)th +* columns of VL hold a complex eigenvector, then IFAILL(i) and +* IFAILL(i+1) are set to the same value. +* If SIDE = 'R', IFAILL is not referenced. +* +* IFAILR (output) INTEGER array, dimension (MM) +* If SIDE = 'R' or 'B', IFAILR(i) = j > 0 if the right +* eigenvector in the i-th column of VR (corresponding to the +* eigenvalue w(j)) failed to converge; IFAILR(i) = 0 if the +* eigenvector converged satisfactorily. If the i-th and (i+1)th +* columns of VR hold a complex eigenvector, then IFAILR(i) and +* IFAILR(i+1) are set to the same value. +* If SIDE = 'L', IFAILR is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, i is the number of eigenvectors which +* failed to converge; see IFAILL and IFAILR for further +* details. +* +* Further Details +* =============== +* +* Each eigenvector is normalized so that the element of largest +* magnitude has magnitude 1; here the magnitude of a complex number +* (x,y) is taken to be |x|+|y|. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL BOTHV, FROMQR, LEFTV, NOINIT, PAIR, RIGHTV + INTEGER I, IINFO, K, KL, KLN, KR, KSI, KSR, LDWORK + REAL BIGNUM, EPS3, HNORM, SMLNUM, ULP, UNFL, WKI, + $ WKR +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANHS + EXTERNAL LSAME, SLAMCH, SLANHS +* .. +* .. External Subroutines .. + EXTERNAL SLAEIN, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters. +* + BOTHV = LSAME( SIDE, 'B' ) + RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV + LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV +* + FROMQR = LSAME( EIGSRC, 'Q' ) +* + NOINIT = LSAME( INITV, 'N' ) +* +* Set M to the number of columns required to store the selected +* eigenvectors, and standardize the array SELECT. +* + M = 0 + PAIR = .FALSE. + DO 10 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + SELECT( K ) = .FALSE. + ELSE + IF( WI( K ).EQ.ZERO ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( K ) .OR. SELECT( K+1 ) ) THEN + SELECT( K ) = .TRUE. + M = M + 2 + END IF + END IF + END IF + 10 CONTINUE +* + INFO = 0 + IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -1 + ELSE IF( .NOT.FROMQR .AND. .NOT.LSAME( EIGSRC, 'N' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOINIT .AND. .NOT.LSAME( INITV, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDH.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN + INFO = -11 + ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN + INFO = -13 + ELSE IF( MM.LT.M ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SHSEIN', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* Set machine-dependent constants. +* + UNFL = SLAMCH( 'Safe minimum' ) + ULP = SLAMCH( 'Precision' ) + SMLNUM = UNFL*( N / ULP ) + BIGNUM = ( ONE-ULP ) / SMLNUM +* + LDWORK = N + 1 +* + KL = 1 + KLN = 0 + IF( FROMQR ) THEN + KR = 0 + ELSE + KR = N + END IF + KSR = 1 +* + DO 120 K = 1, N + IF( SELECT( K ) ) THEN +* +* Compute eigenvector(s) corresponding to W(K). +* + IF( FROMQR ) THEN +* +* If affiliation of eigenvalues is known, check whether +* the matrix splits. +* +* Determine KL and KR such that 1 <= KL <= K <= KR <= N +* and H(KL,KL-1) and H(KR+1,KR) are zero (or KL = 1 or +* KR = N). +* +* Then inverse iteration can be performed with the +* submatrix H(KL:N,KL:N) for a left eigenvector, and with +* the submatrix H(1:KR,1:KR) for a right eigenvector. +* + DO 20 I = K, KL + 1, -1 + IF( H( I, I-1 ).EQ.ZERO ) + $ GO TO 30 + 20 CONTINUE + 30 CONTINUE + KL = I + IF( K.GT.KR ) THEN + DO 40 I = K, N - 1 + IF( H( I+1, I ).EQ.ZERO ) + $ GO TO 50 + 40 CONTINUE + 50 CONTINUE + KR = I + END IF + END IF +* + IF( KL.NE.KLN ) THEN + KLN = KL +* +* Compute infinity-norm of submatrix H(KL:KR,KL:KR) if it +* has not ben computed before. +* + HNORM = SLANHS( 'I', KR-KL+1, H( KL, KL ), LDH, WORK ) + IF( HNORM.GT.ZERO ) THEN + EPS3 = HNORM*ULP + ELSE + EPS3 = SMLNUM + END IF + END IF +* +* Perturb eigenvalue if it is close to any previous +* selected eigenvalues affiliated to the submatrix +* H(KL:KR,KL:KR). Close roots are modified by EPS3. +* + WKR = WR( K ) + WKI = WI( K ) + 60 CONTINUE + DO 70 I = K - 1, KL, -1 + IF( SELECT( I ) .AND. ABS( WR( I )-WKR )+ + $ ABS( WI( I )-WKI ).LT.EPS3 ) THEN + WKR = WKR + EPS3 + GO TO 60 + END IF + 70 CONTINUE + WR( K ) = WKR +* + PAIR = WKI.NE.ZERO + IF( PAIR ) THEN + KSI = KSR + 1 + ELSE + KSI = KSR + END IF + IF( LEFTV ) THEN +* +* Compute left eigenvector. +* + CALL SLAEIN( .FALSE., NOINIT, N-KL+1, H( KL, KL ), LDH, + $ WKR, WKI, VL( KL, KSR ), VL( KL, KSI ), + $ WORK, LDWORK, WORK( N*N+N+1 ), EPS3, SMLNUM, + $ BIGNUM, IINFO ) + IF( IINFO.GT.0 ) THEN + IF( PAIR ) THEN + INFO = INFO + 2 + ELSE + INFO = INFO + 1 + END IF + IFAILL( KSR ) = K + IFAILL( KSI ) = K + ELSE + IFAILL( KSR ) = 0 + IFAILL( KSI ) = 0 + END IF + DO 80 I = 1, KL - 1 + VL( I, KSR ) = ZERO + 80 CONTINUE + IF( PAIR ) THEN + DO 90 I = 1, KL - 1 + VL( I, KSI ) = ZERO + 90 CONTINUE + END IF + END IF + IF( RIGHTV ) THEN +* +* Compute right eigenvector. +* + CALL SLAEIN( .TRUE., NOINIT, KR, H, LDH, WKR, WKI, + $ VR( 1, KSR ), VR( 1, KSI ), WORK, LDWORK, + $ WORK( N*N+N+1 ), EPS3, SMLNUM, BIGNUM, + $ IINFO ) + IF( IINFO.GT.0 ) THEN + IF( PAIR ) THEN + INFO = INFO + 2 + ELSE + INFO = INFO + 1 + END IF + IFAILR( KSR ) = K + IFAILR( KSI ) = K + ELSE + IFAILR( KSR ) = 0 + IFAILR( KSI ) = 0 + END IF + DO 100 I = KR + 1, N + VR( I, KSR ) = ZERO + 100 CONTINUE + IF( PAIR ) THEN + DO 110 I = KR + 1, N + VR( I, KSI ) = ZERO + 110 CONTINUE + END IF + END IF +* + IF( PAIR ) THEN + KSR = KSR + 2 + ELSE + KSR = KSR + 1 + END IF + END IF + 120 CONTINUE +* + RETURN +* +* End of SHSEIN +* + END diff --git a/costa/native/external/lapack/shseqr.f b/costa/native/external/lapack/shseqr.f new file mode 100644 index 000000000..00c08ab39 --- /dev/null +++ b/costa/native/external/lapack/shseqr.f @@ -0,0 +1,467 @@ + SUBROUTINE SHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, WR, WI, Z, + $ LDZ, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER COMPZ, JOB + INTEGER IHI, ILO, INFO, LDH, LDZ, LWORK, N +* .. +* .. Array Arguments .. + REAL H( LDH, * ), WI( * ), WORK( * ), WR( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SHSEQR computes the eigenvalues of a real upper Hessenberg matrix H +* and, optionally, the matrices T and Z from the Schur decomposition +* H = Z T Z**T, where T is an upper quasi-triangular matrix (the Schur +* form), and Z is the orthogonal matrix of Schur vectors. +* +* Optionally Z may be postmultiplied into an input orthogonal matrix Q, +* so that this routine can give the Schur factorization of a matrix A +* which has been reduced to the Hessenberg form H by the orthogonal +* matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* = 'E': compute eigenvalues only; +* = 'S': compute eigenvalues and the Schur form T. +* +* COMPZ (input) CHARACTER*1 +* = 'N': no Schur vectors are computed; +* = 'I': Z is initialized to the unit matrix and the matrix Z +* of Schur vectors of H is returned; +* = 'V': Z must contain an orthogonal matrix Q on entry, and +* the product Q*Z is returned. +* +* N (input) INTEGER +* The order of the matrix H. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that H is already upper triangular in rows +* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +* set by a previous call to SGEBAL, and then passed to SGEHRD +* when the matrix output by SGEBAL is reduced to Hessenberg +* form. Otherwise ILO and IHI should be set to 1 and N +* respectively. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* H (input/output) REAL array, dimension (LDH,N) +* On entry, the upper Hessenberg matrix H. +* On exit, if JOB = 'S', H contains the upper quasi-triangular +* matrix T from the Schur decomposition (the Schur form); +* 2-by-2 diagonal blocks (corresponding to complex conjugate +* pairs of eigenvalues) are returned in standard form, with +* H(i,i) = H(i+1,i+1) and H(i+1,i)*H(i,i+1) < 0. If JOB = 'E', +* the contents of H are unspecified on exit. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH >= max(1,N). +* +* WR (output) REAL array, dimension (N) +* WI (output) REAL array, dimension (N) +* The real and imaginary parts, respectively, of the computed +* eigenvalues. If two eigenvalues are computed as a complex +* conjugate pair, they are stored in consecutive elements of +* WR and WI, say the i-th and (i+1)th, with WI(i) > 0 and +* WI(i+1) < 0. If JOB = 'S', the eigenvalues are stored in the +* same order as on the diagonal of the Schur form returned in +* H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 +* diagonal block, WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and +* WI(i+1) = -WI(i). +* +* Z (input/output) REAL array, dimension (LDZ,N) +* If COMPZ = 'N': Z is not referenced. +* If COMPZ = 'I': on entry, Z need not be set, and on exit, Z +* contains the orthogonal matrix Z of the Schur vectors of H. +* If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, +* which is assumed to be equal to the unit matrix except for +* the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. +* Normally Q is the orthogonal matrix generated by SORGHR after +* the call to SGEHRD which formed the Hessenberg matrix H. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. +* LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, SHSEQR failed to compute all of the +* eigenvalues in a total of 30*(IHI-ILO+1) iterations; +* elements 1:ilo-1 and i+1:n of WR and WI contain those +* eigenvalues which have been successfully computed. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) + REAL CONST + PARAMETER ( CONST = 1.5E+0 ) + INTEGER NSMAX, LDS + PARAMETER ( NSMAX = 15, LDS = NSMAX ) +* .. +* .. Local Scalars .. + LOGICAL INITZ, LQUERY, WANTT, WANTZ + INTEGER I, I1, I2, IERR, II, ITEMP, ITN, ITS, J, K, L, + $ MAXB, NH, NR, NS, NV + REAL ABSW, OVFL, SMLNUM, TAU, TEMP, TST1, ULP, UNFL +* .. +* .. Local Arrays .. + REAL S( LDS, NSMAX ), V( NSMAX+1 ), VV( NSMAX+1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV, ISAMAX + REAL SLAMCH, SLANHS, SLAPY2 + EXTERNAL LSAME, ILAENV, ISAMAX, SLAMCH, SLANHS, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMV, SLABAD, SLACPY, SLAHQR, SLARFG, + $ SLARFX, SLASET, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTT = LSAME( JOB, 'S' ) + INITZ = LSAME( COMPZ, 'I' ) + WANTZ = INITZ .OR. LSAME( COMPZ, 'V' ) +* + INFO = 0 + WORK( 1 ) = MAX( 1, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( JOB, 'E' ) .AND. .NOT.WANTT ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPZ, 'N' ) .AND. .NOT.WANTZ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -5 + ELSE IF( LDH.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDZ.LT.1 .OR. WANTZ .AND. LDZ.LT.MAX( 1, N ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SHSEQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Initialize Z, if necessary +* + IF( INITZ ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* +* Store the eigenvalues isolated by SGEBAL. +* + DO 10 I = 1, ILO - 1 + WR( I ) = H( I, I ) + WI( I ) = ZERO + 10 CONTINUE + DO 20 I = IHI + 1, N + WR( I ) = H( I, I ) + WI( I ) = ZERO + 20 CONTINUE +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN + IF( ILO.EQ.IHI ) THEN + WR( ILO ) = H( ILO, ILO ) + WI( ILO ) = ZERO + RETURN + END IF +* +* Set rows and columns ILO to IHI to zero below the first +* subdiagonal. +* + DO 40 J = ILO, IHI - 2 + DO 30 I = J + 2, N + H( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + NH = IHI - ILO + 1 +* +* Determine the order of the multi-shift QR algorithm to be used. +* + NS = ILAENV( 4, 'SHSEQR', JOB // COMPZ, N, ILO, IHI, -1 ) + MAXB = ILAENV( 8, 'SHSEQR', JOB // COMPZ, N, ILO, IHI, -1 ) + IF( NS.LE.2 .OR. NS.GT.NH .OR. MAXB.GE.NH ) THEN +* +* Use the standard double-shift algorithm +* + CALL SLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, ILO, + $ IHI, Z, LDZ, INFO ) + RETURN + END IF + MAXB = MAX( 3, MAXB ) + NS = MIN( NS, MAXB, NSMAX ) +* +* Now 2 < NS <= MAXB < NH. +* +* Set machine-dependent constants for the stopping criterion. +* If norm(H) <= sqrt(OVFL), overflow should not occur. +* + UNFL = SLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + CALL SLABAD( UNFL, OVFL ) + ULP = SLAMCH( 'Precision' ) + SMLNUM = UNFL*( NH / ULP ) +* +* I1 and I2 are the indices of the first row and last column of H +* to which transformations must be applied. If eigenvalues only are +* being computed, I1 and I2 are set inside the main loop. +* + IF( WANTT ) THEN + I1 = 1 + I2 = N + END IF +* +* ITN is the total number of multiple-shift QR iterations allowed. +* + ITN = 30*NH +* +* The main loop begins here. I is the loop index and decreases from +* IHI to ILO in steps of at most MAXB. Each iteration of the loop +* works with the active submatrix in rows and columns L to I. +* Eigenvalues I+1 to IHI have already converged. Either L = ILO or +* H(L,L-1) is negligible so that the matrix splits. +* + I = IHI + 50 CONTINUE + L = ILO + IF( I.LT.ILO ) + $ GO TO 170 +* +* Perform multiple-shift QR iterations on rows and columns ILO to I +* until a submatrix of order at most MAXB splits off at the bottom +* because a subdiagonal element has become negligible. +* + DO 150 ITS = 0, ITN +* +* Look for a single small subdiagonal element. +* + DO 60 K = I, L + 1, -1 + TST1 = ABS( H( K-1, K-1 ) ) + ABS( H( K, K ) ) + IF( TST1.EQ.ZERO ) + $ TST1 = SLANHS( '1', I-L+1, H( L, L ), LDH, WORK ) + IF( ABS( H( K, K-1 ) ).LE.MAX( ULP*TST1, SMLNUM ) ) + $ GO TO 70 + 60 CONTINUE + 70 CONTINUE + L = K + IF( L.GT.ILO ) THEN +* +* H(L,L-1) is negligible. +* + H( L, L-1 ) = ZERO + END IF +* +* Exit from loop if a submatrix of order <= MAXB has split off. +* + IF( L.GE.I-MAXB+1 ) + $ GO TO 160 +* +* Now the active submatrix is in rows and columns L to I. If +* eigenvalues only are being computed, only the active submatrix +* need be transformed. +* + IF( .NOT.WANTT ) THEN + I1 = L + I2 = I + END IF +* + IF( ITS.EQ.20 .OR. ITS.EQ.30 ) THEN +* +* Exceptional shifts. +* + DO 80 II = I - NS + 1, I + WR( II ) = CONST*( ABS( H( II, II-1 ) )+ + $ ABS( H( II, II ) ) ) + WI( II ) = ZERO + 80 CONTINUE + ELSE +* +* Use eigenvalues of trailing submatrix of order NS as shifts. +* + CALL SLACPY( 'Full', NS, NS, H( I-NS+1, I-NS+1 ), LDH, S, + $ LDS ) + CALL SLAHQR( .FALSE., .FALSE., NS, 1, NS, S, LDS, + $ WR( I-NS+1 ), WI( I-NS+1 ), 1, NS, Z, LDZ, + $ IERR ) + IF( IERR.GT.0 ) THEN +* +* If SLAHQR failed to compute all NS eigenvalues, use the +* unconverged diagonal elements as the remaining shifts. +* + DO 90 II = 1, IERR + WR( I-NS+II ) = S( II, II ) + WI( I-NS+II ) = ZERO + 90 CONTINUE + END IF + END IF +* +* Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) +* where G is the Hessenberg submatrix H(L:I,L:I) and w is +* the vector of shifts (stored in WR and WI). The result is +* stored in the local array V. +* + V( 1 ) = ONE + DO 100 II = 2, NS + 1 + V( II ) = ZERO + 100 CONTINUE + NV = 1 + DO 120 J = I - NS + 1, I + IF( WI( J ).GE.ZERO ) THEN + IF( WI( J ).EQ.ZERO ) THEN +* +* real shift +* + CALL SCOPY( NV+1, V, 1, VV, 1 ) + CALL SGEMV( 'No transpose', NV+1, NV, ONE, H( L, L ), + $ LDH, VV, 1, -WR( J ), V, 1 ) + NV = NV + 1 + ELSE IF( WI( J ).GT.ZERO ) THEN +* +* complex conjugate pair of shifts +* + CALL SCOPY( NV+1, V, 1, VV, 1 ) + CALL SGEMV( 'No transpose', NV+1, NV, ONE, H( L, L ), + $ LDH, V, 1, -TWO*WR( J ), VV, 1 ) + ITEMP = ISAMAX( NV+1, VV, 1 ) + TEMP = ONE / MAX( ABS( VV( ITEMP ) ), SMLNUM ) + CALL SSCAL( NV+1, TEMP, VV, 1 ) + ABSW = SLAPY2( WR( J ), WI( J ) ) + TEMP = ( TEMP*ABSW )*ABSW + CALL SGEMV( 'No transpose', NV+2, NV+1, ONE, + $ H( L, L ), LDH, VV, 1, TEMP, V, 1 ) + NV = NV + 2 + END IF +* +* Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, +* reset it to the unit vector. +* + ITEMP = ISAMAX( NV, V, 1 ) + TEMP = ABS( V( ITEMP ) ) + IF( TEMP.EQ.ZERO ) THEN + V( 1 ) = ONE + DO 110 II = 2, NV + V( II ) = ZERO + 110 CONTINUE + ELSE + TEMP = MAX( TEMP, SMLNUM ) + CALL SSCAL( NV, ONE / TEMP, V, 1 ) + END IF + END IF + 120 CONTINUE +* +* Multiple-shift QR step +* + DO 140 K = L, I - 1 +* +* The first iteration of this loop determines a reflection G +* from the vector V and applies it from left and right to H, +* thus creating a nonzero bulge below the subdiagonal. +* +* Each subsequent iteration determines a reflection G to +* restore the Hessenberg form in the (K-1)th column, and thus +* chases the bulge one step toward the bottom of the active +* submatrix. NR is the order of G. +* + NR = MIN( NS+1, I-K+1 ) + IF( K.GT.L ) + $ CALL SCOPY( NR, H( K, K-1 ), 1, V, 1 ) + CALL SLARFG( NR, V( 1 ), V( 2 ), 1, TAU ) + IF( K.GT.L ) THEN + H( K, K-1 ) = V( 1 ) + DO 130 II = K + 1, I + H( II, K-1 ) = ZERO + 130 CONTINUE + END IF + V( 1 ) = ONE +* +* Apply G from the left to transform the rows of the matrix in +* columns K to I2. +* + CALL SLARFX( 'Left', NR, I2-K+1, V, TAU, H( K, K ), LDH, + $ WORK ) +* +* Apply G from the right to transform the columns of the +* matrix in rows I1 to min(K+NR,I). +* + CALL SLARFX( 'Right', MIN( K+NR, I )-I1+1, NR, V, TAU, + $ H( I1, K ), LDH, WORK ) +* + IF( WANTZ ) THEN +* +* Accumulate transformations in the matrix Z +* + CALL SLARFX( 'Right', NH, NR, V, TAU, Z( ILO, K ), LDZ, + $ WORK ) + END IF + 140 CONTINUE +* + 150 CONTINUE +* +* Failure to converge in remaining number of iterations +* + INFO = I + RETURN +* + 160 CONTINUE +* +* A submatrix of order <= MAXB in rows and columns L to I has split +* off. Use the double-shift QR algorithm to handle it. +* + CALL SLAHQR( WANTT, WANTZ, N, L, I, H, LDH, WR, WI, ILO, IHI, Z, + $ LDZ, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* +* Decrement number of remaining iterations, and return to start of +* the main loop with a new value of I. +* + ITN = ITN - ITS + I = L - 1 + GO TO 50 +* + 170 CONTINUE + WORK( 1 ) = MAX( 1, N ) + RETURN +* +* End of SHSEQR +* + END diff --git a/costa/native/external/lapack/slabad.f b/costa/native/external/lapack/slabad.f new file mode 100644 index 000000000..f5c8a0bae --- /dev/null +++ b/costa/native/external/lapack/slabad.f @@ -0,0 +1,56 @@ + SUBROUTINE SLABAD( SMALL, LARGE ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + REAL LARGE, SMALL +* .. +* +* Purpose +* ======= +* +* SLABAD takes as input the values computed by SLAMCH for underflow and +* overflow, and returns the square root of each of these values if the +* log of LARGE is sufficiently large. This subroutine is intended to +* identify machines with a large exponent range, such as the Crays, and +* redefine the underflow and overflow limits to be the square roots of +* the values computed by SLAMCH. This subroutine is needed because +* SLAMCH does not compensate for poor arithmetic in the upper half of +* the exponent range, as is found on a Cray. +* +* Arguments +* ========= +* +* SMALL (input/output) REAL +* On entry, the underflow threshold as computed by SLAMCH. +* On exit, if LOG10(LARGE) is sufficiently large, the square +* root of SMALL, otherwise unchanged. +* +* LARGE (input/output) REAL +* On entry, the overflow threshold as computed by SLAMCH. +* On exit, if LOG10(LARGE) is sufficiently large, the square +* root of LARGE, otherwise unchanged. +* +* ===================================================================== +* +* .. Intrinsic Functions .. + INTRINSIC LOG10, SQRT +* .. +* .. Executable Statements .. +* +* If it looks like we're on a Cray, take the square root of +* SMALL and LARGE to avoid overflow and underflow problems. +* + IF( LOG10( LARGE ).GT.2000. ) THEN + SMALL = SQRT( SMALL ) + LARGE = SQRT( LARGE ) + END IF +* + RETURN +* +* End of SLABAD +* + END diff --git a/costa/native/external/lapack/slabrd.f b/costa/native/external/lapack/slabrd.f new file mode 100644 index 000000000..d6da46050 --- /dev/null +++ b/costa/native/external/lapack/slabrd.f @@ -0,0 +1,291 @@ + SUBROUTINE SLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y, + $ LDY ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER LDA, LDX, LDY, M, N, NB +* .. +* .. Array Arguments .. + REAL A( LDA, * ), D( * ), E( * ), TAUP( * ), + $ TAUQ( * ), X( LDX, * ), Y( LDY, * ) +* .. +* +* Purpose +* ======= +* +* SLABRD reduces the first NB rows and columns of a real general +* m by n matrix A to upper or lower bidiagonal form by an orthogonal +* transformation Q' * A * P, and returns the matrices X and Y which +* are needed to apply the transformation to the unreduced part of A. +* +* If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower +* bidiagonal form. +* +* This is an auxiliary routine called by SGEBRD +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows in the matrix A. +* +* N (input) INTEGER +* The number of columns in the matrix A. +* +* NB (input) INTEGER +* The number of leading rows and columns of A to be reduced. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the m by n general matrix to be reduced. +* On exit, the first NB rows and columns of the matrix are +* overwritten; the rest of the array is unchanged. +* If m >= n, elements on and below the diagonal in the first NB +* columns, with the array TAUQ, represent the orthogonal +* matrix Q as a product of elementary reflectors; and +* elements above the diagonal in the first NB rows, with the +* array TAUP, represent the orthogonal matrix P as a product +* of elementary reflectors. +* If m < n, elements below the diagonal in the first NB +* columns, with the array TAUQ, represent the orthogonal +* matrix Q as a product of elementary reflectors, and +* elements on and above the diagonal in the first NB rows, +* with the array TAUP, represent the orthogonal matrix P as +* a product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* D (output) REAL array, dimension (NB) +* The diagonal elements of the first NB rows and columns of +* the reduced matrix. D(i) = A(i,i). +* +* E (output) REAL array, dimension (NB) +* The off-diagonal elements of the first NB rows and columns of +* the reduced matrix. +* +* TAUQ (output) REAL array dimension (NB) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix Q. See Further Details. +* +* TAUP (output) REAL array, dimension (NB) +* The scalar factors of the elementary reflectors which +* represent the orthogonal matrix P. See Further Details. +* +* X (output) REAL array, dimension (LDX,NB) +* The m-by-nb matrix X required to update the unreduced part +* of A. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= M. +* +* Y (output) REAL array, dimension (LDY,NB) +* The n-by-nb matrix Y required to update the unreduced part +* of A. +* +* LDY (output) INTEGER +* The leading dimension of the array Y. LDY >= N. +* +* Further Details +* =============== +* +* The matrices Q and P are represented as products of elementary +* reflectors: +* +* Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are real scalars, and v and u are real vectors. +* +* If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in +* A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in +* A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in +* A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in +* A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* The elements of the vectors v and u together form the m-by-nb matrix +* V and the nb-by-n matrix U' which are needed, with X and Y, to apply +* the transformation to the unreduced part of the matrix, using a block +* update of the form: A := A - V*Y' - X*U'. +* +* The contents of A on exit are illustrated by the following examples +* with nb = 2: +* +* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +* +* ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) +* ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) +* ( v1 v2 a a a ) ( v1 1 a a a a ) +* ( v1 v2 a a a ) ( v1 v2 a a a a ) +* ( v1 v2 a a a ) ( v1 v2 a a a a ) +* ( v1 v2 a a a ) +* +* where a denotes an element of the original matrix which is unchanged, +* vi denotes an element of the vector defining H(i), and ui an element +* of the vector defining G(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SLARFG, SSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( M.GE.N ) THEN +* +* Reduce to upper bidiagonal form +* + DO 10 I = 1, NB +* +* Update A(i:m,i) +* + CALL SGEMV( 'No transpose', M-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, Y( I, 1 ), LDY, ONE, A( I, I ), 1 ) + CALL SGEMV( 'No transpose', M-I+1, I-1, -ONE, X( I, 1 ), + $ LDX, A( 1, I ), 1, ONE, A( I, I ), 1 ) +* +* Generate reflection Q(i) to annihilate A(i+1:m,i) +* + CALL SLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAUQ( I ) ) + D( I ) = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = ONE +* +* Compute Y(i+1:n,i) +* + CALL SGEMV( 'Transpose', M-I+1, N-I, ONE, A( I, I+1 ), + $ LDA, A( I, I ), 1, ZERO, Y( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', M-I+1, I-1, ONE, A( I, 1 ), LDA, + $ A( I, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL SGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ), + $ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', M-I+1, I-1, ONE, X( I, 1 ), LDX, + $ A( I, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL SGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ), + $ LDA, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL SSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 ) +* +* Update A(i,i+1:n) +* + CALL SGEMV( 'No transpose', N-I, I, -ONE, Y( I+1, 1 ), + $ LDY, A( I, 1 ), LDA, ONE, A( I, I+1 ), LDA ) + CALL SGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ), + $ LDA, X( I, 1 ), LDX, ONE, A( I, I+1 ), LDA ) +* +* Generate reflection P(i) to annihilate A(i,i+2:n) +* + CALL SLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ), + $ LDA, TAUP( I ) ) + E( I ) = A( I, I+1 ) + A( I, I+1 ) = ONE +* +* Compute X(i+1:m,i) +* + CALL SGEMV( 'No transpose', M-I, N-I, ONE, A( I+1, I+1 ), + $ LDA, A( I, I+1 ), LDA, ZERO, X( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', N-I, I, ONE, Y( I+1, 1 ), LDY, + $ A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 ) + CALL SGEMV( 'No transpose', M-I, I, -ONE, A( I+1, 1 ), + $ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL SGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 ) + CALL SGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ), + $ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL SSCAL( M-I, TAUP( I ), X( I+1, I ), 1 ) + END IF + 10 CONTINUE + ELSE +* +* Reduce to lower bidiagonal form +* + DO 20 I = 1, NB +* +* Update A(i,i:n) +* + CALL SGEMV( 'No transpose', N-I+1, I-1, -ONE, Y( I, 1 ), + $ LDY, A( I, 1 ), LDA, ONE, A( I, I ), LDA ) + CALL SGEMV( 'Transpose', I-1, N-I+1, -ONE, A( 1, I ), LDA, + $ X( I, 1 ), LDX, ONE, A( I, I ), LDA ) +* +* Generate reflection P(i) to annihilate A(i,i+1:n) +* + CALL SLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA, + $ TAUP( I ) ) + D( I ) = A( I, I ) + IF( I.LT.M ) THEN + A( I, I ) = ONE +* +* Compute X(i+1:m,i) +* + CALL SGEMV( 'No transpose', M-I, N-I+1, ONE, A( I+1, I ), + $ LDA, A( I, I ), LDA, ZERO, X( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', N-I+1, I-1, ONE, Y( I, 1 ), LDY, + $ A( I, I ), LDA, ZERO, X( 1, I ), 1 ) + CALL SGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL SGEMV( 'No transpose', I-1, N-I+1, ONE, A( 1, I ), + $ LDA, A( I, I ), LDA, ZERO, X( 1, I ), 1 ) + CALL SGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ), + $ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL SSCAL( M-I, TAUP( I ), X( I+1, I ), 1 ) +* +* Update A(i+1:m,i) +* + CALL SGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, Y( I, 1 ), LDY, ONE, A( I+1, I ), 1 ) + CALL SGEMV( 'No transpose', M-I, I, -ONE, X( I+1, 1 ), + $ LDX, A( 1, I ), 1, ONE, A( I+1, I ), 1 ) +* +* Generate reflection Q(i) to annihilate A(i+2:m,i) +* + CALL SLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1, + $ TAUQ( I ) ) + E( I ) = A( I+1, I ) + A( I+1, I ) = ONE +* +* Compute Y(i+1:n,i) +* + CALL SGEMV( 'Transpose', M-I, N-I, ONE, A( I+1, I+1 ), + $ LDA, A( I+1, I ), 1, ZERO, Y( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', M-I, I-1, ONE, A( I+1, 1 ), LDA, + $ A( I+1, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL SGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ), + $ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', M-I, I, ONE, X( I+1, 1 ), LDX, + $ A( I+1, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL SGEMV( 'Transpose', I, N-I, -ONE, A( 1, I+1 ), LDA, + $ Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL SSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 ) + END IF + 20 CONTINUE + END IF + RETURN +* +* End of SLABRD +* + END diff --git a/costa/native/external/lapack/slacon.f b/costa/native/external/lapack/slacon.f new file mode 100644 index 000000000..719f87e9c --- /dev/null +++ b/costa/native/external/lapack/slacon.f @@ -0,0 +1,204 @@ + SUBROUTINE SLACON( N, V, X, ISGN, EST, KASE ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER KASE, N + REAL EST +* .. +* .. Array Arguments .. + INTEGER ISGN( * ) + REAL V( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* SLACON estimates the 1-norm of a square, real matrix A. +* Reverse communication is used for evaluating matrix-vector products. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. N >= 1. +* +* V (workspace) REAL array, dimension (N) +* On the final return, V = A*W, where EST = norm(V)/norm(W) +* (W is not returned). +* +* X (input/output) REAL array, dimension (N) +* On an intermediate return, X should be overwritten by +* A * X, if KASE=1, +* A' * X, if KASE=2, +* and SLACON must be re-called with all the other parameters +* unchanged. +* +* ISGN (workspace) INTEGER array, dimension (N) +* +* EST (output) REAL +* An estimate (a lower bound) for norm(A). +* +* KASE (input/output) INTEGER +* On the initial call to SLACON, KASE should be 0. +* On an intermediate return, KASE will be 1 or 2, indicating +* whether X should be overwritten by A * X or A' * X. +* On the final return from SLACON, KASE will again be 0. +* +* Further Details +* ======= ======= +* +* Contributed by Nick Higham, University of Manchester. +* Originally named SONEST, dated March 16, 1988. +* +* Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of +* a real or complex matrix, with applications to condition estimation", +* ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, ITER, J, JLAST, JUMP + REAL ALTSGN, ESTOLD, TEMP +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SASUM + EXTERNAL ISAMAX, SASUM +* .. +* .. External Subroutines .. + EXTERNAL SCOPY +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, NINT, REAL, SIGN +* .. +* .. Save statement .. + SAVE +* .. +* .. Executable Statements .. +* + IF( KASE.EQ.0 ) THEN + DO 10 I = 1, N + X( I ) = ONE / REAL( N ) + 10 CONTINUE + KASE = 1 + JUMP = 1 + RETURN + END IF +* + GO TO ( 20, 40, 70, 110, 140 )JUMP +* +* ................ ENTRY (JUMP = 1) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. +* + 20 CONTINUE + IF( N.EQ.1 ) THEN + V( 1 ) = X( 1 ) + EST = ABS( V( 1 ) ) +* ... QUIT + GO TO 150 + END IF + EST = SASUM( N, X, 1 ) +* + DO 30 I = 1, N + X( I ) = SIGN( ONE, X( I ) ) + ISGN( I ) = NINT( X( I ) ) + 30 CONTINUE + KASE = 2 + JUMP = 2 + RETURN +* +* ................ ENTRY (JUMP = 2) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. +* + 40 CONTINUE + J = ISAMAX( N, X, 1 ) + ITER = 2 +* +* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. +* + 50 CONTINUE + DO 60 I = 1, N + X( I ) = ZERO + 60 CONTINUE + X( J ) = ONE + KASE = 1 + JUMP = 3 + RETURN +* +* ................ ENTRY (JUMP = 3) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 70 CONTINUE + CALL SCOPY( N, X, 1, V, 1 ) + ESTOLD = EST + EST = SASUM( N, V, 1 ) + DO 80 I = 1, N + IF( NINT( SIGN( ONE, X( I ) ) ).NE.ISGN( I ) ) + $ GO TO 90 + 80 CONTINUE +* REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. + GO TO 120 +* + 90 CONTINUE +* TEST FOR CYCLING. + IF( EST.LE.ESTOLD ) + $ GO TO 120 +* + DO 100 I = 1, N + X( I ) = SIGN( ONE, X( I ) ) + ISGN( I ) = NINT( X( I ) ) + 100 CONTINUE + KASE = 2 + JUMP = 4 + RETURN +* +* ................ ENTRY (JUMP = 4) +* X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. +* + 110 CONTINUE + JLAST = J + J = ISAMAX( N, X, 1 ) + IF( ( X( JLAST ).NE.ABS( X( J ) ) ) .AND. ( ITER.LT.ITMAX ) ) THEN + ITER = ITER + 1 + GO TO 50 + END IF +* +* ITERATION COMPLETE. FINAL STAGE. +* + 120 CONTINUE + ALTSGN = ONE + DO 130 I = 1, N + X( I ) = ALTSGN*( ONE+REAL( I-1 ) / REAL( N-1 ) ) + ALTSGN = -ALTSGN + 130 CONTINUE + KASE = 1 + JUMP = 5 + RETURN +* +* ................ ENTRY (JUMP = 5) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 140 CONTINUE + TEMP = TWO*( SASUM( N, X, 1 ) / REAL( 3*N ) ) + IF( TEMP.GT.EST ) THEN + CALL SCOPY( N, X, 1, V, 1 ) + EST = TEMP + END IF +* + 150 CONTINUE + KASE = 0 + RETURN +* +* End of SLACON +* + END diff --git a/costa/native/external/lapack/slacpy.f b/costa/native/external/lapack/slacpy.f new file mode 100644 index 000000000..ac22a9c6e --- /dev/null +++ b/costa/native/external/lapack/slacpy.f @@ -0,0 +1,88 @@ + SUBROUTINE SLACPY( UPLO, M, N, A, LDA, B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDB, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SLACPY copies all or part of a two-dimensional matrix A to another +* matrix B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies the part of the matrix A to be copied to B. +* = 'U': Upper triangular part +* = 'L': Lower triangular part +* Otherwise: All of the matrix A +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The m by n matrix A. If UPLO = 'U', only the upper triangle +* or trapezoid is accessed; if UPLO = 'L', only the lower +* triangle or trapezoid is accessed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (output) REAL array, dimension (LDB,N) +* On exit, B = A in the locations specified by UPLO. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( J, M ) + B( I, J ) = A( I, J ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( LSAME( UPLO, 'L' ) ) THEN + DO 40 J = 1, N + DO 30 I = J, M + B( I, J ) = A( I, J ) + 30 CONTINUE + 40 CONTINUE + ELSE + DO 60 J = 1, N + DO 50 I = 1, M + B( I, J ) = A( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF + RETURN +* +* End of SLACPY +* + END diff --git a/costa/native/external/lapack/sladiv.f b/costa/native/external/lapack/sladiv.f new file mode 100644 index 000000000..f6354c842 --- /dev/null +++ b/costa/native/external/lapack/sladiv.f @@ -0,0 +1,63 @@ + SUBROUTINE SLADIV( A, B, C, D, P, Q ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + REAL A, B, C, D, P, Q +* .. +* +* Purpose +* ======= +* +* SLADIV performs complex division in real arithmetic +* +* a + i*b +* p + i*q = --------- +* c + i*d +* +* The algorithm is due to Robert L. Smith and can be found +* in D. Knuth, The art of Computer Programming, Vol.2, p.195 +* +* Arguments +* ========= +* +* A (input) REAL +* B (input) REAL +* C (input) REAL +* D (input) REAL +* The scalars a, b, c, and d in the above expression. +* +* P (output) REAL +* Q (output) REAL +* The scalars p and q in the above expression. +* +* ===================================================================== +* +* .. Local Scalars .. + REAL E, F +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + IF( ABS( D ).LT.ABS( C ) ) THEN + E = D / C + F = C + D*E + P = ( A+B*E ) / F + Q = ( B-A*E ) / F + ELSE + E = C / D + F = D + C*E + P = ( B+A*E ) / F + Q = ( -A+B*E ) / F + END IF +* + RETURN +* +* End of SLADIV +* + END diff --git a/costa/native/external/lapack/slae2.f b/costa/native/external/lapack/slae2.f new file mode 100644 index 000000000..ddb8ef446 --- /dev/null +++ b/costa/native/external/lapack/slae2.f @@ -0,0 +1,124 @@ + SUBROUTINE SLAE2( A, B, C, RT1, RT2 ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + REAL A, B, C, RT1, RT2 +* .. +* +* Purpose +* ======= +* +* SLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix +* [ A B ] +* [ B C ]. +* On return, RT1 is the eigenvalue of larger absolute value, and RT2 +* is the eigenvalue of smaller absolute value. +* +* Arguments +* ========= +* +* A (input) REAL +* The (1,1) element of the 2-by-2 matrix. +* +* B (input) REAL +* The (1,2) and (2,1) elements of the 2-by-2 matrix. +* +* C (input) REAL +* The (2,2) element of the 2-by-2 matrix. +* +* RT1 (output) REAL +* The eigenvalue of larger absolute value. +* +* RT2 (output) REAL +* The eigenvalue of smaller absolute value. +* +* Further Details +* =============== +* +* RT1 is accurate to a few ulps barring over/underflow. +* +* RT2 may be inaccurate if there is massive cancellation in the +* determinant A*C-B*B; higher precision or correctly rounded or +* correctly truncated arithmetic would be needed to compute RT2 +* accurately in all cases. +* +* Overflow is possible only if RT1 is within a factor of 5 of overflow. +* Underflow is harmless if the input data is 0 or exceeds +* underflow_threshold / macheps. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + REAL TWO + PARAMETER ( TWO = 2.0E0 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL HALF + PARAMETER ( HALF = 0.5E0 ) +* .. +* .. Local Scalars .. + REAL AB, ACMN, ACMX, ADF, DF, RT, SM, TB +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* +* Compute the eigenvalues +* + SM = A + C + DF = A - C + ADF = ABS( DF ) + TB = B + B + AB = ABS( TB ) + IF( ABS( A ).GT.ABS( C ) ) THEN + ACMX = A + ACMN = C + ELSE + ACMX = C + ACMN = A + END IF + IF( ADF.GT.AB ) THEN + RT = ADF*SQRT( ONE+( AB / ADF )**2 ) + ELSE IF( ADF.LT.AB ) THEN + RT = AB*SQRT( ONE+( ADF / AB )**2 ) + ELSE +* +* Includes case AB=ADF=0 +* + RT = AB*SQRT( TWO ) + END IF + IF( SM.LT.ZERO ) THEN + RT1 = HALF*( SM-RT ) +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE IF( SM.GT.ZERO ) THEN + RT1 = HALF*( SM+RT ) +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE +* +* Includes case RT1 = RT2 = 0 +* + RT1 = HALF*RT + RT2 = -HALF*RT + END IF + RETURN +* +* End of SLAE2 +* + END diff --git a/costa/native/external/lapack/slaebz.f b/costa/native/external/lapack/slaebz.f new file mode 100644 index 000000000..3617e9385 --- /dev/null +++ b/costa/native/external/lapack/slaebz.f @@ -0,0 +1,552 @@ + SUBROUTINE SLAEBZ( IJOB, NITMAX, N, MMAX, MINP, NBMIN, ABSTOL, + $ RELTOL, PIVMIN, D, E, E2, NVAL, AB, C, MOUT, + $ NAB, WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER IJOB, INFO, MINP, MMAX, MOUT, N, NBMIN, NITMAX + REAL ABSTOL, PIVMIN, RELTOL +* .. +* .. Array Arguments .. + INTEGER IWORK( * ), NAB( MMAX, * ), NVAL( * ) + REAL AB( MMAX, * ), C( * ), D( * ), E( * ), E2( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLAEBZ contains the iteration loops which compute and use the +* function N(w), which is the count of eigenvalues of a symmetric +* tridiagonal matrix T less than or equal to its argument w. It +* performs a choice of two types of loops: +* +* IJOB=1, followed by +* IJOB=2: It takes as input a list of intervals and returns a list of +* sufficiently small intervals whose union contains the same +* eigenvalues as the union of the original intervals. +* The input intervals are (AB(j,1),AB(j,2)], j=1,...,MINP. +* The output interval (AB(j,1),AB(j,2)] will contain +* eigenvalues NAB(j,1)+1,...,NAB(j,2), where 1 <= j <= MOUT. +* +* IJOB=3: It performs a binary search in each input interval +* (AB(j,1),AB(j,2)] for a point w(j) such that +* N(w(j))=NVAL(j), and uses C(j) as the starting point of +* the search. If such a w(j) is found, then on output +* AB(j,1)=AB(j,2)=w. If no such w(j) is found, then on output +* (AB(j,1),AB(j,2)] will be a small interval containing the +* point where N(w) jumps through NVAL(j), unless that point +* lies outside the initial interval. +* +* Note that the intervals are in all cases half-open intervals, +* i.e., of the form (a,b] , which includes b but not a . +* +* To avoid underflow, the matrix should be scaled so that its largest +* element is no greater than overflow**(1/2) * underflow**(1/4) +* in absolute value. To assure the most accurate computation +* of small eigenvalues, the matrix should be scaled to be +* not much smaller than that, either. +* +* See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal +* Matrix", Report CS41, Computer Science Dept., Stanford +* University, July 21, 1966 +* +* Note: the arguments are, in general, *not* checked for unreasonable +* values. +* +* Arguments +* ========= +* +* IJOB (input) INTEGER +* Specifies what is to be done: +* = 1: Compute NAB for the initial intervals. +* = 2: Perform bisection iteration to find eigenvalues of T. +* = 3: Perform bisection iteration to invert N(w), i.e., +* to find a point which has a specified number of +* eigenvalues of T to its left. +* Other values will cause SLAEBZ to return with INFO=-1. +* +* NITMAX (input) INTEGER +* The maximum number of "levels" of bisection to be +* performed, i.e., an interval of width W will not be made +* smaller than 2^(-NITMAX) * W. If not all intervals +* have converged after NITMAX iterations, then INFO is set +* to the number of non-converged intervals. +* +* N (input) INTEGER +* The dimension n of the tridiagonal matrix T. It must be at +* least 1. +* +* MMAX (input) INTEGER +* The maximum number of intervals. If more than MMAX intervals +* are generated, then SLAEBZ will quit with INFO=MMAX+1. +* +* MINP (input) INTEGER +* The initial number of intervals. It may not be greater than +* MMAX. +* +* NBMIN (input) INTEGER +* The smallest number of intervals that should be processed +* using a vector loop. If zero, then only the scalar loop +* will be used. +* +* ABSTOL (input) REAL +* The minimum (absolute) width of an interval. When an +* interval is narrower than ABSTOL, or than RELTOL times the +* larger (in magnitude) endpoint, then it is considered to be +* sufficiently small, i.e., converged. This must be at least +* zero. +* +* RELTOL (input) REAL +* The minimum relative width of an interval. When an interval +* is narrower than ABSTOL, or than RELTOL times the larger (in +* magnitude) endpoint, then it is considered to be +* sufficiently small, i.e., converged. Note: this should +* always be at least radix*machine epsilon. +* +* PIVMIN (input) REAL +* The minimum absolute value of a "pivot" in the Sturm +* sequence loop. This *must* be at least max |e(j)**2| * +* safe_min and at least safe_min, where safe_min is at least +* the smallest number that can divide one without overflow. +* +* D (input) REAL array, dimension (N) +* The diagonal elements of the tridiagonal matrix T. +* +* E (input) REAL array, dimension (N) +* The offdiagonal elements of the tridiagonal matrix T in +* positions 1 through N-1. E(N) is arbitrary. +* +* E2 (input) REAL array, dimension (N) +* The squares of the offdiagonal elements of the tridiagonal +* matrix T. E2(N) is ignored. +* +* NVAL (input/output) INTEGER array, dimension (MINP) +* If IJOB=1 or 2, not referenced. +* If IJOB=3, the desired values of N(w). The elements of NVAL +* will be reordered to correspond with the intervals in AB. +* Thus, NVAL(j) on output will not, in general be the same as +* NVAL(j) on input, but it will correspond with the interval +* (AB(j,1),AB(j,2)] on output. +* +* AB (input/output) REAL array, dimension (MMAX,2) +* The endpoints of the intervals. AB(j,1) is a(j), the left +* endpoint of the j-th interval, and AB(j,2) is b(j), the +* right endpoint of the j-th interval. The input intervals +* will, in general, be modified, split, and reordered by the +* calculation. +* +* C (input/output) REAL array, dimension (MMAX) +* If IJOB=1, ignored. +* If IJOB=2, workspace. +* If IJOB=3, then on input C(j) should be initialized to the +* first search point in the binary search. +* +* MOUT (output) INTEGER +* If IJOB=1, the number of eigenvalues in the intervals. +* If IJOB=2 or 3, the number of intervals output. +* If IJOB=3, MOUT will equal MINP. +* +* NAB (input/output) INTEGER array, dimension (MMAX,2) +* If IJOB=1, then on output NAB(i,j) will be set to N(AB(i,j)). +* If IJOB=2, then on input, NAB(i,j) should be set. It must +* satisfy the condition: +* N(AB(i,1)) <= NAB(i,1) <= NAB(i,2) <= N(AB(i,2)), +* which means that in interval i only eigenvalues +* NAB(i,1)+1,...,NAB(i,2) will be considered. Usually, +* NAB(i,j)=N(AB(i,j)), from a previous call to SLAEBZ with +* IJOB=1. +* On output, NAB(i,j) will contain +* max(na(k),min(nb(k),N(AB(i,j)))), where k is the index of +* the input interval that the output interval +* (AB(j,1),AB(j,2)] came from, and na(k) and nb(k) are the +* the input values of NAB(k,1) and NAB(k,2). +* If IJOB=3, then on output, NAB(i,j) contains N(AB(i,j)), +* unless N(w) > NVAL(i) for all search points w , in which +* case NAB(i,1) will not be modified, i.e., the output +* value will be the same as the input value (modulo +* reorderings -- see NVAL and AB), or unless N(w) < NVAL(i) +* for all search points w , in which case NAB(i,2) will +* not be modified. Normally, NAB should be set to some +* distinctive value(s) before SLAEBZ is called. +* +* WORK (workspace) REAL array, dimension (MMAX) +* Workspace. +* +* IWORK (workspace) INTEGER array, dimension (MMAX) +* Workspace. +* +* INFO (output) INTEGER +* = 0: All intervals converged. +* = 1--MMAX: The last INFO intervals did not converge. +* = MMAX+1: More than MMAX intervals were generated. +* +* Further Details +* =============== +* +* This routine is intended to be called only by other LAPACK +* routines, thus the interface is less user-friendly. It is intended +* for two purposes: +* +* (a) finding eigenvalues. In this case, SLAEBZ should have one or +* more initial intervals set up in AB, and SLAEBZ should be called +* with IJOB=1. This sets up NAB, and also counts the eigenvalues. +* Intervals with no eigenvalues would usually be thrown out at +* this point. Also, if not all the eigenvalues in an interval i +* are desired, NAB(i,1) can be increased or NAB(i,2) decreased. +* For example, set NAB(i,1)=NAB(i,2)-1 to get the largest +* eigenvalue. SLAEBZ is then called with IJOB=2 and MMAX +* no smaller than the value of MOUT returned by the call with +* IJOB=1. After this (IJOB=2) call, eigenvalues NAB(i,1)+1 +* through NAB(i,2) are approximately AB(i,1) (or AB(i,2)) to the +* tolerance specified by ABSTOL and RELTOL. +* +* (b) finding an interval (a',b'] containing eigenvalues w(f),...,w(l). +* In this case, start with a Gershgorin interval (a,b). Set up +* AB to contain 2 search intervals, both initially (a,b). One +* NVAL element should contain f-1 and the other should contain l +* , while C should contain a and b, resp. NAB(i,1) should be -1 +* and NAB(i,2) should be N+1, to flag an error if the desired +* interval does not lie in (a,b). SLAEBZ is then called with +* IJOB=3. On exit, if w(f-1) < w(f), then one of the intervals -- +* j -- will have AB(j,1)=AB(j,2) and NAB(j,1)=NAB(j,2)=f-1, while +* if, to the specified tolerance, w(f-k)=...=w(f+r), k > 0 and r +* >= 0, then the interval will have N(AB(j,1))=NAB(j,1)=f-k and +* N(AB(j,2))=NAB(j,2)=f+r. The cases w(l) < w(l+1) and +* w(l-r)=...=w(l+k) are handled similarly. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, TWO, HALF + PARAMETER ( ZERO = 0.0E0, TWO = 2.0E0, + $ HALF = 1.0E0 / TWO ) +* .. +* .. Local Scalars .. + INTEGER ITMP1, ITMP2, J, JI, JIT, JP, KF, KFNEW, KL, + $ KLNEW + REAL TMP1, TMP2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Check for Errors +* + INFO = 0 + IF( IJOB.LT.1 .OR. IJOB.GT.3 ) THEN + INFO = -1 + RETURN + END IF +* +* Initialize NAB +* + IF( IJOB.EQ.1 ) THEN +* +* Compute the number of eigenvalues in the initial intervals. +* + MOUT = 0 +CDIR$ NOVECTOR + DO 30 JI = 1, MINP + DO 20 JP = 1, 2 + TMP1 = D( 1 ) - AB( JI, JP ) + IF( ABS( TMP1 ).LT.PIVMIN ) + $ TMP1 = -PIVMIN + NAB( JI, JP ) = 0 + IF( TMP1.LE.ZERO ) + $ NAB( JI, JP ) = 1 +* + DO 10 J = 2, N + TMP1 = D( J ) - E2( J-1 ) / TMP1 - AB( JI, JP ) + IF( ABS( TMP1 ).LT.PIVMIN ) + $ TMP1 = -PIVMIN + IF( TMP1.LE.ZERO ) + $ NAB( JI, JP ) = NAB( JI, JP ) + 1 + 10 CONTINUE + 20 CONTINUE + MOUT = MOUT + NAB( JI, 2 ) - NAB( JI, 1 ) + 30 CONTINUE + RETURN + END IF +* +* Initialize for loop +* +* KF and KL have the following meaning: +* Intervals 1,...,KF-1 have converged. +* Intervals KF,...,KL still need to be refined. +* + KF = 1 + KL = MINP +* +* If IJOB=2, initialize C. +* If IJOB=3, use the user-supplied starting point. +* + IF( IJOB.EQ.2 ) THEN + DO 40 JI = 1, MINP + C( JI ) = HALF*( AB( JI, 1 )+AB( JI, 2 ) ) + 40 CONTINUE + END IF +* +* Iteration loop +* + DO 130 JIT = 1, NITMAX +* +* Loop over intervals +* + IF( KL-KF+1.GE.NBMIN .AND. NBMIN.GT.0 ) THEN +* +* Begin of Parallel Version of the loop +* + DO 60 JI = KF, KL +* +* Compute N(c), the number of eigenvalues less than c +* + WORK( JI ) = D( 1 ) - C( JI ) + IWORK( JI ) = 0 + IF( WORK( JI ).LE.PIVMIN ) THEN + IWORK( JI ) = 1 + WORK( JI ) = MIN( WORK( JI ), -PIVMIN ) + END IF +* + DO 50 J = 2, N + WORK( JI ) = D( J ) - E2( J-1 ) / WORK( JI ) - C( JI ) + IF( WORK( JI ).LE.PIVMIN ) THEN + IWORK( JI ) = IWORK( JI ) + 1 + WORK( JI ) = MIN( WORK( JI ), -PIVMIN ) + END IF + 50 CONTINUE + 60 CONTINUE +* + IF( IJOB.LE.2 ) THEN +* +* IJOB=2: Choose all intervals containing eigenvalues. +* + KLNEW = KL + DO 70 JI = KF, KL +* +* Insure that N(w) is monotone +* + IWORK( JI ) = MIN( NAB( JI, 2 ), + $ MAX( NAB( JI, 1 ), IWORK( JI ) ) ) +* +* Update the Queue -- add intervals if both halves +* contain eigenvalues. +* + IF( IWORK( JI ).EQ.NAB( JI, 2 ) ) THEN +* +* No eigenvalue in the upper interval: +* just use the lower interval. +* + AB( JI, 2 ) = C( JI ) +* + ELSE IF( IWORK( JI ).EQ.NAB( JI, 1 ) ) THEN +* +* No eigenvalue in the lower interval: +* just use the upper interval. +* + AB( JI, 1 ) = C( JI ) + ELSE + KLNEW = KLNEW + 1 + IF( KLNEW.LE.MMAX ) THEN +* +* Eigenvalue in both intervals -- add upper to +* queue. +* + AB( KLNEW, 2 ) = AB( JI, 2 ) + NAB( KLNEW, 2 ) = NAB( JI, 2 ) + AB( KLNEW, 1 ) = C( JI ) + NAB( KLNEW, 1 ) = IWORK( JI ) + AB( JI, 2 ) = C( JI ) + NAB( JI, 2 ) = IWORK( JI ) + ELSE + INFO = MMAX + 1 + END IF + END IF + 70 CONTINUE + IF( INFO.NE.0 ) + $ RETURN + KL = KLNEW + ELSE +* +* IJOB=3: Binary search. Keep only the interval containing +* w s.t. N(w) = NVAL +* + DO 80 JI = KF, KL + IF( IWORK( JI ).LE.NVAL( JI ) ) THEN + AB( JI, 1 ) = C( JI ) + NAB( JI, 1 ) = IWORK( JI ) + END IF + IF( IWORK( JI ).GE.NVAL( JI ) ) THEN + AB( JI, 2 ) = C( JI ) + NAB( JI, 2 ) = IWORK( JI ) + END IF + 80 CONTINUE + END IF +* + ELSE +* +* End of Parallel Version of the loop +* +* Begin of Serial Version of the loop +* + KLNEW = KL + DO 100 JI = KF, KL +* +* Compute N(w), the number of eigenvalues less than w +* + TMP1 = C( JI ) + TMP2 = D( 1 ) - TMP1 + ITMP1 = 0 + IF( TMP2.LE.PIVMIN ) THEN + ITMP1 = 1 + TMP2 = MIN( TMP2, -PIVMIN ) + END IF +* +* A series of compiler directives to defeat vectorization +* for the next loop +* +*$PL$ CMCHAR=' ' +CDIR$ NEXTSCALAR +C$DIR SCALAR +CDIR$ NEXT SCALAR +CVD$L NOVECTOR +CDEC$ NOVECTOR +CVD$ NOVECTOR +*VDIR NOVECTOR +*VOCL LOOP,SCALAR +CIBM PREFER SCALAR +*$PL$ CMCHAR='*' +* + DO 90 J = 2, N + TMP2 = D( J ) - E2( J-1 ) / TMP2 - TMP1 + IF( TMP2.LE.PIVMIN ) THEN + ITMP1 = ITMP1 + 1 + TMP2 = MIN( TMP2, -PIVMIN ) + END IF + 90 CONTINUE +* + IF( IJOB.LE.2 ) THEN +* +* IJOB=2: Choose all intervals containing eigenvalues. +* +* Insure that N(w) is monotone +* + ITMP1 = MIN( NAB( JI, 2 ), + $ MAX( NAB( JI, 1 ), ITMP1 ) ) +* +* Update the Queue -- add intervals if both halves +* contain eigenvalues. +* + IF( ITMP1.EQ.NAB( JI, 2 ) ) THEN +* +* No eigenvalue in the upper interval: +* just use the lower interval. +* + AB( JI, 2 ) = TMP1 +* + ELSE IF( ITMP1.EQ.NAB( JI, 1 ) ) THEN +* +* No eigenvalue in the lower interval: +* just use the upper interval. +* + AB( JI, 1 ) = TMP1 + ELSE IF( KLNEW.LT.MMAX ) THEN +* +* Eigenvalue in both intervals -- add upper to queue. +* + KLNEW = KLNEW + 1 + AB( KLNEW, 2 ) = AB( JI, 2 ) + NAB( KLNEW, 2 ) = NAB( JI, 2 ) + AB( KLNEW, 1 ) = TMP1 + NAB( KLNEW, 1 ) = ITMP1 + AB( JI, 2 ) = TMP1 + NAB( JI, 2 ) = ITMP1 + ELSE + INFO = MMAX + 1 + RETURN + END IF + ELSE +* +* IJOB=3: Binary search. Keep only the interval +* containing w s.t. N(w) = NVAL +* + IF( ITMP1.LE.NVAL( JI ) ) THEN + AB( JI, 1 ) = TMP1 + NAB( JI, 1 ) = ITMP1 + END IF + IF( ITMP1.GE.NVAL( JI ) ) THEN + AB( JI, 2 ) = TMP1 + NAB( JI, 2 ) = ITMP1 + END IF + END IF + 100 CONTINUE + KL = KLNEW +* +* End of Serial Version of the loop +* + END IF +* +* Check for convergence +* + KFNEW = KF + DO 110 JI = KF, KL + TMP1 = ABS( AB( JI, 2 )-AB( JI, 1 ) ) + TMP2 = MAX( ABS( AB( JI, 2 ) ), ABS( AB( JI, 1 ) ) ) + IF( TMP1.LT.MAX( ABSTOL, PIVMIN, RELTOL*TMP2 ) .OR. + $ NAB( JI, 1 ).GE.NAB( JI, 2 ) ) THEN +* +* Converged -- Swap with position KFNEW, +* then increment KFNEW +* + IF( JI.GT.KFNEW ) THEN + TMP1 = AB( JI, 1 ) + TMP2 = AB( JI, 2 ) + ITMP1 = NAB( JI, 1 ) + ITMP2 = NAB( JI, 2 ) + AB( JI, 1 ) = AB( KFNEW, 1 ) + AB( JI, 2 ) = AB( KFNEW, 2 ) + NAB( JI, 1 ) = NAB( KFNEW, 1 ) + NAB( JI, 2 ) = NAB( KFNEW, 2 ) + AB( KFNEW, 1 ) = TMP1 + AB( KFNEW, 2 ) = TMP2 + NAB( KFNEW, 1 ) = ITMP1 + NAB( KFNEW, 2 ) = ITMP2 + IF( IJOB.EQ.3 ) THEN + ITMP1 = NVAL( JI ) + NVAL( JI ) = NVAL( KFNEW ) + NVAL( KFNEW ) = ITMP1 + END IF + END IF + KFNEW = KFNEW + 1 + END IF + 110 CONTINUE + KF = KFNEW +* +* Choose Midpoints +* + DO 120 JI = KF, KL + C( JI ) = HALF*( AB( JI, 1 )+AB( JI, 2 ) ) + 120 CONTINUE +* +* If no more intervals to refine, quit. +* + IF( KF.GT.KL ) + $ GO TO 140 + 130 CONTINUE +* +* Converged +* + 140 CONTINUE + INFO = MAX( KL+1-KF, 0 ) + MOUT = KL +* + RETURN +* +* End of SLAEBZ +* + END diff --git a/costa/native/external/lapack/slaed0.f b/costa/native/external/lapack/slaed0.f new file mode 100644 index 000000000..8fc44a510 --- /dev/null +++ b/costa/native/external/lapack/slaed0.f @@ -0,0 +1,350 @@ + SUBROUTINE SLAED0( ICOMPQ, QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS, + $ WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, LDQ, LDQS, N, QSIZ +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL D( * ), E( * ), Q( LDQ, * ), QSTORE( LDQS, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLAED0 computes all eigenvalues and corresponding eigenvectors of a +* symmetric tridiagonal matrix using the divide and conquer method. +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* = 0: Compute eigenvalues only. +* = 1: Compute eigenvectors of original dense symmetric matrix +* also. On entry, Q contains the orthogonal matrix used +* to reduce the original matrix to tridiagonal form. +* = 2: Compute eigenvalues and eigenvectors of tridiagonal +* matrix. +* +* QSIZ (input) INTEGER +* The dimension of the orthogonal matrix used to reduce +* the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. +* +* N (input) INTEGER +* The dimension of the symmetric tridiagonal matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the main diagonal of the tridiagonal matrix. +* On exit, its eigenvalues. +* +* E (input) REAL array, dimension (N-1) +* The off-diagonal elements of the tridiagonal matrix. +* On exit, E has been destroyed. +* +* Q (input/output) REAL array, dimension (LDQ, N) +* On entry, Q must contain an N-by-N orthogonal matrix. +* If ICOMPQ = 0 Q is not referenced. +* If ICOMPQ = 1 On entry, Q is a subset of the columns of the +* orthogonal matrix used to reduce the full +* matrix to tridiagonal form corresponding to +* the subset of the full matrix which is being +* decomposed at this time. +* If ICOMPQ = 2 On entry, Q will be the identity matrix. +* On exit, Q contains the eigenvectors of the +* tridiagonal matrix. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. If eigenvectors are +* desired, then LDQ >= max(1,N). In any case, LDQ >= 1. +* +* QSTORE (workspace) REAL array, dimension (LDQS, N) +* Referenced only when ICOMPQ = 1. Used to store parts of +* the eigenvector matrix when the updating matrix multiplies +* take place. +* +* LDQS (input) INTEGER +* The leading dimension of the array QSTORE. If ICOMPQ = 1, +* then LDQS >= max(1,N). In any case, LDQS >= 1. +* +* WORK (workspace) REAL array, +* If ICOMPQ = 0 or 1, the dimension of WORK must be at least +* 1 + 3*N + 2*N*lg N + 2*N**2 +* ( lg( N ) = smallest integer k +* such that 2^k >= N ) +* If ICOMPQ = 2, the dimension of WORK must be at least +* 4*N + N**2. +* +* IWORK (workspace) INTEGER array, +* If ICOMPQ = 0 or 1, the dimension of IWORK must be at least +* 6 + 6*N + 5*N*lg N. +* ( lg( N ) = smallest integer k +* such that 2^k >= N ) +* If ICOMPQ = 2, the dimension of IWORK must be at least +* 3 + 5*N. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: The algorithm failed to compute an eigenvalue while +* working on the submatrix lying in rows and columns +* INFO/(N+1) through mod(INFO,N+1). +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.E0, ONE = 1.E0, TWO = 2.E0 ) +* .. +* .. Local Scalars .. + INTEGER CURLVL, CURPRB, CURR, I, IGIVCL, IGIVNM, + $ IGIVPT, INDXQ, IPERM, IPRMPT, IQ, IQPTR, IWREM, + $ J, K, LGN, MATSIZ, MSD2, SMLSIZ, SMM1, SPM1, + $ SPM2, SUBMAT, SUBPBS, TLVLS + REAL TEMP +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, SLACPY, SLAED1, SLAED7, SSTEQR, + $ XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.2 ) THEN + INFO = -1 + ELSE IF( ( ICOMPQ.EQ.1 ) .AND. ( QSIZ.LT.MAX( 0, N ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDQS.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAED0', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + SMLSIZ = ILAENV( 9, 'SLAED0', ' ', 0, 0, 0, 0 ) +* +* Determine the size and placement of the submatrices, and save in +* the leading elements of IWORK. +* + IWORK( 1 ) = N + SUBPBS = 1 + TLVLS = 0 + 10 CONTINUE + IF( IWORK( SUBPBS ).GT.SMLSIZ ) THEN + DO 20 J = SUBPBS, 1, -1 + IWORK( 2*J ) = ( IWORK( J )+1 ) / 2 + IWORK( 2*J-1 ) = IWORK( J ) / 2 + 20 CONTINUE + TLVLS = TLVLS + 1 + SUBPBS = 2*SUBPBS + GO TO 10 + END IF + DO 30 J = 2, SUBPBS + IWORK( J ) = IWORK( J ) + IWORK( J-1 ) + 30 CONTINUE +* +* Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 +* using rank-1 modifications (cuts). +* + SPM1 = SUBPBS - 1 + DO 40 I = 1, SPM1 + SUBMAT = IWORK( I ) + 1 + SMM1 = SUBMAT - 1 + D( SMM1 ) = D( SMM1 ) - ABS( E( SMM1 ) ) + D( SUBMAT ) = D( SUBMAT ) - ABS( E( SMM1 ) ) + 40 CONTINUE +* + INDXQ = 4*N + 3 + IF( ICOMPQ.NE.2 ) THEN +* +* Set up workspaces for eigenvalues only/accumulate new vectors +* routine +* + TEMP = LOG( REAL( N ) ) / LOG( TWO ) + LGN = INT( TEMP ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IPRMPT = INDXQ + N + 1 + IPERM = IPRMPT + N*LGN + IQPTR = IPERM + N*LGN + IGIVPT = IQPTR + N + 2 + IGIVCL = IGIVPT + N*LGN +* + IGIVNM = 1 + IQ = IGIVNM + 2*N*LGN + IWREM = IQ + N**2 + 1 +* +* Initialize pointers +* + DO 50 I = 0, SUBPBS + IWORK( IPRMPT+I ) = 1 + IWORK( IGIVPT+I ) = 1 + 50 CONTINUE + IWORK( IQPTR ) = 1 + END IF +* +* Solve each submatrix eigenproblem at the bottom of the divide and +* conquer tree. +* + CURR = 0 + DO 70 I = 0, SPM1 + IF( I.EQ.0 ) THEN + SUBMAT = 1 + MATSIZ = IWORK( 1 ) + ELSE + SUBMAT = IWORK( I ) + 1 + MATSIZ = IWORK( I+1 ) - IWORK( I ) + END IF + IF( ICOMPQ.EQ.2 ) THEN + CALL SSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ), + $ Q( SUBMAT, SUBMAT ), LDQ, WORK, INFO ) + IF( INFO.NE.0 ) + $ GO TO 130 + ELSE + CALL SSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ), + $ WORK( IQ-1+IWORK( IQPTR+CURR ) ), MATSIZ, WORK, + $ INFO ) + IF( INFO.NE.0 ) + $ GO TO 130 + IF( ICOMPQ.EQ.1 ) THEN + CALL SGEMM( 'N', 'N', QSIZ, MATSIZ, MATSIZ, ONE, + $ Q( 1, SUBMAT ), LDQ, WORK( IQ-1+IWORK( IQPTR+ + $ CURR ) ), MATSIZ, ZERO, QSTORE( 1, SUBMAT ), + $ LDQS ) + END IF + IWORK( IQPTR+CURR+1 ) = IWORK( IQPTR+CURR ) + MATSIZ**2 + CURR = CURR + 1 + END IF + K = 1 + DO 60 J = SUBMAT, IWORK( I+1 ) + IWORK( INDXQ+J ) = K + K = K + 1 + 60 CONTINUE + 70 CONTINUE +* +* Successively merge eigensystems of adjacent submatrices +* into eigensystem for the corresponding larger matrix. +* +* while ( SUBPBS > 1 ) +* + CURLVL = 1 + 80 CONTINUE + IF( SUBPBS.GT.1 ) THEN + SPM2 = SUBPBS - 2 + DO 90 I = 0, SPM2, 2 + IF( I.EQ.0 ) THEN + SUBMAT = 1 + MATSIZ = IWORK( 2 ) + MSD2 = IWORK( 1 ) + CURPRB = 0 + ELSE + SUBMAT = IWORK( I ) + 1 + MATSIZ = IWORK( I+2 ) - IWORK( I ) + MSD2 = MATSIZ / 2 + CURPRB = CURPRB + 1 + END IF +* +* Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) +* into an eigensystem of size MATSIZ. +* SLAED1 is used only for the full eigensystem of a tridiagonal +* matrix. +* SLAED7 handles the cases in which eigenvalues only or eigenvalues +* and eigenvectors of a full symmetric matrix (which was reduced to +* tridiagonal form) are desired. +* + IF( ICOMPQ.EQ.2 ) THEN + CALL SLAED1( MATSIZ, D( SUBMAT ), Q( SUBMAT, SUBMAT ), + $ LDQ, IWORK( INDXQ+SUBMAT ), + $ E( SUBMAT+MSD2-1 ), MSD2, WORK, + $ IWORK( SUBPBS+1 ), INFO ) + ELSE + CALL SLAED7( ICOMPQ, MATSIZ, QSIZ, TLVLS, CURLVL, CURPRB, + $ D( SUBMAT ), QSTORE( 1, SUBMAT ), LDQS, + $ IWORK( INDXQ+SUBMAT ), E( SUBMAT+MSD2-1 ), + $ MSD2, WORK( IQ ), IWORK( IQPTR ), + $ IWORK( IPRMPT ), IWORK( IPERM ), + $ IWORK( IGIVPT ), IWORK( IGIVCL ), + $ WORK( IGIVNM ), WORK( IWREM ), + $ IWORK( SUBPBS+1 ), INFO ) + END IF + IF( INFO.NE.0 ) + $ GO TO 130 + IWORK( I / 2+1 ) = IWORK( I+2 ) + 90 CONTINUE + SUBPBS = SUBPBS / 2 + CURLVL = CURLVL + 1 + GO TO 80 + END IF +* +* end while +* +* Re-merge the eigenvalues/vectors which were deflated at the final +* merge step. +* + IF( ICOMPQ.EQ.1 ) THEN + DO 100 I = 1, N + J = IWORK( INDXQ+I ) + WORK( I ) = D( J ) + CALL SCOPY( QSIZ, QSTORE( 1, J ), 1, Q( 1, I ), 1 ) + 100 CONTINUE + CALL SCOPY( N, WORK, 1, D, 1 ) + ELSE IF( ICOMPQ.EQ.2 ) THEN + DO 110 I = 1, N + J = IWORK( INDXQ+I ) + WORK( I ) = D( J ) + CALL SCOPY( N, Q( 1, J ), 1, WORK( N*I+1 ), 1 ) + 110 CONTINUE + CALL SCOPY( N, WORK, 1, D, 1 ) + CALL SLACPY( 'A', N, N, WORK( N+1 ), N, Q, LDQ ) + ELSE + DO 120 I = 1, N + J = IWORK( INDXQ+I ) + WORK( I ) = D( J ) + 120 CONTINUE + CALL SCOPY( N, WORK, 1, D, 1 ) + END IF + GO TO 140 +* + 130 CONTINUE + INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1 +* + 140 CONTINUE + RETURN +* +* End of SLAED0 +* + END diff --git a/costa/native/external/lapack/slaed1.f b/costa/native/external/lapack/slaed1.f new file mode 100644 index 000000000..b1f825459 --- /dev/null +++ b/costa/native/external/lapack/slaed1.f @@ -0,0 +1,196 @@ + SUBROUTINE SLAED1( N, D, Q, LDQ, INDXQ, RHO, CUTPNT, WORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER CUTPNT, INFO, LDQ, N + REAL RHO +* .. +* .. Array Arguments .. + INTEGER INDXQ( * ), IWORK( * ) + REAL D( * ), Q( LDQ, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLAED1 computes the updated eigensystem of a diagonal +* matrix after modification by a rank-one symmetric matrix. This +* routine is used only for the eigenproblem which requires all +* eigenvalues and eigenvectors of a tridiagonal matrix. SLAED7 handles +* the case in which eigenvalues only or eigenvalues and eigenvectors +* of a full symmetric matrix (which was reduced to tridiagonal form) +* are desired. +* +* T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) +* +* where Z = Q'u, u is a vector of length N with ones in the +* CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. +* +* The eigenvectors of the original matrix are stored in Q, and the +* eigenvalues are in D. The algorithm consists of three stages: +* +* The first stage consists of deflating the size of the problem +* when there are multiple eigenvalues or if there is a zero in +* the Z vector. For each such occurence the dimension of the +* secular equation problem is reduced by one. This stage is +* performed by the routine SLAED2. +* +* The second stage consists of calculating the updated +* eigenvalues. This is done by finding the roots of the secular +* equation via the routine SLAED4 (as called by SLAED3). +* This routine also calculates the eigenvectors of the current +* problem. +* +* The final stage consists of computing the updated eigenvectors +* directly using the updated eigenvalues. The eigenvectors for +* the current problem are multiplied with the eigenvectors from +* the overall problem. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The dimension of the symmetric tridiagonal matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the eigenvalues of the rank-1-perturbed matrix. +* On exit, the eigenvalues of the repaired matrix. +* +* Q (input/output) REAL array, dimension (LDQ,N) +* On entry, the eigenvectors of the rank-1-perturbed matrix. +* On exit, the eigenvectors of the repaired tridiagonal matrix. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* INDXQ (input/output) INTEGER array, dimension (N) +* On entry, the permutation which separately sorts the two +* subproblems in D into ascending order. +* On exit, the permutation which will reintegrate the +* subproblems back into sorted order, +* i.e. D( INDXQ( I = 1, N ) ) will be in ascending order. +* +* RHO (input) REAL +* The subdiagonal entry used to create the rank-1 modification. +* +* CUTPNT (input) INTEGER +* The location of the last eigenvalue in the leading sub-matrix. +* min(1,N) <= CUTPNT <= N/2. +* +* WORK (workspace) REAL array, dimension (4*N + N**2) +* +* IWORK (workspace) INTEGER array, dimension (4*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an eigenvalue did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* Modified by Francoise Tisseur, University of Tennessee. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER COLTYP, CPP1, I, IDLMDA, INDX, INDXC, INDXP, + $ IQ2, IS, IW, IZ, K, N1, N2 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLAED2, SLAED3, SLAMRG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( MIN( 1, N / 2 ).GT.CUTPNT .OR. ( N / 2 ).LT.CUTPNT ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAED1', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* The following values are integer pointers which indicate +* the portion of the workspace +* used by a particular array in SLAED2 and SLAED3. +* + IZ = 1 + IDLMDA = IZ + N + IW = IDLMDA + N + IQ2 = IW + N +* + INDX = 1 + INDXC = INDX + N + COLTYP = INDXC + N + INDXP = COLTYP + N +* +* +* Form the z-vector which consists of the last row of Q_1 and the +* first row of Q_2. +* + CALL SCOPY( CUTPNT, Q( CUTPNT, 1 ), LDQ, WORK( IZ ), 1 ) + CPP1 = CUTPNT + 1 + CALL SCOPY( N-CUTPNT, Q( CPP1, CPP1 ), LDQ, WORK( IZ+CUTPNT ), 1 ) +* +* Deflate eigenvalues. +* + CALL SLAED2( K, N, CUTPNT, D, Q, LDQ, INDXQ, RHO, WORK( IZ ), + $ WORK( IDLMDA ), WORK( IW ), WORK( IQ2 ), + $ IWORK( INDX ), IWORK( INDXC ), IWORK( INDXP ), + $ IWORK( COLTYP ), INFO ) +* + IF( INFO.NE.0 ) + $ GO TO 20 +* +* Solve Secular Equation. +* + IF( K.NE.0 ) THEN + IS = ( IWORK( COLTYP )+IWORK( COLTYP+1 ) )*CUTPNT + + $ ( IWORK( COLTYP+1 )+IWORK( COLTYP+2 ) )*( N-CUTPNT ) + IQ2 + CALL SLAED3( K, N, CUTPNT, D, Q, LDQ, RHO, WORK( IDLMDA ), + $ WORK( IQ2 ), IWORK( INDXC ), IWORK( COLTYP ), + $ WORK( IW ), WORK( IS ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 20 +* +* Prepare the INDXQ sorting permutation. +* + N1 = K + N2 = N - K + CALL SLAMRG( N1, N2, D, 1, -1, INDXQ ) + ELSE + DO 10 I = 1, N + INDXQ( I ) = I + 10 CONTINUE + END IF +* + 20 CONTINUE + RETURN +* +* End of SLAED1 +* + END diff --git a/costa/native/external/lapack/slaed2.f b/costa/native/external/lapack/slaed2.f new file mode 100644 index 000000000..c31de3540 --- /dev/null +++ b/costa/native/external/lapack/slaed2.f @@ -0,0 +1,435 @@ + SUBROUTINE SLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W, + $ Q2, INDX, INDXC, INDXP, COLTYP, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDQ, N, N1 + REAL RHO +* .. +* .. Array Arguments .. + INTEGER COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ), + $ INDXQ( * ) + REAL D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ), + $ W( * ), Z( * ) +* .. +* +* Purpose +* ======= +* +* SLAED2 merges the two sets of eigenvalues together into a single +* sorted set. Then it tries to deflate the size of the problem. +* There are two ways in which deflation can occur: when two or more +* eigenvalues are close together or if there is a tiny entry in the +* Z vector. For each such occurrence the order of the related secular +* equation problem is reduced by one. +* +* Arguments +* ========= +* +* K (output) INTEGER +* The number of non-deflated eigenvalues, and the order of the +* related secular equation. 0 <= K <=N. +* +* N (input) INTEGER +* The dimension of the symmetric tridiagonal matrix. N >= 0. +* +* N1 (input) INTEGER +* The location of the last eigenvalue in the leading sub-matrix. +* min(1,N) <= N1 <= N/2. +* +* D (input/output) REAL array, dimension (N) +* On entry, D contains the eigenvalues of the two submatrices to +* be combined. +* On exit, D contains the trailing (N-K) updated eigenvalues +* (those which were deflated) sorted into increasing order. +* +* Q (input/output) REAL array, dimension (LDQ, N) +* On entry, Q contains the eigenvectors of two submatrices in +* the two square blocks with corners at (1,1), (N1,N1) +* and (N1+1, N1+1), (N,N). +* On exit, Q contains the trailing (N-K) updated eigenvectors +* (those which were deflated) in its last N-K columns. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* INDXQ (input/output) INTEGER array, dimension (N) +* The permutation which separately sorts the two sub-problems +* in D into ascending order. Note that elements in the second +* half of this permutation must first have N1 added to their +* values. Destroyed on exit. +* +* RHO (input/output) REAL +* On entry, the off-diagonal element associated with the rank-1 +* cut which originally split the two submatrices which are now +* being recombined. +* On exit, RHO has been modified to the value required by +* SLAED3. +* +* Z (input) REAL array, dimension (N) +* On entry, Z contains the updating vector (the last +* row of the first sub-eigenvector matrix and the first row of +* the second sub-eigenvector matrix). +* On exit, the contents of Z have been destroyed by the updating +* process. +* +* DLAMDA (output) REAL array, dimension (N) +* A copy of the first K eigenvalues which will be used by +* SLAED3 to form the secular equation. +* +* W (output) REAL array, dimension (N) +* The first k values of the final deflation-altered z-vector +* which will be passed to SLAED3. +* +* Q2 (output) REAL array, dimension (N1**2+(N-N1)**2) +* A copy of the first K eigenvectors which will be used by +* SLAED3 in a matrix multiply (SGEMM) to solve for the new +* eigenvectors. +* +* INDX (workspace) INTEGER array, dimension (N) +* The permutation used to sort the contents of DLAMDA into +* ascending order. +* +* INDXC (output) INTEGER array, dimension (N) +* The permutation used to arrange the columns of the deflated +* Q matrix into three groups: the first group contains non-zero +* elements only at and above N1, the second contains +* non-zero elements only below N1, and the third is dense. +* +* INDXP (workspace) INTEGER array, dimension (N) +* The permutation used to place deflated values of D at the end +* of the array. INDXP(1:K) points to the nondeflated D-values +* and INDXP(K+1:N) points to the deflated eigenvalues. +* +* COLTYP (workspace/output) INTEGER array, dimension (N) +* During execution, a label which will indicate which of the +* following types a column in the Q2 matrix is: +* 1 : non-zero in the upper half only; +* 2 : dense; +* 3 : non-zero in the lower half only; +* 4 : deflated. +* On exit, COLTYP(i) is the number of columns of type i, +* for i=1 to 4 only. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* Modified by Francoise Tisseur, University of Tennessee. +* +* ===================================================================== +* +* .. Parameters .. + REAL MONE, ZERO, ONE, TWO, EIGHT + PARAMETER ( MONE = -1.0E0, ZERO = 0.0E0, ONE = 1.0E0, + $ TWO = 2.0E0, EIGHT = 8.0E0 ) +* .. +* .. Local Arrays .. + INTEGER CTOT( 4 ), PSM( 4 ) +* .. +* .. Local Scalars .. + INTEGER CT, I, IMAX, IQ1, IQ2, J, JMAX, JS, K2, N1P1, + $ N2, NJ, PJ + REAL C, EPS, S, T, TAU, TOL +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH, SLAPY2 + EXTERNAL ISAMAX, SLAMCH, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLACPY, SLAMRG, SROT, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( MIN( 1, ( N / 2 ) ).GT.N1 .OR. ( N / 2 ).LT.N1 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAED2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + N2 = N - N1 + N1P1 = N1 + 1 +* + IF( RHO.LT.ZERO ) THEN + CALL SSCAL( N2, MONE, Z( N1P1 ), 1 ) + END IF +* +* Normalize z so that norm(z) = 1. Since z is the concatenation of +* two normalized vectors, norm2(z) = sqrt(2). +* + T = ONE / SQRT( TWO ) + CALL SSCAL( N, T, Z, 1 ) +* +* RHO = ABS( norm(z)**2 * RHO ) +* + RHO = ABS( TWO*RHO ) +* +* Sort the eigenvalues into increasing order +* + DO 10 I = N1P1, N + INDXQ( I ) = INDXQ( I ) + N1 + 10 CONTINUE +* +* re-integrate the deflated parts from the last pass +* + DO 20 I = 1, N + DLAMDA( I ) = D( INDXQ( I ) ) + 20 CONTINUE + CALL SLAMRG( N1, N2, DLAMDA, 1, 1, INDXC ) + DO 30 I = 1, N + INDX( I ) = INDXQ( INDXC( I ) ) + 30 CONTINUE +* +* Calculate the allowable deflation tolerance +* + IMAX = ISAMAX( N, Z, 1 ) + JMAX = ISAMAX( N, D, 1 ) + EPS = SLAMCH( 'Epsilon' ) + TOL = EIGHT*EPS*MAX( ABS( D( JMAX ) ), ABS( Z( IMAX ) ) ) +* +* If the rank-1 modifier is small enough, no more needs to be done +* except to reorganize Q so that its columns correspond with the +* elements in D. +* + IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN + K = 0 + IQ2 = 1 + DO 40 J = 1, N + I = INDX( J ) + CALL SCOPY( N, Q( 1, I ), 1, Q2( IQ2 ), 1 ) + DLAMDA( J ) = D( I ) + IQ2 = IQ2 + N + 40 CONTINUE + CALL SLACPY( 'A', N, N, Q2, N, Q, LDQ ) + CALL SCOPY( N, DLAMDA, 1, D, 1 ) + GO TO 190 + END IF +* +* If there are multiple eigenvalues then the problem deflates. Here +* the number of equal eigenvalues are found. As each equal +* eigenvalue is found, an elementary reflector is computed to rotate +* the corresponding eigensubspace so that the corresponding +* components of Z are zero in this new basis. +* + DO 50 I = 1, N1 + COLTYP( I ) = 1 + 50 CONTINUE + DO 60 I = N1P1, N + COLTYP( I ) = 3 + 60 CONTINUE +* +* + K = 0 + K2 = N + 1 + DO 70 J = 1, N + NJ = INDX( J ) + IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + COLTYP( NJ ) = 4 + INDXP( K2 ) = NJ + IF( J.EQ.N ) + $ GO TO 100 + ELSE + PJ = NJ + GO TO 80 + END IF + 70 CONTINUE + 80 CONTINUE + J = J + 1 + NJ = INDX( J ) + IF( J.GT.N ) + $ GO TO 100 + IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + COLTYP( NJ ) = 4 + INDXP( K2 ) = NJ + ELSE +* +* Check if eigenvalues are close enough to allow deflation. +* + S = Z( PJ ) + C = Z( NJ ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = SLAPY2( C, S ) + T = D( NJ ) - D( PJ ) + C = C / TAU + S = -S / TAU + IF( ABS( T*C*S ).LE.TOL ) THEN +* +* Deflation is possible. +* + Z( NJ ) = TAU + Z( PJ ) = ZERO + IF( COLTYP( NJ ).NE.COLTYP( PJ ) ) + $ COLTYP( NJ ) = 2 + COLTYP( PJ ) = 4 + CALL SROT( N, Q( 1, PJ ), 1, Q( 1, NJ ), 1, C, S ) + T = D( PJ )*C**2 + D( NJ )*S**2 + D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2 + D( PJ ) = T + K2 = K2 - 1 + I = 1 + 90 CONTINUE + IF( K2+I.LE.N ) THEN + IF( D( PJ ).LT.D( INDXP( K2+I ) ) ) THEN + INDXP( K2+I-1 ) = INDXP( K2+I ) + INDXP( K2+I ) = PJ + I = I + 1 + GO TO 90 + ELSE + INDXP( K2+I-1 ) = PJ + END IF + ELSE + INDXP( K2+I-1 ) = PJ + END IF + PJ = NJ + ELSE + K = K + 1 + DLAMDA( K ) = D( PJ ) + W( K ) = Z( PJ ) + INDXP( K ) = PJ + PJ = NJ + END IF + END IF + GO TO 80 + 100 CONTINUE +* +* Record the last eigenvalue. +* + K = K + 1 + DLAMDA( K ) = D( PJ ) + W( K ) = Z( PJ ) + INDXP( K ) = PJ +* +* Count up the total number of the various types of columns, then +* form a permutation which positions the four column types into +* four uniform groups (although one or more of these groups may be +* empty). +* + DO 110 J = 1, 4 + CTOT( J ) = 0 + 110 CONTINUE + DO 120 J = 1, N + CT = COLTYP( J ) + CTOT( CT ) = CTOT( CT ) + 1 + 120 CONTINUE +* +* PSM(*) = Position in SubMatrix (of types 1 through 4) +* + PSM( 1 ) = 1 + PSM( 2 ) = 1 + CTOT( 1 ) + PSM( 3 ) = PSM( 2 ) + CTOT( 2 ) + PSM( 4 ) = PSM( 3 ) + CTOT( 3 ) + K = N - CTOT( 4 ) +* +* Fill out the INDXC array so that the permutation which it induces +* will place all type-1 columns first, all type-2 columns next, +* then all type-3's, and finally all type-4's. +* + DO 130 J = 1, N + JS = INDXP( J ) + CT = COLTYP( JS ) + INDX( PSM( CT ) ) = JS + INDXC( PSM( CT ) ) = J + PSM( CT ) = PSM( CT ) + 1 + 130 CONTINUE +* +* Sort the eigenvalues and corresponding eigenvectors into DLAMDA +* and Q2 respectively. The eigenvalues/vectors which were not +* deflated go into the first K slots of DLAMDA and Q2 respectively, +* while those which were deflated go into the last N - K slots. +* + I = 1 + IQ1 = 1 + IQ2 = 1 + ( CTOT( 1 )+CTOT( 2 ) )*N1 + DO 140 J = 1, CTOT( 1 ) + JS = INDX( I ) + CALL SCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 ) + Z( I ) = D( JS ) + I = I + 1 + IQ1 = IQ1 + N1 + 140 CONTINUE +* + DO 150 J = 1, CTOT( 2 ) + JS = INDX( I ) + CALL SCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 ) + CALL SCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 ) + Z( I ) = D( JS ) + I = I + 1 + IQ1 = IQ1 + N1 + IQ2 = IQ2 + N2 + 150 CONTINUE +* + DO 160 J = 1, CTOT( 3 ) + JS = INDX( I ) + CALL SCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 ) + Z( I ) = D( JS ) + I = I + 1 + IQ2 = IQ2 + N2 + 160 CONTINUE +* + IQ1 = IQ2 + DO 170 J = 1, CTOT( 4 ) + JS = INDX( I ) + CALL SCOPY( N, Q( 1, JS ), 1, Q2( IQ2 ), 1 ) + IQ2 = IQ2 + N + Z( I ) = D( JS ) + I = I + 1 + 170 CONTINUE +* +* The deflated eigenvalues and their corresponding vectors go back +* into the last N - K slots of D and Q respectively. +* + CALL SLACPY( 'A', N, CTOT( 4 ), Q2( IQ1 ), N, Q( 1, K+1 ), LDQ ) + CALL SCOPY( N-K, Z( K+1 ), 1, D( K+1 ), 1 ) +* +* Copy CTOT into COLTYP for referencing in SLAED3. +* + DO 180 J = 1, 4 + COLTYP( J ) = CTOT( J ) + 180 CONTINUE +* + 190 CONTINUE + RETURN +* +* End of SLAED2 +* + END diff --git a/costa/native/external/lapack/slaed3.f b/costa/native/external/lapack/slaed3.f new file mode 100644 index 000000000..011f1a644 --- /dev/null +++ b/costa/native/external/lapack/slaed3.f @@ -0,0 +1,265 @@ + SUBROUTINE SLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX, + $ CTOT, W, S, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDQ, N, N1 + REAL RHO +* .. +* .. Array Arguments .. + INTEGER CTOT( * ), INDX( * ) + REAL D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ), + $ S( * ), W( * ) +* .. +* +* Purpose +* ======= +* +* SLAED3 finds the roots of the secular equation, as defined by the +* values in D, W, and RHO, between 1 and K. It makes the +* appropriate calls to SLAED4 and then updates the eigenvectors by +* multiplying the matrix of eigenvectors of the pair of eigensystems +* being combined by the matrix of eigenvectors of the K-by-K system +* which is solved here. +* +* This code makes very mild assumptions about floating point +* arithmetic. It will work on machines with a guard digit in +* add/subtract, or on those binary machines without guard digits +* which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. +* It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* K (input) INTEGER +* The number of terms in the rational function to be solved by +* SLAED4. K >= 0. +* +* N (input) INTEGER +* The number of rows and columns in the Q matrix. +* N >= K (deflation may result in N>K). +* +* N1 (input) INTEGER +* The location of the last eigenvalue in the leading submatrix. +* min(1,N) <= N1 <= N/2. +* +* D (output) REAL array, dimension (N) +* D(I) contains the updated eigenvalues for +* 1 <= I <= K. +* +* Q (output) REAL array, dimension (LDQ,N) +* Initially the first K columns are used as workspace. +* On output the columns 1 to K contain +* the updated eigenvectors. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* RHO (input) REAL +* The value of the parameter in the rank one update equation. +* RHO >= 0 required. +* +* DLAMDA (input/output) REAL array, dimension (K) +* The first K elements of this array contain the old roots +* of the deflated updating problem. These are the poles +* of the secular equation. May be changed on output by +* having lowest order bit set to zero on Cray X-MP, Cray Y-MP, +* Cray-2, or Cray C-90, as described above. +* +* Q2 (input) REAL array, dimension (LDQ2, N) +* The first K columns of this matrix contain the non-deflated +* eigenvectors for the split problem. +* +* INDX (input) INTEGER array, dimension (N) +* The permutation used to arrange the columns of the deflated +* Q matrix into three groups (see SLAED2). +* The rows of the eigenvectors found by SLAED4 must be likewise +* permuted before the matrix multiply can take place. +* +* CTOT (input) INTEGER array, dimension (4) +* A count of the total number of the various types of columns +* in Q, as described in INDX. The fourth column type is any +* column which has been deflated. +* +* W (input/output) REAL array, dimension (K) +* The first K elements of this array contain the components +* of the deflation-adjusted updating vector. Destroyed on +* output. +* +* S (workspace) REAL array, dimension (N1 + 1)*K +* Will contain the eigenvectors of the repaired matrix which +* will be multiplied by the previously accumulated eigenvectors +* to update the system. +* +* LDS (input) INTEGER +* The leading dimension of S. LDS >= max(1,K). +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an eigenvalue did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* Modified by Francoise Tisseur, University of Tennessee. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E0, ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, II, IQ2, J, N12, N2, N23 + REAL TEMP +* .. +* .. External Functions .. + REAL SLAMC3, SNRM2 + EXTERNAL SLAMC3, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, SLACPY, SLAED4, SLASET, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( K.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.K ) THEN + INFO = -2 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAED3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) + $ RETURN +* +* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can +* be computed with high relative accuracy (barring over/underflow). +* This is a problem on machines without a guard digit in +* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). +* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), +* which on any of these machines zeros out the bottommost +* bit of DLAMDA(I) if it is 1; this makes the subsequent +* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation +* occurs. On binary machines with a guard digit (almost all +* machines) it does not change DLAMDA(I) at all. On hexadecimal +* and decimal machines with a guard digit, it slightly +* changes the bottommost bits of DLAMDA(I). It does not account +* for hexadecimal or decimal machines without guard digits +* (we know of none). We use a subroutine call to compute +* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating +* this code. +* + DO 10 I = 1, K + DLAMDA( I ) = SLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I ) + 10 CONTINUE +* + DO 20 J = 1, K + CALL SLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO ) +* +* If the zero finder fails, the computation is terminated. +* + IF( INFO.NE.0 ) + $ GO TO 120 + 20 CONTINUE +* + IF( K.EQ.1 ) + $ GO TO 110 + IF( K.EQ.2 ) THEN + DO 30 J = 1, K + W( 1 ) = Q( 1, J ) + W( 2 ) = Q( 2, J ) + II = INDX( 1 ) + Q( 1, J ) = W( II ) + II = INDX( 2 ) + Q( 2, J ) = W( II ) + 30 CONTINUE + GO TO 110 + END IF +* +* Compute updated W. +* + CALL SCOPY( K, W, 1, S, 1 ) +* +* Initialize W(I) = Q(I,I) +* + CALL SCOPY( K, Q, LDQ+1, W, 1 ) + DO 60 J = 1, K + DO 40 I = 1, J - 1 + W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) ) + 40 CONTINUE + DO 50 I = J + 1, K + W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) ) + 50 CONTINUE + 60 CONTINUE + DO 70 I = 1, K + W( I ) = SIGN( SQRT( -W( I ) ), S( I ) ) + 70 CONTINUE +* +* Compute eigenvectors of the modified rank-1 modification. +* + DO 100 J = 1, K + DO 80 I = 1, K + S( I ) = W( I ) / Q( I, J ) + 80 CONTINUE + TEMP = SNRM2( K, S, 1 ) + DO 90 I = 1, K + II = INDX( I ) + Q( I, J ) = S( II ) / TEMP + 90 CONTINUE + 100 CONTINUE +* +* Compute the updated eigenvectors. +* + 110 CONTINUE +* + N2 = N - N1 + N12 = CTOT( 1 ) + CTOT( 2 ) + N23 = CTOT( 2 ) + CTOT( 3 ) +* + CALL SLACPY( 'A', N23, K, Q( CTOT( 1 )+1, 1 ), LDQ, S, N23 ) + IQ2 = N1*N12 + 1 + IF( N23.NE.0 ) THEN + CALL SGEMM( 'N', 'N', N2, K, N23, ONE, Q2( IQ2 ), N2, S, N23, + $ ZERO, Q( N1+1, 1 ), LDQ ) + ELSE + CALL SLASET( 'A', N2, K, ZERO, ZERO, Q( N1+1, 1 ), LDQ ) + END IF +* + CALL SLACPY( 'A', N12, K, Q, LDQ, S, N12 ) + IF( N12.NE.0 ) THEN + CALL SGEMM( 'N', 'N', N1, K, N12, ONE, Q2, N1, S, N12, ZERO, Q, + $ LDQ ) + ELSE + CALL SLASET( 'A', N1, K, ZERO, ZERO, Q( 1, 1 ), LDQ ) + END IF +* +* + 120 CONTINUE + RETURN +* +* End of SLAED3 +* + END diff --git a/costa/native/external/lapack/slaed4.f b/costa/native/external/lapack/slaed4.f new file mode 100644 index 000000000..d091585e2 --- /dev/null +++ b/costa/native/external/lapack/slaed4.f @@ -0,0 +1,846 @@ + SUBROUTINE SLAED4( N, I, D, Z, DELTA, RHO, DLAM, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* December 23, 1999 +* +* .. Scalar Arguments .. + INTEGER I, INFO, N + REAL DLAM, RHO +* .. +* .. Array Arguments .. + REAL D( * ), DELTA( * ), Z( * ) +* .. +* +* Purpose +* ======= +* +* This subroutine computes the I-th updated eigenvalue of a symmetric +* rank-one modification to a diagonal matrix whose elements are +* given in the array d, and that +* +* D(i) < D(j) for i < j +* +* and that RHO > 0. This is arranged by the calling routine, and is +* no loss in generality. The rank-one modified system is thus +* +* diag( D ) + RHO * Z * Z_transpose. +* +* where we assume the Euclidean norm of Z is 1. +* +* The method consists of approximating the rational functions in the +* secular equation by simpler interpolating rational functions. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The length of all arrays. +* +* I (input) INTEGER +* The index of the eigenvalue to be computed. 1 <= I <= N. +* +* D (input) REAL array, dimension (N) +* The original eigenvalues. It is assumed that they are in +* order, D(I) < D(J) for I < J. +* +* Z (input) REAL array, dimension (N) +* The components of the updating vector. +* +* DELTA (output) REAL array, dimension (N) +* If N .ne. 1, DELTA contains (D(j) - lambda_I) in its j-th +* component. If N = 1, then DELTA(1) = 1. The vector DELTA +* contains the information necessary to construct the +* eigenvectors. +* +* RHO (input) REAL +* The scalar in the symmetric updating formula. +* +* DLAM (output) REAL +* The computed lambda_I, the I-th updated eigenvalue. +* +* INFO (output) INTEGER +* = 0: successful exit +* > 0: if INFO = 1, the updating process failed. +* +* Internal Parameters +* =================== +* +* Logical variable ORGATI (origin-at-i?) is used for distinguishing +* whether D(i) or D(i+1) is treated as the origin. +* +* ORGATI = .true. origin at i +* ORGATI = .false. origin at i+1 +* +* Logical variable SWTCH3 (switch-for-3-poles?) is for noting +* if we are working with THREE poles! +* +* MAXIT is the maximum number of iterations allowed for each +* eigenvalue. +* +* Further Details +* =============== +* +* Based on contributions by +* Ren-Cang Li, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) + REAL ZERO, ONE, TWO, THREE, FOUR, EIGHT, TEN + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ THREE = 3.0E0, FOUR = 4.0E0, EIGHT = 8.0E0, + $ TEN = 10.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL ORGATI, SWTCH, SWTCH3 + INTEGER II, IIM1, IIP1, IP1, ITER, J, NITER + REAL A, B, C, DEL, DLTLB, DLTUB, DPHI, DPSI, DW, + $ EPS, ERRETM, ETA, MIDPT, PHI, PREW, PSI, + $ RHOINV, TAU, TEMP, TEMP1, W +* .. +* .. Local Arrays .. + REAL ZZ( 3 ) +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLAED5, SLAED6 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Since this routine is called in an inner loop, we do no argument +* checking. +* +* Quick return for N=1 and 2. +* + INFO = 0 + IF( N.EQ.1 ) THEN +* +* Presumably, I=1 upon entry +* + DLAM = D( 1 ) + RHO*Z( 1 )*Z( 1 ) + DELTA( 1 ) = ONE + RETURN + END IF + IF( N.EQ.2 ) THEN + CALL SLAED5( I, D, Z, DELTA, RHO, DLAM ) + RETURN + END IF +* +* Compute machine epsilon +* + EPS = SLAMCH( 'Epsilon' ) + RHOINV = ONE / RHO +* +* The case I = N +* + IF( I.EQ.N ) THEN +* +* Initialize some basic variables +* + II = N - 1 + NITER = 1 +* +* Calculate initial guess +* + MIDPT = RHO / TWO +* +* If ||Z||_2 is not one, then TEMP should be set to +* RHO * ||Z||_2^2 / TWO +* + DO 10 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - MIDPT + 10 CONTINUE +* + PSI = ZERO + DO 20 J = 1, N - 2 + PSI = PSI + Z( J )*Z( J ) / DELTA( J ) + 20 CONTINUE +* + C = RHOINV + PSI + W = C + Z( II )*Z( II ) / DELTA( II ) + + $ Z( N )*Z( N ) / DELTA( N ) +* + IF( W.LE.ZERO ) THEN + TEMP = Z( N-1 )*Z( N-1 ) / ( D( N )-D( N-1 )+RHO ) + + $ Z( N )*Z( N ) / RHO + IF( C.LE.TEMP ) THEN + TAU = RHO + ELSE + DEL = D( N ) - D( N-1 ) + A = -C*DEL + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N ) + B = Z( N )*Z( N )*DEL + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A ) + ELSE + TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C ) + END IF + END IF +* +* It can be proved that +* D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO +* + DLTLB = MIDPT + DLTUB = RHO + ELSE + DEL = D( N ) - D( N-1 ) + A = -C*DEL + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N ) + B = Z( N )*Z( N )*DEL + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A ) + ELSE + TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C ) + END IF +* +* It can be proved that +* D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2 +* + DLTLB = ZERO + DLTUB = MIDPT + END IF +* + DO 30 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - TAU + 30 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 40 J = 1, II + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 40 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / DELTA( N ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + DLAM = D( I ) + TAU + GO TO 250 + END IF +* + IF( W.LE.ZERO ) THEN + DLTLB = MAX( DLTLB, TAU ) + ELSE + DLTUB = MIN( DLTUB, TAU ) + END IF +* +* Calculate the new step +* + NITER = NITER + 1 + C = W - DELTA( N-1 )*DPSI - DELTA( N )*DPHI + A = ( DELTA( N-1 )+DELTA( N ) )*W - + $ DELTA( N-1 )*DELTA( N )*( DPSI+DPHI ) + B = DELTA( N-1 )*DELTA( N )*W + IF( C.LT.ZERO ) + $ C = ABS( C ) + IF( C.EQ.ZERO ) THEN +* ETA = B/A +* ETA = RHO - TAU + ETA = DLTUB - TAU + ELSE IF( A.GE.ZERO ) THEN + ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GT.ZERO ) + $ ETA = -W / ( DPSI+DPHI ) + TEMP = TAU + ETA + IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( DLTUB-TAU ) / TWO + ELSE + ETA = ( DLTLB-TAU ) / TWO + END IF + END IF + DO 50 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + 50 CONTINUE +* + TAU = TAU + ETA +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 60 J = 1, II + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 60 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / DELTA( N ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI +* +* Main loop to update the values of the array DELTA +* + ITER = NITER + 1 +* + DO 90 NITER = ITER, MAXIT +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + DLAM = D( I ) + TAU + GO TO 250 + END IF +* + IF( W.LE.ZERO ) THEN + DLTLB = MAX( DLTLB, TAU ) + ELSE + DLTUB = MIN( DLTUB, TAU ) + END IF +* +* Calculate the new step +* + C = W - DELTA( N-1 )*DPSI - DELTA( N )*DPHI + A = ( DELTA( N-1 )+DELTA( N ) )*W - + $ DELTA( N-1 )*DELTA( N )*( DPSI+DPHI ) + B = DELTA( N-1 )*DELTA( N )*W + IF( A.GE.ZERO ) THEN + ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GT.ZERO ) + $ ETA = -W / ( DPSI+DPHI ) + TEMP = TAU + ETA + IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( DLTUB-TAU ) / TWO + ELSE + ETA = ( DLTLB-TAU ) / TWO + END IF + END IF + DO 70 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + 70 CONTINUE +* + TAU = TAU + ETA +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 80 J = 1, II + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 80 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / DELTA( N ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI + 90 CONTINUE +* +* Return with INFO = 1, NITER = MAXIT and not converged +* + INFO = 1 + DLAM = D( I ) + TAU + GO TO 250 +* +* End for the case I = N +* + ELSE +* +* The case for I < N +* + NITER = 1 + IP1 = I + 1 +* +* Calculate initial guess +* + DEL = D( IP1 ) - D( I ) + MIDPT = DEL / TWO + DO 100 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - MIDPT + 100 CONTINUE +* + PSI = ZERO + DO 110 J = 1, I - 1 + PSI = PSI + Z( J )*Z( J ) / DELTA( J ) + 110 CONTINUE +* + PHI = ZERO + DO 120 J = N, I + 2, -1 + PHI = PHI + Z( J )*Z( J ) / DELTA( J ) + 120 CONTINUE + C = RHOINV + PSI + PHI + W = C + Z( I )*Z( I ) / DELTA( I ) + + $ Z( IP1 )*Z( IP1 ) / DELTA( IP1 ) +* + IF( W.GT.ZERO ) THEN +* +* d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 +* +* We choose d(i) as origin. +* + ORGATI = .TRUE. + A = C*DEL + Z( I )*Z( I ) + Z( IP1 )*Z( IP1 ) + B = Z( I )*Z( I )*DEL + IF( A.GT.ZERO ) THEN + TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + ELSE + TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + END IF + DLTLB = ZERO + DLTUB = MIDPT + ELSE +* +* (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) +* +* We choose d(i+1) as origin. +* + ORGATI = .FALSE. + A = C*DEL - Z( I )*Z( I ) - Z( IP1 )*Z( IP1 ) + B = Z( IP1 )*Z( IP1 )*DEL + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( A-SQRT( ABS( A*A+FOUR*B*C ) ) ) + ELSE + TAU = -( A+SQRT( ABS( A*A+FOUR*B*C ) ) ) / ( TWO*C ) + END IF + DLTLB = -MIDPT + DLTUB = ZERO + END IF +* + IF( ORGATI ) THEN + DO 130 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - TAU + 130 CONTINUE + ELSE + DO 140 J = 1, N + DELTA( J ) = ( D( J )-D( IP1 ) ) - TAU + 140 CONTINUE + END IF + IF( ORGATI ) THEN + II = I + ELSE + II = I + 1 + END IF + IIM1 = II - 1 + IIP1 = II + 1 +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 150 J = 1, IIM1 + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 150 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 160 J = N, IIP1, -1 + TEMP = Z( J ) / DELTA( J ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 160 CONTINUE +* + W = RHOINV + PHI + PSI +* +* W is the value of the secular function with +* its ii-th element removed. +* + SWTCH3 = .FALSE. + IF( ORGATI ) THEN + IF( W.LT.ZERO ) + $ SWTCH3 = .TRUE. + ELSE + IF( W.GT.ZERO ) + $ SWTCH3 = .TRUE. + END IF + IF( II.EQ.1 .OR. II.EQ.N ) + $ SWTCH3 = .FALSE. +* + TEMP = Z( II ) / DELTA( II ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = W + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU )*DW +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + IF( ORGATI ) THEN + DLAM = D( I ) + TAU + ELSE + DLAM = D( IP1 ) + TAU + END IF + GO TO 250 + END IF +* + IF( W.LE.ZERO ) THEN + DLTLB = MAX( DLTLB, TAU ) + ELSE + DLTUB = MIN( DLTUB, TAU ) + END IF +* +* Calculate the new step +* + NITER = NITER + 1 + IF( .NOT.SWTCH3 ) THEN + IF( ORGATI ) THEN + C = W - DELTA( IP1 )*DW - ( D( I )-D( IP1 ) )* + $ ( Z( I ) / DELTA( I ) )**2 + ELSE + C = W - DELTA( I )*DW - ( D( IP1 )-D( I ) )* + $ ( Z( IP1 ) / DELTA( IP1 ) )**2 + END IF + A = ( DELTA( I )+DELTA( IP1 ) )*W - + $ DELTA( I )*DELTA( IP1 )*DW + B = DELTA( I )*DELTA( IP1 )*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DELTA( IP1 )*DELTA( IP1 )* + $ ( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + DELTA( I )*DELTA( I )* + $ ( DPSI+DPHI ) + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + ELSE +* +* Interpolation using THREE most relevant poles +* + TEMP = RHOINV + PSI + PHI + IF( ORGATI ) THEN + TEMP1 = Z( IIM1 ) / DELTA( IIM1 ) + TEMP1 = TEMP1*TEMP1 + C = TEMP - DELTA( IIP1 )*( DPSI+DPHI ) - + $ ( D( IIM1 )-D( IIP1 ) )*TEMP1 + ZZ( 1 ) = Z( IIM1 )*Z( IIM1 ) + ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )* + $ ( ( DPSI-TEMP1 )+DPHI ) + ELSE + TEMP1 = Z( IIP1 ) / DELTA( IIP1 ) + TEMP1 = TEMP1*TEMP1 + C = TEMP - DELTA( IIM1 )*( DPSI+DPHI ) - + $ ( D( IIP1 )-D( IIM1 ) )*TEMP1 + ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )* + $ ( DPSI+( DPHI-TEMP1 ) ) + ZZ( 3 ) = Z( IIP1 )*Z( IIP1 ) + END IF + ZZ( 2 ) = Z( II )*Z( II ) + CALL SLAED6( NITER, ORGATI, C, DELTA( IIM1 ), ZZ, W, ETA, + $ INFO ) + IF( INFO.NE.0 ) + $ GO TO 250 + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GE.ZERO ) + $ ETA = -W / DW + TEMP = TAU + ETA + IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( DLTUB-TAU ) / TWO + ELSE + ETA = ( DLTLB-TAU ) / TWO + END IF + END IF +* + PREW = W +* + 170 CONTINUE + DO 180 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + 180 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 190 J = 1, IIM1 + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 190 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 200 J = N, IIP1, -1 + TEMP = Z( J ) / DELTA( J ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 200 CONTINUE +* + TEMP = Z( II ) / DELTA( II ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = RHOINV + PHI + PSI + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU+ETA )*DW +* + SWTCH = .FALSE. + IF( ORGATI ) THEN + IF( -W.GT.ABS( PREW ) / TEN ) + $ SWTCH = .TRUE. + ELSE + IF( W.GT.ABS( PREW ) / TEN ) + $ SWTCH = .TRUE. + END IF +* + TAU = TAU + ETA +* +* Main loop to update the values of the array DELTA +* + ITER = NITER + 1 +* + DO 240 NITER = ITER, MAXIT +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + IF( ORGATI ) THEN + DLAM = D( I ) + TAU + ELSE + DLAM = D( IP1 ) + TAU + END IF + GO TO 250 + END IF +* + IF( W.LE.ZERO ) THEN + DLTLB = MAX( DLTLB, TAU ) + ELSE + DLTUB = MIN( DLTUB, TAU ) + END IF +* +* Calculate the new step +* + IF( .NOT.SWTCH3 ) THEN + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + C = W - DELTA( IP1 )*DW - + $ ( D( I )-D( IP1 ) )*( Z( I ) / DELTA( I ) )**2 + ELSE + C = W - DELTA( I )*DW - ( D( IP1 )-D( I ) )* + $ ( Z( IP1 ) / DELTA( IP1 ) )**2 + END IF + ELSE + TEMP = Z( II ) / DELTA( II ) + IF( ORGATI ) THEN + DPSI = DPSI + TEMP*TEMP + ELSE + DPHI = DPHI + TEMP*TEMP + END IF + C = W - DELTA( I )*DPSI - DELTA( IP1 )*DPHI + END IF + A = ( DELTA( I )+DELTA( IP1 ) )*W - + $ DELTA( I )*DELTA( IP1 )*DW + B = DELTA( I )*DELTA( IP1 )*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DELTA( IP1 )* + $ DELTA( IP1 )*( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + + $ DELTA( I )*DELTA( I )*( DPSI+DPHI ) + END IF + ELSE + A = DELTA( I )*DELTA( I )*DPSI + + $ DELTA( IP1 )*DELTA( IP1 )*DPHI + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + ELSE +* +* Interpolation using THREE most relevant poles +* + TEMP = RHOINV + PSI + PHI + IF( SWTCH ) THEN + C = TEMP - DELTA( IIM1 )*DPSI - DELTA( IIP1 )*DPHI + ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*DPSI + ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*DPHI + ELSE + IF( ORGATI ) THEN + TEMP1 = Z( IIM1 ) / DELTA( IIM1 ) + TEMP1 = TEMP1*TEMP1 + C = TEMP - DELTA( IIP1 )*( DPSI+DPHI ) - + $ ( D( IIM1 )-D( IIP1 ) )*TEMP1 + ZZ( 1 ) = Z( IIM1 )*Z( IIM1 ) + ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )* + $ ( ( DPSI-TEMP1 )+DPHI ) + ELSE + TEMP1 = Z( IIP1 ) / DELTA( IIP1 ) + TEMP1 = TEMP1*TEMP1 + C = TEMP - DELTA( IIM1 )*( DPSI+DPHI ) - + $ ( D( IIP1 )-D( IIM1 ) )*TEMP1 + ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )* + $ ( DPSI+( DPHI-TEMP1 ) ) + ZZ( 3 ) = Z( IIP1 )*Z( IIP1 ) + END IF + END IF + CALL SLAED6( NITER, ORGATI, C, DELTA( IIM1 ), ZZ, W, ETA, + $ INFO ) + IF( INFO.NE.0 ) + $ GO TO 250 + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GE.ZERO ) + $ ETA = -W / DW + TEMP = TAU + ETA + IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( DLTUB-TAU ) / TWO + ELSE + ETA = ( DLTLB-TAU ) / TWO + END IF + END IF +* + DO 210 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + 210 CONTINUE +* + TAU = TAU + ETA + PREW = W +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 220 J = 1, IIM1 + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 220 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 230 J = N, IIP1, -1 + TEMP = Z( J ) / DELTA( J ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 230 CONTINUE +* + TEMP = Z( II ) / DELTA( II ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = RHOINV + PHI + PSI + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU )*DW + IF( W*PREW.GT.ZERO .AND. ABS( W ).GT.ABS( PREW ) / TEN ) + $ SWTCH = .NOT.SWTCH +* + 240 CONTINUE +* +* Return with INFO = 1, NITER = MAXIT and not converged +* + INFO = 1 + IF( ORGATI ) THEN + DLAM = D( I ) + TAU + ELSE + DLAM = D( IP1 ) + TAU + END IF +* + END IF +* + 250 CONTINUE +* + RETURN +* +* End of SLAED4 +* + END diff --git a/costa/native/external/lapack/slaed5.f b/costa/native/external/lapack/slaed5.f new file mode 100644 index 000000000..b92934132 --- /dev/null +++ b/costa/native/external/lapack/slaed5.f @@ -0,0 +1,125 @@ + SUBROUTINE SLAED5( I, D, Z, DELTA, RHO, DLAM ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER I + REAL DLAM, RHO +* .. +* .. Array Arguments .. + REAL D( 2 ), DELTA( 2 ), Z( 2 ) +* .. +* +* Purpose +* ======= +* +* This subroutine computes the I-th eigenvalue of a symmetric rank-one +* modification of a 2-by-2 diagonal matrix +* +* diag( D ) + RHO * Z * transpose(Z) . +* +* The diagonal elements in the array D are assumed to satisfy +* +* D(i) < D(j) for i < j . +* +* We also assume RHO > 0 and that the Euclidean norm of the vector +* Z is one. +* +* Arguments +* ========= +* +* I (input) INTEGER +* The index of the eigenvalue to be computed. I = 1 or I = 2. +* +* D (input) REAL array, dimension (2) +* The original eigenvalues. We assume D(1) < D(2). +* +* Z (input) REAL array, dimension (2) +* The components of the updating vector. +* +* DELTA (output) REAL array, dimension (2) +* The vector DELTA contains the information necessary +* to construct the eigenvectors. +* +* RHO (input) REAL +* The scalar in the symmetric updating formula. +* +* DLAM (output) REAL +* The computed lambda_I, the I-th updated eigenvalue. +* +* Further Details +* =============== +* +* Based on contributions by +* Ren-Cang Li, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, FOUR + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ FOUR = 4.0E0 ) +* .. +* .. Local Scalars .. + REAL B, C, DEL, TAU, TEMP, W +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + DEL = D( 2 ) - D( 1 ) + IF( I.EQ.1 ) THEN + W = ONE + TWO*RHO*( Z( 2 )*Z( 2 )-Z( 1 )*Z( 1 ) ) / DEL + IF( W.GT.ZERO ) THEN + B = DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 1 )*Z( 1 )*DEL +* +* B > ZERO, always +* + TAU = TWO*C / ( B+SQRT( ABS( B*B-FOUR*C ) ) ) + DLAM = D( 1 ) + TAU + DELTA( 1 ) = -Z( 1 ) / TAU + DELTA( 2 ) = Z( 2 ) / ( DEL-TAU ) + ELSE + B = -DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 2 )*Z( 2 )*DEL + IF( B.GT.ZERO ) THEN + TAU = -TWO*C / ( B+SQRT( B*B+FOUR*C ) ) + ELSE + TAU = ( B-SQRT( B*B+FOUR*C ) ) / TWO + END IF + DLAM = D( 2 ) + TAU + DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) + DELTA( 2 ) = -Z( 2 ) / TAU + END IF + TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) + DELTA( 1 ) = DELTA( 1 ) / TEMP + DELTA( 2 ) = DELTA( 2 ) / TEMP + ELSE +* +* Now I=2 +* + B = -DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 2 )*Z( 2 )*DEL + IF( B.GT.ZERO ) THEN + TAU = ( B+SQRT( B*B+FOUR*C ) ) / TWO + ELSE + TAU = TWO*C / ( -B+SQRT( B*B+FOUR*C ) ) + END IF + DLAM = D( 2 ) + TAU + DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) + DELTA( 2 ) = -Z( 2 ) / TAU + TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) + DELTA( 1 ) = DELTA( 1 ) / TEMP + DELTA( 2 ) = DELTA( 2 ) / TEMP + END IF + RETURN +* +* End OF SLAED5 +* + END diff --git a/costa/native/external/lapack/slaed6.f b/costa/native/external/lapack/slaed6.f new file mode 100644 index 000000000..d3ec92450 --- /dev/null +++ b/costa/native/external/lapack/slaed6.f @@ -0,0 +1,299 @@ + SUBROUTINE SLAED6( KNITER, ORGATI, RHO, D, Z, FINIT, TAU, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + LOGICAL ORGATI + INTEGER INFO, KNITER + REAL FINIT, RHO, TAU +* .. +* .. Array Arguments .. + REAL D( 3 ), Z( 3 ) +* .. +* +* Purpose +* ======= +* +* SLAED6 computes the positive or negative root (closest to the origin) +* of +* z(1) z(2) z(3) +* f(x) = rho + --------- + ---------- + --------- +* d(1)-x d(2)-x d(3)-x +* +* It is assumed that +* +* if ORGATI = .true. the root is between d(2) and d(3); +* otherwise it is between d(1) and d(2) +* +* This routine will be called by SLAED4 when necessary. In most cases, +* the root sought is the smallest in magnitude, though it might not be +* in some extremely rare situations. +* +* Arguments +* ========= +* +* KNITER (input) INTEGER +* Refer to SLAED4 for its significance. +* +* ORGATI (input) LOGICAL +* If ORGATI is true, the needed root is between d(2) and +* d(3); otherwise it is between d(1) and d(2). See +* SLAED4 for further details. +* +* RHO (input) REAL +* Refer to the equation f(x) above. +* +* D (input) REAL array, dimension (3) +* D satisfies d(1) < d(2) < d(3). +* +* Z (input) REAL array, dimension (3) +* Each of the elements in z must be positive. +* +* FINIT (input) REAL +* The value of f at 0. It is more accurate than the one +* evaluated inside this routine (if someone wants to do +* so). +* +* TAU (output) REAL +* The root of the equation f(x). +* +* INFO (output) INTEGER +* = 0: successful exit +* > 0: if INFO = 1, failure to converge +* +* Further Details +* =============== +* +* Based on contributions by +* Ren-Cang Li, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXIT + PARAMETER ( MAXIT = 20 ) + REAL ZERO, ONE, TWO, THREE, FOUR, EIGHT + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ THREE = 3.0E0, FOUR = 4.0E0, EIGHT = 8.0E0 ) +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Local Arrays .. + REAL DSCALE( 3 ), ZSCALE( 3 ) +* .. +* .. Local Scalars .. + LOGICAL FIRST, SCALE + INTEGER I, ITER, NITER + REAL A, B, BASE, C, DDF, DF, EPS, ERRETM, ETA, F, + $ FC, SCLFAC, SCLINV, SMALL1, SMALL2, SMINV1, + $ SMINV2, TEMP, TEMP1, TEMP2, TEMP3, TEMP4 +* .. +* .. Save statement .. + SAVE FIRST, SMALL1, SMINV1, SMALL2, SMINV2, EPS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, MIN, SQRT +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / +* .. +* .. Executable Statements .. +* + INFO = 0 +* + NITER = 1 + TAU = ZERO + IF( KNITER.EQ.2 ) THEN + IF( ORGATI ) THEN + TEMP = ( D( 3 )-D( 2 ) ) / TWO + C = RHO + Z( 1 ) / ( ( D( 1 )-D( 2 ) )-TEMP ) + A = C*( D( 2 )+D( 3 ) ) + Z( 2 ) + Z( 3 ) + B = C*D( 2 )*D( 3 ) + Z( 2 )*D( 3 ) + Z( 3 )*D( 2 ) + ELSE + TEMP = ( D( 1 )-D( 2 ) ) / TWO + C = RHO + Z( 3 ) / ( ( D( 3 )-D( 2 ) )-TEMP ) + A = C*( D( 1 )+D( 2 ) ) + Z( 1 ) + Z( 2 ) + B = C*D( 1 )*D( 2 ) + Z( 1 )*D( 2 ) + Z( 2 )*D( 1 ) + END IF + TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) ) + A = A / TEMP + B = B / TEMP + C = C / TEMP + IF( C.EQ.ZERO ) THEN + TAU = B / A + ELSE IF( A.LE.ZERO ) THEN + TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + TEMP = RHO + Z( 1 ) / ( D( 1 )-TAU ) + + $ Z( 2 ) / ( D( 2 )-TAU ) + Z( 3 ) / ( D( 3 )-TAU ) + IF( ABS( FINIT ).LE.ABS( TEMP ) ) + $ TAU = ZERO + END IF +* +* On first call to routine, get machine parameters for +* possible scaling to avoid overflow +* + IF( FIRST ) THEN + EPS = SLAMCH( 'Epsilon' ) + BASE = SLAMCH( 'Base' ) + SMALL1 = BASE**( INT( LOG( SLAMCH( 'SafMin' ) ) / LOG( BASE ) / + $ THREE ) ) + SMINV1 = ONE / SMALL1 + SMALL2 = SMALL1*SMALL1 + SMINV2 = SMINV1*SMINV1 + FIRST = .FALSE. + END IF +* +* Determine if scaling of inputs necessary to avoid overflow +* when computing 1/TEMP**3 +* + IF( ORGATI ) THEN + TEMP = MIN( ABS( D( 2 )-TAU ), ABS( D( 3 )-TAU ) ) + ELSE + TEMP = MIN( ABS( D( 1 )-TAU ), ABS( D( 2 )-TAU ) ) + END IF + SCALE = .FALSE. + IF( TEMP.LE.SMALL1 ) THEN + SCALE = .TRUE. + IF( TEMP.LE.SMALL2 ) THEN +* +* Scale up by power of radix nearest 1/SAFMIN**(2/3) +* + SCLFAC = SMINV2 + SCLINV = SMALL2 + ELSE +* +* Scale up by power of radix nearest 1/SAFMIN**(1/3) +* + SCLFAC = SMINV1 + SCLINV = SMALL1 + END IF +* +* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) +* + DO 10 I = 1, 3 + DSCALE( I ) = D( I )*SCLFAC + ZSCALE( I ) = Z( I )*SCLFAC + 10 CONTINUE + TAU = TAU*SCLFAC + ELSE +* +* Copy D and Z to DSCALE and ZSCALE +* + DO 20 I = 1, 3 + DSCALE( I ) = D( I ) + ZSCALE( I ) = Z( I ) + 20 CONTINUE + END IF +* + FC = ZERO + DF = ZERO + DDF = ZERO + DO 30 I = 1, 3 + TEMP = ONE / ( DSCALE( I )-TAU ) + TEMP1 = ZSCALE( I )*TEMP + TEMP2 = TEMP1*TEMP + TEMP3 = TEMP2*TEMP + FC = FC + TEMP1 / DSCALE( I ) + DF = DF + TEMP2 + DDF = DDF + TEMP3 + 30 CONTINUE + F = FINIT + TAU*FC +* + IF( ABS( F ).LE.ZERO ) + $ GO TO 60 +* +* Iteration begins +* +* It is not hard to see that +* +* 1) Iterations will go up monotonically +* if FINIT < 0; +* +* 2) Iterations will go down monotonically +* if FINIT > 0. +* + ITER = NITER + 1 +* + DO 50 NITER = ITER, MAXIT +* + IF( ORGATI ) THEN + TEMP1 = DSCALE( 2 ) - TAU + TEMP2 = DSCALE( 3 ) - TAU + ELSE + TEMP1 = DSCALE( 1 ) - TAU + TEMP2 = DSCALE( 2 ) - TAU + END IF + A = ( TEMP1+TEMP2 )*F - TEMP1*TEMP2*DF + B = TEMP1*TEMP2*F + C = F - ( TEMP1+TEMP2 )*DF + TEMP1*TEMP2*DDF + TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) ) + A = A / TEMP + B = B / TEMP + C = C / TEMP + IF( C.EQ.ZERO ) THEN + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + IF( F*ETA.GE.ZERO ) THEN + ETA = -F / DF + END IF +* + TEMP = ETA + TAU + IF( ORGATI ) THEN + IF( ETA.GT.ZERO .AND. TEMP.GE.DSCALE( 3 ) ) + $ ETA = ( DSCALE( 3 )-TAU ) / TWO + IF( ETA.LT.ZERO .AND. TEMP.LE.DSCALE( 2 ) ) + $ ETA = ( DSCALE( 2 )-TAU ) / TWO + ELSE + IF( ETA.GT.ZERO .AND. TEMP.GE.DSCALE( 2 ) ) + $ ETA = ( DSCALE( 2 )-TAU ) / TWO + IF( ETA.LT.ZERO .AND. TEMP.LE.DSCALE( 1 ) ) + $ ETA = ( DSCALE( 1 )-TAU ) / TWO + END IF + TAU = TAU + ETA +* + FC = ZERO + ERRETM = ZERO + DF = ZERO + DDF = ZERO + DO 40 I = 1, 3 + TEMP = ONE / ( DSCALE( I )-TAU ) + TEMP1 = ZSCALE( I )*TEMP + TEMP2 = TEMP1*TEMP + TEMP3 = TEMP2*TEMP + TEMP4 = TEMP1 / DSCALE( I ) + FC = FC + TEMP4 + ERRETM = ERRETM + ABS( TEMP4 ) + DF = DF + TEMP2 + DDF = DDF + TEMP3 + 40 CONTINUE + F = FINIT + TAU*FC + ERRETM = EIGHT*( ABS( FINIT )+ABS( TAU )*ERRETM ) + + $ ABS( TAU )*DF + IF( ABS( F ).LE.EPS*ERRETM ) + $ GO TO 60 + 50 CONTINUE + INFO = 1 + 60 CONTINUE +* +* Undo scaling +* + IF( SCALE ) + $ TAU = TAU*SCLINV + RETURN +* +* End of SLAED6 +* + END diff --git a/costa/native/external/lapack/slaed7.f b/costa/native/external/lapack/slaed7.f new file mode 100644 index 000000000..5678348f1 --- /dev/null +++ b/costa/native/external/lapack/slaed7.f @@ -0,0 +1,288 @@ + SUBROUTINE SLAED7( ICOMPQ, N, QSIZ, TLVLS, CURLVL, CURPBM, D, Q, + $ LDQ, INDXQ, RHO, CUTPNT, QSTORE, QPTR, PRMPTR, + $ PERM, GIVPTR, GIVCOL, GIVNUM, WORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER CURLVL, CURPBM, CUTPNT, ICOMPQ, INFO, LDQ, N, + $ QSIZ, TLVLS + REAL RHO +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), GIVPTR( * ), INDXQ( * ), + $ IWORK( * ), PERM( * ), PRMPTR( * ), QPTR( * ) + REAL D( * ), GIVNUM( 2, * ), Q( LDQ, * ), + $ QSTORE( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLAED7 computes the updated eigensystem of a diagonal +* matrix after modification by a rank-one symmetric matrix. This +* routine is used only for the eigenproblem which requires all +* eigenvalues and optionally eigenvectors of a dense symmetric matrix +* that has been reduced to tridiagonal form. SLAED1 handles +* the case in which all eigenvalues and eigenvectors of a symmetric +* tridiagonal matrix are desired. +* +* T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) +* +* where Z = Q'u, u is a vector of length N with ones in the +* CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. +* +* The eigenvectors of the original matrix are stored in Q, and the +* eigenvalues are in D. The algorithm consists of three stages: +* +* The first stage consists of deflating the size of the problem +* when there are multiple eigenvalues or if there is a zero in +* the Z vector. For each such occurence the dimension of the +* secular equation problem is reduced by one. This stage is +* performed by the routine SLAED8. +* +* The second stage consists of calculating the updated +* eigenvalues. This is done by finding the roots of the secular +* equation via the routine SLAED4 (as called by SLAED9). +* This routine also calculates the eigenvectors of the current +* problem. +* +* The final stage consists of computing the updated eigenvectors +* directly using the updated eigenvalues. The eigenvectors for +* the current problem are multiplied with the eigenvectors from +* the overall problem. +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* = 0: Compute eigenvalues only. +* = 1: Compute eigenvectors of original dense symmetric matrix +* also. On entry, Q contains the orthogonal matrix used +* to reduce the original matrix to tridiagonal form. +* +* N (input) INTEGER +* The dimension of the symmetric tridiagonal matrix. N >= 0. +* +* QSIZ (input) INTEGER +* The dimension of the orthogonal matrix used to reduce +* the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. +* +* TLVLS (input) INTEGER +* The total number of merging levels in the overall divide and +* conquer tree. +* +* CURLVL (input) INTEGER +* The current level in the overall merge routine, +* 0 <= CURLVL <= TLVLS. +* +* CURPBM (input) INTEGER +* The current problem in the current level in the overall +* merge routine (counting from upper left to lower right). +* +* D (input/output) REAL array, dimension (N) +* On entry, the eigenvalues of the rank-1-perturbed matrix. +* On exit, the eigenvalues of the repaired matrix. +* +* Q (input/output) REAL array, dimension (LDQ, N) +* On entry, the eigenvectors of the rank-1-perturbed matrix. +* On exit, the eigenvectors of the repaired tridiagonal matrix. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* INDXQ (output) INTEGER array, dimension (N) +* The permutation which will reintegrate the subproblem just +* solved back into sorted order, i.e., D( INDXQ( I = 1, N ) ) +* will be in ascending order. +* +* RHO (input) REAL +* The subdiagonal element used to create the rank-1 +* modification. +* +* CUTPNT (input) INTEGER +* Contains the location of the last eigenvalue in the leading +* sub-matrix. min(1,N) <= CUTPNT <= N. +* +* QSTORE (input/output) REAL array, dimension (N**2+1) +* Stores eigenvectors of submatrices encountered during +* divide and conquer, packed together. QPTR points to +* beginning of the submatrices. +* +* QPTR (input/output) INTEGER array, dimension (N+2) +* List of indices pointing to beginning of submatrices stored +* in QSTORE. The submatrices are numbered starting at the +* bottom left of the divide and conquer tree, from left to +* right and bottom to top. +* +* PRMPTR (input) INTEGER array, dimension (N lg N) +* Contains a list of pointers which indicate where in PERM a +* level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) +* indicates the size of the permutation and also the size of +* the full, non-deflated problem. +* +* PERM (input) INTEGER array, dimension (N lg N) +* Contains the permutations (from deflation and sorting) to be +* applied to each eigenblock. +* +* GIVPTR (input) INTEGER array, dimension (N lg N) +* Contains a list of pointers which indicate where in GIVCOL a +* level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) +* indicates the number of Givens rotations. +* +* GIVCOL (input) INTEGER array, dimension (2, N lg N) +* Each pair of numbers indicates a pair of columns to take place +* in a Givens rotation. +* +* GIVNUM (input) REAL array, dimension (2, N lg N) +* Each number indicates the S value to be used in the +* corresponding Givens rotation. +* +* WORK (workspace) REAL array, dimension (3*N+QSIZ*N) +* +* IWORK (workspace) INTEGER array, dimension (4*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an eigenvalue did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E0, ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + INTEGER COLTYP, CURR, I, IDLMDA, INDX, INDXC, INDXP, + $ IQ2, IS, IW, IZ, K, LDQ2, N1, N2, PTR +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SLAED8, SLAED9, SLAEDA, SLAMRG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ICOMPQ.EQ.1 .AND. QSIZ.LT.N ) THEN + INFO = -4 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( MIN( 1, N ).GT.CUTPNT .OR. N.LT.CUTPNT ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAED7', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* The following values are for bookkeeping purposes only. They are +* integer pointers which indicate the portion of the workspace +* used by a particular array in SLAED8 and SLAED9. +* + IF( ICOMPQ.EQ.1 ) THEN + LDQ2 = QSIZ + ELSE + LDQ2 = N + END IF +* + IZ = 1 + IDLMDA = IZ + N + IW = IDLMDA + N + IQ2 = IW + N + IS = IQ2 + N*LDQ2 +* + INDX = 1 + INDXC = INDX + N + COLTYP = INDXC + N + INDXP = COLTYP + N +* +* Form the z-vector which consists of the last row of Q_1 and the +* first row of Q_2. +* + PTR = 1 + 2**TLVLS + DO 10 I = 1, CURLVL - 1 + PTR = PTR + 2**( TLVLS-I ) + 10 CONTINUE + CURR = PTR + CURPBM + CALL SLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR, + $ GIVCOL, GIVNUM, QSTORE, QPTR, WORK( IZ ), + $ WORK( IZ+N ), INFO ) +* +* When solving the final problem, we no longer need the stored data, +* so we will overwrite the data from this level onto the previously +* used storage space. +* + IF( CURLVL.EQ.TLVLS ) THEN + QPTR( CURR ) = 1 + PRMPTR( CURR ) = 1 + GIVPTR( CURR ) = 1 + END IF +* +* Sort and Deflate eigenvalues. +* + CALL SLAED8( ICOMPQ, K, N, QSIZ, D, Q, LDQ, INDXQ, RHO, CUTPNT, + $ WORK( IZ ), WORK( IDLMDA ), WORK( IQ2 ), LDQ2, + $ WORK( IW ), PERM( PRMPTR( CURR ) ), GIVPTR( CURR+1 ), + $ GIVCOL( 1, GIVPTR( CURR ) ), + $ GIVNUM( 1, GIVPTR( CURR ) ), IWORK( INDXP ), + $ IWORK( INDX ), INFO ) + PRMPTR( CURR+1 ) = PRMPTR( CURR ) + N + GIVPTR( CURR+1 ) = GIVPTR( CURR+1 ) + GIVPTR( CURR ) +* +* Solve Secular Equation. +* + IF( K.NE.0 ) THEN + CALL SLAED9( K, 1, K, N, D, WORK( IS ), K, RHO, WORK( IDLMDA ), + $ WORK( IW ), QSTORE( QPTR( CURR ) ), K, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( ICOMPQ.EQ.1 ) THEN + CALL SGEMM( 'N', 'N', QSIZ, K, K, ONE, WORK( IQ2 ), LDQ2, + $ QSTORE( QPTR( CURR ) ), K, ZERO, Q, LDQ ) + END IF + QPTR( CURR+1 ) = QPTR( CURR ) + K**2 +* +* Prepare the INDXQ sorting permutation. +* + N1 = K + N2 = N - K + CALL SLAMRG( N1, N2, D, 1, -1, INDXQ ) + ELSE + QPTR( CURR+1 ) = QPTR( CURR ) + DO 20 I = 1, N + INDXQ( I ) = I + 20 CONTINUE + END IF +* + 30 CONTINUE + RETURN +* +* End of SLAED7 +* + END diff --git a/costa/native/external/lapack/slaed8.f b/costa/native/external/lapack/slaed8.f new file mode 100644 index 000000000..05571eb05 --- /dev/null +++ b/costa/native/external/lapack/slaed8.f @@ -0,0 +1,400 @@ + SUBROUTINE SLAED8( ICOMPQ, K, N, QSIZ, D, Q, LDQ, INDXQ, RHO, + $ CUTPNT, Z, DLAMDA, Q2, LDQ2, W, PERM, GIVPTR, + $ GIVCOL, GIVNUM, INDXP, INDX, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER CUTPNT, GIVPTR, ICOMPQ, INFO, K, LDQ, LDQ2, N, + $ QSIZ + REAL RHO +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), INDX( * ), INDXP( * ), + $ INDXQ( * ), PERM( * ) + REAL D( * ), DLAMDA( * ), GIVNUM( 2, * ), + $ Q( LDQ, * ), Q2( LDQ2, * ), W( * ), Z( * ) +* .. +* +* Purpose +* ======= +* +* SLAED8 merges the two sets of eigenvalues together into a single +* sorted set. Then it tries to deflate the size of the problem. +* There are two ways in which deflation can occur: when two or more +* eigenvalues are close together or if there is a tiny element in the +* Z vector. For each such occurrence the order of the related secular +* equation problem is reduced by one. +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* = 0: Compute eigenvalues only. +* = 1: Compute eigenvectors of original dense symmetric matrix +* also. On entry, Q contains the orthogonal matrix used +* to reduce the original matrix to tridiagonal form. +* +* K (output) INTEGER +* The number of non-deflated eigenvalues, and the order of the +* related secular equation. +* +* N (input) INTEGER +* The dimension of the symmetric tridiagonal matrix. N >= 0. +* +* QSIZ (input) INTEGER +* The dimension of the orthogonal matrix used to reduce +* the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. +* +* D (input/output) REAL array, dimension (N) +* On entry, the eigenvalues of the two submatrices to be +* combined. On exit, the trailing (N-K) updated eigenvalues +* (those which were deflated) sorted into increasing order. +* +* Q (input/output) REAL array, dimension (LDQ,N) +* If ICOMPQ = 0, Q is not referenced. Otherwise, +* on entry, Q contains the eigenvectors of the partially solved +* system which has been previously updated in matrix +* multiplies with other partially solved eigensystems. +* On exit, Q contains the trailing (N-K) updated eigenvectors +* (those which were deflated) in its last N-K columns. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* INDXQ (input) INTEGER array, dimension (N) +* The permutation which separately sorts the two sub-problems +* in D into ascending order. Note that elements in the second +* half of this permutation must first have CUTPNT added to +* their values in order to be accurate. +* +* RHO (input/output) REAL +* On entry, the off-diagonal element associated with the rank-1 +* cut which originally split the two submatrices which are now +* being recombined. +* On exit, RHO has been modified to the value required by +* SLAED3. +* +* CUTPNT (input) INTEGER +* The location of the last eigenvalue in the leading +* sub-matrix. min(1,N) <= CUTPNT <= N. +* +* Z (input) REAL array, dimension (N) +* On entry, Z contains the updating vector (the last row of +* the first sub-eigenvector matrix and the first row of the +* second sub-eigenvector matrix). +* On exit, the contents of Z are destroyed by the updating +* process. +* +* DLAMDA (output) REAL array, dimension (N) +* A copy of the first K eigenvalues which will be used by +* SLAED3 to form the secular equation. +* +* Q2 (output) REAL array, dimension (LDQ2,N) +* If ICOMPQ = 0, Q2 is not referenced. Otherwise, +* a copy of the first K eigenvectors which will be used by +* SLAED7 in a matrix multiply (SGEMM) to update the new +* eigenvectors. +* +* LDQ2 (input) INTEGER +* The leading dimension of the array Q2. LDQ2 >= max(1,N). +* +* W (output) REAL array, dimension (N) +* The first k values of the final deflation-altered z-vector and +* will be passed to SLAED3. +* +* PERM (output) INTEGER array, dimension (N) +* The permutations (from deflation and sorting) to be applied +* to each eigenblock. +* +* GIVPTR (output) INTEGER +* The number of Givens rotations which took place in this +* subproblem. +* +* GIVCOL (output) INTEGER array, dimension (2, N) +* Each pair of numbers indicates a pair of columns to take place +* in a Givens rotation. +* +* GIVNUM (output) REAL array, dimension (2, N) +* Each number indicates the S value to be used in the +* corresponding Givens rotation. +* +* INDXP (workspace) INTEGER array, dimension (N) +* The permutation used to place deflated values of D at the end +* of the array. INDXP(1:K) points to the nondeflated D-values +* and INDXP(K+1:N) points to the deflated eigenvalues. +* +* INDX (workspace) INTEGER array, dimension (N) +* The permutation used to sort the contents of D into ascending +* order. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL MONE, ZERO, ONE, TWO, EIGHT + PARAMETER ( MONE = -1.0E0, ZERO = 0.0E0, ONE = 1.0E0, + $ TWO = 2.0E0, EIGHT = 8.0E0 ) +* .. +* .. Local Scalars .. +* + INTEGER I, IMAX, J, JLAM, JMAX, JP, K2, N1, N1P1, N2 + REAL C, EPS, S, T, TAU, TOL +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH, SLAPY2 + EXTERNAL ISAMAX, SLAMCH, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLACPY, SLAMRG, SROT, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ICOMPQ.EQ.1 .AND. QSIZ.LT.N ) THEN + INFO = -4 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( CUTPNT.LT.MIN( 1, N ) .OR. CUTPNT.GT.N ) THEN + INFO = -10 + ELSE IF( LDQ2.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAED8', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + N1 = CUTPNT + N2 = N - N1 + N1P1 = N1 + 1 +* + IF( RHO.LT.ZERO ) THEN + CALL SSCAL( N2, MONE, Z( N1P1 ), 1 ) + END IF +* +* Normalize z so that norm(z) = 1 +* + T = ONE / SQRT( TWO ) + DO 10 J = 1, N + INDX( J ) = J + 10 CONTINUE + CALL SSCAL( N, T, Z, 1 ) + RHO = ABS( TWO*RHO ) +* +* Sort the eigenvalues into increasing order +* + DO 20 I = CUTPNT + 1, N + INDXQ( I ) = INDXQ( I ) + CUTPNT + 20 CONTINUE + DO 30 I = 1, N + DLAMDA( I ) = D( INDXQ( I ) ) + W( I ) = Z( INDXQ( I ) ) + 30 CONTINUE + I = 1 + J = CUTPNT + 1 + CALL SLAMRG( N1, N2, DLAMDA, 1, 1, INDX ) + DO 40 I = 1, N + D( I ) = DLAMDA( INDX( I ) ) + Z( I ) = W( INDX( I ) ) + 40 CONTINUE +* +* Calculate the allowable deflation tolerence +* + IMAX = ISAMAX( N, Z, 1 ) + JMAX = ISAMAX( N, D, 1 ) + EPS = SLAMCH( 'Epsilon' ) + TOL = EIGHT*EPS*ABS( D( JMAX ) ) +* +* If the rank-1 modifier is small enough, no more needs to be done +* except to reorganize Q so that its columns correspond with the +* elements in D. +* + IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN + K = 0 + IF( ICOMPQ.EQ.0 ) THEN + DO 50 J = 1, N + PERM( J ) = INDXQ( INDX( J ) ) + 50 CONTINUE + ELSE + DO 60 J = 1, N + PERM( J ) = INDXQ( INDX( J ) ) + CALL SCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 ) + 60 CONTINUE + CALL SLACPY( 'A', QSIZ, N, Q2( 1, 1 ), LDQ2, Q( 1, 1 ), + $ LDQ ) + END IF + RETURN + END IF +* +* If there are multiple eigenvalues then the problem deflates. Here +* the number of equal eigenvalues are found. As each equal +* eigenvalue is found, an elementary reflector is computed to rotate +* the corresponding eigensubspace so that the corresponding +* components of Z are zero in this new basis. +* + K = 0 + GIVPTR = 0 + K2 = N + 1 + DO 70 J = 1, N + IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + INDXP( K2 ) = J + IF( J.EQ.N ) + $ GO TO 110 + ELSE + JLAM = J + GO TO 80 + END IF + 70 CONTINUE + 80 CONTINUE + J = J + 1 + IF( J.GT.N ) + $ GO TO 100 + IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + INDXP( K2 ) = J + ELSE +* +* Check if eigenvalues are close enough to allow deflation. +* + S = Z( JLAM ) + C = Z( J ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = SLAPY2( C, S ) + T = D( J ) - D( JLAM ) + C = C / TAU + S = -S / TAU + IF( ABS( T*C*S ).LE.TOL ) THEN +* +* Deflation is possible. +* + Z( J ) = TAU + Z( JLAM ) = ZERO +* +* Record the appropriate Givens rotation +* + GIVPTR = GIVPTR + 1 + GIVCOL( 1, GIVPTR ) = INDXQ( INDX( JLAM ) ) + GIVCOL( 2, GIVPTR ) = INDXQ( INDX( J ) ) + GIVNUM( 1, GIVPTR ) = C + GIVNUM( 2, GIVPTR ) = S + IF( ICOMPQ.EQ.1 ) THEN + CALL SROT( QSIZ, Q( 1, INDXQ( INDX( JLAM ) ) ), 1, + $ Q( 1, INDXQ( INDX( J ) ) ), 1, C, S ) + END IF + T = D( JLAM )*C*C + D( J )*S*S + D( J ) = D( JLAM )*S*S + D( J )*C*C + D( JLAM ) = T + K2 = K2 - 1 + I = 1 + 90 CONTINUE + IF( K2+I.LE.N ) THEN + IF( D( JLAM ).LT.D( INDXP( K2+I ) ) ) THEN + INDXP( K2+I-1 ) = INDXP( K2+I ) + INDXP( K2+I ) = JLAM + I = I + 1 + GO TO 90 + ELSE + INDXP( K2+I-1 ) = JLAM + END IF + ELSE + INDXP( K2+I-1 ) = JLAM + END IF + JLAM = J + ELSE + K = K + 1 + W( K ) = Z( JLAM ) + DLAMDA( K ) = D( JLAM ) + INDXP( K ) = JLAM + JLAM = J + END IF + END IF + GO TO 80 + 100 CONTINUE +* +* Record the last eigenvalue. +* + K = K + 1 + W( K ) = Z( JLAM ) + DLAMDA( K ) = D( JLAM ) + INDXP( K ) = JLAM +* + 110 CONTINUE +* +* Sort the eigenvalues and corresponding eigenvectors into DLAMDA +* and Q2 respectively. The eigenvalues/vectors which were not +* deflated go into the first K slots of DLAMDA and Q2 respectively, +* while those which were deflated go into the last N - K slots. +* + IF( ICOMPQ.EQ.0 ) THEN + DO 120 J = 1, N + JP = INDXP( J ) + DLAMDA( J ) = D( JP ) + PERM( J ) = INDXQ( INDX( JP ) ) + 120 CONTINUE + ELSE + DO 130 J = 1, N + JP = INDXP( J ) + DLAMDA( J ) = D( JP ) + PERM( J ) = INDXQ( INDX( JP ) ) + CALL SCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 ) + 130 CONTINUE + END IF +* +* The deflated eigenvalues and their corresponding vectors go back +* into the last N - K slots of D and Q respectively. +* + IF( K.LT.N ) THEN + IF( ICOMPQ.EQ.0 ) THEN + CALL SCOPY( N-K, DLAMDA( K+1 ), 1, D( K+1 ), 1 ) + ELSE + CALL SCOPY( N-K, DLAMDA( K+1 ), 1, D( K+1 ), 1 ) + CALL SLACPY( 'A', QSIZ, N-K, Q2( 1, K+1 ), LDQ2, + $ Q( 1, K+1 ), LDQ ) + END IF + END IF +* + RETURN +* +* End of SLAED8 +* + END diff --git a/costa/native/external/lapack/slaed9.f b/costa/native/external/lapack/slaed9.f new file mode 100644 index 000000000..cdac9dc00 --- /dev/null +++ b/costa/native/external/lapack/slaed9.f @@ -0,0 +1,206 @@ + SUBROUTINE SLAED9( K, KSTART, KSTOP, N, D, Q, LDQ, RHO, DLAMDA, W, + $ S, LDS, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, K, KSTART, KSTOP, LDQ, LDS, N + REAL RHO +* .. +* .. Array Arguments .. + REAL D( * ), DLAMDA( * ), Q( LDQ, * ), S( LDS, * ), + $ W( * ) +* .. +* +* Purpose +* ======= +* +* SLAED9 finds the roots of the secular equation, as defined by the +* values in D, Z, and RHO, between KSTART and KSTOP. It makes the +* appropriate calls to SLAED4 and then stores the new matrix of +* eigenvectors for use in calculating the next level of Z vectors. +* +* Arguments +* ========= +* +* K (input) INTEGER +* The number of terms in the rational function to be solved by +* SLAED4. K >= 0. +* +* KSTART (input) INTEGER +* KSTOP (input) INTEGER +* The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP +* are to be computed. 1 <= KSTART <= KSTOP <= K. +* +* N (input) INTEGER +* The number of rows and columns in the Q matrix. +* N >= K (delation may result in N > K). +* +* D (output) REAL array, dimension (N) +* D(I) contains the updated eigenvalues +* for KSTART <= I <= KSTOP. +* +* Q (workspace) REAL array, dimension (LDQ,N) +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max( 1, N ). +* +* RHO (input) REAL +* The value of the parameter in the rank one update equation. +* RHO >= 0 required. +* +* DLAMDA (input) REAL array, dimension (K) +* The first K elements of this array contain the old roots +* of the deflated updating problem. These are the poles +* of the secular equation. +* +* W (input) REAL array, dimension (K) +* The first K elements of this array contain the components +* of the deflation-adjusted updating vector. +* +* S (output) REAL array, dimension (LDS, K) +* Will contain the eigenvectors of the repaired matrix which +* will be stored for subsequent Z vector calculation and +* multiplied by the previously accumulated eigenvectors +* to update the system. +* +* LDS (input) INTEGER +* The leading dimension of S. LDS >= max( 1, K ). +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an eigenvalue did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J + REAL TEMP +* .. +* .. External Functions .. + REAL SLAMC3, SNRM2 + EXTERNAL SLAMC3, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLAED4, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( K.LT.0 ) THEN + INFO = -1 + ELSE IF( KSTART.LT.1 .OR. KSTART.GT.MAX( 1, K ) ) THEN + INFO = -2 + ELSE IF( MAX( 1, KSTOP ).LT.KSTART .OR. KSTOP.GT.MAX( 1, K ) ) + $ THEN + INFO = -3 + ELSE IF( N.LT.K ) THEN + INFO = -4 + ELSE IF( LDQ.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDS.LT.MAX( 1, K ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAED9', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) + $ RETURN +* +* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can +* be computed with high relative accuracy (barring over/underflow). +* This is a problem on machines without a guard digit in +* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). +* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), +* which on any of these machines zeros out the bottommost +* bit of DLAMDA(I) if it is 1; this makes the subsequent +* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation +* occurs. On binary machines with a guard digit (almost all +* machines) it does not change DLAMDA(I) at all. On hexadecimal +* and decimal machines with a guard digit, it slightly +* changes the bottommost bits of DLAMDA(I). It does not account +* for hexadecimal or decimal machines without guard digits +* (we know of none). We use a subroutine call to compute +* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating +* this code. +* + DO 10 I = 1, N + DLAMDA( I ) = SLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I ) + 10 CONTINUE +* + DO 20 J = KSTART, KSTOP + CALL SLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO ) +* +* If the zero finder fails, the computation is terminated. +* + IF( INFO.NE.0 ) + $ GO TO 120 + 20 CONTINUE +* + IF( K.EQ.1 .OR. K.EQ.2 ) THEN + DO 40 I = 1, K + DO 30 J = 1, K + S( J, I ) = Q( J, I ) + 30 CONTINUE + 40 CONTINUE + GO TO 120 + END IF +* +* Compute updated W. +* + CALL SCOPY( K, W, 1, S, 1 ) +* +* Initialize W(I) = Q(I,I) +* + CALL SCOPY( K, Q, LDQ+1, W, 1 ) + DO 70 J = 1, K + DO 50 I = 1, J - 1 + W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) ) + 50 CONTINUE + DO 60 I = J + 1, K + W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) ) + 60 CONTINUE + 70 CONTINUE + DO 80 I = 1, K + W( I ) = SIGN( SQRT( -W( I ) ), S( I, 1 ) ) + 80 CONTINUE +* +* Compute eigenvectors of the modified rank-1 modification. +* + DO 110 J = 1, K + DO 90 I = 1, K + Q( I, J ) = W( I ) / Q( I, J ) + 90 CONTINUE + TEMP = SNRM2( K, Q( 1, J ), 1 ) + DO 100 I = 1, K + S( I, J ) = Q( I, J ) / TEMP + 100 CONTINUE + 110 CONTINUE +* + 120 CONTINUE + RETURN +* +* End of SLAED9 +* + END diff --git a/costa/native/external/lapack/slaeda.f b/costa/native/external/lapack/slaeda.f new file mode 100644 index 000000000..33933ff82 --- /dev/null +++ b/costa/native/external/lapack/slaeda.f @@ -0,0 +1,218 @@ + SUBROUTINE SLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR, + $ GIVCOL, GIVNUM, Q, QPTR, Z, ZTEMP, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER CURLVL, CURPBM, INFO, N, TLVLS +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), GIVPTR( * ), PERM( * ), + $ PRMPTR( * ), QPTR( * ) + REAL GIVNUM( 2, * ), Q( * ), Z( * ), ZTEMP( * ) +* .. +* +* Purpose +* ======= +* +* SLAEDA computes the Z vector corresponding to the merge step in the +* CURLVLth step of the merge process with TLVLS steps for the CURPBMth +* problem. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The dimension of the symmetric tridiagonal matrix. N >= 0. +* +* TLVLS (input) INTEGER +* The total number of merging levels in the overall divide and +* conquer tree. +* +* CURLVL (input) INTEGER +* The current level in the overall merge routine, +* 0 <= curlvl <= tlvls. +* +* CURPBM (input) INTEGER +* The current problem in the current level in the overall +* merge routine (counting from upper left to lower right). +* +* PRMPTR (input) INTEGER array, dimension (N lg N) +* Contains a list of pointers which indicate where in PERM a +* level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) +* indicates the size of the permutation and incidentally the +* size of the full, non-deflated problem. +* +* PERM (input) INTEGER array, dimension (N lg N) +* Contains the permutations (from deflation and sorting) to be +* applied to each eigenblock. +* +* GIVPTR (input) INTEGER array, dimension (N lg N) +* Contains a list of pointers which indicate where in GIVCOL a +* level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) +* indicates the number of Givens rotations. +* +* GIVCOL (input) INTEGER array, dimension (2, N lg N) +* Each pair of numbers indicates a pair of columns to take place +* in a Givens rotation. +* +* GIVNUM (input) REAL array, dimension (2, N lg N) +* Each number indicates the S value to be used in the +* corresponding Givens rotation. +* +* Q (input) REAL array, dimension (N**2) +* Contains the square eigenblocks from previous levels, the +* starting positions for blocks are given by QPTR. +* +* QPTR (input) INTEGER array, dimension (N+2) +* Contains a list of pointers which indicate where in Q an +* eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates +* the size of the block. +* +* Z (output) REAL array, dimension (N) +* On output this vector contains the updating vector (the last +* row of the first sub-eigenvector matrix and the first row of +* the second sub-eigenvector matrix). +* +* ZTEMP (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E0, HALF = 0.5E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + INTEGER BSIZ1, BSIZ2, CURR, I, K, MID, PSIZ1, PSIZ2, + $ PTR, ZPTR1 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMV, SROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -1 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAEDA', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine location of first number in second half. +* + MID = N / 2 + 1 +* +* Gather last/first rows of appropriate eigenblocks into center of Z +* + PTR = 1 +* +* Determine location of lowest level subproblem in the full storage +* scheme +* + CURR = PTR + CURPBM*2**CURLVL + 2**( CURLVL-1 ) - 1 +* +* Determine size of these matrices. We add HALF to the value of +* the SQRT in case the machine underestimates one of these square +* roots. +* + BSIZ1 = INT( HALF+SQRT( REAL( QPTR( CURR+1 )-QPTR( CURR ) ) ) ) + BSIZ2 = INT( HALF+SQRT( REAL( QPTR( CURR+2 )-QPTR( CURR+1 ) ) ) ) + DO 10 K = 1, MID - BSIZ1 - 1 + Z( K ) = ZERO + 10 CONTINUE + CALL SCOPY( BSIZ1, Q( QPTR( CURR )+BSIZ1-1 ), BSIZ1, + $ Z( MID-BSIZ1 ), 1 ) + CALL SCOPY( BSIZ2, Q( QPTR( CURR+1 ) ), BSIZ2, Z( MID ), 1 ) + DO 20 K = MID + BSIZ2, N + Z( K ) = ZERO + 20 CONTINUE +* +* Loop thru remaining levels 1 -> CURLVL applying the Givens +* rotations and permutation and then multiplying the center matrices +* against the current Z. +* + PTR = 2**TLVLS + 1 + DO 70 K = 1, CURLVL - 1 + CURR = PTR + CURPBM*2**( CURLVL-K ) + 2**( CURLVL-K-1 ) - 1 + PSIZ1 = PRMPTR( CURR+1 ) - PRMPTR( CURR ) + PSIZ2 = PRMPTR( CURR+2 ) - PRMPTR( CURR+1 ) + ZPTR1 = MID - PSIZ1 +* +* Apply Givens at CURR and CURR+1 +* + DO 30 I = GIVPTR( CURR ), GIVPTR( CURR+1 ) - 1 + CALL SROT( 1, Z( ZPTR1+GIVCOL( 1, I )-1 ), 1, + $ Z( ZPTR1+GIVCOL( 2, I )-1 ), 1, GIVNUM( 1, I ), + $ GIVNUM( 2, I ) ) + 30 CONTINUE + DO 40 I = GIVPTR( CURR+1 ), GIVPTR( CURR+2 ) - 1 + CALL SROT( 1, Z( MID-1+GIVCOL( 1, I ) ), 1, + $ Z( MID-1+GIVCOL( 2, I ) ), 1, GIVNUM( 1, I ), + $ GIVNUM( 2, I ) ) + 40 CONTINUE + PSIZ1 = PRMPTR( CURR+1 ) - PRMPTR( CURR ) + PSIZ2 = PRMPTR( CURR+2 ) - PRMPTR( CURR+1 ) + DO 50 I = 0, PSIZ1 - 1 + ZTEMP( I+1 ) = Z( ZPTR1+PERM( PRMPTR( CURR )+I )-1 ) + 50 CONTINUE + DO 60 I = 0, PSIZ2 - 1 + ZTEMP( PSIZ1+I+1 ) = Z( MID+PERM( PRMPTR( CURR+1 )+I )-1 ) + 60 CONTINUE +* +* Multiply Blocks at CURR and CURR+1 +* +* Determine size of these matrices. We add HALF to the value of +* the SQRT in case the machine underestimates one of these +* square roots. +* + BSIZ1 = INT( HALF+SQRT( REAL( QPTR( CURR+1 )-QPTR( CURR ) ) ) ) + BSIZ2 = INT( HALF+SQRT( REAL( QPTR( CURR+2 )-QPTR( CURR+ + $ 1 ) ) ) ) + IF( BSIZ1.GT.0 ) THEN + CALL SGEMV( 'T', BSIZ1, BSIZ1, ONE, Q( QPTR( CURR ) ), + $ BSIZ1, ZTEMP( 1 ), 1, ZERO, Z( ZPTR1 ), 1 ) + END IF + CALL SCOPY( PSIZ1-BSIZ1, ZTEMP( BSIZ1+1 ), 1, Z( ZPTR1+BSIZ1 ), + $ 1 ) + IF( BSIZ2.GT.0 ) THEN + CALL SGEMV( 'T', BSIZ2, BSIZ2, ONE, Q( QPTR( CURR+1 ) ), + $ BSIZ2, ZTEMP( PSIZ1+1 ), 1, ZERO, Z( MID ), 1 ) + END IF + CALL SCOPY( PSIZ2-BSIZ2, ZTEMP( PSIZ1+BSIZ2+1 ), 1, + $ Z( MID+BSIZ2 ), 1 ) +* + PTR = PTR + 2**( TLVLS-K ) + 70 CONTINUE +* + RETURN +* +* End of SLAEDA +* + END diff --git a/costa/native/external/lapack/slaein.f b/costa/native/external/lapack/slaein.f new file mode 100644 index 000000000..455e65c2a --- /dev/null +++ b/costa/native/external/lapack/slaein.f @@ -0,0 +1,532 @@ + SUBROUTINE SLAEIN( RIGHTV, NOINIT, N, H, LDH, WR, WI, VR, VI, B, + $ LDB, WORK, EPS3, SMLNUM, BIGNUM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + LOGICAL NOINIT, RIGHTV + INTEGER INFO, LDB, LDH, N + REAL BIGNUM, EPS3, SMLNUM, WI, WR +* .. +* .. Array Arguments .. + REAL B( LDB, * ), H( LDH, * ), VI( * ), VR( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLAEIN uses inverse iteration to find a right or left eigenvector +* corresponding to the eigenvalue (WR,WI) of a real upper Hessenberg +* matrix H. +* +* Arguments +* ========= +* +* RIGHTV (input) LOGICAL +* = .TRUE. : compute right eigenvector; +* = .FALSE.: compute left eigenvector. +* +* NOINIT (input) LOGICAL +* = .TRUE. : no initial vector supplied in (VR,VI). +* = .FALSE.: initial vector supplied in (VR,VI). +* +* N (input) INTEGER +* The order of the matrix H. N >= 0. +* +* H (input) REAL array, dimension (LDH,N) +* The upper Hessenberg matrix H. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH >= max(1,N). +* +* WR (input) REAL +* WI (input) REAL +* The real and imaginary parts of the eigenvalue of H whose +* corresponding right or left eigenvector is to be computed. +* +* VR (input/output) REAL array, dimension (N) +* VI (input/output) REAL array, dimension (N) +* On entry, if NOINIT = .FALSE. and WI = 0.0, VR must contain +* a real starting vector for inverse iteration using the real +* eigenvalue WR; if NOINIT = .FALSE. and WI.ne.0.0, VR and VI +* must contain the real and imaginary parts of a complex +* starting vector for inverse iteration using the complex +* eigenvalue (WR,WI); otherwise VR and VI need not be set. +* On exit, if WI = 0.0 (real eigenvalue), VR contains the +* computed real eigenvector; if WI.ne.0.0 (complex eigenvalue), +* VR and VI contain the real and imaginary parts of the +* computed complex eigenvector. The eigenvector is normalized +* so that the component of largest magnitude has magnitude 1; +* here the magnitude of a complex number (x,y) is taken to be +* |x| + |y|. +* VI is not referenced if WI = 0.0. +* +* B (workspace) REAL array, dimension (LDB,N) +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= N+1. +* +* WORK (workspace) REAL array, dimension (N) +* +* EPS3 (input) REAL +* A small machine-dependent value which is used to perturb +* close eigenvalues, and to replace zero pivots. +* +* SMLNUM (input) REAL +* A machine-dependent value close to the underflow threshold. +* +* BIGNUM (input) REAL +* A machine-dependent value close to the overflow threshold. +* +* INFO (output) INTEGER +* = 0: successful exit +* = 1: inverse iteration did not converge; VR is set to the +* last iterate, and so is VI if WI.ne.0.0. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TENTH + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TENTH = 1.0E-1 ) +* .. +* .. Local Scalars .. + CHARACTER NORMIN, TRANS + INTEGER I, I1, I2, I3, IERR, ITS, J + REAL ABSBII, ABSBJJ, EI, EJ, GROWTO, NORM, NRMSML, + $ REC, ROOTN, SCALE, TEMP, VCRIT, VMAX, VNORM, W, + $ W1, X, XI, XR, Y +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SASUM, SLAPY2, SNRM2 + EXTERNAL ISAMAX, SASUM, SLAPY2, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SLADIV, SLATRS, SSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, REAL, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* GROWTO is the threshold used in the acceptance test for an +* eigenvector. +* + ROOTN = SQRT( REAL( N ) ) + GROWTO = TENTH / ROOTN + NRMSML = MAX( ONE, EPS3*ROOTN )*SMLNUM +* +* Form B = H - (WR,WI)*I (except that the subdiagonal elements and +* the imaginary parts of the diagonal elements are not stored). +* + DO 20 J = 1, N + DO 10 I = 1, J - 1 + B( I, J ) = H( I, J ) + 10 CONTINUE + B( J, J ) = H( J, J ) - WR + 20 CONTINUE +* + IF( WI.EQ.ZERO ) THEN +* +* Real eigenvalue. +* + IF( NOINIT ) THEN +* +* Set initial vector. +* + DO 30 I = 1, N + VR( I ) = EPS3 + 30 CONTINUE + ELSE +* +* Scale supplied initial vector. +* + VNORM = SNRM2( N, VR, 1 ) + CALL SSCAL( N, ( EPS3*ROOTN ) / MAX( VNORM, NRMSML ), VR, + $ 1 ) + END IF +* + IF( RIGHTV ) THEN +* +* LU decomposition with partial pivoting of B, replacing zero +* pivots by EPS3. +* + DO 60 I = 1, N - 1 + EI = H( I+1, I ) + IF( ABS( B( I, I ) ).LT.ABS( EI ) ) THEN +* +* Interchange rows and eliminate. +* + X = B( I, I ) / EI + B( I, I ) = EI + DO 40 J = I + 1, N + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - X*TEMP + B( I, J ) = TEMP + 40 CONTINUE + ELSE +* +* Eliminate without interchange. +* + IF( B( I, I ).EQ.ZERO ) + $ B( I, I ) = EPS3 + X = EI / B( I, I ) + IF( X.NE.ZERO ) THEN + DO 50 J = I + 1, N + B( I+1, J ) = B( I+1, J ) - X*B( I, J ) + 50 CONTINUE + END IF + END IF + 60 CONTINUE + IF( B( N, N ).EQ.ZERO ) + $ B( N, N ) = EPS3 +* + TRANS = 'N' +* + ELSE +* +* UL decomposition with partial pivoting of B, replacing zero +* pivots by EPS3. +* + DO 90 J = N, 2, -1 + EJ = H( J, J-1 ) + IF( ABS( B( J, J ) ).LT.ABS( EJ ) ) THEN +* +* Interchange columns and eliminate. +* + X = B( J, J ) / EJ + B( J, J ) = EJ + DO 70 I = 1, J - 1 + TEMP = B( I, J-1 ) + B( I, J-1 ) = B( I, J ) - X*TEMP + B( I, J ) = TEMP + 70 CONTINUE + ELSE +* +* Eliminate without interchange. +* + IF( B( J, J ).EQ.ZERO ) + $ B( J, J ) = EPS3 + X = EJ / B( J, J ) + IF( X.NE.ZERO ) THEN + DO 80 I = 1, J - 1 + B( I, J-1 ) = B( I, J-1 ) - X*B( I, J ) + 80 CONTINUE + END IF + END IF + 90 CONTINUE + IF( B( 1, 1 ).EQ.ZERO ) + $ B( 1, 1 ) = EPS3 +* + TRANS = 'T' +* + END IF +* + NORMIN = 'N' + DO 110 ITS = 1, N +* +* Solve U*x = scale*v for a right eigenvector +* or U'*x = scale*v for a left eigenvector, +* overwriting x on v. +* + CALL SLATRS( 'Upper', TRANS, 'Nonunit', NORMIN, N, B, LDB, + $ VR, SCALE, WORK, IERR ) + NORMIN = 'Y' +* +* Test for sufficient growth in the norm of v. +* + VNORM = SASUM( N, VR, 1 ) + IF( VNORM.GE.GROWTO*SCALE ) + $ GO TO 120 +* +* Choose new orthogonal starting vector and try again. +* + TEMP = EPS3 / ( ROOTN+ONE ) + VR( 1 ) = EPS3 + DO 100 I = 2, N + VR( I ) = TEMP + 100 CONTINUE + VR( N-ITS+1 ) = VR( N-ITS+1 ) - EPS3*ROOTN + 110 CONTINUE +* +* Failure to find eigenvector in N iterations. +* + INFO = 1 +* + 120 CONTINUE +* +* Normalize eigenvector. +* + I = ISAMAX( N, VR, 1 ) + CALL SSCAL( N, ONE / ABS( VR( I ) ), VR, 1 ) + ELSE +* +* Complex eigenvalue. +* + IF( NOINIT ) THEN +* +* Set initial vector. +* + DO 130 I = 1, N + VR( I ) = EPS3 + VI( I ) = ZERO + 130 CONTINUE + ELSE +* +* Scale supplied initial vector. +* + NORM = SLAPY2( SNRM2( N, VR, 1 ), SNRM2( N, VI, 1 ) ) + REC = ( EPS3*ROOTN ) / MAX( NORM, NRMSML ) + CALL SSCAL( N, REC, VR, 1 ) + CALL SSCAL( N, REC, VI, 1 ) + END IF +* + IF( RIGHTV ) THEN +* +* LU decomposition with partial pivoting of B, replacing zero +* pivots by EPS3. +* +* The imaginary part of the (i,j)-th element of U is stored in +* B(j+1,i). +* + B( 2, 1 ) = -WI + DO 140 I = 2, N + B( I+1, 1 ) = ZERO + 140 CONTINUE +* + DO 170 I = 1, N - 1 + ABSBII = SLAPY2( B( I, I ), B( I+1, I ) ) + EI = H( I+1, I ) + IF( ABSBII.LT.ABS( EI ) ) THEN +* +* Interchange rows and eliminate. +* + XR = B( I, I ) / EI + XI = B( I+1, I ) / EI + B( I, I ) = EI + B( I+1, I ) = ZERO + DO 150 J = I + 1, N + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - XR*TEMP + B( J+1, I+1 ) = B( J+1, I ) - XI*TEMP + B( I, J ) = TEMP + B( J+1, I ) = ZERO + 150 CONTINUE + B( I+2, I ) = -WI + B( I+1, I+1 ) = B( I+1, I+1 ) - XI*WI + B( I+2, I+1 ) = B( I+2, I+1 ) + XR*WI + ELSE +* +* Eliminate without interchanging rows. +* + IF( ABSBII.EQ.ZERO ) THEN + B( I, I ) = EPS3 + B( I+1, I ) = ZERO + ABSBII = EPS3 + END IF + EI = ( EI / ABSBII ) / ABSBII + XR = B( I, I )*EI + XI = -B( I+1, I )*EI + DO 160 J = I + 1, N + B( I+1, J ) = B( I+1, J ) - XR*B( I, J ) + + $ XI*B( J+1, I ) + B( J+1, I+1 ) = -XR*B( J+1, I ) - XI*B( I, J ) + 160 CONTINUE + B( I+2, I+1 ) = B( I+2, I+1 ) - WI + END IF +* +* Compute 1-norm of offdiagonal elements of i-th row. +* + WORK( I ) = SASUM( N-I, B( I, I+1 ), LDB ) + + $ SASUM( N-I, B( I+2, I ), 1 ) + 170 CONTINUE + IF( B( N, N ).EQ.ZERO .AND. B( N+1, N ).EQ.ZERO ) + $ B( N, N ) = EPS3 + WORK( N ) = ZERO +* + I1 = N + I2 = 1 + I3 = -1 + ELSE +* +* UL decomposition with partial pivoting of conjg(B), +* replacing zero pivots by EPS3. +* +* The imaginary part of the (i,j)-th element of U is stored in +* B(j+1,i). +* + B( N+1, N ) = WI + DO 180 J = 1, N - 1 + B( N+1, J ) = ZERO + 180 CONTINUE +* + DO 210 J = N, 2, -1 + EJ = H( J, J-1 ) + ABSBJJ = SLAPY2( B( J, J ), B( J+1, J ) ) + IF( ABSBJJ.LT.ABS( EJ ) ) THEN +* +* Interchange columns and eliminate +* + XR = B( J, J ) / EJ + XI = B( J+1, J ) / EJ + B( J, J ) = EJ + B( J+1, J ) = ZERO + DO 190 I = 1, J - 1 + TEMP = B( I, J-1 ) + B( I, J-1 ) = B( I, J ) - XR*TEMP + B( J, I ) = B( J+1, I ) - XI*TEMP + B( I, J ) = TEMP + B( J+1, I ) = ZERO + 190 CONTINUE + B( J+1, J-1 ) = WI + B( J-1, J-1 ) = B( J-1, J-1 ) + XI*WI + B( J, J-1 ) = B( J, J-1 ) - XR*WI + ELSE +* +* Eliminate without interchange. +* + IF( ABSBJJ.EQ.ZERO ) THEN + B( J, J ) = EPS3 + B( J+1, J ) = ZERO + ABSBJJ = EPS3 + END IF + EJ = ( EJ / ABSBJJ ) / ABSBJJ + XR = B( J, J )*EJ + XI = -B( J+1, J )*EJ + DO 200 I = 1, J - 1 + B( I, J-1 ) = B( I, J-1 ) - XR*B( I, J ) + + $ XI*B( J+1, I ) + B( J, I ) = -XR*B( J+1, I ) - XI*B( I, J ) + 200 CONTINUE + B( J, J-1 ) = B( J, J-1 ) + WI + END IF +* +* Compute 1-norm of offdiagonal elements of j-th column. +* + WORK( J ) = SASUM( J-1, B( 1, J ), 1 ) + + $ SASUM( J-1, B( J+1, 1 ), LDB ) + 210 CONTINUE + IF( B( 1, 1 ).EQ.ZERO .AND. B( 2, 1 ).EQ.ZERO ) + $ B( 1, 1 ) = EPS3 + WORK( 1 ) = ZERO +* + I1 = 1 + I2 = N + I3 = 1 + END IF +* + DO 270 ITS = 1, N + SCALE = ONE + VMAX = ONE + VCRIT = BIGNUM +* +* Solve U*(xr,xi) = scale*(vr,vi) for a right eigenvector, +* or U'*(xr,xi) = scale*(vr,vi) for a left eigenvector, +* overwriting (xr,xi) on (vr,vi). +* + DO 250 I = I1, I2, I3 +* + IF( WORK( I ).GT.VCRIT ) THEN + REC = ONE / VMAX + CALL SSCAL( N, REC, VR, 1 ) + CALL SSCAL( N, REC, VI, 1 ) + SCALE = SCALE*REC + VMAX = ONE + VCRIT = BIGNUM + END IF +* + XR = VR( I ) + XI = VI( I ) + IF( RIGHTV ) THEN + DO 220 J = I + 1, N + XR = XR - B( I, J )*VR( J ) + B( J+1, I )*VI( J ) + XI = XI - B( I, J )*VI( J ) - B( J+1, I )*VR( J ) + 220 CONTINUE + ELSE + DO 230 J = 1, I - 1 + XR = XR - B( J, I )*VR( J ) + B( I+1, J )*VI( J ) + XI = XI - B( J, I )*VI( J ) - B( I+1, J )*VR( J ) + 230 CONTINUE + END IF +* + W = ABS( B( I, I ) ) + ABS( B( I+1, I ) ) + IF( W.GT.SMLNUM ) THEN + IF( W.LT.ONE ) THEN + W1 = ABS( XR ) + ABS( XI ) + IF( W1.GT.W*BIGNUM ) THEN + REC = ONE / W1 + CALL SSCAL( N, REC, VR, 1 ) + CALL SSCAL( N, REC, VI, 1 ) + XR = VR( I ) + XI = VI( I ) + SCALE = SCALE*REC + VMAX = VMAX*REC + END IF + END IF +* +* Divide by diagonal element of B. +* + CALL SLADIV( XR, XI, B( I, I ), B( I+1, I ), VR( I ), + $ VI( I ) ) + VMAX = MAX( ABS( VR( I ) )+ABS( VI( I ) ), VMAX ) + VCRIT = BIGNUM / VMAX + ELSE + DO 240 J = 1, N + VR( J ) = ZERO + VI( J ) = ZERO + 240 CONTINUE + VR( I ) = ONE + VI( I ) = ONE + SCALE = ZERO + VMAX = ONE + VCRIT = BIGNUM + END IF + 250 CONTINUE +* +* Test for sufficient growth in the norm of (VR,VI). +* + VNORM = SASUM( N, VR, 1 ) + SASUM( N, VI, 1 ) + IF( VNORM.GE.GROWTO*SCALE ) + $ GO TO 280 +* +* Choose a new orthogonal starting vector and try again. +* + Y = EPS3 / ( ROOTN+ONE ) + VR( 1 ) = EPS3 + VI( 1 ) = ZERO +* + DO 260 I = 2, N + VR( I ) = Y + VI( I ) = ZERO + 260 CONTINUE + VR( N-ITS+1 ) = VR( N-ITS+1 ) - EPS3*ROOTN + 270 CONTINUE +* +* Failure to find eigenvector in N iterations +* + INFO = 1 +* + 280 CONTINUE +* +* Normalize eigenvector. +* + VNORM = ZERO + DO 290 I = 1, N + VNORM = MAX( VNORM, ABS( VR( I ) )+ABS( VI( I ) ) ) + 290 CONTINUE + CALL SSCAL( N, ONE / VNORM, VR, 1 ) + CALL SSCAL( N, ONE / VNORM, VI, 1 ) +* + END IF +* + RETURN +* +* End of SLAEIN +* + END diff --git a/costa/native/external/lapack/slaev2.f b/costa/native/external/lapack/slaev2.f new file mode 100644 index 000000000..b90eb51ac --- /dev/null +++ b/costa/native/external/lapack/slaev2.f @@ -0,0 +1,170 @@ + SUBROUTINE SLAEV2( A, B, C, RT1, RT2, CS1, SN1 ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + REAL A, B, C, CS1, RT1, RT2, SN1 +* .. +* +* Purpose +* ======= +* +* SLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix +* [ A B ] +* [ B C ]. +* On return, RT1 is the eigenvalue of larger absolute value, RT2 is the +* eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right +* eigenvector for RT1, giving the decomposition +* +* [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] +* [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. +* +* Arguments +* ========= +* +* A (input) REAL +* The (1,1) element of the 2-by-2 matrix. +* +* B (input) REAL +* The (1,2) element and the conjugate of the (2,1) element of +* the 2-by-2 matrix. +* +* C (input) REAL +* The (2,2) element of the 2-by-2 matrix. +* +* RT1 (output) REAL +* The eigenvalue of larger absolute value. +* +* RT2 (output) REAL +* The eigenvalue of smaller absolute value. +* +* CS1 (output) REAL +* SN1 (output) REAL +* The vector (CS1, SN1) is a unit right eigenvector for RT1. +* +* Further Details +* =============== +* +* RT1 is accurate to a few ulps barring over/underflow. +* +* RT2 may be inaccurate if there is massive cancellation in the +* determinant A*C-B*B; higher precision or correctly rounded or +* correctly truncated arithmetic would be needed to compute RT2 +* accurately in all cases. +* +* CS1 and SN1 are accurate to a few ulps barring over/underflow. +* +* Overflow is possible only if RT1 is within a factor of 5 of overflow. +* Underflow is harmless if the input data is 0 or exceeds +* underflow_threshold / macheps. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + REAL TWO + PARAMETER ( TWO = 2.0E0 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL HALF + PARAMETER ( HALF = 0.5E0 ) +* .. +* .. Local Scalars .. + INTEGER SGN1, SGN2 + REAL AB, ACMN, ACMX, ACS, ADF, CS, CT, DF, RT, SM, + $ TB, TN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* +* Compute the eigenvalues +* + SM = A + C + DF = A - C + ADF = ABS( DF ) + TB = B + B + AB = ABS( TB ) + IF( ABS( A ).GT.ABS( C ) ) THEN + ACMX = A + ACMN = C + ELSE + ACMX = C + ACMN = A + END IF + IF( ADF.GT.AB ) THEN + RT = ADF*SQRT( ONE+( AB / ADF )**2 ) + ELSE IF( ADF.LT.AB ) THEN + RT = AB*SQRT( ONE+( ADF / AB )**2 ) + ELSE +* +* Includes case AB=ADF=0 +* + RT = AB*SQRT( TWO ) + END IF + IF( SM.LT.ZERO ) THEN + RT1 = HALF*( SM-RT ) + SGN1 = -1 +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE IF( SM.GT.ZERO ) THEN + RT1 = HALF*( SM+RT ) + SGN1 = 1 +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE +* +* Includes case RT1 = RT2 = 0 +* + RT1 = HALF*RT + RT2 = -HALF*RT + SGN1 = 1 + END IF +* +* Compute the eigenvector +* + IF( DF.GE.ZERO ) THEN + CS = DF + RT + SGN2 = 1 + ELSE + CS = DF - RT + SGN2 = -1 + END IF + ACS = ABS( CS ) + IF( ACS.GT.AB ) THEN + CT = -TB / CS + SN1 = ONE / SQRT( ONE+CT*CT ) + CS1 = CT*SN1 + ELSE + IF( AB.EQ.ZERO ) THEN + CS1 = ONE + SN1 = ZERO + ELSE + TN = -CS / TB + CS1 = ONE / SQRT( ONE+TN*TN ) + SN1 = TN*CS1 + END IF + END IF + IF( SGN1.EQ.SGN2 ) THEN + TN = CS1 + CS1 = -SN1 + SN1 = TN + END IF + RETURN +* +* End of SLAEV2 +* + END diff --git a/costa/native/external/lapack/slaexc.f b/costa/native/external/lapack/slaexc.f new file mode 100644 index 000000000..bc6080b40 --- /dev/null +++ b/costa/native/external/lapack/slaexc.f @@ -0,0 +1,354 @@ + SUBROUTINE SLAEXC( WANTQ, N, T, LDT, Q, LDQ, J1, N1, N2, WORK, + $ INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + LOGICAL WANTQ + INTEGER INFO, J1, LDQ, LDT, N, N1, N2 +* .. +* .. Array Arguments .. + REAL Q( LDQ, * ), T( LDT, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLAEXC swaps adjacent diagonal blocks T11 and T22 of order 1 or 2 in +* an upper quasi-triangular matrix T by an orthogonal similarity +* transformation. +* +* T must be in Schur canonical form, that is, block upper triangular +* with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block +* has its diagonal elemnts equal and its off-diagonal elements of +* opposite sign. +* +* Arguments +* ========= +* +* WANTQ (input) LOGICAL +* = .TRUE. : accumulate the transformation in the matrix Q; +* = .FALSE.: do not accumulate the transformation. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input/output) REAL array, dimension (LDT,N) +* On entry, the upper quasi-triangular matrix T, in Schur +* canonical form. +* On exit, the updated matrix T, again in Schur canonical form. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* Q (input/output) REAL array, dimension (LDQ,N) +* On entry, if WANTQ is .TRUE., the orthogonal matrix Q. +* On exit, if WANTQ is .TRUE., the updated matrix Q. +* If WANTQ is .FALSE., Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. +* LDQ >= 1; and if WANTQ is .TRUE., LDQ >= N. +* +* J1 (input) INTEGER +* The index of the first row of the first block T11. +* +* N1 (input) INTEGER +* The order of the first block T11. N1 = 0, 1 or 2. +* +* N2 (input) INTEGER +* The order of the second block T22. N2 = 0, 1 or 2. +* +* WORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* = 1: the transformed matrix T would be too far from Schur +* form; the blocks are not swapped and T and Q are +* unchanged. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL TEN + PARAMETER ( TEN = 1.0E+1 ) + INTEGER LDD, LDX + PARAMETER ( LDD = 4, LDX = 2 ) +* .. +* .. Local Scalars .. + INTEGER IERR, J2, J3, J4, K, ND + REAL CS, DNORM, EPS, SCALE, SMLNUM, SN, T11, T22, + $ T33, TAU, TAU1, TAU2, TEMP, THRESH, WI1, WI2, + $ WR1, WR2, XNORM +* .. +* .. Local Arrays .. + REAL D( LDD, 4 ), U( 3 ), U1( 3 ), U2( 3 ), + $ X( LDX, 2 ) +* .. +* .. External Functions .. + REAL SLAMCH, SLANGE + EXTERNAL SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SLACPY, SLANV2, SLARFG, SLARFX, SLARTG, SLASY2, + $ SROT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 .OR. N1.EQ.0 .OR. N2.EQ.0 ) + $ RETURN + IF( J1+N1.GT.N ) + $ RETURN +* + J2 = J1 + 1 + J3 = J1 + 2 + J4 = J1 + 3 +* + IF( N1.EQ.1 .AND. N2.EQ.1 ) THEN +* +* Swap two 1-by-1 blocks. +* + T11 = T( J1, J1 ) + T22 = T( J2, J2 ) +* +* Determine the transformation to perform the interchange. +* + CALL SLARTG( T( J1, J2 ), T22-T11, CS, SN, TEMP ) +* +* Apply transformation to the matrix T. +* + IF( J3.LE.N ) + $ CALL SROT( N-J1-1, T( J1, J3 ), LDT, T( J2, J3 ), LDT, CS, + $ SN ) + CALL SROT( J1-1, T( 1, J1 ), 1, T( 1, J2 ), 1, CS, SN ) +* + T( J1, J1 ) = T22 + T( J2, J2 ) = T11 +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL SROT( N, Q( 1, J1 ), 1, Q( 1, J2 ), 1, CS, SN ) + END IF +* + ELSE +* +* Swapping involves at least one 2-by-2 block. +* +* Copy the diagonal block of order N1+N2 to the local array D +* and compute its norm. +* + ND = N1 + N2 + CALL SLACPY( 'Full', ND, ND, T( J1, J1 ), LDT, D, LDD ) + DNORM = SLANGE( 'Max', ND, ND, D, LDD, WORK ) +* +* Compute machine-dependent threshold for test for accepting +* swap. +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + THRESH = MAX( TEN*EPS*DNORM, SMLNUM ) +* +* Solve T11*X - X*T22 = scale*T12 for X. +* + CALL SLASY2( .FALSE., .FALSE., -1, N1, N2, D, LDD, + $ D( N1+1, N1+1 ), LDD, D( 1, N1+1 ), LDD, SCALE, X, + $ LDX, XNORM, IERR ) +* +* Swap the adjacent diagonal blocks. +* + K = N1 + N1 + N2 - 3 + GO TO ( 10, 20, 30 )K +* + 10 CONTINUE +* +* N1 = 1, N2 = 2: generate elementary reflector H so that: +* +* ( scale, X11, X12 ) H = ( 0, 0, * ) +* + U( 1 ) = SCALE + U( 2 ) = X( 1, 1 ) + U( 3 ) = X( 1, 2 ) + CALL SLARFG( 3, U( 3 ), U, 1, TAU ) + U( 3 ) = ONE + T11 = T( J1, J1 ) +* +* Perform swap provisionally on diagonal block in D. +* + CALL SLARFX( 'L', 3, 3, U, TAU, D, LDD, WORK ) + CALL SLARFX( 'R', 3, 3, U, TAU, D, LDD, WORK ) +* +* Test whether to reject swap. +* + IF( MAX( ABS( D( 3, 1 ) ), ABS( D( 3, 2 ) ), ABS( D( 3, + $ 3 )-T11 ) ).GT.THRESH )GO TO 50 +* +* Accept swap: apply transformation to the entire matrix T. +* + CALL SLARFX( 'L', 3, N-J1+1, U, TAU, T( J1, J1 ), LDT, WORK ) + CALL SLARFX( 'R', J2, 3, U, TAU, T( 1, J1 ), LDT, WORK ) +* + T( J3, J1 ) = ZERO + T( J3, J2 ) = ZERO + T( J3, J3 ) = T11 +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL SLARFX( 'R', N, 3, U, TAU, Q( 1, J1 ), LDQ, WORK ) + END IF + GO TO 40 +* + 20 CONTINUE +* +* N1 = 2, N2 = 1: generate elementary reflector H so that: +* +* H ( -X11 ) = ( * ) +* ( -X21 ) = ( 0 ) +* ( scale ) = ( 0 ) +* + U( 1 ) = -X( 1, 1 ) + U( 2 ) = -X( 2, 1 ) + U( 3 ) = SCALE + CALL SLARFG( 3, U( 1 ), U( 2 ), 1, TAU ) + U( 1 ) = ONE + T33 = T( J3, J3 ) +* +* Perform swap provisionally on diagonal block in D. +* + CALL SLARFX( 'L', 3, 3, U, TAU, D, LDD, WORK ) + CALL SLARFX( 'R', 3, 3, U, TAU, D, LDD, WORK ) +* +* Test whether to reject swap. +* + IF( MAX( ABS( D( 2, 1 ) ), ABS( D( 3, 1 ) ), ABS( D( 1, + $ 1 )-T33 ) ).GT.THRESH )GO TO 50 +* +* Accept swap: apply transformation to the entire matrix T. +* + CALL SLARFX( 'R', J3, 3, U, TAU, T( 1, J1 ), LDT, WORK ) + CALL SLARFX( 'L', 3, N-J1, U, TAU, T( J1, J2 ), LDT, WORK ) +* + T( J1, J1 ) = T33 + T( J2, J1 ) = ZERO + T( J3, J1 ) = ZERO +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL SLARFX( 'R', N, 3, U, TAU, Q( 1, J1 ), LDQ, WORK ) + END IF + GO TO 40 +* + 30 CONTINUE +* +* N1 = 2, N2 = 2: generate elementary reflectors H(1) and H(2) so +* that: +* +* H(2) H(1) ( -X11 -X12 ) = ( * * ) +* ( -X21 -X22 ) ( 0 * ) +* ( scale 0 ) ( 0 0 ) +* ( 0 scale ) ( 0 0 ) +* + U1( 1 ) = -X( 1, 1 ) + U1( 2 ) = -X( 2, 1 ) + U1( 3 ) = SCALE + CALL SLARFG( 3, U1( 1 ), U1( 2 ), 1, TAU1 ) + U1( 1 ) = ONE +* + TEMP = -TAU1*( X( 1, 2 )+U1( 2 )*X( 2, 2 ) ) + U2( 1 ) = -TEMP*U1( 2 ) - X( 2, 2 ) + U2( 2 ) = -TEMP*U1( 3 ) + U2( 3 ) = SCALE + CALL SLARFG( 3, U2( 1 ), U2( 2 ), 1, TAU2 ) + U2( 1 ) = ONE +* +* Perform swap provisionally on diagonal block in D. +* + CALL SLARFX( 'L', 3, 4, U1, TAU1, D, LDD, WORK ) + CALL SLARFX( 'R', 4, 3, U1, TAU1, D, LDD, WORK ) + CALL SLARFX( 'L', 3, 4, U2, TAU2, D( 2, 1 ), LDD, WORK ) + CALL SLARFX( 'R', 4, 3, U2, TAU2, D( 1, 2 ), LDD, WORK ) +* +* Test whether to reject swap. +* + IF( MAX( ABS( D( 3, 1 ) ), ABS( D( 3, 2 ) ), ABS( D( 4, 1 ) ), + $ ABS( D( 4, 2 ) ) ).GT.THRESH )GO TO 50 +* +* Accept swap: apply transformation to the entire matrix T. +* + CALL SLARFX( 'L', 3, N-J1+1, U1, TAU1, T( J1, J1 ), LDT, WORK ) + CALL SLARFX( 'R', J4, 3, U1, TAU1, T( 1, J1 ), LDT, WORK ) + CALL SLARFX( 'L', 3, N-J1+1, U2, TAU2, T( J2, J1 ), LDT, WORK ) + CALL SLARFX( 'R', J4, 3, U2, TAU2, T( 1, J2 ), LDT, WORK ) +* + T( J3, J1 ) = ZERO + T( J3, J2 ) = ZERO + T( J4, J1 ) = ZERO + T( J4, J2 ) = ZERO +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL SLARFX( 'R', N, 3, U1, TAU1, Q( 1, J1 ), LDQ, WORK ) + CALL SLARFX( 'R', N, 3, U2, TAU2, Q( 1, J2 ), LDQ, WORK ) + END IF +* + 40 CONTINUE +* + IF( N2.EQ.2 ) THEN +* +* Standardize new 2-by-2 block T11 +* + CALL SLANV2( T( J1, J1 ), T( J1, J2 ), T( J2, J1 ), + $ T( J2, J2 ), WR1, WI1, WR2, WI2, CS, SN ) + CALL SROT( N-J1-1, T( J1, J1+2 ), LDT, T( J2, J1+2 ), LDT, + $ CS, SN ) + CALL SROT( J1-1, T( 1, J1 ), 1, T( 1, J2 ), 1, CS, SN ) + IF( WANTQ ) + $ CALL SROT( N, Q( 1, J1 ), 1, Q( 1, J2 ), 1, CS, SN ) + END IF +* + IF( N1.EQ.2 ) THEN +* +* Standardize new 2-by-2 block T22 +* + J3 = J1 + N2 + J4 = J3 + 1 + CALL SLANV2( T( J3, J3 ), T( J3, J4 ), T( J4, J3 ), + $ T( J4, J4 ), WR1, WI1, WR2, WI2, CS, SN ) + IF( J3+2.LE.N ) + $ CALL SROT( N-J3-1, T( J3, J3+2 ), LDT, T( J4, J3+2 ), + $ LDT, CS, SN ) + CALL SROT( J3-1, T( 1, J3 ), 1, T( 1, J4 ), 1, CS, SN ) + IF( WANTQ ) + $ CALL SROT( N, Q( 1, J3 ), 1, Q( 1, J4 ), 1, CS, SN ) + END IF +* + END IF + RETURN +* +* Exit with INFO = 1 if swap was rejected. +* + 50 INFO = 1 + RETURN +* +* End of SLAEXC +* + END diff --git a/costa/native/external/lapack/slag2.f b/costa/native/external/lapack/slag2.f new file mode 100644 index 000000000..f5c357251 --- /dev/null +++ b/costa/native/external/lapack/slag2.f @@ -0,0 +1,301 @@ + SUBROUTINE SLAG2( A, LDA, B, LDB, SAFMIN, SCALE1, SCALE2, WR1, + $ WR2, WI ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER LDA, LDB + REAL SAFMIN, SCALE1, SCALE2, WI, WR1, WR2 +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SLAG2 computes the eigenvalues of a 2 x 2 generalized eigenvalue +* problem A - w B, with scaling as necessary to avoid over-/underflow. +* +* The scaling factor "s" results in a modified eigenvalue equation +* +* s A - w B +* +* where s is a non-negative scaling factor chosen so that w, w B, +* and s A do not overflow and, if possible, do not underflow, either. +* +* Arguments +* ========= +* +* A (input) REAL array, dimension (LDA, 2) +* On entry, the 2 x 2 matrix A. It is assumed that its 1-norm +* is less than 1/SAFMIN. Entries less than +* sqrt(SAFMIN)*norm(A) are subject to being treated as zero. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= 2. +* +* B (input) REAL array, dimension (LDB, 2) +* On entry, the 2 x 2 upper triangular matrix B. It is +* assumed that the one-norm of B is less than 1/SAFMIN. The +* diagonals should be at least sqrt(SAFMIN) times the largest +* element of B (in absolute value); if a diagonal is smaller +* than that, then +/- sqrt(SAFMIN) will be used instead of +* that diagonal. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= 2. +* +* SAFMIN (input) REAL +* The smallest positive number s.t. 1/SAFMIN does not +* overflow. (This should always be SLAMCH('S') -- it is an +* argument in order to avoid having to call SLAMCH frequently.) +* +* SCALE1 (output) REAL +* A scaling factor used to avoid over-/underflow in the +* eigenvalue equation which defines the first eigenvalue. If +* the eigenvalues are complex, then the eigenvalues are +* ( WR1 +/- WI i ) / SCALE1 (which may lie outside the +* exponent range of the machine), SCALE1=SCALE2, and SCALE1 +* will always be positive. If the eigenvalues are real, then +* the first (real) eigenvalue is WR1 / SCALE1 , but this may +* overflow or underflow, and in fact, SCALE1 may be zero or +* less than the underflow threshhold if the exact eigenvalue +* is sufficiently large. +* +* SCALE2 (output) REAL +* A scaling factor used to avoid over-/underflow in the +* eigenvalue equation which defines the second eigenvalue. If +* the eigenvalues are complex, then SCALE2=SCALE1. If the +* eigenvalues are real, then the second (real) eigenvalue is +* WR2 / SCALE2 , but this may overflow or underflow, and in +* fact, SCALE2 may be zero or less than the underflow +* threshhold if the exact eigenvalue is sufficiently large. +* +* WR1 (output) REAL +* If the eigenvalue is real, then WR1 is SCALE1 times the +* eigenvalue closest to the (2,2) element of A B**(-1). If the +* eigenvalue is complex, then WR1=WR2 is SCALE1 times the real +* part of the eigenvalues. +* +* WR2 (output) REAL +* If the eigenvalue is real, then WR2 is SCALE2 times the +* other eigenvalue. If the eigenvalue is complex, then +* WR1=WR2 is SCALE1 times the real part of the eigenvalues. +* +* WI (output) REAL +* If the eigenvalue is real, then WI is zero. If the +* eigenvalue is complex, then WI is SCALE1 times the imaginary +* part of the eigenvalues. WI will always be non-negative. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) + REAL HALF + PARAMETER ( HALF = ONE / TWO ) + REAL FUZZY1 + PARAMETER ( FUZZY1 = ONE+1.0E-5 ) +* .. +* .. Local Scalars .. + REAL A11, A12, A21, A22, ABI22, ANORM, AS11, AS12, + $ AS22, ASCALE, B11, B12, B22, BINV11, BINV22, + $ BMIN, BNORM, BSCALE, BSIZE, C1, C2, C3, C4, C5, + $ DIFF, DISCR, PP, QQ, R, RTMAX, RTMIN, S1, S2, + $ SAFMAX, SHIFT, SS, SUM, WABS, WBIG, WDET, + $ WSCALE, WSIZE, WSMALL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SIGN, SQRT +* .. +* .. Executable Statements .. +* + RTMIN = SQRT( SAFMIN ) + RTMAX = ONE / RTMIN + SAFMAX = ONE / SAFMIN +* +* Scale A +* + ANORM = MAX( ABS( A( 1, 1 ) )+ABS( A( 2, 1 ) ), + $ ABS( A( 1, 2 ) )+ABS( A( 2, 2 ) ), SAFMIN ) + ASCALE = ONE / ANORM + A11 = ASCALE*A( 1, 1 ) + A21 = ASCALE*A( 2, 1 ) + A12 = ASCALE*A( 1, 2 ) + A22 = ASCALE*A( 2, 2 ) +* +* Perturb B if necessary to insure non-singularity +* + B11 = B( 1, 1 ) + B12 = B( 1, 2 ) + B22 = B( 2, 2 ) + BMIN = RTMIN*MAX( ABS( B11 ), ABS( B12 ), ABS( B22 ), RTMIN ) + IF( ABS( B11 ).LT.BMIN ) + $ B11 = SIGN( BMIN, B11 ) + IF( ABS( B22 ).LT.BMIN ) + $ B22 = SIGN( BMIN, B22 ) +* +* Scale B +* + BNORM = MAX( ABS( B11 ), ABS( B12 )+ABS( B22 ), SAFMIN ) + BSIZE = MAX( ABS( B11 ), ABS( B22 ) ) + BSCALE = ONE / BSIZE + B11 = B11*BSCALE + B12 = B12*BSCALE + B22 = B22*BSCALE +* +* Compute larger eigenvalue by method described by C. van Loan +* +* ( AS is A shifted by -SHIFT*B ) +* + BINV11 = ONE / B11 + BINV22 = ONE / B22 + S1 = A11*BINV11 + S2 = A22*BINV22 + IF( ABS( S1 ).LE.ABS( S2 ) ) THEN + AS12 = A12 - S1*B12 + AS22 = A22 - S1*B22 + SS = A21*( BINV11*BINV22 ) + ABI22 = AS22*BINV22 - SS*B12 + PP = HALF*ABI22 + SHIFT = S1 + ELSE + AS12 = A12 - S2*B12 + AS11 = A11 - S2*B11 + SS = A21*( BINV11*BINV22 ) + ABI22 = -SS*B12 + PP = HALF*( AS11*BINV11+ABI22 ) + SHIFT = S2 + END IF + QQ = SS*AS12 + IF( ABS( PP*RTMIN ).GE.ONE ) THEN + DISCR = ( RTMIN*PP )**2 + QQ*SAFMIN + R = SQRT( ABS( DISCR ) )*RTMAX + ELSE + IF( PP**2+ABS( QQ ).LE.SAFMIN ) THEN + DISCR = ( RTMAX*PP )**2 + QQ*SAFMAX + R = SQRT( ABS( DISCR ) )*RTMIN + ELSE + DISCR = PP**2 + QQ + R = SQRT( ABS( DISCR ) ) + END IF + END IF +* +* Note: the test of R in the following IF is to cover the case when +* DISCR is small and negative and is flushed to zero during +* the calculation of R. On machines which have a consistent +* flush-to-zero threshhold and handle numbers above that +* threshhold correctly, it would not be necessary. +* + IF( DISCR.GE.ZERO .OR. R.EQ.ZERO ) THEN + SUM = PP + SIGN( R, PP ) + DIFF = PP - SIGN( R, PP ) + WBIG = SHIFT + SUM +* +* Compute smaller eigenvalue +* + WSMALL = SHIFT + DIFF + IF( HALF*ABS( WBIG ).GT.MAX( ABS( WSMALL ), SAFMIN ) ) THEN + WDET = ( A11*A22-A12*A21 )*( BINV11*BINV22 ) + WSMALL = WDET / WBIG + END IF +* +* Choose (real) eigenvalue closest to 2,2 element of A*B**(-1) +* for WR1. +* + IF( PP.GT.ABI22 ) THEN + WR1 = MIN( WBIG, WSMALL ) + WR2 = MAX( WBIG, WSMALL ) + ELSE + WR1 = MAX( WBIG, WSMALL ) + WR2 = MIN( WBIG, WSMALL ) + END IF + WI = ZERO + ELSE +* +* Complex eigenvalues +* + WR1 = SHIFT + PP + WR2 = WR1 + WI = R + END IF +* +* Further scaling to avoid underflow and overflow in computing +* SCALE1 and overflow in computing w*B. +* +* This scale factor (WSCALE) is bounded from above using C1 and C2, +* and from below using C3 and C4. +* C1 implements the condition s A must never overflow. +* C2 implements the condition w B must never overflow. +* C3, with C2, +* implement the condition that s A - w B must never overflow. +* C4 implements the condition s should not underflow. +* C5 implements the condition max(s,|w|) should be at least 2. +* + C1 = BSIZE*( SAFMIN*MAX( ONE, ASCALE ) ) + C2 = SAFMIN*MAX( ONE, BNORM ) + C3 = BSIZE*SAFMIN + IF( ASCALE.LE.ONE .AND. BSIZE.LE.ONE ) THEN + C4 = MIN( ONE, ( ASCALE / SAFMIN )*BSIZE ) + ELSE + C4 = ONE + END IF + IF( ASCALE.LE.ONE .OR. BSIZE.LE.ONE ) THEN + C5 = MIN( ONE, ASCALE*BSIZE ) + ELSE + C5 = ONE + END IF +* +* Scale first eigenvalue +* + WABS = ABS( WR1 ) + ABS( WI ) + WSIZE = MAX( SAFMIN, C1, FUZZY1*( WABS*C2+C3 ), + $ MIN( C4, HALF*MAX( WABS, C5 ) ) ) + IF( WSIZE.NE.ONE ) THEN + WSCALE = ONE / WSIZE + IF( WSIZE.GT.ONE ) THEN + SCALE1 = ( MAX( ASCALE, BSIZE )*WSCALE )* + $ MIN( ASCALE, BSIZE ) + ELSE + SCALE1 = ( MIN( ASCALE, BSIZE )*WSCALE )* + $ MAX( ASCALE, BSIZE ) + END IF + WR1 = WR1*WSCALE + IF( WI.NE.ZERO ) THEN + WI = WI*WSCALE + WR2 = WR1 + SCALE2 = SCALE1 + END IF + ELSE + SCALE1 = ASCALE*BSIZE + SCALE2 = SCALE1 + END IF +* +* Scale second eigenvalue (if real) +* + IF( WI.EQ.ZERO ) THEN + WSIZE = MAX( SAFMIN, C1, FUZZY1*( ABS( WR2 )*C2+C3 ), + $ MIN( C4, HALF*MAX( ABS( WR2 ), C5 ) ) ) + IF( WSIZE.NE.ONE ) THEN + WSCALE = ONE / WSIZE + IF( WSIZE.GT.ONE ) THEN + SCALE2 = ( MAX( ASCALE, BSIZE )*WSCALE )* + $ MIN( ASCALE, BSIZE ) + ELSE + SCALE2 = ( MIN( ASCALE, BSIZE )*WSCALE )* + $ MAX( ASCALE, BSIZE ) + END IF + WR2 = WR2*WSCALE + ELSE + SCALE2 = ASCALE*BSIZE + END IF + END IF +* +* End of SLAG2 +* + RETURN + END diff --git a/costa/native/external/lapack/slags2.f b/costa/native/external/lapack/slags2.f new file mode 100644 index 000000000..0d833e7a1 --- /dev/null +++ b/costa/native/external/lapack/slags2.f @@ -0,0 +1,270 @@ + SUBROUTINE SLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, CSV, + $ SNV, CSQ, SNQ ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + LOGICAL UPPER + REAL A1, A2, A3, B1, B2, B3, CSQ, CSU, CSV, SNQ, + $ SNU, SNV +* .. +* +* Purpose +* ======= +* +* SLAGS2 computes 2-by-2 orthogonal matrices U, V and Q, such +* that if ( UPPER ) then +* +* U'*A*Q = U'*( A1 A2 )*Q = ( x 0 ) +* ( 0 A3 ) ( x x ) +* and +* V'*B*Q = V'*( B1 B2 )*Q = ( x 0 ) +* ( 0 B3 ) ( x x ) +* +* or if ( .NOT.UPPER ) then +* +* U'*A*Q = U'*( A1 0 )*Q = ( x x ) +* ( A2 A3 ) ( 0 x ) +* and +* V'*B*Q = V'*( B1 0 )*Q = ( x x ) +* ( B2 B3 ) ( 0 x ) +* +* The rows of the transformed A and B are parallel, where +* +* U = ( CSU SNU ), V = ( CSV SNV ), Q = ( CSQ SNQ ) +* ( -SNU CSU ) ( -SNV CSV ) ( -SNQ CSQ ) +* +* Z' denotes the transpose of Z. +* +* +* Arguments +* ========= +* +* UPPER (input) LOGICAL +* = .TRUE.: the input matrices A and B are upper triangular. +* = .FALSE.: the input matrices A and B are lower triangular. +* +* A1 (input) REAL +* A2 (input) REAL +* A3 (input) REAL +* On entry, A1, A2 and A3 are elements of the input 2-by-2 +* upper (lower) triangular matrix A. +* +* B1 (input) REAL +* B2 (input) REAL +* B3 (input) REAL +* On entry, B1, B2 and B3 are elements of the input 2-by-2 +* upper (lower) triangular matrix B. +* +* CSU (output) REAL +* SNU (output) REAL +* The desired orthogonal matrix U. +* +* CSV (output) REAL +* SNV (output) REAL +* The desired orthogonal matrix V. +* +* CSQ (output) REAL +* SNQ (output) REAL +* The desired orthogonal matrix Q. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + REAL A, AUA11, AUA12, AUA21, AUA22, AVB11, AVB12, + $ AVB21, AVB22, CSL, CSR, D, S1, S2, SNL, + $ SNR, UA11R, UA22R, VB11R, VB22R, B, C, R, UA11, + $ UA12, UA21, UA22, VB11, VB12, VB21, VB22 +* .. +* .. External Subroutines .. + EXTERNAL SLARTG, SLASV2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + IF( UPPER ) THEN +* +* Input matrices A and B are upper triangular matrices +* +* Form matrix C = A*adj(B) = ( a b ) +* ( 0 d ) +* + A = A1*B3 + D = A3*B1 + B = A2*B1 - A1*B2 +* +* The SVD of real 2-by-2 triangular C +* +* ( CSL -SNL )*( A B )*( CSR SNR ) = ( R 0 ) +* ( SNL CSL ) ( 0 D ) ( -SNR CSR ) ( 0 T ) +* + CALL SLASV2( A, B, D, S1, S2, SNR, CSR, SNL, CSL ) +* + IF( ABS( CSL ).GE.ABS( SNL ) .OR. ABS( CSR ).GE.ABS( SNR ) ) + $ THEN +* +* Compute the (1,1) and (1,2) elements of U'*A and V'*B, +* and (1,2) element of |U|'*|A| and |V|'*|B|. +* + UA11R = CSL*A1 + UA12 = CSL*A2 + SNL*A3 +* + VB11R = CSR*B1 + VB12 = CSR*B2 + SNR*B3 +* + AUA12 = ABS( CSL )*ABS( A2 ) + ABS( SNL )*ABS( A3 ) + AVB12 = ABS( CSR )*ABS( B2 ) + ABS( SNR )*ABS( B3 ) +* +* zero (1,2) elements of U'*A and V'*B +* + IF( ( ABS( UA11R )+ABS( UA12 ) ).NE.ZERO ) THEN + IF( AUA12 / ( ABS( UA11R )+ABS( UA12 ) ).LE.AVB12 / + $ ( ABS( VB11R )+ABS( VB12 ) ) ) THEN + CALL SLARTG( -UA11R, UA12, CSQ, SNQ, R ) + ELSE + CALL SLARTG( -VB11R, VB12, CSQ, SNQ, R ) + END IF + ELSE + CALL SLARTG( -VB11R, VB12, CSQ, SNQ, R ) + END IF +* + CSU = CSL + SNU = -SNL + CSV = CSR + SNV = -SNR +* + ELSE +* +* Compute the (2,1) and (2,2) elements of U'*A and V'*B, +* and (2,2) element of |U|'*|A| and |V|'*|B|. +* + UA21 = -SNL*A1 + UA22 = -SNL*A2 + CSL*A3 +* + VB21 = -SNR*B1 + VB22 = -SNR*B2 + CSR*B3 +* + AUA22 = ABS( SNL )*ABS( A2 ) + ABS( CSL )*ABS( A3 ) + AVB22 = ABS( SNR )*ABS( B2 ) + ABS( CSR )*ABS( B3 ) +* +* zero (2,2) elements of U'*A and V'*B, and then swap. +* + IF( ( ABS( UA21 )+ABS( UA22 ) ).NE.ZERO ) THEN + IF( AUA22 / ( ABS( UA21 )+ABS( UA22 ) ).LE.AVB22 / + $ ( ABS( VB21 )+ABS( VB22 ) ) ) THEN + CALL SLARTG( -UA21, UA22, CSQ, SNQ, R ) + ELSE + CALL SLARTG( -VB21, VB22, CSQ, SNQ, R ) + END IF + ELSE + CALL SLARTG( -VB21, VB22, CSQ, SNQ, R ) + END IF +* + CSU = SNL + SNU = CSL + CSV = SNR + SNV = CSR +* + END IF +* + ELSE +* +* Input matrices A and B are lower triangular matrices +* +* Form matrix C = A*adj(B) = ( a 0 ) +* ( c d ) +* + A = A1*B3 + D = A3*B1 + C = A2*B3 - A3*B2 +* +* The SVD of real 2-by-2 triangular C +* +* ( CSL -SNL )*( A 0 )*( CSR SNR ) = ( R 0 ) +* ( SNL CSL ) ( C D ) ( -SNR CSR ) ( 0 T ) +* + CALL SLASV2( A, C, D, S1, S2, SNR, CSR, SNL, CSL ) +* + IF( ABS( CSR ).GE.ABS( SNR ) .OR. ABS( CSL ).GE.ABS( SNL ) ) + $ THEN +* +* Compute the (2,1) and (2,2) elements of U'*A and V'*B, +* and (2,1) element of |U|'*|A| and |V|'*|B|. +* + UA21 = -SNR*A1 + CSR*A2 + UA22R = CSR*A3 +* + VB21 = -SNL*B1 + CSL*B2 + VB22R = CSL*B3 +* + AUA21 = ABS( SNR )*ABS( A1 ) + ABS( CSR )*ABS( A2 ) + AVB21 = ABS( SNL )*ABS( B1 ) + ABS( CSL )*ABS( B2 ) +* +* zero (2,1) elements of U'*A and V'*B. +* + IF( ( ABS( UA21 )+ABS( UA22R ) ).NE.ZERO ) THEN + IF( AUA21 / ( ABS( UA21 )+ABS( UA22R ) ).LE.AVB21 / + $ ( ABS( VB21 )+ABS( VB22R ) ) ) THEN + CALL SLARTG( UA22R, UA21, CSQ, SNQ, R ) + ELSE + CALL SLARTG( VB22R, VB21, CSQ, SNQ, R ) + END IF + ELSE + CALL SLARTG( VB22R, VB21, CSQ, SNQ, R ) + END IF +* + CSU = CSR + SNU = -SNR + CSV = CSL + SNV = -SNL +* + ELSE +* +* Compute the (1,1) and (1,2) elements of U'*A and V'*B, +* and (1,1) element of |U|'*|A| and |V|'*|B|. +* + UA11 = CSR*A1 + SNR*A2 + UA12 = SNR*A3 +* + VB11 = CSL*B1 + SNL*B2 + VB12 = SNL*B3 +* + AUA11 = ABS( CSR )*ABS( A1 ) + ABS( SNR )*ABS( A2 ) + AVB11 = ABS( CSL )*ABS( B1 ) + ABS( SNL )*ABS( B2 ) +* +* zero (1,1) elements of U'*A and V'*B, and then swap. +* + IF( ( ABS( UA11 )+ABS( UA12 ) ).NE.ZERO ) THEN + IF( AUA11 / ( ABS( UA11 )+ABS( UA12 ) ).LE.AVB11 / + $ ( ABS( VB11 )+ABS( VB12 ) ) ) THEN + CALL SLARTG( UA12, UA11, CSQ, SNQ, R ) + ELSE + CALL SLARTG( VB12, VB11, CSQ, SNQ, R ) + END IF + ELSE + CALL SLARTG( VB12, VB11, CSQ, SNQ, R ) + END IF +* + CSU = SNR + SNU = CSR + CSV = SNL + SNV = CSL +* + END IF +* + END IF +* + RETURN +* +* End of SLAGS2 +* + END diff --git a/costa/native/external/lapack/slagtf.f b/costa/native/external/lapack/slagtf.f new file mode 100644 index 000000000..c736e4b2b --- /dev/null +++ b/costa/native/external/lapack/slagtf.f @@ -0,0 +1,191 @@ + SUBROUTINE SLAGTF( N, A, LAMBDA, B, C, TOL, D, IN, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, N + REAL LAMBDA, TOL +* .. +* .. Array Arguments .. + INTEGER IN( * ) + REAL A( * ), B( * ), C( * ), D( * ) +* .. +* +* Purpose +* ======= +* +* SLAGTF factorizes the matrix (T - lambda*I), where T is an n by n +* tridiagonal matrix and lambda is a scalar, as +* +* T - lambda*I = PLU, +* +* where P is a permutation matrix, L is a unit lower tridiagonal matrix +* with at most one non-zero sub-diagonal elements per column and U is +* an upper triangular matrix with at most two non-zero super-diagonal +* elements per column. +* +* The factorization is obtained by Gaussian elimination with partial +* pivoting and implicit row scaling. +* +* The parameter LAMBDA is included in the routine so that SLAGTF may +* be used, in conjunction with SLAGTS, to obtain eigenvectors of T by +* inverse iteration. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix T. +* +* A (input/output) REAL array, dimension (N) +* On entry, A must contain the diagonal elements of T. +* +* On exit, A is overwritten by the n diagonal elements of the +* upper triangular matrix U of the factorization of T. +* +* LAMBDA (input) REAL +* On entry, the scalar lambda. +* +* B (input/output) REAL array, dimension (N-1) +* On entry, B must contain the (n-1) super-diagonal elements of +* T. +* +* On exit, B is overwritten by the (n-1) super-diagonal +* elements of the matrix U of the factorization of T. +* +* C (input/output) REAL array, dimension (N-1) +* On entry, C must contain the (n-1) sub-diagonal elements of +* T. +* +* On exit, C is overwritten by the (n-1) sub-diagonal elements +* of the matrix L of the factorization of T. +* +* TOL (input) REAL +* On entry, a relative tolerance used to indicate whether or +* not the matrix (T - lambda*I) is nearly singular. TOL should +* normally be chose as approximately the largest relative error +* in the elements of T. For example, if the elements of T are +* correct to about 4 significant figures, then TOL should be +* set to about 5*10**(-4). If TOL is supplied as less than eps, +* where eps is the relative machine precision, then the value +* eps is used in place of TOL. +* +* D (output) REAL array, dimension (N-2) +* On exit, D is overwritten by the (n-2) second super-diagonal +* elements of the matrix U of the factorization of T. +* +* IN (output) INTEGER array, dimension (N) +* On exit, IN contains details of the permutation matrix P. If +* an interchange occurred at the kth step of the elimination, +* then IN(k) = 1, otherwise IN(k) = 0. The element IN(n) +* returns the smallest positive integer j such that +* +* abs( u(j,j) ).le. norm( (T - lambda*I)(j) )*TOL, +* +* where norm( A(j) ) denotes the sum of the absolute values of +* the jth row of the matrix A. If no such j exists then IN(n) +* is returned as zero. If IN(n) is returned as positive, then a +* diagonal element of U is small, indicating that +* (T - lambda*I) is singular or nearly singular, +* +* INFO (output) INTEGER +* = 0 : successful exit +* .lt. 0: if INFO = -k, the kth argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER K + REAL EPS, MULT, PIV1, PIV2, SCALE1, SCALE2, TEMP, TL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'SLAGTF', -INFO ) + RETURN + END IF +* + IF( N.EQ.0 ) + $ RETURN +* + A( 1 ) = A( 1 ) - LAMBDA + IN( N ) = 0 + IF( N.EQ.1 ) THEN + IF( A( 1 ).EQ.ZERO ) + $ IN( 1 ) = 1 + RETURN + END IF +* + EPS = SLAMCH( 'Epsilon' ) +* + TL = MAX( TOL, EPS ) + SCALE1 = ABS( A( 1 ) ) + ABS( B( 1 ) ) + DO 10 K = 1, N - 1 + A( K+1 ) = A( K+1 ) - LAMBDA + SCALE2 = ABS( C( K ) ) + ABS( A( K+1 ) ) + IF( K.LT.( N-1 ) ) + $ SCALE2 = SCALE2 + ABS( B( K+1 ) ) + IF( A( K ).EQ.ZERO ) THEN + PIV1 = ZERO + ELSE + PIV1 = ABS( A( K ) ) / SCALE1 + END IF + IF( C( K ).EQ.ZERO ) THEN + IN( K ) = 0 + PIV2 = ZERO + SCALE1 = SCALE2 + IF( K.LT.( N-1 ) ) + $ D( K ) = ZERO + ELSE + PIV2 = ABS( C( K ) ) / SCALE2 + IF( PIV2.LE.PIV1 ) THEN + IN( K ) = 0 + SCALE1 = SCALE2 + C( K ) = C( K ) / A( K ) + A( K+1 ) = A( K+1 ) - C( K )*B( K ) + IF( K.LT.( N-1 ) ) + $ D( K ) = ZERO + ELSE + IN( K ) = 1 + MULT = A( K ) / C( K ) + A( K ) = C( K ) + TEMP = A( K+1 ) + A( K+1 ) = B( K ) - MULT*TEMP + IF( K.LT.( N-1 ) ) THEN + D( K ) = B( K+1 ) + B( K+1 ) = -MULT*D( K ) + END IF + B( K ) = TEMP + C( K ) = MULT + END IF + END IF + IF( ( MAX( PIV1, PIV2 ).LE.TL ) .AND. ( IN( N ).EQ.0 ) ) + $ IN( N ) = K + 10 CONTINUE + IF( ( ABS( A( N ) ).LE.SCALE1*TL ) .AND. ( IN( N ).EQ.0 ) ) + $ IN( N ) = N +* + RETURN +* +* End of SLAGTF +* + END diff --git a/costa/native/external/lapack/slagtm.f b/costa/native/external/lapack/slagtm.f new file mode 100644 index 000000000..7b6d336e6 --- /dev/null +++ b/costa/native/external/lapack/slagtm.f @@ -0,0 +1,191 @@ + SUBROUTINE SLAGTM( TRANS, N, NRHS, ALPHA, DL, D, DU, X, LDX, BETA, + $ B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER LDB, LDX, N, NRHS + REAL ALPHA, BETA +* .. +* .. Array Arguments .. + REAL B( LDB, * ), D( * ), DL( * ), DU( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SLAGTM performs a matrix-vector product of the form +* +* B := alpha * A * X + beta * B +* +* where A is a tridiagonal matrix of order N, B and X are N by NRHS +* matrices, and alpha and beta are real scalars, each of which may be +* 0., 1., or -1. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER +* Specifies the operation applied to A. +* = 'N': No transpose, B := alpha * A * X + beta * B +* = 'T': Transpose, B := alpha * A'* X + beta * B +* = 'C': Conjugate transpose = Transpose +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices X and B. +* +* ALPHA (input) REAL +* The scalar alpha. ALPHA must be 0., 1., or -1.; otherwise, +* it is assumed to be 0. +* +* DL (input) REAL array, dimension (N-1) +* The (n-1) sub-diagonal elements of T. +* +* D (input) REAL array, dimension (N) +* The diagonal elements of T. +* +* DU (input) REAL array, dimension (N-1) +* The (n-1) super-diagonal elements of T. +* +* X (input) REAL array, dimension (LDX,NRHS) +* The N by NRHS matrix X. +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(N,1). +* +* BETA (input) REAL +* The scalar beta. BETA must be 0., 1., or -1.; otherwise, +* it is assumed to be 1. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the N by NRHS matrix B. +* On exit, B is overwritten by the matrix expression +* B := alpha * A * X + beta * B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(N,1). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) + $ RETURN +* +* Multiply B by BETA if BETA.NE.1. +* + IF( BETA.EQ.ZERO ) THEN + DO 20 J = 1, NRHS + DO 10 I = 1, N + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE IF( BETA.EQ.-ONE ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = -B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF +* + IF( ALPHA.EQ.ONE ) THEN + IF( LSAME( TRANS, 'N' ) ) THEN +* +* Compute B := B + A*X +* + DO 60 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + + $ DU( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) + DL( N-1 )*X( N-1, J ) + + $ D( N )*X( N, J ) + DO 50 I = 2, N - 1 + B( I, J ) = B( I, J ) + DL( I-1 )*X( I-1, J ) + + $ D( I )*X( I, J ) + DU( I )*X( I+1, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE +* +* Compute B := B + A'*X +* + DO 80 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + + $ DL( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) + DU( N-1 )*X( N-1, J ) + + $ D( N )*X( N, J ) + DO 70 I = 2, N - 1 + B( I, J ) = B( I, J ) + DU( I-1 )*X( I-1, J ) + + $ D( I )*X( I, J ) + DL( I )*X( I+1, J ) + 70 CONTINUE + END IF + 80 CONTINUE + END IF + ELSE IF( ALPHA.EQ.-ONE ) THEN + IF( LSAME( TRANS, 'N' ) ) THEN +* +* Compute B := B - A*X +* + DO 100 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) - + $ DU( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) - DL( N-1 )*X( N-1, J ) - + $ D( N )*X( N, J ) + DO 90 I = 2, N - 1 + B( I, J ) = B( I, J ) - DL( I-1 )*X( I-1, J ) - + $ D( I )*X( I, J ) - DU( I )*X( I+1, J ) + 90 CONTINUE + END IF + 100 CONTINUE + ELSE +* +* Compute B := B - A'*X +* + DO 120 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) - + $ DL( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) - DU( N-1 )*X( N-1, J ) - + $ D( N )*X( N, J ) + DO 110 I = 2, N - 1 + B( I, J ) = B( I, J ) - DU( I-1 )*X( I-1, J ) - + $ D( I )*X( I, J ) - DL( I )*X( I+1, J ) + 110 CONTINUE + END IF + 120 CONTINUE + END IF + END IF + RETURN +* +* End of SLAGTM +* + END diff --git a/costa/native/external/lapack/slagts.f b/costa/native/external/lapack/slagts.f new file mode 100644 index 000000000..44687dded --- /dev/null +++ b/costa/native/external/lapack/slagts.f @@ -0,0 +1,305 @@ + SUBROUTINE SLAGTS( JOB, N, A, B, C, D, IN, Y, TOL, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, JOB, N + REAL TOL +* .. +* .. Array Arguments .. + INTEGER IN( * ) + REAL A( * ), B( * ), C( * ), D( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* SLAGTS may be used to solve one of the systems of equations +* +* (T - lambda*I)*x = y or (T - lambda*I)'*x = y, +* +* where T is an n by n tridiagonal matrix, for x, following the +* factorization of (T - lambda*I) as +* +* (T - lambda*I) = P*L*U , +* +* by routine SLAGTF. The choice of equation to be solved is +* controlled by the argument JOB, and in each case there is an option +* to perturb zero or very small diagonal elements of U, this option +* being intended for use in applications such as inverse iteration. +* +* Arguments +* ========= +* +* JOB (input) INTEGER +* Specifies the job to be performed by SLAGTS as follows: +* = 1: The equations (T - lambda*I)x = y are to be solved, +* but diagonal elements of U are not to be perturbed. +* = -1: The equations (T - lambda*I)x = y are to be solved +* and, if overflow would otherwise occur, the diagonal +* elements of U are to be perturbed. See argument TOL +* below. +* = 2: The equations (T - lambda*I)'x = y are to be solved, +* but diagonal elements of U are not to be perturbed. +* = -2: The equations (T - lambda*I)'x = y are to be solved +* and, if overflow would otherwise occur, the diagonal +* elements of U are to be perturbed. See argument TOL +* below. +* +* N (input) INTEGER +* The order of the matrix T. +* +* A (input) REAL array, dimension (N) +* On entry, A must contain the diagonal elements of U as +* returned from SLAGTF. +* +* B (input) REAL array, dimension (N-1) +* On entry, B must contain the first super-diagonal elements of +* U as returned from SLAGTF. +* +* C (input) REAL array, dimension (N-1) +* On entry, C must contain the sub-diagonal elements of L as +* returned from SLAGTF. +* +* D (input) REAL array, dimension (N-2) +* On entry, D must contain the second super-diagonal elements +* of U as returned from SLAGTF. +* +* IN (input) INTEGER array, dimension (N) +* On entry, IN must contain details of the matrix P as returned +* from SLAGTF. +* +* Y (input/output) REAL array, dimension (N) +* On entry, the right hand side vector y. +* On exit, Y is overwritten by the solution vector x. +* +* TOL (input/output) REAL +* On entry, with JOB .lt. 0, TOL should be the minimum +* perturbation to be made to very small diagonal elements of U. +* TOL should normally be chosen as about eps*norm(U), where eps +* is the relative machine precision, but if TOL is supplied as +* non-positive, then it is reset to eps*max( abs( u(i,j) ) ). +* If JOB .gt. 0 then TOL is not referenced. +* +* On exit, TOL is changed as described above, only if TOL is +* non-positive on entry. Otherwise TOL is unchanged. +* +* INFO (output) INTEGER +* = 0 : successful exit +* .lt. 0: if INFO = -i, the i-th argument had an illegal value +* .gt. 0: overflow would occur when computing the INFO(th) +* element of the solution vector x. This can only occur +* when JOB is supplied as positive and either means +* that a diagonal element of U is very small, or that +* the elements of the right-hand side vector y are very +* large. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER K + REAL ABSAK, AK, BIGNUM, EPS, PERT, SFMIN, TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SIGN +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( ( ABS( JOB ).GT.2 ) .OR. ( JOB.EQ.0 ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAGTS', -INFO ) + RETURN + END IF +* + IF( N.EQ.0 ) + $ RETURN +* + EPS = SLAMCH( 'Epsilon' ) + SFMIN = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SFMIN +* + IF( JOB.LT.0 ) THEN + IF( TOL.LE.ZERO ) THEN + TOL = ABS( A( 1 ) ) + IF( N.GT.1 ) + $ TOL = MAX( TOL, ABS( A( 2 ) ), ABS( B( 1 ) ) ) + DO 10 K = 3, N + TOL = MAX( TOL, ABS( A( K ) ), ABS( B( K-1 ) ), + $ ABS( D( K-2 ) ) ) + 10 CONTINUE + TOL = TOL*EPS + IF( TOL.EQ.ZERO ) + $ TOL = EPS + END IF + END IF +* + IF( ABS( JOB ).EQ.1 ) THEN + DO 20 K = 2, N + IF( IN( K-1 ).EQ.0 ) THEN + Y( K ) = Y( K ) - C( K-1 )*Y( K-1 ) + ELSE + TEMP = Y( K-1 ) + Y( K-1 ) = Y( K ) + Y( K ) = TEMP - C( K-1 )*Y( K ) + END IF + 20 CONTINUE + IF( JOB.EQ.1 ) THEN + DO 30 K = N, 1, -1 + IF( K.LE.N-2 ) THEN + TEMP = Y( K ) - B( K )*Y( K+1 ) - D( K )*Y( K+2 ) + ELSE IF( K.EQ.N-1 ) THEN + TEMP = Y( K ) - B( K )*Y( K+1 ) + ELSE + TEMP = Y( K ) + END IF + AK = A( K ) + ABSAK = ABS( AK ) + IF( ABSAK.LT.ONE ) THEN + IF( ABSAK.LT.SFMIN ) THEN + IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK ) + $ THEN + INFO = K + RETURN + ELSE + TEMP = TEMP*BIGNUM + AK = AK*BIGNUM + END IF + ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN + INFO = K + RETURN + END IF + END IF + Y( K ) = TEMP / AK + 30 CONTINUE + ELSE + DO 50 K = N, 1, -1 + IF( K.LE.N-2 ) THEN + TEMP = Y( K ) - B( K )*Y( K+1 ) - D( K )*Y( K+2 ) + ELSE IF( K.EQ.N-1 ) THEN + TEMP = Y( K ) - B( K )*Y( K+1 ) + ELSE + TEMP = Y( K ) + END IF + AK = A( K ) + PERT = SIGN( TOL, AK ) + 40 CONTINUE + ABSAK = ABS( AK ) + IF( ABSAK.LT.ONE ) THEN + IF( ABSAK.LT.SFMIN ) THEN + IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK ) + $ THEN + AK = AK + PERT + PERT = 2*PERT + GO TO 40 + ELSE + TEMP = TEMP*BIGNUM + AK = AK*BIGNUM + END IF + ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN + AK = AK + PERT + PERT = 2*PERT + GO TO 40 + END IF + END IF + Y( K ) = TEMP / AK + 50 CONTINUE + END IF + ELSE +* +* Come to here if JOB = 2 or -2 +* + IF( JOB.EQ.2 ) THEN + DO 60 K = 1, N + IF( K.GE.3 ) THEN + TEMP = Y( K ) - B( K-1 )*Y( K-1 ) - D( K-2 )*Y( K-2 ) + ELSE IF( K.EQ.2 ) THEN + TEMP = Y( K ) - B( K-1 )*Y( K-1 ) + ELSE + TEMP = Y( K ) + END IF + AK = A( K ) + ABSAK = ABS( AK ) + IF( ABSAK.LT.ONE ) THEN + IF( ABSAK.LT.SFMIN ) THEN + IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK ) + $ THEN + INFO = K + RETURN + ELSE + TEMP = TEMP*BIGNUM + AK = AK*BIGNUM + END IF + ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN + INFO = K + RETURN + END IF + END IF + Y( K ) = TEMP / AK + 60 CONTINUE + ELSE + DO 80 K = 1, N + IF( K.GE.3 ) THEN + TEMP = Y( K ) - B( K-1 )*Y( K-1 ) - D( K-2 )*Y( K-2 ) + ELSE IF( K.EQ.2 ) THEN + TEMP = Y( K ) - B( K-1 )*Y( K-1 ) + ELSE + TEMP = Y( K ) + END IF + AK = A( K ) + PERT = SIGN( TOL, AK ) + 70 CONTINUE + ABSAK = ABS( AK ) + IF( ABSAK.LT.ONE ) THEN + IF( ABSAK.LT.SFMIN ) THEN + IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK ) + $ THEN + AK = AK + PERT + PERT = 2*PERT + GO TO 70 + ELSE + TEMP = TEMP*BIGNUM + AK = AK*BIGNUM + END IF + ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN + AK = AK + PERT + PERT = 2*PERT + GO TO 70 + END IF + END IF + Y( K ) = TEMP / AK + 80 CONTINUE + END IF +* + DO 90 K = N, 2, -1 + IF( IN( K-1 ).EQ.0 ) THEN + Y( K-1 ) = Y( K-1 ) - C( K-1 )*Y( K ) + ELSE + TEMP = Y( K-1 ) + Y( K-1 ) = Y( K ) + Y( K ) = TEMP - C( K-1 )*Y( K ) + END IF + 90 CONTINUE + END IF +* +* End of SLAGTS +* + END diff --git a/costa/native/external/lapack/slagv2.f b/costa/native/external/lapack/slagv2.f new file mode 100644 index 000000000..2aa0f1fb6 --- /dev/null +++ b/costa/native/external/lapack/slagv2.f @@ -0,0 +1,290 @@ + SUBROUTINE SLAGV2( A, LDA, B, LDB, ALPHAR, ALPHAI, BETA, CSL, SNL, + $ CSR, SNR ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER LDA, LDB + REAL CSL, CSR, SNL, SNR +* .. +* .. Array Arguments .. + REAL A( LDA, * ), ALPHAI( 2 ), ALPHAR( 2 ), + $ B( LDB, * ), BETA( 2 ) +* .. +* +* Purpose +* ======= +* +* SLAGV2 computes the Generalized Schur factorization of a real 2-by-2 +* matrix pencil (A,B) where B is upper triangular. This routine +* computes orthogonal (rotation) matrices given by CSL, SNL and CSR, +* SNR such that +* +* 1) if the pencil (A,B) has two real eigenvalues (include 0/0 or 1/0 +* types), then +* +* [ a11 a12 ] := [ CSL SNL ] [ a11 a12 ] [ CSR -SNR ] +* [ 0 a22 ] [ -SNL CSL ] [ a21 a22 ] [ SNR CSR ] +* +* [ b11 b12 ] := [ CSL SNL ] [ b11 b12 ] [ CSR -SNR ] +* [ 0 b22 ] [ -SNL CSL ] [ 0 b22 ] [ SNR CSR ], +* +* 2) if the pencil (A,B) has a pair of complex conjugate eigenvalues, +* then +* +* [ a11 a12 ] := [ CSL SNL ] [ a11 a12 ] [ CSR -SNR ] +* [ a21 a22 ] [ -SNL CSL ] [ a21 a22 ] [ SNR CSR ] +* +* [ b11 0 ] := [ CSL SNL ] [ b11 b12 ] [ CSR -SNR ] +* [ 0 b22 ] [ -SNL CSL ] [ 0 b22 ] [ SNR CSR ] +* +* where b11 >= b22 > 0. +* +* +* Arguments +* ========= +* +* A (input/output) REAL array, dimension (LDA, 2) +* On entry, the 2 x 2 matrix A. +* On exit, A is overwritten by the ``A-part'' of the +* generalized Schur form. +* +* LDA (input) INTEGER +* THe leading dimension of the array A. LDA >= 2. +* +* B (input/output) REAL array, dimension (LDB, 2) +* On entry, the upper triangular 2 x 2 matrix B. +* On exit, B is overwritten by the ``B-part'' of the +* generalized Schur form. +* +* LDB (input) INTEGER +* THe leading dimension of the array B. LDB >= 2. +* +* ALPHAR (output) REAL array, dimension (2) +* ALPHAI (output) REAL array, dimension (2) +* BETA (output) REAL array, dimension (2) +* (ALPHAR(k)+i*ALPHAI(k))/BETA(k) are the eigenvalues of the +* pencil (A,B), k=1,2, i = sqrt(-1). Note that BETA(k) may +* be zero. +* +* CSL (output) REAL +* The cosine of the left rotation matrix. +* +* SNL (output) REAL +* The sine of the left rotation matrix. +* +* CSR (output) REAL +* The cosine of the right rotation matrix. +* +* SNR (output) REAL +* The sine of the right rotation matrix. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + REAL ANORM, ASCALE, BNORM, BSCALE, H1, H2, H3, QQ, + $ R, RR, SAFMIN, SCALE1, SCALE2, T, ULP, WI, WR1, + $ WR2 +* .. +* .. External Subroutines .. + EXTERNAL SLAG2, SLARTG, SLASV2, SROT +* .. +* .. External Functions .. + REAL SLAMCH, SLAPY2 + EXTERNAL SLAMCH, SLAPY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + SAFMIN = SLAMCH( 'S' ) + ULP = SLAMCH( 'P' ) +* +* Scale A +* + ANORM = MAX( ABS( A( 1, 1 ) )+ABS( A( 2, 1 ) ), + $ ABS( A( 1, 2 ) )+ABS( A( 2, 2 ) ), SAFMIN ) + ASCALE = ONE / ANORM + A( 1, 1 ) = ASCALE*A( 1, 1 ) + A( 1, 2 ) = ASCALE*A( 1, 2 ) + A( 2, 1 ) = ASCALE*A( 2, 1 ) + A( 2, 2 ) = ASCALE*A( 2, 2 ) +* +* Scale B +* + BNORM = MAX( ABS( B( 1, 1 ) ), ABS( B( 1, 2 ) )+ABS( B( 2, 2 ) ), + $ SAFMIN ) + BSCALE = ONE / BNORM + B( 1, 1 ) = BSCALE*B( 1, 1 ) + B( 1, 2 ) = BSCALE*B( 1, 2 ) + B( 2, 2 ) = BSCALE*B( 2, 2 ) +* +* Check if A can be deflated +* + IF( ABS( A( 2, 1 ) ).LE.ULP ) THEN + CSL = ONE + SNL = ZERO + CSR = ONE + SNR = ZERO + A( 2, 1 ) = ZERO + B( 2, 1 ) = ZERO +* +* Check if B is singular +* + ELSE IF( ABS( B( 1, 1 ) ).LE.ULP ) THEN + CALL SLARTG( A( 1, 1 ), A( 2, 1 ), CSL, SNL, R ) + CSR = ONE + SNR = ZERO + CALL SROT( 2, A( 1, 1 ), LDA, A( 2, 1 ), LDA, CSL, SNL ) + CALL SROT( 2, B( 1, 1 ), LDB, B( 2, 1 ), LDB, CSL, SNL ) + A( 2, 1 ) = ZERO + B( 1, 1 ) = ZERO + B( 2, 1 ) = ZERO +* + ELSE IF( ABS( B( 2, 2 ) ).LE.ULP ) THEN + CALL SLARTG( A( 2, 2 ), A( 2, 1 ), CSR, SNR, T ) + SNR = -SNR + CALL SROT( 2, A( 1, 1 ), 1, A( 1, 2 ), 1, CSR, SNR ) + CALL SROT( 2, B( 1, 1 ), 1, B( 1, 2 ), 1, CSR, SNR ) + CSL = ONE + SNL = ZERO + A( 2, 1 ) = ZERO + B( 2, 1 ) = ZERO + B( 2, 2 ) = ZERO +* + ELSE +* +* B is nonsingular, first compute the eigenvalues of (A,B) +* + CALL SLAG2( A, LDA, B, LDB, SAFMIN, SCALE1, SCALE2, WR1, WR2, + $ WI ) +* + IF( WI.EQ.ZERO ) THEN +* +* two real eigenvalues, compute s*A-w*B +* + H1 = SCALE1*A( 1, 1 ) - WR1*B( 1, 1 ) + H2 = SCALE1*A( 1, 2 ) - WR1*B( 1, 2 ) + H3 = SCALE1*A( 2, 2 ) - WR1*B( 2, 2 ) +* + RR = SLAPY2( H1, H2 ) + QQ = SLAPY2( SCALE1*A( 2, 1 ), H3 ) +* + IF( RR.GT.QQ ) THEN +* +* find right rotation matrix to zero 1,1 element of +* (sA - wB) +* + CALL SLARTG( H2, H1, CSR, SNR, T ) +* + ELSE +* +* find right rotation matrix to zero 2,1 element of +* (sA - wB) +* + CALL SLARTG( H3, SCALE1*A( 2, 1 ), CSR, SNR, T ) +* + END IF +* + SNR = -SNR + CALL SROT( 2, A( 1, 1 ), 1, A( 1, 2 ), 1, CSR, SNR ) + CALL SROT( 2, B( 1, 1 ), 1, B( 1, 2 ), 1, CSR, SNR ) +* +* compute inf norms of A and B +* + H1 = MAX( ABS( A( 1, 1 ) )+ABS( A( 1, 2 ) ), + $ ABS( A( 2, 1 ) )+ABS( A( 2, 2 ) ) ) + H2 = MAX( ABS( B( 1, 1 ) )+ABS( B( 1, 2 ) ), + $ ABS( B( 2, 1 ) )+ABS( B( 2, 2 ) ) ) +* + IF( ( SCALE1*H1 ).GE.ABS( WR1 )*H2 ) THEN +* +* find left rotation matrix Q to zero out B(2,1) +* + CALL SLARTG( B( 1, 1 ), B( 2, 1 ), CSL, SNL, R ) +* + ELSE +* +* find left rotation matrix Q to zero out A(2,1) +* + CALL SLARTG( A( 1, 1 ), A( 2, 1 ), CSL, SNL, R ) +* + END IF +* + CALL SROT( 2, A( 1, 1 ), LDA, A( 2, 1 ), LDA, CSL, SNL ) + CALL SROT( 2, B( 1, 1 ), LDB, B( 2, 1 ), LDB, CSL, SNL ) +* + A( 2, 1 ) = ZERO + B( 2, 1 ) = ZERO +* + ELSE +* +* a pair of complex conjugate eigenvalues +* first compute the SVD of the matrix B +* + CALL SLASV2( B( 1, 1 ), B( 1, 2 ), B( 2, 2 ), R, T, SNR, + $ CSR, SNL, CSL ) +* +* Form (A,B) := Q(A,B)Z' where Q is left rotation matrix and +* Z is right rotation matrix computed from SLASV2 +* + CALL SROT( 2, A( 1, 1 ), LDA, A( 2, 1 ), LDA, CSL, SNL ) + CALL SROT( 2, B( 1, 1 ), LDB, B( 2, 1 ), LDB, CSL, SNL ) + CALL SROT( 2, A( 1, 1 ), 1, A( 1, 2 ), 1, CSR, SNR ) + CALL SROT( 2, B( 1, 1 ), 1, B( 1, 2 ), 1, CSR, SNR ) +* + B( 2, 1 ) = ZERO + B( 1, 2 ) = ZERO +* + END IF +* + END IF +* +* Unscaling +* + A( 1, 1 ) = ANORM*A( 1, 1 ) + A( 2, 1 ) = ANORM*A( 2, 1 ) + A( 1, 2 ) = ANORM*A( 1, 2 ) + A( 2, 2 ) = ANORM*A( 2, 2 ) + B( 1, 1 ) = BNORM*B( 1, 1 ) + B( 2, 1 ) = BNORM*B( 2, 1 ) + B( 1, 2 ) = BNORM*B( 1, 2 ) + B( 2, 2 ) = BNORM*B( 2, 2 ) +* + IF( WI.EQ.ZERO ) THEN + ALPHAR( 1 ) = A( 1, 1 ) + ALPHAR( 2 ) = A( 2, 2 ) + ALPHAI( 1 ) = ZERO + ALPHAI( 2 ) = ZERO + BETA( 1 ) = B( 1, 1 ) + BETA( 2 ) = B( 2, 2 ) + ELSE + ALPHAR( 1 ) = ANORM*WR1 / SCALE1 / BNORM + ALPHAI( 1 ) = ANORM*WI / SCALE1 / BNORM + ALPHAR( 2 ) = ALPHAR( 1 ) + ALPHAI( 2 ) = -ALPHAI( 1 ) + BETA( 1 ) = ONE + BETA( 2 ) = ONE + END IF +* + 10 CONTINUE +* + RETURN +* +* End of SLAGV2 +* + END diff --git a/costa/native/external/lapack/slahqr.f b/costa/native/external/lapack/slahqr.f new file mode 100644 index 000000000..9bbd14eea --- /dev/null +++ b/costa/native/external/lapack/slahqr.f @@ -0,0 +1,435 @@ + SUBROUTINE SLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, + $ ILOZ, IHIZ, Z, LDZ, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + LOGICAL WANTT, WANTZ + INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, N +* .. +* .. Array Arguments .. + REAL H( LDH, * ), WI( * ), WR( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SLAHQR is an auxiliary routine called by SHSEQR to update the +* eigenvalues and Schur decomposition already computed by SHSEQR, by +* dealing with the Hessenberg submatrix in rows and columns ILO to IHI. +* +* Arguments +* ========= +* +* WANTT (input) LOGICAL +* = .TRUE. : the full Schur form T is required; +* = .FALSE.: only eigenvalues are required. +* +* WANTZ (input) LOGICAL +* = .TRUE. : the matrix of Schur vectors Z is required; +* = .FALSE.: Schur vectors are not required. +* +* N (input) INTEGER +* The order of the matrix H. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that H is already upper quasi-triangular in +* rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless +* ILO = 1). SLAHQR works primarily with the Hessenberg +* submatrix in rows and columns ILO to IHI, but applies +* transformations to all of H if WANTT is .TRUE.. +* 1 <= ILO <= max(1,IHI); IHI <= N. +* +* H (input/output) REAL array, dimension (LDH,N) +* On entry, the upper Hessenberg matrix H. +* On exit, if WANTT is .TRUE., H is upper quasi-triangular in +* rows and columns ILO:IHI, with any 2-by-2 diagonal blocks in +* standard form. If WANTT is .FALSE., the contents of H are +* unspecified on exit. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH >= max(1,N). +* +* WR (output) REAL array, dimension (N) +* WI (output) REAL array, dimension (N) +* The real and imaginary parts, respectively, of the computed +* eigenvalues ILO to IHI are stored in the corresponding +* elements of WR and WI. If two eigenvalues are computed as a +* complex conjugate pair, they are stored in consecutive +* elements of WR and WI, say the i-th and (i+1)th, with +* WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the +* eigenvalues are stored in the same order as on the diagonal +* of the Schur form returned in H, with WR(i) = H(i,i), and, if +* H(i:i+1,i:i+1) is a 2-by-2 diagonal block, +* WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). +* +* ILOZ (input) INTEGER +* IHIZ (input) INTEGER +* Specify the rows of Z to which transformations must be +* applied if WANTZ is .TRUE.. +* 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. +* +* Z (input/output) REAL array, dimension (LDZ,N) +* If WANTZ is .TRUE., on entry Z must contain the current +* matrix Z of transformations accumulated by SHSEQR, and on +* exit Z has been updated; transformations are applied only to +* the submatrix Z(ILOZ:IHIZ,ILO:IHI). +* If WANTZ is .FALSE., Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* > 0: SLAHQR failed to compute all the eigenvalues ILO to IHI +* in a total of 30*(IHI-ILO+1) iterations; if INFO = i, +* elements i+1:ihi of WR and WI contain those eigenvalues +* which have been successfully computed. +* +* Further Details +* =============== +* +* 2-96 Based on modifications by +* David Day, Sandia National Laboratory, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, HALF + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, HALF = 0.5E0 ) + REAL DAT1, DAT2 + PARAMETER ( DAT1 = 0.75E+0, DAT2 = -0.4375E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, I2, ITN, ITS, J, K, L, M, NH, NR, NZ + REAL AVE, CS, DISC, H00, H10, H11, H12, H21, H22, + $ H33, H33S, H43H34, H44, H44S, OVFL, S, SMLNUM, + $ SN, SUM, T1, T2, T3, TST1, ULP, UNFL, V1, V2, + $ V3 +* .. +* .. Local Arrays .. + REAL V( 3 ), WORK( 1 ) +* .. +* .. External Functions .. + REAL SLAMCH, SLANHS + EXTERNAL SLAMCH, SLANHS +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLABAD, SLANV2, SLARFG, SROT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SIGN, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( ILO.EQ.IHI ) THEN + WR( ILO ) = H( ILO, ILO ) + WI( ILO ) = ZERO + RETURN + END IF +* + NH = IHI - ILO + 1 + NZ = IHIZ - ILOZ + 1 +* +* Set machine-dependent constants for the stopping criterion. +* If norm(H) <= sqrt(OVFL), overflow should not occur. +* + UNFL = SLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + CALL SLABAD( UNFL, OVFL ) + ULP = SLAMCH( 'Precision' ) + SMLNUM = UNFL*( NH / ULP ) +* +* I1 and I2 are the indices of the first row and last column of H +* to which transformations must be applied. If eigenvalues only are +* being computed, I1 and I2 are set inside the main loop. +* + IF( WANTT ) THEN + I1 = 1 + I2 = N + END IF +* +* ITN is the total number of QR iterations allowed. +* + ITN = 30*NH +* +* The main loop begins here. I is the loop index and decreases from +* IHI to ILO in steps of 1 or 2. Each iteration of the loop works +* with the active submatrix in rows and columns L to I. +* Eigenvalues I+1 to IHI have already converged. Either L = ILO or +* H(L,L-1) is negligible so that the matrix splits. +* + I = IHI + 10 CONTINUE + L = ILO + IF( I.LT.ILO ) + $ GO TO 150 +* +* Perform QR iterations on rows and columns ILO to I until a +* submatrix of order 1 or 2 splits off at the bottom because a +* subdiagonal element has become negligible. +* + DO 130 ITS = 0, ITN +* +* Look for a single small subdiagonal element. +* + DO 20 K = I, L + 1, -1 + TST1 = ABS( H( K-1, K-1 ) ) + ABS( H( K, K ) ) + IF( TST1.EQ.ZERO ) + $ TST1 = SLANHS( '1', I-L+1, H( L, L ), LDH, WORK ) + IF( ABS( H( K, K-1 ) ).LE.MAX( ULP*TST1, SMLNUM ) ) + $ GO TO 30 + 20 CONTINUE + 30 CONTINUE + L = K + IF( L.GT.ILO ) THEN +* +* H(L,L-1) is negligible +* + H( L, L-1 ) = ZERO + END IF +* +* Exit from loop if a submatrix of order 1 or 2 has split off. +* + IF( L.GE.I-1 ) + $ GO TO 140 +* +* Now the active submatrix is in rows and columns L to I. If +* eigenvalues only are being computed, only the active submatrix +* need be transformed. +* + IF( .NOT.WANTT ) THEN + I1 = L + I2 = I + END IF +* + IF( ITS.EQ.10 .OR. ITS.EQ.20 ) THEN +* +* Exceptional shift. +* + S = ABS( H( I, I-1 ) ) + ABS( H( I-1, I-2 ) ) + H44 = DAT1*S + H( I, I ) + H33 = H44 + H43H34 = DAT2*S*S + ELSE +* +* Prepare to use Francis' double shift +* (i.e. 2nd degree generalized Rayleigh quotient) +* + H44 = H( I, I ) + H33 = H( I-1, I-1 ) + H43H34 = H( I, I-1 )*H( I-1, I ) + S = H( I-1, I-2 )*H( I-1, I-2 ) + DISC = ( H33-H44 )*HALF + DISC = DISC*DISC + H43H34 + IF( DISC.GT.ZERO ) THEN +* +* Real roots: use Wilkinson's shift twice +* + DISC = SQRT( DISC ) + AVE = HALF*( H33+H44 ) + IF( ABS( H33 )-ABS( H44 ).GT.ZERO ) THEN + H33 = H33*H44 - H43H34 + H44 = H33 / ( SIGN( DISC, AVE )+AVE ) + ELSE + H44 = SIGN( DISC, AVE ) + AVE + END IF + H33 = H44 + H43H34 = ZERO + END IF + END IF +* +* Look for two consecutive small subdiagonal elements. +* + DO 40 M = I - 2, L, -1 +* Determine the effect of starting the double-shift QR +* iteration at row M, and see if this would make H(M,M-1) +* negligible. +* + H11 = H( M, M ) + H22 = H( M+1, M+1 ) + H21 = H( M+1, M ) + H12 = H( M, M+1 ) + H44S = H44 - H11 + H33S = H33 - H11 + V1 = ( H33S*H44S-H43H34 ) / H21 + H12 + V2 = H22 - H11 - H33S - H44S + V3 = H( M+2, M+1 ) + S = ABS( V1 ) + ABS( V2 ) + ABS( V3 ) + V1 = V1 / S + V2 = V2 / S + V3 = V3 / S + V( 1 ) = V1 + V( 2 ) = V2 + V( 3 ) = V3 + IF( M.EQ.L ) + $ GO TO 50 + H00 = H( M-1, M-1 ) + H10 = H( M, M-1 ) + TST1 = ABS( V1 )*( ABS( H00 )+ABS( H11 )+ABS( H22 ) ) + IF( ABS( H10 )*( ABS( V2 )+ABS( V3 ) ).LE.ULP*TST1 ) + $ GO TO 50 + 40 CONTINUE + 50 CONTINUE +* +* Double-shift QR step +* + DO 120 K = M, I - 1 +* +* The first iteration of this loop determines a reflection G +* from the vector V and applies it from left and right to H, +* thus creating a nonzero bulge below the subdiagonal. +* +* Each subsequent iteration determines a reflection G to +* restore the Hessenberg form in the (K-1)th column, and thus +* chases the bulge one step toward the bottom of the active +* submatrix. NR is the order of G. +* + NR = MIN( 3, I-K+1 ) + IF( K.GT.M ) + $ CALL SCOPY( NR, H( K, K-1 ), 1, V, 1 ) + CALL SLARFG( NR, V( 1 ), V( 2 ), 1, T1 ) + IF( K.GT.M ) THEN + H( K, K-1 ) = V( 1 ) + H( K+1, K-1 ) = ZERO + IF( K.LT.I-1 ) + $ H( K+2, K-1 ) = ZERO + ELSE IF( M.GT.L ) THEN + H( K, K-1 ) = -H( K, K-1 ) + END IF + V2 = V( 2 ) + T2 = T1*V2 + IF( NR.EQ.3 ) THEN + V3 = V( 3 ) + T3 = T1*V3 +* +* Apply G from the left to transform the rows of the matrix +* in columns K to I2. +* + DO 60 J = K, I2 + SUM = H( K, J ) + V2*H( K+1, J ) + V3*H( K+2, J ) + H( K, J ) = H( K, J ) - SUM*T1 + H( K+1, J ) = H( K+1, J ) - SUM*T2 + H( K+2, J ) = H( K+2, J ) - SUM*T3 + 60 CONTINUE +* +* Apply G from the right to transform the columns of the +* matrix in rows I1 to min(K+3,I). +* + DO 70 J = I1, MIN( K+3, I ) + SUM = H( J, K ) + V2*H( J, K+1 ) + V3*H( J, K+2 ) + H( J, K ) = H( J, K ) - SUM*T1 + H( J, K+1 ) = H( J, K+1 ) - SUM*T2 + H( J, K+2 ) = H( J, K+2 ) - SUM*T3 + 70 CONTINUE +* + IF( WANTZ ) THEN +* +* Accumulate transformations in the matrix Z +* + DO 80 J = ILOZ, IHIZ + SUM = Z( J, K ) + V2*Z( J, K+1 ) + V3*Z( J, K+2 ) + Z( J, K ) = Z( J, K ) - SUM*T1 + Z( J, K+1 ) = Z( J, K+1 ) - SUM*T2 + Z( J, K+2 ) = Z( J, K+2 ) - SUM*T3 + 80 CONTINUE + END IF + ELSE IF( NR.EQ.2 ) THEN +* +* Apply G from the left to transform the rows of the matrix +* in columns K to I2. +* + DO 90 J = K, I2 + SUM = H( K, J ) + V2*H( K+1, J ) + H( K, J ) = H( K, J ) - SUM*T1 + H( K+1, J ) = H( K+1, J ) - SUM*T2 + 90 CONTINUE +* +* Apply G from the right to transform the columns of the +* matrix in rows I1 to min(K+3,I). +* + DO 100 J = I1, I + SUM = H( J, K ) + V2*H( J, K+1 ) + H( J, K ) = H( J, K ) - SUM*T1 + H( J, K+1 ) = H( J, K+1 ) - SUM*T2 + 100 CONTINUE +* + IF( WANTZ ) THEN +* +* Accumulate transformations in the matrix Z +* + DO 110 J = ILOZ, IHIZ + SUM = Z( J, K ) + V2*Z( J, K+1 ) + Z( J, K ) = Z( J, K ) - SUM*T1 + Z( J, K+1 ) = Z( J, K+1 ) - SUM*T2 + 110 CONTINUE + END IF + END IF + 120 CONTINUE +* + 130 CONTINUE +* +* Failure to converge in remaining number of iterations +* + INFO = I + RETURN +* + 140 CONTINUE +* + IF( L.EQ.I ) THEN +* +* H(I,I-1) is negligible: one eigenvalue has converged. +* + WR( I ) = H( I, I ) + WI( I ) = ZERO + ELSE IF( L.EQ.I-1 ) THEN +* +* H(I-1,I-2) is negligible: a pair of eigenvalues have converged. +* +* Transform the 2-by-2 submatrix to standard Schur form, +* and compute and store the eigenvalues. +* + CALL SLANV2( H( I-1, I-1 ), H( I-1, I ), H( I, I-1 ), + $ H( I, I ), WR( I-1 ), WI( I-1 ), WR( I ), WI( I ), + $ CS, SN ) +* + IF( WANTT ) THEN +* +* Apply the transformation to the rest of H. +* + IF( I2.GT.I ) + $ CALL SROT( I2-I, H( I-1, I+1 ), LDH, H( I, I+1 ), LDH, + $ CS, SN ) + CALL SROT( I-I1-1, H( I1, I-1 ), 1, H( I1, I ), 1, CS, SN ) + END IF + IF( WANTZ ) THEN +* +* Apply the transformation to Z. +* + CALL SROT( NZ, Z( ILOZ, I-1 ), 1, Z( ILOZ, I ), 1, CS, SN ) + END IF + END IF +* +* Decrement number of remaining iterations, and return to start of +* the main loop with new value of I. +* + ITN = ITN - ITS + I = L - 1 + GO TO 10 +* + 150 CONTINUE + RETURN +* +* End of SLAHQR +* + END diff --git a/costa/native/external/lapack/slahrd.f b/costa/native/external/lapack/slahrd.f new file mode 100644 index 000000000..6d18352e5 --- /dev/null +++ b/costa/native/external/lapack/slahrd.f @@ -0,0 +1,206 @@ + SUBROUTINE SLAHRD( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. + REAL A( LDA, * ), T( LDT, NB ), TAU( NB ), + $ Y( LDY, NB ) +* .. +* +* Purpose +* ======= +* +* SLAHRD reduces the first NB columns of a real general n-by-(n-k+1) +* matrix A so that elements below the k-th subdiagonal are zero. The +* reduction is performed by an orthogonal similarity transformation +* Q' * A * Q. The routine returns the matrices V and T which determine +* Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. +* +* This is an auxiliary routine called by SGEHRD. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. +* +* K (input) INTEGER +* The offset for the reduction. Elements below the k-th +* subdiagonal in the first NB columns are reduced to zero. +* +* NB (input) INTEGER +* The number of columns to be reduced. +* +* A (input/output) REAL array, dimension (LDA,N-K+1) +* On entry, the n-by-(n-k+1) general matrix A. +* On exit, the elements on and above the k-th subdiagonal in +* the first NB columns are overwritten with the corresponding +* elements of the reduced matrix; the elements below the k-th +* subdiagonal, with the array TAU, represent the matrix Q as a +* product of elementary reflectors. The other columns of A are +* unchanged. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (output) REAL array, dimension (NB) +* The scalar factors of the elementary reflectors. See Further +* Details. +* +* T (output) REAL array, dimension (LDT,NB) +* The upper triangular matrix T. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= NB. +* +* Y (output) REAL array, dimension (LDY,NB) +* The n-by-nb matrix Y. +* +* LDY (input) INTEGER +* The leading dimension of the array Y. LDY >= N. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of nb elementary reflectors +* +* Q = H(1) H(2) . . . H(nb). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in +* A(i+k+1:n,i), and tau in TAU(i). +* +* The elements of the vectors v together form the (n-k+1)-by-nb matrix +* V which is needed, with T and Y, to apply the transformation to the +* unreduced part of the matrix, using an update of the form: +* A := (I - V*T*V') * (A - Y*V'). +* +* The contents of A on exit are illustrated by the following example +* with n = 7, k = 3 and nb = 2: +* +* ( a h a a a ) +* ( a h a a a ) +* ( a h a a a ) +* ( h h a a a ) +* ( v1 h a a a ) +* ( v1 v2 a a a ) +* ( v1 v2 a a a ) +* +* where a denotes an element of the original matrix A, h denotes a +* modified element of the upper Hessenberg matrix H, and vi denotes an +* element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL EI +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGEMV, SLARFG, SSCAL, STRMV +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, NB + IF( I.GT.1 ) THEN +* +* Update A(1:n,i) +* +* Compute i-th column of A - Y * V' +* + CALL SGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, + $ A( K+I-1, 1 ), LDA, ONE, A( 1, I ), 1 ) +* +* Apply I - V * T' * V' to this column (call it b) from the +* left, using the last column of T as workspace +* +* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) +* ( V2 ) ( b2 ) +* +* where V1 is unit lower triangular +* +* w := V1' * b1 +* + CALL SCOPY( I-1, A( K+1, I ), 1, T( 1, NB ), 1 ) + CALL STRMV( 'Lower', 'Transpose', 'Unit', I-1, A( K+1, 1 ), + $ LDA, T( 1, NB ), 1 ) +* +* w := w + V2'*b2 +* + CALL SGEMV( 'Transpose', N-K-I+1, I-1, ONE, A( K+I, 1 ), + $ LDA, A( K+I, I ), 1, ONE, T( 1, NB ), 1 ) +* +* w := T'*w +* + CALL STRMV( 'Upper', 'Transpose', 'Non-unit', I-1, T, LDT, + $ T( 1, NB ), 1 ) +* +* b2 := b2 - V2*w +* + CALL SGEMV( 'No transpose', N-K-I+1, I-1, -ONE, A( K+I, 1 ), + $ LDA, T( 1, NB ), 1, ONE, A( K+I, I ), 1 ) +* +* b1 := b1 - V1*w +* + CALL STRMV( 'Lower', 'No transpose', 'Unit', I-1, + $ A( K+1, 1 ), LDA, T( 1, NB ), 1 ) + CALL SAXPY( I-1, -ONE, T( 1, NB ), 1, A( K+1, I ), 1 ) +* + A( K+I-1, I-1 ) = EI + END IF +* +* Generate the elementary reflector H(i) to annihilate +* A(k+i+1:n,i) +* + CALL SLARFG( N-K-I+1, A( K+I, I ), A( MIN( K+I+1, N ), I ), 1, + $ TAU( I ) ) + EI = A( K+I, I ) + A( K+I, I ) = ONE +* +* Compute Y(1:n,i) +* + CALL SGEMV( 'No transpose', N, N-K-I+1, ONE, A( 1, I+1 ), LDA, + $ A( K+I, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL SGEMV( 'Transpose', N-K-I+1, I-1, ONE, A( K+I, 1 ), LDA, + $ A( K+I, I ), 1, ZERO, T( 1, I ), 1 ) + CALL SGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, T( 1, I ), 1, + $ ONE, Y( 1, I ), 1 ) + CALL SSCAL( N, TAU( I ), Y( 1, I ), 1 ) +* +* Compute T(1:i,i) +* + CALL SSCAL( I-1, -TAU( I ), T( 1, I ), 1 ) + CALL STRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, LDT, + $ T( 1, I ), 1 ) + T( I, I ) = TAU( I ) +* + 10 CONTINUE + A( K+NB, NB ) = EI +* + RETURN +* +* End of SLAHRD +* + END diff --git a/costa/native/external/lapack/slaic1.f b/costa/native/external/lapack/slaic1.f new file mode 100644 index 000000000..651059fb0 --- /dev/null +++ b/costa/native/external/lapack/slaic1.f @@ -0,0 +1,293 @@ + SUBROUTINE SLAIC1( JOB, J, X, SEST, W, GAMMA, SESTPR, S, C ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + INTEGER J, JOB + REAL C, GAMMA, S, SEST, SESTPR +* .. +* .. Array Arguments .. + REAL W( J ), X( J ) +* .. +* +* Purpose +* ======= +* +* SLAIC1 applies one step of incremental condition estimation in +* its simplest version: +* +* Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j +* lower triangular matrix L, such that +* twonorm(L*x) = sest +* Then SLAIC1 computes sestpr, s, c such that +* the vector +* [ s*x ] +* xhat = [ c ] +* is an approximate singular vector of +* [ L 0 ] +* Lhat = [ w' gamma ] +* in the sense that +* twonorm(Lhat*xhat) = sestpr. +* +* Depending on JOB, an estimate for the largest or smallest singular +* value is computed. +* +* Note that [s c]' and sestpr**2 is an eigenpair of the system +* +* diag(sest*sest, 0) + [alpha gamma] * [ alpha ] +* [ gamma ] +* +* where alpha = x'*w. +* +* Arguments +* ========= +* +* JOB (input) INTEGER +* = 1: an estimate for the largest singular value is computed. +* = 2: an estimate for the smallest singular value is computed. +* +* J (input) INTEGER +* Length of X and W +* +* X (input) REAL array, dimension (J) +* The j-vector x. +* +* SEST (input) REAL +* Estimated singular value of j by j matrix L +* +* W (input) REAL array, dimension (J) +* The j-vector w. +* +* GAMMA (input) REAL +* The diagonal element gamma. +* +* SESTPR (output) REAL +* Estimated singular value of (j+1) by (j+1) matrix Lhat. +* +* S (output) REAL +* Sine needed in forming xhat. +* +* C (output) REAL +* Cosine needed in forming xhat. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 ) + REAL HALF, FOUR + PARAMETER ( HALF = 0.5E0, FOUR = 4.0E0 ) +* .. +* .. Local Scalars .. + REAL ABSALP, ABSEST, ABSGAM, ALPHA, B, COSINE, EPS, + $ NORMA, S1, S2, SINE, T, TEST, TMP, ZETA1, ZETA2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SIGN, SQRT +* .. +* .. External Functions .. + REAL SDOT, SLAMCH + EXTERNAL SDOT, SLAMCH +* .. +* .. Executable Statements .. +* + EPS = SLAMCH( 'Epsilon' ) + ALPHA = SDOT( J, X, 1, W, 1 ) +* + ABSALP = ABS( ALPHA ) + ABSGAM = ABS( GAMMA ) + ABSEST = ABS( SEST ) +* + IF( JOB.EQ.1 ) THEN +* +* Estimating largest singular value +* +* special cases +* + IF( SEST.EQ.ZERO ) THEN + S1 = MAX( ABSGAM, ABSALP ) + IF( S1.EQ.ZERO ) THEN + S = ZERO + C = ONE + SESTPR = ZERO + ELSE + S = ALPHA / S1 + C = GAMMA / S1 + TMP = SQRT( S*S+C*C ) + S = S / TMP + C = C / TMP + SESTPR = S1*TMP + END IF + RETURN + ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN + S = ONE + C = ZERO + TMP = MAX( ABSEST, ABSALP ) + S1 = ABSEST / TMP + S2 = ABSALP / TMP + SESTPR = TMP*SQRT( S1*S1+S2*S2 ) + RETURN + ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN + S1 = ABSGAM + S2 = ABSEST + IF( S1.LE.S2 ) THEN + S = ONE + C = ZERO + SESTPR = S2 + ELSE + S = ZERO + C = ONE + SESTPR = S1 + END IF + RETURN + ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN + S1 = ABSGAM + S2 = ABSALP + IF( S1.LE.S2 ) THEN + TMP = S1 / S2 + S = SQRT( ONE+TMP*TMP ) + SESTPR = S2*S + C = ( GAMMA / S2 ) / S + S = SIGN( ONE, ALPHA ) / S + ELSE + TMP = S2 / S1 + C = SQRT( ONE+TMP*TMP ) + SESTPR = S1*C + S = ( ALPHA / S1 ) / C + C = SIGN( ONE, GAMMA ) / C + END IF + RETURN + ELSE +* +* normal case +* + ZETA1 = ALPHA / ABSEST + ZETA2 = GAMMA / ABSEST +* + B = ( ONE-ZETA1*ZETA1-ZETA2*ZETA2 )*HALF + C = ZETA1*ZETA1 + IF( B.GT.ZERO ) THEN + T = C / ( B+SQRT( B*B+C ) ) + ELSE + T = SQRT( B*B+C ) - B + END IF +* + SINE = -ZETA1 / T + COSINE = -ZETA2 / ( ONE+T ) + TMP = SQRT( SINE*SINE+COSINE*COSINE ) + S = SINE / TMP + C = COSINE / TMP + SESTPR = SQRT( T+ONE )*ABSEST + RETURN + END IF +* + ELSE IF( JOB.EQ.2 ) THEN +* +* Estimating smallest singular value +* +* special cases +* + IF( SEST.EQ.ZERO ) THEN + SESTPR = ZERO + IF( MAX( ABSGAM, ABSALP ).EQ.ZERO ) THEN + SINE = ONE + COSINE = ZERO + ELSE + SINE = -GAMMA + COSINE = ALPHA + END IF + S1 = MAX( ABS( SINE ), ABS( COSINE ) ) + S = SINE / S1 + C = COSINE / S1 + TMP = SQRT( S*S+C*C ) + S = S / TMP + C = C / TMP + RETURN + ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN + S = ZERO + C = ONE + SESTPR = ABSGAM + RETURN + ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN + S1 = ABSGAM + S2 = ABSEST + IF( S1.LE.S2 ) THEN + S = ZERO + C = ONE + SESTPR = S1 + ELSE + S = ONE + C = ZERO + SESTPR = S2 + END IF + RETURN + ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN + S1 = ABSGAM + S2 = ABSALP + IF( S1.LE.S2 ) THEN + TMP = S1 / S2 + C = SQRT( ONE+TMP*TMP ) + SESTPR = ABSEST*( TMP / C ) + S = -( GAMMA / S2 ) / C + C = SIGN( ONE, ALPHA ) / C + ELSE + TMP = S2 / S1 + S = SQRT( ONE+TMP*TMP ) + SESTPR = ABSEST / S + C = ( ALPHA / S1 ) / S + S = -SIGN( ONE, GAMMA ) / S + END IF + RETURN + ELSE +* +* normal case +* + ZETA1 = ALPHA / ABSEST + ZETA2 = GAMMA / ABSEST +* + NORMA = MAX( ONE+ZETA1*ZETA1+ABS( ZETA1*ZETA2 ), + $ ABS( ZETA1*ZETA2 )+ZETA2*ZETA2 ) +* +* See if root is closer to zero or to ONE +* + TEST = ONE + TWO*( ZETA1-ZETA2 )*( ZETA1+ZETA2 ) + IF( TEST.GE.ZERO ) THEN +* +* root is close to zero, compute directly +* + B = ( ZETA1*ZETA1+ZETA2*ZETA2+ONE )*HALF + C = ZETA2*ZETA2 + T = C / ( B+SQRT( ABS( B*B-C ) ) ) + SINE = ZETA1 / ( ONE-T ) + COSINE = -ZETA2 / T + SESTPR = SQRT( T+FOUR*EPS*EPS*NORMA )*ABSEST + ELSE +* +* root is closer to ONE, shift by that amount +* + B = ( ZETA2*ZETA2+ZETA1*ZETA1-ONE )*HALF + C = ZETA1*ZETA1 + IF( B.GE.ZERO ) THEN + T = -C / ( B+SQRT( B*B+C ) ) + ELSE + T = B - SQRT( B*B+C ) + END IF + SINE = -ZETA1 / T + COSINE = -ZETA2 / ( ONE+T ) + SESTPR = SQRT( ONE+T+FOUR*EPS*EPS*NORMA )*ABSEST + END IF + TMP = SQRT( SINE*SINE+COSINE*COSINE ) + S = SINE / TMP + C = COSINE / TMP + RETURN +* + END IF + END IF + RETURN +* +* End of SLAIC1 +* + END diff --git a/costa/native/external/lapack/slaln2.f b/costa/native/external/lapack/slaln2.f new file mode 100644 index 000000000..7b4c3cc92 --- /dev/null +++ b/costa/native/external/lapack/slaln2.f @@ -0,0 +1,508 @@ + SUBROUTINE SLALN2( LTRANS, NA, NW, SMIN, CA, A, LDA, D1, D2, B, + $ LDB, WR, WI, X, LDX, SCALE, XNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + LOGICAL LTRANS + INTEGER INFO, LDA, LDB, LDX, NA, NW + REAL CA, D1, D2, SCALE, SMIN, WI, WR, XNORM +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SLALN2 solves a system of the form (ca A - w D ) X = s B +* or (ca A' - w D) X = s B with possible scaling ("s") and +* perturbation of A. (A' means A-transpose.) +* +* A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA +* real diagonal matrix, w is a real or complex value, and X and B are +* NA x 1 matrices -- real if w is real, complex if w is complex. NA +* may be 1 or 2. +* +* If w is complex, X and B are represented as NA x 2 matrices, +* the first column of each being the real part and the second +* being the imaginary part. +* +* "s" is a scaling factor (.LE. 1), computed by SLALN2, which is +* so chosen that X can be computed without overflow. X is further +* scaled if necessary to assure that norm(ca A - w D)*norm(X) is less +* than overflow. +* +* If both singular values of (ca A - w D) are less than SMIN, +* SMIN*identity will be used instead of (ca A - w D). If only one +* singular value is less than SMIN, one element of (ca A - w D) will be +* perturbed enough to make the smallest singular value roughly SMIN. +* If both singular values are at least SMIN, (ca A - w D) will not be +* perturbed. In any case, the perturbation will be at most some small +* multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values +* are computed by infinity-norm approximations, and thus will only be +* correct to a factor of 2 or so. +* +* Note: all input quantities are assumed to be smaller than overflow +* by a reasonable factor. (See BIGNUM.) +* +* Arguments +* ========== +* +* LTRANS (input) LOGICAL +* =.TRUE.: A-transpose will be used. +* =.FALSE.: A will be used (not transposed.) +* +* NA (input) INTEGER +* The size of the matrix A. It may (only) be 1 or 2. +* +* NW (input) INTEGER +* 1 if "w" is real, 2 if "w" is complex. It may only be 1 +* or 2. +* +* SMIN (input) REAL +* The desired lower bound on the singular values of A. This +* should be a safe distance away from underflow or overflow, +* say, between (underflow/machine precision) and (machine +* precision * overflow ). (See BIGNUM and ULP.) +* +* CA (input) REAL +* The coefficient c, which A is multiplied by. +* +* A (input) REAL array, dimension (LDA,NA) +* The NA x NA matrix A. +* +* LDA (input) INTEGER +* The leading dimension of A. It must be at least NA. +* +* D1 (input) REAL +* The 1,1 element in the diagonal matrix D. +* +* D2 (input) REAL +* The 2,2 element in the diagonal matrix D. Not used if NW=1. +* +* B (input) REAL array, dimension (LDB,NW) +* The NA x NW matrix B (right-hand side). If NW=2 ("w" is +* complex), column 1 contains the real part of B and column 2 +* contains the imaginary part. +* +* LDB (input) INTEGER +* The leading dimension of B. It must be at least NA. +* +* WR (input) REAL +* The real part of the scalar "w". +* +* WI (input) REAL +* The imaginary part of the scalar "w". Not used if NW=1. +* +* X (output) REAL array, dimension (LDX,NW) +* The NA x NW matrix X (unknowns), as computed by SLALN2. +* If NW=2 ("w" is complex), on exit, column 1 will contain +* the real part of X and column 2 will contain the imaginary +* part. +* +* LDX (input) INTEGER +* The leading dimension of X. It must be at least NA. +* +* SCALE (output) REAL +* The scale factor that B must be multiplied by to insure +* that overflow does not occur when computing X. Thus, +* (ca A - w D) X will be SCALE*B, not B (ignoring +* perturbations of A.) It will be at most 1. +* +* XNORM (output) REAL +* The infinity-norm of X, when X is regarded as an NA x NW +* real matrix. +* +* INFO (output) INTEGER +* An error flag. It will be set to zero if no error occurs, +* a negative number if an argument is in error, or a positive +* number if ca A - w D had to be perturbed. +* The possible values are: +* = 0: No error occurred, and (ca A - w D) did not have to be +* perturbed. +* = 1: (ca A - w D) had to be perturbed to make its smallest +* (or only) singular value greater than SMIN. +* NOTE: In the interests of speed, this routine does not +* check the inputs for errors. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + REAL TWO + PARAMETER ( TWO = 2.0E0 ) +* .. +* .. Local Scalars .. + INTEGER ICMAX, J + REAL BBND, BI1, BI2, BIGNUM, BNORM, BR1, BR2, CI21, + $ CI22, CMAX, CNORM, CR21, CR22, CSI, CSR, LI21, + $ LR21, SMINI, SMLNUM, TEMP, U22ABS, UI11, UI11R, + $ UI12, UI12S, UI22, UR11, UR11R, UR12, UR12S, + $ UR22, XI1, XI2, XR1, XR2 +* .. +* .. Local Arrays .. + LOGICAL CSWAP( 4 ), RSWAP( 4 ) + INTEGER IPIVOT( 4, 4 ) + REAL CI( 2, 2 ), CIV( 4 ), CR( 2, 2 ), CRV( 4 ) +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Equivalences .. + EQUIVALENCE ( CI( 1, 1 ), CIV( 1 ) ), + $ ( CR( 1, 1 ), CRV( 1 ) ) +* .. +* .. Data statements .. + DATA CSWAP / .FALSE., .FALSE., .TRUE., .TRUE. / + DATA RSWAP / .FALSE., .TRUE., .FALSE., .TRUE. / + DATA IPIVOT / 1, 2, 3, 4, 2, 1, 4, 3, 3, 4, 1, 2, 4, + $ 3, 2, 1 / +* .. +* .. Executable Statements .. +* +* Compute BIGNUM +* + SMLNUM = TWO*SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + SMINI = MAX( SMIN, SMLNUM ) +* +* Don't check for input errors +* + INFO = 0 +* +* Standard Initializations +* + SCALE = ONE +* + IF( NA.EQ.1 ) THEN +* +* 1 x 1 (i.e., scalar) system C X = B +* + IF( NW.EQ.1 ) THEN +* +* Real 1x1 system. +* +* C = ca A - w D +* + CSR = CA*A( 1, 1 ) - WR*D1 + CNORM = ABS( CSR ) +* +* If | C | < SMINI, use C = SMINI +* + IF( CNORM.LT.SMINI ) THEN + CSR = SMINI + CNORM = SMINI + INFO = 1 + END IF +* +* Check scaling for X = B / C +* + BNORM = ABS( B( 1, 1 ) ) + IF( CNORM.LT.ONE .AND. BNORM.GT.ONE ) THEN + IF( BNORM.GT.BIGNUM*CNORM ) + $ SCALE = ONE / BNORM + END IF +* +* Compute X +* + X( 1, 1 ) = ( B( 1, 1 )*SCALE ) / CSR + XNORM = ABS( X( 1, 1 ) ) + ELSE +* +* Complex 1x1 system (w is complex) +* +* C = ca A - w D +* + CSR = CA*A( 1, 1 ) - WR*D1 + CSI = -WI*D1 + CNORM = ABS( CSR ) + ABS( CSI ) +* +* If | C | < SMINI, use C = SMINI +* + IF( CNORM.LT.SMINI ) THEN + CSR = SMINI + CSI = ZERO + CNORM = SMINI + INFO = 1 + END IF +* +* Check scaling for X = B / C +* + BNORM = ABS( B( 1, 1 ) ) + ABS( B( 1, 2 ) ) + IF( CNORM.LT.ONE .AND. BNORM.GT.ONE ) THEN + IF( BNORM.GT.BIGNUM*CNORM ) + $ SCALE = ONE / BNORM + END IF +* +* Compute X +* + CALL SLADIV( SCALE*B( 1, 1 ), SCALE*B( 1, 2 ), CSR, CSI, + $ X( 1, 1 ), X( 1, 2 ) ) + XNORM = ABS( X( 1, 1 ) ) + ABS( X( 1, 2 ) ) + END IF +* + ELSE +* +* 2x2 System +* +* Compute the real part of C = ca A - w D (or ca A' - w D ) +* + CR( 1, 1 ) = CA*A( 1, 1 ) - WR*D1 + CR( 2, 2 ) = CA*A( 2, 2 ) - WR*D2 + IF( LTRANS ) THEN + CR( 1, 2 ) = CA*A( 2, 1 ) + CR( 2, 1 ) = CA*A( 1, 2 ) + ELSE + CR( 2, 1 ) = CA*A( 2, 1 ) + CR( 1, 2 ) = CA*A( 1, 2 ) + END IF +* + IF( NW.EQ.1 ) THEN +* +* Real 2x2 system (w is real) +* +* Find the largest element in C +* + CMAX = ZERO + ICMAX = 0 +* + DO 10 J = 1, 4 + IF( ABS( CRV( J ) ).GT.CMAX ) THEN + CMAX = ABS( CRV( J ) ) + ICMAX = J + END IF + 10 CONTINUE +* +* If norm(C) < SMINI, use SMINI*identity. +* + IF( CMAX.LT.SMINI ) THEN + BNORM = MAX( ABS( B( 1, 1 ) ), ABS( B( 2, 1 ) ) ) + IF( SMINI.LT.ONE .AND. BNORM.GT.ONE ) THEN + IF( BNORM.GT.BIGNUM*SMINI ) + $ SCALE = ONE / BNORM + END IF + TEMP = SCALE / SMINI + X( 1, 1 ) = TEMP*B( 1, 1 ) + X( 2, 1 ) = TEMP*B( 2, 1 ) + XNORM = TEMP*BNORM + INFO = 1 + RETURN + END IF +* +* Gaussian elimination with complete pivoting. +* + UR11 = CRV( ICMAX ) + CR21 = CRV( IPIVOT( 2, ICMAX ) ) + UR12 = CRV( IPIVOT( 3, ICMAX ) ) + CR22 = CRV( IPIVOT( 4, ICMAX ) ) + UR11R = ONE / UR11 + LR21 = UR11R*CR21 + UR22 = CR22 - UR12*LR21 +* +* If smaller pivot < SMINI, use SMINI +* + IF( ABS( UR22 ).LT.SMINI ) THEN + UR22 = SMINI + INFO = 1 + END IF + IF( RSWAP( ICMAX ) ) THEN + BR1 = B( 2, 1 ) + BR2 = B( 1, 1 ) + ELSE + BR1 = B( 1, 1 ) + BR2 = B( 2, 1 ) + END IF + BR2 = BR2 - LR21*BR1 + BBND = MAX( ABS( BR1*( UR22*UR11R ) ), ABS( BR2 ) ) + IF( BBND.GT.ONE .AND. ABS( UR22 ).LT.ONE ) THEN + IF( BBND.GE.BIGNUM*ABS( UR22 ) ) + $ SCALE = ONE / BBND + END IF +* + XR2 = ( BR2*SCALE ) / UR22 + XR1 = ( SCALE*BR1 )*UR11R - XR2*( UR11R*UR12 ) + IF( CSWAP( ICMAX ) ) THEN + X( 1, 1 ) = XR2 + X( 2, 1 ) = XR1 + ELSE + X( 1, 1 ) = XR1 + X( 2, 1 ) = XR2 + END IF + XNORM = MAX( ABS( XR1 ), ABS( XR2 ) ) +* +* Further scaling if norm(A) norm(X) > overflow +* + IF( XNORM.GT.ONE .AND. CMAX.GT.ONE ) THEN + IF( XNORM.GT.BIGNUM / CMAX ) THEN + TEMP = CMAX / BIGNUM + X( 1, 1 ) = TEMP*X( 1, 1 ) + X( 2, 1 ) = TEMP*X( 2, 1 ) + XNORM = TEMP*XNORM + SCALE = TEMP*SCALE + END IF + END IF + ELSE +* +* Complex 2x2 system (w is complex) +* +* Find the largest element in C +* + CI( 1, 1 ) = -WI*D1 + CI( 2, 1 ) = ZERO + CI( 1, 2 ) = ZERO + CI( 2, 2 ) = -WI*D2 + CMAX = ZERO + ICMAX = 0 +* + DO 20 J = 1, 4 + IF( ABS( CRV( J ) )+ABS( CIV( J ) ).GT.CMAX ) THEN + CMAX = ABS( CRV( J ) ) + ABS( CIV( J ) ) + ICMAX = J + END IF + 20 CONTINUE +* +* If norm(C) < SMINI, use SMINI*identity. +* + IF( CMAX.LT.SMINI ) THEN + BNORM = MAX( ABS( B( 1, 1 ) )+ABS( B( 1, 2 ) ), + $ ABS( B( 2, 1 ) )+ABS( B( 2, 2 ) ) ) + IF( SMINI.LT.ONE .AND. BNORM.GT.ONE ) THEN + IF( BNORM.GT.BIGNUM*SMINI ) + $ SCALE = ONE / BNORM + END IF + TEMP = SCALE / SMINI + X( 1, 1 ) = TEMP*B( 1, 1 ) + X( 2, 1 ) = TEMP*B( 2, 1 ) + X( 1, 2 ) = TEMP*B( 1, 2 ) + X( 2, 2 ) = TEMP*B( 2, 2 ) + XNORM = TEMP*BNORM + INFO = 1 + RETURN + END IF +* +* Gaussian elimination with complete pivoting. +* + UR11 = CRV( ICMAX ) + UI11 = CIV( ICMAX ) + CR21 = CRV( IPIVOT( 2, ICMAX ) ) + CI21 = CIV( IPIVOT( 2, ICMAX ) ) + UR12 = CRV( IPIVOT( 3, ICMAX ) ) + UI12 = CIV( IPIVOT( 3, ICMAX ) ) + CR22 = CRV( IPIVOT( 4, ICMAX ) ) + CI22 = CIV( IPIVOT( 4, ICMAX ) ) + IF( ICMAX.EQ.1 .OR. ICMAX.EQ.4 ) THEN +* +* Code when off-diagonals of pivoted C are real +* + IF( ABS( UR11 ).GT.ABS( UI11 ) ) THEN + TEMP = UI11 / UR11 + UR11R = ONE / ( UR11*( ONE+TEMP**2 ) ) + UI11R = -TEMP*UR11R + ELSE + TEMP = UR11 / UI11 + UI11R = -ONE / ( UI11*( ONE+TEMP**2 ) ) + UR11R = -TEMP*UI11R + END IF + LR21 = CR21*UR11R + LI21 = CR21*UI11R + UR12S = UR12*UR11R + UI12S = UR12*UI11R + UR22 = CR22 - UR12*LR21 + UI22 = CI22 - UR12*LI21 + ELSE +* +* Code when diagonals of pivoted C are real +* + UR11R = ONE / UR11 + UI11R = ZERO + LR21 = CR21*UR11R + LI21 = CI21*UR11R + UR12S = UR12*UR11R + UI12S = UI12*UR11R + UR22 = CR22 - UR12*LR21 + UI12*LI21 + UI22 = -UR12*LI21 - UI12*LR21 + END IF + U22ABS = ABS( UR22 ) + ABS( UI22 ) +* +* If smaller pivot < SMINI, use SMINI +* + IF( U22ABS.LT.SMINI ) THEN + UR22 = SMINI + UI22 = ZERO + INFO = 1 + END IF + IF( RSWAP( ICMAX ) ) THEN + BR2 = B( 1, 1 ) + BR1 = B( 2, 1 ) + BI2 = B( 1, 2 ) + BI1 = B( 2, 2 ) + ELSE + BR1 = B( 1, 1 ) + BR2 = B( 2, 1 ) + BI1 = B( 1, 2 ) + BI2 = B( 2, 2 ) + END IF + BR2 = BR2 - LR21*BR1 + LI21*BI1 + BI2 = BI2 - LI21*BR1 - LR21*BI1 + BBND = MAX( ( ABS( BR1 )+ABS( BI1 ) )* + $ ( U22ABS*( ABS( UR11R )+ABS( UI11R ) ) ), + $ ABS( BR2 )+ABS( BI2 ) ) + IF( BBND.GT.ONE .AND. U22ABS.LT.ONE ) THEN + IF( BBND.GE.BIGNUM*U22ABS ) THEN + SCALE = ONE / BBND + BR1 = SCALE*BR1 + BI1 = SCALE*BI1 + BR2 = SCALE*BR2 + BI2 = SCALE*BI2 + END IF + END IF +* + CALL SLADIV( BR2, BI2, UR22, UI22, XR2, XI2 ) + XR1 = UR11R*BR1 - UI11R*BI1 - UR12S*XR2 + UI12S*XI2 + XI1 = UI11R*BR1 + UR11R*BI1 - UI12S*XR2 - UR12S*XI2 + IF( CSWAP( ICMAX ) ) THEN + X( 1, 1 ) = XR2 + X( 2, 1 ) = XR1 + X( 1, 2 ) = XI2 + X( 2, 2 ) = XI1 + ELSE + X( 1, 1 ) = XR1 + X( 2, 1 ) = XR2 + X( 1, 2 ) = XI1 + X( 2, 2 ) = XI2 + END IF + XNORM = MAX( ABS( XR1 )+ABS( XI1 ), ABS( XR2 )+ABS( XI2 ) ) +* +* Further scaling if norm(A) norm(X) > overflow +* + IF( XNORM.GT.ONE .AND. CMAX.GT.ONE ) THEN + IF( XNORM.GT.BIGNUM / CMAX ) THEN + TEMP = CMAX / BIGNUM + X( 1, 1 ) = TEMP*X( 1, 1 ) + X( 2, 1 ) = TEMP*X( 2, 1 ) + X( 1, 2 ) = TEMP*X( 1, 2 ) + X( 2, 2 ) = TEMP*X( 2, 2 ) + XNORM = TEMP*XNORM + SCALE = TEMP*SCALE + END IF + END IF + END IF + END IF +* + RETURN +* +* End of SLALN2 +* + END diff --git a/costa/native/external/lapack/slals0.f b/costa/native/external/lapack/slals0.f new file mode 100644 index 000000000..bf9004bea --- /dev/null +++ b/costa/native/external/lapack/slals0.f @@ -0,0 +1,375 @@ + SUBROUTINE SLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX, + $ PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, + $ POLES, DIFL, DIFR, Z, K, C, S, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* December 1, 1999 +* +* .. Scalar Arguments .. + INTEGER GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL, + $ LDGNUM, NL, NR, NRHS, SQRE + REAL C, S +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), PERM( * ) + REAL B( LDB, * ), BX( LDBX, * ), DIFL( * ), + $ DIFR( LDGNUM, * ), GIVNUM( LDGNUM, * ), + $ POLES( LDGNUM, * ), WORK( * ), Z( * ) +* .. +* +* Purpose +* ======= +* +* SLALS0 applies back the multiplying factors of either the left or the +* right singular vector matrix of a diagonal matrix appended by a row +* to the right hand side matrix B in solving the least squares problem +* using the divide-and-conquer SVD approach. +* +* For the left singular vector matrix, three types of orthogonal +* matrices are involved: +* +* (1L) Givens rotations: the number of such rotations is GIVPTR; the +* pairs of columns/rows they were applied to are stored in GIVCOL; +* and the C- and S-values of these rotations are stored in GIVNUM. +* +* (2L) Permutation. The (NL+1)-st row of B is to be moved to the first +* row, and for J=2:N, PERM(J)-th row of B is to be moved to the +* J-th row. +* +* (3L) The left singular vector matrix of the remaining matrix. +* +* For the right singular vector matrix, four types of orthogonal +* matrices are involved: +* +* (1R) The right singular vector matrix of the remaining matrix. +* +* (2R) If SQRE = 1, one extra Givens rotation to generate the right +* null space. +* +* (3R) The inverse transformation of (2L). +* +* (4R) The inverse transformation of (1L). +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether singular vectors are to be computed in +* factored form: +* = 0: Left singular vector matrix. +* = 1: Right singular vector matrix. +* +* NL (input) INTEGER +* The row dimension of the upper block. NL >= 1. +* +* NR (input) INTEGER +* The row dimension of the lower block. NR >= 1. +* +* SQRE (input) INTEGER +* = 0: the lower block is an NR-by-NR square matrix. +* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +* +* The bidiagonal matrix has row dimension N = NL + NR + 1, +* and column dimension M = N + SQRE. +* +* NRHS (input) INTEGER +* The number of columns of B and BX. NRHS must be at least 1. +* +* B (input/output) REAL array, dimension ( LDB, NRHS ) +* On input, B contains the right hand sides of the least +* squares problem in rows 1 through M. On output, B contains +* the solution X in rows 1 through N. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB must be at least +* max(1,MAX( M, N ) ). +* +* BX (workspace) REAL array, dimension ( LDBX, NRHS ) +* +* LDBX (input) INTEGER +* The leading dimension of BX. +* +* PERM (input) INTEGER array, dimension ( N ) +* The permutations (from deflation and sorting) applied +* to the two blocks. +* +* GIVPTR (input) INTEGER +* The number of Givens rotations which took place in this +* subproblem. +* +* GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) +* Each pair of numbers indicates a pair of rows/columns +* involved in a Givens rotation. +* +* LDGCOL (input) INTEGER +* The leading dimension of GIVCOL, must be at least N. +* +* GIVNUM (input) REAL array, dimension ( LDGNUM, 2 ) +* Each number indicates the C or S value used in the +* corresponding Givens rotation. +* +* LDGNUM (input) INTEGER +* The leading dimension of arrays DIFR, POLES and +* GIVNUM, must be at least K. +* +* POLES (input) REAL array, dimension ( LDGNUM, 2 ) +* On entry, POLES(1:K, 1) contains the new singular +* values obtained from solving the secular equation, and +* POLES(1:K, 2) is an array containing the poles in the secular +* equation. +* +* DIFL (input) REAL array, dimension ( K ). +* On entry, DIFL(I) is the distance between I-th updated +* (undeflated) singular value and the I-th (undeflated) old +* singular value. +* +* DIFR (input) REAL array, dimension ( LDGNUM, 2 ). +* On entry, DIFR(I, 1) contains the distances between I-th +* updated (undeflated) singular value and the I+1-th +* (undeflated) old singular value. And DIFR(I, 2) is the +* normalizing factor for the I-th right singular vector. +* +* Z (input) REAL array, dimension ( K ) +* Contain the components of the deflation-adjusted updating row +* vector. +* +* K (input) INTEGER +* Contains the dimension of the non-deflated matrix, +* This is the order of the related secular equation. 1 <= K <=N. +* +* C (input) REAL +* C contains garbage if SQRE =0 and the C-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* S (input) REAL +* S contains garbage if SQRE =0 and the S-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* WORK (workspace) REAL array, dimension ( K ) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO, NEGONE + PARAMETER ( ONE = 1.0E0, ZERO = 0.0E0, NEGONE = -1.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, M, N, NLP1 + REAL DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, TEMP +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMV, SLACPY, SLASCL, SROT, SSCAL, + $ XERBLA +* .. +* .. External Functions .. + REAL SLAMC3, SNRM2 + EXTERNAL SLAMC3, SNRM2 +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( NL.LT.1 ) THEN + INFO = -2 + ELSE IF( NR.LT.1 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + END IF +* + N = NL + NR + 1 +* + IF( NRHS.LT.1 ) THEN + INFO = -5 + ELSE IF( LDB.LT.N ) THEN + INFO = -7 + ELSE IF( LDBX.LT.N ) THEN + INFO = -9 + ELSE IF( GIVPTR.LT.0 ) THEN + INFO = -11 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -13 + ELSE IF( LDGNUM.LT.N ) THEN + INFO = -15 + ELSE IF( K.LT.1 ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLALS0', -INFO ) + RETURN + END IF +* + M = N + SQRE + NLP1 = NL + 1 +* + IF( ICOMPQ.EQ.0 ) THEN +* +* Apply back orthogonal transformations from the left. +* +* Step (1L): apply back the Givens rotations performed. +* + DO 10 I = 1, GIVPTR + CALL SROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB, + $ B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ), + $ GIVNUM( I, 1 ) ) + 10 CONTINUE +* +* Step (2L): permute rows of B. +* + CALL SCOPY( NRHS, B( NLP1, 1 ), LDB, BX( 1, 1 ), LDBX ) + DO 20 I = 2, N + CALL SCOPY( NRHS, B( PERM( I ), 1 ), LDB, BX( I, 1 ), LDBX ) + 20 CONTINUE +* +* Step (3L): apply the inverse of the left singular vector +* matrix to BX. +* + IF( K.EQ.1 ) THEN + CALL SCOPY( NRHS, BX, LDBX, B, LDB ) + IF( Z( 1 ).LT.ZERO ) THEN + CALL SSCAL( NRHS, NEGONE, B, LDB ) + END IF + ELSE + DO 50 J = 1, K + DIFLJ = DIFL( J ) + DJ = POLES( J, 1 ) + DSIGJ = -POLES( J, 2 ) + IF( J.LT.K ) THEN + DIFRJ = -DIFR( J, 1 ) + DSIGJP = -POLES( J+1, 2 ) + END IF + IF( ( Z( J ).EQ.ZERO ) .OR. ( POLES( J, 2 ).EQ.ZERO ) ) + $ THEN + WORK( J ) = ZERO + ELSE + WORK( J ) = -POLES( J, 2 )*Z( J ) / DIFLJ / + $ ( POLES( J, 2 )+DJ ) + END IF + DO 30 I = 1, J - 1 + IF( ( Z( I ).EQ.ZERO ) .OR. + $ ( POLES( I, 2 ).EQ.ZERO ) ) THEN + WORK( I ) = ZERO + ELSE + WORK( I ) = POLES( I, 2 )*Z( I ) / + $ ( SLAMC3( POLES( I, 2 ), DSIGJ )- + $ DIFLJ ) / ( POLES( I, 2 )+DJ ) + END IF + 30 CONTINUE + DO 40 I = J + 1, K + IF( ( Z( I ).EQ.ZERO ) .OR. + $ ( POLES( I, 2 ).EQ.ZERO ) ) THEN + WORK( I ) = ZERO + ELSE + WORK( I ) = POLES( I, 2 )*Z( I ) / + $ ( SLAMC3( POLES( I, 2 ), DSIGJP )+ + $ DIFRJ ) / ( POLES( I, 2 )+DJ ) + END IF + 40 CONTINUE + WORK( 1 ) = NEGONE + TEMP = SNRM2( K, WORK, 1 ) + CALL SGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO, + $ B( J, 1 ), LDB ) + CALL SLASCL( 'G', 0, 0, TEMP, ONE, 1, NRHS, B( J, 1 ), + $ LDB, INFO ) + 50 CONTINUE + END IF +* +* Move the deflated rows of BX to B also. +* + IF( K.LT.MAX( M, N ) ) + $ CALL SLACPY( 'A', N-K, NRHS, BX( K+1, 1 ), LDBX, + $ B( K+1, 1 ), LDB ) + ELSE +* +* Apply back the right orthogonal transformations. +* +* Step (1R): apply back the new right singular vector matrix +* to B. +* + IF( K.EQ.1 ) THEN + CALL SCOPY( NRHS, B, LDB, BX, LDBX ) + ELSE + DO 80 J = 1, K + DSIGJ = POLES( J, 2 ) + IF( Z( J ).EQ.ZERO ) THEN + WORK( J ) = ZERO + ELSE + WORK( J ) = -Z( J ) / DIFL( J ) / + $ ( DSIGJ+POLES( J, 1 ) ) / DIFR( J, 2 ) + END IF + DO 60 I = 1, J - 1 + IF( Z( J ).EQ.ZERO ) THEN + WORK( I ) = ZERO + ELSE + WORK( I ) = Z( J ) / ( SLAMC3( DSIGJ, -POLES( I+1, + $ 2 ) )-DIFR( I, 1 ) ) / + $ ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 ) + END IF + 60 CONTINUE + DO 70 I = J + 1, K + IF( Z( J ).EQ.ZERO ) THEN + WORK( I ) = ZERO + ELSE + WORK( I ) = Z( J ) / ( SLAMC3( DSIGJ, -POLES( I, + $ 2 ) )-DIFL( I ) ) / + $ ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 ) + END IF + 70 CONTINUE + CALL SGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO, + $ BX( J, 1 ), LDBX ) + 80 CONTINUE + END IF +* +* Step (2R): if SQRE = 1, apply back the rotation that is +* related to the right null space of the subproblem. +* + IF( SQRE.EQ.1 ) THEN + CALL SCOPY( NRHS, B( M, 1 ), LDB, BX( M, 1 ), LDBX ) + CALL SROT( NRHS, BX( 1, 1 ), LDBX, BX( M, 1 ), LDBX, C, S ) + END IF + IF( K.LT.MAX( M, N ) ) + $ CALL SLACPY( 'A', N-K, NRHS, B( K+1, 1 ), LDB, BX( K+1, 1 ), + $ LDBX ) +* +* Step (3R): permute rows of B. +* + CALL SCOPY( NRHS, BX( 1, 1 ), LDBX, B( NLP1, 1 ), LDB ) + IF( SQRE.EQ.1 ) THEN + CALL SCOPY( NRHS, BX( M, 1 ), LDBX, B( M, 1 ), LDB ) + END IF + DO 90 I = 2, N + CALL SCOPY( NRHS, BX( I, 1 ), LDBX, B( PERM( I ), 1 ), LDB ) + 90 CONTINUE +* +* Step (4R): apply back the Givens rotations performed. +* + DO 100 I = GIVPTR, 1, -1 + CALL SROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB, + $ B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ), + $ -GIVNUM( I, 1 ) ) + 100 CONTINUE + END IF +* + RETURN +* +* End of SLALS0 +* + END diff --git a/costa/native/external/lapack/slalsa.f b/costa/native/external/lapack/slalsa.f new file mode 100644 index 000000000..1294010ca --- /dev/null +++ b/costa/native/external/lapack/slalsa.f @@ -0,0 +1,363 @@ + SUBROUTINE SLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U, + $ LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR, + $ GIVCOL, LDGCOL, PERM, GIVNUM, C, S, WORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS, + $ SMLSIZ +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ), + $ K( * ), PERM( LDGCOL, * ) + REAL B( LDB, * ), BX( LDBX, * ), C( * ), + $ DIFL( LDU, * ), DIFR( LDU, * ), + $ GIVNUM( LDU, * ), POLES( LDU, * ), S( * ), + $ U( LDU, * ), VT( LDU, * ), WORK( * ), + $ Z( LDU, * ) +* .. +* +* Purpose +* ======= +* +* SLALSA is an itermediate step in solving the least squares problem +* by computing the SVD of the coefficient matrix in compact form (The +* singular vectors are computed as products of simple orthorgonal +* matrices.). +* +* If ICOMPQ = 0, SLALSA applies the inverse of the left singular vector +* matrix of an upper bidiagonal matrix to the right hand side; and if +* ICOMPQ = 1, SLALSA applies the right singular vector matrix to the +* right hand side. The singular vector matrices were generated in +* compact form by SLALSA. +* +* Arguments +* ========= +* +* +* ICOMPQ (input) INTEGER +* Specifies whether the left or the right singular vector +* matrix is involved. +* = 0: Left singular vector matrix +* = 1: Right singular vector matrix +* +* SMLSIZ (input) INTEGER +* The maximum size of the subproblems at the bottom of the +* computation tree. +* +* N (input) INTEGER +* The row and column dimensions of the upper bidiagonal matrix. +* +* NRHS (input) INTEGER +* The number of columns of B and BX. NRHS must be at least 1. +* +* B (input) REAL array, dimension ( LDB, NRHS ) +* On input, B contains the right hand sides of the least +* squares problem in rows 1 through M. On output, B contains +* the solution X in rows 1 through N. +* +* LDB (input) INTEGER +* The leading dimension of B in the calling subprogram. +* LDB must be at least max(1,MAX( M, N ) ). +* +* BX (output) REAL array, dimension ( LDBX, NRHS ) +* On exit, the result of applying the left or right singular +* vector matrix to B. +* +* LDBX (input) INTEGER +* The leading dimension of BX. +* +* U (input) REAL array, dimension ( LDU, SMLSIZ ). +* On entry, U contains the left singular vector matrices of all +* subproblems at the bottom level. +* +* LDU (input) INTEGER, LDU = > N. +* The leading dimension of arrays U, VT, DIFL, DIFR, +* POLES, GIVNUM, and Z. +* +* VT (input) REAL array, dimension ( LDU, SMLSIZ+1 ). +* On entry, VT' contains the right singular vector matrices of +* all subproblems at the bottom level. +* +* K (input) INTEGER array, dimension ( N ). +* +* DIFL (input) REAL array, dimension ( LDU, NLVL ). +* where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. +* +* DIFR (input) REAL array, dimension ( LDU, 2 * NLVL ). +* On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record +* distances between singular values on the I-th level and +* singular values on the (I -1)-th level, and DIFR(*, 2 * I) +* record the normalizing factors of the right singular vectors +* matrices of subproblems on I-th level. +* +* Z (input) REAL array, dimension ( LDU, NLVL ). +* On entry, Z(1, I) contains the components of the deflation- +* adjusted updating row vector for subproblems on the I-th +* level. +* +* POLES (input) REAL array, dimension ( LDU, 2 * NLVL ). +* On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old +* singular values involved in the secular equations on the I-th +* level. +* +* GIVPTR (input) INTEGER array, dimension ( N ). +* On entry, GIVPTR( I ) records the number of Givens +* rotations performed on the I-th problem on the computation +* tree. +* +* GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). +* On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the +* locations of Givens rotations performed on the I-th level on +* the computation tree. +* +* LDGCOL (input) INTEGER, LDGCOL = > N. +* The leading dimension of arrays GIVCOL and PERM. +* +* PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). +* On entry, PERM(*, I) records permutations done on the I-th +* level of the computation tree. +* +* GIVNUM (input) REAL array, dimension ( LDU, 2 * NLVL ). +* On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- +* values of Givens rotations performed on the I-th level on the +* computation tree. +* +* C (input) REAL array, dimension ( N ). +* On entry, if the I-th subproblem is not square, +* C( I ) contains the C-value of a Givens rotation related to +* the right null space of the I-th subproblem. +* +* S (input) REAL array, dimension ( N ). +* On entry, if the I-th subproblem is not square, +* S( I ) contains the S-value of a Givens rotation related to +* the right null space of the I-th subproblem. +* +* WORK (workspace) REAL array. +* The dimension must be at least N. +* +* IWORK (workspace) INTEGER array. +* The dimension must be at least 3 * N +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, IC, IM1, INODE, J, LF, LL, LVL, LVL2, + $ ND, NDB1, NDIML, NDIMR, NL, NLF, NLP1, NLVL, + $ NR, NRF, NRP1, SQRE +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, SLALS0, SLASDT, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( SMLSIZ.LT.3 ) THEN + INFO = -2 + ELSE IF( N.LT.SMLSIZ ) THEN + INFO = -3 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -4 + ELSE IF( LDB.LT.N ) THEN + INFO = -6 + ELSE IF( LDBX.LT.N ) THEN + INFO = -8 + ELSE IF( LDU.LT.N ) THEN + INFO = -10 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -19 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLALSA', -INFO ) + RETURN + END IF +* +* Book-keeping and setting up the computation tree. +* + INODE = 1 + NDIML = INODE + N + NDIMR = NDIML + N +* + CALL SLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ), + $ IWORK( NDIMR ), SMLSIZ ) +* +* The following code applies back the left singular vector factors. +* For applying back the right singular vector factors, go to 50. +* + IF( ICOMPQ.EQ.1 ) THEN + GO TO 50 + END IF +* +* The nodes on the bottom level of the tree were solved +* by SLASDQ. The corresponding left and right singular vector +* matrices are in explicit form. First apply back the left +* singular vector matrices. +* + NDB1 = ( ND+1 ) / 2 + DO 10 I = NDB1, ND +* +* IC : center row of each node +* NL : number of rows of left subproblem +* NR : number of rows of right subproblem +* NLF: starting row of the left subproblem +* NRF: starting row of the right subproblem +* + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NR = IWORK( NDIMR+I1 ) + NLF = IC - NL + NRF = IC + 1 + CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, + $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) + CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, + $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) + 10 CONTINUE +* +* Next copy the rows of B that correspond to unchanged rows +* in the bidiagonal matrix to BX. +* + DO 20 I = 1, ND + IC = IWORK( INODE+I-1 ) + CALL SCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX ) + 20 CONTINUE +* +* Finally go through the left singular vector matrices of all +* the other subproblems bottom-up on the tree. +* + J = 2**NLVL + SQRE = 0 +* + DO 40 LVL = NLVL, 1, -1 + LVL2 = 2*LVL - 1 +* +* find the first node LF and last node LL on +* the current level LVL +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 30 I = LF, LL + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + J = J - 1 + CALL SLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX, + $ B( NLF, 1 ), LDB, PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ), + $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ), + $ Z( NLF, LVL ), K( J ), C( J ), S( J ), WORK, + $ INFO ) + 30 CONTINUE + 40 CONTINUE + GO TO 90 +* +* ICOMPQ = 1: applying back the right singular vector factors. +* + 50 CONTINUE +* +* First now go through the right singular vector matrices of all +* the tree nodes top-down. +* + J = 0 + DO 70 LVL = 1, NLVL + LVL2 = 2*LVL - 1 +* +* Find the first node LF and last node LL on +* the current level LVL. +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 60 I = LL, LF, -1 + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + IF( I.EQ.LL ) THEN + SQRE = 0 + ELSE + SQRE = 1 + END IF + J = J + 1 + CALL SLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB, + $ BX( NLF, 1 ), LDBX, PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ), + $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ), + $ Z( NLF, LVL ), K( J ), C( J ), S( J ), WORK, + $ INFO ) + 60 CONTINUE + 70 CONTINUE +* +* The nodes on the bottom level of the tree were solved +* by SLASDQ. The corresponding right singular vector +* matrices are in explicit form. Apply them back. +* + NDB1 = ( ND+1 ) / 2 + DO 80 I = NDB1, ND + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NR = IWORK( NDIMR+I1 ) + NLP1 = NL + 1 + IF( I.EQ.ND ) THEN + NRP1 = NR + ELSE + NRP1 = NR + 1 + END IF + NLF = IC - NL + NRF = IC + 1 + CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, + $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) + CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, + $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) + 80 CONTINUE +* + 90 CONTINUE +* + RETURN +* +* End of SLALSA +* + END diff --git a/costa/native/external/lapack/slalsd.f b/costa/native/external/lapack/slalsd.f new file mode 100644 index 000000000..b14739e8b --- /dev/null +++ b/costa/native/external/lapack/slalsd.f @@ -0,0 +1,433 @@ + SUBROUTINE SLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND, + $ RANK, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS, RANK, SMLSIZ + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL B( LDB, * ), D( * ), E( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLALSD uses the singular value decomposition of A to solve the least +* squares problem of finding X to minimize the Euclidean norm of each +* column of A*X-B, where A is N-by-N upper bidiagonal, and X and B +* are N-by-NRHS. The solution X overwrites B. +* +* The singular values of A smaller than RCOND times the largest +* singular value are treated as zero in solving the least squares +* problem; in this case a minimum norm solution is returned. +* The actual singular values are returned in D in ascending order. +* +* This code makes very mild assumptions about floating point +* arithmetic. It will work on machines with a guard digit in +* add/subtract, or on those binary machines without guard digits +* which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. +* It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': D and E define an upper bidiagonal matrix. +* = 'L': D and E define a lower bidiagonal matrix. +* +* SMLSIZ (input) INTEGER +* The maximum size of the subproblems at the bottom of the +* computation tree. +* +* N (input) INTEGER +* The dimension of the bidiagonal matrix. N >= 0. +* +* NRHS (input) INTEGER +* The number of columns of B. NRHS must be at least 1. +* +* D (input/output) REAL array, dimension (N) +* On entry D contains the main diagonal of the bidiagonal +* matrix. On exit, if INFO = 0, D contains its singular values. +* +* E (input) REAL array, dimension (N-1) +* Contains the super-diagonal entries of the bidiagonal matrix. +* On exit, E has been destroyed. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On input, B contains the right hand sides of the least +* squares problem. On output, B contains the solution X. +* +* LDB (input) INTEGER +* The leading dimension of B in the calling subprogram. +* LDB must be at least max(1,N). +* +* RCOND (input) REAL +* The singular values of A less than or equal to RCOND times +* the largest singular value are treated as zero in solving +* the least squares problem. If RCOND is negative, +* machine precision is used instead. +* For example, if diag(S)*X=B were the least squares problem, +* where diag(S) is a diagonal matrix of singular values, the +* solution would be X(i) = B(i) / S(i) if S(i) is greater than +* RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to +* RCOND*max(S). +* +* RANK (output) INTEGER +* The number of singular values of A greater than RCOND times +* the largest singular value. +* +* WORK (workspace) REAL array, dimension at least +* (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2), +* where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1). +* +* IWORK (workspace) INTEGER array, dimension at least +* (3*N*NLVL + 11*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: The algorithm failed to compute an singular value while +* working on the submatrix lying in rows and columns +* INFO/(N+1) through MOD(INFO,N+1). +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 ) +* .. +* .. Local Scalars .. + INTEGER BX, BXST, C, DIFL, DIFR, GIVCOL, GIVNUM, + $ GIVPTR, I, ICMPQ1, ICMPQ2, IWK, J, K, NLVL, + $ NM1, NSIZE, NSUB, NWORK, PERM, POLES, S, SIZEI, + $ SMLSZP, SQRE, ST, ST1, U, VT, Z + REAL CS, EPS, ORGNRM, R, SN, TOL +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH, SLANST + EXTERNAL ISAMAX, SLAMCH, SLANST +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, SLACPY, SLALSA, SLARTG, SLASCL, + $ SLASDA, SLASDQ, SLASET, SLASRT, SROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, REAL, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -4 + ELSE IF( ( LDB.LT.1 ) .OR. ( LDB.LT.N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLALSD', -INFO ) + RETURN + END IF +* + EPS = SLAMCH( 'Epsilon' ) +* +* Set up the tolerance. +* + IF( ( RCOND.LE.ZERO ) .OR. ( RCOND.GE.ONE ) ) THEN + RCOND = EPS + END IF +* + RANK = 0 +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + IF( D( 1 ).EQ.ZERO ) THEN + CALL SLASET( 'A', 1, NRHS, ZERO, ZERO, B, LDB ) + ELSE + RANK = 1 + CALL SLASCL( 'G', 0, 0, D( 1 ), ONE, 1, NRHS, B, LDB, INFO ) + D( 1 ) = ABS( D( 1 ) ) + END IF + RETURN + END IF +* +* Rotate the matrix if it is lower bidiagonal. +* + IF( UPLO.EQ.'L' ) THEN + DO 10 I = 1, N - 1 + CALL SLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( NRHS.EQ.1 ) THEN + CALL SROT( 1, B( I, 1 ), 1, B( I+1, 1 ), 1, CS, SN ) + ELSE + WORK( I*2-1 ) = CS + WORK( I*2 ) = SN + END IF + 10 CONTINUE + IF( NRHS.GT.1 ) THEN + DO 30 I = 1, NRHS + DO 20 J = 1, N - 1 + CS = WORK( J*2-1 ) + SN = WORK( J*2 ) + CALL SROT( 1, B( J, I ), 1, B( J+1, I ), 1, CS, SN ) + 20 CONTINUE + 30 CONTINUE + END IF + END IF +* +* Scale. +* + NM1 = N - 1 + ORGNRM = SLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) THEN + CALL SLASET( 'A', N, NRHS, ZERO, ZERO, B, LDB ) + RETURN + END IF +* + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO ) + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, INFO ) +* +* If N is smaller than the minimum divide size SMLSIZ, then solve +* the problem with another solver. +* + IF( N.LE.SMLSIZ ) THEN + NWORK = 1 + N*N + CALL SLASET( 'A', N, N, ZERO, ONE, WORK, N ) + CALL SLASDQ( 'U', 0, N, N, 0, NRHS, D, E, WORK, N, WORK, N, B, + $ LDB, WORK( NWORK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + TOL = RCOND*ABS( D( ISAMAX( N, D, 1 ) ) ) + DO 40 I = 1, N + IF( D( I ).LE.TOL ) THEN + CALL SLASET( 'A', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) + ELSE + CALL SLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, B( I, 1 ), + $ LDB, INFO ) + RANK = RANK + 1 + END IF + 40 CONTINUE + CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO, + $ WORK( NWORK ), N ) + CALL SLACPY( 'A', N, NRHS, WORK( NWORK ), N, B, LDB ) +* +* Unscale. +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) + CALL SLASRT( 'D', N, D, INFO ) + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO ) +* + RETURN + END IF +* +* Book-keeping and setting up some constants. +* + NLVL = INT( LOG( REAL( N ) / REAL( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1 +* + SMLSZP = SMLSIZ + 1 +* + U = 1 + VT = 1 + SMLSIZ*N + DIFL = VT + SMLSZP*N + DIFR = DIFL + NLVL*N + Z = DIFR + NLVL*N*2 + C = Z + NLVL*N + S = C + N + POLES = S + N + GIVNUM = POLES + 2*NLVL*N + BX = GIVNUM + 2*NLVL*N + NWORK = BX + N*NRHS +* + SIZEI = 1 + N + K = SIZEI + N + GIVPTR = K + N + PERM = GIVPTR + N + GIVCOL = PERM + NLVL*N + IWK = GIVCOL + NLVL*N*2 +* + ST = 1 + SQRE = 0 + ICMPQ1 = 1 + ICMPQ2 = 0 + NSUB = 0 +* + DO 50 I = 1, N + IF( ABS( D( I ) ).LT.EPS ) THEN + D( I ) = SIGN( EPS, D( I ) ) + END IF + 50 CONTINUE +* + DO 60 I = 1, NM1 + IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN + NSUB = NSUB + 1 + IWORK( NSUB ) = ST +* +* Subproblem found. First determine its size and then +* apply divide and conquer on it. +* + IF( I.LT.NM1 ) THEN +* +* A subproblem with E(I) small for I < NM1. +* + NSIZE = I - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + ELSE IF( ABS( E( I ) ).GE.EPS ) THEN +* +* A subproblem with E(NM1) not too small but I = NM1. +* + NSIZE = N - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + ELSE +* +* A subproblem with E(NM1) small. This implies an +* 1-by-1 subproblem at D(N), which is not solved +* explicitly. +* + NSIZE = I - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + NSUB = NSUB + 1 + IWORK( NSUB ) = N + IWORK( SIZEI+NSUB-1 ) = 1 + CALL SCOPY( NRHS, B( N, 1 ), LDB, WORK( BX+NM1 ), N ) + END IF + ST1 = ST - 1 + IF( NSIZE.EQ.1 ) THEN +* +* This is a 1-by-1 subproblem and is not solved +* explicitly. +* + CALL SCOPY( NRHS, B( ST, 1 ), LDB, WORK( BX+ST1 ), N ) + ELSE IF( NSIZE.LE.SMLSIZ ) THEN +* +* This is a small subproblem and is solved by SLASDQ. +* + CALL SLASET( 'A', NSIZE, NSIZE, ZERO, ONE, + $ WORK( VT+ST1 ), N ) + CALL SLASDQ( 'U', 0, NSIZE, NSIZE, 0, NRHS, D( ST ), + $ E( ST ), WORK( VT+ST1 ), N, WORK( NWORK ), + $ N, B( ST, 1 ), LDB, WORK( NWORK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + CALL SLACPY( 'A', NSIZE, NRHS, B( ST, 1 ), LDB, + $ WORK( BX+ST1 ), N ) + ELSE +* +* A large problem. Solve it using divide and conquer. +* + CALL SLASDA( ICMPQ1, SMLSIZ, NSIZE, SQRE, D( ST ), + $ E( ST ), WORK( U+ST1 ), N, WORK( VT+ST1 ), + $ IWORK( K+ST1 ), WORK( DIFL+ST1 ), + $ WORK( DIFR+ST1 ), WORK( Z+ST1 ), + $ WORK( POLES+ST1 ), IWORK( GIVPTR+ST1 ), + $ IWORK( GIVCOL+ST1 ), N, IWORK( PERM+ST1 ), + $ WORK( GIVNUM+ST1 ), WORK( C+ST1 ), + $ WORK( S+ST1 ), WORK( NWORK ), IWORK( IWK ), + $ INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + BXST = BX + ST1 + CALL SLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, B( ST, 1 ), + $ LDB, WORK( BXST ), N, WORK( U+ST1 ), N, + $ WORK( VT+ST1 ), IWORK( K+ST1 ), + $ WORK( DIFL+ST1 ), WORK( DIFR+ST1 ), + $ WORK( Z+ST1 ), WORK( POLES+ST1 ), + $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N, + $ IWORK( PERM+ST1 ), WORK( GIVNUM+ST1 ), + $ WORK( C+ST1 ), WORK( S+ST1 ), WORK( NWORK ), + $ IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + END IF + ST = I + 1 + END IF + 60 CONTINUE +* +* Apply the singular values and treat the tiny ones as zero. +* + TOL = RCOND*ABS( D( ISAMAX( N, D, 1 ) ) ) +* + DO 70 I = 1, N +* +* Some of the elements in D can be negative because 1-by-1 +* subproblems were not solved explicitly. +* + IF( ABS( D( I ) ).LE.TOL ) THEN + CALL SLASET( 'A', 1, NRHS, ZERO, ZERO, WORK( BX+I-1 ), N ) + ELSE + RANK = RANK + 1 + CALL SLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, + $ WORK( BX+I-1 ), N, INFO ) + END IF + D( I ) = ABS( D( I ) ) + 70 CONTINUE +* +* Now apply back the right singular vectors. +* + ICMPQ2 = 1 + DO 80 I = 1, NSUB + ST = IWORK( I ) + ST1 = ST - 1 + NSIZE = IWORK( SIZEI+I-1 ) + BXST = BX + ST1 + IF( NSIZE.EQ.1 ) THEN + CALL SCOPY( NRHS, WORK( BXST ), N, B( ST, 1 ), LDB ) + ELSE IF( NSIZE.LE.SMLSIZ ) THEN + CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ WORK( VT+ST1 ), N, WORK( BXST ), N, ZERO, + $ B( ST, 1 ), LDB ) + ELSE + CALL SLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, WORK( BXST ), N, + $ B( ST, 1 ), LDB, WORK( U+ST1 ), N, + $ WORK( VT+ST1 ), IWORK( K+ST1 ), + $ WORK( DIFL+ST1 ), WORK( DIFR+ST1 ), + $ WORK( Z+ST1 ), WORK( POLES+ST1 ), + $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N, + $ IWORK( PERM+ST1 ), WORK( GIVNUM+ST1 ), + $ WORK( C+ST1 ), WORK( S+ST1 ), WORK( NWORK ), + $ IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + END IF + 80 CONTINUE +* +* Unscale and sort the singular values. +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) + CALL SLASRT( 'D', N, D, INFO ) + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO ) +* + RETURN +* +* End of SLALSD +* + END diff --git a/costa/native/external/lapack/slamch.f b/costa/native/external/lapack/slamch.f new file mode 100644 index 000000000..afb4d3686 --- /dev/null +++ b/costa/native/external/lapack/slamch.f @@ -0,0 +1,857 @@ + REAL FUNCTION SLAMCH( CMACH ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER CMACH +* .. +* +* Purpose +* ======= +* +* SLAMCH determines single precision machine parameters. +* +* Arguments +* ========= +* +* CMACH (input) CHARACTER*1 +* Specifies the value to be returned by SLAMCH: +* = 'E' or 'e', SLAMCH := eps +* = 'S' or 's , SLAMCH := sfmin +* = 'B' or 'b', SLAMCH := base +* = 'P' or 'p', SLAMCH := eps*base +* = 'N' or 'n', SLAMCH := t +* = 'R' or 'r', SLAMCH := rnd +* = 'M' or 'm', SLAMCH := emin +* = 'U' or 'u', SLAMCH := rmin +* = 'L' or 'l', SLAMCH := emax +* = 'O' or 'o', SLAMCH := rmax +* +* where +* +* eps = relative machine precision +* sfmin = safe minimum, such that 1/sfmin does not overflow +* base = base of the machine +* prec = eps*base +* t = number of (base) digits in the mantissa +* rnd = 1.0 when rounding occurs in addition, 0.0 otherwise +* emin = minimum exponent before (gradual) underflow +* rmin = underflow threshold - base**(emin-1) +* emax = largest exponent before overflow +* rmax = overflow threshold - (base**emax)*(1-eps) +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL FIRST, LRND + INTEGER BETA, IMAX, IMIN, IT + REAL BASE, EMAX, EMIN, EPS, PREC, RMACH, RMAX, RMIN, + $ RND, SFMIN, SMALL, T +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLAMC2 +* .. +* .. Save statement .. + SAVE FIRST, EPS, SFMIN, BASE, T, RND, EMIN, RMIN, + $ EMAX, RMAX, PREC +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + FIRST = .FALSE. + CALL SLAMC2( BETA, IT, LRND, EPS, IMIN, RMIN, IMAX, RMAX ) + BASE = BETA + T = IT + IF( LRND ) THEN + RND = ONE + EPS = ( BASE**( 1-IT ) ) / 2 + ELSE + RND = ZERO + EPS = BASE**( 1-IT ) + END IF + PREC = EPS*BASE + EMIN = IMIN + EMAX = IMAX + SFMIN = RMIN + SMALL = ONE / RMAX + IF( SMALL.GE.SFMIN ) THEN +* +* Use SMALL plus a bit, to avoid the possibility of rounding +* causing overflow when computing 1/sfmin. +* + SFMIN = SMALL*( ONE+EPS ) + END IF + END IF +* + IF( LSAME( CMACH, 'E' ) ) THEN + RMACH = EPS + ELSE IF( LSAME( CMACH, 'S' ) ) THEN + RMACH = SFMIN + ELSE IF( LSAME( CMACH, 'B' ) ) THEN + RMACH = BASE + ELSE IF( LSAME( CMACH, 'P' ) ) THEN + RMACH = PREC + ELSE IF( LSAME( CMACH, 'N' ) ) THEN + RMACH = T + ELSE IF( LSAME( CMACH, 'R' ) ) THEN + RMACH = RND + ELSE IF( LSAME( CMACH, 'M' ) ) THEN + RMACH = EMIN + ELSE IF( LSAME( CMACH, 'U' ) ) THEN + RMACH = RMIN + ELSE IF( LSAME( CMACH, 'L' ) ) THEN + RMACH = EMAX + ELSE IF( LSAME( CMACH, 'O' ) ) THEN + RMACH = RMAX + END IF +* + SLAMCH = RMACH + RETURN +* +* End of SLAMCH +* + END +* +************************************************************************ +* + SUBROUTINE SLAMC1( BETA, T, RND, IEEE1 ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + LOGICAL IEEE1, RND + INTEGER BETA, T +* .. +* +* Purpose +* ======= +* +* SLAMC1 determines the machine parameters given by BETA, T, RND, and +* IEEE1. +* +* Arguments +* ========= +* +* BETA (output) INTEGER +* The base of the machine. +* +* T (output) INTEGER +* The number of ( BETA ) digits in the mantissa. +* +* RND (output) LOGICAL +* Specifies whether proper rounding ( RND = .TRUE. ) or +* chopping ( RND = .FALSE. ) occurs in addition. This may not +* be a reliable guide to the way in which the machine performs +* its arithmetic. +* +* IEEE1 (output) LOGICAL +* Specifies whether rounding appears to be done in the IEEE +* 'round to nearest' style. +* +* Further Details +* =============== +* +* The routine is based on the routine ENVRON by Malcolm and +* incorporates suggestions by Gentleman and Marovich. See +* +* Malcolm M. A. (1972) Algorithms to reveal properties of +* floating-point arithmetic. Comms. of the ACM, 15, 949-951. +* +* Gentleman W. M. and Marovich S. B. (1974) More on algorithms +* that reveal properties of floating point arithmetic units. +* Comms. of the ACM, 17, 276-277. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL FIRST, LIEEE1, LRND + INTEGER LBETA, LT + REAL A, B, C, F, ONE, QTR, SAVEC, T1, T2 +* .. +* .. External Functions .. + REAL SLAMC3 + EXTERNAL SLAMC3 +* .. +* .. Save statement .. + SAVE FIRST, LIEEE1, LBETA, LRND, LT +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + FIRST = .FALSE. + ONE = 1 +* +* LBETA, LIEEE1, LT and LRND are the local values of BETA, +* IEEE1, T and RND. +* +* Throughout this routine we use the function SLAMC3 to ensure +* that relevant values are stored and not held in registers, or +* are not affected by optimizers. +* +* Compute a = 2.0**m with the smallest positive integer m such +* that +* +* fl( a + 1.0 ) = a. +* + A = 1 + C = 1 +* +*+ WHILE( C.EQ.ONE )LOOP + 10 CONTINUE + IF( C.EQ.ONE ) THEN + A = 2*A + C = SLAMC3( A, ONE ) + C = SLAMC3( C, -A ) + GO TO 10 + END IF +*+ END WHILE +* +* Now compute b = 2.0**m with the smallest positive integer m +* such that +* +* fl( a + b ) .gt. a. +* + B = 1 + C = SLAMC3( A, B ) +* +*+ WHILE( C.EQ.A )LOOP + 20 CONTINUE + IF( C.EQ.A ) THEN + B = 2*B + C = SLAMC3( A, B ) + GO TO 20 + END IF +*+ END WHILE +* +* Now compute the base. a and c are neighbouring floating point +* numbers in the interval ( beta**t, beta**( t + 1 ) ) and so +* their difference is beta. Adding 0.25 to c is to ensure that it +* is truncated to beta and not ( beta - 1 ). +* + QTR = ONE / 4 + SAVEC = C + C = SLAMC3( C, -A ) + LBETA = C + QTR +* +* Now determine whether rounding or chopping occurs, by adding a +* bit less than beta/2 and a bit more than beta/2 to a. +* + B = LBETA + F = SLAMC3( B / 2, -B / 100 ) + C = SLAMC3( F, A ) + IF( C.EQ.A ) THEN + LRND = .TRUE. + ELSE + LRND = .FALSE. + END IF + F = SLAMC3( B / 2, B / 100 ) + C = SLAMC3( F, A ) + IF( ( LRND ) .AND. ( C.EQ.A ) ) + $ LRND = .FALSE. +* +* Try and decide whether rounding is done in the IEEE 'round to +* nearest' style. B/2 is half a unit in the last place of the two +* numbers A and SAVEC. Furthermore, A is even, i.e. has last bit +* zero, and SAVEC is odd. Thus adding B/2 to A should not change +* A, but adding B/2 to SAVEC should change SAVEC. +* + T1 = SLAMC3( B / 2, A ) + T2 = SLAMC3( B / 2, SAVEC ) + LIEEE1 = ( T1.EQ.A ) .AND. ( T2.GT.SAVEC ) .AND. LRND +* +* Now find the mantissa, t. It should be the integer part of +* log to the base beta of a, however it is safer to determine t +* by powering. So we find t as the smallest positive integer for +* which +* +* fl( beta**t + 1.0 ) = 1.0. +* + LT = 0 + A = 1 + C = 1 +* +*+ WHILE( C.EQ.ONE )LOOP + 30 CONTINUE + IF( C.EQ.ONE ) THEN + LT = LT + 1 + A = A*LBETA + C = SLAMC3( A, ONE ) + C = SLAMC3( C, -A ) + GO TO 30 + END IF +*+ END WHILE +* + END IF +* + BETA = LBETA + T = LT + RND = LRND + IEEE1 = LIEEE1 + RETURN +* +* End of SLAMC1 +* + END +* +************************************************************************ +* + SUBROUTINE SLAMC2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + LOGICAL RND + INTEGER BETA, EMAX, EMIN, T + REAL EPS, RMAX, RMIN +* .. +* +* Purpose +* ======= +* +* SLAMC2 determines the machine parameters specified in its argument +* list. +* +* Arguments +* ========= +* +* BETA (output) INTEGER +* The base of the machine. +* +* T (output) INTEGER +* The number of ( BETA ) digits in the mantissa. +* +* RND (output) LOGICAL +* Specifies whether proper rounding ( RND = .TRUE. ) or +* chopping ( RND = .FALSE. ) occurs in addition. This may not +* be a reliable guide to the way in which the machine performs +* its arithmetic. +* +* EPS (output) REAL +* The smallest positive number such that +* +* fl( 1.0 - EPS ) .LT. 1.0, +* +* where fl denotes the computed value. +* +* EMIN (output) INTEGER +* The minimum exponent before (gradual) underflow occurs. +* +* RMIN (output) REAL +* The smallest normalized number for the machine, given by +* BASE**( EMIN - 1 ), where BASE is the floating point value +* of BETA. +* +* EMAX (output) INTEGER +* The maximum exponent before overflow occurs. +* +* RMAX (output) REAL +* The largest positive number for the machine, given by +* BASE**EMAX * ( 1 - EPS ), where BASE is the floating point +* value of BETA. +* +* Further Details +* =============== +* +* The computation of EPS is based on a routine PARANOIA by +* W. Kahan of the University of California at Berkeley. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL FIRST, IEEE, IWARN, LIEEE1, LRND + INTEGER GNMIN, GPMIN, I, LBETA, LEMAX, LEMIN, LT, + $ NGNMIN, NGPMIN + REAL A, B, C, HALF, LEPS, LRMAX, LRMIN, ONE, RBASE, + $ SIXTH, SMALL, THIRD, TWO, ZERO +* .. +* .. External Functions .. + REAL SLAMC3 + EXTERNAL SLAMC3 +* .. +* .. External Subroutines .. + EXTERNAL SLAMC1, SLAMC4, SLAMC5 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Save statement .. + SAVE FIRST, IWARN, LBETA, LEMAX, LEMIN, LEPS, LRMAX, + $ LRMIN, LT +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / , IWARN / .FALSE. / +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + FIRST = .FALSE. + ZERO = 0 + ONE = 1 + TWO = 2 +* +* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of +* BETA, T, RND, EPS, EMIN and RMIN. +* +* Throughout this routine we use the function SLAMC3 to ensure +* that relevant values are stored and not held in registers, or +* are not affected by optimizers. +* +* SLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. +* + CALL SLAMC1( LBETA, LT, LRND, LIEEE1 ) +* +* Start to find EPS. +* + B = LBETA + A = B**( -LT ) + LEPS = A +* +* Try some tricks to see whether or not this is the correct EPS. +* + B = TWO / 3 + HALF = ONE / 2 + SIXTH = SLAMC3( B, -HALF ) + THIRD = SLAMC3( SIXTH, SIXTH ) + B = SLAMC3( THIRD, -HALF ) + B = SLAMC3( B, SIXTH ) + B = ABS( B ) + IF( B.LT.LEPS ) + $ B = LEPS +* + LEPS = 1 +* +*+ WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP + 10 CONTINUE + IF( ( LEPS.GT.B ) .AND. ( B.GT.ZERO ) ) THEN + LEPS = B + C = SLAMC3( HALF*LEPS, ( TWO**5 )*( LEPS**2 ) ) + C = SLAMC3( HALF, -C ) + B = SLAMC3( HALF, C ) + C = SLAMC3( HALF, -B ) + B = SLAMC3( HALF, C ) + GO TO 10 + END IF +*+ END WHILE +* + IF( A.LT.LEPS ) + $ LEPS = A +* +* Computation of EPS complete. +* +* Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). +* Keep dividing A by BETA until (gradual) underflow occurs. This +* is detected when we cannot recover the previous A. +* + RBASE = ONE / LBETA + SMALL = ONE + DO 20 I = 1, 3 + SMALL = SLAMC3( SMALL*RBASE, ZERO ) + 20 CONTINUE + A = SLAMC3( ONE, SMALL ) + CALL SLAMC4( NGPMIN, ONE, LBETA ) + CALL SLAMC4( NGNMIN, -ONE, LBETA ) + CALL SLAMC4( GPMIN, A, LBETA ) + CALL SLAMC4( GNMIN, -A, LBETA ) + IEEE = .FALSE. +* + IF( ( NGPMIN.EQ.NGNMIN ) .AND. ( GPMIN.EQ.GNMIN ) ) THEN + IF( NGPMIN.EQ.GPMIN ) THEN + LEMIN = NGPMIN +* ( Non twos-complement machines, no gradual underflow; +* e.g., VAX ) + ELSE IF( ( GPMIN-NGPMIN ).EQ.3 ) THEN + LEMIN = NGPMIN - 1 + LT + IEEE = .TRUE. +* ( Non twos-complement machines, with gradual underflow; +* e.g., IEEE standard followers ) + ELSE + LEMIN = MIN( NGPMIN, GPMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF +* + ELSE IF( ( NGPMIN.EQ.GPMIN ) .AND. ( NGNMIN.EQ.GNMIN ) ) THEN + IF( ABS( NGPMIN-NGNMIN ).EQ.1 ) THEN + LEMIN = MAX( NGPMIN, NGNMIN ) +* ( Twos-complement machines, no gradual underflow; +* e.g., CYBER 205 ) + ELSE + LEMIN = MIN( NGPMIN, NGNMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF +* + ELSE IF( ( ABS( NGPMIN-NGNMIN ).EQ.1 ) .AND. + $ ( GPMIN.EQ.GNMIN ) ) THEN + IF( ( GPMIN-MIN( NGPMIN, NGNMIN ) ).EQ.3 ) THEN + LEMIN = MAX( NGPMIN, NGNMIN ) - 1 + LT +* ( Twos-complement machines with gradual underflow; +* no known machine ) + ELSE + LEMIN = MIN( NGPMIN, NGNMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF +* + ELSE + LEMIN = MIN( NGPMIN, NGNMIN, GPMIN, GNMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF +*** +* Comment out this if block if EMIN is ok + IF( IWARN ) THEN + FIRST = .TRUE. + WRITE( 6, FMT = 9999 )LEMIN + END IF +*** +* +* Assume IEEE arithmetic if we found denormalised numbers above, +* or if arithmetic seems to round in the IEEE style, determined +* in routine SLAMC1. A true IEEE machine should have both things +* true; however, faulty machines may have one or the other. +* + IEEE = IEEE .OR. LIEEE1 +* +* Compute RMIN by successive division by BETA. We could compute +* RMIN as BASE**( EMIN - 1 ), but some machines underflow during +* this computation. +* + LRMIN = 1 + DO 30 I = 1, 1 - LEMIN + LRMIN = SLAMC3( LRMIN*RBASE, ZERO ) + 30 CONTINUE +* +* Finally, call SLAMC5 to compute EMAX and RMAX. +* + CALL SLAMC5( LBETA, LT, LEMIN, IEEE, LEMAX, LRMAX ) + END IF +* + BETA = LBETA + T = LT + RND = LRND + EPS = LEPS + EMIN = LEMIN + RMIN = LRMIN + EMAX = LEMAX + RMAX = LRMAX +* + RETURN +* + 9999 FORMAT( / / ' WARNING. The value EMIN may be incorrect:-', + $ ' EMIN = ', I8, / + $ ' If, after inspection, the value EMIN looks', + $ ' acceptable please comment out ', + $ / ' the IF block as marked within the code of routine', + $ ' SLAMC2,', / ' otherwise supply EMIN explicitly.', / ) +* +* End of SLAMC2 +* + END +* +************************************************************************ +* + REAL FUNCTION SLAMC3( A, B ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + REAL A, B +* .. +* +* Purpose +* ======= +* +* SLAMC3 is intended to force A and B to be stored prior to doing +* the addition of A and B , for use in situations where optimizers +* might hold one of these in a register. +* +* Arguments +* ========= +* +* A, B (input) REAL +* The values A and B. +* +* ===================================================================== +* +* .. Executable Statements .. +* + SLAMC3 = A + B +* + RETURN +* +* End of SLAMC3 +* + END +* +************************************************************************ +* + SUBROUTINE SLAMC4( EMIN, START, BASE ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + INTEGER BASE, EMIN + REAL START +* .. +* +* Purpose +* ======= +* +* SLAMC4 is a service routine for SLAMC2. +* +* Arguments +* ========= +* +* EMIN (output) EMIN +* The minimum exponent before (gradual) underflow, computed by +* setting A = START and dividing by BASE until the previous A +* can not be recovered. +* +* START (input) REAL +* The starting point for determining EMIN. +* +* BASE (input) INTEGER +* The base of the machine. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I + REAL A, B1, B2, C1, C2, D1, D2, ONE, RBASE, ZERO +* .. +* .. External Functions .. + REAL SLAMC3 + EXTERNAL SLAMC3 +* .. +* .. Executable Statements .. +* + A = START + ONE = 1 + RBASE = ONE / BASE + ZERO = 0 + EMIN = 1 + B1 = SLAMC3( A*RBASE, ZERO ) + C1 = A + C2 = A + D1 = A + D2 = A +*+ WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. +* $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP + 10 CONTINUE + IF( ( C1.EQ.A ) .AND. ( C2.EQ.A ) .AND. ( D1.EQ.A ) .AND. + $ ( D2.EQ.A ) ) THEN + EMIN = EMIN - 1 + A = B1 + B1 = SLAMC3( A / BASE, ZERO ) + C1 = SLAMC3( B1*BASE, ZERO ) + D1 = ZERO + DO 20 I = 1, BASE + D1 = D1 + B1 + 20 CONTINUE + B2 = SLAMC3( A*RBASE, ZERO ) + C2 = SLAMC3( B2 / RBASE, ZERO ) + D2 = ZERO + DO 30 I = 1, BASE + D2 = D2 + B2 + 30 CONTINUE + GO TO 10 + END IF +*+ END WHILE +* + RETURN +* +* End of SLAMC4 +* + END +* +************************************************************************ +* + SUBROUTINE SLAMC5( BETA, P, EMIN, IEEE, EMAX, RMAX ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + LOGICAL IEEE + INTEGER BETA, EMAX, EMIN, P + REAL RMAX +* .. +* +* Purpose +* ======= +* +* SLAMC5 attempts to compute RMAX, the largest machine floating-point +* number, without overflow. It assumes that EMAX + abs(EMIN) sum +* approximately to a power of 2. It will fail on machines where this +* assumption does not hold, for example, the Cyber 205 (EMIN = -28625, +* EMAX = 28718). It will also fail if the value supplied for EMIN is +* too large (i.e. too close to zero), probably with overflow. +* +* Arguments +* ========= +* +* BETA (input) INTEGER +* The base of floating-point arithmetic. +* +* P (input) INTEGER +* The number of base BETA digits in the mantissa of a +* floating-point value. +* +* EMIN (input) INTEGER +* The minimum exponent before (gradual) underflow. +* +* IEEE (input) LOGICAL +* A logical flag specifying whether or not the arithmetic +* system is thought to comply with the IEEE standard. +* +* EMAX (output) INTEGER +* The largest exponent before overflow +* +* RMAX (output) REAL +* The largest machine floating-point number. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + INTEGER EXBITS, EXPSUM, I, LEXP, NBITS, TRY, UEXP + REAL OLDY, RECBAS, Y, Z +* .. +* .. External Functions .. + REAL SLAMC3 + EXTERNAL SLAMC3 +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* First compute LEXP and UEXP, two powers of 2 that bound +* abs(EMIN). We then assume that EMAX + abs(EMIN) will sum +* approximately to the bound that is closest to abs(EMIN). +* (EMAX is the exponent of the required number RMAX). +* + LEXP = 1 + EXBITS = 1 + 10 CONTINUE + TRY = LEXP*2 + IF( TRY.LE.( -EMIN ) ) THEN + LEXP = TRY + EXBITS = EXBITS + 1 + GO TO 10 + END IF + IF( LEXP.EQ.-EMIN ) THEN + UEXP = LEXP + ELSE + UEXP = TRY + EXBITS = EXBITS + 1 + END IF +* +* Now -LEXP is less than or equal to EMIN, and -UEXP is greater +* than or equal to EMIN. EXBITS is the number of bits needed to +* store the exponent. +* + IF( ( UEXP+EMIN ).GT.( -LEXP-EMIN ) ) THEN + EXPSUM = 2*LEXP + ELSE + EXPSUM = 2*UEXP + END IF +* +* EXPSUM is the exponent range, approximately equal to +* EMAX - EMIN + 1 . +* + EMAX = EXPSUM + EMIN - 1 + NBITS = 1 + EXBITS + P +* +* NBITS is the total number of bits needed to store a +* floating-point number. +* + IF( ( MOD( NBITS, 2 ).EQ.1 ) .AND. ( BETA.EQ.2 ) ) THEN +* +* Either there are an odd number of bits used to store a +* floating-point number, which is unlikely, or some bits are +* not used in the representation of numbers, which is possible, +* (e.g. Cray machines) or the mantissa has an implicit bit, +* (e.g. IEEE machines, Dec Vax machines), which is perhaps the +* most likely. We have to assume the last alternative. +* If this is true, then we need to reduce EMAX by one because +* there must be some way of representing zero in an implicit-bit +* system. On machines like Cray, we are reducing EMAX by one +* unnecessarily. +* + EMAX = EMAX - 1 + END IF +* + IF( IEEE ) THEN +* +* Assume we are on an IEEE machine which reserves one exponent +* for infinity and NaN. +* + EMAX = EMAX - 1 + END IF +* +* Now create RMAX, the largest machine number, which should +* be equal to (1.0 - BETA**(-P)) * BETA**EMAX . +* +* First compute 1.0 - BETA**(-P), being careful that the +* result is less than 1.0 . +* + RECBAS = ONE / BETA + Z = BETA - ONE + Y = ZERO + DO 20 I = 1, P + Z = Z*RECBAS + IF( Y.LT.ONE ) + $ OLDY = Y + Y = SLAMC3( Y, Z ) + 20 CONTINUE + IF( Y.GE.ONE ) + $ Y = OLDY +* +* Now multiply by BETA**EMAX to get RMAX. +* + DO 30 I = 1, EMAX + Y = SLAMC3( Y*BETA, ZERO ) + 30 CONTINUE +* + RMAX = Y + RETURN +* +* End of SLAMC5 +* + END diff --git a/costa/native/external/lapack/slamrg.f b/costa/native/external/lapack/slamrg.f new file mode 100644 index 000000000..402189a11 --- /dev/null +++ b/costa/native/external/lapack/slamrg.f @@ -0,0 +1,104 @@ + SUBROUTINE SLAMRG( N1, N2, A, STRD1, STRD2, INDEX ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER N1, N2, STRD1, STRD2 +* .. +* .. Array Arguments .. + INTEGER INDEX( * ) + REAL A( * ) +* .. +* +* Purpose +* ======= +* +* SLAMRG will create a permutation list which will merge the elements +* of A (which is composed of two independently sorted sets) into a +* single set which is sorted in ascending order. +* +* Arguments +* ========= +* +* N1 (input) INTEGER +* N2 (input) INTEGER +* These arguements contain the respective lengths of the two +* sorted lists to be merged. +* +* A (input) REAL array, dimension (N1+N2) +* The first N1 elements of A contain a list of numbers which +* are sorted in either ascending or descending order. Likewise +* for the final N2 elements. +* +* STRD1 (input) INTEGER +* STRD2 (input) INTEGER +* These are the strides to be taken through the array A. +* Allowable strides are 1 and -1. They indicate whether a +* subset of A is sorted in ascending (STRDx = 1) or descending +* (STRDx = -1) order. +* +* INDEX (output) INTEGER array, dimension (N1+N2) +* On exit this array will contain a permutation such that +* if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be +* sorted in ascending order. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IND1, IND2, N1SV, N2SV +* .. +* .. Executable Statements .. +* + N1SV = N1 + N2SV = N2 + IF( STRD1.GT.0 ) THEN + IND1 = 1 + ELSE + IND1 = N1 + END IF + IF( STRD2.GT.0 ) THEN + IND2 = 1 + N1 + ELSE + IND2 = N1 + N2 + END IF + I = 1 +* while ( (N1SV > 0) & (N2SV > 0) ) + 10 CONTINUE + IF( N1SV.GT.0 .AND. N2SV.GT.0 ) THEN + IF( A( IND1 ).LE.A( IND2 ) ) THEN + INDEX( I ) = IND1 + I = I + 1 + IND1 = IND1 + STRD1 + N1SV = N1SV - 1 + ELSE + INDEX( I ) = IND2 + I = I + 1 + IND2 = IND2 + STRD2 + N2SV = N2SV - 1 + END IF + GO TO 10 + END IF +* end while + IF( N1SV.EQ.0 ) THEN + DO 20 N1SV = 1, N2SV + INDEX( I ) = IND2 + I = I + 1 + IND2 = IND2 + STRD2 + 20 CONTINUE + ELSE +* N2SV .EQ. 0 + DO 30 N2SV = 1, N1SV + INDEX( I ) = IND1 + I = I + 1 + IND1 = IND1 + STRD1 + 30 CONTINUE + END IF +* + RETURN +* +* End of SLAMRG +* + END diff --git a/costa/native/external/lapack/slangb.f b/costa/native/external/lapack/slangb.f new file mode 100644 index 000000000..3b1e51abb --- /dev/null +++ b/costa/native/external/lapack/slangb.f @@ -0,0 +1,155 @@ + REAL FUNCTION SLANGB( NORM, N, KL, KU, AB, LDAB, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER KL, KU, LDAB, N +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLANGB returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of an +* n by n band matrix A, with kl sub-diagonals and ku super-diagonals. +* +* Description +* =========== +* +* SLANGB returns the value +* +* SLANGB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in SLANGB as described +* above. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, SLANGB is +* set to zero. +* +* KL (input) INTEGER +* The number of sub-diagonals of the matrix A. KL >= 0. +* +* KU (input) INTEGER +* The number of super-diagonals of the matrix A. KU >= 0. +* +* AB (input) REAL array, dimension (LDAB,N) +* The band matrix A, stored in rows 1 to KL+KU+1. The j-th +* column of A is stored in the j-th column of the array AB as +* follows: +* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KL+KU+1. +* +* WORK (workspace) REAL array, dimension (LWORK), +* where LWORK >= N when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, K, L + REAL SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 30 CONTINUE + VALUE = MAX( VALUE, SUM ) + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, N + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + K = KU + 1 - J + DO 60 I = MAX( 1, J-KU ), MIN( N, J+KL ) + WORK( I ) = WORK( I ) + ABS( AB( K+I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + L = MAX( 1, J-KU ) + K = KU + 1 - J + L + CALL SLASSQ( MIN( N, J+KL )-L+1, AB( K, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + SLANGB = VALUE + RETURN +* +* End of SLANGB +* + END diff --git a/costa/native/external/lapack/slange.f b/costa/native/external/lapack/slange.f new file mode 100644 index 000000000..beaeab919 --- /dev/null +++ b/costa/native/external/lapack/slange.f @@ -0,0 +1,145 @@ + REAL FUNCTION SLANGE( NORM, M, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLANGE returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* real matrix A. +* +* Description +* =========== +* +* SLANGE returns the value +* +* SLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in SLANGE as described +* above. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. When M = 0, +* SLANGE is set to zero. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. When N = 0, +* SLANGE is set to zero. +* +* A (input) REAL array, dimension (LDA,N) +* The m by n matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(M,1). +* +* WORK (workspace) REAL array, dimension (LWORK), +* where LWORK >= M when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( MIN( M, N ).EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = 1, M + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = 1, M + SUM = SUM + ABS( A( I, J ) ) + 30 CONTINUE + VALUE = MAX( VALUE, SUM ) + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, M + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + DO 60 I = 1, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, M + VALUE = MAX( VALUE, WORK( I ) ) + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + CALL SLASSQ( M, A( 1, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + SLANGE = VALUE + RETURN +* +* End of SLANGE +* + END diff --git a/costa/native/external/lapack/slangt.f b/costa/native/external/lapack/slangt.f new file mode 100644 index 000000000..7dd25cfee --- /dev/null +++ b/costa/native/external/lapack/slangt.f @@ -0,0 +1,142 @@ + REAL FUNCTION SLANGT( NORM, N, DL, D, DU ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER N +* .. +* .. Array Arguments .. + REAL D( * ), DL( * ), DU( * ) +* .. +* +* Purpose +* ======= +* +* SLANGT returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* real tridiagonal matrix A. +* +* Description +* =========== +* +* SLANGT returns the value +* +* SLANGT = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in SLANGT as described +* above. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, SLANGT is +* set to zero. +* +* DL (input) REAL array, dimension (N-1) +* The (n-1) sub-diagonal elements of A. +* +* D (input) REAL array, dimension (N) +* The diagonal elements of A. +* +* DU (input) REAL array, dimension (N-1) +* The (n-1) super-diagonal elements of A. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL ANORM, SCALE, SUM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + ANORM = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + ANORM = ABS( D( N ) ) + DO 10 I = 1, N - 1 + ANORM = MAX( ANORM, ABS( DL( I ) ) ) + ANORM = MAX( ANORM, ABS( D( I ) ) ) + ANORM = MAX( ANORM, ABS( DU( I ) ) ) + 10 CONTINUE + ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN +* +* Find norm1(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = MAX( ABS( D( 1 ) )+ABS( DL( 1 ) ), + $ ABS( D( N ) )+ABS( DU( N-1 ) ) ) + DO 20 I = 2, N - 1 + ANORM = MAX( ANORM, ABS( D( I ) )+ABS( DL( I ) )+ + $ ABS( DU( I-1 ) ) ) + 20 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = MAX( ABS( D( 1 ) )+ABS( DU( 1 ) ), + $ ABS( D( N ) )+ABS( DL( N-1 ) ) ) + DO 30 I = 2, N - 1 + ANORM = MAX( ANORM, ABS( D( I ) )+ABS( DU( I ) )+ + $ ABS( DL( I-1 ) ) ) + 30 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + CALL SLASSQ( N, D, 1, SCALE, SUM ) + IF( N.GT.1 ) THEN + CALL SLASSQ( N-1, DL, 1, SCALE, SUM ) + CALL SLASSQ( N-1, DU, 1, SCALE, SUM ) + END IF + ANORM = SCALE*SQRT( SUM ) + END IF +* + SLANGT = ANORM + RETURN +* +* End of SLANGT +* + END diff --git a/costa/native/external/lapack/slanhs.f b/costa/native/external/lapack/slanhs.f new file mode 100644 index 000000000..a8c314bbe --- /dev/null +++ b/costa/native/external/lapack/slanhs.f @@ -0,0 +1,142 @@ + REAL FUNCTION SLANHS( NORM, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLANHS returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* Hessenberg matrix A. +* +* Description +* =========== +* +* SLANHS returns the value +* +* SLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in SLANHS as described +* above. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, SLANHS is +* set to zero. +* +* A (input) REAL array, dimension (LDA,N) +* The n by n upper Hessenberg matrix A; the part of A below the +* first sub-diagonal is not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(N,1). +* +* WORK (workspace) REAL array, dimension (LWORK), +* where LWORK >= N when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = 1, MIN( N, J+1 ) + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = 1, MIN( N, J+1 ) + SUM = SUM + ABS( A( I, J ) ) + 30 CONTINUE + VALUE = MAX( VALUE, SUM ) + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, N + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + DO 60 I = 1, MIN( N, J+1 ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + CALL SLASSQ( MIN( N, J+1 ), A( 1, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + SLANHS = VALUE + RETURN +* +* End of SLANHS +* + END diff --git a/costa/native/external/lapack/slansb.f b/costa/native/external/lapack/slansb.f new file mode 100644 index 000000000..a0e0280c9 --- /dev/null +++ b/costa/native/external/lapack/slansb.f @@ -0,0 +1,187 @@ + REAL FUNCTION SLANSB( NORM, UPLO, N, K, AB, LDAB, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER K, LDAB, N +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLANSB returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of an +* n by n symmetric band matrix A, with k super-diagonals. +* +* Description +* =========== +* +* SLANSB returns the value +* +* SLANSB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in SLANSB as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* band matrix A is supplied. +* = 'U': Upper triangular part is supplied +* = 'L': Lower triangular part is supplied +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, SLANSB is +* set to zero. +* +* K (input) INTEGER +* The number of super-diagonals or sub-diagonals of the +* band matrix A. K >= 0. +* +* AB (input) REAL array, dimension (LDAB,N) +* The upper or lower triangle of the symmetric band matrix A, +* stored in the first K+1 rows of AB. The j-th column of A is +* stored in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= K+1. +* +* WORK (workspace) REAL array, dimension (LWORK), +* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +* WORK is not referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L + REAL ABSA, SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = MAX( K+2-J, 1 ), K + 1 + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = 1, MIN( N+1-J, K+1 ) + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + L = K + 1 - J + DO 50 I = MAX( 1, J-K ), J - 1 + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( AB( K+1, J ) ) + 60 CONTINUE + DO 70 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( AB( 1, J ) ) + L = 1 - J + DO 90 I = J + 1, MIN( N, J+K ) + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + VALUE = MAX( VALUE, SUM ) + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( K.GT.0 ) THEN + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL SLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ), + $ 1, SCALE, SUM ) + 110 CONTINUE + L = K + 1 + ELSE + DO 120 J = 1, N - 1 + CALL SLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, + $ SUM ) + 120 CONTINUE + L = 1 + END IF + SUM = 2*SUM + ELSE + L = 1 + END IF + CALL SLASSQ( N, AB( L, 1 ), LDAB, SCALE, SUM ) + VALUE = SCALE*SQRT( SUM ) + END IF +* + SLANSB = VALUE + RETURN +* +* End of SLANSB +* + END diff --git a/costa/native/external/lapack/slansp.f b/costa/native/external/lapack/slansp.f new file mode 100644 index 000000000..263728cfb --- /dev/null +++ b/costa/native/external/lapack/slansp.f @@ -0,0 +1,197 @@ + REAL FUNCTION SLANSP( NORM, UPLO, N, AP, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER N +* .. +* .. Array Arguments .. + REAL AP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLANSP returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* real symmetric matrix A, supplied in packed form. +* +* Description +* =========== +* +* SLANSP returns the value +* +* SLANSP = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in SLANSP as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is supplied. +* = 'U': Upper triangular part of A is supplied +* = 'L': Lower triangular part of A is supplied +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, SLANSP is +* set to zero. +* +* AP (input) REAL array, dimension (N*(N+1)/2) +* The upper or lower triangle of the symmetric matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* WORK (workspace) REAL array, dimension (LWORK), +* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +* WORK is not referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, K + REAL ABSA, SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + K = 1 + DO 20 J = 1, N + DO 10 I = K, K + J - 1 + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 10 CONTINUE + K = K + J + 20 CONTINUE + ELSE + K = 1 + DO 40 J = 1, N + DO 30 I = K, K + N - J + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 30 CONTINUE + K = K + N - J + 1 + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + K = 1 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 50 CONTINUE + WORK( J ) = SUM + ABS( AP( K ) ) + K = K + 1 + 60 CONTINUE + DO 70 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( AP( K ) ) + K = K + 1 + DO 90 I = J + 1, N + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 90 CONTINUE + VALUE = MAX( VALUE, SUM ) + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + K = 2 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL SLASSQ( J-1, AP( K ), 1, SCALE, SUM ) + K = K + J + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL SLASSQ( N-J, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 120 CONTINUE + END IF + SUM = 2*SUM + K = 1 + DO 130 I = 1, N + IF( AP( K ).NE.ZERO ) THEN + ABSA = ABS( AP( K ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + IF( LSAME( UPLO, 'U' ) ) THEN + K = K + I + 1 + ELSE + K = K + N - I + 1 + END IF + 130 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + SLANSP = VALUE + RETURN +* +* End of SLANSP +* + END diff --git a/costa/native/external/lapack/slanst.f b/costa/native/external/lapack/slanst.f new file mode 100644 index 000000000..71aa0b1d9 --- /dev/null +++ b/costa/native/external/lapack/slanst.f @@ -0,0 +1,125 @@ + REAL FUNCTION SLANST( NORM, N, D, E ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) +* .. +* +* Purpose +* ======= +* +* SLANST returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* real symmetric tridiagonal matrix A. +* +* Description +* =========== +* +* SLANST returns the value +* +* SLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in SLANST as described +* above. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, SLANST is +* set to zero. +* +* D (input) REAL array, dimension (N) +* The diagonal elements of A. +* +* E (input) REAL array, dimension (N-1) +* The (n-1) sub-diagonal or super-diagonal elements of A. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL ANORM, SCALE, SUM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + ANORM = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + ANORM = ABS( D( N ) ) + DO 10 I = 1, N - 1 + ANORM = MAX( ANORM, ABS( D( I ) ) ) + ANORM = MAX( ANORM, ABS( E( I ) ) ) + 10 CONTINUE + ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' .OR. + $ LSAME( NORM, 'I' ) ) THEN +* +* Find norm1(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = MAX( ABS( D( 1 ) )+ABS( E( 1 ) ), + $ ABS( E( N-1 ) )+ABS( D( N ) ) ) + DO 20 I = 2, N - 1 + ANORM = MAX( ANORM, ABS( D( I ) )+ABS( E( I ) )+ + $ ABS( E( I-1 ) ) ) + 20 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( N.GT.1 ) THEN + CALL SLASSQ( N-1, E, 1, SCALE, SUM ) + SUM = 2*SUM + END IF + CALL SLASSQ( N, D, 1, SCALE, SUM ) + ANORM = SCALE*SQRT( SUM ) + END IF +* + SLANST = ANORM + RETURN +* +* End of SLANST +* + END diff --git a/costa/native/external/lapack/slansy.f b/costa/native/external/lapack/slansy.f new file mode 100644 index 000000000..fb922ea17 --- /dev/null +++ b/costa/native/external/lapack/slansy.f @@ -0,0 +1,174 @@ + REAL FUNCTION SLANSY( NORM, UPLO, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLANSY returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* real symmetric matrix A. +* +* Description +* =========== +* +* SLANSY returns the value +* +* SLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in SLANSY as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is to be referenced. +* = 'U': Upper triangular part of A is referenced +* = 'L': Lower triangular part of A is referenced +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, SLANSY is +* set to zero. +* +* A (input) REAL array, dimension (LDA,N) +* The symmetric matrix A. If UPLO = 'U', the leading n by n +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading n by n lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(N,1). +* +* WORK (workspace) REAL array, dimension (LWORK), +* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +* WORK is not referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL ABSA, SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, J + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = J, N + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( A( J, J ) ) + 60 CONTINUE + DO 70 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( A( J, J ) ) + DO 90 I = J + 1, N + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + VALUE = MAX( VALUE, SUM ) + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL SLASSQ( J-1, A( 1, J ), 1, SCALE, SUM ) + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL SLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM ) + 120 CONTINUE + END IF + SUM = 2*SUM + CALL SLASSQ( N, A, LDA+1, SCALE, SUM ) + VALUE = SCALE*SQRT( SUM ) + END IF +* + SLANSY = VALUE + RETURN +* +* End of SLANSY +* + END diff --git a/costa/native/external/lapack/slantb.f b/costa/native/external/lapack/slantb.f new file mode 100644 index 000000000..3b01cf0ea --- /dev/null +++ b/costa/native/external/lapack/slantb.f @@ -0,0 +1,285 @@ + REAL FUNCTION SLANTB( NORM, UPLO, DIAG, N, K, AB, + $ LDAB, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER K, LDAB, N +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLANTB returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of an +* n by n triangular band matrix A, with ( k + 1 ) diagonals. +* +* Description +* =========== +* +* SLANTB returns the value +* +* SLANTB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in SLANTB as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, SLANTB is +* set to zero. +* +* K (input) INTEGER +* The number of super-diagonals of the matrix A if UPLO = 'U', +* or the number of sub-diagonals of the matrix A if UPLO = 'L'. +* K >= 0. +* +* AB (input) REAL array, dimension (LDAB,N) +* The upper or lower triangular band matrix A, stored in the +* first k+1 rows of AB. The j-th column of A is stored +* in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). +* Note that when DIAG = 'U', the elements of the array AB +* corresponding to the diagonal elements of the matrix A are +* not referenced, but are assumed to be one. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= K+1. +* +* WORK (workspace) REAL array, dimension (LWORK), +* where LWORK >= N when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J, L + REAL SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = MAX( K+2-J, 1 ), K + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = 2, MIN( N+1-J, K+1 ) + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = MAX( K+2-J, 1 ), K + 1 + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = 1, MIN( N+1-J, K+1 ) + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 90 I = MAX( K+2-J, 1 ), K + SUM = SUM + ABS( AB( I, J ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = MAX( K+2-J, 1 ), K + 1 + SUM = SUM + ABS( AB( I, J ) ) + 100 CONTINUE + END IF + VALUE = MAX( VALUE, SUM ) + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = 2, MIN( N+1-J, K+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = 1, MIN( N+1-J, K+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 130 CONTINUE + END IF + VALUE = MAX( VALUE, SUM ) + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, N + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + L = K + 1 - J + DO 160 I = MAX( 1, J-K ), J - 1 + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 160 CONTINUE + 170 CONTINUE + ELSE + DO 180 I = 1, N + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + L = K + 1 - J + DO 190 I = MAX( 1, J-K ), J + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 230 J = 1, N + L = 1 - J + DO 220 I = J + 1, MIN( N, J+K ) + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 220 CONTINUE + 230 CONTINUE + ELSE + DO 240 I = 1, N + WORK( I ) = ZERO + 240 CONTINUE + DO 260 J = 1, N + L = 1 - J + DO 250 I = J, MIN( N, J+K ) + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 250 CONTINUE + 260 CONTINUE + END IF + END IF + DO 270 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 270 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + IF( K.GT.0 ) THEN + DO 280 J = 2, N + CALL SLASSQ( MIN( J-1, K ), + $ AB( MAX( K+2-J, 1 ), J ), 1, SCALE, + $ SUM ) + 280 CONTINUE + END IF + ELSE + SCALE = ZERO + SUM = ONE + DO 290 J = 1, N + CALL SLASSQ( MIN( J, K+1 ), AB( MAX( K+2-J, 1 ), J ), + $ 1, SCALE, SUM ) + 290 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + IF( K.GT.0 ) THEN + DO 300 J = 1, N - 1 + CALL SLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, + $ SUM ) + 300 CONTINUE + END IF + ELSE + SCALE = ZERO + SUM = ONE + DO 310 J = 1, N + CALL SLASSQ( MIN( N-J+1, K+1 ), AB( 1, J ), 1, SCALE, + $ SUM ) + 310 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + SLANTB = VALUE + RETURN +* +* End of SLANTB +* + END diff --git a/costa/native/external/lapack/slantp.f b/costa/native/external/lapack/slantp.f new file mode 100644 index 000000000..d034508d5 --- /dev/null +++ b/costa/native/external/lapack/slantp.f @@ -0,0 +1,286 @@ + REAL FUNCTION SLANTP( NORM, UPLO, DIAG, N, AP, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER N +* .. +* .. Array Arguments .. + REAL AP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLANTP returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* triangular matrix A, supplied in packed form. +* +* Description +* =========== +* +* SLANTP returns the value +* +* SLANTP = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in SLANTP as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, SLANTP is +* set to zero. +* +* AP (input) REAL array, dimension (N*(N+1)/2) +* The upper or lower triangular matrix A, packed columnwise in +* a linear array. The j-th column of A is stored in the array +* AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* Note that when DIAG = 'U', the elements of the array AP +* corresponding to the diagonal elements of the matrix A are +* not referenced, but are assumed to be one. +* +* WORK (workspace) REAL array, dimension (LWORK), +* where LWORK >= N when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J, K + REAL SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + K = 1 + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = K, K + J - 2 + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 10 CONTINUE + K = K + J + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = K + 1, K + N - J + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 30 CONTINUE + K = K + N - J + 1 + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = K, K + J - 1 + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 50 CONTINUE + K = K + J + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = K, K + N - J + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 70 CONTINUE + K = K + N - J + 1 + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + K = 1 + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 90 I = K, K + J - 2 + SUM = SUM + ABS( AP( I ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = K, K + J - 1 + SUM = SUM + ABS( AP( I ) ) + 100 CONTINUE + END IF + K = K + J + VALUE = MAX( VALUE, SUM ) + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = K + 1, K + N - J + SUM = SUM + ABS( AP( I ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = K, K + N - J + SUM = SUM + ABS( AP( I ) ) + 130 CONTINUE + END IF + K = K + N - J + 1 + VALUE = MAX( VALUE, SUM ) + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + K = 1 + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, N + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + DO 160 I = 1, J - 1 + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 160 CONTINUE + K = K + 1 + 170 CONTINUE + ELSE + DO 180 I = 1, N + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + DO 190 I = 1, J + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 230 J = 1, N + K = K + 1 + DO 220 I = J + 1, N + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 220 CONTINUE + 230 CONTINUE + ELSE + DO 240 I = 1, N + WORK( I ) = ZERO + 240 CONTINUE + DO 260 J = 1, N + DO 250 I = J, N + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 250 CONTINUE + 260 CONTINUE + END IF + END IF + VALUE = ZERO + DO 270 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 270 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + K = 2 + DO 280 J = 2, N + CALL SLASSQ( J-1, AP( K ), 1, SCALE, SUM ) + K = K + J + 280 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + K = 1 + DO 290 J = 1, N + CALL SLASSQ( J, AP( K ), 1, SCALE, SUM ) + K = K + J + 290 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + K = 2 + DO 300 J = 1, N - 1 + CALL SLASSQ( N-J, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 300 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + K = 1 + DO 310 J = 1, N + CALL SLASSQ( N-J+1, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 310 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + SLANTP = VALUE + RETURN +* +* End of SLANTP +* + END diff --git a/costa/native/external/lapack/slantr.f b/costa/native/external/lapack/slantr.f new file mode 100644 index 000000000..89e6afd34 --- /dev/null +++ b/costa/native/external/lapack/slantr.f @@ -0,0 +1,277 @@ + REAL FUNCTION SLANTR( NORM, UPLO, DIAG, M, N, A, LDA, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLANTR returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* trapezoidal or triangular matrix A. +* +* Description +* =========== +* +* SLANTR returns the value +* +* SLANTR = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in SLANTR as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower trapezoidal. +* = 'U': Upper trapezoidal +* = 'L': Lower trapezoidal +* Note that A is triangular instead of trapezoidal if M = N. +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A has unit diagonal. +* = 'N': Non-unit diagonal +* = 'U': Unit diagonal +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0, and if +* UPLO = 'U', M <= N. When M = 0, SLANTR is set to zero. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0, and if +* UPLO = 'L', N <= M. When N = 0, SLANTR is set to zero. +* +* A (input) REAL array, dimension (LDA,N) +* The trapezoidal matrix A (A is triangular if M = N). +* If UPLO = 'U', the leading m by n upper trapezoidal part of +* the array A contains the upper trapezoidal matrix, and the +* strictly lower triangular part of A is not referenced. +* If UPLO = 'L', the leading m by n lower trapezoidal part of +* the array A contains the lower trapezoidal matrix, and the +* strictly upper triangular part of A is not referenced. Note +* that when DIAG = 'U', the diagonal elements of A are not +* referenced and are assumed to be one. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(M,1). +* +* WORK (workspace) REAL array, dimension (LWORK), +* where LWORK >= M when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J + REAL SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( MIN( M, N ).EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( M, J-1 ) + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = J + 1, M + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = 1, MIN( M, J ) + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = J, M + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( ( UDIAG ) .AND. ( J.LE.M ) ) THEN + SUM = ONE + DO 90 I = 1, J - 1 + SUM = SUM + ABS( A( I, J ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = 1, MIN( M, J ) + SUM = SUM + ABS( A( I, J ) ) + 100 CONTINUE + END IF + VALUE = MAX( VALUE, SUM ) + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = J + 1, M + SUM = SUM + ABS( A( I, J ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = J, M + SUM = SUM + ABS( A( I, J ) ) + 130 CONTINUE + END IF + VALUE = MAX( VALUE, SUM ) + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, M + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + DO 160 I = 1, MIN( M, J-1 ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 160 CONTINUE + 170 CONTINUE + ELSE + DO 180 I = 1, M + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + DO 190 I = 1, MIN( M, J ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 220 I = N + 1, M + WORK( I ) = ZERO + 220 CONTINUE + DO 240 J = 1, N + DO 230 I = J + 1, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 230 CONTINUE + 240 CONTINUE + ELSE + DO 250 I = 1, M + WORK( I ) = ZERO + 250 CONTINUE + DO 270 J = 1, N + DO 260 I = J, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 260 CONTINUE + 270 CONTINUE + END IF + END IF + VALUE = ZERO + DO 280 I = 1, M + VALUE = MAX( VALUE, WORK( I ) ) + 280 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = MIN( M, N ) + DO 290 J = 2, N + CALL SLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM ) + 290 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + DO 300 J = 1, N + CALL SLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM ) + 300 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = MIN( M, N ) + DO 310 J = 1, N + CALL SLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE, + $ SUM ) + 310 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + DO 320 J = 1, N + CALL SLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM ) + 320 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + SLANTR = VALUE + RETURN +* +* End of SLANTR +* + END diff --git a/costa/native/external/lapack/slanv2.f b/costa/native/external/lapack/slanv2.f new file mode 100644 index 000000000..fdf81f56b --- /dev/null +++ b/costa/native/external/lapack/slanv2.f @@ -0,0 +1,206 @@ + SUBROUTINE SLANV2( A, B, C, D, RT1R, RT1I, RT2R, RT2I, CS, SN ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + REAL A, B, C, CS, D, RT1I, RT1R, RT2I, RT2R, SN +* .. +* +* Purpose +* ======= +* +* SLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric +* matrix in standard form: +* +* [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] +* [ C D ] [ SN CS ] [ CC DD ] [-SN CS ] +* +* where either +* 1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or +* 2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex +* conjugate eigenvalues. +* +* Arguments +* ========= +* +* A (input/output) REAL +* B (input/output) REAL +* C (input/output) REAL +* D (input/output) REAL +* On entry, the elements of the input matrix. +* On exit, they are overwritten by the elements of the +* standardised Schur form. +* +* RT1R (output) REAL +* RT1I (output) REAL +* RT2R (output) REAL +* RT2I (output) REAL +* The real and imaginary parts of the eigenvalues. If the +* eigenvalues are a complex conjugate pair, RT1I > 0. +* +* CS (output) REAL +* SN (output) REAL +* Parameters of the rotation matrix. +* +* Further Details +* =============== +* +* Modified by V. Sima, Research Institute for Informatics, Bucharest, +* Romania, to reduce the risk of cancellation errors, +* when computing real eigenvalues, and to ensure, if possible, that +* abs(RT1R) >= abs(RT2R). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 ) + REAL MULTPL + PARAMETER ( MULTPL = 4.0E+0 ) +* .. +* .. Local Scalars .. + REAL AA, BB, BCMAX, BCMIS, CC, CS1, DD, EPS, P, SAB, + $ SAC, SCALE, SIGMA, SN1, TAU, TEMP, Z +* .. +* .. External Functions .. + REAL SLAMCH, SLAPY2 + EXTERNAL SLAMCH, SLAPY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SIGN, SQRT +* .. +* .. Executable Statements .. +* + EPS = SLAMCH( 'P' ) + IF( C.EQ.ZERO ) THEN + CS = ONE + SN = ZERO + GO TO 10 +* + ELSE IF( B.EQ.ZERO ) THEN +* +* Swap rows and columns +* + CS = ZERO + SN = ONE + TEMP = D + D = A + A = TEMP + B = -C + C = ZERO + GO TO 10 + ELSE IF( (A-D).EQ.ZERO .AND. SIGN( ONE, B ).NE. + $ SIGN( ONE, C ) ) THEN + CS = ONE + SN = ZERO + GO TO 10 + ELSE +* + TEMP = A - D + P = HALF*TEMP + BCMAX = MAX( ABS( B ), ABS( C ) ) + BCMIS = MIN( ABS( B ), ABS( C ) )*SIGN( ONE, B )*SIGN( ONE, C ) + SCALE = MAX( ABS( P ), BCMAX ) + Z = ( P / SCALE )*P + ( BCMAX / SCALE )*BCMIS +* +* If Z is of the order of the machine accuracy, postpone the +* decision on the nature of eigenvalues +* + IF( Z.GE.MULTPL*EPS ) THEN +* +* Real eigenvalues. Compute A and D. +* + Z = P + SIGN( SQRT( SCALE )*SQRT( Z ), P ) + A = D + Z + D = D - ( BCMAX / Z )*BCMIS +* +* Compute B and the rotation matrix +* + TAU = SLAPY2( C, Z ) + CS = Z / TAU + SN = C / TAU + B = B - C + C = ZERO + ELSE +* +* Complex eigenvalues, or real (almost) equal eigenvalues. +* Make diagonal elements equal. +* + SIGMA = B + C + TAU = SLAPY2( SIGMA, TEMP ) + CS = SQRT( HALF*( ONE+ABS( SIGMA ) / TAU ) ) + SN = -( P / ( TAU*CS ) )*SIGN( ONE, SIGMA ) +* +* Compute [ AA BB ] = [ A B ] [ CS -SN ] +* [ CC DD ] [ C D ] [ SN CS ] +* + AA = A*CS + B*SN + BB = -A*SN + B*CS + CC = C*CS + D*SN + DD = -C*SN + D*CS +* +* Compute [ A B ] = [ CS SN ] [ AA BB ] +* [ C D ] [-SN CS ] [ CC DD ] +* + A = AA*CS + CC*SN + B = BB*CS + DD*SN + C = -AA*SN + CC*CS + D = -BB*SN + DD*CS +* + TEMP = HALF*( A+D ) + A = TEMP + D = TEMP +* + IF( C.NE.ZERO ) THEN + IF( B.NE.ZERO ) THEN + IF( SIGN( ONE, B ).EQ.SIGN( ONE, C ) ) THEN +* +* Real eigenvalues: reduce to upper triangular form +* + SAB = SQRT( ABS( B ) ) + SAC = SQRT( ABS( C ) ) + P = SIGN( SAB*SAC, C ) + TAU = ONE / SQRT( ABS( B+C ) ) + A = TEMP + P + D = TEMP - P + B = B - C + C = ZERO + CS1 = SAB*TAU + SN1 = SAC*TAU + TEMP = CS*CS1 - SN*SN1 + SN = CS*SN1 + SN*CS1 + CS = TEMP + END IF + ELSE + B = -C + C = ZERO + TEMP = CS + CS = -SN + SN = TEMP + END IF + END IF + END IF +* + END IF +* + 10 CONTINUE +* +* Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I). +* + RT1R = A + RT2R = D + IF( C.EQ.ZERO ) THEN + RT1I = ZERO + RT2I = ZERO + ELSE + RT1I = SQRT( ABS( B ) )*SQRT( ABS( C ) ) + RT2I = -RT1I + END IF + RETURN +* +* End of SLANV2 +* + END diff --git a/costa/native/external/lapack/slapll.f b/costa/native/external/lapack/slapll.f new file mode 100644 index 000000000..790c8473a --- /dev/null +++ b/costa/native/external/lapack/slapll.f @@ -0,0 +1,100 @@ + SUBROUTINE SLAPLL( N, X, INCX, Y, INCY, SSMIN ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INCX, INCY, N + REAL SSMIN +* .. +* .. Array Arguments .. + REAL X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* Given two column vectors X and Y, let +* +* A = ( X Y ). +* +* The subroutine first computes the QR factorization of A = Q*R, +* and then computes the SVD of the 2-by-2 upper triangular matrix R. +* The smaller singular value of R is returned in SSMIN, which is used +* as the measurement of the linear dependency of the vectors X and Y. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The length of the vectors X and Y. +* +* X (input/output) REAL array, +* dimension (1+(N-1)*INCX) +* On entry, X contains the N-vector X. +* On exit, X is overwritten. +* +* INCX (input) INTEGER +* The increment between successive elements of X. INCX > 0. +* +* Y (input/output) REAL array, +* dimension (1+(N-1)*INCY) +* On entry, Y contains the N-vector Y. +* On exit, Y is overwritten. +* +* INCY (input) INTEGER +* The increment between successive elements of Y. INCY > 0. +* +* SSMIN (output) REAL +* The smallest singular value of the N-by-2 matrix A = ( X Y ). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + REAL A11, A12, A22, C, SSMAX, TAU +* .. +* .. External Functions .. + REAL SDOT + EXTERNAL SDOT +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SLARFG, SLAS2 +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + SSMIN = ZERO + RETURN + END IF +* +* Compute the QR factorization of the N-by-2 matrix ( X Y ) +* + CALL SLARFG( N, X( 1 ), X( 1+INCX ), INCX, TAU ) + A11 = X( 1 ) + X( 1 ) = ONE +* + C = -TAU*SDOT( N, X, INCX, Y, INCY ) + CALL SAXPY( N, C, X, INCX, Y, INCY ) +* + CALL SLARFG( N-1, Y( 1+INCY ), Y( 1+2*INCY ), INCY, TAU ) +* + A12 = Y( 1 ) + A22 = Y( 1+INCY ) +* +* Compute the SVD of 2-by-2 Upper triangular matrix. +* + CALL SLAS2( A11, A12, A22, SSMIN, SSMAX ) +* + RETURN +* +* End of SLAPLL +* + END diff --git a/costa/native/external/lapack/slapmt.f b/costa/native/external/lapack/slapmt.f new file mode 100644 index 000000000..13be7b2d4 --- /dev/null +++ b/costa/native/external/lapack/slapmt.f @@ -0,0 +1,135 @@ + SUBROUTINE SLAPMT( FORWRD, M, N, X, LDX, K ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + LOGICAL FORWRD + INTEGER LDX, M, N +* .. +* .. Array Arguments .. + INTEGER K( * ) + REAL X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SLAPMT rearranges the columns of the M by N matrix X as specified +* by the permutation K(1),K(2),...,K(N) of the integers 1,...,N. +* If FORWRD = .TRUE., forward permutation: +* +* X(*,K(J)) is moved X(*,J) for J = 1,2,...,N. +* +* If FORWRD = .FALSE., backward permutation: +* +* X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N. +* +* Arguments +* ========= +* +* FORWRD (input) LOGICAL +* = .TRUE., forward permutation +* = .FALSE., backward permutation +* +* M (input) INTEGER +* The number of rows of the matrix X. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix X. N >= 0. +* +* X (input/output) REAL array, dimension (LDX,N) +* On entry, the M by N matrix X. +* On exit, X contains the permuted matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X, LDX >= MAX(1,M). +* +* K (input) INTEGER array, dimension (N) +* On entry, K contains the permutation vector. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, II, J, IN + REAL TEMP +* .. +* .. Executable Statements .. +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, N + K( I ) = -K( I ) + 10 CONTINUE +* + IF( FORWRD ) THEN +* +* Forward permutation +* + DO 60 I = 1, N +* + IF( K( I ).GT.0 ) + $ GO TO 40 +* + J = I + K( J ) = -K( J ) + IN = K( J ) +* + 20 CONTINUE + IF( K( IN ).GT.0 ) + $ GO TO 40 +* + DO 30 II = 1, M + TEMP = X( II, J ) + X( II, J ) = X( II, IN ) + X( II, IN ) = TEMP + 30 CONTINUE +* + K( IN ) = -K( IN ) + J = IN + IN = K( IN ) + GO TO 20 +* + 40 CONTINUE +* + 60 CONTINUE +* + ELSE +* +* Backward permutation +* + DO 110 I = 1, N +* + IF( K( I ).GT.0 ) + $ GO TO 100 +* + K( I ) = -K( I ) + J = K( I ) + 80 CONTINUE + IF( J.EQ.I ) + $ GO TO 100 +* + DO 90 II = 1, M + TEMP = X( II, I ) + X( II, I ) = X( II, J ) + X( II, J ) = TEMP + 90 CONTINUE +* + K( J ) = -K( J ) + J = K( J ) + GO TO 80 +* + 100 CONTINUE + + 110 CONTINUE +* + END IF +* + RETURN +* +* End of SLAPMT +* + END diff --git a/costa/native/external/lapack/slapy2.f b/costa/native/external/lapack/slapy2.f new file mode 100644 index 000000000..b3f26b7e8 --- /dev/null +++ b/costa/native/external/lapack/slapy2.f @@ -0,0 +1,54 @@ + REAL FUNCTION SLAPY2( X, Y ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + REAL X, Y +* .. +* +* Purpose +* ======= +* +* SLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary +* overflow. +* +* Arguments +* ========= +* +* X (input) REAL +* Y (input) REAL +* X and Y specify the values x and y. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + REAL W, XABS, YABS, Z +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + XABS = ABS( X ) + YABS = ABS( Y ) + W = MAX( XABS, YABS ) + Z = MIN( XABS, YABS ) + IF( Z.EQ.ZERO ) THEN + SLAPY2 = W + ELSE + SLAPY2 = W*SQRT( ONE+( Z / W )**2 ) + END IF + RETURN +* +* End of SLAPY2 +* + END diff --git a/costa/native/external/lapack/slapy3.f b/costa/native/external/lapack/slapy3.f new file mode 100644 index 000000000..06b97237f --- /dev/null +++ b/costa/native/external/lapack/slapy3.f @@ -0,0 +1,54 @@ + REAL FUNCTION SLAPY3( X, Y, Z ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + REAL X, Y, Z +* .. +* +* Purpose +* ======= +* +* SLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause +* unnecessary overflow. +* +* Arguments +* ========= +* +* X (input) REAL +* Y (input) REAL +* Z (input) REAL +* X, Y and Z specify the values x, y and z. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + REAL W, XABS, YABS, ZABS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + XABS = ABS( X ) + YABS = ABS( Y ) + ZABS = ABS( Z ) + W = MAX( XABS, YABS, ZABS ) + IF( W.EQ.ZERO ) THEN + SLAPY3 = ZERO + ELSE + SLAPY3 = W*SQRT( ( XABS / W )**2+( YABS / W )**2+ + $ ( ZABS / W )**2 ) + END IF + RETURN +* +* End of SLAPY3 +* + END diff --git a/costa/native/external/lapack/slaqgb.f b/costa/native/external/lapack/slaqgb.f new file mode 100644 index 000000000..4303602ff --- /dev/null +++ b/costa/native/external/lapack/slaqgb.f @@ -0,0 +1,169 @@ + SUBROUTINE SLAQGB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER EQUED + INTEGER KL, KU, LDAB, M, N + REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), C( * ), R( * ) +* .. +* +* Purpose +* ======= +* +* SLAQGB equilibrates a general M by N band matrix A with KL +* subdiagonals and KU superdiagonals using the row and scaling factors +* in the vectors R and C. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input/output) REAL array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +* +* On exit, the equilibrated matrix, in the same storage format +* as A. See EQUED for the form of the equilibrated matrix. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDA >= KL+KU+1. +* +* R (output) REAL array, dimension (M) +* The row scale factors for A. +* +* C (output) REAL array, dimension (N) +* The column scale factors for A. +* +* ROWCND (output) REAL +* Ratio of the smallest R(i) to the largest R(i). +* +* COLCND (output) REAL +* Ratio of the smallest C(i) to the largest C(i). +* +* AMAX (input) REAL +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration +* = 'R': Row equilibration, i.e., A has been premultiplied by +* diag(R). +* = 'C': Column equilibration, i.e., A has been postmultiplied +* by diag(C). +* = 'B': Both row and column equilibration, i.e., A has been +* replaced by diag(R) * A * diag(C). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if row or column scaling +* should be done based on the ratio of the row or column scaling +* factors. If ROWCND < THRESH, row scaling is done, and if +* COLCND < THRESH, column scaling is done. +* +* LARGE and SMALL are threshold values used to decide if row scaling +* should be done based on the absolute size of the largest matrix +* element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) + $ THEN +* +* No row scaling +* + IF( COLCND.GE.THRESH ) THEN +* +* No column scaling +* + EQUED = 'N' + ELSE +* +* Column scaling +* + DO 20 J = 1, N + CJ = C( J ) + DO 10 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = CJ*AB( KU+1+I-J, J ) + 10 CONTINUE + 20 CONTINUE + EQUED = 'C' + END IF + ELSE IF( COLCND.GE.THRESH ) THEN +* +* Row scaling, no column scaling +* + DO 40 J = 1, N + DO 30 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = R( I )*AB( KU+1+I-J, J ) + 30 CONTINUE + 40 CONTINUE + EQUED = 'R' + ELSE +* +* Row and column scaling +* + DO 60 J = 1, N + CJ = C( J ) + DO 50 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = CJ*R( I )*AB( KU+1+I-J, J ) + 50 CONTINUE + 60 CONTINUE + EQUED = 'B' + END IF +* + RETURN +* +* End of SLAQGB +* + END diff --git a/costa/native/external/lapack/slaqge.f b/costa/native/external/lapack/slaqge.f new file mode 100644 index 000000000..bbef3372a --- /dev/null +++ b/costa/native/external/lapack/slaqge.f @@ -0,0 +1,155 @@ + SUBROUTINE SLAQGE( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER EQUED + INTEGER LDA, M, N + REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( * ), R( * ) +* .. +* +* Purpose +* ======= +* +* SLAQGE equilibrates a general M by N matrix A using the row and +* scaling factors in the vectors R and C. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M by N matrix A. +* On exit, the equilibrated matrix. See EQUED for the form of +* the equilibrated matrix. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(M,1). +* +* R (input) REAL array, dimension (M) +* The row scale factors for A. +* +* C (input) REAL array, dimension (N) +* The column scale factors for A. +* +* ROWCND (input) REAL +* Ratio of the smallest R(i) to the largest R(i). +* +* COLCND (input) REAL +* Ratio of the smallest C(i) to the largest C(i). +* +* AMAX (input) REAL +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration +* = 'R': Row equilibration, i.e., A has been premultiplied by +* diag(R). +* = 'C': Column equilibration, i.e., A has been postmultiplied +* by diag(C). +* = 'B': Both row and column equilibration, i.e., A has been +* replaced by diag(R) * A * diag(C). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if row or column scaling +* should be done based on the ratio of the row or column scaling +* factors. If ROWCND < THRESH, row scaling is done, and if +* COLCND < THRESH, column scaling is done. +* +* LARGE and SMALL are threshold values used to decide if row scaling +* should be done based on the absolute size of the largest matrix +* element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) + $ THEN +* +* No row scaling +* + IF( COLCND.GE.THRESH ) THEN +* +* No column scaling +* + EQUED = 'N' + ELSE +* +* Column scaling +* + DO 20 J = 1, N + CJ = C( J ) + DO 10 I = 1, M + A( I, J ) = CJ*A( I, J ) + 10 CONTINUE + 20 CONTINUE + EQUED = 'C' + END IF + ELSE IF( COLCND.GE.THRESH ) THEN +* +* Row scaling, no column scaling +* + DO 40 J = 1, N + DO 30 I = 1, M + A( I, J ) = R( I )*A( I, J ) + 30 CONTINUE + 40 CONTINUE + EQUED = 'R' + ELSE +* +* Row and column scaling +* + DO 60 J = 1, N + CJ = C( J ) + DO 50 I = 1, M + A( I, J ) = CJ*R( I )*A( I, J ) + 50 CONTINUE + 60 CONTINUE + EQUED = 'B' + END IF +* + RETURN +* +* End of SLAQGE +* + END diff --git a/costa/native/external/lapack/slaqp2.f b/costa/native/external/lapack/slaqp2.f new file mode 100644 index 000000000..65a42779f --- /dev/null +++ b/costa/native/external/lapack/slaqp2.f @@ -0,0 +1,166 @@ + SUBROUTINE SLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER LDA, M, N, OFFSET +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL A( LDA, * ), TAU( * ), VN1( * ), VN2( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLAQP2 computes a QR factorization with column pivoting of +* the block A(OFFSET+1:M,1:N). +* The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* OFFSET (input) INTEGER +* The number of rows of the matrix A that must be pivoted +* but no factorized. OFFSET >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the upper triangle of block A(OFFSET+1:M,1:N) is +* the triangular factor obtained; the elements in block +* A(OFFSET+1:M,1:N) below the diagonal, together with the +* array TAU, represent the orthogonal matrix Q as a product of +* elementary reflectors. Block A(1:OFFSET,1:N) has been +* accordingly pivoted, but no factorized. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +* to the front of A*P (a leading column); if JPVT(i) = 0, +* the i-th column of A is a free column. +* On exit, if JPVT(i) = k, then the i-th column of A*P +* was the k-th column of A. +* +* TAU (output) REAL array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors. +* +* VN1 (input/output) REAL array, dimension (N) +* The vector with the partial column norms. +* +* VN2 (input/output) REAL array, dimension (N) +* The vector with the exact column norms. +* +* WORK (workspace) REAL array, dimension (N) +* +* Further Details +* =============== +* +* Based on contributions by +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* X. Sun, Computer Science Dept., Duke University, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, ITEMP, J, MN, OFFPI, PVT + REAL AII, TEMP, TEMP2 +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFG, SSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SNRM2 + EXTERNAL ISAMAX, SNRM2 +* .. +* .. Executable Statements .. +* + MN = MIN( M-OFFSET, N ) +* +* Compute factorization. +* + DO 20 I = 1, MN +* + OFFPI = OFFSET + I +* +* Determine ith pivot column and swap if necessary. +* + PVT = ( I-1 ) + ISAMAX( N-I+1, VN1( I ), 1 ) +* + IF( PVT.NE.I ) THEN + CALL SSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( I ) + JPVT( I ) = ITEMP + VN1( PVT ) = VN1( I ) + VN2( PVT ) = VN2( I ) + END IF +* +* Generate elementary reflector H(i). +* + IF( OFFPI.LT.M ) THEN + CALL SLARFG( M-OFFPI+1, A( OFFPI, I ), A( OFFPI+1, I ), 1, + $ TAU( I ) ) + ELSE + CALL SLARFG( 1, A( M, I ), A( M, I ), 1, TAU( I ) ) + END IF +* + IF( I.LT.N ) THEN +* +* Apply H(i)' to A(offset+i:m,i+1:n) from the left. +* + AII = A( OFFPI, I ) + A( OFFPI, I ) = ONE + CALL SLARF( 'Left', M-OFFPI+1, N-I, A( OFFPI, I ), 1, + $ TAU( I ), A( OFFPI, I+1 ), LDA, WORK( 1 ) ) + A( OFFPI, I ) = AII + END IF +* +* Update partial column norms. +* + DO 10 J = I + 1, N + IF( VN1( J ).NE.ZERO ) THEN + TEMP = ONE - ( ABS( A( OFFPI, J ) ) / VN1( J ) )**2 + TEMP = MAX( TEMP, ZERO ) + TEMP2 = ONE + 0.05*TEMP*( VN1( J ) / VN2( J ) )**2 + IF( TEMP2.EQ.ONE ) THEN + IF( OFFPI.LT.M ) THEN + VN1( J ) = SNRM2( M-OFFPI, A( OFFPI+1, J ), 1 ) + VN2( J ) = VN1( J ) + ELSE + VN1( J ) = ZERO + VN2( J ) = ZERO + END IF + ELSE + VN1( J ) = VN1( J )*SQRT( TEMP ) + END IF + END IF + 10 CONTINUE +* + 20 CONTINUE +* + RETURN +* +* End of SLAQP2 +* + END diff --git a/costa/native/external/lapack/slaqps.f b/costa/native/external/lapack/slaqps.f new file mode 100644 index 000000000..5b26b03cb --- /dev/null +++ b/costa/native/external/lapack/slaqps.f @@ -0,0 +1,245 @@ + SUBROUTINE SLAQPS( M, N, OFFSET, NB, KB, A, LDA, JPVT, TAU, VN1, + $ VN2, AUXV, F, LDF ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER KB, LDA, LDF, M, N, NB, OFFSET +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL A( LDA, * ), AUXV( * ), F( LDF, * ), TAU( * ), + $ VN1( * ), VN2( * ) +* .. +* +* Purpose +* ======= +* +* SLAQPS computes a step of QR factorization with column pivoting +* of a real M-by-N matrix A by using Blas-3. It tries to factorize +* NB columns from A starting from the row OFFSET+1, and updates all +* of the matrix with Blas-3 xGEMM. +* +* In some cases, due to catastrophic cancellations, it cannot +* factorize NB columns. Hence, the actual number of factorized +* columns is returned in KB. +* +* Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0 +* +* OFFSET (input) INTEGER +* The number of rows of A that have been factorized in +* previous steps. +* +* NB (input) INTEGER +* The number of columns to factorize. +* +* KB (output) INTEGER +* The number of columns actually factorized. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, block A(OFFSET+1:M,1:KB) is the triangular +* factor obtained and block A(1:OFFSET,1:N) has been +* accordingly pivoted, but no factorized. +* The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has +* been updated. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* JPVT(I) = K <==> Column K of the full matrix A has been +* permuted into position I in AP. +* +* TAU (output) REAL array, dimension (KB) +* The scalar factors of the elementary reflectors. +* +* VN1 (input/output) REAL array, dimension (N) +* The vector with the partial column norms. +* +* VN2 (input/output) REAL array, dimension (N) +* The vector with the exact column norms. +* +* AUXV (input/output) REAL array, dimension (NB) +* Auxiliar vector. +* +* F (input/output) REAL array, dimension (LDF,NB) +* Matrix F' = L*Y'*A. +* +* LDF (input) INTEGER +* The leading dimension of the array F. LDF >= max(1,N). +* +* Further Details +* =============== +* +* Based on contributions by +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* X. Sun, Computer Science Dept., Duke University, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER ITEMP, J, K, LASTRK, LSTICC, PVT, RK + REAL AKK, TEMP, TEMP2 +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SGEMV, SLARFG, SSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, NINT, REAL, SQRT +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SNRM2 + EXTERNAL ISAMAX, SNRM2 +* .. +* .. Executable Statements .. +* + LASTRK = MIN( M, N+OFFSET ) + LSTICC = 0 + K = 0 +* +* Beginning of while loop. +* + 10 CONTINUE + IF( ( K.LT.NB ) .AND. ( LSTICC.EQ.0 ) ) THEN + K = K + 1 + RK = OFFSET + K +* +* Determine ith pivot column and swap if necessary +* + PVT = ( K-1 ) + ISAMAX( N-K+1, VN1( K ), 1 ) + IF( PVT.NE.K ) THEN + CALL SSWAP( M, A( 1, PVT ), 1, A( 1, K ), 1 ) + CALL SSWAP( K-1, F( PVT, 1 ), LDF, F( K, 1 ), LDF ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( K ) + JPVT( K ) = ITEMP + VN1( PVT ) = VN1( K ) + VN2( PVT ) = VN2( K ) + END IF +* +* Apply previous Householder reflectors to column K: +* A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)'. +* + IF( K.GT.1 ) THEN + CALL SGEMV( 'No transpose', M-RK+1, K-1, -ONE, A( RK, 1 ), + $ LDA, F( K, 1 ), LDF, ONE, A( RK, K ), 1 ) + END IF +* +* Generate elementary reflector H(k). +* + IF( RK.LT.M ) THEN + CALL SLARFG( M-RK+1, A( RK, K ), A( RK+1, K ), 1, TAU( K ) ) + ELSE + CALL SLARFG( 1, A( RK, K ), A( RK, K ), 1, TAU( K ) ) + END IF +* + AKK = A( RK, K ) + A( RK, K ) = ONE +* +* Compute Kth column of F: +* +* Compute F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)'*A(RK:M,K). +* + IF( K.LT.N ) THEN + CALL SGEMV( 'Transpose', M-RK+1, N-K, TAU( K ), + $ A( RK, K+1 ), LDA, A( RK, K ), 1, ZERO, + $ F( K+1, K ), 1 ) + END IF +* +* Padding F(1:K,K) with zeros. +* + DO 20 J = 1, K + F( J, K ) = ZERO + 20 CONTINUE +* +* Incremental updating of F: +* F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)' +* *A(RK:M,K). +* + IF( K.GT.1 ) THEN + CALL SGEMV( 'Transpose', M-RK+1, K-1, -TAU( K ), A( RK, 1 ), + $ LDA, A( RK, K ), 1, ZERO, AUXV( 1 ), 1 ) +* + CALL SGEMV( 'No transpose', N, K-1, ONE, F( 1, 1 ), LDF, + $ AUXV( 1 ), 1, ONE, F( 1, K ), 1 ) + END IF +* +* Update the current row of A: +* A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)'. +* + IF( K.LT.N ) THEN + CALL SGEMV( 'No transpose', N-K, K, -ONE, F( K+1, 1 ), LDF, + $ A( RK, 1 ), LDA, ONE, A( RK, K+1 ), LDA ) + END IF +* +* Update partial column norms. +* + IF( RK.LT.LASTRK ) THEN + DO 30 J = K + 1, N + IF( VN1( J ).NE.ZERO ) THEN + TEMP = ABS( A( RK, J ) ) / VN1( J ) + TEMP = MAX( ZERO, ( ONE+TEMP )*( ONE-TEMP ) ) + TEMP2 = ONE + 0.05*TEMP*( VN1( J ) / VN2( J ) )**2 + IF( TEMP2.EQ.ONE ) THEN + VN2( J ) = REAL( LSTICC ) + LSTICC = J + ELSE + VN1( J ) = VN1( J )*SQRT( TEMP ) + END IF + END IF + 30 CONTINUE + END IF +* + A( RK, K ) = AKK +* +* End of while loop. +* + GO TO 10 + END IF + KB = K + RK = OFFSET + KB +* +* Apply the block reflector to the rest of the matrix: +* A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) - +* A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)'. +* + IF( KB.LT.MIN( N, M-OFFSET ) ) THEN + CALL SGEMM( 'No transpose', 'Transpose', M-RK, N-KB, KB, -ONE, + $ A( RK+1, 1 ), LDA, F( KB+1, 1 ), LDF, ONE, + $ A( RK+1, KB+1 ), LDA ) + END IF +* +* Recomputation of difficult columns. +* + 40 CONTINUE + IF( LSTICC.GT.0 ) THEN + ITEMP = NINT( VN2( LSTICC ) ) + VN1( LSTICC ) = SNRM2( M-RK, A( RK+1, LSTICC ), 1 ) + VN2( LSTICC ) = VN1( LSTICC ) + LSTICC = ITEMP + GO TO 40 + END IF +* + RETURN +* +* End of SLAQPS +* + END diff --git a/costa/native/external/lapack/slaqsb.f b/costa/native/external/lapack/slaqsb.f new file mode 100644 index 000000000..6662c5086 --- /dev/null +++ b/costa/native/external/lapack/slaqsb.f @@ -0,0 +1,149 @@ + SUBROUTINE SLAQSB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER KD, LDAB, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), S( * ) +* .. +* +* Purpose +* ======= +* +* SLAQSB equilibrates a symmetric band matrix A using the scaling +* factors in the vector S. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of super-diagonals of the matrix A if UPLO = 'U', +* or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) REAL array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U'*U or A = L*L' of the band +* matrix A, in the same storage format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* S (output) REAL array, dimension (N) +* The scale factors for A. +* +* SCOND (input) REAL +* Ratio of the smallest S(i) to the largest S(i). +* +* AMAX (input) REAL +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies whether or not equilibration was done. +* = 'N': No equilibration. +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if scaling should be done +* based on the ratio of the scaling factors. If SCOND < THRESH, +* scaling is done. +* +* LARGE and SMALL are threshold values used to decide if scaling should +* be done based on the absolute size of the largest matrix element. +* If AMAX > LARGE or AMAX < SMALL, scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored in band format. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = MAX( 1, J-KD ), J + AB( KD+1+I-J, J ) = CJ*S( I )*AB( KD+1+I-J, J ) + 10 CONTINUE + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, MIN( N, J+KD ) + AB( 1+I-J, J ) = CJ*S( I )*AB( 1+I-J, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of SLAQSB +* + END diff --git a/costa/native/external/lapack/slaqsp.f b/costa/native/external/lapack/slaqsp.f new file mode 100644 index 000000000..f6d788f38 --- /dev/null +++ b/costa/native/external/lapack/slaqsp.f @@ -0,0 +1,141 @@ + SUBROUTINE SLAQSP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL AP( * ), S( * ) +* .. +* +* Purpose +* ======= +* +* SLAQSP equilibrates a symmetric matrix A using the scaling factors +* in the vector S. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) REAL array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the equilibrated matrix: diag(S) * A * diag(S), in +* the same storage format as A. +* +* S (input) REAL array, dimension (N) +* The scale factors for A. +* +* SCOND (input) REAL +* Ratio of the smallest S(i) to the largest S(i). +* +* AMAX (input) REAL +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies whether or not equilibration was done. +* = 'N': No equilibration. +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if scaling should be done +* based on the ratio of the scaling factors. If SCOND < THRESH, +* scaling is done. +* +* LARGE and SMALL are threshold values used to decide if scaling should +* be done based on the absolute size of the largest matrix element. +* If AMAX > LARGE or AMAX < SMALL, scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, JC + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + JC = 1 + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J + AP( JC+I-1 ) = CJ*S( I )*AP( JC+I-1 ) + 10 CONTINUE + JC = JC + J + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + JC = 1 + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, N + AP( JC+I-J ) = CJ*S( I )*AP( JC+I-J ) + 30 CONTINUE + JC = JC + N - J + 1 + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of SLAQSP +* + END diff --git a/costa/native/external/lapack/slaqsy.f b/costa/native/external/lapack/slaqsy.f new file mode 100644 index 000000000..0c22da64e --- /dev/null +++ b/costa/native/external/lapack/slaqsy.f @@ -0,0 +1,142 @@ + SUBROUTINE SLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER LDA, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL A( LDA, * ), S( * ) +* .. +* +* Purpose +* ======= +* +* SLAQSY equilibrates a symmetric matrix A using the scaling factors +* in the vector S. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* n by n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n by n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if EQUED = 'Y', the equilibrated matrix: +* diag(S) * A * diag(S). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(N,1). +* +* S (input) REAL array, dimension (N) +* The scale factors for A. +* +* SCOND (input) REAL +* Ratio of the smallest S(i) to the largest S(i). +* +* AMAX (input) REAL +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies whether or not equilibration was done. +* = 'N': No equilibration. +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if scaling should be done +* based on the ratio of the scaling factors. If SCOND < THRESH, +* scaling is done. +* +* LARGE and SMALL are threshold values used to decide if scaling should +* be done based on the absolute size of the largest matrix element. +* If AMAX > LARGE or AMAX < SMALL, scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J + A( I, J ) = CJ*S( I )*A( I, J ) + 10 CONTINUE + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, N + A( I, J ) = CJ*S( I )*A( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of SLAQSY +* + END diff --git a/costa/native/external/lapack/slaqtr.f b/costa/native/external/lapack/slaqtr.f new file mode 100644 index 000000000..1a6c1b825 --- /dev/null +++ b/costa/native/external/lapack/slaqtr.f @@ -0,0 +1,666 @@ + SUBROUTINE SLAQTR( LTRAN, LREAL, N, T, LDT, B, W, SCALE, X, WORK, + $ INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + LOGICAL LREAL, LTRAN + INTEGER INFO, LDT, N + REAL SCALE, W +* .. +* .. Array Arguments .. + REAL B( * ), T( LDT, * ), WORK( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* SLAQTR solves the real quasi-triangular system +* +* op(T)*p = scale*c, if LREAL = .TRUE. +* +* or the complex quasi-triangular systems +* +* op(T + iB)*(p+iq) = scale*(c+id), if LREAL = .FALSE. +* +* in real arithmetic, where T is upper quasi-triangular. +* If LREAL = .FALSE., then the first diagonal block of T must be +* 1 by 1, B is the specially structured matrix +* +* B = [ b(1) b(2) ... b(n) ] +* [ w ] +* [ w ] +* [ . ] +* [ w ] +* +* op(A) = A or A', A' denotes the conjugate transpose of +* matrix A. +* +* On input, X = [ c ]. On output, X = [ p ]. +* [ d ] [ q ] +* +* This subroutine is designed for the condition number estimation +* in routine STRSNA. +* +* Arguments +* ========= +* +* LTRAN (input) LOGICAL +* On entry, LTRAN specifies the option of conjugate transpose: +* = .FALSE., op(T+i*B) = T+i*B, +* = .TRUE., op(T+i*B) = (T+i*B)'. +* +* LREAL (input) LOGICAL +* On entry, LREAL specifies the input matrix structure: +* = .FALSE., the input is complex +* = .TRUE., the input is real +* +* N (input) INTEGER +* On entry, N specifies the order of T+i*B. N >= 0. +* +* T (input) REAL array, dimension (LDT,N) +* On entry, T contains a matrix in Schur canonical form. +* If LREAL = .FALSE., then the first diagonal block of T must +* be 1 by 1. +* +* LDT (input) INTEGER +* The leading dimension of the matrix T. LDT >= max(1,N). +* +* B (input) REAL array, dimension (N) +* On entry, B contains the elements to form the matrix +* B as described above. +* If LREAL = .TRUE., B is not referenced. +* +* W (input) REAL +* On entry, W is the diagonal element of the matrix B. +* If LREAL = .TRUE., W is not referenced. +* +* SCALE (output) REAL +* On exit, SCALE is the scale factor. +* +* X (input/output) REAL array, dimension (2*N) +* On entry, X contains the right hand side of the system. +* On exit, X is overwritten by the solution. +* +* WORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* On exit, INFO is set to +* 0: successful exit. +* 1: the some diagonal 1 by 1 block has been perturbed by +* a small number SMIN to keep nonsingularity. +* 2: the some diagonal 2 by 2 block has been perturbed by +* a small number in SLALN2 to keep nonsingularity. +* NOTE: In the interests of speed, this routine does not +* check the inputs for errors. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + INTEGER I, IERR, J, J1, J2, JNEXT, K, N1, N2 + REAL BIGNUM, EPS, REC, SCALOC, SI, SMIN, SMINW, + $ SMLNUM, SR, TJJ, TMP, XJ, XMAX, XNORM, Z +* .. +* .. Local Arrays .. + REAL D( 2, 2 ), V( 2, 2 ) +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SASUM, SDOT, SLAMCH, SLANGE + EXTERNAL ISAMAX, SASUM, SDOT, SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SLADIV, SLALN2, SSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Do not test the input parameters for errors +* + NOTRAN = .NOT.LTRAN + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Set constants to control overflow +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM +* + XNORM = SLANGE( 'M', N, N, T, LDT, D ) + IF( .NOT.LREAL ) + $ XNORM = MAX( XNORM, ABS( W ), SLANGE( 'M', N, 1, B, N, D ) ) + SMIN = MAX( SMLNUM, EPS*XNORM ) +* +* Compute 1-norm of each column of strictly upper triangular +* part of T to control overflow in triangular solver. +* + WORK( 1 ) = ZERO + DO 10 J = 2, N + WORK( J ) = SASUM( J-1, T( 1, J ), 1 ) + 10 CONTINUE +* + IF( .NOT.LREAL ) THEN + DO 20 I = 2, N + WORK( I ) = WORK( I ) + ABS( B( I ) ) + 20 CONTINUE + END IF +* + N2 = 2*N + N1 = N + IF( .NOT.LREAL ) + $ N1 = N2 + K = ISAMAX( N1, X, 1 ) + XMAX = ABS( X( K ) ) + SCALE = ONE +* + IF( XMAX.GT.BIGNUM ) THEN + SCALE = BIGNUM / XMAX + CALL SSCAL( N1, SCALE, X, 1 ) + XMAX = BIGNUM + END IF +* + IF( LREAL ) THEN +* + IF( NOTRAN ) THEN +* +* Solve T*p = scale*c +* + JNEXT = N + DO 30 J = N, 1, -1 + IF( J.GT.JNEXT ) + $ GO TO 30 + J1 = J + J2 = J + JNEXT = J - 1 + IF( J.GT.1 ) THEN + IF( T( J, J-1 ).NE.ZERO ) THEN + J1 = J - 1 + JNEXT = J - 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* Meet 1 by 1 diagonal block +* +* Scale to avoid overflow when computing +* x(j) = b(j)/T(j,j) +* + XJ = ABS( X( J1 ) ) + TJJ = ABS( T( J1, J1 ) ) + TMP = T( J1, J1 ) + IF( TJJ.LT.SMIN ) THEN + TMP = SMIN + TJJ = SMIN + INFO = 1 + END IF +* + IF( XJ.EQ.ZERO ) + $ GO TO 30 +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.BIGNUM*TJJ ) THEN + REC = ONE / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J1 ) = X( J1 ) / TMP + XJ = ABS( X( J1 ) ) +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j1 of T. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( WORK( J1 ).GT.( BIGNUM-XMAX )*REC ) THEN + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + END IF + IF( J1.GT.1 ) THEN + CALL SAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 ) + K = ISAMAX( J1-1, X, 1 ) + XMAX = ABS( X( K ) ) + END IF +* + ELSE +* +* Meet 2 by 2 diagonal block +* +* Call 2 by 2 linear system solve, to take +* care of possible overflow by scaling factor. +* + D( 1, 1 ) = X( J1 ) + D( 2, 1 ) = X( J2 ) + CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, T( J1, J1 ), + $ LDT, ONE, ONE, D, 2, ZERO, ZERO, V, 2, + $ SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 2 +* + IF( SCALOC.NE.ONE ) THEN + CALL SSCAL( N, SCALOC, X, 1 ) + SCALE = SCALE*SCALOC + END IF + X( J1 ) = V( 1, 1 ) + X( J2 ) = V( 2, 1 ) +* +* Scale V(1,1) (= X(J1)) and/or V(2,1) (=X(J2)) +* to avoid overflow in updating right-hand side. +* + XJ = MAX( ABS( V( 1, 1 ) ), ABS( V( 2, 1 ) ) ) + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( MAX( WORK( J1 ), WORK( J2 ) ).GT. + $ ( BIGNUM-XMAX )*REC ) THEN + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + END IF +* +* Update right-hand side +* + IF( J1.GT.1 ) THEN + CALL SAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 ) + CALL SAXPY( J1-1, -X( J2 ), T( 1, J2 ), 1, X, 1 ) + K = ISAMAX( J1-1, X, 1 ) + XMAX = ABS( X( K ) ) + END IF +* + END IF +* + 30 CONTINUE +* + ELSE +* +* Solve T'*p = scale*c +* + JNEXT = 1 + DO 40 J = 1, N + IF( J.LT.JNEXT ) + $ GO TO 40 + J1 = J + J2 = J + JNEXT = J + 1 + IF( J.LT.N ) THEN + IF( T( J+1, J ).NE.ZERO ) THEN + J2 = J + 1 + JNEXT = J + 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1 by 1 diagonal block +* +* Scale if necessary to avoid overflow in forming the +* right-hand side element by inner product. +* + XJ = ABS( X( J1 ) ) + IF( XMAX.GT.ONE ) THEN + REC = ONE / XMAX + IF( WORK( J1 ).GT.( BIGNUM-XJ )*REC ) THEN + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + X( J1 ) = X( J1 ) - SDOT( J1-1, T( 1, J1 ), 1, X, 1 ) +* + XJ = ABS( X( J1 ) ) + TJJ = ABS( T( J1, J1 ) ) + TMP = T( J1, J1 ) + IF( TJJ.LT.SMIN ) THEN + TMP = SMIN + TJJ = SMIN + INFO = 1 + END IF +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.BIGNUM*TJJ ) THEN + REC = ONE / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J1 ) = X( J1 ) / TMP + XMAX = MAX( XMAX, ABS( X( J1 ) ) ) +* + ELSE +* +* 2 by 2 diagonal block +* +* Scale if necessary to avoid overflow in forming the +* right-hand side elements by inner product. +* + XJ = MAX( ABS( X( J1 ) ), ABS( X( J2 ) ) ) + IF( XMAX.GT.ONE ) THEN + REC = ONE / XMAX + IF( MAX( WORK( J2 ), WORK( J1 ) ).GT.( BIGNUM-XJ )* + $ REC ) THEN + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + D( 1, 1 ) = X( J1 ) - SDOT( J1-1, T( 1, J1 ), 1, X, + $ 1 ) + D( 2, 1 ) = X( J2 ) - SDOT( J1-1, T( 1, J2 ), 1, X, + $ 1 ) +* + CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, T( J1, J1 ), + $ LDT, ONE, ONE, D, 2, ZERO, ZERO, V, 2, + $ SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 2 +* + IF( SCALOC.NE.ONE ) THEN + CALL SSCAL( N, SCALOC, X, 1 ) + SCALE = SCALE*SCALOC + END IF + X( J1 ) = V( 1, 1 ) + X( J2 ) = V( 2, 1 ) + XMAX = MAX( ABS( X( J1 ) ), ABS( X( J2 ) ), XMAX ) +* + END IF + 40 CONTINUE + END IF +* + ELSE +* + SMINW = MAX( EPS*ABS( W ), SMIN ) + IF( NOTRAN ) THEN +* +* Solve (T + iB)*(p+iq) = c+id +* + JNEXT = N + DO 70 J = N, 1, -1 + IF( J.GT.JNEXT ) + $ GO TO 70 + J1 = J + J2 = J + JNEXT = J - 1 + IF( J.GT.1 ) THEN + IF( T( J, J-1 ).NE.ZERO ) THEN + J1 = J - 1 + JNEXT = J - 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1 by 1 diagonal block +* +* Scale if necessary to avoid overflow in division +* + Z = W + IF( J1.EQ.1 ) + $ Z = B( 1 ) + XJ = ABS( X( J1 ) ) + ABS( X( N+J1 ) ) + TJJ = ABS( T( J1, J1 ) ) + ABS( Z ) + TMP = T( J1, J1 ) + IF( TJJ.LT.SMINW ) THEN + TMP = SMINW + TJJ = SMINW + INFO = 1 + END IF +* + IF( XJ.EQ.ZERO ) + $ GO TO 70 +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.BIGNUM*TJJ ) THEN + REC = ONE / XJ + CALL SSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + CALL SLADIV( X( J1 ), X( N+J1 ), TMP, Z, SR, SI ) + X( J1 ) = SR + X( N+J1 ) = SI + XJ = ABS( X( J1 ) ) + ABS( X( N+J1 ) ) +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j1 of T. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( WORK( J1 ).GT.( BIGNUM-XMAX )*REC ) THEN + CALL SSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + END IF + END IF +* + IF( J1.GT.1 ) THEN + CALL SAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 ) + CALL SAXPY( J1-1, -X( N+J1 ), T( 1, J1 ), 1, + $ X( N+1 ), 1 ) +* + X( 1 ) = X( 1 ) + B( J1 )*X( N+J1 ) + X( N+1 ) = X( N+1 ) - B( J1 )*X( J1 ) +* + XMAX = ZERO + DO 50 K = 1, J1 - 1 + XMAX = MAX( XMAX, ABS( X( K ) )+ + $ ABS( X( K+N ) ) ) + 50 CONTINUE + END IF +* + ELSE +* +* Meet 2 by 2 diagonal block +* + D( 1, 1 ) = X( J1 ) + D( 2, 1 ) = X( J2 ) + D( 1, 2 ) = X( N+J1 ) + D( 2, 2 ) = X( N+J2 ) + CALL SLALN2( .FALSE., 2, 2, SMINW, ONE, T( J1, J1 ), + $ LDT, ONE, ONE, D, 2, ZERO, -W, V, 2, + $ SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 2 +* + IF( SCALOC.NE.ONE ) THEN + CALL SSCAL( 2*N, SCALOC, X, 1 ) + SCALE = SCALOC*SCALE + END IF + X( J1 ) = V( 1, 1 ) + X( J2 ) = V( 2, 1 ) + X( N+J1 ) = V( 1, 2 ) + X( N+J2 ) = V( 2, 2 ) +* +* Scale X(J1), .... to avoid overflow in +* updating right hand side. +* + XJ = MAX( ABS( V( 1, 1 ) )+ABS( V( 1, 2 ) ), + $ ABS( V( 2, 1 ) )+ABS( V( 2, 2 ) ) ) + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( MAX( WORK( J1 ), WORK( J2 ) ).GT. + $ ( BIGNUM-XMAX )*REC ) THEN + CALL SSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + END IF + END IF +* +* Update the right-hand side. +* + IF( J1.GT.1 ) THEN + CALL SAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 ) + CALL SAXPY( J1-1, -X( J2 ), T( 1, J2 ), 1, X, 1 ) +* + CALL SAXPY( J1-1, -X( N+J1 ), T( 1, J1 ), 1, + $ X( N+1 ), 1 ) + CALL SAXPY( J1-1, -X( N+J2 ), T( 1, J2 ), 1, + $ X( N+1 ), 1 ) +* + X( 1 ) = X( 1 ) + B( J1 )*X( N+J1 ) + + $ B( J2 )*X( N+J2 ) + X( N+1 ) = X( N+1 ) - B( J1 )*X( J1 ) - + $ B( J2 )*X( J2 ) +* + XMAX = ZERO + DO 60 K = 1, J1 - 1 + XMAX = MAX( ABS( X( K ) )+ABS( X( K+N ) ), + $ XMAX ) + 60 CONTINUE + END IF +* + END IF + 70 CONTINUE +* + ELSE +* +* Solve (T + iB)'*(p+iq) = c+id +* + JNEXT = 1 + DO 80 J = 1, N + IF( J.LT.JNEXT ) + $ GO TO 80 + J1 = J + J2 = J + JNEXT = J + 1 + IF( J.LT.N ) THEN + IF( T( J+1, J ).NE.ZERO ) THEN + J2 = J + 1 + JNEXT = J + 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1 by 1 diagonal block +* +* Scale if necessary to avoid overflow in forming the +* right-hand side element by inner product. +* + XJ = ABS( X( J1 ) ) + ABS( X( J1+N ) ) + IF( XMAX.GT.ONE ) THEN + REC = ONE / XMAX + IF( WORK( J1 ).GT.( BIGNUM-XJ )*REC ) THEN + CALL SSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + X( J1 ) = X( J1 ) - SDOT( J1-1, T( 1, J1 ), 1, X, 1 ) + X( N+J1 ) = X( N+J1 ) - SDOT( J1-1, T( 1, J1 ), 1, + $ X( N+1 ), 1 ) + IF( J1.GT.1 ) THEN + X( J1 ) = X( J1 ) - B( J1 )*X( N+1 ) + X( N+J1 ) = X( N+J1 ) + B( J1 )*X( 1 ) + END IF + XJ = ABS( X( J1 ) ) + ABS( X( J1+N ) ) +* + Z = W + IF( J1.EQ.1 ) + $ Z = B( 1 ) +* +* Scale if necessary to avoid overflow in +* complex division +* + TJJ = ABS( T( J1, J1 ) ) + ABS( Z ) + TMP = T( J1, J1 ) + IF( TJJ.LT.SMINW ) THEN + TMP = SMINW + TJJ = SMINW + INFO = 1 + END IF +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.BIGNUM*TJJ ) THEN + REC = ONE / XJ + CALL SSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + CALL SLADIV( X( J1 ), X( N+J1 ), TMP, -Z, SR, SI ) + X( J1 ) = SR + X( J1+N ) = SI + XMAX = MAX( ABS( X( J1 ) )+ABS( X( J1+N ) ), XMAX ) +* + ELSE +* +* 2 by 2 diagonal block +* +* Scale if necessary to avoid overflow in forming the +* right-hand side element by inner product. +* + XJ = MAX( ABS( X( J1 ) )+ABS( X( N+J1 ) ), + $ ABS( X( J2 ) )+ABS( X( N+J2 ) ) ) + IF( XMAX.GT.ONE ) THEN + REC = ONE / XMAX + IF( MAX( WORK( J1 ), WORK( J2 ) ).GT. + $ ( BIGNUM-XJ ) / XMAX ) THEN + CALL SSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + D( 1, 1 ) = X( J1 ) - SDOT( J1-1, T( 1, J1 ), 1, X, + $ 1 ) + D( 2, 1 ) = X( J2 ) - SDOT( J1-1, T( 1, J2 ), 1, X, + $ 1 ) + D( 1, 2 ) = X( N+J1 ) - SDOT( J1-1, T( 1, J1 ), 1, + $ X( N+1 ), 1 ) + D( 2, 2 ) = X( N+J2 ) - SDOT( J1-1, T( 1, J2 ), 1, + $ X( N+1 ), 1 ) + D( 1, 1 ) = D( 1, 1 ) - B( J1 )*X( N+1 ) + D( 2, 1 ) = D( 2, 1 ) - B( J2 )*X( N+1 ) + D( 1, 2 ) = D( 1, 2 ) + B( J1 )*X( 1 ) + D( 2, 2 ) = D( 2, 2 ) + B( J2 )*X( 1 ) +* + CALL SLALN2( .TRUE., 2, 2, SMINW, ONE, T( J1, J1 ), + $ LDT, ONE, ONE, D, 2, ZERO, W, V, 2, + $ SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 2 +* + IF( SCALOC.NE.ONE ) THEN + CALL SSCAL( N2, SCALOC, X, 1 ) + SCALE = SCALOC*SCALE + END IF + X( J1 ) = V( 1, 1 ) + X( J2 ) = V( 2, 1 ) + X( N+J1 ) = V( 1, 2 ) + X( N+J2 ) = V( 2, 2 ) + XMAX = MAX( ABS( X( J1 ) )+ABS( X( N+J1 ) ), + $ ABS( X( J2 ) )+ABS( X( N+J2 ) ), XMAX ) +* + END IF +* + 80 CONTINUE +* + END IF +* + END IF +* + RETURN +* +* End of SLAQTR +* + END diff --git a/costa/native/external/lapack/slar1v.f b/costa/native/external/lapack/slar1v.f new file mode 100644 index 000000000..9838098bd --- /dev/null +++ b/costa/native/external/lapack/slar1v.f @@ -0,0 +1,323 @@ + SUBROUTINE SLAR1V( N, B1, BN, SIGMA, D, L, LD, LLD, GERSCH, Z, + $ ZTZ, MINGMA, R, ISUPPZ, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER B1, BN, N, R + REAL MINGMA, SIGMA, ZTZ +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ) + REAL D( * ), GERSCH( * ), L( * ), LD( * ), LLD( * ), + $ WORK( * ), Z( * ) +* .. +* +* Purpose +* ======= +* +* SLAR1V computes the (scaled) r-th column of the inverse of +* the sumbmatrix in rows B1 through BN of the tridiagonal matrix +* L D L^T - sigma I. The following steps accomplish this computation : +* (a) Stationary qd transform, L D L^T - sigma I = L(+) D(+) L(+)^T, +* (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T, +* (c) Computation of the diagonal elements of the inverse of +* L D L^T - sigma I by combining the above transforms, and choosing +* r as the index where the diagonal of the inverse is (one of the) +* largest in magnitude. +* (d) Computation of the (scaled) r-th column of the inverse using the +* twisted factorization obtained by combining the top part of the +* the stationary and the bottom part of the progressive transform. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix L D L^T. +* +* B1 (input) INTEGER +* First index of the submatrix of L D L^T. +* +* BN (input) INTEGER +* Last index of the submatrix of L D L^T. +* +* SIGMA (input) REAL +* The shift. Initially, when R = 0, SIGMA should be a good +* approximation to an eigenvalue of L D L^T. +* +* L (input) REAL array, dimension (N-1) +* The (n-1) subdiagonal elements of the unit bidiagonal matrix +* L, in elements 1 to N-1. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the diagonal matrix D. +* +* LD (input) REAL array, dimension (N-1) +* The n-1 elements L(i)*D(i). +* +* LLD (input) REAL array, dimension (N-1) +* The n-1 elements L(i)*L(i)*D(i). +* +* GERSCH (input) REAL array, dimension (2*N) +* The n Gerschgorin intervals. These are used to restrict +* the initial search for R, when R is input as 0. +* +* Z (output) REAL array, dimension (N) +* The (scaled) r-th column of the inverse. Z(R) is returned +* to be 1. +* +* ZTZ (output) REAL +* The square of the norm of Z. +* +* MINGMA (output) REAL +* The reciprocal of the largest (in magnitude) diagonal +* element of the inverse of L D L^T - sigma I. +* +* R (input/output) INTEGER +* Initially, R should be input to be 0 and is then output as +* the index where the diagonal element of the inverse is +* largest in magnitude. In later iterations, this same value +* of R should be input. +* +* ISUPPZ (output) INTEGER array, dimension (2) +* The support of the vector in Z, i.e., the vector Z is +* nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ). +* +* WORK (workspace) REAL array, dimension (4*N) +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER BLKSIZ + PARAMETER ( BLKSIZ = 32 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL SAWNAN + INTEGER FROM, I, INDP, INDS, INDUMN, J, R1, R2, TO + REAL DMINUS, DPLUS, EPS, S, TMP +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + EPS = SLAMCH( 'Precision' ) + IF( R.EQ.0 ) THEN +* +* Eliminate the top and bottom indices from the possible values +* of R where the desired eigenvector is largest in magnitude. +* + R1 = B1 + DO 10 I = B1, BN + IF( SIGMA.GE.GERSCH( 2*I-1 ) .OR. SIGMA.LE.GERSCH( 2*I ) ) + $ THEN + R1 = I + GO TO 20 + END IF + 10 CONTINUE + 20 CONTINUE + R2 = BN + DO 30 I = BN, B1, -1 + IF( SIGMA.GE.GERSCH( 2*I-1 ) .OR. SIGMA.LE.GERSCH( 2*I ) ) + $ THEN + R2 = I + GO TO 40 + END IF + 30 CONTINUE + 40 CONTINUE + ELSE + R1 = R + R2 = R + END IF +* + INDUMN = N + INDS = 2*N + 1 + INDP = 3*N + 1 + SAWNAN = .FALSE. +* +* Compute the stationary transform (using the differential form) +* untill the index R2 +* + IF( B1.EQ.1 ) THEN + WORK( INDS ) = ZERO + ELSE + WORK( INDS ) = LLD( B1-1 ) + END IF + S = WORK( INDS ) - SIGMA + DO 50 I = B1, R2 - 1 + DPLUS = D( I ) + S + WORK( I ) = LD( I ) / DPLUS + WORK( INDS+I ) = S*WORK( I )*L( I ) + S = WORK( INDS+I ) - SIGMA + 50 CONTINUE +* + IF( .NOT.( S.GT.ZERO .OR. S.LT.ONE ) ) THEN +* +* Run a slower version of the above loop if a NaN is detected +* + SAWNAN = .TRUE. + J = B1 + 1 + 60 CONTINUE + IF( WORK( INDS+J ).GT.ZERO .OR. WORK( INDS+J ).LT.ONE ) THEN + J = J + 1 + GO TO 60 + END IF + WORK( INDS+J ) = LLD( J ) + S = WORK( INDS+J ) - SIGMA + DO 70 I = J + 1, R2 - 1 + DPLUS = D( I ) + S + WORK( I ) = LD( I ) / DPLUS + IF( WORK( I ).EQ.ZERO ) THEN + WORK( INDS+I ) = LLD( I ) + ELSE + WORK( INDS+I ) = S*WORK( I )*L( I ) + END IF + S = WORK( INDS+I ) - SIGMA + 70 CONTINUE + END IF + WORK( INDP+BN-1 ) = D( BN ) - SIGMA + DO 80 I = BN - 1, R1, -1 + DMINUS = LLD( I ) + WORK( INDP+I ) + TMP = D( I ) / DMINUS + WORK( INDUMN+I ) = L( I )*TMP + WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - SIGMA + 80 CONTINUE + TMP = WORK( INDP+R1-1 ) + IF( .NOT.( TMP.GT.ZERO .OR. TMP.LT.ONE ) ) THEN +* +* Run a slower version of the above loop if a NaN is detected +* + SAWNAN = .TRUE. + J = BN - 3 + 90 CONTINUE + IF( WORK( INDP+J ).GT.ZERO .OR. WORK( INDP+J ).LT.ONE ) THEN + J = J - 1 + GO TO 90 + END IF + WORK( INDP+J ) = D( J+1 ) - SIGMA + DO 100 I = J, R1, -1 + DMINUS = LLD( I ) + WORK( INDP+I ) + TMP = D( I ) / DMINUS + WORK( INDUMN+I ) = L( I )*TMP + IF( TMP.EQ.ZERO ) THEN + WORK( INDP+I-1 ) = D( I ) - SIGMA + ELSE + WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - SIGMA + END IF + 100 CONTINUE + END IF +* +* Find the index (from R1 to R2) of the largest (in magnitude) +* diagonal element of the inverse +* + MINGMA = WORK( INDS+R1-1 ) + WORK( INDP+R1-1 ) + IF( MINGMA.EQ.ZERO ) + $ MINGMA = EPS*WORK( INDS+R1-1 ) + R = R1 + DO 110 I = R1, R2 - 1 + TMP = WORK( INDS+I ) + WORK( INDP+I ) + IF( TMP.EQ.ZERO ) + $ TMP = EPS*WORK( INDS+I ) + IF( ABS( TMP ).LT.ABS( MINGMA ) ) THEN + MINGMA = TMP + R = I + 1 + END IF + 110 CONTINUE +* +* Compute the (scaled) r-th column of the inverse +* + ISUPPZ( 1 ) = B1 + ISUPPZ( 2 ) = BN + Z( R ) = ONE + ZTZ = ONE + IF( .NOT.SAWNAN ) THEN + FROM = R - 1 + TO = MAX( R-BLKSIZ, B1 ) + 120 CONTINUE + IF( FROM.GE.B1 ) THEN + DO 130 I = FROM, TO, -1 + Z( I ) = -( WORK( I )*Z( I+1 ) ) + ZTZ = ZTZ + Z( I )*Z( I ) + 130 CONTINUE + IF( ABS( Z( TO ) ).LE.EPS .AND. ABS( Z( TO+1 ) ).LE.EPS ) + $ THEN + ISUPPZ( 1 ) = TO + 2 + ELSE + FROM = TO - 1 + TO = MAX( TO-BLKSIZ, B1 ) + GO TO 120 + END IF + END IF + FROM = R + 1 + TO = MIN( R+BLKSIZ, BN ) + 140 CONTINUE + IF( FROM.LE.BN ) THEN + DO 150 I = FROM, TO + Z( I ) = -( WORK( INDUMN+I-1 )*Z( I-1 ) ) + ZTZ = ZTZ + Z( I )*Z( I ) + 150 CONTINUE + IF( ABS( Z( TO ) ).LE.EPS .AND. ABS( Z( TO-1 ) ).LE.EPS ) + $ THEN + ISUPPZ( 2 ) = TO - 2 + ELSE + FROM = TO + 1 + TO = MIN( TO+BLKSIZ, BN ) + GO TO 140 + END IF + END IF + ELSE + DO 160 I = R - 1, B1, -1 + IF( Z( I+1 ).EQ.ZERO ) THEN + Z( I ) = -( LD( I+1 ) / LD( I ) )*Z( I+2 ) + ELSE IF( ABS( Z( I+1 ) ).LE.EPS .AND. ABS( Z( I+2 ) ).LE. + $ EPS ) THEN + ISUPPZ( 1 ) = I + 3 + GO TO 170 + ELSE + Z( I ) = -( WORK( I )*Z( I+1 ) ) + END IF + ZTZ = ZTZ + Z( I )*Z( I ) + 160 CONTINUE + 170 CONTINUE + DO 180 I = R, BN - 1 + IF( Z( I ).EQ.ZERO ) THEN + Z( I+1 ) = -( LD( I-1 ) / LD( I ) )*Z( I-1 ) + ELSE IF( ABS( Z( I ) ).LE.EPS .AND. ABS( Z( I-1 ) ).LE.EPS ) + $ THEN + ISUPPZ( 2 ) = I - 2 + GO TO 190 + ELSE + Z( I+1 ) = -( WORK( INDUMN+I )*Z( I ) ) + END IF + ZTZ = ZTZ + Z( I+1 )*Z( I+1 ) + 180 CONTINUE + 190 CONTINUE + END IF + DO 200 I = B1, ISUPPZ( 1 ) - 3 + Z( I ) = ZERO + 200 CONTINUE + DO 210 I = ISUPPZ( 2 ) + 3, BN + Z( I ) = ZERO + 210 CONTINUE +* + RETURN +* +* End of SLAR1V +* + END diff --git a/costa/native/external/lapack/slar2v.f b/costa/native/external/lapack/slar2v.f new file mode 100644 index 000000000..ec9248d5f --- /dev/null +++ b/costa/native/external/lapack/slar2v.f @@ -0,0 +1,87 @@ + SUBROUTINE SLAR2V( N, X, Y, Z, INCX, C, S, INCC ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, N +* .. +* .. Array Arguments .. + REAL C( * ), S( * ), X( * ), Y( * ), Z( * ) +* .. +* +* Purpose +* ======= +* +* SLAR2V applies a vector of real plane rotations from both sides to +* a sequence of 2-by-2 real symmetric matrices, defined by the elements +* of the vectors x, y and z. For i = 1,2,...,n +* +* ( x(i) z(i) ) := ( c(i) s(i) ) ( x(i) z(i) ) ( c(i) -s(i) ) +* ( z(i) y(i) ) ( -s(i) c(i) ) ( z(i) y(i) ) ( s(i) c(i) ) +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of plane rotations to be applied. +* +* X (input/output) REAL array, +* dimension (1+(N-1)*INCX) +* The vector x. +* +* Y (input/output) REAL array, +* dimension (1+(N-1)*INCX) +* The vector y. +* +* Z (input/output) REAL array, +* dimension (1+(N-1)*INCX) +* The vector z. +* +* INCX (input) INTEGER +* The increment between elements of X, Y and Z. INCX > 0. +* +* C (input) REAL array, dimension (1+(N-1)*INCC) +* The cosines of the plane rotations. +* +* S (input) REAL array, dimension (1+(N-1)*INCC) +* The sines of the plane rotations. +* +* INCC (input) INTEGER +* The increment between elements of C and S. INCC > 0. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IC, IX + REAL CI, SI, T1, T2, T3, T4, T5, T6, XI, YI, ZI +* .. +* .. Executable Statements .. +* + IX = 1 + IC = 1 + DO 10 I = 1, N + XI = X( IX ) + YI = Y( IX ) + ZI = Z( IX ) + CI = C( IC ) + SI = S( IC ) + T1 = SI*ZI + T2 = CI*ZI + T3 = T2 - SI*XI + T4 = T2 + SI*YI + T5 = CI*XI + T1 + T6 = CI*YI - T1 + X( IX ) = CI*T5 + SI*T4 + Y( IX ) = CI*T6 - SI*T3 + Z( IX ) = CI*T4 - SI*T5 + IX = IX + INCX + IC = IC + INCC + 10 CONTINUE +* +* End of SLAR2V +* + RETURN + END diff --git a/costa/native/external/lapack/slarf.f b/costa/native/external/lapack/slarf.f new file mode 100644 index 000000000..a5624ce09 --- /dev/null +++ b/costa/native/external/lapack/slarf.f @@ -0,0 +1,116 @@ + SUBROUTINE SLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + REAL TAU +* .. +* .. Array Arguments .. + REAL C( LDC, * ), V( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLARF applies a real elementary reflector H to a real m by n matrix +* C, from either the left or the right. H is represented in the form +* +* H = I - tau * v * v' +* +* where tau is a real scalar and v is a real vector. +* +* If tau = 0, then H is taken to be the unit matrix. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': form H * C +* = 'R': form C * H +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* V (input) REAL array, dimension +* (1 + (M-1)*abs(INCV)) if SIDE = 'L' +* or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +* The vector v in the representation of H. V is not used if +* TAU = 0. +* +* INCV (input) INTEGER +* The increment between elements of v. INCV <> 0. +* +* TAU (input) REAL +* The value tau in the representation of H. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the m by n matrix C. +* On exit, C is overwritten by the matrix H * C if SIDE = 'L', +* or C * H if SIDE = 'R'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) REAL array, dimension +* (N) if SIDE = 'L' +* or (M) if SIDE = 'R' +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SGER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C +* + IF( TAU.NE.ZERO ) THEN +* +* w := C' * v +* + CALL SGEMV( 'Transpose', M, N, ONE, C, LDC, V, INCV, ZERO, + $ WORK, 1 ) +* +* C := C - v * w' +* + CALL SGER( M, N, -TAU, V, INCV, WORK, 1, C, LDC ) + END IF + ELSE +* +* Form C * H +* + IF( TAU.NE.ZERO ) THEN +* +* w := C * v +* + CALL SGEMV( 'No transpose', M, N, ONE, C, LDC, V, INCV, + $ ZERO, WORK, 1 ) +* +* C := C - w * v' +* + CALL SGER( M, N, -TAU, WORK, 1, V, INCV, C, LDC ) + END IF + END IF + RETURN +* +* End of SLARF +* + END diff --git a/costa/native/external/lapack/slarfb.f b/costa/native/external/lapack/slarfb.f new file mode 100644 index 000000000..7cc29efa5 --- /dev/null +++ b/costa/native/external/lapack/slarfb.f @@ -0,0 +1,588 @@ + SUBROUTINE SLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, + $ T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + REAL C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* Purpose +* ======= +* +* SLARFB applies a real block reflector H or its transpose H' to a +* real m by n matrix C, from either the left or the right. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply H or H' from the Left +* = 'R': apply H or H' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply H (No transpose) +* = 'T': apply H' (Transpose) +* +* DIRECT (input) CHARACTER*1 +* Indicates how H is formed from a product of elementary +* reflectors +* = 'F': H = H(1) H(2) . . . H(k) (Forward) +* = 'B': H = H(k) . . . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Indicates how the vectors which define the elementary +* reflectors are stored: +* = 'C': Columnwise +* = 'R': Rowwise +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* K (input) INTEGER +* The order of the matrix T (= the number of elementary +* reflectors whose product defines the block reflector). +* +* V (input) REAL array, dimension +* (LDV,K) if STOREV = 'C' +* (LDV,M) if STOREV = 'R' and SIDE = 'L' +* (LDV,N) if STOREV = 'R' and SIDE = 'R' +* The matrix V. See further details. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); +* if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); +* if STOREV = 'R', LDV >= K. +* +* T (input) REAL array, dimension (LDT,K) +* The triangular k by k matrix T in the representation of the +* block reflector. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the m by n matrix C. +* On exit, C is overwritten by H*C or H'*C or C*H or C*H'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDA >= max(1,M). +* +* WORK (workspace) REAL array, dimension (LDWORK,K) +* +* LDWORK (input) INTEGER +* The leading dimension of the array WORK. +* If SIDE = 'L', LDWORK >= max(1,N); +* if SIDE = 'R', LDWORK >= max(1,M). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, STRMM +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( STOREV, 'C' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 ) (first K rows) +* ( V2 ) +* where V1 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) +* +* W := C1' +* + DO 10 J = 1, K + CALL SCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W := W * V1 +* + CALL STRMM( 'Right', 'Lower', 'No transpose', 'Unit', N, + $ K, ONE, V, LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C2'*V2 +* + CALL SGEMM( 'Transpose', 'No transpose', N, K, M-K, + $ ONE, C( K+1, 1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL STRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W' +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2 * W' +* + CALL SGEMM( 'No transpose', 'Transpose', M-K, N, K, + $ -ONE, V( K+1, 1 ), LDV, WORK, LDWORK, ONE, + $ C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1' +* + CALL STRMM( 'Right', 'Lower', 'Transpose', 'Unit', N, K, + $ ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W' +* + DO 30 J = 1, K + DO 20 I = 1, N + C( J, I ) = C( J, I ) - WORK( I, J ) + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C1 +* + DO 40 J = 1, K + CALL SCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W := W * V1 +* + CALL STRMM( 'Right', 'Lower', 'No transpose', 'Unit', M, + $ K, ONE, V, LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C2 * V2 +* + CALL SGEMM( 'No transpose', 'No transpose', M, K, N-K, + $ ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL STRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V' +* + IF( N.GT.K ) THEN +* +* C2 := C2 - W * V2' +* + CALL SGEMM( 'No transpose', 'Transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V( K+1, 1 ), LDV, ONE, + $ C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1' +* + CALL STRMM( 'Right', 'Lower', 'Transpose', 'Unit', M, K, + $ ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 60 J = 1, K + DO 50 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + ELSE +* +* Let V = ( V1 ) +* ( V2 ) (last K rows) +* where V2 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) +* +* W := C2' +* + DO 70 J = 1, K + CALL SCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 ) + 70 CONTINUE +* +* W := W * V2 +* + CALL STRMM( 'Right', 'Upper', 'No transpose', 'Unit', N, + $ K, ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1'*V1 +* + CALL SGEMM( 'Transpose', 'No transpose', N, K, M-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL STRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W' +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1 * W' +* + CALL SGEMM( 'No transpose', 'Transpose', M-K, N, K, + $ -ONE, V, LDV, WORK, LDWORK, ONE, C, LDC ) + END IF +* +* W := W * V2' +* + CALL STRMM( 'Right', 'Upper', 'Transpose', 'Unit', N, K, + $ ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK ) +* +* C2 := C2 - W' +* + DO 90 J = 1, K + DO 80 I = 1, N + C( M-K+J, I ) = C( M-K+J, I ) - WORK( I, J ) + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C2 +* + DO 100 J = 1, K + CALL SCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 100 CONTINUE +* +* W := W * V2 +* + CALL STRMM( 'Right', 'Upper', 'No transpose', 'Unit', M, + $ K, ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1 +* + CALL SGEMM( 'No transpose', 'No transpose', M, K, N-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL STRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V' +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1' +* + CALL SGEMM( 'No transpose', 'Transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC ) + END IF +* +* W := W * V2' +* + CALL STRMM( 'Right', 'Upper', 'Transpose', 'Unit', M, K, + $ ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK ) +* +* C2 := C2 - W +* + DO 120 J = 1, K + DO 110 I = 1, M + C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J ) + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* + ELSE IF( LSAME( STOREV, 'R' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 V2 ) (V1: first K columns) +* where V1 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) +* +* W := C1' +* + DO 130 J = 1, K + CALL SCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 130 CONTINUE +* +* W := W * V1' +* + CALL STRMM( 'Right', 'Upper', 'Transpose', 'Unit', N, K, + $ ONE, V, LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C2'*V2' +* + CALL SGEMM( 'Transpose', 'Transpose', N, K, M-K, ONE, + $ C( K+1, 1 ), LDC, V( 1, K+1 ), LDV, ONE, + $ WORK, LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL STRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V' * W' +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2' * W' +* + CALL SGEMM( 'Transpose', 'Transpose', M-K, N, K, -ONE, + $ V( 1, K+1 ), LDV, WORK, LDWORK, ONE, + $ C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL STRMM( 'Right', 'Upper', 'No transpose', 'Unit', N, + $ K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W' +* + DO 150 J = 1, K + DO 140 I = 1, N + C( J, I ) = C( J, I ) - WORK( I, J ) + 140 CONTINUE + 150 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) +* +* W := C1 +* + DO 160 J = 1, K + CALL SCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 160 CONTINUE +* +* W := W * V1' +* + CALL STRMM( 'Right', 'Upper', 'Transpose', 'Unit', M, K, + $ ONE, V, LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C2 * V2' +* + CALL SGEMM( 'No transpose', 'Transpose', M, K, N-K, + $ ONE, C( 1, K+1 ), LDC, V( 1, K+1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL STRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( N.GT.K ) THEN +* +* C2 := C2 - W * V2 +* + CALL SGEMM( 'No transpose', 'No transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V( 1, K+1 ), LDV, ONE, + $ C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL STRMM( 'Right', 'Upper', 'No transpose', 'Unit', M, + $ K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 180 J = 1, K + DO 170 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 170 CONTINUE + 180 CONTINUE +* + END IF +* + ELSE +* +* Let V = ( V1 V2 ) (V2: last K columns) +* where V2 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) +* +* W := C2' +* + DO 190 J = 1, K + CALL SCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 ) + 190 CONTINUE +* +* W := W * V2' +* + CALL STRMM( 'Right', 'Lower', 'Transpose', 'Unit', N, K, + $ ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1'*V1' +* + CALL SGEMM( 'Transpose', 'Transpose', N, K, M-K, ONE, + $ C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL STRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V' * W' +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1' * W' +* + CALL SGEMM( 'Transpose', 'Transpose', M-K, N, K, -ONE, + $ V, LDV, WORK, LDWORK, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL STRMM( 'Right', 'Lower', 'No transpose', 'Unit', N, + $ K, ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK ) +* +* C2 := C2 - W' +* + DO 210 J = 1, K + DO 200 I = 1, N + C( M-K+J, I ) = C( M-K+J, I ) - WORK( I, J ) + 200 CONTINUE + 210 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) +* +* W := C2 +* + DO 220 J = 1, K + CALL SCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 220 CONTINUE +* +* W := W * V2' +* + CALL STRMM( 'Right', 'Lower', 'Transpose', 'Unit', M, K, + $ ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1' +* + CALL SGEMM( 'No transpose', 'Transpose', M, K, N-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL STRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1 +* + CALL SGEMM( 'No transpose', 'No transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL STRMM( 'Right', 'Lower', 'No transpose', 'Unit', M, + $ K, ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 240 J = 1, K + DO 230 I = 1, M + C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J ) + 230 CONTINUE + 240 CONTINUE +* + END IF +* + END IF + END IF +* + RETURN +* +* End of SLARFB +* + END diff --git a/costa/native/external/lapack/slarfg.f b/costa/native/external/lapack/slarfg.f new file mode 100644 index 000000000..1a7539808 --- /dev/null +++ b/costa/native/external/lapack/slarfg.f @@ -0,0 +1,138 @@ + SUBROUTINE SLARFG( N, ALPHA, X, INCX, TAU ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INCX, N + REAL ALPHA, TAU +* .. +* .. Array Arguments .. + REAL X( * ) +* .. +* +* Purpose +* ======= +* +* SLARFG generates a real elementary reflector H of order n, such +* that +* +* H * ( alpha ) = ( beta ), H' * H = I. +* ( x ) ( 0 ) +* +* where alpha and beta are scalars, and x is an (n-1)-element real +* vector. H is represented in the form +* +* H = I - tau * ( 1 ) * ( 1 v' ) , +* ( v ) +* +* where tau is a real scalar and v is a real (n-1)-element +* vector. +* +* If the elements of x are all zero, then tau = 0 and H is taken to be +* the unit matrix. +* +* Otherwise 1 <= tau <= 2. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the elementary reflector. +* +* ALPHA (input/output) REAL +* On entry, the value alpha. +* On exit, it is overwritten with the value beta. +* +* X (input/output) REAL array, dimension +* (1+(N-2)*abs(INCX)) +* On entry, the vector x. +* On exit, it is overwritten with the vector v. +* +* INCX (input) INTEGER +* The increment between elements of X. INCX > 0. +* +* TAU (output) REAL +* The value tau. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER J, KNT + REAL BETA, RSAFMN, SAFMIN, XNORM +* .. +* .. External Functions .. + REAL SLAMCH, SLAPY2, SNRM2 + EXTERNAL SLAMCH, SLAPY2, SNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN +* .. +* .. External Subroutines .. + EXTERNAL SSCAL +* .. +* .. Executable Statements .. +* + IF( N.LE.1 ) THEN + TAU = ZERO + RETURN + END IF +* + XNORM = SNRM2( N-1, X, INCX ) +* + IF( XNORM.EQ.ZERO ) THEN +* +* H = I +* + TAU = ZERO + ELSE +* +* general case +* + BETA = -SIGN( SLAPY2( ALPHA, XNORM ), ALPHA ) + SAFMIN = SLAMCH( 'S' ) / SLAMCH( 'E' ) + IF( ABS( BETA ).LT.SAFMIN ) THEN +* +* XNORM, BETA may be inaccurate; scale X and recompute them +* + RSAFMN = ONE / SAFMIN + KNT = 0 + 10 CONTINUE + KNT = KNT + 1 + CALL SSCAL( N-1, RSAFMN, X, INCX ) + BETA = BETA*RSAFMN + ALPHA = ALPHA*RSAFMN + IF( ABS( BETA ).LT.SAFMIN ) + $ GO TO 10 +* +* New BETA is at most 1, at least SAFMIN +* + XNORM = SNRM2( N-1, X, INCX ) + BETA = -SIGN( SLAPY2( ALPHA, XNORM ), ALPHA ) + TAU = ( BETA-ALPHA ) / BETA + CALL SSCAL( N-1, ONE / ( ALPHA-BETA ), X, INCX ) +* +* If ALPHA is subnormal, it may lose relative accuracy +* + ALPHA = BETA + DO 20 J = 1, KNT + ALPHA = ALPHA*SAFMIN + 20 CONTINUE + ELSE + TAU = ( BETA-ALPHA ) / BETA + CALL SSCAL( N-1, ONE / ( ALPHA-BETA ), X, INCX ) + ALPHA = BETA + END IF + END IF +* + RETURN +* +* End of SLARFG +* + END diff --git a/costa/native/external/lapack/slarft.f b/costa/native/external/lapack/slarft.f new file mode 100644 index 000000000..c72a0eb94 --- /dev/null +++ b/costa/native/external/lapack/slarft.f @@ -0,0 +1,218 @@ + SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + REAL T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* Purpose +* ======= +* +* SLARFT forms the triangular factor T of a real block reflector H +* of order n, which is defined as a product of k elementary reflectors. +* +* If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +* +* If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +* +* If STOREV = 'C', the vector which defines the elementary reflector +* H(i) is stored in the i-th column of the array V, and +* +* H = I - V * T * V' +* +* If STOREV = 'R', the vector which defines the elementary reflector +* H(i) is stored in the i-th row of the array V, and +* +* H = I - V' * T * V +* +* Arguments +* ========= +* +* DIRECT (input) CHARACTER*1 +* Specifies the order in which the elementary reflectors are +* multiplied to form the block reflector: +* = 'F': H = H(1) H(2) . . . H(k) (Forward) +* = 'B': H = H(k) . . . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Specifies how the vectors which define the elementary +* reflectors are stored (see also Further Details): +* = 'C': columnwise +* = 'R': rowwise +* +* N (input) INTEGER +* The order of the block reflector H. N >= 0. +* +* K (input) INTEGER +* The order of the triangular factor T (= the number of +* elementary reflectors). K >= 1. +* +* V (input/output) REAL array, dimension +* (LDV,K) if STOREV = 'C' +* (LDV,N) if STOREV = 'R' +* The matrix V. See further details. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i). +* +* T (output) REAL array, dimension (LDT,K) +* The k by k triangular factor T of the block reflector. +* If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +* lower triangular. The rest of the array is not used. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* Further Details +* =============== +* +* The shape of the matrix V and the storage of the vectors which define +* the H(i) is best illustrated by the following example with n = 5 and +* k = 3. The elements equal to 1 are not stored; the corresponding +* array elements are modified but restored on exit. The rest of the +* array is not used. +* +* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +* +* V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +* ( v1 1 ) ( 1 v2 v2 v2 ) +* ( v1 v2 1 ) ( 1 v3 v3 ) +* ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* +* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +* +* V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +* ( v1 v2 v3 ) ( v2 v2 v2 1 ) +* ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +* ( 1 v3 ) +* ( 1 ) +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL VII +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, STRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 20 I = 1, K + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO 10 J = 1, I + T( J, I ) = ZERO + 10 CONTINUE + ELSE +* +* general case +* + VII = V( I, I ) + V( I, I ) = ONE + IF( LSAME( STOREV, 'C' ) ) THEN +* +* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) +* + CALL SGEMV( 'Transpose', N-I+1, I-1, -TAU( I ), + $ V( I, 1 ), LDV, V( I, I ), 1, ZERO, + $ T( 1, I ), 1 ) + ELSE +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' +* + CALL SGEMV( 'No transpose', I-1, N-I+1, -TAU( I ), + $ V( 1, I ), LDV, V( I, I ), LDV, ZERO, + $ T( 1, I ), 1 ) + END IF + V( I, I ) = VII +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL STRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + END IF + 20 CONTINUE + ELSE + DO 40 I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO 30 J = I, K + T( J, I ) = ZERO + 30 CONTINUE + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN + VII = V( N-K+I, I ) + V( N-K+I, I ) = ONE +* +* T(i+1:k,i) := +* - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) +* + CALL SGEMV( 'Transpose', N-K+I, K-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( 1, I ), 1, ZERO, + $ T( I+1, I ), 1 ) + V( N-K+I, I ) = VII + ELSE + VII = V( I, N-K+I ) + V( I, N-K+I ) = ONE +* +* T(i+1:k,i) := +* - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' +* + CALL SGEMV( 'No transpose', K-I, N-K+I, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO, + $ T( I+1, I ), 1 ) + V( I, N-K+I ) = VII + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL STRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + END IF + T( I, I ) = TAU( I ) + END IF + 40 CONTINUE + END IF + RETURN +* +* End of SLARFT +* + END diff --git a/costa/native/external/lapack/slarfx.f b/costa/native/external/lapack/slarfx.f new file mode 100644 index 000000000..1df9d8a19 --- /dev/null +++ b/costa/native/external/lapack/slarfx.f @@ -0,0 +1,638 @@ + SUBROUTINE SLARFX( SIDE, M, N, V, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER LDC, M, N + REAL TAU +* .. +* .. Array Arguments .. + REAL C( LDC, * ), V( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLARFX applies a real elementary reflector H to a real m by n +* matrix C, from either the left or the right. H is represented in the +* form +* +* H = I - tau * v * v' +* +* where tau is a real scalar and v is a real vector. +* +* If tau = 0, then H is taken to be the unit matrix +* +* This version uses inline code if H has order < 11. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': form H * C +* = 'R': form C * H +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* V (input) REAL array, dimension (M) if SIDE = 'L' +* or (N) if SIDE = 'R' +* The vector v in the representation of H. +* +* TAU (input) REAL +* The value tau in the representation of H. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the m by n matrix C. +* On exit, C is overwritten by the matrix H * C if SIDE = 'L', +* or C * H if SIDE = 'R'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDA >= (1,M). +* +* WORK (workspace) REAL array, dimension +* (N) if SIDE = 'L' +* or (M) if SIDE = 'R' +* WORK is not referenced if H has order < 11. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER J + REAL SUM, T1, T10, T2, T3, T4, T5, T6, T7, T8, T9, + $ V1, V10, V2, V3, V4, V5, V6, V7, V8, V9 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SGER +* .. +* .. Executable Statements .. +* + IF( TAU.EQ.ZERO ) + $ RETURN + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C, where H has order m. +* + GO TO ( 10, 30, 50, 70, 90, 110, 130, 150, + $ 170, 190 )M +* +* Code for general M +* +* w := C'*v +* + CALL SGEMV( 'Transpose', M, N, ONE, C, LDC, V, 1, ZERO, WORK, + $ 1 ) +* +* C := C - tau * v * w' +* + CALL SGER( M, N, -TAU, V, 1, WORK, 1, C, LDC ) + GO TO 410 + 10 CONTINUE +* +* Special code for 1 x 1 Householder +* + T1 = ONE - TAU*V( 1 )*V( 1 ) + DO 20 J = 1, N + C( 1, J ) = T1*C( 1, J ) + 20 CONTINUE + GO TO 410 + 30 CONTINUE +* +* Special code for 2 x 2 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + DO 40 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + 40 CONTINUE + GO TO 410 + 50 CONTINUE +* +* Special code for 3 x 3 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + DO 60 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + 60 CONTINUE + GO TO 410 + 70 CONTINUE +* +* Special code for 4 x 4 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + DO 80 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + 80 CONTINUE + GO TO 410 + 90 CONTINUE +* +* Special code for 5 x 5 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + DO 100 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + 100 CONTINUE + GO TO 410 + 110 CONTINUE +* +* Special code for 6 x 6 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + DO 120 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + 120 CONTINUE + GO TO 410 + 130 CONTINUE +* +* Special code for 7 x 7 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + DO 140 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + 140 CONTINUE + GO TO 410 + 150 CONTINUE +* +* Special code for 8 x 8 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + DO 160 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + 160 CONTINUE + GO TO 410 + 170 CONTINUE +* +* Special code for 9 x 9 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + V9 = V( 9 ) + T9 = TAU*V9 + DO 180 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + C( 9, J ) = C( 9, J ) - SUM*T9 + 180 CONTINUE + GO TO 410 + 190 CONTINUE +* +* Special code for 10 x 10 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + V9 = V( 9 ) + T9 = TAU*V9 + V10 = V( 10 ) + T10 = TAU*V10 + DO 200 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) + + $ V10*C( 10, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + C( 9, J ) = C( 9, J ) - SUM*T9 + C( 10, J ) = C( 10, J ) - SUM*T10 + 200 CONTINUE + GO TO 410 + ELSE +* +* Form C * H, where H has order n. +* + GO TO ( 210, 230, 250, 270, 290, 310, 330, 350, + $ 370, 390 )N +* +* Code for general N +* +* w := C * v +* + CALL SGEMV( 'No transpose', M, N, ONE, C, LDC, V, 1, ZERO, + $ WORK, 1 ) +* +* C := C - tau * w * v' +* + CALL SGER( M, N, -TAU, WORK, 1, V, 1, C, LDC ) + GO TO 410 + 210 CONTINUE +* +* Special code for 1 x 1 Householder +* + T1 = ONE - TAU*V( 1 )*V( 1 ) + DO 220 J = 1, M + C( J, 1 ) = T1*C( J, 1 ) + 220 CONTINUE + GO TO 410 + 230 CONTINUE +* +* Special code for 2 x 2 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + DO 240 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + 240 CONTINUE + GO TO 410 + 250 CONTINUE +* +* Special code for 3 x 3 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + DO 260 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + 260 CONTINUE + GO TO 410 + 270 CONTINUE +* +* Special code for 4 x 4 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + DO 280 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + 280 CONTINUE + GO TO 410 + 290 CONTINUE +* +* Special code for 5 x 5 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + DO 300 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + 300 CONTINUE + GO TO 410 + 310 CONTINUE +* +* Special code for 6 x 6 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + DO 320 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + 320 CONTINUE + GO TO 410 + 330 CONTINUE +* +* Special code for 7 x 7 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + DO 340 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + 340 CONTINUE + GO TO 410 + 350 CONTINUE +* +* Special code for 8 x 8 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + DO 360 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + 360 CONTINUE + GO TO 410 + 370 CONTINUE +* +* Special code for 9 x 9 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + V9 = V( 9 ) + T9 = TAU*V9 + DO 380 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + C( J, 9 ) = C( J, 9 ) - SUM*T9 + 380 CONTINUE + GO TO 410 + 390 CONTINUE +* +* Special code for 10 x 10 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + V9 = V( 9 ) + T9 = TAU*V9 + V10 = V( 10 ) + T10 = TAU*V10 + DO 400 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) + + $ V10*C( J, 10 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + C( J, 9 ) = C( J, 9 ) - SUM*T9 + C( J, 10 ) = C( J, 10 ) - SUM*T10 + 400 CONTINUE + GO TO 410 + END IF + 410 RETURN +* +* End of SLARFX +* + END diff --git a/costa/native/external/lapack/slargv.f b/costa/native/external/lapack/slargv.f new file mode 100644 index 000000000..d75674069 --- /dev/null +++ b/costa/native/external/lapack/slargv.f @@ -0,0 +1,100 @@ + SUBROUTINE SLARGV( N, X, INCX, Y, INCY, C, INCC ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. + REAL C( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* SLARGV generates a vector of real plane rotations, determined by +* elements of the real vectors x and y. For i = 1,2,...,n +* +* ( c(i) s(i) ) ( x(i) ) = ( a(i) ) +* ( -s(i) c(i) ) ( y(i) ) = ( 0 ) +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of plane rotations to be generated. +* +* X (input/output) REAL array, +* dimension (1+(N-1)*INCX) +* On entry, the vector x. +* On exit, x(i) is overwritten by a(i), for i = 1,...,n. +* +* INCX (input) INTEGER +* The increment between elements of X. INCX > 0. +* +* Y (input/output) REAL array, +* dimension (1+(N-1)*INCY) +* On entry, the vector y. +* On exit, the sines of the plane rotations. +* +* INCY (input) INTEGER +* The increment between elements of Y. INCY > 0. +* +* C (output) REAL array, dimension (1+(N-1)*INCC) +* The cosines of the plane rotations. +* +* INCC (input) INTEGER +* The increment between elements of C. INCC > 0. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IC, IX, IY + REAL F, G, T, TT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IX = 1 + IY = 1 + IC = 1 + DO 10 I = 1, N + F = X( IX ) + G = Y( IY ) + IF( G.EQ.ZERO ) THEN + C( IC ) = ONE + ELSE IF( F.EQ.ZERO ) THEN + C( IC ) = ZERO + Y( IY ) = ONE + X( IX ) = G + ELSE IF( ABS( F ).GT.ABS( G ) ) THEN + T = G / F + TT = SQRT( ONE+T*T ) + C( IC ) = ONE / TT + Y( IY ) = T*C( IC ) + X( IX ) = F*TT + ELSE + T = F / G + TT = SQRT( ONE+T*T ) + Y( IY ) = ONE / TT + C( IC ) = T*Y( IY ) + X( IX ) = G*TT + END IF + IC = IC + INCC + IY = IY + INCY + IX = IX + INCX + 10 CONTINUE + RETURN +* +* End of SLARGV +* + END diff --git a/costa/native/external/lapack/slarnv.f b/costa/native/external/lapack/slarnv.f new file mode 100644 index 000000000..9b1d0c769 --- /dev/null +++ b/costa/native/external/lapack/slarnv.f @@ -0,0 +1,116 @@ + SUBROUTINE SLARNV( IDIST, ISEED, N, X ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER IDIST, N +* .. +* .. Array Arguments .. + INTEGER ISEED( 4 ) + REAL X( * ) +* .. +* +* Purpose +* ======= +* +* SLARNV returns a vector of n random real numbers from a uniform or +* normal distribution. +* +* Arguments +* ========= +* +* IDIST (input) INTEGER +* Specifies the distribution of the random numbers: +* = 1: uniform (0,1) +* = 2: uniform (-1,1) +* = 3: normal (0,1) +* +* ISEED (input/output) INTEGER array, dimension (4) +* On entry, the seed of the random number generator; the array +* elements must be between 0 and 4095, and ISEED(4) must be +* odd. +* On exit, the seed is updated. +* +* N (input) INTEGER +* The number of random numbers to be generated. +* +* X (output) REAL array, dimension (N) +* The generated random numbers. +* +* Further Details +* =============== +* +* This routine calls the auxiliary routine SLARUV to generate random +* real numbers from a uniform (0,1) distribution, in batches of up to +* 128 using vectorisable code. The Box-Muller method is used to +* transform numbers from a uniform to a normal distribution. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, TWO + PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0 ) + INTEGER LV + PARAMETER ( LV = 128 ) + REAL TWOPI + PARAMETER ( TWOPI = 6.2831853071795864769252867663E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IL, IL2, IV +* .. +* .. Local Arrays .. + REAL U( LV ) +* .. +* .. Intrinsic Functions .. + INTRINSIC COS, LOG, MIN, SQRT +* .. +* .. External Subroutines .. + EXTERNAL SLARUV +* .. +* .. Executable Statements .. +* + DO 40 IV = 1, N, LV / 2 + IL = MIN( LV / 2, N-IV+1 ) + IF( IDIST.EQ.3 ) THEN + IL2 = 2*IL + ELSE + IL2 = IL + END IF +* +* Call SLARUV to generate IL2 numbers from a uniform (0,1) +* distribution (IL2 <= LV) +* + CALL SLARUV( ISEED, IL2, U ) +* + IF( IDIST.EQ.1 ) THEN +* +* Copy generated numbers +* + DO 10 I = 1, IL + X( IV+I-1 ) = U( I ) + 10 CONTINUE + ELSE IF( IDIST.EQ.2 ) THEN +* +* Convert generated numbers to uniform (-1,1) distribution +* + DO 20 I = 1, IL + X( IV+I-1 ) = TWO*U( I ) - ONE + 20 CONTINUE + ELSE IF( IDIST.EQ.3 ) THEN +* +* Convert generated numbers to normal (0,1) distribution +* + DO 30 I = 1, IL + X( IV+I-1 ) = SQRT( -TWO*LOG( U( 2*I-1 ) ) )* + $ COS( TWOPI*U( 2*I ) ) + 30 CONTINUE + END IF + 40 CONTINUE + RETURN +* +* End of SLARNV +* + END diff --git a/costa/native/external/lapack/slarrb.f b/costa/native/external/lapack/slarrb.f new file mode 100644 index 000000000..6d12ccd3d --- /dev/null +++ b/costa/native/external/lapack/slarrb.f @@ -0,0 +1,282 @@ + SUBROUTINE SLARRB( N, D, L, LD, LLD, IFIRST, ILAST, SIGMA, RELTOL, + $ W, WGAP, WERR, WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER IFIRST, ILAST, INFO, N + REAL RELTOL, SIGMA +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL D( * ), L( * ), LD( * ), LLD( * ), W( * ), + $ WERR( * ), WGAP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* Given the relatively robust representation(RRR) L D L^T, SLARRB +* does ``limited'' bisection to locate the eigenvalues of L D L^T, +* W( IFIRST ) thru' W( ILAST ), to more accuracy. Intervals +* [left, right] are maintained by storing their mid-points and +* semi-widths in the arrays W and WERR respectively. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the diagonal matrix D. +* +* L (input) REAL array, dimension (N-1) +* The n-1 subdiagonal elements of the unit bidiagonal matrix L. +* +* LD (input) REAL array, dimension (N-1) +* The n-1 elements L(i)*D(i). +* +* LLD (input) REAL array, dimension (N-1) +* The n-1 elements L(i)*L(i)*D(i). +* +* IFIRST (input) INTEGER +* The index of the first eigenvalue in the cluster. +* +* ILAST (input) INTEGER +* The index of the last eigenvalue in the cluster. +* +* SIGMA (input) REAL +* The shift used to form L D L^T (see SLARRF). +* +* RELTOL (input) REAL +* The relative tolerance. +* +* W (input/output) REAL array, dimension (N) +* On input, W( IFIRST ) thru' W( ILAST ) are estimates of the +* corresponding eigenvalues of L D L^T. +* On output, these estimates are ``refined''. +* +* WGAP (input/output) REAL array, dimension (N) +* The gaps between the eigenvalues of L D L^T. Very small +* gaps are changed on output. +* +* WERR (input/output) REAL array, dimension (N) +* On input, WERR( IFIRST ) thru' WERR( ILAST ) are the errors +* in the estimates W( IFIRST ) thru' W( ILAST ). +* On output, these are the ``refined'' errors. +* +*****Reminder to Inder --- WORK is never used in this subroutine ***** +* WORK (input) REAL array, dimension (???) +* Workspace. +* +* IWORK (input) INTEGER array, dimension (2*N) +* Workspace. +* +*****Reminder to Inder --- INFO is never set in this subroutine ****** +* INFO (output) INTEGER +* Error flag. +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, TWO, HALF + PARAMETER ( ZERO = 0.0E0, TWO = 2.0E0, HALF = 0.5E0 ) +* .. +* .. Local Scalars .. + INTEGER CNT, I, I1, I2, INITI1, INITI2, J, K, NCNVRG, + $ NEIG, NINT, NRIGHT, OLNINT + REAL DELTA, EPS, GAP, LEFT, MID, PERT, RIGHT, S, + $ THRESH, TMP, WIDTH +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + EPS = SLAMCH( 'Precision' ) + I1 = IFIRST + I2 = IFIRST + NEIG = ILAST - IFIRST + 1 + NCNVRG = 0 + THRESH = RELTOL + DO 10 I = IFIRST, ILAST + IWORK( I ) = 0 + PERT = EPS*( ABS( SIGMA )+ABS( W( I ) ) ) + WERR( I ) = WERR( I ) + PERT + IF( WGAP( I ).LT.PERT ) + $ WGAP( I ) = PERT + 10 CONTINUE + DO 20 I = I1, ILAST + IF( I.EQ.1 ) THEN + GAP = WGAP( I ) + ELSE IF( I.EQ.N ) THEN + GAP = WGAP( I-1 ) + ELSE + GAP = MIN( WGAP( I-1 ), WGAP( I ) ) + END IF + IF( WERR( I ).LT.THRESH*GAP ) THEN + NCNVRG = NCNVRG + 1 + IWORK( I ) = 1 + IF( I1.EQ.I ) + $ I1 = I1 + 1 + ELSE + I2 = I + END IF + 20 CONTINUE +* +* Initialize the unconverged intervals. +* + I = I1 + NINT = 0 + RIGHT = ZERO + 30 CONTINUE + IF( I.LE.I2 ) THEN + IF( IWORK( I ).EQ.0 ) THEN + DELTA = EPS + LEFT = W( I ) - WERR( I ) +* +* Do while( CNT(LEFT).GT.I-1 ) +* + 40 CONTINUE + IF( I.GT.I1 .AND. LEFT.LE.RIGHT ) THEN + LEFT = RIGHT + CNT = I - 1 + ELSE + S = -LEFT + CNT = 0 + DO 50 J = 1, N - 1 + TMP = D( J ) + S + S = S*( LD( J ) / TMP )*L( J ) - LEFT + IF( TMP.LT.ZERO ) + $ CNT = CNT + 1 + 50 CONTINUE + TMP = D( N ) + S + IF( TMP.LT.ZERO ) + $ CNT = CNT + 1 + IF( CNT.GT.I-1 ) THEN + DELTA = TWO*DELTA + LEFT = LEFT - ( ABS( SIGMA )+ABS( LEFT ) )*DELTA + GO TO 40 + END IF + END IF + DELTA = EPS + RIGHT = W( I ) + WERR( I ) +* +* Do while( CNT(RIGHT).LT.I ) +* + 60 CONTINUE + S = -RIGHT + CNT = 0 + DO 70 J = 1, N - 1 + TMP = D( J ) + S + S = S*( LD( J ) / TMP )*L( J ) - RIGHT + IF( TMP.LT.ZERO ) + $ CNT = CNT + 1 + 70 CONTINUE + TMP = D( N ) + S + IF( TMP.LT.ZERO ) + $ CNT = CNT + 1 + IF( CNT.LT.I ) THEN + DELTA = TWO*DELTA + RIGHT = RIGHT + ( ABS( SIGMA )+ABS( RIGHT ) )*DELTA + GO TO 60 + END IF + WERR( I ) = LEFT + W( I ) = RIGHT + IWORK( N+I ) = CNT + NINT = NINT + 1 + I = CNT + 1 + ELSE + I = I + 1 + END IF + GO TO 30 + END IF +* +* While( NCNVRG.LT.NEIG ) +* + INITI1 = I1 + INITI2 = I2 + 80 CONTINUE + IF( NCNVRG.LT.NEIG ) THEN + OLNINT = NINT + I = I1 + DO 100 K = 1, OLNINT + NRIGHT = IWORK( N+I ) + IF( IWORK( I ).EQ.0 ) THEN + MID = HALF*( WERR( I )+W( I ) ) + S = -MID + CNT = 0 + DO 90 J = 1, N - 1 + TMP = D( J ) + S + S = S*( LD( J ) / TMP )*L( J ) - MID + IF( TMP.LT.ZERO ) + $ CNT = CNT + 1 + 90 CONTINUE + TMP = D( N ) + S + IF( TMP.LT.ZERO ) + $ CNT = CNT + 1 + CNT = MAX( I-1, MIN( NRIGHT, CNT ) ) + IF( I.EQ.NRIGHT ) THEN + IF( I.EQ.IFIRST ) THEN + GAP = WERR( I+1 ) - W( I ) + ELSE IF( I.EQ.ILAST ) THEN + GAP = WERR( I ) - W( I-1 ) + ELSE + GAP = MIN( WERR( I+1 )-W( I ), WERR( I )-W( I-1 ) ) + END IF + WIDTH = W( I ) - MID + IF( WIDTH.LT.THRESH*GAP ) THEN + NCNVRG = NCNVRG + 1 + IWORK( I ) = 1 + IF( I1.EQ.I ) THEN + I1 = I1 + 1 + NINT = NINT - 1 + END IF + END IF + END IF + IF( IWORK( I ).EQ.0 ) + $ I2 = K + IF( CNT.EQ.I-1 ) THEN + WERR( I ) = MID + ELSE IF( CNT.EQ.NRIGHT ) THEN + W( I ) = MID + ELSE + IWORK( N+I ) = CNT + NINT = NINT + 1 + WERR( CNT+1 ) = MID + W( CNT+1 ) = W( I ) + W( I ) = MID + I = CNT + 1 + IWORK( N+I ) = NRIGHT + END IF + END IF + I = NRIGHT + 1 + 100 CONTINUE + NINT = NINT - OLNINT + I2 + GO TO 80 + END IF + DO 110 I = INITI1, INITI2 + W( I ) = HALF*( WERR( I )+W( I ) ) + WERR( I ) = W( I ) - WERR( I ) + 110 CONTINUE +* + RETURN +* +* End of SLARRB +* + END diff --git a/costa/native/external/lapack/slarre.f b/costa/native/external/lapack/slarre.f new file mode 100644 index 000000000..d47b772d4 --- /dev/null +++ b/costa/native/external/lapack/slarre.f @@ -0,0 +1,318 @@ + SUBROUTINE SLARRE( N, D, E, TOL, NSPLIT, ISPLIT, M, W, WOFF, + $ GERSCH, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, M, N, NSPLIT + REAL TOL +* .. +* .. Array Arguments .. + INTEGER ISPLIT( * ) + REAL D( * ), E( * ), GERSCH( * ), W( * ), WOFF( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* Given the tridiagonal matrix T, SLARRE sets "small" off-diagonal +* elements to zero, and for each unreduced block T_i, it finds +* (i) the numbers sigma_i +* (ii) the base T_i - sigma_i I = L_i D_i L_i^T representations and +* (iii) eigenvalues of each L_i D_i L_i^T. +* The representations and eigenvalues found are then used by +* SSTEGR to compute the eigenvectors of a symmetric tridiagonal +* matrix. Currently, the base representations are limited to being +* positive or negative definite, and the eigenvalues of the definite +* matrices are found by the dqds algorithm (subroutine SLASQ2). As +* an added benefit, SLARRE also outputs the n Gerschgorin +* intervals for each L_i D_i L_i^T. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal +* matrix T. +* On exit, the n diagonal elements of the diagonal +* matrices D_i. +* +* E (input/output) REAL array, dimension (N) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix T; E(N) need not be set. +* On exit, the subdiagonal elements of the unit bidiagonal +* matrices L_i. +* +* TOL (input) REAL +* The threshold for splitting. If on input |E(i)| < TOL, then +* the matrix T is split into smaller blocks. +* +* NSPLIT (input) INTEGER +* The number of blocks T splits into. 1 <= NSPLIT <= N. +* +* ISPLIT (output) INTEGER array, dimension (2*N) +* The splitting points, at which T breaks up into submatrices. +* The first submatrix consists of rows/columns 1 to ISPLIT(1), +* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), +* etc., and the NSPLIT-th consists of rows/columns +* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. +* +* M (output) INTEGER +* The total number of eigenvalues (of all the L_i D_i L_i^T) +* found. +* +* W (output) REAL array, dimension (N) +* The first M elements contain the eigenvalues. The +* eigenvalues of each of the blocks, L_i D_i L_i^T, are +* sorted in ascending order. +* +* WOFF (output) REAL array, dimension (N) +* The NSPLIT base points sigma_i. +* +* GERSCH (output) REAL array, dimension (2*N) +* The n Gerschgorin intervals. +* +* WORK (input) REAL array, dimension (4*N???) +* Workspace. +* +* INFO (output) INTEGER +* Output error code from SLASQ2 +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, FOUR, FOURTH + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ FOUR = 4.0E0, FOURTH = ONE / FOUR ) +* .. +* .. Local Scalars .. + INTEGER CNT, I, IBEGIN, IEND, IN, J, JBLK, MAXCNT + REAL DELTA, EPS, GL, GU, NRM, OFFD, S, SGNDEF, + $ SIGMA, TAU, TMP1, WIDTH +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLASQ2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + EPS = SLAMCH( 'Precision' ) +* +* Compute Splitting Points +* + NSPLIT = 1 + DO 10 I = 1, N - 1 + IF( ABS( E( I ) ).LE.TOL ) THEN + ISPLIT( NSPLIT ) = I + NSPLIT = NSPLIT + 1 + END IF + 10 CONTINUE + ISPLIT( NSPLIT ) = N +* + IBEGIN = 1 + DO 170 JBLK = 1, NSPLIT + IEND = ISPLIT( JBLK ) + IF( IBEGIN.EQ.IEND ) THEN + W( IBEGIN ) = D( IBEGIN ) + WOFF( JBLK ) = ZERO + IBEGIN = IEND + 1 + GO TO 170 + END IF + IN = IEND - IBEGIN + 1 +* +* Form the n Gerschgorin intervals +* + GL = D( IBEGIN ) - ABS( E( IBEGIN ) ) + GU = D( IBEGIN ) + ABS( E( IBEGIN ) ) + GERSCH( 2*IBEGIN-1 ) = GL + GERSCH( 2*IBEGIN ) = GU + GERSCH( 2*IEND-1 ) = D( IEND ) - ABS( E( IEND-1 ) ) + GERSCH( 2*IEND ) = D( IEND ) + ABS( E( IEND-1 ) ) + GL = MIN( GERSCH( 2*IEND-1 ), GL ) + GU = MAX( GERSCH( 2*IEND ), GU ) + DO 20 I = IBEGIN + 1, IEND - 1 + OFFD = ABS( E( I-1 ) ) + ABS( E( I ) ) + GERSCH( 2*I-1 ) = D( I ) - OFFD + GL = MIN( GERSCH( 2*I-1 ), GL ) + GERSCH( 2*I ) = D( I ) + OFFD + GU = MAX( GERSCH( 2*I ), GU ) + 20 CONTINUE + NRM = MAX( ABS( GL ), ABS( GU ) ) +* +* Find the number SIGMA where the base representation +* T - sigma I = L D L^T is to be formed. +* + WIDTH = GU - GL + DO 30 I = IBEGIN, IEND - 1 + WORK( I ) = E( I )*E( I ) + 30 CONTINUE + DO 50 J = 1, 2 + IF( J.EQ.1 ) THEN + TAU = GL + FOURTH*WIDTH + ELSE + TAU = GU - FOURTH*WIDTH + END IF + TMP1 = D( IBEGIN ) - TAU + IF( TMP1.LT.ZERO ) THEN + CNT = 1 + ELSE + CNT = 0 + END IF + DO 40 I = IBEGIN + 1, IEND + TMP1 = D( I ) - TAU - WORK( I-1 ) / TMP1 + IF( TMP1.LT.ZERO ) + $ CNT = CNT + 1 + 40 CONTINUE + IF( CNT.EQ.0 ) THEN + GL = TAU + ELSE IF( CNT.EQ.IN ) THEN + GU = TAU + END IF + IF( J.EQ.1 ) THEN + MAXCNT = CNT + SIGMA = GL + SGNDEF = ONE + ELSE + IF( IN-CNT.GT.MAXCNT ) THEN + SIGMA = GU + SGNDEF = -ONE + END IF + END IF + 50 CONTINUE +* +* Find the base L D L^T representation +* + WORK( 3*IN ) = ONE + DELTA = EPS + TAU = SGNDEF*NRM + 60 CONTINUE + SIGMA = SIGMA - DELTA*TAU + WORK( 1 ) = D( IBEGIN ) - SIGMA + J = IBEGIN + DO 70 I = 1, IN - 1 + WORK( 2*IN+I ) = ONE / WORK( 2*I-1 ) + TMP1 = E( J )*WORK( 2*IN+I ) + WORK( 2*I+1 ) = ( D( J+1 )-SIGMA ) - TMP1*E( J ) + WORK( 2*I ) = TMP1 + J = J + 1 + 70 CONTINUE + DO 80 I = IN, 1, -1 + TMP1 = SGNDEF*WORK( 2*I-1 ) + IF( TMP1.LT.ZERO .OR. WORK( 2*IN+I ).EQ.ZERO .OR. .NOT. + $ ( TMP1.GT.ZERO .OR. TMP1.LT.ONE ) ) THEN + DELTA = TWO*DELTA + GO TO 60 + END IF + J = J - 1 + 80 CONTINUE +* + J = IBEGIN + D( IBEGIN ) = WORK( 1 ) + WORK( 1 ) = ABS( WORK( 1 ) ) + DO 90 I = 1, IN - 1 + TMP1 = E( J ) + E( J ) = WORK( 2*I ) + WORK( 2*I ) = ABS( TMP1*WORK( 2*I ) ) + J = J + 1 + D( J ) = WORK( 2*I+1 ) + WORK( 2*I+1 ) = ABS( WORK( 2*I+1 ) ) + 90 CONTINUE +* + CALL SLASQ2( IN, WORK, INFO ) +* + TAU = SGNDEF*WORK( IN ) + WORK( 3*IN ) = ONE + DELTA = TWO*EPS + 100 CONTINUE + TAU = TAU*( ONE-DELTA ) +* + S = -TAU + J = IBEGIN + DO 110 I = 1, IN - 1 + WORK( I ) = D( J ) + S + WORK( 2*IN+I ) = ONE / WORK( I ) +* WORK( N+I ) = ( E( I ) * D( I ) ) / WORK( I ) + WORK( IN+I ) = ( E( J )*D( J ) )*WORK( 2*IN+I ) + S = S*WORK( IN+I )*E( J ) - TAU + J = J + 1 + 110 CONTINUE + WORK( IN ) = D( IEND ) + S +* +* Checking to see if all the diagonal elements of the new +* L D L^T representation have the same sign +* + DO 120 I = IN, 1, -1 + TMP1 = SGNDEF*WORK( I ) + IF( TMP1.LT.ZERO .OR. WORK( 2*IN+I ).EQ.ZERO .OR. .NOT. + $ ( TMP1.GT.ZERO .OR. TMP1.LT.ONE ) ) THEN + DELTA = TWO*DELTA + GO TO 100 + END IF + 120 CONTINUE +* + SIGMA = SIGMA + TAU + CALL SCOPY( IN, WORK, 1, D( IBEGIN ), 1 ) + CALL SCOPY( IN-1, WORK( IN+1 ), 1, E( IBEGIN ), 1 ) + WOFF( JBLK ) = SIGMA +* +* Update the n Gerschgorin intervals +* + DO 130 I = IBEGIN, IEND + GERSCH( 2*I-1 ) = GERSCH( 2*I-1 ) - SIGMA + GERSCH( 2*I ) = GERSCH( 2*I ) - SIGMA + 130 CONTINUE +* +* Compute the eigenvalues of L D L^T. +* + J = IBEGIN + DO 140 I = 1, IN - 1 + WORK( 2*I-1 ) = ABS( D( J ) ) + WORK( 2*I ) = E( J )*E( J )*WORK( 2*I-1 ) + J = J + 1 + 140 CONTINUE + WORK( 2*IN-1 ) = ABS( D( IEND ) ) +* + CALL SLASQ2( IN, WORK, INFO ) +* + J = IBEGIN + IF( SGNDEF.GT.ZERO ) THEN + DO 150 I = 1, IN + W( J ) = WORK( IN-I+1 ) + J = J + 1 + 150 CONTINUE + ELSE + DO 160 I = 1, IN + W( J ) = -WORK( I ) + J = J + 1 + 160 CONTINUE + END IF + IBEGIN = IEND + 1 + 170 CONTINUE + M = N +* + RETURN +* +* End of SLARRE +* + END diff --git a/costa/native/external/lapack/slarrf.f b/costa/native/external/lapack/slarrf.f new file mode 100644 index 000000000..eb08d9b4f --- /dev/null +++ b/costa/native/external/lapack/slarrf.f @@ -0,0 +1,150 @@ + SUBROUTINE SLARRF( N, D, L, LD, LLD, IFIRST, ILAST, W, DPLUS, + $ LPLUS, WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER IFIRST, ILAST, INFO, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL D( * ), DPLUS( * ), L( * ), LD( * ), LLD( * ), + $ LPLUS( * ), W( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* Given the initial representation L D L^T and its cluster of close +* eigenvalues (in a relative measure), W( IFIRST ), W( IFIRST+1 ), ... +* W( ILAST ), SLARRF finds a new relatively robust representation +* L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the +* eigenvalues of L(+) D(+) L(+)^T is relatively isolated. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the diagonal matrix D. +* +* L (input) REAL array, dimension (N-1) +* The (n-1) subdiagonal elements of the unit bidiagonal +* matrix L. +* +* LD (input) REAL array, dimension (N-1) +* The n-1 elements L(i)*D(i). +* +* LLD (input) REAL array, dimension (N-1) +* The n-1 elements L(i)*L(i)*D(i). +* +* IFIRST (input) INTEGER +* The index of the first eigenvalue in the cluster. +* +* ILAST (input) INTEGER +* The index of the last eigenvalue in the cluster. +* +* W (input/output) REAL array, dimension (N) +* On input, the eigenvalues of L D L^T in ascending order. +* W( IFIRST ) through W( ILAST ) form the cluster of relatively +* close eigenalues. +* On output, W( IFIRST ) thru' W( ILAST ) are estimates of the +* corresponding eigenvalues of L(+) D(+) L(+)^T. +* +* SIGMA (input) REAL +* The shift used to form L(+) D(+) L(+)^T. +* +* DPLUS (output) REAL array, dimension (N) +* The n diagonal elements of the diagonal matrix D(+). +* +* LPLUS (output) REAL array, dimension (N) +* The first (n-1) elements of LPLUS contain the subdiagonal +* elements of the unit bidiagonal matrix L(+). LPLUS( N ) is +* set to SIGMA. +* +* WORK (input) REAL array, dimension (???) +* Workspace. +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, TWO + PARAMETER ( ZERO = 0.0E0, TWO = 2.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL DELTA, EPS, S, SIGMA +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + INFO = 0 + EPS = SLAMCH( 'Precision' ) + IF( IFIRST.EQ.1 ) THEN + SIGMA = W( IFIRST ) + ELSE IF( ILAST.EQ.N ) THEN + SIGMA = W( ILAST ) + ELSE + INFO = 1 + RETURN + END IF +* +* Compute the new relatively robust representation (RRR) +* + DELTA = TWO*EPS + 10 CONTINUE + IF( IFIRST.EQ.1 ) THEN + SIGMA = SIGMA - ABS( SIGMA )*DELTA + ELSE + SIGMA = SIGMA + ABS( SIGMA )*DELTA + END IF + S = -SIGMA + DO 20 I = 1, N - 1 + DPLUS( I ) = D( I ) + S + LPLUS( I ) = LD( I ) / DPLUS( I ) + S = S*LPLUS( I )*L( I ) - SIGMA + 20 CONTINUE + DPLUS( N ) = D( N ) + S + IF( IFIRST.EQ.1 ) THEN + DO 30 I = 1, N + IF( DPLUS( I ).LT.ZERO ) THEN + DELTA = TWO*DELTA + GO TO 10 + END IF + 30 CONTINUE + ELSE + DO 40 I = 1, N + IF( DPLUS( I ).GT.ZERO ) THEN + DELTA = TWO*DELTA + GO TO 10 + END IF + 40 CONTINUE + END IF + DO 50 I = IFIRST, ILAST + W( I ) = W( I ) - SIGMA + 50 CONTINUE + LPLUS( N ) = SIGMA +* + RETURN +* +* End of SLARRF +* + END diff --git a/costa/native/external/lapack/slarrv.f b/costa/native/external/lapack/slarrv.f new file mode 100644 index 000000000..74d782320 --- /dev/null +++ b/costa/native/external/lapack/slarrv.f @@ -0,0 +1,417 @@ + SUBROUTINE SLARRV( N, D, L, ISPLIT, M, W, IBLOCK, GERSCH, TOL, Z, + $ LDZ, ISUPPZ, WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDZ, M, N + REAL TOL +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), ISPLIT( * ), ISUPPZ( * ), + $ IWORK( * ) + REAL D( * ), GERSCH( * ), L( * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SLARRV computes the eigenvectors of the tridiagonal matrix +* T = L D L^T given L, D and the eigenvalues of L D L^T. +* The input eigenvalues should have high relative accuracy with +* respect to the entries of L and D. The desired accuracy of the +* output can be specified by the input parameter TOL. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the diagonal matrix D. +* On exit, D may be overwritten. +* +* L (input/output) REAL array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the unit +* bidiagonal matrix L in elements 1 to N-1 of L. L(N) need +* not be set. On exit, L is overwritten. +* +* ISPLIT (input) INTEGER array, dimension (N) +* The splitting points, at which T breaks up into submatrices. +* The first submatrix consists of rows/columns 1 to +* ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 +* through ISPLIT( 2 ), etc. +* +* TOL (input) REAL +* The absolute error tolerance for the +* eigenvalues/eigenvectors. +* Errors in the input eigenvalues must be bounded by TOL. +* The eigenvectors output have residual norms +* bounded by TOL, and the dot products between different +* eigenvectors are bounded by TOL. TOL must be at least +* N*EPS*|T|, where EPS is the machine precision and |T| is +* the 1-norm of the tridiagonal matrix. +* +* M (input) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (input) REAL array, dimension (N) +* The first M elements of W contain the eigenvalues for +* which eigenvectors are to be computed. The eigenvalues +* should be grouped by split-off block and ordered from +* smallest to largest within the block ( The output array +* W from SLARRE is expected here ). +* Errors in W must be bounded by TOL (see above). +* +* IBLOCK (input) INTEGER array, dimension (N) +* The submatrix indices associated with the corresponding +* eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to +* the first submatrix from the top, =2 if W(i) belongs to +* the second submatrix, etc. +* +* Z (output) REAL array, dimension (LDZ, max(1,M) ) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix T +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* ISUPPZ (output) INTEGER ARRAY, dimension ( 2*max(1,M) ) +* The support of the eigenvectors in Z, i.e., the indices +* indicating the nonzero elements in Z. The i-th eigenvector +* is nonzero only in elements ISUPPZ( 2*i-1 ) through +* ISUPPZ( 2*i ). +* +* WORK (workspace) REAL array, dimension (13*N) +* +* IWORK (workspace) INTEGER array, dimension (6*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = 1, internal error in SLARRB +* if INFO = 2, internal error in SSTEIN +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MGSSIZ + PARAMETER ( MGSSIZ = 20 ) + REAL ZERO, ONE, FOUR + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, FOUR = 4.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL MGSCLS + INTEGER I, IBEGIN, IEND, IINDC1, IINDC2, IINDR, IINDWK, + $ IINFO, IM, IN, INDERR, INDGAP, INDLD, INDLLD, + $ INDWRK, ITER, ITMP1, ITMP2, J, JBLK, K, KTOT, + $ LSBDPT, MAXITR, NCLUS, NDEPTH, NDONE, NEWCLS, + $ NEWFRS, NEWFTT, NEWLST, NEWSIZ, NSPLIT, OLDCLS, + $ OLDFST, OLDIEN, OLDLST, OLDNCL, P, Q, + $ TEMP( 1 ) + REAL EPS, GAP, LAMBDA, MGSTOL, MINGMA, MINRGP, + $ NRMINV, RELGAP, RELTOL, RESID, RQCORR, SIGMA, + $ TMP1, ZTZ +* .. +* .. External Functions .. + REAL SDOT, SLAMCH, SNRM2 + EXTERNAL SDOT, SLAMCH, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SLAR1V, SLARRB, SLARRF, SLASET, + $ SSCAL, SSTEIN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INDERR = N + 1 + INDLD = 2*N + INDLLD = 3*N + INDGAP = 4*N + INDWRK = 5*N + 1 +* + IINDR = N + IINDC1 = 2*N + IINDC2 = 3*N + IINDWK = 4*N + 1 +* + EPS = SLAMCH( 'Precision' ) +* + DO 10 I = 1, 2*N + IWORK( I ) = 0 + 10 CONTINUE + DO 20 I = 1, M + WORK( INDERR+I-1 ) = EPS*ABS( W( I ) ) + 20 CONTINUE + CALL SLASET( 'Full', N, N, ZERO, ZERO, Z, LDZ ) + MGSTOL = 5.0E0*EPS +* + NSPLIT = IBLOCK( M ) + IBEGIN = 1 + DO 170 JBLK = 1, NSPLIT + IEND = ISPLIT( JBLK ) +* +* Find the eigenvectors of the submatrix indexed IBEGIN +* through IEND. +* + IF( IBEGIN.EQ.IEND ) THEN + Z( IBEGIN, IBEGIN ) = ONE + ISUPPZ( 2*IBEGIN-1 ) = IBEGIN + ISUPPZ( 2*IBEGIN ) = IBEGIN + IBEGIN = IEND + 1 + GO TO 170 + END IF + OLDIEN = IBEGIN - 1 + IN = IEND - OLDIEN + RELTOL = MIN( 1.0E-2, ONE / REAL( IN ) ) + IM = IN + CALL SCOPY( IM, W( IBEGIN ), 1, WORK, 1 ) + DO 30 I = 1, IN - 1 + WORK( INDGAP+I ) = WORK( I+1 ) - WORK( I ) + 30 CONTINUE + WORK( INDGAP+IN ) = MAX( ABS( WORK( IN ) ), EPS ) + NDONE = 0 +* + NDEPTH = 0 + LSBDPT = 1 + NCLUS = 1 + IWORK( IINDC1+1 ) = 1 + IWORK( IINDC1+2 ) = IN +* +* While( NDONE.LT.IM ) do +* + 40 CONTINUE + IF( NDONE.LT.IM ) THEN + OLDNCL = NCLUS + NCLUS = 0 + LSBDPT = 1 - LSBDPT + DO 150 I = 1, OLDNCL + IF( LSBDPT.EQ.0 ) THEN + OLDCLS = IINDC1 + NEWCLS = IINDC2 + ELSE + OLDCLS = IINDC2 + NEWCLS = IINDC1 + END IF +* +* If NDEPTH > 1, retrieve the relatively robust +* representation (RRR) and perform limited bisection +* (if necessary) to get approximate eigenvalues. +* + J = OLDCLS + 2*I + OLDFST = IWORK( J-1 ) + OLDLST = IWORK( J ) + IF( NDEPTH.GT.0 ) THEN + J = OLDIEN + OLDFST + CALL SCOPY( IN, Z( IBEGIN, J ), 1, D( IBEGIN ), 1 ) + CALL SCOPY( IN, Z( IBEGIN, J+1 ), 1, L( IBEGIN ), 1 ) + SIGMA = L( IEND ) + END IF + K = IBEGIN + DO 50 J = 1, IN - 1 + WORK( INDLD+J ) = D( K )*L( K ) + WORK( INDLLD+J ) = WORK( INDLD+J )*L( K ) + K = K + 1 + 50 CONTINUE + IF( NDEPTH.GT.0 ) THEN + CALL SLARRB( IN, D( IBEGIN ), L( IBEGIN ), + $ WORK( INDLD+1 ), WORK( INDLLD+1 ), + $ OLDFST, OLDLST, SIGMA, RELTOL, WORK, + $ WORK( INDGAP+1 ), WORK( INDERR ), + $ WORK( INDWRK ), IWORK( IINDWK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 1 + RETURN + END IF + END IF +* +* Classify eigenvalues of the current representation (RRR) +* as (i) isolated, (ii) loosely clustered or (iii) tightly +* clustered +* + NEWFRS = OLDFST + DO 140 J = OLDFST, OLDLST + IF( J.EQ.OLDLST .OR. WORK( INDGAP+J ).GE.RELTOL* + $ ABS( WORK( J ) ) ) THEN + NEWLST = J + ELSE +* +* continue (to the next loop) +* + RELGAP = WORK( INDGAP+J ) / ABS( WORK( J ) ) + IF( J.EQ.NEWFRS ) THEN + MINRGP = RELGAP + ELSE + MINRGP = MIN( MINRGP, RELGAP ) + END IF + GO TO 140 + END IF + NEWSIZ = NEWLST - NEWFRS + 1 + MAXITR = 10 + NEWFTT = OLDIEN + NEWFRS + IF( NEWSIZ.GT.1 ) THEN + MGSCLS = NEWSIZ.LE.MGSSIZ .AND. MINRGP.GE.MGSTOL + IF( .NOT.MGSCLS ) THEN + CALL SLARRF( IN, D( IBEGIN ), L( IBEGIN ), + $ WORK( INDLD+1 ), WORK( INDLLD+1 ), + $ NEWFRS, NEWLST, WORK, + $ Z( IBEGIN, NEWFTT ), + $ Z( IBEGIN, NEWFTT+1 ), + $ WORK( INDWRK ), IWORK( IINDWK ), + $ INFO ) + IF( INFO.EQ.0 ) THEN + NCLUS = NCLUS + 1 + K = NEWCLS + 2*NCLUS + IWORK( K-1 ) = NEWFRS + IWORK( K ) = NEWLST + ELSE + INFO = 0 + IF( MINRGP.GE.MGSTOL ) THEN + MGSCLS = .TRUE. + ELSE +* +* Call SSTEIN to process this tight cluster. +* This happens only if MINRGP <= MGSTOL +* and SLARRF returns INFO = 1. The latter +* means that a new RRR to "break" the +* cluster could not be found. +* + WORK( INDWRK ) = D( IBEGIN ) + DO 60 K = 1, IN - 1 + WORK( INDWRK+K ) = D( IBEGIN+K ) + + $ WORK( INDLLD+K ) + 60 CONTINUE + DO 70 K = 1, NEWSIZ + IWORK( IINDWK+K-1 ) = 1 + 70 CONTINUE + DO 80 K = NEWFRS, NEWLST + ISUPPZ( 2*( IBEGIN+K )-3 ) = 1 + ISUPPZ( 2*( IBEGIN+K )-2 ) = IN + 80 CONTINUE + TEMP( 1 ) = IN + CALL SSTEIN( IN, WORK( INDWRK ), + $ WORK( INDLD+1 ), NEWSIZ, + $ WORK( NEWFRS ), + $ IWORK( IINDWK ), TEMP( 1 ), + $ Z( IBEGIN, NEWFTT ), LDZ, + $ WORK( INDWRK+IN ), + $ IWORK( IINDWK+IN ), + $ IWORK( IINDWK+2*IN ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 2 + RETURN + END IF + NDONE = NDONE + NEWSIZ + END IF + END IF + END IF + ELSE + MGSCLS = .FALSE. + END IF + IF( NEWSIZ.EQ.1 .OR. MGSCLS ) THEN + KTOT = NEWFTT + DO 100 K = NEWFRS, NEWLST + ITER = 0 + 90 CONTINUE + LAMBDA = WORK( K ) + CALL SLAR1V( IN, 1, IN, LAMBDA, D( IBEGIN ), + $ L( IBEGIN ), WORK( INDLD+1 ), + $ WORK( INDLLD+1 ), + $ GERSCH( 2*OLDIEN+1 ), + $ Z( IBEGIN, KTOT ), ZTZ, MINGMA, + $ IWORK( IINDR+KTOT ), + $ ISUPPZ( 2*KTOT-1 ), + $ WORK( INDWRK ) ) + TMP1 = ONE / ZTZ + NRMINV = SQRT( TMP1 ) + RESID = ABS( MINGMA )*NRMINV + RQCORR = MINGMA*TMP1 + IF( K.EQ.IN ) THEN + GAP = WORK( INDGAP+K-1 ) + ELSE IF( K.EQ.1 ) THEN + GAP = WORK( INDGAP+K ) + ELSE + GAP = MIN( WORK( INDGAP+K-1 ), + $ WORK( INDGAP+K ) ) + END IF + ITER = ITER + 1 + IF( RESID.GT.TOL*GAP .AND. ABS( RQCORR ).GT. + $ FOUR*EPS*ABS( LAMBDA ) ) THEN + WORK( K ) = LAMBDA + RQCORR + IF( ITER.LT.MAXITR ) THEN + GO TO 90 + END IF + END IF + IWORK( KTOT ) = 1 + IF( NEWSIZ.EQ.1 ) + $ NDONE = NDONE + 1 + CALL SSCAL( IN, NRMINV, Z( IBEGIN, KTOT ), 1 ) + KTOT = KTOT + 1 + 100 CONTINUE + IF( NEWSIZ.GT.1 ) THEN + ITMP1 = ISUPPZ( 2*NEWFTT-1 ) + ITMP2 = ISUPPZ( 2*NEWFTT ) + KTOT = OLDIEN + NEWLST + DO 120 P = NEWFTT + 1, KTOT + DO 110 Q = NEWFTT, P - 1 + TMP1 = -SDOT( IN, Z( IBEGIN, P ), 1, + $ Z( IBEGIN, Q ), 1 ) + CALL SAXPY( IN, TMP1, Z( IBEGIN, Q ), 1, + $ Z( IBEGIN, P ), 1 ) + 110 CONTINUE + TMP1 = ONE / SNRM2( IN, Z( IBEGIN, P ), 1 ) + CALL SSCAL( IN, TMP1, Z( IBEGIN, P ), 1 ) + ITMP1 = MIN( ITMP1, ISUPPZ( 2*P-1 ) ) + ITMP2 = MAX( ITMP2, ISUPPZ( 2*P ) ) + 120 CONTINUE + DO 130 P = NEWFTT, KTOT + ISUPPZ( 2*P-1 ) = ITMP1 + ISUPPZ( 2*P ) = ITMP2 + 130 CONTINUE + NDONE = NDONE + NEWSIZ + END IF + END IF + NEWFRS = J + 1 + 140 CONTINUE + 150 CONTINUE + NDEPTH = NDEPTH + 1 + GO TO 40 + END IF + J = 2*IBEGIN + DO 160 I = IBEGIN, IEND + ISUPPZ( J-1 ) = ISUPPZ( J-1 ) + OLDIEN + ISUPPZ( J ) = ISUPPZ( J ) + OLDIEN + J = J + 2 + 160 CONTINUE + IBEGIN = IEND + 1 + 170 CONTINUE +* + RETURN +* +* End of SLARRV +* + END diff --git a/costa/native/external/lapack/slartg.f b/costa/native/external/lapack/slartg.f new file mode 100644 index 000000000..8ee53ac6e --- /dev/null +++ b/costa/native/external/lapack/slartg.f @@ -0,0 +1,143 @@ + SUBROUTINE SLARTG( F, G, CS, SN, R ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + REAL CS, F, G, R, SN +* .. +* +* Purpose +* ======= +* +* SLARTG generate a plane rotation so that +* +* [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. +* [ -SN CS ] [ G ] [ 0 ] +* +* This is a slower, more accurate version of the BLAS1 routine SROTG, +* with the following other differences: +* F and G are unchanged on return. +* If G=0, then CS=1 and SN=0. +* If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any +* floating point operations (saves work in SBDSQR when +* there are zeros on the diagonal). +* +* If F exceeds G in magnitude, CS will be positive. +* +* Arguments +* ========= +* +* F (input) REAL +* The first component of vector to be rotated. +* +* G (input) REAL +* The second component of vector to be rotated. +* +* CS (output) REAL +* The cosine of the rotation. +* +* SN (output) REAL +* The sine of the rotation. +* +* R (output) REAL +* The nonzero component of the rotated vector. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + REAL TWO + PARAMETER ( TWO = 2.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL FIRST + INTEGER COUNT, I + REAL EPS, F1, G1, SAFMIN, SAFMN2, SAFMX2, SCALE +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, SQRT +* .. +* .. Save statement .. + SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + FIRST = .FALSE. + SAFMIN = SLAMCH( 'S' ) + EPS = SLAMCH( 'E' ) + SAFMN2 = SLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) / + $ LOG( SLAMCH( 'B' ) ) / TWO ) + SAFMX2 = ONE / SAFMN2 + END IF + IF( G.EQ.ZERO ) THEN + CS = ONE + SN = ZERO + R = F + ELSE IF( F.EQ.ZERO ) THEN + CS = ZERO + SN = ONE + R = G + ELSE + F1 = F + G1 = G + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.GE.SAFMX2 ) THEN + COUNT = 0 + 10 CONTINUE + COUNT = COUNT + 1 + F1 = F1*SAFMN2 + G1 = G1*SAFMN2 + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.GE.SAFMX2 ) + $ GO TO 10 + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + DO 20 I = 1, COUNT + R = R*SAFMX2 + 20 CONTINUE + ELSE IF( SCALE.LE.SAFMN2 ) THEN + COUNT = 0 + 30 CONTINUE + COUNT = COUNT + 1 + F1 = F1*SAFMX2 + G1 = G1*SAFMX2 + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.LE.SAFMN2 ) + $ GO TO 30 + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + DO 40 I = 1, COUNT + R = R*SAFMN2 + 40 CONTINUE + ELSE + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + END IF + IF( ABS( F ).GT.ABS( G ) .AND. CS.LT.ZERO ) THEN + CS = -CS + SN = -SN + R = -R + END IF + END IF + RETURN +* +* End of SLARTG +* + END diff --git a/costa/native/external/lapack/slartv.f b/costa/native/external/lapack/slartv.f new file mode 100644 index 000000000..e0420090a --- /dev/null +++ b/costa/native/external/lapack/slartv.f @@ -0,0 +1,77 @@ + SUBROUTINE SLARTV( N, X, INCX, Y, INCY, C, S, INCC ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. + REAL C( * ), S( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* SLARTV applies a vector of real plane rotations to elements of the +* real vectors x and y. For i = 1,2,...,n +* +* ( x(i) ) := ( c(i) s(i) ) ( x(i) ) +* ( y(i) ) ( -s(i) c(i) ) ( y(i) ) +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of plane rotations to be applied. +* +* X (input/output) REAL array, +* dimension (1+(N-1)*INCX) +* The vector x. +* +* INCX (input) INTEGER +* The increment between elements of X. INCX > 0. +* +* Y (input/output) REAL array, +* dimension (1+(N-1)*INCY) +* The vector y. +* +* INCY (input) INTEGER +* The increment between elements of Y. INCY > 0. +* +* C (input) REAL array, dimension (1+(N-1)*INCC) +* The cosines of the plane rotations. +* +* S (input) REAL array, dimension (1+(N-1)*INCC) +* The sines of the plane rotations. +* +* INCC (input) INTEGER +* The increment between elements of C and S. INCC > 0. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IC, IX, IY + REAL XI, YI +* .. +* .. Executable Statements .. +* + IX = 1 + IY = 1 + IC = 1 + DO 10 I = 1, N + XI = X( IX ) + YI = Y( IY ) + X( IX ) = C( IC )*XI + S( IC )*YI + Y( IY ) = C( IC )*YI - S( IC )*XI + IX = IX + INCX + IY = IY + INCY + IC = IC + INCC + 10 CONTINUE + RETURN +* +* End of SLARTV +* + END diff --git a/costa/native/external/lapack/slaruv.f b/costa/native/external/lapack/slaruv.f new file mode 100644 index 000000000..d3d1fc416 --- /dev/null +++ b/costa/native/external/lapack/slaruv.f @@ -0,0 +1,368 @@ + SUBROUTINE SLARUV( ISEED, N, X ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + INTEGER N +* .. +* .. Array Arguments .. + INTEGER ISEED( 4 ) + REAL X( N ) +* .. +* +* Purpose +* ======= +* +* SLARUV returns a vector of n random real numbers from a uniform (0,1) +* distribution (n <= 128). +* +* This is an auxiliary routine called by SLARNV and CLARNV. +* +* Arguments +* ========= +* +* ISEED (input/output) INTEGER array, dimension (4) +* On entry, the seed of the random number generator; the array +* elements must be between 0 and 4095, and ISEED(4) must be +* odd. +* On exit, the seed is updated. +* +* N (input) INTEGER +* The number of random numbers to be generated. N <= 128. +* +* X (output) REAL array, dimension (N) +* The generated random numbers. +* +* Further Details +* =============== +* +* This routine uses a multiplicative congruential method with modulus +* 2**48 and multiplier 33952834046453 (see G.S.Fishman, +* 'Multiplicative congruential random number generators with modulus +* 2**b: an exhaustive analysis for b = 32 and a partial analysis for +* b = 48', Math. Comp. 189, pp 331-344, 1990). +* +* 48-bit integers are stored in 4 integer array elements with 12 bits +* per element. Hence the routine is portable across machines with +* integers of 32 bits or more. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + INTEGER LV, IPW2 + REAL R + PARAMETER ( LV = 128, IPW2 = 4096, R = ONE / IPW2 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, I2, I3, I4, IT1, IT2, IT3, IT4, J +* .. +* .. Local Arrays .. + INTEGER MM( LV, 4 ) +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MOD, REAL +* .. +* .. Data statements .. + DATA ( MM( 1, J ), J = 1, 4 ) / 494, 322, 2508, + $ 2549 / + DATA ( MM( 2, J ), J = 1, 4 ) / 2637, 789, 3754, + $ 1145 / + DATA ( MM( 3, J ), J = 1, 4 ) / 255, 1440, 1766, + $ 2253 / + DATA ( MM( 4, J ), J = 1, 4 ) / 2008, 752, 3572, + $ 305 / + DATA ( MM( 5, J ), J = 1, 4 ) / 1253, 2859, 2893, + $ 3301 / + DATA ( MM( 6, J ), J = 1, 4 ) / 3344, 123, 307, + $ 1065 / + DATA ( MM( 7, J ), J = 1, 4 ) / 4084, 1848, 1297, + $ 3133 / + DATA ( MM( 8, J ), J = 1, 4 ) / 1739, 643, 3966, + $ 2913 / + DATA ( MM( 9, J ), J = 1, 4 ) / 3143, 2405, 758, + $ 3285 / + DATA ( MM( 10, J ), J = 1, 4 ) / 3468, 2638, 2598, + $ 1241 / + DATA ( MM( 11, J ), J = 1, 4 ) / 688, 2344, 3406, + $ 1197 / + DATA ( MM( 12, J ), J = 1, 4 ) / 1657, 46, 2922, + $ 3729 / + DATA ( MM( 13, J ), J = 1, 4 ) / 1238, 3814, 1038, + $ 2501 / + DATA ( MM( 14, J ), J = 1, 4 ) / 3166, 913, 2934, + $ 1673 / + DATA ( MM( 15, J ), J = 1, 4 ) / 1292, 3649, 2091, + $ 541 / + DATA ( MM( 16, J ), J = 1, 4 ) / 3422, 339, 2451, + $ 2753 / + DATA ( MM( 17, J ), J = 1, 4 ) / 1270, 3808, 1580, + $ 949 / + DATA ( MM( 18, J ), J = 1, 4 ) / 2016, 822, 1958, + $ 2361 / + DATA ( MM( 19, J ), J = 1, 4 ) / 154, 2832, 2055, + $ 1165 / + DATA ( MM( 20, J ), J = 1, 4 ) / 2862, 3078, 1507, + $ 4081 / + DATA ( MM( 21, J ), J = 1, 4 ) / 697, 3633, 1078, + $ 2725 / + DATA ( MM( 22, J ), J = 1, 4 ) / 1706, 2970, 3273, + $ 3305 / + DATA ( MM( 23, J ), J = 1, 4 ) / 491, 637, 17, + $ 3069 / + DATA ( MM( 24, J ), J = 1, 4 ) / 931, 2249, 854, + $ 3617 / + DATA ( MM( 25, J ), J = 1, 4 ) / 1444, 2081, 2916, + $ 3733 / + DATA ( MM( 26, J ), J = 1, 4 ) / 444, 4019, 3971, + $ 409 / + DATA ( MM( 27, J ), J = 1, 4 ) / 3577, 1478, 2889, + $ 2157 / + DATA ( MM( 28, J ), J = 1, 4 ) / 3944, 242, 3831, + $ 1361 / + DATA ( MM( 29, J ), J = 1, 4 ) / 2184, 481, 2621, + $ 3973 / + DATA ( MM( 30, J ), J = 1, 4 ) / 1661, 2075, 1541, + $ 1865 / + DATA ( MM( 31, J ), J = 1, 4 ) / 3482, 4058, 893, + $ 2525 / + DATA ( MM( 32, J ), J = 1, 4 ) / 657, 622, 736, + $ 1409 / + DATA ( MM( 33, J ), J = 1, 4 ) / 3023, 3376, 3992, + $ 3445 / + DATA ( MM( 34, J ), J = 1, 4 ) / 3618, 812, 787, + $ 3577 / + DATA ( MM( 35, J ), J = 1, 4 ) / 1267, 234, 2125, + $ 77 / + DATA ( MM( 36, J ), J = 1, 4 ) / 1828, 641, 2364, + $ 3761 / + DATA ( MM( 37, J ), J = 1, 4 ) / 164, 4005, 2460, + $ 2149 / + DATA ( MM( 38, J ), J = 1, 4 ) / 3798, 1122, 257, + $ 1449 / + DATA ( MM( 39, J ), J = 1, 4 ) / 3087, 3135, 1574, + $ 3005 / + DATA ( MM( 40, J ), J = 1, 4 ) / 2400, 2640, 3912, + $ 225 / + DATA ( MM( 41, J ), J = 1, 4 ) / 2870, 2302, 1216, + $ 85 / + DATA ( MM( 42, J ), J = 1, 4 ) / 3876, 40, 3248, + $ 3673 / + DATA ( MM( 43, J ), J = 1, 4 ) / 1905, 1832, 3401, + $ 3117 / + DATA ( MM( 44, J ), J = 1, 4 ) / 1593, 2247, 2124, + $ 3089 / + DATA ( MM( 45, J ), J = 1, 4 ) / 1797, 2034, 2762, + $ 1349 / + DATA ( MM( 46, J ), J = 1, 4 ) / 1234, 2637, 149, + $ 2057 / + DATA ( MM( 47, J ), J = 1, 4 ) / 3460, 1287, 2245, + $ 413 / + DATA ( MM( 48, J ), J = 1, 4 ) / 328, 1691, 166, + $ 65 / + DATA ( MM( 49, J ), J = 1, 4 ) / 2861, 496, 466, + $ 1845 / + DATA ( MM( 50, J ), J = 1, 4 ) / 1950, 1597, 4018, + $ 697 / + DATA ( MM( 51, J ), J = 1, 4 ) / 617, 2394, 1399, + $ 3085 / + DATA ( MM( 52, J ), J = 1, 4 ) / 2070, 2584, 190, + $ 3441 / + DATA ( MM( 53, J ), J = 1, 4 ) / 3331, 1843, 2879, + $ 1573 / + DATA ( MM( 54, J ), J = 1, 4 ) / 769, 336, 153, + $ 3689 / + DATA ( MM( 55, J ), J = 1, 4 ) / 1558, 1472, 2320, + $ 2941 / + DATA ( MM( 56, J ), J = 1, 4 ) / 2412, 2407, 18, + $ 929 / + DATA ( MM( 57, J ), J = 1, 4 ) / 2800, 433, 712, + $ 533 / + DATA ( MM( 58, J ), J = 1, 4 ) / 189, 2096, 2159, + $ 2841 / + DATA ( MM( 59, J ), J = 1, 4 ) / 287, 1761, 2318, + $ 4077 / + DATA ( MM( 60, J ), J = 1, 4 ) / 2045, 2810, 2091, + $ 721 / + DATA ( MM( 61, J ), J = 1, 4 ) / 1227, 566, 3443, + $ 2821 / + DATA ( MM( 62, J ), J = 1, 4 ) / 2838, 442, 1510, + $ 2249 / + DATA ( MM( 63, J ), J = 1, 4 ) / 209, 41, 449, + $ 2397 / + DATA ( MM( 64, J ), J = 1, 4 ) / 2770, 1238, 1956, + $ 2817 / + DATA ( MM( 65, J ), J = 1, 4 ) / 3654, 1086, 2201, + $ 245 / + DATA ( MM( 66, J ), J = 1, 4 ) / 3993, 603, 3137, + $ 1913 / + DATA ( MM( 67, J ), J = 1, 4 ) / 192, 840, 3399, + $ 1997 / + DATA ( MM( 68, J ), J = 1, 4 ) / 2253, 3168, 1321, + $ 3121 / + DATA ( MM( 69, J ), J = 1, 4 ) / 3491, 1499, 2271, + $ 997 / + DATA ( MM( 70, J ), J = 1, 4 ) / 2889, 1084, 3667, + $ 1833 / + DATA ( MM( 71, J ), J = 1, 4 ) / 2857, 3438, 2703, + $ 2877 / + DATA ( MM( 72, J ), J = 1, 4 ) / 2094, 2408, 629, + $ 1633 / + DATA ( MM( 73, J ), J = 1, 4 ) / 1818, 1589, 2365, + $ 981 / + DATA ( MM( 74, J ), J = 1, 4 ) / 688, 2391, 2431, + $ 2009 / + DATA ( MM( 75, J ), J = 1, 4 ) / 1407, 288, 1113, + $ 941 / + DATA ( MM( 76, J ), J = 1, 4 ) / 634, 26, 3922, + $ 2449 / + DATA ( MM( 77, J ), J = 1, 4 ) / 3231, 512, 2554, + $ 197 / + DATA ( MM( 78, J ), J = 1, 4 ) / 815, 1456, 184, + $ 2441 / + DATA ( MM( 79, J ), J = 1, 4 ) / 3524, 171, 2099, + $ 285 / + DATA ( MM( 80, J ), J = 1, 4 ) / 1914, 1677, 3228, + $ 1473 / + DATA ( MM( 81, J ), J = 1, 4 ) / 516, 2657, 4012, + $ 2741 / + DATA ( MM( 82, J ), J = 1, 4 ) / 164, 2270, 1921, + $ 3129 / + DATA ( MM( 83, J ), J = 1, 4 ) / 303, 2587, 3452, + $ 909 / + DATA ( MM( 84, J ), J = 1, 4 ) / 2144, 2961, 3901, + $ 2801 / + DATA ( MM( 85, J ), J = 1, 4 ) / 3480, 1970, 572, + $ 421 / + DATA ( MM( 86, J ), J = 1, 4 ) / 119, 1817, 3309, + $ 4073 / + DATA ( MM( 87, J ), J = 1, 4 ) / 3357, 676, 3171, + $ 2813 / + DATA ( MM( 88, J ), J = 1, 4 ) / 837, 1410, 817, + $ 2337 / + DATA ( MM( 89, J ), J = 1, 4 ) / 2826, 3723, 3039, + $ 1429 / + DATA ( MM( 90, J ), J = 1, 4 ) / 2332, 2803, 1696, + $ 1177 / + DATA ( MM( 91, J ), J = 1, 4 ) / 2089, 3185, 1256, + $ 1901 / + DATA ( MM( 92, J ), J = 1, 4 ) / 3780, 184, 3715, + $ 81 / + DATA ( MM( 93, J ), J = 1, 4 ) / 1700, 663, 2077, + $ 1669 / + DATA ( MM( 94, J ), J = 1, 4 ) / 3712, 499, 3019, + $ 2633 / + DATA ( MM( 95, J ), J = 1, 4 ) / 150, 3784, 1497, + $ 2269 / + DATA ( MM( 96, J ), J = 1, 4 ) / 2000, 1631, 1101, + $ 129 / + DATA ( MM( 97, J ), J = 1, 4 ) / 3375, 1925, 717, + $ 1141 / + DATA ( MM( 98, J ), J = 1, 4 ) / 1621, 3912, 51, + $ 249 / + DATA ( MM( 99, J ), J = 1, 4 ) / 3090, 1398, 981, + $ 3917 / + DATA ( MM( 100, J ), J = 1, 4 ) / 3765, 1349, 1978, + $ 2481 / + DATA ( MM( 101, J ), J = 1, 4 ) / 1149, 1441, 1813, + $ 3941 / + DATA ( MM( 102, J ), J = 1, 4 ) / 3146, 2224, 3881, + $ 2217 / + DATA ( MM( 103, J ), J = 1, 4 ) / 33, 2411, 76, + $ 2749 / + DATA ( MM( 104, J ), J = 1, 4 ) / 3082, 1907, 3846, + $ 3041 / + DATA ( MM( 105, J ), J = 1, 4 ) / 2741, 3192, 3694, + $ 1877 / + DATA ( MM( 106, J ), J = 1, 4 ) / 359, 2786, 1682, + $ 345 / + DATA ( MM( 107, J ), J = 1, 4 ) / 3316, 382, 124, + $ 2861 / + DATA ( MM( 108, J ), J = 1, 4 ) / 1749, 37, 1660, + $ 1809 / + DATA ( MM( 109, J ), J = 1, 4 ) / 185, 759, 3997, + $ 3141 / + DATA ( MM( 110, J ), J = 1, 4 ) / 2784, 2948, 479, + $ 2825 / + DATA ( MM( 111, J ), J = 1, 4 ) / 2202, 1862, 1141, + $ 157 / + DATA ( MM( 112, J ), J = 1, 4 ) / 2199, 3802, 886, + $ 2881 / + DATA ( MM( 113, J ), J = 1, 4 ) / 1364, 2423, 3514, + $ 3637 / + DATA ( MM( 114, J ), J = 1, 4 ) / 1244, 2051, 1301, + $ 1465 / + DATA ( MM( 115, J ), J = 1, 4 ) / 2020, 2295, 3604, + $ 2829 / + DATA ( MM( 116, J ), J = 1, 4 ) / 3160, 1332, 1888, + $ 2161 / + DATA ( MM( 117, J ), J = 1, 4 ) / 2785, 1832, 1836, + $ 3365 / + DATA ( MM( 118, J ), J = 1, 4 ) / 2772, 2405, 1990, + $ 361 / + DATA ( MM( 119, J ), J = 1, 4 ) / 1217, 3638, 2058, + $ 2685 / + DATA ( MM( 120, J ), J = 1, 4 ) / 1822, 3661, 692, + $ 3745 / + DATA ( MM( 121, J ), J = 1, 4 ) / 1245, 327, 1194, + $ 2325 / + DATA ( MM( 122, J ), J = 1, 4 ) / 2252, 3660, 20, + $ 3609 / + DATA ( MM( 123, J ), J = 1, 4 ) / 3904, 716, 3285, + $ 3821 / + DATA ( MM( 124, J ), J = 1, 4 ) / 2774, 1842, 2046, + $ 3537 / + DATA ( MM( 125, J ), J = 1, 4 ) / 997, 3987, 2107, + $ 517 / + DATA ( MM( 126, J ), J = 1, 4 ) / 2573, 1368, 3508, + $ 3017 / + DATA ( MM( 127, J ), J = 1, 4 ) / 1148, 1848, 3525, + $ 2141 / + DATA ( MM( 128, J ), J = 1, 4 ) / 545, 2366, 3801, + $ 1537 / +* .. +* .. Executable Statements .. +* + I1 = ISEED( 1 ) + I2 = ISEED( 2 ) + I3 = ISEED( 3 ) + I4 = ISEED( 4 ) +* + DO 10 I = 1, MIN( N, LV ) +* +* Multiply the seed by i-th power of the multiplier modulo 2**48 +* + IT4 = I4*MM( I, 4 ) + IT3 = IT4 / IPW2 + IT4 = IT4 - IPW2*IT3 + IT3 = IT3 + I3*MM( I, 4 ) + I4*MM( I, 3 ) + IT2 = IT3 / IPW2 + IT3 = IT3 - IPW2*IT2 + IT2 = IT2 + I2*MM( I, 4 ) + I3*MM( I, 3 ) + I4*MM( I, 2 ) + IT1 = IT2 / IPW2 + IT2 = IT2 - IPW2*IT1 + IT1 = IT1 + I1*MM( I, 4 ) + I2*MM( I, 3 ) + I3*MM( I, 2 ) + + $ I4*MM( I, 1 ) + IT1 = MOD( IT1, IPW2 ) +* +* Convert 48-bit integer to a real number in the interval (0,1) +* + X( I ) = R*( REAL( IT1 )+R*( REAL( IT2 )+R*( REAL( IT3 )+R* + $ REAL( IT4 ) ) ) ) + 10 CONTINUE +* +* Return final value of seed +* + ISEED( 1 ) = IT1 + ISEED( 2 ) = IT2 + ISEED( 3 ) = IT3 + ISEED( 4 ) = IT4 + RETURN +* +* End of SLARUV +* + END diff --git a/costa/native/external/lapack/slarz.f b/costa/native/external/lapack/slarz.f new file mode 100644 index 000000000..de91e468f --- /dev/null +++ b/costa/native/external/lapack/slarz.f @@ -0,0 +1,153 @@ + SUBROUTINE SLARZ( SIDE, M, N, L, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, L, LDC, M, N + REAL TAU +* .. +* .. Array Arguments .. + REAL C( LDC, * ), V( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLARZ applies a real elementary reflector H to a real M-by-N +* matrix C, from either the left or the right. H is represented in the +* form +* +* H = I - tau * v * v' +* +* where tau is a real scalar and v is a real vector. +* +* If tau = 0, then H is taken to be the unit matrix. +* +* +* H is a product of k elementary reflectors as returned by STZRZF. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': form H * C +* = 'R': form C * H +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* L (input) INTEGER +* The number of entries of the vector V containing +* the meaningful part of the Householder vectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* V (input) REAL array, dimension (1+(L-1)*abs(INCV)) +* The vector v in the representation of H as returned by +* STZRZF. V is not used if TAU = 0. +* +* INCV (input) INTEGER +* The increment between elements of v. INCV <> 0. +* +* TAU (input) REAL +* The value tau in the representation of H. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by the matrix H * C if SIDE = 'L', +* or C * H if SIDE = 'R'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) REAL array, dimension +* (N) if SIDE = 'L' +* or (M) if SIDE = 'R' +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGEMV, SGER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C +* + IF( TAU.NE.ZERO ) THEN +* +* w( 1:n ) = C( 1, 1:n ) +* + CALL SCOPY( N, C, LDC, WORK, 1 ) +* +* w( 1:n ) = w( 1:n ) + C( m-l+1:m, 1:n )' * v( 1:l ) +* + CALL SGEMV( 'Transpose', L, N, ONE, C( M-L+1, 1 ), LDC, V, + $ INCV, ONE, WORK, 1 ) +* +* C( 1, 1:n ) = C( 1, 1:n ) - tau * w( 1:n ) +* + CALL SAXPY( N, -TAU, WORK, 1, C, LDC ) +* +* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... +* tau * v( 1:l ) * w( 1:n )' +* + CALL SGER( L, N, -TAU, V, INCV, WORK, 1, C( M-L+1, 1 ), + $ LDC ) + END IF +* + ELSE +* +* Form C * H +* + IF( TAU.NE.ZERO ) THEN +* +* w( 1:m ) = C( 1:m, 1 ) +* + CALL SCOPY( M, C, 1, WORK, 1 ) +* +* w( 1:m ) = w( 1:m ) + C( 1:m, n-l+1:n, 1:n ) * v( 1:l ) +* + CALL SGEMV( 'No transpose', M, L, ONE, C( 1, N-L+1 ), LDC, + $ V, INCV, ONE, WORK, 1 ) +* +* C( 1:m, 1 ) = C( 1:m, 1 ) - tau * w( 1:m ) +* + CALL SAXPY( M, -TAU, WORK, 1, C, 1 ) +* +* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... +* tau * w( 1:m ) * v( 1:l )' +* + CALL SGER( M, L, -TAU, WORK, 1, V, INCV, C( 1, N-L+1 ), + $ LDC ) +* + END IF +* + END IF +* + RETURN +* +* End of SLARZ +* + END diff --git a/costa/native/external/lapack/slarzb.f b/costa/native/external/lapack/slarzb.f new file mode 100644 index 000000000..94b3aa0d7 --- /dev/null +++ b/costa/native/external/lapack/slarzb.f @@ -0,0 +1,221 @@ + SUBROUTINE SLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, + $ LDV, T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* December 1, 1999 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, L, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + REAL C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* Purpose +* ======= +* +* SLARZB applies a real block reflector H or its transpose H**T to +* a real distributed M-by-N C from the left or the right. +* +* Currently, only STOREV = 'R' and DIRECT = 'B' are supported. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply H or H' from the Left +* = 'R': apply H or H' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply H (No transpose) +* = 'C': apply H' (Transpose) +* +* DIRECT (input) CHARACTER*1 +* Indicates how H is formed from a product of elementary +* reflectors +* = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) +* = 'B': H = H(k) . . . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Indicates how the vectors which define the elementary +* reflectors are stored: +* = 'C': Columnwise (not supported yet) +* = 'R': Rowwise +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* K (input) INTEGER +* The order of the matrix T (= the number of elementary +* reflectors whose product defines the block reflector). +* +* L (input) INTEGER +* The number of columns of the matrix V containing the +* meaningful part of the Householder reflectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* V (input) REAL array, dimension (LDV,NV). +* If STOREV = 'C', NV = K; if STOREV = 'R', NV = L. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C', LDV >= L; if STOREV = 'R', LDV >= K. +* +* T (input) REAL array, dimension (LDT,K) +* The triangular K-by-K matrix T in the representation of the +* block reflector. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by H*C or H'*C or C*H or C*H'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) REAL array, dimension (LDWORK,K) +* +* LDWORK (input) INTEGER +* The leading dimension of the array WORK. +* If SIDE = 'L', LDWORK >= max(1,N); +* if SIDE = 'R', LDWORK >= max(1,M). +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, INFO, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, STRMM, XERBLA +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* +* Check for currently supported options +* + INFO = 0 + IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLARZB', -INFO ) + RETURN + END IF +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C +* +* W( 1:n, 1:k ) = C( 1:k, 1:n )' +* + DO 10 J = 1, K + CALL SCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W( 1:n, 1:k ) = W( 1:n, 1:k ) + ... +* C( m-l+1:m, 1:n )' * V( 1:k, 1:l )' +* + IF( L.GT.0 ) + $ CALL SGEMM( 'Transpose', 'Transpose', N, K, L, ONE, + $ C( M-L+1, 1 ), LDC, V, LDV, ONE, WORK, LDWORK ) +* +* W( 1:n, 1:k ) = W( 1:n, 1:k ) * T' or W( 1:m, 1:k ) * T +* + CALL STRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, ONE, T, + $ LDT, WORK, LDWORK ) +* +* C( 1:k, 1:n ) = C( 1:k, 1:n ) - W( 1:n, 1:k )' +* + DO 30 J = 1, N + DO 20 I = 1, K + C( I, J ) = C( I, J ) - WORK( J, I ) + 20 CONTINUE + 30 CONTINUE +* +* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... +* V( 1:k, 1:l )' * W( 1:n, 1:k )' +* + IF( L.GT.0 ) + $ CALL SGEMM( 'Transpose', 'Transpose', L, N, K, -ONE, V, LDV, + $ WORK, LDWORK, ONE, C( M-L+1, 1 ), LDC ) +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' +* +* W( 1:m, 1:k ) = C( 1:m, 1:k ) +* + DO 40 J = 1, K + CALL SCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W( 1:m, 1:k ) = W( 1:m, 1:k ) + ... +* C( 1:m, n-l+1:n ) * V( 1:k, 1:l )' +* + IF( L.GT.0 ) + $ CALL SGEMM( 'No transpose', 'Transpose', M, K, L, ONE, + $ C( 1, N-L+1 ), LDC, V, LDV, ONE, WORK, LDWORK ) +* +* W( 1:m, 1:k ) = W( 1:m, 1:k ) * T or W( 1:m, 1:k ) * T' +* + CALL STRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, ONE, T, + $ LDT, WORK, LDWORK ) +* +* C( 1:m, 1:k ) = C( 1:m, 1:k ) - W( 1:m, 1:k ) +* + DO 60 J = 1, K + DO 50 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 50 CONTINUE + 60 CONTINUE +* +* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... +* W( 1:m, 1:k ) * V( 1:k, 1:l ) +* + IF( L.GT.0 ) + $ CALL SGEMM( 'No transpose', 'No transpose', M, L, K, -ONE, + $ WORK, LDWORK, V, LDV, ONE, C( 1, N-L+1 ), LDC ) +* + END IF +* + RETURN +* +* End of SLARZB +* + END diff --git a/costa/native/external/lapack/slarzt.f b/costa/native/external/lapack/slarzt.f new file mode 100644 index 000000000..a57df4fb5 --- /dev/null +++ b/costa/native/external/lapack/slarzt.f @@ -0,0 +1,185 @@ + SUBROUTINE SLARZT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + REAL T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* Purpose +* ======= +* +* SLARZT forms the triangular factor T of a real block reflector +* H of order > n, which is defined as a product of k elementary +* reflectors. +* +* If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +* +* If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +* +* If STOREV = 'C', the vector which defines the elementary reflector +* H(i) is stored in the i-th column of the array V, and +* +* H = I - V * T * V' +* +* If STOREV = 'R', the vector which defines the elementary reflector +* H(i) is stored in the i-th row of the array V, and +* +* H = I - V' * T * V +* +* Currently, only STOREV = 'R' and DIRECT = 'B' are supported. +* +* Arguments +* ========= +* +* DIRECT (input) CHARACTER*1 +* Specifies the order in which the elementary reflectors are +* multiplied to form the block reflector: +* = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) +* = 'B': H = H(k) . . . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Specifies how the vectors which define the elementary +* reflectors are stored (see also Further Details): +* = 'C': columnwise (not supported yet) +* = 'R': rowwise +* +* N (input) INTEGER +* The order of the block reflector H. N >= 0. +* +* K (input) INTEGER +* The order of the triangular factor T (= the number of +* elementary reflectors). K >= 1. +* +* V (input/output) REAL array, dimension +* (LDV,K) if STOREV = 'C' +* (LDV,N) if STOREV = 'R' +* The matrix V. See further details. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i). +* +* T (output) REAL array, dimension (LDT,K) +* The k by k triangular factor T of the block reflector. +* If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +* lower triangular. The rest of the array is not used. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* The shape of the matrix V and the storage of the vectors which define +* the H(i) is best illustrated by the following example with n = 5 and +* k = 3. The elements equal to 1 are not stored; the corresponding +* array elements are modified but restored on exit. The rest of the +* array is not used. +* +* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +* +* ______V_____ +* ( v1 v2 v3 ) / \ +* ( v1 v2 v3 ) ( v1 v1 v1 v1 v1 . . . . 1 ) +* V = ( v1 v2 v3 ) ( v2 v2 v2 v2 v2 . . . 1 ) +* ( v1 v2 v3 ) ( v3 v3 v3 v3 v3 . . 1 ) +* ( v1 v2 v3 ) +* . . . +* . . . +* 1 . . +* 1 . +* 1 +* +* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +* +* ______V_____ +* 1 / \ +* . 1 ( 1 . . . . v1 v1 v1 v1 v1 ) +* . . 1 ( . 1 . . . v2 v2 v2 v2 v2 ) +* . . . ( . . 1 . . v3 v3 v3 v3 v3 ) +* . . . +* ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* V = ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, STRMV, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Check for currently supported options +* + INFO = 0 + IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLARZT', -INFO ) + RETURN + END IF +* + DO 20 I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO 10 J = I, K + T( J, I ) = ZERO + 10 CONTINUE + ELSE +* +* general case +* + IF( I.LT.K ) THEN +* +* T(i+1:k,i) = - tau(i) * V(i+1:k,1:n) * V(i,1:n)' +* + CALL SGEMV( 'No transpose', K-I, N, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO, + $ T( I+1, I ), 1 ) +* +* T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL STRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + END IF + T( I, I ) = TAU( I ) + END IF + 20 CONTINUE + RETURN +* +* End of SLARZT +* + END diff --git a/costa/native/external/lapack/slas2.f b/costa/native/external/lapack/slas2.f new file mode 100644 index 000000000..475ed8915 --- /dev/null +++ b/costa/native/external/lapack/slas2.f @@ -0,0 +1,122 @@ + SUBROUTINE SLAS2( F, G, H, SSMIN, SSMAX ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + REAL F, G, H, SSMAX, SSMIN +* .. +* +* Purpose +* ======= +* +* SLAS2 computes the singular values of the 2-by-2 matrix +* [ F G ] +* [ 0 H ]. +* On return, SSMIN is the smaller singular value and SSMAX is the +* larger singular value. +* +* Arguments +* ========= +* +* F (input) REAL +* The (1,1) element of the 2-by-2 matrix. +* +* G (input) REAL +* The (1,2) element of the 2-by-2 matrix. +* +* H (input) REAL +* The (2,2) element of the 2-by-2 matrix. +* +* SSMIN (output) REAL +* The smaller singular value. +* +* SSMAX (output) REAL +* The larger singular value. +* +* Further Details +* =============== +* +* Barring over/underflow, all output quantities are correct to within +* a few units in the last place (ulps), even in the absence of a guard +* digit in addition/subtraction. +* +* In IEEE arithmetic, the code works correctly if one matrix element is +* infinite. +* +* Overflow will not occur unless the largest singular value itself +* overflows, or is within a few ulps of overflow. (On machines with +* partial overflow, like the Cray, overflow may occur if the largest +* singular value is within a factor of 2 of overflow.) +* +* Underflow is harmless if underflow is gradual. Otherwise, results +* may correspond to a matrix modified by perturbations of size near +* the underflow threshold. +* +* ==================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + REAL TWO + PARAMETER ( TWO = 2.0E0 ) +* .. +* .. Local Scalars .. + REAL AS, AT, AU, C, FA, FHMN, FHMX, GA, HA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + FA = ABS( F ) + GA = ABS( G ) + HA = ABS( H ) + FHMN = MIN( FA, HA ) + FHMX = MAX( FA, HA ) + IF( FHMN.EQ.ZERO ) THEN + SSMIN = ZERO + IF( FHMX.EQ.ZERO ) THEN + SSMAX = GA + ELSE + SSMAX = MAX( FHMX, GA )*SQRT( ONE+ + $ ( MIN( FHMX, GA ) / MAX( FHMX, GA ) )**2 ) + END IF + ELSE + IF( GA.LT.FHMX ) THEN + AS = ONE + FHMN / FHMX + AT = ( FHMX-FHMN ) / FHMX + AU = ( GA / FHMX )**2 + C = TWO / ( SQRT( AS*AS+AU )+SQRT( AT*AT+AU ) ) + SSMIN = FHMN*C + SSMAX = FHMX / C + ELSE + AU = FHMX / GA + IF( AU.EQ.ZERO ) THEN +* +* Avoid possible harmful underflow if exponent range +* asymmetric (true SSMIN may not underflow even if +* AU underflows) +* + SSMIN = ( FHMN*FHMX ) / GA + SSMAX = GA + ELSE + AS = ONE + FHMN / FHMX + AT = ( FHMX-FHMN ) / FHMX + C = ONE / ( SQRT( ONE+( AS*AU )**2 )+ + $ SQRT( ONE+( AT*AU )**2 ) ) + SSMIN = ( FHMN*C )*AU + SSMIN = SSMIN + SSMIN + SSMAX = GA / ( C+C ) + END IF + END IF + END IF + RETURN +* +* End of SLAS2 +* + END diff --git a/costa/native/external/lapack/slascl.f b/costa/native/external/lapack/slascl.f new file mode 100644 index 000000000..b1dc0b1b2 --- /dev/null +++ b/costa/native/external/lapack/slascl.f @@ -0,0 +1,268 @@ + SUBROUTINE SLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER TYPE + INTEGER INFO, KL, KU, LDA, M, N + REAL CFROM, CTO +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* SLASCL multiplies the M by N real matrix A by the real scalar +* CTO/CFROM. This is done without over/underflow as long as the final +* result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that +* A may be full, upper triangular, lower triangular, upper Hessenberg, +* or banded. +* +* Arguments +* ========= +* +* TYPE (input) CHARACTER*1 +* TYPE indices the storage type of the input matrix. +* = 'G': A is a full matrix. +* = 'L': A is a lower triangular matrix. +* = 'U': A is an upper triangular matrix. +* = 'H': A is an upper Hessenberg matrix. +* = 'B': A is a symmetric band matrix with lower bandwidth KL +* and upper bandwidth KU and with the only the lower +* half stored. +* = 'Q': A is a symmetric band matrix with lower bandwidth KL +* and upper bandwidth KU and with the only the upper +* half stored. +* = 'Z': A is a band matrix with lower bandwidth KL and upper +* bandwidth KU. +* +* KL (input) INTEGER +* The lower bandwidth of A. Referenced only if TYPE = 'B', +* 'Q' or 'Z'. +* +* KU (input) INTEGER +* The upper bandwidth of A. Referenced only if TYPE = 'B', +* 'Q' or 'Z'. +* +* CFROM (input) REAL +* CTO (input) REAL +* The matrix A is multiplied by CTO/CFROM. A(I,J) is computed +* without over/underflow if the final result CTO*A(I,J)/CFROM +* can be represented without over/underflow. CFROM must be +* nonzero. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,M) +* The matrix to be multiplied by CTO/CFROM. See TYPE for the +* storage type. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* INFO (output) INTEGER +* 0 - successful exit +* <0 - if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER I, ITYPE, J, K1, K2, K3, K4 + REAL BIGNUM, CFROM1, CFROMC, CTO1, CTOC, MUL, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 +* + IF( LSAME( TYPE, 'G' ) ) THEN + ITYPE = 0 + ELSE IF( LSAME( TYPE, 'L' ) ) THEN + ITYPE = 1 + ELSE IF( LSAME( TYPE, 'U' ) ) THEN + ITYPE = 2 + ELSE IF( LSAME( TYPE, 'H' ) ) THEN + ITYPE = 3 + ELSE IF( LSAME( TYPE, 'B' ) ) THEN + ITYPE = 4 + ELSE IF( LSAME( TYPE, 'Q' ) ) THEN + ITYPE = 5 + ELSE IF( LSAME( TYPE, 'Z' ) ) THEN + ITYPE = 6 + ELSE + ITYPE = -1 + END IF +* + IF( ITYPE.EQ.-1 ) THEN + INFO = -1 + ELSE IF( CFROM.EQ.ZERO ) THEN + INFO = -4 + ELSE IF( M.LT.0 ) THEN + INFO = -6 + ELSE IF( N.LT.0 .OR. ( ITYPE.EQ.4 .AND. N.NE.M ) .OR. + $ ( ITYPE.EQ.5 .AND. N.NE.M ) ) THEN + INFO = -7 + ELSE IF( ITYPE.LE.3 .AND. LDA.LT.MAX( 1, M ) ) THEN + INFO = -9 + ELSE IF( ITYPE.GE.4 ) THEN + IF( KL.LT.0 .OR. KL.GT.MAX( M-1, 0 ) ) THEN + INFO = -2 + ELSE IF( KU.LT.0 .OR. KU.GT.MAX( N-1, 0 ) .OR. + $ ( ( ITYPE.EQ.4 .OR. ITYPE.EQ.5 ) .AND. KL.NE.KU ) ) + $ THEN + INFO = -3 + ELSE IF( ( ITYPE.EQ.4 .AND. LDA.LT.KL+1 ) .OR. + $ ( ITYPE.EQ.5 .AND. LDA.LT.KU+1 ) .OR. + $ ( ITYPE.EQ.6 .AND. LDA.LT.2*KL+KU+1 ) ) THEN + INFO = -9 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASCL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* + CFROMC = CFROM + CTOC = CTO +* + 10 CONTINUE + CFROM1 = CFROMC*SMLNUM + CTO1 = CTOC / BIGNUM + IF( ABS( CFROM1 ).GT.ABS( CTOC ) .AND. CTOC.NE.ZERO ) THEN + MUL = SMLNUM + DONE = .FALSE. + CFROMC = CFROM1 + ELSE IF( ABS( CTO1 ).GT.ABS( CFROMC ) ) THEN + MUL = BIGNUM + DONE = .FALSE. + CTOC = CTO1 + ELSE + MUL = CTOC / CFROMC + DONE = .TRUE. + END IF +* + IF( ITYPE.EQ.0 ) THEN +* +* Full matrix +* + DO 30 J = 1, N + DO 20 I = 1, M + A( I, J ) = A( I, J )*MUL + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( ITYPE.EQ.1 ) THEN +* +* Lower triangular matrix +* + DO 50 J = 1, N + DO 40 I = J, M + A( I, J ) = A( I, J )*MUL + 40 CONTINUE + 50 CONTINUE +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Upper triangular matrix +* + DO 70 J = 1, N + DO 60 I = 1, MIN( J, M ) + A( I, J ) = A( I, J )*MUL + 60 CONTINUE + 70 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* Upper Hessenberg matrix +* + DO 90 J = 1, N + DO 80 I = 1, MIN( J+1, M ) + A( I, J ) = A( I, J )*MUL + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Lower half of a symmetric band matrix +* + K3 = KL + 1 + K4 = N + 1 + DO 110 J = 1, N + DO 100 I = 1, MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 100 CONTINUE + 110 CONTINUE +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Upper half of a symmetric band matrix +* + K1 = KU + 2 + K3 = KU + 1 + DO 130 J = 1, N + DO 120 I = MAX( K1-J, 1 ), K3 + A( I, J ) = A( I, J )*MUL + 120 CONTINUE + 130 CONTINUE +* + ELSE IF( ITYPE.EQ.6 ) THEN +* +* Band matrix +* + K1 = KL + KU + 2 + K2 = KL + 1 + K3 = 2*KL + KU + 1 + K4 = KL + KU + 1 + M + DO 150 J = 1, N + DO 140 I = MAX( K1-J, K2 ), MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 140 CONTINUE + 150 CONTINUE +* + END IF +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of SLASCL +* + END diff --git a/costa/native/external/lapack/slasd0.f b/costa/native/external/lapack/slasd0.f new file mode 100644 index 000000000..d39d129b6 --- /dev/null +++ b/costa/native/external/lapack/slasd0.f @@ -0,0 +1,231 @@ + SUBROUTINE SLASD0( N, SQRE, D, E, U, LDU, VT, LDVT, SMLSIZ, IWORK, + $ WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDU, LDVT, N, SMLSIZ, SQRE +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL D( * ), E( * ), U( LDU, * ), VT( LDVT, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* Using a divide and conquer approach, SLASD0 computes the singular +* value decomposition (SVD) of a real upper bidiagonal N-by-M +* matrix B with diagonal D and offdiagonal E, where M = N + SQRE. +* The algorithm computes orthogonal matrices U and VT such that +* B = U * S * VT. The singular values S are overwritten on D. +* +* A related subroutine, SLASDA, computes only the singular values, +* and optionally, the singular vectors in compact form. +* +* Arguments +* ========= +* +* N (input) INTEGER +* On entry, the row dimension of the upper bidiagonal matrix. +* This is also the dimension of the main diagonal array D. +* +* SQRE (input) INTEGER +* Specifies the column dimension of the bidiagonal matrix. +* = 0: The bidiagonal matrix has column dimension M = N; +* = 1: The bidiagonal matrix has column dimension M = N+1; +* +* D (input/output) REAL array, dimension (N) +* On entry D contains the main diagonal of the bidiagonal +* matrix. +* On exit D, if INFO = 0, contains its singular values. +* +* E (input) REAL array, dimension (M-1) +* Contains the subdiagonal entries of the bidiagonal matrix. +* On exit, E has been destroyed. +* +* U (output) REAL array, dimension at least (LDQ, N) +* On exit, U contains the left singular vectors. +* +* LDU (input) INTEGER +* On entry, leading dimension of U. +* +* VT (output) REAL array, dimension at least (LDVT, M) +* On exit, VT' contains the right singular vectors. +* +* LDVT (input) INTEGER +* On entry, leading dimension of VT. +* +* SMLSIZ (input) INTEGER +* On entry, maximum size of the subproblems at the +* bottom of the computation tree. +* +* IWORK INTEGER work array. +* Dimension must be at least (8 * N) +* +* WORK REAL work array. +* Dimension must be at least (3 * M**2 + 2 * M) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an singular value did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, I1, IC, IDXQ, IDXQC, IM1, INODE, ITEMP, IWK, + $ J, LF, LL, LVL, M, NCC, ND, NDB1, NDIML, NDIMR, + $ NL, NLF, NLP1, NLVL, NR, NRF, NRP1, SQREI + REAL ALPHA, BETA +* .. +* .. External Subroutines .. + EXTERNAL SLASD1, SLASDQ, SLASDT, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -2 + END IF +* + M = N + SQRE +* + IF( LDU.LT.N ) THEN + INFO = -6 + ELSE IF( LDVT.LT.M ) THEN + INFO = -8 + ELSE IF( SMLSIZ.LT.3 ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASD0', -INFO ) + RETURN + END IF +* +* If the input matrix is too small, call SLASDQ to find the SVD. +* + IF( N.LE.SMLSIZ ) THEN + CALL SLASDQ( 'U', SQRE, N, M, N, 0, D, E, VT, LDVT, U, LDU, U, + $ LDU, WORK, INFO ) + RETURN + END IF +* +* Set up the computation tree. +* + INODE = 1 + NDIML = INODE + N + NDIMR = NDIML + N + IDXQ = NDIMR + N + IWK = IDXQ + N + CALL SLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ), + $ IWORK( NDIMR ), SMLSIZ ) +* +* For the nodes on bottom level of the tree, solve +* their subproblems by SLASDQ. +* + NDB1 = ( ND+1 ) / 2 + NCC = 0 + DO 30 I = NDB1, ND +* +* IC : center row of each node +* NL : number of rows of left subproblem +* NR : number of rows of right subproblem +* NLF: starting row of the left subproblem +* NRF: starting row of the right subproblem +* + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NLP1 = NL + 1 + NR = IWORK( NDIMR+I1 ) + NRP1 = NR + 1 + NLF = IC - NL + NRF = IC + 1 + SQREI = 1 + CALL SLASDQ( 'U', SQREI, NL, NLP1, NL, NCC, D( NLF ), E( NLF ), + $ VT( NLF, NLF ), LDVT, U( NLF, NLF ), LDU, + $ U( NLF, NLF ), LDU, WORK, INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + ITEMP = IDXQ + NLF - 2 + DO 10 J = 1, NL + IWORK( ITEMP+J ) = J + 10 CONTINUE + IF( I.EQ.ND ) THEN + SQREI = SQRE + ELSE + SQREI = 1 + END IF + NRP1 = NR + SQREI + CALL SLASDQ( 'U', SQREI, NR, NRP1, NR, NCC, D( NRF ), E( NRF ), + $ VT( NRF, NRF ), LDVT, U( NRF, NRF ), LDU, + $ U( NRF, NRF ), LDU, WORK, INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + ITEMP = IDXQ + IC + DO 20 J = 1, NR + IWORK( ITEMP+J-1 ) = J + 20 CONTINUE + 30 CONTINUE +* +* Now conquer each subproblem bottom-up. +* + DO 50 LVL = NLVL, 1, -1 +* +* Find the first node LF and last node LL on the +* current level LVL. +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 40 I = LF, LL + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + IF( ( SQRE.EQ.0 ) .AND. ( I.EQ.LL ) ) THEN + SQREI = SQRE + ELSE + SQREI = 1 + END IF + IDXQC = IDXQ + NLF - 1 + ALPHA = D( IC ) + BETA = E( IC ) + CALL SLASD1( NL, NR, SQREI, D( NLF ), ALPHA, BETA, + $ U( NLF, NLF ), LDU, VT( NLF, NLF ), LDVT, + $ IWORK( IDXQC ), IWORK( IWK ), WORK, INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of SLASD0 +* + END diff --git a/costa/native/external/lapack/slasd1.f b/costa/native/external/lapack/slasd1.f new file mode 100644 index 000000000..03418b926 --- /dev/null +++ b/costa/native/external/lapack/slasd1.f @@ -0,0 +1,233 @@ + SUBROUTINE SLASD1( NL, NR, SQRE, D, ALPHA, BETA, U, LDU, VT, LDVT, + $ IDXQ, IWORK, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDU, LDVT, NL, NR, SQRE + REAL ALPHA, BETA +* .. +* .. Array Arguments .. + INTEGER IDXQ( * ), IWORK( * ) + REAL D( * ), U( LDU, * ), VT( LDVT, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B, +* where N = NL + NR + 1 and M = N + SQRE. SLASD1 is called from SLASD0. +* +* A related subroutine SLASD7 handles the case in which the singular +* values (and the singular vectors in factored form) are desired. +* +* SLASD1 computes the SVD as follows: +* +* ( D1(in) 0 0 0 ) +* B = U(in) * ( Z1' a Z2' b ) * VT(in) +* ( 0 0 D2(in) 0 ) +* +* = U(out) * ( D(out) 0) * VT(out) +* +* where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M +* with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros +* elsewhere; and the entry b is empty if SQRE = 0. +* +* The left singular vectors of the original matrix are stored in U, and +* the transpose of the right singular vectors are stored in VT, and the +* singular values are in D. The algorithm consists of three stages: +* +* The first stage consists of deflating the size of the problem +* when there are multiple singular values or when there are zeros in +* the Z vector. For each such occurence the dimension of the +* secular equation problem is reduced by one. This stage is +* performed by the routine SLASD2. +* +* The second stage consists of calculating the updated +* singular values. This is done by finding the square roots of the +* roots of the secular equation via the routine SLASD4 (as called +* by SLASD3). This routine also calculates the singular vectors of +* the current problem. +* +* The final stage consists of computing the updated singular vectors +* directly using the updated singular values. The singular vectors +* for the current problem are multiplied with the singular vectors +* from the overall problem. +* +* Arguments +* ========= +* +* NL (input) INTEGER +* The row dimension of the upper block. NL >= 1. +* +* NR (input) INTEGER +* The row dimension of the lower block. NR >= 1. +* +* SQRE (input) INTEGER +* = 0: the lower block is an NR-by-NR square matrix. +* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +* +* The bidiagonal matrix has row dimension N = NL + NR + 1, +* and column dimension M = N + SQRE. +* +* D (input/output) REAL array, +* dimension (N = NL+NR+1). +* On entry D(1:NL,1:NL) contains the singular values of the +* upper block; and D(NL+2:N) contains the singular values of +* the lower block. On exit D(1:N) contains the singular values +* of the modified matrix. +* +* ALPHA (input) REAL +* Contains the diagonal element associated with the added row. +* +* BETA (input) REAL +* Contains the off-diagonal element associated with the added +* row. +* +* U (input/output) REAL array, dimension(LDU,N) +* On entry U(1:NL, 1:NL) contains the left singular vectors of +* the upper block; U(NL+2:N, NL+2:N) contains the left singular +* vectors of the lower block. On exit U contains the left +* singular vectors of the bidiagonal matrix. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= max( 1, N ). +* +* VT (input/output) REAL array, dimension(LDVT,M) +* where M = N + SQRE. +* On entry VT(1:NL+1, 1:NL+1)' contains the right singular +* vectors of the upper block; VT(NL+2:M, NL+2:M)' contains +* the right singular vectors of the lower block. On exit +* VT' contains the right singular vectors of the +* bidiagonal matrix. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. LDVT >= max( 1, M ). +* +* IDXQ (output) INTEGER array, dimension(N) +* This contains the permutation which will reintegrate the +* subproblem just solved back into sorted order, i.e. +* D( IDXQ( I = 1, N ) ) will be in ascending order. +* +* IWORK (workspace) INTEGER array, dimension( 4 * N ) +* +* WORK (workspace) REAL array, dimension( 3*M**2 + 2*M ) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an singular value did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. +* + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER COLTYP, I, IDX, IDXC, IDXP, IQ, ISIGMA, IU2, + $ IVT2, IZ, K, LDQ, LDU2, LDVT2, M, N, N1, N2 + REAL ORGNRM +* .. +* .. External Subroutines .. + EXTERNAL SLAMRG, SLASCL, SLASD2, SLASD3, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( NL.LT.1 ) THEN + INFO = -1 + ELSE IF( NR.LT.1 ) THEN + INFO = -2 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASD1', -INFO ) + RETURN + END IF +* + N = NL + NR + 1 + M = N + SQRE +* +* The following values are for bookkeeping purposes only. They are +* integer pointers which indicate the portion of the workspace +* used by a particular array in SLASD2 and SLASD3. +* + LDU2 = N + LDVT2 = M +* + IZ = 1 + ISIGMA = IZ + M + IU2 = ISIGMA + N + IVT2 = IU2 + LDU2*N + IQ = IVT2 + LDVT2*M +* + IDX = 1 + IDXC = IDX + N + COLTYP = IDXC + N + IDXP = COLTYP + N +* +* Scale. +* + ORGNRM = MAX( ABS( ALPHA ), ABS( BETA ) ) + D( NL+1 ) = ZERO + DO 10 I = 1, N + IF( ABS( D( I ) ).GT.ORGNRM ) THEN + ORGNRM = ABS( D( I ) ) + END IF + 10 CONTINUE + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO ) + ALPHA = ALPHA / ORGNRM + BETA = BETA / ORGNRM +* +* Deflate singular values. +* + CALL SLASD2( NL, NR, SQRE, K, D, WORK( IZ ), ALPHA, BETA, U, LDU, + $ VT, LDVT, WORK( ISIGMA ), WORK( IU2 ), LDU2, + $ WORK( IVT2 ), LDVT2, IWORK( IDXP ), IWORK( IDX ), + $ IWORK( IDXC ), IDXQ, IWORK( COLTYP ), INFO ) +* +* Solve Secular Equation and update singular vectors. +* + LDQ = K + CALL SLASD3( NL, NR, SQRE, K, D, WORK( IQ ), LDQ, WORK( ISIGMA ), + $ U, LDU, WORK( IU2 ), LDU2, VT, LDVT, WORK( IVT2 ), + $ LDVT2, IWORK( IDXC ), IWORK( COLTYP ), WORK( IZ ), + $ INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF +* +* Unscale. +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) +* +* Prepare the IDXQ sorting permutation. +* + N1 = K + N2 = N - K + CALL SLAMRG( N1, N2, D, 1, -1, IDXQ ) +* + RETURN +* +* End of SLASD1 +* + END diff --git a/costa/native/external/lapack/slasd2.f b/costa/native/external/lapack/slasd2.f new file mode 100644 index 000000000..dd9c10716 --- /dev/null +++ b/costa/native/external/lapack/slasd2.f @@ -0,0 +1,513 @@ + SUBROUTINE SLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT, + $ LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX, + $ IDXC, IDXQ, COLTYP, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE + REAL ALPHA, BETA +* .. +* .. Array Arguments .. + INTEGER COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ), + $ IDXQ( * ) + REAL D( * ), DSIGMA( * ), U( LDU, * ), + $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ), + $ Z( * ) +* .. +* +* Purpose +* ======= +* +* SLASD2 merges the two sets of singular values together into a single +* sorted set. Then it tries to deflate the size of the problem. +* There are two ways in which deflation can occur: when two or more +* singular values are close together or if there is a tiny entry in the +* Z vector. For each such occurrence the order of the related secular +* equation problem is reduced by one. +* +* SLASD2 is called from SLASD1. +* +* Arguments +* ========= +* +* NL (input) INTEGER +* The row dimension of the upper block. NL >= 1. +* +* NR (input) INTEGER +* The row dimension of the lower block. NR >= 1. +* +* SQRE (input) INTEGER +* = 0: the lower block is an NR-by-NR square matrix. +* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +* +* The bidiagonal matrix has N = NL + NR + 1 rows and +* M = N + SQRE >= N columns. +* +* K (output) INTEGER +* Contains the dimension of the non-deflated matrix, +* This is the order of the related secular equation. 1 <= K <=N. +* +* D (input/output) REAL array, dimension(N) +* On entry D contains the singular values of the two submatrices +* to be combined. On exit D contains the trailing (N-K) updated +* singular values (those which were deflated) sorted into +* increasing order. +* +* ALPHA (input) REAL +* Contains the diagonal element associated with the added row. +* +* BETA (input) REAL +* Contains the off-diagonal element associated with the added +* row. +* +* U (input/output) REAL array, dimension(LDU,N) +* On entry U contains the left singular vectors of two +* submatrices in the two square blocks with corners at (1,1), +* (NL, NL), and (NL+2, NL+2), (N,N). +* On exit U contains the trailing (N-K) updated left singular +* vectors (those which were deflated) in its last N-K columns. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= N. +* +* Z (output) REAL array, dimension(N) +* On exit Z contains the updating row vector in the secular +* equation. +* +* DSIGMA (output) REAL array, dimension (N) +* Contains a copy of the diagonal elements (K-1 singular values +* and one zero) in the secular equation. +* +* U2 (output) REAL array, dimension(LDU2,N) +* Contains a copy of the first K-1 left singular vectors which +* will be used by SLASD3 in a matrix multiply (SGEMM) to solve +* for the new left singular vectors. U2 is arranged into four +* blocks. The first block contains a column with 1 at NL+1 and +* zero everywhere else; the second block contains non-zero +* entries only at and above NL; the third contains non-zero +* entries only below NL+1; and the fourth is dense. +* +* LDU2 (input) INTEGER +* The leading dimension of the array U2. LDU2 >= N. +* +* VT (input/output) REAL array, dimension(LDVT,M) +* On entry VT' contains the right singular vectors of two +* submatrices in the two square blocks with corners at (1,1), +* (NL+1, NL+1), and (NL+2, NL+2), (M,M). +* On exit VT' contains the trailing (N-K) updated right singular +* vectors (those which were deflated) in its last N-K columns. +* In case SQRE =1, the last row of VT spans the right null +* space. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. LDVT >= M. +* +* VT2 (output) REAL array, dimension(LDVT2,N) +* VT2' contains a copy of the first K right singular vectors +* which will be used by SLASD3 in a matrix multiply (SGEMM) to +* solve for the new right singular vectors. VT2 is arranged into +* three blocks. The first block contains a row that corresponds +* to the special 0 diagonal element in SIGMA; the second block +* contains non-zeros only at and before NL +1; the third block +* contains non-zeros only at and after NL +2. +* +* LDVT2 (input) INTEGER +* The leading dimension of the array VT2. LDVT2 >= M. +* +* IDXP (workspace) INTEGER array, dimension(N) +* This will contain the permutation used to place deflated +* values of D at the end of the array. On output IDXP(2:K) +* points to the nondeflated D-values and IDXP(K+1:N) +* points to the deflated singular values. +* +* IDX (workspace) INTEGER array, dimension(N) +* This will contain the permutation used to sort the contents of +* D into ascending order. +* +* IDXC (output) INTEGER array, dimension(N) +* This will contain the permutation used to arrange the columns +* of the deflated U matrix into three groups: the first group +* contains non-zero entries only at and above NL, the second +* contains non-zero entries only below NL+2, and the third is +* dense. +* +* COLTYP (workspace/output) INTEGER array, dimension(N) +* As workspace, this will contain a label which will indicate +* which of the following types a column in the U2 matrix or a +* row in the VT2 matrix is: +* 1 : non-zero in the upper half only +* 2 : non-zero in the lower half only +* 3 : dense +* 4 : deflated +* +* On exit, it is an array of dimension 4, with COLTYP(I) being +* the dimension of the I-th type columns. +* +* IDXQ (input) INTEGER array, dimension(N) +* This contains the permutation which separately sorts the two +* sub-problems in D into ascending order. Note that entries in +* the first hlaf of this permutation must first be moved one +* position backward; and entries in the second half +* must first have NL+1 added to their values. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, EIGHT + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0, + $ EIGHT = 8.0E+0 ) +* .. +* .. Local Arrays .. + INTEGER CTOT( 4 ), PSM( 4 ) +* .. +* .. Local Scalars .. + INTEGER CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M, + $ N, NLP1, NLP2 + REAL C, EPS, HLFTOL, S, TAU, TOL, Z1 +* .. +* .. External Functions .. + REAL SLAMCH, SLAPY2 + EXTERNAL SLAMCH, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLACPY, SLAMRG, SLASET, SROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( NL.LT.1 ) THEN + INFO = -1 + ELSE IF( NR.LT.1 ) THEN + INFO = -2 + ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN + INFO = -3 + END IF +* + N = NL + NR + 1 + M = N + SQRE +* + IF( LDU.LT.N ) THEN + INFO = -10 + ELSE IF( LDVT.LT.M ) THEN + INFO = -12 + ELSE IF( LDU2.LT.N ) THEN + INFO = -15 + ELSE IF( LDVT2.LT.M ) THEN + INFO = -17 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASD2', -INFO ) + RETURN + END IF +* + NLP1 = NL + 1 + NLP2 = NL + 2 +* +* Generate the first part of the vector Z; and move the singular +* values in the first part of D one position backward. +* + Z1 = ALPHA*VT( NLP1, NLP1 ) + Z( 1 ) = Z1 + DO 10 I = NL, 1, -1 + Z( I+1 ) = ALPHA*VT( I, NLP1 ) + D( I+1 ) = D( I ) + IDXQ( I+1 ) = IDXQ( I ) + 1 + 10 CONTINUE +* +* Generate the second part of the vector Z. +* + DO 20 I = NLP2, M + Z( I ) = BETA*VT( I, NLP2 ) + 20 CONTINUE +* +* Initialize some reference arrays. +* + DO 30 I = 2, NLP1 + COLTYP( I ) = 1 + 30 CONTINUE + DO 40 I = NLP2, N + COLTYP( I ) = 2 + 40 CONTINUE +* +* Sort the singular values into increasing order +* + DO 50 I = NLP2, N + IDXQ( I ) = IDXQ( I ) + NLP1 + 50 CONTINUE +* +* DSIGMA, IDXC, IDXC, and the first column of U2 +* are used as storage space. +* + DO 60 I = 2, N + DSIGMA( I ) = D( IDXQ( I ) ) + U2( I, 1 ) = Z( IDXQ( I ) ) + IDXC( I ) = COLTYP( IDXQ( I ) ) + 60 CONTINUE +* + CALL SLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) ) +* + DO 70 I = 2, N + IDXI = 1 + IDX( I ) + D( I ) = DSIGMA( IDXI ) + Z( I ) = U2( IDXI, 1 ) + COLTYP( I ) = IDXC( IDXI ) + 70 CONTINUE +* +* Calculate the allowable deflation tolerance +* + EPS = SLAMCH( 'Epsilon' ) + TOL = MAX( ABS( ALPHA ), ABS( BETA ) ) + TOL = EIGHT*EPS*MAX( ABS( D( N ) ), TOL ) +* +* There are 2 kinds of deflation -- first a value in the z-vector +* is small, second two (or more) singular values are very close +* together (their difference is small). +* +* If the value in the z-vector is small, we simply permute the +* array so that the corresponding singular value is moved to the +* end. +* +* If two values in the D-vector are close, we perform a two-sided +* rotation designed to make one of the corresponding z-vector +* entries zero, and then permute the array so that the deflated +* singular value is moved to the end. +* +* If there are multiple singular values then the problem deflates. +* Here the number of equal singular values are found. As each equal +* singular value is found, an elementary reflector is computed to +* rotate the corresponding singular subspace so that the +* corresponding components of Z are zero in this new basis. +* + K = 1 + K2 = N + 1 + DO 80 J = 2, N + IF( ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + IDXP( K2 ) = J + COLTYP( J ) = 4 + IF( J.EQ.N ) + $ GO TO 120 + ELSE + JPREV = J + GO TO 90 + END IF + 80 CONTINUE + 90 CONTINUE + J = JPREV + 100 CONTINUE + J = J + 1 + IF( J.GT.N ) + $ GO TO 110 + IF( ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + IDXP( K2 ) = J + COLTYP( J ) = 4 + ELSE +* +* Check if singular values are close enough to allow deflation. +* + IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN +* +* Deflation is possible. +* + S = Z( JPREV ) + C = Z( J ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = SLAPY2( C, S ) + C = C / TAU + S = -S / TAU + Z( J ) = TAU + Z( JPREV ) = ZERO +* +* Apply back the Givens rotation to the left and right +* singular vector matrices. +* + IDXJP = IDXQ( IDX( JPREV )+1 ) + IDXJ = IDXQ( IDX( J )+1 ) + IF( IDXJP.LE.NLP1 ) THEN + IDXJP = IDXJP - 1 + END IF + IF( IDXJ.LE.NLP1 ) THEN + IDXJ = IDXJ - 1 + END IF + CALL SROT( N, U( 1, IDXJP ), 1, U( 1, IDXJ ), 1, C, S ) + CALL SROT( M, VT( IDXJP, 1 ), LDVT, VT( IDXJ, 1 ), LDVT, C, + $ S ) + IF( COLTYP( J ).NE.COLTYP( JPREV ) ) THEN + COLTYP( J ) = 3 + END IF + COLTYP( JPREV ) = 4 + K2 = K2 - 1 + IDXP( K2 ) = JPREV + JPREV = J + ELSE + K = K + 1 + U2( K, 1 ) = Z( JPREV ) + DSIGMA( K ) = D( JPREV ) + IDXP( K ) = JPREV + JPREV = J + END IF + END IF + GO TO 100 + 110 CONTINUE +* +* Record the last singular value. +* + K = K + 1 + U2( K, 1 ) = Z( JPREV ) + DSIGMA( K ) = D( JPREV ) + IDXP( K ) = JPREV +* + 120 CONTINUE +* +* Count up the total number of the various types of columns, then +* form a permutation which positions the four column types into +* four groups of uniform structure (although one or more of these +* groups may be empty). +* + DO 130 J = 1, 4 + CTOT( J ) = 0 + 130 CONTINUE + DO 140 J = 2, N + CT = COLTYP( J ) + CTOT( CT ) = CTOT( CT ) + 1 + 140 CONTINUE +* +* PSM(*) = Position in SubMatrix (of types 1 through 4) +* + PSM( 1 ) = 2 + PSM( 2 ) = 2 + CTOT( 1 ) + PSM( 3 ) = PSM( 2 ) + CTOT( 2 ) + PSM( 4 ) = PSM( 3 ) + CTOT( 3 ) +* +* Fill out the IDXC array so that the permutation which it induces +* will place all type-1 columns first, all type-2 columns next, +* then all type-3's, and finally all type-4's, starting from the +* second column. This applies similarly to the rows of VT. +* + DO 150 J = 2, N + JP = IDXP( J ) + CT = COLTYP( JP ) + IDXC( PSM( CT ) ) = J + PSM( CT ) = PSM( CT ) + 1 + 150 CONTINUE +* +* Sort the singular values and corresponding singular vectors into +* DSIGMA, U2, and VT2 respectively. The singular values/vectors +* which were not deflated go into the first K slots of DSIGMA, U2, +* and VT2 respectively, while those which were deflated go into the +* last N - K slots, except that the first column/row will be treated +* separately. +* + DO 160 J = 2, N + JP = IDXP( J ) + DSIGMA( J ) = D( JP ) + IDXJ = IDXQ( IDX( IDXP( IDXC( J ) ) )+1 ) + IF( IDXJ.LE.NLP1 ) THEN + IDXJ = IDXJ - 1 + END IF + CALL SCOPY( N, U( 1, IDXJ ), 1, U2( 1, J ), 1 ) + CALL SCOPY( M, VT( IDXJ, 1 ), LDVT, VT2( J, 1 ), LDVT2 ) + 160 CONTINUE +* +* Determine DSIGMA(1), DSIGMA(2) and Z(1) +* + DSIGMA( 1 ) = ZERO + HLFTOL = TOL / TWO + IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL ) + $ DSIGMA( 2 ) = HLFTOL + IF( M.GT.N ) THEN + Z( 1 ) = SLAPY2( Z1, Z( M ) ) + IF( Z( 1 ).LE.TOL ) THEN + C = ONE + S = ZERO + Z( 1 ) = TOL + ELSE + C = Z1 / Z( 1 ) + S = Z( M ) / Z( 1 ) + END IF + ELSE + IF( ABS( Z1 ).LE.TOL ) THEN + Z( 1 ) = TOL + ELSE + Z( 1 ) = Z1 + END IF + END IF +* +* Move the rest of the updating row to Z. +* + CALL SCOPY( K-1, U2( 2, 1 ), 1, Z( 2 ), 1 ) +* +* Determine the first column of U2, the first row of VT2 and the +* last row of VT. +* + CALL SLASET( 'A', N, 1, ZERO, ZERO, U2, LDU2 ) + U2( NLP1, 1 ) = ONE + IF( M.GT.N ) THEN + DO 170 I = 1, NLP1 + VT( M, I ) = -S*VT( NLP1, I ) + VT2( 1, I ) = C*VT( NLP1, I ) + 170 CONTINUE + DO 180 I = NLP2, M + VT2( 1, I ) = S*VT( M, I ) + VT( M, I ) = C*VT( M, I ) + 180 CONTINUE + ELSE + CALL SCOPY( M, VT( NLP1, 1 ), LDVT, VT2( 1, 1 ), LDVT2 ) + END IF + IF( M.GT.N ) THEN + CALL SCOPY( M, VT( M, 1 ), LDVT, VT2( M, 1 ), LDVT2 ) + END IF +* +* The deflated singular values and their corresponding vectors go +* into the back of D, U, and V respectively. +* + IF( N.GT.K ) THEN + CALL SCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 ) + CALL SLACPY( 'A', N, N-K, U2( 1, K+1 ), LDU2, U( 1, K+1 ), + $ LDU ) + CALL SLACPY( 'A', N-K, M, VT2( K+1, 1 ), LDVT2, VT( K+1, 1 ), + $ LDVT ) + END IF +* +* Copy CTOT into COLTYP for referencing in SLASD3. +* + DO 190 J = 1, 4 + COLTYP( J ) = CTOT( J ) + 190 CONTINUE +* + RETURN +* +* End of SLASD2 +* + END diff --git a/costa/native/external/lapack/slasd3.f b/costa/native/external/lapack/slasd3.f new file mode 100644 index 000000000..2e1af51cd --- /dev/null +++ b/costa/native/external/lapack/slasd3.f @@ -0,0 +1,359 @@ + SUBROUTINE SLASD3( NL, NR, SQRE, K, D, Q, LDQ, DSIGMA, U, LDU, U2, + $ LDU2, VT, LDVT, VT2, LDVT2, IDXC, CTOT, Z, + $ INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDQ, LDU, LDU2, LDVT, LDVT2, NL, NR, + $ SQRE +* .. +* .. Array Arguments .. + INTEGER CTOT( * ), IDXC( * ) + REAL D( * ), DSIGMA( * ), Q( LDQ, * ), U( LDU, * ), + $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ), + $ Z( * ) +* .. +* +* Purpose +* ======= +* +* SLASD3 finds all the square roots of the roots of the secular +* equation, as defined by the values in D and Z. It makes the +* appropriate calls to SLASD4 and then updates the singular +* vectors by matrix multiplication. +* +* This code makes very mild assumptions about floating point +* arithmetic. It will work on machines with a guard digit in +* add/subtract, or on those binary machines without guard digits +* which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. +* It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* SLASD3 is called from SLASD1. +* +* Arguments +* ========= +* +* NL (input) INTEGER +* The row dimension of the upper block. NL >= 1. +* +* NR (input) INTEGER +* The row dimension of the lower block. NR >= 1. +* +* SQRE (input) INTEGER +* = 0: the lower block is an NR-by-NR square matrix. +* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +* +* The bidiagonal matrix has N = NL + NR + 1 rows and +* M = N + SQRE >= N columns. +* +* K (input) INTEGER +* The size of the secular equation, 1 =< K = < N. +* +* D (output) REAL array, dimension(K) +* On exit the square roots of the roots of the secular equation, +* in ascending order. +* +* Q (workspace) REAL array, +* dimension at least (LDQ,K). +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= K. +* +* DSIGMA (input) REAL array, dimension(K) +* The first K elements of this array contain the old roots +* of the deflated updating problem. These are the poles +* of the secular equation. +* +* U (input) REAL array, dimension (LDU, N) +* The last N - K columns of this matrix contain the deflated +* left singular vectors. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= N. +* +* U2 (input) REAL array, dimension (LDU2, N) +* The first K columns of this matrix contain the non-deflated +* left singular vectors for the split problem. +* +* LDU2 (input) INTEGER +* The leading dimension of the array U2. LDU2 >= N. +* +* VT (input) REAL array, dimension (LDVT, M) +* The last M - K columns of VT' contain the deflated +* right singular vectors. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. LDVT >= N. +* +* VT2 (input) REAL array, dimension (LDVT2, N) +* The first K columns of VT2' contain the non-deflated +* right singular vectors for the split problem. +* +* LDVT2 (input) INTEGER +* The leading dimension of the array VT2. LDVT2 >= N. +* +* IDXC (input) INTEGER array, dimension ( N ) +* The permutation used to arrange the columns of U (and rows of +* VT) into three groups: the first group contains non-zero +* entries only at and above (or before) NL +1; the second +* contains non-zero entries only at and below (or after) NL+2; +* and the third is dense. The first column of U and the row of +* VT are treated separately, however. +* +* The rows of the singular vectors found by SLASD4 +* must be likewise permuted before the matrix multiplies can +* take place. +* +* CTOT (input) INTEGER array, dimension ( 4 ) +* A count of the total number of the various types of columns +* in U (or rows in VT), as described in IDXC. The fourth column +* type is any column which has been deflated. +* +* Z (input) REAL array, dimension (K) +* The first K elements of this array contain the components +* of the deflation-adjusted updating row vector. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an singular value did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO, NEGONE + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0, + $ NEGONE = -1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER CTEMP, I, J, JC, KTEMP, M, N, NLP1, NLP2, NRP1 + REAL RHO, TEMP +* .. +* .. External Functions .. + REAL SLAMC3, SNRM2 + EXTERNAL SLAMC3, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, SLACPY, SLASCL, SLASD4, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( NL.LT.1 ) THEN + INFO = -1 + ELSE IF( NR.LT.1 ) THEN + INFO = -2 + ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN + INFO = -3 + END IF +* + N = NL + NR + 1 + M = N + SQRE + NLP1 = NL + 1 + NLP2 = NL + 2 +* + IF( ( K.LT.1 ) .OR. ( K.GT.N ) ) THEN + INFO = -4 + ELSE IF( LDQ.LT.K ) THEN + INFO = -7 + ELSE IF( LDU.LT.N ) THEN + INFO = -10 + ELSE IF( LDU2.LT.N ) THEN + INFO = -12 + ELSE IF( LDVT.LT.M ) THEN + INFO = -14 + ELSE IF( LDVT2.LT.M ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASD3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.1 ) THEN + D( 1 ) = ABS( Z( 1 ) ) + CALL SCOPY( M, VT2( 1, 1 ), LDVT2, VT( 1, 1 ), LDVT ) + IF( Z( 1 ).GT.ZERO ) THEN + CALL SCOPY( N, U2( 1, 1 ), 1, U( 1, 1 ), 1 ) + ELSE + DO 10 I = 1, N + U( I, 1 ) = -U2( I, 1 ) + 10 CONTINUE + END IF + RETURN + END IF +* +* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can +* be computed with high relative accuracy (barring over/underflow). +* This is a problem on machines without a guard digit in +* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). +* The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), +* which on any of these machines zeros out the bottommost +* bit of DSIGMA(I) if it is 1; this makes the subsequent +* subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation +* occurs. On binary machines with a guard digit (almost all +* machines) it does not change DSIGMA(I) at all. On hexadecimal +* and decimal machines with a guard digit, it slightly +* changes the bottommost bits of DSIGMA(I). It does not account +* for hexadecimal or decimal machines without guard digits +* (we know of none). We use a subroutine call to compute +* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating +* this code. +* + DO 20 I = 1, K + DSIGMA( I ) = SLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I ) + 20 CONTINUE +* +* Keep a copy of Z. +* + CALL SCOPY( K, Z, 1, Q, 1 ) +* +* Normalize Z. +* + RHO = SNRM2( K, Z, 1 ) + CALL SLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO ) + RHO = RHO*RHO +* +* Find the new singular values. +* + DO 30 J = 1, K + CALL SLASD4( K, J, DSIGMA, Z, U( 1, J ), RHO, D( J ), + $ VT( 1, J ), INFO ) +* +* If the zero finder fails, the computation is terminated. +* + IF( INFO.NE.0 ) THEN + RETURN + END IF + 30 CONTINUE +* +* Compute updated Z. +* + DO 60 I = 1, K + Z( I ) = U( I, K )*VT( I, K ) + DO 40 J = 1, I - 1 + Z( I ) = Z( I )*( U( I, J )*VT( I, J ) / + $ ( DSIGMA( I )-DSIGMA( J ) ) / + $ ( DSIGMA( I )+DSIGMA( J ) ) ) + 40 CONTINUE + DO 50 J = I, K - 1 + Z( I ) = Z( I )*( U( I, J )*VT( I, J ) / + $ ( DSIGMA( I )-DSIGMA( J+1 ) ) / + $ ( DSIGMA( I )+DSIGMA( J+1 ) ) ) + 50 CONTINUE + Z( I ) = SIGN( SQRT( ABS( Z( I ) ) ), Q( I, 1 ) ) + 60 CONTINUE +* +* Compute left singular vectors of the modified diagonal matrix, +* and store related information for the right singular vectors. +* + DO 90 I = 1, K + VT( 1, I ) = Z( 1 ) / U( 1, I ) / VT( 1, I ) + U( 1, I ) = NEGONE + DO 70 J = 2, K + VT( J, I ) = Z( J ) / U( J, I ) / VT( J, I ) + U( J, I ) = DSIGMA( J )*VT( J, I ) + 70 CONTINUE + TEMP = SNRM2( K, U( 1, I ), 1 ) + Q( 1, I ) = U( 1, I ) / TEMP + DO 80 J = 2, K + JC = IDXC( J ) + Q( J, I ) = U( JC, I ) / TEMP + 80 CONTINUE + 90 CONTINUE +* +* Update the left singular vector matrix. +* + IF( K.EQ.2 ) THEN + CALL SGEMM( 'N', 'N', N, K, K, ONE, U2, LDU2, Q, LDQ, ZERO, U, + $ LDU ) + GO TO 100 + END IF + IF( CTOT( 1 ).GT.0 ) THEN + CALL SGEMM( 'N', 'N', NL, K, CTOT( 1 ), ONE, U2( 1, 2 ), LDU2, + $ Q( 2, 1 ), LDQ, ZERO, U( 1, 1 ), LDU ) + IF( CTOT( 3 ).GT.0 ) THEN + KTEMP = 2 + CTOT( 1 ) + CTOT( 2 ) + CALL SGEMM( 'N', 'N', NL, K, CTOT( 3 ), ONE, U2( 1, KTEMP ), + $ LDU2, Q( KTEMP, 1 ), LDQ, ONE, U( 1, 1 ), LDU ) + END IF + ELSE IF( CTOT( 3 ).GT.0 ) THEN + KTEMP = 2 + CTOT( 1 ) + CTOT( 2 ) + CALL SGEMM( 'N', 'N', NL, K, CTOT( 3 ), ONE, U2( 1, KTEMP ), + $ LDU2, Q( KTEMP, 1 ), LDQ, ZERO, U( 1, 1 ), LDU ) + ELSE + CALL SLACPY( 'F', NL, K, U2, LDU2, U, LDU ) + END IF + CALL SCOPY( K, Q( 1, 1 ), LDQ, U( NLP1, 1 ), LDU ) + KTEMP = 2 + CTOT( 1 ) + CTEMP = CTOT( 2 ) + CTOT( 3 ) + CALL SGEMM( 'N', 'N', NR, K, CTEMP, ONE, U2( NLP2, KTEMP ), LDU2, + $ Q( KTEMP, 1 ), LDQ, ZERO, U( NLP2, 1 ), LDU ) +* +* Generate the right singular vectors. +* + 100 CONTINUE + DO 120 I = 1, K + TEMP = SNRM2( K, VT( 1, I ), 1 ) + Q( I, 1 ) = VT( 1, I ) / TEMP + DO 110 J = 2, K + JC = IDXC( J ) + Q( I, J ) = VT( JC, I ) / TEMP + 110 CONTINUE + 120 CONTINUE +* +* Update the right singular vector matrix. +* + IF( K.EQ.2 ) THEN + CALL SGEMM( 'N', 'N', K, M, K, ONE, Q, LDQ, VT2, LDVT2, ZERO, + $ VT, LDVT ) + RETURN + END IF + KTEMP = 1 + CTOT( 1 ) + CALL SGEMM( 'N', 'N', K, NLP1, KTEMP, ONE, Q( 1, 1 ), LDQ, + $ VT2( 1, 1 ), LDVT2, ZERO, VT( 1, 1 ), LDVT ) + KTEMP = 2 + CTOT( 1 ) + CTOT( 2 ) + IF( KTEMP.LE.LDVT2 ) + $ CALL SGEMM( 'N', 'N', K, NLP1, CTOT( 3 ), ONE, Q( 1, KTEMP ), + $ LDQ, VT2( KTEMP, 1 ), LDVT2, ONE, VT( 1, 1 ), + $ LDVT ) +* + KTEMP = CTOT( 1 ) + 1 + NRP1 = NR + SQRE + IF( KTEMP.GT.1 ) THEN + DO 130 I = 1, K + Q( I, KTEMP ) = Q( I, 1 ) + 130 CONTINUE + DO 140 I = NLP2, M + VT2( KTEMP, I ) = VT2( 1, I ) + 140 CONTINUE + END IF + CTEMP = 1 + CTOT( 2 ) + CTOT( 3 ) + CALL SGEMM( 'N', 'N', K, NRP1, CTEMP, ONE, Q( 1, KTEMP ), LDQ, + $ VT2( KTEMP, NLP2 ), LDVT2, ZERO, VT( 1, NLP2 ), LDVT ) +* + RETURN +* +* End of SLASD3 +* + END diff --git a/costa/native/external/lapack/slasd4.f b/costa/native/external/lapack/slasd4.f new file mode 100644 index 000000000..35454324b --- /dev/null +++ b/costa/native/external/lapack/slasd4.f @@ -0,0 +1,891 @@ + SUBROUTINE SLASD4( N, I, D, Z, DELTA, RHO, SIGMA, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER I, INFO, N + REAL RHO, SIGMA +* .. +* .. Array Arguments .. + REAL D( * ), DELTA( * ), WORK( * ), Z( * ) +* .. +* +* Purpose +* ======= +* +* This subroutine computes the square root of the I-th updated +* eigenvalue of a positive symmetric rank-one modification to +* a positive diagonal matrix whose entries are given as the squares +* of the corresponding entries in the array d, and that +* +* 0 <= D(i) < D(j) for i < j +* +* and that RHO > 0. This is arranged by the calling routine, and is +* no loss in generality. The rank-one modified system is thus +* +* diag( D ) * diag( D ) + RHO * Z * Z_transpose. +* +* where we assume the Euclidean norm of Z is 1. +* +* The method consists of approximating the rational functions in the +* secular equation by simpler interpolating rational functions. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The length of all arrays. +* +* I (input) INTEGER +* The index of the eigenvalue to be computed. 1 <= I <= N. +* +* D (input) REAL array, dimension ( N ) +* The original eigenvalues. It is assumed that they are in +* order, 0 <= D(I) < D(J) for I < J. +* +* Z (input) REAL array, dimension ( N ) +* The components of the updating vector. +* +* DELTA (output) REAL array, dimension ( N ) +* If N .ne. 1, DELTA contains (D(j) - sigma_I) in its j-th +* component. If N = 1, then DELTA(1) = 1. The vector DELTA +* contains the information necessary to construct the +* (singular) eigenvectors. +* +* RHO (input) REAL +* The scalar in the symmetric updating formula. +* +* SIGMA (output) REAL +* The computed lambda_I, the I-th updated eigenvalue. +* +* WORK (workspace) REAL array, dimension ( N ) +* If N .ne. 1, WORK contains (D(j) + sigma_I) in its j-th +* component. If N = 1, then WORK( 1 ) = 1. +* +* INFO (output) INTEGER +* = 0: successful exit +* > 0: if INFO = 1, the updating process failed. +* +* Internal Parameters +* =================== +* +* Logical variable ORGATI (origin-at-i?) is used for distinguishing +* whether D(i) or D(i+1) is treated as the origin. +* +* ORGATI = .true. origin at i +* ORGATI = .false. origin at i+1 +* +* Logical variable SWTCH3 (switch-for-3-poles?) is for noting +* if we are working with THREE poles! +* +* MAXIT is the maximum number of iterations allowed for each +* eigenvalue. +* +* Further Details +* =============== +* +* Based on contributions by +* Ren-Cang Li, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXIT + PARAMETER ( MAXIT = 20 ) + REAL ZERO, ONE, TWO, THREE, FOUR, EIGHT, TEN + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0, + $ THREE = 3.0E+0, FOUR = 4.0E+0, EIGHT = 8.0E+0, + $ TEN = 10.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ORGATI, SWTCH, SWTCH3 + INTEGER II, IIM1, IIP1, IP1, ITER, J, NITER + REAL A, B, C, DELSQ, DELSQ2, DPHI, DPSI, DTIIM, + $ DTIIP, DTIPSQ, DTISQ, DTNSQ, DTNSQ1, DW, EPS, + $ ERRETM, ETA, PHI, PREW, PSI, RHOINV, SG2LB, + $ SG2UB, TAU, TEMP, TEMP1, TEMP2, W +* .. +* .. Local Arrays .. + REAL DD( 3 ), ZZ( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL SLAED6, SLASD5 +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Since this routine is called in an inner loop, we do no argument +* checking. +* +* Quick return for N=1 and 2. +* + INFO = 0 + IF( N.EQ.1 ) THEN +* +* Presumably, I=1 upon entry +* + SIGMA = SQRT( D( 1 )*D( 1 )+RHO*Z( 1 )*Z( 1 ) ) + DELTA( 1 ) = ONE + WORK( 1 ) = ONE + RETURN + END IF + IF( N.EQ.2 ) THEN + CALL SLASD5( I, D, Z, DELTA, RHO, SIGMA, WORK ) + RETURN + END IF +* +* Compute machine epsilon +* + EPS = SLAMCH( 'Epsilon' ) + RHOINV = ONE / RHO +* +* The case I = N +* + IF( I.EQ.N ) THEN +* +* Initialize some basic variables +* + II = N - 1 + NITER = 1 +* +* Calculate initial guess +* + TEMP = RHO / TWO +* +* If ||Z||_2 is not one, then TEMP should be set to +* RHO * ||Z||_2^2 / TWO +* + TEMP1 = TEMP / ( D( N )+SQRT( D( N )*D( N )+TEMP ) ) + DO 10 J = 1, N + WORK( J ) = D( J ) + D( N ) + TEMP1 + DELTA( J ) = ( D( J )-D( N ) ) - TEMP1 + 10 CONTINUE +* + PSI = ZERO + DO 20 J = 1, N - 2 + PSI = PSI + Z( J )*Z( J ) / ( DELTA( J )*WORK( J ) ) + 20 CONTINUE +* + C = RHOINV + PSI + W = C + Z( II )*Z( II ) / ( DELTA( II )*WORK( II ) ) + + $ Z( N )*Z( N ) / ( DELTA( N )*WORK( N ) ) +* + IF( W.LE.ZERO ) THEN + TEMP1 = SQRT( D( N )*D( N )+RHO ) + TEMP = Z( N-1 )*Z( N-1 ) / ( ( D( N-1 )+TEMP1 )* + $ ( D( N )-D( N-1 )+RHO / ( D( N )+TEMP1 ) ) ) + + $ Z( N )*Z( N ) / RHO +* +* The following TAU is to approximate +* SIGMA_n^2 - D( N )*D( N ) +* + IF( C.LE.TEMP ) THEN + TAU = RHO + ELSE + DELSQ = ( D( N )-D( N-1 ) )*( D( N )+D( N-1 ) ) + A = -C*DELSQ + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N ) + B = Z( N )*Z( N )*DELSQ + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A ) + ELSE + TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C ) + END IF + END IF +* +* It can be proved that +* D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU <= D(N)^2+RHO +* + ELSE + DELSQ = ( D( N )-D( N-1 ) )*( D( N )+D( N-1 ) ) + A = -C*DELSQ + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N ) + B = Z( N )*Z( N )*DELSQ +* +* The following TAU is to approximate +* SIGMA_n^2 - D( N )*D( N ) +* + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A ) + ELSE + TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C ) + END IF +* +* It can be proved that +* D(N)^2 < D(N)^2+TAU < SIGMA(N)^2 < D(N)^2+RHO/2 +* + END IF +* +* The following ETA is to approximate SIGMA_n - D( N ) +* + ETA = TAU / ( D( N )+SQRT( D( N )*D( N )+TAU ) ) +* + SIGMA = D( N ) + ETA + DO 30 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - ETA + WORK( J ) = D( J ) + D( I ) + ETA + 30 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 40 J = 1, II + TEMP = Z( J ) / ( DELTA( J )*WORK( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 40 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / ( DELTA( N )*WORK( N ) ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + GO TO 240 + END IF +* +* Calculate the new step +* + NITER = NITER + 1 + DTNSQ1 = WORK( N-1 )*DELTA( N-1 ) + DTNSQ = WORK( N )*DELTA( N ) + C = W - DTNSQ1*DPSI - DTNSQ*DPHI + A = ( DTNSQ+DTNSQ1 )*W - DTNSQ*DTNSQ1*( DPSI+DPHI ) + B = DTNSQ*DTNSQ1*W + IF( C.LT.ZERO ) + $ C = ABS( C ) + IF( C.EQ.ZERO ) THEN + ETA = RHO - SIGMA*SIGMA + ELSE IF( A.GE.ZERO ) THEN + ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GT.ZERO ) + $ ETA = -W / ( DPSI+DPHI ) + TEMP = ETA - DTNSQ + IF( TEMP.GT.RHO ) + $ ETA = RHO + DTNSQ +* + TAU = TAU + ETA + ETA = ETA / ( SIGMA+SQRT( ETA+SIGMA*SIGMA ) ) + DO 50 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + WORK( J ) = WORK( J ) + ETA + 50 CONTINUE +* + SIGMA = SIGMA + ETA +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 60 J = 1, II + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 60 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / ( WORK( N )*DELTA( N ) ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI +* +* Main loop to update the values of the array DELTA +* + ITER = NITER + 1 +* + DO 90 NITER = ITER, MAXIT +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + GO TO 240 + END IF +* +* Calculate the new step +* + DTNSQ1 = WORK( N-1 )*DELTA( N-1 ) + DTNSQ = WORK( N )*DELTA( N ) + C = W - DTNSQ1*DPSI - DTNSQ*DPHI + A = ( DTNSQ+DTNSQ1 )*W - DTNSQ1*DTNSQ*( DPSI+DPHI ) + B = DTNSQ1*DTNSQ*W + IF( A.GE.ZERO ) THEN + ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GT.ZERO ) + $ ETA = -W / ( DPSI+DPHI ) + TEMP = ETA - DTNSQ + IF( TEMP.LE.ZERO ) + $ ETA = ETA / TWO +* + TAU = TAU + ETA + ETA = ETA / ( SIGMA+SQRT( ETA+SIGMA*SIGMA ) ) + DO 70 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + WORK( J ) = WORK( J ) + ETA + 70 CONTINUE +* + SIGMA = SIGMA + ETA +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 80 J = 1, II + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 80 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / ( WORK( N )*DELTA( N ) ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI + 90 CONTINUE +* +* Return with INFO = 1, NITER = MAXIT and not converged +* + INFO = 1 + GO TO 240 +* +* End for the case I = N +* + ELSE +* +* The case for I < N +* + NITER = 1 + IP1 = I + 1 +* +* Calculate initial guess +* + DELSQ = ( D( IP1 )-D( I ) )*( D( IP1 )+D( I ) ) + DELSQ2 = DELSQ / TWO + TEMP = DELSQ2 / ( D( I )+SQRT( D( I )*D( I )+DELSQ2 ) ) + DO 100 J = 1, N + WORK( J ) = D( J ) + D( I ) + TEMP + DELTA( J ) = ( D( J )-D( I ) ) - TEMP + 100 CONTINUE +* + PSI = ZERO + DO 110 J = 1, I - 1 + PSI = PSI + Z( J )*Z( J ) / ( WORK( J )*DELTA( J ) ) + 110 CONTINUE +* + PHI = ZERO + DO 120 J = N, I + 2, -1 + PHI = PHI + Z( J )*Z( J ) / ( WORK( J )*DELTA( J ) ) + 120 CONTINUE + C = RHOINV + PSI + PHI + W = C + Z( I )*Z( I ) / ( WORK( I )*DELTA( I ) ) + + $ Z( IP1 )*Z( IP1 ) / ( WORK( IP1 )*DELTA( IP1 ) ) +* + IF( W.GT.ZERO ) THEN +* +* d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 +* +* We choose d(i) as origin. +* + ORGATI = .TRUE. + SG2LB = ZERO + SG2UB = DELSQ2 + A = C*DELSQ + Z( I )*Z( I ) + Z( IP1 )*Z( IP1 ) + B = Z( I )*Z( I )*DELSQ + IF( A.GT.ZERO ) THEN + TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + ELSE + TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + END IF +* +* TAU now is an estimation of SIGMA^2 - D( I )^2. The +* following, however, is the corresponding estimation of +* SIGMA - D( I ). +* + ETA = TAU / ( D( I )+SQRT( D( I )*D( I )+TAU ) ) + ELSE +* +* (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 +* +* We choose d(i+1) as origin. +* + ORGATI = .FALSE. + SG2LB = -DELSQ2 + SG2UB = ZERO + A = C*DELSQ - Z( I )*Z( I ) - Z( IP1 )*Z( IP1 ) + B = Z( IP1 )*Z( IP1 )*DELSQ + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( A-SQRT( ABS( A*A+FOUR*B*C ) ) ) + ELSE + TAU = -( A+SQRT( ABS( A*A+FOUR*B*C ) ) ) / ( TWO*C ) + END IF +* +* TAU now is an estimation of SIGMA^2 - D( IP1 )^2. The +* following, however, is the corresponding estimation of +* SIGMA - D( IP1 ). +* + ETA = TAU / ( D( IP1 )+SQRT( ABS( D( IP1 )*D( IP1 )+ + $ TAU ) ) ) + END IF +* + IF( ORGATI ) THEN + II = I + SIGMA = D( I ) + ETA + DO 130 J = 1, N + WORK( J ) = D( J ) + D( I ) + ETA + DELTA( J ) = ( D( J )-D( I ) ) - ETA + 130 CONTINUE + ELSE + II = I + 1 + SIGMA = D( IP1 ) + ETA + DO 140 J = 1, N + WORK( J ) = D( J ) + D( IP1 ) + ETA + DELTA( J ) = ( D( J )-D( IP1 ) ) - ETA + 140 CONTINUE + END IF + IIM1 = II - 1 + IIP1 = II + 1 +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 150 J = 1, IIM1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 150 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 160 J = N, IIP1, -1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 160 CONTINUE +* + W = RHOINV + PHI + PSI +* +* W is the value of the secular function with +* its ii-th element removed. +* + SWTCH3 = .FALSE. + IF( ORGATI ) THEN + IF( W.LT.ZERO ) + $ SWTCH3 = .TRUE. + ELSE + IF( W.GT.ZERO ) + $ SWTCH3 = .TRUE. + END IF + IF( II.EQ.1 .OR. II.EQ.N ) + $ SWTCH3 = .FALSE. +* + TEMP = Z( II ) / ( WORK( II )*DELTA( II ) ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = W + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU )*DW +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + GO TO 240 + END IF +* + IF( W.LE.ZERO ) THEN + SG2LB = MAX( SG2LB, TAU ) + ELSE + SG2UB = MIN( SG2UB, TAU ) + END IF +* +* Calculate the new step +* + NITER = NITER + 1 + IF( .NOT.SWTCH3 ) THEN + DTIPSQ = WORK( IP1 )*DELTA( IP1 ) + DTISQ = WORK( I )*DELTA( I ) + IF( ORGATI ) THEN + C = W - DTIPSQ*DW + DELSQ*( Z( I ) / DTISQ )**2 + ELSE + C = W - DTISQ*DW - DELSQ*( Z( IP1 ) / DTIPSQ )**2 + END IF + A = ( DTIPSQ+DTISQ )*W - DTIPSQ*DTISQ*DW + B = DTIPSQ*DTISQ*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DTIPSQ*DTIPSQ*( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + DTISQ*DTISQ*( DPSI+DPHI ) + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + ELSE +* +* Interpolation using THREE most relevant poles +* + DTIIM = WORK( IIM1 )*DELTA( IIM1 ) + DTIIP = WORK( IIP1 )*DELTA( IIP1 ) + TEMP = RHOINV + PSI + PHI + IF( ORGATI ) THEN + TEMP1 = Z( IIM1 ) / DTIIM + TEMP1 = TEMP1*TEMP1 + C = ( TEMP - DTIIP*( DPSI+DPHI ) ) - + $ ( D( IIM1 )-D( IIP1 ) )*( D( IIM1 )+D( IIP1 ) )*TEMP1 + ZZ( 1 ) = Z( IIM1 )*Z( IIM1 ) + IF( DPSI.LT.TEMP1 ) THEN + ZZ( 3 ) = DTIIP*DTIIP*DPHI + ELSE + ZZ( 3 ) = DTIIP*DTIIP*( ( DPSI-TEMP1 )+DPHI ) + END IF + ELSE + TEMP1 = Z( IIP1 ) / DTIIP + TEMP1 = TEMP1*TEMP1 + C = ( TEMP - DTIIM*( DPSI+DPHI ) ) - + $ ( D( IIP1 )-D( IIM1 ) )*( D( IIM1 )+D( IIP1 ) )*TEMP1 + IF( DPHI.LT.TEMP1 ) THEN + ZZ( 1 ) = DTIIM*DTIIM*DPSI + ELSE + ZZ( 1 ) = DTIIM*DTIIM*( DPSI+( DPHI-TEMP1 ) ) + END IF + ZZ( 3 ) = Z( IIP1 )*Z( IIP1 ) + END IF + ZZ( 2 ) = Z( II )*Z( II ) + DD( 1 ) = DTIIM + DD( 2 ) = DELTA( II )*WORK( II ) + DD( 3 ) = DTIIP + CALL SLAED6( NITER, ORGATI, C, DD, ZZ, W, ETA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 240 + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GE.ZERO ) + $ ETA = -W / DW + IF( ORGATI ) THEN + TEMP1 = WORK( I )*DELTA( I ) + TEMP = ETA - TEMP1 + ELSE + TEMP1 = WORK( IP1 )*DELTA( IP1 ) + TEMP = ETA - TEMP1 + END IF + IF( TEMP.GT.SG2UB .OR. TEMP.LT.SG2LB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( SG2UB-TAU ) / TWO + ELSE + ETA = ( SG2LB-TAU ) / TWO + END IF + END IF +* + TAU = TAU + ETA + ETA = ETA / ( SIGMA+SQRT( SIGMA*SIGMA+ETA ) ) +* + PREW = W +* + SIGMA = SIGMA + ETA + DO 170 J = 1, N + WORK( J ) = WORK( J ) + ETA + DELTA( J ) = DELTA( J ) - ETA + 170 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 180 J = 1, IIM1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 180 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 190 J = N, IIP1, -1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 190 CONTINUE +* + TEMP = Z( II ) / ( WORK( II )*DELTA( II ) ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = RHOINV + PHI + PSI + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU )*DW +* + IF( W.LE.ZERO ) THEN + SG2LB = MAX( SG2LB, TAU ) + ELSE + SG2UB = MIN( SG2UB, TAU ) + END IF +* + SWTCH = .FALSE. + IF( ORGATI ) THEN + IF( -W.GT.ABS( PREW ) / TEN ) + $ SWTCH = .TRUE. + ELSE + IF( W.GT.ABS( PREW ) / TEN ) + $ SWTCH = .TRUE. + END IF +* +* Main loop to update the values of the array DELTA and WORK +* + ITER = NITER + 1 +* + DO 230 NITER = ITER, MAXIT +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + GO TO 240 + END IF +* +* Calculate the new step +* + IF( .NOT.SWTCH3 ) THEN + DTIPSQ = WORK( IP1 )*DELTA( IP1 ) + DTISQ = WORK( I )*DELTA( I ) + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + C = W - DTIPSQ*DW + DELSQ*( Z( I ) / DTISQ )**2 + ELSE + C = W - DTISQ*DW - DELSQ*( Z( IP1 ) / DTIPSQ )**2 + END IF + ELSE + TEMP = Z( II ) / ( WORK( II )*DELTA( II ) ) + IF( ORGATI ) THEN + DPSI = DPSI + TEMP*TEMP + ELSE + DPHI = DPHI + TEMP*TEMP + END IF + C = W - DTISQ*DPSI - DTIPSQ*DPHI + END IF + A = ( DTIPSQ+DTISQ )*W - DTIPSQ*DTISQ*DW + B = DTIPSQ*DTISQ*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DTIPSQ*DTIPSQ* + $ ( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + + $ DTISQ*DTISQ*( DPSI+DPHI ) + END IF + ELSE + A = DTISQ*DTISQ*DPSI + DTIPSQ*DTIPSQ*DPHI + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + ELSE +* +* Interpolation using THREE most relevant poles +* + DTIIM = WORK( IIM1 )*DELTA( IIM1 ) + DTIIP = WORK( IIP1 )*DELTA( IIP1 ) + TEMP = RHOINV + PSI + PHI + IF( SWTCH ) THEN + C = TEMP - DTIIM*DPSI - DTIIP*DPHI + ZZ( 1 ) = DTIIM*DTIIM*DPSI + ZZ( 3 ) = DTIIP*DTIIP*DPHI + ELSE + IF( ORGATI ) THEN + TEMP1 = Z( IIM1 ) / DTIIM + TEMP1 = TEMP1*TEMP1 + TEMP2 = ( D( IIM1 )-D( IIP1 ) )* + $ ( D( IIM1 )+D( IIP1 ) )*TEMP1 + C = TEMP - DTIIP*( DPSI+DPHI ) - TEMP2 + ZZ( 1 ) = Z( IIM1 )*Z( IIM1 ) + IF( DPSI.LT.TEMP1 ) THEN + ZZ( 3 ) = DTIIP*DTIIP*DPHI + ELSE + ZZ( 3 ) = DTIIP*DTIIP*( ( DPSI-TEMP1 )+DPHI ) + END IF + ELSE + TEMP1 = Z( IIP1 ) / DTIIP + TEMP1 = TEMP1*TEMP1 + TEMP2 = ( D( IIP1 )-D( IIM1 ) )* + $ ( D( IIM1 )+D( IIP1 ) )*TEMP1 + C = TEMP - DTIIM*( DPSI+DPHI ) - TEMP2 + IF( DPHI.LT.TEMP1 ) THEN + ZZ( 1 ) = DTIIM*DTIIM*DPSI + ELSE + ZZ( 1 ) = DTIIM*DTIIM*( DPSI+( DPHI-TEMP1 ) ) + END IF + ZZ( 3 ) = Z( IIP1 )*Z( IIP1 ) + END IF + END IF + DD( 1 ) = DTIIM + DD( 2 ) = DELTA( II )*WORK( II ) + DD( 3 ) = DTIIP + CALL SLAED6( NITER, ORGATI, C, DD, ZZ, W, ETA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 240 + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GE.ZERO ) + $ ETA = -W / DW + IF( ORGATI ) THEN + TEMP1 = WORK( I )*DELTA( I ) + TEMP = ETA - TEMP1 + ELSE + TEMP1 = WORK( IP1 )*DELTA( IP1 ) + TEMP = ETA - TEMP1 + END IF + IF( TEMP.GT.SG2UB .OR. TEMP.LT.SG2LB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( SG2UB-TAU ) / TWO + ELSE + ETA = ( SG2LB-TAU ) / TWO + END IF + END IF +* + TAU = TAU + ETA + ETA = ETA / ( SIGMA+SQRT( SIGMA*SIGMA+ETA ) ) +* + SIGMA = SIGMA + ETA + DO 200 J = 1, N + WORK( J ) = WORK( J ) + ETA + DELTA( J ) = DELTA( J ) - ETA + 200 CONTINUE +* + PREW = W +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 210 J = 1, IIM1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 210 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 220 J = N, IIP1, -1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 220 CONTINUE +* + TEMP = Z( II ) / ( WORK( II )*DELTA( II ) ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = RHOINV + PHI + PSI + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU )*DW + IF( W*PREW.GT.ZERO .AND. ABS( W ).GT.ABS( PREW ) / TEN ) + $ SWTCH = .NOT.SWTCH +* + IF( W.LE.ZERO ) THEN + SG2LB = MAX( SG2LB, TAU ) + ELSE + SG2UB = MIN( SG2UB, TAU ) + END IF +* + 230 CONTINUE +* +* Return with INFO = 1, NITER = MAXIT and not converged +* + INFO = 1 +* + END IF +* + 240 CONTINUE + RETURN +* +* End of SLASD4 +* + END diff --git a/costa/native/external/lapack/slasd5.f b/costa/native/external/lapack/slasd5.f new file mode 100644 index 000000000..7325bfe61 --- /dev/null +++ b/costa/native/external/lapack/slasd5.f @@ -0,0 +1,164 @@ + SUBROUTINE SLASD5( I, D, Z, DELTA, RHO, DSIGMA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER I + REAL DSIGMA, RHO +* .. +* .. Array Arguments .. + REAL D( 2 ), DELTA( 2 ), WORK( 2 ), Z( 2 ) +* .. +* +* Purpose +* ======= +* +* This subroutine computes the square root of the I-th eigenvalue +* of a positive symmetric rank-one modification of a 2-by-2 diagonal +* matrix +* +* diag( D ) * diag( D ) + RHO * Z * transpose(Z) . +* +* The diagonal entries in the array D are assumed to satisfy +* +* 0 <= D(i) < D(j) for i < j . +* +* We also assume RHO > 0 and that the Euclidean norm of the vector +* Z is one. +* +* Arguments +* ========= +* +* I (input) INTEGER +* The index of the eigenvalue to be computed. I = 1 or I = 2. +* +* D (input) REAL array, dimension ( 2 ) +* The original eigenvalues. We assume 0 <= D(1) < D(2). +* +* Z (input) REAL array, dimension ( 2 ) +* The components of the updating vector. +* +* DELTA (output) REAL array, dimension ( 2 ) +* Contains (D(j) - lambda_I) in its j-th component. +* The vector DELTA contains the information necessary +* to construct the eigenvectors. +* +* RHO (input) REAL +* The scalar in the symmetric updating formula. +* +* DSIGMA (output) REAL +* The computed lambda_I, the I-th updated eigenvalue. +* +* WORK (workspace) REAL array, dimension ( 2 ) +* WORK contains (D(j) + sigma_I) in its j-th component. +* +* Further Details +* =============== +* +* Based on contributions by +* Ren-Cang Li, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, THREE, FOUR + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0, + $ THREE = 3.0E+0, FOUR = 4.0E+0 ) +* .. +* .. Local Scalars .. + REAL B, C, DEL, DELSQ, TAU, W +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + DEL = D( 2 ) - D( 1 ) + DELSQ = DEL*( D( 2 )+D( 1 ) ) + IF( I.EQ.1 ) THEN + W = ONE + FOUR*RHO*( Z( 2 )*Z( 2 ) / ( D( 1 )+THREE*D( 2 ) )- + $ Z( 1 )*Z( 1 ) / ( THREE*D( 1 )+D( 2 ) ) ) / DEL + IF( W.GT.ZERO ) THEN + B = DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 1 )*Z( 1 )*DELSQ +* +* B > ZERO, always +* +* The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 ) +* + TAU = TWO*C / ( B+SQRT( ABS( B*B-FOUR*C ) ) ) +* +* The following TAU is DSIGMA - D( 1 ) +* + TAU = TAU / ( D( 1 )+SQRT( D( 1 )*D( 1 )+TAU ) ) + DSIGMA = D( 1 ) + TAU + DELTA( 1 ) = -TAU + DELTA( 2 ) = DEL - TAU + WORK( 1 ) = TWO*D( 1 ) + TAU + WORK( 2 ) = ( D( 1 )+TAU ) + D( 2 ) +* DELTA( 1 ) = -Z( 1 ) / TAU +* DELTA( 2 ) = Z( 2 ) / ( DEL-TAU ) + ELSE + B = -DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 2 )*Z( 2 )*DELSQ +* +* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) +* + IF( B.GT.ZERO ) THEN + TAU = -TWO*C / ( B+SQRT( B*B+FOUR*C ) ) + ELSE + TAU = ( B-SQRT( B*B+FOUR*C ) ) / TWO + END IF +* +* The following TAU is DSIGMA - D( 2 ) +* + TAU = TAU / ( D( 2 )+SQRT( ABS( D( 2 )*D( 2 )+TAU ) ) ) + DSIGMA = D( 2 ) + TAU + DELTA( 1 ) = -( DEL+TAU ) + DELTA( 2 ) = -TAU + WORK( 1 ) = D( 1 ) + TAU + D( 2 ) + WORK( 2 ) = TWO*D( 2 ) + TAU +* DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) +* DELTA( 2 ) = -Z( 2 ) / TAU + END IF +* TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) +* DELTA( 1 ) = DELTA( 1 ) / TEMP +* DELTA( 2 ) = DELTA( 2 ) / TEMP + ELSE +* +* Now I=2 +* + B = -DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 2 )*Z( 2 )*DELSQ +* +* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) +* + IF( B.GT.ZERO ) THEN + TAU = ( B+SQRT( B*B+FOUR*C ) ) / TWO + ELSE + TAU = TWO*C / ( -B+SQRT( B*B+FOUR*C ) ) + END IF +* +* The following TAU is DSIGMA - D( 2 ) +* + TAU = TAU / ( D( 2 )+SQRT( D( 2 )*D( 2 )+TAU ) ) + DSIGMA = D( 2 ) + TAU + DELTA( 1 ) = -( DEL+TAU ) + DELTA( 2 ) = -TAU + WORK( 1 ) = D( 1 ) + TAU + D( 2 ) + WORK( 2 ) = TWO*D( 2 ) + TAU +* DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) +* DELTA( 2 ) = -Z( 2 ) / TAU +* TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) +* DELTA( 1 ) = DELTA( 1 ) / TEMP +* DELTA( 2 ) = DELTA( 2 ) / TEMP + END IF + RETURN +* +* End of SLASD5 +* + END diff --git a/costa/native/external/lapack/slasd6.f b/costa/native/external/lapack/slasd6.f new file mode 100644 index 000000000..e25223f66 --- /dev/null +++ b/costa/native/external/lapack/slasd6.f @@ -0,0 +1,306 @@ + SUBROUTINE SLASD6( ICOMPQ, NL, NR, SQRE, D, VF, VL, ALPHA, BETA, + $ IDXQ, PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, + $ LDGNUM, POLES, DIFL, DIFR, Z, K, C, S, WORK, + $ IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL, + $ NR, SQRE + REAL ALPHA, BETA, C, S +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), IDXQ( * ), IWORK( * ), + $ PERM( * ) + REAL D( * ), DIFL( * ), DIFR( * ), + $ GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ), + $ VF( * ), VL( * ), WORK( * ), Z( * ) +* .. +* +* Purpose +* ======= +* +* SLASD6 computes the SVD of an updated upper bidiagonal matrix B +* obtained by merging two smaller ones by appending a row. This +* routine is used only for the problem which requires all singular +* values and optionally singular vector matrices in factored form. +* B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. +* A related subroutine, SLASD1, handles the case in which all singular +* values and singular vectors of the bidiagonal matrix are desired. +* +* SLASD6 computes the SVD as follows: +* +* ( D1(in) 0 0 0 ) +* B = U(in) * ( Z1' a Z2' b ) * VT(in) +* ( 0 0 D2(in) 0 ) +* +* = U(out) * ( D(out) 0) * VT(out) +* +* where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M +* with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros +* elsewhere; and the entry b is empty if SQRE = 0. +* +* The singular values of B can be computed using D1, D2, the first +* components of all the right singular vectors of the lower block, and +* the last components of all the right singular vectors of the upper +* block. These components are stored and updated in VF and VL, +* respectively, in SLASD6. Hence U and VT are not explicitly +* referenced. +* +* The singular values are stored in D. The algorithm consists of two +* stages: +* +* The first stage consists of deflating the size of the problem +* when there are multiple singular values or if there is a zero +* in the Z vector. For each such occurence the dimension of the +* secular equation problem is reduced by one. This stage is +* performed by the routine SLASD7. +* +* The second stage consists of calculating the updated +* singular values. This is done by finding the roots of the +* secular equation via the routine SLASD4 (as called by SLASD8). +* This routine also updates VF and VL and computes the distances +* between the updated singular values and the old singular +* values. +* +* SLASD6 is called from SLASDA. +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether singular vectors are to be computed in +* factored form: +* = 0: Compute singular values only. +* = 1: Compute singular vectors in factored form as well. +* +* NL (input) INTEGER +* The row dimension of the upper block. NL >= 1. +* +* NR (input) INTEGER +* The row dimension of the lower block. NR >= 1. +* +* SQRE (input) INTEGER +* = 0: the lower block is an NR-by-NR square matrix. +* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +* +* The bidiagonal matrix has row dimension N = NL + NR + 1, +* and column dimension M = N + SQRE. +* +* D (input/output) REAL array, dimension ( NL+NR+1 ). +* On entry D(1:NL,1:NL) contains the singular values of the +* upper block, and D(NL+2:N) contains the singular values +* of the lower block. On exit D(1:N) contains the singular +* values of the modified matrix. +* +* VF (input/output) REAL array, dimension ( M ) +* On entry, VF(1:NL+1) contains the first components of all +* right singular vectors of the upper block; and VF(NL+2:M) +* contains the first components of all right singular vectors +* of the lower block. On exit, VF contains the first components +* of all right singular vectors of the bidiagonal matrix. +* +* VL (input/output) REAL array, dimension ( M ) +* On entry, VL(1:NL+1) contains the last components of all +* right singular vectors of the upper block; and VL(NL+2:M) +* contains the last components of all right singular vectors of +* the lower block. On exit, VL contains the last components of +* all right singular vectors of the bidiagonal matrix. +* +* ALPHA (input) REAL +* Contains the diagonal element associated with the added row. +* +* BETA (input) REAL +* Contains the off-diagonal element associated with the added +* row. +* +* IDXQ (output) INTEGER array, dimension ( N ) +* This contains the permutation which will reintegrate the +* subproblem just solved back into sorted order, i.e. +* D( IDXQ( I = 1, N ) ) will be in ascending order. +* +* PERM (output) INTEGER array, dimension ( N ) +* The permutations (from deflation and sorting) to be applied +* to each block. Not referenced if ICOMPQ = 0. +* +* GIVPTR (output) INTEGER +* The number of Givens rotations which took place in this +* subproblem. Not referenced if ICOMPQ = 0. +* +* GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) +* Each pair of numbers indicates a pair of columns to take place +* in a Givens rotation. Not referenced if ICOMPQ = 0. +* +* LDGCOL (input) INTEGER +* leading dimension of GIVCOL, must be at least N. +* +* GIVNUM (output) REAL array, dimension ( LDGNUM, 2 ) +* Each number indicates the C or S value to be used in the +* corresponding Givens rotation. Not referenced if ICOMPQ = 0. +* +* LDGNUM (input) INTEGER +* The leading dimension of GIVNUM and POLES, must be at least N. +* +* POLES (output) REAL array, dimension ( LDGNUM, 2 ) +* On exit, POLES(1,*) is an array containing the new singular +* values obtained from solving the secular equation, and +* POLES(2,*) is an array containing the poles in the secular +* equation. Not referenced if ICOMPQ = 0. +* +* DIFL (output) REAL array, dimension ( N ) +* On exit, DIFL(I) is the distance between I-th updated +* (undeflated) singular value and the I-th (undeflated) old +* singular value. +* +* DIFR (output) REAL array, +* dimension ( LDGNUM, 2 ) if ICOMPQ = 1 and +* dimension ( N ) if ICOMPQ = 0. +* On exit, DIFR(I, 1) is the distance between I-th updated +* (undeflated) singular value and the I+1-th (undeflated) old +* singular value. +* +* If ICOMPQ = 1, DIFR(1:K,2) is an array containing the +* normalizing factors for the right singular vector matrix. +* +* See SLASD8 for details on DIFL and DIFR. +* +* Z (output) REAL array, dimension ( M ) +* The first elements of this array contain the components +* of the deflation-adjusted updating row vector. +* +* K (output) INTEGER +* Contains the dimension of the non-deflated matrix, +* This is the order of the related secular equation. 1 <= K <=N. +* +* C (output) REAL +* C contains garbage if SQRE =0 and the C-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* S (output) REAL +* S contains garbage if SQRE =0 and the S-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* WORK (workspace) REAL array, dimension ( 4 * M ) +* +* IWORK (workspace) INTEGER array, dimension ( 3 * N ) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an singular value did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IDX, IDXC, IDXP, ISIGMA, IVFW, IVLW, IW, M, + $ N, N1, N2 + REAL ORGNRM +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLAMRG, SLASCL, SLASD7, SLASD8, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + N = NL + NR + 1 + M = N + SQRE +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( NL.LT.1 ) THEN + INFO = -2 + ELSE IF( NR.LT.1 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -14 + ELSE IF( LDGNUM.LT.N ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASD6', -INFO ) + RETURN + END IF +* +* The following values are for bookkeeping purposes only. They are +* integer pointers which indicate the portion of the workspace +* used by a particular array in SLASD7 and SLASD8. +* + ISIGMA = 1 + IW = ISIGMA + N + IVFW = IW + M + IVLW = IVFW + M +* + IDX = 1 + IDXC = IDX + N + IDXP = IDXC + N +* +* Scale. +* + ORGNRM = MAX( ABS( ALPHA ), ABS( BETA ) ) + D( NL+1 ) = ZERO + DO 10 I = 1, N + IF( ABS( D( I ) ).GT.ORGNRM ) THEN + ORGNRM = ABS( D( I ) ) + END IF + 10 CONTINUE + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO ) + ALPHA = ALPHA / ORGNRM + BETA = BETA / ORGNRM +* +* Sort and Deflate singular values. +* + CALL SLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, WORK( IW ), VF, + $ WORK( IVFW ), VL, WORK( IVLW ), ALPHA, BETA, + $ WORK( ISIGMA ), IWORK( IDX ), IWORK( IDXP ), IDXQ, + $ PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, C, S, + $ INFO ) +* +* Solve Secular Equation, compute DIFL, DIFR, and update VF, VL. +* + CALL SLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDGNUM, + $ WORK( ISIGMA ), WORK( IW ), INFO ) +* +* Save the poles if ICOMPQ = 1. +* + IF( ICOMPQ.EQ.1 ) THEN + CALL SCOPY( K, D, 1, POLES( 1, 1 ), 1 ) + CALL SCOPY( K, WORK( ISIGMA ), 1, POLES( 1, 2 ), 1 ) + END IF +* +* Unscale. +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) +* +* Prepare the IDXQ sorting permutation. +* + N1 = K + N2 = N - K + CALL SLAMRG( N1, N2, D, 1, -1, IDXQ ) +* + RETURN +* +* End of SLASD6 +* + END diff --git a/costa/native/external/lapack/slasd7.f b/costa/native/external/lapack/slasd7.f new file mode 100644 index 000000000..0b3e6442a --- /dev/null +++ b/costa/native/external/lapack/slasd7.f @@ -0,0 +1,445 @@ + SUBROUTINE SLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, ZW, VF, VFW, VL, + $ VLW, ALPHA, BETA, DSIGMA, IDX, IDXP, IDXQ, + $ PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, + $ C, S, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL, + $ NR, SQRE + REAL ALPHA, BETA, C, S +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), IDX( * ), IDXP( * ), + $ IDXQ( * ), PERM( * ) + REAL D( * ), DSIGMA( * ), GIVNUM( LDGNUM, * ), + $ VF( * ), VFW( * ), VL( * ), VLW( * ), Z( * ), + $ ZW( * ) +* .. +* +* Purpose +* ======= +* +* SLASD7 merges the two sets of singular values together into a single +* sorted set. Then it tries to deflate the size of the problem. There +* are two ways in which deflation can occur: when two or more singular +* values are close together or if there is a tiny entry in the Z +* vector. For each such occurrence the order of the related +* secular equation problem is reduced by one. +* +* SLASD7 is called from SLASD6. +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether singular vectors are to be computed +* in compact form, as follows: +* = 0: Compute singular values only. +* = 1: Compute singular vectors of upper +* bidiagonal matrix in compact form. +* +* NL (input) INTEGER +* The row dimension of the upper block. NL >= 1. +* +* NR (input) INTEGER +* The row dimension of the lower block. NR >= 1. +* +* SQRE (input) INTEGER +* = 0: the lower block is an NR-by-NR square matrix. +* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +* +* The bidiagonal matrix has +* N = NL + NR + 1 rows and +* M = N + SQRE >= N columns. +* +* K (output) INTEGER +* Contains the dimension of the non-deflated matrix, this is +* the order of the related secular equation. 1 <= K <=N. +* +* D (input/output) REAL array, dimension ( N ) +* On entry D contains the singular values of the two submatrices +* to be combined. On exit D contains the trailing (N-K) updated +* singular values (those which were deflated) sorted into +* increasing order. +* +* Z (output) REAL array, dimension ( M ) +* On exit Z contains the updating row vector in the secular +* equation. +* +* ZW (workspace) REAL array, dimension ( M ) +* Workspace for Z. +* +* VF (input/output) REAL array, dimension ( M ) +* On entry, VF(1:NL+1) contains the first components of all +* right singular vectors of the upper block; and VF(NL+2:M) +* contains the first components of all right singular vectors +* of the lower block. On exit, VF contains the first components +* of all right singular vectors of the bidiagonal matrix. +* +* VFW (workspace) REAL array, dimension ( M ) +* Workspace for VF. +* +* VL (input/output) REAL array, dimension ( M ) +* On entry, VL(1:NL+1) contains the last components of all +* right singular vectors of the upper block; and VL(NL+2:M) +* contains the last components of all right singular vectors +* of the lower block. On exit, VL contains the last components +* of all right singular vectors of the bidiagonal matrix. +* +* VLW (workspace) REAL array, dimension ( M ) +* Workspace for VL. +* +* ALPHA (input) REAL +* Contains the diagonal element associated with the added row. +* +* BETA (input) REAL +* Contains the off-diagonal element associated with the added +* row. +* +* DSIGMA (output) REAL array, dimension ( N ) +* Contains a copy of the diagonal elements (K-1 singular values +* and one zero) in the secular equation. +* +* IDX (workspace) INTEGER array, dimension ( N ) +* This will contain the permutation used to sort the contents of +* D into ascending order. +* +* IDXP (workspace) INTEGER array, dimension ( N ) +* This will contain the permutation used to place deflated +* values of D at the end of the array. On output IDXP(2:K) +* points to the nondeflated D-values and IDXP(K+1:N) +* points to the deflated singular values. +* +* IDXQ (input) INTEGER array, dimension ( N ) +* This contains the permutation which separately sorts the two +* sub-problems in D into ascending order. Note that entries in +* the first half of this permutation must first be moved one +* position backward; and entries in the second half +* must first have NL+1 added to their values. +* +* PERM (output) INTEGER array, dimension ( N ) +* The permutations (from deflation and sorting) to be applied +* to each singular block. Not referenced if ICOMPQ = 0. +* +* GIVPTR (output) INTEGER +* The number of Givens rotations which took place in this +* subproblem. Not referenced if ICOMPQ = 0. +* +* GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) +* Each pair of numbers indicates a pair of columns to take place +* in a Givens rotation. Not referenced if ICOMPQ = 0. +* +* LDGCOL (input) INTEGER +* The leading dimension of GIVCOL, must be at least N. +* +* GIVNUM (output) REAL array, dimension ( LDGNUM, 2 ) +* Each number indicates the C or S value to be used in the +* corresponding Givens rotation. Not referenced if ICOMPQ = 0. +* +* LDGNUM (input) INTEGER +* The leading dimension of GIVNUM, must be at least N. +* +* C (output) REAL +* C contains garbage if SQRE =0 and the C-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* S (output) REAL +* S contains garbage if SQRE =0 and the S-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, EIGHT + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0, + $ EIGHT = 8.0E+0 ) +* .. +* .. Local Scalars .. +* + INTEGER I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M, N, + $ NLP1, NLP2 + REAL EPS, HLFTOL, TAU, TOL, Z1 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLAMRG, SROT, XERBLA +* .. +* .. External Functions .. + REAL SLAMCH, SLAPY2 + EXTERNAL SLAMCH, SLAPY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + N = NL + NR + 1 + M = N + SQRE +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( NL.LT.1 ) THEN + INFO = -2 + ELSE IF( NR.LT.1 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -22 + ELSE IF( LDGNUM.LT.N ) THEN + INFO = -24 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASD7', -INFO ) + RETURN + END IF +* + NLP1 = NL + 1 + NLP2 = NL + 2 + IF( ICOMPQ.EQ.1 ) THEN + GIVPTR = 0 + END IF +* +* Generate the first part of the vector Z and move the singular +* values in the first part of D one position backward. +* + Z1 = ALPHA*VL( NLP1 ) + VL( NLP1 ) = ZERO + TAU = VF( NLP1 ) + DO 10 I = NL, 1, -1 + Z( I+1 ) = ALPHA*VL( I ) + VL( I ) = ZERO + VF( I+1 ) = VF( I ) + D( I+1 ) = D( I ) + IDXQ( I+1 ) = IDXQ( I ) + 1 + 10 CONTINUE + VF( 1 ) = TAU +* +* Generate the second part of the vector Z. +* + DO 20 I = NLP2, M + Z( I ) = BETA*VF( I ) + VF( I ) = ZERO + 20 CONTINUE +* +* Sort the singular values into increasing order +* + DO 30 I = NLP2, N + IDXQ( I ) = IDXQ( I ) + NLP1 + 30 CONTINUE +* +* DSIGMA, IDXC, IDXC, and ZW are used as storage space. +* + DO 40 I = 2, N + DSIGMA( I ) = D( IDXQ( I ) ) + ZW( I ) = Z( IDXQ( I ) ) + VFW( I ) = VF( IDXQ( I ) ) + VLW( I ) = VL( IDXQ( I ) ) + 40 CONTINUE +* + CALL SLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) ) +* + DO 50 I = 2, N + IDXI = 1 + IDX( I ) + D( I ) = DSIGMA( IDXI ) + Z( I ) = ZW( IDXI ) + VF( I ) = VFW( IDXI ) + VL( I ) = VLW( IDXI ) + 50 CONTINUE +* +* Calculate the allowable deflation tolerence +* + EPS = SLAMCH( 'Epsilon' ) + TOL = MAX( ABS( ALPHA ), ABS( BETA ) ) + TOL = EIGHT*EIGHT*EPS*MAX( ABS( D( N ) ), TOL ) +* +* There are 2 kinds of deflation -- first a value in the z-vector +* is small, second two (or more) singular values are very close +* together (their difference is small). +* +* If the value in the z-vector is small, we simply permute the +* array so that the corresponding singular value is moved to the +* end. +* +* If two values in the D-vector are close, we perform a two-sided +* rotation designed to make one of the corresponding z-vector +* entries zero, and then permute the array so that the deflated +* singular value is moved to the end. +* +* If there are multiple singular values then the problem deflates. +* Here the number of equal singular values are found. As each equal +* singular value is found, an elementary reflector is computed to +* rotate the corresponding singular subspace so that the +* corresponding components of Z are zero in this new basis. +* + K = 1 + K2 = N + 1 + DO 60 J = 2, N + IF( ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + IDXP( K2 ) = J + IF( J.EQ.N ) + $ GO TO 100 + ELSE + JPREV = J + GO TO 70 + END IF + 60 CONTINUE + 70 CONTINUE + J = JPREV + 80 CONTINUE + J = J + 1 + IF( J.GT.N ) + $ GO TO 90 + IF( ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + IDXP( K2 ) = J + ELSE +* +* Check if singular values are close enough to allow deflation. +* + IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN +* +* Deflation is possible. +* + S = Z( JPREV ) + C = Z( J ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = SLAPY2( C, S ) + Z( J ) = TAU + Z( JPREV ) = ZERO + C = C / TAU + S = -S / TAU +* +* Record the appropriate Givens rotation +* + IF( ICOMPQ.EQ.1 ) THEN + GIVPTR = GIVPTR + 1 + IDXJP = IDXQ( IDX( JPREV )+1 ) + IDXJ = IDXQ( IDX( J )+1 ) + IF( IDXJP.LE.NLP1 ) THEN + IDXJP = IDXJP - 1 + END IF + IF( IDXJ.LE.NLP1 ) THEN + IDXJ = IDXJ - 1 + END IF + GIVCOL( GIVPTR, 2 ) = IDXJP + GIVCOL( GIVPTR, 1 ) = IDXJ + GIVNUM( GIVPTR, 2 ) = C + GIVNUM( GIVPTR, 1 ) = S + END IF + CALL SROT( 1, VF( JPREV ), 1, VF( J ), 1, C, S ) + CALL SROT( 1, VL( JPREV ), 1, VL( J ), 1, C, S ) + K2 = K2 - 1 + IDXP( K2 ) = JPREV + JPREV = J + ELSE + K = K + 1 + ZW( K ) = Z( JPREV ) + DSIGMA( K ) = D( JPREV ) + IDXP( K ) = JPREV + JPREV = J + END IF + END IF + GO TO 80 + 90 CONTINUE +* +* Record the last singular value. +* + K = K + 1 + ZW( K ) = Z( JPREV ) + DSIGMA( K ) = D( JPREV ) + IDXP( K ) = JPREV +* + 100 CONTINUE +* +* Sort the singular values into DSIGMA. The singular values which +* were not deflated go into the first K slots of DSIGMA, except +* that DSIGMA(1) is treated separately. +* + DO 110 J = 2, N + JP = IDXP( J ) + DSIGMA( J ) = D( JP ) + VFW( J ) = VF( JP ) + VLW( J ) = VL( JP ) + 110 CONTINUE + IF( ICOMPQ.EQ.1 ) THEN + DO 120 J = 2, N + JP = IDXP( J ) + PERM( J ) = IDXQ( IDX( JP )+1 ) + IF( PERM( J ).LE.NLP1 ) THEN + PERM( J ) = PERM( J ) - 1 + END IF + 120 CONTINUE + END IF +* +* The deflated singular values go back into the last N - K slots of +* D. +* + CALL SCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 ) +* +* Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and +* VL(M). +* + DSIGMA( 1 ) = ZERO + HLFTOL = TOL / TWO + IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL ) + $ DSIGMA( 2 ) = HLFTOL + IF( M.GT.N ) THEN + Z( 1 ) = SLAPY2( Z1, Z( M ) ) + IF( Z( 1 ).LE.TOL ) THEN + C = ONE + S = ZERO + Z( 1 ) = TOL + ELSE + C = Z1 / Z( 1 ) + S = -Z( M ) / Z( 1 ) + END IF + CALL SROT( 1, VF( M ), 1, VF( 1 ), 1, C, S ) + CALL SROT( 1, VL( M ), 1, VL( 1 ), 1, C, S ) + ELSE + IF( ABS( Z1 ).LE.TOL ) THEN + Z( 1 ) = TOL + ELSE + Z( 1 ) = Z1 + END IF + END IF +* +* Restore Z, VF, and VL. +* + CALL SCOPY( K-1, ZW( 2 ), 1, Z( 2 ), 1 ) + CALL SCOPY( N-1, VFW( 2 ), 1, VF( 2 ), 1 ) + CALL SCOPY( N-1, VLW( 2 ), 1, VL( 2 ), 1 ) +* + RETURN +* +* End of SLASD7 +* + END diff --git a/costa/native/external/lapack/slasd8.f b/costa/native/external/lapack/slasd8.f new file mode 100644 index 000000000..803afc2a8 --- /dev/null +++ b/costa/native/external/lapack/slasd8.f @@ -0,0 +1,254 @@ + SUBROUTINE SLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDDIFR, + $ DSIGMA, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, K, LDDIFR +* .. +* .. Array Arguments .. + REAL D( * ), DIFL( * ), DIFR( LDDIFR, * ), + $ DSIGMA( * ), VF( * ), VL( * ), WORK( * ), + $ Z( * ) +* .. +* +* Purpose +* ======= +* +* SLASD8 finds the square roots of the roots of the secular equation, +* as defined by the values in DSIGMA and Z. It makes the appropriate +* calls to SLASD4, and stores, for each element in D, the distance +* to its two nearest poles (elements in DSIGMA). It also updates +* the arrays VF and VL, the first and last components of all the +* right singular vectors of the original bidiagonal matrix. +* +* SLASD8 is called from SLASD6. +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether singular vectors are to be computed in +* factored form in the calling routine: +* = 0: Compute singular values only. +* = 1: Compute singular vectors in factored form as well. +* +* K (input) INTEGER +* The number of terms in the rational function to be solved +* by SLASD4. K >= 1. +* +* D (output) REAL array, dimension ( K ) +* On output, D contains the updated singular values. +* +* Z (input) REAL array, dimension ( K ) +* The first K elements of this array contain the components +* of the deflation-adjusted updating row vector. +* +* VF (input/output) REAL array, dimension ( K ) +* On entry, VF contains information passed through DBEDE8. +* On exit, VF contains the first K components of the first +* components of all right singular vectors of the bidiagonal +* matrix. +* +* VL (input/output) REAL array, dimension ( K ) +* On entry, VL contains information passed through DBEDE8. +* On exit, VL contains the first K components of the last +* components of all right singular vectors of the bidiagonal +* matrix. +* +* DIFL (output) REAL array, dimension ( K ) +* On exit, DIFL(I) = D(I) - DSIGMA(I). +* +* DIFR (output) REAL array, +* dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and +* dimension ( K ) if ICOMPQ = 0. +* On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not +* defined and will not be referenced. +* +* If ICOMPQ = 1, DIFR(1:K,2) is an array containing the +* normalizing factors for the right singular vector matrix. +* +* LDDIFR (input) INTEGER +* The leading dimension of DIFR, must be at least K. +* +* DSIGMA (input) REAL array, dimension ( K ) +* The first K elements of this array contain the old roots +* of the deflated updating problem. These are the poles +* of the secular equation. +* +* WORK (workspace) REAL array, dimension at least 3 * K +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an singular value did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IWK1, IWK2, IWK2I, IWK3, IWK3I, J + REAL DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, RHO, TEMP +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLASCL, SLASD4, SLASET, XERBLA +* .. +* .. External Functions .. + REAL SDOT, SLAMC3, SNRM2 + EXTERNAL SDOT, SLAMC3, SNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( K.LT.1 ) THEN + INFO = -2 + ELSE IF( LDDIFR.LT.K ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASD8', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.1 ) THEN + D( 1 ) = ABS( Z( 1 ) ) + DIFL( 1 ) = D( 1 ) + IF( ICOMPQ.EQ.1 ) THEN + DIFL( 2 ) = ONE + DIFR( 1, 2 ) = ONE + END IF + RETURN + END IF +* +* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can +* be computed with high relative accuracy (barring over/underflow). +* This is a problem on machines without a guard digit in +* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). +* The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), +* which on any of these machines zeros out the bottommost +* bit of DSIGMA(I) if it is 1; this makes the subsequent +* subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation +* occurs. On binary machines with a guard digit (almost all +* machines) it does not change DSIGMA(I) at all. On hexadecimal +* and decimal machines with a guard digit, it slightly +* changes the bottommost bits of DSIGMA(I). It does not account +* for hexadecimal or decimal machines without guard digits +* (we know of none). We use a subroutine call to compute +* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating +* this code. +* + DO 10 I = 1, K + DSIGMA( I ) = SLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I ) + 10 CONTINUE +* +* Book keeping. +* + IWK1 = 1 + IWK2 = IWK1 + K + IWK3 = IWK2 + K + IWK2I = IWK2 - 1 + IWK3I = IWK3 - 1 +* +* Normalize Z. +* + RHO = SNRM2( K, Z, 1 ) + CALL SLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO ) + RHO = RHO*RHO +* +* Initialize WORK(IWK3). +* + CALL SLASET( 'A', K, 1, ONE, ONE, WORK( IWK3 ), K ) +* +* Compute the updated singular values, the arrays DIFL, DIFR, +* and the updated Z. +* + DO 40 J = 1, K + CALL SLASD4( K, J, DSIGMA, Z, WORK( IWK1 ), RHO, D( J ), + $ WORK( IWK2 ), INFO ) +* +* If the root finder fails, the computation is terminated. +* + IF( INFO.NE.0 ) THEN + RETURN + END IF + WORK( IWK3I+J ) = WORK( IWK3I+J )*WORK( J )*WORK( IWK2I+J ) + DIFL( J ) = -WORK( J ) + DIFR( J, 1 ) = -WORK( J+1 ) + DO 20 I = 1, J - 1 + WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )* + $ WORK( IWK2I+I ) / ( DSIGMA( I )- + $ DSIGMA( J ) ) / ( DSIGMA( I )+ + $ DSIGMA( J ) ) + 20 CONTINUE + DO 30 I = J + 1, K + WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )* + $ WORK( IWK2I+I ) / ( DSIGMA( I )- + $ DSIGMA( J ) ) / ( DSIGMA( I )+ + $ DSIGMA( J ) ) + 30 CONTINUE + 40 CONTINUE +* +* Compute updated Z. +* + DO 50 I = 1, K + Z( I ) = SIGN( SQRT( ABS( WORK( IWK3I+I ) ) ), Z( I ) ) + 50 CONTINUE +* +* Update VF and VL. +* + DO 80 J = 1, K + DIFLJ = DIFL( J ) + DJ = D( J ) + DSIGJ = -DSIGMA( J ) + IF( J.LT.K ) THEN + DIFRJ = -DIFR( J, 1 ) + DSIGJP = -DSIGMA( J+1 ) + END IF + WORK( J ) = -Z( J ) / DIFLJ / ( DSIGMA( J )+DJ ) + DO 60 I = 1, J - 1 + WORK( I ) = Z( I ) / ( SLAMC3( DSIGMA( I ), DSIGJ )-DIFLJ ) + $ / ( DSIGMA( I )+DJ ) + 60 CONTINUE + DO 70 I = J + 1, K + WORK( I ) = Z( I ) / ( SLAMC3( DSIGMA( I ), DSIGJP )+DIFRJ ) + $ / ( DSIGMA( I )+DJ ) + 70 CONTINUE + TEMP = SNRM2( K, WORK, 1 ) + WORK( IWK2I+J ) = SDOT( K, WORK, 1, VF, 1 ) / TEMP + WORK( IWK3I+J ) = SDOT( K, WORK, 1, VL, 1 ) / TEMP + IF( ICOMPQ.EQ.1 ) THEN + DIFR( J, 2 ) = TEMP + END IF + 80 CONTINUE +* + CALL SCOPY( K, WORK( IWK2 ), 1, VF, 1 ) + CALL SCOPY( K, WORK( IWK3 ), 1, VL, 1 ) +* + RETURN +* +* End of SLASD8 +* + END diff --git a/costa/native/external/lapack/slasd9.f b/costa/native/external/lapack/slasd9.f new file mode 100644 index 000000000..99dce6291 --- /dev/null +++ b/costa/native/external/lapack/slasd9.f @@ -0,0 +1,256 @@ + SUBROUTINE SLASD9( ICOMPQ, LDU, K, D, Z, VF, VL, DIFL, DIFR, + $ DSIGMA, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, K, LDU +* .. +* .. Array Arguments .. + REAL D( * ), DIFL( * ), DIFR( LDU, * ), DSIGMA( * ), + $ VF( * ), VL( * ), WORK( * ), Z( * ) +* .. +* +* Purpose +* ======= +* +* SLASD9 finds the square roots of the roots of the secular equation, +* as defined by the values in DSIGMA and Z. It makes the +* appropriate calls to SLASD4, and stores, for each element in D, +* the distance to its two nearest poles (elements in DSIGMA). It also +* updates the arrays VF and VL, the first and last components of all +* the right singular vectors of the original bidiagonal matrix. +* +* SLASD9 is called from SLASD7. +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether singular vectors are to be computed in +* factored form in the calling routine: +* +* ICOMPQ = 0 Compute singular values only. +* +* ICOMPQ = 1 Compute singular vector matrices in +* factored form also. +* K (input) INTEGER +* The number of terms in the rational function to be solved by +* SLASD4. K >= 1. +* +* D (output) REAL array, dimension(K) +* D(I) contains the updated singular values. +* +* DSIGMA (input) REAL array, dimension(K) +* The first K elements of this array contain the old roots +* of the deflated updating problem. These are the poles +* of the secular equation. +* +* Z (input) REAL array, dimension (K) +* The first K elements of this array contain the components +* of the deflation-adjusted updating row vector. +* +* VF (input/output) REAL array, dimension(K) +* On entry, VF contains information passed through SBEDE8.f +* On exit, VF contains the first K components of the first +* components of all right singular vectors of the bidiagonal +* matrix. +* +* VL (input/output) REAL array, dimension(K) +* On entry, VL contains information passed through SBEDE8.f +* On exit, VL contains the first K components of the last +* components of all right singular vectors of the bidiagonal +* matrix. +* +* DIFL (output) REAL array, dimension (K). +* On exit, DIFL(I) = D(I) - DSIGMA(I). +* +* DIFR (output) REAL array, +* dimension (LDU, 2) if ICOMPQ =1 and +* dimension (K) if ICOMPQ = 0. +* On exit, DIFR(I, 1) = D(I) - DSIGMA(I+1), DIFR(K, 1) is not +* defined and will not be referenced. +* +* If ICOMPQ = 1, DIFR(1:K, 2) is an array containing the +* normalizing factors for the right singular vector matrix. +* +* WORK (workspace) REAL array, +* dimension at least (3 * K) +* Workspace. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an singular value did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, IWK1, IWK2, IWK2I, IWK3, IWK3I, J + REAL DIFLJ, DIFRJ, DJ, DJP1, DSIGJ, DSIGJP, RHO, + $ TEMP +* .. +* .. External Functions .. + REAL SDOT, SLAMC3, SNRM2 + EXTERNAL SDOT, SLAMC3, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLASCL, SLASD4, SLASET, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( K.LT.1 ) THEN + INFO = -3 + ELSE IF( LDU.LT.K ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASD9', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.1 ) THEN + D( 1 ) = ABS( Z( 1 ) ) + DIFL( 1 ) = D( 1 ) + IF( ICOMPQ.EQ.1 ) THEN + DIFL( 2 ) = ONE + DIFR( 1, 2 ) = ONE + END IF + RETURN + END IF +* +* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can +* be computed with high relative accuracy (barring over/underflow). +* This is a problem on machines without a guard digit in +* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). +* The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), +* which on any of these machines zeros out the bottommost +* bit of DSIGMA(I) if it is 1; this makes the subsequent +* subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation +* occurs. On binary machines with a guard digit (almost all +* machines) it does not change DSIGMA(I) at all. On hexadecimal +* and decimal machines with a guard digit, it slightly +* changes the bottommost bits of DSIGMA(I). It does not account +* for hexadecimal or decimal machines without guard digits +* (we know of none). We use a subroutine call to compute +* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating +* this code. +* + DO 10 I = 1, K + DSIGMA( I ) = SLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I ) + 10 CONTINUE +* +* Book keeping. +* + IWK1 = 1 + IWK2 = IWK1 + K + IWK3 = IWK2 + K + IWK2I = IWK2 - 1 + IWK3I = IWK3 - 1 +* +* Normalize Z. +* + RHO = SNRM2( K, Z, 1 ) + CALL SLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO ) + RHO = RHO*RHO +* +* Initialize WORK(IWK3). +* + CALL SLASET( 'A', K, 1, ONE, ONE, WORK( IWK3 ), K ) +* +* Compute the updated singular values, the arrays DIFL, DIFR, +* and the updated Z. +* + DO 40 J = 1, K + CALL SLASD4( K, J, DSIGMA, Z, WORK( IWK1 ), RHO, D( J ), + $ WORK( IWK2 ), INFO ) +* +* If the root finder fails, the computation is terminated. +* + IF( INFO.NE.0 ) THEN + RETURN + END IF + WORK( IWK3I+J ) = WORK( IWK3I+J )*WORK( J )*WORK( IWK2I+J ) + DIFL( J ) = -WORK( J ) + DIFR( J, 1 ) = -WORK( J+1 ) + DO 20 I = 1, J - 1 + WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )* + $ WORK( IWK2I+I ) / ( DSIGMA( I )- + $ DSIGMA( J ) ) / ( DSIGMA( I )+ + $ DSIGMA( J ) ) + 20 CONTINUE + DO 30 I = J + 1, K + WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )* + $ WORK( IWK2I+I ) / ( DSIGMA( I )- + $ DSIGMA( J ) ) / ( DSIGMA( I )+ + $ DSIGMA( J ) ) + 30 CONTINUE + 40 CONTINUE +* +* Compute updated Z. +* + DO 50 I = 1, K + Z( I ) = SIGN( SQRT( ABS( WORK( IWK3I+I ) ) ), Z( I ) ) + 50 CONTINUE +* +* Update VF and VL. +* + DO 80 J = 1, K + DIFLJ = DIFL( J ) + DJ = D( J ) + DSIGJ = -DSIGMA( J ) + IF( J.LT.K ) THEN + DIFRJ = -DIFR( J, 1 ) + DJP1 = D( J+1 ) + DSIGJP = -DSIGMA( J+1 ) + END IF + WORK( J ) = -Z( J ) / DIFLJ / ( DSIGMA( J )+DJ ) + DO 60 I = 1, J - 1 + WORK( I ) = Z( I ) / ( SLAMC3( DSIGMA( I ), DSIGJ )-DIFLJ ) + $ / ( DSIGMA( I )+DJ ) + 60 CONTINUE + DO 70 I = J + 1, K + WORK( I ) = Z( I ) / ( SLAMC3( DSIGMA( I ), DSIGJP )+DIFRJ ) + $ / ( DSIGMA( I )+DJ ) + 70 CONTINUE + TEMP = SNRM2( K, WORK, 1 ) + WORK( IWK2I+J ) = SDOT( K, WORK, 1, VF, 1 ) / TEMP + WORK( IWK3I+J ) = SDOT( K, WORK, 1, VL, 1 ) / TEMP + IF( ICOMPQ.EQ.1 ) THEN + DIFR( J, 2 ) = TEMP + END IF + 80 CONTINUE +* + CALL SCOPY( K, WORK( IWK2 ), 1, VF, 1 ) + CALL SCOPY( K, WORK( IWK3 ), 1, VL, 1 ) +* + RETURN +* +* End of SLASD9 +* + END diff --git a/costa/native/external/lapack/slasda.f b/costa/native/external/lapack/slasda.f new file mode 100644 index 000000000..d00eb0cff --- /dev/null +++ b/costa/native/external/lapack/slasda.f @@ -0,0 +1,391 @@ + SUBROUTINE SLASDA( ICOMPQ, SMLSIZ, N, SQRE, D, E, U, LDU, VT, K, + $ DIFL, DIFR, Z, POLES, GIVPTR, GIVCOL, LDGCOL, + $ PERM, GIVNUM, C, S, WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, LDGCOL, LDU, N, SMLSIZ, SQRE +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ), + $ K( * ), PERM( LDGCOL, * ) + REAL C( * ), D( * ), DIFL( LDU, * ), DIFR( LDU, * ), + $ E( * ), GIVNUM( LDU, * ), POLES( LDU, * ), + $ S( * ), U( LDU, * ), VT( LDU, * ), WORK( * ), + $ Z( LDU, * ) +* .. +* +* Purpose +* ======= +* +* Using a divide and conquer approach, SLASDA computes the singular +* value decomposition (SVD) of a real upper bidiagonal N-by-M matrix +* B with diagonal D and offdiagonal E, where M = N + SQRE. The +* algorithm computes the singular values in the SVD B = U * S * VT. +* The orthogonal matrices U and VT are optionally computed in +* compact form. +* +* A related subroutine, SLASD0, computes the singular values and +* the singular vectors in explicit form. +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether singular vectors are to be computed +* in compact form, as follows +* = 0: Compute singular values only. +* = 1: Compute singular vectors of upper bidiagonal +* matrix in compact form. +* +* SMLSIZ (input) INTEGER +* The maximum size of the subproblems at the bottom of the +* computation tree. +* +* N (input) INTEGER +* The row dimension of the upper bidiagonal matrix. This is +* also the dimension of the main diagonal array D. +* +* SQRE (input) INTEGER +* Specifies the column dimension of the bidiagonal matrix. +* = 0: The bidiagonal matrix has column dimension M = N; +* = 1: The bidiagonal matrix has column dimension M = N + 1. +* +* D (input/output) REAL array, dimension ( N ) +* On entry D contains the main diagonal of the bidiagonal +* matrix. On exit D, if INFO = 0, contains its singular values. +* +* E (input) REAL array, dimension ( M-1 ) +* Contains the subdiagonal entries of the bidiagonal matrix. +* On exit, E has been destroyed. +* +* U (output) REAL array, +* dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced +* if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left +* singular vector matrices of all subproblems at the bottom +* level. +* +* LDU (input) INTEGER, LDU = > N. +* The leading dimension of arrays U, VT, DIFL, DIFR, POLES, +* GIVNUM, and Z. +* +* VT (output) REAL array, +* dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced +* if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right +* singular vector matrices of all subproblems at the bottom +* level. +* +* K (output) INTEGER array, +* dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0. +* If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th +* secular equation on the computation tree. +* +* DIFL (output) REAL array, dimension ( LDU, NLVL ), +* where NLVL = floor(log_2 (N/SMLSIZ))). +* +* DIFR (output) REAL array, +* dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and +* dimension ( N ) if ICOMPQ = 0. +* If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1) +* record distances between singular values on the I-th +* level and singular values on the (I -1)-th level, and +* DIFR(1:N, 2 * I ) contains the normalizing factors for +* the right singular vector matrix. See SLASD8 for details. +* +* Z (output) REAL array, +* dimension ( LDU, NLVL ) if ICOMPQ = 1 and +* dimension ( N ) if ICOMPQ = 0. +* The first K elements of Z(1, I) contain the components of +* the deflation-adjusted updating row vector for subproblems +* on the I-th level. +* +* POLES (output) REAL array, +* dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced +* if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and +* POLES(1, 2*I) contain the new and old singular values +* involved in the secular equations on the I-th level. +* +* GIVPTR (output) INTEGER array, +* dimension ( N ) if ICOMPQ = 1, and not referenced if +* ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records +* the number of Givens rotations performed on the I-th +* problem on the computation tree. +* +* GIVCOL (output) INTEGER array, +* dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not +* referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, +* GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations +* of Givens rotations performed on the I-th level on the +* computation tree. +* +* LDGCOL (input) INTEGER, LDGCOL = > N. +* The leading dimension of arrays GIVCOL and PERM. +* +* PERM (output) INTEGER array, +* dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced +* if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records +* permutations done on the I-th level of the computation tree. +* +* GIVNUM (output) REAL array, +* dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not +* referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, +* GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S- +* values of Givens rotations performed on the I-th level on +* the computation tree. +* +* C (output) REAL array, +* dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. +* If ICOMPQ = 1 and the I-th subproblem is not square, on exit, +* C( I ) contains the C-value of a Givens rotation related to +* the right null space of the I-th subproblem. +* +* S (output) REAL array, dimension ( N ) if +* ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 +* and the I-th subproblem is not square, on exit, S( I ) +* contains the S-value of a Givens rotation related to +* the right null space of the I-th subproblem. +* +* WORK (workspace) REAL array, dimension +* (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). +* +* IWORK (workspace) INTEGER array. +* Dimension must be at least (7 * N). +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an singular value did not converge +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, IC, IDXQ, IDXQI, IM1, INODE, ITEMP, IWK, + $ J, LF, LL, LVL, LVL2, M, NCC, ND, NDB1, NDIML, + $ NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, NRU, + $ NWORK1, NWORK2, SMLSZP, SQREI, VF, VFI, VL, VLI + REAL ALPHA, BETA +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLASD6, SLASDQ, SLASDT, SLASET, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( SMLSIZ.LT.3 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + ELSE IF( LDU.LT.( N+SQRE ) ) THEN + INFO = -8 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -17 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASDA', -INFO ) + RETURN + END IF +* + M = N + SQRE +* +* If the input matrix is too small, call SLASDQ to find the SVD. +* + IF( N.LE.SMLSIZ ) THEN + IF( ICOMPQ.EQ.0 ) THEN + CALL SLASDQ( 'U', SQRE, N, 0, 0, 0, D, E, VT, LDU, U, LDU, + $ U, LDU, WORK, INFO ) + ELSE + CALL SLASDQ( 'U', SQRE, N, M, N, 0, D, E, VT, LDU, U, LDU, + $ U, LDU, WORK, INFO ) + END IF + RETURN + END IF +* +* Book-keeping and set up the computation tree. +* + INODE = 1 + NDIML = INODE + N + NDIMR = NDIML + N + IDXQ = NDIMR + N + IWK = IDXQ + N +* + NCC = 0 + NRU = 0 +* + SMLSZP = SMLSIZ + 1 + VF = 1 + VL = VF + M + NWORK1 = VL + M + NWORK2 = NWORK1 + SMLSZP*SMLSZP +* + CALL SLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ), + $ IWORK( NDIMR ), SMLSIZ ) +* +* for the nodes on bottom level of the tree, solve +* their subproblems by SLASDQ. +* + NDB1 = ( ND+1 ) / 2 + DO 30 I = NDB1, ND +* +* IC : center row of each node +* NL : number of rows of left subproblem +* NR : number of rows of right subproblem +* NLF: starting row of the left subproblem +* NRF: starting row of the right subproblem +* + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NLP1 = NL + 1 + NR = IWORK( NDIMR+I1 ) + NLF = IC - NL + NRF = IC + 1 + IDXQI = IDXQ + NLF - 2 + VFI = VF + NLF - 1 + VLI = VL + NLF - 1 + SQREI = 1 + IF( ICOMPQ.EQ.0 ) THEN + CALL SLASET( 'A', NLP1, NLP1, ZERO, ONE, WORK( NWORK1 ), + $ SMLSZP ) + CALL SLASDQ( 'U', SQREI, NL, NLP1, NRU, NCC, D( NLF ), + $ E( NLF ), WORK( NWORK1 ), SMLSZP, + $ WORK( NWORK2 ), NL, WORK( NWORK2 ), NL, + $ WORK( NWORK2 ), INFO ) + ITEMP = NWORK1 + NL*SMLSZP + CALL SCOPY( NLP1, WORK( NWORK1 ), 1, WORK( VFI ), 1 ) + CALL SCOPY( NLP1, WORK( ITEMP ), 1, WORK( VLI ), 1 ) + ELSE + CALL SLASET( 'A', NL, NL, ZERO, ONE, U( NLF, 1 ), LDU ) + CALL SLASET( 'A', NLP1, NLP1, ZERO, ONE, VT( NLF, 1 ), LDU ) + CALL SLASDQ( 'U', SQREI, NL, NLP1, NL, NCC, D( NLF ), + $ E( NLF ), VT( NLF, 1 ), LDU, U( NLF, 1 ), LDU, + $ U( NLF, 1 ), LDU, WORK( NWORK1 ), INFO ) + CALL SCOPY( NLP1, VT( NLF, 1 ), 1, WORK( VFI ), 1 ) + CALL SCOPY( NLP1, VT( NLF, NLP1 ), 1, WORK( VLI ), 1 ) + END IF + IF( INFO.NE.0 ) THEN + RETURN + END IF + DO 10 J = 1, NL + IWORK( IDXQI+J ) = J + 10 CONTINUE + IF( ( I.EQ.ND ) .AND. ( SQRE.EQ.0 ) ) THEN + SQREI = 0 + ELSE + SQREI = 1 + END IF + IDXQI = IDXQI + NLP1 + VFI = VFI + NLP1 + VLI = VLI + NLP1 + NRP1 = NR + SQREI + IF( ICOMPQ.EQ.0 ) THEN + CALL SLASET( 'A', NRP1, NRP1, ZERO, ONE, WORK( NWORK1 ), + $ SMLSZP ) + CALL SLASDQ( 'U', SQREI, NR, NRP1, NRU, NCC, D( NRF ), + $ E( NRF ), WORK( NWORK1 ), SMLSZP, + $ WORK( NWORK2 ), NR, WORK( NWORK2 ), NR, + $ WORK( NWORK2 ), INFO ) + ITEMP = NWORK1 + ( NRP1-1 )*SMLSZP + CALL SCOPY( NRP1, WORK( NWORK1 ), 1, WORK( VFI ), 1 ) + CALL SCOPY( NRP1, WORK( ITEMP ), 1, WORK( VLI ), 1 ) + ELSE + CALL SLASET( 'A', NR, NR, ZERO, ONE, U( NRF, 1 ), LDU ) + CALL SLASET( 'A', NRP1, NRP1, ZERO, ONE, VT( NRF, 1 ), LDU ) + CALL SLASDQ( 'U', SQREI, NR, NRP1, NR, NCC, D( NRF ), + $ E( NRF ), VT( NRF, 1 ), LDU, U( NRF, 1 ), LDU, + $ U( NRF, 1 ), LDU, WORK( NWORK1 ), INFO ) + CALL SCOPY( NRP1, VT( NRF, 1 ), 1, WORK( VFI ), 1 ) + CALL SCOPY( NRP1, VT( NRF, NRP1 ), 1, WORK( VLI ), 1 ) + END IF + IF( INFO.NE.0 ) THEN + RETURN + END IF + DO 20 J = 1, NR + IWORK( IDXQI+J ) = J + 20 CONTINUE + 30 CONTINUE +* +* Now conquer each subproblem bottom-up. +* + J = 2**NLVL + DO 50 LVL = NLVL, 1, -1 + LVL2 = LVL*2 - 1 +* +* Find the first node LF and last node LL on +* the current level LVL. +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 40 I = LF, LL + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + IF( I.EQ.LL ) THEN + SQREI = SQRE + ELSE + SQREI = 1 + END IF + VFI = VF + NLF - 1 + VLI = VL + NLF - 1 + IDXQI = IDXQ + NLF - 1 + ALPHA = D( IC ) + BETA = E( IC ) + IF( ICOMPQ.EQ.0 ) THEN + CALL SLASD6( ICOMPQ, NL, NR, SQREI, D( NLF ), + $ WORK( VFI ), WORK( VLI ), ALPHA, BETA, + $ IWORK( IDXQI ), PERM, GIVPTR( 1 ), GIVCOL, + $ LDGCOL, GIVNUM, LDU, POLES, DIFL, DIFR, Z, + $ K( 1 ), C( 1 ), S( 1 ), WORK( NWORK1 ), + $ IWORK( IWK ), INFO ) + ELSE + J = J - 1 + CALL SLASD6( ICOMPQ, NL, NR, SQREI, D( NLF ), + $ WORK( VFI ), WORK( VLI ), ALPHA, BETA, + $ IWORK( IDXQI ), PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, + $ POLES( NLF, LVL2 ), DIFL( NLF, LVL ), + $ DIFR( NLF, LVL2 ), Z( NLF, LVL ), K( J ), + $ C( J ), S( J ), WORK( NWORK1 ), + $ IWORK( IWK ), INFO ) + END IF + IF( INFO.NE.0 ) THEN + RETURN + END IF + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of SLASDA +* + END diff --git a/costa/native/external/lapack/slasdq.f b/costa/native/external/lapack/slasdq.f new file mode 100644 index 000000000..b1a689e6d --- /dev/null +++ b/costa/native/external/lapack/slasdq.f @@ -0,0 +1,317 @@ + SUBROUTINE SLASDQ( UPLO, SQRE, N, NCVT, NRU, NCC, D, E, VT, LDVT, + $ U, LDU, C, LDC, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU, SQRE +* .. +* .. Array Arguments .. + REAL C( LDC, * ), D( * ), E( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLASDQ computes the singular value decomposition (SVD) of a real +* (upper or lower) bidiagonal matrix with diagonal D and offdiagonal +* E, accumulating the transformations if desired. Letting B denote +* the input bidiagonal matrix, the algorithm computes orthogonal +* matrices Q and P such that B = Q * S * P' (P' denotes the transpose +* of P). The singular values S are overwritten on D. +* +* The input matrix U is changed to U * Q if desired. +* The input matrix VT is changed to P' * VT if desired. +* The input matrix C is changed to Q' * C if desired. +* +* See "Computing Small Singular Values of Bidiagonal Matrices With +* Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, +* LAPACK Working Note #3, for a detailed description of the algorithm. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* On entry, UPLO specifies whether the input bidiagonal matrix +* is upper or lower bidiagonal, and wether it is square are +* not. +* UPLO = 'U' or 'u' B is upper bidiagonal. +* UPLO = 'L' or 'l' B is lower bidiagonal. +* +* SQRE (input) INTEGER +* = 0: then the input matrix is N-by-N. +* = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and +* (N+1)-by-N if UPLU = 'L'. +* +* The bidiagonal matrix has +* N = NL + NR + 1 rows and +* M = N + SQRE >= N columns. +* +* N (input) INTEGER +* On entry, N specifies the number of rows and columns +* in the matrix. N must be at least 0. +* +* NCVT (input) INTEGER +* On entry, NCVT specifies the number of columns of +* the matrix VT. NCVT must be at least 0. +* +* NRU (input) INTEGER +* On entry, NRU specifies the number of rows of +* the matrix U. NRU must be at least 0. +* +* NCC (input) INTEGER +* On entry, NCC specifies the number of columns of +* the matrix C. NCC must be at least 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, D contains the diagonal entries of the +* bidiagonal matrix whose SVD is desired. On normal exit, +* D contains the singular values in ascending order. +* +* E (input/output) REAL array. +* dimension is (N-1) if SQRE = 0 and N if SQRE = 1. +* On entry, the entries of E contain the offdiagonal entries +* of the bidiagonal matrix whose SVD is desired. On normal +* exit, E will contain 0. If the algorithm does not converge, +* D and E will contain the diagonal and superdiagonal entries +* of a bidiagonal matrix orthogonally equivalent to the one +* given as input. +* +* VT (input/output) REAL array, dimension (LDVT, NCVT) +* On entry, contains a matrix which on exit has been +* premultiplied by P', dimension N-by-NCVT if SQRE = 0 +* and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0). +* +* LDVT (input) INTEGER +* On entry, LDVT specifies the leading dimension of VT as +* declared in the calling (sub) program. LDVT must be at +* least 1. If NCVT is nonzero LDVT must also be at least N. +* +* U (input/output) REAL array, dimension (LDU, N) +* On entry, contains a matrix which on exit has been +* postmultiplied by Q, dimension NRU-by-N if SQRE = 0 +* and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0). +* +* LDU (input) INTEGER +* On entry, LDU specifies the leading dimension of U as +* declared in the calling (sub) program. LDU must be at +* least max( 1, NRU ) . +* +* C (input/output) REAL array, dimension (LDC, NCC) +* On entry, contains an N-by-NCC matrix which on exit +* has been premultiplied by Q' dimension N-by-NCC if SQRE = 0 +* and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0). +* +* LDC (input) INTEGER +* On entry, LDC specifies the leading dimension of C as +* declared in the calling (sub) program. LDC must be at +* least 1. If NCC is nonzero, LDC must also be at least N. +* +* WORK (workspace) REAL array, dimension (4*N) +* Workspace. Only referenced if one of NCVT, NRU, or NCC is +* nonzero, and if N is at least 2. +* +* INFO (output) INTEGER +* On exit, a value of 0 indicates a successful exit. +* If INFO < 0, argument number -INFO is illegal. +* If INFO > 0, the algorithm did not converge, and INFO +* specifies how many superdiagonals did not converge. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ROTATE + INTEGER I, ISUB, IUPLO, J, NP1, SQRE1 + REAL CS, R, SMIN, SN +* .. +* .. External Subroutines .. + EXTERNAL SBDSQR, SLARTG, SLASR, SSWAP, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IUPLO = 0 + IF( LSAME( UPLO, 'U' ) ) + $ IUPLO = 1 + IF( LSAME( UPLO, 'L' ) ) + $ IUPLO = 2 + IF( IUPLO.EQ.0 ) THEN + INFO = -1 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NCVT.LT.0 ) THEN + INFO = -4 + ELSE IF( NRU.LT.0 ) THEN + INFO = -5 + ELSE IF( NCC.LT.0 ) THEN + INFO = -6 + ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR. + $ ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN + INFO = -10 + ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN + INFO = -12 + ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR. + $ ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASDQ', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN +* +* ROTATE is true if any singular vectors desired, false otherwise +* + ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 ) + NP1 = N + 1 + SQRE1 = SQRE +* +* If matrix non-square upper bidiagonal, rotate to be lower +* bidiagonal. The rotations are on the right. +* + IF( ( IUPLO.EQ.1 ) .AND. ( SQRE1.EQ.1 ) ) THEN + DO 10 I = 1, N - 1 + CALL SLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( ROTATE ) THEN + WORK( I ) = CS + WORK( N+I ) = SN + END IF + 10 CONTINUE + CALL SLARTG( D( N ), E( N ), CS, SN, R ) + D( N ) = R + E( N ) = ZERO + IF( ROTATE ) THEN + WORK( N ) = CS + WORK( N+N ) = SN + END IF + IUPLO = 2 + SQRE1 = 0 +* +* Update singular vectors if desired. +* + IF( NCVT.GT.0 ) + $ CALL SLASR( 'L', 'V', 'F', NP1, NCVT, WORK( 1 ), + $ WORK( NP1 ), VT, LDVT ) + END IF +* +* If matrix lower bidiagonal, rotate to be upper bidiagonal +* by applying Givens rotations on the left. +* + IF( IUPLO.EQ.2 ) THEN + DO 20 I = 1, N - 1 + CALL SLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( ROTATE ) THEN + WORK( I ) = CS + WORK( N+I ) = SN + END IF + 20 CONTINUE +* +* If matrix (N+1)-by-N lower bidiagonal, one additional +* rotation is needed. +* + IF( SQRE1.EQ.1 ) THEN + CALL SLARTG( D( N ), E( N ), CS, SN, R ) + D( N ) = R + IF( ROTATE ) THEN + WORK( N ) = CS + WORK( N+N ) = SN + END IF + END IF +* +* Update singular vectors if desired. +* + IF( NRU.GT.0 ) THEN + IF( SQRE1.EQ.0 ) THEN + CALL SLASR( 'R', 'V', 'F', NRU, N, WORK( 1 ), + $ WORK( NP1 ), U, LDU ) + ELSE + CALL SLASR( 'R', 'V', 'F', NRU, NP1, WORK( 1 ), + $ WORK( NP1 ), U, LDU ) + END IF + END IF + IF( NCC.GT.0 ) THEN + IF( SQRE1.EQ.0 ) THEN + CALL SLASR( 'L', 'V', 'F', N, NCC, WORK( 1 ), + $ WORK( NP1 ), C, LDC ) + ELSE + CALL SLASR( 'L', 'V', 'F', NP1, NCC, WORK( 1 ), + $ WORK( NP1 ), C, LDC ) + END IF + END IF + END IF +* +* Call SBDSQR to compute the SVD of the reduced real +* N-by-N upper bidiagonal matrix. +* + CALL SBDSQR( 'U', N, NCVT, NRU, NCC, D, E, VT, LDVT, U, LDU, C, + $ LDC, WORK, INFO ) +* +* Sort the singular values into ascending order (insertion sort on +* singular values, but only one transposition per singular vector) +* + DO 40 I = 1, N +* +* Scan for smallest D(I). +* + ISUB = I + SMIN = D( I ) + DO 30 J = I + 1, N + IF( D( J ).LT.SMIN ) THEN + ISUB = J + SMIN = D( J ) + END IF + 30 CONTINUE + IF( ISUB.NE.I ) THEN +* +* Swap singular values and vectors. +* + D( ISUB ) = D( I ) + D( I ) = SMIN + IF( NCVT.GT.0 ) + $ CALL SSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( I, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL SSWAP( NRU, U( 1, ISUB ), 1, U( 1, I ), 1 ) + IF( NCC.GT.0 ) + $ CALL SSWAP( NCC, C( ISUB, 1 ), LDC, C( I, 1 ), LDC ) + END IF + 40 CONTINUE +* + RETURN +* +* End of SLASDQ +* + END diff --git a/costa/native/external/lapack/slasdt.f b/costa/native/external/lapack/slasdt.f new file mode 100644 index 000000000..116e1d0ac --- /dev/null +++ b/costa/native/external/lapack/slasdt.f @@ -0,0 +1,106 @@ + SUBROUTINE SLASDT( N, LVL, ND, INODE, NDIML, NDIMR, MSUB ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER LVL, MSUB, N, ND +* .. +* .. Array Arguments .. + INTEGER INODE( * ), NDIML( * ), NDIMR( * ) +* .. +* +* Purpose +* ======= +* +* SLASDT creates a tree of subproblems for bidiagonal divide and +* conquer. +* +* Arguments +* ========= +* +* N (input) INTEGER +* On entry, the number of diagonal elements of the +* bidiagonal matrix. +* +* LVL (output) INTEGER +* On exit, the number of levels on the computation tree. +* +* ND (output) INTEGER +* On exit, the number of nodes on the tree. +* +* INODE (output) INTEGER array, dimension ( N ) +* On exit, centers of subproblems. +* +* NDIML (output) INTEGER array, dimension ( N ) +* On exit, row dimensions of left children. +* +* NDIMR (output) INTEGER array, dimension ( N ) +* On exit, row dimensions of right children. +* +* MSUB (input) INTEGER. +* On entry, the maximum row dimension each subproblem at the +* bottom of the tree can be of. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IL, IR, LLST, MAXN, NCRNT, NLVL + REAL TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, LOG, MAX, REAL +* .. +* .. Executable Statements .. +* +* Find the number of levels on the tree. +* + MAXN = MAX( 1, N ) + TEMP = LOG( REAL( MAXN ) / REAL( MSUB+1 ) ) / LOG( TWO ) + LVL = INT( TEMP ) + 1 +* + I = N / 2 + INODE( 1 ) = I + 1 + NDIML( 1 ) = I + NDIMR( 1 ) = N - I - 1 + IL = 0 + IR = 1 + LLST = 1 + DO 20 NLVL = 1, LVL - 1 +* +* Constructing the tree at (NLVL+1)-st level. The number of +* nodes created on this level is LLST * 2. +* + DO 10 I = 0, LLST - 1 + IL = IL + 2 + IR = IR + 2 + NCRNT = LLST + I + NDIML( IL ) = NDIML( NCRNT ) / 2 + NDIMR( IL ) = NDIML( NCRNT ) - NDIML( IL ) - 1 + INODE( IL ) = INODE( NCRNT ) - NDIMR( IL ) - 1 + NDIML( IR ) = NDIMR( NCRNT ) / 2 + NDIMR( IR ) = NDIMR( NCRNT ) - NDIML( IR ) - 1 + INODE( IR ) = INODE( NCRNT ) + NDIML( IR ) + 1 + 10 CONTINUE + LLST = LLST*2 + 20 CONTINUE + ND = LLST*2 - 1 +* + RETURN +* +* End of SLASDT +* + END diff --git a/costa/native/external/lapack/slaset.f b/costa/native/external/lapack/slaset.f new file mode 100644 index 000000000..af596ecf9 --- /dev/null +++ b/costa/native/external/lapack/slaset.f @@ -0,0 +1,115 @@ + SUBROUTINE SLASET( UPLO, M, N, ALPHA, BETA, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, M, N + REAL ALPHA, BETA +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* SLASET initializes an m-by-n matrix A to BETA on the diagonal and +* ALPHA on the offdiagonals. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies the part of the matrix A to be set. +* = 'U': Upper triangular part is set; the strictly lower +* triangular part of A is not changed. +* = 'L': Lower triangular part is set; the strictly upper +* triangular part of A is not changed. +* Otherwise: All of the matrix A is set. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* ALPHA (input) REAL +* The constant to which the offdiagonal elements are to be set. +* +* BETA (input) REAL +* The constant to which the diagonal elements are to be set. +* +* A (input/output) REAL array, dimension (LDA,N) +* On exit, the leading m-by-n submatrix of A is set as follows: +* +* if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, +* if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, +* otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, +* +* and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Set the strictly upper triangular or trapezoidal part of the +* array to ALPHA. +* + DO 20 J = 2, N + DO 10 I = 1, MIN( J-1, M ) + A( I, J ) = ALPHA + 10 CONTINUE + 20 CONTINUE +* + ELSE IF( LSAME( UPLO, 'L' ) ) THEN +* +* Set the strictly lower triangular or trapezoidal part of the +* array to ALPHA. +* + DO 40 J = 1, MIN( M, N ) + DO 30 I = J + 1, M + A( I, J ) = ALPHA + 30 CONTINUE + 40 CONTINUE +* + ELSE +* +* Set the leading m-by-n submatrix to ALPHA. +* + DO 60 J = 1, N + DO 50 I = 1, M + A( I, J ) = ALPHA + 50 CONTINUE + 60 CONTINUE + END IF +* +* Set the first min(M,N) diagonal elements to BETA. +* + DO 70 I = 1, MIN( M, N ) + A( I, I ) = BETA + 70 CONTINUE +* + RETURN +* +* End of SLASET +* + END diff --git a/costa/native/external/lapack/slasq1.f b/costa/native/external/lapack/slasq1.f new file mode 100644 index 000000000..4a119381d --- /dev/null +++ b/costa/native/external/lapack/slasq1.f @@ -0,0 +1,149 @@ + SUBROUTINE SLASQ1( N, D, E, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLASQ1 computes the singular values of a real N-by-N bidiagonal +* matrix with diagonal D and off-diagonal E. The singular values +* are computed to high relative accuracy, in the absence of +* denormalization, underflow and overflow. The algorithm was first +* presented in +* +* "Accurate singular values and differential qd algorithms" by K. V. +* Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, +* 1994, +* +* and the present implementation is described in "An implementation of +* the dqds Algorithm (Positive Case)", LAPACK Working Note. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of rows and columns in the matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, D contains the diagonal elements of the +* bidiagonal matrix whose SVD is desired. On normal exit, +* D contains the singular values in decreasing order. +* +* E (input/output) REAL array, dimension (N) +* On entry, elements E(1:N-1) contain the off-diagonal elements +* of the bidiagonal matrix whose SVD is desired. +* On exit, E is overwritten. +* +* WORK (workspace) REAL array, dimension (4*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: the algorithm failed +* = 1, a split was marked by a positive value in E +* = 2, current block of Z not diagonalized after 30*N +* iterations (in inner while loop) +* = 3, termination criterion of outer while loop not met +* (program created more than N unreduced blocks) +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO + REAL EPS, SCALE, SAFMIN, SIGMN, SIGMX +* .. +* .. External Subroutines .. + EXTERNAL SLAS2, SLASQ2, SLASRT, XERBLA +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -2 + CALL XERBLA( 'SLASQ1', -INFO ) + RETURN + ELSE IF( N.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + D( 1 ) = ABS( D( 1 ) ) + RETURN + ELSE IF( N.EQ.2 ) THEN + CALL SLAS2( D( 1 ), E( 1 ), D( 2 ), SIGMN, SIGMX ) + D( 1 ) = SIGMX + D( 2 ) = SIGMN + RETURN + END IF +* +* Estimate the largest singular value. +* + SIGMX = ZERO + DO 10 I = 1, N - 1 + D( I ) = ABS( D( I ) ) + SIGMX = MAX( SIGMX, ABS( E( I ) ) ) + 10 CONTINUE + D( N ) = ABS( D( N ) ) +* +* Early return if SIGMX is zero (matrix is already diagonal). +* + IF( SIGMX.EQ.ZERO ) THEN + CALL SLASRT( 'D', N, D, IINFO ) + RETURN + END IF +* + DO 20 I = 1, N + SIGMX = MAX( SIGMX, D( I ) ) + 20 CONTINUE +* +* Copy D and E into WORK (in the Z format) and scale (squaring the +* input data makes scaling by a power of the radix pointless). +* + EPS = SLAMCH( 'Precision' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SCALE = SQRT( EPS / SAFMIN ) + CALL SCOPY( N, D, 1, WORK( 1 ), 2 ) + CALL SCOPY( N-1, E, 1, WORK( 2 ), 2 ) + CALL SLASCL( 'G', 0, 0, SIGMX, SCALE, 2*N-1, 1, WORK, 2*N-1, + $ IINFO ) +* +* Compute the q's and e's. +* + DO 30 I = 1, 2*N - 1 + WORK( I ) = WORK( I )**2 + 30 CONTINUE + WORK( 2*N ) = ZERO +* + CALL SLASQ2( N, WORK, INFO ) +* + IF( INFO.EQ.0 ) THEN + DO 40 I = 1, N + D( I ) = SQRT( WORK( I ) ) + 40 CONTINUE + CALL SLASCL( 'G', 0, 0, SCALE, SIGMX, N, 1, D, N, IINFO ) + END IF +* + RETURN +* +* End of SLASQ1 +* + END diff --git a/costa/native/external/lapack/slasq2.f b/costa/native/external/lapack/slasq2.f new file mode 100644 index 000000000..560826cbf --- /dev/null +++ b/costa/native/external/lapack/slasq2.f @@ -0,0 +1,436 @@ + SUBROUTINE SLASQ2( N, Z, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL Z( * ) +* .. +* +* Purpose +* ======= +* +* SLASQ2 computes all the eigenvalues of the symmetric positive +* definite tridiagonal matrix associated with the qd array Z to high +* relative accuracy are computed to high relative accuracy, in the +* absence of denormalization, underflow and overflow. +* +* To see the relation of Z to the tridiagonal matrix, let L be a +* unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and +* let U be an upper bidiagonal matrix with 1's above and diagonal +* Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the +* symmetric tridiagonal to which it is similar. +* +* Note : SLASQ2 defines a logical variable, IEEE, which is true +* on machines which follow ieee-754 floating-point standard in their +* handling of infinities and NaNs, and false otherwise. This variable +* is passed to SLASQ3. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of rows and columns in the matrix. N >= 0. +* +* Z (workspace) REAL array, dimension ( 4*N ) +* On entry Z holds the qd array. On exit, entries 1 to N hold +* the eigenvalues in decreasing order, Z( 2*N+1 ) holds the +* trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If +* N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) +* holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of +* shifts that failed. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if the i-th argument is a scalar and had an illegal +* value, then INFO = -i, if the i-th argument is an +* array and the j-entry had an illegal value, then +* INFO = -(i*100+j) +* > 0: the algorithm failed +* = 1, a split was marked by a positive value in E +* = 2, current block of Z not diagonalized after 30*N +* iterations (in inner while loop) +* = 3, termination criterion of outer while loop not met +* (program created more than N unreduced blocks) +* +* Further Details +* =============== +* Local Variables: I0:N0 defines a current unreduced segment of Z. +* The shifts are accumulated in SIGMA. Iteration count is in ITER. +* Ping-pong is controlled by PP (alternates between 0 and 1). +* +* ===================================================================== +* +* .. Parameters .. + REAL CBIAS + PARAMETER ( CBIAS = 1.50E0 ) + REAL ZERO, HALF, ONE, TWO, FOUR, HUNDRD + PARAMETER ( ZERO = 0.0E0, HALF = 0.5E0, ONE = 1.0E0, + $ TWO = 2.0E0, FOUR = 4.0E0, HUNDRD = 100.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL IEEE + INTEGER I0, I4, IINFO, IPN4, ITER, IWHILA, IWHILB, K, + $ N0, NBIG, NDIV, NFAIL, PP, SPLT + REAL D, DESIG, DMIN, E, EMAX, EMIN, EPS, OLDEMN, + $ QMAX, QMIN, S, SAFMIN, SIGMA, T, TEMP, TOL, + $ TOL2, TRACE, ZMAX +* .. +* .. External Subroutines .. + EXTERNAL SLASQ3, SLASRT, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + REAL SLAMCH + EXTERNAL ILAENV, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* (in case SLASQ2 is not called by SLASQ1) +* + INFO = 0 + EPS = SLAMCH( 'Precision' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + TOL = EPS*HUNDRD + TOL2 = TOL**2 +* + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'SLASQ2', 1 ) + RETURN + ELSE IF( N.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN +* +* 1-by-1 case. +* + IF( Z( 1 ).LT.ZERO ) THEN + INFO = -201 + CALL XERBLA( 'SLASQ2', 2 ) + END IF + RETURN + ELSE IF( N.EQ.2 ) THEN +* +* 2-by-2 case. +* + IF( Z( 2 ).LT.ZERO .OR. Z( 3 ).LT.ZERO ) THEN + INFO = -2 + CALL XERBLA( 'SLASQ2', 2 ) + RETURN + ELSE IF( Z( 3 ).GT.Z( 1 ) ) THEN + D = Z( 3 ) + Z( 3 ) = Z( 1 ) + Z( 1 ) = D + END IF + Z( 5 ) = Z( 1 ) + Z( 2 ) + Z( 3 ) + IF( Z( 2 ).GT.Z( 3 )*TOL2 ) THEN + T = HALF*( ( Z( 1 )-Z( 3 ) )+Z( 2 ) ) + S = Z( 3 )*( Z( 2 ) / T ) + IF( S.LE.T ) THEN + S = Z( 3 )*( Z( 2 ) / ( T*( ONE+SQRT( ONE+S / T ) ) ) ) + ELSE + S = Z( 3 )*( Z( 2 ) / ( T+SQRT( T )*SQRT( T+S ) ) ) + END IF + T = Z( 1 ) + ( S+Z( 2 ) ) + Z( 3 ) = Z( 3 )*( Z( 1 ) / T ) + Z( 1 ) = T + END IF + Z( 2 ) = Z( 3 ) + Z( 6 ) = Z( 2 ) + Z( 1 ) + RETURN + END IF +* +* Check for negative data and compute sums of q's and e's. +* + Z( 2*N ) = ZERO + EMIN = Z( 2 ) + QMAX = ZERO + ZMAX = ZERO + D = ZERO + E = ZERO +* + DO 10 K = 1, 2*( N-1 ), 2 + IF( Z( K ).LT.ZERO ) THEN + INFO = -( 200+K ) + CALL XERBLA( 'SLASQ2', 2 ) + RETURN + ELSE IF( Z( K+1 ).LT.ZERO ) THEN + INFO = -( 200+K+1 ) + CALL XERBLA( 'SLASQ2', 2 ) + RETURN + END IF + D = D + Z( K ) + E = E + Z( K+1 ) + QMAX = MAX( QMAX, Z( K ) ) + EMIN = MIN( EMIN, Z( K+1 ) ) + ZMAX = MAX( QMAX, ZMAX, Z( K+1 ) ) + 10 CONTINUE + IF( Z( 2*N-1 ).LT.ZERO ) THEN + INFO = -( 200+2*N-1 ) + CALL XERBLA( 'SLASQ2', 2 ) + RETURN + END IF + D = D + Z( 2*N-1 ) + QMAX = MAX( QMAX, Z( 2*N-1 ) ) + ZMAX = MAX( QMAX, ZMAX ) +* +* Check for diagonality. +* + IF( E.EQ.ZERO ) THEN + DO 20 K = 2, N + Z( K ) = Z( 2*K-1 ) + 20 CONTINUE + CALL SLASRT( 'D', N, Z, IINFO ) + Z( 2*N-1 ) = D + RETURN + END IF +* + TRACE = D + E +* +* Check for zero data. +* + IF( TRACE.EQ.ZERO ) THEN + Z( 2*N-1 ) = ZERO + RETURN + END IF +* +* Check whether the machine is IEEE conformable. +* + IEEE = ILAENV( 10, 'SLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 .AND. + $ ILAENV( 11, 'SLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 +* +* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). +* + DO 30 K = 2*N, 2, -2 + Z( 2*K ) = ZERO + Z( 2*K-1 ) = Z( K ) + Z( 2*K-2 ) = ZERO + Z( 2*K-3 ) = Z( K-1 ) + 30 CONTINUE +* + I0 = 1 + N0 = N +* +* Reverse the qd-array, if warranted. +* + IF( CBIAS*Z( 4*I0-3 ).LT.Z( 4*N0-3 ) ) THEN + IPN4 = 4*( I0+N0 ) + DO 40 I4 = 4*I0, 2*( I0+N0-1 ), 4 + TEMP = Z( I4-3 ) + Z( I4-3 ) = Z( IPN4-I4-3 ) + Z( IPN4-I4-3 ) = TEMP + TEMP = Z( I4-1 ) + Z( I4-1 ) = Z( IPN4-I4-5 ) + Z( IPN4-I4-5 ) = TEMP + 40 CONTINUE + END IF +* +* Initial split checking via dqd and Li's test. +* + PP = 0 +* + DO 80 K = 1, 2 +* + D = Z( 4*N0+PP-3 ) + DO 50 I4 = 4*( N0-1 ) + PP, 4*I0 + PP, -4 + IF( Z( I4-1 ).LE.TOL2*D ) THEN + Z( I4-1 ) = -ZERO + D = Z( I4-3 ) + ELSE + D = Z( I4-3 )*( D / ( D+Z( I4-1 ) ) ) + END IF + 50 CONTINUE +* +* dqd maps Z to ZZ plus Li's test. +* + EMIN = Z( 4*I0+PP+1 ) + D = Z( 4*I0+PP-3 ) + DO 60 I4 = 4*I0 + PP, 4*( N0-1 ) + PP, 4 + Z( I4-2*PP-2 ) = D + Z( I4-1 ) + IF( Z( I4-1 ).LE.TOL2*D ) THEN + Z( I4-1 ) = -ZERO + Z( I4-2*PP-2 ) = D + Z( I4-2*PP ) = ZERO + D = Z( I4+1 ) + ELSE IF( SAFMIN*Z( I4+1 ).LT.Z( I4-2*PP-2 ) .AND. + $ SAFMIN*Z( I4-2*PP-2 ).LT.Z( I4+1 ) ) THEN + TEMP = Z( I4+1 ) / Z( I4-2*PP-2 ) + Z( I4-2*PP ) = Z( I4-1 )*TEMP + D = D*TEMP + ELSE + Z( I4-2*PP ) = Z( I4+1 )*( Z( I4-1 ) / Z( I4-2*PP-2 ) ) + D = Z( I4+1 )*( D / Z( I4-2*PP-2 ) ) + END IF + EMIN = MIN( EMIN, Z( I4-2*PP ) ) + 60 CONTINUE + Z( 4*N0-PP-2 ) = D +* +* Now find qmax. +* + QMAX = Z( 4*I0-PP-2 ) + DO 70 I4 = 4*I0 - PP + 2, 4*N0 - PP - 2, 4 + QMAX = MAX( QMAX, Z( I4 ) ) + 70 CONTINUE +* +* Prepare for the next iteration on K. +* + PP = 1 - PP + 80 CONTINUE +* + ITER = 2 + NFAIL = 0 + NDIV = 2*( N0-I0 ) +* + DO 140 IWHILA = 1, N + 1 + IF( N0.LT.1 ) + $ GO TO 150 +* +* While array unfinished do +* +* E(N0) holds the value of SIGMA when submatrix in I0:N0 +* splits from the rest of the array, but is negated. +* + DESIG = ZERO + IF( N0.EQ.N ) THEN + SIGMA = ZERO + ELSE + SIGMA = -Z( 4*N0-1 ) + END IF + IF( SIGMA.LT.ZERO ) THEN + INFO = 1 + RETURN + END IF +* +* Find last unreduced submatrix's top index I0, find QMAX and +* EMIN. Find Gershgorin-type bound if Q's much greater than E's. +* + EMAX = ZERO + IF( N0.GT.I0 ) THEN + EMIN = ABS( Z( 4*N0-5 ) ) + ELSE + EMIN = ZERO + END IF + QMIN = Z( 4*N0-3 ) + QMAX = QMIN + DO 90 I4 = 4*N0, 8, -4 + IF( Z( I4-5 ).LE.ZERO ) + $ GO TO 100 + IF( QMIN.GE.FOUR*EMAX ) THEN + QMIN = MIN( QMIN, Z( I4-3 ) ) + EMAX = MAX( EMAX, Z( I4-5 ) ) + END IF + QMAX = MAX( QMAX, Z( I4-7 )+Z( I4-5 ) ) + EMIN = MIN( EMIN, Z( I4-5 ) ) + 90 CONTINUE + I4 = 4 +* + 100 CONTINUE + I0 = I4 / 4 +* +* Store EMIN for passing to SLASQ3. +* + Z( 4*N0-1 ) = EMIN +* +* Put -(initial shift) into DMIN. +* + DMIN = -MAX( ZERO, QMIN-TWO*SQRT( QMIN )*SQRT( EMAX ) ) +* +* Now I0:N0 is unreduced. PP = 0 for ping, PP = 1 for pong. +* + PP = 0 +* + NBIG = 30*( N0-I0+1 ) + DO 120 IWHILB = 1, NBIG + IF( I0.GT.N0 ) + $ GO TO 130 +* +* While submatrix unfinished take a good dqds step. +* + CALL SLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL, + $ ITER, NDIV, IEEE ) +* + PP = 1 - PP +* +* When EMIN is very small check for splits. +* + IF( PP.EQ.0 .AND. N0-I0.GE.3 ) THEN + IF( Z( 4*N0 ).LE.TOL2*QMAX .OR. + $ Z( 4*N0-1 ).LE.TOL2*SIGMA ) THEN + SPLT = I0 - 1 + QMAX = Z( 4*I0-3 ) + EMIN = Z( 4*I0-1 ) + OLDEMN = Z( 4*I0 ) + DO 110 I4 = 4*I0, 4*( N0-3 ), 4 + IF( Z( I4 ).LE.TOL2*Z( I4-3 ) .OR. + $ Z( I4-1 ).LE.TOL2*SIGMA ) THEN + Z( I4-1 ) = -SIGMA + SPLT = I4 / 4 + QMAX = ZERO + EMIN = Z( I4+3 ) + OLDEMN = Z( I4+4 ) + ELSE + QMAX = MAX( QMAX, Z( I4+1 ) ) + EMIN = MIN( EMIN, Z( I4-1 ) ) + OLDEMN = MIN( OLDEMN, Z( I4 ) ) + END IF + 110 CONTINUE + Z( 4*N0-1 ) = EMIN + Z( 4*N0 ) = OLDEMN + I0 = SPLT + 1 + END IF + END IF +* + 120 CONTINUE +* + INFO = 2 + RETURN +* +* end IWHILB +* + 130 CONTINUE +* + 140 CONTINUE +* + INFO = 3 + RETURN +* +* end IWHILA +* + 150 CONTINUE +* +* Move q's to the front. +* + DO 160 K = 2, N + Z( K ) = Z( 4*K-3 ) + 160 CONTINUE +* +* Sort and compute sum of eigenvalues. +* + CALL SLASRT( 'D', N, Z, IINFO ) +* + E = ZERO + DO 170 K = N, 1, -1 + E = E + Z( K ) + 170 CONTINUE +* +* Store trace, sum(eigenvalues) and information on performance. +* + Z( 2*N+1 ) = TRACE + Z( 2*N+2 ) = E + Z( 2*N+3 ) = REAL( ITER ) + Z( 2*N+4 ) = REAL( NDIV ) / REAL( N**2 ) + Z( 2*N+5 ) = HUNDRD*NFAIL / REAL( ITER ) + RETURN +* +* End of SLASQ2 +* + END diff --git a/costa/native/external/lapack/slasq3.f b/costa/native/external/lapack/slasq3.f new file mode 100644 index 000000000..d7c495eb0 --- /dev/null +++ b/costa/native/external/lapack/slasq3.f @@ -0,0 +1,298 @@ + SUBROUTINE SLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL, + $ ITER, NDIV, IEEE ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* May 17, 2000 +* +* .. Scalar Arguments .. + LOGICAL IEEE + INTEGER I0, ITER, N0, NDIV, NFAIL, PP + REAL DESIG, DMIN, QMAX, SIGMA +* .. +* .. Array Arguments .. + REAL Z( * ) +* .. +* +* Purpose +* ======= +* +* SLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. +* In case of failure it changes shifts, and tries again until output +* is positive. +* +* Arguments +* ========= +* +* I0 (input) INTEGER +* First index. +* +* N0 (input) INTEGER +* Last index. +* +* Z (input) REAL array, dimension ( 4*N ) +* Z holds the qd array. +* +* PP (input) INTEGER +* PP=0 for ping, PP=1 for pong. +* +* DMIN (output) REAL +* Minimum value of d. +* +* SIGMA (output) REAL +* Sum of shifts used in current segment. +* +* DESIG (input/output) REAL +* Lower order part of SIGMA +* +* QMAX (input) REAL +* Maximum value of q. +* +* NFAIL (output) INTEGER +* Number of times shift was too big. +* +* ITER (output) INTEGER +* Number of iterations. +* +* NDIV (output) INTEGER +* Number of divisions. +* +* TTYPE (output) INTEGER +* Shift type. +* +* IEEE (input) LOGICAL +* Flag for IEEE or non IEEE arithmetic (passed to SLASQ5). +* +* ===================================================================== +* +* .. Parameters .. + REAL CBIAS + PARAMETER ( CBIAS = 1.50E0 ) + REAL ZERO, QURTR, HALF, ONE, TWO, HUNDRD + PARAMETER ( ZERO = 0.0E0, QURTR = 0.250E0, HALF = 0.5E0, + $ ONE = 1.0E0, TWO = 2.0E0, HUNDRD = 100.0E0 ) +* .. +* .. Local Scalars .. + INTEGER IPN4, J4, N0IN, NN, TTYPE + REAL DMIN1, DMIN2, DN, DN1, DN2, EPS, S, SAFMIN, T, + $ TAU, TEMP, TOL, TOL2 +* .. +* .. External Subroutines .. + EXTERNAL SLASQ4, SLASQ5, SLASQ6 +* .. +* .. External Function .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MIN, SQRT +* .. +* .. Save statement .. + SAVE TTYPE + SAVE DMIN1, DMIN2, DN, DN1, DN2, TAU +* .. +* .. Data statement .. + DATA TTYPE / 0 / + DATA DMIN1 / ZERO /, DMIN2 / ZERO /, DN / ZERO /, + $ DN1 / ZERO /, DN2 / ZERO /, TAU / ZERO / +* .. +* .. Executable Statements .. +* + N0IN = N0 + EPS = SLAMCH( 'Precision' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + TOL = EPS*HUNDRD + TOL2 = TOL**2 +* +* Check for deflation. +* + 10 CONTINUE +* + IF( N0.LT.I0 ) + $ RETURN + IF( N0.EQ.I0 ) + $ GO TO 20 + NN = 4*N0 + PP + IF( N0.EQ.( I0+1 ) ) + $ GO TO 40 +* +* Check whether E(N0-1) is negligible, 1 eigenvalue. +* + IF( Z( NN-5 ).GT.TOL2*( SIGMA+Z( NN-3 ) ) .AND. + $ Z( NN-2*PP-4 ).GT.TOL2*Z( NN-7 ) ) + $ GO TO 30 +* + 20 CONTINUE +* + Z( 4*N0-3 ) = Z( 4*N0+PP-3 ) + SIGMA + N0 = N0 - 1 + GO TO 10 +* +* Check whether E(N0-2) is negligible, 2 eigenvalues. +* + 30 CONTINUE +* + IF( Z( NN-9 ).GT.TOL2*SIGMA .AND. + $ Z( NN-2*PP-8 ).GT.TOL2*Z( NN-11 ) ) + $ GO TO 50 +* + 40 CONTINUE +* + IF( Z( NN-3 ).GT.Z( NN-7 ) ) THEN + S = Z( NN-3 ) + Z( NN-3 ) = Z( NN-7 ) + Z( NN-7 ) = S + END IF + IF( Z( NN-5 ).GT.Z( NN-3 )*TOL2 ) THEN + T = HALF*( ( Z( NN-7 )-Z( NN-3 ) )+Z( NN-5 ) ) + S = Z( NN-3 )*( Z( NN-5 ) / T ) + IF( S.LE.T ) THEN + S = Z( NN-3 )*( Z( NN-5 ) / + $ ( T*( ONE+SQRT( ONE+S / T ) ) ) ) + ELSE + S = Z( NN-3 )*( Z( NN-5 ) / ( T+SQRT( T )*SQRT( T+S ) ) ) + END IF + T = Z( NN-7 ) + ( S+Z( NN-5 ) ) + Z( NN-3 ) = Z( NN-3 )*( Z( NN-7 ) / T ) + Z( NN-7 ) = T + END IF + Z( 4*N0-7 ) = Z( NN-7 ) + SIGMA + Z( 4*N0-3 ) = Z( NN-3 ) + SIGMA + N0 = N0 - 2 + GO TO 10 +* + 50 CONTINUE +* +* Reverse the qd-array, if warranted. +* + IF( DMIN.LE.ZERO .OR. N0.LT.N0IN ) THEN + IF( CBIAS*Z( 4*I0+PP-3 ).LT.Z( 4*N0+PP-3 ) ) THEN + IPN4 = 4*( I0+N0 ) + DO 60 J4 = 4*I0, 2*( I0+N0-1 ), 4 + TEMP = Z( J4-3 ) + Z( J4-3 ) = Z( IPN4-J4-3 ) + Z( IPN4-J4-3 ) = TEMP + TEMP = Z( J4-2 ) + Z( J4-2 ) = Z( IPN4-J4-2 ) + Z( IPN4-J4-2 ) = TEMP + TEMP = Z( J4-1 ) + Z( J4-1 ) = Z( IPN4-J4-5 ) + Z( IPN4-J4-5 ) = TEMP + TEMP = Z( J4 ) + Z( J4 ) = Z( IPN4-J4-4 ) + Z( IPN4-J4-4 ) = TEMP + 60 CONTINUE + IF( N0-I0.LE.4 ) THEN + Z( 4*N0+PP-1 ) = Z( 4*I0+PP-1 ) + Z( 4*N0-PP ) = Z( 4*I0-PP ) + END IF + DMIN2 = MIN( DMIN2, Z( 4*N0+PP-1 ) ) + Z( 4*N0+PP-1 ) = MIN( Z( 4*N0+PP-1 ), Z( 4*I0+PP-1 ), + $ Z( 4*I0+PP+3 ) ) + Z( 4*N0-PP ) = MIN( Z( 4*N0-PP ), Z( 4*I0-PP ), + $ Z( 4*I0-PP+4 ) ) + QMAX = MAX( QMAX, Z( 4*I0+PP-3 ), Z( 4*I0+PP+1 ) ) + DMIN = -ZERO + END IF + END IF +* + 70 CONTINUE +* + IF( DMIN.LT.ZERO .OR. SAFMIN*QMAX.LT.MIN( Z( 4*N0+PP-1 ), + $ Z( 4*N0+PP-9 ), DMIN2+Z( 4*N0-PP ) ) ) THEN +* +* Choose a shift. +* + CALL SLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, DN1, + $ DN2, TAU, TTYPE ) +* +* Call dqds until DMIN > 0. +* + 80 CONTINUE +* + CALL SLASQ5( I0, N0, Z, PP, TAU, DMIN, DMIN1, DMIN2, DN, + $ DN1, DN2, IEEE ) +* + NDIV = NDIV + ( N0-I0+2 ) + ITER = ITER + 1 +* +* Check status. +* + IF( DMIN.GE.ZERO .AND. DMIN1.GT.ZERO ) THEN +* +* Success. +* + GO TO 100 +* + ELSE IF( DMIN.LT.ZERO .AND. DMIN1.GT.ZERO .AND. + $ Z( 4*( N0-1 )-PP ).LT.TOL*( SIGMA+DN1 ) .AND. + $ ABS( DN ).LT.TOL*SIGMA ) THEN +* +* Convergence hidden by negative DN. +* + Z( 4*( N0-1 )-PP+2 ) = ZERO + DMIN = ZERO + GO TO 100 + ELSE IF( DMIN.LT.ZERO ) THEN +* +* TAU too big. Select new TAU and try again. +* + NFAIL = NFAIL + 1 + IF( TTYPE.LT.-22 ) THEN +* +* Failed twice. Play it safe. +* + TAU = ZERO + ELSE IF( DMIN1.GT.ZERO ) THEN +* +* Late failure. Gives excellent shift. +* + TAU = ( TAU+DMIN )*( ONE-TWO*EPS ) + TTYPE = TTYPE - 11 + ELSE +* +* Early failure. Divide by 4. +* + TAU = QURTR*TAU + TTYPE = TTYPE - 12 + END IF + GO TO 80 + ELSE IF( DMIN.NE.DMIN ) THEN +* +* NaN. +* + TAU = ZERO + GO TO 80 + ELSE +* +* Possible underflow. Play it safe. +* + GO TO 90 + END IF + END IF +* +* Risk of underflow. +* + 90 CONTINUE + CALL SLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN, DN1, DN2 ) + NDIV = NDIV + ( N0-I0+2 ) + ITER = ITER + 1 + TAU = ZERO +* + 100 CONTINUE + IF( TAU.LT.SIGMA ) THEN + DESIG = DESIG + TAU + T = SIGMA + DESIG + DESIG = DESIG - ( T-SIGMA ) + ELSE + T = SIGMA + TAU + DESIG = SIGMA - ( T-TAU ) + DESIG + END IF + SIGMA = T +* + RETURN +* +* End of SLASQ3 +* + END diff --git a/costa/native/external/lapack/slasq4.f b/costa/native/external/lapack/slasq4.f new file mode 100644 index 000000000..2e7d3ece8 --- /dev/null +++ b/costa/native/external/lapack/slasq4.f @@ -0,0 +1,330 @@ + SUBROUTINE SLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, + $ DN1, DN2, TAU, TTYPE ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER I0, N0, N0IN, PP, TTYPE + REAL DMIN, DMIN1, DMIN2, DN, DN1, DN2, TAU +* .. +* .. Array Arguments .. + REAL Z( * ) +* .. +* +* Purpose +* ======= +* +* SLASQ4 computes an approximation TAU to the smallest eigenvalue +* using values of d from the previous transform. +* +* I0 (input) INTEGER +* First index. +* +* N0 (input) INTEGER +* Last index. +* +* Z (input) REAL array, dimension ( 4*N ) +* Z holds the qd array. +* +* PP (input) INTEGER +* PP=0 for ping, PP=1 for pong. +* +* NOIN (input) INTEGER +* The value of N0 at start of EIGTEST. +* +* DMIN (input) REAL +* Minimum value of d. +* +* DMIN1 (input) REAL +* Minimum value of d, excluding D( N0 ). +* +* DMIN2 (input) REAL +* Minimum value of d, excluding D( N0 ) and D( N0-1 ). +* +* DN (input) REAL +* d(N) +* +* DN1 (input) REAL +* d(N-1) +* +* DN2 (input) REAL +* d(N-2) +* +* TAU (output) REAL +* This is the shift. +* +* TTYPE (output) INTEGER +* Shift type. +* +* Further Details +* =============== +* CNST1 = 9/16 +* +* ===================================================================== +* +* .. Parameters .. + REAL CNST1, CNST2, CNST3 + PARAMETER ( CNST1 = 0.5630E0, CNST2 = 1.010E0, + $ CNST3 = 1.050E0 ) + REAL QURTR, THIRD, HALF, ZERO, ONE, TWO, HUNDRD + PARAMETER ( QURTR = 0.250E0, THIRD = 0.3330E0, + $ HALF = 0.50E0, ZERO = 0.0E0, ONE = 1.0E0, + $ TWO = 2.0E0, HUNDRD = 100.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I4, NN, NP + REAL A2, B1, B2, G, GAM, GAP1, GAP2, S +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Save statement .. + SAVE G +* .. +* .. Data statement .. + DATA G / ZERO / +* .. +* .. Executable Statements .. +* +* A negative DMIN forces the shift to take that absolute value +* TTYPE records the type of shift. +* + IF( DMIN.LE.ZERO ) THEN + TAU = -DMIN + TTYPE = -1 + RETURN + END IF +* + NN = 4*N0 + PP + IF( N0IN.EQ.N0 ) THEN +* +* No eigenvalues deflated. +* + IF( DMIN.EQ.DN .OR. DMIN.EQ.DN1 ) THEN +* + B1 = SQRT( Z( NN-3 ) )*SQRT( Z( NN-5 ) ) + B2 = SQRT( Z( NN-7 ) )*SQRT( Z( NN-9 ) ) + A2 = Z( NN-7 ) + Z( NN-5 ) +* +* Cases 2 and 3. +* + IF( DMIN.EQ.DN .AND. DMIN1.EQ.DN1 ) THEN + GAP2 = DMIN2 - A2 - DMIN2*QURTR + IF( GAP2.GT.ZERO .AND. GAP2.GT.B2 ) THEN + GAP1 = A2 - DN - ( B2 / GAP2 )*B2 + ELSE + GAP1 = A2 - DN - ( B1+B2 ) + END IF + IF( GAP1.GT.ZERO .AND. GAP1.GT.B1 ) THEN + S = MAX( DN-( B1 / GAP1 )*B1, HALF*DMIN ) + TTYPE = -2 + ELSE + S = ZERO + IF( DN.GT.B1 ) + $ S = DN - B1 + IF( A2.GT.( B1+B2 ) ) + $ S = MIN( S, A2-( B1+B2 ) ) + S = MAX( S, THIRD*DMIN ) + TTYPE = -3 + END IF + ELSE +* +* Case 4. +* + TTYPE = -4 + S = QURTR*DMIN + IF( DMIN.EQ.DN ) THEN + GAM = DN + A2 = ZERO + IF( Z( NN-5 ) .GT. Z( NN-7 ) ) + $ RETURN + B2 = Z( NN-5 ) / Z( NN-7 ) + NP = NN - 9 + ELSE + NP = NN - 2*PP + B2 = Z( NP-2 ) + GAM = DN1 + IF( Z( NP-4 ) .GT. Z( NP-2 ) ) + $ RETURN + A2 = Z( NP-4 ) / Z( NP-2 ) + IF( Z( NN-9 ) .GT. Z( NN-11 ) ) + $ RETURN + B2 = Z( NN-9 ) / Z( NN-11 ) + NP = NN - 13 + END IF +* +* Approximate contribution to norm squared from I < NN-1. +* + A2 = A2 + B2 + DO 10 I4 = NP, 4*I0 - 1 + PP, -4 + IF( B2.EQ.ZERO ) + $ GO TO 20 + B1 = B2 + IF( Z( I4 ) .GT. Z( I4-2 ) ) + $ RETURN + B2 = B2*( Z( I4 ) / Z( I4-2 ) ) + A2 = A2 + B2 + IF( HUNDRD*MAX( B2, B1 ).LT.A2 .OR. CNST1.LT.A2 ) + $ GO TO 20 + 10 CONTINUE + 20 CONTINUE + A2 = CNST3*A2 +* +* Rayleigh quotient residual bound. +* + IF( A2.LT.CNST1 ) + $ S = GAM*( ONE-SQRT( A2 ) ) / ( ONE+A2 ) + END IF + ELSE IF( DMIN.EQ.DN2 ) THEN +* +* Case 5. +* + TTYPE = -5 + S = QURTR*DMIN +* +* Compute contribution to norm squared from I > NN-2. +* + NP = NN - 2*PP + B1 = Z( NP-2 ) + B2 = Z( NP-6 ) + GAM = DN2 + IF( Z( NP-8 ).GT.B2 .OR. Z( NP-4 ).GT.B1 ) + $ RETURN + A2 = ( Z( NP-8 ) / B2 )*( ONE+Z( NP-4 ) / B1 ) +* +* Approximate contribution to norm squared from I < NN-2. +* + IF( N0-I0.GT.2 ) THEN + B2 = Z( NN-13 ) / Z( NN-15 ) + A2 = A2 + B2 + DO 30 I4 = NN - 17, 4*I0 - 1 + PP, -4 + IF( B2.EQ.ZERO ) + $ GO TO 40 + B1 = B2 + IF( Z( I4 ) .GT. Z( I4-2 ) ) + $ RETURN + B2 = B2*( Z( I4 ) / Z( I4-2 ) ) + A2 = A2 + B2 + IF( HUNDRD*MAX( B2, B1 ).LT.A2 .OR. CNST1.LT.A2 ) + $ GO TO 40 + 30 CONTINUE + 40 CONTINUE + A2 = CNST3*A2 + END IF +* + IF( A2.LT.CNST1 ) + $ S = GAM*( ONE-SQRT( A2 ) ) / ( ONE+A2 ) + ELSE +* +* Case 6, no information to guide us. +* + IF( TTYPE.EQ.-6 ) THEN + G = G + THIRD*( ONE-G ) + ELSE IF( TTYPE.EQ.-18 ) THEN + G = QURTR*THIRD + ELSE + G = QURTR + END IF + S = G*DMIN + TTYPE = -6 + END IF +* + ELSE IF( N0IN.EQ.( N0+1 ) ) THEN +* +* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. +* + IF( DMIN1.EQ.DN1 .AND. DMIN2.EQ.DN2 ) THEN +* +* Cases 7 and 8. +* + TTYPE = -7 + S = THIRD*DMIN1 + IF( Z( NN-5 ).GT.Z( NN-7 ) ) + $ RETURN + B1 = Z( NN-5 ) / Z( NN-7 ) + B2 = B1 + IF( B2.EQ.ZERO ) + $ GO TO 60 + DO 50 I4 = 4*N0 - 9 + PP, 4*I0 - 1 + PP, -4 + A2 = B1 + IF( Z( I4 ).GT.Z( I4-2 ) ) + $ RETURN + B1 = B1*( Z( I4 ) / Z( I4-2 ) ) + B2 = B2 + B1 + IF( HUNDRD*MAX( B1, A2 ).LT.B2 ) + $ GO TO 60 + 50 CONTINUE + 60 CONTINUE + B2 = SQRT( CNST3*B2 ) + A2 = DMIN1 / ( ONE+B2**2 ) + GAP2 = HALF*DMIN2 - A2 + IF( GAP2.GT.ZERO .AND. GAP2.GT.B2*A2 ) THEN + S = MAX( S, A2*( ONE-CNST2*A2*( B2 / GAP2 )*B2 ) ) + ELSE + S = MAX( S, A2*( ONE-CNST2*B2 ) ) + TTYPE = -8 + END IF + ELSE +* +* Case 9. +* + S = QURTR*DMIN1 + IF( DMIN1.EQ.DN1 ) + $ S = HALF*DMIN1 + TTYPE = -9 + END IF +* + ELSE IF( N0IN.EQ.( N0+2 ) ) THEN +* +* Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. +* +* Cases 10 and 11. +* + IF( DMIN2.EQ.DN2 .AND. TWO*Z( NN-5 ).LT.Z( NN-7 ) ) THEN + TTYPE = -10 + S = THIRD*DMIN2 + IF( Z( NN-5 ).GT.Z( NN-7 ) ) + $ RETURN + B1 = Z( NN-5 ) / Z( NN-7 ) + B2 = B1 + IF( B2.EQ.ZERO ) + $ GO TO 80 + DO 70 I4 = 4*N0 - 9 + PP, 4*I0 - 1 + PP, -4 + IF( Z( I4 ).GT.Z( I4-2 ) ) + $ RETURN + B1 = B1*( Z( I4 ) / Z( I4-2 ) ) + B2 = B2 + B1 + IF( HUNDRD*B1.LT.B2 ) + $ GO TO 80 + 70 CONTINUE + 80 CONTINUE + B2 = SQRT( CNST3*B2 ) + A2 = DMIN2 / ( ONE+B2**2 ) + GAP2 = Z( NN-7 ) + Z( NN-9 ) - + $ SQRT( Z( NN-11 ) )*SQRT( Z( NN-9 ) ) - A2 + IF( GAP2.GT.ZERO .AND. GAP2.GT.B2*A2 ) THEN + S = MAX( S, A2*( ONE-CNST2*A2*( B2 / GAP2 )*B2 ) ) + ELSE + S = MAX( S, A2*( ONE-CNST2*B2 ) ) + END IF + ELSE + S = QURTR*DMIN2 + TTYPE = -11 + END IF + ELSE IF( N0IN.GT.( N0+2 ) ) THEN +* +* Case 12, more than two eigenvalues deflated. No information. +* + S = ZERO + TTYPE = -12 + END IF +* + TAU = S + RETURN +* +* End of SLASQ4 +* + END diff --git a/costa/native/external/lapack/slasq5.f b/costa/native/external/lapack/slasq5.f new file mode 100644 index 000000000..7e5e00719 --- /dev/null +++ b/costa/native/external/lapack/slasq5.f @@ -0,0 +1,196 @@ + SUBROUTINE SLASQ5( I0, N0, Z, PP, TAU, DMIN, DMIN1, DMIN2, DN, + $ DNM1, DNM2, IEEE ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* May 17, 2000 +* +* .. Scalar Arguments .. + LOGICAL IEEE + INTEGER I0, N0, PP + REAL DMIN, DMIN1, DMIN2, DN, DNM1, DNM2, TAU +* .. +* .. Array Arguments .. + REAL Z( * ) +* .. +* +* Purpose +* ======= +* +* SLASQ5 computes one dqds transform in ping-pong form, one +* version for IEEE machines another for non IEEE machines. +* +* Arguments +* ========= +* +* I0 (input) INTEGER +* First index. +* +* N0 (input) INTEGER +* Last index. +* +* Z (input) REAL array, dimension ( 4*N ) +* Z holds the qd array. EMIN is stored in Z(4*N0) to avoid +* an extra argument. +* +* PP (input) INTEGER +* PP=0 for ping, PP=1 for pong. +* +* TAU (input) REAL +* This is the shift. +* +* DMIN (output) REAL +* Minimum value of d. +* +* DMIN1 (output) REAL +* Minimum value of d, excluding D( N0 ). +* +* DMIN2 (output) REAL +* Minimum value of d, excluding D( N0 ) and D( N0-1 ). +* +* DN (output) REAL +* d(N0), the last value of d. +* +* DNM1 (output) REAL +* d(N0-1). +* +* DNM2 (output) REAL +* d(N0-2). +* +* IEEE (input) LOGICAL +* Flag for IEEE or non IEEE arithmetic. +* +* ===================================================================== +* +* .. Parameter .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + INTEGER J4, J4P2 + REAL D, EMIN, TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( ( N0-I0-1 ).LE.0 ) + $ RETURN +* + J4 = 4*I0 + PP - 3 + EMIN = Z( J4+4 ) + D = Z( J4 ) - TAU + DMIN = D + DMIN1 = -Z( J4 ) +* + IF( IEEE ) THEN +* +* Code for IEEE arithmetic. +* + IF( PP.EQ.0 ) THEN + DO 10 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + TEMP = Z( J4+1 ) / Z( J4-2 ) + D = D*TEMP - TAU + DMIN = MIN( DMIN, D ) + Z( J4 ) = Z( J4-1 )*TEMP + EMIN = MIN( Z( J4 ), EMIN ) + 10 CONTINUE + ELSE + DO 20 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + TEMP = Z( J4+2 ) / Z( J4-3 ) + D = D*TEMP - TAU + DMIN = MIN( DMIN, D ) + Z( J4-1 ) = Z( J4 )*TEMP + EMIN = MIN( Z( J4-1 ), EMIN ) + 20 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU + DMIN = MIN( DMIN, DN ) +* + ELSE +* +* Code for non IEEE arithmetic. +* + IF( PP.EQ.0 ) THEN + DO 30 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + IF( D.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4+1 )*( Z( J4-1 ) / Z( J4-2 ) ) + D = Z( J4+1 )*( D / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4 ) ) + 30 CONTINUE + ELSE + DO 40 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + IF( D.LT.ZERO ) THEN + RETURN + ELSE + Z( J4-1 ) = Z( J4+2 )*( Z( J4 ) / Z( J4-3 ) ) + D = Z( J4+2 )*( D / Z( J4-3 ) ) - TAU + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4-1 ) ) + 40 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + IF( DNM2.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + IF( DNM1.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, DN ) +* + END IF +* + Z( J4+2 ) = DN + Z( 4*N0-PP ) = EMIN + RETURN +* +* End of SLASQ5 +* + END diff --git a/costa/native/external/lapack/slasq6.f b/costa/native/external/lapack/slasq6.f new file mode 100644 index 000000000..1543a3c79 --- /dev/null +++ b/costa/native/external/lapack/slasq6.f @@ -0,0 +1,176 @@ + SUBROUTINE SLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN, + $ DNM1, DNM2 ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER I0, N0, PP + REAL DMIN, DMIN1, DMIN2, DN, DNM1, DNM2 +* .. +* .. Array Arguments .. + REAL Z( * ) +* .. +* +* Purpose +* ======= +* +* SLASQ6 computes one dqd (shift equal to zero) transform in +* ping-pong form, with protection against underflow and overflow. +* +* Arguments +* ========= +* +* I0 (input) INTEGER +* First index. +* +* N0 (input) INTEGER +* Last index. +* +* Z (input) REAL array, dimension ( 4*N ) +* Z holds the qd array. EMIN is stored in Z(4*N0) to avoid +* an extra argument. +* +* PP (input) INTEGER +* PP=0 for ping, PP=1 for pong. +* +* DMIN (output) REAL +* Minimum value of d. +* +* DMIN1 (output) REAL +* Minimum value of d, excluding D( N0 ). +* +* DMIN2 (output) REAL +* Minimum value of d, excluding D( N0 ) and D( N0-1 ). +* +* DN (output) REAL +* d(N0), the last value of d. +* +* DNM1 (output) REAL +* d(N0-1). +* +* DNM2 (output) REAL +* d(N0-2). +* +* ===================================================================== +* +* .. Parameter .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + INTEGER J4, J4P2 + REAL D, EMIN, SAFMIN, TEMP +* .. +* .. External Function .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( ( N0-I0-1 ).LE.0 ) + $ RETURN +* + SAFMIN = SLAMCH( 'Safe minimum' ) + J4 = 4*I0 + PP - 3 + EMIN = Z( J4+4 ) + D = Z( J4 ) + DMIN = D +* + IF( PP.EQ.0 ) THEN + DO 10 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + IF( Z( J4-2 ).EQ.ZERO ) THEN + Z( J4 ) = ZERO + D = Z( J4+1 ) + DMIN = D + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4+1 ).LT.Z( J4-2 ) .AND. + $ SAFMIN*Z( J4-2 ).LT.Z( J4+1 ) ) THEN + TEMP = Z( J4+1 ) / Z( J4-2 ) + Z( J4 ) = Z( J4-1 )*TEMP + D = D*TEMP + ELSE + Z( J4 ) = Z( J4+1 )*( Z( J4-1 ) / Z( J4-2 ) ) + D = Z( J4+1 )*( D / Z( J4-2 ) ) + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4 ) ) + 10 CONTINUE + ELSE + DO 20 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + IF( Z( J4-3 ).EQ.ZERO ) THEN + Z( J4-1 ) = ZERO + D = Z( J4+2 ) + DMIN = D + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4+2 ).LT.Z( J4-3 ) .AND. + $ SAFMIN*Z( J4-3 ).LT.Z( J4+2 ) ) THEN + TEMP = Z( J4+2 ) / Z( J4-3 ) + Z( J4-1 ) = Z( J4 )*TEMP + D = D*TEMP + ELSE + Z( J4-1 ) = Z( J4+2 )*( Z( J4 ) / Z( J4-3 ) ) + D = Z( J4+2 )*( D / Z( J4-3 ) ) + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4-1 ) ) + 20 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + IF( Z( J4-2 ).EQ.ZERO ) THEN + Z( J4 ) = ZERO + DNM1 = Z( J4P2+2 ) + DMIN = DNM1 + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4P2+2 ).LT.Z( J4-2 ) .AND. + $ SAFMIN*Z( J4-2 ).LT.Z( J4P2+2 ) ) THEN + TEMP = Z( J4P2+2 ) / Z( J4-2 ) + Z( J4 ) = Z( J4P2 )*TEMP + DNM1 = DNM2*TEMP + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) + END IF + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + IF( Z( J4-2 ).EQ.ZERO ) THEN + Z( J4 ) = ZERO + DN = Z( J4P2+2 ) + DMIN = DN + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4P2+2 ).LT.Z( J4-2 ) .AND. + $ SAFMIN*Z( J4-2 ).LT.Z( J4P2+2 ) ) THEN + TEMP = Z( J4P2+2 ) / Z( J4-2 ) + Z( J4 ) = Z( J4P2 )*TEMP + DN = DNM1*TEMP + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) + END IF + DMIN = MIN( DMIN, DN ) +* + Z( J4+2 ) = DN + Z( 4*N0-PP ) = EMIN + RETURN +* +* End of SLASQ6 +* + END diff --git a/costa/native/external/lapack/slasr.f b/costa/native/external/lapack/slasr.f new file mode 100644 index 000000000..5f3d6f340 --- /dev/null +++ b/costa/native/external/lapack/slasr.f @@ -0,0 +1,325 @@ + SUBROUTINE SLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, PIVOT, SIDE + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( * ), S( * ) +* .. +* +* Purpose +* ======= +* +* SLASR performs the transformation +* +* A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) +* +* A := A*P', when SIDE = 'R' or 'r' ( Right-hand side ) +* +* where A is an m by n real matrix and P is an orthogonal matrix, +* consisting of a sequence of plane rotations determined by the +* parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l' +* and z = n when SIDE = 'R' or 'r' ): +* +* When DIRECT = 'F' or 'f' ( Forward sequence ) then +* +* P = P( z - 1 )*...*P( 2 )*P( 1 ), +* +* and when DIRECT = 'B' or 'b' ( Backward sequence ) then +* +* P = P( 1 )*P( 2 )*...*P( z - 1 ), +* +* where P( k ) is a plane rotation matrix for the following planes: +* +* when PIVOT = 'V' or 'v' ( Variable pivot ), +* the plane ( k, k + 1 ) +* +* when PIVOT = 'T' or 't' ( Top pivot ), +* the plane ( 1, k + 1 ) +* +* when PIVOT = 'B' or 'b' ( Bottom pivot ), +* the plane ( k, z ) +* +* c( k ) and s( k ) must contain the cosine and sine that define the +* matrix P( k ). The two by two plane rotation part of the matrix +* P( k ), R( k ), is assumed to be of the form +* +* R( k ) = ( c( k ) s( k ) ). +* ( -s( k ) c( k ) ) +* +* This version vectorises across rows of the array A when SIDE = 'L'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* Specifies whether the plane rotation matrix P is applied to +* A on the left or the right. +* = 'L': Left, compute A := P*A +* = 'R': Right, compute A:= A*P' +* +* DIRECT (input) CHARACTER*1 +* Specifies whether P is a forward or backward sequence of +* plane rotations. +* = 'F': Forward, P = P( z - 1 )*...*P( 2 )*P( 1 ) +* = 'B': Backward, P = P( 1 )*P( 2 )*...*P( z - 1 ) +* +* PIVOT (input) CHARACTER*1 +* Specifies the plane for which P(k) is a plane rotation +* matrix. +* = 'V': Variable pivot, the plane (k,k+1) +* = 'T': Top pivot, the plane (1,k+1) +* = 'B': Bottom pivot, the plane (k,z) +* +* M (input) INTEGER +* The number of rows of the matrix A. If m <= 1, an immediate +* return is effected. +* +* N (input) INTEGER +* The number of columns of the matrix A. If n <= 1, an +* immediate return is effected. +* +* C, S (input) REAL arrays, dimension +* (M-1) if SIDE = 'L' +* (N-1) if SIDE = 'R' +* c(k) and s(k) contain the cosine and sine that define the +* matrix P(k). The two by two plane rotation part of the +* matrix P(k), R(k), is assumed to be of the form +* R( k ) = ( c( k ) s( k ) ). +* ( -s( k ) c( k ) ) +* +* A (input/output) REAL array, dimension (LDA,N) +* The m by n matrix A. On exit, A is overwritten by P*A if +* SIDE = 'R' or by A*P' if SIDE = 'L'. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J + REAL CTEMP, STEMP, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.( LSAME( SIDE, 'L' ) .OR. LSAME( SIDE, 'R' ) ) ) THEN + INFO = 1 + ELSE IF( .NOT.( LSAME( PIVOT, 'V' ) .OR. LSAME( PIVOT, + $ 'T' ) .OR. LSAME( PIVOT, 'B' ) ) ) THEN + INFO = 2 + ELSE IF( .NOT.( LSAME( DIRECT, 'F' ) .OR. LSAME( DIRECT, 'B' ) ) ) + $ THEN + INFO = 3 + ELSE IF( M.LT.0 ) THEN + INFO = 4 + ELSE IF( N.LT.0 ) THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = 9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASR ', INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form P * A +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 20 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 10 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 40 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 30 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 30 CONTINUE + END IF + 40 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 60 J = 2, M + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 50 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 80 J = M, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 70 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 70 CONTINUE + END IF + 80 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 100 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 90 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 90 CONTINUE + END IF + 100 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 120 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 110 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 110 CONTINUE + END IF + 120 CONTINUE + END IF + END IF + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form A * P' +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 140 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 130 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 130 CONTINUE + END IF + 140 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 160 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 150 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 150 CONTINUE + END IF + 160 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 180 J = 2, N + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 170 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 170 CONTINUE + END IF + 180 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 200 J = N, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 190 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 190 CONTINUE + END IF + 200 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 220 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 210 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 210 CONTINUE + END IF + 220 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 240 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 230 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 230 CONTINUE + END IF + 240 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of SLASR +* + END diff --git a/costa/native/external/lapack/slasrt.f b/costa/native/external/lapack/slasrt.f new file mode 100644 index 000000000..debdd820d --- /dev/null +++ b/costa/native/external/lapack/slasrt.f @@ -0,0 +1,244 @@ + SUBROUTINE SLASRT( ID, N, D, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER ID + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL D( * ) +* .. +* +* Purpose +* ======= +* +* Sort the numbers in D in increasing order (if ID = 'I') or +* in decreasing order (if ID = 'D' ). +* +* Use Quick Sort, reverting to Insertion sort on arrays of +* size <= 20. Dimension of STACK limits N to about 2**32. +* +* Arguments +* ========= +* +* ID (input) CHARACTER*1 +* = 'I': sort D in increasing order; +* = 'D': sort D in decreasing order. +* +* N (input) INTEGER +* The length of the array D. +* +* D (input/output) REAL array, dimension (N) +* On entry, the array to be sorted. +* On exit, D has been sorted into increasing order +* (D(1) <= ... <= D(N) ) or into decreasing order +* (D(1) >= ... >= D(N) ), depending on ID. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER SELECT + PARAMETER ( SELECT = 20 ) +* .. +* .. Local Scalars .. + INTEGER DIR, ENDD, I, J, START, STKPNT + REAL D1, D2, D3, DMNMX, TMP +* .. +* .. Local Arrays .. + INTEGER STACK( 2, 32 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input paramters. +* + INFO = 0 + DIR = -1 + IF( LSAME( ID, 'D' ) ) THEN + DIR = 0 + ELSE IF( LSAME( ID, 'I' ) ) THEN + DIR = 1 + END IF + IF( DIR.EQ.-1 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASRT', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + STKPNT = 1 + STACK( 1, 1 ) = 1 + STACK( 2, 1 ) = N + 10 CONTINUE + START = STACK( 1, STKPNT ) + ENDD = STACK( 2, STKPNT ) + STKPNT = STKPNT - 1 + IF( ENDD-START.LE.SELECT .AND. ENDD-START.GT.0 ) THEN +* +* Do Insertion sort on D( START:ENDD ) +* + IF( DIR.EQ.0 ) THEN +* +* Sort into decreasing order +* + DO 30 I = START + 1, ENDD + DO 20 J = I, START + 1, -1 + IF( D( J ).GT.D( J-1 ) ) THEN + DMNMX = D( J ) + D( J ) = D( J-1 ) + D( J-1 ) = DMNMX + ELSE + GO TO 30 + END IF + 20 CONTINUE + 30 CONTINUE +* + ELSE +* +* Sort into increasing order +* + DO 50 I = START + 1, ENDD + DO 40 J = I, START + 1, -1 + IF( D( J ).LT.D( J-1 ) ) THEN + DMNMX = D( J ) + D( J ) = D( J-1 ) + D( J-1 ) = DMNMX + ELSE + GO TO 50 + END IF + 40 CONTINUE + 50 CONTINUE +* + END IF +* + ELSE IF( ENDD-START.GT.SELECT ) THEN +* +* Partition D( START:ENDD ) and stack parts, largest one first +* +* Choose partition entry as median of 3 +* + D1 = D( START ) + D2 = D( ENDD ) + I = ( START+ENDD ) / 2 + D3 = D( I ) + IF( D1.LT.D2 ) THEN + IF( D3.LT.D1 ) THEN + DMNMX = D1 + ELSE IF( D3.LT.D2 ) THEN + DMNMX = D3 + ELSE + DMNMX = D2 + END IF + ELSE + IF( D3.LT.D2 ) THEN + DMNMX = D2 + ELSE IF( D3.LT.D1 ) THEN + DMNMX = D3 + ELSE + DMNMX = D1 + END IF + END IF +* + IF( DIR.EQ.0 ) THEN +* +* Sort into decreasing order +* + I = START - 1 + J = ENDD + 1 + 60 CONTINUE + 70 CONTINUE + J = J - 1 + IF( D( J ).LT.DMNMX ) + $ GO TO 70 + 80 CONTINUE + I = I + 1 + IF( D( I ).GT.DMNMX ) + $ GO TO 80 + IF( I.LT.J ) THEN + TMP = D( I ) + D( I ) = D( J ) + D( J ) = TMP + GO TO 60 + END IF + IF( J-START.GT.ENDD-J-1 ) THEN + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + ELSE + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + END IF + ELSE +* +* Sort into increasing order +* + I = START - 1 + J = ENDD + 1 + 90 CONTINUE + 100 CONTINUE + J = J - 1 + IF( D( J ).GT.DMNMX ) + $ GO TO 100 + 110 CONTINUE + I = I + 1 + IF( D( I ).LT.DMNMX ) + $ GO TO 110 + IF( I.LT.J ) THEN + TMP = D( I ) + D( I ) = D( J ) + D( J ) = TMP + GO TO 90 + END IF + IF( J-START.GT.ENDD-J-1 ) THEN + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + ELSE + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + END IF + END IF + END IF + IF( STKPNT.GT.0 ) + $ GO TO 10 + RETURN +* +* End of SLASRT +* + END diff --git a/costa/native/external/lapack/slassq.f b/costa/native/external/lapack/slassq.f new file mode 100644 index 000000000..c58ddb43d --- /dev/null +++ b/costa/native/external/lapack/slassq.f @@ -0,0 +1,89 @@ + SUBROUTINE SLASSQ( N, X, INCX, SCALE, SUMSQ ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INCX, N + REAL SCALE, SUMSQ +* .. +* .. Array Arguments .. + REAL X( * ) +* .. +* +* Purpose +* ======= +* +* SLASSQ returns the values scl and smsq such that +* +* ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, +* +* where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is +* assumed to be non-negative and scl returns the value +* +* scl = max( scale, abs( x( i ) ) ). +* +* scale and sumsq must be supplied in SCALE and SUMSQ and +* scl and smsq are overwritten on SCALE and SUMSQ respectively. +* +* The routine makes only one pass through the vector x. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of elements to be used from the vector X. +* +* X (input) REAL array, dimension (N) +* The vector for which a scaled sum of squares is computed. +* x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. +* +* INCX (input) INTEGER +* The increment between successive values of the vector X. +* INCX > 0. +* +* SCALE (input/output) REAL +* On entry, the value scale in the equation above. +* On exit, SCALE is overwritten with scl , the scaling factor +* for the sum of squares. +* +* SUMSQ (input/output) REAL +* On entry, the value sumsq in the equation above. +* On exit, SUMSQ is overwritten with smsq , the basic sum of +* squares from which scl has been factored out. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER IX + REAL ABSXI +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + IF( N.GT.0 ) THEN + DO 10 IX = 1, 1 + ( N-1 )*INCX, INCX + IF( X( IX ).NE.ZERO ) THEN + ABSXI = ABS( X( IX ) ) + IF( SCALE.LT.ABSXI ) THEN + SUMSQ = 1 + SUMSQ*( SCALE / ABSXI )**2 + SCALE = ABSXI + ELSE + SUMSQ = SUMSQ + ( ABSXI / SCALE )**2 + END IF + END IF + 10 CONTINUE + END IF + RETURN +* +* End of SLASSQ +* + END diff --git a/costa/native/external/lapack/slasv2.f b/costa/native/external/lapack/slasv2.f new file mode 100644 index 000000000..7073d9e58 --- /dev/null +++ b/costa/native/external/lapack/slasv2.f @@ -0,0 +1,250 @@ + SUBROUTINE SLASV2( F, G, H, SSMIN, SSMAX, SNR, CSR, SNL, CSL ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + REAL CSL, CSR, F, G, H, SNL, SNR, SSMAX, SSMIN +* .. +* +* Purpose +* ======= +* +* SLASV2 computes the singular value decomposition of a 2-by-2 +* triangular matrix +* [ F G ] +* [ 0 H ]. +* On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the +* smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and +* right singular vectors for abs(SSMAX), giving the decomposition +* +* [ CSL SNL ] [ F G ] [ CSR -SNR ] = [ SSMAX 0 ] +* [-SNL CSL ] [ 0 H ] [ SNR CSR ] [ 0 SSMIN ]. +* +* Arguments +* ========= +* +* F (input) REAL +* The (1,1) element of the 2-by-2 matrix. +* +* G (input) REAL +* The (1,2) element of the 2-by-2 matrix. +* +* H (input) REAL +* The (2,2) element of the 2-by-2 matrix. +* +* SSMIN (output) REAL +* abs(SSMIN) is the smaller singular value. +* +* SSMAX (output) REAL +* abs(SSMAX) is the larger singular value. +* +* SNL (output) REAL +* CSL (output) REAL +* The vector (CSL, SNL) is a unit left singular vector for the +* singular value abs(SSMAX). +* +* SNR (output) REAL +* CSR (output) REAL +* The vector (CSR, SNR) is a unit right singular vector for the +* singular value abs(SSMAX). +* +* Further Details +* =============== +* +* Any input parameter may be aliased with any output parameter. +* +* Barring over/underflow and assuming a guard digit in subtraction, all +* output quantities are correct to within a few units in the last +* place (ulps). +* +* In IEEE arithmetic, the code works correctly if one matrix element is +* infinite. +* +* Overflow will not occur unless the largest singular value itself +* overflows or is within a few ulps of overflow. (On machines with +* partial overflow, like the Cray, overflow may occur if the largest +* singular value is within a factor of 2 of overflow.) +* +* Underflow is harmless if underflow is gradual. Otherwise, results +* may correspond to a matrix modified by perturbations of size near +* the underflow threshold. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL HALF + PARAMETER ( HALF = 0.5E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + REAL TWO + PARAMETER ( TWO = 2.0E0 ) + REAL FOUR + PARAMETER ( FOUR = 4.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL GASMAL, SWAP + INTEGER PMAX + REAL A, CLT, CRT, D, FA, FT, GA, GT, HA, HT, L, M, + $ MM, R, S, SLT, SRT, T, TEMP, TSIGN, TT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Executable Statements .. +* + FT = F + FA = ABS( FT ) + HT = H + HA = ABS( H ) +* +* PMAX points to the maximum absolute element of matrix +* PMAX = 1 if F largest in absolute values +* PMAX = 2 if G largest in absolute values +* PMAX = 3 if H largest in absolute values +* + PMAX = 1 + SWAP = ( HA.GT.FA ) + IF( SWAP ) THEN + PMAX = 3 + TEMP = FT + FT = HT + HT = TEMP + TEMP = FA + FA = HA + HA = TEMP +* +* Now FA .ge. HA +* + END IF + GT = G + GA = ABS( GT ) + IF( GA.EQ.ZERO ) THEN +* +* Diagonal matrix +* + SSMIN = HA + SSMAX = FA + CLT = ONE + CRT = ONE + SLT = ZERO + SRT = ZERO + ELSE + GASMAL = .TRUE. + IF( GA.GT.FA ) THEN + PMAX = 2 + IF( ( FA / GA ).LT.SLAMCH( 'EPS' ) ) THEN +* +* Case of very large GA +* + GASMAL = .FALSE. + SSMAX = GA + IF( HA.GT.ONE ) THEN + SSMIN = FA / ( GA / HA ) + ELSE + SSMIN = ( FA / GA )*HA + END IF + CLT = ONE + SLT = HT / GT + SRT = ONE + CRT = FT / GT + END IF + END IF + IF( GASMAL ) THEN +* +* Normal case +* + D = FA - HA + IF( D.EQ.FA ) THEN +* +* Copes with infinite F or H +* + L = ONE + ELSE + L = D / FA + END IF +* +* Note that 0 .le. L .le. 1 +* + M = GT / FT +* +* Note that abs(M) .le. 1/macheps +* + T = TWO - L +* +* Note that T .ge. 1 +* + MM = M*M + TT = T*T + S = SQRT( TT+MM ) +* +* Note that 1 .le. S .le. 1 + 1/macheps +* + IF( L.EQ.ZERO ) THEN + R = ABS( M ) + ELSE + R = SQRT( L*L+MM ) + END IF +* +* Note that 0 .le. R .le. 1 + 1/macheps +* + A = HALF*( S+R ) +* +* Note that 1 .le. A .le. 1 + abs(M) +* + SSMIN = HA / A + SSMAX = FA*A + IF( MM.EQ.ZERO ) THEN +* +* Note that M is very tiny +* + IF( L.EQ.ZERO ) THEN + T = SIGN( TWO, FT )*SIGN( ONE, GT ) + ELSE + T = GT / SIGN( D, FT ) + M / T + END IF + ELSE + T = ( M / ( S+T )+M / ( R+L ) )*( ONE+A ) + END IF + L = SQRT( T*T+FOUR ) + CRT = TWO / L + SRT = T / L + CLT = ( CRT+SRT*M ) / A + SLT = ( HT / FT )*SRT / A + END IF + END IF + IF( SWAP ) THEN + CSL = SRT + SNL = CRT + CSR = SLT + SNR = CLT + ELSE + CSL = CLT + SNL = SLT + CSR = CRT + SNR = SRT + END IF +* +* Correct signs of SSMAX and SSMIN +* + IF( PMAX.EQ.1 ) + $ TSIGN = SIGN( ONE, CSR )*SIGN( ONE, CSL )*SIGN( ONE, F ) + IF( PMAX.EQ.2 ) + $ TSIGN = SIGN( ONE, SNR )*SIGN( ONE, CSL )*SIGN( ONE, G ) + IF( PMAX.EQ.3 ) + $ TSIGN = SIGN( ONE, SNR )*SIGN( ONE, SNL )*SIGN( ONE, H ) + SSMAX = SIGN( SSMAX, TSIGN ) + SSMIN = SIGN( SSMIN, TSIGN*SIGN( ONE, F )*SIGN( ONE, H ) ) + RETURN +* +* End of SLASV2 +* + END diff --git a/costa/native/external/lapack/slaswp.f b/costa/native/external/lapack/slaswp.f new file mode 100644 index 000000000..2240badec --- /dev/null +++ b/costa/native/external/lapack/slaswp.f @@ -0,0 +1,120 @@ + SUBROUTINE SLASWP( N, A, LDA, K1, K2, IPIV, INCX ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INCX, K1, K2, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* SLASWP performs a series of row interchanges on the matrix A. +* One row interchange is initiated for each of rows K1 through K2 of A. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of columns of the matrix A. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the matrix of column dimension N to which the row +* interchanges will be applied. +* On exit, the permuted matrix. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* +* K1 (input) INTEGER +* The first element of IPIV for which a row interchange will +* be done. +* +* K2 (input) INTEGER +* The last element of IPIV for which a row interchange will +* be done. +* +* IPIV (input) INTEGER array, dimension (M*abs(INCX)) +* The vector of pivot indices. Only the elements in positions +* K1 through K2 of IPIV are accessed. +* IPIV(K) = L implies rows K and L are to be interchanged. +* +* INCX (input) INTEGER +* The increment between successive values of IPIV. If IPIV +* is negative, the pivots are applied in reverse order. +* +* Further Details +* =============== +* +* Modified by +* R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, I1, I2, INC, IP, IX, IX0, J, K, N32 + REAL TEMP +* .. +* .. Executable Statements .. +* +* Interchange row I with row IPIV(I) for each of rows K1 through K2. +* + IF( INCX.GT.0 ) THEN + IX0 = K1 + I1 = K1 + I2 = K2 + INC = 1 + ELSE IF( INCX.LT.0 ) THEN + IX0 = 1 + ( 1-K2 )*INCX + I1 = K2 + I2 = K1 + INC = -1 + ELSE + RETURN + END IF +* + N32 = ( N / 32 )*32 + IF( N32.NE.0 ) THEN + DO 30 J = 1, N32, 32 + IX = IX0 + DO 20 I = I1, I2, INC + IP = IPIV( IX ) + IF( IP.NE.I ) THEN + DO 10 K = J, J + 31 + TEMP = A( I, K ) + A( I, K ) = A( IP, K ) + A( IP, K ) = TEMP + 10 CONTINUE + END IF + IX = IX + INCX + 20 CONTINUE + 30 CONTINUE + END IF + IF( N32.NE.N ) THEN + N32 = N32 + 1 + IX = IX0 + DO 50 I = I1, I2, INC + IP = IPIV( IX ) + IF( IP.NE.I ) THEN + DO 40 K = N32, N + TEMP = A( I, K ) + A( I, K ) = A( IP, K ) + A( IP, K ) = TEMP + 40 CONTINUE + END IF + IX = IX + INCX + 50 CONTINUE + END IF +* + RETURN +* +* End of SLASWP +* + END diff --git a/costa/native/external/lapack/slasy2.f b/costa/native/external/lapack/slasy2.f new file mode 100644 index 000000000..43e8427fb --- /dev/null +++ b/costa/native/external/lapack/slasy2.f @@ -0,0 +1,382 @@ + SUBROUTINE SLASY2( LTRANL, LTRANR, ISGN, N1, N2, TL, LDTL, TR, + $ LDTR, B, LDB, SCALE, X, LDX, XNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + LOGICAL LTRANL, LTRANR + INTEGER INFO, ISGN, LDB, LDTL, LDTR, LDX, N1, N2 + REAL SCALE, XNORM +* .. +* .. Array Arguments .. + REAL B( LDB, * ), TL( LDTL, * ), TR( LDTR, * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SLASY2 solves for the N1 by N2 matrix X, 1 <= N1,N2 <= 2, in +* +* op(TL)*X + ISGN*X*op(TR) = SCALE*B, +* +* where TL is N1 by N1, TR is N2 by N2, B is N1 by N2, and ISGN = 1 or +* -1. op(T) = T or T', where T' denotes the transpose of T. +* +* Arguments +* ========= +* +* LTRANL (input) LOGICAL +* On entry, LTRANL specifies the op(TL): +* = .FALSE., op(TL) = TL, +* = .TRUE., op(TL) = TL'. +* +* LTRANR (input) LOGICAL +* On entry, LTRANR specifies the op(TR): +* = .FALSE., op(TR) = TR, +* = .TRUE., op(TR) = TR'. +* +* ISGN (input) INTEGER +* On entry, ISGN specifies the sign of the equation +* as described before. ISGN may only be 1 or -1. +* +* N1 (input) INTEGER +* On entry, N1 specifies the order of matrix TL. +* N1 may only be 0, 1 or 2. +* +* N2 (input) INTEGER +* On entry, N2 specifies the order of matrix TR. +* N2 may only be 0, 1 or 2. +* +* TL (input) REAL array, dimension (LDTL,2) +* On entry, TL contains an N1 by N1 matrix. +* +* LDTL (input) INTEGER +* The leading dimension of the matrix TL. LDTL >= max(1,N1). +* +* TR (input) REAL array, dimension (LDTR,2) +* On entry, TR contains an N2 by N2 matrix. +* +* LDTR (input) INTEGER +* The leading dimension of the matrix TR. LDTR >= max(1,N2). +* +* B (input) REAL array, dimension (LDB,2) +* On entry, the N1 by N2 matrix B contains the right-hand +* side of the equation. +* +* LDB (input) INTEGER +* The leading dimension of the matrix B. LDB >= max(1,N1). +* +* SCALE (output) REAL +* On exit, SCALE contains the scale factor. SCALE is chosen +* less than or equal to 1 to prevent the solution overflowing. +* +* X (output) REAL array, dimension (LDX,2) +* On exit, X contains the N1 by N2 solution. +* +* LDX (input) INTEGER +* The leading dimension of the matrix X. LDX >= max(1,N1). +* +* XNORM (output) REAL +* On exit, XNORM is the infinity-norm of the solution. +* +* INFO (output) INTEGER +* On exit, INFO is set to +* 0: successful exit. +* 1: TL and TR have too close eigenvalues, so TL or +* TR is perturbed to get a nonsingular equation. +* NOTE: In the interests of speed, this routine does not +* check the inputs for errors. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL TWO, HALF, EIGHT + PARAMETER ( TWO = 2.0E+0, HALF = 0.5E+0, EIGHT = 8.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL BSWAP, XSWAP + INTEGER I, IP, IPIV, IPSV, J, JP, JPSV, K + REAL BET, EPS, GAM, L21, SGN, SMIN, SMLNUM, TAU1, + $ TEMP, U11, U12, U22, XMAX +* .. +* .. Local Arrays .. + LOGICAL BSWPIV( 4 ), XSWPIV( 4 ) + INTEGER JPIV( 4 ), LOCL21( 4 ), LOCU12( 4 ), + $ LOCU22( 4 ) + REAL BTMP( 4 ), T16( 4, 4 ), TMP( 4 ), X2( 2 ) +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL ISAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Data statements .. + DATA LOCU12 / 3, 4, 1, 2 / , LOCL21 / 2, 1, 4, 3 / , + $ LOCU22 / 4, 3, 2, 1 / + DATA XSWPIV / .FALSE., .FALSE., .TRUE., .TRUE. / + DATA BSWPIV / .FALSE., .TRUE., .FALSE., .TRUE. / +* .. +* .. Executable Statements .. +* +* Do not check the input parameters for errors +* + INFO = 0 +* +* Quick return if possible +* + IF( N1.EQ.0 .OR. N2.EQ.0 ) + $ RETURN +* +* Set constants to control overflow +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + SGN = ISGN +* + K = N1 + N1 + N2 - 2 + GO TO ( 10, 20, 30, 50 )K +* +* 1 by 1: TL11*X + SGN*X*TR11 = B11 +* + 10 CONTINUE + TAU1 = TL( 1, 1 ) + SGN*TR( 1, 1 ) + BET = ABS( TAU1 ) + IF( BET.LE.SMLNUM ) THEN + TAU1 = SMLNUM + BET = SMLNUM + INFO = 1 + END IF +* + SCALE = ONE + GAM = ABS( B( 1, 1 ) ) + IF( SMLNUM*GAM.GT.BET ) + $ SCALE = ONE / GAM +* + X( 1, 1 ) = ( B( 1, 1 )*SCALE ) / TAU1 + XNORM = ABS( X( 1, 1 ) ) + RETURN +* +* 1 by 2: +* TL11*[X11 X12] + ISGN*[X11 X12]*op[TR11 TR12] = [B11 B12] +* [TR21 TR22] +* + 20 CONTINUE +* + SMIN = MAX( EPS*MAX( ABS( TL( 1, 1 ) ), ABS( TR( 1, 1 ) ), + $ ABS( TR( 1, 2 ) ), ABS( TR( 2, 1 ) ), ABS( TR( 2, 2 ) ) ), + $ SMLNUM ) + TMP( 1 ) = TL( 1, 1 ) + SGN*TR( 1, 1 ) + TMP( 4 ) = TL( 1, 1 ) + SGN*TR( 2, 2 ) + IF( LTRANR ) THEN + TMP( 2 ) = SGN*TR( 2, 1 ) + TMP( 3 ) = SGN*TR( 1, 2 ) + ELSE + TMP( 2 ) = SGN*TR( 1, 2 ) + TMP( 3 ) = SGN*TR( 2, 1 ) + END IF + BTMP( 1 ) = B( 1, 1 ) + BTMP( 2 ) = B( 1, 2 ) + GO TO 40 +* +* 2 by 1: +* op[TL11 TL12]*[X11] + ISGN* [X11]*TR11 = [B11] +* [TL21 TL22] [X21] [X21] [B21] +* + 30 CONTINUE + SMIN = MAX( EPS*MAX( ABS( TR( 1, 1 ) ), ABS( TL( 1, 1 ) ), + $ ABS( TL( 1, 2 ) ), ABS( TL( 2, 1 ) ), ABS( TL( 2, 2 ) ) ), + $ SMLNUM ) + TMP( 1 ) = TL( 1, 1 ) + SGN*TR( 1, 1 ) + TMP( 4 ) = TL( 2, 2 ) + SGN*TR( 1, 1 ) + IF( LTRANL ) THEN + TMP( 2 ) = TL( 1, 2 ) + TMP( 3 ) = TL( 2, 1 ) + ELSE + TMP( 2 ) = TL( 2, 1 ) + TMP( 3 ) = TL( 1, 2 ) + END IF + BTMP( 1 ) = B( 1, 1 ) + BTMP( 2 ) = B( 2, 1 ) + 40 CONTINUE +* +* Solve 2 by 2 system using complete pivoting. +* Set pivots less than SMIN to SMIN. +* + IPIV = ISAMAX( 4, TMP, 1 ) + U11 = TMP( IPIV ) + IF( ABS( U11 ).LE.SMIN ) THEN + INFO = 1 + U11 = SMIN + END IF + U12 = TMP( LOCU12( IPIV ) ) + L21 = TMP( LOCL21( IPIV ) ) / U11 + U22 = TMP( LOCU22( IPIV ) ) - U12*L21 + XSWAP = XSWPIV( IPIV ) + BSWAP = BSWPIV( IPIV ) + IF( ABS( U22 ).LE.SMIN ) THEN + INFO = 1 + U22 = SMIN + END IF + IF( BSWAP ) THEN + TEMP = BTMP( 2 ) + BTMP( 2 ) = BTMP( 1 ) - L21*TEMP + BTMP( 1 ) = TEMP + ELSE + BTMP( 2 ) = BTMP( 2 ) - L21*BTMP( 1 ) + END IF + SCALE = ONE + IF( ( TWO*SMLNUM )*ABS( BTMP( 2 ) ).GT.ABS( U22 ) .OR. + $ ( TWO*SMLNUM )*ABS( BTMP( 1 ) ).GT.ABS( U11 ) ) THEN + SCALE = HALF / MAX( ABS( BTMP( 1 ) ), ABS( BTMP( 2 ) ) ) + BTMP( 1 ) = BTMP( 1 )*SCALE + BTMP( 2 ) = BTMP( 2 )*SCALE + END IF + X2( 2 ) = BTMP( 2 ) / U22 + X2( 1 ) = BTMP( 1 ) / U11 - ( U12 / U11 )*X2( 2 ) + IF( XSWAP ) THEN + TEMP = X2( 2 ) + X2( 2 ) = X2( 1 ) + X2( 1 ) = TEMP + END IF + X( 1, 1 ) = X2( 1 ) + IF( N1.EQ.1 ) THEN + X( 1, 2 ) = X2( 2 ) + XNORM = ABS( X( 1, 1 ) ) + ABS( X( 1, 2 ) ) + ELSE + X( 2, 1 ) = X2( 2 ) + XNORM = MAX( ABS( X( 1, 1 ) ), ABS( X( 2, 1 ) ) ) + END IF + RETURN +* +* 2 by 2: +* op[TL11 TL12]*[X11 X12] +ISGN* [X11 X12]*op[TR11 TR12] = [B11 B12] +* [TL21 TL22] [X21 X22] [X21 X22] [TR21 TR22] [B21 B22] +* +* Solve equivalent 4 by 4 system using complete pivoting. +* Set pivots less than SMIN to SMIN. +* + 50 CONTINUE + SMIN = MAX( ABS( TR( 1, 1 ) ), ABS( TR( 1, 2 ) ), + $ ABS( TR( 2, 1 ) ), ABS( TR( 2, 2 ) ) ) + SMIN = MAX( SMIN, ABS( TL( 1, 1 ) ), ABS( TL( 1, 2 ) ), + $ ABS( TL( 2, 1 ) ), ABS( TL( 2, 2 ) ) ) + SMIN = MAX( EPS*SMIN, SMLNUM ) + BTMP( 1 ) = ZERO + CALL SCOPY( 16, BTMP, 0, T16, 1 ) + T16( 1, 1 ) = TL( 1, 1 ) + SGN*TR( 1, 1 ) + T16( 2, 2 ) = TL( 2, 2 ) + SGN*TR( 1, 1 ) + T16( 3, 3 ) = TL( 1, 1 ) + SGN*TR( 2, 2 ) + T16( 4, 4 ) = TL( 2, 2 ) + SGN*TR( 2, 2 ) + IF( LTRANL ) THEN + T16( 1, 2 ) = TL( 2, 1 ) + T16( 2, 1 ) = TL( 1, 2 ) + T16( 3, 4 ) = TL( 2, 1 ) + T16( 4, 3 ) = TL( 1, 2 ) + ELSE + T16( 1, 2 ) = TL( 1, 2 ) + T16( 2, 1 ) = TL( 2, 1 ) + T16( 3, 4 ) = TL( 1, 2 ) + T16( 4, 3 ) = TL( 2, 1 ) + END IF + IF( LTRANR ) THEN + T16( 1, 3 ) = SGN*TR( 1, 2 ) + T16( 2, 4 ) = SGN*TR( 1, 2 ) + T16( 3, 1 ) = SGN*TR( 2, 1 ) + T16( 4, 2 ) = SGN*TR( 2, 1 ) + ELSE + T16( 1, 3 ) = SGN*TR( 2, 1 ) + T16( 2, 4 ) = SGN*TR( 2, 1 ) + T16( 3, 1 ) = SGN*TR( 1, 2 ) + T16( 4, 2 ) = SGN*TR( 1, 2 ) + END IF + BTMP( 1 ) = B( 1, 1 ) + BTMP( 2 ) = B( 2, 1 ) + BTMP( 3 ) = B( 1, 2 ) + BTMP( 4 ) = B( 2, 2 ) +* +* Perform elimination +* + DO 100 I = 1, 3 + XMAX = ZERO + DO 70 IP = I, 4 + DO 60 JP = I, 4 + IF( ABS( T16( IP, JP ) ).GE.XMAX ) THEN + XMAX = ABS( T16( IP, JP ) ) + IPSV = IP + JPSV = JP + END IF + 60 CONTINUE + 70 CONTINUE + IF( IPSV.NE.I ) THEN + CALL SSWAP( 4, T16( IPSV, 1 ), 4, T16( I, 1 ), 4 ) + TEMP = BTMP( I ) + BTMP( I ) = BTMP( IPSV ) + BTMP( IPSV ) = TEMP + END IF + IF( JPSV.NE.I ) + $ CALL SSWAP( 4, T16( 1, JPSV ), 1, T16( 1, I ), 1 ) + JPIV( I ) = JPSV + IF( ABS( T16( I, I ) ).LT.SMIN ) THEN + INFO = 1 + T16( I, I ) = SMIN + END IF + DO 90 J = I + 1, 4 + T16( J, I ) = T16( J, I ) / T16( I, I ) + BTMP( J ) = BTMP( J ) - T16( J, I )*BTMP( I ) + DO 80 K = I + 1, 4 + T16( J, K ) = T16( J, K ) - T16( J, I )*T16( I, K ) + 80 CONTINUE + 90 CONTINUE + 100 CONTINUE + IF( ABS( T16( 4, 4 ) ).LT.SMIN ) + $ T16( 4, 4 ) = SMIN + SCALE = ONE + IF( ( EIGHT*SMLNUM )*ABS( BTMP( 1 ) ).GT.ABS( T16( 1, 1 ) ) .OR. + $ ( EIGHT*SMLNUM )*ABS( BTMP( 2 ) ).GT.ABS( T16( 2, 2 ) ) .OR. + $ ( EIGHT*SMLNUM )*ABS( BTMP( 3 ) ).GT.ABS( T16( 3, 3 ) ) .OR. + $ ( EIGHT*SMLNUM )*ABS( BTMP( 4 ) ).GT.ABS( T16( 4, 4 ) ) ) THEN + SCALE = ( ONE / EIGHT ) / MAX( ABS( BTMP( 1 ) ), + $ ABS( BTMP( 2 ) ), ABS( BTMP( 3 ) ), ABS( BTMP( 4 ) ) ) + BTMP( 1 ) = BTMP( 1 )*SCALE + BTMP( 2 ) = BTMP( 2 )*SCALE + BTMP( 3 ) = BTMP( 3 )*SCALE + BTMP( 4 ) = BTMP( 4 )*SCALE + END IF + DO 120 I = 1, 4 + K = 5 - I + TEMP = ONE / T16( K, K ) + TMP( K ) = BTMP( K )*TEMP + DO 110 J = K + 1, 4 + TMP( K ) = TMP( K ) - ( TEMP*T16( K, J ) )*TMP( J ) + 110 CONTINUE + 120 CONTINUE + DO 130 I = 1, 3 + IF( JPIV( 4-I ).NE.4-I ) THEN + TEMP = TMP( 4-I ) + TMP( 4-I ) = TMP( JPIV( 4-I ) ) + TMP( JPIV( 4-I ) ) = TEMP + END IF + 130 CONTINUE + X( 1, 1 ) = TMP( 1 ) + X( 2, 1 ) = TMP( 2 ) + X( 1, 2 ) = TMP( 3 ) + X( 2, 2 ) = TMP( 4 ) + XNORM = MAX( ABS( TMP( 1 ) )+ABS( TMP( 3 ) ), + $ ABS( TMP( 2 ) )+ABS( TMP( 4 ) ) ) + RETURN +* +* End of SLASY2 +* + END diff --git a/costa/native/external/lapack/slasyf.f b/costa/native/external/lapack/slasyf.f new file mode 100644 index 000000000..a5e4dd171 --- /dev/null +++ b/costa/native/external/lapack/slasyf.f @@ -0,0 +1,588 @@ + SUBROUTINE SLASYF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), W( LDW, * ) +* .. +* +* Purpose +* ======= +* +* SLASYF computes a partial factorization of a real symmetric matrix A +* using the Bunch-Kaufman diagonal pivoting method. The partial +* factorization has the form: +* +* A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +* ( 0 U22 ) ( 0 D ) ( U12' U22' ) +* +* A = ( L11 0 ) ( D 0 ) ( L11' L21' ) if UPLO = 'L' +* ( L21 I ) ( 0 A22 ) ( 0 I ) +* +* where the order of D is at most NB. The actual order is returned in +* the argument KB, and is either NB or NB-1, or N if N <= NB. +* +* SLASYF is an auxiliary routine called by SSYTRF. It uses blocked code +* (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or +* A22 (if UPLO = 'L'). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NB (input) INTEGER +* The maximum number of columns of the matrix A that should be +* factored. NB should be at least 2 to allow for 2-by-2 pivot +* blocks. +* +* KB (output) INTEGER +* The number of columns of A that were actually factored. +* KB is either NB-1 or NB, or N if N <= NB. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* On exit, A contains details of the partial factorization. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If UPLO = 'U', only the last KB elements of IPIV are set; +* if UPLO = 'L', only the first KB elements are set. +* +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* W (workspace) REAL array, dimension (LDW,NB) +* +* LDW (input) INTEGER +* The leading dimension of the array W. LDW >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* > 0: if INFO = k, D(k,k) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER IMAX, J, JB, JJ, JMAX, JP, K, KK, KKW, KP, + $ KSTEP, KW + REAL ABSAKK, ALPHA, COLMAX, D11, D21, D22, R1, + $ ROWMAX, T +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + EXTERNAL LSAME, ISAMAX +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, SGEMV, SSCAL, SSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* +* KW is the column of W which corresponds to column K of A +* + K = N + 10 CONTINUE + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* +* Copy column K of A to column KW of W and update it +* + CALL SCOPY( K, A( 1, K ), 1, W( 1, KW ), 1 ) + IF( K.LT.N ) + $ CALL SGEMV( 'No transpose', K, N-K, -ONE, A( 1, K+1 ), LDA, + $ W( K, KW+1 ), LDW, ONE, W( 1, KW ), 1 ) +* + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( W( K, KW ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = ISAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = ABS( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* Copy column IMAX to column KW-1 of W and update it +* + CALL SCOPY( IMAX, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 ) + CALL SCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) + IF( K.LT.N ) + $ CALL SGEMV( 'No transpose', K, N-K, -ONE, A( 1, K+1 ), + $ LDA, W( IMAX, KW+1 ), LDW, ONE, + $ W( 1, KW-1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = IMAX + ISAMAX( K-IMAX, W( IMAX+1, KW-1 ), 1 ) + ROWMAX = ABS( W( JMAX, KW-1 ) ) + IF( IMAX.GT.1 ) THEN + JMAX = ISAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( W( JMAX, KW-1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( W( IMAX, KW-1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW +* + CALL SCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + KKW = NB + KK - N +* +* Updated column KP is already stored in column KKW of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL SCOPY( K-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + CALL SCOPY( KP, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last KK columns of A and W +* + CALL SSWAP( N-KK+1, A( KK, KK ), LDA, A( KP, KK ), LDA ) + CALL SSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column KW of W now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Store U(k) in column k of A +* + CALL SCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + R1 = ONE / A( K, K ) + CALL SSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns KW and KW-1 of W now +* hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* + IF( K.GT.2 ) THEN +* +* Store U(k) and U(k-1) in columns k and k-1 of A +* + D21 = W( K-1, KW ) + D11 = W( K, KW ) / D21 + D22 = W( K-1, KW-1 ) / D21 + T = ONE / ( D11*D22-ONE ) + D21 = T / D21 + DO 20 J = 1, K - 2 + A( J, K-1 ) = D21*( D11*W( J, KW-1 )-W( J, KW ) ) + A( J, K ) = D21*( D22*W( J, KW )-W( J, KW-1 ) ) + 20 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = W( K-1, KW ) + A( K, K ) = W( K, KW ) + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12' = A11 - U12*W' +* +* computing blocks of NB columns at a time +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + CALL SGEMV( 'No transpose', JJ-J+1, N-K, -ONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, ONE, + $ A( J, JJ ), 1 ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + CALL SGEMM( 'No transpose', 'Transpose', J-1, JB, N-K, -ONE, + $ A( 1, K+1 ), LDA, W( J, KW+1 ), LDW, ONE, + $ A( 1, J ), LDA ) + 50 CONTINUE +* +* Put U12 in standard form by partially undoing the interchanges +* in columns k+1:n +* + J = K + 1 + 60 CONTINUE + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP + J = J + 1 + END IF + J = J + 1 + IF( JP.NE.JJ .AND. J.LE.N ) + $ CALL SSWAP( N-J+1, A( JP, J ), LDA, A( JJ, J ), LDA ) + IF( J.LE.N ) + $ GO TO 60 +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* +* Copy column K of A to column K of W and update it +* + CALL SCOPY( N-K+1, A( K, K ), 1, W( K, K ), 1 ) + CALL SGEMV( 'No transpose', N-K+1, K-1, -ONE, A( K, 1 ), LDA, + $ W( K, 1 ), LDW, ONE, W( K, K ), 1 ) +* + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( W( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + ISAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = ABS( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* Copy column IMAX to column K+1 of W and update it +* + CALL SCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1 ) + CALL SCOPY( N-IMAX+1, A( IMAX, IMAX ), 1, W( IMAX, K+1 ), + $ 1 ) + CALL SGEMV( 'No transpose', N-K+1, K-1, -ONE, A( K, 1 ), + $ LDA, W( IMAX, 1 ), LDW, ONE, W( K, K+1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = K - 1 + ISAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = ABS( W( JMAX, K+1 ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + ISAMAX( N-IMAX, W( IMAX+1, K+1 ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( W( JMAX, K+1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( W( IMAX, K+1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K +* + CALL SCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 +* +* Updated column KP is already stored in column KK of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL SCOPY( KP-K-1, A( K+1, KK ), 1, A( KP, K+1 ), LDA ) + CALL SCOPY( N-KP+1, A( KP, KK ), 1, A( KP, KP ), 1 ) +* +* Interchange rows KK and KP in first KK columns of A and W +* + CALL SSWAP( KK, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL SSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* +* Store L(k) in column k of A +* + CALL SCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN + R1 = ONE / A( K, K ) + CALL SSCAL( N-K, R1, A( K+1, K ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Store L(k) and L(k+1) in columns k and k+1 of A +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / D21 + T = ONE / ( D11*D22-ONE ) + D21 = T / D21 + DO 80 J = K + 2, N + A( J, K ) = D21*( D11*W( J, K )-W( J, K+1 ) ) + A( J, K+1 ) = D21*( D22*W( J, K+1 )-W( J, K ) ) + 80 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = W( K+1, K ) + A( K+1, K+1 ) = W( K+1, K+1 ) + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21' = A22 - L21*W' +* +* computing blocks of NB columns at a time +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + CALL SGEMV( 'No transpose', J+JB-JJ, K-1, -ONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, ONE, + $ A( JJ, JJ ), 1 ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL SGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -ONE, A( J+JB, 1 ), LDA, W( J, 1 ), LDW, + $ ONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Put L21 in standard form by partially undoing the interchanges +* in columns 1:k-1 +* + J = K - 1 + 120 CONTINUE + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP + J = J - 1 + END IF + J = J - 1 + IF( JP.NE.JJ .AND. J.GE.1 ) + $ CALL SSWAP( J, A( JP, 1 ), LDA, A( JJ, 1 ), LDA ) + IF( J.GE.1 ) + $ GO TO 120 +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF + RETURN +* +* End of SLASYF +* + END diff --git a/costa/native/external/lapack/slatbs.f b/costa/native/external/lapack/slatbs.f new file mode 100644 index 000000000..8d37a9e56 --- /dev/null +++ b/costa/native/external/lapack/slatbs.f @@ -0,0 +1,724 @@ + SUBROUTINE SLATBS( UPLO, TRANS, DIAG, NORMIN, N, KD, AB, LDAB, X, + $ SCALE, CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, KD, LDAB, N + REAL SCALE +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), CNORM( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* SLATBS solves one of the triangular systems +* +* A *x = s*b or A'*x = s*b +* +* with scaling to prevent overflow, where A is an upper or lower +* triangular band matrix. Here A' denotes the transpose of A, x and b +* are n-element vectors, and s is a scaling factor, usually less than +* or equal to 1, chosen so that the components of x will be less than +* the overflow threshold. If the unscaled problem will not cause +* overflow, the Level 2 BLAS routine STBSV is called. If the matrix A +* is singular (A(j,j) = 0 for some j), then s is set to 0 and a +* non-trivial solution to A*x = 0 is returned. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* TRANS (input) CHARACTER*1 +* Specifies the operation applied to A. +* = 'N': Solve A * x = s*b (No transpose) +* = 'T': Solve A'* x = s*b (Transpose) +* = 'C': Solve A'* x = s*b (Conjugate transpose = Transpose) +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* NORMIN (input) CHARACTER*1 +* Specifies whether CNORM has been set or not. +* = 'Y': CNORM contains the column norms on entry +* = 'N': CNORM is not set on entry. On exit, the norms will +* be computed and stored in CNORM. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of subdiagonals or superdiagonals in the +* triangular matrix A. KD >= 0. +* +* AB (input) REAL array, dimension (LDAB,N) +* The upper or lower triangular band matrix A, stored in the +* first KD+1 rows of the array. The j-th column of A is stored +* in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* X (input/output) REAL array, dimension (N) +* On entry, the right hand side b of the triangular system. +* On exit, X is overwritten by the solution vector x. +* +* SCALE (output) REAL +* The scaling factor s for the triangular system +* A * x = s*b or A'* x = s*b. +* If SCALE = 0, the matrix A is singular or badly scaled, and +* the vector x is an exact or approximate solution to A*x = 0. +* +* CNORM (input or output) REAL array, dimension (N) +* +* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +* contains the norm of the off-diagonal part of the j-th column +* of A. If TRANS = 'N', CNORM(j) must be greater than or equal +* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +* must be greater than or equal to the 1-norm. +* +* If NORMIN = 'N', CNORM is an output argument and CNORM(j) +* returns the 1-norm of the offdiagonal part of the j-th column +* of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* Further Details +* ======= ======= +* +* A rough bound on x is computed; if that is less than overflow, STBSV +* is called, otherwise, specific code is used which checks for possible +* overflow or divide-by-zero at every operation. +* +* A columnwise scheme is used for solving A*x = b. The basic algorithm +* if A is lower triangular is +* +* x[1:n] := b[1:n] +* for j = 1, ..., n +* x(j) := x(j) / A(j,j) +* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +* end +* +* Define bounds on the components of x after j iterations of the loop: +* M(j) = bound on x[1:j] +* G(j) = bound on x[j+1:n] +* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +* +* Then for iteration j+1 we have +* M(j+1) <= G(j) / | A(j+1,j+1) | +* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +* +* where CNORM(j+1) is greater than or equal to the infinity-norm of +* column j+1 of A, not counting the diagonal. Hence +* +* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +* 1<=i<=j +* and +* +* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +* 1<=i< j +* +* Since |x(j)| <= M(j), we use the Level 2 BLAS routine STBSV if the +* reciprocal of the largest M(j), j=1,..,n, is larger than +* max(underflow, 1/overflow). +* +* The bound on x(j) is also used to determine when a step in the +* columnwise method can be performed without fear of overflow. If +* the computed bound is greater than a large constant, x is scaled to +* prevent overflow, but if the bound overflows, x is set to 0, x(j) to +* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +* +* Similarly, a row-wise scheme is used to solve A'*x = b. The basic +* algorithm for A upper triangular is +* +* for j = 1, ..., n +* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +* end +* +* We simultaneously compute two bounds +* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +* M(j) = bound on x(i), 1<=i<=j +* +* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +* Then the bound on x(j) is +* +* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +* +* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +* 1<=i<=j +* +* and we can safely call STBSV if 1/M(n) and 1/G(n) are both greater +* than max(underflow, 1/overflow). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, J, JFIRST, JINC, JLAST, JLEN, MAIND + REAL BIGNUM, GROW, REC, SMLNUM, SUMJ, TJJ, TJJS, + $ TMAX, TSCAL, USCAL, XBND, XJ, XMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SASUM, SDOT, SLAMCH + EXTERNAL LSAME, ISAMAX, SASUM, SDOT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SSCAL, STBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( KD.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLATBS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + DO 10 J = 1, N + JLEN = MIN( KD, J-1 ) + CNORM( J ) = SASUM( JLEN, AB( KD+1-JLEN, J ), 1 ) + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + DO 20 J = 1, N + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) THEN + CNORM( J ) = SASUM( JLEN, AB( 2, J ), 1 ) + ELSE + CNORM( J ) = ZERO + END IF + 20 CONTINUE + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM. +* + IMAX = ISAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM ) THEN + TSCAL = ONE + ELSE + TSCAL = ONE / ( SMLNUM*TMAX ) + CALL SSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine STBSV can be used. +* + J = ISAMAX( N, X, 1 ) + XMAX = ABS( X( J ) ) + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + MAIND = KD + 1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + MAIND = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 50 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + DO 30 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* M(j) = G(j-1) / abs(A(j,j)) +* + TJJ = ABS( AB( MAIND, J ) ) + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + 30 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 40 CONTINUE + END IF + 50 CONTINUE +* + ELSE +* +* Compute the growth in A' * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + MAIND = KD + 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + MAIND = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 80 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + DO 60 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + TJJ = ABS( AB( MAIND, J ) ) + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + 60 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 70 CONTINUE + END IF + 80 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL STBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = BIGNUM / XMAX + CALL SSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + DO 100 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 95 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 90 I = 1, N + X( I ) = ZERO + 90 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 95 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL SSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) - +* x(j)* A(max(1,j-kd):j-1,j) +* + JLEN = MIN( KD, J-1 ) + CALL SAXPY( JLEN, -X( J )*TSCAL, + $ AB( KD+1-JLEN, J ), 1, X( J-JLEN ), 1 ) + I = ISAMAX( J-1, X, 1 ) + XMAX = ABS( X( I ) ) + END IF + ELSE IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:min(j+kd,n)) := x(j+1:min(j+kd,n)) - +* x(j) * A(j+1:min(j+kd,n),j) +* + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) + $ CALL SAXPY( JLEN, -X( J )*TSCAL, AB( 2, J ), 1, + $ X( J+1 ), 1 ) + I = J + ISAMAX( N-J, X( J+1 ), 1 ) + XMAX = ABS( X( I ) ) + END IF + 100 CONTINUE +* + ELSE +* +* Solve A' * x = b +* + DO 140 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = ABS( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = USCAL / TJJS + END IF + IF( REC.LT.ONE ) THEN + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + SUMJ = ZERO + IF( USCAL.EQ.ONE ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call SDOT to perform the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + SUMJ = SDOT( JLEN, AB( KD+1-JLEN, J ), 1, + $ X( J-JLEN ), 1 ) + ELSE + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) + $ SUMJ = SDOT( JLEN, AB( 2, J ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + DO 110 I = 1, JLEN + SUMJ = SUMJ + ( AB( KD+I-JLEN, J )*USCAL )* + $ X( J-JLEN-1+I ) + 110 CONTINUE + ELSE + JLEN = MIN( KD, N-J ) + DO 120 I = 1, JLEN + SUMJ = SUMJ + ( AB( I+1, J )*USCAL )*X( J+I ) + 120 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.TSCAL ) THEN +* +* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - SUMJ + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 135 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A'*x = 0. +* + DO 130 I = 1, N + X( I ) = ZERO + 130 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 135 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - sumj if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = X( J ) / TJJS - SUMJ + END IF + XMAX = MAX( XMAX, ABS( X( J ) ) ) + 140 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL SSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of SLATBS +* + END diff --git a/costa/native/external/lapack/slatdf.f b/costa/native/external/lapack/slatdf.f new file mode 100644 index 000000000..9bae7a1a1 --- /dev/null +++ b/costa/native/external/lapack/slatdf.f @@ -0,0 +1,238 @@ + SUBROUTINE SLATDF( IJOB, N, Z, LDZ, RHS, RDSUM, RDSCAL, IPIV, + $ JPIV ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER IJOB, LDZ, N + REAL RDSCAL, RDSUM +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + REAL RHS( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SLATDF uses the LU factorization of the n-by-n matrix Z computed by +* SGETC2 and computes a contribution to the reciprocal Dif-estimate +* by solving Z * x = b for x, and choosing the r.h.s. b such that +* the norm of x is as large as possible. On entry RHS = b holds the +* contribution from earlier solved sub-systems, and on return RHS = x. +* +* The factorization of Z returned by SGETC2 has the form Z = P*L*U*Q, +* where P and Q are permutation matrices. L is lower triangular with +* unit diagonal elements and U is upper triangular. +* +* Arguments +* ========= +* +* IJOB (input) INTEGER +* IJOB = 2: First compute an approximative null-vector e +* of Z using SGECON, e is normalized and solve for +* Zx = +-e - f with the sign giving the greater value +* of 2-norm(x). About 5 times as expensive as Default. +* IJOB .ne. 2: Local look ahead strategy where all entries of +* the r.h.s. b is choosen as either +1 or -1 (Default). +* +* N (input) INTEGER +* The number of columns of the matrix Z. +* +* Z (input) REAL array, dimension (LDZ, N) +* On entry, the LU part of the factorization of the n-by-n +* matrix Z computed by SGETC2: Z = P * L * U * Q +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDA >= max(1, N). +* +* RHS (input/output) REAL array, dimension N. +* On entry, RHS contains contributions from other subsystems. +* On exit, RHS contains the solution of the subsystem with +* entries acoording to the value of IJOB (see above). +* +* RDSUM (input/output) REAL +* On entry, the sum of squares of computed contributions to +* the Dif-estimate under computation by STGSYL, where the +* scaling factor RDSCAL (see below) has been factored out. +* On exit, the corresponding sum of squares updated with the +* contributions from the current sub-system. +* If TRANS = 'T' RDSUM is not touched. +* NOTE: RDSUM only makes sense when STGSY2 is called by STGSYL. +* +* RDSCAL (input/output) REAL +* On entry, scaling factor used to prevent overflow in RDSUM. +* On exit, RDSCAL is updated w.r.t. the current contributions +* in RDSUM. +* If TRANS = 'T', RDSCAL is not touched. +* NOTE: RDSCAL only makes sense when STGSY2 is called by +* STGSYL. +* +* IPIV (input) INTEGER array, dimension (N). +* The pivot indices; for 1 <= i <= N, row i of the +* matrix has been interchanged with row IPIV(i). +* +* JPIV (input) INTEGER array, dimension (N). +* The pivot indices; for 1 <= j <= N, column j of the +* matrix has been interchanged with column JPIV(j). +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* This routine is a further developed implementation of algorithm +* BSOLVE in [1] using complete pivoting in the LU factorization. +* +* [1] Bo Kagstrom and Lars Westin, +* Generalized Schur Methods with Condition Estimators for +* Solving the Generalized Sylvester Equation, IEEE Transactions +* on Automatic Control, Vol. 34, No. 7, July 1989, pp 745-751. +* +* [2] Peter Poromaa, +* On Efficient and Robust Estimators for the Separation +* between two Regular Matrix Pairs with Applications in +* Condition Estimation. Report IMINF-95.05, Departement of +* Computing Science, Umea University, S-901 87 Umea, Sweden, 1995. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXDIM + PARAMETER ( MAXDIM = 8 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J, K + REAL BM, BP, PMONE, SMINU, SPLUS, TEMP +* .. +* .. Local Arrays .. + INTEGER IWORK( MAXDIM ) + REAL WORK( 4*MAXDIM ), XM( MAXDIM ), XP( MAXDIM ) +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGECON, SGESC2, SLASSQ, SLASWP, + $ SSCAL +* .. +* .. External Functions .. + REAL SASUM, SDOT + EXTERNAL SASUM, SDOT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( IJOB.NE.2 ) THEN +* +* Apply permutations IPIV to RHS +* + CALL SLASWP( 1, RHS, LDZ, 1, N-1, IPIV, 1 ) +* +* Solve for L-part choosing RHS either to +1 or -1. +* + PMONE = -ONE +* + DO 10 J = 1, N - 1 + BP = RHS( J ) + ONE + BM = RHS( J ) - ONE + SPLUS = ONE +* +* Look-ahead for L-part RHS(1:N-1) = + or -1, SPLUS and +* SMIN computed more efficiently than in BSOLVE [1]. +* + SPLUS = SPLUS + SDOT( N-J, Z( J+1, J ), 1, Z( J+1, J ), 1 ) + SMINU = SDOT( N-J, Z( J+1, J ), 1, RHS( J+1 ), 1 ) + SPLUS = SPLUS*RHS( J ) + IF( SPLUS.GT.SMINU ) THEN + RHS( J ) = BP + ELSE IF( SMINU.GT.SPLUS ) THEN + RHS( J ) = BM + ELSE +* +* In this case the updating sums are equal and we can +* choose RHS(J) +1 or -1. The first time this happens +* we choose -1, thereafter +1. This is a simple way to +* get good estimates of matrices like Byers well-known +* example (see [1]). (Not done in BSOLVE.) +* + RHS( J ) = RHS( J ) + PMONE + PMONE = ONE + END IF +* +* Compute the remaining r.h.s. +* + TEMP = -RHS( J ) + CALL SAXPY( N-J, TEMP, Z( J+1, J ), 1, RHS( J+1 ), 1 ) +* + 10 CONTINUE +* +* Solve for U-part, look-ahead for RHS(N) = +-1. This is not done +* in BSOLVE and will hopefully give us a better estimate because +* any ill-conditioning of the original matrix is transfered to U +* and not to L. U(N, N) is an approximation to sigma_min(LU). +* + CALL SCOPY( N-1, RHS, 1, XP, 1 ) + XP( N ) = RHS( N ) + ONE + RHS( N ) = RHS( N ) - ONE + SPLUS = ZERO + SMINU = ZERO + DO 30 I = N, 1, -1 + TEMP = ONE / Z( I, I ) + XP( I ) = XP( I )*TEMP + RHS( I ) = RHS( I )*TEMP + DO 20 K = I + 1, N + XP( I ) = XP( I ) - XP( K )*( Z( I, K )*TEMP ) + RHS( I ) = RHS( I ) - RHS( K )*( Z( I, K )*TEMP ) + 20 CONTINUE + SPLUS = SPLUS + ABS( XP( I ) ) + SMINU = SMINU + ABS( RHS( I ) ) + 30 CONTINUE + IF( SPLUS.GT.SMINU ) + $ CALL SCOPY( N, XP, 1, RHS, 1 ) +* +* Apply the permutations JPIV to the computed solution (RHS) +* + CALL SLASWP( 1, RHS, LDZ, 1, N-1, JPIV, -1 ) +* +* Compute the sum of squares +* + CALL SLASSQ( N, RHS, 1, RDSCAL, RDSUM ) +* + ELSE +* +* IJOB = 2, Compute approximate nullvector XM of Z +* + CALL SGECON( 'I', N, Z, LDZ, ONE, TEMP, WORK, IWORK, INFO ) + CALL SCOPY( N, WORK( N+1 ), 1, XM, 1 ) +* +* Compute RHS +* + CALL SLASWP( 1, XM, LDZ, 1, N-1, IPIV, -1 ) + TEMP = ONE / SQRT( SDOT( N, XM, 1, XM, 1 ) ) + CALL SSCAL( N, TEMP, XM, 1 ) + CALL SCOPY( N, XM, 1, XP, 1 ) + CALL SAXPY( N, ONE, RHS, 1, XP, 1 ) + CALL SAXPY( N, -ONE, XM, 1, RHS, 1 ) + CALL SGESC2( N, Z, LDZ, RHS, IPIV, JPIV, TEMP ) + CALL SGESC2( N, Z, LDZ, XP, IPIV, JPIV, TEMP ) + IF( SASUM( N, XP, 1 ).GT.SASUM( N, RHS, 1 ) ) + $ CALL SCOPY( N, XP, 1, RHS, 1 ) +* +* Compute the sum of squares +* + CALL SLASSQ( N, RHS, 1, RDSCAL, RDSUM ) +* + END IF +* + RETURN +* +* End of SLATDF +* + END diff --git a/costa/native/external/lapack/slatps.f b/costa/native/external/lapack/slatps.f new file mode 100644 index 000000000..a71f99845 --- /dev/null +++ b/costa/native/external/lapack/slatps.f @@ -0,0 +1,713 @@ + SUBROUTINE SLATPS( UPLO, TRANS, DIAG, NORMIN, N, AP, X, SCALE, + $ CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, N + REAL SCALE +* .. +* .. Array Arguments .. + REAL AP( * ), CNORM( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* SLATPS solves one of the triangular systems +* +* A *x = s*b or A'*x = s*b +* +* with scaling to prevent overflow, where A is an upper or lower +* triangular matrix stored in packed form. Here A' denotes the +* transpose of A, x and b are n-element vectors, and s is a scaling +* factor, usually less than or equal to 1, chosen so that the +* components of x will be less than the overflow threshold. If the +* unscaled problem will not cause overflow, the Level 2 BLAS routine +* STPSV is called. If the matrix A is singular (A(j,j) = 0 for some j), +* then s is set to 0 and a non-trivial solution to A*x = 0 is returned. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* TRANS (input) CHARACTER*1 +* Specifies the operation applied to A. +* = 'N': Solve A * x = s*b (No transpose) +* = 'T': Solve A'* x = s*b (Transpose) +* = 'C': Solve A'* x = s*b (Conjugate transpose = Transpose) +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* NORMIN (input) CHARACTER*1 +* Specifies whether CNORM has been set or not. +* = 'Y': CNORM contains the column norms on entry +* = 'N': CNORM is not set on entry. On exit, the norms will +* be computed and stored in CNORM. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input) REAL array, dimension (N*(N+1)/2) +* The upper or lower triangular matrix A, packed columnwise in +* a linear array. The j-th column of A is stored in the array +* AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* X (input/output) REAL array, dimension (N) +* On entry, the right hand side b of the triangular system. +* On exit, X is overwritten by the solution vector x. +* +* SCALE (output) REAL +* The scaling factor s for the triangular system +* A * x = s*b or A'* x = s*b. +* If SCALE = 0, the matrix A is singular or badly scaled, and +* the vector x is an exact or approximate solution to A*x = 0. +* +* CNORM (input or output) REAL array, dimension (N) +* +* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +* contains the norm of the off-diagonal part of the j-th column +* of A. If TRANS = 'N', CNORM(j) must be greater than or equal +* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +* must be greater than or equal to the 1-norm. +* +* If NORMIN = 'N', CNORM is an output argument and CNORM(j) +* returns the 1-norm of the offdiagonal part of the j-th column +* of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* Further Details +* ======= ======= +* +* A rough bound on x is computed; if that is less than overflow, STPSV +* is called, otherwise, specific code is used which checks for possible +* overflow or divide-by-zero at every operation. +* +* A columnwise scheme is used for solving A*x = b. The basic algorithm +* if A is lower triangular is +* +* x[1:n] := b[1:n] +* for j = 1, ..., n +* x(j) := x(j) / A(j,j) +* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +* end +* +* Define bounds on the components of x after j iterations of the loop: +* M(j) = bound on x[1:j] +* G(j) = bound on x[j+1:n] +* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +* +* Then for iteration j+1 we have +* M(j+1) <= G(j) / | A(j+1,j+1) | +* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +* +* where CNORM(j+1) is greater than or equal to the infinity-norm of +* column j+1 of A, not counting the diagonal. Hence +* +* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +* 1<=i<=j +* and +* +* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +* 1<=i< j +* +* Since |x(j)| <= M(j), we use the Level 2 BLAS routine STPSV if the +* reciprocal of the largest M(j), j=1,..,n, is larger than +* max(underflow, 1/overflow). +* +* The bound on x(j) is also used to determine when a step in the +* columnwise method can be performed without fear of overflow. If +* the computed bound is greater than a large constant, x is scaled to +* prevent overflow, but if the bound overflows, x is set to 0, x(j) to +* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +* +* Similarly, a row-wise scheme is used to solve A'*x = b. The basic +* algorithm for A upper triangular is +* +* for j = 1, ..., n +* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +* end +* +* We simultaneously compute two bounds +* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +* M(j) = bound on x(i), 1<=i<=j +* +* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +* Then the bound on x(j) is +* +* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +* +* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +* 1<=i<=j +* +* and we can safely call STPSV if 1/M(n) and 1/G(n) are both greater +* than max(underflow, 1/overflow). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, IP, J, JFIRST, JINC, JLAST, JLEN + REAL BIGNUM, GROW, REC, SMLNUM, SUMJ, TJJ, TJJS, + $ TMAX, TSCAL, USCAL, XBND, XJ, XMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SASUM, SDOT, SLAMCH + EXTERNAL LSAME, ISAMAX, SASUM, SDOT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SSCAL, STPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLATPS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + IP = 1 + DO 10 J = 1, N + CNORM( J ) = SASUM( J-1, AP( IP ), 1 ) + IP = IP + J + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + IP = 1 + DO 20 J = 1, N - 1 + CNORM( J ) = SASUM( N-J, AP( IP+1 ), 1 ) + IP = IP + N - J + 1 + 20 CONTINUE + CNORM( N ) = ZERO + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM. +* + IMAX = ISAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM ) THEN + TSCAL = ONE + ELSE + TSCAL = ONE / ( SMLNUM*TMAX ) + CALL SSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine STPSV can be used. +* + J = ISAMAX( N, X, 1 ) + XMAX = ABS( X( J ) ) + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 50 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = N + DO 30 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* M(j) = G(j-1) / abs(A(j,j)) +* + TJJ = ABS( AP( IP ) ) + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + IP = IP + JINC*JLEN + JLEN = JLEN - 1 + 30 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 40 CONTINUE + END IF + 50 CONTINUE +* + ELSE +* +* Compute the growth in A' * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 80 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = 1 + DO 60 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + TJJ = ABS( AP( IP ) ) + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + JLEN = JLEN + 1 + IP = IP + JINC*JLEN + 60 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 70 CONTINUE + END IF + 80 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL STPSV( UPLO, TRANS, DIAG, N, AP, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = BIGNUM / XMAX + CALL SSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + IP = JFIRST*( JFIRST+1 ) / 2 + DO 100 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 95 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 90 I = 1, N + X( I ) = ZERO + 90 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 95 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL SSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +* + CALL SAXPY( J-1, -X( J )*TSCAL, AP( IP-J+1 ), 1, X, + $ 1 ) + I = ISAMAX( J-1, X, 1 ) + XMAX = ABS( X( I ) ) + END IF + IP = IP - J + ELSE + IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +* + CALL SAXPY( N-J, -X( J )*TSCAL, AP( IP+1 ), 1, + $ X( J+1 ), 1 ) + I = J + ISAMAX( N-J, X( J+1 ), 1 ) + XMAX = ABS( X( I ) ) + END IF + IP = IP + N - J + 1 + END IF + 100 CONTINUE +* + ELSE +* +* Solve A' * x = b +* + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = 1 + DO 140 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = ABS( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = USCAL / TJJS + END IF + IF( REC.LT.ONE ) THEN + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + SUMJ = ZERO + IF( USCAL.EQ.ONE ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call SDOT to perform the dot product. +* + IF( UPPER ) THEN + SUMJ = SDOT( J-1, AP( IP-J+1 ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + SUMJ = SDOT( N-J, AP( IP+1 ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 110 I = 1, J - 1 + SUMJ = SUMJ + ( AP( IP-J+I )*USCAL )*X( I ) + 110 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 120 I = 1, N - J + SUMJ = SUMJ + ( AP( IP+I )*USCAL )*X( J+I ) + 120 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.TSCAL ) THEN +* +* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - SUMJ + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 135 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A'*x = 0. +* + DO 130 I = 1, N + X( I ) = ZERO + 130 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 135 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - sumj if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = X( J ) / TJJS - SUMJ + END IF + XMAX = MAX( XMAX, ABS( X( J ) ) ) + JLEN = JLEN + 1 + IP = IP + JINC*JLEN + 140 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL SSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of SLATPS +* + END diff --git a/costa/native/external/lapack/slatrd.f b/costa/native/external/lapack/slatrd.f new file mode 100644 index 000000000..065814879 --- /dev/null +++ b/costa/native/external/lapack/slatrd.f @@ -0,0 +1,259 @@ + SUBROUTINE SLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDW, N, NB +* .. +* .. Array Arguments .. + REAL A( LDA, * ), E( * ), TAU( * ), W( LDW, * ) +* .. +* +* Purpose +* ======= +* +* SLATRD reduces NB rows and columns of a real symmetric matrix A to +* symmetric tridiagonal form by an orthogonal similarity +* transformation Q' * A * Q, and returns the matrices V and W which are +* needed to apply the transformation to the unreduced part of A. +* +* If UPLO = 'U', SLATRD reduces the last NB rows and columns of a +* matrix, of which the upper triangle is supplied; +* if UPLO = 'L', SLATRD reduces the first NB rows and columns of a +* matrix, of which the lower triangle is supplied. +* +* This is an auxiliary routine called by SSYTRD. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. +* +* NB (input) INTEGER +* The number of rows and columns to be reduced. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* On exit: +* if UPLO = 'U', the last NB columns have been reduced to +* tridiagonal form, with the diagonal elements overwriting +* the diagonal elements of A; the elements above the diagonal +* with the array TAU, represent the orthogonal matrix Q as a +* product of elementary reflectors; +* if UPLO = 'L', the first NB columns have been reduced to +* tridiagonal form, with the diagonal elements overwriting +* the diagonal elements of A; the elements below the diagonal +* with the array TAU, represent the orthogonal matrix Q as a +* product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= (1,N). +* +* E (output) REAL array, dimension (N-1) +* If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal +* elements of the last NB columns of the reduced matrix; +* if UPLO = 'L', E(1:nb) contains the subdiagonal elements of +* the first NB columns of the reduced matrix. +* +* TAU (output) REAL array, dimension (N-1) +* The scalar factors of the elementary reflectors, stored in +* TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. +* See Further Details. +* +* W (output) REAL array, dimension (LDW,NB) +* The n-by-nb matrix W required to update the unreduced part +* of A. +* +* LDW (input) INTEGER +* The leading dimension of the array W. LDW >= max(1,N). +* +* Further Details +* =============== +* +* If UPLO = 'U', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(n) H(n-1) . . . H(n-nb+1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), +* and tau in TAU(i-1). +* +* If UPLO = 'L', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(1) H(2) . . . H(nb). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), +* and tau in TAU(i). +* +* The elements of the vectors v together form the n-by-nb matrix V +* which is needed, with W, to apply the transformation to the unreduced +* part of the matrix, using a symmetric rank-2k update of the form: +* A := A - V*W' - W*V'. +* +* The contents of A on exit are illustrated by the following examples +* with n = 5 and nb = 2: +* +* if UPLO = 'U': if UPLO = 'L': +* +* ( a a a v4 v5 ) ( d ) +* ( a a v4 v5 ) ( 1 d ) +* ( a 1 v5 ) ( v1 1 a ) +* ( d 1 ) ( v1 v2 a a ) +* ( d ) ( v1 v2 a a a ) +* +* where d denotes a diagonal element of the reduced matrix, a denotes +* an element of the original matrix that is unchanged, and vi denotes +* an element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, HALF + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, HALF = 0.5E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IW + REAL ALPHA +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SGEMV, SLARFG, SSCAL, SSYMV +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Reduce last NB columns of upper triangle +* + DO 10 I = N, N - NB + 1, -1 + IW = I - N + NB + IF( I.LT.N ) THEN +* +* Update A(1:i,i) +* + CALL SGEMV( 'No transpose', I, N-I, -ONE, A( 1, I+1 ), + $ LDA, W( I, IW+1 ), LDW, ONE, A( 1, I ), 1 ) + CALL SGEMV( 'No transpose', I, N-I, -ONE, W( 1, IW+1 ), + $ LDW, A( I, I+1 ), LDA, ONE, A( 1, I ), 1 ) + END IF + IF( I.GT.1 ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(1:i-2,i) +* + CALL SLARFG( I-1, A( I-1, I ), A( 1, I ), 1, TAU( I-1 ) ) + E( I-1 ) = A( I-1, I ) + A( I-1, I ) = ONE +* +* Compute W(1:i-1,i) +* + CALL SSYMV( 'Upper', I-1, ONE, A, LDA, A( 1, I ), 1, + $ ZERO, W( 1, IW ), 1 ) + IF( I.LT.N ) THEN + CALL SGEMV( 'Transpose', I-1, N-I, ONE, W( 1, IW+1 ), + $ LDW, A( 1, I ), 1, ZERO, W( I+1, IW ), 1 ) + CALL SGEMV( 'No transpose', I-1, N-I, -ONE, + $ A( 1, I+1 ), LDA, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + CALL SGEMV( 'Transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( 1, I ), 1, ZERO, W( I+1, IW ), 1 ) + CALL SGEMV( 'No transpose', I-1, N-I, -ONE, + $ W( 1, IW+1 ), LDW, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + END IF + CALL SSCAL( I-1, TAU( I-1 ), W( 1, IW ), 1 ) + ALPHA = -HALF*TAU( I-1 )*SDOT( I-1, W( 1, IW ), 1, + $ A( 1, I ), 1 ) + CALL SAXPY( I-1, ALPHA, A( 1, I ), 1, W( 1, IW ), 1 ) + END IF +* + 10 CONTINUE + ELSE +* +* Reduce first NB columns of lower triangle +* + DO 20 I = 1, NB +* +* Update A(i:n,i) +* + CALL SGEMV( 'No transpose', N-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, W( I, 1 ), LDW, ONE, A( I, I ), 1 ) + CALL SGEMV( 'No transpose', N-I+1, I-1, -ONE, W( I, 1 ), + $ LDW, A( I, 1 ), LDA, ONE, A( I, I ), 1 ) + IF( I.LT.N ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:n,i) +* + CALL SLARFG( N-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1, + $ TAU( I ) ) + E( I ) = A( I+1, I ) + A( I+1, I ) = ONE +* +* Compute W(i+1:n,i) +* + CALL SSYMV( 'Lower', N-I, ONE, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, W( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', N-I, I-1, ONE, W( I+1, 1 ), LDW, + $ A( I+1, I ), 1, ZERO, W( 1, I ), 1 ) + CALL SGEMV( 'No transpose', N-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', N-I, I-1, ONE, A( I+1, 1 ), LDA, + $ A( I+1, I ), 1, ZERO, W( 1, I ), 1 ) + CALL SGEMV( 'No transpose', N-I, I-1, -ONE, W( I+1, 1 ), + $ LDW, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL SSCAL( N-I, TAU( I ), W( I+1, I ), 1 ) + ALPHA = -HALF*TAU( I )*SDOT( N-I, W( I+1, I ), 1, + $ A( I+1, I ), 1 ) + CALL SAXPY( N-I, ALPHA, A( I+1, I ), 1, W( I+1, I ), 1 ) + END IF +* + 20 CONTINUE + END IF +* + RETURN +* +* End of SLATRD +* + END diff --git a/costa/native/external/lapack/slatrs.f b/costa/native/external/lapack/slatrs.f new file mode 100644 index 000000000..aa4961a0c --- /dev/null +++ b/costa/native/external/lapack/slatrs.f @@ -0,0 +1,702 @@ + SUBROUTINE SLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE, + $ CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, LDA, N + REAL SCALE +* .. +* .. Array Arguments .. + REAL A( LDA, * ), CNORM( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* SLATRS solves one of the triangular systems +* +* A *x = s*b or A'*x = s*b +* +* with scaling to prevent overflow. Here A is an upper or lower +* triangular matrix, A' denotes the transpose of A, x and b are +* n-element vectors, and s is a scaling factor, usually less than +* or equal to 1, chosen so that the components of x will be less than +* the overflow threshold. If the unscaled problem will not cause +* overflow, the Level 2 BLAS routine STRSV is called. If the matrix A +* is singular (A(j,j) = 0 for some j), then s is set to 0 and a +* non-trivial solution to A*x = 0 is returned. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* TRANS (input) CHARACTER*1 +* Specifies the operation applied to A. +* = 'N': Solve A * x = s*b (No transpose) +* = 'T': Solve A'* x = s*b (Transpose) +* = 'C': Solve A'* x = s*b (Conjugate transpose = Transpose) +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* NORMIN (input) CHARACTER*1 +* Specifies whether CNORM has been set or not. +* = 'Y': CNORM contains the column norms on entry +* = 'N': CNORM is not set on entry. On exit, the norms will +* be computed and stored in CNORM. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading n by n +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. If UPLO = 'L', the leading n by n lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max (1,N). +* +* X (input/output) REAL array, dimension (N) +* On entry, the right hand side b of the triangular system. +* On exit, X is overwritten by the solution vector x. +* +* SCALE (output) REAL +* The scaling factor s for the triangular system +* A * x = s*b or A'* x = s*b. +* If SCALE = 0, the matrix A is singular or badly scaled, and +* the vector x is an exact or approximate solution to A*x = 0. +* +* CNORM (input or output) REAL array, dimension (N) +* +* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +* contains the norm of the off-diagonal part of the j-th column +* of A. If TRANS = 'N', CNORM(j) must be greater than or equal +* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +* must be greater than or equal to the 1-norm. +* +* If NORMIN = 'N', CNORM is an output argument and CNORM(j) +* returns the 1-norm of the offdiagonal part of the j-th column +* of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* Further Details +* ======= ======= +* +* A rough bound on x is computed; if that is less than overflow, STRSV +* is called, otherwise, specific code is used which checks for possible +* overflow or divide-by-zero at every operation. +* +* A columnwise scheme is used for solving A*x = b. The basic algorithm +* if A is lower triangular is +* +* x[1:n] := b[1:n] +* for j = 1, ..., n +* x(j) := x(j) / A(j,j) +* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +* end +* +* Define bounds on the components of x after j iterations of the loop: +* M(j) = bound on x[1:j] +* G(j) = bound on x[j+1:n] +* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +* +* Then for iteration j+1 we have +* M(j+1) <= G(j) / | A(j+1,j+1) | +* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +* +* where CNORM(j+1) is greater than or equal to the infinity-norm of +* column j+1 of A, not counting the diagonal. Hence +* +* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +* 1<=i<=j +* and +* +* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +* 1<=i< j +* +* Since |x(j)| <= M(j), we use the Level 2 BLAS routine STRSV if the +* reciprocal of the largest M(j), j=1,..,n, is larger than +* max(underflow, 1/overflow). +* +* The bound on x(j) is also used to determine when a step in the +* columnwise method can be performed without fear of overflow. If +* the computed bound is greater than a large constant, x is scaled to +* prevent overflow, but if the bound overflows, x is set to 0, x(j) to +* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +* +* Similarly, a row-wise scheme is used to solve A'*x = b. The basic +* algorithm for A upper triangular is +* +* for j = 1, ..., n +* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +* end +* +* We simultaneously compute two bounds +* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +* M(j) = bound on x(i), 1<=i<=j +* +* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +* Then the bound on x(j) is +* +* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +* +* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +* 1<=i<=j +* +* and we can safely call STRSV if 1/M(n) and 1/G(n) are both greater +* than max(underflow, 1/overflow). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, J, JFIRST, JINC, JLAST + REAL BIGNUM, GROW, REC, SMLNUM, SUMJ, TJJ, TJJS, + $ TMAX, TSCAL, USCAL, XBND, XJ, XMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SASUM, SDOT, SLAMCH + EXTERNAL LSAME, ISAMAX, SASUM, SDOT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SSCAL, STRSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLATRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + DO 10 J = 1, N + CNORM( J ) = SASUM( J-1, A( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + DO 20 J = 1, N - 1 + CNORM( J ) = SASUM( N-J, A( J+1, J ), 1 ) + 20 CONTINUE + CNORM( N ) = ZERO + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM. +* + IMAX = ISAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM ) THEN + TSCAL = ONE + ELSE + TSCAL = ONE / ( SMLNUM*TMAX ) + CALL SSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine STRSV can be used. +* + J = ISAMAX( N, X, 1 ) + XMAX = ABS( X( J ) ) + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 50 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + DO 30 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* M(j) = G(j-1) / abs(A(j,j)) +* + TJJ = ABS( A( J, J ) ) + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + 30 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 40 CONTINUE + END IF + 50 CONTINUE +* + ELSE +* +* Compute the growth in A' * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 80 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + DO 60 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + TJJ = ABS( A( J, J ) ) + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + 60 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 70 CONTINUE + END IF + 80 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL STRSV( UPLO, TRANS, DIAG, N, A, LDA, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = BIGNUM / XMAX + CALL SSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + DO 100 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 95 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 90 I = 1, N + X( I ) = ZERO + 90 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 95 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL SSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +* + CALL SAXPY( J-1, -X( J )*TSCAL, A( 1, J ), 1, X, + $ 1 ) + I = ISAMAX( J-1, X, 1 ) + XMAX = ABS( X( I ) ) + END IF + ELSE + IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +* + CALL SAXPY( N-J, -X( J )*TSCAL, A( J+1, J ), 1, + $ X( J+1 ), 1 ) + I = J + ISAMAX( N-J, X( J+1 ), 1 ) + XMAX = ABS( X( I ) ) + END IF + END IF + 100 CONTINUE +* + ELSE +* +* Solve A' * x = b +* + DO 140 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = ABS( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = USCAL / TJJS + END IF + IF( REC.LT.ONE ) THEN + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + SUMJ = ZERO + IF( USCAL.EQ.ONE ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call SDOT to perform the dot product. +* + IF( UPPER ) THEN + SUMJ = SDOT( J-1, A( 1, J ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + SUMJ = SDOT( N-J, A( J+1, J ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 110 I = 1, J - 1 + SUMJ = SUMJ + ( A( I, J )*USCAL )*X( I ) + 110 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 120 I = J + 1, N + SUMJ = SUMJ + ( A( I, J )*USCAL )*X( I ) + 120 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.TSCAL ) THEN +* +* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - SUMJ + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 135 + END IF +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A'*x = 0. +* + DO 130 I = 1, N + X( I ) = ZERO + 130 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 135 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - sumj if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = X( J ) / TJJS - SUMJ + END IF + XMAX = MAX( XMAX, ABS( X( J ) ) ) + 140 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL SSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of SLATRS +* + END diff --git a/costa/native/external/lapack/slatrz.f b/costa/native/external/lapack/slatrz.f new file mode 100644 index 000000000..70cb31e93 --- /dev/null +++ b/costa/native/external/lapack/slatrz.f @@ -0,0 +1,128 @@ + SUBROUTINE SLATRZ( M, N, L, A, LDA, TAU, WORK ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER L, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SLATRZ factors the M-by-(M+L) real upper trapezoidal matrix +* [ A1 A2 ] = [ A(1:M,1:M) A(1:M,N-L+1:N) ] as ( R 0 ) * Z, by means +* of orthogonal transformations. Z is an (M+L)-by-(M+L) orthogonal +* matrix and, R and A1 are M-by-M upper triangular matrices. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* L (input) INTEGER +* The number of columns of the matrix A containing the +* meaningful part of the Householder vectors. N-M >= L >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the leading M-by-N upper trapezoidal part of the +* array A must contain the matrix to be factorized. +* On exit, the leading M-by-M upper triangular part of A +* contains the upper triangular matrix R, and elements N-L+1 to +* N of the first M rows of A, with the array TAU, represent the +* orthogonal matrix Z as a product of M elementary reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) REAL array, dimension (M) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace) REAL array, dimension (M) +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* The factorization is obtained by Householder's method. The kth +* transformation matrix, Z( k ), which is used to introduce zeros into +* the ( m - k + 1 )th row of A, is given in the form +* +* Z( k ) = ( I 0 ), +* ( 0 T( k ) ) +* +* where +* +* T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), +* ( 0 ) +* ( z( k ) ) +* +* tau is a scalar and z( k ) is an l element vector. tau and z( k ) +* are chosen to annihilate the elements of the kth row of A2. +* +* The scalar tau is returned in the kth element of TAU and the vector +* u( k ) in the kth row of A2, such that the elements of z( k ) are +* in a( k, l + 1 ), ..., a( k, n ). The elements of R are returned in +* the upper triangular part of A1. +* +* Z is given by +* +* Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. External Subroutines .. + EXTERNAL SLARFG, SLARZ +* .. +* .. Executable Statements .. +* +* Test the input arguments +* +* Quick return if possible +* + IF( M.EQ.0 ) THEN + RETURN + ELSE IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + RETURN + END IF +* + DO 20 I = M, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* [ A(i,i) A(i,n-l+1:n) ] +* + CALL SLARFG( L+1, A( I, I ), A( I, N-L+1 ), LDA, TAU( I ) ) +* +* Apply H(i) to A(1:i-1,i:n) from the right +* + CALL SLARZ( 'Right', I-1, N-I+1, L, A( I, N-L+1 ), LDA, + $ TAU( I ), A( 1, I ), LDA, WORK ) +* + 20 CONTINUE +* + RETURN +* +* End of SLATRZ +* + END diff --git a/costa/native/external/lapack/slatzm.f b/costa/native/external/lapack/slatzm.f new file mode 100644 index 000000000..6f6886bf2 --- /dev/null +++ b/costa/native/external/lapack/slatzm.f @@ -0,0 +1,143 @@ + SUBROUTINE SLATZM( SIDE, M, N, V, INCV, TAU, C1, C2, LDC, WORK ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + REAL TAU +* .. +* .. Array Arguments .. + REAL C1( LDC, * ), C2( LDC, * ), V( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine SORMRZ. +* +* SLATZM applies a Householder matrix generated by STZRQF to a matrix. +* +* Let P = I - tau*u*u', u = ( 1 ), +* ( v ) +* where v is an (m-1) vector if SIDE = 'L', or a (n-1) vector if +* SIDE = 'R'. +* +* If SIDE equals 'L', let +* C = [ C1 ] 1 +* [ C2 ] m-1 +* n +* Then C is overwritten by P*C. +* +* If SIDE equals 'R', let +* C = [ C1, C2 ] m +* 1 n-1 +* Then C is overwritten by C*P. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': form P * C +* = 'R': form C * P +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* V (input) REAL array, dimension +* (1 + (M-1)*abs(INCV)) if SIDE = 'L' +* (1 + (N-1)*abs(INCV)) if SIDE = 'R' +* The vector v in the representation of P. V is not used +* if TAU = 0. +* +* INCV (input) INTEGER +* The increment between elements of v. INCV <> 0 +* +* TAU (input) REAL +* The value tau in the representation of P. +* +* C1 (input/output) REAL array, dimension +* (LDC,N) if SIDE = 'L' +* (M,1) if SIDE = 'R' +* On entry, the n-vector C1 if SIDE = 'L', or the m-vector C1 +* if SIDE = 'R'. +* +* On exit, the first row of P*C if SIDE = 'L', or the first +* column of C*P if SIDE = 'R'. +* +* C2 (input/output) REAL array, dimension +* (LDC, N) if SIDE = 'L' +* (LDC, N-1) if SIDE = 'R' +* On entry, the (m - 1) x n matrix C2 if SIDE = 'L', or the +* m x (n - 1) matrix C2 if SIDE = 'R'. +* +* On exit, rows 2:m of P*C if SIDE = 'L', or columns 2:m of C*P +* if SIDE = 'R'. +* +* LDC (input) INTEGER +* The leading dimension of the arrays C1 and C2. LDC >= (1,M). +* +* WORK (workspace) REAL array, dimension +* (N) if SIDE = 'L' +* (M) if SIDE = 'R' +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGEMV, SGER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( ( MIN( M, N ).EQ.0 ) .OR. ( TAU.EQ.ZERO ) ) + $ RETURN +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* w := C1 + v' * C2 +* + CALL SCOPY( N, C1, LDC, WORK, 1 ) + CALL SGEMV( 'Transpose', M-1, N, ONE, C2, LDC, V, INCV, ONE, + $ WORK, 1 ) +* +* [ C1 ] := [ C1 ] - tau* [ 1 ] * w' +* [ C2 ] [ C2 ] [ v ] +* + CALL SAXPY( N, -TAU, WORK, 1, C1, LDC ) + CALL SGER( M-1, N, -TAU, V, INCV, WORK, 1, C2, LDC ) +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* w := C1 + C2 * v +* + CALL SCOPY( M, C1, 1, WORK, 1 ) + CALL SGEMV( 'No transpose', M, N-1, ONE, C2, LDC, V, INCV, ONE, + $ WORK, 1 ) +* +* [ C1, C2 ] := [ C1, C2 ] - tau* w * [ 1 , v'] +* + CALL SAXPY( M, -TAU, WORK, 1, C1, 1 ) + CALL SGER( M, N-1, -TAU, WORK, 1, V, INCV, C2, LDC ) + END IF +* + RETURN +* +* End of SLATZM +* + END diff --git a/costa/native/external/lapack/slauu2.f b/costa/native/external/lapack/slauu2.f new file mode 100644 index 000000000..b666f50f1 --- /dev/null +++ b/costa/native/external/lapack/slauu2.f @@ -0,0 +1,136 @@ + SUBROUTINE SLAUU2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* SLAUU2 computes the product U * U' or L' * L, where the triangular +* factor U or L is stored in the upper or lower triangular part of +* the array A. +* +* If UPLO = 'U' or 'u' then the upper triangle of the result is stored, +* overwriting the factor U in A. +* If UPLO = 'L' or 'l' then the lower triangle of the result is stored, +* overwriting the factor L in A. +* +* This is the unblocked form of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the triangular factor stored in the array A +* is upper or lower triangular: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the triangular factor U or L. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the triangular factor U or L. +* On exit, if UPLO = 'U', the upper triangle of A is +* overwritten with the upper triangle of the product U * U'; +* if UPLO = 'L', the lower triangle of A is overwritten with +* the lower triangle of the product L' * L. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + REAL AII +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAUU2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the product U * U'. +* + DO 10 I = 1, N + AII = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = SDOT( N-I+1, A( I, I ), LDA, A( I, I ), LDA ) + CALL SGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( I, I+1 ), LDA, AII, A( 1, I ), 1 ) + ELSE + CALL SSCAL( I, AII, A( 1, I ), 1 ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the product L' * L. +* + DO 20 I = 1, N + AII = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = SDOT( N-I+1, A( I, I ), 1, A( I, I ), 1 ) + CALL SGEMV( 'Transpose', N-I, I-1, ONE, A( I+1, 1 ), LDA, + $ A( I+1, I ), 1, AII, A( I, 1 ), LDA ) + ELSE + CALL SSCAL( I, AII, A( I, 1 ), LDA ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of SLAUU2 +* + END diff --git a/costa/native/external/lapack/slauum.f b/costa/native/external/lapack/slauum.f new file mode 100644 index 000000000..646f34e0c --- /dev/null +++ b/costa/native/external/lapack/slauum.f @@ -0,0 +1,156 @@ + SUBROUTINE SLAUUM( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* SLAUUM computes the product U * U' or L' * L, where the triangular +* factor U or L is stored in the upper or lower triangular part of +* the array A. +* +* If UPLO = 'U' or 'u' then the upper triangle of the result is stored, +* overwriting the factor U in A. +* If UPLO = 'L' or 'l' then the lower triangle of the result is stored, +* overwriting the factor L in A. +* +* This is the blocked form of the algorithm, calling Level 3 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the triangular factor stored in the array A +* is upper or lower triangular: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the triangular factor U or L. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the triangular factor U or L. +* On exit, if UPLO = 'U', the upper triangle of A is +* overwritten with the upper triangle of the product U * U'; +* if UPLO = 'L', the lower triangle of A is overwritten with +* the lower triangle of the product L' * L. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SLAUU2, SSYRK, STRMM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAUUM', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'SLAUUM', UPLO, N, -1, -1, -1 ) +* + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL SLAUU2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code +* + IF( UPPER ) THEN +* +* Compute the product U * U'. +* + DO 10 I = 1, N, NB + IB = MIN( NB, N-I+1 ) + CALL STRMM( 'Right', 'Upper', 'Transpose', 'Non-unit', + $ I-1, IB, ONE, A( I, I ), LDA, A( 1, I ), + $ LDA ) + CALL SLAUU2( 'Upper', IB, A( I, I ), LDA, INFO ) + IF( I+IB.LE.N ) THEN + CALL SGEMM( 'No transpose', 'Transpose', I-1, IB, + $ N-I-IB+1, ONE, A( 1, I+IB ), LDA, + $ A( I, I+IB ), LDA, ONE, A( 1, I ), LDA ) + CALL SSYRK( 'Upper', 'No transpose', IB, N-I-IB+1, + $ ONE, A( I, I+IB ), LDA, ONE, A( I, I ), + $ LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute the product L' * L. +* + DO 20 I = 1, N, NB + IB = MIN( NB, N-I+1 ) + CALL STRMM( 'Left', 'Lower', 'Transpose', 'Non-unit', IB, + $ I-1, ONE, A( I, I ), LDA, A( I, 1 ), LDA ) + CALL SLAUU2( 'Lower', IB, A( I, I ), LDA, INFO ) + IF( I+IB.LE.N ) THEN + CALL SGEMM( 'Transpose', 'No transpose', IB, I-1, + $ N-I-IB+1, ONE, A( I+IB, I ), LDA, + $ A( I+IB, 1 ), LDA, ONE, A( I, 1 ), LDA ) + CALL SSYRK( 'Lower', 'Transpose', IB, N-I-IB+1, ONE, + $ A( I+IB, I ), LDA, ONE, A( I, I ), LDA ) + END IF + 20 CONTINUE + END IF + END IF +* + RETURN +* +* End of SLAUUM +* + END diff --git a/costa/native/external/lapack/sopgtr.f b/costa/native/external/lapack/sopgtr.f new file mode 100644 index 000000000..4b98a5a9b --- /dev/null +++ b/costa/native/external/lapack/sopgtr.f @@ -0,0 +1,161 @@ + SUBROUTINE SOPGTR( UPLO, N, AP, TAU, Q, LDQ, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDQ, N +* .. +* .. Array Arguments .. + REAL AP( * ), Q( LDQ, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SOPGTR generates a real orthogonal matrix Q which is defined as the +* product of n-1 elementary reflectors H(i) of order n, as returned by +* SSPTRD using packed storage: +* +* if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), +* +* if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular packed storage used in previous +* call to SSPTRD; +* = 'L': Lower triangular packed storage used in previous +* call to SSPTRD. +* +* N (input) INTEGER +* The order of the matrix Q. N >= 0. +* +* AP (input) REAL array, dimension (N*(N+1)/2) +* The vectors which define the elementary reflectors, as +* returned by SSPTRD. +* +* TAU (input) REAL array, dimension (N-1) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SSPTRD. +* +* Q (output) REAL array, dimension (LDQ,N) +* The N-by-N orthogonal matrix Q. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* WORK (workspace) REAL array, dimension (N-1) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IINFO, IJ, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SORG2L, SORG2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SOPGTR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Q was determined by a call to SSPTRD with UPLO = 'U' +* +* Unpack the vectors which define the elementary reflectors and +* set the last row and column of Q equal to those of the unit +* matrix +* + IJ = 2 + DO 20 J = 1, N - 1 + DO 10 I = 1, J - 1 + Q( I, J ) = AP( IJ ) + IJ = IJ + 1 + 10 CONTINUE + IJ = IJ + 2 + Q( N, J ) = ZERO + 20 CONTINUE + DO 30 I = 1, N - 1 + Q( I, N ) = ZERO + 30 CONTINUE + Q( N, N ) = ONE +* +* Generate Q(1:n-1,1:n-1) +* + CALL SORG2L( N-1, N-1, N-1, Q, LDQ, TAU, WORK, IINFO ) +* + ELSE +* +* Q was determined by a call to SSPTRD with UPLO = 'L'. +* +* Unpack the vectors which define the elementary reflectors and +* set the first row and column of Q equal to those of the unit +* matrix +* + Q( 1, 1 ) = ONE + DO 40 I = 2, N + Q( I, 1 ) = ZERO + 40 CONTINUE + IJ = 3 + DO 60 J = 2, N + Q( 1, J ) = ZERO + DO 50 I = J + 1, N + Q( I, J ) = AP( IJ ) + IJ = IJ + 1 + 50 CONTINUE + IJ = IJ + 2 + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Generate Q(2:n,2:n) +* + CALL SORG2R( N-1, N-1, N-1, Q( 2, 2 ), LDQ, TAU, WORK, + $ IINFO ) + END IF + END IF + RETURN +* +* End of SOPGTR +* + END diff --git a/costa/native/external/lapack/sopmtr.f b/costa/native/external/lapack/sopmtr.f new file mode 100644 index 000000000..a089d8f78 --- /dev/null +++ b/costa/native/external/lapack/sopmtr.f @@ -0,0 +1,258 @@ + SUBROUTINE SOPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, UPLO + INTEGER INFO, LDC, M, N +* .. +* .. Array Arguments .. + REAL AP( * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SOPMTR overwrites the general real M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'T': Q**T * C C * Q**T +* +* where Q is a real orthogonal matrix of order nq, with nq = m if +* SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +* nq-1 elementary reflectors, as returned by SSPTRD using packed +* storage: +* +* if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); +* +* if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**T from the Left; +* = 'R': apply Q or Q**T from the Right. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular packed storage used in previous +* call to SSPTRD; +* = 'L': Lower triangular packed storage used in previous +* call to SSPTRD. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'T': Transpose, apply Q**T. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* AP (input) REAL array, dimension +* (M*(M+1)/2) if SIDE = 'L' +* (N*(N+1)/2) if SIDE = 'R' +* The vectors which define the elementary reflectors, as +* returned by SSPTRD. AP is modified by the routine but +* restored on exit. +* +* TAU (input) REAL array, dimension (M-1) if SIDE = 'L' +* or (N-1) if SIDE = 'R' +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SSPTRD. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) REAL array, dimension +* (N) if SIDE = 'L' +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL FORWRD, LEFT, NOTRAN, UPPER + INTEGER I, I1, I2, I3, IC, II, JC, MI, NI, NQ + REAL AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + UPPER = LSAME( UPLO, 'U' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SOPMTR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Q was determined by a call to SSPTRD with UPLO = 'U' +* + FORWRD = ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) +* + IF( FORWRD ) THEN + I1 = 1 + I2 = NQ - 1 + I3 = 1 + II = 2 + ELSE + I1 = NQ - 1 + I2 = 1 + I3 = -1 + II = NQ*( NQ+1 ) / 2 - 1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(1:i,1:n) +* + MI = I + ELSE +* +* H(i) is applied to C(1:m,1:i) +* + NI = I + END IF +* +* Apply H(i) +* + AII = AP( II ) + AP( II ) = ONE + CALL SLARF( SIDE, MI, NI, AP( II-I+1 ), 1, TAU( I ), C, LDC, + $ WORK ) + AP( II ) = AII +* + IF( FORWRD ) THEN + II = II + I + 2 + ELSE + II = II - I - 1 + END IF + 10 CONTINUE + ELSE +* +* Q was determined by a call to SSPTRD with UPLO = 'L'. +* + FORWRD = ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) +* + IF( FORWRD ) THEN + I1 = 1 + I2 = NQ - 1 + I3 = 1 + II = 2 + ELSE + I1 = NQ - 1 + I2 = 1 + I3 = -1 + II = NQ*( NQ+1 ) / 2 - 1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 20 I = I1, I2, I3 + AII = AP( II ) + AP( II ) = ONE + IF( LEFT ) THEN +* +* H(i) is applied to C(i+1:m,1:n) +* + MI = M - I + IC = I + 1 + ELSE +* +* H(i) is applied to C(1:m,i+1:n) +* + NI = N - I + JC = I + 1 + END IF +* +* Apply H(i) +* + CALL SLARF( SIDE, MI, NI, AP( II ), 1, TAU( I ), + $ C( IC, JC ), LDC, WORK ) + AP( II ) = AII +* + IF( FORWRD ) THEN + II = II + NQ - I + 1 + ELSE + II = II - NQ + I - 2 + END IF + 20 CONTINUE + END IF + RETURN +* +* End of SOPMTR +* + END diff --git a/costa/native/external/lapack/sorg2l.f b/costa/native/external/lapack/sorg2l.f new file mode 100644 index 000000000..7a21a6740 --- /dev/null +++ b/costa/native/external/lapack/sorg2l.f @@ -0,0 +1,128 @@ + SUBROUTINE SORG2L( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORG2L generates an m by n real matrix Q with orthonormal columns, +* which is defined as the last n columns of a product of k elementary +* reflectors of order m +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by SGEQLF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the (n-k+i)-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by SGEQLF in the last k columns of its array +* argument A. +* On exit, the m by n matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGEQLF. +* +* WORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, II, J, L +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORG2L', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns 1:n-k to columns of the unit matrix +* + DO 20 J = 1, N - K + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( M-N+J, J ) = ONE + 20 CONTINUE +* + DO 40 I = 1, K + II = N - K + I +* +* Apply H(i) to A(1:m-k+i,1:n-k+i) from the left +* + A( M-N+II, II ) = ONE + CALL SLARF( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), A, + $ LDA, WORK ) + CALL SSCAL( M-N+II-1, -TAU( I ), A( 1, II ), 1 ) + A( M-N+II, II ) = ONE - TAU( I ) +* +* Set A(m-k+i+1:m,n-k+i) to zero +* + DO 30 L = M - N + II + 1, M + A( L, II ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of SORG2L +* + END diff --git a/costa/native/external/lapack/sorg2r.f b/costa/native/external/lapack/sorg2r.f new file mode 100644 index 000000000..4af5506fc --- /dev/null +++ b/costa/native/external/lapack/sorg2r.f @@ -0,0 +1,130 @@ + SUBROUTINE SORG2R( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORG2R generates an m by n real matrix Q with orthonormal columns, +* which is defined as the first n columns of a product of k elementary +* reflectors of order m +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by SGEQRF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the i-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by SGEQRF in the first k columns of its array +* argument A. +* On exit, the m-by-n matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGEQRF. +* +* WORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORG2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns k+1:n to columns of the unit matrix +* + DO 20 J = K + 1, N + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( J, J ) = ONE + 20 CONTINUE +* + DO 40 I = K, 1, -1 +* +* Apply H(i) to A(i:m,i:n) from the left +* + IF( I.LT.N ) THEN + A( I, I ) = ONE + CALL SLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) + END IF + IF( I.LT.M ) + $ CALL SSCAL( M-I, -TAU( I ), A( I+1, I ), 1 ) + A( I, I ) = ONE - TAU( I ) +* +* Set A(1:i-1,i) to zero +* + DO 30 L = 1, I - 1 + A( L, I ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of SORG2R +* + END diff --git a/costa/native/external/lapack/sorgbr.f b/costa/native/external/lapack/sorgbr.f new file mode 100644 index 000000000..2f7052283 --- /dev/null +++ b/costa/native/external/lapack/sorgbr.f @@ -0,0 +1,245 @@ + SUBROUTINE SORGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER VECT + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORGBR generates one of the real orthogonal matrices Q or P**T +* determined by SGEBRD when reducing a real matrix A to bidiagonal +* form: A = Q * B * P**T. Q and P**T are defined as products of +* elementary reflectors H(i) or G(i) respectively. +* +* If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q +* is of order M: +* if m >= k, Q = H(1) H(2) . . . H(k) and SORGBR returns the first n +* columns of Q, where m >= n >= k; +* if m < k, Q = H(1) H(2) . . . H(m-1) and SORGBR returns Q as an +* M-by-M matrix. +* +* If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T +* is of order N: +* if k < n, P**T = G(k) . . . G(2) G(1) and SORGBR returns the first m +* rows of P**T, where n >= m >= k; +* if k >= n, P**T = G(n-1) . . . G(2) G(1) and SORGBR returns P**T as +* an N-by-N matrix. +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* Specifies whether the matrix Q or the matrix P**T is +* required, as defined in the transformation applied by SGEBRD: +* = 'Q': generate Q; +* = 'P': generate P**T. +* +* M (input) INTEGER +* The number of rows of the matrix Q or P**T to be returned. +* M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q or P**T to be returned. +* N >= 0. +* If VECT = 'Q', M >= N >= min(M,K); +* if VECT = 'P', N >= M >= min(N,K). +* +* K (input) INTEGER +* If VECT = 'Q', the number of columns in the original M-by-K +* matrix reduced by SGEBRD. +* If VECT = 'P', the number of rows in the original K-by-N +* matrix reduced by SGEBRD. +* K >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the vectors which define the elementary reflectors, +* as returned by SGEBRD. +* On exit, the M-by-N matrix Q or P**T. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (input) REAL array, dimension +* (min(M,K)) if VECT = 'Q' +* (min(N,K)) if VECT = 'P' +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i) or G(i), which determines Q or P**T, as +* returned by SGEBRD in its array argument TAUQ or TAUP. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,min(M,N)). +* For optimum performance LWORK >= min(M,N)*NB, where NB +* is the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTQ + INTEGER I, IINFO, J, LWKOPT, MN, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SORGLQ, SORGQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + WANTQ = LSAME( VECT, 'Q' ) + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 .OR. ( WANTQ .AND. ( N.GT.M .OR. N.LT.MIN( M, + $ K ) ) ) .OR. ( .NOT.WANTQ .AND. ( M.GT.N .OR. M.LT. + $ MIN( N, K ) ) ) ) THEN + INFO = -3 + ELSE IF( K.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LWORK.LT.MAX( 1, MN ) .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( WANTQ ) THEN + NB = ILAENV( 1, 'SORGQR', ' ', M, N, K, -1 ) + ELSE + NB = ILAENV( 1, 'SORGLQ', ' ', M, N, K, -1 ) + END IF + LWKOPT = MAX( 1, MN )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGBR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( WANTQ ) THEN +* +* Form Q, determined by a call to SGEBRD to reduce an m-by-k +* matrix +* + IF( M.GE.K ) THEN +* +* If m >= k, assume m >= n >= k +* + CALL SORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* If m < k, assume m = n +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q +* to those of the unit matrix +* + DO 20 J = M, 2, -1 + A( 1, J ) = ZERO + DO 10 I = J + 1, M + A( I, J ) = A( I, J-1 ) + 10 CONTINUE + 20 CONTINUE + A( 1, 1 ) = ONE + DO 30 I = 2, M + A( I, 1 ) = ZERO + 30 CONTINUE + IF( M.GT.1 ) THEN +* +* Form Q(2:m,2:m) +* + CALL SORGQR( M-1, M-1, M-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + ELSE +* +* Form P', determined by a call to SGEBRD to reduce a k-by-n +* matrix +* + IF( K.LT.N ) THEN +* +* If k < n, assume k <= m <= n +* + CALL SORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* If k >= n, assume m = n +* +* Shift the vectors which define the elementary reflectors one +* row downward, and set the first row and column of P' to +* those of the unit matrix +* + A( 1, 1 ) = ONE + DO 40 I = 2, N + A( I, 1 ) = ZERO + 40 CONTINUE + DO 60 J = 2, N + DO 50 I = J - 1, 2, -1 + A( I, J ) = A( I-1, J ) + 50 CONTINUE + A( 1, J ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Form P'(2:n,2:n) +* + CALL SORGLQ( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORGBR +* + END diff --git a/costa/native/external/lapack/sorghr.f b/costa/native/external/lapack/sorghr.f new file mode 100644 index 000000000..01ab48489 --- /dev/null +++ b/costa/native/external/lapack/sorghr.f @@ -0,0 +1,165 @@ + SUBROUTINE SORGHR( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORGHR generates a real orthogonal matrix Q which is defined as the +* product of IHI-ILO elementary reflectors of order N, as returned by +* SGEHRD: +* +* Q = H(ilo) H(ilo+1) . . . H(ihi-1). +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix Q. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* ILO and IHI must have the same values as in the previous call +* of SGEHRD. Q is equal to the unit matrix except in the +* submatrix Q(ilo+1:ihi,ilo+1:ihi). +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the vectors which define the elementary reflectors, +* as returned by SGEHRD. +* On exit, the N-by-N orthogonal matrix Q. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (input) REAL array, dimension (N-1) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGEHRD. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= IHI-ILO. +* For optimum performance LWORK >= (IHI-ILO)*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LWKOPT, NB, NH +* .. +* .. External Subroutines .. + EXTERNAL SORGQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NH = IHI - ILO + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, NH ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'SORGQR', ' ', NH, NH, NH, -1 ) + LWKOPT = MAX( 1, NH )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGHR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first ilo and the last n-ihi +* rows and columns to those of the unit matrix +* + DO 40 J = IHI, ILO + 1, -1 + DO 10 I = 1, J - 1 + A( I, J ) = ZERO + 10 CONTINUE + DO 20 I = J + 1, IHI + A( I, J ) = A( I, J-1 ) + 20 CONTINUE + DO 30 I = IHI + 1, N + A( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + DO 60 J = 1, ILO + DO 50 I = 1, N + A( I, J ) = ZERO + 50 CONTINUE + A( J, J ) = ONE + 60 CONTINUE + DO 80 J = IHI + 1, N + DO 70 I = 1, N + A( I, J ) = ZERO + 70 CONTINUE + A( J, J ) = ONE + 80 CONTINUE +* + IF( NH.GT.0 ) THEN +* +* Generate Q(ilo+1:ihi,ilo+1:ihi) +* + CALL SORGQR( NH, NH, NH, A( ILO+1, ILO+1 ), LDA, TAU( ILO ), + $ WORK, LWORK, IINFO ) + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORGHR +* + END diff --git a/costa/native/external/lapack/sorgl2.f b/costa/native/external/lapack/sorgl2.f new file mode 100644 index 000000000..8f4505aa8 --- /dev/null +++ b/costa/native/external/lapack/sorgl2.f @@ -0,0 +1,134 @@ + SUBROUTINE SORGL2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORGL2 generates an m by n real matrix Q with orthonormal rows, +* which is defined as the first m rows of a product of k elementary +* reflectors of order n +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by SGELQF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. N >= M. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. M >= K >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the i-th row must contain the vector which defines +* the elementary reflector H(i), for i = 1,2,...,k, as returned +* by SGELQF in the first k rows of its array argument A. +* On exit, the m-by-n matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGELQF. +* +* WORK (workspace) REAL array, dimension (M) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGL2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) + $ RETURN +* + IF( K.LT.M ) THEN +* +* Initialise rows k+1:m to rows of the unit matrix +* + DO 20 J = 1, N + DO 10 L = K + 1, M + A( L, J ) = ZERO + 10 CONTINUE + IF( J.GT.K .AND. J.LE.M ) + $ A( J, J ) = ONE + 20 CONTINUE + END IF +* + DO 40 I = K, 1, -1 +* +* Apply H(i) to A(i:m,i:n) from the right +* + IF( I.LT.N ) THEN + IF( I.LT.M ) THEN + A( I, I ) = ONE + CALL SLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ TAU( I ), A( I+1, I ), LDA, WORK ) + END IF + CALL SSCAL( N-I, -TAU( I ), A( I, I+1 ), LDA ) + END IF + A( I, I ) = ONE - TAU( I ) +* +* Set A(i,1:i-1) to zero +* + DO 30 L = 1, I - 1 + A( I, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of SORGL2 +* + END diff --git a/costa/native/external/lapack/sorglq.f b/costa/native/external/lapack/sorglq.f new file mode 100644 index 000000000..816894aa0 --- /dev/null +++ b/costa/native/external/lapack/sorglq.f @@ -0,0 +1,216 @@ + SUBROUTINE SORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORGLQ generates an M-by-N real matrix Q with orthonormal rows, +* which is defined as the first M rows of a product of K elementary +* reflectors of order N +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by SGELQF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. N >= M. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. M >= K >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the i-th row must contain the vector which defines +* the elementary reflector H(i), for i = 1,2,...,k, as returned +* by SGELQF in the first k rows of its array argument A. +* On exit, the M-by-N matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGELQF. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SLARFB, SLARFT, SORGL2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'SORGLQ', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, M )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SORGLQ', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORGLQ', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk rows are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(kk+1:m,1:kk) to zero. +* + DO 20 J = 1, KK + DO 10 I = KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.M ) + $ CALL SORGL2( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.M ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL SLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H' to A(i+ib:m,i:n) from the right +* + CALL SLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise', + $ M-I-IB+1, N-I+1, IB, A( I, I ), LDA, WORK, + $ LDWORK, A( I+IB, I ), LDA, WORK( IB+1 ), + $ LDWORK ) + END IF +* +* Apply H' to columns i:n of current block +* + CALL SORGL2( IB, N-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set columns 1:i-1 of current block to zero +* + DO 40 J = 1, I - 1 + DO 30 L = I, I + IB - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of SORGLQ +* + END diff --git a/costa/native/external/lapack/sorgql.f b/costa/native/external/lapack/sorgql.f new file mode 100644 index 000000000..b4610856a --- /dev/null +++ b/costa/native/external/lapack/sorgql.f @@ -0,0 +1,214 @@ + SUBROUTINE SORGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORGQL generates an M-by-N real matrix Q with orthonormal columns, +* which is defined as the last N columns of a product of K elementary +* reflectors of order M +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by SGEQLF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the (n-k+i)-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by SGEQLF in the last k columns of its array +* argument A. +* On exit, the M-by-N matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGEQLF. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KK, L, LDWORK, LWKOPT, + $ NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SLARFB, SLARFT, SORG2L, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'SORGQL', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, N )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGQL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SORGQL', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORGQL', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the first block. +* The last kk columns are handled by the block method. +* + KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB ) +* +* Set A(m-kk+1:m,1:n-kk) to zero. +* + DO 20 J = 1, N - KK + DO 10 I = M - KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the first or only block. +* + CALL SORG2L( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = K - KK + 1, K, NB + IB = MIN( NB, K-I+1 ) + IF( N-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL SLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left +* + CALL SLARFB( 'Left', 'No transpose', 'Backward', + $ 'Columnwise', M-K+I+IB-1, N-K+I-1, IB, + $ A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows 1:m-k+i+ib-1 of current block +* + CALL SORG2L( M-K+I+IB-1, IB, IB, A( 1, N-K+I ), LDA, + $ TAU( I ), WORK, IINFO ) +* +* Set rows m-k+i+ib:m of current block to zero +* + DO 40 J = N - K + I, N - K + I + IB - 1 + DO 30 L = M - K + I + IB, M + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of SORGQL +* + END diff --git a/costa/native/external/lapack/sorgqr.f b/costa/native/external/lapack/sorgqr.f new file mode 100644 index 000000000..70de749a8 --- /dev/null +++ b/costa/native/external/lapack/sorgqr.f @@ -0,0 +1,217 @@ + SUBROUTINE SORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORGQR generates an M-by-N real matrix Q with orthonormal columns, +* which is defined as the first N columns of a product of K elementary +* reflectors of order M +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by SGEQRF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the i-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by SGEQRF in the first k columns of its array +* argument A. +* On exit, the M-by-N matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGEQRF. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SLARFB, SLARFT, SORG2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'SORGQR', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, N )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SORGQR', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORGQR', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk columns are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(1:kk,kk+1:n) to zero. +* + DO 20 J = KK + 1, N + DO 10 I = 1, KK + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.N ) + $ CALL SORG2R( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL SLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i:m,i+ib:n) from the left +* + CALL SLARFB( 'Left', 'No transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows i:m of current block +* + CALL SORG2R( M-I+1, IB, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set rows 1:i-1 of current block to zero +* + DO 40 J = I, I + IB - 1 + DO 30 L = 1, I - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of SORGQR +* + END diff --git a/costa/native/external/lapack/sorgr2.f b/costa/native/external/lapack/sorgr2.f new file mode 100644 index 000000000..7498ec376 --- /dev/null +++ b/costa/native/external/lapack/sorgr2.f @@ -0,0 +1,132 @@ + SUBROUTINE SORGR2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORGR2 generates an m by n real matrix Q with orthonormal rows, +* which is defined as the last m rows of a product of k elementary +* reflectors of order n +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by SGERQF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. N >= M. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. M >= K >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the (m-k+i)-th row must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by SGERQF in the last k rows of its array argument +* A. +* On exit, the m by n matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGERQF. +* +* WORK (workspace) REAL array, dimension (M) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, II, J, L +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGR2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) + $ RETURN +* + IF( K.LT.M ) THEN +* +* Initialise rows 1:m-k to rows of the unit matrix +* + DO 20 J = 1, N + DO 10 L = 1, M - K + A( L, J ) = ZERO + 10 CONTINUE + IF( J.GT.N-M .AND. J.LE.N-K ) + $ A( M-N+J, J ) = ONE + 20 CONTINUE + END IF +* + DO 40 I = 1, K + II = M - K + I +* +* Apply H(i) to A(1:m-k+i,1:n-k+i) from the right +* + A( II, N-M+II ) = ONE + CALL SLARF( 'Right', II-1, N-M+II, A( II, 1 ), LDA, TAU( I ), + $ A, LDA, WORK ) + CALL SSCAL( N-M+II-1, -TAU( I ), A( II, 1 ), LDA ) + A( II, N-M+II ) = ONE - TAU( I ) +* +* Set A(m-k+i,n-k+i+1:n) to zero +* + DO 30 L = N - M + II + 1, N + A( II, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of SORGR2 +* + END diff --git a/costa/native/external/lapack/sorgrq.f b/costa/native/external/lapack/sorgrq.f new file mode 100644 index 000000000..bdd3c7e5c --- /dev/null +++ b/costa/native/external/lapack/sorgrq.f @@ -0,0 +1,214 @@ + SUBROUTINE SORGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORGRQ generates an M-by-N real matrix Q with orthonormal rows, +* which is defined as the last M rows of a product of K elementary +* reflectors of order N +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by SGERQF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. N >= M. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. M >= K >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the (m-k+i)-th row must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by SGERQF in the last k rows of its array argument +* A. +* On exit, the M-by-N matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGERQF. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, II, IINFO, IWS, J, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SLARFB, SLARFT, SORGR2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'SORGRQ', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, M )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGRQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SORGRQ', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORGRQ', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the first block. +* The last kk rows are handled by the block method. +* + KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB ) +* +* Set A(1:m-kk,n-kk+1:n) to zero. +* + DO 20 J = N - KK + 1, N + DO 10 I = 1, M - KK + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the first or only block. +* + CALL SORGR2( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = K - KK + 1, K, NB + IB = MIN( NB, K-I+1 ) + II = M - K + I + IF( II.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL SLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, + $ A( II, 1 ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H' to A(1:m-k+i-1,1:n-k+i+ib-1) from the right +* + CALL SLARFB( 'Right', 'Transpose', 'Backward', 'Rowwise', + $ II-1, N-K+I+IB-1, IB, A( II, 1 ), LDA, WORK, + $ LDWORK, A, LDA, WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H' to columns 1:n-k+i+ib-1 of current block +* + CALL SORGR2( IB, N-K+I+IB-1, IB, A( II, 1 ), LDA, TAU( I ), + $ WORK, IINFO ) +* +* Set columns n-k+i+ib:n of current block to zero +* + DO 40 L = N - K + I + IB, N + DO 30 J = II, II + IB - 1 + A( J, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of SORGRQ +* + END diff --git a/costa/native/external/lapack/sorgtr.f b/costa/native/external/lapack/sorgtr.f new file mode 100644 index 000000000..d827c93f9 --- /dev/null +++ b/costa/native/external/lapack/sorgtr.f @@ -0,0 +1,184 @@ + SUBROUTINE SORGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORGTR generates a real orthogonal matrix Q which is defined as the +* product of n-1 elementary reflectors of order N, as returned by +* SSYTRD: +* +* if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), +* +* if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A contains elementary reflectors +* from SSYTRD; +* = 'L': Lower triangle of A contains elementary reflectors +* from SSYTRD. +* +* N (input) INTEGER +* The order of the matrix Q. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the vectors which define the elementary reflectors, +* as returned by SSYTRD. +* On exit, the N-by-N orthogonal matrix Q. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (input) REAL array, dimension (N-1) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SSYTRD. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N-1). +* For optimum performance LWORK >= (N-1)*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, J, LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SORGQL, SORGQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + IF ( UPPER ) THEN + NB = ILAENV( 1, 'SORGQL', ' ', N-1, N-1, N-1, -1 ) + ELSE + NB = ILAENV( 1, 'SORGQR', ' ', N-1, N-1, N-1, -1 ) + END IF + LWKOPT = MAX( 1, N-1 )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGTR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( UPPER ) THEN +* +* Q was determined by a call to SSYTRD with UPLO = 'U' +* +* Shift the vectors which define the elementary reflectors one +* column to the left, and set the last row and column of Q to +* those of the unit matrix +* + DO 20 J = 1, N - 1 + DO 10 I = 1, J - 1 + A( I, J ) = A( I, J+1 ) + 10 CONTINUE + A( N, J ) = ZERO + 20 CONTINUE + DO 30 I = 1, N - 1 + A( I, N ) = ZERO + 30 CONTINUE + A( N, N ) = ONE +* +* Generate Q(1:n-1,1:n-1) +* + CALL SORGQL( N-1, N-1, N-1, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* Q was determined by a call to SSYTRD with UPLO = 'L'. +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q to +* those of the unit matrix +* + DO 50 J = N, 2, -1 + A( 1, J ) = ZERO + DO 40 I = J + 1, N + A( I, J ) = A( I, J-1 ) + 40 CONTINUE + 50 CONTINUE + A( 1, 1 ) = ONE + DO 60 I = 2, N + A( I, 1 ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Generate Q(2:n,2:n) +* + CALL SORGQR( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORGTR +* + END diff --git a/costa/native/external/lapack/sorm2l.f b/costa/native/external/lapack/sorm2l.f new file mode 100644 index 000000000..a27ed443e --- /dev/null +++ b/costa/native/external/lapack/sorm2l.f @@ -0,0 +1,194 @@ + SUBROUTINE SORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORM2L overwrites the general real m by n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'T', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'T', +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by SGEQLF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'T': apply Q' (Transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) REAL array, dimension (LDA,K) +* The i-th column must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* SGEQLF in the last k columns of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If SIDE = 'L', LDA >= max(1,M); +* if SIDE = 'R', LDA >= max(1,N). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGEQLF. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the m by n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) REAL array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, MI, NI, NQ + REAL AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORM2L', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) + $ THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(1:m-k+i,1:n) +* + MI = M - K + I + ELSE +* +* H(i) is applied to C(1:m,1:n-k+i) +* + NI = N - K + I + END IF +* +* Apply H(i) +* + AII = A( NQ-K+I, I ) + A( NQ-K+I, I ) = ONE + CALL SLARF( SIDE, MI, NI, A( 1, I ), 1, TAU( I ), C, LDC, + $ WORK ) + A( NQ-K+I, I ) = AII + 10 CONTINUE + RETURN +* +* End of SORM2L +* + END diff --git a/costa/native/external/lapack/sorm2r.f b/costa/native/external/lapack/sorm2r.f new file mode 100644 index 000000000..8b971b32c --- /dev/null +++ b/costa/native/external/lapack/sorm2r.f @@ -0,0 +1,198 @@ + SUBROUTINE SORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORM2R overwrites the general real m by n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'T', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'T', +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by SGEQRF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'T': apply Q' (Transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) REAL array, dimension (LDA,K) +* The i-th column must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* SGEQRF in the first k columns of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If SIDE = 'L', LDA >= max(1,M); +* if SIDE = 'R', LDA >= max(1,N). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGEQRF. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the m by n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) REAL array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ + REAL AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORM2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. ( .NOT.LEFT .AND. NOTRAN ) ) + $ THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) +* + AII = A( I, I ) + A( I, I ) = ONE + CALL SLARF( SIDE, MI, NI, A( I, I ), 1, TAU( I ), C( IC, JC ), + $ LDC, WORK ) + A( I, I ) = AII + 10 CONTINUE + RETURN +* +* End of SORM2R +* + END diff --git a/costa/native/external/lapack/sormbr.f b/costa/native/external/lapack/sormbr.f new file mode 100644 index 000000000..e36906c24 --- /dev/null +++ b/costa/native/external/lapack/sormbr.f @@ -0,0 +1,283 @@ + SUBROUTINE SORMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, + $ LDC, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, VECT + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* If VECT = 'Q', SORMBR overwrites the general real M-by-N matrix C +* with +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'T': Q**T * C C * Q**T +* +* If VECT = 'P', SORMBR overwrites the general real M-by-N matrix C +* with +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': P * C C * P +* TRANS = 'T': P**T * C C * P**T +* +* Here Q and P**T are the orthogonal matrices determined by SGEBRD when +* reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and +* P**T are defined as products of elementary reflectors H(i) and G(i) +* respectively. +* +* Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the +* order of the orthogonal matrix Q or P**T that is applied. +* +* If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: +* if nq >= k, Q = H(1) H(2) . . . H(k); +* if nq < k, Q = H(1) H(2) . . . H(nq-1). +* +* If VECT = 'P', A is assumed to have been a K-by-NQ matrix: +* if k < nq, P = G(1) G(2) . . . G(k); +* if k >= nq, P = G(1) G(2) . . . G(nq-1). +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* = 'Q': apply Q or Q**T; +* = 'P': apply P or P**T. +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q, Q**T, P or P**T from the Left; +* = 'R': apply Q, Q**T, P or P**T from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q or P; +* = 'T': Transpose, apply Q**T or P**T. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* If VECT = 'Q', the number of columns in the original +* matrix reduced by SGEBRD. +* If VECT = 'P', the number of rows in the original +* matrix reduced by SGEBRD. +* K >= 0. +* +* A (input) REAL array, dimension +* (LDA,min(nq,K)) if VECT = 'Q' +* (LDA,nq) if VECT = 'P' +* The vectors which define the elementary reflectors H(i) and +* G(i), whose products determine the matrices Q and P, as +* returned by SGEBRD. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If VECT = 'Q', LDA >= max(1,nq); +* if VECT = 'P', LDA >= max(1,min(nq,K)). +* +* TAU (input) REAL array, dimension (min(nq,K)) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i) or G(i) which determines Q or P, as returned +* by SGEBRD in the array argument TAUQ or TAUP. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q +* or P*C or P**T*C or C*P or C*P**T. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL APPLYQ, LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SORMLQ, SORMQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + APPLYQ = LSAME( VECT, 'Q' ) + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q or P and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.APPLYQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( K.LT.0 ) THEN + INFO = -6 + ELSE IF( ( APPLYQ .AND. LDA.LT.MAX( 1, NQ ) ) .OR. + $ ( .NOT.APPLYQ .AND. LDA.LT.MAX( 1, MIN( NQ, K ) ) ) ) + $ THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( APPLYQ ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'SORMQR', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'SORMQR', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + ELSE + IF( LEFT ) THEN + NB = ILAENV( 1, 'SORMLQ', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'SORMLQ', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMBR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + WORK( 1 ) = 1 + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + IF( APPLYQ ) THEN +* +* Apply Q +* + IF( NQ.GE.K ) THEN +* +* Q was determined by a call to SGEBRD with nq >= k +* + CALL SORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, IINFO ) + ELSE IF( NQ.GT.1 ) THEN +* +* Q was determined by a call to SGEBRD with nq < k +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + I1 = 2 + I2 = 1 + ELSE + MI = M + NI = N - 1 + I1 = 1 + I2 = 2 + END IF + CALL SORMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU, + $ C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + ELSE +* +* Apply P +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF + IF( NQ.GT.K ) THEN +* +* P was determined by a call to SGEBRD with nq > k +* + CALL SORMLQ( SIDE, TRANST, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, IINFO ) + ELSE IF( NQ.GT.1 ) THEN +* +* P was determined by a call to SGEBRD with nq <= k +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + I1 = 2 + I2 = 1 + ELSE + MI = M + NI = N - 1 + I1 = 1 + I2 = 2 + END IF + CALL SORMLQ( SIDE, TRANST, MI, NI, NQ-1, A( 1, 2 ), LDA, + $ TAU, C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORMBR +* + END diff --git a/costa/native/external/lapack/sormhr.f b/costa/native/external/lapack/sormhr.f new file mode 100644 index 000000000..605433987 --- /dev/null +++ b/costa/native/external/lapack/sormhr.f @@ -0,0 +1,203 @@ + SUBROUTINE SORMHR( SIDE, TRANS, M, N, ILO, IHI, A, LDA, TAU, C, + $ LDC, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER IHI, ILO, INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORMHR overwrites the general real M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'T': Q**T * C C * Q**T +* +* where Q is a real orthogonal matrix of order nq, with nq = m if +* SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +* IHI-ILO elementary reflectors, as returned by SGEHRD: +* +* Q = H(ilo) H(ilo+1) . . . H(ihi-1). +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**T from the Left; +* = 'R': apply Q or Q**T from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'T': Transpose, apply Q**T. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* ILO and IHI must have the same values as in the previous call +* of SGEHRD. Q is equal to the unit matrix except in the +* submatrix Q(ilo+1:ihi,ilo+1:ihi). +* If SIDE = 'L', then 1 <= ILO <= IHI <= M, if M > 0, and +* ILO = 1 and IHI = 0, if M = 0; +* if SIDE = 'R', then 1 <= ILO <= IHI <= N, if N > 0, and +* ILO = 1 and IHI = 0, if N = 0. +* +* A (input) REAL array, dimension +* (LDA,M) if SIDE = 'L' +* (LDA,N) if SIDE = 'R' +* The vectors which define the elementary reflectors, as +* returned by SGEHRD. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. +* +* TAU (input) REAL array, dimension +* (M-1) if SIDE = 'L' +* (N-1) if SIDE = 'R' +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGEHRD. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, LQUERY + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NH, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SORMQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NH = IHI - ILO + LEFT = LSAME( SIDE, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'T' ) ) + $ THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, NQ ) ) THEN + INFO = -5 + ELSE IF( IHI.LT.MIN( ILO, NQ ) .OR. IHI.GT.NQ ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'SORMQR', SIDE // TRANS, NH, N, NH, -1 ) + ELSE + NB = ILAENV( 1, 'SORMQR', SIDE // TRANS, M, NH, NH, -1 ) + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMHR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. NH.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( LEFT ) THEN + MI = NH + NI = N + I1 = ILO + 1 + I2 = 1 + ELSE + MI = M + NI = NH + I1 = 1 + I2 = ILO + 1 + END IF +* + CALL SORMQR( SIDE, TRANS, MI, NI, NH, A( ILO+1, ILO ), LDA, + $ TAU( ILO ), C( I1, I2 ), LDC, WORK, LWORK, IINFO ) +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORMHR +* + END diff --git a/costa/native/external/lapack/sorml2.f b/costa/native/external/lapack/sorml2.f new file mode 100644 index 000000000..094be1283 --- /dev/null +++ b/costa/native/external/lapack/sorml2.f @@ -0,0 +1,198 @@ + SUBROUTINE SORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORML2 overwrites the general real m by n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'T', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'T', +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by SGELQF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'T': apply Q' (Transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) REAL array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* SGELQF in the first k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGELQF. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the m by n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) REAL array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ + REAL AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORML2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) + $ THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) +* + AII = A( I, I ) + A( I, I ) = ONE + CALL SLARF( SIDE, MI, NI, A( I, I ), LDA, TAU( I ), + $ C( IC, JC ), LDC, WORK ) + A( I, I ) = AII + 10 CONTINUE + RETURN +* +* End of SORML2 +* + END diff --git a/costa/native/external/lapack/sormlq.f b/costa/native/external/lapack/sormlq.f new file mode 100644 index 000000000..611e2f2d1 --- /dev/null +++ b/costa/native/external/lapack/sormlq.f @@ -0,0 +1,269 @@ + SUBROUTINE SORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORMLQ overwrites the general real M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'T': Q**T * C C * Q**T +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by SGELQF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**T from the Left; +* = 'R': apply Q or Q**T from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'T': Transpose, apply Q**T. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) REAL array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* SGELQF in the first k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGELQF. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + REAL T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SLARFB, SLARFT, SORML2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'SORMLQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORMLQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL SORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL SLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H' +* + CALL SLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB, + $ A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, WORK, + $ LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORMLQ +* + END diff --git a/costa/native/external/lapack/sormql.f b/costa/native/external/lapack/sormql.f new file mode 100644 index 000000000..e52e99fb7 --- /dev/null +++ b/costa/native/external/lapack/sormql.f @@ -0,0 +1,258 @@ + SUBROUTINE SORMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORMQL overwrites the general real M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'T': Q**T * C C * Q**T +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by SGEQLF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**T from the Left; +* = 'R': apply Q or Q**T from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'T': Transpose, apply Q**T. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) REAL array, dimension (LDA,K) +* The i-th column must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* SGEQLF in the last k columns of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If SIDE = 'L', LDA >= max(1,M); +* if SIDE = 'R', LDA >= max(1,N). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGEQLF. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT, + $ MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + REAL T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SLARFB, SLARFT, SORM2L, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'SORMQL', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMQL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORMQL', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL SORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL SLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB, + $ A( 1, I ), LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H' is applied to C(1:m-k+i+ib-1,1:n) +* + MI = M - K + I + IB - 1 + ELSE +* +* H or H' is applied to C(1:m,1:n-k+i+ib-1) +* + NI = N - K + I + IB - 1 + END IF +* +* Apply H or H' +* + CALL SLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI, + $ IB, A( 1, I ), LDA, T, LDT, C, LDC, WORK, + $ LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORMQL +* + END diff --git a/costa/native/external/lapack/sormqr.f b/costa/native/external/lapack/sormqr.f new file mode 100644 index 000000000..976fa098d --- /dev/null +++ b/costa/native/external/lapack/sormqr.f @@ -0,0 +1,262 @@ + SUBROUTINE SORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORMQR overwrites the general real M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'T': Q**T * C C * Q**T +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by SGEQRF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**T from the Left; +* = 'R': apply Q or Q**T from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'T': Transpose, apply Q**T. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) REAL array, dimension (LDA,K) +* The i-th column must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* SGEQRF in the first k columns of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If SIDE = 'L', LDA >= max(1,M); +* if SIDE = 'R', LDA >= max(1,N). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGEQRF. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + REAL T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SLARFB, SLARFT, SORM2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'SORMQR', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORMQR', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL SORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL SLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H' +* + CALL SLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI, + $ IB, A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, + $ WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORMQR +* + END diff --git a/costa/native/external/lapack/sormr2.f b/costa/native/external/lapack/sormr2.f new file mode 100644 index 000000000..dc9b250e0 --- /dev/null +++ b/costa/native/external/lapack/sormr2.f @@ -0,0 +1,194 @@ + SUBROUTINE SORMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORMR2 overwrites the general real m by n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'T', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'T', +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by SGERQF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'T': apply Q' (Transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) REAL array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* SGERQF in the last k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGERQF. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the m by n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) REAL array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, MI, NI, NQ + REAL AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMR2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. ( .NOT.LEFT .AND. NOTRAN ) ) + $ THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(1:m-k+i,1:n) +* + MI = M - K + I + ELSE +* +* H(i) is applied to C(1:m,1:n-k+i) +* + NI = N - K + I + END IF +* +* Apply H(i) +* + AII = A( I, NQ-K+I ) + A( I, NQ-K+I ) = ONE + CALL SLARF( SIDE, MI, NI, A( I, 1 ), LDA, TAU( I ), C, LDC, + $ WORK ) + A( I, NQ-K+I ) = AII + 10 CONTINUE + RETURN +* +* End of SORMR2 +* + END diff --git a/costa/native/external/lapack/sormr3.f b/costa/native/external/lapack/sormr3.f new file mode 100644 index 000000000..d17d54540 --- /dev/null +++ b/costa/native/external/lapack/sormr3.f @@ -0,0 +1,207 @@ + SUBROUTINE SORMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, L, LDA, LDC, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORMR3 overwrites the general real m by n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'T', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'T', +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by STZRZF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'T': apply Q' (Transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* L (input) INTEGER +* The number of columns of the matrix A containing +* the meaningful part of the Householder reflectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* A (input) REAL array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* STZRZF in the last k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by STZRZF. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the m-by-n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) REAL array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JA, JC, MI, NI, NQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLARZ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR. + $ ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMR3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JA = M - L + 1 + JC = 1 + ELSE + MI = M + JA = N - L + 1 + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) or H(i)' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) or H(i)' +* + CALL SLARZ( SIDE, MI, NI, L, A( I, JA ), LDA, TAU( I ), + $ C( IC, JC ), LDC, WORK ) +* + 10 CONTINUE +* + RETURN +* +* End of SORMR3 +* + END diff --git a/costa/native/external/lapack/sormrq.f b/costa/native/external/lapack/sormrq.f new file mode 100644 index 000000000..cb22b1cbf --- /dev/null +++ b/costa/native/external/lapack/sormrq.f @@ -0,0 +1,265 @@ + SUBROUTINE SORMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORMRQ overwrites the general real M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'T': Q**T * C C * Q**T +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by SGERQF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**T from the Left; +* = 'R': apply Q or Q**T from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'T': Transpose, apply Q**T. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) REAL array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* SGERQF in the last k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SGERQF. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT, + $ MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + REAL T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SLARFB, SLARFT, SORMR2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'SORMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMRQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL SORMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL SLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB, + $ A( I, 1 ), LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H' is applied to C(1:m-k+i+ib-1,1:n) +* + MI = M - K + I + IB - 1 + ELSE +* +* H or H' is applied to C(1:m,1:n-k+i+ib-1) +* + NI = N - K + I + IB - 1 + END IF +* +* Apply H or H' +* + CALL SLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, + $ IB, A( I, 1 ), LDA, T, LDT, C, LDC, WORK, + $ LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORMRQ +* + END diff --git a/costa/native/external/lapack/sormrz.f b/costa/native/external/lapack/sormrz.f new file mode 100644 index 000000000..e33b24bf0 --- /dev/null +++ b/costa/native/external/lapack/sormrz.f @@ -0,0 +1,289 @@ + SUBROUTINE SORMRZ( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, L, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORMRZ overwrites the general real M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'T': Q**T * C C * Q**T +* +* where Q is a real orthogonal matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by STZRZF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**T from the Left; +* = 'R': apply Q or Q**T from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'T': Transpose, apply Q**T. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* L (input) INTEGER +* The number of columns of the matrix A containing +* the meaningful part of the Householder reflectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* A (input) REAL array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* STZRZF in the last k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) REAL array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by STZRZF. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JA, JC, + $ LDWORK, LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + REAL T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SLARZB, SLARZT, SORMR3, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR. + $ ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'SORMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMRZ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL SORMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + JA = M - L + 1 + ELSE + MI = M + IC = 1 + JA = N - L + 1 + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL SLARZT( 'Backward', 'Rowwise', L, IB, A( I, JA ), LDA, + $ TAU( I ), T, LDT ) +* + IF( LEFT ) THEN +* +* H or H' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H' +* + CALL SLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, + $ IB, L, A( I, JA ), LDA, T, LDT, C( IC, JC ), + $ LDC, WORK, LDWORK ) + 10 CONTINUE +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SORMRZ +* + END diff --git a/costa/native/external/lapack/sormtr.f b/costa/native/external/lapack/sormtr.f new file mode 100644 index 000000000..e3f2156f5 --- /dev/null +++ b/costa/native/external/lapack/sormtr.f @@ -0,0 +1,224 @@ + SUBROUTINE SORMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, UPLO + INTEGER INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* SORMTR overwrites the general real M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'T': Q**T * C C * Q**T +* +* where Q is a real orthogonal matrix of order nq, with nq = m if +* SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +* nq-1 elementary reflectors, as returned by SSYTRD: +* +* if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); +* +* if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**T from the Left; +* = 'R': apply Q or Q**T from the Right. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A contains elementary reflectors +* from SSYTRD; +* = 'L': Lower triangle of A contains elementary reflectors +* from SSYTRD. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'T': Transpose, apply Q**T. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* A (input) REAL array, dimension +* (LDA,M) if SIDE = 'L' +* (LDA,N) if SIDE = 'R' +* The vectors which define the elementary reflectors, as +* returned by SSYTRD. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. +* +* TAU (input) REAL array, dimension +* (M-1) if SIDE = 'L' +* (N-1) if SIDE = 'R' +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by SSYTRD. +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, UPPER + INTEGER I1, I2, IINFO, LWKOPT, MI, NI, NB, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SORMQL, SORMQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'T' ) ) + $ THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( UPPER ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'SORMQL', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'SORMQL', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + ELSE + IF( LEFT ) THEN + NB = ILAENV( 1, 'SORMQR', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'SORMQR', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMTR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. NQ.EQ.1 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + ELSE + MI = M + NI = N - 1 + END IF +* + IF( UPPER ) THEN +* +* Q was determined by a call to SSYTRD with UPLO = 'U' +* + CALL SORMQL( SIDE, TRANS, MI, NI, NQ-1, A( 1, 2 ), LDA, TAU, C, + $ LDC, WORK, LWORK, IINFO ) + ELSE +* +* Q was determined by a call to SSYTRD with UPLO = 'L' +* + IF( LEFT ) THEN + I1 = 2 + I2 = 1 + ELSE + I1 = 1 + I2 = 2 + END IF + CALL SORMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU, + $ C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORMTR +* + END diff --git a/costa/native/external/lapack/spbcon.f b/costa/native/external/lapack/spbcon.f new file mode 100644 index 000000000..a7e8fc936 --- /dev/null +++ b/costa/native/external/lapack/spbcon.f @@ -0,0 +1,188 @@ + SUBROUTINE SPBCON( UPLO, N, KD, AB, LDAB, ANORM, RCOND, WORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AB( LDAB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SPBCON estimates the reciprocal of the condition number (in the +* 1-norm) of a real symmetric positive definite band matrix using the +* Cholesky factorization A = U**T*U or A = L*L**T computed by SPBTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular factor stored in AB; +* = 'L': Lower triangular factor stored in AB. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input) REAL array, dimension (LDAB,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**T*U or A = L*L**T of the band matrix A, stored in the +* first KD+1 rows of the array. The j-th column of U or L is +* stored in the j-th column of the array AB as follows: +* if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; +* if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* ANORM (input) REAL +* The 1-norm (or infinity-norm) of the symmetric band matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + REAL AINVNM, SCALE, SCALEL, SCALEU, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL LSAME, ISAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLACON, SLATBS, SRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL SLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U'). +* + CALL SLATBS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEL, WORK( 2*N+1 ), + $ INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL SLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEU, WORK( 2*N+1 ), + $ INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL SLATBS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEL, WORK( 2*N+1 ), + $ INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L'). +* + CALL SLATBS( 'Lower', 'Transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEU, WORK( 2*N+1 ), + $ INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = ISAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL SRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE +* + RETURN +* +* End of SPBCON +* + END diff --git a/costa/native/external/lapack/spbequ.f b/costa/native/external/lapack/spbequ.f new file mode 100644 index 000000000..474d9b9c0 --- /dev/null +++ b/costa/native/external/lapack/spbequ.f @@ -0,0 +1,167 @@ + SUBROUTINE SPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), S( * ) +* .. +* +* Purpose +* ======= +* +* SPBEQU computes row and column scalings intended to equilibrate a +* symmetric positive definite band matrix A and reduce its condition +* number (with respect to the two-norm). S contains the scale factors, +* S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with +* elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This +* choice of S puts the condition number of B within a factor N of the +* smallest possible condition number over all possible diagonal +* scalings. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular of A is stored; +* = 'L': Lower triangular of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input) REAL array, dimension (LDAB,N) +* The upper or lower triangle of the symmetric band matrix A, +* stored in the first KD+1 rows of the array. The j-th column +* of A is stored in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array A. LDAB >= KD+1. +* +* S (output) REAL array, dimension (N) +* If INFO = 0, S contains the scale factors for A. +* +* SCOND (output) REAL +* If INFO = 0, S contains the ratio of the smallest S(i) to +* the largest S(i). If SCOND >= 0.1 and AMAX is neither too +* large nor too small, it is not worth scaling by S. +* +* AMAX (output) REAL +* Absolute value of largest matrix element. If AMAX is very +* close to overflow or very close to underflow, the matrix +* should be scaled. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the i-th diagonal element is nonpositive. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, J + REAL SMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPBEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* + IF( UPPER ) THEN + J = KD + 1 + ELSE + J = 1 + END IF +* +* Initialize SMIN and AMAX. +* + S( 1 ) = AB( J, 1 ) + SMIN = S( 1 ) + AMAX = S( 1 ) +* +* Find the minimum and maximum diagonal elements. +* + DO 10 I = 2, N + S( I ) = AB( J, I ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 20 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 20 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 30 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 30 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of SPBEQU +* + END diff --git a/costa/native/external/lapack/spbrfs.f b/costa/native/external/lapack/spbrfs.f new file mode 100644 index 000000000..aafa620ef --- /dev/null +++ b/costa/native/external/lapack/spbrfs.f @@ -0,0 +1,337 @@ + SUBROUTINE SPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, + $ LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SPBRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is symmetric positive definite +* and banded, and provides error bounds and backward error estimates +* for the solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AB (input) REAL array, dimension (LDAB,N) +* The upper or lower triangle of the symmetric band matrix A, +* stored in the first KD+1 rows of the array. The j-th column +* of A is stored in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* AFB (input) REAL array, dimension (LDAFB,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**T*U or A = L*L**T of the band matrix A as computed by +* SPBTRF, in the same storage format as A (see AB). +* +* LDAFB (input) INTEGER +* The leading dimension of the array AFB. LDAFB >= KD+1. +* +* B (input) REAL array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) REAL array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by SPBTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, L, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SLACON, SPBTRS, SSBMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDAFB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = MIN( N+1, 2*KD+2 ) + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL SSBMV( UPLO, N, KD, -ONE, AB, LDAB, X( 1, J ), 1, ONE, + $ WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + L = KD + 1 - K + DO 40 I = MAX( 1, K-KD ), K - 1 + WORK( I ) = WORK( I ) + ABS( AB( L+I, K ) )*XK + S = S + ABS( AB( L+I, K ) )*ABS( X( I, J ) ) + 40 CONTINUE + WORK( K ) = WORK( K ) + ABS( AB( KD+1, K ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + WORK( K ) = WORK( K ) + ABS( AB( 1, K ) )*XK + L = 1 - K + DO 60 I = K + 1, MIN( N, K+KD ) + WORK( I ) = WORK( I ) + ABS( AB( L+I, K ) )*XK + S = S + ABS( AB( L+I, K ) )*ABS( X( I, J ) ) + 60 CONTINUE + WORK( K ) = WORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL SPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK( N+1 ), N, + $ INFO ) + CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use SLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL SLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL SPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK( N+1 ), N, + $ INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( N+I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( N+I )*WORK( I ) + 120 CONTINUE + CALL SPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK( N+1 ), N, + $ INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of SPBRFS +* + END diff --git a/costa/native/external/lapack/spbstf.f b/costa/native/external/lapack/spbstf.f new file mode 100644 index 000000000..3cd7a68a7 --- /dev/null +++ b/costa/native/external/lapack/spbstf.f @@ -0,0 +1,251 @@ + SUBROUTINE SPBSTF( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* SPBSTF computes a split Cholesky factorization of a real +* symmetric positive definite band matrix A. +* +* This routine is designed to be used in conjunction with SSBGST. +* +* The factorization has the form A = S**T*S where S is a band matrix +* of the same bandwidth as A and the following structure: +* +* S = ( U ) +* ( M L ) +* +* where U is upper triangular of order m = (n+kd)/2, and L is lower +* triangular of order n-m. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) REAL array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first kd+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, if INFO = 0, the factor S from the split Cholesky +* factorization A = S**T*S. See Further Details. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the factorization could not be completed, +* because the updated element a(i,i) was negative; the +* matrix A is not positive definite. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 7, KD = 2: +* +* S = ( s11 s12 s13 ) +* ( s22 s23 s24 ) +* ( s33 s34 ) +* ( s44 ) +* ( s53 s54 s55 ) +* ( s64 s65 s66 ) +* ( s75 s76 s77 ) +* +* If UPLO = 'U', the array AB holds: +* +* on entry: on exit: +* +* * * a13 a24 a35 a46 a57 * * s13 s24 s53 s64 s75 +* * a12 a23 a34 a45 a56 a67 * s12 s23 s34 s54 s65 s76 +* a11 a22 a33 a44 a55 a66 a77 s11 s22 s33 s44 s55 s66 s77 +* +* If UPLO = 'L', the array AB holds: +* +* on entry: on exit: +* +* a11 a22 a33 a44 a55 a66 a77 s11 s22 s33 s44 s55 s66 s77 +* a21 a32 a43 a54 a65 a76 * s12 s23 s34 s54 s65 s76 * +* a31 a42 a53 a64 a64 * * s13 s24 s53 s64 s75 * * +* +* Array elements marked * are not used by the routine. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, KLD, KM, M + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPBSTF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + KLD = MAX( 1, LDAB-1 ) +* +* Set the splitting point m. +* + M = ( N+KD ) / 2 +* + IF( UPPER ) THEN +* +* Factorize A(m+1:n,m+1:n) as L**T*L, and update A(1:m,1:m). +* + DO 10 J = N, M + 1, -1 +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = AB( KD+1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 50 + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ + KM = MIN( J-1, KD ) +* +* Compute elements j-km:j-1 of the j-th column and update the +* the leading submatrix within the band. +* + CALL SSCAL( KM, ONE / AJJ, AB( KD+1-KM, J ), 1 ) + CALL SSYR( 'Upper', KM, -ONE, AB( KD+1-KM, J ), 1, + $ AB( KD+1, J-KM ), KLD ) + 10 CONTINUE +* +* Factorize the updated submatrix A(1:m,1:m) as U**T*U. +* + DO 20 J = 1, M +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = AB( KD+1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 50 + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ + KM = MIN( KD, M-J ) +* +* Compute elements j+1:j+km of the j-th row and update the +* trailing submatrix within the band. +* + IF( KM.GT.0 ) THEN + CALL SSCAL( KM, ONE / AJJ, AB( KD, J+1 ), KLD ) + CALL SSYR( 'Upper', KM, -ONE, AB( KD, J+1 ), KLD, + $ AB( KD+1, J+1 ), KLD ) + END IF + 20 CONTINUE + ELSE +* +* Factorize A(m+1:n,m+1:n) as L**T*L, and update A(1:m,1:m). +* + DO 30 J = N, M + 1, -1 +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = AB( 1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 50 + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ + KM = MIN( J-1, KD ) +* +* Compute elements j-km:j-1 of the j-th row and update the +* trailing submatrix within the band. +* + CALL SSCAL( KM, ONE / AJJ, AB( KM+1, J-KM ), KLD ) + CALL SSYR( 'Lower', KM, -ONE, AB( KM+1, J-KM ), KLD, + $ AB( 1, J-KM ), KLD ) + 30 CONTINUE +* +* Factorize the updated submatrix A(1:m,1:m) as U**T*U. +* + DO 40 J = 1, M +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = AB( 1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 50 + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ + KM = MIN( KD, M-J ) +* +* Compute elements j+1:j+km of the j-th column and update the +* trailing submatrix within the band. +* + IF( KM.GT.0 ) THEN + CALL SSCAL( KM, ONE / AJJ, AB( 2, J ), 1 ) + CALL SSYR( 'Lower', KM, -ONE, AB( 2, J ), 1, + $ AB( 1, J+1 ), KLD ) + END IF + 40 CONTINUE + END IF + RETURN +* + 50 CONTINUE + INFO = J + RETURN +* +* End of SPBSTF +* + END diff --git a/costa/native/external/lapack/spbsv.f b/costa/native/external/lapack/spbsv.f new file mode 100644 index 000000000..ee08b73f3 --- /dev/null +++ b/costa/native/external/lapack/spbsv.f @@ -0,0 +1,152 @@ + SUBROUTINE SPBSV( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SPBSV computes the solution to a real system of linear equations +* A * X = B, +* where A is an N-by-N symmetric positive definite band matrix and X +* and B are N-by-NRHS matrices. +* +* The Cholesky decomposition is used to factor A as +* A = U**T * U, if UPLO = 'U', or +* A = L * L**T, if UPLO = 'L', +* where U is an upper triangular band matrix, and L is a lower +* triangular band matrix, with the same number of superdiagonals or +* subdiagonals as A. The factored form of A is then used to solve the +* system of equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input/output) REAL array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(N,j+KD). +* See below for further details. +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U**T*U or A = L*L**T of the band +* matrix A, in the same storage format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i of A is not +* positive definite, so the factorization could not be +* completed, and the solution has not been computed. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 6, KD = 2, and UPLO = 'U': +* +* On entry: On exit: +* +* * * a13 a24 a35 a46 * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* +* Similarly, if UPLO = 'L' the format of A is as follows: +* +* On entry: On exit: +* +* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +* a31 a42 a53 a64 * * l31 l42 l53 l64 * * +* +* Array elements marked * are not used by the routine. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SPBTRF, SPBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPBSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + CALL SPBTRF( UPLO, N, KD, AB, LDAB, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL SPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* + END IF + RETURN +* +* End of SPBSV +* + END diff --git a/costa/native/external/lapack/spbsvx.f b/costa/native/external/lapack/spbsvx.f new file mode 100644 index 000000000..8b75d8dad --- /dev/null +++ b/costa/native/external/lapack/spbsvx.f @@ -0,0 +1,424 @@ + SUBROUTINE SPBSVX( FACT, UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, + $ EQUED, S, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), S( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SPBSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to +* compute the solution to a real system of linear equations +* A * X = B, +* where A is an N-by-N symmetric positive definite band matrix and X +* and B are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'E', real scaling factors are computed to equilibrate +* the system: +* diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +* Whether or not the system will be equilibrated depends on the +* scaling of the matrix A, but if equilibration is used, A is +* overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +* +* 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +* factor the matrix A (after equilibration if FACT = 'E') as +* A = U**T * U, if UPLO = 'U', or +* A = L * L**T, if UPLO = 'L', +* where U is an upper triangular band matrix, and L is a lower +* triangular band matrix. +* +* 3. If the leading i-by-i principal minor is not positive definite, +* then the routine returns with INFO = i. Otherwise, the factored +* form of A is used to estimate the condition number of the matrix +* A. If the reciprocal of the condition number is less than machine +* precision, INFO = N+1 is returned as a warning, but the routine +* still goes on to solve for X and compute error bounds as +* described below. +* +* 4. The system of equations is solved for X using the factored form +* of A. +* +* 5. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* 6. If equilibration was used, the matrix X is premultiplied by +* diag(S) so that it solves the original system before +* equilibration. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of the matrix A is +* supplied on entry, and if not, whether the matrix A should be +* equilibrated before it is factored. +* = 'F': On entry, AFB contains the factored form of A. +* If EQUED = 'Y', the matrix A has been equilibrated +* with scaling factors given by S. AB and AFB will not +* be modified. +* = 'N': The matrix A will be copied to AFB and factored. +* = 'E': The matrix A will be equilibrated if necessary, then +* copied to AFB and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right-hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AB (input/output) REAL array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array, except +* if FACT = 'F' and EQUED = 'Y', then A must contain the +* equilibrated matrix diag(S)*A*diag(S). The j-th column of A +* is stored in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(N,j+KD). +* See below for further details. +* +* On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +* diag(S)*A*diag(S). +* +* LDAB (input) INTEGER +* The leading dimension of the array A. LDAB >= KD+1. +* +* AFB (input or output) REAL array, dimension (LDAFB,N) +* If FACT = 'F', then AFB is an input argument and on entry +* contains the triangular factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T of the band matrix +* A, in the same storage format as A (see AB). If EQUED = 'Y', +* then AFB is the factored form of the equilibrated matrix A. +* +* If FACT = 'N', then AFB is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T. +* +* If FACT = 'E', then AFB is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T of the equilibrated +* matrix A (see the description of A for the form of the +* equilibrated matrix). +* +* LDAFB (input) INTEGER +* The leading dimension of the array AFB. LDAFB >= KD+1. +* +* EQUED (input or output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration (always true if FACT = 'N'). +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* EQUED is an input argument if FACT = 'F'; otherwise, it is an +* output argument. +* +* S (input or output) REAL array, dimension (N) +* The scale factors for A; not accessed if EQUED = 'N'. S is +* an input argument if FACT = 'F'; otherwise, S is an output +* argument. If FACT = 'F' and EQUED = 'Y', each element of S +* must be positive. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +* B is overwritten by diag(S) * B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) REAL array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +* the original system of equations. Note that if EQUED = 'Y', +* A and B are modified on exit, and the solution to the +* equilibrated system is inv(diag(S))*X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) REAL +* The estimate of the reciprocal condition number of the matrix +* A after equilibration (if done). If RCOND is less than the +* machine precision (in particular, if RCOND = 0), the matrix +* is singular to working precision. This condition is +* indicated by a return code of INFO > 0. +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: the leading minor of order i of A is +* not positive definite, so the factorization +* could not be completed, and the solution has not +* been computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 6, KD = 2, and UPLO = 'U': +* +* Two-dimensional storage of the symmetric matrix A: +* +* a11 a12 a13 +* a22 a23 a24 +* a33 a34 a35 +* a44 a45 a46 +* a55 a56 +* (aij=conjg(aji)) a66 +* +* Band storage of the upper triangle of A: +* +* * * a13 a24 a35 a46 +* * a12 a23 a34 a45 a56 +* a11 a22 a33 a44 a55 a66 +* +* Similarly, if UPLO = 'L' the format of A is as follows: +* +* a11 a22 a33 a44 a55 a66 +* a21 a32 a43 a54 a65 * +* a31 a42 a53 a64 * * +* +* Array elements marked * are not used by the routine. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU, UPPER + INTEGER I, INFEQU, J, J1, J2 + REAL AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANSB + EXTERNAL LSAME, SLAMCH, SLANSB +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLACPY, SLAQSB, SPBCON, SPBEQU, SPBRFS, + $ SPBTRF, SPBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + UPPER = LSAME( UPLO, 'U' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( LDAFB.LT.KD+1 ) THEN + INFO = -9 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPBSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL SPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL SLAQSB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + IF( UPPER ) THEN + DO 40 J = 1, N + J1 = MAX( J-KD, 1 ) + CALL SCOPY( J-J1+1, AB( KD+1-J+J1, J ), 1, + $ AFB( KD+1-J+J1, J ), 1 ) + 40 CONTINUE + ELSE + DO 50 J = 1, N + J2 = MIN( J+KD, N ) + CALL SCOPY( J2-J+1, AB( 1, J ), 1, AFB( 1, J ), 1 ) + 50 CONTINUE + END IF +* + CALL SPBTRF( UPLO, N, KD, AFB, LDAFB, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = SLANSB( '1', UPLO, N, KD, AB, LDAB, WORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL SPBCON( UPLO, N, KD, AFB, LDAFB, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution matrix X. +* + CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL SPBTRS( UPLO, N, KD, NRHS, AFB, LDAFB, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL SPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, LDB, X, + $ LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 70 J = 1, NRHS + DO 60 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 60 CONTINUE + 70 CONTINUE + DO 80 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 80 CONTINUE + END IF +* + RETURN +* +* End of SPBSVX +* + END diff --git a/costa/native/external/lapack/spbtf2.f b/costa/native/external/lapack/spbtf2.f new file mode 100644 index 000000000..ba5e389b0 --- /dev/null +++ b/costa/native/external/lapack/spbtf2.f @@ -0,0 +1,195 @@ + SUBROUTINE SPBTF2( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* SPBTF2 computes the Cholesky factorization of a real symmetric +* positive definite band matrix A. +* +* The factorization has the form +* A = U' * U , if UPLO = 'U', or +* A = L * L', if UPLO = 'L', +* where U is an upper triangular matrix, U' is the transpose of U, and +* L is lower triangular. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of super-diagonals of the matrix A if UPLO = 'U', +* or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) REAL array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U'*U or A = L*L' of the band +* matrix A, in the same storage format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, the leading minor of order k is not +* positive definite, and the factorization could not be +* completed. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 6, KD = 2, and UPLO = 'U': +* +* On entry: On exit: +* +* * * a13 a24 a35 a46 * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* +* Similarly, if UPLO = 'L' the format of A is as follows: +* +* On entry: On exit: +* +* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +* a31 a42 a53 a64 * * l31 l42 l53 l64 * * +* +* Array elements marked * are not used by the routine. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, KLD, KN + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPBTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + KLD = MAX( 1, LDAB-1 ) +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = AB( KD+1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 30 + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ +* +* Compute elements J+1:J+KN of row J and update the +* trailing submatrix within the band. +* + KN = MIN( KD, N-J ) + IF( KN.GT.0 ) THEN + CALL SSCAL( KN, ONE / AJJ, AB( KD, J+1 ), KLD ) + CALL SSYR( 'Upper', KN, -ONE, AB( KD, J+1 ), KLD, + $ AB( KD+1, J+1 ), KLD ) + END IF + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = AB( 1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 30 + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ +* +* Compute elements J+1:J+KN of column J and update the +* trailing submatrix within the band. +* + KN = MIN( KD, N-J ) + IF( KN.GT.0 ) THEN + CALL SSCAL( KN, ONE / AJJ, AB( 2, J ), 1 ) + CALL SSYR( 'Lower', KN, -ONE, AB( 2, J ), 1, + $ AB( 1, J+1 ), KLD ) + END IF + 20 CONTINUE + END IF + RETURN +* + 30 CONTINUE + INFO = J + RETURN +* +* End of SPBTF2 +* + END diff --git a/costa/native/external/lapack/spbtrf.f b/costa/native/external/lapack/spbtrf.f new file mode 100644 index 000000000..584b09f07 --- /dev/null +++ b/costa/native/external/lapack/spbtrf.f @@ -0,0 +1,365 @@ + SUBROUTINE SPBTRF( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* SPBTRF computes the Cholesky factorization of a real symmetric +* positive definite band matrix A. +* +* The factorization has the form +* A = U**T * U, if UPLO = 'U', or +* A = L * L**T, if UPLO = 'L', +* where U is an upper triangular matrix and L is lower triangular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) REAL array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U**T*U or A = L*L**T of the band +* matrix A, in the same storage format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i is not +* positive definite, and the factorization could not be +* completed. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 6, KD = 2, and UPLO = 'U': +* +* On entry: On exit: +* +* * * a13 a24 a35 a46 * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* +* Similarly, if UPLO = 'L' the format of A is as follows: +* +* On entry: On exit: +* +* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +* a31 a42 a53 a64 * * l31 l42 l53 l64 * * +* +* Array elements marked * are not used by the routine. +* +* Contributed by +* Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989 +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + INTEGER NBMAX, LDWORK + PARAMETER ( NBMAX = 32, LDWORK = NBMAX+1 ) +* .. +* .. Local Scalars .. + INTEGER I, I2, I3, IB, II, J, JJ, NB +* .. +* .. Local Arrays .. + REAL WORK( LDWORK, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SPBTF2, SPOTF2, SSYRK, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.LSAME( UPLO, 'U' ) ) .AND. + $ ( .NOT.LSAME( UPLO, 'L' ) ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPBTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment +* + NB = ILAENV( 1, 'SPBTRF', UPLO, N, KD, -1, -1 ) +* +* The block size must not exceed the semi-bandwidth KD, and must not +* exceed the limit set by the size of the local array WORK. +* + NB = MIN( NB, NBMAX ) +* + IF( NB.LE.1 .OR. NB.GT.KD ) THEN +* +* Use unblocked code +* + CALL SPBTF2( UPLO, N, KD, AB, LDAB, INFO ) + ELSE +* +* Use blocked code +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Compute the Cholesky factorization of a symmetric band +* matrix, given the upper triangle of the matrix in band +* storage. +* +* Zero the upper triangle of the work array. +* + DO 20 J = 1, NB + DO 10 I = 1, J - 1 + WORK( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Process the band matrix one diagonal block at a time. +* + DO 70 I = 1, N, NB + IB = MIN( NB, N-I+1 ) +* +* Factorize the diagonal block +* + CALL SPOTF2( UPLO, IB, AB( KD+1, I ), LDAB-1, II ) + IF( II.NE.0 ) THEN + INFO = I + II - 1 + GO TO 150 + END IF + IF( I+IB.LE.N ) THEN +* +* Update the relevant part of the trailing submatrix. +* If A11 denotes the diagonal block which has just been +* factorized, then we need to update the remaining +* blocks in the diagram: +* +* A11 A12 A13 +* A22 A23 +* A33 +* +* The numbers of rows and columns in the partitioning +* are IB, I2, I3 respectively. The blocks A12, A22 and +* A23 are empty if IB = KD. The upper triangle of A13 +* lies outside the band. +* + I2 = MIN( KD-IB, N-I-IB+1 ) + I3 = MIN( IB, N-I-KD+1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A12 +* + CALL STRSM( 'Left', 'Upper', 'Transpose', + $ 'Non-unit', IB, I2, ONE, AB( KD+1, I ), + $ LDAB-1, AB( KD+1-IB, I+IB ), LDAB-1 ) +* +* Update A22 +* + CALL SSYRK( 'Upper', 'Transpose', I2, IB, -ONE, + $ AB( KD+1-IB, I+IB ), LDAB-1, ONE, + $ AB( KD+1, I+IB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Copy the lower triangle of A13 into the work array. +* + DO 40 JJ = 1, I3 + DO 30 II = JJ, IB + WORK( II, JJ ) = AB( II-JJ+1, JJ+I+KD-1 ) + 30 CONTINUE + 40 CONTINUE +* +* Update A13 (in the work array). +* + CALL STRSM( 'Left', 'Upper', 'Transpose', + $ 'Non-unit', IB, I3, ONE, AB( KD+1, I ), + $ LDAB-1, WORK, LDWORK ) +* +* Update A23 +* + IF( I2.GT.0 ) + $ CALL SGEMM( 'Transpose', 'No Transpose', I2, I3, + $ IB, -ONE, AB( KD+1-IB, I+IB ), + $ LDAB-1, WORK, LDWORK, ONE, + $ AB( 1+IB, I+KD ), LDAB-1 ) +* +* Update A33 +* + CALL SSYRK( 'Upper', 'Transpose', I3, IB, -ONE, + $ WORK, LDWORK, ONE, AB( KD+1, I+KD ), + $ LDAB-1 ) +* +* Copy the lower triangle of A13 back into place. +* + DO 60 JJ = 1, I3 + DO 50 II = JJ, IB + AB( II-JJ+1, JJ+I+KD-1 ) = WORK( II, JJ ) + 50 CONTINUE + 60 CONTINUE + END IF + END IF + 70 CONTINUE + ELSE +* +* Compute the Cholesky factorization of a symmetric band +* matrix, given the lower triangle of the matrix in band +* storage. +* +* Zero the lower triangle of the work array. +* + DO 90 J = 1, NB + DO 80 I = J + 1, NB + WORK( I, J ) = ZERO + 80 CONTINUE + 90 CONTINUE +* +* Process the band matrix one diagonal block at a time. +* + DO 140 I = 1, N, NB + IB = MIN( NB, N-I+1 ) +* +* Factorize the diagonal block +* + CALL SPOTF2( UPLO, IB, AB( 1, I ), LDAB-1, II ) + IF( II.NE.0 ) THEN + INFO = I + II - 1 + GO TO 150 + END IF + IF( I+IB.LE.N ) THEN +* +* Update the relevant part of the trailing submatrix. +* If A11 denotes the diagonal block which has just been +* factorized, then we need to update the remaining +* blocks in the diagram: +* +* A11 +* A21 A22 +* A31 A32 A33 +* +* The numbers of rows and columns in the partitioning +* are IB, I2, I3 respectively. The blocks A21, A22 and +* A32 are empty if IB = KD. The lower triangle of A31 +* lies outside the band. +* + I2 = MIN( KD-IB, N-I-IB+1 ) + I3 = MIN( IB, N-I-KD+1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A21 +* + CALL STRSM( 'Right', 'Lower', 'Transpose', + $ 'Non-unit', I2, IB, ONE, AB( 1, I ), + $ LDAB-1, AB( 1+IB, I ), LDAB-1 ) +* +* Update A22 +* + CALL SSYRK( 'Lower', 'No Transpose', I2, IB, -ONE, + $ AB( 1+IB, I ), LDAB-1, ONE, + $ AB( 1, I+IB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Copy the upper triangle of A31 into the work array. +* + DO 110 JJ = 1, IB + DO 100 II = 1, MIN( JJ, I3 ) + WORK( II, JJ ) = AB( KD+1-JJ+II, JJ+I-1 ) + 100 CONTINUE + 110 CONTINUE +* +* Update A31 (in the work array). +* + CALL STRSM( 'Right', 'Lower', 'Transpose', + $ 'Non-unit', I3, IB, ONE, AB( 1, I ), + $ LDAB-1, WORK, LDWORK ) +* +* Update A32 +* + IF( I2.GT.0 ) + $ CALL SGEMM( 'No transpose', 'Transpose', I3, I2, + $ IB, -ONE, WORK, LDWORK, + $ AB( 1+IB, I ), LDAB-1, ONE, + $ AB( 1+KD-IB, I+IB ), LDAB-1 ) +* +* Update A33 +* + CALL SSYRK( 'Lower', 'No Transpose', I3, IB, -ONE, + $ WORK, LDWORK, ONE, AB( 1, I+KD ), + $ LDAB-1 ) +* +* Copy the upper triangle of A31 back into place. +* + DO 130 JJ = 1, IB + DO 120 II = 1, MIN( JJ, I3 ) + AB( KD+1-JJ+II, JJ+I-1 ) = WORK( II, JJ ) + 120 CONTINUE + 130 CONTINUE + END IF + END IF + 140 CONTINUE + END IF + END IF + RETURN +* + 150 CONTINUE + RETURN +* +* End of SPBTRF +* + END diff --git a/costa/native/external/lapack/spbtrs.f b/costa/native/external/lapack/spbtrs.f new file mode 100644 index 000000000..a97776da1 --- /dev/null +++ b/costa/native/external/lapack/spbtrs.f @@ -0,0 +1,146 @@ + SUBROUTINE SPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SPBTRS solves a system of linear equations A*X = B with a symmetric +* positive definite band matrix A using the Cholesky factorization +* A = U**T*U or A = L*L**T computed by SPBTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular factor stored in AB; +* = 'L': Lower triangular factor stored in AB. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input) REAL array, dimension (LDAB,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**T*U or A = L*L**T of the band matrix A, stored in the +* first KD+1 rows of the array. The j-th column of U or L is +* stored in the j-th column of the array AB as follows: +* if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; +* if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL STBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U'*U. +* + DO 10 J = 1, NRHS +* +* Solve U'*X = B, overwriting B with X. +* + CALL STBSV( 'Upper', 'Transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) +* +* Solve U*X = B, overwriting B with X. +* + CALL STBSV( 'Upper', 'No transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* Solve A*X = B where A = L*L'. +* + DO 20 J = 1, NRHS +* +* Solve L*X = B, overwriting B with X. +* + CALL STBSV( 'Lower', 'No transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) +* +* Solve L'*X = B, overwriting B with X. +* + CALL STBSV( 'Lower', 'Transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) + 20 CONTINUE + END IF +* + RETURN +* +* End of SPBTRS +* + END diff --git a/costa/native/external/lapack/spocon.f b/costa/native/external/lapack/spocon.f new file mode 100644 index 000000000..db4cb775d --- /dev/null +++ b/costa/native/external/lapack/spocon.f @@ -0,0 +1,173 @@ + SUBROUTINE SPOCON( UPLO, N, A, LDA, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SPOCON estimates the reciprocal of the condition number (in the +* 1-norm) of a real symmetric positive definite matrix using the +* Cholesky factorization A = U**T*U or A = L*L**T computed by SPOTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**T*U or A = L*L**T, as computed by SPOTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* ANORM (input) REAL +* The 1-norm (or infinity-norm) of the symmetric matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + REAL AINVNM, SCALE, SCALEL, SCALEU, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL LSAME, ISAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLACON, SLATRS, SRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of inv(A). +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL SLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U'). +* + CALL SLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, A, + $ LDA, WORK, SCALEL, WORK( 2*N+1 ), INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL SLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SCALEU, WORK( 2*N+1 ), INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL SLATRS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SCALEL, WORK( 2*N+1 ), INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L'). +* + CALL SLATRS( 'Lower', 'Transpose', 'Non-unit', NORMIN, N, A, + $ LDA, WORK, SCALEU, WORK( 2*N+1 ), INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = ISAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL SRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of SPOCON +* + END diff --git a/costa/native/external/lapack/spoequ.f b/costa/native/external/lapack/spoequ.f new file mode 100644 index 000000000..eef4f5b66 --- /dev/null +++ b/costa/native/external/lapack/spoequ.f @@ -0,0 +1,137 @@ + SUBROUTINE SPOEQU( N, A, LDA, S, SCOND, AMAX, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL A( LDA, * ), S( * ) +* .. +* +* Purpose +* ======= +* +* SPOEQU computes row and column scalings intended to equilibrate a +* symmetric positive definite matrix A and reduce its condition number +* (with respect to the two-norm). S contains the scale factors, +* S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with +* elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This +* choice of S puts the condition number of B within a factor N of the +* smallest possible condition number over all possible diagonal +* scalings. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The N-by-N symmetric positive definite matrix whose scaling +* factors are to be computed. Only the diagonal elements of A +* are referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* S (output) REAL array, dimension (N) +* If INFO = 0, S contains the scale factors for A. +* +* SCOND (output) REAL +* If INFO = 0, S contains the ratio of the smallest S(i) to +* the largest S(i). If SCOND >= 0.1 and AMAX is neither too +* large nor too small, it is not worth scaling by S. +* +* AMAX (output) REAL +* Absolute value of largest matrix element. If AMAX is very +* close to overflow or very close to underflow, the matrix +* should be scaled. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the i-th diagonal element is nonpositive. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL SMIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* +* Find the minimum and maximum diagonal elements. +* + S( 1 ) = A( 1, 1 ) + SMIN = S( 1 ) + AMAX = S( 1 ) + DO 10 I = 2, N + S( I ) = A( I, I ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 20 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 20 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 30 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 30 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of SPOEQU +* + END diff --git a/costa/native/external/lapack/sporfs.f b/costa/native/external/lapack/sporfs.f new file mode 100644 index 000000000..46e8021fa --- /dev/null +++ b/costa/native/external/lapack/sporfs.f @@ -0,0 +1,327 @@ + SUBROUTINE SPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, + $ LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SPORFS improves the computed solution to a system of linear +* equations when the coefficient matrix is symmetric positive definite, +* and provides error bounds and backward error estimates for the +* solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The symmetric matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input) REAL array, dimension (LDAF,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**T*U or A = L*L**T, as computed by SPOTRF. +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* B (input) REAL array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) REAL array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by SPOTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SLACON, SPOTRS, SSYMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPORFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL SSYMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, + $ WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + DO 40 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 40 CONTINUE + WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK + DO 60 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 60 CONTINUE + WORK( K ) = WORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL SPOTRS( UPLO, N, 1, AF, LDAF, WORK( N+1 ), N, INFO ) + CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use SLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL SLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL SPOTRS( UPLO, N, 1, AF, LDAF, WORK( N+1 ), N, INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 120 CONTINUE + CALL SPOTRS( UPLO, N, 1, AF, LDAF, WORK( N+1 ), N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of SPORFS +* + END diff --git a/costa/native/external/lapack/sposv.f b/costa/native/external/lapack/sposv.f new file mode 100644 index 000000000..cd06e2f29 --- /dev/null +++ b/costa/native/external/lapack/sposv.f @@ -0,0 +1,122 @@ + SUBROUTINE SPOSV( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SPOSV computes the solution to a real system of linear equations +* A * X = B, +* where A is an N-by-N symmetric positive definite matrix and X and B +* are N-by-NRHS matrices. +* +* The Cholesky decomposition is used to factor A as +* A = U**T* U, if UPLO = 'U', or +* A = L * L**T, if UPLO = 'L', +* where U is an upper triangular matrix and L is a lower triangular +* matrix. The factored form of A is then used to solve the system of +* equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i of A is not +* positive definite, so the factorization could not be +* completed, and the solution has not been computed. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SPOTRF, SPOTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + CALL SPOTRF( UPLO, N, A, LDA, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL SPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* + END IF + RETURN +* +* End of SPOSV +* + END diff --git a/costa/native/external/lapack/sposvx.f b/costa/native/external/lapack/sposvx.f new file mode 100644 index 000000000..ecd4b8d3a --- /dev/null +++ b/costa/native/external/lapack/sposvx.f @@ -0,0 +1,379 @@ + SUBROUTINE SPOSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED, + $ S, B, LDB, X, LDX, RCOND, FERR, BERR, WORK, + $ IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), S( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SPOSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to +* compute the solution to a real system of linear equations +* A * X = B, +* where A is an N-by-N symmetric positive definite matrix and X and B +* are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'E', real scaling factors are computed to equilibrate +* the system: +* diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +* Whether or not the system will be equilibrated depends on the +* scaling of the matrix A, but if equilibration is used, A is +* overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +* +* 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +* factor the matrix A (after equilibration if FACT = 'E') as +* A = U**T* U, if UPLO = 'U', or +* A = L * L**T, if UPLO = 'L', +* where U is an upper triangular matrix and L is a lower triangular +* matrix. +* +* 3. If the leading i-by-i principal minor is not positive definite, +* then the routine returns with INFO = i. Otherwise, the factored +* form of A is used to estimate the condition number of the matrix +* A. If the reciprocal of the condition number is less than machine +* precision, INFO = N+1 is returned as a warning, but the routine +* still goes on to solve for X and compute error bounds as +* described below. +* +* 4. The system of equations is solved for X using the factored form +* of A. +* +* 5. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* 6. If equilibration was used, the matrix X is premultiplied by +* diag(S) so that it solves the original system before +* equilibration. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of the matrix A is +* supplied on entry, and if not, whether the matrix A should be +* equilibrated before it is factored. +* = 'F': On entry, AF contains the factored form of A. +* If EQUED = 'Y', the matrix A has been equilibrated +* with scaling factors given by S. A and AF will not +* be modified. +* = 'N': The matrix A will be copied to AF and factored. +* = 'E': The matrix A will be equilibrated if necessary, then +* copied to AF and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the symmetric matrix A, except if FACT = 'F' and +* EQUED = 'Y', then A must contain the equilibrated matrix +* diag(S)*A*diag(S). If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. A is not modified if +* FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. +* +* On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +* diag(S)*A*diag(S). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input or output) REAL array, dimension (LDAF,N) +* If FACT = 'F', then AF is an input argument and on entry +* contains the triangular factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T, in the same storage +* format as A. If EQUED .ne. 'N', then AF is the factored form +* of the equilibrated matrix diag(S)*A*diag(S). +* +* If FACT = 'N', then AF is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T of the original +* matrix A. +* +* If FACT = 'E', then AF is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T of the equilibrated +* matrix A (see the description of A for the form of the +* equilibrated matrix). +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* EQUED (input or output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration (always true if FACT = 'N'). +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* EQUED is an input argument if FACT = 'F'; otherwise, it is an +* output argument. +* +* S (input or output) REAL array, dimension (N) +* The scale factors for A; not accessed if EQUED = 'N'. S is +* an input argument if FACT = 'F'; otherwise, S is an output +* argument. If FACT = 'F' and EQUED = 'Y', each element of S +* must be positive. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +* B is overwritten by diag(S) * B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) REAL array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +* the original system of equations. Note that if EQUED = 'Y', +* A and B are modified on exit, and the solution to the +* equilibrated system is inv(diag(S))*X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) REAL +* The estimate of the reciprocal condition number of the matrix +* A after equilibration (if done). If RCOND is less than the +* machine precision (in particular, if RCOND = 0), the matrix +* is singular to working precision. This condition is +* indicated by a return code of INFO > 0. +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: the leading minor of order i of A is +* not positive definite, so the factorization +* could not be completed, and the solution has not +* been computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER I, INFEQU, J + REAL AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANSY + EXTERNAL LSAME, SLAMCH, SLANSY +* .. +* .. External Subroutines .. + EXTERNAL SLACPY, SLAQSY, SPOCON, SPOEQU, SPORFS, SPOTRF, + $ SPOTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -9 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -10 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL SPOEQU( N, A, LDA, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL SLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + CALL SLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL SPOTRF( UPLO, N, AF, LDAF, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = SLANSY( '1', UPLO, N, A, LDA, WORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL SPOCON( UPLO, N, AF, LDAF, ANORM, RCOND, WORK, IWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution matrix X. +* + CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL SPOTRS( UPLO, N, NRHS, AF, LDAF, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL SPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, LDX, + $ FERR, BERR, WORK, IWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 50 J = 1, NRHS + DO 40 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 40 CONTINUE + 50 CONTINUE + DO 60 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 60 CONTINUE + END IF +* + RETURN +* +* End of SPOSVX +* + END diff --git a/costa/native/external/lapack/spotf2.f b/costa/native/external/lapack/spotf2.f new file mode 100644 index 000000000..34fb5af51 --- /dev/null +++ b/costa/native/external/lapack/spotf2.f @@ -0,0 +1,168 @@ + SUBROUTINE SPOTF2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* SPOTF2 computes the Cholesky factorization of a real symmetric +* positive definite matrix A. +* +* The factorization has the form +* A = U' * U , if UPLO = 'U', or +* A = L * L', if UPLO = 'L', +* where U is an upper triangular matrix and L is lower triangular. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* n by n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n by n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the factor U or L from the Cholesky +* factorization A = U'*U or A = L*L'. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, the leading minor of order k is not +* positive definite, and the factorization could not be +* completed. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = A( J, J ) - SDOT( J-1, A( 1, J ), 1, A( 1, J ), 1 ) + IF( AJJ.LE.ZERO ) THEN + A( J, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of row J. +* + IF( J.LT.N ) THEN + CALL SGEMV( 'Transpose', J-1, N-J, -ONE, A( 1, J+1 ), + $ LDA, A( 1, J ), 1, ONE, A( J, J+1 ), LDA ) + CALL SSCAL( N-J, ONE / AJJ, A( J, J+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = A( J, J ) - SDOT( J-1, A( J, 1 ), LDA, A( J, 1 ), + $ LDA ) + IF( AJJ.LE.ZERO ) THEN + A( J, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of column J. +* + IF( J.LT.N ) THEN + CALL SGEMV( 'No transpose', N-J, J-1, -ONE, A( J+1, 1 ), + $ LDA, A( J, 1 ), LDA, ONE, A( J+1, J ), 1 ) + CALL SSCAL( N-J, ONE / AJJ, A( J+1, J ), 1 ) + END IF + 20 CONTINUE + END IF + GO TO 40 +* + 30 CONTINUE + INFO = J +* + 40 CONTINUE + RETURN +* +* End of SPOTF2 +* + END diff --git a/costa/native/external/lapack/spotrf.f b/costa/native/external/lapack/spotrf.f new file mode 100644 index 000000000..c97642532 --- /dev/null +++ b/costa/native/external/lapack/spotrf.f @@ -0,0 +1,184 @@ + SUBROUTINE SPOTRF( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* SPOTRF computes the Cholesky factorization of a real symmetric +* positive definite matrix A. +* +* The factorization has the form +* A = U**T * U, if UPLO = 'U', or +* A = L * L**T, if UPLO = 'L', +* where U is an upper triangular matrix and L is lower triangular. +* +* This is the block version of the algorithm, calling Level 3 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i is not +* positive definite, and the factorization could not be +* completed. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SPOTF2, SSYRK, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'SPOTRF', UPLO, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + CALL SPOTF2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code. +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + CALL SSYRK( 'Upper', 'Transpose', JB, J-1, -ONE, + $ A( 1, J ), LDA, ONE, A( J, J ), LDA ) + CALL SPOTF2( 'Upper', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( J+JB.LE.N ) THEN +* +* Compute the current block row. +* + CALL SGEMM( 'Transpose', 'No transpose', JB, N-J-JB+1, + $ J-1, -ONE, A( 1, J ), LDA, A( 1, J+JB ), + $ LDA, ONE, A( J, J+JB ), LDA ) + CALL STRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', + $ JB, N-J-JB+1, ONE, A( J, J ), LDA, + $ A( J, J+JB ), LDA ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + CALL SSYRK( 'Lower', 'No transpose', JB, J-1, -ONE, + $ A( J, 1 ), LDA, ONE, A( J, J ), LDA ) + CALL SPOTF2( 'Lower', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( J+JB.LE.N ) THEN +* +* Compute the current block column. +* + CALL SGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ J-1, -ONE, A( J+JB, 1 ), LDA, A( J, 1 ), + $ LDA, ONE, A( J+JB, J ), LDA ) + CALL STRSM( 'Right', 'Lower', 'Transpose', 'Non-unit', + $ N-J-JB+1, JB, ONE, A( J, J ), LDA, + $ A( J+JB, J ), LDA ) + END IF + 20 CONTINUE + END IF + END IF + GO TO 40 +* + 30 CONTINUE + INFO = INFO + J - 1 +* + 40 CONTINUE + RETURN +* +* End of SPOTRF +* + END diff --git a/costa/native/external/lapack/spotri.f b/costa/native/external/lapack/spotri.f new file mode 100644 index 000000000..44acd05c1 --- /dev/null +++ b/costa/native/external/lapack/spotri.f @@ -0,0 +1,97 @@ + SUBROUTINE SPOTRI( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* SPOTRI computes the inverse of a real symmetric positive definite +* matrix A using the Cholesky factorization A = U**T*U or A = L*L**T +* computed by SPOTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the triangular factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T, as computed by +* SPOTRF. +* On exit, the upper or lower triangle of the (symmetric) +* inverse of A, overwriting the input factor U or L. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the (i,i) element of the factor U or L is +* zero, and the inverse could not be computed. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLAUUM, STRTRI, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Invert the triangular Cholesky factor U or L. +* + CALL STRTRI( UPLO, 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* +* Form inv(U)*inv(U)' or inv(L)'*inv(L). +* + CALL SLAUUM( UPLO, N, A, LDA, INFO ) +* + RETURN +* +* End of SPOTRI +* + END diff --git a/costa/native/external/lapack/spotrs.f b/costa/native/external/lapack/spotrs.f new file mode 100644 index 000000000..4c1fc3665 --- /dev/null +++ b/costa/native/external/lapack/spotrs.f @@ -0,0 +1,133 @@ + SUBROUTINE SPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SPOTRS solves a system of linear equations A*X = B with a symmetric +* positive definite matrix A using the Cholesky factorization +* A = U**T*U or A = L*L**T computed by SPOTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**T*U or A = L*L**T, as computed by SPOTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U'*U. +* +* Solve U'*X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) +* +* Solve U*X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) + ELSE +* +* Solve A*X = B where A = L*L'. +* +* Solve L*X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) +* +* Solve L'*X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Lower', 'Transpose', 'Non-unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) + END IF +* + RETURN +* +* End of SPOTRS +* + END diff --git a/costa/native/external/lapack/sppcon.f b/costa/native/external/lapack/sppcon.f new file mode 100644 index 000000000..c2360875f --- /dev/null +++ b/costa/native/external/lapack/sppcon.f @@ -0,0 +1,172 @@ + SUBROUTINE SPPCON( UPLO, N, AP, ANORM, RCOND, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SPPCON estimates the reciprocal of the condition number (in the +* 1-norm) of a real symmetric positive definite packed matrix using +* the Cholesky factorization A = U**T*U or A = L*L**T computed by +* SPPTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input) REAL array, dimension (N*(N+1)/2) +* The triangular factor U or L from the Cholesky factorization +* A = U**T*U or A = L*L**T, packed columnwise in a linear +* array. The j-th column of U or L is stored in the array AP +* as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +* +* ANORM (input) REAL +* The 1-norm (or infinity-norm) of the symmetric matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + REAL AINVNM, SCALE, SCALEL, SCALEU, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL LSAME, ISAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLACON, SLATPS, SRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL SLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U'). +* + CALL SLATPS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEL, WORK( 2*N+1 ), INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL SLATPS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEU, WORK( 2*N+1 ), INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL SLATPS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEL, WORK( 2*N+1 ), INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L'). +* + CALL SLATPS( 'Lower', 'Transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEU, WORK( 2*N+1 ), INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = ISAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL SRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of SPPCON +* + END diff --git a/costa/native/external/lapack/sppequ.f b/costa/native/external/lapack/sppequ.f new file mode 100644 index 000000000..925df64b4 --- /dev/null +++ b/costa/native/external/lapack/sppequ.f @@ -0,0 +1,169 @@ + SUBROUTINE SPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL AP( * ), S( * ) +* .. +* +* Purpose +* ======= +* +* SPPEQU computes row and column scalings intended to equilibrate a +* symmetric positive definite matrix A in packed storage and reduce +* its condition number (with respect to the two-norm). S contains the +* scale factors, S(i)=1/sqrt(A(i,i)), chosen so that the scaled matrix +* B with elements B(i,j)=S(i)*A(i,j)*S(j) has ones on the diagonal. +* This choice of S puts the condition number of B within a factor N of +* the smallest possible condition number over all possible diagonal +* scalings. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input) REAL array, dimension (N*(N+1)/2) +* The upper or lower triangle of the symmetric matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* S (output) REAL array, dimension (N) +* If INFO = 0, S contains the scale factors for A. +* +* SCOND (output) REAL +* If INFO = 0, S contains the ratio of the smallest S(i) to +* the largest S(i). If SCOND >= 0.1 and AMAX is neither too +* large nor too small, it is not worth scaling by S. +* +* AMAX (output) REAL +* Absolute value of largest matrix element. If AMAX is very +* close to overflow or very close to underflow, the matrix +* should be scaled. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the i-th diagonal element is nonpositive. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, JJ + REAL SMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPPEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* +* Initialize SMIN and AMAX. +* + S( 1 ) = AP( 1 ) + SMIN = S( 1 ) + AMAX = S( 1 ) +* + IF( UPPER ) THEN +* +* UPLO = 'U': Upper triangle of A is stored. +* Find the minimum and maximum diagonal elements. +* + JJ = 1 + DO 10 I = 2, N + JJ = JJ + I + S( I ) = AP( JJ ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + ELSE +* +* UPLO = 'L': Lower triangle of A is stored. +* Find the minimum and maximum diagonal elements. +* + JJ = 1 + DO 20 I = 2, N + JJ = JJ + N - I + 2 + S( I ) = AP( JJ ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 20 CONTINUE + END IF +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 30 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 30 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 40 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 40 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of SPPEQU +* + END diff --git a/costa/native/external/lapack/spprfs.f b/costa/native/external/lapack/spprfs.f new file mode 100644 index 000000000..cc3663888 --- /dev/null +++ b/costa/native/external/lapack/spprfs.f @@ -0,0 +1,324 @@ + SUBROUTINE SPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, + $ BERR, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AFP( * ), AP( * ), B( LDB, * ), BERR( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SPPRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is symmetric positive definite +* and packed, and provides error bounds and backward error estimates +* for the solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input) REAL array, dimension (N*(N+1)/2) +* The upper or lower triangle of the symmetric matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* AFP (input) REAL array, dimension (N*(N+1)/2) +* The triangular factor U or L from the Cholesky factorization +* A = U**T*U or A = L*L**T, as computed by SPPTRF/CPPTRF, +* packed columnwise in a linear array in the same format as A +* (see AP). +* +* B (input) REAL array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) REAL array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by SPPTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IK, J, K, KASE, KK, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SLACON, SPPTRS, SSPMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL SSPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK( N+1 ), + $ 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + KK = 1 + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + IK = KK + DO 40 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK + S = S + ABS( AP( IK ) )*ABS( X( I, J ) ) + IK = IK + 1 + 40 CONTINUE + WORK( K ) = WORK( K ) + ABS( AP( KK+K-1 ) )*XK + S + KK = KK + K + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + WORK( K ) = WORK( K ) + ABS( AP( KK ) )*XK + IK = KK + 1 + DO 60 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK + S = S + ABS( AP( IK ) )*ABS( X( I, J ) ) + IK = IK + 1 + 60 CONTINUE + WORK( K ) = WORK( K ) + S + KK = KK + ( N-K+1 ) + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL SPPTRS( UPLO, N, 1, AFP, WORK( N+1 ), N, INFO ) + CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use SLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL SLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL SPPTRS( UPLO, N, 1, AFP, WORK( N+1 ), N, INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 120 CONTINUE + CALL SPPTRS( UPLO, N, 1, AFP, WORK( N+1 ), N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of SPPRFS +* + END diff --git a/costa/native/external/lapack/sppsv.f b/costa/native/external/lapack/sppsv.f new file mode 100644 index 000000000..ea8ed7ed5 --- /dev/null +++ b/costa/native/external/lapack/sppsv.f @@ -0,0 +1,134 @@ + SUBROUTINE SPPSV( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SPPSV computes the solution to a real system of linear equations +* A * X = B, +* where A is an N-by-N symmetric positive definite matrix stored in +* packed format and X and B are N-by-NRHS matrices. +* +* The Cholesky decomposition is used to factor A as +* A = U**T* U, if UPLO = 'U', or +* A = L * L**T, if UPLO = 'L', +* where U is an upper triangular matrix and L is a lower triangular +* matrix. The factored form of A is then used to solve the system of +* equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input/output) REAL array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* +* On exit, if INFO = 0, the factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T, in the same storage +* format as A. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i of A is not +* positive definite, so the factorization could not be +* completed, and the solution has not been computed. +* +* Further Details +* =============== +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the symmetric matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = conjg(aji)) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SPPTRF, SPPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPPSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + CALL SPPTRF( UPLO, N, AP, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL SPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO ) +* + END IF + RETURN +* +* End of SPPSV +* + END diff --git a/costa/native/external/lapack/sppsvx.f b/costa/native/external/lapack/sppsvx.f new file mode 100644 index 000000000..8d06890a2 --- /dev/null +++ b/costa/native/external/lapack/sppsvx.f @@ -0,0 +1,383 @@ + SUBROUTINE SPPSVX( FACT, UPLO, N, NRHS, AP, AFP, EQUED, S, B, LDB, + $ X, LDX, RCOND, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AFP( * ), AP( * ), B( LDB, * ), BERR( * ), + $ FERR( * ), S( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SPPSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to +* compute the solution to a real system of linear equations +* A * X = B, +* where A is an N-by-N symmetric positive definite matrix stored in +* packed format and X and B are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'E', real scaling factors are computed to equilibrate +* the system: +* diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +* Whether or not the system will be equilibrated depends on the +* scaling of the matrix A, but if equilibration is used, A is +* overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +* +* 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +* factor the matrix A (after equilibration if FACT = 'E') as +* A = U**T* U, if UPLO = 'U', or +* A = L * L**T, if UPLO = 'L', +* where U is an upper triangular matrix and L is a lower triangular +* matrix. +* +* 3. If the leading i-by-i principal minor is not positive definite, +* then the routine returns with INFO = i. Otherwise, the factored +* form of A is used to estimate the condition number of the matrix +* A. If the reciprocal of the condition number is less than machine +* precision, INFO = N+1 is returned as a warning, but the routine +* still goes on to solve for X and compute error bounds as +* described below. +* +* 4. The system of equations is solved for X using the factored form +* of A. +* +* 5. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* 6. If equilibration was used, the matrix X is premultiplied by +* diag(S) so that it solves the original system before +* equilibration. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of the matrix A is +* supplied on entry, and if not, whether the matrix A should be +* equilibrated before it is factored. +* = 'F': On entry, AFP contains the factored form of A. +* If EQUED = 'Y', the matrix A has been equilibrated +* with scaling factors given by S. AP and AFP will not +* be modified. +* = 'N': The matrix A will be copied to AFP and factored. +* = 'E': The matrix A will be equilibrated if necessary, then +* copied to AFP and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input/output) REAL array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array, except if FACT = 'F' +* and EQUED = 'Y', then A must contain the equilibrated matrix +* diag(S)*A*diag(S). The j-th column of A is stored in the +* array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. A is not modified if +* FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. +* +* On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +* diag(S)*A*diag(S). +* +* AFP (input or output) REAL array, dimension +* (N*(N+1)/2) +* If FACT = 'F', then AFP is an input argument and on entry +* contains the triangular factor U or L from the Cholesky +* factorization A = U'*U or A = L*L', in the same storage +* format as A. If EQUED .ne. 'N', then AFP is the factored +* form of the equilibrated matrix A. +* +* If FACT = 'N', then AFP is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U'*U or A = L*L' of the original matrix A. +* +* If FACT = 'E', then AFP is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U'*U or A = L*L' of the equilibrated +* matrix A (see the description of AP for the form of the +* equilibrated matrix). +* +* EQUED (input or output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration (always true if FACT = 'N'). +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* EQUED is an input argument if FACT = 'F'; otherwise, it is an +* output argument. +* +* S (input or output) REAL array, dimension (N) +* The scale factors for A; not accessed if EQUED = 'N'. S is +* an input argument if FACT = 'F'; otherwise, S is an output +* argument. If FACT = 'F' and EQUED = 'Y', each element of S +* must be positive. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +* B is overwritten by diag(S) * B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) REAL array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +* the original system of equations. Note that if EQUED = 'Y', +* A and B are modified on exit, and the solution to the +* equilibrated system is inv(diag(S))*X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) REAL +* The estimate of the reciprocal condition number of the matrix +* A after equilibration (if done). If RCOND is less than the +* machine precision (in particular, if RCOND = 0), the matrix +* is singular to working precision. This condition is +* indicated by a return code of INFO > 0. +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: the leading minor of order i of A is +* not positive definite, so the factorization +* could not be completed, and the solution has not +* been computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* Further Details +* =============== +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the symmetric matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = conjg(aji)) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER I, INFEQU, J + REAL AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANSP + EXTERNAL LSAME, SLAMCH, SLANSP +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLACPY, SLAQSP, SPPCON, SPPEQU, SPPRFS, + $ SPPTRF, SPPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -7 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -8 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPPSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL SPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL SLAQSP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + CALL SCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 ) + CALL SPPTRF( UPLO, N, AFP, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = SLANSP( 'I', UPLO, N, AP, WORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL SPPCON( UPLO, N, AFP, ANORM, RCOND, WORK, IWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution matrix X. +* + CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL SPPTRS( UPLO, N, NRHS, AFP, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL SPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, BERR, + $ WORK, IWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 50 J = 1, NRHS + DO 40 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 40 CONTINUE + 50 CONTINUE + DO 60 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 60 CONTINUE + END IF +* + RETURN +* +* End of SPPSVX +* + END diff --git a/costa/native/external/lapack/spptrf.f b/costa/native/external/lapack/spptrf.f new file mode 100644 index 000000000..035e0352d --- /dev/null +++ b/costa/native/external/lapack/spptrf.f @@ -0,0 +1,178 @@ + SUBROUTINE SPPTRF( UPLO, N, AP, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL AP( * ) +* .. +* +* Purpose +* ======= +* +* SPPTRF computes the Cholesky factorization of a real symmetric +* positive definite matrix A stored in packed format. +* +* The factorization has the form +* A = U**T * U, if UPLO = 'U', or +* A = L * L**T, if UPLO = 'L', +* where U is an upper triangular matrix and L is lower triangular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) REAL array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U**T*U or A = L*L**T, in the same +* storage format as A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i is not +* positive definite, and the factorization could not be +* completed. +* +* Further Details +* ======= ======= +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the symmetric matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = aji) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JC, JJ + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSPR, STPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPPTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + JJ = 0 + DO 10 J = 1, N + JC = JJ + 1 + JJ = JJ + J +* +* Compute elements 1:J-1 of column J. +* + IF( J.GT.1 ) + $ CALL STPSV( 'Upper', 'Transpose', 'Non-unit', J-1, AP, + $ AP( JC ), 1 ) +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = AP( JJ ) - SDOT( J-1, AP( JC ), 1, AP( JC ), 1 ) + IF( AJJ.LE.ZERO ) THEN + AP( JJ ) = AJJ + GO TO 30 + END IF + AP( JJ ) = SQRT( AJJ ) + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + JJ = 1 + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = AP( JJ ) + IF( AJJ.LE.ZERO ) THEN + AP( JJ ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + AP( JJ ) = AJJ +* +* Compute elements J+1:N of column J and update the trailing +* submatrix. +* + IF( J.LT.N ) THEN + CALL SSCAL( N-J, ONE / AJJ, AP( JJ+1 ), 1 ) + CALL SSPR( 'Lower', N-J, -ONE, AP( JJ+1 ), 1, + $ AP( JJ+N-J+1 ) ) + JJ = JJ + N - J + 1 + END IF + 20 CONTINUE + END IF + GO TO 40 +* + 30 CONTINUE + INFO = J +* + 40 CONTINUE + RETURN +* +* End of SPPTRF +* + END diff --git a/costa/native/external/lapack/spptri.f b/costa/native/external/lapack/spptri.f new file mode 100644 index 000000000..6d376cd96 --- /dev/null +++ b/costa/native/external/lapack/spptri.f @@ -0,0 +1,129 @@ + SUBROUTINE SPPTRI( UPLO, N, AP, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL AP( * ) +* .. +* +* Purpose +* ======= +* +* SPPTRI computes the inverse of a real symmetric positive definite +* matrix A using the Cholesky factorization A = U**T*U or A = L*L**T +* computed by SPPTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular factor is stored in AP; +* = 'L': Lower triangular factor is stored in AP. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) REAL array, dimension (N*(N+1)/2) +* On entry, the triangular factor U or L from the Cholesky +* factorization A = U**T*U or A = L*L**T, packed columnwise as +* a linear array. The j-th column of U or L is stored in the +* array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +* +* On exit, the upper or lower triangle of the (symmetric) +* inverse of A, overwriting the input factor U or L. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the (i,i) element of the factor U or L is +* zero, and the inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JC, JJ, JJN + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSPR, STPMV, STPTRI, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPPTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Invert the triangular Cholesky factor U or L. +* + CALL STPTRI( UPLO, 'Non-unit', N, AP, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the product inv(U) * inv(U)'. +* + JJ = 0 + DO 10 J = 1, N + JC = JJ + 1 + JJ = JJ + J + IF( J.GT.1 ) + $ CALL SSPR( 'Upper', J-1, ONE, AP( JC ), 1, AP ) + AJJ = AP( JJ ) + CALL SSCAL( J, AJJ, AP( JC ), 1 ) + 10 CONTINUE +* + ELSE +* +* Compute the product inv(L)' * inv(L). +* + JJ = 1 + DO 20 J = 1, N + JJN = JJ + N - J + 1 + AP( JJ ) = SDOT( N-J+1, AP( JJ ), 1, AP( JJ ), 1 ) + IF( J.LT.N ) + $ CALL STPMV( 'Lower', 'Transpose', 'Non-unit', N-J, + $ AP( JJN ), AP( JJ+1 ), 1 ) + JJ = JJN + 20 CONTINUE + END IF +* + RETURN +* +* End of SPPTRI +* + END diff --git a/costa/native/external/lapack/spptrs.f b/costa/native/external/lapack/spptrs.f new file mode 100644 index 000000000..8f4fcf5b8 --- /dev/null +++ b/costa/native/external/lapack/spptrs.f @@ -0,0 +1,135 @@ + SUBROUTINE SPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SPPTRS solves a system of linear equations A*X = B with a symmetric +* positive definite matrix A in packed storage using the Cholesky +* factorization A = U**T*U or A = L*L**T computed by SPPTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input) REAL array, dimension (N*(N+1)/2) +* The triangular factor U or L from the Cholesky factorization +* A = U**T*U or A = L*L**T, packed columnwise in a linear +* array. The j-th column of U or L is stored in the array AP +* as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL STPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U'*U. +* + DO 10 I = 1, NRHS +* +* Solve U'*X = B, overwriting B with X. +* + CALL STPSV( 'Upper', 'Transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) +* +* Solve U*X = B, overwriting B with X. +* + CALL STPSV( 'Upper', 'No transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) + 10 CONTINUE + ELSE +* +* Solve A*X = B where A = L*L'. +* + DO 20 I = 1, NRHS +* +* Solve L*Y = B, overwriting B with X. +* + CALL STPSV( 'Lower', 'No transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) +* +* Solve L'*X = Y, overwriting B with X. +* + CALL STPSV( 'Lower', 'Transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) + 20 CONTINUE + END IF +* + RETURN +* +* End of SPPTRS +* + END diff --git a/costa/native/external/lapack/sptcon.f b/costa/native/external/lapack/sptcon.f new file mode 100644 index 000000000..2ea0c4f4e --- /dev/null +++ b/costa/native/external/lapack/sptcon.f @@ -0,0 +1,150 @@ + SUBROUTINE SPTCON( N, D, E, ANORM, RCOND, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + REAL D( * ), E( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SPTCON computes the reciprocal of the condition number (in the +* 1-norm) of a real symmetric positive definite tridiagonal matrix +* using the factorization A = L*D*L**T or A = U**T*D*U computed by +* SPTTRF. +* +* Norm(inv(A)) is computed by a direct method, and the reciprocal of +* the condition number is computed as +* RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the diagonal matrix D from the +* factorization of A, as computed by SPTTRF. +* +* E (input) REAL array, dimension (N-1) +* The (n-1) off-diagonal elements of the unit bidiagonal factor +* U or L from the factorization of A, as computed by SPTTRF. +* +* ANORM (input) REAL +* The 1-norm of the original matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is the +* 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The method used is described in Nicholas J. Higham, "Efficient +* Algorithms for Computing the Condition Number of a Tridiagonal +* Matrix", SIAM J. Sci. Stat. Comput., Vol. 7, No. 1, January 1986. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IX + REAL AINVNM +* .. +* .. External Functions .. + INTEGER ISAMAX + EXTERNAL ISAMAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPTCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* +* Check that D(1:N) is positive. +* + DO 10 I = 1, N + IF( D( I ).LE.ZERO ) + $ RETURN + 10 CONTINUE +* +* Solve M(A) * x = e, where M(A) = (m(i,j)) is given by +* +* m(i,j) = abs(A(i,j)), i = j, +* m(i,j) = -abs(A(i,j)), i .ne. j, +* +* and e = [ 1, 1, ..., 1 ]'. Note M(A) = M(L)*D*M(L)'. +* +* Solve M(L) * x = e. +* + WORK( 1 ) = ONE + DO 20 I = 2, N + WORK( I ) = ONE + WORK( I-1 )*ABS( E( I-1 ) ) + 20 CONTINUE +* +* Solve D * M(L)' * x = b. +* + WORK( N ) = WORK( N ) / D( N ) + DO 30 I = N - 1, 1, -1 + WORK( I ) = WORK( I ) / D( I ) + WORK( I+1 )*ABS( E( I ) ) + 30 CONTINUE +* +* Compute AINVNM = max(x(i)), 1<=i<=n. +* + IX = ISAMAX( N, WORK, 1 ) + AINVNM = ABS( WORK( IX ) ) +* +* Compute the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of SPTCON +* + END diff --git a/costa/native/external/lapack/spteqr.f b/costa/native/external/lapack/spteqr.f new file mode 100644 index 000000000..b8df9060c --- /dev/null +++ b/costa/native/external/lapack/spteqr.f @@ -0,0 +1,190 @@ + SUBROUTINE SPTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SPTEQR computes all eigenvalues and, optionally, eigenvectors of a +* symmetric positive definite tridiagonal matrix by first factoring the +* matrix using SPTTRF, and then calling SBDSQR to compute the singular +* values of the bidiagonal factor. +* +* This routine computes the eigenvalues of the positive definite +* tridiagonal matrix to high relative accuracy. This means that if the +* eigenvalues range over many orders of magnitude in size, then the +* small eigenvalues and corresponding eigenvectors will be computed +* more accurately than, for example, with the standard QR method. +* +* The eigenvectors of a full or band symmetric positive definite matrix +* can also be found if SSYTRD, SSPTRD, or SSBTRD has been used to +* reduce this matrix to tridiagonal form. (The reduction to tridiagonal +* form, however, may preclude the possibility of obtaining high +* relative accuracy in the small eigenvalues of the original matrix, if +* these eigenvalues range over many orders of magnitude.) +* +* Arguments +* ========= +* +* COMPZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only. +* = 'V': Compute eigenvectors of original symmetric +* matrix also. Array Z contains the orthogonal +* matrix used to reduce the original matrix to +* tridiagonal form. +* = 'I': Compute eigenvectors of tridiagonal matrix also. +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal +* matrix. +* On normal exit, D contains the eigenvalues, in descending +* order. +* +* E (input/output) REAL array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix. +* On exit, E has been destroyed. +* +* Z (input/output) REAL array, dimension (LDZ, N) +* On entry, if COMPZ = 'V', the orthogonal matrix used in the +* reduction to tridiagonal form. +* On exit, if COMPZ = 'V', the orthonormal eigenvectors of the +* original symmetric matrix; +* if COMPZ = 'I', the orthonormal eigenvectors of the +* tridiagonal matrix. +* If INFO > 0 on exit, Z contains the eigenvectors associated +* with only the stored eigenvalues. +* If COMPZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* COMPZ = 'V' or 'I', LDZ >= max(1,N). +* +* WORK (workspace) REAL array, dimension (4*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, and i is: +* <= N the Cholesky factorization of the matrix could +* not be performed because the i-th principal minor +* was not positive definite. +* > N the SVD algorithm failed to converge; +* if INFO = N+i, i off-diagonal elements of the +* bidiagonal factor did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SBDSQR, SLASET, SPTTRF, XERBLA +* .. +* .. Local Arrays .. + REAL C( 1, 1 ), VT( 1, 1 ) +* .. +* .. Local Scalars .. + INTEGER I, ICOMPZ, NRU +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPTEQR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ICOMPZ.GT.0 ) + $ Z( 1, 1 ) = ONE + RETURN + END IF + IF( ICOMPZ.EQ.2 ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* +* Call SPTTRF to factor the matrix. +* + CALL SPTTRF( N, D, E, INFO ) + IF( INFO.NE.0 ) + $ RETURN + DO 10 I = 1, N + D( I ) = SQRT( D( I ) ) + 10 CONTINUE + DO 20 I = 1, N - 1 + E( I ) = E( I )*D( I ) + 20 CONTINUE +* +* Call SBDSQR to compute the singular values/vectors of the +* bidiagonal factor. +* + IF( ICOMPZ.GT.0 ) THEN + NRU = N + ELSE + NRU = 0 + END IF + CALL SBDSQR( 'Lower', N, 0, NRU, 0, D, E, VT, 1, Z, LDZ, C, 1, + $ WORK, INFO ) +* +* Square the singular values. +* + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + D( I ) = D( I )*D( I ) + 30 CONTINUE + ELSE + INFO = N + INFO + END IF +* + RETURN +* +* End of SPTEQR +* + END diff --git a/costa/native/external/lapack/sptrfs.f b/costa/native/external/lapack/sptrfs.f new file mode 100644 index 000000000..96dccabb4 --- /dev/null +++ b/costa/native/external/lapack/sptrfs.f @@ -0,0 +1,302 @@ + SUBROUTINE SPTRFS( N, NRHS, D, E, DF, EF, B, LDB, X, LDX, FERR, + $ BERR, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + REAL B( LDB, * ), BERR( * ), D( * ), DF( * ), + $ E( * ), EF( * ), FERR( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SPTRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is symmetric positive definite +* and tridiagonal, and provides error bounds and backward error +* estimates for the solution. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the tridiagonal matrix A. +* +* E (input) REAL array, dimension (N-1) +* The (n-1) subdiagonal elements of the tridiagonal matrix A. +* +* DF (input) REAL array, dimension (N) +* The n diagonal elements of the diagonal matrix D from the +* factorization computed by SPTTRF. +* +* EF (input) REAL array, dimension (N-1) +* The (n-1) subdiagonal elements of the unit bidiagonal factor +* L from the factorization computed by SPTTRF. +* +* B (input) REAL array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) REAL array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by SPTTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) REAL array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER COUNT, I, IX, J, NZ + REAL BI, CX, DX, EPS, EX, LSTRES, S, SAFE1, SAFE2, + $ SAFMIN +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SPTTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL ISAMAX, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPTRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = 4 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 90 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X. Also compute +* abs(A)*abs(x) + abs(b) for use in the backward error bound. +* + IF( N.EQ.1 ) THEN + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + WORK( N+1 ) = BI - DX + WORK( 1 ) = ABS( BI ) + ABS( DX ) + ELSE + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + EX = E( 1 )*X( 2, J ) + WORK( N+1 ) = BI - DX - EX + WORK( 1 ) = ABS( BI ) + ABS( DX ) + ABS( EX ) + DO 30 I = 2, N - 1 + BI = B( I, J ) + CX = E( I-1 )*X( I-1, J ) + DX = D( I )*X( I, J ) + EX = E( I )*X( I+1, J ) + WORK( N+I ) = BI - CX - DX - EX + WORK( I ) = ABS( BI ) + ABS( CX ) + ABS( DX ) + ABS( EX ) + 30 CONTINUE + BI = B( N, J ) + CX = E( N-1 )*X( N-1, J ) + DX = D( N )*X( N, J ) + WORK( N+N ) = BI - CX - DX + WORK( N ) = ABS( BI ) + ABS( CX ) + ABS( DX ) + END IF +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + S = ZERO + DO 40 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 40 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL SPTTRS( N, 1, DF, EF, WORK( N+1 ), N, INFO ) + CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* + DO 50 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 50 CONTINUE + IX = ISAMAX( N, WORK, 1 ) + FERR( J ) = WORK( IX ) +* +* Estimate the norm of inv(A). +* +* Solve M(A) * x = e, where M(A) = (m(i,j)) is given by +* +* m(i,j) = abs(A(i,j)), i = j, +* m(i,j) = -abs(A(i,j)), i .ne. j, +* +* and e = [ 1, 1, ..., 1 ]'. Note M(A) = M(L)*D*M(L)'. +* +* Solve M(L) * x = e. +* + WORK( 1 ) = ONE + DO 60 I = 2, N + WORK( I ) = ONE + WORK( I-1 )*ABS( EF( I-1 ) ) + 60 CONTINUE +* +* Solve D * M(L)' * x = b. +* + WORK( N ) = WORK( N ) / DF( N ) + DO 70 I = N - 1, 1, -1 + WORK( I ) = WORK( I ) / DF( I ) + WORK( I+1 )*ABS( EF( I ) ) + 70 CONTINUE +* +* Compute norm(inv(A)) = max(x(i)), 1<=i<=n. +* + IX = ISAMAX( N, WORK, 1 ) + FERR( J ) = FERR( J )*ABS( WORK( IX ) ) +* +* Normalize error. +* + LSTRES = ZERO + DO 80 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 80 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 90 CONTINUE +* + RETURN +* +* End of SPTRFS +* + END diff --git a/costa/native/external/lapack/sptsv.f b/costa/native/external/lapack/sptsv.f new file mode 100644 index 000000000..01942c1d4 --- /dev/null +++ b/costa/native/external/lapack/sptsv.f @@ -0,0 +1,100 @@ + SUBROUTINE SPTSV( N, NRHS, D, E, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 25, 1997 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL B( LDB, * ), D( * ), E( * ) +* .. +* +* Purpose +* ======= +* +* SPTSV computes the solution to a real system of linear equations +* A*X = B, where A is an N-by-N symmetric positive definite tridiagonal +* matrix, and X and B are N-by-NRHS matrices. +* +* A is factored as A = L*D*L**T, and the factored form of A is then +* used to solve the system of equations. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* A. On exit, the n diagonal elements of the diagonal matrix +* D from the factorization A = L*D*L**T. +* +* E (input/output) REAL array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix A. On exit, the (n-1) subdiagonal elements of the +* unit bidiagonal factor L from the L*D*L**T factorization of +* A. (E can also be regarded as the superdiagonal of the unit +* bidiagonal factor U from the U**T*D*U factorization of A.) +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i is not +* positive definite, and the solution has not been +* computed. The factorization has not been completed +* unless i = N. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL SPTTRF, SPTTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPTSV ', -INFO ) + RETURN + END IF +* +* Compute the L*D*L' (or U'*D*U) factorization of A. +* + CALL SPTTRF( N, D, E, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL SPTTRS( N, NRHS, D, E, B, LDB, INFO ) + END IF + RETURN +* +* End of SPTSV +* + END diff --git a/costa/native/external/lapack/sptsvx.f b/costa/native/external/lapack/sptsvx.f new file mode 100644 index 000000000..278ccbfc7 --- /dev/null +++ b/costa/native/external/lapack/sptsvx.f @@ -0,0 +1,235 @@ + SUBROUTINE SPTSVX( FACT, N, NRHS, D, E, DF, EF, B, LDB, X, LDX, + $ RCOND, FERR, BERR, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER FACT + INTEGER INFO, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + REAL B( LDB, * ), BERR( * ), D( * ), DF( * ), + $ E( * ), EF( * ), FERR( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SPTSVX uses the factorization A = L*D*L**T to compute the solution +* to a real system of linear equations A*X = B, where A is an N-by-N +* symmetric positive definite tridiagonal matrix and X and B are +* N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'N', the matrix A is factored as A = L*D*L**T, where L +* is a unit lower bidiagonal matrix and D is diagonal. The +* factorization can also be regarded as having the form +* A = U**T*D*U. +* +* 2. If the leading i-by-i principal minor is not positive definite, +* then the routine returns with INFO = i. Otherwise, the factored +* form of A is used to estimate the condition number of the matrix +* A. If the reciprocal of the condition number is less than machine +* precision, INFO = N+1 is returned as a warning, but the routine +* still goes on to solve for X and compute error bounds as +* described below. +* +* 3. The system of equations is solved for X using the factored form +* of A. +* +* 4. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of A has been +* supplied on entry. +* = 'F': On entry, DF and EF contain the factored form of A. +* D, E, DF, and EF will not be modified. +* = 'N': The matrix A will be copied to DF and EF and +* factored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the tridiagonal matrix A. +* +* E (input) REAL array, dimension (N-1) +* The (n-1) subdiagonal elements of the tridiagonal matrix A. +* +* DF (input or output) REAL array, dimension (N) +* If FACT = 'F', then DF is an input argument and on entry +* contains the n diagonal elements of the diagonal matrix D +* from the L*D*L**T factorization of A. +* If FACT = 'N', then DF is an output argument and on exit +* contains the n diagonal elements of the diagonal matrix D +* from the L*D*L**T factorization of A. +* +* EF (input or output) REAL array, dimension (N-1) +* If FACT = 'F', then EF is an input argument and on entry +* contains the (n-1) subdiagonal elements of the unit +* bidiagonal factor L from the L*D*L**T factorization of A. +* If FACT = 'N', then EF is an output argument and on exit +* contains the (n-1) subdiagonal elements of the unit +* bidiagonal factor L from the L*D*L**T factorization of A. +* +* B (input) REAL array, dimension (LDB,NRHS) +* The N-by-NRHS right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) REAL array, dimension (LDX,NRHS) +* If INFO = 0 of INFO = N+1, the N-by-NRHS solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) REAL +* The reciprocal condition number of the matrix A. If RCOND +* is less than the machine precision (in particular, if +* RCOND = 0), the matrix is singular to working precision. +* This condition is indicated by a return code of INFO > 0. +* +* FERR (output) REAL array, dimension (NRHS) +* The forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in any +* element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) REAL array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: the leading minor of order i of A is +* not positive definite, so the factorization +* could not be completed, and the solution has not +* been computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT + REAL ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANST + EXTERNAL LSAME, SLAMCH, SLANST +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLACPY, SPTCON, SPTRFS, SPTTRF, SPTTRS, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPTSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the L*D*L' (or U'*D*U) factorization of A. +* + CALL SCOPY( N, D, 1, DF, 1 ) + IF( N.GT.1 ) + $ CALL SCOPY( N-1, E, 1, EF, 1 ) + CALL SPTTRF( N, DF, EF, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = SLANST( '1', N, D, E ) +* +* Compute the reciprocal of the condition number of A. +* + CALL SPTCON( N, DF, EF, ANORM, RCOND, WORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution vectors X. +* + CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL SPTTRS( N, NRHS, DF, EF, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL SPTRFS( N, NRHS, D, E, DF, EF, B, LDB, X, LDX, FERR, BERR, + $ WORK, INFO ) +* + RETURN +* +* End of SPTSVX +* + END diff --git a/costa/native/external/lapack/spttrf.f b/costa/native/external/lapack/spttrf.f new file mode 100644 index 000000000..978ad2b88 --- /dev/null +++ b/costa/native/external/lapack/spttrf.f @@ -0,0 +1,153 @@ + SUBROUTINE SPTTRF( N, D, E, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) +* .. +* +* Purpose +* ======= +* +* SPTTRF computes the L*D*L' factorization of a real symmetric +* positive definite tridiagonal matrix A. The factorization may also +* be regarded as having the form A = U'*D*U. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* A. On exit, the n diagonal elements of the diagonal matrix +* D from the L*D*L' factorization of A. +* +* E (input/output) REAL array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix A. On exit, the (n-1) subdiagonal elements of the +* unit bidiagonal factor L from the L*D*L' factorization of A. +* E can also be regarded as the superdiagonal of the unit +* bidiagonal factor U from the U'*D*U factorization of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, the leading minor of order k is not +* positive definite; if k < N, the factorization could not +* be completed, while if k = N, the factorization was +* completed, but D(N) = 0. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, I4 + REAL EI +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'SPTTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the L*D*L' (or U'*D*U) factorization of A. +* + I4 = MOD( N-1, 4 ) + DO 10 I = 1, I4 + IF( D( I ).LE.ZERO ) THEN + INFO = I + GO TO 30 + END IF + EI = E( I ) + E( I ) = EI / D( I ) + D( I+1 ) = D( I+1 ) - E( I )*EI + 10 CONTINUE +* + DO 20 I = I4 + 1, N - 4, 4 +* +* Drop out of the loop if d(i) <= 0: the matrix is not positive +* definite. +* + IF( D( I ).LE.ZERO ) THEN + INFO = I + GO TO 30 + END IF +* +* Solve for e(i) and d(i+1). +* + EI = E( I ) + E( I ) = EI / D( I ) + D( I+1 ) = D( I+1 ) - E( I )*EI +* + IF( D( I+1 ).LE.ZERO ) THEN + INFO = I + 1 + GO TO 30 + END IF +* +* Solve for e(i+1) and d(i+2). +* + EI = E( I+1 ) + E( I+1 ) = EI / D( I+1 ) + D( I+2 ) = D( I+2 ) - E( I+1 )*EI +* + IF( D( I+2 ).LE.ZERO ) THEN + INFO = I + 2 + GO TO 30 + END IF +* +* Solve for e(i+2) and d(i+3). +* + EI = E( I+2 ) + E( I+2 ) = EI / D( I+2 ) + D( I+3 ) = D( I+3 ) - E( I+2 )*EI +* + IF( D( I+3 ).LE.ZERO ) THEN + INFO = I + 3 + GO TO 30 + END IF +* +* Solve for e(i+3) and d(i+4). +* + EI = E( I+3 ) + E( I+3 ) = EI / D( I+3 ) + D( I+4 ) = D( I+4 ) - E( I+3 )*EI + 20 CONTINUE +* +* Check d(n) for positive definiteness. +* + IF( D( N ).LE.ZERO ) + $ INFO = N +* + 30 CONTINUE + RETURN +* +* End of SPTTRF +* + END diff --git a/costa/native/external/lapack/spttrs.f b/costa/native/external/lapack/spttrs.f new file mode 100644 index 000000000..e6b08432f --- /dev/null +++ b/costa/native/external/lapack/spttrs.f @@ -0,0 +1,115 @@ + SUBROUTINE SPTTRS( N, NRHS, D, E, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL B( LDB, * ), D( * ), E( * ) +* .. +* +* Purpose +* ======= +* +* SPTTRS solves a tridiagonal system of the form +* A * X = B +* using the L*D*L' factorization of A computed by SPTTRF. D is a +* diagonal matrix specified in the vector D, L is a unit bidiagonal +* matrix whose subdiagonal is specified in the vector E, and X and B +* are N by NRHS matrices. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the tridiagonal matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the diagonal matrix D from the +* L*D*L' factorization of A. +* +* E (input) REAL array, dimension (N-1) +* The (n-1) subdiagonal elements of the unit bidiagonal factor +* L from the L*D*L' factorization of A. E can also be regarded +* as the superdiagonal of the unit bidiagonal factor U from the +* factorization A = U'*D*U. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the right hand side vectors B for the system of +* linear equations. +* On exit, the solution vectors, X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER J, JB, NB +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SPTTS2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPTTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Determine the number of right-hand sides to solve at a time. +* + IF( NRHS.EQ.1 ) THEN + NB = 1 + ELSE + NB = MAX( 1, ILAENV( 1, 'SPTTRS', ' ', N, NRHS, -1, -1 ) ) + END IF +* + IF( NB.GE.NRHS ) THEN + CALL SPTTS2( N, NRHS, D, E, B, LDB ) + ELSE + DO 10 J = 1, NRHS, NB + JB = MIN( NRHS-J+1, NB ) + CALL SPTTS2( N, JB, D, E, B( 1, J ), LDB ) + 10 CONTINUE + END IF +* + RETURN +* +* End of SPTTRS +* + END diff --git a/costa/native/external/lapack/sptts2.f b/costa/native/external/lapack/sptts2.f new file mode 100644 index 000000000..df5244515 --- /dev/null +++ b/costa/native/external/lapack/sptts2.f @@ -0,0 +1,94 @@ + SUBROUTINE SPTTS2( N, NRHS, D, E, B, LDB ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL B( LDB, * ), D( * ), E( * ) +* .. +* +* Purpose +* ======= +* +* SPTTS2 solves a tridiagonal system of the form +* A * X = B +* using the L*D*L' factorization of A computed by SPTTRF. D is a +* diagonal matrix specified in the vector D, L is a unit bidiagonal +* matrix whose subdiagonal is specified in the vector E, and X and B +* are N by NRHS matrices. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the tridiagonal matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the diagonal matrix D from the +* L*D*L' factorization of A. +* +* E (input) REAL array, dimension (N-1) +* The (n-1) subdiagonal elements of the unit bidiagonal factor +* L from the L*D*L' factorization of A. E can also be regarded +* as the superdiagonal of the unit bidiagonal factor U from the +* factorization A = U'*D*U. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the right hand side vectors B for the system of +* linear equations. +* On exit, the solution vectors, X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Subroutines .. + EXTERNAL SSCAL +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + IF( N.EQ.1 ) + $ CALL SSCAL( NRHS, 1. / D( 1 ), B, LDB ) + RETURN + END IF +* +* Solve A * X = B using the factorization A = L*D*L', +* overwriting each right hand side vector with its solution. +* + DO 30 J = 1, NRHS +* +* Solve L * x = b. +* + DO 10 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 ) + 10 CONTINUE +* +* Solve D * L' * x = b. +* + B( N, J ) = B( N, J ) / D( N ) + DO 20 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) / D( I ) - B( I+1, J )*E( I ) + 20 CONTINUE + 30 CONTINUE +* + RETURN +* +* End of SPTTS2 +* + END diff --git a/costa/native/external/lapack/srscl.f b/costa/native/external/lapack/srscl.f new file mode 100644 index 000000000..a61c0021b --- /dev/null +++ b/costa/native/external/lapack/srscl.f @@ -0,0 +1,115 @@ + SUBROUTINE SRSCL( N, SA, SX, INCX ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INCX, N + REAL SA +* .. +* .. Array Arguments .. + REAL SX( * ) +* .. +* +* Purpose +* ======= +* +* SRSCL multiplies an n-element real vector x by the real scalar 1/a. +* This is done without overflow or underflow as long as +* the final result x/a does not overflow or underflow. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of components of the vector x. +* +* SA (input) REAL +* The scalar a which is used to divide each component of x. +* SA must be >= 0, or the subroutine will divide by zero. +* +* SX (input/output) REAL array, dimension +* (1+(N-1)*abs(INCX)) +* The n-element vector x. +* +* INCX (input) INTEGER +* The increment between successive values of the vector SX. +* > 0: SX(1) = X(1) and SX(1+(i-1)*INCX) = x(i), 1< i<= n +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + REAL BIGNUM, CDEN, CDEN1, CNUM, CNUM1, MUL, SMLNUM +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLABAD, SSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Initialize the denominator to SA and the numerator to 1. +* + CDEN = SA + CNUM = ONE +* + 10 CONTINUE + CDEN1 = CDEN*SMLNUM + CNUM1 = CNUM / BIGNUM + IF( ABS( CDEN1 ).GT.ABS( CNUM ) .AND. CNUM.NE.ZERO ) THEN +* +* Pre-multiply X by SMLNUM if CDEN is large compared to CNUM. +* + MUL = SMLNUM + DONE = .FALSE. + CDEN = CDEN1 + ELSE IF( ABS( CNUM1 ).GT.ABS( CDEN ) ) THEN +* +* Pre-multiply X by BIGNUM if CDEN is small compared to CNUM. +* + MUL = BIGNUM + DONE = .FALSE. + CNUM = CNUM1 + ELSE +* +* Multiply X by CNUM / CDEN and return. +* + MUL = CNUM / CDEN + DONE = .TRUE. + END IF +* +* Scale the vector X by MUL +* + CALL SSCAL( N, MUL, SX, INCX ) +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of SRSCL +* + END diff --git a/costa/native/external/lapack/ssbev.f b/costa/native/external/lapack/ssbev.f new file mode 100644 index 000000000..694adf023 --- /dev/null +++ b/costa/native/external/lapack/ssbev.f @@ -0,0 +1,206 @@ + SUBROUTINE SSBEV( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, N +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSBEV computes all the eigenvalues and, optionally, eigenvectors of +* a real symmetric band matrix A. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) REAL array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, AB is overwritten by values generated during the +* reduction to tridiagonal form. If UPLO = 'U', the first +* superdiagonal and the diagonal of the tridiagonal matrix T +* are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +* the diagonal and first subdiagonal of T are returned in the +* first two rows of AB. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD + 1. +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) REAL array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +* eigenvectors of the matrix A, with the i-th column of Z +* holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) REAL array, dimension (max(1,3*N-2)) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, WANTZ + INTEGER IINFO, IMAX, INDE, INDWRK, ISCALE + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANSB + EXTERNAL LSAME, SLAMCH, SLANSB +* .. +* .. External Subroutines .. + EXTERNAL SLASCL, SSBTRD, SSCAL, SSTEQR, SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSBEV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( LOWER ) THEN + W( 1 ) = AB( 1, 1 ) + ELSE + W( 1 ) = AB( KD+1, 1 ) + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = SLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL SLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL SLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call SSBTRD to reduce symmetric band matrix to tridiagonal form. +* + INDE = 1 + INDWRK = INDE + N + CALL SSBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, WORK( INDE ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call SSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL SSTEQR( JOBZ, N, W, WORK( INDE ), Z, LDZ, WORK( INDWRK ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + RETURN +* +* End of SSBEV +* + END diff --git a/costa/native/external/lapack/ssbevd.f b/costa/native/external/lapack/ssbevd.f new file mode 100644 index 000000000..d217793d6 --- /dev/null +++ b/costa/native/external/lapack/ssbevd.f @@ -0,0 +1,265 @@ + SUBROUTINE SSBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, + $ LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSBEVD computes all the eigenvalues and, optionally, eigenvectors of +* a real symmetric band matrix A. If eigenvectors are desired, it uses +* a divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) REAL array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, AB is overwritten by values generated during the +* reduction to tridiagonal form. If UPLO = 'U', the first +* superdiagonal and the diagonal of the tridiagonal matrix T +* are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +* the diagonal and first subdiagonal of T are returned in the +* first two rows of AB. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD + 1. +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) REAL array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +* eigenvectors of the matrix A, with the i-th column of Z +* holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) REAL array, +* dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* IF N <= 1, LWORK must be at least 1. +* If JOBZ = 'N' and N > 2, LWORK must be at least 2*N. +* If JOBZ = 'V' and N > 2, LWORK must be at least +* ( 1 + 5*N + 2*N**2 ). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array LIWORK. +* If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +* If JOBZ = 'V' and N > 2, LIWORK must be at least 3 + 5*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, INDE, INDWK2, INDWRK, ISCALE, LIWMIN, + $ LLWRK2, LWMIN + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANSB + EXTERNAL LSAME, SLAMCH, SLANSB +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SLACPY, SLASCL, SSBTRD, SSCAL, SSTEDC, + $ SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 5*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + END IF + END IF + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSBEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AB( 1, 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = SLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL SLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL SLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call SSBTRD to reduce symmetric band matrix to tridiagonal form. +* + INDE = 1 + INDWRK = INDE + N + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 + CALL SSBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, WORK( INDE ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call SSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL SSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO ) + CALL SGEMM( 'N', 'N', N, N, N, ONE, Z, LDZ, WORK( INDWRK ), N, + $ ZERO, WORK( INDWK2 ), N ) + CALL SLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL SSCAL( N, ONE / SIGMA, W, 1 ) +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of SSBEVD +* + END diff --git a/costa/native/external/lapack/ssbevx.f b/costa/native/external/lapack/ssbevx.f new file mode 100644 index 000000000..333e44592 --- /dev/null +++ b/costa/native/external/lapack/ssbevx.f @@ -0,0 +1,411 @@ + SUBROUTINE SSBEVX( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, LDQ, VL, + $ VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, IWORK, + $ IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL AB( LDAB, * ), Q( LDQ, * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSBEVX computes selected eigenvalues and, optionally, eigenvectors +* of a real symmetric band matrix A. Eigenvalues and eigenvectors can +* be selected by specifying either a range of values or a range of +* indices for the desired eigenvalues. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found; +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found; +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) REAL array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, AB is overwritten by values generated during the +* reduction to tridiagonal form. If UPLO = 'U', the first +* superdiagonal and the diagonal of the tridiagonal matrix T +* are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +* the diagonal and first subdiagonal of T are returned in the +* first two rows of AB. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD + 1. +* +* Q (output) REAL array, dimension (LDQ, N) +* If JOBZ = 'V', the N-by-N orthogonal matrix used in the +* reduction to tridiagonal form. +* If JOBZ = 'N', the array Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. If JOBZ = 'V', then +* LDQ >= max(1,N). +* +* VL (input) REAL +* VU (input) REAL +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) REAL +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing AB to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*SLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*SLAMCH('S'). +* +* See "Computing Small Singular Values of Bidiagonal Matrices +* with Guaranteed High Relative Accuracy," by Demmel and +* Kahan, LAPACK Working Note #3. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) REAL array, dimension (N) +* The first M elements contain the selected eigenvalues in +* ascending order. +* +* Z (output) REAL array, dimension (LDZ, max(1,M)) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If an eigenvector fails to converge, then that column of Z +* contains the latest approximation to the eigenvector, and the +* index of the eigenvector is returned in IFAIL. +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) REAL array, dimension (7*N) +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, then i eigenvectors failed to converge. +* Their indices are stored in array IFAIL. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWO, INDWRK, ISCALE, ITMP1, J, JJ, + $ NSPLIT + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANSB + EXTERNAL LSAME, SLAMCH, SLANSB +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMV, SLACPY, SLASCL, SSBTRD, SSCAL, + $ SSTEBZ, SSTEIN, SSTEQR, SSTERF, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LOWER = LSAME( UPLO, 'L' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( WANTZ .AND. LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -11 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -13 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -18 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSBEVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + M = 1 + IF( LOWER ) THEN + TMP1 = AB( 1, 1 ) + ELSE + TMP1 = AB( KD+1, 1 ) + END IF + IF( VALEIG ) THEN + IF( .NOT.( VL.LT.TMP1 .AND. VU.GE.TMP1 ) ) + $ M = 0 + END IF + IF( M.EQ.1 ) THEN + W( 1 ) = TMP1 + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF ( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + ENDIF + ANRM = SLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL SLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL SLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call SSBTRD to reduce symmetric band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDWRK = INDE + N + CALL SSBTRD( JOBZ, UPLO, N, KD, AB, LDAB, WORK( INDD ), + $ WORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call SSTERF or SSTEQR. If this fails for some +* eigenvalue, then try SSTEBZ. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ ( ABSTOL.LE.ZERO ) ) THEN + CALL SCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL SLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL SSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by SSTEIN. +* + DO 20 J = 1, M + CALL SCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL SGEMV( 'N', N, N, ONE, Q, LDQ, WORK, 1, ZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* + RETURN +* +* End of SSBEVX +* + END diff --git a/costa/native/external/lapack/ssbgst.f b/costa/native/external/lapack/ssbgst.f new file mode 100644 index 000000000..915a30aee --- /dev/null +++ b/costa/native/external/lapack/ssbgst.f @@ -0,0 +1,1346 @@ + SUBROUTINE SSBGST( VECT, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, X, + $ LDX, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO, VECT + INTEGER INFO, KA, KB, LDAB, LDBB, LDX, N +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), BB( LDBB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SSBGST reduces a real symmetric-definite banded generalized +* eigenproblem A*x = lambda*B*x to standard form C*y = lambda*y, +* such that C has the same bandwidth as A. +* +* B must have been previously factorized as S**T*S by SPBSTF, using a +* split Cholesky factorization. A is overwritten by C = X**T*A*X, where +* X = S**(-1)*Q and Q is an orthogonal matrix chosen to preserve the +* bandwidth of A. +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* = 'N': do not form the transformation matrix X; +* = 'V': form X. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* KA (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KA >= 0. +* +* KB (input) INTEGER +* The number of superdiagonals of the matrix B if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KA >= KB >= 0. +* +* AB (input/output) REAL array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first ka+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +* +* On exit, the transformed matrix X**T*A*X, stored in the same +* format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KA+1. +* +* BB (input) REAL array, dimension (LDBB,N) +* The banded factor S from the split Cholesky factorization of +* B, as returned by SPBSTF, stored in the first KB+1 rows of +* the array. +* +* LDBB (input) INTEGER +* The leading dimension of the array BB. LDBB >= KB+1. +* +* X (output) REAL array, dimension (LDX,N) +* If VECT = 'V', the n-by-n matrix X. +* If VECT = 'N', the array X is not referenced. +* +* LDX (input) INTEGER +* The leading dimension of the array X. +* LDX >= max(1,N) if VECT = 'V'; LDX >= 1 otherwise. +* +* WORK (workspace) REAL array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPDATE, UPPER, WANTX + INTEGER I, I0, I1, I2, INCA, J, J1, J1T, J2, J2T, K, + $ KA1, KB1, KBT, L, M, NR, NRT, NX + REAL BII, RA, RA1, T +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGER, SLAR2V, SLARGV, SLARTG, SLARTV, SLASET, + $ SROT, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTX = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + KA1 = KA + 1 + KB1 = KB + 1 + INFO = 0 + IF( .NOT.WANTX .AND. .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDX.LT.1 .OR. WANTX .AND. LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSBGST', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + INCA = LDAB*KA1 +* +* Initialize X to the unit matrix, if needed +* + IF( WANTX ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, X, LDX ) +* +* Set M to the splitting point m. It must be the same value as is +* used in SPBSTF. The chosen value allows the arrays WORK and RWORK +* to be of dimension (N). +* + M = ( N+KB ) / 2 +* +* The routine works in two phases, corresponding to the two halves +* of the split Cholesky factorization of B as S**T*S where +* +* S = ( U ) +* ( M L ) +* +* with U upper triangular of order m, and L lower triangular of +* order n-m. S has the same bandwidth as B. +* +* S is treated as a product of elementary matrices: +* +* S = S(m)*S(m-1)*...*S(2)*S(1)*S(m+1)*S(m+2)*...*S(n-1)*S(n) +* +* where S(i) is determined by the i-th row of S. +* +* In phase 1, the index i takes the values n, n-1, ... , m+1; +* in phase 2, it takes the values 1, 2, ... , m. +* +* For each value of i, the current matrix A is updated by forming +* inv(S(i))**T*A*inv(S(i)). This creates a triangular bulge outside +* the band of A. The bulge is then pushed down toward the bottom of +* A in phase 1, and up toward the top of A in phase 2, by applying +* plane rotations. +* +* There are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1 +* of them are linearly independent, so annihilating a bulge requires +* only 2*kb-1 plane rotations. The rotations are divided into a 1st +* set of kb-1 rotations, and a 2nd set of kb rotations. +* +* Wherever possible, rotations are generated and applied in vector +* operations of length NR between the indices J1 and J2 (sometimes +* replaced by modified values NRT, J1T or J2T). +* +* The cosines and sines of the rotations are stored in the array +* WORK. The cosines of the 1st set of rotations are stored in +* elements n+2:n+m-kb-1 and the sines of the 1st set in elements +* 2:m-kb-1; the cosines of the 2nd set are stored in elements +* n+m-kb+1:2*n and the sines of the second set in elements m-kb+1:n. +* +* The bulges are not formed explicitly; nonzero elements outside the +* band are created only when they are required for generating new +* rotations; they are stored in the array WORK, in positions where +* they are later overwritten by the sines of the rotations which +* annihilate them. +* +* **************************** Phase 1 ***************************** +* +* The logical structure of this phase is: +* +* UPDATE = .TRUE. +* DO I = N, M + 1, -1 +* use S(i) to update A and create a new bulge +* apply rotations to push all bulges KA positions downward +* END DO +* UPDATE = .FALSE. +* DO I = M + KA + 1, N - 1 +* apply rotations to push all bulges KA positions downward +* END DO +* +* To avoid duplicating code, the two loops are merged. +* + UPDATE = .TRUE. + I = N + 1 + 10 CONTINUE + IF( UPDATE ) THEN + I = I - 1 + KBT = MIN( KB, I-1 ) + I0 = I - 1 + I1 = MIN( N, I+KA ) + I2 = I - KBT + KA1 + IF( I.LT.M+1 ) THEN + UPDATE = .FALSE. + I = I + 1 + I0 = M + IF( KA.EQ.0 ) + $ GO TO 480 + GO TO 10 + END IF + ELSE + I = I + KA + IF( I.GT.N-1 ) + $ GO TO 480 + END IF +* + IF( UPPER ) THEN +* +* Transform A, working with the upper triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**T * A * inv(S(i)) +* + BII = BB( KB1, I ) + DO 20 J = I, I1 + AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII + 20 CONTINUE + DO 30 J = MAX( 1, I-KA ), I + AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII + 30 CONTINUE + DO 60 K = I - KBT, I - 1 + DO 40 J = I - KBT, K + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ BB( J-I+KB1, I )*AB( K-I+KA1, I ) - + $ BB( K-I+KB1, I )*AB( J-I+KA1, I ) + + $ AB( KA1, I )*BB( J-I+KB1, I )* + $ BB( K-I+KB1, I ) + 40 CONTINUE + DO 50 J = MAX( 1, I-KA ), I - KBT - 1 + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ BB( K-I+KB1, I )*AB( J-I+KA1, I ) + 50 CONTINUE + 60 CONTINUE + DO 80 J = I, I1 + DO 70 K = MAX( J-KA, I-KBT ), I - 1 + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ BB( K-I+KB1, I )*AB( I-J+KA1, J ) + 70 CONTINUE + 80 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL SSCAL( N-M, ONE / BII, X( M+1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL SGER( N-M, KBT, -ONE, X( M+1, I ), 1, + $ BB( KB1-KBT, I ), 1, X( M+1, I-KBT ), LDX ) + END IF +* +* store a(i,i1) in RA1 for use in next loop over K +* + RA1 = AB( I-I1+KA1, I1 ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions down toward the bottom of the +* band +* + DO 130 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN +* +* generate rotation to annihilate a(i,i-k+ka+1) +* + CALL SLARTG( AB( K+1, I-K+KA ), RA1, + $ WORK( N+I-K+KA-M ), WORK( I-K+KA-M ), + $ RA ) +* +* create nonzero element a(i-k,i-k+ka+1) outside the +* band and store it in WORK(i-k) +* + T = -BB( KB1-K, I )*RA1 + WORK( I-K ) = WORK( N+I-K+KA-M )*T - + $ WORK( I-K+KA-M )*AB( 1, I-K+KA ) + AB( 1, I-K+KA ) = WORK( I-K+KA-M )*T + + $ WORK( N+I-K+KA-M )*AB( 1, I-K+KA ) + RA1 = RA + END IF + END IF + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MAX( J2, I+2*KA-K+1 ) + ELSE + J2T = J2 + END IF + NRT = ( N-J2T+KA ) / KA1 + DO 90 J = J2T, J1, KA1 +* +* create nonzero element a(j-ka,j+1) outside the band +* and store it in WORK(j-m) +* + WORK( J-M ) = WORK( J-M )*AB( 1, J+1 ) + AB( 1, J+1 ) = WORK( N+J-M )*AB( 1, J+1 ) + 90 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL SLARGV( NRT, AB( 1, J2T ), INCA, WORK( J2T-M ), KA1, + $ WORK( N+J2T-M ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the right +* + DO 100 L = 1, KA - 1 + CALL SLARTV( NR, AB( KA1-L, J2 ), INCA, + $ AB( KA-L, J2+1 ), INCA, WORK( N+J2-M ), + $ WORK( J2-M ), KA1 ) + 100 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL SLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ), + $ AB( KA, J2+1 ), INCA, WORK( N+J2-M ), + $ WORK( J2-M ), KA1 ) +* + END IF +* +* start applying rotations in 1st set from the left +* + DO 110 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, + $ WORK( N+J2-M ), WORK( J2-M ), KA1 ) + 110 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 120 J = J2, J1, KA1 + CALL SROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ WORK( N+J-M ), WORK( J-M ) ) + 120 CONTINUE + END IF + 130 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.LE.N .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i-kbt,i-kbt+ka+1) outside the +* band and store it in WORK(i-kbt) +* + WORK( I-KBT ) = -BB( KB1-KBT, I )*RA1 + END IF + END IF +* + DO 170 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1 + ELSE + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + END IF +* +* finish applying rotations in 2nd set from the left +* + DO 140 L = KB - K, 1, -1 + NRT = ( N-J2+KA+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( L, J2-L+1 ), INCA, + $ AB( L+1, J2-L+1 ), INCA, WORK( N+J2-KA ), + $ WORK( J2-KA ), KA1 ) + 140 CONTINUE + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + DO 150 J = J1, J2, -KA1 + WORK( J ) = WORK( J-KA ) + WORK( N+J ) = WORK( N+J-KA ) + 150 CONTINUE + DO 160 J = J2, J1, KA1 +* +* create nonzero element a(j-ka,j+1) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( 1, J+1 ) + AB( 1, J+1 ) = WORK( N+J )*AB( 1, J+1 ) + 160 CONTINUE + IF( UPDATE ) THEN + IF( I-K.LT.N-KA .AND. K.LE.KBT ) + $ WORK( I-K+KA ) = WORK( I-K ) + END IF + 170 CONTINUE +* + DO 210 K = KB, 1, -1 + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL SLARGV( NR, AB( 1, J2 ), INCA, WORK( J2 ), KA1, + $ WORK( N+J2 ), KA1 ) +* +* apply rotations in 2nd set from the right +* + DO 180 L = 1, KA - 1 + CALL SLARTV( NR, AB( KA1-L, J2 ), INCA, + $ AB( KA-L, J2+1 ), INCA, WORK( N+J2 ), + $ WORK( J2 ), KA1 ) + 180 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL SLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ), + $ AB( KA, J2+1 ), INCA, WORK( N+J2 ), + $ WORK( J2 ), KA1 ) +* + END IF +* +* start applying rotations in 2nd set from the left +* + DO 190 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, WORK( N+J2 ), + $ WORK( J2 ), KA1 ) + 190 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 200 J = J2, J1, KA1 + CALL SROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ WORK( N+J ), WORK( J ) ) + 200 CONTINUE + END IF + 210 CONTINUE +* + DO 230 K = 1, KB - 1 + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 +* +* finish applying rotations in 1st set from the left +* + DO 220 L = KB - K, 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, + $ WORK( N+J2-M ), WORK( J2-M ), KA1 ) + 220 CONTINUE + 230 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 240 J = N - 1, I - KB + 2*KA + 1, -1 + WORK( N+J-M ) = WORK( N+J-KA-M ) + WORK( J-M ) = WORK( J-KA-M ) + 240 CONTINUE + END IF +* + ELSE +* +* Transform A, working with the lower triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**T * A * inv(S(i)) +* + BII = BB( 1, I ) + DO 250 J = I, I1 + AB( J-I+1, I ) = AB( J-I+1, I ) / BII + 250 CONTINUE + DO 260 J = MAX( 1, I-KA ), I + AB( I-J+1, J ) = AB( I-J+1, J ) / BII + 260 CONTINUE + DO 290 K = I - KBT, I - 1 + DO 270 J = I - KBT, K + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ BB( I-J+1, J )*AB( I-K+1, K ) - + $ BB( I-K+1, K )*AB( I-J+1, J ) + + $ AB( 1, I )*BB( I-J+1, J )* + $ BB( I-K+1, K ) + 270 CONTINUE + DO 280 J = MAX( 1, I-KA ), I - KBT - 1 + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ BB( I-K+1, K )*AB( I-J+1, J ) + 280 CONTINUE + 290 CONTINUE + DO 310 J = I, I1 + DO 300 K = MAX( J-KA, I-KBT ), I - 1 + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ BB( I-K+1, K )*AB( J-I+1, I ) + 300 CONTINUE + 310 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL SSCAL( N-M, ONE / BII, X( M+1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL SGER( N-M, KBT, -ONE, X( M+1, I ), 1, + $ BB( KBT+1, I-KBT ), LDBB-1, + $ X( M+1, I-KBT ), LDX ) + END IF +* +* store a(i1,i) in RA1 for use in next loop over K +* + RA1 = AB( I1-I+1, I ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions down toward the bottom of the +* band +* + DO 360 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN +* +* generate rotation to annihilate a(i-k+ka+1,i) +* + CALL SLARTG( AB( KA1-K, I ), RA1, WORK( N+I-K+KA-M ), + $ WORK( I-K+KA-M ), RA ) +* +* create nonzero element a(i-k+ka+1,i-k) outside the +* band and store it in WORK(i-k) +* + T = -BB( K+1, I-K )*RA1 + WORK( I-K ) = WORK( N+I-K+KA-M )*T - + $ WORK( I-K+KA-M )*AB( KA1, I-K ) + AB( KA1, I-K ) = WORK( I-K+KA-M )*T + + $ WORK( N+I-K+KA-M )*AB( KA1, I-K ) + RA1 = RA + END IF + END IF + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MAX( J2, I+2*KA-K+1 ) + ELSE + J2T = J2 + END IF + NRT = ( N-J2T+KA ) / KA1 + DO 320 J = J2T, J1, KA1 +* +* create nonzero element a(j+1,j-ka) outside the band +* and store it in WORK(j-m) +* + WORK( J-M ) = WORK( J-M )*AB( KA1, J-KA+1 ) + AB( KA1, J-KA+1 ) = WORK( N+J-M )*AB( KA1, J-KA+1 ) + 320 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL SLARGV( NRT, AB( KA1, J2T-KA ), INCA, WORK( J2T-M ), + $ KA1, WORK( N+J2T-M ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the left +* + DO 330 L = 1, KA - 1 + CALL SLARTV( NR, AB( L+1, J2-L ), INCA, + $ AB( L+2, J2-L ), INCA, WORK( N+J2-M ), + $ WORK( J2-M ), KA1 ) + 330 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL SLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ), + $ INCA, WORK( N+J2-M ), WORK( J2-M ), KA1 ) +* + END IF +* +* start applying rotations in 1st set from the right +* + DO 340 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, WORK( N+J2-M ), + $ WORK( J2-M ), KA1 ) + 340 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 350 J = J2, J1, KA1 + CALL SROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ WORK( N+J-M ), WORK( J-M ) ) + 350 CONTINUE + END IF + 360 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.LE.N .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i-kbt+ka+1,i-kbt) outside the +* band and store it in WORK(i-kbt) +* + WORK( I-KBT ) = -BB( KBT+1, I-KBT )*RA1 + END IF + END IF +* + DO 400 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1 + ELSE + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + END IF +* +* finish applying rotations in 2nd set from the right +* + DO 370 L = KB - K, 1, -1 + NRT = ( N-J2+KA+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( KA1-L+1, J2-KA ), INCA, + $ AB( KA1-L, J2-KA+1 ), INCA, + $ WORK( N+J2-KA ), WORK( J2-KA ), KA1 ) + 370 CONTINUE + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + DO 380 J = J1, J2, -KA1 + WORK( J ) = WORK( J-KA ) + WORK( N+J ) = WORK( N+J-KA ) + 380 CONTINUE + DO 390 J = J2, J1, KA1 +* +* create nonzero element a(j+1,j-ka) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( KA1, J-KA+1 ) + AB( KA1, J-KA+1 ) = WORK( N+J )*AB( KA1, J-KA+1 ) + 390 CONTINUE + IF( UPDATE ) THEN + IF( I-K.LT.N-KA .AND. K.LE.KBT ) + $ WORK( I-K+KA ) = WORK( I-K ) + END IF + 400 CONTINUE +* + DO 440 K = KB, 1, -1 + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL SLARGV( NR, AB( KA1, J2-KA ), INCA, WORK( J2 ), KA1, + $ WORK( N+J2 ), KA1 ) +* +* apply rotations in 2nd set from the left +* + DO 410 L = 1, KA - 1 + CALL SLARTV( NR, AB( L+1, J2-L ), INCA, + $ AB( L+2, J2-L ), INCA, WORK( N+J2 ), + $ WORK( J2 ), KA1 ) + 410 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL SLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ), + $ INCA, WORK( N+J2 ), WORK( J2 ), KA1 ) +* + END IF +* +* start applying rotations in 2nd set from the right +* + DO 420 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, WORK( N+J2 ), + $ WORK( J2 ), KA1 ) + 420 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 430 J = J2, J1, KA1 + CALL SROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ WORK( N+J ), WORK( J ) ) + 430 CONTINUE + END IF + 440 CONTINUE +* + DO 460 K = 1, KB - 1 + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 +* +* finish applying rotations in 1st set from the right +* + DO 450 L = KB - K, 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, WORK( N+J2-M ), + $ WORK( J2-M ), KA1 ) + 450 CONTINUE + 460 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 470 J = N - 1, I - KB + 2*KA + 1, -1 + WORK( N+J-M ) = WORK( N+J-KA-M ) + WORK( J-M ) = WORK( J-KA-M ) + 470 CONTINUE + END IF +* + END IF +* + GO TO 10 +* + 480 CONTINUE +* +* **************************** Phase 2 ***************************** +* +* The logical structure of this phase is: +* +* UPDATE = .TRUE. +* DO I = 1, M +* use S(i) to update A and create a new bulge +* apply rotations to push all bulges KA positions upward +* END DO +* UPDATE = .FALSE. +* DO I = M - KA - 1, 2, -1 +* apply rotations to push all bulges KA positions upward +* END DO +* +* To avoid duplicating code, the two loops are merged. +* + UPDATE = .TRUE. + I = 0 + 490 CONTINUE + IF( UPDATE ) THEN + I = I + 1 + KBT = MIN( KB, M-I ) + I0 = I + 1 + I1 = MAX( 1, I-KA ) + I2 = I + KBT - KA1 + IF( I.GT.M ) THEN + UPDATE = .FALSE. + I = I - 1 + I0 = M + 1 + IF( KA.EQ.0 ) + $ RETURN + GO TO 490 + END IF + ELSE + I = I - KA + IF( I.LT.2 ) + $ RETURN + END IF +* + IF( I.LT.M-KBT ) THEN + NX = M + ELSE + NX = N + END IF +* + IF( UPPER ) THEN +* +* Transform A, working with the upper triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**T * A * inv(S(i)) +* + BII = BB( KB1, I ) + DO 500 J = I1, I + AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII + 500 CONTINUE + DO 510 J = I, MIN( N, I+KA ) + AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII + 510 CONTINUE + DO 540 K = I + 1, I + KBT + DO 520 J = K, I + KBT + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ BB( I-J+KB1, J )*AB( I-K+KA1, K ) - + $ BB( I-K+KB1, K )*AB( I-J+KA1, J ) + + $ AB( KA1, I )*BB( I-J+KB1, J )* + $ BB( I-K+KB1, K ) + 520 CONTINUE + DO 530 J = I + KBT + 1, MIN( N, I+KA ) + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ BB( I-K+KB1, K )*AB( I-J+KA1, J ) + 530 CONTINUE + 540 CONTINUE + DO 560 J = I1, I + DO 550 K = I + 1, MIN( J+KA, I+KBT ) + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ BB( I-K+KB1, K )*AB( J-I+KA1, I ) + 550 CONTINUE + 560 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL SSCAL( NX, ONE / BII, X( 1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL SGER( NX, KBT, -ONE, X( 1, I ), 1, BB( KB, I+1 ), + $ LDBB-1, X( 1, I+1 ), LDX ) + END IF +* +* store a(i1,i) in RA1 for use in next loop over K +* + RA1 = AB( I1-I+KA1, I ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions up toward the top of the band +* + DO 610 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN +* +* generate rotation to annihilate a(i+k-ka-1,i) +* + CALL SLARTG( AB( K+1, I ), RA1, WORK( N+I+K-KA ), + $ WORK( I+K-KA ), RA ) +* +* create nonzero element a(i+k-ka-1,i+k) outside the +* band and store it in WORK(m-kb+i+k) +* + T = -BB( KB1-K, I+K )*RA1 + WORK( M-KB+I+K ) = WORK( N+I+K-KA )*T - + $ WORK( I+K-KA )*AB( 1, I+K ) + AB( 1, I+K ) = WORK( I+K-KA )*T + + $ WORK( N+I+K-KA )*AB( 1, I+K ) + RA1 = RA + END IF + END IF + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MIN( J2, I-2*KA+K-1 ) + ELSE + J2T = J2 + END IF + NRT = ( J2T+KA-1 ) / KA1 + DO 570 J = J1, J2T, KA1 +* +* create nonzero element a(j-1,j+ka) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( 1, J+KA-1 ) + AB( 1, J+KA-1 ) = WORK( N+J )*AB( 1, J+KA-1 ) + 570 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL SLARGV( NRT, AB( 1, J1+KA ), INCA, WORK( J1 ), KA1, + $ WORK( N+J1 ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the left +* + DO 580 L = 1, KA - 1 + CALL SLARTV( NR, AB( KA1-L, J1+L ), INCA, + $ AB( KA-L, J1+L ), INCA, WORK( N+J1 ), + $ WORK( J1 ), KA1 ) + 580 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL SLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ), + $ AB( KA, J1 ), INCA, WORK( N+J1 ), + $ WORK( J1 ), KA1 ) +* + END IF +* +* start applying rotations in 1st set from the right +* + DO 590 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, WORK( N+J1T ), + $ WORK( J1T ), KA1 ) + 590 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 600 J = J1, J2, KA1 + CALL SROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ WORK( N+J ), WORK( J ) ) + 600 CONTINUE + END IF + 610 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.GT.0 .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i+kbt-ka-1,i+kbt) outside the +* band and store it in WORK(m-kb+i+kbt) +* + WORK( M-KB+I+KBT ) = -BB( KB1-KBT, I+KBT )*RA1 + END IF + END IF +* + DO 650 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1 + ELSE + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + END IF +* +* finish applying rotations in 2nd set from the right +* + DO 620 L = KB - K, 1, -1 + NRT = ( J2+KA+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( L, J1T+KA ), INCA, + $ AB( L+1, J1T+KA-1 ), INCA, + $ WORK( N+M-KB+J1T+KA ), + $ WORK( M-KB+J1T+KA ), KA1 ) + 620 CONTINUE + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + DO 630 J = J1, J2, KA1 + WORK( M-KB+J ) = WORK( M-KB+J+KA ) + WORK( N+M-KB+J ) = WORK( N+M-KB+J+KA ) + 630 CONTINUE + DO 640 J = J1, J2, KA1 +* +* create nonzero element a(j-1,j+ka) outside the band +* and store it in WORK(m-kb+j) +* + WORK( M-KB+J ) = WORK( M-KB+J )*AB( 1, J+KA-1 ) + AB( 1, J+KA-1 ) = WORK( N+M-KB+J )*AB( 1, J+KA-1 ) + 640 CONTINUE + IF( UPDATE ) THEN + IF( I+K.GT.KA1 .AND. K.LE.KBT ) + $ WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K ) + END IF + 650 CONTINUE +* + DO 690 K = KB, 1, -1 + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL SLARGV( NR, AB( 1, J1+KA ), INCA, WORK( M-KB+J1 ), + $ KA1, WORK( N+M-KB+J1 ), KA1 ) +* +* apply rotations in 2nd set from the left +* + DO 660 L = 1, KA - 1 + CALL SLARTV( NR, AB( KA1-L, J1+L ), INCA, + $ AB( KA-L, J1+L ), INCA, + $ WORK( N+M-KB+J1 ), WORK( M-KB+J1 ), KA1 ) + 660 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL SLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ), + $ AB( KA, J1 ), INCA, WORK( N+M-KB+J1 ), + $ WORK( M-KB+J1 ), KA1 ) +* + END IF +* +* start applying rotations in 2nd set from the right +* + DO 670 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, + $ WORK( N+M-KB+J1T ), WORK( M-KB+J1T ), + $ KA1 ) + 670 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 680 J = J1, J2, KA1 + CALL SROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ WORK( N+M-KB+J ), WORK( M-KB+J ) ) + 680 CONTINUE + END IF + 690 CONTINUE +* + DO 710 K = 1, KB - 1 + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 +* +* finish applying rotations in 1st set from the right +* + DO 700 L = KB - K, 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, WORK( N+J1T ), + $ WORK( J1T ), KA1 ) + 700 CONTINUE + 710 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 720 J = 2, MIN( I+KB, M ) - 2*KA - 1 + WORK( N+J ) = WORK( N+J+KA ) + WORK( J ) = WORK( J+KA ) + 720 CONTINUE + END IF +* + ELSE +* +* Transform A, working with the lower triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**T * A * inv(S(i)) +* + BII = BB( 1, I ) + DO 730 J = I1, I + AB( I-J+1, J ) = AB( I-J+1, J ) / BII + 730 CONTINUE + DO 740 J = I, MIN( N, I+KA ) + AB( J-I+1, I ) = AB( J-I+1, I ) / BII + 740 CONTINUE + DO 770 K = I + 1, I + KBT + DO 750 J = K, I + KBT + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ BB( J-I+1, I )*AB( K-I+1, I ) - + $ BB( K-I+1, I )*AB( J-I+1, I ) + + $ AB( 1, I )*BB( J-I+1, I )* + $ BB( K-I+1, I ) + 750 CONTINUE + DO 760 J = I + KBT + 1, MIN( N, I+KA ) + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ BB( K-I+1, I )*AB( J-I+1, I ) + 760 CONTINUE + 770 CONTINUE + DO 790 J = I1, I + DO 780 K = I + 1, MIN( J+KA, I+KBT ) + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ BB( K-I+1, I )*AB( I-J+1, J ) + 780 CONTINUE + 790 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL SSCAL( NX, ONE / BII, X( 1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL SGER( NX, KBT, -ONE, X( 1, I ), 1, BB( 2, I ), 1, + $ X( 1, I+1 ), LDX ) + END IF +* +* store a(i,i1) in RA1 for use in next loop over K +* + RA1 = AB( I-I1+1, I1 ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions up toward the top of the band +* + DO 840 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN +* +* generate rotation to annihilate a(i,i+k-ka-1) +* + CALL SLARTG( AB( KA1-K, I+K-KA ), RA1, + $ WORK( N+I+K-KA ), WORK( I+K-KA ), RA ) +* +* create nonzero element a(i+k,i+k-ka-1) outside the +* band and store it in WORK(m-kb+i+k) +* + T = -BB( K+1, I )*RA1 + WORK( M-KB+I+K ) = WORK( N+I+K-KA )*T - + $ WORK( I+K-KA )*AB( KA1, I+K-KA ) + AB( KA1, I+K-KA ) = WORK( I+K-KA )*T + + $ WORK( N+I+K-KA )*AB( KA1, I+K-KA ) + RA1 = RA + END IF + END IF + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MIN( J2, I-2*KA+K-1 ) + ELSE + J2T = J2 + END IF + NRT = ( J2T+KA-1 ) / KA1 + DO 800 J = J1, J2T, KA1 +* +* create nonzero element a(j+ka,j-1) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( KA1, J-1 ) + AB( KA1, J-1 ) = WORK( N+J )*AB( KA1, J-1 ) + 800 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL SLARGV( NRT, AB( KA1, J1 ), INCA, WORK( J1 ), KA1, + $ WORK( N+J1 ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the right +* + DO 810 L = 1, KA - 1 + CALL SLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ), + $ INCA, WORK( N+J1 ), WORK( J1 ), KA1 ) + 810 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL SLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ), + $ AB( 2, J1-1 ), INCA, WORK( N+J1 ), + $ WORK( J1 ), KA1 ) +* + END IF +* +* start applying rotations in 1st set from the left +* + DO 820 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ WORK( N+J1T ), WORK( J1T ), KA1 ) + 820 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 830 J = J1, J2, KA1 + CALL SROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ WORK( N+J ), WORK( J ) ) + 830 CONTINUE + END IF + 840 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.GT.0 .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i+kbt,i+kbt-ka-1) outside the +* band and store it in WORK(m-kb+i+kbt) +* + WORK( M-KB+I+KBT ) = -BB( KBT+1, I )*RA1 + END IF + END IF +* + DO 880 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1 + ELSE + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + END IF +* +* finish applying rotations in 2nd set from the left +* + DO 850 L = KB - K, 1, -1 + NRT = ( J2+KA+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( KA1-L+1, J1T+L-1 ), INCA, + $ AB( KA1-L, J1T+L-1 ), INCA, + $ WORK( N+M-KB+J1T+KA ), + $ WORK( M-KB+J1T+KA ), KA1 ) + 850 CONTINUE + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + DO 860 J = J1, J2, KA1 + WORK( M-KB+J ) = WORK( M-KB+J+KA ) + WORK( N+M-KB+J ) = WORK( N+M-KB+J+KA ) + 860 CONTINUE + DO 870 J = J1, J2, KA1 +* +* create nonzero element a(j+ka,j-1) outside the band +* and store it in WORK(m-kb+j) +* + WORK( M-KB+J ) = WORK( M-KB+J )*AB( KA1, J-1 ) + AB( KA1, J-1 ) = WORK( N+M-KB+J )*AB( KA1, J-1 ) + 870 CONTINUE + IF( UPDATE ) THEN + IF( I+K.GT.KA1 .AND. K.LE.KBT ) + $ WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K ) + END IF + 880 CONTINUE +* + DO 920 K = KB, 1, -1 + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL SLARGV( NR, AB( KA1, J1 ), INCA, WORK( M-KB+J1 ), + $ KA1, WORK( N+M-KB+J1 ), KA1 ) +* +* apply rotations in 2nd set from the right +* + DO 890 L = 1, KA - 1 + CALL SLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ), + $ INCA, WORK( N+M-KB+J1 ), WORK( M-KB+J1 ), + $ KA1 ) + 890 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL SLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ), + $ AB( 2, J1-1 ), INCA, WORK( N+M-KB+J1 ), + $ WORK( M-KB+J1 ), KA1 ) +* + END IF +* +* start applying rotations in 2nd set from the left +* + DO 900 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ WORK( N+M-KB+J1T ), WORK( M-KB+J1T ), + $ KA1 ) + 900 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 910 J = J1, J2, KA1 + CALL SROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ WORK( N+M-KB+J ), WORK( M-KB+J ) ) + 910 CONTINUE + END IF + 920 CONTINUE +* + DO 940 K = 1, KB - 1 + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 +* +* finish applying rotations in 1st set from the left +* + DO 930 L = KB - K, 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ WORK( N+J1T ), WORK( J1T ), KA1 ) + 930 CONTINUE + 940 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 950 J = 2, MIN( I+KB, M ) - 2*KA - 1 + WORK( N+J ) = WORK( N+J+KA ) + WORK( J ) = WORK( J+KA ) + 950 CONTINUE + END IF +* + END IF +* + GO TO 490 +* +* End of SSBGST +* + END diff --git a/costa/native/external/lapack/ssbgv.f b/costa/native/external/lapack/ssbgv.f new file mode 100644 index 000000000..00624a091 --- /dev/null +++ b/costa/native/external/lapack/ssbgv.f @@ -0,0 +1,189 @@ + SUBROUTINE SSBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, + $ LDZ, WORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, N +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), BB( LDBB, * ), W( * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSBGV computes all the eigenvalues, and optionally, the eigenvectors +* of a real generalized symmetric-definite banded eigenproblem, of +* the form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric +* and banded, and B is also positive definite. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* KA (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KA >= 0. +* +* KB (input) INTEGER +* The number of superdiagonals of the matrix B if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KB >= 0. +* +* AB (input/output) REAL array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first ka+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +* +* On exit, the contents of AB are destroyed. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KA+1. +* +* BB (input/output) REAL array, dimension (LDBB, N) +* On entry, the upper or lower triangle of the symmetric band +* matrix B, stored in the first kb+1 rows of the array. The +* j-th column of B is stored in the j-th column of the array BB +* as follows: +* if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +* if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +* +* On exit, the factor S from the split Cholesky factorization +* B = S**T*S, as returned by SPBSTF. +* +* LDBB (input) INTEGER +* The leading dimension of the array BB. LDBB >= KB+1. +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) REAL array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +* eigenvectors, with the i-th column of Z holding the +* eigenvector associated with W(i). The eigenvectors are +* normalized so that Z**T*B*Z = I. +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= N. +* +* WORK (workspace) REAL array, dimension (3*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is: +* <= N: the algorithm failed to converge: +* i off-diagonal elements of an intermediate +* tridiagonal form did not converge to zero; +* > N: if INFO = N + i, for 1 <= i <= N, then SPBSTF +* returned INFO = i: B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, WANTZ + CHARACTER VECT + INTEGER IINFO, INDE, INDWRK +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SPBSTF, SSBGST, SSBTRD, SSTEQR, SSTERF, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSBGV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a split Cholesky factorization of B. +* + CALL SPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + INDE = 1 + INDWRK = INDE + N + CALL SSBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ, + $ WORK( INDWRK ), IINFO ) +* +* Reduce to tridiagonal form. +* + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL SSBTRD( VECT, UPLO, N, KA, AB, LDAB, W, WORK( INDE ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call SSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL SSTEQR( JOBZ, N, W, WORK( INDE ), Z, LDZ, WORK( INDWRK ), + $ INFO ) + END IF + RETURN +* +* End of SSBGV +* + END diff --git a/costa/native/external/lapack/ssbgvd.f b/costa/native/external/lapack/ssbgvd.f new file mode 100644 index 000000000..bc185e3aa --- /dev/null +++ b/costa/native/external/lapack/ssbgvd.f @@ -0,0 +1,270 @@ + SUBROUTINE SSBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, + $ Z, LDZ, WORK, LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AB( LDAB, * ), BB( LDBB, * ), W( * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSBGVD computes all the eigenvalues, and optionally, the eigenvectors +* of a real generalized symmetric-definite banded eigenproblem, of the +* form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric and +* banded, and B is also positive definite. If eigenvectors are +* desired, it uses a divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* KA (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KA >= 0. +* +* KB (input) INTEGER +* The number of superdiagonals of the matrix B if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KB >= 0. +* +* AB (input/output) REAL array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first ka+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +* +* On exit, the contents of AB are destroyed. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KA+1. +* +* BB (input/output) REAL array, dimension (LDBB, N) +* On entry, the upper or lower triangle of the symmetric band +* matrix B, stored in the first kb+1 rows of the array. The +* j-th column of B is stored in the j-th column of the array BB +* as follows: +* if UPLO = 'U', BB(ka+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +* if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +* +* On exit, the factor S from the split Cholesky factorization +* B = S**T*S, as returned by SPBSTF. +* +* LDBB (input) INTEGER +* The leading dimension of the array BB. LDBB >= KB+1. +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) REAL array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +* eigenvectors, with the i-th column of Z holding the +* eigenvector associated with W(i). The eigenvectors are +* normalized so Z**T*B*Z = I. +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If N <= 1, LWORK >= 1. +* If JOBZ = 'N' and N > 1, LWORK >= 3*N. +* If JOBZ = 'V' and N > 1, LWORK >= 1 + 5*N + 2*N**2. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if LIWORK > 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If JOBZ = 'N' or N <= 1, LIWORK >= 1. +* If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is: +* <= N: the algorithm failed to converge: +* i off-diagonal elements of an intermediate +* tridiagonal form did not converge to zero; +* > N: if INFO = N + i, for 1 <= i <= N, then SPBSTF +* returned INFO = i: B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER VECT + INTEGER IINFO, INDE, INDWK2, INDWRK, LIWMIN, LLWRK2, + $ LWMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SLACPY, SPBSTF, SSBGST, SSBTRD, SSTEDC, + $ SSTERF, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 5*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + END IF + END IF +* + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -12 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -16 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSBGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a split Cholesky factorization of B. +* + CALL SPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + INDE = 1 + INDWRK = INDE + N + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 + CALL SSBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ, + $ WORK( INDWRK ), IINFO ) +* +* Reduce to tridiagonal form. +* + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL SSBTRD( VECT, UPLO, N, KA, AB, LDAB, W, WORK( INDE ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call SSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL SSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO ) + CALL SGEMM( 'N', 'N', N, N, N, ONE, Z, LDZ, WORK( INDWRK ), N, + $ ZERO, WORK( INDWK2 ), N ) + CALL SLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of SSBGVD +* + END diff --git a/costa/native/external/lapack/ssbgvx.f b/costa/native/external/lapack/ssbgvx.f new file mode 100644 index 000000000..a4317e0ab --- /dev/null +++ b/costa/native/external/lapack/ssbgvx.f @@ -0,0 +1,371 @@ + SUBROUTINE SSBGVX( JOBZ, RANGE, UPLO, N, KA, KB, AB, LDAB, BB, + $ LDBB, Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, + $ LDZ, WORK, IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KA, KB, LDAB, LDBB, LDQ, LDZ, M, + $ N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL AB( LDAB, * ), BB( LDBB, * ), Q( LDQ, * ), + $ W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSBGVX computes selected eigenvalues, and optionally, eigenvectors +* of a real generalized symmetric-definite banded eigenproblem, of +* the form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric +* and banded, and B is also positive definite. Eigenvalues and +* eigenvectors can be selected by specifying either all eigenvalues, +* a range of values or a range of indices for the desired eigenvalues. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* KA (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KA >= 0. +* +* KB (input) INTEGER +* The number of superdiagonals of the matrix B if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KB >= 0. +* +* AB (input/output) REAL array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first ka+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +* +* On exit, the contents of AB are destroyed. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KA+1. +* +* BB (input/output) REAL array, dimension (LDBB, N) +* On entry, the upper or lower triangle of the symmetric band +* matrix B, stored in the first kb+1 rows of the array. The +* j-th column of B is stored in the j-th column of the array BB +* as follows: +* if UPLO = 'U', BB(ka+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +* if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +* +* On exit, the factor S from the split Cholesky factorization +* B = S**T*S, as returned by SPBSTF. +* +* LDBB (input) INTEGER +* The leading dimension of the array BB. LDBB >= KB+1. +* +* Q (output) REAL array, dimension (LDQ, N) +* If JOBZ = 'V', the n-by-n matrix used in the reduction of +* A*x = (lambda)*B*x to standard form, i.e. C*x = (lambda)*x, +* and consequently C to tridiagonal form. +* If JOBZ = 'N', the array Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. If JOBZ = 'N', +* LDQ >= 1. If JOBZ = 'V', LDQ >= max(1,N). +* +* VL (input) REAL +* VU (input) REAL +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) REAL +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing A to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*SLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*SLAMCH('S'). +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) REAL array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +* eigenvectors, with the i-th column of Z holding the +* eigenvector associated with W(i). The eigenvectors are +* normalized so Z**T*B*Z = I. +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) REAL array, dimension (7N) +* +* IWORK (workspace/output) INTEGER array, dimension (5N) +* +* IFAIL (input) INTEGER array, dimension (M) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvalues that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0 : successful exit +* < 0 : if INFO = -i, the i-th argument had an illegal value +* <= N: if INFO = i, then i eigenvectors failed to converge. +* Their indices are stored in IFAIL. +* > N : SPBSTF returned an error code; i.e., +* if INFO = N + i, for 1 <= i <= N, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, UPPER, VALEIG, WANTZ + CHARACTER ORDER, VECT + INTEGER I, IINFO, INDD, INDE, INDEE, INDIBL, INDISP, + $ INDIWO, INDWRK, ITMP1, J, JJ, NSPLIT + REAL TMP1 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMV, SLACPY, SPBSTF, SSBGST, SSBTRD, + $ SSTEBZ, SSTEIN, SSTEQR, SSTERF, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KA.LT.0 ) THEN + INFO = -5 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -8 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -10 + ELSE IF( LDQ.LT.1 ) THEN + INFO = -12 + ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN + INFO = -14 + ELSE IF( INDEIG .AND. IL.LT.1 ) THEN + INFO = -15 + ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN + INFO = -16 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -21 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSBGVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Form a split Cholesky factorization of B. +* + CALL SPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + CALL SSBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Q, LDQ, + $ WORK, IINFO ) +* +* Reduce symmetric band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDWRK = INDE + N + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL SSBTRD( VECT, UPLO, N, KA, AB, LDAB, WORK( INDD ), + $ WORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call SSTERF or SSTEQR. If this fails for some +* eigenvalue, then try SSTEBZ. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ ( ABSTOL.LE.ZERO ) ) THEN + CALL SCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL SLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL SSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, +* call SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL SSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply transformation matrix used in reduction to tridiagonal +* form to eigenvectors returned by SSTEIN. +* + DO 20 J = 1, M + CALL SCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL SGEMV( 'N', N, N, ONE, Q, LDQ, WORK, 1, ZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* + 30 CONTINUE +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* + RETURN +* +* End of SSBGVX +* + END diff --git a/costa/native/external/lapack/ssbtrd.f b/costa/native/external/lapack/ssbtrd.f new file mode 100644 index 000000000..1f36133e7 --- /dev/null +++ b/costa/native/external/lapack/ssbtrd.f @@ -0,0 +1,553 @@ + SUBROUTINE SSBTRD( VECT, UPLO, N, KD, AB, LDAB, D, E, Q, LDQ, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO, VECT + INTEGER INFO, KD, LDAB, LDQ, N +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), D( * ), E( * ), Q( LDQ, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* SSBTRD reduces a real symmetric band matrix A to symmetric +* tridiagonal form T by an orthogonal similarity transformation: +* Q**T * A * Q = T. +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* = 'N': do not form Q; +* = 'V': form Q; +* = 'U': update a matrix X, by forming X*Q. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) REAL array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* On exit, the diagonal elements of AB are overwritten by the +* diagonal elements of the tridiagonal matrix T; if KD > 0, the +* elements on the first superdiagonal (if UPLO = 'U') or the +* first subdiagonal (if UPLO = 'L') are overwritten by the +* off-diagonal elements of T; the rest of AB is overwritten by +* values generated during the reduction. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* D (output) REAL array, dimension (N) +* The diagonal elements of the tridiagonal matrix T. +* +* E (output) REAL array, dimension (N-1) +* The off-diagonal elements of the tridiagonal matrix T: +* E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. +* +* Q (input/output) REAL array, dimension (LDQ,N) +* On entry, if VECT = 'U', then Q must contain an N-by-N +* matrix X; if VECT = 'N' or 'V', then Q need not be set. +* +* On exit: +* if VECT = 'V', Q contains the N-by-N orthogonal matrix Q; +* if VECT = 'U', Q contains the product X*Q; +* if VECT = 'N', the array Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. +* LDQ >= 1, and LDQ >= N if VECT = 'V' or 'U'. +* +* WORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Modified by Linda Kaufman, Bell Labs. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL INITQ, UPPER, WANTQ + INTEGER I, I2, IBL, INCA, INCX, IQAEND, IQB, IQEND, J, + $ J1, J1END, J1INC, J2, JEND, JIN, JINC, K, KD1, + $ KDM1, KDN, L, LAST, LEND, NQ, NR, NRT + REAL TEMP +* .. +* .. External Subroutines .. + EXTERNAL SLAR2V, SLARGV, SLARTG, SLARTV, SLASET, SROT, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INITQ = LSAME( VECT, 'V' ) + WANTQ = INITQ .OR. LSAME( VECT, 'U' ) + UPPER = LSAME( UPLO, 'U' ) + KD1 = KD + 1 + KDM1 = KD - 1 + INCX = LDAB - 1 + IQEND = 1 +* + INFO = 0 + IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD1 ) THEN + INFO = -6 + ELSE IF( LDQ.LT.MAX( 1, N ) .AND. WANTQ ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSBTRD', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Initialize Q to the unit matrix, if needed +* + IF( INITQ ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) +* +* Wherever possible, plane rotations are generated and applied in +* vector operations of length NR over the index set J1:J2:KD1. +* +* The cosines and sines of the plane rotations are stored in the +* arrays D and WORK. +* + INCA = KD1*LDAB + KDN = MIN( N-1, KD ) + IF( UPPER ) THEN +* + IF( KD.GT.1 ) THEN +* +* Reduce to tridiagonal form, working with upper triangle +* + NR = 0 + J1 = KDN + 2 + J2 = 1 +* + DO 90 I = 1, N - 2 +* +* Reduce i-th row of matrix to tridiagonal form +* + DO 80 K = KDN + 1, 2, -1 + J1 = J1 + KDN + J2 = J2 + KDN +* + IF( NR.GT.0 ) THEN +* +* generate plane rotations to annihilate nonzero +* elements which have been created outside the band +* + CALL SLARGV( NR, AB( 1, J1-1 ), INCA, WORK( J1 ), + $ KD1, D( J1 ), KD1 ) +* +* apply rotations from the right +* +* +* Dependent on the the number of diagonals either +* SLARTV or SROT is used +* + IF( NR.GE.2*KD-1 ) THEN + DO 10 L = 1, KD - 1 + CALL SLARTV( NR, AB( L+1, J1-1 ), INCA, + $ AB( L, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) + 10 CONTINUE +* + ELSE + JEND = J1 + ( NR-1 )*KD1 + DO 20 JINC = J1, JEND, KD1 + CALL SROT( KDM1, AB( 2, JINC-1 ), 1, + $ AB( 1, JINC ), 1, D( JINC ), + $ WORK( JINC ) ) + 20 CONTINUE + END IF + END IF +* +* + IF( K.GT.2 ) THEN + IF( K.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i,i+k-1) +* within the band +* + CALL SLARTG( AB( KD-K+3, I+K-2 ), + $ AB( KD-K+2, I+K-1 ), D( I+K-1 ), + $ WORK( I+K-1 ), TEMP ) + AB( KD-K+3, I+K-2 ) = TEMP +* +* apply rotation from the right +* + CALL SROT( K-3, AB( KD-K+4, I+K-2 ), 1, + $ AB( KD-K+3, I+K-1 ), 1, D( I+K-1 ), + $ WORK( I+K-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KDN - 1 + END IF +* +* apply plane rotations from both sides to diagonal +* blocks +* + IF( NR.GT.0 ) + $ CALL SLAR2V( NR, AB( KD1, J1-1 ), AB( KD1, J1 ), + $ AB( KD, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) +* +* apply plane rotations from the left +* + IF( NR.GT.0 ) THEN + IF( 2*KD-1.LT.NR ) THEN +* +* Dependent on the the number of diagonals either +* SLARTV or SROT is used +* + DO 30 L = 1, KD - 1 + IF( J2+L.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( KD-L, J1+L ), INCA, + $ AB( KD-L+1, J1+L ), INCA, + $ D( J1 ), WORK( J1 ), KD1 ) + 30 CONTINUE + ELSE + J1END = J1 + KD1*( NR-2 ) + IF( J1END.GE.J1 ) THEN + DO 40 JIN = J1, J1END, KD1 + CALL SROT( KD-1, AB( KD-1, JIN+1 ), INCX, + $ AB( KD, JIN+1 ), INCX, + $ D( JIN ), WORK( JIN ) ) + 40 CONTINUE + END IF + LEND = MIN( KDM1, N-J2 ) + LAST = J1END + KD1 + IF( LEND.GT.0 ) + $ CALL SROT( LEND, AB( KD-1, LAST+1 ), INCX, + $ AB( KD, LAST+1 ), INCX, D( LAST ), + $ WORK( LAST ) ) + END IF + END IF +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + IF( INITQ ) THEN +* +* take advantage of the fact that Q was +* initially the Identity matrix +* + IQEND = MAX( IQEND, J2 ) + I2 = MAX( 0, K-3 ) + IQAEND = 1 + I*KD + IF( K.EQ.2 ) + $ IQAEND = IQAEND + KD + IQAEND = MIN( IQAEND, IQEND ) + DO 50 J = J1, J2, KD1 + IBL = I - I2 / KDM1 + I2 = I2 + 1 + IQB = MAX( 1, J-IBL ) + NQ = 1 + IQAEND - IQB + IQAEND = MIN( IQAEND+KD, IQEND ) + CALL SROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ), + $ 1, D( J ), WORK( J ) ) + 50 CONTINUE + ELSE +* + DO 60 J = J1, J2, KD1 + CALL SROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ D( J ), WORK( J ) ) + 60 CONTINUE + END IF +* + END IF +* + IF( J2+KDN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KDN - 1 + END IF +* + DO 70 J = J1, J2, KD1 +* +* create nonzero element a(j-1,j+kd) outside the band +* and store it in WORK +* + WORK( J+KD ) = WORK( J )*AB( 1, J+KD ) + AB( 1, J+KD ) = D( J )*AB( 1, J+KD ) + 70 CONTINUE + 80 CONTINUE + 90 CONTINUE + END IF +* + IF( KD.GT.0 ) THEN +* +* copy off-diagonal elements to E +* + DO 100 I = 1, N - 1 + E( I ) = AB( KD, I+1 ) + 100 CONTINUE + ELSE +* +* set E to zero if original matrix was diagonal +* + DO 110 I = 1, N - 1 + E( I ) = ZERO + 110 CONTINUE + END IF +* +* copy diagonal elements to D +* + DO 120 I = 1, N + D( I ) = AB( KD1, I ) + 120 CONTINUE +* + ELSE +* + IF( KD.GT.1 ) THEN +* +* Reduce to tridiagonal form, working with lower triangle +* + NR = 0 + J1 = KDN + 2 + J2 = 1 +* + DO 210 I = 1, N - 2 +* +* Reduce i-th column of matrix to tridiagonal form +* + DO 200 K = KDN + 1, 2, -1 + J1 = J1 + KDN + J2 = J2 + KDN +* + IF( NR.GT.0 ) THEN +* +* generate plane rotations to annihilate nonzero +* elements which have been created outside the band +* + CALL SLARGV( NR, AB( KD1, J1-KD1 ), INCA, + $ WORK( J1 ), KD1, D( J1 ), KD1 ) +* +* apply plane rotations from one side +* +* +* Dependent on the the number of diagonals either +* SLARTV or SROT is used +* + IF( NR.GT.2*KD-1 ) THEN + DO 130 L = 1, KD - 1 + CALL SLARTV( NR, AB( KD1-L, J1-KD1+L ), INCA, + $ AB( KD1-L+1, J1-KD1+L ), INCA, + $ D( J1 ), WORK( J1 ), KD1 ) + 130 CONTINUE + ELSE + JEND = J1 + KD1*( NR-1 ) + DO 140 JINC = J1, JEND, KD1 + CALL SROT( KDM1, AB( KD, JINC-KD ), INCX, + $ AB( KD1, JINC-KD ), INCX, + $ D( JINC ), WORK( JINC ) ) + 140 CONTINUE + END IF +* + END IF +* + IF( K.GT.2 ) THEN + IF( K.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i+k-1,i) +* within the band +* + CALL SLARTG( AB( K-1, I ), AB( K, I ), + $ D( I+K-1 ), WORK( I+K-1 ), TEMP ) + AB( K-1, I ) = TEMP +* +* apply rotation from the left +* + CALL SROT( K-3, AB( K-2, I+1 ), LDAB-1, + $ AB( K-1, I+1 ), LDAB-1, D( I+K-1 ), + $ WORK( I+K-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KDN - 1 + END IF +* +* apply plane rotations from both sides to diagonal +* blocks +* + IF( NR.GT.0 ) + $ CALL SLAR2V( NR, AB( 1, J1-1 ), AB( 1, J1 ), + $ AB( 2, J1-1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) +* +* apply plane rotations from the right +* +* +* Dependent on the the number of diagonals either +* SLARTV or SROT is used +* + IF( NR.GT.0 ) THEN + IF( NR.GT.2*KD-1 ) THEN + DO 150 L = 1, KD - 1 + IF( J2+L.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( L+2, J1-1 ), INCA, + $ AB( L+1, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) + 150 CONTINUE + ELSE + J1END = J1 + KD1*( NR-2 ) + IF( J1END.GE.J1 ) THEN + DO 160 J1INC = J1, J1END, KD1 + CALL SROT( KDM1, AB( 3, J1INC-1 ), 1, + $ AB( 2, J1INC ), 1, D( J1INC ), + $ WORK( J1INC ) ) + 160 CONTINUE + END IF + LEND = MIN( KDM1, N-J2 ) + LAST = J1END + KD1 + IF( LEND.GT.0 ) + $ CALL SROT( LEND, AB( 3, LAST-1 ), 1, + $ AB( 2, LAST ), 1, D( LAST ), + $ WORK( LAST ) ) + END IF + END IF +* +* +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + IF( INITQ ) THEN +* +* take advantage of the fact that Q was +* initially the Identity matrix +* + IQEND = MAX( IQEND, J2 ) + I2 = MAX( 0, K-3 ) + IQAEND = 1 + I*KD + IF( K.EQ.2 ) + $ IQAEND = IQAEND + KD + IQAEND = MIN( IQAEND, IQEND ) + DO 170 J = J1, J2, KD1 + IBL = I - I2 / KDM1 + I2 = I2 + 1 + IQB = MAX( 1, J-IBL ) + NQ = 1 + IQAEND - IQB + IQAEND = MIN( IQAEND+KD, IQEND ) + CALL SROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ), + $ 1, D( J ), WORK( J ) ) + 170 CONTINUE + ELSE +* + DO 180 J = J1, J2, KD1 + CALL SROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ D( J ), WORK( J ) ) + 180 CONTINUE + END IF + END IF +* + IF( J2+KDN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KDN - 1 + END IF +* + DO 190 J = J1, J2, KD1 +* +* create nonzero element a(j+kd,j-1) outside the +* band and store it in WORK +* + WORK( J+KD ) = WORK( J )*AB( KD1, J ) + AB( KD1, J ) = D( J )*AB( KD1, J ) + 190 CONTINUE + 200 CONTINUE + 210 CONTINUE + END IF +* + IF( KD.GT.0 ) THEN +* +* copy off-diagonal elements to E +* + DO 220 I = 1, N - 1 + E( I ) = AB( 2, I ) + 220 CONTINUE + ELSE +* +* set E to zero if original matrix was diagonal +* + DO 230 I = 1, N - 1 + E( I ) = ZERO + 230 CONTINUE + END IF +* +* copy diagonal elements to D +* + DO 240 I = 1, N + D( I ) = AB( 1, I ) + 240 CONTINUE + END IF +* + RETURN +* +* End of SSBTRD +* + END diff --git a/costa/native/external/lapack/sspcon.f b/costa/native/external/lapack/sspcon.f new file mode 100644 index 000000000..65617c912 --- /dev/null +++ b/costa/native/external/lapack/sspcon.f @@ -0,0 +1,158 @@ + SUBROUTINE SSPCON( UPLO, N, AP, IPIV, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL AP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SSPCON estimates the reciprocal of the condition number (in the +* 1-norm) of a real symmetric packed matrix A using the factorization +* A = U*D*U**T or A = L*D*L**T computed by SSPTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input) REAL array, dimension (N*(N+1)/2) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by SSPTRF, stored as a +* packed triangular matrix. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by SSPTRF. +* +* ANORM (input) REAL +* The 1-norm of the original matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) REAL array, dimension (2*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IP, KASE + REAL AINVNM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLACON, SSPTRS, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + IP = N*( N+1 ) / 2 + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP - I + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + IP = 1 + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP + N - I + 1 + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL SLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L') or inv(U*D*U'). +* + CALL SSPTRS( UPLO, N, 1, AP, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of SSPCON +* + END diff --git a/costa/native/external/lapack/sspev.f b/costa/native/external/lapack/sspev.f new file mode 100644 index 000000000..3871abe04 --- /dev/null +++ b/costa/native/external/lapack/sspev.f @@ -0,0 +1,188 @@ + SUBROUTINE SSPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + REAL AP( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSPEV computes all the eigenvalues and, optionally, eigenvectors of a +* real symmetric matrix A in packed storage. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) REAL array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, AP is overwritten by values generated during the +* reduction to tridiagonal form. If UPLO = 'U', the diagonal +* and first superdiagonal of the tridiagonal matrix T overwrite +* the corresponding elements of A, and if UPLO = 'L', the +* diagonal and first subdiagonal of T overwrite the +* corresponding elements of A. +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) REAL array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +* eigenvectors of the matrix A, with the i-th column of Z +* holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANSP + EXTERNAL LSAME, SLAMCH, SLANSP +* .. +* .. External Subroutines .. + EXTERNAL SOPGTR, SSCAL, SSPTRD, SSTEQR, SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( UPLO, 'U' ) .OR. LSAME( UPLO, 'L' ) ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -7 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPEV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AP( 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = SLANSP( 'M', UPLO, N, AP, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL SSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + END IF +* +* Call SSPTRD to reduce symmetric packed matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + CALL SSPTRD( UPLO, N, AP, W, WORK( INDE ), WORK( INDTAU ), IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* SOPGTR to generate the orthogonal matrix, then call SSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE + INDWRK = INDTAU + N + CALL SOPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + CALL SSTEQR( JOBZ, N, W, WORK( INDE ), Z, LDZ, WORK( INDTAU ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + RETURN +* +* End of SSPEV +* + END diff --git a/costa/native/external/lapack/sspevd.f b/costa/native/external/lapack/sspevd.f new file mode 100644 index 000000000..a407a6ed8 --- /dev/null +++ b/costa/native/external/lapack/sspevd.f @@ -0,0 +1,249 @@ + SUBROUTINE SSPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, + $ IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AP( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSPEVD computes all the eigenvalues and, optionally, eigenvectors +* of a real symmetric matrix A in packed storage. If eigenvectors are +* desired, it uses a divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) REAL array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, AP is overwritten by values generated during the +* reduction to tridiagonal form. If UPLO = 'U', the diagonal +* and first superdiagonal of the tridiagonal matrix T overwrite +* the corresponding elements of A, and if UPLO = 'L', the +* diagonal and first subdiagonal of T overwrite the +* corresponding elements of A. +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) REAL array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +* eigenvectors of the matrix A, with the i-th column of Z +* holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) REAL array, +* dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If N <= 1, LWORK must be at least 1. +* If JOBZ = 'N' and N > 1, LWORK must be at least 2*N. +* If JOBZ = 'V' and N > 1, LWORK must be at least +* 1 + 6*N + N**2. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +* If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTZ + INTEGER IINFO, INDE, INDTAU, INDWRK, ISCALE, LIWMIN, + $ LLWORK, LWMIN + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANSP + EXTERNAL LSAME, SLAMCH, SLANSP +* .. +* .. External Subroutines .. + EXTERNAL SOPMTR, SSCAL, SSPTRD, SSTEDC, SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 6*N + N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + END IF + END IF + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( UPLO, 'U' ) .OR. LSAME( UPLO, 'L' ) ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -9 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AP( 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = SLANSP( 'M', UPLO, N, AP, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL SSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + END IF +* +* Call SSPTRD to reduce symmetric packed matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + CALL SSPTRD( UPLO, N, AP, W, WORK( INDE ), WORK( INDTAU ), IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* SSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the +* tridiagonal matrix, then call SOPMTR to multiply it by the +* Householder transformations represented in AP. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + CALL SSTEDC( 'I', N, W, WORK( INDE ), Z, LDZ, WORK( INDWRK ), + $ LLWORK, IWORK, LIWORK, INFO ) + CALL SOPMTR( 'L', UPLO, 'N', N, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL SSCAL( N, ONE / SIGMA, W, 1 ) +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of SSPEVD +* + END diff --git a/costa/native/external/lapack/sspevx.f b/costa/native/external/lapack/sspevx.f new file mode 100644 index 000000000..05d350e69 --- /dev/null +++ b/costa/native/external/lapack/sspevx.f @@ -0,0 +1,377 @@ + SUBROUTINE SSPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, WORK, IWORK, IFAIL, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDZ, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL AP( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSPEVX computes selected eigenvalues and, optionally, eigenvectors +* of a real symmetric matrix A in packed storage. Eigenvalues/vectors +* can be selected by specifying either a range of values or a range of +* indices for the desired eigenvalues. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found; +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found; +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) REAL array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, AP is overwritten by values generated during the +* reduction to tridiagonal form. If UPLO = 'U', the diagonal +* and first superdiagonal of the tridiagonal matrix T overwrite +* the corresponding elements of A, and if UPLO = 'L', the +* diagonal and first subdiagonal of T overwrite the +* corresponding elements of A. +* +* VL (input) REAL +* VU (input) REAL +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) REAL +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing AP to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*SLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*SLAMCH('S'). +* +* See "Computing Small Singular Values of Bidiagonal Matrices +* with Guaranteed High Relative Accuracy," by Demmel and +* Kahan, LAPACK Working Note #3. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the selected eigenvalues in ascending order. +* +* Z (output) REAL array, dimension (LDZ, max(1,M)) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If an eigenvector fails to converge, then that column of Z +* contains the latest approximation to the eigenvector, and the +* index of the eigenvector is returned in IFAIL. +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) REAL array, dimension (8*N) +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, then i eigenvectors failed to converge. +* Their indices are stored in array IFAIL. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWO, INDTAU, INDWRK, ISCALE, ITMP1, + $ J, JJ, NSPLIT + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANSP + EXTERNAL LSAME, SLAMCH, SLANSP +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SOPGTR, SOPMTR, SSCAL, SSPTRD, SSTEBZ, + $ SSTEIN, SSTEQR, SSTERF, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) ) + $ THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -7 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -9 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -14 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPEVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = AP( 1 ) + ELSE + IF( VL.LT.AP( 1 ) .AND. VU.GE.AP( 1 ) ) THEN + M = 1 + W( 1 ) = AP( 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF ( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + ENDIF + ANRM = SLANSP( 'M', UPLO, N, AP, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL SSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call SSPTRD to reduce symmetric packed matrix to tridiagonal form. +* + INDTAU = 1 + INDE = INDTAU + N + INDD = INDE + N + INDWRK = INDD + N + CALL SSPTRD( UPLO, N, AP, WORK( INDD ), WORK( INDE ), + $ WORK( INDTAU ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call SSTERF or SOPGTR and SSTEQR. If this fails +* for some eigenvalue, then try SSTEBZ. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ ( ABSTOL.LE.ZERO ) ) THEN + CALL SCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL SOPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 20 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL SSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by SSTEIN. +* + CALL SOPMTR( 'L', UPLO, 'N', N, M, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 20 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 40 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 30 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 30 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 40 CONTINUE + END IF +* + RETURN +* +* End of SSPEVX +* + END diff --git a/costa/native/external/lapack/sspgst.f b/costa/native/external/lapack/sspgst.f new file mode 100644 index 000000000..b8d386c13 --- /dev/null +++ b/costa/native/external/lapack/sspgst.f @@ -0,0 +1,209 @@ + SUBROUTINE SSPGST( ITYPE, UPLO, N, AP, BP, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, N +* .. +* .. Array Arguments .. + REAL AP( * ), BP( * ) +* .. +* +* Purpose +* ======= +* +* SSPGST reduces a real symmetric-definite generalized eigenproblem +* to standard form, using packed storage. +* +* If ITYPE = 1, the problem is A*x = lambda*B*x, +* and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) +* +* If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +* B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. +* +* B must have been previously factorized as U**T*U or L*L**T by SPPTRF. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); +* = 2 or 3: compute U*A*U**T or L**T*A*L. +* +* UPLO (input) CHARACTER +* = 'U': Upper triangle of A is stored and B is factored as +* U**T*U; +* = 'L': Lower triangle of A is stored and B is factored as +* L*L**T. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* AP (input/output) REAL array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, if INFO = 0, the transformed matrix, stored in the +* same format as A. +* +* BP (input) REAL array, dimension (N*(N+1)/2) +* The triangular factor from the Cholesky factorization of B, +* stored in the same format as A, as returned by SPPTRF. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, HALF + PARAMETER ( ONE = 1.0, HALF = 0.5 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, J1, J1J1, JJ, K, K1, K1K1, KK + REAL AJJ, AKK, BJJ, BKK, CT +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SSCAL, SSPMV, SSPR2, STPMV, STPSV, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPGST', -INFO ) + RETURN + END IF +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U')*A*inv(U) +* +* J1 and JJ are the indices of A(1,j) and A(j,j) +* + JJ = 0 + DO 10 J = 1, N + J1 = JJ + 1 + JJ = JJ + J +* +* Compute the j-th column of the upper triangle of A +* + BJJ = BP( JJ ) + CALL STPSV( UPLO, 'Transpose', 'Nonunit', J, BP, + $ AP( J1 ), 1 ) + CALL SSPMV( UPLO, J-1, -ONE, AP, BP( J1 ), 1, ONE, + $ AP( J1 ), 1 ) + CALL SSCAL( J-1, ONE / BJJ, AP( J1 ), 1 ) + AP( JJ ) = ( AP( JJ )-SDOT( J-1, AP( J1 ), 1, BP( J1 ), + $ 1 ) ) / BJJ + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L') +* +* KK and K1K1 are the indices of A(k,k) and A(k+1,k+1) +* + KK = 1 + DO 20 K = 1, N + K1K1 = KK + N - K + 1 +* +* Update the lower triangle of A(k:n,k:n) +* + AKK = AP( KK ) + BKK = BP( KK ) + AKK = AKK / BKK**2 + AP( KK ) = AKK + IF( K.LT.N ) THEN + CALL SSCAL( N-K, ONE / BKK, AP( KK+1 ), 1 ) + CT = -HALF*AKK + CALL SAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 ) + CALL SSPR2( UPLO, N-K, -ONE, AP( KK+1 ), 1, + $ BP( KK+1 ), 1, AP( K1K1 ) ) + CALL SAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 ) + CALL STPSV( UPLO, 'No transpose', 'Non-unit', N-K, + $ BP( K1K1 ), AP( KK+1 ), 1 ) + END IF + KK = K1K1 + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U' +* +* K1 and KK are the indices of A(1,k) and A(k,k) +* + KK = 0 + DO 30 K = 1, N + K1 = KK + 1 + KK = KK + K +* +* Update the upper triangle of A(1:k,1:k) +* + AKK = AP( KK ) + BKK = BP( KK ) + CALL STPMV( UPLO, 'No transpose', 'Non-unit', K-1, BP, + $ AP( K1 ), 1 ) + CT = HALF*AKK + CALL SAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 ) + CALL SSPR2( UPLO, K-1, ONE, AP( K1 ), 1, BP( K1 ), 1, + $ AP ) + CALL SAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 ) + CALL SSCAL( K-1, BKK, AP( K1 ), 1 ) + AP( KK ) = AKK*BKK**2 + 30 CONTINUE + ELSE +* +* Compute L'*A*L +* +* JJ and J1J1 are the indices of A(j,j) and A(j+1,j+1) +* + JJ = 1 + DO 40 J = 1, N + J1J1 = JJ + N - J + 1 +* +* Compute the j-th column of the lower triangle of A +* + AJJ = AP( JJ ) + BJJ = BP( JJ ) + AP( JJ ) = AJJ*BJJ + SDOT( N-J, AP( JJ+1 ), 1, + $ BP( JJ+1 ), 1 ) + CALL SSCAL( N-J, BJJ, AP( JJ+1 ), 1 ) + CALL SSPMV( UPLO, N-J, ONE, AP( J1J1 ), BP( JJ+1 ), 1, + $ ONE, AP( JJ+1 ), 1 ) + CALL STPMV( UPLO, 'Transpose', 'Non-unit', N-J+1, + $ BP( JJ ), AP( JJ ), 1 ) + JJ = J1J1 + 40 CONTINUE + END IF + END IF + RETURN +* +* End of SSPGST +* + END diff --git a/costa/native/external/lapack/sspgv.f b/costa/native/external/lapack/sspgv.f new file mode 100644 index 000000000..15594c9d7 --- /dev/null +++ b/costa/native/external/lapack/sspgv.f @@ -0,0 +1,196 @@ + SUBROUTINE SSPGV( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDZ, N +* .. +* .. Array Arguments .. + REAL AP( * ), BP( * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSPGV computes all the eigenvalues and, optionally, the eigenvectors +* of a real generalized symmetric-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +* Here A and B are assumed to be symmetric, stored in packed format, +* and B is also positive definite. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* AP (input/output) REAL array, dimension +* (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the contents of AP are destroyed. +* +* BP (input/output) REAL array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* B, packed columnwise in a linear array. The j-th column of B +* is stored in the array BP as follows: +* if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +* if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +* +* On exit, the triangular factor U or L from the Cholesky +* factorization B = U**T*U or B = L*L**T, in the same storage +* format as B. +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) REAL array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +* eigenvectors. The eigenvectors are normalized as follows: +* if ITYPE = 1 or 2, Z**T*B*Z = I; +* if ITYPE = 3, Z**T*inv(B)*Z = I. +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: SPPTRF or SSPEV returned an error code: +* <= N: if INFO = i, SSPEV failed to converge; +* i off-diagonal elements of an intermediate +* tridiagonal form did not converge to zero. +* > N: if INFO = n + i, for 1 <= i <= n, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, WANTZ + CHARACTER TRANS + INTEGER J, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SPPTRF, SSPEV, SSPGST, STPMV, STPSV, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPGV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL SPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL SSPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL SSPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + DO 10 J = 1, NEIG + CALL STPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, NEIG + CALL STPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF + RETURN +* +* End of SSPGV +* + END diff --git a/costa/native/external/lapack/sspgvd.f b/costa/native/external/lapack/sspgvd.f new file mode 100644 index 000000000..5f8d46ee1 --- /dev/null +++ b/costa/native/external/lapack/sspgvd.f @@ -0,0 +1,281 @@ + SUBROUTINE SSPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, + $ LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AP( * ), BP( * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSPGVD computes all the eigenvalues, and optionally, the eigenvectors +* of a real generalized symmetric-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +* B are assumed to be symmetric, stored in packed format, and B is also +* positive definite. +* If eigenvectors are desired, it uses a divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* AP (input/output) REAL array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the contents of AP are destroyed. +* +* BP (input/output) REAL array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* B, packed columnwise in a linear array. The j-th column of B +* is stored in the array BP as follows: +* if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +* if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +* +* On exit, the triangular factor U or L from the Cholesky +* factorization B = U**T*U or B = L*L**T, in the same storage +* format as B. +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) REAL array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +* eigenvectors. The eigenvectors are normalized as follows: +* if ITYPE = 1 or 2, Z**T*B*Z = I; +* if ITYPE = 3, Z**T*inv(B)*Z = I. +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If N <= 1, LWORK >= 1. +* If JOBZ = 'N' and N > 1, LWORK >= 2*N. +* If JOBZ = 'V' and N > 1, LWORK >= 1 + 6*N + 2*N**2. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If JOBZ = 'N' or N <= 1, LIWORK >= 1. +* If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: SPPTRF or SSPEVD returned an error code: +* <= N: if INFO = i, SSPEVD failed to converge; +* i off-diagonal elements of an intermediate +* tridiagonal form did not converge to zero; +* > N: if INFO = N + i, for 1 <= i <= N, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER J, LGN, LIWMIN, LWMIN, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SPPTRF, SSPEVD, SSPGST, STPMV, STPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, LOG, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LGN = 0 + LIWMIN = 1 + LWMIN = 1 + ELSE + LGN = INT( LOG( REAL( N ) ) / LOG( TWO ) ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 5*N + 2*N*LGN + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + END IF + END IF +* + IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDZ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of BP. +* + CALL SPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL SSPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL SSPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) + LWMIN = MAX( REAL( LWMIN ), REAL( WORK( 1 ) ) ) + LIWMIN = MAX( REAL( LIWMIN ), REAL( IWORK( 1 ) ) ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + DO 10 J = 1, NEIG + CALL STPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, NEIG + CALL STPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of SSPGVD +* + END diff --git a/costa/native/external/lapack/sspgvx.f b/costa/native/external/lapack/sspgvx.f new file mode 100644 index 000000000..5b374fd3c --- /dev/null +++ b/costa/native/external/lapack/sspgvx.f @@ -0,0 +1,286 @@ + SUBROUTINE SSPGVX( ITYPE, JOBZ, RANGE, UPLO, N, AP, BP, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, WORK, IWORK, + $ IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, ITYPE, IU, LDZ, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL AP( * ), BP( * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSPGVX computes selected eigenvalues, and optionally, eigenvectors +* of a real generalized symmetric-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A +* and B are assumed to be symmetric, stored in packed storage, and B +* is also positive definite. Eigenvalues and eigenvectors can be +* selected by specifying either a range of values or a range of indices +* for the desired eigenvalues. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A and B are stored; +* = 'L': Lower triangle of A and B are stored. +* +* N (input) INTEGER +* The order of the matrix pencil (A,B). N >= 0. +* +* AP (input/output) REAL array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the contents of AP are destroyed. +* +* BP (input/output) REAL array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* B, packed columnwise in a linear array. The j-th column of B +* is stored in the array BP as follows: +* if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +* if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +* +* On exit, the triangular factor U or L from the Cholesky +* factorization B = U**T*U or B = L*L**T, in the same storage +* format as B. +* +* VL (input) REAL +* VU (input) REAL +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) REAL +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing A to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*SLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*SLAMCH('S'). +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) REAL array, dimension (N) +* On normal exit, the first M elements contain the selected +* eigenvalues in ascending order. +* +* Z (output) REAL array, dimension (LDZ, max(1,M)) +* If JOBZ = 'N', then Z is not referenced. +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* The eigenvectors are normalized as follows: +* if ITYPE = 1 or 2, Z**T*B*Z = I; +* if ITYPE = 3, Z**T*inv(B)*Z = I. +* +* If an eigenvector fails to converge, then that column of Z +* contains the latest approximation to the eigenvector, and the +* index of the eigenvector is returned in IFAIL. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) REAL array, dimension (8*N) +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: SPPTRF or SSPEVX returned an error code: +* <= N: if INFO = i, SSPEVX failed to converge; +* i eigenvectors failed to converge. Their indices +* are stored in array IFAIL. +* > N: if INFO = N + i, for 1 <= i <= N, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, UPPER, VALEIG, WANTZ + CHARACTER TRANS + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SPPTRF, SSPEVX, SSPGST, STPMV, STPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + UPPER = LSAME( UPLO, 'U' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -3 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN + INFO = -9 + ELSE IF( INDEIG .AND. IL.LT.1 ) THEN + INFO = -10 + ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN + INFO = -11 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -16 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPGVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Form a Cholesky factorization of B. +* + CALL SPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL SSPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL SSPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, ABSTOL, M, + $ W, Z, LDZ, WORK, IWORK, IFAIL, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + IF( INFO.GT.0 ) + $ M = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + DO 10 J = 1, M + CALL STPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, M + CALL STPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSPGVX +* + END diff --git a/costa/native/external/lapack/ssprfs.f b/costa/native/external/lapack/ssprfs.f new file mode 100644 index 000000000..0dff85913 --- /dev/null +++ b/costa/native/external/lapack/ssprfs.f @@ -0,0 +1,331 @@ + SUBROUTINE SSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, + $ FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL AFP( * ), AP( * ), B( LDB, * ), BERR( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SSPRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is symmetric indefinite +* and packed, and provides error bounds and backward error estimates +* for the solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input) REAL array, dimension (N*(N+1)/2) +* The upper or lower triangle of the symmetric matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* AFP (input) REAL array, dimension (N*(N+1)/2) +* The factored form of the matrix A. AFP contains the block +* diagonal matrix D and the multipliers used to obtain the +* factor U or L from the factorization A = U*D*U**T or +* A = L*D*L**T as computed by SSPTRF, stored as a packed +* triangular matrix. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by SSPTRF. +* +* B (input) REAL array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) REAL array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by SSPTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IK, J, K, KASE, KK, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SLACON, SSPMV, SSPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL SSPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK( N+1 ), + $ 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + KK = 1 + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + IK = KK + DO 40 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK + S = S + ABS( AP( IK ) )*ABS( X( I, J ) ) + IK = IK + 1 + 40 CONTINUE + WORK( K ) = WORK( K ) + ABS( AP( KK+K-1 ) )*XK + S + KK = KK + K + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + WORK( K ) = WORK( K ) + ABS( AP( KK ) )*XK + IK = KK + 1 + DO 60 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK + S = S + ABS( AP( IK ) )*ABS( X( I, J ) ) + IK = IK + 1 + 60 CONTINUE + WORK( K ) = WORK( K ) + S + KK = KK + ( N-K+1 ) + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL SSPTRS( UPLO, N, 1, AFP, IPIV, WORK( N+1 ), N, INFO ) + CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use SLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL SLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL SSPTRS( UPLO, N, 1, AFP, IPIV, WORK( N+1 ), N, + $ INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 120 CONTINUE + CALL SSPTRS( UPLO, N, 1, AFP, IPIV, WORK( N+1 ), N, + $ INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of SSPRFS +* + END diff --git a/costa/native/external/lapack/sspsv.f b/costa/native/external/lapack/sspsv.f new file mode 100644 index 000000000..eef02a4c9 --- /dev/null +++ b/costa/native/external/lapack/sspsv.f @@ -0,0 +1,149 @@ + SUBROUTINE SSPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SSPSV computes the solution to a real system of linear equations +* A * X = B, +* where A is an N-by-N symmetric matrix stored in packed format and X +* and B are N-by-NRHS matrices. +* +* The diagonal pivoting method is used to factor A as +* A = U * D * U**T, if UPLO = 'U', or +* A = L * D * L**T, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, D is symmetric and block diagonal with 1-by-1 +* and 2-by-2 diagonal blocks. The factored form of A is then used to +* solve the system of equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input/output) REAL array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**T or A = L*D*L**T as computed by SSPTRF, stored as +* a packed triangular matrix in the same storage format as A. +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D, as +* determined by SSPTRF. If IPIV(k) > 0, then rows and columns +* k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +* diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +* then rows and columns k-1 and -IPIV(k) were interchanged and +* D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +* IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +* -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +* diagonal block. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, so the solution could not be +* computed. +* +* Further Details +* =============== +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the symmetric matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = aji) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSPTRF, SSPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPSV ', -INFO ) + RETURN + END IF +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL SSPTRF( UPLO, N, AP, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL SSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* + END IF + RETURN +* +* End of SSPSV +* + END diff --git a/costa/native/external/lapack/sspsvx.f b/costa/native/external/lapack/sspsvx.f new file mode 100644 index 000000000..d2f0f9fa0 --- /dev/null +++ b/costa/native/external/lapack/sspsvx.f @@ -0,0 +1,279 @@ + SUBROUTINE SSPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, + $ LDX, RCOND, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL AFP( * ), AP( * ), B( LDB, * ), BERR( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SSPSVX uses the diagonal pivoting factorization A = U*D*U**T or +* A = L*D*L**T to compute the solution to a real system of linear +* equations A * X = B, where A is an N-by-N symmetric matrix stored +* in packed format and X and B are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'N', the diagonal pivoting method is used to factor A as +* A = U * D * U**T, if UPLO = 'U', or +* A = L * D * L**T, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices and D is symmetric and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. +* +* 2. If some D(i,i)=0, so that D is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 3. The system of equations is solved for X using the factored form +* of A. +* +* 4. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of A has been +* supplied on entry. +* = 'F': On entry, AFP and IPIV contain the factored form of +* A. AP, AFP and IPIV will not be modified. +* = 'N': The matrix A will be copied to AFP and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input) REAL array, dimension (N*(N+1)/2) +* The upper or lower triangle of the symmetric matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* +* AFP (input or output) REAL array, dimension +* (N*(N+1)/2) +* If FACT = 'F', then AFP is an input argument and on entry +* contains the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**T or A = L*D*L**T as computed by SSPTRF, stored as +* a packed triangular matrix in the same storage format as A. +* +* If FACT = 'N', then AFP is an output argument and on exit +* contains the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**T or A = L*D*L**T as computed by SSPTRF, stored as +* a packed triangular matrix in the same storage format as A. +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains details of the interchanges and the block structure +* of D, as determined by SSPTRF. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains details of the interchanges and the block structure +* of D, as determined by SSPTRF. +* +* B (input) REAL array, dimension (LDB,NRHS) +* The N-by-NRHS right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) REAL array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) REAL +* The estimate of the reciprocal condition number of the matrix +* A. If RCOND is less than the machine precision (in +* particular, if RCOND = 0), the matrix is singular to working +* precision. This condition is indicated by a return code of +* INFO > 0. +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: D(i,i) is exactly zero. The factorization +* has been completed but the factor D is exactly +* singular, so the solution and error bounds could +* not be computed. RCOND = 0 is returned. +* = N+1: D is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* Further Details +* =============== +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the symmetric matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = aji) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT + REAL ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANSP + EXTERNAL LSAME, SLAMCH, SLANSP +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLACPY, SSPCON, SSPRFS, SSPTRF, SSPTRS, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL SCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 ) + CALL SSPTRF( UPLO, N, AFP, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = SLANSP( 'I', UPLO, N, AP, WORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL SSPCON( UPLO, N, AFP, IPIV, ANORM, RCOND, WORK, IWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution vectors X. +* + CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL SSPTRS( UPLO, N, NRHS, AFP, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL SSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, FERR, + $ BERR, WORK, IWORK, INFO ) +* + RETURN +* +* End of SSPSVX +* + END diff --git a/costa/native/external/lapack/ssptrd.f b/costa/native/external/lapack/ssptrd.f new file mode 100644 index 000000000..a998c72a1 --- /dev/null +++ b/costa/native/external/lapack/ssptrd.f @@ -0,0 +1,228 @@ + SUBROUTINE SSPTRD( UPLO, N, AP, D, E, TAU, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL AP( * ), D( * ), E( * ), TAU( * ) +* .. +* +* Purpose +* ======= +* +* SSPTRD reduces a real symmetric matrix A stored in packed form to +* symmetric tridiagonal form T by an orthogonal similarity +* transformation: Q**T * A * Q = T. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) REAL array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* On exit, if UPLO = 'U', the diagonal and first superdiagonal +* of A are overwritten by the corresponding elements of the +* tridiagonal matrix T, and the elements above the first +* superdiagonal, with the array TAU, represent the orthogonal +* matrix Q as a product of elementary reflectors; if UPLO +* = 'L', the diagonal and first subdiagonal of A are over- +* written by the corresponding elements of the tridiagonal +* matrix T, and the elements below the first subdiagonal, with +* the array TAU, represent the orthogonal matrix Q as a product +* of elementary reflectors. See Further Details. +* +* D (output) REAL array, dimension (N) +* The diagonal elements of the tridiagonal matrix T: +* D(i) = A(i,i). +* +* E (output) REAL array, dimension (N-1) +* The off-diagonal elements of the tridiagonal matrix T: +* E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +* +* TAU (output) REAL array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* If UPLO = 'U', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(n-1) . . . H(2) H(1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in AP, +* overwriting A(1:i-1,i+1), and tau is stored in TAU(i). +* +* If UPLO = 'L', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(1) H(2) . . . H(n-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in AP, +* overwriting A(i+2:n,i), and tau is stored in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO, HALF + PARAMETER ( ONE = 1.0, ZERO = 0.0, HALF = 1.0 / 2.0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, I1, I1I1, II + REAL ALPHA, TAUI +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SLARFG, SSPMV, SSPR2, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPTRD', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A. +* I1 is the index in AP of A(1,I+1). +* + I1 = N*( N-1 ) / 2 + 1 + DO 10 I = N - 1, 1, -1 +* +* Generate elementary reflector H(i) = I - tau * v * v' +* to annihilate A(1:i-1,i+1) +* + CALL SLARFG( I, AP( I1+I-1 ), AP( I1 ), 1, TAUI ) + E( I ) = AP( I1+I-1 ) +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(1:i,1:i) +* + AP( I1+I-1 ) = ONE +* +* Compute y := tau * A * v storing y in TAU(1:i) +* + CALL SSPMV( UPLO, I, TAUI, AP, AP( I1 ), 1, ZERO, TAU, + $ 1 ) +* +* Compute w := y - 1/2 * tau * (y'*v) * v +* + ALPHA = -HALF*TAUI*SDOT( I, TAU, 1, AP( I1 ), 1 ) + CALL SAXPY( I, ALPHA, AP( I1 ), 1, TAU, 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w' - w * v' +* + CALL SSPR2( UPLO, I, -ONE, AP( I1 ), 1, TAU, 1, AP ) +* + AP( I1+I-1 ) = E( I ) + END IF + D( I+1 ) = AP( I1+I ) + TAU( I ) = TAUI + I1 = I1 - I + 10 CONTINUE + D( 1 ) = AP( 1 ) + ELSE +* +* Reduce the lower triangle of A. II is the index in AP of +* A(i,i) and I1I1 is the index of A(i+1,i+1). +* + II = 1 + DO 20 I = 1, N - 1 + I1I1 = II + N - I + 1 +* +* Generate elementary reflector H(i) = I - tau * v * v' +* to annihilate A(i+2:n,i) +* + CALL SLARFG( N-I, AP( II+1 ), AP( II+2 ), 1, TAUI ) + E( I ) = AP( II+1 ) +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(i+1:n,i+1:n) +* + AP( II+1 ) = ONE +* +* Compute y := tau * A * v storing y in TAU(i:n-1) +* + CALL SSPMV( UPLO, N-I, TAUI, AP( I1I1 ), AP( II+1 ), 1, + $ ZERO, TAU( I ), 1 ) +* +* Compute w := y - 1/2 * tau * (y'*v) * v +* + ALPHA = -HALF*TAUI*SDOT( N-I, TAU( I ), 1, AP( II+1 ), + $ 1 ) + CALL SAXPY( N-I, ALPHA, AP( II+1 ), 1, TAU( I ), 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w' - w * v' +* + CALL SSPR2( UPLO, N-I, -ONE, AP( II+1 ), 1, TAU( I ), 1, + $ AP( I1I1 ) ) +* + AP( II+1 ) = E( I ) + END IF + D( I ) = AP( II ) + TAU( I ) = TAUI + II = I1I1 + 20 CONTINUE + D( N ) = AP( II ) + END IF +* + RETURN +* +* End of SSPTRD +* + END diff --git a/costa/native/external/lapack/ssptrf.f b/costa/native/external/lapack/ssptrf.f new file mode 100644 index 000000000..d2a4bba9e --- /dev/null +++ b/costa/native/external/lapack/ssptrf.f @@ -0,0 +1,548 @@ + SUBROUTINE SSPTRF( UPLO, N, AP, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL AP( * ) +* .. +* +* Purpose +* ======= +* +* SSPTRF computes the factorization of a real symmetric matrix A stored +* in packed format using the Bunch-Kaufman diagonal pivoting method: +* +* A = U*D*U**T or A = L*D*L**T +* +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is symmetric and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) REAL array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L, stored as a packed triangular +* matrix overwriting A (see below for further details). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, and division by zero will occur if it +* is used to solve a system of equations. +* +* Further Details +* =============== +* +* 5-96 - Based on modifications by J. Lewis, Boeing Computer Services +* Company +* +* If UPLO = 'U', then A = U*D*U', where +* U = P(n)*U(n)* ... *P(k)U(k)* ..., +* i.e., U is a product of terms P(k)*U(k), where k decreases from n to +* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I v 0 ) k-s +* U(k) = ( 0 I 0 ) s +* ( 0 0 I ) n-k +* k-s s n-k +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +* and A(k,k), and v overwrites A(1:k-2,k-1:k). +* +* If UPLO = 'L', then A = L*D*L', where +* L = P(1)*L(1)* ... *P(k)*L(k)* ..., +* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I 0 0 ) k-1 +* L(k) = ( 0 I 0 ) s +* ( 0 v I ) n-k-s+1 +* k-1 s n-k-s+1 +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KC, KK, KNC, KP, KPC, + $ KSTEP, KX, NPP + REAL ABSAKK, ALPHA, COLMAX, D11, D12, D21, D22, R1, + $ ROWMAX, T, WK, WKM1, WKP1 +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + EXTERNAL LSAME, ISAMAX +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSPR, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPTRF', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U' using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + KC = ( N-1 )*N / 2 + 1 + 10 CONTINUE + KNC = KC +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( AP( KC+K-1 ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = ISAMAX( K-1, AP( KC ), 1 ) + COLMAX = ABS( AP( KC+IMAX-1 ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + ROWMAX = ZERO + JMAX = IMAX + KX = IMAX*( IMAX+1 ) / 2 + IMAX + DO 20 J = IMAX + 1, K + IF( ABS( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = ABS( AP( KX ) ) + JMAX = J + END IF + KX = KX + J + 20 CONTINUE + KPC = ( IMAX-1 )*IMAX / 2 + 1 + IF( IMAX.GT.1 ) THEN + JMAX = ISAMAX( IMAX-1, AP( KPC ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( AP( KPC+JMAX-1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( AP( KPC+IMAX-1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL SSWAP( KP-1, AP( KNC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 30 J = KP + 1, KK - 1 + KX = KX + J - 1 + T = AP( KNC+J-1 ) + AP( KNC+J-1 ) = AP( KX ) + AP( KX ) = T + 30 CONTINUE + T = AP( KNC+KK-1 ) + AP( KNC+KK-1 ) = AP( KPC+KP-1 ) + AP( KPC+KP-1 ) = T + IF( KSTEP.EQ.2 ) THEN + T = AP( KC+K-2 ) + AP( KC+K-2 ) = AP( KC+KP-1 ) + AP( KC+KP-1 ) = T + END IF + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)' +* + R1 = ONE / AP( KC+K-1 ) + CALL SSPR( UPLO, K-1, -R1, AP( KC ), 1, AP ) +* +* Store U(k) in column k +* + CALL SSCAL( K-1, R1, AP( KC ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )' +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )' +* + IF( K.GT.2 ) THEN +* + D12 = AP( K-1+( K-1 )*K / 2 ) + D22 = AP( K-1+( K-2 )*( K-1 ) / 2 ) / D12 + D11 = AP( K+( K-1 )*K / 2 ) / D12 + T = ONE / ( D11*D22-ONE ) + D12 = T / D12 +* + DO 50 J = K - 2, 1, -1 + WKM1 = D12*( D11*AP( J+( K-2 )*( K-1 ) / 2 )- + $ AP( J+( K-1 )*K / 2 ) ) + WK = D12*( D22*AP( J+( K-1 )*K / 2 )- + $ AP( J+( K-2 )*( K-1 ) / 2 ) ) + DO 40 I = J, 1, -1 + AP( I+( J-1 )*J / 2 ) = AP( I+( J-1 )*J / 2 ) - + $ AP( I+( K-1 )*K / 2 )*WK - + $ AP( I+( K-2 )*( K-1 ) / 2 )*WKM1 + 40 CONTINUE + AP( J+( K-1 )*K / 2 ) = WK + AP( J+( K-2 )*( K-1 ) / 2 ) = WKM1 + 50 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + KC = KNC - K + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L' using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + KC = 1 + NPP = N*( N+1 ) / 2 + 60 CONTINUE + KNC = KC +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( AP( KC ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + ISAMAX( N-K, AP( KC+1 ), 1 ) + COLMAX = ABS( AP( KC+IMAX-K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + ROWMAX = ZERO + KX = KC + IMAX - K + DO 70 J = K, IMAX - 1 + IF( ABS( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = ABS( AP( KX ) ) + JMAX = J + END IF + KX = KX + N - J + 70 CONTINUE + KPC = NPP - ( N-IMAX+1 )*( N-IMAX+2 ) / 2 + 1 + IF( IMAX.LT.N ) THEN + JMAX = IMAX + ISAMAX( N-IMAX, AP( KPC+1 ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( AP( KPC+JMAX-IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( AP( KPC ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC + N - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL SSWAP( N-KP, AP( KNC+KP-KK+1 ), 1, AP( KPC+1 ), + $ 1 ) + KX = KNC + KP - KK + DO 80 J = KK + 1, KP - 1 + KX = KX + N - J + 1 + T = AP( KNC+J-KK ) + AP( KNC+J-KK ) = AP( KX ) + AP( KX ) = T + 80 CONTINUE + T = AP( KNC ) + AP( KNC ) = AP( KPC ) + AP( KPC ) = T + IF( KSTEP.EQ.2 ) THEN + T = AP( KC+1 ) + AP( KC+1 ) = AP( KC+KP-K ) + AP( KC+KP-K ) = T + END IF + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)' +* + R1 = ONE / AP( KC ) + CALL SSPR( UPLO, N-K, -R1, AP( KC+1 ), 1, + $ AP( KC+N-K+1 ) ) +* +* Store L(k) in column K +* + CALL SSCAL( N-K, R1, AP( KC+1 ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k): columns K and K+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )' +* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )' +* + D21 = AP( K+1+( K-1 )*( 2*N-K ) / 2 ) + D11 = AP( K+1+K*( 2*N-K-1 ) / 2 ) / D21 + D22 = AP( K+( K-1 )*( 2*N-K ) / 2 ) / D21 + T = ONE / ( D11*D22-ONE ) + D21 = T / D21 +* + DO 100 J = K + 2, N + WK = D21*( D11*AP( J+( K-1 )*( 2*N-K ) / 2 )- + $ AP( J+K*( 2*N-K-1 ) / 2 ) ) + WKP1 = D21*( D22*AP( J+K*( 2*N-K-1 ) / 2 )- + $ AP( J+( K-1 )*( 2*N-K ) / 2 ) ) +* + DO 90 I = J, N + AP( I+( J-1 )*( 2*N-J ) / 2 ) = AP( I+( J-1 )* + $ ( 2*N-J ) / 2 ) - AP( I+( K-1 )*( 2*N-K ) / + $ 2 )*WK - AP( I+K*( 2*N-K-1 ) / 2 )*WKP1 + 90 CONTINUE +* + AP( J+( K-1 )*( 2*N-K ) / 2 ) = WK + AP( J+K*( 2*N-K-1 ) / 2 ) = WKP1 +* + 100 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + KC = KNC + N - K + 2 + GO TO 60 +* + END IF +* + 110 CONTINUE + RETURN +* +* End of SSPTRF +* + END diff --git a/costa/native/external/lapack/ssptri.f b/costa/native/external/lapack/ssptri.f new file mode 100644 index 000000000..b562cc2c4 --- /dev/null +++ b/costa/native/external/lapack/ssptri.f @@ -0,0 +1,335 @@ + SUBROUTINE SSPTRI( UPLO, N, AP, IPIV, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL AP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SSPTRI computes the inverse of a real symmetric indefinite matrix +* A in packed storage using the factorization A = U*D*U**T or +* A = L*D*L**T computed by SSPTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) REAL array, dimension (N*(N+1)/2) +* On entry, the block diagonal matrix D and the multipliers +* used to obtain the factor U or L as computed by SSPTRF, +* stored as a packed triangular matrix. +* +* On exit, if INFO = 0, the (symmetric) inverse of the original +* matrix, stored as a packed triangular matrix. The j-th column +* of inv(A) is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j; +* if UPLO = 'L', +* AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by SSPTRF. +* +* WORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +* inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KCNEXT, KP, KPC, KSTEP, KX, NPP + REAL AK, AKKP1, AKP1, D, T, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SSPMV, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + KP = N*( N+1 ) / 2 + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP - INFO + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + KP = 1 + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP + N - INFO + 1 + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + KCNEXT = KC + K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC+K-1 ) = ONE / AP( KC+K-1 ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL SCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL SSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ), + $ 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ SDOT( K-1, WORK, 1, AP( KC ), 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( AP( KCNEXT+K-1 ) ) + AK = AP( KC+K-1 ) / T + AKP1 = AP( KCNEXT+K ) / T + AKKP1 = AP( KCNEXT+K-1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KC+K-1 ) = AKP1 / D + AP( KCNEXT+K ) = AK / D + AP( KCNEXT+K-1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL SCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL SSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ), + $ 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ SDOT( K-1, WORK, 1, AP( KC ), 1 ) + AP( KCNEXT+K-1 ) = AP( KCNEXT+K-1 ) - + $ SDOT( K-1, AP( KC ), 1, AP( KCNEXT ), + $ 1 ) + CALL SCOPY( K-1, AP( KCNEXT ), 1, WORK, 1 ) + CALL SSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, + $ AP( KCNEXT ), 1 ) + AP( KCNEXT+K ) = AP( KCNEXT+K ) - + $ SDOT( K-1, WORK, 1, AP( KCNEXT ), 1 ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT + K + 1 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + KPC = ( KP-1 )*KP / 2 + 1 + CALL SSWAP( KP-1, AP( KC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 40 J = KP + 1, K - 1 + KX = KX + J - 1 + TEMP = AP( KC+J-1 ) + AP( KC+J-1 ) = AP( KX ) + AP( KX ) = TEMP + 40 CONTINUE + TEMP = AP( KC+K-1 ) + AP( KC+K-1 ) = AP( KPC+KP-1 ) + AP( KPC+KP-1 ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC+K+K-1 ) + AP( KC+K+K-1 ) = AP( KC+K+KP-1 ) + AP( KC+K+KP-1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + KC = KCNEXT + GO TO 30 + 50 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + NPP = N*( N+1 ) / 2 + K = N + KC = NPP + 60 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 80 +* + KCNEXT = KC - ( N-K+2 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC ) = ONE / AP( KC ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL SCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL SSPMV( UPLO, N-K, -ONE, AP( KC+N-K+1 ), WORK, 1, + $ ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - SDOT( N-K, WORK, 1, AP( KC+1 ), 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( AP( KCNEXT+1 ) ) + AK = AP( KCNEXT ) / T + AKP1 = AP( KC ) / T + AKKP1 = AP( KCNEXT+1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KCNEXT ) = AKP1 / D + AP( KC ) = AK / D + AP( KCNEXT+1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL SCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL SSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1, + $ ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - SDOT( N-K, WORK, 1, AP( KC+1 ), 1 ) + AP( KCNEXT+1 ) = AP( KCNEXT+1 ) - + $ SDOT( N-K, AP( KC+1 ), 1, + $ AP( KCNEXT+2 ), 1 ) + CALL SCOPY( N-K, AP( KCNEXT+2 ), 1, WORK, 1 ) + CALL SSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1, + $ ZERO, AP( KCNEXT+2 ), 1 ) + AP( KCNEXT ) = AP( KCNEXT ) - + $ SDOT( N-K, WORK, 1, AP( KCNEXT+2 ), 1 ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT - ( N-K+3 ) + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + KPC = NPP - ( N-KP+1 )*( N-KP+2 ) / 2 + 1 + IF( KP.LT.N ) + $ CALL SSWAP( N-KP, AP( KC+KP-K+1 ), 1, AP( KPC+1 ), 1 ) + KX = KC + KP - K + DO 70 J = K + 1, KP - 1 + KX = KX + N - J + 1 + TEMP = AP( KC+J-K ) + AP( KC+J-K ) = AP( KX ) + AP( KX ) = TEMP + 70 CONTINUE + TEMP = AP( KC ) + AP( KC ) = AP( KPC ) + AP( KPC ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC-N+K-1 ) + AP( KC-N+K-1 ) = AP( KC-N+KP-1 ) + AP( KC-N+KP-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + KC = KCNEXT + GO TO 60 + 80 CONTINUE + END IF +* + RETURN +* +* End of SSPTRI +* + END diff --git a/costa/native/external/lapack/ssptrs.f b/costa/native/external/lapack/ssptrs.f new file mode 100644 index 000000000..394940dc6 --- /dev/null +++ b/costa/native/external/lapack/ssptrs.f @@ -0,0 +1,378 @@ + SUBROUTINE SSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SSPTRS solves a system of linear equations A*X = B with a real +* symmetric matrix A stored in packed format using the factorization +* A = U*D*U**T or A = L*D*L**T computed by SSPTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input) REAL array, dimension (N*(N+1)/2) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by SSPTRF, stored as a +* packed triangular matrix. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by SSPTRF. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KP + REAL AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SGER, SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U'. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + KC = KC - K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL SGER( K-1, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL SSCAL( NRHS, ONE / AP( KC+K-1 ), B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL SSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL SGER( K-2, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL SGER( K-2, NRHS, -ONE, AP( KC-( K-1 ) ), 1, + $ B( K-1, 1 ), LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+K-2 ) + AKM1 = AP( KC-1 ) / AKM1K + AK = AP( KC+K-1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / AKM1K + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + KC = KC - K + 1 + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U'*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U'(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL SGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ), + $ 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + K + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U'(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + CALL SGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ), + $ 1, ONE, B( K, 1 ), LDB ) + CALL SGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, + $ AP( KC+K ), 1, ONE, B( K+1, 1 ), LDB ) +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + 2*K + 1 + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L'. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL SGER( N-K, NRHS, -ONE, AP( KC+1 ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL SSCAL( NRHS, ONE / AP( KC ), B( K, 1 ), LDB ) + KC = KC + N - K + 1 + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL SSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL SGER( N-K-1, NRHS, -ONE, AP( KC+2 ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL SGER( N-K-1, NRHS, -ONE, AP( KC+N-K+2 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+1 ) + AKM1 = AP( KC ) / AKM1K + AK = AP( KC+N-K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / AKM1K + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + KC = KC + 2*( N-K ) + 1 + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L'*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + KC = KC - ( N-K+1 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L'(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL SGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L'(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL SGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB ) + CALL SGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC-( N-K ) ), 1, ONE, B( K-1, 1 ), + $ LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC - ( N-K+2 ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of SSPTRS +* + END diff --git a/costa/native/external/lapack/sstebz.f b/costa/native/external/lapack/sstebz.f new file mode 100644 index 000000000..7da2f4404 --- /dev/null +++ b/costa/native/external/lapack/sstebz.f @@ -0,0 +1,651 @@ + SUBROUTINE SSTEBZ( RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL, D, E, + $ M, NSPLIT, W, IBLOCK, ISPLIT, WORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER ORDER, RANGE + INTEGER IL, INFO, IU, M, N, NSPLIT + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), ISPLIT( * ), IWORK( * ) + REAL D( * ), E( * ), W( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SSTEBZ computes the eigenvalues of a symmetric tridiagonal +* matrix T. The user may ask for all eigenvalues, all eigenvalues +* in the half-open interval (VL, VU], or the IL-th through IU-th +* eigenvalues. +* +* To avoid overflow, the matrix must be scaled so that its +* largest element is no greater than overflow**(1/2) * +* underflow**(1/4) in absolute value, and for greatest +* accuracy, it should not be much smaller than that. +* +* See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal +* Matrix", Report CS41, Computer Science Dept., Stanford +* University, July 21, 1966. +* +* Arguments +* ========= +* +* RANGE (input) CHARACTER +* = 'A': ("All") all eigenvalues will be found. +* = 'V': ("Value") all eigenvalues in the half-open interval +* (VL, VU] will be found. +* = 'I': ("Index") the IL-th through IU-th eigenvalues (of the +* entire matrix) will be found. +* +* ORDER (input) CHARACTER +* = 'B': ("By Block") the eigenvalues will be grouped by +* split-off block (see IBLOCK, ISPLIT) and +* ordered from smallest to largest within +* the block. +* = 'E': ("Entire matrix") +* the eigenvalues for the entire matrix +* will be ordered from smallest to +* largest. +* +* N (input) INTEGER +* The order of the tridiagonal matrix T. N >= 0. +* +* VL (input) REAL +* VU (input) REAL +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. Eigenvalues less than or equal +* to VL, or greater than VU, will not be returned. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) REAL +* The absolute tolerance for the eigenvalues. An eigenvalue +* (or cluster) is considered to be located if it has been +* determined to lie in an interval whose width is ABSTOL or +* less. If ABSTOL is less than or equal to zero, then ULP*|T| +* will be used, where |T| means the 1-norm of T. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*SLAMCH('S'), not zero. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the tridiagonal matrix T. +* +* E (input) REAL array, dimension (N-1) +* The (n-1) off-diagonal elements of the tridiagonal matrix T. +* +* M (output) INTEGER +* The actual number of eigenvalues found. 0 <= M <= N. +* (See also the description of INFO=2,3.) +* +* NSPLIT (output) INTEGER +* The number of diagonal blocks in the matrix T. +* 1 <= NSPLIT <= N. +* +* W (output) REAL array, dimension (N) +* On exit, the first M elements of W will contain the +* eigenvalues. (SSTEBZ may use the remaining N-M elements as +* workspace.) +* +* IBLOCK (output) INTEGER array, dimension (N) +* At each row/column j where E(j) is zero or small, the +* matrix T is considered to split into a block diagonal +* matrix. On exit, if INFO = 0, IBLOCK(i) specifies to which +* block (from 1 to the number of blocks) the eigenvalue W(i) +* belongs. (SSTEBZ may use the remaining N-M elements as +* workspace.) +* +* ISPLIT (output) INTEGER array, dimension (N) +* The splitting points, at which T breaks up into submatrices. +* The first submatrix consists of rows/columns 1 to ISPLIT(1), +* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), +* etc., and the NSPLIT-th consists of rows/columns +* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. +* (Only the first NSPLIT elements will actually be used, but +* since the user cannot know a priori what value NSPLIT will +* have, N words must be reserved for ISPLIT.) +* +* WORK (workspace) REAL array, dimension (4*N) +* +* IWORK (workspace) INTEGER array, dimension (3*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: some or all of the eigenvalues failed to converge or +* were not computed: +* =1 or 3: Bisection failed to converge for some +* eigenvalues; these eigenvalues are flagged by a +* negative block number. The effect is that the +* eigenvalues may not be as accurate as the +* absolute and relative tolerances. This is +* generally caused by unexpectedly inaccurate +* arithmetic. +* =2 or 3: RANGE='I' only: Not all of the eigenvalues +* IL:IU were found. +* Effect: M < IU+1-IL +* Cause: non-monotonic arithmetic, causing the +* Sturm sequence to be non-monotonic. +* Cure: recalculate, using RANGE='A', and pick +* out eigenvalues IL:IU. In some cases, +* increasing the PARAMETER "FUDGE" may +* make things work. +* = 4: RANGE='I', and the Gershgorin interval +* initially used was too small. No eigenvalues +* were computed. +* Probable cause: your machine has sloppy +* floating-point arithmetic. +* Cure: Increase the PARAMETER "FUDGE", +* recompile, and try again. +* +* Internal Parameters +* =================== +* +* RELFAC REAL, default = 2.0e0 +* The relative tolerance. An interval (a,b] lies within +* "relative tolerance" if b-a < RELFAC*ulp*max(|a|,|b|), +* where "ulp" is the machine precision (distance from 1 to +* the next larger floating point number.) +* +* FUDGE REAL, default = 2 +* A "fudge factor" to widen the Gershgorin intervals. Ideally, +* a value of 1 should work, but on machines with sloppy +* arithmetic, this needs to be larger. The default for +* publicly released versions should be large enough to handle +* the worst machine around. Note that this has no effect +* on accuracy of the solution. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, HALF + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ HALF = 1.0E0 / TWO ) + REAL FUDGE, RELFAC + PARAMETER ( FUDGE = 2.0E0, RELFAC = 2.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL NCNVRG, TOOFEW + INTEGER IB, IBEGIN, IDISCL, IDISCU, IE, IEND, IINFO, + $ IM, IN, IOFF, IORDER, IOUT, IRANGE, ITMAX, + $ ITMP1, IW, IWOFF, J, JB, JDISC, JE, NB, NWL, + $ NWU + REAL ATOLI, BNORM, GL, GU, PIVMIN, RTOLI, SAFEMN, + $ TMP1, TMP2, TNORM, ULP, WKILL, WL, WLU, WU, WUL +* .. +* .. Local Arrays .. + INTEGER IDUMMA( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH + EXTERNAL LSAME, ILAENV, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLAEBZ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Decode RANGE +* + IF( LSAME( RANGE, 'A' ) ) THEN + IRANGE = 1 + ELSE IF( LSAME( RANGE, 'V' ) ) THEN + IRANGE = 2 + ELSE IF( LSAME( RANGE, 'I' ) ) THEN + IRANGE = 3 + ELSE + IRANGE = 0 + END IF +* +* Decode ORDER +* + IF( LSAME( ORDER, 'B' ) ) THEN + IORDER = 2 + ELSE IF( LSAME( ORDER, 'E' ) ) THEN + IORDER = 1 + ELSE + IORDER = 0 + END IF +* +* Check for Errors +* + IF( IRANGE.LE.0 ) THEN + INFO = -1 + ELSE IF( IORDER.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( IRANGE.EQ.2 ) THEN + IF( VL.GE.VU ) INFO = -5 + ELSE IF( IRANGE.EQ.3 .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) + $ THEN + INFO = -6 + ELSE IF( IRANGE.EQ.3 .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) + $ THEN + INFO = -7 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSTEBZ', -INFO ) + RETURN + END IF +* +* Initialize error flags +* + INFO = 0 + NCNVRG = .FALSE. + TOOFEW = .FALSE. +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* +* Simplifications: +* + IF( IRANGE.EQ.3 .AND. IL.EQ.1 .AND. IU.EQ.N ) + $ IRANGE = 1 +* +* Get machine constants +* NB is the minimum vector length for vector bisection, or 0 +* if only scalar is to be done. +* + SAFEMN = SLAMCH( 'S' ) + ULP = SLAMCH( 'P' ) + RTOLI = ULP*RELFAC + NB = ILAENV( 1, 'SSTEBZ', ' ', N, -1, -1, -1 ) + IF( NB.LE.1 ) + $ NB = 0 +* +* Special Case when N=1 +* + IF( N.EQ.1 ) THEN + NSPLIT = 1 + ISPLIT( 1 ) = 1 + IF( IRANGE.EQ.2 .AND. ( VL.GE.D( 1 ) .OR. VU.LT.D( 1 ) ) ) THEN + M = 0 + ELSE + W( 1 ) = D( 1 ) + IBLOCK( 1 ) = 1 + M = 1 + END IF + RETURN + END IF +* +* Compute Splitting Points +* + NSPLIT = 1 + WORK( N ) = ZERO + PIVMIN = ONE +* +CDIR$ NOVECTOR + DO 10 J = 2, N + TMP1 = E( J-1 )**2 + IF( ABS( D( J )*D( J-1 ) )*ULP**2+SAFEMN.GT.TMP1 ) THEN + ISPLIT( NSPLIT ) = J - 1 + NSPLIT = NSPLIT + 1 + WORK( J-1 ) = ZERO + ELSE + WORK( J-1 ) = TMP1 + PIVMIN = MAX( PIVMIN, TMP1 ) + END IF + 10 CONTINUE + ISPLIT( NSPLIT ) = N + PIVMIN = PIVMIN*SAFEMN +* +* Compute Interval and ATOLI +* + IF( IRANGE.EQ.3 ) THEN +* +* RANGE='I': Compute the interval containing eigenvalues +* IL through IU. +* +* Compute Gershgorin interval for entire (split) matrix +* and use it as the initial interval +* + GU = D( 1 ) + GL = D( 1 ) + TMP1 = ZERO +* + DO 20 J = 1, N - 1 + TMP2 = SQRT( WORK( J ) ) + GU = MAX( GU, D( J )+TMP1+TMP2 ) + GL = MIN( GL, D( J )-TMP1-TMP2 ) + TMP1 = TMP2 + 20 CONTINUE +* + GU = MAX( GU, D( N )+TMP1 ) + GL = MIN( GL, D( N )-TMP1 ) + TNORM = MAX( ABS( GL ), ABS( GU ) ) + GL = GL - FUDGE*TNORM*ULP*N - FUDGE*TWO*PIVMIN + GU = GU + FUDGE*TNORM*ULP*N + FUDGE*PIVMIN +* +* Compute Iteration parameters +* + ITMAX = INT( ( LOG( TNORM+PIVMIN )-LOG( PIVMIN ) ) / + $ LOG( TWO ) ) + 2 + IF( ABSTOL.LE.ZERO ) THEN + ATOLI = ULP*TNORM + ELSE + ATOLI = ABSTOL + END IF +* + WORK( N+1 ) = GL + WORK( N+2 ) = GL + WORK( N+3 ) = GU + WORK( N+4 ) = GU + WORK( N+5 ) = GL + WORK( N+6 ) = GU + IWORK( 1 ) = -1 + IWORK( 2 ) = -1 + IWORK( 3 ) = N + 1 + IWORK( 4 ) = N + 1 + IWORK( 5 ) = IL - 1 + IWORK( 6 ) = IU +* + CALL SLAEBZ( 3, ITMAX, N, 2, 2, NB, ATOLI, RTOLI, PIVMIN, D, E, + $ WORK, IWORK( 5 ), WORK( N+1 ), WORK( N+5 ), IOUT, + $ IWORK, W, IBLOCK, IINFO ) +* + IF( IWORK( 6 ).EQ.IU ) THEN + WL = WORK( N+1 ) + WLU = WORK( N+3 ) + NWL = IWORK( 1 ) + WU = WORK( N+4 ) + WUL = WORK( N+2 ) + NWU = IWORK( 4 ) + ELSE + WL = WORK( N+2 ) + WLU = WORK( N+4 ) + NWL = IWORK( 2 ) + WU = WORK( N+3 ) + WUL = WORK( N+1 ) + NWU = IWORK( 3 ) + END IF +* + IF( NWL.LT.0 .OR. NWL.GE.N .OR. NWU.LT.1 .OR. NWU.GT.N ) THEN + INFO = 4 + RETURN + END IF + ELSE +* +* RANGE='A' or 'V' -- Set ATOLI +* + TNORM = MAX( ABS( D( 1 ) )+ABS( E( 1 ) ), + $ ABS( D( N ) )+ABS( E( N-1 ) ) ) +* + DO 30 J = 2, N - 1 + TNORM = MAX( TNORM, ABS( D( J ) )+ABS( E( J-1 ) )+ + $ ABS( E( J ) ) ) + 30 CONTINUE +* + IF( ABSTOL.LE.ZERO ) THEN + ATOLI = ULP*TNORM + ELSE + ATOLI = ABSTOL + END IF +* + IF( IRANGE.EQ.2 ) THEN + WL = VL + WU = VU + ELSE + WL = ZERO + WU = ZERO + END IF + END IF +* +* Find Eigenvalues -- Loop Over Blocks and recompute NWL and NWU. +* NWL accumulates the number of eigenvalues .le. WL, +* NWU accumulates the number of eigenvalues .le. WU +* + M = 0 + IEND = 0 + INFO = 0 + NWL = 0 + NWU = 0 +* + DO 70 JB = 1, NSPLIT + IOFF = IEND + IBEGIN = IOFF + 1 + IEND = ISPLIT( JB ) + IN = IEND - IOFF +* + IF( IN.EQ.1 ) THEN +* +* Special Case -- IN=1 +* + IF( IRANGE.EQ.1 .OR. WL.GE.D( IBEGIN )-PIVMIN ) + $ NWL = NWL + 1 + IF( IRANGE.EQ.1 .OR. WU.GE.D( IBEGIN )-PIVMIN ) + $ NWU = NWU + 1 + IF( IRANGE.EQ.1 .OR. ( WL.LT.D( IBEGIN )-PIVMIN .AND. WU.GE. + $ D( IBEGIN )-PIVMIN ) ) THEN + M = M + 1 + W( M ) = D( IBEGIN ) + IBLOCK( M ) = JB + END IF + ELSE +* +* General Case -- IN > 1 +* +* Compute Gershgorin Interval +* and use it as the initial interval +* + GU = D( IBEGIN ) + GL = D( IBEGIN ) + TMP1 = ZERO +* + DO 40 J = IBEGIN, IEND - 1 + TMP2 = ABS( E( J ) ) + GU = MAX( GU, D( J )+TMP1+TMP2 ) + GL = MIN( GL, D( J )-TMP1-TMP2 ) + TMP1 = TMP2 + 40 CONTINUE +* + GU = MAX( GU, D( IEND )+TMP1 ) + GL = MIN( GL, D( IEND )-TMP1 ) + BNORM = MAX( ABS( GL ), ABS( GU ) ) + GL = GL - FUDGE*BNORM*ULP*IN - FUDGE*PIVMIN + GU = GU + FUDGE*BNORM*ULP*IN + FUDGE*PIVMIN +* +* Compute ATOLI for the current submatrix +* + IF( ABSTOL.LE.ZERO ) THEN + ATOLI = ULP*MAX( ABS( GL ), ABS( GU ) ) + ELSE + ATOLI = ABSTOL + END IF +* + IF( IRANGE.GT.1 ) THEN + IF( GU.LT.WL ) THEN + NWL = NWL + IN + NWU = NWU + IN + GO TO 70 + END IF + GL = MAX( GL, WL ) + GU = MIN( GU, WU ) + IF( GL.GE.GU ) + $ GO TO 70 + END IF +* +* Set Up Initial Interval +* + WORK( N+1 ) = GL + WORK( N+IN+1 ) = GU + CALL SLAEBZ( 1, 0, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN, + $ D( IBEGIN ), E( IBEGIN ), WORK( IBEGIN ), + $ IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IM, + $ IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO ) +* + NWL = NWL + IWORK( 1 ) + NWU = NWU + IWORK( IN+1 ) + IWOFF = M - IWORK( 1 ) +* +* Compute Eigenvalues +* + ITMAX = INT( ( LOG( GU-GL+PIVMIN )-LOG( PIVMIN ) ) / + $ LOG( TWO ) ) + 2 + CALL SLAEBZ( 2, ITMAX, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN, + $ D( IBEGIN ), E( IBEGIN ), WORK( IBEGIN ), + $ IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IOUT, + $ IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO ) +* +* Copy Eigenvalues Into W and IBLOCK +* Use -JB for block number for unconverged eigenvalues. +* + DO 60 J = 1, IOUT + TMP1 = HALF*( WORK( J+N )+WORK( J+IN+N ) ) +* +* Flag non-convergence. +* + IF( J.GT.IOUT-IINFO ) THEN + NCNVRG = .TRUE. + IB = -JB + ELSE + IB = JB + END IF + DO 50 JE = IWORK( J ) + 1 + IWOFF, + $ IWORK( J+IN ) + IWOFF + W( JE ) = TMP1 + IBLOCK( JE ) = IB + 50 CONTINUE + 60 CONTINUE +* + M = M + IM + END IF + 70 CONTINUE +* +* If RANGE='I', then (WL,WU) contains eigenvalues NWL+1,...,NWU +* If NWL+1 < IL or NWU > IU, discard extra eigenvalues. +* + IF( IRANGE.EQ.3 ) THEN + IM = 0 + IDISCL = IL - 1 - NWL + IDISCU = NWU - IU +* + IF( IDISCL.GT.0 .OR. IDISCU.GT.0 ) THEN + DO 80 JE = 1, M + IF( W( JE ).LE.WLU .AND. IDISCL.GT.0 ) THEN + IDISCL = IDISCL - 1 + ELSE IF( W( JE ).GE.WUL .AND. IDISCU.GT.0 ) THEN + IDISCU = IDISCU - 1 + ELSE + IM = IM + 1 + W( IM ) = W( JE ) + IBLOCK( IM ) = IBLOCK( JE ) + END IF + 80 CONTINUE + M = IM + END IF + IF( IDISCL.GT.0 .OR. IDISCU.GT.0 ) THEN +* +* Code to deal with effects of bad arithmetic: +* Some low eigenvalues to be discarded are not in (WL,WLU], +* or high eigenvalues to be discarded are not in (WUL,WU] +* so just kill off the smallest IDISCL/largest IDISCU +* eigenvalues, by simply finding the smallest/largest +* eigenvalue(s). +* +* (If N(w) is monotone non-decreasing, this should never +* happen.) +* + IF( IDISCL.GT.0 ) THEN + WKILL = WU + DO 100 JDISC = 1, IDISCL + IW = 0 + DO 90 JE = 1, M + IF( IBLOCK( JE ).NE.0 .AND. + $ ( W( JE ).LT.WKILL .OR. IW.EQ.0 ) ) THEN + IW = JE + WKILL = W( JE ) + END IF + 90 CONTINUE + IBLOCK( IW ) = 0 + 100 CONTINUE + END IF + IF( IDISCU.GT.0 ) THEN +* + WKILL = WL + DO 120 JDISC = 1, IDISCU + IW = 0 + DO 110 JE = 1, M + IF( IBLOCK( JE ).NE.0 .AND. + $ ( W( JE ).GT.WKILL .OR. IW.EQ.0 ) ) THEN + IW = JE + WKILL = W( JE ) + END IF + 110 CONTINUE + IBLOCK( IW ) = 0 + 120 CONTINUE + END IF + IM = 0 + DO 130 JE = 1, M + IF( IBLOCK( JE ).NE.0 ) THEN + IM = IM + 1 + W( IM ) = W( JE ) + IBLOCK( IM ) = IBLOCK( JE ) + END IF + 130 CONTINUE + M = IM + END IF + IF( IDISCL.LT.0 .OR. IDISCU.LT.0 ) THEN + TOOFEW = .TRUE. + END IF + END IF +* +* If ORDER='B', do nothing -- the eigenvalues are already sorted +* by block. +* If ORDER='E', sort the eigenvalues from smallest to largest +* + IF( IORDER.EQ.1 .AND. NSPLIT.GT.1 ) THEN + DO 150 JE = 1, M - 1 + IE = 0 + TMP1 = W( JE ) + DO 140 J = JE + 1, M + IF( W( J ).LT.TMP1 ) THEN + IE = J + TMP1 = W( J ) + END IF + 140 CONTINUE +* + IF( IE.NE.0 ) THEN + ITMP1 = IBLOCK( IE ) + W( IE ) = W( JE ) + IBLOCK( IE ) = IBLOCK( JE ) + W( JE ) = TMP1 + IBLOCK( JE ) = ITMP1 + END IF + 150 CONTINUE + END IF +* + INFO = 0 + IF( NCNVRG ) + $ INFO = INFO + 1 + IF( TOOFEW ) + $ INFO = INFO + 2 + RETURN +* +* End of SSTEBZ +* + END diff --git a/costa/native/external/lapack/sstedc.f b/costa/native/external/lapack/sstedc.f new file mode 100644 index 000000000..5d60e9da1 --- /dev/null +++ b/costa/native/external/lapack/sstedc.f @@ -0,0 +1,397 @@ + SUBROUTINE SSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSTEDC computes all eigenvalues and, optionally, eigenvectors of a +* symmetric tridiagonal matrix using the divide and conquer method. +* The eigenvectors of a full or band real symmetric matrix can also be +* found if SSYTRD or SSPTRD or SSBTRD has been used to reduce this +* matrix to tridiagonal form. +* +* This code makes very mild assumptions about floating point +* arithmetic. It will work on machines with a guard digit in +* add/subtract, or on those binary machines without guard digits +* which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. +* It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. See SLAED3 for details. +* +* Arguments +* ========= +* +* COMPZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only. +* = 'I': Compute eigenvectors of tridiagonal matrix also. +* = 'V': Compute eigenvectors of original dense symmetric +* matrix also. On entry, Z contains the orthogonal +* matrix used to reduce the original matrix to +* tridiagonal form. +* +* N (input) INTEGER +* The dimension of the symmetric tridiagonal matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the diagonal elements of the tridiagonal matrix. +* On exit, if INFO = 0, the eigenvalues in ascending order. +* +* E (input/output) REAL array, dimension (N-1) +* On entry, the subdiagonal elements of the tridiagonal matrix. +* On exit, E has been destroyed. +* +* Z (input/output) REAL array, dimension (LDZ,N) +* On entry, if COMPZ = 'V', then Z contains the orthogonal +* matrix used in the reduction to tridiagonal form. +* On exit, if INFO = 0, then if COMPZ = 'V', Z contains the +* orthonormal eigenvectors of the original symmetric matrix, +* and if COMPZ = 'I', Z contains the orthonormal eigenvectors +* of the symmetric tridiagonal matrix. +* If COMPZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1. +* If eigenvectors are desired, then LDZ >= max(1,N). +* +* WORK (workspace/output) REAL array, +* dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If COMPZ = 'N' or N <= 1 then LWORK must be at least 1. +* If COMPZ = 'V' and N > 1 then LWORK must be at least +* ( 1 + 3*N + 2*N*lg N + 3*N**2 ), +* where lg( N ) = smallest integer k such +* that 2**k >= N. +* If COMPZ = 'I' and N > 1 then LWORK must be at least +* ( 1 + 4*N + N**2 ). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1. +* If COMPZ = 'V' and N > 1 then LIWORK must be at least +* ( 6 + 6*N + 5*N*lg N ). +* If COMPZ = 'I' and N > 1 then LIWORK must be at least +* ( 3 + 5*N ). +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: The algorithm failed to compute an eigenvalue while +* working on the submatrix lying in rows and columns +* INFO/(N+1) through mod(INFO,N+1). +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* Modified by Francoise Tisseur, University of Tennessee. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER END, I, ICOMPZ, II, J, K, LGN, LIWMIN, LWMIN, + $ M, SMLSIZ, START, STOREZ, STRTRW + REAL EPS, ORGNRM, P, TINY +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANST + EXTERNAL ILAENV, LSAME, SLAMCH, SLANST +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SLACPY, SLAED0, SLASCL, SLASET, SLASRT, + $ SSTEQR, SSTERF, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, MOD, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( N.LE.1 .OR. ICOMPZ.LE.0 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE + LGN = INT( LOG( REAL( N ) ) / LOG( TWO ) ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( ICOMPZ.EQ.1 ) THEN + LWMIN = 1 + 3*N + 2*N*LGN + 3*N**2 + LIWMIN = 6 + 6*N + 5*N*LGN + ELSE IF( ICOMPZ.EQ.2 ) THEN + LWMIN = 1 + 4*N + N**2 + LIWMIN = 3 + 5*N + END IF + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSTEDC', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( N.EQ.1 ) THEN + IF( ICOMPZ.NE.0 ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* + SMLSIZ = ILAENV( 9, 'SSTEDC', ' ', 0, 0, 0, 0 ) +* +* If the following conditional clause is removed, then the routine +* will use the Divide and Conquer routine to compute only the +* eigenvalues, which requires (3N + 3N**2) real workspace and +* (2 + 5N + 2N lg(N)) integer workspace. +* Since on many architectures SSTERF is much faster than any other +* algorithm for finding eigenvalues only, it is used here +* as the default. +* +* If COMPZ = 'N', use SSTERF to compute the eigenvalues. +* + IF( ICOMPZ.EQ.0 ) THEN + CALL SSTERF( N, D, E, INFO ) + RETURN + END IF +* +* If N is smaller than the minimum divide size (SMLSIZ+1), then +* solve the problem with another solver. +* + IF( N.LE.SMLSIZ ) THEN + IF( ICOMPZ.EQ.0 ) THEN + CALL SSTERF( N, D, E, INFO ) + RETURN + ELSE IF( ICOMPZ.EQ.2 ) THEN + CALL SSTEQR( 'I', N, D, E, Z, LDZ, WORK, INFO ) + RETURN + ELSE + CALL SSTEQR( 'V', N, D, E, Z, LDZ, WORK, INFO ) + RETURN + END IF + END IF +* +* If COMPZ = 'V', the Z matrix must be stored elsewhere for later +* use. +* + IF( ICOMPZ.EQ.1 ) THEN + STOREZ = 1 + N*N + ELSE + STOREZ = 1 + END IF +* + IF( ICOMPZ.EQ.2 ) THEN + CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) + END IF +* +* Scale. +* + ORGNRM = SLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) + $ RETURN +* + EPS = SLAMCH( 'Epsilon' ) +* + START = 1 +* +* while ( START <= N ) +* + 10 CONTINUE + IF( START.LE.N ) THEN +* +* Let END be the position of the next subdiagonal entry such that +* E( END ) <= TINY or END = N if no such subdiagonal exists. The +* matrix identified by the elements between START and END +* constitutes an independent sub-problem. +* + END = START + 20 CONTINUE + IF( END.LT.N ) THEN + TINY = EPS*SQRT( ABS( D( END ) ) )*SQRT( ABS( D( END+1 ) ) ) + IF( ABS( E( END ) ).GT.TINY ) THEN + END = END + 1 + GO TO 20 + END IF + END IF +* +* (Sub) Problem determined. Compute its size and solve it. +* + M = END - START + 1 + IF( M.EQ.1 ) THEN + START = END + 1 + GO TO 10 + END IF + IF( M.GT.SMLSIZ ) THEN + INFO = SMLSIZ +* +* Scale. +* + ORGNRM = SLANST( 'M', M, D( START ), E( START ) ) + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, M, 1, D( START ), M, + $ INFO ) + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, M-1, 1, E( START ), + $ M-1, INFO ) +* + IF( ICOMPZ.EQ.1 ) THEN + STRTRW = 1 + ELSE + STRTRW = START + END IF + CALL SLAED0( ICOMPZ, N, M, D( START ), E( START ), + $ Z( STRTRW, START ), LDZ, WORK( 1 ), N, + $ WORK( STOREZ ), IWORK, INFO ) + IF( INFO.NE.0 ) THEN + INFO = ( INFO / ( M+1 )+START-1 )*( N+1 ) + + $ MOD( INFO, ( M+1 ) ) + START - 1 + RETURN + END IF +* +* Scale back. +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, M, 1, D( START ), M, + $ INFO ) +* + ELSE + IF( ICOMPZ.EQ.1 ) THEN +* +* Since QR won't update a Z matrix which is larger than the +* length of D, we must solve the sub-problem in a workspace and +* then multiply back into Z. +* + CALL SSTEQR( 'I', M, D( START ), E( START ), WORK, M, + $ WORK( M*M+1 ), INFO ) + CALL SLACPY( 'A', N, M, Z( 1, START ), LDZ, + $ WORK( STOREZ ), N ) + CALL SGEMM( 'N', 'N', N, M, M, ONE, WORK( STOREZ ), LDZ, + $ WORK, M, ZERO, Z( 1, START ), LDZ ) + ELSE IF( ICOMPZ.EQ.2 ) THEN + CALL SSTEQR( 'I', M, D( START ), E( START ), + $ Z( START, START ), LDZ, WORK, INFO ) + ELSE + CALL SSTERF( M, D( START ), E( START ), INFO ) + END IF + IF( INFO.NE.0 ) THEN + INFO = START*( N+1 ) + END + RETURN + END IF + END IF +* + START = END + 1 + GO TO 10 + END IF +* +* endwhile +* +* If the problem split any number of times, then the eigenvalues +* will not be properly ordered. Here we permute the eigenvalues +* (and the associated eigenvectors) into ascending order. +* + IF( M.NE.N ) THEN + IF( ICOMPZ.EQ.0 ) THEN +* +* Use Quick Sort +* + CALL SLASRT( 'I', N, D, INFO ) +* + ELSE +* +* Use Selection Sort to minimize swaps of eigenvectors +* + DO 40 II = 2, N + I = II - 1 + K = I + P = D( I ) + DO 30 J = II, N + IF( D( J ).LT.P ) THEN + K = J + P = D( J ) + END IF + 30 CONTINUE + IF( K.NE.I ) THEN + D( K ) = D( I ) + D( I ) = P + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 ) + END IF + 40 CONTINUE + END IF + END IF +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of SSTEDC +* + END diff --git a/costa/native/external/lapack/sstegr.f b/costa/native/external/lapack/sstegr.f new file mode 100644 index 000000000..3df5627e0 --- /dev/null +++ b/costa/native/external/lapack/sstegr.f @@ -0,0 +1,400 @@ + SUBROUTINE SSTEGR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL, + $ M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE + INTEGER IL, INFO, IU, LDZ, LIWORK, LWORK, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + REAL D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSTEGR computes selected eigenvalues and, optionally, eigenvectors +* of a real symmetric tridiagonal matrix T. Eigenvalues and +* eigenvectors can be selected by specifying either a range of values +* or a range of indices for the desired eigenvalues. The eigenvalues +* are computed by the dqds algorithm, while orthogonal eigenvectors are +* computed from various ``good'' L D L^T representations (also known as +* Relatively Robust Representations). Gram-Schmidt orthogonalization is +* avoided as far as possible. More specifically, the various steps of +* the algorithm are as follows. For the i-th unreduced block of T, +* (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T +* is a relatively robust representation, +* (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high +* relative accuracy by the dqds algorithm, +* (c) If there is a cluster of close eigenvalues, "choose" sigma_i +* close to the cluster, and go to step (a), +* (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, +* compute the corresponding eigenvector by forming a +* rank-revealing twisted factorization. +* The desired accuracy of the output can be specified by the input +* parameter ABSTOL. +* +* For more details, see "A new O(n^2) algorithm for the symmetric +* tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, +* Computer Science Division Technical Report No. UCB/CSD-97-971, +* UC Berkeley, May 1997. +* +* Note 1 : Currently SSTEGR is only set up to find ALL the n +* eigenvalues and eigenvectors of T in O(n^2) time +* Note 2 : Currently the routine SSTEIN is called when an appropriate +* sigma_i cannot be chosen in step (c) above. SSTEIN invokes modified +* Gram-Schmidt when eigenvalues are close. +* Note 3 : SSTEGR works only on machines which follow ieee-754 +* floating-point standard in their handling of infinities and NaNs. +* Normal execution of SSTEGR may create NaNs and infinities and hence +* may abort due to a floating point exception in environments which +* do not conform to the ieee standard. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +********** Only RANGE = 'A' is currently supported ********************* +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* T. On exit, D is overwritten. +* +* E (input/output) REAL array, dimension (N) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix T in elements 1 to N-1 of E; E(N) need not be set. +* On exit, E is overwritten. +* +* VL (input) REAL +* VU (input) REAL +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) REAL +* The absolute error tolerance for the +* eigenvalues/eigenvectors. IF JOBZ = 'V', the eigenvalues and +* eigenvectors output have residual norms bounded by ABSTOL, +* and the dot products between different eigenvectors are +* bounded by ABSTOL. If ABSTOL is less than N*EPS*|T|, then +* N*EPS*|T| will be used in its place, where EPS is the +* machine precision and |T| is the 1-norm of the tridiagonal +* matrix. The eigenvalues are computed to an accuracy of +* EPS*|T| irrespective of ABSTOL. If high relative accuracy +* is important, set ABSTOL to DLAMCH( 'Safe minimum' ). +* See Barlow and Demmel "Computing Accurate Eigensystems of +* Scaled Diagonally Dominant Matrices", LAPACK Working Note #7 +* for a discussion of which matrices define their eigenvalues +* to high relative accuracy. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) REAL array, dimension (N) +* The first M elements contain the selected eigenvalues in +* ascending order. +* +* Z (output) REAL array, dimension (LDZ, max(1,M) ) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix T +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* ISUPPZ (output) INTEGER ARRAY, dimension ( 2*max(1,M) ) +* The support of the eigenvectors in Z, i.e., the indices +* indicating the nonzero elements in Z. The i-th eigenvector +* is nonzero only in elements ISUPPZ( 2*i-1 ) through +* ISUPPZ( 2*i ). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal +* (and minimal) LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,18*N) +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. LIWORK >= max(1,10*N) +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = 1, internal error in SLARRE, +* if INFO = 2, internal error in SLARRV. +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ + INTEGER I, IBEGIN, IEND, IINDBL, IINDWK, IINFO, IINSPL, + $ INDGRS, INDWOF, INDWRK, ITMP, J, JJ, LIWMIN, + $ LWMIN, NSPLIT + REAL BIGNUM, EPS, RMAX, RMIN, SAFMIN, SCALE, SMLNUM, + $ THRESH, TMP, TNRM, TOL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANST + EXTERNAL LSAME, SLAMCH, SLANST +* .. +* .. External Subroutines .. + EXTERNAL SLARRE, SLARRV, SLASET, SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) ) + LWMIN = 18*N + LIWMIN = 10*N +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 +* +* The following two lines need to be removed once the +* RANGE = 'V' and RANGE = 'I' options are provided. +* + ELSE IF( VALEIG .OR. INDEIG ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN + INFO = -7 + ELSE IF( INDEIG .AND. IL.LT.1 ) THEN + INFO = -8 +* The following change should be made in DSTEVX also, otherwise +* IL can be specified as N+1 and IU as N. +* ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN + ELSE IF( INDEIG .AND. ( IU.LT.IL .OR. IU.GT.N ) ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -14 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -17 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSTEGR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = D( 1 ) + ELSE + IF( VL.LT.D( 1 ) .AND. VU.GE.D( 1 ) ) THEN + M = 1 + W( 1 ) = D( 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + SCALE = ONE + TNRM = SLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + SCALE = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + SCALE = RMAX / TNRM + END IF + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( N, SCALE, D, 1 ) + CALL SSCAL( N-1, SCALE, E, 1 ) + TNRM = TNRM*SCALE + END IF + INDGRS = 1 + INDWOF = 2*N + 1 + INDWRK = 3*N + 1 +* + IINSPL = 1 + IINDBL = N + 1 + IINDWK = 2*N + 1 +* + CALL SLASET( 'Full', N, N, ZERO, ZERO, Z, LDZ ) +* +* Compute the desired eigenvalues of the tridiagonal after splitting +* into smaller subblocks if the corresponding of-diagonal elements +* are small +* + THRESH = EPS*TNRM + CALL SLARRE( N, D, E, THRESH, NSPLIT, IWORK( IINSPL ), M, W, + $ WORK( INDWOF ), WORK( INDGRS ), WORK( INDWRK ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 1 + RETURN + END IF +* + IF( WANTZ ) THEN +* +* Compute the desired eigenvectors corresponding to the computed +* eigenvalues +* + TOL = MAX( ABSTOL, REAL( N )*THRESH ) + IBEGIN = 1 + DO 20 I = 1, NSPLIT + IEND = IWORK( IINSPL+I-1 ) + DO 10 J = IBEGIN, IEND + IWORK( IINDBL+J-1 ) = I + 10 CONTINUE + IBEGIN = IEND + 1 + 20 CONTINUE +* + CALL SLARRV( N, D, E, IWORK( IINSPL ), M, W, IWORK( IINDBL ), + $ WORK( INDGRS ), TOL, Z, LDZ, ISUPPZ, + $ WORK( INDWRK ), IWORK( IINDWK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 2 + RETURN + END IF +* + END IF +* + IBEGIN = 1 + DO 40 I = 1, NSPLIT + IEND = IWORK( IINSPL+I-1 ) + DO 30 J = IBEGIN, IEND + W( J ) = W( J ) + WORK( INDWOF+I-1 ) + 30 CONTINUE + IBEGIN = IEND + 1 + 40 CONTINUE +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( M, ONE / SCALE, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( NSPLIT.GT.1 ) THEN + DO 60 J = 1, M - 1 + I = 0 + TMP = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP ) THEN + I = JJ + TMP = W( JJ ) + END IF + 50 CONTINUE + IF( I.NE.0 ) THEN + W( I ) = W( J ) + W( J ) = TMP + IF( WANTZ ) THEN + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + ITMP = ISUPPZ( 2*I-1 ) + ISUPPZ( 2*I-1 ) = ISUPPZ( 2*J-1 ) + ISUPPZ( 2*J-1 ) = ITMP + ITMP = ISUPPZ( 2*I ) + ISUPPZ( 2*I ) = ISUPPZ( 2*J ) + ISUPPZ( 2*J ) = ITMP + END IF + END IF + 60 CONTINUE + END IF +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of SSTEGR +* + END diff --git a/costa/native/external/lapack/sstein.f b/costa/native/external/lapack/sstein.f new file mode 100644 index 000000000..fa6fd7ee0 --- /dev/null +++ b/costa/native/external/lapack/sstein.f @@ -0,0 +1,362 @@ + SUBROUTINE SSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, Z, LDZ, WORK, + $ IWORK, IFAIL, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDZ, M, N +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), IFAIL( * ), ISPLIT( * ), + $ IWORK( * ) + REAL D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSTEIN computes the eigenvectors of a real symmetric tridiagonal +* matrix T corresponding to specified eigenvalues, using inverse +* iteration. +* +* The maximum number of iterations allowed for each eigenvector is +* specified by an internal parameter MAXITS (currently set to 5). +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input) REAL array, dimension (N) +* The n diagonal elements of the tridiagonal matrix T. +* +* E (input) REAL array, dimension (N) +* The (n-1) subdiagonal elements of the tridiagonal matrix +* T, in elements 1 to N-1. E(N) need not be set. +* +* M (input) INTEGER +* The number of eigenvectors to be found. 0 <= M <= N. +* +* W (input) REAL array, dimension (N) +* The first M elements of W contain the eigenvalues for +* which eigenvectors are to be computed. The eigenvalues +* should be grouped by split-off block and ordered from +* smallest to largest within the block. ( The output array +* W from SSTEBZ with ORDER = 'B' is expected here. ) +* +* IBLOCK (input) INTEGER array, dimension (N) +* The submatrix indices associated with the corresponding +* eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to +* the first submatrix from the top, =2 if W(i) belongs to +* the second submatrix, etc. ( The output array IBLOCK +* from SSTEBZ is expected here. ) +* +* ISPLIT (input) INTEGER array, dimension (N) +* The splitting points, at which T breaks up into submatrices. +* The first submatrix consists of rows/columns 1 to +* ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 +* through ISPLIT( 2 ), etc. +* ( The output array ISPLIT from SSTEBZ is expected here. ) +* +* Z (output) REAL array, dimension (LDZ, M) +* The computed eigenvectors. The eigenvector associated +* with the eigenvalue W(i) is stored in the i-th column of +* Z. Any vector which fails to converge is set to its current +* iterate after MAXITS iterations. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= max(1,N). +* +* WORK (workspace) REAL array, dimension (5*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* IFAIL (output) INTEGER array, dimension (M) +* On normal exit, all elements of IFAIL are zero. +* If one or more eigenvectors fail to converge after +* MAXITS iterations, then their indices are stored in +* array IFAIL. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, then i eigenvectors failed to converge +* in MAXITS iterations. Their indices are stored in +* array IFAIL. +* +* Internal Parameters +* =================== +* +* MAXITS INTEGER, default = 5 +* The maximum number of iterations performed. +* +* EXTRA INTEGER, default = 2 +* The number of iterations performed after norm growth +* criterion is satisfied, should be at least 1. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TEN, ODM3, ODM1 + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TEN = 1.0E+1, + $ ODM3 = 1.0E-3, ODM1 = 1.0E-1 ) + INTEGER MAXITS, EXTRA + PARAMETER ( MAXITS = 5, EXTRA = 2 ) +* .. +* .. Local Scalars .. + INTEGER B1, BLKSIZ, BN, GPIND, I, IINFO, INDRV1, + $ INDRV2, INDRV3, INDRV4, INDRV5, ITS, J, J1, + $ JBLK, JMAX, NBLK, NRMCHK + REAL CTR, EPS, EPS1, NRM, ONENRM, ORTOL, PERTOL, + $ SCL, SEP, STPCRT, TOL, XJ, XJM +* .. +* .. Local Arrays .. + INTEGER ISEED( 4 ) +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SASUM, SDOT, SLAMCH, SNRM2 + EXTERNAL ISAMAX, SASUM, SDOT, SLAMCH, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SLAGTF, SLAGTS, SLARNV, SSCAL, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + DO 10 I = 1, M + IFAIL( I ) = 0 + 10 CONTINUE +* + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 .OR. M.GT.N ) THEN + INFO = -4 + ELSE IF( LDZ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + DO 20 J = 2, M + IF( IBLOCK( J ).LT.IBLOCK( J-1 ) ) THEN + INFO = -6 + GO TO 30 + END IF + IF( IBLOCK( J ).EQ.IBLOCK( J-1 ) .AND. W( J ).LT.W( J-1 ) ) + $ THEN + INFO = -5 + GO TO 30 + END IF + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSTEIN', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + EPS = SLAMCH( 'Precision' ) +* +* Initialize seed for random number generator SLARNV. +* + DO 40 I = 1, 4 + ISEED( I ) = 1 + 40 CONTINUE +* +* Initialize pointers. +* + INDRV1 = 0 + INDRV2 = INDRV1 + N + INDRV3 = INDRV2 + N + INDRV4 = INDRV3 + N + INDRV5 = INDRV4 + N +* +* Compute eigenvectors of matrix blocks. +* + J1 = 1 + DO 160 NBLK = 1, IBLOCK( M ) +* +* Find starting and ending indices of block nblk. +* + IF( NBLK.EQ.1 ) THEN + B1 = 1 + ELSE + B1 = ISPLIT( NBLK-1 ) + 1 + END IF + BN = ISPLIT( NBLK ) + BLKSIZ = BN - B1 + 1 + IF( BLKSIZ.EQ.1 ) + $ GO TO 60 + GPIND = B1 +* +* Compute reorthogonalization criterion and stopping criterion. +* + ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) ) + ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) ) + DO 50 I = B1 + 1, BN - 1 + ONENRM = MAX( ONENRM, ABS( D( I ) )+ABS( E( I-1 ) )+ + $ ABS( E( I ) ) ) + 50 CONTINUE + ORTOL = ODM3*ONENRM +* + STPCRT = SQRT( ODM1 / BLKSIZ ) +* +* Loop through eigenvalues of block nblk. +* + 60 CONTINUE + JBLK = 0 + DO 150 J = J1, M + IF( IBLOCK( J ).NE.NBLK ) THEN + J1 = J + GO TO 160 + END IF + JBLK = JBLK + 1 + XJ = W( J ) +* +* Skip all the work if the block size is one. +* + IF( BLKSIZ.EQ.1 ) THEN + WORK( INDRV1+1 ) = ONE + GO TO 120 + END IF +* +* If eigenvalues j and j-1 are too close, add a relatively +* small perturbation. +* + IF( JBLK.GT.1 ) THEN + EPS1 = ABS( EPS*XJ ) + PERTOL = TEN*EPS1 + SEP = XJ - XJM + IF( SEP.LT.PERTOL ) + $ XJ = XJM + PERTOL + END IF +* + ITS = 0 + NRMCHK = 0 +* +* Get random starting vector. +* + CALL SLARNV( 2, ISEED, BLKSIZ, WORK( INDRV1+1 ) ) +* +* Copy the matrix T so it won't be destroyed in factorization. +* + CALL SCOPY( BLKSIZ, D( B1 ), 1, WORK( INDRV4+1 ), 1 ) + CALL SCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV2+2 ), 1 ) + CALL SCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV3+1 ), 1 ) +* +* Compute LU factors with partial pivoting ( PT = LU ) +* + TOL = ZERO + CALL SLAGTF( BLKSIZ, WORK( INDRV4+1 ), XJ, WORK( INDRV2+2 ), + $ WORK( INDRV3+1 ), TOL, WORK( INDRV5+1 ), IWORK, + $ IINFO ) +* +* Update iteration count. +* + 70 CONTINUE + ITS = ITS + 1 + IF( ITS.GT.MAXITS ) + $ GO TO 100 +* +* Normalize and scale the righthand side vector Pb. +* + SCL = BLKSIZ*ONENRM*MAX( EPS, + $ ABS( WORK( INDRV4+BLKSIZ ) ) ) / + $ SASUM( BLKSIZ, WORK( INDRV1+1 ), 1 ) + CALL SSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 ) +* +* Solve the system LU = Pb. +* + CALL SLAGTS( -1, BLKSIZ, WORK( INDRV4+1 ), WORK( INDRV2+2 ), + $ WORK( INDRV3+1 ), WORK( INDRV5+1 ), IWORK, + $ WORK( INDRV1+1 ), TOL, IINFO ) +* +* Reorthogonalize by modified Gram-Schmidt if eigenvalues are +* close enough. +* + IF( JBLK.EQ.1 ) + $ GO TO 90 + IF( ABS( XJ-XJM ).GT.ORTOL ) + $ GPIND = J + IF( GPIND.NE.J ) THEN + DO 80 I = GPIND, J - 1 + CTR = -SDOT( BLKSIZ, WORK( INDRV1+1 ), 1, Z( B1, I ), + $ 1 ) + CALL SAXPY( BLKSIZ, CTR, Z( B1, I ), 1, + $ WORK( INDRV1+1 ), 1 ) + 80 CONTINUE + END IF +* +* Check the infinity norm of the iterate. +* + 90 CONTINUE + JMAX = ISAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) + NRM = ABS( WORK( INDRV1+JMAX ) ) +* +* Continue for additional iterations after norm reaches +* stopping criterion. +* + IF( NRM.LT.STPCRT ) + $ GO TO 70 + NRMCHK = NRMCHK + 1 + IF( NRMCHK.LT.EXTRA+1 ) + $ GO TO 70 +* + GO TO 110 +* +* If stopping criterion was not satisfied, update info and +* store eigenvector number in array ifail. +* + 100 CONTINUE + INFO = INFO + 1 + IFAIL( INFO ) = J +* +* Accept iterate as jth eigenvector. +* + 110 CONTINUE + SCL = ONE / SNRM2( BLKSIZ, WORK( INDRV1+1 ), 1 ) + JMAX = ISAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) + IF( WORK( INDRV1+JMAX ).LT.ZERO ) + $ SCL = -SCL + CALL SSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 ) + 120 CONTINUE + DO 130 I = 1, N + Z( I, J ) = ZERO + 130 CONTINUE + DO 140 I = 1, BLKSIZ + Z( B1+I-1, J ) = WORK( INDRV1+I ) + 140 CONTINUE +* +* Save the shift to check eigenvalue spacing at next +* iteration. +* + XJM = XJ +* + 150 CONTINUE + 160 CONTINUE +* + RETURN +* +* End of SSTEIN +* + END diff --git a/costa/native/external/lapack/ssteqr.f b/costa/native/external/lapack/ssteqr.f new file mode 100644 index 000000000..c6bb03ac4 --- /dev/null +++ b/costa/native/external/lapack/ssteqr.f @@ -0,0 +1,501 @@ + SUBROUTINE SSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSTEQR computes all eigenvalues and, optionally, eigenvectors of a +* symmetric tridiagonal matrix using the implicit QL or QR method. +* The eigenvectors of a full or band symmetric matrix can also be found +* if SSYTRD or SSPTRD or SSBTRD has been used to reduce this matrix to +* tridiagonal form. +* +* Arguments +* ========= +* +* COMPZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only. +* = 'V': Compute eigenvalues and eigenvectors of the original +* symmetric matrix. On entry, Z must contain the +* orthogonal matrix used to reduce the original matrix +* to tridiagonal form. +* = 'I': Compute eigenvalues and eigenvectors of the +* tridiagonal matrix. Z is initialized to the identity +* matrix. +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the diagonal elements of the tridiagonal matrix. +* On exit, if INFO = 0, the eigenvalues in ascending order. +* +* E (input/output) REAL array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix. +* On exit, E has been destroyed. +* +* Z (input/output) REAL array, dimension (LDZ, N) +* On entry, if COMPZ = 'V', then Z contains the orthogonal +* matrix used in the reduction to tridiagonal form. +* On exit, if INFO = 0, then if COMPZ = 'V', Z contains the +* orthonormal eigenvectors of the original symmetric matrix, +* and if COMPZ = 'I', Z contains the orthonormal eigenvectors +* of the symmetric tridiagonal matrix. +* If COMPZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* eigenvectors are desired, then LDZ >= max(1,N). +* +* WORK (workspace) REAL array, dimension (max(1,2*N-2)) +* If COMPZ = 'N', then WORK is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: the algorithm has failed to find all the eigenvalues in +* a total of 30*N iterations; if INFO = i, then i +* elements of E have not converged to zero; on exit, D +* and E contain the elements of a symmetric tridiagonal +* matrix which is orthogonally similar to the original +* matrix. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, THREE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ THREE = 3.0E0 ) + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) +* .. +* .. Local Scalars .. + INTEGER I, ICOMPZ, II, ISCALE, J, JTOT, K, L, L1, LEND, + $ LENDM1, LENDP1, LENDSV, LM1, LSV, M, MM, MM1, + $ NM1, NMAXIT + REAL ANORM, B, C, EPS, EPS2, F, G, P, R, RT1, RT2, + $ S, SAFMAX, SAFMIN, SSFMAX, SSFMIN, TST +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANST, SLAPY2 + EXTERNAL LSAME, SLAMCH, SLANST, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL SLAE2, SLAEV2, SLARTG, SLASCL, SLASET, SLASR, + $ SLASRT, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSTEQR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ICOMPZ.EQ.2 ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Determine the unit roundoff and over/underflow thresholds. +* + EPS = SLAMCH( 'E' ) + EPS2 = EPS**2 + SAFMIN = SLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + SSFMAX = SQRT( SAFMAX ) / THREE + SSFMIN = SQRT( SAFMIN ) / EPS2 +* +* Compute the eigenvalues and eigenvectors of the tridiagonal +* matrix. +* + IF( ICOMPZ.EQ.2 ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* + NMAXIT = N*MAXIT + JTOT = 0 +* +* Determine where the matrix splits and choose QL or QR iteration +* for each block, according to whether top or bottom diagonal +* element is smaller. +* + L1 = 1 + NM1 = N - 1 +* + 10 CONTINUE + IF( L1.GT.N ) + $ GO TO 160 + IF( L1.GT.1 ) + $ E( L1-1 ) = ZERO + IF( L1.LE.NM1 ) THEN + DO 20 M = L1, NM1 + TST = ABS( E( M ) ) + IF( TST.EQ.ZERO ) + $ GO TO 30 + IF( TST.LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+ + $ 1 ) ) ) )*EPS ) THEN + E( M ) = ZERO + GO TO 30 + END IF + 20 CONTINUE + END IF + M = N +* + 30 CONTINUE + L = L1 + LSV = L + LEND = M + LENDSV = LEND + L1 = M + 1 + IF( LEND.EQ.L ) + $ GO TO 10 +* +* Scale submatrix in rows and columns L to LEND +* + ANORM = SLANST( 'I', LEND-L+1, D( L ), E( L ) ) + ISCALE = 0 + IF( ANORM.EQ.ZERO ) + $ GO TO 10 + IF( ANORM.GT.SSFMAX ) THEN + ISCALE = 1 + CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, + $ INFO ) + ELSE IF( ANORM.LT.SSFMIN ) THEN + ISCALE = 2 + CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, + $ INFO ) + END IF +* +* Choose between QL and QR iteration +* + IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN + LEND = LSV + L = LENDSV + END IF +* + IF( LEND.GT.L ) THEN +* +* QL Iteration +* +* Look for small subdiagonal element. +* + 40 CONTINUE + IF( L.NE.LEND ) THEN + LENDM1 = LEND - 1 + DO 50 M = L, LENDM1 + TST = ABS( E( M ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M+1 ) )+ + $ SAFMIN )GO TO 60 + 50 CONTINUE + END IF +* + M = LEND +* + 60 CONTINUE + IF( M.LT.LEND ) + $ E( M ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 80 +* +* If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 +* to compute its eigensystem. +* + IF( M.EQ.L+1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL SLAEV2( D( L ), E( L ), D( L+1 ), RT1, RT2, C, S ) + WORK( L ) = C + WORK( N-1+L ) = S + CALL SLASR( 'R', 'V', 'B', N, 2, WORK( L ), + $ WORK( N-1+L ), Z( 1, L ), LDZ ) + ELSE + CALL SLAE2( D( L ), E( L ), D( L+1 ), RT1, RT2 ) + END IF + D( L ) = RT1 + D( L+1 ) = RT2 + E( L ) = ZERO + L = L + 2 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L+1 )-P ) / ( TWO*E( L ) ) + R = SLAPY2( G, ONE ) + G = D( M ) - P + ( E( L ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + MM1 = M - 1 + DO 70 I = MM1, L, -1 + F = S*E( I ) + B = C*E( I ) + CALL SLARTG( G, F, C, S, R ) + IF( I.NE.M-1 ) + $ E( I+1 ) = R + G = D( I+1 ) - P + R = ( D( I )-G )*S + TWO*C*B + P = S*R + D( I+1 ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = -S + END IF +* + 70 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = M - L + 1 + CALL SLASR( 'R', 'V', 'B', N, MM, WORK( L ), WORK( N-1+L ), + $ Z( 1, L ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( L ) = G + GO TO 40 +* +* Eigenvalue found. +* + 80 CONTINUE + D( L ) = P +* + L = L + 1 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 +* + ELSE +* +* QR Iteration +* +* Look for small superdiagonal element. +* + 90 CONTINUE + IF( L.NE.LEND ) THEN + LENDP1 = LEND + 1 + DO 100 M = L, LENDP1, -1 + TST = ABS( E( M-1 ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M-1 ) )+ + $ SAFMIN )GO TO 110 + 100 CONTINUE + END IF +* + M = LEND +* + 110 CONTINUE + IF( M.GT.LEND ) + $ E( M-1 ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 130 +* +* If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 +* to compute its eigensystem. +* + IF( M.EQ.L-1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL SLAEV2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2, C, S ) + WORK( M ) = C + WORK( N-1+M ) = S + CALL SLASR( 'R', 'V', 'F', N, 2, WORK( M ), + $ WORK( N-1+M ), Z( 1, L-1 ), LDZ ) + ELSE + CALL SLAE2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2 ) + END IF + D( L-1 ) = RT1 + D( L ) = RT2 + E( L-1 ) = ZERO + L = L - 2 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L-1 )-P ) / ( TWO*E( L-1 ) ) + R = SLAPY2( G, ONE ) + G = D( M ) - P + ( E( L-1 ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + LM1 = L - 1 + DO 120 I = M, LM1 + F = S*E( I ) + B = C*E( I ) + CALL SLARTG( G, F, C, S, R ) + IF( I.NE.M ) + $ E( I-1 ) = R + G = D( I ) - P + R = ( D( I+1 )-G )*S + TWO*C*B + P = S*R + D( I ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = S + END IF +* + 120 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = L - M + 1 + CALL SLASR( 'R', 'V', 'F', N, MM, WORK( M ), WORK( N-1+M ), + $ Z( 1, M ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( LM1 ) = G + GO TO 90 +* +* Eigenvalue found. +* + 130 CONTINUE + D( L ) = P +* + L = L - 1 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 +* + END IF +* +* Undo scaling if necessary +* + 140 CONTINUE + IF( ISCALE.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + ELSE IF( ISCALE.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + END IF +* +* Check for no convergence to an eigenvalue after a total +* of N*MAXIT iterations. +* + IF( JTOT.LT.NMAXIT ) + $ GO TO 10 + DO 150 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 150 CONTINUE + GO TO 190 +* +* Order eigenvalues and eigenvectors. +* + 160 CONTINUE + IF( ICOMPZ.EQ.0 ) THEN +* +* Use Quick Sort +* + CALL SLASRT( 'I', N, D, INFO ) +* + ELSE +* +* Use Selection Sort to minimize swaps of eigenvectors +* + DO 180 II = 2, N + I = II - 1 + K = I + P = D( I ) + DO 170 J = II, N + IF( D( J ).LT.P ) THEN + K = J + P = D( J ) + END IF + 170 CONTINUE + IF( K.NE.I ) THEN + D( K ) = D( I ) + D( I ) = P + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 ) + END IF + 180 CONTINUE + END IF +* + 190 CONTINUE + RETURN +* +* End of SSTEQR +* + END diff --git a/costa/native/external/lapack/ssterf.f b/costa/native/external/lapack/ssterf.f new file mode 100644 index 000000000..439c81e68 --- /dev/null +++ b/costa/native/external/lapack/ssterf.f @@ -0,0 +1,365 @@ + SUBROUTINE SSTERF( N, D, E, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) +* .. +* +* Purpose +* ======= +* +* SSTERF computes all eigenvalues of a symmetric tridiagonal matrix +* using the Pal-Walker-Kahan variant of the QL or QR algorithm. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix. +* On exit, if INFO = 0, the eigenvalues in ascending order. +* +* E (input/output) REAL array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix. +* On exit, E has been destroyed. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: the algorithm failed to find all of the eigenvalues in +* a total of 30*N iterations; if INFO = i, then i +* elements of E have not converged to zero. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, THREE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ THREE = 3.0E0 ) + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) +* .. +* .. Local Scalars .. + INTEGER I, ISCALE, JTOT, L, L1, LEND, LENDSV, LSV, M, + $ NMAXIT + REAL ALPHA, ANORM, BB, C, EPS, EPS2, GAMMA, OLDC, + $ OLDGAM, P, R, RT1, RT2, RTE, S, SAFMAX, SAFMIN, + $ SIGMA, SSFMAX, SSFMIN +* .. +* .. External Functions .. + REAL SLAMCH, SLANST, SLAPY2 + EXTERNAL SLAMCH, SLANST, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL SLAE2, SLASCL, SLASRT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'SSTERF', -INFO ) + RETURN + END IF + IF( N.LE.1 ) + $ RETURN +* +* Determine the unit roundoff for this environment. +* + EPS = SLAMCH( 'E' ) + EPS2 = EPS**2 + SAFMIN = SLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + SSFMAX = SQRT( SAFMAX ) / THREE + SSFMIN = SQRT( SAFMIN ) / EPS2 +* +* Compute the eigenvalues of the tridiagonal matrix. +* + NMAXIT = N*MAXIT + SIGMA = ZERO + JTOT = 0 +* +* Determine where the matrix splits and choose QL or QR iteration +* for each block, according to whether top or bottom diagonal +* element is smaller. +* + L1 = 1 +* + 10 CONTINUE + IF( L1.GT.N ) + $ GO TO 170 + IF( L1.GT.1 ) + $ E( L1-1 ) = ZERO + DO 20 M = L1, N - 1 + IF( ABS( E( M ) ).LE.( SQRT( ABS( D( M ) ) )* + $ SQRT( ABS( D( M+1 ) ) ) )*EPS ) THEN + E( M ) = ZERO + GO TO 30 + END IF + 20 CONTINUE + M = N +* + 30 CONTINUE + L = L1 + LSV = L + LEND = M + LENDSV = LEND + L1 = M + 1 + IF( LEND.EQ.L ) + $ GO TO 10 +* +* Scale submatrix in rows and columns L to LEND +* + ANORM = SLANST( 'I', LEND-L+1, D( L ), E( L ) ) + ISCALE = 0 + IF( ANORM.GT.SSFMAX ) THEN + ISCALE = 1 + CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, + $ INFO ) + ELSE IF( ANORM.LT.SSFMIN ) THEN + ISCALE = 2 + CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, + $ INFO ) + END IF +* + DO 40 I = L, LEND - 1 + E( I ) = E( I )**2 + 40 CONTINUE +* +* Choose between QL and QR iteration +* + IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN + LEND = LSV + L = LENDSV + END IF +* + IF( LEND.GE.L ) THEN +* +* QL Iteration +* +* Look for small subdiagonal element. +* + 50 CONTINUE + IF( L.NE.LEND ) THEN + DO 60 M = L, LEND - 1 + IF( ABS( E( M ) ).LE.EPS2*ABS( D( M )*D( M+1 ) ) ) + $ GO TO 70 + 60 CONTINUE + END IF + M = LEND +* + 70 CONTINUE + IF( M.LT.LEND ) + $ E( M ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 90 +* +* If remaining matrix is 2 by 2, use SLAE2 to compute its +* eigenvalues. +* + IF( M.EQ.L+1 ) THEN + RTE = SQRT( E( L ) ) + CALL SLAE2( D( L ), RTE, D( L+1 ), RT1, RT2 ) + D( L ) = RT1 + D( L+1 ) = RT2 + E( L ) = ZERO + L = L + 2 + IF( L.LE.LEND ) + $ GO TO 50 + GO TO 150 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 150 + JTOT = JTOT + 1 +* +* Form shift. +* + RTE = SQRT( E( L ) ) + SIGMA = ( D( L+1 )-P ) / ( TWO*RTE ) + R = SLAPY2( SIGMA, ONE ) + SIGMA = P - ( RTE / ( SIGMA+SIGN( R, SIGMA ) ) ) +* + C = ONE + S = ZERO + GAMMA = D( M ) - SIGMA + P = GAMMA*GAMMA +* +* Inner loop +* + DO 80 I = M - 1, L, -1 + BB = E( I ) + R = P + BB + IF( I.NE.M-1 ) + $ E( I+1 ) = S*R + OLDC = C + C = P / R + S = BB / R + OLDGAM = GAMMA + ALPHA = D( I ) + GAMMA = C*( ALPHA-SIGMA ) - S*OLDGAM + D( I+1 ) = OLDGAM + ( ALPHA-GAMMA ) + IF( C.NE.ZERO ) THEN + P = ( GAMMA*GAMMA ) / C + ELSE + P = OLDC*BB + END IF + 80 CONTINUE +* + E( L ) = S*P + D( L ) = SIGMA + GAMMA + GO TO 50 +* +* Eigenvalue found. +* + 90 CONTINUE + D( L ) = P +* + L = L + 1 + IF( L.LE.LEND ) + $ GO TO 50 + GO TO 150 +* + ELSE +* +* QR Iteration +* +* Look for small superdiagonal element. +* + 100 CONTINUE + DO 110 M = L, LEND + 1, -1 + IF( ABS( E( M-1 ) ).LE.EPS2*ABS( D( M )*D( M-1 ) ) ) + $ GO TO 120 + 110 CONTINUE + M = LEND +* + 120 CONTINUE + IF( M.GT.LEND ) + $ E( M-1 ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 140 +* +* If remaining matrix is 2 by 2, use SLAE2 to compute its +* eigenvalues. +* + IF( M.EQ.L-1 ) THEN + RTE = SQRT( E( L-1 ) ) + CALL SLAE2( D( L ), RTE, D( L-1 ), RT1, RT2 ) + D( L ) = RT1 + D( L-1 ) = RT2 + E( L-1 ) = ZERO + L = L - 2 + IF( L.GE.LEND ) + $ GO TO 100 + GO TO 150 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 150 + JTOT = JTOT + 1 +* +* Form shift. +* + RTE = SQRT( E( L-1 ) ) + SIGMA = ( D( L-1 )-P ) / ( TWO*RTE ) + R = SLAPY2( SIGMA, ONE ) + SIGMA = P - ( RTE / ( SIGMA+SIGN( R, SIGMA ) ) ) +* + C = ONE + S = ZERO + GAMMA = D( M ) - SIGMA + P = GAMMA*GAMMA +* +* Inner loop +* + DO 130 I = M, L - 1 + BB = E( I ) + R = P + BB + IF( I.NE.M ) + $ E( I-1 ) = S*R + OLDC = C + C = P / R + S = BB / R + OLDGAM = GAMMA + ALPHA = D( I+1 ) + GAMMA = C*( ALPHA-SIGMA ) - S*OLDGAM + D( I ) = OLDGAM + ( ALPHA-GAMMA ) + IF( C.NE.ZERO ) THEN + P = ( GAMMA*GAMMA ) / C + ELSE + P = OLDC*BB + END IF + 130 CONTINUE +* + E( L-1 ) = S*P + D( L ) = SIGMA + GAMMA + GO TO 100 +* +* Eigenvalue found. +* + 140 CONTINUE + D( L ) = P +* + L = L - 1 + IF( L.GE.LEND ) + $ GO TO 100 + GO TO 150 +* + END IF +* +* Undo scaling if necessary +* + 150 CONTINUE + IF( ISCALE.EQ.1 ) + $ CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + IF( ISCALE.EQ.2 ) + $ CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) +* +* Check for no convergence to an eigenvalue after a total +* of N*MAXIT iterations. +* + IF( JTOT.LT.NMAXIT ) + $ GO TO 10 + DO 160 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 160 CONTINUE + GO TO 180 +* +* Sort eigenvalues in increasing order. +* + 170 CONTINUE + CALL SLASRT( 'I', N, D, INFO ) +* + 180 CONTINUE + RETURN +* +* End of SSTERF +* + END diff --git a/costa/native/external/lapack/sstev.f b/costa/native/external/lapack/sstev.f new file mode 100644 index 000000000..c53d2596b --- /dev/null +++ b/costa/native/external/lapack/sstev.f @@ -0,0 +1,165 @@ + SUBROUTINE SSTEV( JOBZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOBZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSTEV computes all eigenvalues and, optionally, eigenvectors of a +* real symmetric tridiagonal matrix A. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* A. +* On exit, if INFO = 0, the eigenvalues in ascending order. +* +* E (input/output) REAL array, dimension (N) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix A, stored in elements 1 to N-1 of E; E(N) need not +* be set, but is used by the routine. +* On exit, the contents of E are destroyed. +* +* Z (output) REAL array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +* eigenvectors of the matrix A, with the i-th column of Z +* holding the eigenvector associated with D(i). +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) REAL array, dimension (max(1,2*N-2)) +* If JOBZ = 'N', WORK is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of E did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL WANTZ + INTEGER IMAX, ISCALE + REAL BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM, + $ TNRM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANST + EXTERNAL LSAME, SLAMCH, SLANST +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSTEQR, SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -6 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSTEV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + TNRM = SLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / TNRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL SSCAL( N, SIGMA, D, 1 ) + CALL SSCAL( N-1, SIGMA, E( 1 ), 1 ) + END IF +* +* For eigenvalues only, call SSTERF. For eigenvalues and +* eigenvectors, call SSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, D, E, INFO ) + ELSE + CALL SSTEQR( 'I', N, D, E, Z, LDZ, WORK, INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, D, 1 ) + END IF +* + RETURN +* +* End of SSTEV +* + END diff --git a/costa/native/external/lapack/sstevd.f b/costa/native/external/lapack/sstevd.f new file mode 100644 index 000000000..49530a1f0 --- /dev/null +++ b/costa/native/external/lapack/sstevd.f @@ -0,0 +1,217 @@ + SUBROUTINE SSTEVD( JOBZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ + INTEGER INFO, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSTEVD computes all eigenvalues and, optionally, eigenvectors of a +* real symmetric tridiagonal matrix. If eigenvectors are desired, it +* uses a divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* A. +* On exit, if INFO = 0, the eigenvalues in ascending order. +* +* E (input/output) REAL array, dimension (N) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix A, stored in elements 1 to N-1 of E; E(N) need not +* be set, but is used by the routine. +* On exit, the contents of E are destroyed. +* +* Z (output) REAL array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +* eigenvectors of the matrix A, with the i-th column of Z +* holding the eigenvector associated with D(i). +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) REAL array, +* dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If JOBZ = 'N' or N <= 1 then LWORK must be at least 1. +* If JOBZ = 'V' and N > 1 then LWORK must be at least +* ( 1 + 4*N + N**2 ). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If JOBZ = 'N' or N <= 1 then LIWORK must be at least 1. +* If JOBZ = 'V' and N > 1 then LIWORK must be at least 3+5*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of E did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTZ + INTEGER ISCALE, LIWMIN, LWMIN + REAL BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM, + $ TNRM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANST + EXTERNAL LSAME, SLAMCH, SLANST +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSTEDC, SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + LIWMIN = 1 + LWMIN = 1 + IF( N.GT.1 .AND. WANTZ ) THEN + LWMIN = 1 + 4*N + N**2 + LIWMIN = 3 + 5*N + END IF +* + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -6 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSTEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + TNRM = SLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / TNRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL SSCAL( N, SIGMA, D, 1 ) + CALL SSCAL( N-1, SIGMA, E( 1 ), 1 ) + END IF +* +* For eigenvalues only, call SSTERF. For eigenvalues and +* eigenvectors, call SSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, D, E, INFO ) + ELSE + CALL SSTEDC( 'I', N, D, E, Z, LDZ, WORK, LWORK, IWORK, LIWORK, + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL SSCAL( N, ONE / SIGMA, D, 1 ) +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of SSTEVD +* + END diff --git a/costa/native/external/lapack/sstevr.f b/costa/native/external/lapack/sstevr.f new file mode 100644 index 000000000..047b418f9 --- /dev/null +++ b/costa/native/external/lapack/sstevr.f @@ -0,0 +1,434 @@ + SUBROUTINE SSTEVR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL, + $ M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 20, 2000 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE + INTEGER IL, INFO, IU, LDZ, LIWORK, LWORK, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + REAL D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSTEVR computes selected eigenvalues and, optionally, eigenvectors +* of a real symmetric tridiagonal matrix T. Eigenvalues and +* eigenvectors can be selected by specifying either a range of values +* or a range of indices for the desired eigenvalues. +* +* Whenever possible, SSTEVR calls SSTEGR to compute the +* eigenspectrum using Relatively Robust Representations. SSTEGR +* computes eigenvalues by the dqds algorithm, while orthogonal +* eigenvectors are computed from various "good" L D L^T representations +* (also known as Relatively Robust Representations). Gram-Schmidt +* orthogonalization is avoided as far as possible. More specifically, +* the various steps of the algorithm are as follows. For the i-th +* unreduced block of T, +* (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T +* is a relatively robust representation, +* (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high +* relative accuracy by the dqds algorithm, +* (c) If there is a cluster of close eigenvalues, "choose" sigma_i +* close to the cluster, and go to step (a), +* (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, +* compute the corresponding eigenvector by forming a +* rank-revealing twisted factorization. +* The desired accuracy of the output can be specified by the input +* parameter ABSTOL. +* +* For more details, see "A new O(n^2) algorithm for the symmetric +* tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, +* Computer Science Division Technical Report No. UCB//CSD-97-971, +* UC Berkeley, May 1997. +* +* +* Note 1 : SSTEVR calls SSTEGR when the full spectrum is requested +* on machines which conform to the ieee-754 floating point standard. +* SSTEVR calls SSTEBZ and SSTEIN on non-ieee machines and +* when partial spectrum requests are made. +* +* Normal execution of SSTEGR may create NaNs and infinities and +* hence may abort due to a floating point exception in environments +* which do not handle NaNs and infinities in the ieee standard default +* manner. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +********** For RANGE = 'V' or 'I' and IU - IL < N - 1, SSTEBZ and +********** SSTEIN are called +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* A. +* On exit, D may be multiplied by a constant factor chosen +* to avoid over/underflow in computing the eigenvalues. +* +* E (input/output) REAL array, dimension (N) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix A in elements 1 to N-1 of E; E(N) need not be set. +* On exit, E may be multiplied by a constant factor chosen +* to avoid over/underflow in computing the eigenvalues. +* +* VL (input) REAL +* VU (input) REAL +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) REAL +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing A to tridiagonal form. +* +* See "Computing Small Singular Values of Bidiagonal Matrices +* with Guaranteed High Relative Accuracy," by Demmel and +* Kahan, LAPACK Working Note #3. +* +* If high relative accuracy is important, set ABSTOL to +* SLAMCH( 'Safe minimum' ). Doing so will guarantee that +* eigenvalues are computed to high relative accuracy when +* possible in future releases. The current code does not +* make any guarantees about high relative accuracy, but +* future releases will. See J. Barlow and J. Demmel, +* "Computing Accurate Eigensystems of Scaled Diagonally +* Dominant Matrices", LAPACK Working Note #7, for a discussion +* of which matrices define their eigenvalues to high relative +* accuracy. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) REAL array, dimension (N) +* The first M elements contain the selected eigenvalues in +* ascending order. +* +* Z (output) REAL array, dimension (LDZ, max(1,M) ) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* ISUPPZ (output) INTEGER array, dimension ( 2*max(1,M) ) +* The support of the eigenvectors in Z, i.e., the indices +* indicating the nonzero elements in Z. The i-th eigenvector +* is nonzero only in elements ISUPPZ( 2*i-1 ) through +* ISUPPZ( 2*i ). +********** Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal (and +* minimal) LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 20*N. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal (and +* minimal) LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. LIWORK >= 10*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: Internal error +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* Ken Stanley, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IEEEOK, IMAX, INDIBL, INDIFL, INDISP, + $ INDIWO, ISCALE, ITMP1, J, JJ, LIWMIN, LWMIN, + $ NSPLIT + REAL BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM, + $ TMP1, TNRM, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANST + EXTERNAL LSAME, ILAENV, SLAMCH, SLANST +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SSCAL, SSTEBZ, SSTEGR, SSTEIN, SSTERF, + $ SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* +* Test the input parameters. +* + IEEEOK = ILAENV( 10, 'SSTEVR', 'N', 1, 2, 3, 4 ) +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) ) + LWMIN = 20*N + LIWMIN = 10*N +* +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -7 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -9 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -14 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -17 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSTEVR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = D( 1 ) + ELSE + IF( VL.LT.D( 1 ) .AND. VU.GE.D( 1 ) ) THEN + M = 1 + W( 1 ) = D( 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + VLL = VL + VUU = VU +* + TNRM = SLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / TNRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL SSCAL( N, SIGMA, D, 1 ) + CALL SSCAL( N-1, SIGMA, E( 1 ), 1 ) + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* If all eigenvalues are desired, then +* call SSTERF or SSTEGR. If this fails for some eigenvalue, then +* try SSTEBZ. +* +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ IEEEOK.EQ.1 ) THEN + CALL SCOPY( N-1, E( 1 ), 1, WORK( 1 ), 1 ) + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N, D, 1, W, 1 ) + CALL SSTERF( N, W, WORK, INFO ) + ELSE + CALL SCOPY( N, D, 1, WORK( N+1 ), 1 ) + CALL SSTEGR( JOBZ, 'A', N, WORK( N+1 ), WORK, VL, VU, IL, + $ IU, ABSTOL, M, W, Z, LDZ, ISUPPZ, + $ WORK( 2*N+1 ), LWORK-2*N, IWORK, LIWORK, INFO ) +* + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 10 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIFL = INDISP + N + INDIWO = INDIFL + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTOL, D, E, M, + $ NSPLIT, W, IWORK( INDIBL ), IWORK( INDISP ), WORK, + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL SSTEIN( N, D, E, M, W, IWORK( INDIBL ), IWORK( INDISP ), + $ Z, LDZ, WORK, IWORK( INDIWO ), IWORK( INDIFL ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 10 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 30 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 20 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 20 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( I ) + W( I ) = W( J ) + IWORK( I ) = IWORK( J ) + W( J ) = TMP1 + IWORK( J ) = ITMP1 + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + END IF + 30 CONTINUE + END IF +* +* Causes problems with tests 19 & 20: +* IF (wantz .and. INDEIG ) Z( 1,1) = Z(1,1) / 1.002 + .002 +* +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of SSTEVR +* + END diff --git a/costa/native/external/lapack/sstevx.f b/costa/native/external/lapack/sstevx.f new file mode 100644 index 000000000..316ea7e59 --- /dev/null +++ b/costa/native/external/lapack/sstevx.f @@ -0,0 +1,346 @@ + SUBROUTINE SSTEVX( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL, + $ M, W, Z, LDZ, WORK, IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE + INTEGER IL, INFO, IU, LDZ, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSTEVX computes selected eigenvalues and, optionally, eigenvectors +* of a real symmetric tridiagonal matrix A. Eigenvalues and +* eigenvectors can be selected by specifying either a range of values +* or a range of indices for the desired eigenvalues. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) REAL array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* A. +* On exit, D may be multiplied by a constant factor chosen +* to avoid over/underflow in computing the eigenvalues. +* +* E (input/output) REAL array, dimension (N) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix A in elements 1 to N-1 of E; E(N) need not be set. +* On exit, E may be multiplied by a constant factor chosen +* to avoid over/underflow in computing the eigenvalues. +* +* VL (input) REAL +* VU (input) REAL +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) REAL +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less +* than or equal to zero, then EPS*|T| will be used in +* its place, where |T| is the 1-norm of the tridiagonal +* matrix. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*SLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*SLAMCH('S'). +* +* See "Computing Small Singular Values of Bidiagonal Matrices +* with Guaranteed High Relative Accuracy," by Demmel and +* Kahan, LAPACK Working Note #3. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) REAL array, dimension (N) +* The first M elements contain the selected eigenvalues in +* ascending order. +* +* Z (output) REAL array, dimension (LDZ, max(1,M) ) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If an eigenvector fails to converge (INFO > 0), then that +* column of Z contains the latest approximation to the +* eigenvector, and the index of the eigenvector is returned +* in IFAIL. If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) REAL array, dimension (5*N) +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, then i eigenvectors failed to converge. +* Their indices are stored in array IFAIL. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IMAX, INDIBL, INDISP, INDIWO, INDWRK, + $ ISCALE, ITMP1, J, JJ, NSPLIT + REAL BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM, + $ TMP1, TNRM, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANST + EXTERNAL LSAME, SLAMCH, SLANST +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SSCAL, SSTEBZ, SSTEIN, SSTEQR, SSTERF, + $ SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -7 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -9 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -14 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSTEVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = D( 1 ) + ELSE + IF( VL.LT.D( 1 ) .AND. VU.GE.D( 1 ) ) THEN + M = 1 + W( 1 ) = D( 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + IF ( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + ENDIF + TNRM = SLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / TNRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL SSCAL( N, SIGMA, D, 1 ) + CALL SSCAL( N-1, SIGMA, E( 1 ), 1 ) + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* If all eigenvalues are desired and ABSTOL is less than zero, then +* call SSTERF or SSTEQR. If this fails for some eigenvalue, then +* try SSTEBZ. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ ( ABSTOL.LE.ZERO ) ) THEN + CALL SCOPY( N, D, 1, W, 1 ) + CALL SCOPY( N-1, E( 1 ), 1, WORK( 1 ), 1 ) + INDWRK = N + 1 + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK, INFO ) + ELSE + CALL SSTEQR( 'I', N, W, WORK, Z, LDZ, WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 20 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDWRK = 1 + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTOL, D, E, M, + $ NSPLIT, W, IWORK( INDIBL ), IWORK( INDISP ), + $ WORK( INDWRK ), IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL SSTEIN( N, D, E, M, W, IWORK( INDIBL ), IWORK( INDISP ), + $ Z, LDZ, WORK( INDWRK ), IWORK( INDIWO ), IFAIL, + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 20 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 40 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 30 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 30 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 40 CONTINUE + END IF +* + RETURN +* +* End of SSTEVX +* + END diff --git a/costa/native/external/lapack/ssycon.f b/costa/native/external/lapack/ssycon.f new file mode 100644 index 000000000..95409e786 --- /dev/null +++ b/costa/native/external/lapack/ssycon.f @@ -0,0 +1,161 @@ + SUBROUTINE SSYCON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SSYCON estimates the reciprocal of the condition number (in the +* 1-norm) of a real symmetric matrix A using the factorization +* A = U*D*U**T or A = L*D*L**T computed by SSYTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by SSYTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by SSYTRF. +* +* ANORM (input) REAL +* The 1-norm of the original matrix A. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) REAL array, dimension (2*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + REAL AINVNM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLACON, SSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL SLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L') or inv(U*D*U'). +* + CALL SSYTRS( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of SSYCON +* + END diff --git a/costa/native/external/lapack/ssyev.f b/costa/native/external/lapack/ssyev.f new file mode 100644 index 000000000..19cbee06b --- /dev/null +++ b/costa/native/external/lapack/ssyev.f @@ -0,0 +1,213 @@ + SUBROUTINE SSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), W( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SSYEV computes all eigenvalues and, optionally, eigenvectors of a +* real symmetric matrix A. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA, N) +* On entry, the symmetric matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* On exit, if JOBZ = 'V', then if INFO = 0, A contains the +* orthonormal eigenvectors of the matrix A. +* If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +* or the upper triangle (if UPLO='U') of A, including the +* diagonal, is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,3*N-1). +* For optimal efficiency, LWORK >= (NB+2)*N, +* where NB is the blocksize for SSYTRD returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE, + $ LLWORK, LOPT, LWKOPT, NB + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANSY + EXTERNAL ILAENV, LSAME, SLAMCH, SLANSY +* .. +* .. External Subroutines .. + EXTERNAL SLASCL, SORGTR, SSCAL, SSTEQR, SSTERF, SSYTRD, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, 3*N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( 1, ( NB+2 )*N ) + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + WORK( 1 ) = 3 + IF( WANTZ ) + $ A( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = SLANSY( 'M', UPLO, N, A, LDA, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL SLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call SSYTRD to reduce symmetric matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + CALL SSYTRD( UPLO, N, A, LDA, W, WORK( INDE ), WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + LOPT = 2*N + WORK( INDWRK ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* SORGTR to generate the orthogonal matrix, then call SSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL SORGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ), + $ LLWORK, IINFO ) + CALL SSTEQR( JOBZ, N, W, WORK( INDE ), A, LDA, WORK( INDTAU ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SSYEV +* + END diff --git a/costa/native/external/lapack/ssyevd.f b/costa/native/external/lapack/ssyevd.f new file mode 100644 index 000000000..39e918b00 --- /dev/null +++ b/costa/native/external/lapack/ssyevd.f @@ -0,0 +1,265 @@ + SUBROUTINE SSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), W( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SSYEVD computes all eigenvalues and, optionally, eigenvectors of a +* real symmetric matrix A. If eigenvectors are desired, it uses a +* divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Because of large use of BLAS of level 3, SSYEVD needs N**2 more +* workspace than SSYEVX. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA, N) +* On entry, the symmetric matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* On exit, if JOBZ = 'V', then if INFO = 0, A contains the +* orthonormal eigenvectors of the matrix A. +* If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +* or the upper triangle (if UPLO='U') of A, including the +* diagonal, is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* WORK (workspace/output) REAL array, +* dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If N <= 1, LWORK must be at least 1. +* If JOBZ = 'N' and N > 1, LWORK must be at least 2*N+1. +* If JOBZ = 'V' and N > 1, LWORK must be at least +* 1 + 6*N + 2*N**2. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If N <= 1, LIWORK must be at least 1. +* If JOBZ = 'N' and N > 1, LIWORK must be at least 1. +* If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* Modified by Francoise Tisseur, University of Tennessee. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. +* + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, INDE, INDTAU, INDWK2, INDWRK, ISCALE, + $ LIOPT, LIWMIN, LLWORK, LLWRK2, LOPT, LWMIN + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANSY + EXTERNAL LSAME, SLAMCH, SLANSY +* .. +* .. External Subroutines .. + EXTERNAL SLACPY, SLASCL, SORMTR, SSCAL, SSTEDC, SSTERF, + $ SSYTRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + LOPT = LWMIN + LIOPT = LIWMIN + ELSE + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 6*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + 1 + END IF + LOPT = LWMIN + LIOPT = LIWMIN + END IF + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LOPT + IWORK( 1 ) = LIOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + IF( WANTZ ) + $ A( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = SLANSY( 'M', UPLO, N, A, LDA, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL SLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call SSYTRD to reduce symmetric matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 +* + CALL SSYTRD( UPLO, N, A, LDA, W, WORK( INDE ), WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + LOPT = 2*N + WORK( INDWRK ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* SSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the +* tridiagonal matrix, then call SORMTR to multiply it by the +* Householder transformations stored in A. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL SSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO ) + CALL SORMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ), + $ WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO ) + CALL SLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA ) + LOPT = MAX( LOPT, 1+6*N+2*N**2 ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL SSCAL( N, ONE / SIGMA, W, 1 ) +* + WORK( 1 ) = LOPT + IWORK( 1 ) = LIOPT +* + RETURN +* +* End of SSYEVD +* + END diff --git a/costa/native/external/lapack/ssyevr.f b/costa/native/external/lapack/ssyevr.f new file mode 100644 index 000000000..fa2befa83 --- /dev/null +++ b/costa/native/external/lapack/ssyevr.f @@ -0,0 +1,497 @@ + SUBROUTINE SSYEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, + $ IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 20, 2000 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LWORK, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + REAL A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSYEVR computes selected eigenvalues and, optionally, eigenvectors +* of a real symmetric matrix T. Eigenvalues and eigenvectors can be +* selected by specifying either a range of values or a range of +* indices for the desired eigenvalues. +* +* Whenever possible, SSYEVR calls SSTEGR to compute the +* eigenspectrum using Relatively Robust Representations. SSTEGR +* computes eigenvalues by the dqds algorithm, while orthogonal +* eigenvectors are computed from various "good" L D L^T representations +* (also known as Relatively Robust Representations). Gram-Schmidt +* orthogonalization is avoided as far as possible. More specifically, +* the various steps of the algorithm are as follows. For the i-th +* unreduced block of T, +* (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T +* is a relatively robust representation, +* (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high +* relative accuracy by the dqds algorithm, +* (c) If there is a cluster of close eigenvalues, "choose" sigma_i +* close to the cluster, and go to step (a), +* (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, +* compute the corresponding eigenvector by forming a +* rank-revealing twisted factorization. +* The desired accuracy of the output can be specified by the input +* parameter ABSTOL. +* +* For more details, see "A new O(n^2) algorithm for the symmetric +* tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, +* Computer Science Division Technical Report No. UCB//CSD-97-971, +* UC Berkeley, May 1997. +* +* +* Note 1 : SSYEVR calls SSTEGR when the full spectrum is requested +* on machines which conform to the ieee-754 floating point standard. +* SSYEVR calls SSTEBZ and SSTEIN on non-ieee machines and +* when partial spectrum requests are made. +* +* Normal execution of SSTEGR may create NaNs and infinities and +* hence may abort due to a floating point exception in environments +* which do not handle NaNs and infinities in the ieee standard default +* manner. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +********** For RANGE = 'V' or 'I' and IU - IL < N - 1, SSTEBZ and +********** SSTEIN are called +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA, N) +* On entry, the symmetric matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* On exit, the lower triangle (if UPLO='L') or the upper +* triangle (if UPLO='U') of A, including the diagonal, is +* destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* VL (input) REAL +* VU (input) REAL +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) REAL +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing A to tridiagonal form. +* +* See "Computing Small Singular Values of Bidiagonal Matrices +* with Guaranteed High Relative Accuracy," by Demmel and +* Kahan, LAPACK Working Note #3. +* +* If high relative accuracy is important, set ABSTOL to +* SLAMCH( 'Safe minimum' ). Doing so will guarantee that +* eigenvalues are computed to high relative accuracy when +* possible in future releases. The current code does not +* make any guarantees about high relative accuracy, but +* furutre releases will. See J. Barlow and J. Demmel, +* "Computing Accurate Eigensystems of Scaled Diagonally +* Dominant Matrices", LAPACK Working Note #7, for a discussion +* of which matrices define their eigenvalues to high relative +* accuracy. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) REAL array, dimension (N) +* The first M elements contain the selected eigenvalues in +* ascending order. +* +* Z (output) REAL array, dimension (LDZ, max(1,M)) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* ISUPPZ (output) INTEGER array, dimension ( 2*max(1,M) ) +* The support of the eigenvectors in Z, i.e., the indices +* indicating the nonzero elements in Z. The i-th eigenvector +* is nonzero only in elements ISUPPZ( 2*i-1 ) through +* ISUPPZ( 2*i ). +********** Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,26*N). +* For optimal efficiency, LWORK >= (NB+6)*N, +* where NB is the max of the blocksize for SSYTRD and SORMTR +* returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. LIWORK >= max(1,10*N). +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: Internal error +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* Ken Stanley, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IEEEOK, IINFO, IMAX, INDD, INDDD, INDE, + $ INDEE, INDIBL, INDIFL, INDISP, INDIWO, INDTAU, + $ INDWK, INDWKN, ISCALE, ITMP1, J, JJ, LIWMIN, + $ LLWORK, LLWRKN, LWKOPT, LWMIN, NB, NSPLIT + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANSY + EXTERNAL LSAME, ILAENV, SLAMCH, SLANSY +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SORMTR, SSCAL, SSTEBZ, SSTEGR, SSTEIN, + $ SSTERF, SSWAP, SSYTRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IEEEOK = ILAENV( 10, 'SSYEVR', 'N', 1, 2, 3, 4 ) +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) ) +* + LWMIN = MAX( 1, 26*N ) + LIWMIN = MAX( 1, 10*N ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 ) + NB = MAX( NB, ILAENV( 1, 'CUNMTR', UPLO, N, -1, -1, -1 ) ) + LWKOPT = MAX( ( NB+1 )*N, LWMIN ) + WORK( 1 ) = LWKOPT + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYEVR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + WORK( 1 ) = 7 + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + ELSE + IF( VL.LT.A( 1, 1 ) .AND. VU.GE.A( 1, 1 ) ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + VLL = VL + VUU = VU + ANRM = SLANSY( 'M', UPLO, N, A, LDA, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL SSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL SSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call SSYTRD to reduce symmetric matrix to tridiagonal form. +* + INDTAU = 1 + INDE = INDTAU + N + INDD = INDE + N + INDEE = INDD + N + INDDD = INDEE + N + INDIFL = INDDD + N + INDWK = INDIFL + N + LLWORK = LWORK - INDWK + 1 + CALL SSYTRD( UPLO, N, A, LDA, WORK( INDD ), WORK( INDE ), + $ WORK( INDTAU ), WORK( INDWK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired +* then call SSTERF or SSTEGR and SORMTR. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ IEEEOK.EQ.1 ) THEN + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N, WORK( INDD ), 1, W, 1 ) + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SCOPY( N, WORK( INDD ), 1, WORK( INDDD ), 1 ) +* + CALL SSTEGR( JOBZ, 'A', N, WORK( INDDD ), WORK( INDEE ), + $ VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, ISUPPZ, + $ WORK( INDWK ), LWORK, IWORK, LIWORK, INFO ) +* +* +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by SSTEIN. +* + IF( WANTZ .AND. INFO.EQ.0 ) THEN + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL SORMTR( 'L', UPLO, 'N', N, M, A, LDA, + $ WORK( INDTAU ), Z, LDZ, WORK( INDWKN ), + $ LLWRKN, IINFO ) + END IF + END IF +* +* + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN. +* Also call SSTEBZ and SSTEIN if SSTEGR fails. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIFL = 1 + INDIBL = INDIFL + N + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL SSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWK ), IWORK( INDIWO ), IWORK( INDIFL ), + $ INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by SSTEIN. +* + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL SORMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWKOPT + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of SSYEVR +* + END diff --git a/costa/native/external/lapack/ssyevx.f b/costa/native/external/lapack/ssyevx.f new file mode 100644 index 000000000..390d06b66 --- /dev/null +++ b/costa/native/external/lapack/ssyevx.f @@ -0,0 +1,421 @@ + SUBROUTINE SSYEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, WORK, LWORK, IWORK, + $ IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSYEVX computes selected eigenvalues and, optionally, eigenvectors +* of a real symmetric matrix A. Eigenvalues and eigenvectors can be +* selected by specifying either a range of values or a range of indices +* for the desired eigenvalues. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA, N) +* On entry, the symmetric matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* On exit, the lower triangle (if UPLO='L') or the upper +* triangle (if UPLO='U') of A, including the diagonal, is +* destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* VL (input) REAL +* VU (input) REAL +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) REAL +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing A to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*SLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*SLAMCH('S'). +* +* See "Computing Small Singular Values of Bidiagonal Matrices +* with Guaranteed High Relative Accuracy," by Demmel and +* Kahan, LAPACK Working Note #3. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) REAL array, dimension (N) +* On normal exit, the first M elements contain the selected +* eigenvalues in ascending order. +* +* Z (output) REAL array, dimension (LDZ, max(1,M)) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If an eigenvector fails to converge, then that column of Z +* contains the latest approximation to the eigenvector, and the +* index of the eigenvector is returned in IFAIL. +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,8*N). +* For optimal efficiency, LWORK >= (NB+3)*N, +* where NB is the max of the blocksize for SSYTRD and SORMTR +* returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, then i eigenvectors failed to converge. +* Their indices are stored in array IFAIL. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWO, INDTAU, INDWKN, INDWRK, ISCALE, + $ ITMP1, J, JJ, LLWORK, LLWRKN, LOPT, LWKOPT, NB, + $ NSPLIT + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANSY + EXTERNAL LSAME, ILAENV, SLAMCH, SLANSY +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLACPY, SORGTR, SORMTR, SSCAL, SSTEBZ, + $ SSTEIN, SSTEQR, SSTERF, SSWAP, SSYTRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + ELSE IF( LWORK.LT.MAX( 1, 8*N ) .AND. .NOT.LQUERY ) THEN + INFO = -17 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) + NB = MAX( NB, ILAENV( 1, 'SORMTR', UPLO, N, -1, -1, -1 ) ) + LWKOPT = ( NB+3 )*N + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + WORK( 1 ) = 7 + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + ELSE + IF( VL.LT.A( 1, 1 ) .AND. VU.GE.A( 1, 1 ) ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + VLL = VL + VUU = VU + ANRM = SLANSY( 'M', UPLO, N, A, LDA, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL SSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL SSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call SSYTRD to reduce symmetric matrix to tridiagonal form. +* + INDTAU = 1 + INDE = INDTAU + N + INDD = INDE + N + INDWRK = INDD + N + LLWORK = LWORK - INDWRK + 1 + CALL SSYTRD( UPLO, N, A, LDA, WORK( INDD ), WORK( INDE ), + $ WORK( INDTAU ), WORK( INDWRK ), LLWORK, IINFO ) + LOPT = 3*N + WORK( INDWRK ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal to +* zero, then call SSTERF or SORGTR and SSTEQR. If this fails for +* some eigenvalue, then try SSTEBZ. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ ( ABSTOL.LE.ZERO ) ) THEN + CALL SCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL SLACPY( 'A', N, N, A, LDA, Z, LDZ ) + CALL SORGTR( UPLO, N, Z, LDZ, WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + IFAIL( I ) = 0 + 30 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 40 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL SSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by SSTEIN. +* + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL SORMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 40 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 60 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 50 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 60 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SSYEVX +* + END diff --git a/costa/native/external/lapack/ssygs2.f b/costa/native/external/lapack/ssygs2.f new file mode 100644 index 000000000..4c33df116 --- /dev/null +++ b/costa/native/external/lapack/ssygs2.f @@ -0,0 +1,212 @@ + SUBROUTINE SSYGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SSYGS2 reduces a real symmetric-definite generalized eigenproblem +* to standard form. +* +* If ITYPE = 1, the problem is A*x = lambda*B*x, +* and A is overwritten by inv(U')*A*inv(U) or inv(L)*A*inv(L') +* +* If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +* B*A*x = lambda*x, and A is overwritten by U*A*U` or L'*A*L. +* +* B must have been previously factorized as U'*U or L*L' by SPOTRF. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* = 1: compute inv(U')*A*inv(U) or inv(L)*A*inv(L'); +* = 2 or 3: compute U*A*U' or L'*A*L. +* +* UPLO (input) CHARACTER +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored, and how B has been factorized. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* n by n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n by n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the transformed matrix, stored in the +* same format as A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input) REAL array, dimension (LDB,N) +* The triangular factor from the Cholesky factorization of B, +* as returned by SPOTRF. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, HALF + PARAMETER ( ONE = 1.0, HALF = 0.5 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K + REAL AKK, BKK, CT +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SSCAL, SSYR2, STRMV, STRSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYGS2', -INFO ) + RETURN + END IF +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U')*A*inv(U) +* + DO 10 K = 1, N +* +* Update the upper triangle of A(k:n,k:n) +* + AKK = A( K, K ) + BKK = B( K, K ) + AKK = AKK / BKK**2 + A( K, K ) = AKK + IF( K.LT.N ) THEN + CALL SSCAL( N-K, ONE / BKK, A( K, K+1 ), LDA ) + CT = -HALF*AKK + CALL SAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ), + $ LDA ) + CALL SSYR2( UPLO, N-K, -ONE, A( K, K+1 ), LDA, + $ B( K, K+1 ), LDB, A( K+1, K+1 ), LDA ) + CALL SAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ), + $ LDA ) + CALL STRSV( UPLO, 'Transpose', 'Non-unit', N-K, + $ B( K+1, K+1 ), LDB, A( K, K+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L') +* + DO 20 K = 1, N +* +* Update the lower triangle of A(k:n,k:n) +* + AKK = A( K, K ) + BKK = B( K, K ) + AKK = AKK / BKK**2 + A( K, K ) = AKK + IF( K.LT.N ) THEN + CALL SSCAL( N-K, ONE / BKK, A( K+1, K ), 1 ) + CT = -HALF*AKK + CALL SAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 ) + CALL SSYR2( UPLO, N-K, -ONE, A( K+1, K ), 1, + $ B( K+1, K ), 1, A( K+1, K+1 ), LDA ) + CALL SAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 ) + CALL STRSV( UPLO, 'No transpose', 'Non-unit', N-K, + $ B( K+1, K+1 ), LDB, A( K+1, K ), 1 ) + END IF + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U' +* + DO 30 K = 1, N +* +* Update the upper triangle of A(1:k,1:k) +* + AKK = A( K, K ) + BKK = B( K, K ) + CALL STRMV( UPLO, 'No transpose', 'Non-unit', K-1, B, + $ LDB, A( 1, K ), 1 ) + CT = HALF*AKK + CALL SAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 ) + CALL SSYR2( UPLO, K-1, ONE, A( 1, K ), 1, B( 1, K ), 1, + $ A, LDA ) + CALL SAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 ) + CALL SSCAL( K-1, BKK, A( 1, K ), 1 ) + A( K, K ) = AKK*BKK**2 + 30 CONTINUE + ELSE +* +* Compute L'*A*L +* + DO 40 K = 1, N +* +* Update the lower triangle of A(1:k,1:k) +* + AKK = A( K, K ) + BKK = B( K, K ) + CALL STRMV( UPLO, 'Transpose', 'Non-unit', K-1, B, LDB, + $ A( K, 1 ), LDA ) + CT = HALF*AKK + CALL SAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA ) + CALL SSYR2( UPLO, K-1, ONE, A( K, 1 ), LDA, B( K, 1 ), + $ LDB, A, LDA ) + CALL SAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA ) + CALL SSCAL( K-1, BKK, A( K, 1 ), LDA ) + A( K, K ) = AKK*BKK**2 + 40 CONTINUE + END IF + END IF + RETURN +* +* End of SSYGS2 +* + END diff --git a/costa/native/external/lapack/ssygst.f b/costa/native/external/lapack/ssygst.f new file mode 100644 index 000000000..1151b8945 --- /dev/null +++ b/costa/native/external/lapack/ssygst.f @@ -0,0 +1,250 @@ + SUBROUTINE SSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SSYGST reduces a real symmetric-definite generalized eigenproblem +* to standard form. +* +* If ITYPE = 1, the problem is A*x = lambda*B*x, +* and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) +* +* If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +* B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. +* +* B must have been previously factorized as U**T*U or L*L**T by SPOTRF. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); +* = 2 or 3: compute U*A*U**T or L**T*A*L. +* +* UPLO (input) CHARACTER +* = 'U': Upper triangle of A is stored and B is factored as +* U**T*U; +* = 'L': Lower triangle of A is stored and B is factored as +* L*L**T. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the transformed matrix, stored in the +* same format as A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input) REAL array, dimension (LDB,N) +* The triangular factor from the Cholesky factorization of B, +* as returned by SPOTRF. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, HALF + PARAMETER ( ONE = 1.0, HALF = 0.5 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K, KB, NB +* .. +* .. External Subroutines .. + EXTERNAL SSYGS2, SSYMM, SSYR2K, STRMM, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYGST', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'SSYGST', UPLO, N, -1, -1, -1 ) +* + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL SSYGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + ELSE +* +* Use blocked code +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U')*A*inv(U) +* + DO 10 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the upper triangle of A(k:n,k:n) +* + CALL SSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + IF( K+KB.LE.N ) THEN + CALL STRSM( 'Left', UPLO, 'Transpose', 'Non-unit', + $ KB, N-K-KB+1, ONE, B( K, K ), LDB, + $ A( K, K+KB ), LDA ) + CALL SSYMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, + $ A( K, K ), LDA, B( K, K+KB ), LDB, ONE, + $ A( K, K+KB ), LDA ) + CALL SSYR2K( UPLO, 'Transpose', N-K-KB+1, KB, -ONE, + $ A( K, K+KB ), LDA, B( K, K+KB ), LDB, + $ ONE, A( K+KB, K+KB ), LDA ) + CALL SSYMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, + $ A( K, K ), LDA, B( K, K+KB ), LDB, ONE, + $ A( K, K+KB ), LDA ) + CALL STRSM( 'Right', UPLO, 'No transpose', + $ 'Non-unit', KB, N-K-KB+1, ONE, + $ B( K+KB, K+KB ), LDB, A( K, K+KB ), + $ LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L') +* + DO 20 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the lower triangle of A(k:n,k:n) +* + CALL SSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + IF( K+KB.LE.N ) THEN + CALL STRSM( 'Right', UPLO, 'Transpose', 'Non-unit', + $ N-K-KB+1, KB, ONE, B( K, K ), LDB, + $ A( K+KB, K ), LDA ) + CALL SSYMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, + $ A( K, K ), LDA, B( K+KB, K ), LDB, ONE, + $ A( K+KB, K ), LDA ) + CALL SSYR2K( UPLO, 'No transpose', N-K-KB+1, KB, + $ -ONE, A( K+KB, K ), LDA, B( K+KB, K ), + $ LDB, ONE, A( K+KB, K+KB ), LDA ) + CALL SSYMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, + $ A( K, K ), LDA, B( K+KB, K ), LDB, ONE, + $ A( K+KB, K ), LDA ) + CALL STRSM( 'Left', UPLO, 'No transpose', + $ 'Non-unit', N-K-KB+1, KB, ONE, + $ B( K+KB, K+KB ), LDB, A( K+KB, K ), + $ LDA ) + END IF + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U' +* + DO 30 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) +* + CALL STRMM( 'Left', UPLO, 'No transpose', 'Non-unit', + $ K-1, KB, ONE, B, LDB, A( 1, K ), LDA ) + CALL SSYMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ), + $ LDA, B( 1, K ), LDB, ONE, A( 1, K ), LDA ) + CALL SSYR2K( UPLO, 'No transpose', K-1, KB, ONE, + $ A( 1, K ), LDA, B( 1, K ), LDB, ONE, A, + $ LDA ) + CALL SSYMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ), + $ LDA, B( 1, K ), LDB, ONE, A( 1, K ), LDA ) + CALL STRMM( 'Right', UPLO, 'Transpose', 'Non-unit', + $ K-1, KB, ONE, B( K, K ), LDB, A( 1, K ), + $ LDA ) + CALL SSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + 30 CONTINUE + ELSE +* +* Compute L'*A*L +* + DO 40 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) +* + CALL STRMM( 'Right', UPLO, 'No transpose', 'Non-unit', + $ KB, K-1, ONE, B, LDB, A( K, 1 ), LDA ) + CALL SSYMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ), + $ LDA, B( K, 1 ), LDB, ONE, A( K, 1 ), LDA ) + CALL SSYR2K( UPLO, 'Transpose', K-1, KB, ONE, + $ A( K, 1 ), LDA, B( K, 1 ), LDB, ONE, A, + $ LDA ) + CALL SSYMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ), + $ LDA, B( K, 1 ), LDB, ONE, A( K, 1 ), LDA ) + CALL STRMM( 'Left', UPLO, 'Transpose', 'Non-unit', KB, + $ K-1, ONE, B( K, K ), LDB, A( K, 1 ), LDA ) + CALL SSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + 40 CONTINUE + END IF + END IF + END IF + RETURN +* +* End of SSYGST +* + END diff --git a/costa/native/external/lapack/ssygv.f b/costa/native/external/lapack/ssygv.f new file mode 100644 index 000000000..444d8bca7 --- /dev/null +++ b/costa/native/external/lapack/ssygv.f @@ -0,0 +1,227 @@ + SUBROUTINE SSYGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SSYGV computes all the eigenvalues, and optionally, the eigenvectors +* of a real generalized symmetric-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +* Here A and B are assumed to be symmetric and B is also +* positive definite. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) REAL array, dimension (LDA, N) +* On entry, the symmetric matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* +* On exit, if JOBZ = 'V', then if INFO = 0, A contains the +* matrix Z of eigenvectors. The eigenvectors are normalized +* as follows: +* if ITYPE = 1 or 2, Z**T*B*Z = I; +* if ITYPE = 3, Z**T*inv(B)*Z = I. +* If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +* or the lower triangle (if UPLO='L') of A, including the +* diagonal, is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) REAL array, dimension (LDB, N) +* On entry, the symmetric positive definite matrix B. +* If UPLO = 'U', the leading N-by-N upper triangular part of B +* contains the upper triangular part of the matrix B. +* If UPLO = 'L', the leading N-by-N lower triangular part of B +* contains the lower triangular part of the matrix B. +* +* On exit, if INFO <= N, the part of B containing the matrix is +* overwritten by the triangular factor U or L from the Cholesky +* factorization B = U**T*U or B = L*L**T. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,3*N-1). +* For optimal efficiency, LWORK >= (NB+2)*N, +* where NB is the blocksize for SSYTRD returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: SPOTRF or SSYEV returned an error code: +* <= N: if INFO = i, SSYEV failed to converge; +* i off-diagonal elements of an intermediate +* tridiagonal form did not converge to zero; +* > N: if INFO = N + i, for 1 <= i <= N, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER LWKOPT, NB, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SPOTRF, SSYEV, SSYGST, STRMM, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, 3*N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) + LWKOPT = ( NB+2 )*N + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYGV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL SPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL SSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL SSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + CALL STRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + CALL STRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of SSYGV +* + END diff --git a/costa/native/external/lapack/ssygvd.f b/costa/native/external/lapack/ssygvd.f new file mode 100644 index 000000000..5ca95032f --- /dev/null +++ b/costa/native/external/lapack/ssygvd.f @@ -0,0 +1,277 @@ + SUBROUTINE SSYGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, + $ LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SSYGVD computes all the eigenvalues, and optionally, the eigenvectors +* of a real generalized symmetric-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +* B are assumed to be symmetric and B is also positive definite. +* If eigenvectors are desired, it uses a divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) REAL array, dimension (LDA, N) +* On entry, the symmetric matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* +* On exit, if JOBZ = 'V', then if INFO = 0, A contains the +* matrix Z of eigenvectors. The eigenvectors are normalized +* as follows: +* if ITYPE = 1 or 2, Z**T*B*Z = I; +* if ITYPE = 3, Z**T*inv(B)*Z = I. +* If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +* or the lower triangle (if UPLO='L') of A, including the +* diagonal, is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) REAL array, dimension (LDB, N) +* On entry, the symmetric matrix B. If UPLO = 'U', the +* leading N-by-N upper triangular part of B contains the +* upper triangular part of the matrix B. If UPLO = 'L', +* the leading N-by-N lower triangular part of B contains +* the lower triangular part of the matrix B. +* +* On exit, if INFO <= N, the part of B containing the matrix is +* overwritten by the triangular factor U or L from the Cholesky +* factorization B = U**T*U or B = L*L**T. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* W (output) REAL array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If N <= 1, LWORK >= 1. +* If JOBZ = 'N' and N > 1, LWORK >= 2*N+1. +* If JOBZ = 'V' and N > 1, LWORK >= 1 + 6*N + 2*N**2. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If N <= 1, LIWORK >= 1. +* If JOBZ = 'N' and N > 1, LIWORK >= 1. +* If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: SPOTRF or SSYEVD returned an error code: +* <= N: if INFO = i, SSYEVD failed to converge; +* i off-diagonal elements of an intermediate +* tridiagonal form did not converge to zero; +* > N: if INFO = N + i, for 1 <= i <= N, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER LIOPT, LIWMIN, LOPT, LWMIN, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SPOTRF, SSYEVD, SSYGST, STRMM, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + LOPT = LWMIN + LIOPT = LIWMIN + ELSE + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 6*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + 1 + END IF + LOPT = LWMIN + LIOPT = LIWMIN + END IF + IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LOPT + IWORK( 1 ) = LIOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL SPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL SSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL SSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, IWORK, LIWORK, + $ INFO ) + LOPT = MAX( REAL( LOPT ), REAL( WORK( 1 ) ) ) + LIOPT = MAX( REAL( LIOPT ), REAL( IWORK( 1 ) ) ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + CALL STRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + CALL STRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LOPT + IWORK( 1 ) = LIOPT +* + RETURN +* +* End of SSYGVD +* + END diff --git a/costa/native/external/lapack/ssygvx.f b/costa/native/external/lapack/ssygvx.f new file mode 100644 index 000000000..97f8bf503 --- /dev/null +++ b/costa/native/external/lapack/ssygvx.f @@ -0,0 +1,325 @@ + SUBROUTINE SSYGVX( ITYPE, JOBZ, RANGE, UPLO, N, A, LDA, B, LDB, + $ VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, + $ LWORK, IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, ITYPE, IU, LDA, LDB, LDZ, LWORK, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL A( LDA, * ), B( LDB, * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* SSYGVX computes selected eigenvalues, and optionally, eigenvectors +* of a real generalized symmetric-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A +* and B are assumed to be symmetric and B is also positive definite. +* Eigenvalues and eigenvectors can be selected by specifying either a +* range of values or a range of indices for the desired eigenvalues. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A and B are stored; +* = 'L': Lower triangle of A and B are stored. +* +* N (input) INTEGER +* The order of the matrix pencil (A,B). N >= 0. +* +* A (input/output) REAL array, dimension (LDA, N) +* On entry, the symmetric matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* +* On exit, the lower triangle (if UPLO='L') or the upper +* triangle (if UPLO='U') of A, including the diagonal, is +* destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) REAL array, dimension (LDA, N) +* On entry, the symmetric matrix B. If UPLO = 'U', the +* leading N-by-N upper triangular part of B contains the +* upper triangular part of the matrix B. If UPLO = 'L', +* the leading N-by-N lower triangular part of B contains +* the lower triangular part of the matrix B. +* +* On exit, if INFO <= N, the part of B containing the matrix is +* overwritten by the triangular factor U or L from the Cholesky +* factorization B = U**T*U or B = L*L**T. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* VL (input) REAL +* VU (input) REAL +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) REAL +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing A to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*DLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*SLAMCH('S'). +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) REAL array, dimension (N) +* On normal exit, the first M elements contain the selected +* eigenvalues in ascending order. +* +* Z (output) REAL array, dimension (LDZ, max(1,M)) +* If JOBZ = 'N', then Z is not referenced. +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* The eigenvectors are normalized as follows: +* if ITYPE = 1 or 2, Z**T*B*Z = I; +* if ITYPE = 3, Z**T*inv(B)*Z = I. +* +* If an eigenvector fails to converge, then that column of Z +* contains the latest approximation to the eigenvector, and the +* index of the eigenvector is returned in IFAIL. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,8*N). +* For optimal efficiency, LWORK >= (NB+3)*N, +* where NB is the blocksize for SSYTRD returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: SPOTRF or SSYEVX returned an error code: +* <= N: if INFO = i, SSYEVX failed to converge; +* i eigenvectors failed to converge. Their indices +* are stored in array IFAIL. +* > N: if INFO = N + i, for 1 <= i <= N, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ + CHARACTER TRANS + INTEGER LOPT, LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SPOTRF, SSYEVX, SSYGST, STRMM, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + UPPER = LSAME( UPLO, 'U' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -3 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( VALEIG .AND. N.GT.0 ) THEN + IF( VU.LE.VL ) INFO = -11 + ELSE IF( INDEIG .AND. IL.LT.1 ) THEN + INFO = -12 + ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN + INFO = -13 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -18 + ELSE IF( LWORK.LT.MAX( 1, 8*N ) .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) + LWKOPT = ( NB+3 )*N + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYGVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Form a Cholesky factorization of B. +* + CALL SPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL SSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL SSYEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, ABSTOL, + $ M, W, Z, LDZ, WORK, LWORK, IWORK, IFAIL, INFO ) + LOPT = WORK( 1 ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + IF( INFO.GT.0 ) + $ M = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + CALL STRSM( 'Left', UPLO, TRANS, 'Non-unit', N, M, ONE, B, + $ LDB, Z, LDZ ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + CALL STRMM( 'Left', UPLO, TRANS, 'Non-unit', N, M, ONE, B, + $ LDB, Z, LDZ ) + END IF + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SSYGVX +* + END diff --git a/costa/native/external/lapack/ssyrfs.f b/costa/native/external/lapack/ssyrfs.f new file mode 100644 index 000000000..77572f0ad --- /dev/null +++ b/costa/native/external/lapack/ssyrfs.f @@ -0,0 +1,335 @@ + SUBROUTINE SSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, + $ X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SSYRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is symmetric indefinite, and +* provides error bounds and backward error estimates for the solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The symmetric matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input) REAL array, dimension (LDAF,N) +* The factored form of the matrix A. AF contains the block +* diagonal matrix D and the multipliers used to obtain the +* factor U or L from the factorization A = U*D*U**T or +* A = L*D*L**T as computed by SSYTRF. +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by SSYTRF. +* +* B (input) REAL array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) REAL array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by SSYTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SLACON, SSYMV, SSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL SSYMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, + $ WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + DO 40 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 40 CONTINUE + WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK + DO 60 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 60 CONTINUE + WORK( K ) = WORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL SSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N, + $ INFO ) + CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use SLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL SLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL SSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N, + $ INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 120 CONTINUE + CALL SSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N, + $ INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of SSYRFS +* + END diff --git a/costa/native/external/lapack/ssysv.f b/costa/native/external/lapack/ssysv.f new file mode 100644 index 000000000..5da1fa0f0 --- /dev/null +++ b/costa/native/external/lapack/ssysv.f @@ -0,0 +1,171 @@ + SUBROUTINE SSYSV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SSYSV computes the solution to a real system of linear equations +* A * X = B, +* where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +* matrices. +* +* The diagonal pivoting method is used to factor A as +* A = U * D * U**T, if UPLO = 'U', or +* A = L * D * L**T, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is symmetric and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then +* used to solve the system of equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the block diagonal matrix D and the +* multipliers used to obtain the factor U or L from the +* factorization A = U*D*U**T or A = L*D*L**T as computed by +* SSYTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D, as +* determined by SSYTRF. If IPIV(k) > 0, then rows and columns +* k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +* diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +* then rows and columns k-1 and -IPIV(k) were interchanged and +* D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +* IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +* -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +* diagonal block. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of WORK. LWORK >= 1, and for best performance +* LWORK >= N*NB, where NB is the optimal blocksize for +* SSYTRF. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, so the solution could not be computed. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSYTRF, SSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'SSYTRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYSV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL SSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL SSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SSYSV +* + END diff --git a/costa/native/external/lapack/ssysvx.f b/costa/native/external/lapack/ssysvx.f new file mode 100644 index 000000000..eaab12ed2 --- /dev/null +++ b/costa/native/external/lapack/ssysvx.f @@ -0,0 +1,297 @@ + SUBROUTINE SSYSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, + $ LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK, + $ IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* SSYSVX uses the diagonal pivoting factorization to compute the +* solution to a real system of linear equations A * X = B, +* where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +* matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'N', the diagonal pivoting method is used to factor A. +* The form of the factorization is +* A = U * D * U**T, if UPLO = 'U', or +* A = L * D * L**T, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is symmetric and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. +* +* 2. If some D(i,i)=0, so that D is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 3. The system of equations is solved for X using the factored form +* of A. +* +* 4. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of A has been +* supplied on entry. +* = 'F': On entry, AF and IPIV contain the factored form of +* A. AF and IPIV will not be modified. +* = 'N': The matrix A will be copied to AF and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The symmetric matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input or output) REAL array, dimension (LDAF,N) +* If FACT = 'F', then AF is an input argument and on entry +* contains the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**T or A = L*D*L**T as computed by SSYTRF. +* +* If FACT = 'N', then AF is an output argument and on exit +* returns the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**T or A = L*D*L**T. +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains details of the interchanges and the block structure +* of D, as determined by SSYTRF. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains details of the interchanges and the block structure +* of D, as determined by SSYTRF. +* +* B (input) REAL array, dimension (LDB,NRHS) +* The N-by-NRHS right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) REAL array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) REAL +* The estimate of the reciprocal condition number of the matrix +* A. If RCOND is less than the machine precision (in +* particular, if RCOND = 0), the matrix is singular to working +* precision. This condition is indicated by a return code of +* INFO > 0. +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of WORK. LWORK >= 3*N, and for best performance +* LWORK >= N*NB, where NB is the optimal blocksize for +* SSYTRF. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: D(i,i) is exactly zero. The factorization +* has been completed but the factor D is exactly +* singular, so the solution and error bounds could +* not be computed. RCOND = 0 is returned. +* = N+1: D is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOFACT + INTEGER LWKOPT, NB + REAL ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANSY + EXTERNAL ILAENV, LSAME, SLAMCH, SLANSY +* .. +* .. External Subroutines .. + EXTERNAL SLACPY, SSYCON, SSYRFS, SSYTRF, SSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.MAX( 1, 3*N ) .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'SSYTRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYSVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL SLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL SSYTRF( UPLO, N, AF, LDAF, IPIV, WORK, LWORK, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = SLANSY( 'I', UPLO, N, A, LDA, WORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL SSYCON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution vectors X. +* + CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL SSYTRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL SSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X, + $ LDX, FERR, BERR, WORK, IWORK, INFO ) +* + RETURN +* +* End of SSYSVX +* + END diff --git a/costa/native/external/lapack/ssytd2.f b/costa/native/external/lapack/ssytd2.f new file mode 100644 index 000000000..4e2dc293f --- /dev/null +++ b/costa/native/external/lapack/ssytd2.f @@ -0,0 +1,248 @@ + SUBROUTINE SSYTD2( UPLO, N, A, LDA, D, E, TAU, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), D( * ), E( * ), TAU( * ) +* .. +* +* Purpose +* ======= +* +* SSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal +* form T by an orthogonal similarity transformation: Q' * A * Q = T. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* On exit, if UPLO = 'U', the diagonal and first superdiagonal +* of A are overwritten by the corresponding elements of the +* tridiagonal matrix T, and the elements above the first +* superdiagonal, with the array TAU, represent the orthogonal +* matrix Q as a product of elementary reflectors; if UPLO +* = 'L', the diagonal and first subdiagonal of A are over- +* written by the corresponding elements of the tridiagonal +* matrix T, and the elements below the first subdiagonal, with +* the array TAU, represent the orthogonal matrix Q as a product +* of elementary reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* D (output) REAL array, dimension (N) +* The diagonal elements of the tridiagonal matrix T: +* D(i) = A(i,i). +* +* E (output) REAL array, dimension (N-1) +* The off-diagonal elements of the tridiagonal matrix T: +* E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +* +* TAU (output) REAL array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* If UPLO = 'U', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(n-1) . . . H(2) H(1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +* A(1:i-1,i+1), and tau in TAU(i). +* +* If UPLO = 'L', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(1) H(2) . . . H(n-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +* and tau in TAU(i). +* +* The contents of A on exit are illustrated by the following examples +* with n = 5: +* +* if UPLO = 'U': if UPLO = 'L': +* +* ( d e v2 v3 v4 ) ( d ) +* ( d e v3 v4 ) ( e d ) +* ( d e v4 ) ( v1 e d ) +* ( d e ) ( v1 v2 e d ) +* ( d ) ( v1 v2 v3 e d ) +* +* where d and e denote diagonal and off-diagonal elements of T, and vi +* denotes an element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO, HALF + PARAMETER ( ONE = 1.0, ZERO = 0.0, HALF = 1.0 / 2.0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + REAL ALPHA, TAUI +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SLARFG, SSYMV, SSYR2, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTD2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A +* + DO 10 I = N - 1, 1, -1 +* +* Generate elementary reflector H(i) = I - tau * v * v' +* to annihilate A(1:i-1,i+1) +* + CALL SLARFG( I, A( I, I+1 ), A( 1, I+1 ), 1, TAUI ) + E( I ) = A( I, I+1 ) +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(1:i,1:i) +* + A( I, I+1 ) = ONE +* +* Compute x := tau * A * v storing x in TAU(1:i) +* + CALL SSYMV( UPLO, I, TAUI, A, LDA, A( 1, I+1 ), 1, ZERO, + $ TAU, 1 ) +* +* Compute w := x - 1/2 * tau * (x'*v) * v +* + ALPHA = -HALF*TAUI*SDOT( I, TAU, 1, A( 1, I+1 ), 1 ) + CALL SAXPY( I, ALPHA, A( 1, I+1 ), 1, TAU, 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w' - w * v' +* + CALL SSYR2( UPLO, I, -ONE, A( 1, I+1 ), 1, TAU, 1, A, + $ LDA ) +* + A( I, I+1 ) = E( I ) + END IF + D( I+1 ) = A( I+1, I+1 ) + TAU( I ) = TAUI + 10 CONTINUE + D( 1 ) = A( 1, 1 ) + ELSE +* +* Reduce the lower triangle of A +* + DO 20 I = 1, N - 1 +* +* Generate elementary reflector H(i) = I - tau * v * v' +* to annihilate A(i+2:n,i) +* + CALL SLARFG( N-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1, + $ TAUI ) + E( I ) = A( I+1, I ) +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(i+1:n,i+1:n) +* + A( I+1, I ) = ONE +* +* Compute x := tau * A * v storing y in TAU(i:n-1) +* + CALL SSYMV( UPLO, N-I, TAUI, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, TAU( I ), 1 ) +* +* Compute w := x - 1/2 * tau * (x'*v) * v +* + ALPHA = -HALF*TAUI*SDOT( N-I, TAU( I ), 1, A( I+1, I ), + $ 1 ) + CALL SAXPY( N-I, ALPHA, A( I+1, I ), 1, TAU( I ), 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w' - w * v' +* + CALL SSYR2( UPLO, N-I, -ONE, A( I+1, I ), 1, TAU( I ), 1, + $ A( I+1, I+1 ), LDA ) +* + A( I+1, I ) = E( I ) + END IF + D( I ) = A( I, I ) + TAU( I ) = TAUI + 20 CONTINUE + D( N ) = A( N, N ) + END IF +* + RETURN +* +* End of SSYTD2 +* + END diff --git a/costa/native/external/lapack/ssytf2.f b/costa/native/external/lapack/ssytf2.f new file mode 100644 index 000000000..c5cdaf82c --- /dev/null +++ b/costa/native/external/lapack/ssytf2.f @@ -0,0 +1,511 @@ + SUBROUTINE SSYTF2( UPLO, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* SSYTF2 computes the factorization of a real symmetric matrix A using +* the Bunch-Kaufman diagonal pivoting method: +* +* A = U*D*U' or A = L*D*L' +* +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, U' is the transpose of U, and D is symmetric and +* block diagonal with 1-by-1 and 2-by-2 diagonal blocks. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L (see below for further details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, D(k,k) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, and division by zero will occur if it +* is used to solve a system of equations. +* +* Further Details +* =============== +* +* 1-96 - Based on modifications by J. Lewis, Boeing Computer Services +* Company +* +* If UPLO = 'U', then A = U*D*U', where +* U = P(n)*U(n)* ... *P(k)U(k)* ..., +* i.e., U is a product of terms P(k)*U(k), where k decreases from n to +* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I v 0 ) k-s +* U(k) = ( 0 I 0 ) s +* ( 0 0 I ) n-k +* k-s s n-k +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +* and A(k,k), and v overwrites A(1:k-2,k-1:k). +* +* If UPLO = 'L', then A = L*D*L', where +* L = P(1)*L(1)* ... *P(k)*L(k)* ..., +* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I 0 0 ) k-1 +* L(k) = ( 0 I 0 ) s +* ( 0 v I ) n-k-s+1 +* k-1 s n-k-s+1 +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KK, KP, KSTEP + REAL ABSAKK, ALPHA, COLMAX, D11, D12, D21, D22, R1, + $ ROWMAX, T, WK, WKM1, WKP1 +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + EXTERNAL LSAME, ISAMAX +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSWAP, SSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTF2', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U' using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 70 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = ISAMAX( K-1, A( 1, K ), 1 ) + COLMAX = ABS( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = IMAX + ISAMAX( K-IMAX, A( IMAX, IMAX+1 ), LDA ) + ROWMAX = ABS( A( IMAX, JMAX ) ) + IF( IMAX.GT.1 ) THEN + JMAX = ISAMAX( IMAX-1, A( 1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL SSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) + CALL SSWAP( KK-KP-1, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)' +* + R1 = ONE / A( K, K ) + CALL SSYR( UPLO, K-1, -R1, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL SSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )' +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )' +* + IF( K.GT.2 ) THEN +* + D12 = A( K-1, K ) + D22 = A( K-1, K-1 ) / D12 + D11 = A( K, K ) / D12 + T = ONE / ( D11*D22-ONE ) + D12 = T / D12 +* + DO 30 J = K - 2, 1, -1 + WKM1 = D12*( D11*A( J, K-1 )-A( J, K ) ) + WK = D12*( D22*A( J, K )-A( J, K-1 ) ) + DO 20 I = J, 1, -1 + A( I, J ) = A( I, J ) - A( I, K )*WK - + $ A( I, K-1 )*WKM1 + 20 CONTINUE + A( J, K ) = WK + A( J, K-1 ) = WKM1 + 30 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L' using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 70 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + ISAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = ABS( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = K - 1 + ISAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = ABS( A( IMAX, JMAX ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + ISAMAX( N-IMAX, A( IMAX+1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL SSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) + CALL SSWAP( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)' +* + D11 = ONE / A( K, K ) + CALL SSYR( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column K +* + CALL SSCAL( N-K, D11, A( K+1, K ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k) +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( (A(k) A(k+1))*D(k)**(-1) ) * (A(k) A(k+1))' +* +* where L(k) and L(k+1) are the k-th and (k+1)-th +* columns of L +* + D21 = A( K+1, K ) + D11 = A( K+1, K+1 ) / D21 + D22 = A( K, K ) / D21 + T = ONE / ( D11*D22-ONE ) + D21 = T / D21 +* + DO 60 J = K + 2, N +* + WK = D21*( D11*A( J, K )-A( J, K+1 ) ) + WKP1 = D21*( D22*A( J, K+1 )-A( J, K ) ) +* + DO 50 I = J, N + A( I, J ) = A( I, J ) - A( I, K )*WK - + $ A( I, K+1 )*WKP1 + 50 CONTINUE +* + A( J, K ) = WK + A( J, K+1 ) = WKP1 +* + 60 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 40 +* + END IF +* + 70 CONTINUE +* + RETURN +* +* End of SSYTF2 +* + END diff --git a/costa/native/external/lapack/ssytrd.f b/costa/native/external/lapack/ssytrd.f new file mode 100644 index 000000000..fab61591a --- /dev/null +++ b/costa/native/external/lapack/ssytrd.f @@ -0,0 +1,295 @@ + SUBROUTINE SSYTRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), D( * ), E( * ), TAU( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* SSYTRD reduces a real symmetric matrix A to real symmetric +* tridiagonal form T by an orthogonal similarity transformation: +* Q**T * A * Q = T. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* On exit, if UPLO = 'U', the diagonal and first superdiagonal +* of A are overwritten by the corresponding elements of the +* tridiagonal matrix T, and the elements above the first +* superdiagonal, with the array TAU, represent the orthogonal +* matrix Q as a product of elementary reflectors; if UPLO +* = 'L', the diagonal and first subdiagonal of A are over- +* written by the corresponding elements of the tridiagonal +* matrix T, and the elements below the first subdiagonal, with +* the array TAU, represent the orthogonal matrix Q as a product +* of elementary reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* D (output) REAL array, dimension (N) +* The diagonal elements of the tridiagonal matrix T: +* D(i) = A(i,i). +* +* E (output) REAL array, dimension (N-1) +* The off-diagonal elements of the tridiagonal matrix T: +* E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +* +* TAU (output) REAL array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1. +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* If UPLO = 'U', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(n-1) . . . H(2) H(1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +* A(1:i-1,i+1), and tau in TAU(i). +* +* If UPLO = 'L', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(1) H(2) . . . H(n-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real scalar, and v is a real vector with +* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +* and tau in TAU(i). +* +* The contents of A on exit are illustrated by the following examples +* with n = 5: +* +* if UPLO = 'U': if UPLO = 'L': +* +* ( d e v2 v3 v4 ) ( d ) +* ( d e v3 v4 ) ( e d ) +* ( d e v4 ) ( v1 e d ) +* ( d e ) ( v1 v2 e d ) +* ( d ) ( v1 v2 v3 e d ) +* +* where d and e denote diagonal and off-diagonal elements of T, and vi +* denotes an element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, IWS, J, KK, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SLATRD, SSYR2K, SSYTD2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. +* + NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NX = N + IWS = 1 + IF( NB.GT.1 .AND. NB.LT.N ) THEN +* +* Determine when to cross over from blocked to unblocked code +* (last block is always handled by unblocked code). +* + NX = MAX( NB, ILAENV( 3, 'SSYTRD', UPLO, N, -1, -1, -1 ) ) + IF( NX.LT.N ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code by setting NX = N. +* + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = ILAENV( 2, 'SSYTRD', UPLO, N, -1, -1, -1 ) + IF( NB.LT.NBMIN ) + $ NX = N + END IF + ELSE + NX = N + END IF + ELSE + NB = 1 + END IF +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A. +* Columns 1:kk are handled by the unblocked method. +* + KK = N - ( ( N-NX+NB-1 ) / NB )*NB + DO 20 I = N - NB + 1, KK + 1, -NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL SLATRD( UPLO, I+NB-1, NB, A, LDA, E, TAU, WORK, + $ LDWORK ) +* +* Update the unreduced submatrix A(1:i-1,1:i-1), using an +* update of the form: A := A - V*W' - W*V' +* + CALL SSYR2K( UPLO, 'No transpose', I-1, NB, -ONE, A( 1, I ), + $ LDA, WORK, LDWORK, ONE, A, LDA ) +* +* Copy superdiagonal elements back into A, and diagonal +* elements into D +* + DO 10 J = I, I + NB - 1 + A( J-1, J ) = E( J-1 ) + D( J ) = A( J, J ) + 10 CONTINUE + 20 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL SSYTD2( UPLO, KK, A, LDA, D, E, TAU, IINFO ) + ELSE +* +* Reduce the lower triangle of A +* + DO 40 I = 1, N - NX, NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL SLATRD( UPLO, N-I+1, NB, A( I, I ), LDA, E( I ), + $ TAU( I ), WORK, LDWORK ) +* +* Update the unreduced submatrix A(i+ib:n,i+ib:n), using +* an update of the form: A := A - V*W' - W*V' +* + CALL SSYR2K( UPLO, 'No transpose', N-I-NB+1, NB, -ONE, + $ A( I+NB, I ), LDA, WORK( NB+1 ), LDWORK, ONE, + $ A( I+NB, I+NB ), LDA ) +* +* Copy subdiagonal elements back into A, and diagonal +* elements into D +* + DO 30 J = I, I + NB - 1 + A( J+1, J ) = E( J ) + D( J ) = A( J, J ) + 30 CONTINUE + 40 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL SSYTD2( UPLO, N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAU( I ), IINFO ) + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of SSYTRD +* + END diff --git a/costa/native/external/lapack/ssytrf.f b/costa/native/external/lapack/ssytrf.f new file mode 100644 index 000000000..10743da81 --- /dev/null +++ b/costa/native/external/lapack/ssytrf.f @@ -0,0 +1,288 @@ + SUBROUTINE SSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SSYTRF computes the factorization of a real symmetric matrix A using +* the Bunch-Kaufman diagonal pivoting method. The form of the +* factorization is +* +* A = U*D*U**T or A = L*D*L**T +* +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is symmetric and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. +* +* This is the blocked version of the algorithm, calling Level 3 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L (see below for further details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of WORK. LWORK >=1. For best performance +* LWORK >= N*NB, where NB is the block size returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, and division by zero will occur if it +* is used to solve a system of equations. +* +* Further Details +* =============== +* +* If UPLO = 'U', then A = U*D*U', where +* U = P(n)*U(n)* ... *P(k)U(k)* ..., +* i.e., U is a product of terms P(k)*U(k), where k decreases from n to +* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I v 0 ) k-s +* U(k) = ( 0 I 0 ) s +* ( 0 0 I ) n-k +* k-s s n-k +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +* and A(k,k), and v overwrites A(1:k-2,k-1:k). +* +* If UPLO = 'L', then A = L*D*L', where +* L = P(1)*L(1)* ... *P(k)*L(k)* ..., +* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I 0 0 ) k-1 +* L(k) = ( 0 I 0 ) s +* ( 0 v I ) n-k-s+1 +* k-1 s n-k-s+1 +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SLASYF, SSYTF2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'SSYTRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'SSYTRF', UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U' using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by SLASYF; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 40 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL SLASYF( UPLO, K, NB, KB, A, LDA, IPIV, WORK, LDWORK, + $ IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL SSYTF2( UPLO, K, A, LDA, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L' using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by SLASYF; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL SLASYF( UPLO, N-K+1, NB, KB, A( K, K ), LDA, IPIV( K ), + $ WORK, LDWORK, IINFO ) + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL SSYTF2( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), IINFO ) + KB = N - K + 1 + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO 30 J = K, K + KB - 1 + IF( IPIV( J ).GT.0 ) THEN + IPIV( J ) = IPIV( J ) + K - 1 + ELSE + IPIV( J ) = IPIV( J ) - K + 1 + END IF + 30 CONTINUE +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* + END IF +* + 40 CONTINUE + WORK( 1 ) = LWKOPT + RETURN +* +* End of SSYTRF +* + END diff --git a/costa/native/external/lapack/ssytri.f b/costa/native/external/lapack/ssytri.f new file mode 100644 index 000000000..ad8edc74d --- /dev/null +++ b/costa/native/external/lapack/ssytri.f @@ -0,0 +1,313 @@ + SUBROUTINE SSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* SSYTRI computes the inverse of a real symmetric indefinite matrix +* A using the factorization A = U*D*U**T or A = L*D*L**T computed by +* SSYTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the block diagonal matrix D and the multipliers +* used to obtain the factor U or L as computed by SSYTRF. +* +* On exit, if INFO = 0, the (symmetric) inverse of the original +* matrix. If UPLO = 'U', the upper triangular part of the +* inverse is formed and the part of A below the diagonal is not +* referenced; if UPLO = 'L' the lower triangular part of the +* inverse is formed and the part of A above the diagonal is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by SSYTRF. +* +* WORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +* inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K, KP, KSTEP + REAL AK, AKKP1, AKP1, D, T, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SSWAP, SSYMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL SCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL SSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - SDOT( K-1, WORK, 1, A( 1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K+1 ) ) + AK = A( K, K ) / T + AKP1 = A( K+1, K+1 ) / T + AKKP1 = A( K, K+1 ) / T + D = T*( AK*AKP1-ONE ) + A( K, K ) = AKP1 / D + A( K+1, K+1 ) = AK / D + A( K, K+1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL SCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL SSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - SDOT( K-1, WORK, 1, A( 1, K ), + $ 1 ) + A( K, K+1 ) = A( K, K+1 ) - + $ SDOT( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 ) + CALL SCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 ) + CALL SSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K+1 ), 1 ) + A( K+1, K+1 ) = A( K+1, K+1 ) - + $ SDOT( K-1, WORK, 1, A( 1, K+1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + CALL SSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + CALL SSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K+1 ) + A( K, K+1 ) = A( KP, K+1 ) + A( KP, K+1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + GO TO 30 + 40 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 50 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 60 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL SCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL SSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - SDOT( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K-1 ) ) + AK = A( K-1, K-1 ) / T + AKP1 = A( K, K ) / T + AKKP1 = A( K, K-1 ) / T + D = T*( AK*AKP1-ONE ) + A( K-1, K-1 ) = AKP1 / D + A( K, K ) = AK / D + A( K, K-1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL SCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL SSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - SDOT( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + A( K, K-1 ) = A( K, K-1 ) - + $ SDOT( N-K, A( K+1, K ), 1, A( K+1, K-1 ), + $ 1 ) + CALL SCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 ) + CALL SSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K-1 ), 1 ) + A( K-1, K-1 ) = A( K-1, K-1 ) - + $ SDOT( N-K, WORK, 1, A( K+1, K-1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + IF( KP.LT.N ) + $ CALL SSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + CALL SSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K-1 ) + A( K, K-1 ) = A( KP, K-1 ) + A( KP, K-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + GO TO 50 + 60 CONTINUE + END IF +* + RETURN +* +* End of SSYTRI +* + END diff --git a/costa/native/external/lapack/ssytrs.f b/costa/native/external/lapack/ssytrs.f new file mode 100644 index 000000000..d7525cd24 --- /dev/null +++ b/costa/native/external/lapack/ssytrs.f @@ -0,0 +1,370 @@ + SUBROUTINE SSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* SSYTRS solves a system of linear equations A*X = B with a real +* symmetric matrix A using the factorization A = U*D*U**T or +* A = L*D*L**T computed by SSYTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by SSYTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by SSYTRF. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP + REAL AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SGER, SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U'. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL SGER( K-1, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL SSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL SSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL SGER( K-2, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL SGER( K-2, NRHS, -ONE, A( 1, K-1 ), 1, B( K-1, 1 ), + $ LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K-1, K ) + AKM1 = A( K-1, K-1 ) / AKM1K + AK = A( K, K ) / AKM1K + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / AKM1K + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U'*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U'(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL SGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ), + $ 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U'(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + CALL SGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ), + $ 1, ONE, B( K, 1 ), LDB ) + CALL SGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, + $ A( 1, K+1 ), 1, ONE, B( K+1, 1 ), LDB ) +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L'. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL SGER( N-K, NRHS, -ONE, A( K+1, K ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL SSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL SSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL SGER( N-K-1, NRHS, -ONE, A( K+2, K ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL SGER( N-K-1, NRHS, -ONE, A( K+2, K+1 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K+1, K ) + AKM1 = A( K, K ) / AKM1K + AK = A( K+1, K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / AKM1K + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L'*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L'(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL SGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L'(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL SGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL SGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K-1 ), 1, ONE, B( K-1, 1 ), + $ LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of SSYTRS +* + END diff --git a/costa/native/external/lapack/stbcon.f b/costa/native/external/lapack/stbcon.f new file mode 100644 index 000000000..650424eff --- /dev/null +++ b/costa/native/external/lapack/stbcon.f @@ -0,0 +1,198 @@ + SUBROUTINE STBCON( NORM, UPLO, DIAG, N, KD, AB, LDAB, RCOND, WORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, KD, LDAB, N + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AB( LDAB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* STBCON estimates the reciprocal of the condition number of a +* triangular band matrix A, in either the 1-norm or the infinity-norm. +* +* The norm of A is computed and an estimate is obtained for +* norm(inv(A)), then the reciprocal of the condition number is +* computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals or subdiagonals of the +* triangular band matrix A. KD >= 0. +* +* AB (input) REAL array, dimension (LDAB,N) +* The upper or lower triangular band matrix A, stored in the +* first kd+1 rows of the array. The j-th column of A is stored +* in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* If DIAG = 'U', the diagonal elements of A are not referenced +* and are assumed to be 1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + REAL AINVNM, ANORM, SCALE, SMLNUM, XNORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH, SLANTB + EXTERNAL LSAME, ISAMAX, SLAMCH, SLANTB +* .. +* .. External Subroutines .. + EXTERNAL SLACON, SLATBS, SRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = SLAMCH( 'Safe minimum' )*REAL( MAX( 1, N ) ) +* +* Compute the norm of the triangular matrix A. +* + ANORM = SLANTB( NORM, UPLO, DIAG, N, KD, AB, LDAB, WORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL SLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL SLATBS( UPLO, 'No transpose', DIAG, NORMIN, N, KD, + $ AB, LDAB, WORK, SCALE, WORK( 2*N+1 ), INFO ) + ELSE +* +* Multiply by inv(A'). +* + CALL SLATBS( UPLO, 'Transpose', DIAG, NORMIN, N, KD, AB, + $ LDAB, WORK, SCALE, WORK( 2*N+1 ), INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = ISAMAX( N, WORK, 1 ) + XNORM = ABS( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL SRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of STBCON +* + END diff --git a/costa/native/external/lapack/stbrfs.f b/costa/native/external/lapack/stbrfs.f new file mode 100644 index 000000000..00a50967d --- /dev/null +++ b/costa/native/external/lapack/stbrfs.f @@ -0,0 +1,381 @@ + SUBROUTINE STBRFS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, + $ LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, KD, LDAB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AB( LDAB, * ), B( LDB, * ), BERR( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* STBRFS provides error bounds and backward error estimates for the +* solution to a system of linear equations with a triangular band +* coefficient matrix. +* +* The solution matrix X must be computed by STBTRS or some other +* means before entering this routine. STBRFS does not do iterative +* refinement because doing so cannot improve the backward error. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose = Transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals or subdiagonals of the +* triangular band matrix A. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AB (input) REAL array, dimension (LDAB,N) +* The upper or lower triangular band matrix A, stored in the +* first kd+1 rows of the array. The j-th column of A is stored +* in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* If DIAG = 'U', the diagonal elements of A are not referenced +* and are assumed to be 1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* B (input) REAL array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input) REAL array, dimension (LDX,NRHS) +* The solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANST + INTEGER I, J, K, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SLACON, STBMV, STBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = KD + 2 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A or A', depending on TRANS. +* + CALL SCOPY( N, X( 1, J ), 1, WORK( N+1 ), 1 ) + CALL STBMV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, WORK( N+1 ), + $ 1 ) + CALL SAXPY( N, -ONE, B( 1, J ), 1, WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = ABS( X( K, J ) ) + DO 30 I = MAX( 1, K-KD ), K + WORK( I ) = WORK( I ) + + $ ABS( AB( KD+1+I-K, K ) )*XK + 30 CONTINUE + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = ABS( X( K, J ) ) + DO 50 I = MAX( 1, K-KD ), K - 1 + WORK( I ) = WORK( I ) + + $ ABS( AB( KD+1+I-K, K ) )*XK + 50 CONTINUE + WORK( K ) = WORK( K ) + XK + 60 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = ABS( X( K, J ) ) + DO 70 I = K, MIN( N, K+KD ) + WORK( I ) = WORK( I ) + ABS( AB( 1+I-K, K ) )*XK + 70 CONTINUE + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = ABS( X( K, J ) ) + DO 90 I = K + 1, MIN( N, K+KD ) + WORK( I ) = WORK( I ) + ABS( AB( 1+I-K, K ) )*XK + 90 CONTINUE + WORK( K ) = WORK( K ) + XK + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A')*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = MAX( 1, K-KD ), K + S = S + ABS( AB( KD+1+I-K, K ) )* + $ ABS( X( I, J ) ) + 110 CONTINUE + WORK( K ) = WORK( K ) + S + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = ABS( X( K, J ) ) + DO 130 I = MAX( 1, K-KD ), K - 1 + S = S + ABS( AB( KD+1+I-K, K ) )* + $ ABS( X( I, J ) ) + 130 CONTINUE + WORK( K ) = WORK( K ) + S + 140 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, MIN( N, K+KD ) + S = S + ABS( AB( 1+I-K, K ) )*ABS( X( I, J ) ) + 150 CONTINUE + WORK( K ) = WORK( K ) + S + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = ABS( X( K, J ) ) + DO 170 I = K + 1, MIN( N, K+KD ) + S = S + ABS( AB( 1+I-K, K ) )*ABS( X( I, J ) ) + 170 CONTINUE + WORK( K ) = WORK( K ) + S + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use SLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL SLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)'). +* + CALL STBSV( UPLO, TRANST, DIAG, N, KD, AB, LDAB, + $ WORK( N+1 ), 1 ) + DO 220 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 230 CONTINUE + CALL STBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, + $ WORK( N+1 ), 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of STBRFS +* + END diff --git a/costa/native/external/lapack/stbtrs.f b/costa/native/external/lapack/stbtrs.f new file mode 100644 index 000000000..e13ed7040 --- /dev/null +++ b/costa/native/external/lapack/stbtrs.f @@ -0,0 +1,163 @@ + SUBROUTINE STBTRS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, + $ LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* STBTRS solves a triangular system of the form +* +* A * X = B or A**T * X = B, +* +* where A is a triangular band matrix of order N, and B is an +* N-by NRHS matrix. A check is made to verify that A is nonsingular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose = Transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals or subdiagonals of the +* triangular band matrix A. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input) REAL array, dimension (LDAB,N) +* The upper or lower triangular band matrix A, stored in the +* first kd+1 rows of AB. The j-th column of A is stored +* in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* If DIAG = 'U', the diagonal elements of A are not referenced +* and are assumed to be 1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, if INFO = 0, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the i-th diagonal element of A is zero, +* indicating that the matrix is singular and the +* solutions X have not been computed. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL STBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOUNIT = LSAME( DIAG, 'N' ) + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + DO 10 INFO = 1, N + IF( AB( KD+1, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE + DO 20 INFO = 1, N + IF( AB( 1, INFO ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF + END IF + INFO = 0 +* +* Solve A * X = B or A' * X = B. +* + DO 30 J = 1, NRHS + CALL STBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, B( 1, J ), 1 ) + 30 CONTINUE +* + RETURN +* +* End of STBTRS +* + END diff --git a/costa/native/external/lapack/stgevc.f b/costa/native/external/lapack/stgevc.f new file mode 100644 index 000000000..b361a5ecc --- /dev/null +++ b/costa/native/external/lapack/stgevc.f @@ -0,0 +1,1146 @@ + SUBROUTINE STGEVC( SIDE, HOWMNY, SELECT, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, MM, M, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDA, LDB, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + REAL A( LDA, * ), B( LDB, * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* +* Purpose +* ======= +* +* STGEVC computes some or all of the right and/or left generalized +* eigenvectors of a pair of real upper triangular matrices (A,B). +* +* The right generalized eigenvector x and the left generalized +* eigenvector y of (A,B) corresponding to a generalized eigenvalue +* w are defined by: +* +* (A - wB) * x = 0 and y**H * (A - wB) = 0 +* +* where y**H denotes the conjugate tranpose of y. +* +* If an eigenvalue w is determined by zero diagonal elements of both A +* and B, a unit vector is returned as the corresponding eigenvector. +* +* If all eigenvectors are requested, the routine may either return +* the matrices X and/or Y of right or left eigenvectors of (A,B), or +* the products Z*X and/or Q*Y, where Z and Q are input orthogonal +* matrices. If (A,B) was obtained from the generalized real-Schur +* factorization of an original pair of matrices +* (A0,B0) = (Q*A*Z**H,Q*B*Z**H), +* then Z*X and Q*Y are the matrices of right or left eigenvectors of +* A. +* +* A must be block upper triangular, with 1-by-1 and 2-by-2 diagonal +* blocks. Corresponding to each 2-by-2 diagonal block is a complex +* conjugate pair of eigenvalues and eigenvectors; only one +* eigenvector of the pair is computed, namely the one corresponding +* to the eigenvalue with positive imaginary part. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'R': compute right eigenvectors only; +* = 'L': compute left eigenvectors only; +* = 'B': compute both right and left eigenvectors. +* +* HOWMNY (input) CHARACTER*1 +* = 'A': compute all right and/or left eigenvectors; +* = 'B': compute all right and/or left eigenvectors, and +* backtransform them using the input matrices supplied +* in VR and/or VL; +* = 'S': compute selected right and/or left eigenvectors, +* specified by the logical array SELECT. +* +* SELECT (input) LOGICAL array, dimension (N) +* If HOWMNY='S', SELECT specifies the eigenvectors to be +* computed. +* If HOWMNY='A' or 'B', SELECT is not referenced. +* To select the real eigenvector corresponding to the real +* eigenvalue w(j), SELECT(j) must be set to .TRUE. To select +* the complex eigenvector corresponding to a complex conjugate +* pair w(j) and w(j+1), either SELECT(j) or SELECT(j+1) must +* be set to .TRUE.. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The upper quasi-triangular matrix A. +* +* LDA (input) INTEGER +* The leading dimension of array A. LDA >= max(1, N). +* +* B (input) REAL array, dimension (LDB,N) +* The upper triangular matrix B. If A has a 2-by-2 diagonal +* block, then the corresponding 2-by-2 block of B must be +* diagonal with positive elements. +* +* LDB (input) INTEGER +* The leading dimension of array B. LDB >= max(1,N). +* +* VL (input/output) REAL array, dimension (LDVL,MM) +* On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +* contain an N-by-N matrix Q (usually the orthogonal matrix Q +* of left Schur vectors returned by SHGEQZ). +* On exit, if SIDE = 'L' or 'B', VL contains: +* if HOWMNY = 'A', the matrix Y of left eigenvectors of (A,B); +* if HOWMNY = 'B', the matrix Q*Y; +* if HOWMNY = 'S', the left eigenvectors of (A,B) specified by +* SELECT, stored consecutively in the columns of +* VL, in the same order as their eigenvalues. +* If SIDE = 'R', VL is not referenced. +* +* A complex eigenvector corresponding to a complex eigenvalue +* is stored in two consecutive columns, the first holding the +* real part, and the second the imaginary part. +* +* LDVL (input) INTEGER +* The leading dimension of array VL. +* LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise. +* +* VR (input/output) REAL array, dimension (LDVR,MM) +* On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +* contain an N-by-N matrix Q (usually the orthogonal matrix Z +* of right Schur vectors returned by SHGEQZ). +* On exit, if SIDE = 'R' or 'B', VR contains: +* if HOWMNY = 'A', the matrix X of right eigenvectors of (A,B); +* if HOWMNY = 'B', the matrix Z*X; +* if HOWMNY = 'S', the right eigenvectors of (A,B) specified by +* SELECT, stored consecutively in the columns of +* VR, in the same order as their eigenvalues. +* If SIDE = 'L', VR is not referenced. +* +* A complex eigenvector corresponding to a complex eigenvalue +* is stored in two consecutive columns, the first holding the +* real part and the second the imaginary part. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. +* LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise. +* +* MM (input) INTEGER +* The number of columns in the arrays VL and/or VR. MM >= M. +* +* M (output) INTEGER +* The number of columns in the arrays VL and/or VR actually +* used to store the eigenvectors. If HOWMNY = 'A' or 'B', M +* is set to N. Each selected real eigenvector occupies one +* column and each selected complex eigenvector occupies two +* columns. +* +* WORK (workspace) REAL array, dimension (6*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: the 2-by-2 block (INFO:INFO+1) does not have a complex +* eigenvalue. +* +* Further Details +* =============== +* +* Allocation of workspace: +* ---------- -- --------- +* +* WORK( j ) = 1-norm of j-th column of A, above the diagonal +* WORK( N+j ) = 1-norm of j-th column of B, above the diagonal +* WORK( 2*N+1:3*N ) = real part of eigenvector +* WORK( 3*N+1:4*N ) = imaginary part of eigenvector +* WORK( 4*N+1:5*N ) = real part of back-transformed eigenvector +* WORK( 5*N+1:6*N ) = imaginary part of back-transformed eigenvector +* +* Rowwise vs. columnwise solution methods: +* ------- -- ---------- -------- ------- +* +* Finding a generalized eigenvector consists basically of solving the +* singular triangular system +* +* (A - w B) x = 0 (for right) or: (A - w B)**H y = 0 (for left) +* +* Consider finding the i-th right eigenvector (assume all eigenvalues +* are real). The equation to be solved is: +* n i +* 0 = sum C(j,k) v(k) = sum C(j,k) v(k) for j = i,. . .,1 +* k=j k=j +* +* where C = (A - w B) (The components v(i+1:n) are 0.) +* +* The "rowwise" method is: +* +* (1) v(i) := 1 +* for j = i-1,. . .,1: +* i +* (2) compute s = - sum C(j,k) v(k) and +* k=j+1 +* +* (3) v(j) := s / C(j,j) +* +* Step 2 is sometimes called the "dot product" step, since it is an +* inner product between the j-th row and the portion of the eigenvector +* that has been computed so far. +* +* The "columnwise" method consists basically in doing the sums +* for all the rows in parallel. As each v(j) is computed, the +* contribution of v(j) times the j-th column of C is added to the +* partial sums. Since FORTRAN arrays are stored columnwise, this has +* the advantage that at each step, the elements of C that are accessed +* are adjacent to one another, whereas with the rowwise method, the +* elements accessed at a step are spaced LDA (and LDB) words apart. +* +* When finding left eigenvectors, the matrix in question is the +* transpose of the one in storage, so the rowwise method then +* actually accesses columns of A and B at each step, and so is the +* preferred method. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, SAFETY + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, + $ SAFETY = 1.0E+2 ) +* .. +* .. Local Scalars .. + LOGICAL COMPL, COMPR, IL2BY2, ILABAD, ILALL, ILBACK, + $ ILBBAD, ILCOMP, ILCPLX, LSA, LSB + INTEGER I, IBEG, IEIG, IEND, IHWMNY, IINFO, IM, ISIDE, + $ J, JA, JC, JE, JR, JW, NA, NW + REAL ACOEF, ACOEFA, ANORM, ASCALE, BCOEFA, BCOEFI, + $ BCOEFR, BIG, BIGNUM, BNORM, BSCALE, CIM2A, + $ CIM2B, CIMAGA, CIMAGB, CRE2A, CRE2B, CREALA, + $ CREALB, DMIN, SAFMIN, SALFAR, SBETA, SCALE, + $ SMALL, TEMP, TEMP2, TEMP2I, TEMP2R, ULP, XMAX, + $ XSCALE +* .. +* .. Local Arrays .. + REAL BDIAG( 2 ), SUM( 2, 2 ), SUMA( 2, 2 ), + $ SUMB( 2, 2 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SLABAD, SLACPY, SLAG2, SLALN2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and Test the input parameters +* + IF( LSAME( HOWMNY, 'A' ) ) THEN + IHWMNY = 1 + ILALL = .TRUE. + ILBACK = .FALSE. + ELSE IF( LSAME( HOWMNY, 'S' ) ) THEN + IHWMNY = 2 + ILALL = .FALSE. + ILBACK = .FALSE. + ELSE IF( LSAME( HOWMNY, 'B' ) .OR. LSAME( HOWMNY, 'T' ) ) THEN + IHWMNY = 3 + ILALL = .TRUE. + ILBACK = .TRUE. + ELSE + IHWMNY = -1 + ILALL = .TRUE. + END IF +* + IF( LSAME( SIDE, 'R' ) ) THEN + ISIDE = 1 + COMPL = .FALSE. + COMPR = .TRUE. + ELSE IF( LSAME( SIDE, 'L' ) ) THEN + ISIDE = 2 + COMPL = .TRUE. + COMPR = .FALSE. + ELSE IF( LSAME( SIDE, 'B' ) ) THEN + ISIDE = 3 + COMPL = .TRUE. + COMPR = .TRUE. + ELSE + ISIDE = -1 + END IF +* + INFO = 0 + IF( ISIDE.LT.0 ) THEN + INFO = -1 + ELSE IF( IHWMNY.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STGEVC', -INFO ) + RETURN + END IF +* +* Count the number of eigenvectors to be computed +* + IF( .NOT.ILALL ) THEN + IM = 0 + ILCPLX = .FALSE. + DO 10 J = 1, N + IF( ILCPLX ) THEN + ILCPLX = .FALSE. + GO TO 10 + END IF + IF( J.LT.N ) THEN + IF( A( J+1, J ).NE.ZERO ) + $ ILCPLX = .TRUE. + END IF + IF( ILCPLX ) THEN + IF( SELECT( J ) .OR. SELECT( J+1 ) ) + $ IM = IM + 2 + ELSE + IF( SELECT( J ) ) + $ IM = IM + 1 + END IF + 10 CONTINUE + ELSE + IM = N + END IF +* +* Check 2-by-2 diagonal blocks of A, B +* + ILABAD = .FALSE. + ILBBAD = .FALSE. + DO 20 J = 1, N - 1 + IF( A( J+1, J ).NE.ZERO ) THEN + IF( B( J, J ).EQ.ZERO .OR. B( J+1, J+1 ).EQ.ZERO .OR. + $ B( J, J+1 ).NE.ZERO )ILBBAD = .TRUE. + IF( J.LT.N-1 ) THEN + IF( A( J+2, J+1 ).NE.ZERO ) + $ ILABAD = .TRUE. + END IF + END IF + 20 CONTINUE +* + IF( ILABAD ) THEN + INFO = -5 + ELSE IF( ILBBAD ) THEN + INFO = -7 + ELSE IF( COMPL .AND. LDVL.LT.N .OR. LDVL.LT.1 ) THEN + INFO = -10 + ELSE IF( COMPR .AND. LDVR.LT.N .OR. LDVR.LT.1 ) THEN + INFO = -12 + ELSE IF( MM.LT.IM ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STGEVC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = IM + IF( N.EQ.0 ) + $ RETURN +* +* Machine Constants +* + SAFMIN = SLAMCH( 'Safe minimum' ) + BIG = ONE / SAFMIN + CALL SLABAD( SAFMIN, BIG ) + ULP = SLAMCH( 'Epsilon' )*SLAMCH( 'Base' ) + SMALL = SAFMIN*N / ULP + BIG = ONE / SMALL + BIGNUM = ONE / ( SAFMIN*N ) +* +* Compute the 1-norm of each column of the strictly upper triangular +* part (i.e., excluding all elements belonging to the diagonal +* blocks) of A and B to check for possible overflow in the +* triangular solver. +* + ANORM = ABS( A( 1, 1 ) ) + IF( N.GT.1 ) + $ ANORM = ANORM + ABS( A( 2, 1 ) ) + BNORM = ABS( B( 1, 1 ) ) + WORK( 1 ) = ZERO + WORK( N+1 ) = ZERO +* + DO 50 J = 2, N + TEMP = ZERO + TEMP2 = ZERO + IF( A( J, J-1 ).EQ.ZERO ) THEN + IEND = J - 1 + ELSE + IEND = J - 2 + END IF + DO 30 I = 1, IEND + TEMP = TEMP + ABS( A( I, J ) ) + TEMP2 = TEMP2 + ABS( B( I, J ) ) + 30 CONTINUE + WORK( J ) = TEMP + WORK( N+J ) = TEMP2 + DO 40 I = IEND + 1, MIN( J+1, N ) + TEMP = TEMP + ABS( A( I, J ) ) + TEMP2 = TEMP2 + ABS( B( I, J ) ) + 40 CONTINUE + ANORM = MAX( ANORM, TEMP ) + BNORM = MAX( BNORM, TEMP2 ) + 50 CONTINUE +* + ASCALE = ONE / MAX( ANORM, SAFMIN ) + BSCALE = ONE / MAX( BNORM, SAFMIN ) +* +* Left eigenvectors +* + IF( COMPL ) THEN + IEIG = 0 +* +* Main loop over eigenvalues +* + ILCPLX = .FALSE. + DO 220 JE = 1, N +* +* Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or +* (b) this would be the second of a complex pair. +* Check for complex eigenvalue, so as to be sure of which +* entry(-ies) of SELECT to look at. +* + IF( ILCPLX ) THEN + ILCPLX = .FALSE. + GO TO 220 + END IF + NW = 1 + IF( JE.LT.N ) THEN + IF( A( JE+1, JE ).NE.ZERO ) THEN + ILCPLX = .TRUE. + NW = 2 + END IF + END IF + IF( ILALL ) THEN + ILCOMP = .TRUE. + ELSE IF( ILCPLX ) THEN + ILCOMP = SELECT( JE ) .OR. SELECT( JE+1 ) + ELSE + ILCOMP = SELECT( JE ) + END IF + IF( .NOT.ILCOMP ) + $ GO TO 220 +* +* Decide if (a) singular pencil, (b) real eigenvalue, or +* (c) complex eigenvalue. +* + IF( .NOT.ILCPLX ) THEN + IF( ABS( A( JE, JE ) ).LE.SAFMIN .AND. + $ ABS( B( JE, JE ) ).LE.SAFMIN ) THEN +* +* Singular matrix pencil -- return unit eigenvector +* + IEIG = IEIG + 1 + DO 60 JR = 1, N + VL( JR, IEIG ) = ZERO + 60 CONTINUE + VL( IEIG, IEIG ) = ONE + GO TO 220 + END IF + END IF +* +* Clear vector +* + DO 70 JR = 1, NW*N + WORK( 2*N+JR ) = ZERO + 70 CONTINUE +* T +* Compute coefficients in ( a A - b B ) y = 0 +* a is ACOEF +* b is BCOEFR + i*BCOEFI +* + IF( .NOT.ILCPLX ) THEN +* +* Real eigenvalue +* + TEMP = ONE / MAX( ABS( A( JE, JE ) )*ASCALE, + $ ABS( B( JE, JE ) )*BSCALE, SAFMIN ) + SALFAR = ( TEMP*A( JE, JE ) )*ASCALE + SBETA = ( TEMP*B( JE, JE ) )*BSCALE + ACOEF = SBETA*ASCALE + BCOEFR = SALFAR*BSCALE + BCOEFI = ZERO +* +* Scale to avoid underflow +* + SCALE = ONE + LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEF ).LT.SMALL + LSB = ABS( SALFAR ).GE.SAFMIN .AND. ABS( BCOEFR ).LT. + $ SMALL + IF( LSA ) + $ SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG ) + IF( LSB ) + $ SCALE = MAX( SCALE, ( SMALL / ABS( SALFAR ) )* + $ MIN( BNORM, BIG ) ) + IF( LSA .OR. LSB ) THEN + SCALE = MIN( SCALE, ONE / + $ ( SAFMIN*MAX( ONE, ABS( ACOEF ), + $ ABS( BCOEFR ) ) ) ) + IF( LSA ) THEN + ACOEF = ASCALE*( SCALE*SBETA ) + ELSE + ACOEF = SCALE*ACOEF + END IF + IF( LSB ) THEN + BCOEFR = BSCALE*( SCALE*SALFAR ) + ELSE + BCOEFR = SCALE*BCOEFR + END IF + END IF + ACOEFA = ABS( ACOEF ) + BCOEFA = ABS( BCOEFR ) +* +* First component is 1 +* + WORK( 2*N+JE ) = ONE + XMAX = ONE + ELSE +* +* Complex eigenvalue +* + CALL SLAG2( A( JE, JE ), LDA, B( JE, JE ), LDB, + $ SAFMIN*SAFETY, ACOEF, TEMP, BCOEFR, TEMP2, + $ BCOEFI ) + BCOEFI = -BCOEFI + IF( BCOEFI.EQ.ZERO ) THEN + INFO = JE + RETURN + END IF +* +* Scale to avoid over/underflow +* + ACOEFA = ABS( ACOEF ) + BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI ) + SCALE = ONE + IF( ACOEFA*ULP.LT.SAFMIN .AND. ACOEFA.GE.SAFMIN ) + $ SCALE = ( SAFMIN / ULP ) / ACOEFA + IF( BCOEFA*ULP.LT.SAFMIN .AND. BCOEFA.GE.SAFMIN ) + $ SCALE = MAX( SCALE, ( SAFMIN / ULP ) / BCOEFA ) + IF( SAFMIN*ACOEFA.GT.ASCALE ) + $ SCALE = ASCALE / ( SAFMIN*ACOEFA ) + IF( SAFMIN*BCOEFA.GT.BSCALE ) + $ SCALE = MIN( SCALE, BSCALE / ( SAFMIN*BCOEFA ) ) + IF( SCALE.NE.ONE ) THEN + ACOEF = SCALE*ACOEF + ACOEFA = ABS( ACOEF ) + BCOEFR = SCALE*BCOEFR + BCOEFI = SCALE*BCOEFI + BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI ) + END IF +* +* Compute first two components of eigenvector +* + TEMP = ACOEF*A( JE+1, JE ) + TEMP2R = ACOEF*A( JE, JE ) - BCOEFR*B( JE, JE ) + TEMP2I = -BCOEFI*B( JE, JE ) + IF( ABS( TEMP ).GT.ABS( TEMP2R )+ABS( TEMP2I ) ) THEN + WORK( 2*N+JE ) = ONE + WORK( 3*N+JE ) = ZERO + WORK( 2*N+JE+1 ) = -TEMP2R / TEMP + WORK( 3*N+JE+1 ) = -TEMP2I / TEMP + ELSE + WORK( 2*N+JE+1 ) = ONE + WORK( 3*N+JE+1 ) = ZERO + TEMP = ACOEF*A( JE, JE+1 ) + WORK( 2*N+JE ) = ( BCOEFR*B( JE+1, JE+1 )-ACOEF* + $ A( JE+1, JE+1 ) ) / TEMP + WORK( 3*N+JE ) = BCOEFI*B( JE+1, JE+1 ) / TEMP + END IF + XMAX = MAX( ABS( WORK( 2*N+JE ) )+ABS( WORK( 3*N+JE ) ), + $ ABS( WORK( 2*N+JE+1 ) )+ABS( WORK( 3*N+JE+1 ) ) ) + END IF +* + DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN ) +* +* T +* Triangular solve of (a A - b B) y = 0 +* +* T +* (rowwise in (a A - b B) , or columnwise in (a A - b B) ) +* + IL2BY2 = .FALSE. +* + DO 160 J = JE + NW, N + IF( IL2BY2 ) THEN + IL2BY2 = .FALSE. + GO TO 160 + END IF +* + NA = 1 + BDIAG( 1 ) = B( J, J ) + IF( J.LT.N ) THEN + IF( A( J+1, J ).NE.ZERO ) THEN + IL2BY2 = .TRUE. + BDIAG( 2 ) = B( J+1, J+1 ) + NA = 2 + END IF + END IF +* +* Check whether scaling is necessary for dot products +* + XSCALE = ONE / MAX( ONE, XMAX ) + TEMP = MAX( WORK( J ), WORK( N+J ), + $ ACOEFA*WORK( J )+BCOEFA*WORK( N+J ) ) + IF( IL2BY2 ) + $ TEMP = MAX( TEMP, WORK( J+1 ), WORK( N+J+1 ), + $ ACOEFA*WORK( J+1 )+BCOEFA*WORK( N+J+1 ) ) + IF( TEMP.GT.BIGNUM*XSCALE ) THEN + DO 90 JW = 0, NW - 1 + DO 80 JR = JE, J - 1 + WORK( ( JW+2 )*N+JR ) = XSCALE* + $ WORK( ( JW+2 )*N+JR ) + 80 CONTINUE + 90 CONTINUE + XMAX = XMAX*XSCALE + END IF +* +* Compute dot products +* +* j-1 +* SUM = sum conjg( a*A(k,j) - b*B(k,j) )*x(k) +* k=je +* +* To reduce the op count, this is done as +* +* _ j-1 _ j-1 +* a*conjg( sum A(k,j)*x(k) ) - b*conjg( sum B(k,j)*x(k) ) +* k=je k=je +* +* which may cause underflow problems if A or B are close +* to underflow. (E.g., less than SMALL.) +* +* +* A series of compiler directives to defeat vectorization +* for the next loop +* +*$PL$ CMCHAR=' ' +CDIR$ NEXTSCALAR +C$DIR SCALAR +CDIR$ NEXT SCALAR +CVD$L NOVECTOR +CDEC$ NOVECTOR +CVD$ NOVECTOR +*VDIR NOVECTOR +*VOCL LOOP,SCALAR +CIBM PREFER SCALAR +*$PL$ CMCHAR='*' +* + DO 120 JW = 1, NW +* +*$PL$ CMCHAR=' ' +CDIR$ NEXTSCALAR +C$DIR SCALAR +CDIR$ NEXT SCALAR +CVD$L NOVECTOR +CDEC$ NOVECTOR +CVD$ NOVECTOR +*VDIR NOVECTOR +*VOCL LOOP,SCALAR +CIBM PREFER SCALAR +*$PL$ CMCHAR='*' +* + DO 110 JA = 1, NA + SUMA( JA, JW ) = ZERO + SUMB( JA, JW ) = ZERO +* + DO 100 JR = JE, J - 1 + SUMA( JA, JW ) = SUMA( JA, JW ) + + $ A( JR, J+JA-1 )* + $ WORK( ( JW+1 )*N+JR ) + SUMB( JA, JW ) = SUMB( JA, JW ) + + $ B( JR, J+JA-1 )* + $ WORK( ( JW+1 )*N+JR ) + 100 CONTINUE + 110 CONTINUE + 120 CONTINUE +* +*$PL$ CMCHAR=' ' +CDIR$ NEXTSCALAR +C$DIR SCALAR +CDIR$ NEXT SCALAR +CVD$L NOVECTOR +CDEC$ NOVECTOR +CVD$ NOVECTOR +*VDIR NOVECTOR +*VOCL LOOP,SCALAR +CIBM PREFER SCALAR +*$PL$ CMCHAR='*' +* + DO 130 JA = 1, NA + IF( ILCPLX ) THEN + SUM( JA, 1 ) = -ACOEF*SUMA( JA, 1 ) + + $ BCOEFR*SUMB( JA, 1 ) - + $ BCOEFI*SUMB( JA, 2 ) + SUM( JA, 2 ) = -ACOEF*SUMA( JA, 2 ) + + $ BCOEFR*SUMB( JA, 2 ) + + $ BCOEFI*SUMB( JA, 1 ) + ELSE + SUM( JA, 1 ) = -ACOEF*SUMA( JA, 1 ) + + $ BCOEFR*SUMB( JA, 1 ) + END IF + 130 CONTINUE +* +* T +* Solve ( a A - b B ) y = SUM(,) +* with scaling and perturbation of the denominator +* + CALL SLALN2( .TRUE., NA, NW, DMIN, ACOEF, A( J, J ), LDA, + $ BDIAG( 1 ), BDIAG( 2 ), SUM, 2, BCOEFR, + $ BCOEFI, WORK( 2*N+J ), N, SCALE, TEMP, + $ IINFO ) + IF( SCALE.LT.ONE ) THEN + DO 150 JW = 0, NW - 1 + DO 140 JR = JE, J - 1 + WORK( ( JW+2 )*N+JR ) = SCALE* + $ WORK( ( JW+2 )*N+JR ) + 140 CONTINUE + 150 CONTINUE + XMAX = SCALE*XMAX + END IF + XMAX = MAX( XMAX, TEMP ) + 160 CONTINUE +* +* Copy eigenvector to VL, back transforming if +* HOWMNY='B'. +* + IEIG = IEIG + 1 + IF( ILBACK ) THEN + DO 170 JW = 0, NW - 1 + CALL SGEMV( 'N', N, N+1-JE, ONE, VL( 1, JE ), LDVL, + $ WORK( ( JW+2 )*N+JE ), 1, ZERO, + $ WORK( ( JW+4 )*N+1 ), 1 ) + 170 CONTINUE + CALL SLACPY( ' ', N, NW, WORK( 4*N+1 ), N, VL( 1, JE ), + $ LDVL ) + IBEG = 1 + ELSE + CALL SLACPY( ' ', N, NW, WORK( 2*N+1 ), N, VL( 1, IEIG ), + $ LDVL ) + IBEG = JE + END IF +* +* Scale eigenvector +* + XMAX = ZERO + IF( ILCPLX ) THEN + DO 180 J = IBEG, N + XMAX = MAX( XMAX, ABS( VL( J, IEIG ) )+ + $ ABS( VL( J, IEIG+1 ) ) ) + 180 CONTINUE + ELSE + DO 190 J = IBEG, N + XMAX = MAX( XMAX, ABS( VL( J, IEIG ) ) ) + 190 CONTINUE + END IF +* + IF( XMAX.GT.SAFMIN ) THEN + XSCALE = ONE / XMAX +* + DO 210 JW = 0, NW - 1 + DO 200 JR = IBEG, N + VL( JR, IEIG+JW ) = XSCALE*VL( JR, IEIG+JW ) + 200 CONTINUE + 210 CONTINUE + END IF + IEIG = IEIG + NW - 1 +* + 220 CONTINUE + END IF +* +* Right eigenvectors +* + IF( COMPR ) THEN + IEIG = IM + 1 +* +* Main loop over eigenvalues +* + ILCPLX = .FALSE. + DO 500 JE = N, 1, -1 +* +* Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or +* (b) this would be the second of a complex pair. +* Check for complex eigenvalue, so as to be sure of which +* entry(-ies) of SELECT to look at -- if complex, SELECT(JE) +* or SELECT(JE-1). +* If this is a complex pair, the 2-by-2 diagonal block +* corresponding to the eigenvalue is in rows/columns JE-1:JE +* + IF( ILCPLX ) THEN + ILCPLX = .FALSE. + GO TO 500 + END IF + NW = 1 + IF( JE.GT.1 ) THEN + IF( A( JE, JE-1 ).NE.ZERO ) THEN + ILCPLX = .TRUE. + NW = 2 + END IF + END IF + IF( ILALL ) THEN + ILCOMP = .TRUE. + ELSE IF( ILCPLX ) THEN + ILCOMP = SELECT( JE ) .OR. SELECT( JE-1 ) + ELSE + ILCOMP = SELECT( JE ) + END IF + IF( .NOT.ILCOMP ) + $ GO TO 500 +* +* Decide if (a) singular pencil, (b) real eigenvalue, or +* (c) complex eigenvalue. +* + IF( .NOT.ILCPLX ) THEN + IF( ABS( A( JE, JE ) ).LE.SAFMIN .AND. + $ ABS( B( JE, JE ) ).LE.SAFMIN ) THEN +* +* Singular matrix pencil -- unit eigenvector +* + IEIG = IEIG - 1 + DO 230 JR = 1, N + VR( JR, IEIG ) = ZERO + 230 CONTINUE + VR( IEIG, IEIG ) = ONE + GO TO 500 + END IF + END IF +* +* Clear vector +* + DO 250 JW = 0, NW - 1 + DO 240 JR = 1, N + WORK( ( JW+2 )*N+JR ) = ZERO + 240 CONTINUE + 250 CONTINUE +* +* Compute coefficients in ( a A - b B ) x = 0 +* a is ACOEF +* b is BCOEFR + i*BCOEFI +* + IF( .NOT.ILCPLX ) THEN +* +* Real eigenvalue +* + TEMP = ONE / MAX( ABS( A( JE, JE ) )*ASCALE, + $ ABS( B( JE, JE ) )*BSCALE, SAFMIN ) + SALFAR = ( TEMP*A( JE, JE ) )*ASCALE + SBETA = ( TEMP*B( JE, JE ) )*BSCALE + ACOEF = SBETA*ASCALE + BCOEFR = SALFAR*BSCALE + BCOEFI = ZERO +* +* Scale to avoid underflow +* + SCALE = ONE + LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEF ).LT.SMALL + LSB = ABS( SALFAR ).GE.SAFMIN .AND. ABS( BCOEFR ).LT. + $ SMALL + IF( LSA ) + $ SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG ) + IF( LSB ) + $ SCALE = MAX( SCALE, ( SMALL / ABS( SALFAR ) )* + $ MIN( BNORM, BIG ) ) + IF( LSA .OR. LSB ) THEN + SCALE = MIN( SCALE, ONE / + $ ( SAFMIN*MAX( ONE, ABS( ACOEF ), + $ ABS( BCOEFR ) ) ) ) + IF( LSA ) THEN + ACOEF = ASCALE*( SCALE*SBETA ) + ELSE + ACOEF = SCALE*ACOEF + END IF + IF( LSB ) THEN + BCOEFR = BSCALE*( SCALE*SALFAR ) + ELSE + BCOEFR = SCALE*BCOEFR + END IF + END IF + ACOEFA = ABS( ACOEF ) + BCOEFA = ABS( BCOEFR ) +* +* First component is 1 +* + WORK( 2*N+JE ) = ONE + XMAX = ONE +* +* Compute contribution from column JE of A and B to sum +* (See "Further Details", above.) +* + DO 260 JR = 1, JE - 1 + WORK( 2*N+JR ) = BCOEFR*B( JR, JE ) - + $ ACOEF*A( JR, JE ) + 260 CONTINUE + ELSE +* +* Complex eigenvalue +* + CALL SLAG2( A( JE-1, JE-1 ), LDA, B( JE-1, JE-1 ), LDB, + $ SAFMIN*SAFETY, ACOEF, TEMP, BCOEFR, TEMP2, + $ BCOEFI ) + IF( BCOEFI.EQ.ZERO ) THEN + INFO = JE - 1 + RETURN + END IF +* +* Scale to avoid over/underflow +* + ACOEFA = ABS( ACOEF ) + BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI ) + SCALE = ONE + IF( ACOEFA*ULP.LT.SAFMIN .AND. ACOEFA.GE.SAFMIN ) + $ SCALE = ( SAFMIN / ULP ) / ACOEFA + IF( BCOEFA*ULP.LT.SAFMIN .AND. BCOEFA.GE.SAFMIN ) + $ SCALE = MAX( SCALE, ( SAFMIN / ULP ) / BCOEFA ) + IF( SAFMIN*ACOEFA.GT.ASCALE ) + $ SCALE = ASCALE / ( SAFMIN*ACOEFA ) + IF( SAFMIN*BCOEFA.GT.BSCALE ) + $ SCALE = MIN( SCALE, BSCALE / ( SAFMIN*BCOEFA ) ) + IF( SCALE.NE.ONE ) THEN + ACOEF = SCALE*ACOEF + ACOEFA = ABS( ACOEF ) + BCOEFR = SCALE*BCOEFR + BCOEFI = SCALE*BCOEFI + BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI ) + END IF +* +* Compute first two components of eigenvector +* and contribution to sums +* + TEMP = ACOEF*A( JE, JE-1 ) + TEMP2R = ACOEF*A( JE, JE ) - BCOEFR*B( JE, JE ) + TEMP2I = -BCOEFI*B( JE, JE ) + IF( ABS( TEMP ).GE.ABS( TEMP2R )+ABS( TEMP2I ) ) THEN + WORK( 2*N+JE ) = ONE + WORK( 3*N+JE ) = ZERO + WORK( 2*N+JE-1 ) = -TEMP2R / TEMP + WORK( 3*N+JE-1 ) = -TEMP2I / TEMP + ELSE + WORK( 2*N+JE-1 ) = ONE + WORK( 3*N+JE-1 ) = ZERO + TEMP = ACOEF*A( JE-1, JE ) + WORK( 2*N+JE ) = ( BCOEFR*B( JE-1, JE-1 )-ACOEF* + $ A( JE-1, JE-1 ) ) / TEMP + WORK( 3*N+JE ) = BCOEFI*B( JE-1, JE-1 ) / TEMP + END IF +* + XMAX = MAX( ABS( WORK( 2*N+JE ) )+ABS( WORK( 3*N+JE ) ), + $ ABS( WORK( 2*N+JE-1 ) )+ABS( WORK( 3*N+JE-1 ) ) ) +* +* Compute contribution from columns JE and JE-1 +* of A and B to the sums. +* + CREALA = ACOEF*WORK( 2*N+JE-1 ) + CIMAGA = ACOEF*WORK( 3*N+JE-1 ) + CREALB = BCOEFR*WORK( 2*N+JE-1 ) - + $ BCOEFI*WORK( 3*N+JE-1 ) + CIMAGB = BCOEFI*WORK( 2*N+JE-1 ) + + $ BCOEFR*WORK( 3*N+JE-1 ) + CRE2A = ACOEF*WORK( 2*N+JE ) + CIM2A = ACOEF*WORK( 3*N+JE ) + CRE2B = BCOEFR*WORK( 2*N+JE ) - BCOEFI*WORK( 3*N+JE ) + CIM2B = BCOEFI*WORK( 2*N+JE ) + BCOEFR*WORK( 3*N+JE ) + DO 270 JR = 1, JE - 2 + WORK( 2*N+JR ) = -CREALA*A( JR, JE-1 ) + + $ CREALB*B( JR, JE-1 ) - + $ CRE2A*A( JR, JE ) + CRE2B*B( JR, JE ) + WORK( 3*N+JR ) = -CIMAGA*A( JR, JE-1 ) + + $ CIMAGB*B( JR, JE-1 ) - + $ CIM2A*A( JR, JE ) + CIM2B*B( JR, JE ) + 270 CONTINUE + END IF +* + DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN ) +* +* Columnwise triangular solve of (a A - b B) x = 0 +* + IL2BY2 = .FALSE. + DO 370 J = JE - NW, 1, -1 +* +* If a 2-by-2 block, is in position j-1:j, wait until +* next iteration to process it (when it will be j:j+1) +* + IF( .NOT.IL2BY2 .AND. J.GT.1 ) THEN + IF( A( J, J-1 ).NE.ZERO ) THEN + IL2BY2 = .TRUE. + GO TO 370 + END IF + END IF + BDIAG( 1 ) = B( J, J ) + IF( IL2BY2 ) THEN + NA = 2 + BDIAG( 2 ) = B( J+1, J+1 ) + ELSE + NA = 1 + END IF +* +* Compute x(j) (and x(j+1), if 2-by-2 block) +* + CALL SLALN2( .FALSE., NA, NW, DMIN, ACOEF, A( J, J ), + $ LDA, BDIAG( 1 ), BDIAG( 2 ), WORK( 2*N+J ), + $ N, BCOEFR, BCOEFI, SUM, 2, SCALE, TEMP, + $ IINFO ) + IF( SCALE.LT.ONE ) THEN +* + DO 290 JW = 0, NW - 1 + DO 280 JR = 1, JE + WORK( ( JW+2 )*N+JR ) = SCALE* + $ WORK( ( JW+2 )*N+JR ) + 280 CONTINUE + 290 CONTINUE + END IF + XMAX = MAX( SCALE*XMAX, TEMP ) +* + DO 310 JW = 1, NW + DO 300 JA = 1, NA + WORK( ( JW+1 )*N+J+JA-1 ) = SUM( JA, JW ) + 300 CONTINUE + 310 CONTINUE +* +* w = w + x(j)*(a A(*,j) - b B(*,j) ) with scaling +* + IF( J.GT.1 ) THEN +* +* Check whether scaling is necessary for sum. +* + XSCALE = ONE / MAX( ONE, XMAX ) + TEMP = ACOEFA*WORK( J ) + BCOEFA*WORK( N+J ) + IF( IL2BY2 ) + $ TEMP = MAX( TEMP, ACOEFA*WORK( J+1 )+BCOEFA* + $ WORK( N+J+1 ) ) + TEMP = MAX( TEMP, ACOEFA, BCOEFA ) + IF( TEMP.GT.BIGNUM*XSCALE ) THEN +* + DO 330 JW = 0, NW - 1 + DO 320 JR = 1, JE + WORK( ( JW+2 )*N+JR ) = XSCALE* + $ WORK( ( JW+2 )*N+JR ) + 320 CONTINUE + 330 CONTINUE + XMAX = XMAX*XSCALE + END IF +* +* Compute the contributions of the off-diagonals of +* column j (and j+1, if 2-by-2 block) of A and B to the +* sums. +* +* + DO 360 JA = 1, NA + IF( ILCPLX ) THEN + CREALA = ACOEF*WORK( 2*N+J+JA-1 ) + CIMAGA = ACOEF*WORK( 3*N+J+JA-1 ) + CREALB = BCOEFR*WORK( 2*N+J+JA-1 ) - + $ BCOEFI*WORK( 3*N+J+JA-1 ) + CIMAGB = BCOEFI*WORK( 2*N+J+JA-1 ) + + $ BCOEFR*WORK( 3*N+J+JA-1 ) + DO 340 JR = 1, J - 1 + WORK( 2*N+JR ) = WORK( 2*N+JR ) - + $ CREALA*A( JR, J+JA-1 ) + + $ CREALB*B( JR, J+JA-1 ) + WORK( 3*N+JR ) = WORK( 3*N+JR ) - + $ CIMAGA*A( JR, J+JA-1 ) + + $ CIMAGB*B( JR, J+JA-1 ) + 340 CONTINUE + ELSE + CREALA = ACOEF*WORK( 2*N+J+JA-1 ) + CREALB = BCOEFR*WORK( 2*N+J+JA-1 ) + DO 350 JR = 1, J - 1 + WORK( 2*N+JR ) = WORK( 2*N+JR ) - + $ CREALA*A( JR, J+JA-1 ) + + $ CREALB*B( JR, J+JA-1 ) + 350 CONTINUE + END IF + 360 CONTINUE + END IF +* + IL2BY2 = .FALSE. + 370 CONTINUE +* +* Copy eigenvector to VR, back transforming if +* HOWMNY='B'. +* + IEIG = IEIG - NW + IF( ILBACK ) THEN +* + DO 410 JW = 0, NW - 1 + DO 380 JR = 1, N + WORK( ( JW+4 )*N+JR ) = WORK( ( JW+2 )*N+1 )* + $ VR( JR, 1 ) + 380 CONTINUE +* +* A series of compiler directives to defeat +* vectorization for the next loop +* +* + DO 400 JC = 2, JE + DO 390 JR = 1, N + WORK( ( JW+4 )*N+JR ) = WORK( ( JW+4 )*N+JR ) + + $ WORK( ( JW+2 )*N+JC )*VR( JR, JC ) + 390 CONTINUE + 400 CONTINUE + 410 CONTINUE +* + DO 430 JW = 0, NW - 1 + DO 420 JR = 1, N + VR( JR, IEIG+JW ) = WORK( ( JW+4 )*N+JR ) + 420 CONTINUE + 430 CONTINUE +* + IEND = N + ELSE + DO 450 JW = 0, NW - 1 + DO 440 JR = 1, N + VR( JR, IEIG+JW ) = WORK( ( JW+2 )*N+JR ) + 440 CONTINUE + 450 CONTINUE +* + IEND = JE + END IF +* +* Scale eigenvector +* + XMAX = ZERO + IF( ILCPLX ) THEN + DO 460 J = 1, IEND + XMAX = MAX( XMAX, ABS( VR( J, IEIG ) )+ + $ ABS( VR( J, IEIG+1 ) ) ) + 460 CONTINUE + ELSE + DO 470 J = 1, IEND + XMAX = MAX( XMAX, ABS( VR( J, IEIG ) ) ) + 470 CONTINUE + END IF +* + IF( XMAX.GT.SAFMIN ) THEN + XSCALE = ONE / XMAX + DO 490 JW = 0, NW - 1 + DO 480 JR = 1, IEND + VR( JR, IEIG+JW ) = XSCALE*VR( JR, IEIG+JW ) + 480 CONTINUE + 490 CONTINUE + END IF + 500 CONTINUE + END IF +* + RETURN +* +* End of STGEVC +* + END diff --git a/costa/native/external/lapack/stgex2.f b/costa/native/external/lapack/stgex2.f new file mode 100644 index 000000000..b92ea0a46 --- /dev/null +++ b/costa/native/external/lapack/stgex2.f @@ -0,0 +1,582 @@ + SUBROUTINE STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, J1, N1, N2, WORK, LWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER INFO, J1, LDA, LDB, LDQ, LDZ, LWORK, N, N1, N2 +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* STGEX2 swaps adjacent diagonal blocks (A11, B11) and (A22, B22) +* of size 1-by-1 or 2-by-2 in an upper (quasi) triangular matrix pair +* (A, B) by an orthogonal equivalence transformation. +* +* (A, B) must be in generalized real Schur canonical form (as returned +* by SGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 +* diagonal blocks. B is upper triangular. +* +* Optionally, the matrices Q and Z of generalized Schur vectors are +* updated. +* +* Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)' +* Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)' +* +* +* Arguments +* ========= +* +* WANTQ (input) LOGICAL +* .TRUE. : update the left transformation matrix Q; +* .FALSE.: do not update Q. +* +* WANTZ (input) LOGICAL +* .TRUE. : update the right transformation matrix Z; +* .FALSE.: do not update Z. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) REAL arrays, dimensions (LDA,N) +* On entry, the matrix A in the pair (A, B). +* On exit, the updated matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) REAL arrays, dimensions (LDB,N) +* On entry, the matrix B in the pair (A, B). +* On exit, the updated matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* Q (input/output) REAL array, dimension (LDZ,N) +* On entry, if WANTQ = .TRUE., the orthogonal matrix Q. +* On exit, the updated matrix Q. +* Not referenced if WANTQ = .FALSE.. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= 1. +* If WANTQ = .TRUE., LDQ >= N. +* +* Z (input/output) REAL array, dimension (LDZ,N) +* On entry, if WANTZ =.TRUE., the orthogonal matrix Z. +* On exit, the updated matrix Z. +* Not referenced if WANTZ = .FALSE.. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1. +* If WANTZ = .TRUE., LDZ >= N. +* +* J1 (input) INTEGER +* The index to the first block (A11, B11). 1 <= J1 <= N. +* +* N1 (input) INTEGER +* The order of the first block (A11, B11). N1 = 0, 1 or 2. +* +* N2 (input) INTEGER +* The order of the second block (A22, B22). N2 = 0, 1 or 2. +* +* WORK (workspace) REAL array, dimension (LWORK). +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* LWORK >= MAX( N*(N2+N1), (N2+N1)*(N2+N1)*2 ) +* +* INFO (output) INTEGER +* =0: Successful exit +* >0: If INFO = 1, the transformed matrix (A, B) would be +* too far from generalized Schur form; the blocks are +* not swapped and (A, B) and (Q, Z) are unchanged. +* The problem of swapping is too ill-conditioned. +* <0: If INFO = -16: LWORK is too small. Appropriate value +* for LWORK is returned in WORK(1). +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* In the current code both weak and strong stability tests are +* performed. The user can omit the strong stability test by changing +* the internal logical parameter WANDS to .FALSE.. See ref. [2] for +* details. +* +* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +* M.S. Moonen et al (eds), Linear Algebra for Large Scale and +* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +* +* [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +* Eigenvalues of a Regular Matrix Pair (A, B) and Condition +* Estimation: Theory, Algorithms and Software, +* Report UMINF - 94.04, Department of Computing Science, Umea +* University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working +* Note 87. To appear in Numerical Algorithms, 1996. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL TEN + PARAMETER ( TEN = 1.0E+01 ) + INTEGER LDST + PARAMETER ( LDST = 4 ) + LOGICAL WANDS + PARAMETER ( WANDS = .TRUE. ) +* .. +* .. Local Scalars .. + LOGICAL STRONG, WEAK + INTEGER I, IDUM, LINFO, M + REAL BQRA21, BRQA21, DDUM, DNORM, DSCALE, DSUM, EPS, + $ F, G, SA, SB, SCALE, SMLNUM, SS, THRESH, WS +* .. +* .. Local Arrays .. + INTEGER IWORK( LDST ) + REAL AI( 2 ), AR( 2 ), BE( 2 ), IR( LDST, LDST ), + $ IRCOP( LDST, LDST ), LI( LDST, LDST ), + $ LICOP( LDST, LDST ), S( LDST, LDST ), + $ SCPY( LDST, LDST ), T( LDST, LDST ), + $ TAUL( LDST ), TAUR( LDST ), TCPY( LDST, LDST ) +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, SGEQR2, SGERQ2, SLACPY, SLAGV2, + $ SLARTG, SLASSQ, SORG2R, SORGR2, SORM2R, SORMR2, + $ SROT, SSCAL, STGSY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.1 .OR. N1.LE.0 .OR. N2.LE.0 ) + $ RETURN + IF( N1.GT.N .OR. ( J1+N1 ).GT.N ) + $ RETURN + M = N1 + N2 + IF( LWORK.LT.MAX( N*M, M*M*2 ) ) THEN + INFO = -16 + WORK( 1 ) = MAX( N*M, M*M*2 ) + RETURN + END IF +* + WEAK = .FALSE. + STRONG = .FALSE. +* +* Make a local copy of selected block +* + CALL SCOPY( LDST*LDST, ZERO, 0, LI, 1 ) + CALL SCOPY( LDST*LDST, ZERO, 0, IR, 1 ) + CALL SLACPY( 'Full', M, M, A( J1, J1 ), LDA, S, LDST ) + CALL SLACPY( 'Full', M, M, B( J1, J1 ), LDB, T, LDST ) +* +* Compute threshold for testing acceptance of swapping. +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + DSCALE = ZERO + DSUM = ONE + CALL SLACPY( 'Full', M, M, S, LDST, WORK, M ) + CALL SLASSQ( M*M, WORK, 1, DSCALE, DSUM ) + CALL SLACPY( 'Full', M, M, T, LDST, WORK, M ) + CALL SLASSQ( M*M, WORK, 1, DSCALE, DSUM ) + DNORM = DSCALE*SQRT( DSUM ) + THRESH = MAX( TEN*EPS*DNORM, SMLNUM ) +* + IF( M.EQ.2 ) THEN +* +* CASE 1: Swap 1-by-1 and 1-by-1 blocks. +* +* Compute orthogonal QL and RQ that swap 1-by-1 and 1-by-1 blocks +* using Givens rotations and perform the swap tentatively. +* + F = S( 2, 2 )*T( 1, 1 ) - T( 2, 2 )*S( 1, 1 ) + G = S( 2, 2 )*T( 1, 2 ) - T( 2, 2 )*S( 1, 2 ) + SB = ABS( T( 2, 2 ) ) + SA = ABS( S( 2, 2 ) ) + CALL SLARTG( F, G, IR( 1, 2 ), IR( 1, 1 ), DDUM ) + IR( 2, 1 ) = -IR( 1, 2 ) + IR( 2, 2 ) = IR( 1, 1 ) + CALL SROT( 2, S( 1, 1 ), 1, S( 1, 2 ), 1, IR( 1, 1 ), + $ IR( 2, 1 ) ) + CALL SROT( 2, T( 1, 1 ), 1, T( 1, 2 ), 1, IR( 1, 1 ), + $ IR( 2, 1 ) ) + IF( SA.GE.SB ) THEN + CALL SLARTG( S( 1, 1 ), S( 2, 1 ), LI( 1, 1 ), LI( 2, 1 ), + $ DDUM ) + ELSE + CALL SLARTG( T( 1, 1 ), T( 2, 1 ), LI( 1, 1 ), LI( 2, 1 ), + $ DDUM ) + END IF + CALL SROT( 2, S( 1, 1 ), LDST, S( 2, 1 ), LDST, LI( 1, 1 ), + $ LI( 2, 1 ) ) + CALL SROT( 2, T( 1, 1 ), LDST, T( 2, 1 ), LDST, LI( 1, 1 ), + $ LI( 2, 1 ) ) + LI( 2, 2 ) = LI( 1, 1 ) + LI( 1, 2 ) = -LI( 2, 1 ) +* +* Weak stability test: +* |S21| + |T21| <= O(EPS * F-norm((S, T))) +* + WS = ABS( S( 2, 1 ) ) + ABS( T( 2, 1 ) ) + WEAK = WS.LE.THRESH + IF( .NOT.WEAK ) + $ GO TO 70 +* + IF( WANDS ) THEN +* +* Strong stability test: +* F-norm((A-QL'*S*QR, B-QL'*T*QR)) <= O(EPS*F-norm((A,B))) +* + CALL SLACPY( 'Full', M, M, A( J1, J1 ), LDA, WORK( M*M+1 ), + $ M ) + CALL SGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, S, LDST, ZERO, + $ WORK, M ) + CALL SGEMM( 'N', 'T', M, M, M, -ONE, WORK, M, IR, LDST, ONE, + $ WORK( M*M+1 ), M ) + DSCALE = ZERO + DSUM = ONE + CALL SLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM ) +* + CALL SLACPY( 'Full', M, M, B( J1, J1 ), LDB, WORK( M*M+1 ), + $ M ) + CALL SGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, T, LDST, ZERO, + $ WORK, M ) + CALL SGEMM( 'N', 'T', M, M, M, -ONE, WORK, M, IR, LDST, ONE, + $ WORK( M*M+1 ), M ) + CALL SLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM ) + SS = DSCALE*SQRT( DSUM ) + STRONG = SS.LE.THRESH + IF( .NOT.STRONG ) + $ GO TO 70 + END IF +* +* Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and +* (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). +* + CALL SROT( J1+1, A( 1, J1 ), 1, A( 1, J1+1 ), 1, IR( 1, 1 ), + $ IR( 2, 1 ) ) + CALL SROT( J1+1, B( 1, J1 ), 1, B( 1, J1+1 ), 1, IR( 1, 1 ), + $ IR( 2, 1 ) ) + CALL SROT( N-J1+1, A( J1, J1 ), LDA, A( J1+1, J1 ), LDA, + $ LI( 1, 1 ), LI( 2, 1 ) ) + CALL SROT( N-J1+1, B( J1, J1 ), LDB, B( J1+1, J1 ), LDB, + $ LI( 1, 1 ), LI( 2, 1 ) ) +* +* Set N1-by-N2 (2,1) - blocks to ZERO. +* + A( J1+1, J1 ) = ZERO + B( J1+1, J1 ) = ZERO +* +* Accumulate transformations into Q and Z if requested. +* + IF( WANTZ ) + $ CALL SROT( N, Z( 1, J1 ), 1, Z( 1, J1+1 ), 1, IR( 1, 1 ), + $ IR( 2, 1 ) ) + IF( WANTQ ) + $ CALL SROT( N, Q( 1, J1 ), 1, Q( 1, J1+1 ), 1, LI( 1, 1 ), + $ LI( 2, 1 ) ) +* +* Exit with INFO = 0 if swap was successfully performed. +* + RETURN +* + ELSE +* +* CASE 2: Swap 1-by-1 and 2-by-2 blocks, or 2-by-2 +* and 2-by-2 blocks. +* +* Solve the generalized Sylvester equation +* S11 * R - L * S22 = SCALE * S12 +* T11 * R - L * T22 = SCALE * T12 +* for R and L. Solutions in LI and IR. +* + CALL SLACPY( 'Full', N1, N2, T( 1, N1+1 ), LDST, LI, LDST ) + CALL SLACPY( 'Full', N1, N2, S( 1, N1+1 ), LDST, + $ IR( N2+1, N1+1 ), LDST ) + CALL STGSY2( 'N', 0, N1, N2, S, LDST, S( N1+1, N1+1 ), LDST, + $ IR( N2+1, N1+1 ), LDST, T, LDST, T( N1+1, N1+1 ), + $ LDST, LI, LDST, SCALE, DSUM, DSCALE, IWORK, IDUM, + $ LINFO ) +* +* Compute orthogonal matrix QL: +* +* QL' * LI = [ TL ] +* [ 0 ] +* where +* LI = [ -L ] +* [ SCALE * identity(N2) ] +* + DO 10 I = 1, N2 + CALL SSCAL( N1, -ONE, LI( 1, I ), 1 ) + LI( N1+I, I ) = SCALE + 10 CONTINUE + CALL SGEQR2( M, N2, LI, LDST, TAUL, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 + CALL SORG2R( M, M, N2, LI, LDST, TAUL, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 +* +* Compute orthogonal matrix RQ: +* +* IR * RQ' = [ 0 TR], +* +* where IR = [ SCALE * identity(N1), R ] +* + DO 20 I = 1, N1 + IR( N2+I, I ) = SCALE + 20 CONTINUE + CALL SGERQ2( N1, M, IR( N2+1, 1 ), LDST, TAUR, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 + CALL SORGR2( M, M, N1, IR, LDST, TAUR, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 +* +* Perform the swapping tentatively: +* + CALL SGEMM( 'T', 'N', M, M, M, ONE, LI, LDST, S, LDST, ZERO, + $ WORK, M ) + CALL SGEMM( 'N', 'T', M, M, M, ONE, WORK, M, IR, LDST, ZERO, S, + $ LDST ) + CALL SGEMM( 'T', 'N', M, M, M, ONE, LI, LDST, T, LDST, ZERO, + $ WORK, M ) + CALL SGEMM( 'N', 'T', M, M, M, ONE, WORK, M, IR, LDST, ZERO, T, + $ LDST ) + CALL SLACPY( 'F', M, M, S, LDST, SCPY, LDST ) + CALL SLACPY( 'F', M, M, T, LDST, TCPY, LDST ) + CALL SLACPY( 'F', M, M, IR, LDST, IRCOP, LDST ) + CALL SLACPY( 'F', M, M, LI, LDST, LICOP, LDST ) +* +* Triangularize the B-part by an RQ factorization. +* Apply transformation (from left) to A-part, giving S. +* + CALL SGERQ2( M, M, T, LDST, TAUR, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 + CALL SORMR2( 'R', 'T', M, M, M, T, LDST, TAUR, S, LDST, WORK, + $ LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 + CALL SORMR2( 'L', 'N', M, M, M, T, LDST, TAUR, IR, LDST, WORK, + $ LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 +* +* Compute F-norm(S21) in BRQA21. (T21 is 0.) +* + DSCALE = ZERO + DSUM = ONE + DO 30 I = 1, N2 + CALL SLASSQ( N1, S( N2+1, I ), 1, DSCALE, DSUM ) + 30 CONTINUE + BRQA21 = DSCALE*SQRT( DSUM ) +* +* Triangularize the B-part by a QR factorization. +* Apply transformation (from right) to A-part, giving S. +* + CALL SGEQR2( M, M, TCPY, LDST, TAUL, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 + CALL SORM2R( 'L', 'T', M, M, M, TCPY, LDST, TAUL, SCPY, LDST, + $ WORK, INFO ) + CALL SORM2R( 'R', 'N', M, M, M, TCPY, LDST, TAUL, LICOP, LDST, + $ WORK, INFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 +* +* Compute F-norm(S21) in BQRA21. (T21 is 0.) +* + DSCALE = ZERO + DSUM = ONE + DO 40 I = 1, N2 + CALL SLASSQ( N1, SCPY( N2+1, I ), 1, DSCALE, DSUM ) + 40 CONTINUE + BQRA21 = DSCALE*SQRT( DSUM ) +* +* Decide which method to use. +* Weak stability test: +* F-norm(S21) <= O(EPS * F-norm((S, T))) +* + IF( BQRA21.LE.BRQA21 .AND. BQRA21.LE.THRESH ) THEN + CALL SLACPY( 'F', M, M, SCPY, LDST, S, LDST ) + CALL SLACPY( 'F', M, M, TCPY, LDST, T, LDST ) + CALL SLACPY( 'F', M, M, IRCOP, LDST, IR, LDST ) + CALL SLACPY( 'F', M, M, LICOP, LDST, LI, LDST ) + ELSE IF( BRQA21.GE.THRESH ) THEN + GO TO 70 + END IF +* +* Set lower triangle of B-part to zero +* + DO 50 I = 2, M + CALL SCOPY( M-I+1, ZERO, 0, T( I, I-1 ), 1 ) + 50 CONTINUE +* + IF( WANDS ) THEN +* +* Strong stability test: +* F-norm((A-QL*S*QR', B-QL*T*QR')) <= O(EPS*F-norm((A,B))) +* + CALL SLACPY( 'Full', M, M, A( J1, J1 ), LDA, WORK( M*M+1 ), + $ M ) + CALL SGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, S, LDST, ZERO, + $ WORK, M ) + CALL SGEMM( 'N', 'N', M, M, M, -ONE, WORK, M, IR, LDST, ONE, + $ WORK( M*M+1 ), M ) + DSCALE = ZERO + DSUM = ONE + CALL SLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM ) +* + CALL SLACPY( 'Full', M, M, B( J1, J1 ), LDB, WORK( M*M+1 ), + $ M ) + CALL SGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, T, LDST, ZERO, + $ WORK, M ) + CALL SGEMM( 'N', 'N', M, M, M, -ONE, WORK, M, IR, LDST, ONE, + $ WORK( M*M+1 ), M ) + CALL SLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM ) + SS = DSCALE*SQRT( DSUM ) + STRONG = ( SS.LE.THRESH ) + IF( .NOT.STRONG ) + $ GO TO 70 +* + END IF +* +* If the swap is accepted ("weakly" and "strongly"), apply the +* transformations and set N1-by-N2 (2,1)-block to zero. +* + DO 60 I = 1, N2 + CALL SCOPY( N1, ZERO, 0, S( N2+1, I ), 1 ) + 60 CONTINUE +* +* copy back M-by-M diagonal block starting at index J1 of (A, B) +* + CALL SLACPY( 'F', M, M, S, LDST, A( J1, J1 ), LDA ) + CALL SLACPY( 'F', M, M, T, LDST, B( J1, J1 ), LDB ) + CALL SCOPY( LDST*LDST, ZERO, 0, T, 1 ) +* +* Standardize existing 2-by-2 blocks. +* + CALL SCOPY( M*M, ZERO, 0, WORK, 1 ) + WORK( 1 ) = ONE + T( 1, 1 ) = ONE + IDUM = LWORK - M*M - 2 + IF( N2.GT.1 ) THEN + CALL SLAGV2( A( J1, J1 ), LDA, B( J1, J1 ), LDB, AR, AI, BE, + $ WORK( 1 ), WORK( 2 ), T( 1, 1 ), T( 2, 1 ) ) + WORK( M+1 ) = -WORK( 2 ) + WORK( M+2 ) = WORK( 1 ) + T( N2, N2 ) = T( 1, 1 ) + T( 1, 2 ) = -T( 2, 1 ) + END IF + WORK( M*M ) = ONE + T( M, M ) = ONE +* + IF( N1.GT.1 ) THEN + CALL SLAGV2( A( J1+N2, J1+N2 ), LDA, B( J1+N2, J1+N2 ), LDB, + $ TAUR, TAUL, WORK( M*M+1 ), WORK( N2*M+N2+1 ), + $ WORK( N2*M+N2+2 ), T( N2+1, N2+1 ), + $ T( M, M-1 ) ) + WORK( M*M ) = WORK( N2*M+N2+1 ) + WORK( M*M-1 ) = -WORK( N2*M+N2+2 ) + T( M, M ) = T( N2+1, N2+1 ) + T( M-1, M ) = -T( M, M-1 ) + END IF + CALL SGEMM( 'T', 'N', N2, N1, N2, ONE, WORK, M, A( J1, J1+N2 ), + $ LDA, ZERO, WORK( M*M+1 ), N2 ) + CALL SLACPY( 'Full', N2, N1, WORK( M*M+1 ), N2, A( J1, J1+N2 ), + $ LDA ) + CALL SGEMM( 'T', 'N', N2, N1, N2, ONE, WORK, M, B( J1, J1+N2 ), + $ LDB, ZERO, WORK( M*M+1 ), N2 ) + CALL SLACPY( 'Full', N2, N1, WORK( M*M+1 ), N2, B( J1, J1+N2 ), + $ LDB ) + CALL SGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, WORK, M, ZERO, + $ WORK( M*M+1 ), M ) + CALL SLACPY( 'Full', M, M, WORK( M*M+1 ), M, LI, LDST ) + CALL SGEMM( 'N', 'N', N2, N1, N1, ONE, A( J1, J1+N2 ), LDA, + $ T( N2+1, N2+1 ), LDST, ZERO, WORK, N2 ) + CALL SLACPY( 'Full', N2, N1, WORK, N2, A( J1, J1+N2 ), LDA ) + CALL SGEMM( 'N', 'N', N2, N1, N1, ONE, B( J1, J1+N2 ), LDA, + $ T( N2+1, N2+1 ), LDST, ZERO, WORK, N2 ) + CALL SLACPY( 'Full', N2, N1, WORK, N2, B( J1, J1+N2 ), LDB ) + CALL SGEMM( 'T', 'N', M, M, M, ONE, IR, LDST, T, LDST, ZERO, + $ WORK, M ) + CALL SLACPY( 'Full', M, M, WORK, M, IR, LDST ) +* +* Accumulate transformations into Q and Z if requested. +* + IF( WANTQ ) THEN + CALL SGEMM( 'N', 'N', N, M, M, ONE, Q( 1, J1 ), LDQ, LI, + $ LDST, ZERO, WORK, N ) + CALL SLACPY( 'Full', N, M, WORK, N, Q( 1, J1 ), LDQ ) +* + END IF +* + IF( WANTZ ) THEN + CALL SGEMM( 'N', 'N', N, M, M, ONE, Z( 1, J1 ), LDZ, IR, + $ LDST, ZERO, WORK, N ) + CALL SLACPY( 'Full', N, M, WORK, N, Z( 1, J1 ), LDZ ) +* + END IF +* +* Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and +* (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). +* + I = J1 + M + IF( I.LE.N ) THEN + CALL SGEMM( 'T', 'N', M, N-I+1, M, ONE, LI, LDST, + $ A( J1, I ), LDA, ZERO, WORK, M ) + CALL SLACPY( 'Full', M, N-I+1, WORK, M, A( J1, I ), LDA ) + CALL SGEMM( 'T', 'N', M, N-I+1, M, ONE, LI, LDST, + $ B( J1, I ), LDA, ZERO, WORK, M ) + CALL SLACPY( 'Full', M, N-I+1, WORK, M, B( J1, I ), LDA ) + END IF + I = J1 - 1 + IF( I.GT.0 ) THEN + CALL SGEMM( 'N', 'N', I, M, M, ONE, A( 1, J1 ), LDA, IR, + $ LDST, ZERO, WORK, I ) + CALL SLACPY( 'Full', I, M, WORK, I, A( 1, J1 ), LDA ) + CALL SGEMM( 'N', 'N', I, M, M, ONE, B( 1, J1 ), LDB, IR, + $ LDST, ZERO, WORK, I ) + CALL SLACPY( 'Full', I, M, WORK, I, B( 1, J1 ), LDB ) + END IF +* +* Exit with INFO = 0 if swap was successfully performed. +* + RETURN +* + END IF +* +* Exit with INFO = 1 if swap was rejected. +* + 70 CONTINUE +* + INFO = 1 + RETURN +* +* End of STGEX2 +* + END diff --git a/costa/native/external/lapack/stgexc.f b/costa/native/external/lapack/stgexc.f new file mode 100644 index 000000000..703446009 --- /dev/null +++ b/costa/native/external/lapack/stgexc.f @@ -0,0 +1,434 @@ + SUBROUTINE STGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, IFST, ILST, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER IFST, ILST, INFO, LDA, LDB, LDQ, LDZ, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* STGEXC reorders the generalized real Schur decomposition of a real +* matrix pair (A,B) using an orthogonal equivalence transformation +* +* (A, B) = Q * (A, B) * Z', +* +* so that the diagonal block of (A, B) with row index IFST is moved +* to row ILST. +* +* (A, B) must be in generalized real Schur canonical form (as returned +* by SGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 +* diagonal blocks. B is upper triangular. +* +* Optionally, the matrices Q and Z of generalized Schur vectors are +* updated. +* +* Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)' +* Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)' +* +* +* Arguments +* ========= +* +* WANTQ (input) LOGICAL +* .TRUE. : update the left transformation matrix Q; +* .FALSE.: do not update Q. +* +* WANTZ (input) LOGICAL +* .TRUE. : update the right transformation matrix Z; +* .FALSE.: do not update Z. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the matrix A in generalized real Schur canonical +* form. +* On exit, the updated matrix A, again in generalized +* real Schur canonical form. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) REAL array, dimension (LDB,N) +* On entry, the matrix B in generalized real Schur canonical +* form (A,B). +* On exit, the updated matrix B, again in generalized +* real Schur canonical form (A,B). +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* Q (input/output) REAL array, dimension (LDZ,N) +* On entry, if WANTQ = .TRUE., the orthogonal matrix Q. +* On exit, the updated matrix Q. +* If WANTQ = .FALSE., Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= 1. +* If WANTQ = .TRUE., LDQ >= N. +* +* Z (input/output) REAL array, dimension (LDZ,N) +* On entry, if WANTZ = .TRUE., the orthogonal matrix Z. +* On exit, the updated matrix Z. +* If WANTZ = .FALSE., Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1. +* If WANTZ = .TRUE., LDZ >= N. +* +* IFST (input/output) INTEGER +* ILST (input/output) INTEGER +* Specify the reordering of the diagonal blocks of (A, B). +* The block with row index IFST is moved to row ILST, by a +* sequence of swapping between adjacent blocks. +* On exit, if IFST pointed on entry to the second row of +* a 2-by-2 block, it is changed to point to the first row; +* ILST always points to the first row of the block in its +* final position (which may differ from its input value by +* +1 or -1). 1 <= IFST, ILST <= N. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 4*N + 16. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* =0: successful exit. +* <0: if INFO = -i, the i-th argument had an illegal value. +* =1: The transformed matrix pair (A, B) would be too far +* from generalized Schur form; the problem is ill- +* conditioned. (A, B) may have been partially reordered, +* and ILST points to the first row of the current +* position of the block being moved. +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +* M.S. Moonen et al (eds), Linear Algebra for Large Scale and +* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER HERE, LWMIN, NBF, NBL, NBNEXT +* .. +* .. External Subroutines .. + EXTERNAL STGEX2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode and test input arguments. +* + INFO = 0 + LWMIN = MAX( 1, 4*N+16 ) + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. ( LDQ.LT.MAX( 1, N ) ) ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. WANTZ .AND. ( LDZ.LT.MAX( 1, N ) ) ) THEN + INFO = -11 + ELSE IF( IFST.LT.1 .OR. IFST.GT.N ) THEN + INFO = -12 + ELSE IF( ILST.LT.1 .OR. ILST.GT.N ) THEN + INFO = -13 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STGEXC', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* +* Determine the first row of the specified block and find out +* if it is 1-by-1 or 2-by-2. +* + IF( IFST.GT.1 ) THEN + IF( A( IFST, IFST-1 ).NE.ZERO ) + $ IFST = IFST - 1 + END IF + NBF = 1 + IF( IFST.LT.N ) THEN + IF( A( IFST+1, IFST ).NE.ZERO ) + $ NBF = 2 + END IF +* +* Determine the first row of the final block +* and find out if it is 1-by-1 or 2-by-2. +* + IF( ILST.GT.1 ) THEN + IF( A( ILST, ILST-1 ).NE.ZERO ) + $ ILST = ILST - 1 + END IF + NBL = 1 + IF( ILST.LT.N ) THEN + IF( A( ILST+1, ILST ).NE.ZERO ) + $ NBL = 2 + END IF + IF( IFST.EQ.ILST ) + $ RETURN +* + IF( IFST.LT.ILST ) THEN +* +* Update ILST. +* + IF( NBF.EQ.2 .AND. NBL.EQ.1 ) + $ ILST = ILST - 1 + IF( NBF.EQ.1 .AND. NBL.EQ.2 ) + $ ILST = ILST + 1 +* + HERE = IFST +* + 10 CONTINUE +* +* Swap with next one below. +* + IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN +* +* Current block either 1-by-1 or 2-by-2. +* + NBNEXT = 1 + IF( HERE+NBF+1.LE.N ) THEN + IF( A( HERE+NBF+1, HERE+NBF ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE, NBF, NBNEXT, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + NBNEXT +* +* Test if 2-by-2 block breaks into two 1-by-1 blocks. +* + IF( NBF.EQ.2 ) THEN + IF( A( HERE+1, HERE ).EQ.ZERO ) + $ NBF = 3 + END IF +* + ELSE +* +* Current block consists of two 1-by-1 blocks, each of which +* must be swapped individually. +* + NBNEXT = 1 + IF( HERE+3.LE.N ) THEN + IF( A( HERE+3, HERE+2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE+1, 1, NBNEXT, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + IF( NBNEXT.EQ.1 ) THEN +* +* Swap two 1-by-1 blocks. +* + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE, 1, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 1 +* + ELSE +* +* Recompute NBNEXT in case of 2-by-2 split. +* + IF( A( HERE+2, HERE+1 ).EQ.ZERO ) + $ NBNEXT = 1 + IF( NBNEXT.EQ.2 ) THEN +* +* 2-by-2 block did not split. +* + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE, 1, NBNEXT, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 2 + ELSE +* +* 2-by-2 block did split. +* + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 1 + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 1 + END IF +* + END IF + END IF + IF( HERE.LT.ILST ) + $ GO TO 10 + ELSE + HERE = IFST +* + 20 CONTINUE +* +* Swap with next one below. +* + IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN +* +* Current block either 1-by-1 or 2-by-2. +* + NBNEXT = 1 + IF( HERE.GE.3 ) THEN + IF( A( HERE-1, HERE-2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE-NBNEXT, NBNEXT, NBF, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - NBNEXT +* +* Test if 2-by-2 block breaks into two 1-by-1 blocks. +* + IF( NBF.EQ.2 ) THEN + IF( A( HERE+1, HERE ).EQ.ZERO ) + $ NBF = 3 + END IF +* + ELSE +* +* Current block consists of two 1-by-1 blocks, each of which +* must be swapped individually. +* + NBNEXT = 1 + IF( HERE.GE.3 ) THEN + IF( A( HERE-1, HERE-2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE-NBNEXT, NBNEXT, 1, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + IF( NBNEXT.EQ.1 ) THEN +* +* Swap two 1-by-1 blocks. +* + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE, NBNEXT, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 1 + ELSE +* +* Recompute NBNEXT in case of 2-by-2 split. +* + IF( A( HERE, HERE-1 ).EQ.ZERO ) + $ NBNEXT = 1 + IF( NBNEXT.EQ.2 ) THEN +* +* 2-by-2 block did not split. +* + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE-1, 2, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 2 + ELSE +* +* 2-by-2 block did split. +* + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 1 + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 1 + END IF + END IF + END IF + IF( HERE.GT.ILST ) + $ GO TO 20 + END IF + ILST = HERE + WORK( 1 ) = LWMIN + RETURN +* +* End of STGEXC +* + END diff --git a/costa/native/external/lapack/stgsen.f b/costa/native/external/lapack/stgsen.f new file mode 100644 index 000000000..5970bd1c0 --- /dev/null +++ b/costa/native/external/lapack/stgsen.f @@ -0,0 +1,718 @@ + SUBROUTINE STGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, Q, LDQ, Z, LDZ, M, PL, + $ PR, DIF, WORK, LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER IJOB, INFO, LDA, LDB, LDQ, LDZ, LIWORK, LWORK, + $ M, N + REAL PL, PR +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), DIF( * ), Q( LDQ, * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* STGSEN reorders the generalized real Schur decomposition of a real +* matrix pair (A, B) (in terms of an orthonormal equivalence trans- +* formation Q' * (A, B) * Z), so that a selected cluster of eigenvalues +* appears in the leading diagonal blocks of the upper quasi-triangular +* matrix A and the upper triangular B. The leading columns of Q and +* Z form orthonormal bases of the corresponding left and right eigen- +* spaces (deflating subspaces). (A, B) must be in generalized real +* Schur canonical form (as returned by SGGES), i.e. A is block upper +* triangular with 1-by-1 and 2-by-2 diagonal blocks. B is upper +* triangular. +* +* STGSEN also computes the generalized eigenvalues +* +* w(j) = (ALPHAR(j) + i*ALPHAI(j))/BETA(j) +* +* of the reordered matrix pair (A, B). +* +* Optionally, STGSEN computes the estimates of reciprocal condition +* numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11), +* (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s) +* between the matrix pairs (A11, B11) and (A22,B22) that correspond to +* the selected cluster and the eigenvalues outside the cluster, resp., +* and norms of "projections" onto left and right eigenspaces w.r.t. +* the selected cluster in the (1,1)-block. +* +* Arguments +* ========= +* +* IJOB (input) INTEGER +* Specifies whether condition numbers are required for the +* cluster of eigenvalues (PL and PR) or the deflating subspaces +* (Difu and Difl): +* =0: Only reorder w.r.t. SELECT. No extras. +* =1: Reciprocal of norms of "projections" onto left and right +* eigenspaces w.r.t. the selected cluster (PL and PR). +* =2: Upper bounds on Difu and Difl. F-norm-based estimate +* (DIF(1:2)). +* =3: Estimate of Difu and Difl. 1-norm-based estimate +* (DIF(1:2)). +* About 5 times as expensive as IJOB = 2. +* =4: Compute PL, PR and DIF (i.e. 0, 1 and 2 above): Economic +* version to get it all. +* =5: Compute PL, PR and DIF (i.e. 0, 1 and 3 above) +* +* WANTQ (input) LOGICAL +* .TRUE. : update the left transformation matrix Q; +* .FALSE.: do not update Q. +* +* WANTZ (input) LOGICAL +* .TRUE. : update the right transformation matrix Z; +* .FALSE.: do not update Z. +* +* SELECT (input) LOGICAL array, dimension (N) +* SELECT specifies the eigenvalues in the selected cluster. +* To select a real eigenvalue w(j), SELECT(j) must be set to +* .TRUE.. To select a complex conjugate pair of eigenvalues +* w(j) and w(j+1), corresponding to a 2-by-2 diagonal block, +* either SELECT(j) or SELECT(j+1) or both must be set to +* .TRUE.; a complex conjugate pair of eigenvalues must be +* either both included in the cluster or both excluded. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) REAL array, dimension(LDA,N) +* On entry, the upper quasi-triangular matrix A, with (A, B) in +* generalized real Schur canonical form. +* On exit, A is overwritten by the reordered matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) REAL array, dimension(LDB,N) +* On entry, the upper triangular matrix B, with (A, B) in +* generalized real Schur canonical form. +* On exit, B is overwritten by the reordered matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* ALPHAR (output) REAL array, dimension (N) +* ALPHAI (output) REAL array, dimension (N) +* BETA (output) REAL array, dimension (N) +* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +* be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i +* and BETA(j),j=1,...,N are the diagonals of the complex Schur +* form (S,T) that would result if the 2-by-2 diagonal blocks of +* the real generalized Schur form of (A,B) were further reduced +* to triangular form using complex unitary transformations. +* If ALPHAI(j) is zero, then the j-th eigenvalue is real; if +* positive, then the j-th and (j+1)-st eigenvalues are a +* complex conjugate pair, with ALPHAI(j+1) negative. +* +* Q (input/output) REAL array, dimension (LDQ,N) +* On entry, if WANTQ = .TRUE., Q is an N-by-N matrix. +* On exit, Q has been postmultiplied by the left orthogonal +* transformation matrix which reorder (A, B); The leading M +* columns of Q form orthonormal bases for the specified pair of +* left eigenspaces (deflating subspaces). +* If WANTQ = .FALSE., Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= 1; +* and if WANTQ = .TRUE., LDQ >= N. +* +* Z (input/output) REAL array, dimension (LDZ,N) +* On entry, if WANTZ = .TRUE., Z is an N-by-N matrix. +* On exit, Z has been postmultiplied by the left orthogonal +* transformation matrix which reorder (A, B); The leading M +* columns of Z form orthonormal bases for the specified pair of +* left eigenspaces (deflating subspaces). +* If WANTZ = .FALSE., Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1; +* If WANTZ = .TRUE., LDZ >= N. +* +* M (output) INTEGER +* The dimension of the specified pair of left and right eigen- +* spaces (deflating subspaces). 0 <= M <= N. +* +* PL, PR (output) REAL +* If IJOB = 1, 4 or 5, PL, PR are lower bounds on the +* reciprocal of the norm of "projections" onto left and right +* eigenspaces with respect to the selected cluster. +* 0 < PL, PR <= 1. +* If M = 0 or M = N, PL = PR = 1. +* If IJOB = 0, 2 or 3, PL and PR are not referenced. +* +* DIF (output) REAL array, dimension (2). +* If IJOB >= 2, DIF(1:2) store the estimates of Difu and Difl. +* If IJOB = 2 or 4, DIF(1:2) are F-norm-based upper bounds on +* Difu and Difl. If IJOB = 3 or 5, DIF(1:2) are 1-norm-based +* estimates of Difu and Difl. +* If M = 0 or N, DIF(1:2) = F-norm([A, B]). +* If IJOB = 0 or 1, DIF is not referenced. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* IF IJOB = 0, WORK is not referenced. Otherwise, +* on exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 4*N+16. +* If IJOB = 1, 2 or 4, LWORK >= MAX(4*N+16, 2*M*(N-M)). +* If IJOB = 3 or 5, LWORK >= MAX(4*N+16, 4*M*(N-M)). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* IF IJOB = 0, IWORK is not referenced. Otherwise, +* on exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. LIWORK >= 1. +* If IJOB = 1, 2 or 4, LIWORK >= N+6. +* If IJOB = 3 or 5, LIWORK >= MAX(2*M*(N-M), N+6). +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* =0: Successful exit. +* <0: If INFO = -i, the i-th argument had an illegal value. +* =1: Reordering of (A, B) failed because the transformed +* matrix pair (A, B) would be too far from generalized +* Schur form; the problem is very ill-conditioned. +* (A, B) may have been partially reordered. +* If requested, 0 is returned in DIF(*), PL and PR. +* +* Further Details +* =============== +* +* STGSEN first collects the selected eigenvalues by computing +* orthogonal U and W that move them to the top left corner of (A, B). +* In other words, the selected eigenvalues are the eigenvalues of +* (A11, B11) in: +* +* U'*(A, B)*W = (A11 A12) (B11 B12) n1 +* ( 0 A22),( 0 B22) n2 +* n1 n2 n1 n2 +* +* where N = n1+n2 and U' means the transpose of U. The first n1 columns +* of U and W span the specified pair of left and right eigenspaces +* (deflating subspaces) of (A, B). +* +* If (A, B) has been obtained from the generalized real Schur +* decomposition of a matrix pair (C, D) = Q*(A, B)*Z', then the +* reordered generalized real Schur form of (C, D) is given by +* +* (C, D) = (Q*U)*(U'*(A, B)*W)*(Z*W)', +* +* and the first n1 columns of Q*U and Z*W span the corresponding +* deflating subspaces of (C, D) (Q and Z store Q*U and Z*W, resp.). +* +* Note that if the selected eigenvalue is sufficiently ill-conditioned, +* then its value may differ significantly from its value before +* reordering. +* +* The reciprocal condition numbers of the left and right eigenspaces +* spanned by the first n1 columns of U and W (or Q*U and Z*W) may +* be returned in DIF(1:2), corresponding to Difu and Difl, resp. +* +* The Difu and Difl are defined as: +* +* Difu[(A11, B11), (A22, B22)] = sigma-min( Zu ) +* and +* Difl[(A11, B11), (A22, B22)] = Difu[(A22, B22), (A11, B11)], +* +* where sigma-min(Zu) is the smallest singular value of the +* (2*n1*n2)-by-(2*n1*n2) matrix +* +* Zu = [ kron(In2, A11) -kron(A22', In1) ] +* [ kron(In2, B11) -kron(B22', In1) ]. +* +* Here, Inx is the identity matrix of size nx and A22' is the +* transpose of A22. kron(X, Y) is the Kronecker product between +* the matrices X and Y. +* +* When DIF(2) is small, small changes in (A, B) can cause large changes +* in the deflating subspace. An approximate (asymptotic) bound on the +* maximum angular error in the computed deflating subspaces is +* +* EPS * norm((A, B)) / DIF(2), +* +* where EPS is the machine precision. +* +* The reciprocal norm of the projectors on the left and right +* eigenspaces associated with (A11, B11) may be returned in PL and PR. +* They are computed as follows. First we compute L and R so that +* P*(A, B)*Q is block diagonal, where +* +* P = ( I -L ) n1 Q = ( I R ) n1 +* ( 0 I ) n2 and ( 0 I ) n2 +* n1 n2 n1 n2 +* +* and (L, R) is the solution to the generalized Sylvester equation +* +* A11*R - L*A22 = -A12 +* B11*R - L*B22 = -B12 +* +* Then PL = (F-norm(L)**2+1)**(-1/2) and PR = (F-norm(R)**2+1)**(-1/2). +* An approximate (asymptotic) bound on the average absolute error of +* the selected eigenvalues is +* +* EPS * norm((A, B)) / PL. +* +* There are also global error bounds which valid for perturbations up +* to a certain restriction: A lower bound (x) on the smallest +* F-norm(E,F) for which an eigenvalue of (A11, B11) may move and +* coalesce with an eigenvalue of (A22, B22) under perturbation (E,F), +* (i.e. (A + E, B + F), is +* +* x = min(Difu,Difl)/((1/(PL*PL)+1/(PR*PR))**(1/2)+2*max(1/PL,1/PR)). +* +* An approximate bound on x can be computed from DIF(1:2), PL and PR. +* +* If y = ( F-norm(E,F) / x) <= 1, the angles between the perturbed +* (L', R') and unperturbed (L, R) left and right deflating subspaces +* associated with the selected cluster in the (1,1)-blocks can be +* bounded as +* +* max-angle(L, L') <= arctan( y * PL / (1 - y * (1 - PL * PL)**(1/2)) +* max-angle(R, R') <= arctan( y * PR / (1 - y * (1 - PR * PR)**(1/2)) +* +* See LAPACK User's Guide section 4.11 or the following references +* for more information. +* +* Note that if the default method for computing the Frobenius-norm- +* based estimate DIF is not wanted (see SLATDF), then the parameter +* IDIFJB (see below) should be changed from 3 to 4 (routine SLATDF +* (IJOB = 2 will be used)). See STGSYL for more details. +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* References +* ========== +* +* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +* M.S. Moonen et al (eds), Linear Algebra for Large Scale and +* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +* +* [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +* Eigenvalues of a Regular Matrix Pair (A, B) and Condition +* Estimation: Theory, Algorithms and Software, +* Report UMINF - 94.04, Department of Computing Science, Umea +* University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working +* Note 87. To appear in Numerical Algorithms, 1996. +* +* [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +* for Solving the Generalized Sylvester Equation and Estimating the +* Separation between Regular Matrix Pairs, Report UMINF - 93.23, +* Department of Computing Science, Umea University, S-901 87 Umea, +* Sweden, December 1993, Revised April 1994, Also as LAPACK Working +* Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1, +* 1996. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IDIFJB + PARAMETER ( IDIFJB = 3 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, PAIR, SWAP, WANTD, WANTD1, WANTD2, + $ WANTP + INTEGER I, IERR, IJB, K, KASE, KK, KS, LIWMIN, LWMIN, + $ MN2, N1, N2 + REAL DSCALE, DSUM, EPS, RDSCAL, SMLNUM +* .. +* .. External Subroutines .. + EXTERNAL SLACON, SLACPY, SLAG2, SLASSQ, STGEXC, STGSYL, + $ XERBLA +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + IF( IJOB.LT.0 .OR. IJOB.GT.5 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -14 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -16 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STGSEN', -INFO ) + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + IERR = 0 +* + WANTP = IJOB.EQ.1 .OR. IJOB.GE.4 + WANTD1 = IJOB.EQ.2 .OR. IJOB.EQ.4 + WANTD2 = IJOB.EQ.3 .OR. IJOB.EQ.5 + WANTD = WANTD1 .OR. WANTD2 +* +* Set M to the dimension of the specified pair of deflating +* subspaces. +* + M = 0 + PAIR = .FALSE. + DO 10 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE + IF( K.LT.N ) THEN + IF( A( K+1, K ).EQ.ZERO ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( K ) .OR. SELECT( K+1 ) ) + $ M = M + 2 + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE +* + IF( IJOB.EQ.1 .OR. IJOB.EQ.2 .OR. IJOB.EQ.4 ) THEN + LWMIN = MAX( 1, 4*N+16, 2*M*(N-M) ) + LIWMIN = MAX( 1, N+6 ) + ELSE IF( IJOB.EQ.3 .OR. IJOB.EQ.5 ) THEN + LWMIN = MAX( 1, 4*N+16, 4*M*(N-M) ) + LIWMIN = MAX( 1, 2*M*(N-M), N+6 ) + ELSE + LWMIN = MAX( 1, 4*N+16 ) + LIWMIN = 1 + END IF +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -22 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -24 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STGSEN', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.N .OR. M.EQ.0 ) THEN + IF( WANTP ) THEN + PL = ONE + PR = ONE + END IF + IF( WANTD ) THEN + DSCALE = ZERO + DSUM = ONE + DO 20 I = 1, N + CALL SLASSQ( N, A( 1, I ), 1, DSCALE, DSUM ) + CALL SLASSQ( N, B( 1, I ), 1, DSCALE, DSUM ) + 20 CONTINUE + DIF( 1 ) = DSCALE*SQRT( DSUM ) + DIF( 2 ) = DIF( 1 ) + END IF + GO TO 60 + END IF +* +* Collect the selected blocks at the top-left corner of (A, B). +* + KS = 0 + PAIR = .FALSE. + DO 30 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE +* + SWAP = SELECT( K ) + IF( K.LT.N ) THEN + IF( A( K+1, K ).NE.ZERO ) THEN + PAIR = .TRUE. + SWAP = SWAP .OR. SELECT( K+1 ) + END IF + END IF +* + IF( SWAP ) THEN + KS = KS + 1 +* +* Swap the K-th block to position KS. +* Perform the reordering of diagonal blocks in (A, B) +* by orthogonal transformation matrices and update +* Q and Z accordingly (if requested): +* + KK = K + IF( K.NE.KS ) + $ CALL STGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, KK, KS, WORK, LWORK, IERR ) +* + IF( IERR.GT.0 ) THEN +* +* Swap is rejected: exit. +* + INFO = 1 + IF( WANTP ) THEN + PL = ZERO + PR = ZERO + END IF + IF( WANTD ) THEN + DIF( 1 ) = ZERO + DIF( 2 ) = ZERO + END IF + GO TO 60 + END IF +* + IF( PAIR ) + $ KS = KS + 1 + END IF + END IF + 30 CONTINUE + IF( WANTP ) THEN +* +* Solve generalized Sylvester equation for R and L +* and compute PL and PR. +* + N1 = M + N2 = N - M + I = N1 + 1 + IJB = 0 + CALL SLACPY( 'Full', N1, N2, A( 1, I ), LDA, WORK, N1 ) + CALL SLACPY( 'Full', N1, N2, B( 1, I ), LDB, WORK( N1*N2+1 ), + $ N1 ) + CALL STGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, + $ N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), N1, + $ DSCALE, DIF( 1 ), WORK( N1*N2*2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) +* +* Estimate the reciprocal of norms of "projections" onto left +* and right eigenspaces. +* + RDSCAL = ZERO + DSUM = ONE + CALL SLASSQ( N1*N2, WORK, 1, RDSCAL, DSUM ) + PL = RDSCAL*SQRT( DSUM ) + IF( PL.EQ.ZERO ) THEN + PL = ONE + ELSE + PL = DSCALE / ( SQRT( DSCALE*DSCALE / PL+PL )*SQRT( PL ) ) + END IF + RDSCAL = ZERO + DSUM = ONE + CALL SLASSQ( N1*N2, WORK( N1*N2+1 ), 1, RDSCAL, DSUM ) + PR = RDSCAL*SQRT( DSUM ) + IF( PR.EQ.ZERO ) THEN + PR = ONE + ELSE + PR = DSCALE / ( SQRT( DSCALE*DSCALE / PR+PR )*SQRT( PR ) ) + END IF + END IF +* + IF( WANTD ) THEN +* +* Compute estimates of Difu and Difl. +* + IF( WANTD1 ) THEN + N1 = M + N2 = N - M + I = N1 + 1 + IJB = IDIFJB +* +* Frobenius norm-based Difu-estimate. +* + CALL STGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, + $ N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), + $ N1, DSCALE, DIF( 1 ), WORK( 2*N1*N2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) +* +* Frobenius norm-based Difl-estimate. +* + CALL STGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, WORK, + $ N2, B( I, I ), LDB, B, LDB, WORK( N1*N2+1 ), + $ N2, DSCALE, DIF( 2 ), WORK( 2*N1*N2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) + ELSE +* +* +* Compute 1-norm-based estimates of Difu and Difl using +* reversed communication with SLACON. In each step a +* generalized Sylvester equation or a transposed variant +* is solved. +* + KASE = 0 + N1 = M + N2 = N - M + I = N1 + 1 + IJB = 0 + MN2 = 2*N1*N2 +* +* 1-norm-based estimate of Difu. +* + 40 CONTINUE + CALL SLACON( MN2, WORK( MN2+1 ), WORK, IWORK, DIF( 1 ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve generalized Sylvester equation. +* + CALL STGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, + $ WORK, N1, B, LDB, B( I, I ), LDB, + $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), + $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + ELSE +* +* Solve the transposed variant. +* + CALL STGSYL( 'T', IJB, N1, N2, A, LDA, A( I, I ), LDA, + $ WORK, N1, B, LDB, B( I, I ), LDB, + $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), + $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + END IF + GO TO 40 + END IF + DIF( 1 ) = DSCALE / DIF( 1 ) +* +* 1-norm-based estimate of Difl. +* + 50 CONTINUE + CALL SLACON( MN2, WORK( MN2+1 ), WORK, IWORK, DIF( 2 ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve generalized Sylvester equation. +* + CALL STGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, + $ WORK, N2, B( I, I ), LDB, B, LDB, + $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), + $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + ELSE +* +* Solve the transposed variant. +* + CALL STGSYL( 'T', IJB, N2, N1, A( I, I ), LDA, A, LDA, + $ WORK, N2, B( I, I ), LDB, B, LDB, + $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), + $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + END IF + GO TO 50 + END IF + DIF( 2 ) = DSCALE / DIF( 2 ) +* + END IF + END IF +* + 60 CONTINUE +* +* Compute generalized eigenvalues of reordered pair (A, B) and +* normalize the generalized Schur form. +* + PAIR = .FALSE. + DO 70 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE +* + IF( K.LT.N ) THEN + IF( A( K+1, K ).NE.ZERO ) THEN + PAIR = .TRUE. + END IF + END IF +* + IF( PAIR ) THEN +* +* Compute the eigenvalue(s) at position K. +* + WORK( 1 ) = A( K, K ) + WORK( 2 ) = A( K+1, K ) + WORK( 3 ) = A( K, K+1 ) + WORK( 4 ) = A( K+1, K+1 ) + WORK( 5 ) = B( K, K ) + WORK( 6 ) = B( K+1, K ) + WORK( 7 ) = B( K, K+1 ) + WORK( 8 ) = B( K+1, K+1 ) + CALL SLAG2( WORK, 2, WORK( 5 ), 2, SMLNUM*EPS, BETA( K ), + $ BETA( K+1 ), ALPHAR( K ), ALPHAR( K+1 ), + $ ALPHAI( K ) ) + ALPHAI( K+1 ) = -ALPHAI( K ) +* + ELSE +* + IF( SIGN( ONE, B( K, K ) ).LT.ZERO ) THEN +* +* If B(K,K) is negative, make it positive +* + DO 80 I = 1, N + A( K, I ) = -A( K, I ) + B( K, I ) = -B( K, I ) + Q( I, K ) = -Q( I, K ) + 80 CONTINUE + END IF +* + ALPHAR( K ) = A( K, K ) + ALPHAI( K ) = ZERO + BETA( K ) = B( K, K ) +* + END IF + END IF + 70 CONTINUE +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of STGSEN +* + END diff --git a/costa/native/external/lapack/stgsja.f b/costa/native/external/lapack/stgsja.f new file mode 100644 index 000000000..646bb48f2 --- /dev/null +++ b/costa/native/external/lapack/stgsja.f @@ -0,0 +1,516 @@ + SUBROUTINE STGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, + $ LDB, TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, + $ Q, LDQ, WORK, NCYCLE, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, + $ NCYCLE, P + REAL TOLA, TOLB +* .. +* .. Array Arguments .. + REAL A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), Q( LDQ, * ), U( LDU, * ), + $ V( LDV, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* STGSJA computes the generalized singular value decomposition (GSVD) +* of two real upper triangular (or trapezoidal) matrices A and B. +* +* On entry, it is assumed that matrices A and B have the following +* forms, which may be obtained by the preprocessing subroutine SGGSVP +* from a general M-by-N matrix A and P-by-N matrix B: +* +* N-K-L K L +* A = K ( 0 A12 A13 ) if M-K-L >= 0; +* L ( 0 0 A23 ) +* M-K-L ( 0 0 0 ) +* +* N-K-L K L +* A = K ( 0 A12 A13 ) if M-K-L < 0; +* M-K ( 0 0 A23 ) +* +* N-K-L K L +* B = L ( 0 0 B13 ) +* P-L ( 0 0 0 ) +* +* where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular +* upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, +* otherwise A23 is (M-K)-by-L upper trapezoidal. +* +* On exit, +* +* U'*A*Q = D1*( 0 R ), V'*B*Q = D2*( 0 R ), +* +* where U, V and Q are orthogonal matrices, Z' denotes the transpose +* of Z, R is a nonsingular upper triangular matrix, and D1 and D2 are +* ``diagonal'' matrices, which are of the following structures: +* +* If M-K-L >= 0, +* +* K L +* D1 = K ( I 0 ) +* L ( 0 C ) +* M-K-L ( 0 0 ) +* +* K L +* D2 = L ( 0 S ) +* P-L ( 0 0 ) +* +* N-K-L K L +* ( 0 R ) = K ( 0 R11 R12 ) K +* L ( 0 0 R22 ) L +* +* where +* +* C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), +* S = diag( BETA(K+1), ... , BETA(K+L) ), +* C**2 + S**2 = I. +* +* R is stored in A(1:K+L,N-K-L+1:N) on exit. +* +* If M-K-L < 0, +* +* K M-K K+L-M +* D1 = K ( I 0 0 ) +* M-K ( 0 C 0 ) +* +* K M-K K+L-M +* D2 = M-K ( 0 S 0 ) +* K+L-M ( 0 0 I ) +* P-L ( 0 0 0 ) +* +* N-K-L K M-K K+L-M +* ( 0 R ) = K ( 0 R11 R12 R13 ) +* M-K ( 0 0 R22 R23 ) +* K+L-M ( 0 0 0 R33 ) +* +* where +* C = diag( ALPHA(K+1), ... , ALPHA(M) ), +* S = diag( BETA(K+1), ... , BETA(M) ), +* C**2 + S**2 = I. +* +* R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored +* ( 0 R22 R23 ) +* in B(M-K+1:L,N+M-K-L+1:N) on exit. +* +* The computation of the orthogonal transformation matrices U, V or Q +* is optional. These matrices may either be formed explicitly, or they +* may be postmultiplied into input matrices U1, V1, or Q1. +* +* Arguments +* ========= +* +* JOBU (input) CHARACTER*1 +* = 'U': U must contain an orthogonal matrix U1 on entry, and +* the product U1*U is returned; +* = 'I': U is initialized to the unit matrix, and the +* orthogonal matrix U is returned; +* = 'N': U is not computed. +* +* JOBV (input) CHARACTER*1 +* = 'V': V must contain an orthogonal matrix V1 on entry, and +* the product V1*V is returned; +* = 'I': V is initialized to the unit matrix, and the +* orthogonal matrix V is returned; +* = 'N': V is not computed. +* +* JOBQ (input) CHARACTER*1 +* = 'Q': Q must contain an orthogonal matrix Q1 on entry, and +* the product Q1*Q is returned; +* = 'I': Q is initialized to the unit matrix, and the +* orthogonal matrix Q is returned; +* = 'N': Q is not computed. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* P (input) INTEGER +* The number of rows of the matrix B. P >= 0. +* +* N (input) INTEGER +* The number of columns of the matrices A and B. N >= 0. +* +* K (input) INTEGER +* L (input) INTEGER +* K and L specify the subblocks in the input matrices A and B: +* A23 = A(K+1:MIN(K+L,M),N-L+1:N) and B13 = B(1:L,N-L+1:N) +* of A and B, whose GSVD is going to be computed by STGSJA. +* See Further details. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A(N-K+1:N,1:MIN(K+L,M) ) contains the triangular +* matrix R or part of R. See Purpose for details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) REAL array, dimension (LDB,N) +* On entry, the P-by-N matrix B. +* On exit, if necessary, B(M-K+1:L,N+M-K-L+1:N) contains +* a part of R. See Purpose for details. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,P). +* +* TOLA (input) REAL +* TOLB (input) REAL +* TOLA and TOLB are the convergence criteria for the Jacobi- +* Kogbetliantz iteration procedure. Generally, they are the +* same as used in the preprocessing step, say +* TOLA = max(M,N)*norm(A)*MACHEPS, +* TOLB = max(P,N)*norm(B)*MACHEPS. +* +* ALPHA (output) REAL array, dimension (N) +* BETA (output) REAL array, dimension (N) +* On exit, ALPHA and BETA contain the generalized singular +* value pairs of A and B; +* ALPHA(1:K) = 1, +* BETA(1:K) = 0, +* and if M-K-L >= 0, +* ALPHA(K+1:K+L) = diag(C), +* BETA(K+1:K+L) = diag(S), +* or if M-K-L < 0, +* ALPHA(K+1:M)= C, ALPHA(M+1:K+L)= 0 +* BETA(K+1:M) = S, BETA(M+1:K+L) = 1. +* Furthermore, if K+L < N, +* ALPHA(K+L+1:N) = 0 and +* BETA(K+L+1:N) = 0. +* +* U (input/output) REAL array, dimension (LDU,M) +* On entry, if JOBU = 'U', U must contain a matrix U1 (usually +* the orthogonal matrix returned by SGGSVP). +* On exit, +* if JOBU = 'I', U contains the orthogonal matrix U; +* if JOBU = 'U', U contains the product U1*U. +* If JOBU = 'N', U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= max(1,M) if +* JOBU = 'U'; LDU >= 1 otherwise. +* +* V (input/output) REAL array, dimension (LDV,P) +* On entry, if JOBV = 'V', V must contain a matrix V1 (usually +* the orthogonal matrix returned by SGGSVP). +* On exit, +* if JOBV = 'I', V contains the orthogonal matrix V; +* if JOBV = 'V', V contains the product V1*V. +* If JOBV = 'N', V is not referenced. +* +* LDV (input) INTEGER +* The leading dimension of the array V. LDV >= max(1,P) if +* JOBV = 'V'; LDV >= 1 otherwise. +* +* Q (input/output) REAL array, dimension (LDQ,N) +* On entry, if JOBQ = 'Q', Q must contain a matrix Q1 (usually +* the orthogonal matrix returned by SGGSVP). +* On exit, +* if JOBQ = 'I', Q contains the orthogonal matrix Q; +* if JOBQ = 'Q', Q contains the product Q1*Q. +* If JOBQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N) if +* JOBQ = 'Q'; LDQ >= 1 otherwise. +* +* WORK (workspace) REAL array, dimension (2*N) +* +* NCYCLE (output) INTEGER +* The number of cycles required for convergence. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* = 1: the procedure does not converge after MAXIT cycles. +* +* Internal Parameters +* =================== +* +* MAXIT INTEGER +* MAXIT specifies the total loops that the iterative procedure +* may take. If after MAXIT cycles, the routine fails to +* converge, we return INFO = 1. +* +* Further Details +* =============== +* +* STGSJA essentially uses a variant of Kogbetliantz algorithm to reduce +* min(L,M-K)-by-L triangular (or trapezoidal) matrix A23 and L-by-L +* matrix B13 to the form: +* +* U1'*A13*Q1 = C1*R1; V1'*B13*Q1 = S1*R1, +* +* where U1, V1 and Q1 are orthogonal matrix, and Z' is the transpose +* of Z. C1 and S1 are diagonal matrices satisfying +* +* C1**2 + S1**2 = I, +* +* and R1 is an L-by-L nonsingular upper triangular matrix. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXIT + PARAMETER ( MAXIT = 40 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. +* + LOGICAL INITQ, INITU, INITV, UPPER, WANTQ, WANTU, WANTV + INTEGER I, J, KCYCLE + REAL A1, A2, A3, B1, B2, B3, CSQ, CSU, CSV, ERROR, + $ GAMMA, RWK, SNQ, SNU, SNV, SSMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLAGS2, SLAPLL, SLARTG, SLASET, SROT, + $ SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + INITU = LSAME( JOBU, 'I' ) + WANTU = INITU .OR. LSAME( JOBU, 'U' ) +* + INITV = LSAME( JOBV, 'I' ) + WANTV = INITV .OR. LSAME( JOBV, 'V' ) +* + INITQ = LSAME( JOBQ, 'I' ) + WANTQ = INITQ .OR. LSAME( JOBQ, 'Q' ) +* + INFO = 0 + IF( .NOT.( INITU .OR. WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( INITV .OR. WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( INITQ .OR. WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( P.LT.0 ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -12 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -18 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -20 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -22 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STGSJA', -INFO ) + RETURN + END IF +* +* Initialize U, V and Q, if necessary +* + IF( INITU ) + $ CALL SLASET( 'Full', M, M, ZERO, ONE, U, LDU ) + IF( INITV ) + $ CALL SLASET( 'Full', P, P, ZERO, ONE, V, LDV ) + IF( INITQ ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) +* +* Loop until convergence +* + UPPER = .FALSE. + DO 40 KCYCLE = 1, MAXIT +* + UPPER = .NOT.UPPER +* + DO 20 I = 1, L - 1 + DO 10 J = I + 1, L +* + A1 = ZERO + A2 = ZERO + A3 = ZERO + IF( K+I.LE.M ) + $ A1 = A( K+I, N-L+I ) + IF( K+J.LE.M ) + $ A3 = A( K+J, N-L+J ) +* + B1 = B( I, N-L+I ) + B3 = B( J, N-L+J ) +* + IF( UPPER ) THEN + IF( K+I.LE.M ) + $ A2 = A( K+I, N-L+J ) + B2 = B( I, N-L+J ) + ELSE + IF( K+J.LE.M ) + $ A2 = A( K+J, N-L+I ) + B2 = B( J, N-L+I ) + END IF +* + CALL SLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, + $ CSV, SNV, CSQ, SNQ ) +* +* Update (K+I)-th and (K+J)-th rows of matrix A: U'*A +* + IF( K+J.LE.M ) + $ CALL SROT( L, A( K+J, N-L+1 ), LDA, A( K+I, N-L+1 ), + $ LDA, CSU, SNU ) +* +* Update I-th and J-th rows of matrix B: V'*B +* + CALL SROT( L, B( J, N-L+1 ), LDB, B( I, N-L+1 ), LDB, + $ CSV, SNV ) +* +* Update (N-L+I)-th and (N-L+J)-th columns of matrices +* A and B: A*Q and B*Q +* + CALL SROT( MIN( K+L, M ), A( 1, N-L+J ), 1, + $ A( 1, N-L+I ), 1, CSQ, SNQ ) +* + CALL SROT( L, B( 1, N-L+J ), 1, B( 1, N-L+I ), 1, CSQ, + $ SNQ ) +* + IF( UPPER ) THEN + IF( K+I.LE.M ) + $ A( K+I, N-L+J ) = ZERO + B( I, N-L+J ) = ZERO + ELSE + IF( K+J.LE.M ) + $ A( K+J, N-L+I ) = ZERO + B( J, N-L+I ) = ZERO + END IF +* +* Update orthogonal matrices U, V, Q, if desired. +* + IF( WANTU .AND. K+J.LE.M ) + $ CALL SROT( M, U( 1, K+J ), 1, U( 1, K+I ), 1, CSU, + $ SNU ) +* + IF( WANTV ) + $ CALL SROT( P, V( 1, J ), 1, V( 1, I ), 1, CSV, SNV ) +* + IF( WANTQ ) + $ CALL SROT( N, Q( 1, N-L+J ), 1, Q( 1, N-L+I ), 1, CSQ, + $ SNQ ) +* + 10 CONTINUE + 20 CONTINUE +* + IF( .NOT.UPPER ) THEN +* +* The matrices A13 and B13 were lower triangular at the start +* of the cycle, and are now upper triangular. +* +* Convergence test: test the parallelism of the corresponding +* rows of A and B. +* + ERROR = ZERO + DO 30 I = 1, MIN( L, M-K ) + CALL SCOPY( L-I+1, A( K+I, N-L+I ), LDA, WORK, 1 ) + CALL SCOPY( L-I+1, B( I, N-L+I ), LDB, WORK( L+1 ), 1 ) + CALL SLAPLL( L-I+1, WORK, 1, WORK( L+1 ), 1, SSMIN ) + ERROR = MAX( ERROR, SSMIN ) + 30 CONTINUE +* + IF( ABS( ERROR ).LE.MIN( TOLA, TOLB ) ) + $ GO TO 50 + END IF +* +* End of cycle loop +* + 40 CONTINUE +* +* The algorithm has not converged after MAXIT cycles. +* + INFO = 1 + GO TO 100 +* + 50 CONTINUE +* +* If ERROR <= MIN(TOLA,TOLB), then the algorithm has converged. +* Compute the generalized singular value pairs (ALPHA, BETA), and +* set the triangular matrix R to array A. +* + DO 60 I = 1, K + ALPHA( I ) = ONE + BETA( I ) = ZERO + 60 CONTINUE +* + DO 70 I = 1, MIN( L, M-K ) +* + A1 = A( K+I, N-L+I ) + B1 = B( I, N-L+I ) +* + IF( A1.NE.ZERO ) THEN + GAMMA = B1 / A1 +* +* change sign if necessary +* + IF( GAMMA.LT.ZERO ) THEN + CALL SSCAL( L-I+1, -ONE, B( I, N-L+I ), LDB ) + IF( WANTV ) + $ CALL SSCAL( P, -ONE, V( 1, I ), 1 ) + END IF +* + CALL SLARTG( ABS( GAMMA ), ONE, BETA( K+I ), ALPHA( K+I ), + $ RWK ) +* + IF( ALPHA( K+I ).GE.BETA( K+I ) ) THEN + CALL SSCAL( L-I+1, ONE / ALPHA( K+I ), A( K+I, N-L+I ), + $ LDA ) + ELSE + CALL SSCAL( L-I+1, ONE / BETA( K+I ), B( I, N-L+I ), + $ LDB ) + CALL SCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ), + $ LDA ) + END IF +* + ELSE +* + ALPHA( K+I ) = ZERO + BETA( K+I ) = ONE + CALL SCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ), + $ LDA ) +* + END IF +* + 70 CONTINUE +* +* Post-assignment +* + DO 80 I = M + 1, K + L + ALPHA( I ) = ZERO + BETA( I ) = ONE + 80 CONTINUE +* + IF( K+L.LT.N ) THEN + DO 90 I = K + L + 1, N + ALPHA( I ) = ZERO + BETA( I ) = ZERO + 90 CONTINUE + END IF +* + 100 CONTINUE + NCYCLE = KCYCLE + RETURN +* +* End of STGSJA +* + END diff --git a/costa/native/external/lapack/stgsna.f b/costa/native/external/lapack/stgsna.f new file mode 100644 index 000000000..f6c03ed25 --- /dev/null +++ b/costa/native/external/lapack/stgsna.f @@ -0,0 +1,585 @@ + SUBROUTINE STGSNA( JOB, HOWMNY, SELECT, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, S, DIF, MM, M, WORK, LWORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, JOB + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + REAL A( LDA, * ), B( LDB, * ), DIF( * ), S( * ), + $ VL( LDVL, * ), VR( LDVR, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* STGSNA estimates reciprocal condition numbers for specified +* eigenvalues and/or eigenvectors of a matrix pair (A, B) in +* generalized real Schur canonical form (or of any matrix pair +* (Q*A*Z', Q*B*Z') with orthogonal matrices Q and Z, where +* Z' denotes the transpose of Z. +* +* (A, B) must be in generalized real Schur form (as returned by SGGES), +* i.e. A is block upper triangular with 1-by-1 and 2-by-2 diagonal +* blocks. B is upper triangular. +* +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies whether condition numbers are required for +* eigenvalues (S) or eigenvectors (DIF): +* = 'E': for eigenvalues only (S); +* = 'V': for eigenvectors only (DIF); +* = 'B': for both eigenvalues and eigenvectors (S and DIF). +* +* HOWMNY (input) CHARACTER*1 +* = 'A': compute condition numbers for all eigenpairs; +* = 'S': compute condition numbers for selected eigenpairs +* specified by the array SELECT. +* +* SELECT (input) LOGICAL array, dimension (N) +* If HOWMNY = 'S', SELECT specifies the eigenpairs for which +* condition numbers are required. To select condition numbers +* for the eigenpair corresponding to a real eigenvalue w(j), +* SELECT(j) must be set to .TRUE.. To select condition numbers +* corresponding to a complex conjugate pair of eigenvalues w(j) +* and w(j+1), either SELECT(j) or SELECT(j+1) or both, must be +* set to .TRUE.. +* If HOWMNY = 'A', SELECT is not referenced. +* +* N (input) INTEGER +* The order of the square matrix pair (A, B). N >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The upper quasi-triangular matrix A in the pair (A,B). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input) REAL array, dimension (LDB,N) +* The upper triangular matrix B in the pair (A,B). +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* VL (input) REAL array, dimension (LDVL,M) +* If JOB = 'E' or 'B', VL must contain left eigenvectors of +* (A, B), corresponding to the eigenpairs specified by HOWMNY +* and SELECT. The eigenvectors must be stored in consecutive +* columns of VL, as returned by STGEVC. +* If JOB = 'V', VL is not referenced. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. LDVL >= 1. +* If JOB = 'E' or 'B', LDVL >= N. +* +* VR (input) REAL array, dimension (LDVR,M) +* If JOB = 'E' or 'B', VR must contain right eigenvectors of +* (A, B), corresponding to the eigenpairs specified by HOWMNY +* and SELECT. The eigenvectors must be stored in consecutive +* columns ov VR, as returned by STGEVC. +* If JOB = 'V', VR is not referenced. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. LDVR >= 1. +* If JOB = 'E' or 'B', LDVR >= N. +* +* S (output) REAL array, dimension (MM) +* If JOB = 'E' or 'B', the reciprocal condition numbers of the +* selected eigenvalues, stored in consecutive elements of the +* array. For a complex conjugate pair of eigenvalues two +* consecutive elements of S are set to the same value. Thus +* S(j), DIF(j), and the j-th columns of VL and VR all +* correspond to the same eigenpair (but not in general the +* j-th eigenpair, unless all eigenpairs are selected). +* If JOB = 'V', S is not referenced. +* +* DIF (output) REAL array, dimension (MM) +* If JOB = 'V' or 'B', the estimated reciprocal condition +* numbers of the selected eigenvectors, stored in consecutive +* elements of the array. For a complex eigenvector two +* consecutive elements of DIF are set to the same value. If +* the eigenvalues cannot be reordered to compute DIF(j), DIF(j) +* is set to 0; this can only occur when the true value would be +* very small anyway. +* If JOB = 'E', DIF is not referenced. +* +* MM (input) INTEGER +* The number of elements in the arrays S and DIF. MM >= M. +* +* M (output) INTEGER +* The number of elements of the arrays S and DIF used to store +* the specified condition numbers; for each selected real +* eigenvalue one element is used, and for each selected complex +* conjugate pair of eigenvalues, two elements are used. +* If HOWMNY = 'A', M is set to N. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* If JOB = 'E', WORK is not referenced. Otherwise, +* on exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= N. +* If JOB = 'V' or 'B' LWORK >= 2*N*(N+2)+16. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace) INTEGER array, dimension (N + 6) +* If JOB = 'E', IWORK is not referenced. +* +* INFO (output) INTEGER +* =0: Successful exit +* <0: If INFO = -i, the i-th argument had an illegal value +* +* +* Further Details +* =============== +* +* The reciprocal of the condition number of a generalized eigenvalue +* w = (a, b) is defined as +* +* S(w) = (|u'Av|**2 + |u'Bv|**2)**(1/2) / (norm(u)*norm(v)) +* +* where u and v are the left and right eigenvectors of (A, B) +* corresponding to w; |z| denotes the absolute value of the complex +* number, and norm(u) denotes the 2-norm of the vector u. +* The pair (a, b) corresponds to an eigenvalue w = a/b (= u'Av/u'Bv) +* of the matrix pair (A, B). If both a and b equal zero, then (A B) is +* singular and S(I) = -1 is returned. +* +* An approximate error bound on the chordal distance between the i-th +* computed generalized eigenvalue w and the corresponding exact +* eigenvalue lambda is +* +* chord(w, lambda) <= EPS * norm(A, B) / S(I) +* +* where EPS is the machine precision. +* +* The reciprocal of the condition number DIF(i) of right eigenvector u +* and left eigenvector v corresponding to the generalized eigenvalue w +* is defined as follows: +* +* a) If the i-th eigenvalue w = (a,b) is real +* +* Suppose U and V are orthogonal transformations such that +* +* U'*(A, B)*V = (S, T) = ( a * ) ( b * ) 1 +* ( 0 S22 ),( 0 T22 ) n-1 +* 1 n-1 1 n-1 +* +* Then the reciprocal condition number DIF(i) is +* +* Difl((a, b), (S22, T22)) = sigma-min( Zl ), +* +* where sigma-min(Zl) denotes the smallest singular value of the +* 2(n-1)-by-2(n-1) matrix +* +* Zl = [ kron(a, In-1) -kron(1, S22) ] +* [ kron(b, In-1) -kron(1, T22) ] . +* +* Here In-1 is the identity matrix of size n-1. kron(X, Y) is the +* Kronecker product between the matrices X and Y. +* +* Note that if the default method for computing DIF(i) is wanted +* (see SLATDF), then the parameter DIFDRI (see below) should be +* changed from 3 to 4 (routine SLATDF(IJOB = 2 will be used)). +* See STGSYL for more details. +* +* b) If the i-th and (i+1)-th eigenvalues are complex conjugate pair, +* +* Suppose U and V are orthogonal transformations such that +* +* U'*(A, B)*V = (S, T) = ( S11 * ) ( T11 * ) 2 +* ( 0 S22 ),( 0 T22) n-2 +* 2 n-2 2 n-2 +* +* and (S11, T11) corresponds to the complex conjugate eigenvalue +* pair (w, conjg(w)). There exist unitary matrices U1 and V1 such +* that +* +* U1'*S11*V1 = ( s11 s12 ) and U1'*T11*V1 = ( t11 t12 ) +* ( 0 s22 ) ( 0 t22 ) +* +* where the generalized eigenvalues w = s11/t11 and +* conjg(w) = s22/t22. +* +* Then the reciprocal condition number DIF(i) is bounded by +* +* min( d1, max( 1, |real(s11)/real(s22)| )*d2 ) +* +* where, d1 = Difl((s11, t11), (s22, t22)) = sigma-min(Z1), where +* Z1 is the complex 2-by-2 matrix +* +* Z1 = [ s11 -s22 ] +* [ t11 -t22 ], +* +* This is done by computing (using real arithmetic) the +* roots of the characteristical polynomial det(Z1' * Z1 - lambda I), +* where Z1' denotes the conjugate transpose of Z1 and det(X) denotes +* the determinant of X. +* +* and d2 is an upper bound on Difl((S11, T11), (S22, T22)), i.e. an +* upper bound on sigma-min(Z2), where Z2 is (2n-2)-by-(2n-2) +* +* Z2 = [ kron(S11', In-2) -kron(I2, S22) ] +* [ kron(T11', In-2) -kron(I2, T22) ] +* +* Note that if the default method for computing DIF is wanted (see +* SLATDF), then the parameter DIFDRI (see below) should be changed +* from 3 to 4 (routine SLATDF(IJOB = 2 will be used)). See STGSYL +* for more details. +* +* For each eigenvalue/vector specified by SELECT, DIF stores a +* Frobenius norm-based estimate of Difl. +* +* An approximate error bound for the i-th computed eigenvector VL(i) or +* VR(i) is given by +* +* EPS * norm(A, B) / DIF(i). +* +* See ref. [2-3] for more details and further references. +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* References +* ========== +* +* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +* M.S. Moonen et al (eds), Linear Algebra for Large Scale and +* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +* +* [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +* Eigenvalues of a Regular Matrix Pair (A, B) and Condition +* Estimation: Theory, Algorithms and Software, +* Report UMINF - 94.04, Department of Computing Science, Umea +* University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working +* Note 87. To appear in Numerical Algorithms, 1996. +* +* [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +* for Solving the Generalized Sylvester Equation and Estimating the +* Separation between Regular Matrix Pairs, Report UMINF - 93.23, +* Department of Computing Science, Umea University, S-901 87 Umea, +* Sweden, December 1993, Revised April 1994, Also as LAPACK Working +* Note 75. To appear in ACM Trans. on Math. Software, Vol 22, +* No 1, 1996. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER DIFDRI + PARAMETER ( DIFDRI = 3 ) + REAL ZERO, ONE, TWO, FOUR + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0, + $ FOUR = 4.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, PAIR, SOMCON, WANTBH, WANTDF, WANTS + INTEGER I, IERR, IFST, ILST, IZ, K, KS, LWMIN, N1, N2 + REAL ALPHAI, ALPHAR, ALPRQT, BETA, C1, C2, COND, + $ EPS, LNRM, RNRM, ROOT1, ROOT2, SCALE, SMLNUM, + $ TMPII, TMPIR, TMPRI, TMPRR, UHAV, UHAVI, UHBV, + $ UHBVI +* .. +* .. Local Arrays .. + REAL DUMMY( 1 ), DUMMY1( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT, SLAMCH, SLAPY2, SNRM2 + EXTERNAL LSAME, SDOT, SLAMCH, SLAPY2, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SLACPY, SLAG2, STGEXC, STGSYL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTDF = LSAME( JOB, 'V' ) .OR. WANTBH +* + SOMCON = LSAME( HOWMNY, 'S' ) +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) +* + IF( LSAME( JOB, 'V' ) .OR. LSAME( JOB, 'B' ) ) THEN + LWMIN = MAX( 1, 2*N*(N+2)+16 ) + ELSE + LWMIN = 1 + END IF +* + IF( .NOT.WANTS .AND. .NOT.WANTDF ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( WANTS .AND. LDVL.LT.N ) THEN + INFO = -10 + ELSE IF( WANTS .AND. LDVR.LT.N ) THEN + INFO = -12 + ELSE +* +* Set M to the number of eigenpairs for which condition numbers +* are required, and test MM. +* + IF( SOMCON ) THEN + M = 0 + PAIR = .FALSE. + DO 10 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE + IF( K.LT.N ) THEN + IF( A( K+1, K ).EQ.ZERO ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( K ) .OR. SELECT( K+1 ) ) + $ M = M + 2 + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE + ELSE + M = N + END IF +* + IF( MM.LT.M ) THEN + INFO = -15 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 +c ELSE IF( WANTDF .AND. LWORK.LT.2*N*( N+2 )+16 ) THEN +c INFO = -18 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STGSNA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + KS = 0 + PAIR = .FALSE. +* + DO 20 K = 1, N +* +* Determine whether A(k,k) begins a 1-by-1 or 2-by-2 block. +* + IF( PAIR ) THEN + PAIR = .FALSE. + GO TO 20 + ELSE + IF( K.LT.N ) + $ PAIR = A( K+1, K ).NE.ZERO + END IF +* +* Determine whether condition numbers are required for the k-th +* eigenpair. +* + IF( SOMCON ) THEN + IF( PAIR ) THEN + IF( .NOT.SELECT( K ) .AND. .NOT.SELECT( K+1 ) ) + $ GO TO 20 + ELSE + IF( .NOT.SELECT( K ) ) + $ GO TO 20 + END IF + END IF +* + KS = KS + 1 +* + IF( WANTS ) THEN +* +* Compute the reciprocal condition number of the k-th +* eigenvalue. +* + IF( PAIR ) THEN +* +* Complex eigenvalue pair. +* + RNRM = SLAPY2( SNRM2( N, VR( 1, KS ), 1 ), + $ SNRM2( N, VR( 1, KS+1 ), 1 ) ) + LNRM = SLAPY2( SNRM2( N, VL( 1, KS ), 1 ), + $ SNRM2( N, VL( 1, KS+1 ), 1 ) ) + CALL SGEMV( 'N', N, N, ONE, A, LDA, VR( 1, KS ), 1, ZERO, + $ WORK, 1 ) + TMPRR = SDOT( N, WORK, 1, VL( 1, KS ), 1 ) + TMPRI = SDOT( N, WORK, 1, VL( 1, KS+1 ), 1 ) + CALL SGEMV( 'N', N, N, ONE, A, LDA, VR( 1, KS+1 ), 1, + $ ZERO, WORK, 1 ) + TMPII = SDOT( N, WORK, 1, VL( 1, KS+1 ), 1 ) + TMPIR = SDOT( N, WORK, 1, VL( 1, KS ), 1 ) + UHAV = TMPRR + TMPII + UHAVI = TMPIR - TMPRI + CALL SGEMV( 'N', N, N, ONE, B, LDB, VR( 1, KS ), 1, ZERO, + $ WORK, 1 ) + TMPRR = SDOT( N, WORK, 1, VL( 1, KS ), 1 ) + TMPRI = SDOT( N, WORK, 1, VL( 1, KS+1 ), 1 ) + CALL SGEMV( 'N', N, N, ONE, B, LDB, VR( 1, KS+1 ), 1, + $ ZERO, WORK, 1 ) + TMPII = SDOT( N, WORK, 1, VL( 1, KS+1 ), 1 ) + TMPIR = SDOT( N, WORK, 1, VL( 1, KS ), 1 ) + UHBV = TMPRR + TMPII + UHBVI = TMPIR - TMPRI + UHAV = SLAPY2( UHAV, UHAVI ) + UHBV = SLAPY2( UHBV, UHBVI ) + COND = SLAPY2( UHAV, UHBV ) + S( KS ) = COND / ( RNRM*LNRM ) + S( KS+1 ) = S( KS ) +* + ELSE +* +* Real eigenvalue. +* + RNRM = SNRM2( N, VR( 1, KS ), 1 ) + LNRM = SNRM2( N, VL( 1, KS ), 1 ) + CALL SGEMV( 'N', N, N, ONE, A, LDA, VR( 1, KS ), 1, ZERO, + $ WORK, 1 ) + UHAV = SDOT( N, WORK, 1, VL( 1, KS ), 1 ) + CALL SGEMV( 'N', N, N, ONE, B, LDB, VR( 1, KS ), 1, ZERO, + $ WORK, 1 ) + UHBV = SDOT( N, WORK, 1, VL( 1, KS ), 1 ) + COND = SLAPY2( UHAV, UHBV ) + IF( COND.EQ.ZERO ) THEN + S( KS ) = -ONE + ELSE + S( KS ) = COND / ( RNRM*LNRM ) + END IF + END IF + END IF +* + IF( WANTDF ) THEN + IF( N.EQ.1 ) THEN + DIF( KS ) = SLAPY2( A( 1, 1 ), B( 1, 1 ) ) + GO TO 20 + END IF +* +* Estimate the reciprocal condition number of the k-th +* eigenvectors. + IF( PAIR ) THEN +* +* Copy the 2-by 2 pencil beginning at (A(k,k), B(k, k)). +* Compute the eigenvalue(s) at position K. +* + WORK( 1 ) = A( K, K ) + WORK( 2 ) = A( K+1, K ) + WORK( 3 ) = A( K, K+1 ) + WORK( 4 ) = A( K+1, K+1 ) + WORK( 5 ) = B( K, K ) + WORK( 6 ) = B( K+1, K ) + WORK( 7 ) = B( K, K+1 ) + WORK( 8 ) = B( K+1, K+1 ) + CALL SLAG2( WORK, 2, WORK( 5 ), 2, SMLNUM*EPS, BETA, + $ DUMMY1( 1 ), ALPHAR, DUMMY( 1 ), ALPHAI ) + ALPRQT = ONE + C1 = TWO*( ALPHAR*ALPHAR+ALPHAI*ALPHAI+BETA*BETA ) + C2 = FOUR*BETA*BETA*ALPHAI*ALPHAI + ROOT1 = C1 + SQRT( C1*C1-4.0*C2 ) + ROOT2 = C2 / ROOT1 + ROOT1 = ROOT1 / TWO + COND = MIN( SQRT( ROOT1 ), SQRT( ROOT2 ) ) + END IF +* +* Copy the matrix (A, B) to the array WORK and swap the +* diagonal block beginning at A(k,k) to the (1,1) position. +* + CALL SLACPY( 'Full', N, N, A, LDA, WORK, N ) + CALL SLACPY( 'Full', N, N, B, LDB, WORK( N*N+1 ), N ) + IFST = K + ILST = 1 +* + CALL STGEXC( .FALSE., .FALSE., N, WORK, N, WORK( N*N+1 ), N, + $ DUMMY, 1, DUMMY1, 1, IFST, ILST, + $ WORK( N*N*2+1 ), LWORK-2*N*N, IERR ) +* + IF( IERR.GT.0 ) THEN +* +* Ill-conditioned problem - swap rejected. +* + DIF( KS ) = ZERO + ELSE +* +* Reordering successful, solve generalized Sylvester +* equation for R and L, +* A22 * R - L * A11 = A12 +* B22 * R - L * B11 = B12, +* and compute estimate of Difl((A11,B11), (A22, B22)). +* + N1 = 1 + IF( WORK( 2 ).NE.ZERO ) + $ N1 = 2 + N2 = N - N1 + IF( N2.EQ.0 ) THEN + DIF( KS ) = COND + ELSE + I = N*N + 1 + IZ = 2*N*N + 1 + CALL STGSYL( 'N', DIFDRI, N2, N1, WORK( N*N1+N1+1 ), + $ N, WORK, N, WORK( N1+1 ), N, + $ WORK( N*N1+N1+I ), N, WORK( I ), N, + $ WORK( N1+I ), N, SCALE, DIF( KS ), + $ WORK( IZ+1 ), LWORK-2*N*N, IWORK, IERR ) +* + IF( PAIR ) + $ DIF( KS ) = MIN( MAX( ONE, ALPRQT )*DIF( KS ), + $ COND ) + END IF + END IF + IF( PAIR ) + $ DIF( KS+1 ) = DIF( KS ) + END IF + IF( PAIR ) + $ KS = KS + 1 +* + 20 CONTINUE + WORK( 1 ) = LWMIN + RETURN +* +* End of STGSNA +* + END diff --git a/costa/native/external/lapack/stgsy2.f b/costa/native/external/lapack/stgsy2.f new file mode 100644 index 000000000..e2f59b9cb --- /dev/null +++ b/costa/native/external/lapack/stgsy2.f @@ -0,0 +1,950 @@ + SUBROUTINE STGSY2( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, RDSUM, RDSCAL, + $ IWORK, PQ, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, M, N, + $ PQ + REAL RDSCAL, RDSUM, SCALE +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ D( LDD, * ), E( LDE, * ), F( LDF, * ) +* .. +* +* Purpose +* ======= +* +* STGSY2 solves the generalized Sylvester equation: +* +* A * R - L * B = scale * C (1) +* D * R - L * E = scale * F, +* +* using Level 1 and 2 BLAS. where R and L are unknown M-by-N matrices, +* (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M, +* N-by-N and M-by-N, respectively, with real entries. (A, D) and (B, E) +* must be in generalized Schur canonical form, i.e. A, B are upper +* quasi triangular and D, E are upper triangular. The solution (R, L) +* overwrites (C, F). 0 <= SCALE <= 1 is an output scaling factor +* chosen to avoid overflow. +* +* In matrix notation solving equation (1) corresponds to solve +* Z*x = scale*b, where Z is defined as +* +* Z = [ kron(In, A) -kron(B', Im) ] (2) +* [ kron(In, D) -kron(E', Im) ], +* +* Ik is the identity matrix of size k and X' is the transpose of X. +* kron(X, Y) is the Kronecker product between the matrices X and Y. +* In the process of solving (1), we solve a number of such systems +* where Dim(In), Dim(In) = 1 or 2. +* +* If TRANS = 'T', solve the transposed system Z'*y = scale*b for y, +* which is equivalent to solve for R and L in +* +* A' * R + D' * L = scale * C (3) +* R * B' + L * E' = scale * -F +* +* This case is used to compute an estimate of Dif[(A, D), (B, E)] = +* sigma_min(Z) using reverse communicaton with SLACON. +* +* STGSY2 also (IJOB >= 1) contributes to the computation in STGSYL +* of an upper bound on the separation between to matrix pairs. Then +* the input (A, D), (B, E) are sub-pencils of the matrix pair in +* STGSYL. See STGSYL for details. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER +* = 'N', solve the generalized Sylvester equation (1). +* = 'T': solve the 'transposed' system (3). +* +* IJOB (input) INTEGER +* Specifies what kind of functionality to be performed. +* = 0: solve (1) only. +* = 1: A contribution from this subsystem to a Frobenius +* norm-based estimate of the separation between two matrix +* pairs is computed. (look ahead strategy is used). +* = 2: A contribution from this subsystem to a Frobenius +* norm-based estimate of the separation between two matrix +* pairs is computed. (SGECON on sub-systems is used.) +* Not referenced if TRANS = 'T'. +* +* M (input) INTEGER +* On entry, M specifies the order of A and D, and the row +* dimension of C, F, R and L. +* +* N (input) INTEGER +* On entry, N specifies the order of B and E, and the column +* dimension of C, F, R and L. +* +* A (input) REAL array, dimension (LDA, M) +* On entry, A contains an upper quasi triangular matrix. +* +* LDA (input) INTEGER +* The leading dimension of the matrix A. LDA >= max(1, M). +* +* B (input) REAL array, dimension (LDB, N) +* On entry, B contains an upper quasi triangular matrix. +* +* LDB (input) INTEGER +* The leading dimension of the matrix B. LDB >= max(1, N). +* +* C (input/ output) REAL array, dimension (LDC, N) +* On entry, C contains the right-hand-side of the first matrix +* equation in (1). +* On exit, if IJOB = 0, C has been overwritten by the +* solution R. +* +* LDC (input) INTEGER +* The leading dimension of the matrix C. LDC >= max(1, M). +* +* D (input) REAL array, dimension (LDD, M) +* On entry, D contains an upper triangular matrix. +* +* LDD (input) INTEGER +* The leading dimension of the matrix D. LDD >= max(1, M). +* +* E (input) REAL array, dimension (LDE, N) +* On entry, E contains an upper triangular matrix. +* +* LDE (input) INTEGER +* The leading dimension of the matrix E. LDE >= max(1, N). +* +* F (input/ output) REAL array, dimension (LDF, N) +* On entry, F contains the right-hand-side of the second matrix +* equation in (1). +* On exit, if IJOB = 0, F has been overwritten by the +* solution L. +* +* LDF (input) INTEGER +* The leading dimension of the matrix F. LDF >= max(1, M). +* +* SCALE (output) REAL +* On exit, 0 <= SCALE <= 1. If 0 < SCALE < 1, the solutions +* R and L (C and F on entry) will hold the solutions to a +* slightly perturbed system but the input matrices A, B, D and +* E have not been changed. If SCALE = 0, R and L will hold the +* solutions to the homogeneous system with C = F = 0. Normally, +* SCALE = 1. +* +* RDSUM (input/output) REAL +* On entry, the sum of squares of computed contributions to +* the Dif-estimate under computation by STGSYL, where the +* scaling factor RDSCAL (see below) has been factored out. +* On exit, the corresponding sum of squares updated with the +* contributions from the current sub-system. +* If TRANS = 'T' RDSUM is not touched. +* NOTE: RDSUM only makes sense when STGSY2 is called by STGSYL. +* +* RDSCAL (input/output) REAL +* On entry, scaling factor used to prevent overflow in RDSUM. +* On exit, RDSCAL is updated w.r.t. the current contributions +* in RDSUM. +* If TRANS = 'T', RDSCAL is not touched. +* NOTE: RDSCAL only makes sense when STGSY2 is called by +* STGSYL. +* +* IWORK (workspace) INTEGER array, dimension (M+N+2) +* +* PQ (output) INTEGER +* On exit, the number of subsystems (of size 2-by-2, 4-by-4 and +* 8-by-8) solved by this routine. +* +* INFO (output) INTEGER +* On exit, if INFO is set to +* =0: Successful exit +* <0: If INFO = -i, the i-th argument had an illegal value. +* >0: The matrix pairs (A, D) and (B, E) have common or very +* close eigenvalues. +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER LDZ + PARAMETER ( LDZ = 8 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + INTEGER I, IE, IERR, II, IS, ISP1, J, JE, JJ, JS, JSP1, + $ K, MB, NB, P, Q, ZDIM + REAL ALPHA, SCALOC +* .. +* .. Local Arrays .. + INTEGER IPIV( LDZ ), JPIV( LDZ ) + REAL RHS( LDZ ), Z( LDZ, LDZ ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGEMM, SGEMV, SGER, SGESC2, + $ SGETC2, SSCAL, SLATDF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode and test input parameters +* + INFO = 0 + IERR = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -1 + ELSE IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.2 ) ) THEN + INFO = -2 + ELSE IF( M.LE.0 ) THEN + INFO = -3 + ELSE IF( N.LE.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDD.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDE.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDF.LT.MAX( 1, M ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STGSY2', -INFO ) + RETURN + END IF +* +* Determine block structure of A +* + PQ = 0 + P = 0 + I = 1 + 10 CONTINUE + IF( I.GT.M ) + $ GO TO 20 + P = P + 1 + IWORK( P ) = I + IF( I.EQ.M ) + $ GO TO 20 + IF( A( I+1, I ).NE.ZERO ) THEN + I = I + 2 + ELSE + I = I + 1 + END IF + GO TO 10 + 20 CONTINUE + IWORK( P+1 ) = M + 1 +* +* Determine block structure of B +* + Q = P + 1 + J = 1 + 30 CONTINUE + IF( J.GT.N ) + $ GO TO 40 + Q = Q + 1 + IWORK( Q ) = J + IF( J.EQ.N ) + $ GO TO 40 + IF( B( J+1, J ).NE.ZERO ) THEN + J = J + 2 + ELSE + J = J + 1 + END IF + GO TO 30 + 40 CONTINUE + IWORK( Q+1 ) = N + 1 + PQ = P*( Q-P-1 ) +* + IF( NOTRAN ) THEN +* +* Solve (I, J) - subsystem +* A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) +* D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) +* for I = P, P - 1, ..., 1; J = 1, 2, ..., Q +* + SCALE = ONE + SCALOC = ONE + DO 120 J = P + 2, Q + JS = IWORK( J ) + JSP1 = JS + 1 + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + DO 110 I = P, 1, -1 +* + IS = IWORK( I ) + ISP1 = IS + 1 + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + ZDIM = MB*NB*2 +* + IF( ( MB.EQ.1 ) .AND. ( NB.EQ.1 ) ) THEN +* +* Build a 2-by-2 system Z * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = D( IS, IS ) + Z( 1, 2 ) = -B( JS, JS ) + Z( 2, 2 ) = -E( JS, JS ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = F( IS, JS ) +* +* Solve Z * x = RHS +* + CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR +* + IF( IJOB.EQ.0 ) THEN + CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, + $ SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 50 K = 1, N + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 50 CONTINUE + SCALE = SCALE*SCALOC + END IF + ELSE + CALL SLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM, + $ RDSCAL, IPIV, JPIV ) + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + F( IS, JS ) = RHS( 2 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( I.GT.1 ) THEN + ALPHA = -RHS( 1 ) + CALL SAXPY( IS-1, ALPHA, A( 1, IS ), 1, C( 1, JS ), + $ 1 ) + CALL SAXPY( IS-1, ALPHA, D( 1, IS ), 1, F( 1, JS ), + $ 1 ) + END IF + IF( J.LT.Q ) THEN + CALL SAXPY( N-JE, RHS( 2 ), B( JS, JE+1 ), LDB, + $ C( IS, JE+1 ), LDC ) + CALL SAXPY( N-JE, RHS( 2 ), E( JS, JE+1 ), LDE, + $ F( IS, JE+1 ), LDF ) + END IF +* + ELSE IF( ( MB.EQ.1 ) .AND. ( NB.EQ.2 ) ) THEN +* +* Build a 4-by-4 system Z * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = ZERO + Z( 3, 1 ) = D( IS, IS ) + Z( 4, 1 ) = ZERO +* + Z( 1, 2 ) = ZERO + Z( 2, 2 ) = A( IS, IS ) + Z( 3, 2 ) = ZERO + Z( 4, 2 ) = D( IS, IS ) +* + Z( 1, 3 ) = -B( JS, JS ) + Z( 2, 3 ) = -B( JS, JSP1 ) + Z( 3, 3 ) = -E( JS, JS ) + Z( 4, 3 ) = -E( JS, JSP1 ) +* + Z( 1, 4 ) = -B( JSP1, JS ) + Z( 2, 4 ) = -B( JSP1, JSP1 ) + Z( 3, 4 ) = ZERO + Z( 4, 4 ) = -E( JSP1, JSP1 ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = C( IS, JSP1 ) + RHS( 3 ) = F( IS, JS ) + RHS( 4 ) = F( IS, JSP1 ) +* +* Solve Z * x = RHS +* + CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR +* + IF( IJOB.EQ.0 ) THEN + CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, + $ SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 60 K = 1, N + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 60 CONTINUE + SCALE = SCALE*SCALOC + END IF + ELSE + CALL SLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM, + $ RDSCAL, IPIV, JPIV ) + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + C( IS, JSP1 ) = RHS( 2 ) + F( IS, JS ) = RHS( 3 ) + F( IS, JSP1 ) = RHS( 4 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( I.GT.1 ) THEN + CALL SGER( IS-1, NB, -ONE, A( 1, IS ), 1, RHS( 1 ), + $ 1, C( 1, JS ), LDC ) + CALL SGER( IS-1, NB, -ONE, D( 1, IS ), 1, RHS( 1 ), + $ 1, F( 1, JS ), LDF ) + END IF + IF( J.LT.Q ) THEN + CALL SAXPY( N-JE, RHS( 3 ), B( JS, JE+1 ), LDB, + $ C( IS, JE+1 ), LDC ) + CALL SAXPY( N-JE, RHS( 3 ), E( JS, JE+1 ), LDE, + $ F( IS, JE+1 ), LDF ) + CALL SAXPY( N-JE, RHS( 4 ), B( JSP1, JE+1 ), LDB, + $ C( IS, JE+1 ), LDC ) + CALL SAXPY( N-JE, RHS( 4 ), E( JSP1, JE+1 ), LDE, + $ F( IS, JE+1 ), LDF ) + END IF +* + ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.1 ) ) THEN +* +* Build a 4-by-4 system Z * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = A( ISP1, IS ) + Z( 3, 1 ) = D( IS, IS ) + Z( 4, 1 ) = ZERO +* + Z( 1, 2 ) = A( IS, ISP1 ) + Z( 2, 2 ) = A( ISP1, ISP1 ) + Z( 3, 2 ) = D( IS, ISP1 ) + Z( 4, 2 ) = D( ISP1, ISP1 ) +* + Z( 1, 3 ) = -B( JS, JS ) + Z( 2, 3 ) = ZERO + Z( 3, 3 ) = -E( JS, JS ) + Z( 4, 3 ) = ZERO +* + Z( 1, 4 ) = ZERO + Z( 2, 4 ) = -B( JS, JS ) + Z( 3, 4 ) = ZERO + Z( 4, 4 ) = -E( JS, JS ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = C( ISP1, JS ) + RHS( 3 ) = F( IS, JS ) + RHS( 4 ) = F( ISP1, JS ) +* +* Solve Z * x = RHS +* + CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR + IF( IJOB.EQ.0 ) THEN + CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, + $ SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 70 K = 1, N + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 70 CONTINUE + SCALE = SCALE*SCALOC + END IF + ELSE + CALL SLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM, + $ RDSCAL, IPIV, JPIV ) + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + C( ISP1, JS ) = RHS( 2 ) + F( IS, JS ) = RHS( 3 ) + F( ISP1, JS ) = RHS( 4 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( I.GT.1 ) THEN + CALL SGEMV( 'N', IS-1, MB, -ONE, A( 1, IS ), LDA, + $ RHS( 1 ), 1, ONE, C( 1, JS ), 1 ) + CALL SGEMV( 'N', IS-1, MB, -ONE, D( 1, IS ), LDD, + $ RHS( 1 ), 1, ONE, F( 1, JS ), 1 ) + END IF + IF( J.LT.Q ) THEN + CALL SGER( MB, N-JE, ONE, RHS( 3 ), 1, + $ B( JS, JE+1 ), LDB, C( IS, JE+1 ), LDC ) + CALL SGER( MB, N-JE, ONE, RHS( 3 ), 1, + $ E( JS, JE+1 ), LDB, F( IS, JE+1 ), LDC ) + END IF +* + ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.2 ) ) THEN +* +* Build an 8-by-8 system Z * x = RHS +* + CALL SCOPY( LDZ*LDZ, ZERO, 0, Z, 1 ) +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = A( ISP1, IS ) + Z( 5, 1 ) = D( IS, IS ) +* + Z( 1, 2 ) = A( IS, ISP1 ) + Z( 2, 2 ) = A( ISP1, ISP1 ) + Z( 5, 2 ) = D( IS, ISP1 ) + Z( 6, 2 ) = D( ISP1, ISP1 ) +* + Z( 3, 3 ) = A( IS, IS ) + Z( 4, 3 ) = A( ISP1, IS ) + Z( 7, 3 ) = D( IS, IS ) +* + Z( 3, 4 ) = A( IS, ISP1 ) + Z( 4, 4 ) = A( ISP1, ISP1 ) + Z( 7, 4 ) = D( IS, ISP1 ) + Z( 8, 4 ) = D( ISP1, ISP1 ) +* + Z( 1, 5 ) = -B( JS, JS ) + Z( 3, 5 ) = -B( JS, JSP1 ) + Z( 5, 5 ) = -E( JS, JS ) + Z( 7, 5 ) = -E( JS, JSP1 ) +* + Z( 2, 6 ) = -B( JS, JS ) + Z( 4, 6 ) = -B( JS, JSP1 ) + Z( 6, 6 ) = -E( JS, JS ) + Z( 8, 6 ) = -E( JS, JSP1 ) +* + Z( 1, 7 ) = -B( JSP1, JS ) + Z( 3, 7 ) = -B( JSP1, JSP1 ) + Z( 7, 7 ) = -E( JSP1, JSP1 ) +* + Z( 2, 8 ) = -B( JSP1, JS ) + Z( 4, 8 ) = -B( JSP1, JSP1 ) + Z( 8, 8 ) = -E( JSP1, JSP1 ) +* +* Set up right hand side(s) +* + K = 1 + II = MB*NB + 1 + DO 80 JJ = 0, NB - 1 + CALL SCOPY( MB, C( IS, JS+JJ ), 1, RHS( K ), 1 ) + CALL SCOPY( MB, F( IS, JS+JJ ), 1, RHS( II ), 1 ) + K = K + MB + II = II + MB + 80 CONTINUE +* +* Solve Z * x = RHS +* + CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR + IF( IJOB.EQ.0 ) THEN + CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, + $ SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 90 K = 1, N + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 90 CONTINUE + SCALE = SCALE*SCALOC + END IF + ELSE + CALL SLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM, + $ RDSCAL, IPIV, JPIV ) + END IF +* +* Unpack solution vector(s) +* + K = 1 + II = MB*NB + 1 + DO 100 JJ = 0, NB - 1 + CALL SCOPY( MB, RHS( K ), 1, C( IS, JS+JJ ), 1 ) + CALL SCOPY( MB, RHS( II ), 1, F( IS, JS+JJ ), 1 ) + K = K + MB + II = II + MB + 100 CONTINUE +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( I.GT.1 ) THEN + CALL SGEMM( 'N', 'N', IS-1, NB, MB, -ONE, + $ A( 1, IS ), LDA, RHS( 1 ), MB, ONE, + $ C( 1, JS ), LDC ) + CALL SGEMM( 'N', 'N', IS-1, NB, MB, -ONE, + $ D( 1, IS ), LDD, RHS( 1 ), MB, ONE, + $ F( 1, JS ), LDF ) + END IF + IF( J.LT.Q ) THEN + K = MB*NB + 1 + CALL SGEMM( 'N', 'N', MB, N-JE, NB, ONE, RHS( K ), + $ MB, B( JS, JE+1 ), LDB, ONE, + $ C( IS, JE+1 ), LDC ) + CALL SGEMM( 'N', 'N', MB, N-JE, NB, ONE, RHS( K ), + $ MB, E( JS, JE+1 ), LDE, ONE, + $ F( IS, JE+1 ), LDF ) + END IF +* + END IF +* + 110 CONTINUE + 120 CONTINUE + ELSE +* +* Solve (I, J) - subsystem +* A(I, I)' * R(I, J) + D(I, I)' * L(J, J) = C(I, J) +* R(I, I) * B(J, J) + L(I, J) * E(J, J) = -F(I, J) +* for I = 1, 2, ..., P, J = Q, Q - 1, ..., 1 +* + SCALE = ONE + SCALOC = ONE + DO 200 I = 1, P +* + IS = IWORK( I ) + ISP1 = IS + 1 + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + DO 190 J = Q, P + 2, -1 +* + JS = IWORK( J ) + JSP1 = JS + 1 + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + ZDIM = MB*NB*2 + IF( ( MB.EQ.1 ) .AND. ( NB.EQ.1 ) ) THEN +* +* Build a 2-by-2 system Z' * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = -B( JS, JS ) + Z( 1, 2 ) = D( IS, IS ) + Z( 2, 2 ) = -E( JS, JS ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = F( IS, JS ) +* +* Solve Z' * x = RHS +* + CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR +* + CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 130 K = 1, N + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 130 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + F( IS, JS ) = RHS( 2 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( J.GT.P+2 ) THEN + ALPHA = RHS( 1 ) + CALL SAXPY( JS-1, ALPHA, B( 1, JS ), 1, F( IS, 1 ), + $ LDF ) + ALPHA = RHS( 2 ) + CALL SAXPY( JS-1, ALPHA, E( 1, JS ), 1, F( IS, 1 ), + $ LDF ) + END IF + IF( I.LT.P ) THEN + ALPHA = -RHS( 1 ) + CALL SAXPY( M-IE, ALPHA, A( IS, IE+1 ), LDA, + $ C( IE+1, JS ), 1 ) + ALPHA = -RHS( 2 ) + CALL SAXPY( M-IE, ALPHA, D( IS, IE+1 ), LDD, + $ C( IE+1, JS ), 1 ) + END IF +* + ELSE IF( ( MB.EQ.1 ) .AND. ( NB.EQ.2 ) ) THEN +* +* Build a 4-by-4 system Z' * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = ZERO + Z( 3, 1 ) = -B( JS, JS ) + Z( 4, 1 ) = -B( JSP1, JS ) +* + Z( 1, 2 ) = ZERO + Z( 2, 2 ) = A( IS, IS ) + Z( 3, 2 ) = -B( JS, JSP1 ) + Z( 4, 2 ) = -B( JSP1, JSP1 ) +* + Z( 1, 3 ) = D( IS, IS ) + Z( 2, 3 ) = ZERO + Z( 3, 3 ) = -E( JS, JS ) + Z( 4, 3 ) = ZERO +* + Z( 1, 4 ) = ZERO + Z( 2, 4 ) = D( IS, IS ) + Z( 3, 4 ) = -E( JS, JSP1 ) + Z( 4, 4 ) = -E( JSP1, JSP1 ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = C( IS, JSP1 ) + RHS( 3 ) = F( IS, JS ) + RHS( 4 ) = F( IS, JSP1 ) +* +* Solve Z' * x = RHS +* + CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR + CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 140 K = 1, N + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 140 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + C( IS, JSP1 ) = RHS( 2 ) + F( IS, JS ) = RHS( 3 ) + F( IS, JSP1 ) = RHS( 4 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( J.GT.P+2 ) THEN + CALL SAXPY( JS-1, RHS( 1 ), B( 1, JS ), 1, + $ F( IS, 1 ), LDF ) + CALL SAXPY( JS-1, RHS( 2 ), B( 1, JSP1 ), 1, + $ F( IS, 1 ), LDF ) + CALL SAXPY( JS-1, RHS( 3 ), E( 1, JS ), 1, + $ F( IS, 1 ), LDF ) + CALL SAXPY( JS-1, RHS( 4 ), E( 1, JSP1 ), 1, + $ F( IS, 1 ), LDF ) + END IF + IF( I.LT.P ) THEN + CALL SGER( M-IE, NB, -ONE, A( IS, IE+1 ), LDA, + $ RHS( 1 ), 1, C( IE+1, JS ), LDC ) + CALL SGER( M-IE, NB, -ONE, D( IS, IE+1 ), LDD, + $ RHS( 3 ), 1, C( IE+1, JS ), LDC ) + END IF +* + ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.1 ) ) THEN +* +* Build a 4-by-4 system Z' * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = A( IS, ISP1 ) + Z( 3, 1 ) = -B( JS, JS ) + Z( 4, 1 ) = ZERO +* + Z( 1, 2 ) = A( ISP1, IS ) + Z( 2, 2 ) = A( ISP1, ISP1 ) + Z( 3, 2 ) = ZERO + Z( 4, 2 ) = -B( JS, JS ) +* + Z( 1, 3 ) = D( IS, IS ) + Z( 2, 3 ) = D( IS, ISP1 ) + Z( 3, 3 ) = -E( JS, JS ) + Z( 4, 3 ) = ZERO +* + Z( 1, 4 ) = ZERO + Z( 2, 4 ) = D( ISP1, ISP1 ) + Z( 3, 4 ) = ZERO + Z( 4, 4 ) = -E( JS, JS ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = C( ISP1, JS ) + RHS( 3 ) = F( IS, JS ) + RHS( 4 ) = F( ISP1, JS ) +* +* Solve Z' * x = RHS +* + CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR +* + CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 150 K = 1, N + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 150 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + C( ISP1, JS ) = RHS( 2 ) + F( IS, JS ) = RHS( 3 ) + F( ISP1, JS ) = RHS( 4 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( J.GT.P+2 ) THEN + CALL SGER( MB, JS-1, ONE, RHS( 1 ), 1, B( 1, JS ), + $ 1, F( IS, 1 ), LDF ) + CALL SGER( MB, JS-1, ONE, RHS( 3 ), 1, E( 1, JS ), + $ 1, F( IS, 1 ), LDF ) + END IF + IF( I.LT.P ) THEN + CALL SGEMV( 'T', MB, M-IE, -ONE, A( IS, IE+1 ), + $ LDA, RHS( 1 ), 1, ONE, C( IE+1, JS ), + $ 1 ) + CALL SGEMV( 'T', MB, M-IE, -ONE, D( IS, IE+1 ), + $ LDD, RHS( 3 ), 1, ONE, C( IE+1, JS ), + $ 1 ) + END IF +* + ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.2 ) ) THEN +* +* Build an 8-by-8 system Z' * x = RHS +* + CALL SCOPY( LDZ*LDZ, ZERO, 0, Z, 1 ) +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = A( IS, ISP1 ) + Z( 5, 1 ) = -B( JS, JS ) + Z( 7, 1 ) = -B( JSP1, JS ) +* + Z( 1, 2 ) = A( ISP1, IS ) + Z( 2, 2 ) = A( ISP1, ISP1 ) + Z( 6, 2 ) = -B( JS, JS ) + Z( 8, 2 ) = -B( JSP1, JS ) +* + Z( 3, 3 ) = A( IS, IS ) + Z( 4, 3 ) = A( IS, ISP1 ) + Z( 5, 3 ) = -B( JS, JSP1 ) + Z( 7, 3 ) = -B( JSP1, JSP1 ) +* + Z( 3, 4 ) = A( ISP1, IS ) + Z( 4, 4 ) = A( ISP1, ISP1 ) + Z( 6, 4 ) = -B( JS, JSP1 ) + Z( 8, 4 ) = -B( JSP1, JSP1 ) +* + Z( 1, 5 ) = D( IS, IS ) + Z( 2, 5 ) = D( IS, ISP1 ) + Z( 5, 5 ) = -E( JS, JS ) +* + Z( 2, 6 ) = D( ISP1, ISP1 ) + Z( 6, 6 ) = -E( JS, JS ) +* + Z( 3, 7 ) = D( IS, IS ) + Z( 4, 7 ) = D( IS, ISP1 ) + Z( 5, 7 ) = -E( JS, JSP1 ) + Z( 7, 7 ) = -E( JSP1, JSP1 ) +* + Z( 4, 8 ) = D( ISP1, ISP1 ) + Z( 6, 8 ) = -E( JS, JSP1 ) + Z( 8, 8 ) = -E( JSP1, JSP1 ) +* +* Set up right hand side(s) +* + K = 1 + II = MB*NB + 1 + DO 160 JJ = 0, NB - 1 + CALL SCOPY( MB, C( IS, JS+JJ ), 1, RHS( K ), 1 ) + CALL SCOPY( MB, F( IS, JS+JJ ), 1, RHS( II ), 1 ) + K = K + MB + II = II + MB + 160 CONTINUE +* +* +* Solve Z' * x = RHS +* + CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR +* + CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 170 K = 1, N + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 170 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Unpack solution vector(s) +* + K = 1 + II = MB*NB + 1 + DO 180 JJ = 0, NB - 1 + CALL SCOPY( MB, RHS( K ), 1, C( IS, JS+JJ ), 1 ) + CALL SCOPY( MB, RHS( II ), 1, F( IS, JS+JJ ), 1 ) + K = K + MB + II = II + MB + 180 CONTINUE +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( J.GT.P+2 ) THEN + CALL SGEMM( 'N', 'T', MB, JS-1, NB, ONE, + $ C( IS, JS ), LDC, B( 1, JS ), LDB, ONE, + $ F( IS, 1 ), LDF ) + CALL SGEMM( 'N', 'T', MB, JS-1, NB, ONE, + $ F( IS, JS ), LDF, E( 1, JS ), LDE, ONE, + $ F( IS, 1 ), LDF ) + END IF + IF( I.LT.P ) THEN + CALL SGEMM( 'T', 'N', M-IE, NB, MB, -ONE, + $ A( IS, IE+1 ), LDA, C( IS, JS ), LDC, + $ ONE, C( IE+1, JS ), LDC ) + CALL SGEMM( 'T', 'N', M-IE, NB, MB, -ONE, + $ D( IS, IE+1 ), LDD, F( IS, JS ), LDF, + $ ONE, C( IE+1, JS ), LDC ) + END IF +* + END IF +* + 190 CONTINUE + 200 CONTINUE +* + END IF + RETURN +* +* End of STGSY2 +* + END diff --git a/costa/native/external/lapack/stgsyl.f b/costa/native/external/lapack/stgsyl.f new file mode 100644 index 000000000..d87f6b548 --- /dev/null +++ b/costa/native/external/lapack/stgsyl.f @@ -0,0 +1,534 @@ + SUBROUTINE STGSYL( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, DIF, WORK, LWORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, + $ LWORK, M, N + REAL DIF, SCALE +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ D( LDD, * ), E( LDE, * ), F( LDF, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* STGSYL solves the generalized Sylvester equation: +* +* A * R - L * B = scale * C (1) +* D * R - L * E = scale * F +* +* where R and L are unknown m-by-n matrices, (A, D), (B, E) and +* (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n, +* respectively, with real entries. (A, D) and (B, E) must be in +* generalized (real) Schur canonical form, i.e. A, B are upper quasi +* triangular and D, E are upper triangular. +* +* The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output +* scaling factor chosen to avoid overflow. +* +* In matrix notation (1) is equivalent to solve Zx = scale b, where +* Z is defined as +* +* Z = [ kron(In, A) -kron(B', Im) ] (2) +* [ kron(In, D) -kron(E', Im) ]. +* +* Here Ik is the identity matrix of size k and X' is the transpose of +* X. kron(X, Y) is the Kronecker product between the matrices X and Y. +* +* If TRANS = 'T', STGSYL solves the transposed system Z'*y = scale*b, +* which is equivalent to solve for R and L in +* +* A' * R + D' * L = scale * C (3) +* R * B' + L * E' = scale * (-F) +* +* This case (TRANS = 'T') is used to compute an one-norm-based estimate +* of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D) +* and (B,E), using SLACON. +* +* If IJOB >= 1, STGSYL computes a Frobenius norm-based estimate +* of Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the +* reciprocal of the smallest singular value of Z. See [1-2] for more +* information. +* +* This is a level 3 BLAS algorithm. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* = 'N', solve the generalized Sylvester equation (1). +* = 'T', solve the 'transposed' system (3). +* +* IJOB (input) INTEGER +* Specifies what kind of functionality to be performed. +* =0: solve (1) only. +* =1: The functionality of 0 and 3. +* =2: The functionality of 0 and 4. +* =3: Only an estimate of Dif[(A,D), (B,E)] is computed. +* (look ahead strategy IJOB = 1 is used). +* =4: Only an estimate of Dif[(A,D), (B,E)] is computed. +* ( SGECON on sub-systems is used ). +* Not referenced if TRANS = 'T'. +* +* M (input) INTEGER +* The order of the matrices A and D, and the row dimension of +* the matrices C, F, R and L. +* +* N (input) INTEGER +* The order of the matrices B and E, and the column dimension +* of the matrices C, F, R and L. +* +* A (input) REAL array, dimension (LDA, M) +* The upper quasi triangular matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1, M). +* +* B (input) REAL array, dimension (LDB, N) +* The upper quasi triangular matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1, N). +* +* C (input/output) REAL array, dimension (LDC, N) +* On entry, C contains the right-hand-side of the first matrix +* equation in (1) or (3). +* On exit, if IJOB = 0, 1 or 2, C has been overwritten by +* the solution R. If IJOB = 3 or 4 and TRANS = 'N', C holds R, +* the solution achieved during the computation of the +* Dif-estimate. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1, M). +* +* D (input) REAL array, dimension (LDD, M) +* The upper triangular matrix D. +* +* LDD (input) INTEGER +* The leading dimension of the array D. LDD >= max(1, M). +* +* E (input) REAL array, dimension (LDE, N) +* The upper triangular matrix E. +* +* LDE (input) INTEGER +* The leading dimension of the array E. LDE >= max(1, N). +* +* F (input/output) REAL array, dimension (LDF, N) +* On entry, F contains the right-hand-side of the second matrix +* equation in (1) or (3). +* On exit, if IJOB = 0, 1 or 2, F has been overwritten by +* the solution L. If IJOB = 3 or 4 and TRANS = 'N', F holds L, +* the solution achieved during the computation of the +* Dif-estimate. +* +* LDF (input) INTEGER +* The leading dimension of the array F. LDF >= max(1, M). +* +* DIF (output) REAL +* On exit DIF is the reciprocal of a lower bound of the +* reciprocal of the Dif-function, i.e. DIF is an upper bound of +* Dif[(A,D), (B,E)] = sigma_min(Z), where Z as in (2). +* IF IJOB = 0 or TRANS = 'T', DIF is not touched. +* +* SCALE (output) REAL +* On exit SCALE is the scaling factor in (1) or (3). +* If 0 < SCALE < 1, C and F hold the solutions R and L, resp., +* to a slightly perturbed system but the input matrices A, B, D +* and E have not been changed. If SCALE = 0, C and F hold the +* solutions R and L, respectively, to the homogeneous system +* with C = F = 0. Normally, SCALE = 1. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* If IJOB = 0, WORK is not referenced. Otherwise, +* on exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK > = 1. +* If IJOB = 1 or 2 and TRANS = 'N', LWORK >= 2*M*N. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace) INTEGER array, dimension (M+N+6) +* +* INFO (output) INTEGER +* =0: successful exit +* <0: If INFO = -i, the i-th argument had an illegal value. +* >0: (A, D) and (B, E) have common or close eigenvalues. +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* [1] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +* for Solving the Generalized Sylvester Equation and Estimating the +* Separation between Regular Matrix Pairs, Report UMINF - 93.23, +* Department of Computing Science, Umea University, S-901 87 Umea, +* Sweden, December 1993, Revised April 1994, Also as LAPACK Working +* Note 75. To appear in ACM Trans. on Math. Software, Vol 22, +* No 1, 1996. +* +* [2] B. Kagstrom, A Perturbation Analysis of the Generalized Sylvester +* Equation (AR - LB, DR - LE ) = (C, F), SIAM J. Matrix Anal. +* Appl., 15(4):1045-1060, 1994 +* +* [3] B. Kagstrom and L. Westin, Generalized Schur Methods with +* Condition Estimators for Solving the Generalized Sylvester +* Equation, IEEE Transactions on Automatic Control, Vol. 34, No. 7, +* July 1989, pp 745-751. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOTRAN + INTEGER I, IE, IFUNC, IROUND, IS, ISOLVE, J, JE, JS, K, + $ LINFO, LWMIN, MB, NB, P, PPQQ, PQ, Q + REAL DSCALE, DSUM, SCALE2, SCALOC +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, SLACPY, SSCAL, STGSY2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test input parameters +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( ( IJOB.EQ.1 .OR. IJOB.EQ.2 ) .AND. NOTRAN ) THEN + LWMIN = MAX( 1, 2*M*N ) + ELSE + LWMIN = 1 + END IF +* + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -1 + ELSE IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.4 ) ) THEN + INFO = -2 + ELSE IF( M.LE.0 ) THEN + INFO = -3 + ELSE IF( N.LE.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDD.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDE.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDF.LT.MAX( 1, M ) ) THEN + INFO = -16 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STGSYL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Determine optimal block sizes MB and NB +* + MB = ILAENV( 2, 'STGSYL', TRANS, M, N, -1, -1 ) + NB = ILAENV( 5, 'STGSYL', TRANS, M, N, -1, -1 ) +* + ISOLVE = 1 + IFUNC = 0 + IF( IJOB.GE.3 .AND. NOTRAN ) THEN + IFUNC = IJOB - 2 + DO 10 J = 1, N + CALL SCOPY( M, ZERO, 0, C( 1, J ), 1 ) + CALL SCOPY( M, ZERO, 0, F( 1, J ), 1 ) + 10 CONTINUE + ELSE IF( IJOB.GE.1 .AND. NOTRAN ) THEN + ISOLVE = 2 + END IF +* + IF( ( MB.LE.1 .AND. NB.LE.1 ) .OR. ( MB.GE.M .AND. NB.GE.N ) ) + $ THEN +* + DO 30 IROUND = 1, ISOLVE +* +* Use unblocked Level 2 solver +* + DSCALE = ZERO + DSUM = ONE + PQ = 0 + CALL STGSY2( TRANS, IFUNC, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, DSUM, DSCALE, + $ IWORK, PQ, INFO ) + IF( DSCALE.NE.ZERO ) THEN + IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN + DIF = SQRT( REAL( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) ) + ELSE + DIF = SQRT( REAL( PQ ) ) / ( DSCALE*SQRT( DSUM ) ) + END IF + END IF +* + IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN + IFUNC = IJOB + SCALE2 = SCALE + CALL SLACPY( 'F', M, N, C, LDC, WORK, M ) + CALL SLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M ) + DO 20 J = 1, N + CALL SCOPY( M, ZERO, 0, C( 1, J ), 1 ) + CALL SCOPY( M, ZERO, 0, F( 1, J ), 1 ) + 20 CONTINUE + ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN + CALL SLACPY( 'F', M, N, WORK, M, C, LDC ) + CALL SLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF ) + SCALE = SCALE2 + END IF + 30 CONTINUE +* + RETURN + END IF +* +* Determine block structure of A +* + P = 0 + I = 1 + 40 CONTINUE + IF( I.GT.M ) + $ GO TO 50 + P = P + 1 + IWORK( P ) = I + I = I + MB + IF( I.GE.M ) + $ GO TO 50 + IF( A( I, I-1 ).NE.ZERO ) + $ I = I + 1 + GO TO 40 + 50 CONTINUE +* + IWORK( P+1 ) = M + 1 + IF( IWORK( P ).EQ.IWORK( P+1 ) ) + $ P = P - 1 +* +* Determine block structure of B +* + Q = P + 1 + J = 1 + 60 CONTINUE + IF( J.GT.N ) + $ GO TO 70 + Q = Q + 1 + IWORK( Q ) = J + J = J + NB + IF( J.GE.N ) + $ GO TO 70 + IF( B( J, J-1 ).NE.ZERO ) + $ J = J + 1 + GO TO 60 + 70 CONTINUE +* + IWORK( Q+1 ) = N + 1 + IF( IWORK( Q ).EQ.IWORK( Q+1 ) ) + $ Q = Q - 1 +* + IF( NOTRAN ) THEN +* + DO 150 IROUND = 1, ISOLVE +* +* Solve (I, J)-subsystem +* A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) +* D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) +* for I = P, P - 1,..., 1; J = 1, 2,..., Q +* + DSCALE = ZERO + DSUM = ONE + PQ = 0 + SCALE = ONE + DO 130 J = P + 2, Q + JS = IWORK( J ) + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + DO 120 I = P, 1, -1 + IS = IWORK( I ) + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + PPQQ = 0 + CALL STGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA, + $ B( JS, JS ), LDB, C( IS, JS ), LDC, + $ D( IS, IS ), LDD, E( JS, JS ), LDE, + $ F( IS, JS ), LDF, SCALOC, DSUM, DSCALE, + $ IWORK( Q+2 ), PPQQ, LINFO ) + IF( LINFO.GT.0 ) + $ INFO = LINFO +* + PQ = PQ + PPQQ + IF( SCALOC.NE.ONE ) THEN + DO 80 K = 1, JS - 1 + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 80 CONTINUE + DO 90 K = JS, JE + CALL SSCAL( IS-1, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( IS-1, SCALOC, F( 1, K ), 1 ) + 90 CONTINUE + DO 100 K = JS, JE + CALL SSCAL( M-IE, SCALOC, C( IE+1, K ), 1 ) + CALL SSCAL( M-IE, SCALOC, F( IE+1, K ), 1 ) + 100 CONTINUE + DO 110 K = JE + 1, N + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 110 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( I.GT.1 ) THEN + CALL SGEMM( 'N', 'N', IS-1, NB, MB, -ONE, + $ A( 1, IS ), LDA, C( IS, JS ), LDC, ONE, + $ C( 1, JS ), LDC ) + CALL SGEMM( 'N', 'N', IS-1, NB, MB, -ONE, + $ D( 1, IS ), LDD, C( IS, JS ), LDC, ONE, + $ F( 1, JS ), LDF ) + END IF + IF( J.LT.Q ) THEN + CALL SGEMM( 'N', 'N', MB, N-JE, NB, ONE, + $ F( IS, JS ), LDF, B( JS, JE+1 ), LDB, + $ ONE, C( IS, JE+1 ), LDC ) + CALL SGEMM( 'N', 'N', MB, N-JE, NB, ONE, + $ F( IS, JS ), LDF, E( JS, JE+1 ), LDE, + $ ONE, F( IS, JE+1 ), LDF ) + END IF + 120 CONTINUE + 130 CONTINUE + IF( DSCALE.NE.ZERO ) THEN + IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN + DIF = SQRT( REAL( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) ) + ELSE + DIF = SQRT( REAL( PQ ) ) / ( DSCALE*SQRT( DSUM ) ) + END IF + END IF + IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN + IFUNC = IJOB + SCALE2 = SCALE + CALL SLACPY( 'F', M, N, C, LDC, WORK, M ) + CALL SLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M ) + DO 140 J = 1, N + CALL SCOPY( M, ZERO, 0, C( 1, J ), 1 ) + CALL SCOPY( M, ZERO, 0, F( 1, J ), 1 ) + 140 CONTINUE + ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN + CALL SLACPY( 'F', M, N, WORK, M, C, LDC ) + CALL SLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF ) + SCALE = SCALE2 + END IF + 150 CONTINUE +* + ELSE +* +* Solve transposed (I, J)-subsystem +* A(I, I)' * R(I, J) + D(I, I)' * L(I, J) = C(I, J) +* R(I, J) * B(J, J)' + L(I, J) * E(J, J)' = -F(I, J) +* for I = 1,2,..., P; J = Q, Q-1,..., 1 +* + SCALE = ONE + DO 210 I = 1, P + IS = IWORK( I ) + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + DO 200 J = Q, P + 2, -1 + JS = IWORK( J ) + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + CALL STGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA, + $ B( JS, JS ), LDB, C( IS, JS ), LDC, + $ D( IS, IS ), LDD, E( JS, JS ), LDE, + $ F( IS, JS ), LDF, SCALOC, DSUM, DSCALE, + $ IWORK( Q+2 ), PPQQ, LINFO ) + IF( LINFO.GT.0 ) + $ INFO = LINFO + IF( SCALOC.NE.ONE ) THEN + DO 160 K = 1, JS - 1 + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 160 CONTINUE + DO 170 K = JS, JE + CALL SSCAL( IS-1, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( IS-1, SCALOC, F( 1, K ), 1 ) + 170 CONTINUE + DO 180 K = JS, JE + CALL SSCAL( M-IE, SCALOC, C( IE+1, K ), 1 ) + CALL SSCAL( M-IE, SCALOC, F( IE+1, K ), 1 ) + 180 CONTINUE + DO 190 K = JE + 1, N + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 190 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Substitute R(I, J) and L(I, J) into remaining equation. +* + IF( J.GT.P+2 ) THEN + CALL SGEMM( 'N', 'T', MB, JS-1, NB, ONE, C( IS, JS ), + $ LDC, B( 1, JS ), LDB, ONE, F( IS, 1 ), + $ LDF ) + CALL SGEMM( 'N', 'T', MB, JS-1, NB, ONE, F( IS, JS ), + $ LDF, E( 1, JS ), LDE, ONE, F( IS, 1 ), + $ LDF ) + END IF + IF( I.LT.P ) THEN + CALL SGEMM( 'T', 'N', M-IE, NB, MB, -ONE, + $ A( IS, IE+1 ), LDA, C( IS, JS ), LDC, ONE, + $ C( IE+1, JS ), LDC ) + CALL SGEMM( 'T', 'N', M-IE, NB, MB, -ONE, + $ D( IS, IE+1 ), LDD, F( IS, JS ), LDF, ONE, + $ C( IE+1, JS ), LDC ) + END IF + 200 CONTINUE + 210 CONTINUE +* + END IF +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of STGSYL +* + END diff --git a/costa/native/external/lapack/stpcon.f b/costa/native/external/lapack/stpcon.f new file mode 100644 index 000000000..8adbadb4f --- /dev/null +++ b/costa/native/external/lapack/stpcon.f @@ -0,0 +1,187 @@ + SUBROUTINE STPCON( NORM, UPLO, DIAG, N, AP, RCOND, WORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, N + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* STPCON estimates the reciprocal of the condition number of a packed +* triangular matrix A, in either the 1-norm or the infinity-norm. +* +* The norm of A is computed and an estimate is obtained for +* norm(inv(A)), then the reciprocal of the condition number is +* computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input) REAL array, dimension (N*(N+1)/2) +* The upper or lower triangular matrix A, packed columnwise in +* a linear array. The j-th column of A is stored in the array +* AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* If DIAG = 'U', the diagonal elements of A are not referenced +* and are assumed to be 1. +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + REAL AINVNM, ANORM, SCALE, SMLNUM, XNORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH, SLANTP + EXTERNAL LSAME, ISAMAX, SLAMCH, SLANTP +* .. +* .. External Subroutines .. + EXTERNAL SLACON, SLATPS, SRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = SLAMCH( 'Safe minimum' )*REAL( MAX( 1, N ) ) +* +* Compute the norm of the triangular matrix A. +* + ANORM = SLANTP( NORM, UPLO, DIAG, N, AP, WORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL SLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL SLATPS( UPLO, 'No transpose', DIAG, NORMIN, N, AP, + $ WORK, SCALE, WORK( 2*N+1 ), INFO ) + ELSE +* +* Multiply by inv(A'). +* + CALL SLATPS( UPLO, 'Transpose', DIAG, NORMIN, N, AP, + $ WORK, SCALE, WORK( 2*N+1 ), INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = ISAMAX( N, WORK, 1 ) + XNORM = ABS( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL SRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of STPCON +* + END diff --git a/costa/native/external/lapack/stprfs.f b/costa/native/external/lapack/stprfs.f new file mode 100644 index 000000000..b4ad7564c --- /dev/null +++ b/costa/native/external/lapack/stprfs.f @@ -0,0 +1,375 @@ + SUBROUTINE STPRFS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, X, LDX, + $ FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AP( * ), B( LDB, * ), BERR( * ), FERR( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* STPRFS provides error bounds and backward error estimates for the +* solution to a system of linear equations with a triangular packed +* coefficient matrix. +* +* The solution matrix X must be computed by STPTRS or some other +* means before entering this routine. STPRFS does not do iterative +* refinement because doing so cannot improve the backward error. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose = Transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input) REAL array, dimension (N*(N+1)/2) +* The upper or lower triangular matrix A, packed columnwise in +* a linear array. The j-th column of A is stored in the array +* AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* If DIAG = 'U', the diagonal elements of A are not referenced +* and are assumed to be 1. +* +* B (input) REAL array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input) REAL array, dimension (LDX,NRHS) +* The solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANST + INTEGER I, J, K, KASE, KC, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SLACON, STPMV, STPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A or A', depending on TRANS. +* + CALL SCOPY( N, X( 1, J ), 1, WORK( N+1 ), 1 ) + CALL STPMV( UPLO, TRANS, DIAG, N, AP, WORK( N+1 ), 1 ) + CALL SAXPY( N, -ONE, B( 1, J ), 1, WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + KC = 1 + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = ABS( X( K, J ) ) + DO 30 I = 1, K + WORK( I ) = WORK( I ) + ABS( AP( KC+I-1 ) )*XK + 30 CONTINUE + KC = KC + K + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = ABS( X( K, J ) ) + DO 50 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( AP( KC+I-1 ) )*XK + 50 CONTINUE + WORK( K ) = WORK( K ) + XK + KC = KC + K + 60 CONTINUE + END IF + ELSE + KC = 1 + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = ABS( X( K, J ) ) + DO 70 I = K, N + WORK( I ) = WORK( I ) + ABS( AP( KC+I-K ) )*XK + 70 CONTINUE + KC = KC + N - K + 1 + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = ABS( X( K, J ) ) + DO 90 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( AP( KC+I-K ) )*XK + 90 CONTINUE + WORK( K ) = WORK( K ) + XK + KC = KC + N - K + 1 + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A')*abs(X) + abs(B). +* + IF( UPPER ) THEN + KC = 1 + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = 1, K + S = S + ABS( AP( KC+I-1 ) )*ABS( X( I, J ) ) + 110 CONTINUE + WORK( K ) = WORK( K ) + S + KC = KC + K + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = ABS( X( K, J ) ) + DO 130 I = 1, K - 1 + S = S + ABS( AP( KC+I-1 ) )*ABS( X( I, J ) ) + 130 CONTINUE + WORK( K ) = WORK( K ) + S + KC = KC + K + 140 CONTINUE + END IF + ELSE + KC = 1 + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, N + S = S + ABS( AP( KC+I-K ) )*ABS( X( I, J ) ) + 150 CONTINUE + WORK( K ) = WORK( K ) + S + KC = KC + N - K + 1 + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = ABS( X( K, J ) ) + DO 170 I = K + 1, N + S = S + ABS( AP( KC+I-K ) )*ABS( X( I, J ) ) + 170 CONTINUE + WORK( K ) = WORK( K ) + S + KC = KC + N - K + 1 + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use SLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL SLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)'). +* + CALL STPSV( UPLO, TRANST, DIAG, N, AP, WORK( N+1 ), 1 ) + DO 220 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 230 CONTINUE + CALL STPSV( UPLO, TRANS, DIAG, N, AP, WORK( N+1 ), 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of STPRFS +* + END diff --git a/costa/native/external/lapack/stptri.f b/costa/native/external/lapack/stptri.f new file mode 100644 index 000000000..4809d64cf --- /dev/null +++ b/costa/native/external/lapack/stptri.f @@ -0,0 +1,176 @@ + SUBROUTINE STPTRI( UPLO, DIAG, N, AP, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL AP( * ) +* .. +* +* Purpose +* ======= +* +* STPTRI computes the inverse of a real upper or lower triangular +* matrix A stored in packed format. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) REAL array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangular matrix A, stored +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*((2*n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* On exit, the (triangular) inverse of the original matrix, in +* the same packed storage format. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, A(i,i) is exactly zero. The triangular +* matrix is singular and its inverse can not be computed. +* +* Further Details +* =============== +* +* A triangular matrix A can be transferred to packed storage using one +* of the following program segments: +* +* UPLO = 'U': UPLO = 'L': +* +* JC = 1 JC = 1 +* DO 2 J = 1, N DO 2 J = 1, N +* DO 1 I = 1, J DO 1 I = J, N +* AP(JC+I-1) = A(I,J) AP(JC+I-J) = A(I,J) +* 1 CONTINUE 1 CONTINUE +* JC = JC + J JC = JC + N - J + 1 +* 2 CONTINUE 2 CONTINUE +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JC, JCLAST, JJ + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, STPMV, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STPTRI', -INFO ) + RETURN + END IF +* +* Check for singularity if non-unit. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + JJ = 0 + DO 10 INFO = 1, N + JJ = JJ + INFO + IF( AP( JJ ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE + JJ = 1 + DO 20 INFO = 1, N + IF( AP( JJ ).EQ.ZERO ) + $ RETURN + JJ = JJ + N - INFO + 1 + 20 CONTINUE + END IF + INFO = 0 + END IF +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix. +* + JC = 1 + DO 30 J = 1, N + IF( NOUNIT ) THEN + AP( JC+J-1 ) = ONE / AP( JC+J-1 ) + AJJ = -AP( JC+J-1 ) + ELSE + AJJ = -ONE + END IF +* +* Compute elements 1:j-1 of j-th column. +* + CALL STPMV( 'Upper', 'No transpose', DIAG, J-1, AP, + $ AP( JC ), 1 ) + CALL SSCAL( J-1, AJJ, AP( JC ), 1 ) + JC = JC + J + 30 CONTINUE +* + ELSE +* +* Compute inverse of lower triangular matrix. +* + JC = N*( N+1 ) / 2 + DO 40 J = N, 1, -1 + IF( NOUNIT ) THEN + AP( JC ) = ONE / AP( JC ) + AJJ = -AP( JC ) + ELSE + AJJ = -ONE + END IF + IF( J.LT.N ) THEN +* +* Compute elements j+1:n of j-th column. +* + CALL STPMV( 'Lower', 'No transpose', DIAG, N-J, + $ AP( JCLAST ), AP( JC+1 ), 1 ) + CALL SSCAL( N-J, AJJ, AP( JC+1 ), 1 ) + END IF + JCLAST = JC + JC = JC - N + J - 2 + 40 CONTINUE + END IF +* + RETURN +* +* End of STPTRI +* + END diff --git a/costa/native/external/lapack/stptrs.f b/costa/native/external/lapack/stptrs.f new file mode 100644 index 000000000..8e8ab9de0 --- /dev/null +++ b/costa/native/external/lapack/stptrs.f @@ -0,0 +1,154 @@ + SUBROUTINE STPTRS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* STPTRS solves a triangular system of the form +* +* A * X = B or A**T * X = B, +* +* where A is a triangular matrix of order N stored in packed format, +* and B is an N-by-NRHS matrix. A check is made to verify that A is +* nonsingular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose = Transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input) REAL array, dimension (N*(N+1)/2) +* The upper or lower triangular matrix A, packed columnwise in +* a linear array. The j-th column of A is stored in the array +* AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, if INFO = 0, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the i-th diagonal element of A is zero, +* indicating that the matrix is singular and the +* solutions X have not been computed. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JC +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL STPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + JC = 1 + DO 10 INFO = 1, N + IF( AP( JC+INFO-1 ).EQ.ZERO ) + $ RETURN + JC = JC + INFO + 10 CONTINUE + ELSE + JC = 1 + DO 20 INFO = 1, N + IF( AP( JC ).EQ.ZERO ) + $ RETURN + JC = JC + N - INFO + 1 + 20 CONTINUE + END IF + END IF + INFO = 0 +* +* Solve A * x = b or A' * x = b. +* + DO 30 J = 1, NRHS + CALL STPSV( UPLO, TRANS, DIAG, N, AP, B( 1, J ), 1 ) + 30 CONTINUE +* + RETURN +* +* End of STPTRS +* + END diff --git a/costa/native/external/lapack/strcon.f b/costa/native/external/lapack/strcon.f new file mode 100644 index 000000000..1e31a2ed3 --- /dev/null +++ b/costa/native/external/lapack/strcon.f @@ -0,0 +1,193 @@ + SUBROUTINE STRCON( NORM, UPLO, DIAG, N, A, LDA, RCOND, WORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, LDA, N + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* STRCON estimates the reciprocal of the condition number of a +* triangular matrix A, in either the 1-norm or the infinity-norm. +* +* The norm of A is computed and an estimate is obtained for +* norm(inv(A)), then the reciprocal of the condition number is +* computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* RCOND (output) REAL +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + REAL AINVNM, ANORM, SCALE, SMLNUM, XNORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH, SLANTR + EXTERNAL LSAME, ISAMAX, SLAMCH, SLANTR +* .. +* .. External Subroutines .. + EXTERNAL SLACON, SLATRS, SRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = SLAMCH( 'Safe minimum' )*REAL( MAX( 1, N ) ) +* +* Compute the norm of the triangular matrix A. +* + ANORM = SLANTR( NORM, UPLO, DIAG, N, N, A, LDA, WORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL SLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL SLATRS( UPLO, 'No transpose', DIAG, NORMIN, N, A, + $ LDA, WORK, SCALE, WORK( 2*N+1 ), INFO ) + ELSE +* +* Multiply by inv(A'). +* + CALL SLATRS( UPLO, 'Transpose', DIAG, NORMIN, N, A, LDA, + $ WORK, SCALE, WORK( 2*N+1 ), INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = ISAMAX( N, WORK, 1 ) + XNORM = ABS( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL SRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of STRCON +* + END diff --git a/costa/native/external/lapack/strevc.f b/costa/native/external/lapack/strevc.f new file mode 100644 index 000000000..83a0f3c4f --- /dev/null +++ b/costa/native/external/lapack/strevc.f @@ -0,0 +1,1006 @@ + SUBROUTINE STREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, + $ LDVR, MM, M, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDT, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + REAL T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* STREVC computes some or all of the right and/or left eigenvectors of +* a real upper quasi-triangular matrix T. +* +* The right eigenvector x and the left eigenvector y of T corresponding +* to an eigenvalue w are defined by: +* +* T*x = w*x, y'*T = w*y' +* +* where y' denotes the conjugate transpose of the vector y. +* +* If all eigenvectors are requested, the routine may either return the +* matrices X and/or Y of right or left eigenvectors of T, or the +* products Q*X and/or Q*Y, where Q is an input orthogonal +* matrix. If T was obtained from the real-Schur factorization of an +* original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of +* right or left eigenvectors of A. +* +* T must be in Schur canonical form (as returned by SHSEQR), that is, +* block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each +* 2-by-2 diagonal block has its diagonal elements equal and its +* off-diagonal elements of opposite sign. Corresponding to each 2-by-2 +* diagonal block is a complex conjugate pair of eigenvalues and +* eigenvectors; only one eigenvector of the pair is computed, namely +* the one corresponding to the eigenvalue with positive imaginary part. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'R': compute right eigenvectors only; +* = 'L': compute left eigenvectors only; +* = 'B': compute both right and left eigenvectors. +* +* HOWMNY (input) CHARACTER*1 +* = 'A': compute all right and/or left eigenvectors; +* = 'B': compute all right and/or left eigenvectors, +* and backtransform them using the input matrices +* supplied in VR and/or VL; +* = 'S': compute selected right and/or left eigenvectors, +* specified by the logical array SELECT. +* +* SELECT (input/output) LOGICAL array, dimension (N) +* If HOWMNY = 'S', SELECT specifies the eigenvectors to be +* computed. +* If HOWMNY = 'A' or 'B', SELECT is not referenced. +* To select the real eigenvector corresponding to a real +* eigenvalue w(j), SELECT(j) must be set to .TRUE.. To select +* the complex eigenvector corresponding to a complex conjugate +* pair w(j) and w(j+1), either SELECT(j) or SELECT(j+1) must be +* set to .TRUE.; then on exit SELECT(j) is .TRUE. and +* SELECT(j+1) is .FALSE.. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input) REAL array, dimension (LDT,N) +* The upper quasi-triangular matrix T in Schur canonical form. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* VL (input/output) REAL array, dimension (LDVL,MM) +* On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +* contain an N-by-N matrix Q (usually the orthogonal matrix Q +* of Schur vectors returned by SHSEQR). +* On exit, if SIDE = 'L' or 'B', VL contains: +* if HOWMNY = 'A', the matrix Y of left eigenvectors of T; +* VL has the same quasi-lower triangular form +* as T'. If T(i,i) is a real eigenvalue, then +* the i-th column VL(i) of VL is its +* corresponding eigenvector. If T(i:i+1,i:i+1) +* is a 2-by-2 block whose eigenvalues are +* complex-conjugate eigenvalues of T, then +* VL(i)+sqrt(-1)*VL(i+1) is the complex +* eigenvector corresponding to the eigenvalue +* with positive real part. +* if HOWMNY = 'B', the matrix Q*Y; +* if HOWMNY = 'S', the left eigenvectors of T specified by +* SELECT, stored consecutively in the columns +* of VL, in the same order as their +* eigenvalues. +* A complex eigenvector corresponding to a complex eigenvalue +* is stored in two consecutive columns, the first holding the +* real part, and the second the imaginary part. +* If SIDE = 'R', VL is not referenced. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. LDVL >= max(1,N) if +* SIDE = 'L' or 'B'; LDVL >= 1 otherwise. +* +* VR (input/output) REAL array, dimension (LDVR,MM) +* On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +* contain an N-by-N matrix Q (usually the orthogonal matrix Q +* of Schur vectors returned by SHSEQR). +* On exit, if SIDE = 'R' or 'B', VR contains: +* if HOWMNY = 'A', the matrix X of right eigenvectors of T; +* VR has the same quasi-upper triangular form +* as T. If T(i,i) is a real eigenvalue, then +* the i-th column VR(i) of VR is its +* corresponding eigenvector. If T(i:i+1,i:i+1) +* is a 2-by-2 block whose eigenvalues are +* complex-conjugate eigenvalues of T, then +* VR(i)+sqrt(-1)*VR(i+1) is the complex +* eigenvector corresponding to the eigenvalue +* with positive real part. +* if HOWMNY = 'B', the matrix Q*X; +* if HOWMNY = 'S', the right eigenvectors of T specified by +* SELECT, stored consecutively in the columns +* of VR, in the same order as their +* eigenvalues. +* A complex eigenvector corresponding to a complex eigenvalue +* is stored in two consecutive columns, the first holding the +* real part and the second the imaginary part. +* If SIDE = 'L', VR is not referenced. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. LDVR >= max(1,N) if +* SIDE = 'R' or 'B'; LDVR >= 1 otherwise. +* +* MM (input) INTEGER +* The number of columns in the arrays VL and/or VR. MM >= M. +* +* M (output) INTEGER +* The number of columns in the arrays VL and/or VR actually +* used to store the eigenvectors. +* If HOWMNY = 'A' or 'B', M is set to N. +* Each selected real eigenvector occupies one column and each +* selected complex eigenvector occupies two columns. +* +* WORK (workspace) REAL array, dimension (3*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The algorithm used in this program is basically backward (forward) +* substitution, with scaling to make the the code robust against +* possible overflow. +* +* Each eigenvector is normalized so that the element of largest +* magnitude has magnitude 1; here the magnitude of a complex number +* (x,y) is taken to be |x| + |y|. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLV, BOTHV, LEFTV, OVER, PAIR, RIGHTV, SOMEV + INTEGER I, IERR, II, IP, IS, J, J1, J2, JNXT, K, KI, N2 + REAL BETA, BIGNUM, EMAX, OVFL, REC, REMAX, SCALE, + $ SMIN, SMLNUM, ULP, UNFL, VCRIT, VMAX, WI, WR, + $ XNORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SDOT, SLAMCH + EXTERNAL LSAME, ISAMAX, SDOT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGEMV, SLABAD, SLALN2, SSCAL, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Local Arrays .. + REAL X( 2, 2 ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + BOTHV = LSAME( SIDE, 'B' ) + RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV + LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV +* + ALLV = LSAME( HOWMNY, 'A' ) + OVER = LSAME( HOWMNY, 'B' ) + SOMEV = LSAME( HOWMNY, 'S' ) +* + INFO = 0 + IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -1 + ELSE IF( .NOT.ALLV .AND. .NOT.OVER .AND. .NOT.SOMEV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN + INFO = -10 + ELSE +* +* Set M to the number of columns required to store the selected +* eigenvectors, standardize the array SELECT if necessary, and +* test MM. +* + IF( SOMEV ) THEN + M = 0 + PAIR = .FALSE. + DO 10 J = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + SELECT( J ) = .FALSE. + ELSE + IF( J.LT.N ) THEN + IF( T( J+1, J ).EQ.ZERO ) THEN + IF( SELECT( J ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( J ) .OR. SELECT( J+1 ) ) THEN + SELECT( J ) = .TRUE. + M = M + 2 + END IF + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE + ELSE + M = N + END IF +* + IF( MM.LT.M ) THEN + INFO = -11 + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STREVC', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* Set the constants to control overflow. +* + UNFL = SLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + CALL SLABAD( UNFL, OVFL ) + ULP = SLAMCH( 'Precision' ) + SMLNUM = UNFL*( N / ULP ) + BIGNUM = ( ONE-ULP ) / SMLNUM +* +* Compute 1-norm of each column of strictly upper triangular +* part of T to control overflow in triangular solver. +* + WORK( 1 ) = ZERO + DO 30 J = 2, N + WORK( J ) = ZERO + DO 20 I = 1, J - 1 + WORK( J ) = WORK( J ) + ABS( T( I, J ) ) + 20 CONTINUE + 30 CONTINUE +* +* Index IP is used to specify the real or complex eigenvalue: +* IP = 0, real eigenvalue, +* 1, first of conjugate complex pair: (wr,wi) +* -1, second of conjugate complex pair: (wr,wi) +* + N2 = 2*N +* + IF( RIGHTV ) THEN +* +* Compute right eigenvectors. +* + IP = 0 + IS = M + DO 140 KI = N, 1, -1 +* + IF( IP.EQ.1 ) + $ GO TO 130 + IF( KI.EQ.1 ) + $ GO TO 40 + IF( T( KI, KI-1 ).EQ.ZERO ) + $ GO TO 40 + IP = -1 +* + 40 CONTINUE + IF( SOMEV ) THEN + IF( IP.EQ.0 ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 130 + ELSE + IF( .NOT.SELECT( KI-1 ) ) + $ GO TO 130 + END IF + END IF +* +* Compute the KI-th eigenvalue (WR,WI). +* + WR = T( KI, KI ) + WI = ZERO + IF( IP.NE.0 ) + $ WI = SQRT( ABS( T( KI, KI-1 ) ) )* + $ SQRT( ABS( T( KI-1, KI ) ) ) + SMIN = MAX( ULP*( ABS( WR )+ABS( WI ) ), SMLNUM ) +* + IF( IP.EQ.0 ) THEN +* +* Real right eigenvector +* + WORK( KI+N ) = ONE +* +* Form right-hand side +* + DO 50 K = 1, KI - 1 + WORK( K+N ) = -T( K, KI ) + 50 CONTINUE +* +* Solve the upper quasi-triangular system: +* (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK. +* + JNXT = KI - 1 + DO 60 J = KI - 1, 1, -1 + IF( J.GT.JNXT ) + $ GO TO 60 + J1 = J + J2 = J + JNXT = J - 1 + IF( J.GT.1 ) THEN + IF( T( J, J-1 ).NE.ZERO ) THEN + J1 = J - 1 + JNXT = J - 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* + CALL SLALN2( .FALSE., 1, 1, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ ZERO, X, 2, SCALE, XNORM, IERR ) +* +* Scale X(1,1) to avoid overflow when updating +* the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + IF( WORK( J ).GT.BIGNUM / XNORM ) THEN + X( 1, 1 ) = X( 1, 1 ) / XNORM + SCALE = SCALE / XNORM + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL SSCAL( KI, SCALE, WORK( 1+N ), 1 ) + WORK( J+N ) = X( 1, 1 ) +* +* Update right-hand side +* + CALL SAXPY( J-1, -X( 1, 1 ), T( 1, J ), 1, + $ WORK( 1+N ), 1 ) +* + ELSE +* +* 2-by-2 diagonal block +* + CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, + $ T( J-1, J-1 ), LDT, ONE, ONE, + $ WORK( J-1+N ), N, WR, ZERO, X, 2, + $ SCALE, XNORM, IERR ) +* +* Scale X(1,1) and X(2,1) to avoid overflow when +* updating the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + BETA = MAX( WORK( J-1 ), WORK( J ) ) + IF( BETA.GT.BIGNUM / XNORM ) THEN + X( 1, 1 ) = X( 1, 1 ) / XNORM + X( 2, 1 ) = X( 2, 1 ) / XNORM + SCALE = SCALE / XNORM + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL SSCAL( KI, SCALE, WORK( 1+N ), 1 ) + WORK( J-1+N ) = X( 1, 1 ) + WORK( J+N ) = X( 2, 1 ) +* +* Update right-hand side +* + CALL SAXPY( J-2, -X( 1, 1 ), T( 1, J-1 ), 1, + $ WORK( 1+N ), 1 ) + CALL SAXPY( J-2, -X( 2, 1 ), T( 1, J ), 1, + $ WORK( 1+N ), 1 ) + END IF + 60 CONTINUE +* +* Copy the vector x or Q*x to VR and normalize. +* + IF( .NOT.OVER ) THEN + CALL SCOPY( KI, WORK( 1+N ), 1, VR( 1, IS ), 1 ) +* + II = ISAMAX( KI, VR( 1, IS ), 1 ) + REMAX = ONE / ABS( VR( II, IS ) ) + CALL SSCAL( KI, REMAX, VR( 1, IS ), 1 ) +* + DO 70 K = KI + 1, N + VR( K, IS ) = ZERO + 70 CONTINUE + ELSE + IF( KI.GT.1 ) + $ CALL SGEMV( 'N', N, KI-1, ONE, VR, LDVR, + $ WORK( 1+N ), 1, WORK( KI+N ), + $ VR( 1, KI ), 1 ) +* + II = ISAMAX( N, VR( 1, KI ), 1 ) + REMAX = ONE / ABS( VR( II, KI ) ) + CALL SSCAL( N, REMAX, VR( 1, KI ), 1 ) + END IF +* + ELSE +* +* Complex right eigenvector. +* +* Initial solve +* [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0. +* [ (T(KI,KI-1) T(KI,KI) ) ] +* + IF( ABS( T( KI-1, KI ) ).GE.ABS( T( KI, KI-1 ) ) ) THEN + WORK( KI-1+N ) = ONE + WORK( KI+N2 ) = WI / T( KI-1, KI ) + ELSE + WORK( KI-1+N ) = -WI / T( KI, KI-1 ) + WORK( KI+N2 ) = ONE + END IF + WORK( KI+N ) = ZERO + WORK( KI-1+N2 ) = ZERO +* +* Form right-hand side +* + DO 80 K = 1, KI - 2 + WORK( K+N ) = -WORK( KI-1+N )*T( K, KI-1 ) + WORK( K+N2 ) = -WORK( KI+N2 )*T( K, KI ) + 80 CONTINUE +* +* Solve upper quasi-triangular system: +* (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2) +* + JNXT = KI - 2 + DO 90 J = KI - 2, 1, -1 + IF( J.GT.JNXT ) + $ GO TO 90 + J1 = J + J2 = J + JNXT = J - 1 + IF( J.GT.1 ) THEN + IF( T( J, J-1 ).NE.ZERO ) THEN + J1 = J - 1 + JNXT = J - 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* + CALL SLALN2( .FALSE., 1, 2, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, WI, + $ X, 2, SCALE, XNORM, IERR ) +* +* Scale X(1,1) and X(1,2) to avoid overflow when +* updating the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + IF( WORK( J ).GT.BIGNUM / XNORM ) THEN + X( 1, 1 ) = X( 1, 1 ) / XNORM + X( 1, 2 ) = X( 1, 2 ) / XNORM + SCALE = SCALE / XNORM + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( KI, SCALE, WORK( 1+N ), 1 ) + CALL SSCAL( KI, SCALE, WORK( 1+N2 ), 1 ) + END IF + WORK( J+N ) = X( 1, 1 ) + WORK( J+N2 ) = X( 1, 2 ) +* +* Update the right-hand side +* + CALL SAXPY( J-1, -X( 1, 1 ), T( 1, J ), 1, + $ WORK( 1+N ), 1 ) + CALL SAXPY( J-1, -X( 1, 2 ), T( 1, J ), 1, + $ WORK( 1+N2 ), 1 ) +* + ELSE +* +* 2-by-2 diagonal block +* + CALL SLALN2( .FALSE., 2, 2, SMIN, ONE, + $ T( J-1, J-1 ), LDT, ONE, ONE, + $ WORK( J-1+N ), N, WR, WI, X, 2, SCALE, + $ XNORM, IERR ) +* +* Scale X to avoid overflow when updating +* the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + BETA = MAX( WORK( J-1 ), WORK( J ) ) + IF( BETA.GT.BIGNUM / XNORM ) THEN + REC = ONE / XNORM + X( 1, 1 ) = X( 1, 1 )*REC + X( 1, 2 ) = X( 1, 2 )*REC + X( 2, 1 ) = X( 2, 1 )*REC + X( 2, 2 ) = X( 2, 2 )*REC + SCALE = SCALE*REC + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( KI, SCALE, WORK( 1+N ), 1 ) + CALL SSCAL( KI, SCALE, WORK( 1+N2 ), 1 ) + END IF + WORK( J-1+N ) = X( 1, 1 ) + WORK( J+N ) = X( 2, 1 ) + WORK( J-1+N2 ) = X( 1, 2 ) + WORK( J+N2 ) = X( 2, 2 ) +* +* Update the right-hand side +* + CALL SAXPY( J-2, -X( 1, 1 ), T( 1, J-1 ), 1, + $ WORK( 1+N ), 1 ) + CALL SAXPY( J-2, -X( 2, 1 ), T( 1, J ), 1, + $ WORK( 1+N ), 1 ) + CALL SAXPY( J-2, -X( 1, 2 ), T( 1, J-1 ), 1, + $ WORK( 1+N2 ), 1 ) + CALL SAXPY( J-2, -X( 2, 2 ), T( 1, J ), 1, + $ WORK( 1+N2 ), 1 ) + END IF + 90 CONTINUE +* +* Copy the vector x or Q*x to VR and normalize. +* + IF( .NOT.OVER ) THEN + CALL SCOPY( KI, WORK( 1+N ), 1, VR( 1, IS-1 ), 1 ) + CALL SCOPY( KI, WORK( 1+N2 ), 1, VR( 1, IS ), 1 ) +* + EMAX = ZERO + DO 100 K = 1, KI + EMAX = MAX( EMAX, ABS( VR( K, IS-1 ) )+ + $ ABS( VR( K, IS ) ) ) + 100 CONTINUE +* + REMAX = ONE / EMAX + CALL SSCAL( KI, REMAX, VR( 1, IS-1 ), 1 ) + CALL SSCAL( KI, REMAX, VR( 1, IS ), 1 ) +* + DO 110 K = KI + 1, N + VR( K, IS-1 ) = ZERO + VR( K, IS ) = ZERO + 110 CONTINUE +* + ELSE +* + IF( KI.GT.2 ) THEN + CALL SGEMV( 'N', N, KI-2, ONE, VR, LDVR, + $ WORK( 1+N ), 1, WORK( KI-1+N ), + $ VR( 1, KI-1 ), 1 ) + CALL SGEMV( 'N', N, KI-2, ONE, VR, LDVR, + $ WORK( 1+N2 ), 1, WORK( KI+N2 ), + $ VR( 1, KI ), 1 ) + ELSE + CALL SSCAL( N, WORK( KI-1+N ), VR( 1, KI-1 ), 1 ) + CALL SSCAL( N, WORK( KI+N2 ), VR( 1, KI ), 1 ) + END IF +* + EMAX = ZERO + DO 120 K = 1, N + EMAX = MAX( EMAX, ABS( VR( K, KI-1 ) )+ + $ ABS( VR( K, KI ) ) ) + 120 CONTINUE + REMAX = ONE / EMAX + CALL SSCAL( N, REMAX, VR( 1, KI-1 ), 1 ) + CALL SSCAL( N, REMAX, VR( 1, KI ), 1 ) + END IF + END IF +* + IS = IS - 1 + IF( IP.NE.0 ) + $ IS = IS - 1 + 130 CONTINUE + IF( IP.EQ.1 ) + $ IP = 0 + IF( IP.EQ.-1 ) + $ IP = 1 + 140 CONTINUE + END IF +* + IF( LEFTV ) THEN +* +* Compute left eigenvectors. +* + IP = 0 + IS = 1 + DO 260 KI = 1, N +* + IF( IP.EQ.-1 ) + $ GO TO 250 + IF( KI.EQ.N ) + $ GO TO 150 + IF( T( KI+1, KI ).EQ.ZERO ) + $ GO TO 150 + IP = 1 +* + 150 CONTINUE + IF( SOMEV ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 250 + END IF +* +* Compute the KI-th eigenvalue (WR,WI). +* + WR = T( KI, KI ) + WI = ZERO + IF( IP.NE.0 ) + $ WI = SQRT( ABS( T( KI, KI+1 ) ) )* + $ SQRT( ABS( T( KI+1, KI ) ) ) + SMIN = MAX( ULP*( ABS( WR )+ABS( WI ) ), SMLNUM ) +* + IF( IP.EQ.0 ) THEN +* +* Real left eigenvector. +* + WORK( KI+N ) = ONE +* +* Form right-hand side +* + DO 160 K = KI + 1, N + WORK( K+N ) = -T( KI, K ) + 160 CONTINUE +* +* Solve the quasi-triangular system: +* (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK +* + VMAX = ONE + VCRIT = BIGNUM +* + JNXT = KI + 1 + DO 170 J = KI + 1, N + IF( J.LT.JNXT ) + $ GO TO 170 + J1 = J + J2 = J + JNXT = J + 1 + IF( J.LT.N ) THEN + IF( T( J+1, J ).NE.ZERO ) THEN + J2 = J + 1 + JNXT = J + 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* +* Scale if necessary to avoid overflow when forming +* the right-hand side. +* + IF( WORK( J ).GT.VCRIT ) THEN + REC = ONE / VMAX + CALL SSCAL( N-KI+1, REC, WORK( KI+N ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+N ) = WORK( J+N ) - + $ SDOT( J-KI-1, T( KI+1, J ), 1, + $ WORK( KI+1+N ), 1 ) +* +* Solve (T(J,J)-WR)'*X = WORK +* + CALL SLALN2( .FALSE., 1, 1, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ ZERO, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL SSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 ) + WORK( J+N ) = X( 1, 1 ) + VMAX = MAX( ABS( WORK( J+N ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + ELSE +* +* 2-by-2 diagonal block +* +* Scale if necessary to avoid overflow when forming +* the right-hand side. +* + BETA = MAX( WORK( J ), WORK( J+1 ) ) + IF( BETA.GT.VCRIT ) THEN + REC = ONE / VMAX + CALL SSCAL( N-KI+1, REC, WORK( KI+N ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+N ) = WORK( J+N ) - + $ SDOT( J-KI-1, T( KI+1, J ), 1, + $ WORK( KI+1+N ), 1 ) +* + WORK( J+1+N ) = WORK( J+1+N ) - + $ SDOT( J-KI-1, T( KI+1, J+1 ), 1, + $ WORK( KI+1+N ), 1 ) +* +* Solve +* [T(J,J)-WR T(J,J+1) ]'* X = SCALE*( WORK1 ) +* [T(J+1,J) T(J+1,J+1)-WR] ( WORK2 ) +* + CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ ZERO, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL SSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 ) + WORK( J+N ) = X( 1, 1 ) + WORK( J+1+N ) = X( 2, 1 ) +* + VMAX = MAX( ABS( WORK( J+N ) ), + $ ABS( WORK( J+1+N ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + END IF + 170 CONTINUE +* +* Copy the vector x or Q*x to VL and normalize. +* + IF( .NOT.OVER ) THEN + CALL SCOPY( N-KI+1, WORK( KI+N ), 1, VL( KI, IS ), 1 ) +* + II = ISAMAX( N-KI+1, VL( KI, IS ), 1 ) + KI - 1 + REMAX = ONE / ABS( VL( II, IS ) ) + CALL SSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 ) +* + DO 180 K = 1, KI - 1 + VL( K, IS ) = ZERO + 180 CONTINUE +* + ELSE +* + IF( KI.LT.N ) + $ CALL SGEMV( 'N', N, N-KI, ONE, VL( 1, KI+1 ), LDVL, + $ WORK( KI+1+N ), 1, WORK( KI+N ), + $ VL( 1, KI ), 1 ) +* + II = ISAMAX( N, VL( 1, KI ), 1 ) + REMAX = ONE / ABS( VL( II, KI ) ) + CALL SSCAL( N, REMAX, VL( 1, KI ), 1 ) +* + END IF +* + ELSE +* +* Complex left eigenvector. +* +* Initial solve: +* ((T(KI,KI) T(KI,KI+1) )' - (WR - I* WI))*X = 0. +* ((T(KI+1,KI) T(KI+1,KI+1)) ) +* + IF( ABS( T( KI, KI+1 ) ).GE.ABS( T( KI+1, KI ) ) ) THEN + WORK( KI+N ) = WI / T( KI, KI+1 ) + WORK( KI+1+N2 ) = ONE + ELSE + WORK( KI+N ) = ONE + WORK( KI+1+N2 ) = -WI / T( KI+1, KI ) + END IF + WORK( KI+1+N ) = ZERO + WORK( KI+N2 ) = ZERO +* +* Form right-hand side +* + DO 190 K = KI + 2, N + WORK( K+N ) = -WORK( KI+N )*T( KI, K ) + WORK( K+N2 ) = -WORK( KI+1+N2 )*T( KI+1, K ) + 190 CONTINUE +* +* Solve complex quasi-triangular system: +* ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2 +* + VMAX = ONE + VCRIT = BIGNUM +* + JNXT = KI + 2 + DO 200 J = KI + 2, N + IF( J.LT.JNXT ) + $ GO TO 200 + J1 = J + J2 = J + JNXT = J + 1 + IF( J.LT.N ) THEN + IF( T( J+1, J ).NE.ZERO ) THEN + J2 = J + 1 + JNXT = J + 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* +* Scale if necessary to avoid overflow when +* forming the right-hand side elements. +* + IF( WORK( J ).GT.VCRIT ) THEN + REC = ONE / VMAX + CALL SSCAL( N-KI+1, REC, WORK( KI+N ), 1 ) + CALL SSCAL( N-KI+1, REC, WORK( KI+N2 ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+N ) = WORK( J+N ) - + $ SDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+N ), 1 ) + WORK( J+N2 ) = WORK( J+N2 ) - + $ SDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+N2 ), 1 ) +* +* Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 +* + CALL SLALN2( .FALSE., 1, 2, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ -WI, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 ) + CALL SSCAL( N-KI+1, SCALE, WORK( KI+N2 ), 1 ) + END IF + WORK( J+N ) = X( 1, 1 ) + WORK( J+N2 ) = X( 1, 2 ) + VMAX = MAX( ABS( WORK( J+N ) ), + $ ABS( WORK( J+N2 ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + ELSE +* +* 2-by-2 diagonal block +* +* Scale if necessary to avoid overflow when forming +* the right-hand side elements. +* + BETA = MAX( WORK( J ), WORK( J+1 ) ) + IF( BETA.GT.VCRIT ) THEN + REC = ONE / VMAX + CALL SSCAL( N-KI+1, REC, WORK( KI+N ), 1 ) + CALL SSCAL( N-KI+1, REC, WORK( KI+N2 ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+N ) = WORK( J+N ) - + $ SDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+N ), 1 ) +* + WORK( J+N2 ) = WORK( J+N2 ) - + $ SDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+N2 ), 1 ) +* + WORK( J+1+N ) = WORK( J+1+N ) - + $ SDOT( J-KI-2, T( KI+2, J+1 ), 1, + $ WORK( KI+2+N ), 1 ) +* + WORK( J+1+N2 ) = WORK( J+1+N2 ) - + $ SDOT( J-KI-2, T( KI+2, J+1 ), 1, + $ WORK( KI+2+N2 ), 1 ) +* +* Solve 2-by-2 complex linear equation +* ([T(j,j) T(j,j+1) ]'-(wr-i*wi)*I)*X = SCALE*B +* ([T(j+1,j) T(j+1,j+1)] ) +* + CALL SLALN2( .TRUE., 2, 2, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ -WI, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 ) + CALL SSCAL( N-KI+1, SCALE, WORK( KI+N2 ), 1 ) + END IF + WORK( J+N ) = X( 1, 1 ) + WORK( J+N2 ) = X( 1, 2 ) + WORK( J+1+N ) = X( 2, 1 ) + WORK( J+1+N2 ) = X( 2, 2 ) + VMAX = MAX( ABS( X( 1, 1 ) ), ABS( X( 1, 2 ) ), + $ ABS( X( 2, 1 ) ), ABS( X( 2, 2 ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + END IF + 200 CONTINUE +* +* Copy the vector x or Q*x to VL and normalize. +* + 210 CONTINUE + IF( .NOT.OVER ) THEN + CALL SCOPY( N-KI+1, WORK( KI+N ), 1, VL( KI, IS ), 1 ) + CALL SCOPY( N-KI+1, WORK( KI+N2 ), 1, VL( KI, IS+1 ), + $ 1 ) +* + EMAX = ZERO + DO 220 K = KI, N + EMAX = MAX( EMAX, ABS( VL( K, IS ) )+ + $ ABS( VL( K, IS+1 ) ) ) + 220 CONTINUE + REMAX = ONE / EMAX + CALL SSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 ) + CALL SSCAL( N-KI+1, REMAX, VL( KI, IS+1 ), 1 ) +* + DO 230 K = 1, KI - 1 + VL( K, IS ) = ZERO + VL( K, IS+1 ) = ZERO + 230 CONTINUE + ELSE + IF( KI.LT.N-1 ) THEN + CALL SGEMV( 'N', N, N-KI-1, ONE, VL( 1, KI+2 ), + $ LDVL, WORK( KI+2+N ), 1, WORK( KI+N ), + $ VL( 1, KI ), 1 ) + CALL SGEMV( 'N', N, N-KI-1, ONE, VL( 1, KI+2 ), + $ LDVL, WORK( KI+2+N2 ), 1, + $ WORK( KI+1+N2 ), VL( 1, KI+1 ), 1 ) + ELSE + CALL SSCAL( N, WORK( KI+N ), VL( 1, KI ), 1 ) + CALL SSCAL( N, WORK( KI+1+N2 ), VL( 1, KI+1 ), 1 ) + END IF +* + EMAX = ZERO + DO 240 K = 1, N + EMAX = MAX( EMAX, ABS( VL( K, KI ) )+ + $ ABS( VL( K, KI+1 ) ) ) + 240 CONTINUE + REMAX = ONE / EMAX + CALL SSCAL( N, REMAX, VL( 1, KI ), 1 ) + CALL SSCAL( N, REMAX, VL( 1, KI+1 ), 1 ) +* + END IF +* + END IF +* + IS = IS + 1 + IF( IP.NE.0 ) + $ IS = IS + 1 + 250 CONTINUE + IF( IP.EQ.-1 ) + $ IP = 0 + IF( IP.EQ.1 ) + $ IP = -1 +* + 260 CONTINUE +* + END IF +* + RETURN +* +* End of STREVC +* + END diff --git a/costa/native/external/lapack/strexc.f b/costa/native/external/lapack/strexc.f new file mode 100644 index 000000000..c8c36f8da --- /dev/null +++ b/costa/native/external/lapack/strexc.f @@ -0,0 +1,346 @@ + SUBROUTINE STREXC( COMPQ, N, T, LDT, Q, LDQ, IFST, ILST, WORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER COMPQ + INTEGER IFST, ILST, INFO, LDQ, LDT, N +* .. +* .. Array Arguments .. + REAL Q( LDQ, * ), T( LDT, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* STREXC reorders the real Schur factorization of a real matrix +* A = Q*T*Q**T, so that the diagonal block of T with row index IFST is +* moved to row ILST. +* +* The real Schur form T is reordered by an orthogonal similarity +* transformation Z**T*T*Z, and optionally the matrix Q of Schur vectors +* is updated by postmultiplying it with Z. +* +* T must be in Schur canonical form (as returned by SHSEQR), that is, +* block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each +* 2-by-2 diagonal block has its diagonal elements equal and its +* off-diagonal elements of opposite sign. +* +* Arguments +* ========= +* +* COMPQ (input) CHARACTER*1 +* = 'V': update the matrix Q of Schur vectors; +* = 'N': do not update Q. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input/output) REAL array, dimension (LDT,N) +* On entry, the upper quasi-triangular matrix T, in Schur +* Schur canonical form. +* On exit, the reordered upper quasi-triangular matrix, again +* in Schur canonical form. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* Q (input/output) REAL array, dimension (LDQ,N) +* On entry, if COMPQ = 'V', the matrix Q of Schur vectors. +* On exit, if COMPQ = 'V', Q has been postmultiplied by the +* orthogonal transformation matrix Z which reorders T. +* If COMPQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* IFST (input/output) INTEGER +* ILST (input/output) INTEGER +* Specify the reordering of the diagonal blocks of T. +* The block with row index IFST is moved to row ILST, by a +* sequence of transpositions between adjacent blocks. +* On exit, if IFST pointed on entry to the second row of a +* 2-by-2 block, it is changed to point to the first row; ILST +* always points to the first row of the block in its final +* position (which may differ from its input value by +1 or -1). +* 1 <= IFST <= N; 1 <= ILST <= N. +* +* WORK (workspace) REAL array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* = 1: two adjacent blocks were too close to swap (the problem +* is very ill-conditioned); T may have been partially +* reordered, and ILST points to the first row of the +* current position of the block being moved. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL WANTQ + INTEGER HERE, NBF, NBL, NBNEXT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLAEXC, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode and test the input arguments. +* + INFO = 0 + WANTQ = LSAME( COMPQ, 'V' ) + IF( .NOT.WANTQ .AND. .NOT.LSAME( COMPQ, 'N' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.MAX( 1, N ) ) ) THEN + INFO = -6 + ELSE IF( IFST.LT.1 .OR. IFST.GT.N ) THEN + INFO = -7 + ELSE IF( ILST.LT.1 .OR. ILST.GT.N ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STREXC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* +* Determine the first row of specified block +* and find out it is 1 by 1 or 2 by 2. +* + IF( IFST.GT.1 ) THEN + IF( T( IFST, IFST-1 ).NE.ZERO ) + $ IFST = IFST - 1 + END IF + NBF = 1 + IF( IFST.LT.N ) THEN + IF( T( IFST+1, IFST ).NE.ZERO ) + $ NBF = 2 + END IF +* +* Determine the first row of the final block +* and find out it is 1 by 1 or 2 by 2. +* + IF( ILST.GT.1 ) THEN + IF( T( ILST, ILST-1 ).NE.ZERO ) + $ ILST = ILST - 1 + END IF + NBL = 1 + IF( ILST.LT.N ) THEN + IF( T( ILST+1, ILST ).NE.ZERO ) + $ NBL = 2 + END IF +* + IF( IFST.EQ.ILST ) + $ RETURN +* + IF( IFST.LT.ILST ) THEN +* +* Update ILST +* + IF( NBF.EQ.2 .AND. NBL.EQ.1 ) + $ ILST = ILST - 1 + IF( NBF.EQ.1 .AND. NBL.EQ.2 ) + $ ILST = ILST + 1 +* + HERE = IFST +* + 10 CONTINUE +* +* Swap block with next one below +* + IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN +* +* Current block either 1 by 1 or 2 by 2 +* + NBNEXT = 1 + IF( HERE+NBF+1.LE.N ) THEN + IF( T( HERE+NBF+1, HERE+NBF ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, NBF, NBNEXT, + $ WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + NBNEXT +* +* Test if 2 by 2 block breaks into two 1 by 1 blocks +* + IF( NBF.EQ.2 ) THEN + IF( T( HERE+1, HERE ).EQ.ZERO ) + $ NBF = 3 + END IF +* + ELSE +* +* Current block consists of two 1 by 1 blocks each of which +* must be swapped individually +* + NBNEXT = 1 + IF( HERE+3.LE.N ) THEN + IF( T( HERE+3, HERE+2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE+1, 1, NBNEXT, + $ WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + IF( NBNEXT.EQ.1 ) THEN +* +* Swap two 1 by 1 blocks, no problems possible +* + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, NBNEXT, + $ WORK, INFO ) + HERE = HERE + 1 + ELSE +* +* Recompute NBNEXT in case 2 by 2 split +* + IF( T( HERE+2, HERE+1 ).EQ.ZERO ) + $ NBNEXT = 1 + IF( NBNEXT.EQ.2 ) THEN +* +* 2 by 2 Block did not split +* + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, + $ NBNEXT, WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 2 + ELSE +* +* 2 by 2 Block did split +* + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, 1, + $ WORK, INFO ) + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE+1, 1, 1, + $ WORK, INFO ) + HERE = HERE + 2 + END IF + END IF + END IF + IF( HERE.LT.ILST ) + $ GO TO 10 +* + ELSE +* + HERE = IFST + 20 CONTINUE +* +* Swap block with next one above +* + IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN +* +* Current block either 1 by 1 or 2 by 2 +* + NBNEXT = 1 + IF( HERE.GE.3 ) THEN + IF( T( HERE-1, HERE-2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-NBNEXT, NBNEXT, + $ NBF, WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - NBNEXT +* +* Test if 2 by 2 block breaks into two 1 by 1 blocks +* + IF( NBF.EQ.2 ) THEN + IF( T( HERE+1, HERE ).EQ.ZERO ) + $ NBF = 3 + END IF +* + ELSE +* +* Current block consists of two 1 by 1 blocks each of which +* must be swapped individually +* + NBNEXT = 1 + IF( HERE.GE.3 ) THEN + IF( T( HERE-1, HERE-2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-NBNEXT, NBNEXT, + $ 1, WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + IF( NBNEXT.EQ.1 ) THEN +* +* Swap two 1 by 1 blocks, no problems possible +* + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, NBNEXT, 1, + $ WORK, INFO ) + HERE = HERE - 1 + ELSE +* +* Recompute NBNEXT in case 2 by 2 split +* + IF( T( HERE, HERE-1 ).EQ.ZERO ) + $ NBNEXT = 1 + IF( NBNEXT.EQ.2 ) THEN +* +* 2 by 2 Block did not split +* + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-1, 2, 1, + $ WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 2 + ELSE +* +* 2 by 2 Block did split +* + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, 1, + $ WORK, INFO ) + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-1, 1, 1, + $ WORK, INFO ) + HERE = HERE - 2 + END IF + END IF + END IF + IF( HERE.GT.ILST ) + $ GO TO 20 + END IF + ILST = HERE +* + RETURN +* +* End of STREXC +* + END diff --git a/costa/native/external/lapack/strrfs.f b/costa/native/external/lapack/strrfs.f new file mode 100644 index 000000000..528a3c348 --- /dev/null +++ b/costa/native/external/lapack/strrfs.f @@ -0,0 +1,371 @@ + SUBROUTINE STRRFS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, X, + $ LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDA, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), B( LDB, * ), BERR( * ), FERR( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* STRRFS provides error bounds and backward error estimates for the +* solution to a system of linear equations with a triangular +* coefficient matrix. +* +* The solution matrix X must be computed by STRTRS or some other +* means before entering this routine. STRRFS does not do iterative +* refinement because doing so cannot improve the backward error. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose = Transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input) REAL array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input) REAL array, dimension (LDX,NRHS) +* The solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) REAL array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) REAL array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) REAL array, dimension (3*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANST + INTEGER I, J, K, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SLACON, STRMV, STRSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A or A', depending on TRANS. +* + CALL SCOPY( N, X( 1, J ), 1, WORK( N+1 ), 1 ) + CALL STRMV( UPLO, TRANS, DIAG, N, A, LDA, WORK( N+1 ), 1 ) + CALL SAXPY( N, -ONE, B( 1, J ), 1, WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = ABS( X( K, J ) ) + DO 30 I = 1, K + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + 30 CONTINUE + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = ABS( X( K, J ) ) + DO 50 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + 50 CONTINUE + WORK( K ) = WORK( K ) + XK + 60 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = ABS( X( K, J ) ) + DO 70 I = K, N + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + 70 CONTINUE + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = ABS( X( K, J ) ) + DO 90 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + 90 CONTINUE + WORK( K ) = WORK( K ) + XK + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A')*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = 1, K + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 110 CONTINUE + WORK( K ) = WORK( K ) + S + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = ABS( X( K, J ) ) + DO 130 I = 1, K - 1 + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 130 CONTINUE + WORK( K ) = WORK( K ) + S + 140 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, N + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 150 CONTINUE + WORK( K ) = WORK( K ) + S + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = ABS( X( K, J ) ) + DO 170 I = K + 1, N + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 170 CONTINUE + WORK( K ) = WORK( K ) + S + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use SLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL SLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)'). +* + CALL STRSV( UPLO, TRANST, DIAG, N, A, LDA, WORK( N+1 ), + $ 1 ) + DO 220 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 230 CONTINUE + CALL STRSV( UPLO, TRANS, DIAG, N, A, LDA, WORK( N+1 ), + $ 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of STRRFS +* + END diff --git a/costa/native/external/lapack/strsen.f b/costa/native/external/lapack/strsen.f new file mode 100644 index 000000000..e56557944 --- /dev/null +++ b/costa/native/external/lapack/strsen.f @@ -0,0 +1,457 @@ + SUBROUTINE STRSEN( JOB, COMPQ, SELECT, N, T, LDT, Q, LDQ, WR, WI, + $ M, S, SEP, WORK, LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, JOB + INTEGER INFO, LDQ, LDT, LIWORK, LWORK, M, N + REAL S, SEP +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + REAL Q( LDQ, * ), T( LDT, * ), WI( * ), WORK( * ), + $ WR( * ) +* .. +* +* Purpose +* ======= +* +* STRSEN reorders the real Schur factorization of a real matrix +* A = Q*T*Q**T, so that a selected cluster of eigenvalues appears in +* the leading diagonal blocks of the upper quasi-triangular matrix T, +* and the leading columns of Q form an orthonormal basis of the +* corresponding right invariant subspace. +* +* Optionally the routine computes the reciprocal condition numbers of +* the cluster of eigenvalues and/or the invariant subspace. +* +* T must be in Schur canonical form (as returned by SHSEQR), that is, +* block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each +* 2-by-2 diagonal block has its diagonal elemnts equal and its +* off-diagonal elements of opposite sign. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies whether condition numbers are required for the +* cluster of eigenvalues (S) or the invariant subspace (SEP): +* = 'N': none; +* = 'E': for eigenvalues only (S); +* = 'V': for invariant subspace only (SEP); +* = 'B': for both eigenvalues and invariant subspace (S and +* SEP). +* +* COMPQ (input) CHARACTER*1 +* = 'V': update the matrix Q of Schur vectors; +* = 'N': do not update Q. +* +* SELECT (input) LOGICAL array, dimension (N) +* SELECT specifies the eigenvalues in the selected cluster. To +* select a real eigenvalue w(j), SELECT(j) must be set to +* .TRUE.. To select a complex conjugate pair of eigenvalues +* w(j) and w(j+1), corresponding to a 2-by-2 diagonal block, +* either SELECT(j) or SELECT(j+1) or both must be set to +* .TRUE.; a complex conjugate pair of eigenvalues must be +* either both included in the cluster or both excluded. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input/output) REAL array, dimension (LDT,N) +* On entry, the upper quasi-triangular matrix T, in Schur +* canonical form. +* On exit, T is overwritten by the reordered matrix T, again in +* Schur canonical form, with the selected eigenvalues in the +* leading diagonal blocks. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* Q (input/output) REAL array, dimension (LDQ,N) +* On entry, if COMPQ = 'V', the matrix Q of Schur vectors. +* On exit, if COMPQ = 'V', Q has been postmultiplied by the +* orthogonal transformation matrix which reorders T; the +* leading M columns of Q form an orthonormal basis for the +* specified invariant subspace. +* If COMPQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. +* LDQ >= 1; and if COMPQ = 'V', LDQ >= N. +* +* WR (output) REAL array, dimension (N) +* WI (output) REAL array, dimension (N) +* The real and imaginary parts, respectively, of the reordered +* eigenvalues of T. The eigenvalues are stored in the same +* order as on the diagonal of T, with WR(i) = T(i,i) and, if +* T(i:i+1,i:i+1) is a 2-by-2 diagonal block, WI(i) > 0 and +* WI(i+1) = -WI(i). Note that if a complex eigenvalue is +* sufficiently ill-conditioned, then its value may differ +* significantly from its value before reordering. +* +* M (output) INTEGER +* The dimension of the specified invariant subspace. +* 0 < = M <= N. +* +* S (output) REAL +* If JOB = 'E' or 'B', S is a lower bound on the reciprocal +* condition number for the selected cluster of eigenvalues. +* S cannot underestimate the true reciprocal condition number +* by more than a factor of sqrt(N). If M = 0 or N, S = 1. +* If JOB = 'N' or 'V', S is not referenced. +* +* SEP (output) REAL +* If JOB = 'V' or 'B', SEP is the estimated reciprocal +* condition number of the specified invariant subspace. If +* M = 0 or N, SEP = norm(T). +* If JOB = 'N' or 'E', SEP is not referenced. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If JOB = 'N', LWORK >= max(1,N); +* if JOB = 'E', LWORK >= M*(N-M); +* if JOB = 'V' or 'B', LWORK >= 2*M*(N-M). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace) INTEGER array, dimension (LIWORK) +* IF JOB = 'N' or 'E', IWORK is not referenced. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If JOB = 'N' or 'E', LIWORK >= 1; +* if JOB = 'V' or 'B', LIWORK >= M*(N-M). +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* = 1: reordering of T failed because some eigenvalues are too +* close to separate (the problem is very ill-conditioned); +* T may have been partially reordered, and WR and WI +* contain the eigenvalues in the same order as in T; S and +* SEP (if requested) are set to zero. +* +* Further Details +* =============== +* +* STRSEN first collects the selected eigenvalues by computing an +* orthogonal transformation Z to move them to the top left corner of T. +* In other words, the selected eigenvalues are the eigenvalues of T11 +* in: +* +* Z'*T*Z = ( T11 T12 ) n1 +* ( 0 T22 ) n2 +* n1 n2 +* +* where N = n1+n2 and Z' means the transpose of Z. The first n1 columns +* of Z span the specified invariant subspace of T. +* +* If T has been obtained from the real Schur factorization of a matrix +* A = Q*T*Q', then the reordered real Schur factorization of A is given +* by A = (Q*Z)*(Z'*T*Z)*(Q*Z)', and the first n1 columns of Q*Z span +* the corresponding invariant subspace of A. +* +* The reciprocal condition number of the average of the eigenvalues of +* T11 may be returned in S. S lies between 0 (very badly conditioned) +* and 1 (very well conditioned). It is computed as follows. First we +* compute R so that +* +* P = ( I R ) n1 +* ( 0 0 ) n2 +* n1 n2 +* +* is the projector on the invariant subspace associated with T11. +* R is the solution of the Sylvester equation: +* +* T11*R - R*T22 = T12. +* +* Let F-norm(M) denote the Frobenius-norm of M and 2-norm(M) denote +* the two-norm of M. Then S is computed as the lower bound +* +* (1 + F-norm(R)**2)**(-1/2) +* +* on the reciprocal of 2-norm(P), the true reciprocal condition number. +* S cannot underestimate 1 / 2-norm(P) by more than a factor of +* sqrt(N). +* +* An approximate error bound for the computed average of the +* eigenvalues of T11 is +* +* EPS * norm(T) / S +* +* where EPS is the machine precision. +* +* The reciprocal condition number of the right invariant subspace +* spanned by the first n1 columns of Z (or of Q*Z) is returned in SEP. +* SEP is defined as the separation of T11 and T22: +* +* sep( T11, T22 ) = sigma-min( C ) +* +* where sigma-min(C) is the smallest singular value of the +* n1*n2-by-n1*n2 matrix +* +* C = kprod( I(n2), T11 ) - kprod( transpose(T22), I(n1) ) +* +* I(m) is an m by m identity matrix, and kprod denotes the Kronecker +* product. We estimate sigma-min(C) by the reciprocal of an estimate of +* the 1-norm of inverse(C). The true reciprocal 1-norm of inverse(C) +* cannot differ from sigma-min(C) by more than a factor of sqrt(n1*n2). +* +* When SEP is small, small changes in T can cause large changes in +* the invariant subspace. An approximate bound on the maximum angular +* error in the computed right invariant subspace is +* +* EPS * norm(T) / SEP +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, PAIR, SWAP, WANTBH, WANTQ, WANTS, + $ WANTSP + INTEGER IERR, K, KASE, KK, KS, LIWMIN, LWMIN, N1, N2, + $ NN + REAL EST, RNORM, SCALE +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLANGE + EXTERNAL LSAME, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SLACON, SLACPY, STREXC, STRSYL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH + WANTQ = LSAME( COMPQ, 'V' ) +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.WANTS .AND. .NOT.WANTSP ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -8 + ELSE +* +* Set M to the dimension of the specified invariant subspace, +* and test LWORK and LIWORK. +* + M = 0 + PAIR = .FALSE. + DO 10 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE + IF( K.LT.N ) THEN + IF( T( K+1, K ).EQ.ZERO ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( K ) .OR. SELECT( K+1 ) ) + $ M = M + 2 + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE +* + N1 = M + N2 = N - M + NN = N1*N2 +* + IF( WANTSP ) THEN + LWMIN = MAX( 1, 2*NN ) + LIWMIN = MAX( 1, NN ) + ELSE IF( LSAME( JOB, 'N' ) ) THEN + LWMIN = MAX( 1, N ) + LIWMIN = 1 + ELSE IF( LSAME( JOB, 'E' ) ) THEN + LWMIN = MAX( 1, NN ) + LIWMIN = 1 + END IF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -17 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRSEN', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.N .OR. M.EQ.0 ) THEN + IF( WANTS ) + $ S = ONE + IF( WANTSP ) + $ SEP = SLANGE( '1', N, N, T, LDT, WORK ) + GO TO 40 + END IF +* +* Collect the selected blocks at the top-left corner of T. +* + KS = 0 + PAIR = .FALSE. + DO 20 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE + SWAP = SELECT( K ) + IF( K.LT.N ) THEN + IF( T( K+1, K ).NE.ZERO ) THEN + PAIR = .TRUE. + SWAP = SWAP .OR. SELECT( K+1 ) + END IF + END IF + IF( SWAP ) THEN + KS = KS + 1 +* +* Swap the K-th block to position KS. +* + IERR = 0 + KK = K + IF( K.NE.KS ) + $ CALL STREXC( COMPQ, N, T, LDT, Q, LDQ, KK, KS, WORK, + $ IERR ) + IF( IERR.EQ.1 .OR. IERR.EQ.2 ) THEN +* +* Blocks too close to swap: exit. +* + INFO = 1 + IF( WANTS ) + $ S = ZERO + IF( WANTSP ) + $ SEP = ZERO + GO TO 40 + END IF + IF( PAIR ) + $ KS = KS + 1 + END IF + END IF + 20 CONTINUE +* + IF( WANTS ) THEN +* +* Solve Sylvester equation for R: +* +* T11*R - R*T22 = scale*T12 +* + CALL SLACPY( 'F', N1, N2, T( 1, N1+1 ), LDT, WORK, N1 ) + CALL STRSYL( 'N', 'N', -1, N1, N2, T, LDT, T( N1+1, N1+1 ), + $ LDT, WORK, N1, SCALE, IERR ) +* +* Estimate the reciprocal of the condition number of the cluster +* of eigenvalues. +* + RNORM = SLANGE( 'F', N1, N2, WORK, N1, WORK ) + IF( RNORM.EQ.ZERO ) THEN + S = ONE + ELSE + S = SCALE / ( SQRT( SCALE*SCALE / RNORM+RNORM )* + $ SQRT( RNORM ) ) + END IF + END IF +* + IF( WANTSP ) THEN +* +* Estimate sep(T11,T22). +* + EST = ZERO + KASE = 0 + 30 CONTINUE + CALL SLACON( NN, WORK( NN+1 ), WORK, IWORK, EST, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve T11*R - R*T22 = scale*X. +* + CALL STRSYL( 'N', 'N', -1, N1, N2, T, LDT, + $ T( N1+1, N1+1 ), LDT, WORK, N1, SCALE, + $ IERR ) + ELSE +* +* Solve T11'*R - R*T22' = scale*X. +* + CALL STRSYL( 'T', 'T', -1, N1, N2, T, LDT, + $ T( N1+1, N1+1 ), LDT, WORK, N1, SCALE, + $ IERR ) + END IF + GO TO 30 + END IF +* + SEP = SCALE / EST + END IF +* + 40 CONTINUE +* +* Store the output eigenvalues in WR and WI. +* + DO 50 K = 1, N + WR( K ) = T( K, K ) + WI( K ) = ZERO + 50 CONTINUE + DO 60 K = 1, N - 1 + IF( T( K+1, K ).NE.ZERO ) THEN + WI( K ) = SQRT( ABS( T( K, K+1 ) ) )* + $ SQRT( ABS( T( K+1, K ) ) ) + WI( K+1 ) = -WI( K ) + END IF + 60 CONTINUE +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of STRSEN +* + END diff --git a/costa/native/external/lapack/strsna.f b/costa/native/external/lapack/strsna.f new file mode 100644 index 000000000..f9d801796 --- /dev/null +++ b/costa/native/external/lapack/strsna.f @@ -0,0 +1,493 @@ + SUBROUTINE STRSNA( JOB, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, + $ LDVR, S, SEP, MM, M, WORK, LDWORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, JOB + INTEGER INFO, LDT, LDVL, LDVR, LDWORK, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + REAL S( * ), SEP( * ), T( LDT, * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( LDWORK, * ) +* .. +* +* Purpose +* ======= +* +* STRSNA estimates reciprocal condition numbers for specified +* eigenvalues and/or right eigenvectors of a real upper +* quasi-triangular matrix T (or of any matrix Q*T*Q**T with Q +* orthogonal). +* +* T must be in Schur canonical form (as returned by SHSEQR), that is, +* block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each +* 2-by-2 diagonal block has its diagonal elements equal and its +* off-diagonal elements of opposite sign. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies whether condition numbers are required for +* eigenvalues (S) or eigenvectors (SEP): +* = 'E': for eigenvalues only (S); +* = 'V': for eigenvectors only (SEP); +* = 'B': for both eigenvalues and eigenvectors (S and SEP). +* +* HOWMNY (input) CHARACTER*1 +* = 'A': compute condition numbers for all eigenpairs; +* = 'S': compute condition numbers for selected eigenpairs +* specified by the array SELECT. +* +* SELECT (input) LOGICAL array, dimension (N) +* If HOWMNY = 'S', SELECT specifies the eigenpairs for which +* condition numbers are required. To select condition numbers +* for the eigenpair corresponding to a real eigenvalue w(j), +* SELECT(j) must be set to .TRUE.. To select condition numbers +* corresponding to a complex conjugate pair of eigenvalues w(j) +* and w(j+1), either SELECT(j) or SELECT(j+1) or both, must be +* set to .TRUE.. +* If HOWMNY = 'A', SELECT is not referenced. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input) REAL array, dimension (LDT,N) +* The upper quasi-triangular matrix T, in Schur canonical form. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* VL (input) REAL array, dimension (LDVL,M) +* If JOB = 'E' or 'B', VL must contain left eigenvectors of T +* (or of any Q*T*Q**T with Q orthogonal), corresponding to the +* eigenpairs specified by HOWMNY and SELECT. The eigenvectors +* must be stored in consecutive columns of VL, as returned by +* SHSEIN or STREVC. +* If JOB = 'V', VL is not referenced. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. +* LDVL >= 1; and if JOB = 'E' or 'B', LDVL >= N. +* +* VR (input) REAL array, dimension (LDVR,M) +* If JOB = 'E' or 'B', VR must contain right eigenvectors of T +* (or of any Q*T*Q**T with Q orthogonal), corresponding to the +* eigenpairs specified by HOWMNY and SELECT. The eigenvectors +* must be stored in consecutive columns of VR, as returned by +* SHSEIN or STREVC. +* If JOB = 'V', VR is not referenced. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. +* LDVR >= 1; and if JOB = 'E' or 'B', LDVR >= N. +* +* S (output) REAL array, dimension (MM) +* If JOB = 'E' or 'B', the reciprocal condition numbers of the +* selected eigenvalues, stored in consecutive elements of the +* array. For a complex conjugate pair of eigenvalues two +* consecutive elements of S are set to the same value. Thus +* S(j), SEP(j), and the j-th columns of VL and VR all +* correspond to the same eigenpair (but not in general the +* j-th eigenpair, unless all eigenpairs are selected). +* If JOB = 'V', S is not referenced. +* +* SEP (output) REAL array, dimension (MM) +* If JOB = 'V' or 'B', the estimated reciprocal condition +* numbers of the selected eigenvectors, stored in consecutive +* elements of the array. For a complex eigenvector two +* consecutive elements of SEP are set to the same value. If +* the eigenvalues cannot be reordered to compute SEP(j), SEP(j) +* is set to 0; this can only occur when the true value would be +* very small anyway. +* If JOB = 'E', SEP is not referenced. +* +* MM (input) INTEGER +* The number of elements in the arrays S (if JOB = 'E' or 'B') +* and/or SEP (if JOB = 'V' or 'B'). MM >= M. +* +* M (output) INTEGER +* The number of elements of the arrays S and/or SEP actually +* used to store the estimated condition numbers. +* If HOWMNY = 'A', M is set to N. +* +* WORK (workspace) REAL array, dimension (LDWORK,N+1) +* If JOB = 'E', WORK is not referenced. +* +* LDWORK (input) INTEGER +* The leading dimension of the array WORK. +* LDWORK >= 1; and if JOB = 'V' or 'B', LDWORK >= N. +* +* IWORK (workspace) INTEGER array, dimension (N) +* If JOB = 'E', IWORK is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The reciprocal of the condition number of an eigenvalue lambda is +* defined as +* +* S(lambda) = |v'*u| / (norm(u)*norm(v)) +* +* where u and v are the right and left eigenvectors of T corresponding +* to lambda; v' denotes the conjugate-transpose of v, and norm(u) +* denotes the Euclidean norm. These reciprocal condition numbers always +* lie between zero (very badly conditioned) and one (very well +* conditioned). If n = 1, S(lambda) is defined to be 1. +* +* An approximate error bound for a computed eigenvalue W(i) is given by +* +* EPS * norm(T) / S(i) +* +* where EPS is the machine precision. +* +* The reciprocal of the condition number of the right eigenvector u +* corresponding to lambda is defined as follows. Suppose +* +* T = ( lambda c ) +* ( 0 T22 ) +* +* Then the reciprocal condition number is +* +* SEP( lambda, T22 ) = sigma-min( T22 - lambda*I ) +* +* where sigma-min denotes the smallest singular value. We approximate +* the smallest singular value by the reciprocal of an estimate of the +* one-norm of the inverse of T22 - lambda*I. If n = 1, SEP(1) is +* defined to be abs(T(1,1)). +* +* An approximate error bound for a computed right eigenvector VR(i) +* is given by +* +* EPS * norm(T) / SEP(i) +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL PAIR, SOMCON, WANTBH, WANTS, WANTSP + INTEGER I, IERR, IFST, ILST, J, K, KASE, KS, N2, NN + REAL BIGNUM, COND, CS, DELTA, DUMM, EPS, EST, LNRM, + $ MU, PROD, PROD1, PROD2, RNRM, SCALE, SMLNUM, SN +* .. +* .. Local Arrays .. + REAL DUMMY( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT, SLAMCH, SLAPY2, SNRM2 + EXTERNAL LSAME, SDOT, SLAMCH, SLAPY2, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SLABAD, SLACON, SLACPY, SLAQTR, STREXC, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH +* + SOMCON = LSAME( HOWMNY, 'S' ) +* + INFO = 0 + IF( .NOT.WANTS .AND. .NOT.WANTSP ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVL.LT.1 .OR. ( WANTS .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( WANTS .AND. LDVR.LT.N ) ) THEN + INFO = -10 + ELSE +* +* Set M to the number of eigenpairs for which condition numbers +* are required, and test MM. +* + IF( SOMCON ) THEN + M = 0 + PAIR = .FALSE. + DO 10 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE + IF( K.LT.N ) THEN + IF( T( K+1, K ).EQ.ZERO ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( K ) .OR. SELECT( K+1 ) ) + $ M = M + 2 + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE + ELSE + M = N + END IF +* + IF( MM.LT.M ) THEN + INFO = -13 + ELSE IF( LDWORK.LT.1 .OR. ( WANTSP .AND. LDWORK.LT.N ) ) THEN + INFO = -16 + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRSNA', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( SOMCON ) THEN + IF( .NOT.SELECT( 1 ) ) + $ RETURN + END IF + IF( WANTS ) + $ S( 1 ) = ONE + IF( WANTSP ) + $ SEP( 1 ) = ABS( T( 1, 1 ) ) + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* + KS = 0 + PAIR = .FALSE. + DO 60 K = 1, N +* +* Determine whether T(k,k) begins a 1-by-1 or 2-by-2 block. +* + IF( PAIR ) THEN + PAIR = .FALSE. + GO TO 60 + ELSE + IF( K.LT.N ) + $ PAIR = T( K+1, K ).NE.ZERO + END IF +* +* Determine whether condition numbers are required for the k-th +* eigenpair. +* + IF( SOMCON ) THEN + IF( PAIR ) THEN + IF( .NOT.SELECT( K ) .AND. .NOT.SELECT( K+1 ) ) + $ GO TO 60 + ELSE + IF( .NOT.SELECT( K ) ) + $ GO TO 60 + END IF + END IF +* + KS = KS + 1 +* + IF( WANTS ) THEN +* +* Compute the reciprocal condition number of the k-th +* eigenvalue. +* + IF( .NOT.PAIR ) THEN +* +* Real eigenvalue. +* + PROD = SDOT( N, VR( 1, KS ), 1, VL( 1, KS ), 1 ) + RNRM = SNRM2( N, VR( 1, KS ), 1 ) + LNRM = SNRM2( N, VL( 1, KS ), 1 ) + S( KS ) = ABS( PROD ) / ( RNRM*LNRM ) + ELSE +* +* Complex eigenvalue. +* + PROD1 = SDOT( N, VR( 1, KS ), 1, VL( 1, KS ), 1 ) + PROD1 = PROD1 + SDOT( N, VR( 1, KS+1 ), 1, VL( 1, KS+1 ), + $ 1 ) + PROD2 = SDOT( N, VL( 1, KS ), 1, VR( 1, KS+1 ), 1 ) + PROD2 = PROD2 - SDOT( N, VL( 1, KS+1 ), 1, VR( 1, KS ), + $ 1 ) + RNRM = SLAPY2( SNRM2( N, VR( 1, KS ), 1 ), + $ SNRM2( N, VR( 1, KS+1 ), 1 ) ) + LNRM = SLAPY2( SNRM2( N, VL( 1, KS ), 1 ), + $ SNRM2( N, VL( 1, KS+1 ), 1 ) ) + COND = SLAPY2( PROD1, PROD2 ) / ( RNRM*LNRM ) + S( KS ) = COND + S( KS+1 ) = COND + END IF + END IF +* + IF( WANTSP ) THEN +* +* Estimate the reciprocal condition number of the k-th +* eigenvector. +* +* Copy the matrix T to the array WORK and swap the diagonal +* block beginning at T(k,k) to the (1,1) position. +* + CALL SLACPY( 'Full', N, N, T, LDT, WORK, LDWORK ) + IFST = K + ILST = 1 + CALL STREXC( 'No Q', N, WORK, LDWORK, DUMMY, 1, IFST, ILST, + $ WORK( 1, N+1 ), IERR ) +* + IF( IERR.EQ.1 .OR. IERR.EQ.2 ) THEN +* +* Could not swap because blocks not well separated +* + SCALE = ONE + EST = BIGNUM + ELSE +* +* Reordering successful +* + IF( WORK( 2, 1 ).EQ.ZERO ) THEN +* +* Form C = T22 - lambda*I in WORK(2:N,2:N). +* + DO 20 I = 2, N + WORK( I, I ) = WORK( I, I ) - WORK( 1, 1 ) + 20 CONTINUE + N2 = 1 + NN = N - 1 + ELSE +* +* Triangularize the 2 by 2 block by unitary +* transformation U = [ cs i*ss ] +* [ i*ss cs ]. +* such that the (1,1) position of WORK is complex +* eigenvalue lambda with positive imaginary part. (2,2) +* position of WORK is the complex eigenvalue lambda +* with negative imaginary part. +* + MU = SQRT( ABS( WORK( 1, 2 ) ) )* + $ SQRT( ABS( WORK( 2, 1 ) ) ) + DELTA = SLAPY2( MU, WORK( 2, 1 ) ) + CS = MU / DELTA + SN = -WORK( 2, 1 ) / DELTA +* +* Form +* +* C' = WORK(2:N,2:N) + i*[rwork(1) ..... rwork(n-1) ] +* [ mu ] +* [ .. ] +* [ .. ] +* [ mu ] +* where C' is conjugate transpose of complex matrix C, +* and RWORK is stored starting in the N+1-st column of +* WORK. +* + DO 30 J = 3, N + WORK( 2, J ) = CS*WORK( 2, J ) + WORK( J, J ) = WORK( J, J ) - WORK( 1, 1 ) + 30 CONTINUE + WORK( 2, 2 ) = ZERO +* + WORK( 1, N+1 ) = TWO*MU + DO 40 I = 2, N - 1 + WORK( I, N+1 ) = SN*WORK( 1, I+1 ) + 40 CONTINUE + N2 = 2 + NN = 2*( N-1 ) + END IF +* +* Estimate norm(inv(C')) +* + EST = ZERO + KASE = 0 + 50 CONTINUE + CALL SLACON( NN, WORK( 1, N+2 ), WORK( 1, N+4 ), IWORK, + $ EST, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN + IF( N2.EQ.1 ) THEN +* +* Real eigenvalue: solve C'*x = scale*c. +* + CALL SLAQTR( .TRUE., .TRUE., N-1, WORK( 2, 2 ), + $ LDWORK, DUMMY, DUMM, SCALE, + $ WORK( 1, N+4 ), WORK( 1, N+6 ), + $ IERR ) + ELSE +* +* Complex eigenvalue: solve +* C'*(p+iq) = scale*(c+id) in real arithmetic. +* + CALL SLAQTR( .TRUE., .FALSE., N-1, WORK( 2, 2 ), + $ LDWORK, WORK( 1, N+1 ), MU, SCALE, + $ WORK( 1, N+4 ), WORK( 1, N+6 ), + $ IERR ) + END IF + ELSE + IF( N2.EQ.1 ) THEN +* +* Real eigenvalue: solve C*x = scale*c. +* + CALL SLAQTR( .FALSE., .TRUE., N-1, WORK( 2, 2 ), + $ LDWORK, DUMMY, DUMM, SCALE, + $ WORK( 1, N+4 ), WORK( 1, N+6 ), + $ IERR ) + ELSE +* +* Complex eigenvalue: solve +* C*(p+iq) = scale*(c+id) in real arithmetic. +* + CALL SLAQTR( .FALSE., .FALSE., N-1, + $ WORK( 2, 2 ), LDWORK, + $ WORK( 1, N+1 ), MU, SCALE, + $ WORK( 1, N+4 ), WORK( 1, N+6 ), + $ IERR ) +* + END IF + END IF +* + GO TO 50 + END IF + END IF +* + SEP( KS ) = SCALE / MAX( EST, SMLNUM ) + IF( PAIR ) + $ SEP( KS+1 ) = SEP( KS ) + END IF +* + IF( PAIR ) + $ KS = KS + 1 +* + 60 CONTINUE + RETURN +* +* End of STRSNA +* + END diff --git a/costa/native/external/lapack/strsyl.f b/costa/native/external/lapack/strsyl.f new file mode 100644 index 000000000..5d8e3301f --- /dev/null +++ b/costa/native/external/lapack/strsyl.f @@ -0,0 +1,914 @@ + SUBROUTINE STRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, + $ LDC, SCALE, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER TRANA, TRANB + INTEGER INFO, ISGN, LDA, LDB, LDC, M, N + REAL SCALE +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* STRSYL solves the real Sylvester matrix equation: +* +* op(A)*X + X*op(B) = scale*C or +* op(A)*X - X*op(B) = scale*C, +* +* where op(A) = A or A**T, and A and B are both upper quasi- +* triangular. A is M-by-M and B is N-by-N; the right hand side C and +* the solution X are M-by-N; and scale is an output scale factor, set +* <= 1 to avoid overflow in X. +* +* A and B must be in Schur canonical form (as returned by SHSEQR), that +* is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; +* each 2-by-2 diagonal block has its diagonal elements equal and its +* off-diagonal elements of opposite sign. +* +* Arguments +* ========= +* +* TRANA (input) CHARACTER*1 +* Specifies the option op(A): +* = 'N': op(A) = A (No transpose) +* = 'T': op(A) = A**T (Transpose) +* = 'C': op(A) = A**H (Conjugate transpose = Transpose) +* +* TRANB (input) CHARACTER*1 +* Specifies the option op(B): +* = 'N': op(B) = B (No transpose) +* = 'T': op(B) = B**T (Transpose) +* = 'C': op(B) = B**H (Conjugate transpose = Transpose) +* +* ISGN (input) INTEGER +* Specifies the sign in the equation: +* = +1: solve op(A)*X + X*op(B) = scale*C +* = -1: solve op(A)*X - X*op(B) = scale*C +* +* M (input) INTEGER +* The order of the matrix A, and the number of rows in the +* matrices X and C. M >= 0. +* +* N (input) INTEGER +* The order of the matrix B, and the number of columns in the +* matrices X and C. N >= 0. +* +* A (input) REAL array, dimension (LDA,M) +* The upper quasi-triangular matrix A, in Schur canonical form. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input) REAL array, dimension (LDB,N) +* The upper quasi-triangular matrix B, in Schur canonical form. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* C (input/output) REAL array, dimension (LDC,N) +* On entry, the M-by-N right hand side matrix C. +* On exit, C is overwritten by the solution matrix X. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M) +* +* SCALE (output) REAL +* The scale factor, scale, set <= 1 to avoid overflow in X. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* = 1: A and B have common or very close eigenvalues; perturbed +* values were used to solve the equation (but the matrices +* A and B are unchanged). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRNA, NOTRNB + INTEGER IERR, J, K, K1, K2, KNEXT, L, L1, L2, LNEXT + REAL A11, BIGNUM, DA11, DB, EPS, SCALOC, SGN, SMIN, + $ SMLNUM, SUML, SUMR, XNORM +* .. +* .. Local Arrays .. + REAL DUM( 1 ), VEC( 2, 2 ), X( 2, 2 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT, SLAMCH, SLANGE + EXTERNAL LSAME, SDOT, SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SLABAD, SLALN2, SLASY2, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, REAL +* .. +* .. Executable Statements .. +* +* Decode and Test input parameters +* + NOTRNA = LSAME( TRANA, 'N' ) + NOTRNB = LSAME( TRANB, 'N' ) +* + INFO = 0 + IF( .NOT.NOTRNA .AND. .NOT.LSAME( TRANA, 'T' ) .AND. .NOT. + $ LSAME( TRANA, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRNB .AND. .NOT.LSAME( TRANB, 'T' ) .AND. .NOT. + $ LSAME( TRANB, 'C' ) ) THEN + INFO = -2 + ELSE IF( ISGN.NE.1 .AND. ISGN.NE.-1 ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRSYL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Set constants to control overflow +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM*REAL( M*N ) / EPS + BIGNUM = ONE / SMLNUM +* + SMIN = MAX( SMLNUM, EPS*SLANGE( 'M', M, M, A, LDA, DUM ), + $ EPS*SLANGE( 'M', N, N, B, LDB, DUM ) ) +* + SCALE = ONE + SGN = ISGN +* + IF( NOTRNA .AND. NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* bottom-left corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* M L-1 +* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)]. +* I=K+1 J=1 +* +* Start column loop (index = L) +* L1 (L2) : column index of the first (first) row of X(K,L). +* + LNEXT = 1 + DO 70 L = 1, N + IF( L.LT.LNEXT ) + $ GO TO 70 + IF( L.EQ.N ) THEN + L1 = L + L2 = L + ELSE + IF( B( L+1, L ).NE.ZERO ) THEN + L1 = L + L2 = L + 1 + LNEXT = L + 2 + ELSE + L1 = L + L2 = L + LNEXT = L + 1 + END IF + END IF +* +* Start row loop (index = K) +* K1 (K2): row index of the first (last) row of X(K,L). +* + KNEXT = M + DO 60 K = M, 1, -1 + IF( K.GT.KNEXT ) + $ GO TO 60 + IF( K.EQ.1 ) THEN + K1 = K + K2 = K + ELSE + IF( A( K, K-1 ).NE.ZERO ) THEN + K1 = K - 1 + K2 = K + KNEXT = K - 2 + ELSE + K1 = K + K2 = K + KNEXT = K - 1 + END IF + END IF +* + IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN + SUML = SDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) + SCALOC = ONE +* + A11 = A( K1, K1 ) + SGN*B( L1, L1 ) + DA11 = ABS( A11 ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( VEC( 1, 1 ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11 +* + IF( SCALOC.NE.ONE ) THEN + DO 10 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 10 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) +* + ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN +* + SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, A( K1, K1 ), + $ LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 20 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 20 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K2, L1 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN +* + SUML = SDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) ) +* + SUML = SDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L2 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) ) +* + CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, B( L1, L1 ), + $ LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 40 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 40 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN +* + SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L2 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L2 ), 1 ) + SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR ) +* + CALL SLASY2( .FALSE., .FALSE., ISGN, 2, 2, + $ A( K1, K1 ), LDA, B( L1, L1 ), LDB, VEC, + $ 2, SCALOC, X, 2, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 50 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 50 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 1, 2 ) + C( K2, L1 ) = X( 2, 1 ) + C( K2, L2 ) = X( 2, 2 ) + END IF +* + 60 CONTINUE +* + 70 CONTINUE +* + ELSE IF( .NOT.NOTRNA .AND. NOTRNB ) THEN +* +* Solve A' *X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* upper-left corner column by column by +* +* A(K,K)'*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* K-1 L-1 +* R(K,L) = SUM [A(I,K)'*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)] +* I=1 J=1 +* +* Start column loop (index = L) +* L1 (L2): column index of the first (last) row of X(K,L) +* + LNEXT = 1 + DO 130 L = 1, N + IF( L.LT.LNEXT ) + $ GO TO 130 + IF( L.EQ.N ) THEN + L1 = L + L2 = L + ELSE + IF( B( L+1, L ).NE.ZERO ) THEN + L1 = L + L2 = L + 1 + LNEXT = L + 2 + ELSE + L1 = L + L2 = L + LNEXT = L + 1 + END IF + END IF +* +* Start row loop (index = K) +* K1 (K2): row index of the first (last) row of X(K,L) +* + KNEXT = 1 + DO 120 K = 1, M + IF( K.LT.KNEXT ) + $ GO TO 120 + IF( K.EQ.M ) THEN + K1 = K + K2 = K + ELSE + IF( A( K+1, K ).NE.ZERO ) THEN + K1 = K + K2 = K + 1 + KNEXT = K + 2 + ELSE + K1 = K + K2 = K + KNEXT = K + 1 + END IF + END IF +* + IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) + SCALOC = ONE +* + A11 = A( K1, K1 ) + SGN*B( L1, L1 ) + DA11 = ABS( A11 ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( VEC( 1, 1 ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11 +* + IF( SCALOC.NE.ONE ) THEN + DO 80 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 80 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) +* + ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, A( K1, K1 ), + $ LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 90 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 90 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K2, L1 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) ) +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) ) +* + CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, B( L1, L1 ), + $ LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 100 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 100 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L2 ), 1 ) + SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR ) +* + CALL SLASY2( .TRUE., .FALSE., ISGN, 2, 2, A( K1, K1 ), + $ LDA, B( L1, L1 ), LDB, VEC, 2, SCALOC, X, + $ 2, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 110 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 110 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 1, 2 ) + C( K2, L1 ) = X( 2, 1 ) + C( K2, L2 ) = X( 2, 2 ) + END IF +* + 120 CONTINUE + 130 CONTINUE +* + ELSE IF( .NOT.NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A'*X + ISGN*X*B' = scale*C. +* +* The (K,L)th block of X is determined starting from +* top-right corner column by column by +* +* A(K,K)'*X(K,L) + ISGN*X(K,L)*B(L,L)' = C(K,L) - R(K,L) +* +* Where +* K-1 N +* R(K,L) = SUM [A(I,K)'*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)']. +* I=1 J=L+1 +* +* Start column loop (index = L) +* L1 (L2): column index of the first (last) row of X(K,L) +* + LNEXT = N + DO 190 L = N, 1, -1 + IF( L.GT.LNEXT ) + $ GO TO 190 + IF( L.EQ.1 ) THEN + L1 = L + L2 = L + ELSE + IF( B( L, L-1 ).NE.ZERO ) THEN + L1 = L - 1 + L2 = L + LNEXT = L - 2 + ELSE + L1 = L + L2 = L + LNEXT = L - 1 + END IF + END IF +* +* Start row loop (index = K) +* K1 (K2): row index of the first (last) row of X(K,L) +* + KNEXT = 1 + DO 180 K = 1, M + IF( K.LT.KNEXT ) + $ GO TO 180 + IF( K.EQ.M ) THEN + K1 = K + K2 = K + ELSE + IF( A( K+1, K ).NE.ZERO ) THEN + K1 = K + K2 = K + 1 + KNEXT = K + 2 + ELSE + K1 = K + K2 = K + KNEXT = K + 1 + END IF + END IF +* + IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( N-L1, C( K1, MIN( L1+1, N ) ), LDC, + $ B( L1, MIN( L1+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) + SCALOC = ONE +* + A11 = A( K1, K1 ) + SGN*B( L1, L1 ) + DA11 = ABS( A11 ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( VEC( 1, 1 ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11 +* + IF( SCALOC.NE.ONE ) THEN + DO 140 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 140 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) +* + ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, A( K1, K1 ), + $ LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 150 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 150 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K2, L1 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) ) +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) ) +* + CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, B( L1, L1 ), + $ LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 160 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 160 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L2 ), 1 ) + SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN(L2+1, N ) ), LDB ) + VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR ) +* + CALL SLASY2( .TRUE., .TRUE., ISGN, 2, 2, A( K1, K1 ), + $ LDA, B( L1, L1 ), LDB, VEC, 2, SCALOC, X, + $ 2, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 170 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 170 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 1, 2 ) + C( K2, L1 ) = X( 2, 1 ) + C( K2, L2 ) = X( 2, 2 ) + END IF +* + 180 CONTINUE + 190 CONTINUE +* + ELSE IF( NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B' = scale*C. +* +* The (K,L)th block of X is determined starting from +* bottom-right corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)' = C(K,L) - R(K,L) +* +* Where +* M N +* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)']. +* I=K+1 J=L+1 +* +* Start column loop (index = L) +* L1 (L2): column index of the first (last) row of X(K,L) +* + LNEXT = N + DO 250 L = N, 1, -1 + IF( L.GT.LNEXT ) + $ GO TO 250 + IF( L.EQ.1 ) THEN + L1 = L + L2 = L + ELSE + IF( B( L, L-1 ).NE.ZERO ) THEN + L1 = L - 1 + L2 = L + LNEXT = L - 2 + ELSE + L1 = L + L2 = L + LNEXT = L - 1 + END IF + END IF +* +* Start row loop (index = K) +* K1 (K2): row index of the first (last) row of X(K,L) +* + KNEXT = M + DO 240 K = M, 1, -1 + IF( K.GT.KNEXT ) + $ GO TO 240 + IF( K.EQ.1 ) THEN + K1 = K + K2 = K + ELSE + IF( A( K, K-1 ).NE.ZERO ) THEN + K1 = K - 1 + K2 = K + KNEXT = K - 2 + ELSE + K1 = K + K2 = K + KNEXT = K - 1 + END IF + END IF +* + IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN + SUML = SDOT( M-K1, A( K1, MIN(K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L1 ), 1 ) + SUMR = SDOT( N-L1, C( K1, MIN( L1+1, N ) ), LDC, + $ B( L1, MIN( L1+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) + SCALOC = ONE +* + A11 = A( K1, K1 ) + SGN*B( L1, L1 ) + DA11 = ABS( A11 ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( VEC( 1, 1 ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11 +* + IF( SCALOC.NE.ONE ) THEN + DO 200 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 200 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) +* + ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN +* + SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, A( K1, K1 ), + $ LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 210 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 210 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K2, L1 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN +* + SUML = SDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L1 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) ) +* + SUML = SDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L2 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) ) +* + CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, B( L1, L1 ), + $ LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 220 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 220 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN +* + SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L2 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L2 ), 1 ) + SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR ) +* + CALL SLASY2( .FALSE., .TRUE., ISGN, 2, 2, A( K1, K1 ), + $ LDA, B( L1, L1 ), LDB, VEC, 2, SCALOC, X, + $ 2, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 230 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 230 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 1, 2 ) + C( K2, L1 ) = X( 2, 1 ) + C( K2, L2 ) = X( 2, 2 ) + END IF +* + 240 CONTINUE + 250 CONTINUE +* + END IF +* + RETURN +* +* End of STRSYL +* + END diff --git a/costa/native/external/lapack/strti2.f b/costa/native/external/lapack/strti2.f new file mode 100644 index 000000000..56d7d6a93 --- /dev/null +++ b/costa/native/external/lapack/strti2.f @@ -0,0 +1,147 @@ + SUBROUTINE STRTI2( UPLO, DIAG, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* STRTI2 computes the inverse of a real upper or lower triangular +* matrix. +* +* This is the Level 2 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the triangular matrix A. If UPLO = 'U', the +* leading n by n upper triangular part of the array A contains +* the upper triangular matrix, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n by n lower triangular part of the array A contains +* the lower triangular matrix, and the strictly upper +* triangular part of A is not referenced. If DIAG = 'U', the +* diagonal elements of A are also not referenced and are +* assumed to be 1. +* +* On exit, the (triangular) inverse of the original matrix, in +* the same storage format. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, STRMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRTI2', -INFO ) + RETURN + END IF +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix. +* + DO 10 J = 1, N + IF( NOUNIT ) THEN + A( J, J ) = ONE / A( J, J ) + AJJ = -A( J, J ) + ELSE + AJJ = -ONE + END IF +* +* Compute elements 1:j-1 of j-th column. +* + CALL STRMV( 'Upper', 'No transpose', DIAG, J-1, A, LDA, + $ A( 1, J ), 1 ) + CALL SSCAL( J-1, AJJ, A( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* Compute inverse of lower triangular matrix. +* + DO 20 J = N, 1, -1 + IF( NOUNIT ) THEN + A( J, J ) = ONE / A( J, J ) + AJJ = -A( J, J ) + ELSE + AJJ = -ONE + END IF + IF( J.LT.N ) THEN +* +* Compute elements j+1:n of j-th column. +* + CALL STRMV( 'Lower', 'No transpose', DIAG, N-J, + $ A( J+1, J+1 ), LDA, A( J+1, J ), 1 ) + CALL SSCAL( N-J, AJJ, A( J+1, J ), 1 ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of STRTI2 +* + END diff --git a/costa/native/external/lapack/strtri.f b/costa/native/external/lapack/strtri.f new file mode 100644 index 000000000..f8dfac142 --- /dev/null +++ b/costa/native/external/lapack/strtri.f @@ -0,0 +1,177 @@ + SUBROUTINE STRTRI( UPLO, DIAG, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* STRTRI computes the inverse of a real upper or lower triangular +* matrix A. +* +* This is the Level 3 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the triangular matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of the array A contains +* the upper triangular matrix, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of the array A contains +* the lower triangular matrix, and the strictly upper +* triangular part of A is not referenced. If DIAG = 'U', the +* diagonal elements of A are also not referenced and are +* assumed to be 1. +* On exit, the (triangular) inverse of the original matrix, in +* the same storage format. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, A(i,i) is exactly zero. The triangular +* matrix is singular and its inverse can not be computed. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JB, NB, NN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL STRMM, STRSM, STRTI2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity if non-unit. +* + IF( NOUNIT ) THEN + DO 10 INFO = 1, N + IF( A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + INFO = 0 + END IF +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL STRTI2( UPLO, DIAG, N, A, LDA, INFO ) + ELSE +* +* Use blocked code +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix +* + DO 20 J = 1, N, NB + JB = MIN( NB, N-J+1 ) +* +* Compute rows 1:j-1 of current block column +* + CALL STRMM( 'Left', 'Upper', 'No transpose', DIAG, J-1, + $ JB, ONE, A, LDA, A( 1, J ), LDA ) + CALL STRSM( 'Right', 'Upper', 'No transpose', DIAG, J-1, + $ JB, -ONE, A( J, J ), LDA, A( 1, J ), LDA ) +* +* Compute inverse of current diagonal block +* + CALL STRTI2( 'Upper', DIAG, JB, A( J, J ), LDA, INFO ) + 20 CONTINUE + ELSE +* +* Compute inverse of lower triangular matrix +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 30 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) + IF( J+JB.LE.N ) THEN +* +* Compute rows j+jb:n of current block column +* + CALL STRMM( 'Left', 'Lower', 'No transpose', DIAG, + $ N-J-JB+1, JB, ONE, A( J+JB, J+JB ), LDA, + $ A( J+JB, J ), LDA ) + CALL STRSM( 'Right', 'Lower', 'No transpose', DIAG, + $ N-J-JB+1, JB, -ONE, A( J, J ), LDA, + $ A( J+JB, J ), LDA ) + END IF +* +* Compute inverse of current diagonal block +* + CALL STRTI2( 'Lower', DIAG, JB, A( J, J ), LDA, INFO ) + 30 CONTINUE + END IF + END IF +* + RETURN +* +* End of STRTRI +* + END diff --git a/costa/native/external/lapack/strtrs.f b/costa/native/external/lapack/strtrs.f new file mode 100644 index 000000000..9be345ccf --- /dev/null +++ b/costa/native/external/lapack/strtrs.f @@ -0,0 +1,148 @@ + SUBROUTINE STRTRS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* STRTRS solves a triangular system of the form +* +* A * X = B or A**T * X = B, +* +* where A is a triangular matrix of order N, and B is an N-by-NRHS +* matrix. A check is made to verify that A is nonsingular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose = Transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) REAL array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) REAL array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, if INFO = 0, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the i-th diagonal element of A is zero, +* indicating that the matrix is singular and the solutions +* X have not been computed. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + DO 10 INFO = 1, N + IF( A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + END IF + INFO = 0 +* +* Solve A * x = b or A' * x = b. +* + CALL STRSM( 'Left', UPLO, TRANS, DIAG, N, NRHS, ONE, A, LDA, B, + $ LDB ) +* + RETURN +* +* End of STRTRS +* + END diff --git a/costa/native/external/lapack/stzrqf.f b/costa/native/external/lapack/stzrqf.f new file mode 100644 index 000000000..3de793103 --- /dev/null +++ b/costa/native/external/lapack/stzrqf.f @@ -0,0 +1,165 @@ + SUBROUTINE STZRQF( M, N, A, LDA, TAU, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine STZRZF. +* +* STZRQF reduces the M-by-N ( M<=N ) real upper trapezoidal matrix A +* to upper triangular form by means of orthogonal transformations. +* +* The upper trapezoidal matrix A is factored as +* +* A = ( R 0 ) * Z, +* +* where Z is an N-by-N orthogonal matrix and R is an M-by-M upper +* triangular matrix. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= M. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the leading M-by-N upper trapezoidal part of the +* array A must contain the matrix to be factorized. +* On exit, the leading M-by-M upper triangular part of A +* contains the upper triangular matrix R, and elements M+1 to +* N of the first M rows of A, with the array TAU, represent the +* orthogonal matrix Z as a product of M elementary reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) REAL array, dimension (M) +* The scalar factors of the elementary reflectors. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The factorization is obtained by Householder's method. The kth +* transformation matrix, Z( k ), which is used to introduce zeros into +* the ( m - k + 1 )th row of A, is given in the form +* +* Z( k ) = ( I 0 ), +* ( 0 T( k ) ) +* +* where +* +* T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), +* ( 0 ) +* ( z( k ) ) +* +* tau is a scalar and z( k ) is an ( n - m ) element vector. +* tau and z( k ) are chosen to annihilate the elements of the kth row +* of X. +* +* The scalar tau is returned in the kth element of TAU and the vector +* u( k ) in the kth row of A, such that the elements of z( k ) are +* in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in +* the upper triangular part of A. +* +* Z is given by +* +* Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K, M1 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGEMV, SGER, SLARFG, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STZRQF', -INFO ) + RETURN + END IF +* +* Perform the factorization. +* + IF( M.EQ.0 ) + $ RETURN + IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + ELSE + M1 = MIN( M+1, N ) + DO 20 K = M, 1, -1 +* +* Use a Householder reflection to zero the kth row of A. +* First set up the reflection. +* + CALL SLARFG( N-M+1, A( K, K ), A( K, M1 ), LDA, TAU( K ) ) +* + IF( ( TAU( K ).NE.ZERO ) .AND. ( K.GT.1 ) ) THEN +* +* We now perform the operation A := A*P( k ). +* +* Use the first ( k - 1 ) elements of TAU to store a( k ), +* where a( k ) consists of the first ( k - 1 ) elements of +* the kth column of A. Also let B denote the first +* ( k - 1 ) rows of the last ( n - m ) columns of A. +* + CALL SCOPY( K-1, A( 1, K ), 1, TAU, 1 ) +* +* Form w = a( k ) + B*z( k ) in TAU. +* + CALL SGEMV( 'No transpose', K-1, N-M, ONE, A( 1, M1 ), + $ LDA, A( K, M1 ), LDA, ONE, TAU, 1 ) +* +* Now form a( k ) := a( k ) - tau*w +* and B := B - tau*w*z( k )'. +* + CALL SAXPY( K-1, -TAU( K ), TAU, 1, A( 1, K ), 1 ) + CALL SGER( K-1, N-M, -TAU( K ), TAU, 1, A( K, M1 ), LDA, + $ A( 1, M1 ), LDA ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of STZRQF +* + END diff --git a/costa/native/external/lapack/stzrzf.f b/costa/native/external/lapack/stzrzf.f new file mode 100644 index 000000000..eb198bccb --- /dev/null +++ b/costa/native/external/lapack/stzrzf.f @@ -0,0 +1,241 @@ + SUBROUTINE STZRZF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* STZRZF reduces the M-by-N ( M<=N ) real upper trapezoidal matrix A +* to upper triangular form by means of orthogonal transformations. +* +* The upper trapezoidal matrix A is factored as +* +* A = ( R 0 ) * Z, +* +* where Z is an N-by-N orthogonal matrix and R is an M-by-M upper +* triangular matrix. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) REAL array, dimension (LDA,N) +* On entry, the leading M-by-N upper trapezoidal part of the +* array A must contain the matrix to be factorized. +* On exit, the leading M-by-M upper triangular part of A +* contains the upper triangular matrix R, and elements M+1 to +* N of the first M rows of A, with the array TAU, represent the +* orthogonal matrix Z as a product of M elementary reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) REAL array, dimension (M) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace/output) REAL array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* The factorization is obtained by Householder's method. The kth +* transformation matrix, Z( k ), which is used to introduce zeros into +* the ( m - k + 1 )th row of A, is given in the form +* +* Z( k ) = ( I 0 ), +* ( 0 T( k ) ) +* +* where +* +* T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), +* ( 0 ) +* ( z( k ) ) +* +* tau is a scalar and z( k ) is an ( n - m ) element vector. +* tau and z( k ) are chosen to annihilate the elements of the kth row +* of X. +* +* The scalar tau is returned in the kth element of TAU and the vector +* u( k ) in the kth row of A, such that the elements of z( k ) are +* in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in +* the upper triangular part of A. +* +* Z is given by +* +* Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IWS, KI, KK, LDWORK, LWKOPT, M1, MU, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SLARZB, SLARZT, SLATRZ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. +* + NB = ILAENV( 1, 'SGERQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STZRZF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + ELSE IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = M + IF( NB.GT.1 .AND. NB.LT.M ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SGERQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.M ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SGERQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.M .AND. NX.LT.M ) THEN +* +* Use blocked code initially. +* The last kk rows are handled by the block method. +* + M1 = MIN( M+1, N ) + KI = ( ( M-NX-1 ) / NB )*NB + KK = MIN( M, KI+NB ) +* + DO 20 I = M - KK + KI + 1, M - KK + 1, -NB + IB = MIN( M-I+1, NB ) +* +* Compute the TZ factorization of the current block +* A(i:i+ib-1,i:n) +* + CALL SLATRZ( IB, N-I+1, N-M, A( I, I ), LDA, TAU( I ), + $ WORK ) + IF( I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL SLARZT( 'Backward', 'Rowwise', N-M, IB, A( I, M1 ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:i-1,i:n) from the right +* + CALL SLARZB( 'Right', 'No transpose', 'Backward', + $ 'Rowwise', I-1, N-I+1, IB, N-M, A( I, M1 ), + $ LDA, WORK, LDWORK, A( 1, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 20 CONTINUE + MU = I + NB - 1 + ELSE + MU = M + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 ) + $ CALL SLATRZ( MU, N, N-M, A, LDA, TAU, WORK ) +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of STZRZF +* + END diff --git a/costa/native/external/lapack/xerbla.f b/costa/native/external/lapack/xerbla.f new file mode 100644 index 000000000..6e11175f3 --- /dev/null +++ b/costa/native/external/lapack/xerbla.f @@ -0,0 +1,46 @@ + SUBROUTINE XERBLA( SRNAME, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER*6 SRNAME + INTEGER INFO +* .. +* +* Purpose +* ======= +* +* XERBLA is an error handler for the LAPACK routines. +* It is called by an LAPACK routine if an input parameter has an +* invalid value. A message is printed and execution stops. +* +* Installers may consider modifying the STOP statement in order to +* call system-specific exception-handling facilities. +* +* Arguments +* ========= +* +* SRNAME (input) CHARACTER*6 +* The name of the routine which called XERBLA. +* +* INFO (input) INTEGER +* The position of the invalid parameter in the parameter list +* of the calling routine. +* +* ===================================================================== +* +* .. Executable Statements .. +* + WRITE( *, FMT = 9999 )SRNAME, INFO +* + STOP +* + 9999 FORMAT( ' ** On entry to ', A6, ' parameter number ', I2, ' had ', + $ 'an illegal value' ) +* +* End of XERBLA +* + END diff --git a/costa/native/external/lapack/zbdsqr.f b/costa/native/external/lapack/zbdsqr.f new file mode 100644 index 000000000..41f809263 --- /dev/null +++ b/costa/native/external/lapack/zbdsqr.f @@ -0,0 +1,733 @@ + SUBROUTINE ZBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, + $ LDU, C, LDC, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ), RWORK( * ) + COMPLEX*16 C( LDC, * ), U( LDU, * ), VT( LDVT, * ) +* .. +* +* Purpose +* ======= +* +* ZBDSQR computes the singular value decomposition (SVD) of a real +* N-by-N (upper or lower) bidiagonal matrix B: B = Q * S * P' (P' +* denotes the transpose of P), where S is a diagonal matrix with +* non-negative diagonal elements (the singular values of B), and Q +* and P are orthogonal matrices. +* +* The routine computes S, and optionally computes U * Q, P' * VT, +* or Q' * C, for given complex input matrices U, VT, and C. +* +* See "Computing Small Singular Values of Bidiagonal Matrices With +* Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, +* LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, +* no. 5, pp. 873-912, Sept 1990) and +* "Accurate singular values and differential qd algorithms," by +* B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics +* Department, University of California at Berkeley, July 1992 +* for a detailed description of the algorithm. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': B is upper bidiagonal; +* = 'L': B is lower bidiagonal. +* +* N (input) INTEGER +* The order of the matrix B. N >= 0. +* +* NCVT (input) INTEGER +* The number of columns of the matrix VT. NCVT >= 0. +* +* NRU (input) INTEGER +* The number of rows of the matrix U. NRU >= 0. +* +* NCC (input) INTEGER +* The number of columns of the matrix C. NCC >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the n diagonal elements of the bidiagonal matrix B. +* On exit, if INFO=0, the singular values of B in decreasing +* order. +* +* E (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the elements of E contain the +* offdiagonal elements of of the bidiagonal matrix whose SVD +* is desired. On normal exit (INFO = 0), E is destroyed. +* If the algorithm does not converge (INFO > 0), D and E +* will contain the diagonal and superdiagonal elements of a +* bidiagonal matrix orthogonally equivalent to the one given +* as input. E(N) is used for workspace. +* +* VT (input/output) COMPLEX*16 array, dimension (LDVT, NCVT) +* On entry, an N-by-NCVT matrix VT. +* On exit, VT is overwritten by P' * VT. +* VT is not referenced if NCVT = 0. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. +* LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. +* +* U (input/output) COMPLEX*16 array, dimension (LDU, N) +* On entry, an NRU-by-N matrix U. +* On exit, U is overwritten by U * Q. +* U is not referenced if NRU = 0. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= max(1,NRU). +* +* C (input/output) COMPLEX*16 array, dimension (LDC, NCC) +* On entry, an N-by-NCC matrix C. +* On exit, C is overwritten by Q' * C. +* C is not referenced if NCC = 0. +* +* LDC (input) INTEGER +* The leading dimension of the array C. +* LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (4*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: If INFO = -i, the i-th argument had an illegal value +* > 0: the algorithm did not converge; D and E contain the +* elements of a bidiagonal matrix which is orthogonally +* similar to the input matrix B; if INFO = i, i +* elements of E have not converged to zero. +* +* Internal Parameters +* =================== +* +* TOLMUL DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8))) +* TOLMUL controls the convergence criterion of the QR loop. +* If it is positive, TOLMUL*EPS is the desired relative +* precision in the computed singular values. +* If it is negative, abs(TOLMUL*EPS*sigma_max) is the +* desired absolute accuracy in the computed singular +* values (corresponds to relative accuracy +* abs(TOLMUL*EPS) in the largest singular value. +* abs(TOLMUL) should be between 1 and 1/EPS, and preferably +* between 10 (for fast convergence) and .1/EPS +* (for there to be some accuracy in the results). +* Default is to lose at either one eighth or 2 of the +* available decimal digits in each computed singular value +* (whichever is smaller). +* +* MAXITR INTEGER, default = 6 +* MAXITR controls the maximum number of passes of the +* algorithm through its inner loop. The algorithms stops +* (and so fails to converge) if the number of passes +* through the inner loop exceeds MAXITR*N**2. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + DOUBLE PRECISION NEGONE + PARAMETER ( NEGONE = -1.0D0 ) + DOUBLE PRECISION HNDRTH + PARAMETER ( HNDRTH = 0.01D0 ) + DOUBLE PRECISION TEN + PARAMETER ( TEN = 10.0D0 ) + DOUBLE PRECISION HNDRD + PARAMETER ( HNDRD = 100.0D0 ) + DOUBLE PRECISION MEIGTH + PARAMETER ( MEIGTH = -0.125D0 ) + INTEGER MAXITR + PARAMETER ( MAXITR = 6 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, ROTATE + INTEGER I, IDIR, ISUB, ITER, J, LL, LLL, M, MAXIT, NM1, + $ NM12, NM13, OLDLL, OLDM + DOUBLE PRECISION ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU, + $ OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL, + $ SINR, SLL, SMAX, SMIN, SMINL, SMINLO, SMINOA, + $ SN, THRESH, TOL, TOLMUL, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLARTG, DLAS2, DLASQ1, DLASV2, XERBLA, ZDROT, + $ ZDSCAL, ZLASR, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LOWER ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NCVT.LT.0 ) THEN + INFO = -3 + ELSE IF( NRU.LT.0 ) THEN + INFO = -4 + ELSE IF( NCC.LT.0 ) THEN + INFO = -5 + ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR. + $ ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN + INFO = -9 + ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN + INFO = -11 + ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR. + $ ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZBDSQR', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN + IF( N.EQ.1 ) + $ GO TO 160 +* +* ROTATE is true if any singular vectors desired, false otherwise +* + ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 ) +* +* If no singular vectors desired, use qd algorithm +* + IF( .NOT.ROTATE ) THEN + CALL DLASQ1( N, D, E, RWORK, INFO ) + RETURN + END IF +* + NM1 = N - 1 + NM12 = NM1 + NM1 + NM13 = NM12 + NM1 + IDIR = 0 +* +* Get machine constants +* + EPS = DLAMCH( 'Epsilon' ) + UNFL = DLAMCH( 'Safe minimum' ) +* +* If matrix lower bidiagonal, rotate to be upper bidiagonal +* by applying Givens rotations on the left +* + IF( LOWER ) THEN + DO 10 I = 1, N - 1 + CALL DLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + RWORK( I ) = CS + RWORK( NM1+I ) = SN + 10 CONTINUE +* +* Update singular vectors if desired +* + IF( NRU.GT.0 ) + $ CALL ZLASR( 'R', 'V', 'F', NRU, N, RWORK( 1 ), RWORK( N ), + $ U, LDU ) + IF( NCC.GT.0 ) + $ CALL ZLASR( 'L', 'V', 'F', N, NCC, RWORK( 1 ), RWORK( N ), + $ C, LDC ) + END IF +* +* Compute singular values to relative accuracy TOL +* (By setting TOL to be negative, algorithm will compute +* singular values to absolute accuracy ABS(TOL)*norm(input matrix)) +* + TOLMUL = MAX( TEN, MIN( HNDRD, EPS**MEIGTH ) ) + TOL = TOLMUL*EPS +* +* Compute approximate maximum, minimum singular values +* + SMAX = ZERO + DO 20 I = 1, N + SMAX = MAX( SMAX, ABS( D( I ) ) ) + 20 CONTINUE + DO 30 I = 1, N - 1 + SMAX = MAX( SMAX, ABS( E( I ) ) ) + 30 CONTINUE + SMINL = ZERO + IF( TOL.GE.ZERO ) THEN +* +* Relative accuracy desired +* + SMINOA = ABS( D( 1 ) ) + IF( SMINOA.EQ.ZERO ) + $ GO TO 50 + MU = SMINOA + DO 40 I = 2, N + MU = ABS( D( I ) )*( MU / ( MU+ABS( E( I-1 ) ) ) ) + SMINOA = MIN( SMINOA, MU ) + IF( SMINOA.EQ.ZERO ) + $ GO TO 50 + 40 CONTINUE + 50 CONTINUE + SMINOA = SMINOA / SQRT( DBLE( N ) ) + THRESH = MAX( TOL*SMINOA, MAXITR*N*N*UNFL ) + ELSE +* +* Absolute accuracy desired +* + THRESH = MAX( ABS( TOL )*SMAX, MAXITR*N*N*UNFL ) + END IF +* +* Prepare for main iteration loop for the singular values +* (MAXIT is the maximum number of passes through the inner +* loop permitted before nonconvergence signalled.) +* + MAXIT = MAXITR*N*N + ITER = 0 + OLDLL = -1 + OLDM = -1 +* +* M points to last element of unconverged part of matrix +* + M = N +* +* Begin main iteration loop +* + 60 CONTINUE +* +* Check for convergence or exceeding iteration count +* + IF( M.LE.1 ) + $ GO TO 160 + IF( ITER.GT.MAXIT ) + $ GO TO 200 +* +* Find diagonal block of matrix to work on +* + IF( TOL.LT.ZERO .AND. ABS( D( M ) ).LE.THRESH ) + $ D( M ) = ZERO + SMAX = ABS( D( M ) ) + SMIN = SMAX + DO 70 LLL = 1, M - 1 + LL = M - LLL + ABSS = ABS( D( LL ) ) + ABSE = ABS( E( LL ) ) + IF( TOL.LT.ZERO .AND. ABSS.LE.THRESH ) + $ D( LL ) = ZERO + IF( ABSE.LE.THRESH ) + $ GO TO 80 + SMIN = MIN( SMIN, ABSS ) + SMAX = MAX( SMAX, ABSS, ABSE ) + 70 CONTINUE + LL = 0 + GO TO 90 + 80 CONTINUE + E( LL ) = ZERO +* +* Matrix splits since E(LL) = 0 +* + IF( LL.EQ.M-1 ) THEN +* +* Convergence of bottom singular value, return to top of loop +* + M = M - 1 + GO TO 60 + END IF + 90 CONTINUE + LL = LL + 1 +* +* E(LL) through E(M-1) are nonzero, E(LL-1) is zero +* + IF( LL.EQ.M-1 ) THEN +* +* 2 by 2 block, handle separately +* + CALL DLASV2( D( M-1 ), E( M-1 ), D( M ), SIGMN, SIGMX, SINR, + $ COSR, SINL, COSL ) + D( M-1 ) = SIGMX + E( M-1 ) = ZERO + D( M ) = SIGMN +* +* Compute singular vectors, if desired +* + IF( NCVT.GT.0 ) + $ CALL ZDROT( NCVT, VT( M-1, 1 ), LDVT, VT( M, 1 ), LDVT, + $ COSR, SINR ) + IF( NRU.GT.0 ) + $ CALL ZDROT( NRU, U( 1, M-1 ), 1, U( 1, M ), 1, COSL, SINL ) + IF( NCC.GT.0 ) + $ CALL ZDROT( NCC, C( M-1, 1 ), LDC, C( M, 1 ), LDC, COSL, + $ SINL ) + M = M - 2 + GO TO 60 + END IF +* +* If working on new submatrix, choose shift direction +* (from larger end diagonal element towards smaller) +* + IF( LL.GT.OLDM .OR. M.LT.OLDLL ) THEN + IF( ABS( D( LL ) ).GE.ABS( D( M ) ) ) THEN +* +* Chase bulge from top (big end) to bottom (small end) +* + IDIR = 1 + ELSE +* +* Chase bulge from bottom (big end) to top (small end) +* + IDIR = 2 + END IF + END IF +* +* Apply convergence tests +* + IF( IDIR.EQ.1 ) THEN +* +* Run convergence test in forward direction +* First apply standard test to bottom of matrix +* + IF( ABS( E( M-1 ) ).LE.ABS( TOL )*ABS( D( M ) ) .OR. + $ ( TOL.LT.ZERO .AND. ABS( E( M-1 ) ).LE.THRESH ) ) THEN + E( M-1 ) = ZERO + GO TO 60 + END IF +* + IF( TOL.GE.ZERO ) THEN +* +* If relative accuracy desired, +* apply convergence criterion forward +* + MU = ABS( D( LL ) ) + SMINL = MU + DO 100 LLL = LL, M - 1 + IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN + E( LLL ) = ZERO + GO TO 60 + END IF + SMINLO = SMINL + MU = ABS( D( LLL+1 ) )*( MU / ( MU+ABS( E( LLL ) ) ) ) + SMINL = MIN( SMINL, MU ) + 100 CONTINUE + END IF +* + ELSE +* +* Run convergence test in backward direction +* First apply standard test to top of matrix +* + IF( ABS( E( LL ) ).LE.ABS( TOL )*ABS( D( LL ) ) .OR. + $ ( TOL.LT.ZERO .AND. ABS( E( LL ) ).LE.THRESH ) ) THEN + E( LL ) = ZERO + GO TO 60 + END IF +* + IF( TOL.GE.ZERO ) THEN +* +* If relative accuracy desired, +* apply convergence criterion backward +* + MU = ABS( D( M ) ) + SMINL = MU + DO 110 LLL = M - 1, LL, -1 + IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN + E( LLL ) = ZERO + GO TO 60 + END IF + SMINLO = SMINL + MU = ABS( D( LLL ) )*( MU / ( MU+ABS( E( LLL ) ) ) ) + SMINL = MIN( SMINL, MU ) + 110 CONTINUE + END IF + END IF + OLDLL = LL + OLDM = M +* +* Compute shift. First, test if shifting would ruin relative +* accuracy, and if so set the shift to zero. +* + IF( TOL.GE.ZERO .AND. N*TOL*( SMINL / SMAX ).LE. + $ MAX( EPS, HNDRTH*TOL ) ) THEN +* +* Use a zero shift to avoid loss of relative accuracy +* + SHIFT = ZERO + ELSE +* +* Compute the shift from 2-by-2 block at end of matrix +* + IF( IDIR.EQ.1 ) THEN + SLL = ABS( D( LL ) ) + CALL DLAS2( D( M-1 ), E( M-1 ), D( M ), SHIFT, R ) + ELSE + SLL = ABS( D( M ) ) + CALL DLAS2( D( LL ), E( LL ), D( LL+1 ), SHIFT, R ) + END IF +* +* Test if shift negligible, and if so set to zero +* + IF( SLL.GT.ZERO ) THEN + IF( ( SHIFT / SLL )**2.LT.EPS ) + $ SHIFT = ZERO + END IF + END IF +* +* Increment iteration count +* + ITER = ITER + M - LL +* +* If SHIFT = 0, do simplified QR iteration +* + IF( SHIFT.EQ.ZERO ) THEN + IF( IDIR.EQ.1 ) THEN +* +* Chase bulge from top to bottom +* Save cosines and sines for later singular vector updates +* + CS = ONE + OLDCS = ONE + DO 120 I = LL, M - 1 + CALL DLARTG( D( I )*CS, E( I ), CS, SN, R ) + IF( I.GT.LL ) + $ E( I-1 ) = OLDSN*R + CALL DLARTG( OLDCS*R, D( I+1 )*SN, OLDCS, OLDSN, D( I ) ) + RWORK( I-LL+1 ) = CS + RWORK( I-LL+1+NM1 ) = SN + RWORK( I-LL+1+NM12 ) = OLDCS + RWORK( I-LL+1+NM13 ) = OLDSN + 120 CONTINUE + H = D( M )*CS + D( M ) = H*OLDCS + E( M-1 ) = H*OLDSN +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL ZLASR( 'L', 'V', 'F', M-LL+1, NCVT, RWORK( 1 ), + $ RWORK( N ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL ZLASR( 'R', 'V', 'F', NRU, M-LL+1, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL ZLASR( 'L', 'V', 'F', M-LL+1, NCC, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( M-1 ) ).LE.THRESH ) + $ E( M-1 ) = ZERO +* + ELSE +* +* Chase bulge from bottom to top +* Save cosines and sines for later singular vector updates +* + CS = ONE + OLDCS = ONE + DO 130 I = M, LL + 1, -1 + CALL DLARTG( D( I )*CS, E( I-1 ), CS, SN, R ) + IF( I.LT.M ) + $ E( I ) = OLDSN*R + CALL DLARTG( OLDCS*R, D( I-1 )*SN, OLDCS, OLDSN, D( I ) ) + RWORK( I-LL ) = CS + RWORK( I-LL+NM1 ) = -SN + RWORK( I-LL+NM12 ) = OLDCS + RWORK( I-LL+NM13 ) = -OLDSN + 130 CONTINUE + H = D( LL )*CS + D( LL ) = H*OLDCS + E( LL ) = H*OLDSN +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL ZLASR( 'L', 'V', 'B', M-LL+1, NCVT, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL ZLASR( 'R', 'V', 'B', NRU, M-LL+1, RWORK( 1 ), + $ RWORK( N ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL ZLASR( 'L', 'V', 'B', M-LL+1, NCC, RWORK( 1 ), + $ RWORK( N ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( LL ) ).LE.THRESH ) + $ E( LL ) = ZERO + END IF + ELSE +* +* Use nonzero shift +* + IF( IDIR.EQ.1 ) THEN +* +* Chase bulge from top to bottom +* Save cosines and sines for later singular vector updates +* + F = ( ABS( D( LL ) )-SHIFT )* + $ ( SIGN( ONE, D( LL ) )+SHIFT / D( LL ) ) + G = E( LL ) + DO 140 I = LL, M - 1 + CALL DLARTG( F, G, COSR, SINR, R ) + IF( I.GT.LL ) + $ E( I-1 ) = R + F = COSR*D( I ) + SINR*E( I ) + E( I ) = COSR*E( I ) - SINR*D( I ) + G = SINR*D( I+1 ) + D( I+1 ) = COSR*D( I+1 ) + CALL DLARTG( F, G, COSL, SINL, R ) + D( I ) = R + F = COSL*E( I ) + SINL*D( I+1 ) + D( I+1 ) = COSL*D( I+1 ) - SINL*E( I ) + IF( I.LT.M-1 ) THEN + G = SINL*E( I+1 ) + E( I+1 ) = COSL*E( I+1 ) + END IF + RWORK( I-LL+1 ) = COSR + RWORK( I-LL+1+NM1 ) = SINR + RWORK( I-LL+1+NM12 ) = COSL + RWORK( I-LL+1+NM13 ) = SINL + 140 CONTINUE + E( M-1 ) = F +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL ZLASR( 'L', 'V', 'F', M-LL+1, NCVT, RWORK( 1 ), + $ RWORK( N ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL ZLASR( 'R', 'V', 'F', NRU, M-LL+1, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL ZLASR( 'L', 'V', 'F', M-LL+1, NCC, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( M-1 ) ).LE.THRESH ) + $ E( M-1 ) = ZERO +* + ELSE +* +* Chase bulge from bottom to top +* Save cosines and sines for later singular vector updates +* + F = ( ABS( D( M ) )-SHIFT )*( SIGN( ONE, D( M ) )+SHIFT / + $ D( M ) ) + G = E( M-1 ) + DO 150 I = M, LL + 1, -1 + CALL DLARTG( F, G, COSR, SINR, R ) + IF( I.LT.M ) + $ E( I ) = R + F = COSR*D( I ) + SINR*E( I-1 ) + E( I-1 ) = COSR*E( I-1 ) - SINR*D( I ) + G = SINR*D( I-1 ) + D( I-1 ) = COSR*D( I-1 ) + CALL DLARTG( F, G, COSL, SINL, R ) + D( I ) = R + F = COSL*E( I-1 ) + SINL*D( I-1 ) + D( I-1 ) = COSL*D( I-1 ) - SINL*E( I-1 ) + IF( I.GT.LL+1 ) THEN + G = SINL*E( I-2 ) + E( I-2 ) = COSL*E( I-2 ) + END IF + RWORK( I-LL ) = COSR + RWORK( I-LL+NM1 ) = -SINR + RWORK( I-LL+NM12 ) = COSL + RWORK( I-LL+NM13 ) = -SINL + 150 CONTINUE + E( LL ) = F +* +* Test convergence +* + IF( ABS( E( LL ) ).LE.THRESH ) + $ E( LL ) = ZERO +* +* Update singular vectors if desired +* + IF( NCVT.GT.0 ) + $ CALL ZLASR( 'L', 'V', 'B', M-LL+1, NCVT, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL ZLASR( 'R', 'V', 'B', NRU, M-LL+1, RWORK( 1 ), + $ RWORK( N ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL ZLASR( 'L', 'V', 'B', M-LL+1, NCC, RWORK( 1 ), + $ RWORK( N ), C( LL, 1 ), LDC ) + END IF + END IF +* +* QR iteration finished, go back and check convergence +* + GO TO 60 +* +* All singular values converged, so make them positive +* + 160 CONTINUE + DO 170 I = 1, N + IF( D( I ).LT.ZERO ) THEN + D( I ) = -D( I ) +* +* Change sign of singular vectors, if desired +* + IF( NCVT.GT.0 ) + $ CALL ZDSCAL( NCVT, NEGONE, VT( I, 1 ), LDVT ) + END IF + 170 CONTINUE +* +* Sort the singular values into decreasing order (insertion sort on +* singular values, but only one transposition per singular vector) +* + DO 190 I = 1, N - 1 +* +* Scan for smallest D(I) +* + ISUB = 1 + SMIN = D( 1 ) + DO 180 J = 2, N + 1 - I + IF( D( J ).LE.SMIN ) THEN + ISUB = J + SMIN = D( J ) + END IF + 180 CONTINUE + IF( ISUB.NE.N+1-I ) THEN +* +* Swap singular values and vectors +* + D( ISUB ) = D( N+1-I ) + D( N+1-I ) = SMIN + IF( NCVT.GT.0 ) + $ CALL ZSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( N+1-I, 1 ), + $ LDVT ) + IF( NRU.GT.0 ) + $ CALL ZSWAP( NRU, U( 1, ISUB ), 1, U( 1, N+1-I ), 1 ) + IF( NCC.GT.0 ) + $ CALL ZSWAP( NCC, C( ISUB, 1 ), LDC, C( N+1-I, 1 ), LDC ) + END IF + 190 CONTINUE + GO TO 220 +* +* Maximum number of iterations exceeded, failure to converge +* + 200 CONTINUE + INFO = 0 + DO 210 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 210 CONTINUE + 220 CONTINUE + RETURN +* +* End of ZBDSQR +* + END diff --git a/costa/native/external/lapack/zdrot.f b/costa/native/external/lapack/zdrot.f new file mode 100644 index 000000000..6c451becf --- /dev/null +++ b/costa/native/external/lapack/zdrot.f @@ -0,0 +1,54 @@ + SUBROUTINE ZDROT( N, CX, INCX, CY, INCY, C, S ) +* +* applies a plane rotation, where the cos and sin (c and s) are real +* and the vectors cx and cy are complex. +* jack dongarra, linpack, 3/11/78. +* +* .. Scalar Arguments .. + INTEGER INCX, INCY, N + DOUBLE PRECISION C, S +* .. +* .. Array Arguments .. + COMPLEX*16 CX( * ), CY( * ) +* +* ===================================================================== +* .. +* .. Local Scalars .. + INTEGER I, IX, IY + COMPLEX*16 CTEMP +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) + $ RETURN + IF( INCX.EQ.1 .AND. INCY.EQ.1 ) + $ GO TO 20 +* +* code for unequal increments or equal increments not equal +* to 1 +* + IX = 1 + IY = 1 + IF( INCX.LT.0 ) + $ IX = ( -N+1 )*INCX + 1 + IF( INCY.LT.0 ) + $ IY = ( -N+1 )*INCY + 1 + DO 10 I = 1, N + CTEMP = C*CX( IX ) + S*CY( IY ) + CY( IY ) = C*CY( IY ) - S*CX( IX ) + CX( IX ) = CTEMP + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + RETURN +* +* code for both increments equal to 1 +* + 20 CONTINUE + DO 30 I = 1, N + CTEMP = C*CX( I ) + S*CY( I ) + CY( I ) = C*CY( I ) - S*CX( I ) + CX( I ) = CTEMP + 30 CONTINUE + RETURN + END diff --git a/costa/native/external/lapack/zdrscl.f b/costa/native/external/lapack/zdrscl.f new file mode 100644 index 000000000..c44e96d65 --- /dev/null +++ b/costa/native/external/lapack/zdrscl.f @@ -0,0 +1,115 @@ + SUBROUTINE ZDRSCL( N, SA, SX, INCX ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INCX, N + DOUBLE PRECISION SA +* .. +* .. Array Arguments .. + COMPLEX*16 SX( * ) +* .. +* +* Purpose +* ======= +* +* ZDRSCL multiplies an n-element complex vector x by the real scalar +* 1/a. This is done without overflow or underflow as long as +* the final result x/a does not overflow or underflow. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of components of the vector x. +* +* SA (input) DOUBLE PRECISION +* The scalar a which is used to divide each component of x. +* SA must be >= 0, or the subroutine will divide by zero. +* +* SX (input/output) COMPLEX*16 array, dimension +* (1+(N-1)*abs(INCX)) +* The n-element vector x. +* +* INCX (input) INTEGER +* The increment between successive values of the vector SX. +* > 0: SX(1) = X(1) and SX(1+(i-1)*INCX) = x(i), 1< i<= n +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + DOUBLE PRECISION BIGNUM, CDEN, CDEN1, CNUM, CNUM1, MUL, SMLNUM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, ZDSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Initialize the denominator to SA and the numerator to 1. +* + CDEN = SA + CNUM = ONE +* + 10 CONTINUE + CDEN1 = CDEN*SMLNUM + CNUM1 = CNUM / BIGNUM + IF( ABS( CDEN1 ).GT.ABS( CNUM ) .AND. CNUM.NE.ZERO ) THEN +* +* Pre-multiply X by SMLNUM if CDEN is large compared to CNUM. +* + MUL = SMLNUM + DONE = .FALSE. + CDEN = CDEN1 + ELSE IF( ABS( CNUM1 ).GT.ABS( CDEN ) ) THEN +* +* Pre-multiply X by BIGNUM if CDEN is small compared to CNUM. +* + MUL = BIGNUM + DONE = .FALSE. + CNUM = CNUM1 + ELSE +* +* Multiply X by CNUM / CDEN and return. +* + MUL = CNUM / CDEN + DONE = .TRUE. + END IF +* +* Scale the vector X by MUL +* + CALL ZDSCAL( N, MUL, SX, INCX ) +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of ZDRSCL +* + END diff --git a/costa/native/external/lapack/zgbbrd.f b/costa/native/external/lapack/zgbbrd.f new file mode 100644 index 000000000..3e24d5eb6 --- /dev/null +++ b/costa/native/external/lapack/zgbbrd.f @@ -0,0 +1,466 @@ + SUBROUTINE ZGBBRD( VECT, M, N, NCC, KL, KU, AB, LDAB, D, E, Q, + $ LDQ, PT, LDPT, C, LDC, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER VECT + INTEGER INFO, KL, KU, LDAB, LDC, LDPT, LDQ, M, N, NCC +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ), RWORK( * ) + COMPLEX*16 AB( LDAB, * ), C( LDC, * ), PT( LDPT, * ), + $ Q( LDQ, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGBBRD reduces a complex general m-by-n band matrix A to real upper +* bidiagonal form B by a unitary transformation: Q' * A * P = B. +* +* The routine computes B, and optionally forms Q or P', or computes +* Q'*C for a given matrix C. +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* Specifies whether or not the matrices Q and P' are to be +* formed. +* = 'N': do not form Q or P'; +* = 'Q': form Q only; +* = 'P': form P' only; +* = 'B': form both. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NCC (input) INTEGER +* The number of columns of the matrix C. NCC >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals of the matrix A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals of the matrix A. KU >= 0. +* +* AB (input/output) COMPLEX*16 array, dimension (LDAB,N) +* On entry, the m-by-n band matrix A, stored in rows 1 to +* KL+KU+1. The j-th column of A is stored in the j-th column of +* the array AB as follows: +* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). +* On exit, A is overwritten by values generated during the +* reduction. +* +* LDAB (input) INTEGER +* The leading dimension of the array A. LDAB >= KL+KU+1. +* +* D (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The diagonal elements of the bidiagonal matrix B. +* +* E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) +* The superdiagonal elements of the bidiagonal matrix B. +* +* Q (output) COMPLEX*16 array, dimension (LDQ,M) +* If VECT = 'Q' or 'B', the m-by-m unitary matrix Q. +* If VECT = 'N' or 'P', the array Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. +* LDQ >= max(1,M) if VECT = 'Q' or 'B'; LDQ >= 1 otherwise. +* +* PT (output) COMPLEX*16 array, dimension (LDPT,N) +* If VECT = 'P' or 'B', the n-by-n unitary matrix P'. +* If VECT = 'N' or 'Q', the array PT is not referenced. +* +* LDPT (input) INTEGER +* The leading dimension of the array PT. +* LDPT >= max(1,N) if VECT = 'P' or 'B'; LDPT >= 1 otherwise. +* +* C (input/output) COMPLEX*16 array, dimension (LDC,NCC) +* On entry, an m-by-ncc matrix C. +* On exit, C is overwritten by Q'*C. +* C is not referenced if NCC = 0. +* +* LDC (input) INTEGER +* The leading dimension of the array C. +* LDC >= max(1,M) if NCC > 0; LDC >= 1 if NCC = 0. +* +* WORK (workspace) COMPLEX*16 array, dimension (max(M,N)) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (max(M,N)) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL WANTB, WANTC, WANTPT, WANTQ + INTEGER I, INCA, J, J1, J2, KB, KB1, KK, KLM, KLU1, + $ KUN, L, MINMN, ML, ML0, MU, MU0, NR, NRT + DOUBLE PRECISION ABST, RC + COMPLEX*16 RA, RB, RS, T +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARGV, ZLARTG, ZLARTV, ZLASET, ZROT, + $ ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DCONJG, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTB = LSAME( VECT, 'B' ) + WANTQ = LSAME( VECT, 'Q' ) .OR. WANTB + WANTPT = LSAME( VECT, 'P' ) .OR. WANTB + WANTC = NCC.GT.0 + KLU1 = KL + KU + 1 + INFO = 0 + IF( .NOT.WANTQ .AND. .NOT.WANTPT .AND. .NOT.LSAME( VECT, 'N' ) ) + $ THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NCC.LT.0 ) THEN + INFO = -4 + ELSE IF( KL.LT.0 ) THEN + INFO = -5 + ELSE IF( KU.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KLU1 ) THEN + INFO = -8 + ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. LDQ.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDPT.LT.1 .OR. WANTPT .AND. LDPT.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDC.LT.1 .OR. WANTC .AND. LDC.LT.MAX( 1, M ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGBBRD', -INFO ) + RETURN + END IF +* +* Initialize Q and P' to the unit matrix, if needed +* + IF( WANTQ ) + $ CALL ZLASET( 'Full', M, M, CZERO, CONE, Q, LDQ ) + IF( WANTPT ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, PT, LDPT ) +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + MINMN = MIN( M, N ) +* + IF( KL+KU.GT.1 ) THEN +* +* Reduce to upper bidiagonal form if KU > 0; if KU = 0, reduce +* first to lower bidiagonal form and then transform to upper +* bidiagonal +* + IF( KU.GT.0 ) THEN + ML0 = 1 + MU0 = 2 + ELSE + ML0 = 2 + MU0 = 1 + END IF +* +* Wherever possible, plane rotations are generated and applied in +* vector operations of length NR over the index set J1:J2:KLU1. +* +* The complex sines of the plane rotations are stored in WORK, +* and the real cosines in RWORK. +* + KLM = MIN( M-1, KL ) + KUN = MIN( N-1, KU ) + KB = KLM + KUN + KB1 = KB + 1 + INCA = KB1*LDAB + NR = 0 + J1 = KLM + 2 + J2 = 1 - KUN +* + DO 90 I = 1, MINMN +* +* Reduce i-th column and i-th row of matrix to bidiagonal form +* + ML = KLM + 1 + MU = KUN + 1 + DO 80 KK = 1, KB + J1 = J1 + KB + J2 = J2 + KB +* +* generate plane rotations to annihilate nonzero elements +* which have been created below the band +* + IF( NR.GT.0 ) + $ CALL ZLARGV( NR, AB( KLU1, J1-KLM-1 ), INCA, + $ WORK( J1 ), KB1, RWORK( J1 ), KB1 ) +* +* apply plane rotations from the left +* + DO 10 L = 1, KB + IF( J2-KLM+L-1.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( KLU1-L, J1-KLM+L-1 ), INCA, + $ AB( KLU1-L+1, J1-KLM+L-1 ), INCA, + $ RWORK( J1 ), WORK( J1 ), KB1 ) + 10 CONTINUE +* + IF( ML.GT.ML0 ) THEN + IF( ML.LE.M-I+1 ) THEN +* +* generate plane rotation to annihilate a(i+ml-1,i) +* within the band, and apply rotation from the left +* + CALL ZLARTG( AB( KU+ML-1, I ), AB( KU+ML, I ), + $ RWORK( I+ML-1 ), WORK( I+ML-1 ), RA ) + AB( KU+ML-1, I ) = RA + IF( I.LT.N ) + $ CALL ZROT( MIN( KU+ML-2, N-I ), + $ AB( KU+ML-2, I+1 ), LDAB-1, + $ AB( KU+ML-1, I+1 ), LDAB-1, + $ RWORK( I+ML-1 ), WORK( I+ML-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KB1 + END IF +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + DO 20 J = J1, J2, KB1 + CALL ZROT( M, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ RWORK( J ), DCONJG( WORK( J ) ) ) + 20 CONTINUE + END IF +* + IF( WANTC ) THEN +* +* apply plane rotations to C +* + DO 30 J = J1, J2, KB1 + CALL ZROT( NCC, C( J-1, 1 ), LDC, C( J, 1 ), LDC, + $ RWORK( J ), WORK( J ) ) + 30 CONTINUE + END IF +* + IF( J2+KUN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KB1 + END IF +* + DO 40 J = J1, J2, KB1 +* +* create nonzero element a(j-1,j+ku) above the band +* and store it in WORK(n+1:2*n) +* + WORK( J+KUN ) = WORK( J )*AB( 1, J+KUN ) + AB( 1, J+KUN ) = RWORK( J )*AB( 1, J+KUN ) + 40 CONTINUE +* +* generate plane rotations to annihilate nonzero elements +* which have been generated above the band +* + IF( NR.GT.0 ) + $ CALL ZLARGV( NR, AB( 1, J1+KUN-1 ), INCA, + $ WORK( J1+KUN ), KB1, RWORK( J1+KUN ), + $ KB1 ) +* +* apply plane rotations from the right +* + DO 50 L = 1, KB + IF( J2+L-1.GT.M ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( L+1, J1+KUN-1 ), INCA, + $ AB( L, J1+KUN ), INCA, + $ RWORK( J1+KUN ), WORK( J1+KUN ), KB1 ) + 50 CONTINUE +* + IF( ML.EQ.ML0 .AND. MU.GT.MU0 ) THEN + IF( MU.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i,i+mu-1) +* within the band, and apply rotation from the right +* + CALL ZLARTG( AB( KU-MU+3, I+MU-2 ), + $ AB( KU-MU+2, I+MU-1 ), + $ RWORK( I+MU-1 ), WORK( I+MU-1 ), RA ) + AB( KU-MU+3, I+MU-2 ) = RA + CALL ZROT( MIN( KL+MU-2, M-I ), + $ AB( KU-MU+4, I+MU-2 ), 1, + $ AB( KU-MU+3, I+MU-1 ), 1, + $ RWORK( I+MU-1 ), WORK( I+MU-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KB1 + END IF +* + IF( WANTPT ) THEN +* +* accumulate product of plane rotations in P' +* + DO 60 J = J1, J2, KB1 + CALL ZROT( N, PT( J+KUN-1, 1 ), LDPT, + $ PT( J+KUN, 1 ), LDPT, RWORK( J+KUN ), + $ DCONJG( WORK( J+KUN ) ) ) + 60 CONTINUE + END IF +* + IF( J2+KB.GT.M ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KB1 + END IF +* + DO 70 J = J1, J2, KB1 +* +* create nonzero element a(j+kl+ku,j+ku-1) below the +* band and store it in WORK(1:n) +* + WORK( J+KB ) = WORK( J+KUN )*AB( KLU1, J+KUN ) + AB( KLU1, J+KUN ) = RWORK( J+KUN )*AB( KLU1, J+KUN ) + 70 CONTINUE +* + IF( ML.GT.ML0 ) THEN + ML = ML - 1 + ELSE + MU = MU - 1 + END IF + 80 CONTINUE + 90 CONTINUE + END IF +* + IF( KU.EQ.0 .AND. KL.GT.0 ) THEN +* +* A has been reduced to complex lower bidiagonal form +* +* Transform lower bidiagonal form to upper bidiagonal by applying +* plane rotations from the left, overwriting superdiagonal +* elements on subdiagonal elements +* + DO 100 I = 1, MIN( M-1, N ) + CALL ZLARTG( AB( 1, I ), AB( 2, I ), RC, RS, RA ) + AB( 1, I ) = RA + IF( I.LT.N ) THEN + AB( 2, I ) = RS*AB( 1, I+1 ) + AB( 1, I+1 ) = RC*AB( 1, I+1 ) + END IF + IF( WANTQ ) + $ CALL ZROT( M, Q( 1, I ), 1, Q( 1, I+1 ), 1, RC, + $ DCONJG( RS ) ) + IF( WANTC ) + $ CALL ZROT( NCC, C( I, 1 ), LDC, C( I+1, 1 ), LDC, RC, + $ RS ) + 100 CONTINUE + ELSE +* +* A has been reduced to complex upper bidiagonal form or is +* diagonal +* + IF( KU.GT.0 .AND. M.LT.N ) THEN +* +* Annihilate a(m,m+1) by applying plane rotations from the +* right +* + RB = AB( KU, M+1 ) + DO 110 I = M, 1, -1 + CALL ZLARTG( AB( KU+1, I ), RB, RC, RS, RA ) + AB( KU+1, I ) = RA + IF( I.GT.1 ) THEN + RB = -DCONJG( RS )*AB( KU, I ) + AB( KU, I ) = RC*AB( KU, I ) + END IF + IF( WANTPT ) + $ CALL ZROT( N, PT( I, 1 ), LDPT, PT( M+1, 1 ), LDPT, + $ RC, DCONJG( RS ) ) + 110 CONTINUE + END IF + END IF +* +* Make diagonal and superdiagonal elements real, storing them in D +* and E +* + T = AB( KU+1, 1 ) + DO 120 I = 1, MINMN + ABST = ABS( T ) + D( I ) = ABST + IF( ABST.NE.ZERO ) THEN + T = T / ABST + ELSE + T = CONE + END IF + IF( WANTQ ) + $ CALL ZSCAL( M, T, Q( 1, I ), 1 ) + IF( WANTC ) + $ CALL ZSCAL( NCC, DCONJG( T ), C( I, 1 ), LDC ) + IF( I.LT.MINMN ) THEN + IF( KU.EQ.0 .AND. KL.EQ.0 ) THEN + E( I ) = ZERO + T = AB( 1, I+1 ) + ELSE + IF( KU.EQ.0 ) THEN + T = AB( 2, I )*DCONJG( T ) + ELSE + T = AB( KU, I+1 )*DCONJG( T ) + END IF + ABST = ABS( T ) + E( I ) = ABST + IF( ABST.NE.ZERO ) THEN + T = T / ABST + ELSE + T = CONE + END IF + IF( WANTPT ) + $ CALL ZSCAL( N, T, PT( I+1, 1 ), LDPT ) + T = AB( KU+1, I+1 )*DCONJG( T ) + END IF + END IF + 120 CONTINUE + RETURN +* +* End of ZGBBRD +* + END diff --git a/costa/native/external/lapack/zgbcon.f b/costa/native/external/lapack/zgbcon.f new file mode 100644 index 000000000..e8b829688 --- /dev/null +++ b/costa/native/external/lapack/zgbcon.f @@ -0,0 +1,230 @@ + SUBROUTINE ZGBCON( NORM, N, KL, KU, AB, LDAB, IPIV, ANORM, RCOND, + $ WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, KL, KU, LDAB, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 AB( LDAB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGBCON estimates the reciprocal of the condition number of a complex +* general band matrix A, in either the 1-norm or the infinity-norm, +* using the LU factorization computed by ZGBTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input) COMPLEX*16 array, dimension (LDAB,N) +* Details of the LU factorization of the band matrix A, as +* computed by ZGBTRF. U is stored as an upper triangular band +* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +* the multipliers used during the factorization are stored in +* rows KL+KU+2 to 2*KL+KU+1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= N, row i of the matrix was +* interchanged with row IPIV(i). +* +* ANORM (input) DOUBLE PRECISION +* If NORM = '1' or 'O', the 1-norm of the original matrix A. +* If NORM = 'I', the infinity-norm of the original matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LNOTI, ONENRM + CHARACTER NORMIN + INTEGER IX, J, JP, KASE, KASE1, KD, LM + DOUBLE PRECISION AINVNM, SCALE, SMLNUM + COMPLEX*16 T, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH + COMPLEX*16 ZDOTC + EXTERNAL LSAME, IZAMAX, DLAMCH, ZDOTC +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZDRSCL, ZLACON, ZLATBS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN + INFO = -6 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KD = KL + KU + 1 + LNOTI = KL.GT.0 + KASE = 0 + 10 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + IF( LNOTI ) THEN + DO 20 J = 1, N - 1 + LM = MIN( KL, N-J ) + JP = IPIV( J ) + T = WORK( JP ) + IF( JP.NE.J ) THEN + WORK( JP ) = WORK( J ) + WORK( J ) = T + END IF + CALL ZAXPY( LM, -T, AB( KD+1, J ), 1, WORK( J+1 ), 1 ) + 20 CONTINUE + END IF +* +* Multiply by inv(U). +* + CALL ZLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ KL+KU, AB, LDAB, WORK, SCALE, RWORK, INFO ) + ELSE +* +* Multiply by inv(U'). +* + CALL ZLATBS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, KL+KU, AB, LDAB, WORK, SCALE, RWORK, + $ INFO ) +* +* Multiply by inv(L'). +* + IF( LNOTI ) THEN + DO 30 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + WORK( J ) = WORK( J ) - ZDOTC( LM, AB( KD+1, J ), 1, + $ WORK( J+1 ), 1 ) + JP = IPIV( J ) + IF( JP.NE.J ) THEN + T = WORK( JP ) + WORK( JP ) = WORK( J ) + WORK( J ) = T + END IF + 30 CONTINUE + END IF + END IF +* +* Divide X by 1/SCALE if doing so will not cause overflow. +* + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = IZAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 40 + CALL ZDRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 40 CONTINUE + RETURN +* +* End of ZGBCON +* + END diff --git a/costa/native/external/lapack/zgbequ.f b/costa/native/external/lapack/zgbequ.f new file mode 100644 index 000000000..d3e821fe5 --- /dev/null +++ b/costa/native/external/lapack/zgbequ.f @@ -0,0 +1,248 @@ + SUBROUTINE ZGBEQU( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N + DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ), R( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* ZGBEQU computes row and column scalings intended to equilibrate an +* M-by-N band matrix A and reduce its condition number. R returns the +* row scale factors and C the column scale factors, chosen to try to +* make the largest element in each row and column of the matrix B with +* elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. +* +* R(i) and C(j) are restricted to be between SMLNUM = smallest safe +* number and BIGNUM = largest safe number. Use of these scaling +* factors is not guaranteed to reduce the condition number of A but +* works well in practice. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input) COMPLEX*16 array, dimension (LDAB,N) +* The band matrix A, stored in rows 1 to KL+KU+1. The j-th +* column of A is stored in the j-th column of the array AB as +* follows: +* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KL+KU+1. +* +* R (output) DOUBLE PRECISION array, dimension (M) +* If INFO = 0, or INFO > M, R contains the row scale factors +* for A. +* +* C (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, C contains the column scale factors for A. +* +* ROWCND (output) DOUBLE PRECISION +* If INFO = 0 or INFO > M, ROWCND contains the ratio of the +* smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +* AMAX is neither too large nor too small, it is not worth +* scaling by R. +* +* COLCND (output) DOUBLE PRECISION +* If INFO = 0, COLCND contains the ratio of the smallest +* C(i) to the largest C(i). If COLCND >= 0.1, it is not +* worth scaling by C. +* +* AMAX (output) DOUBLE PRECISION +* Absolute value of largest matrix element. If AMAX is very +* close to overflow or very close to underflow, the matrix +* should be scaled. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= M: the i-th row of A is exactly zero +* > M: the (i-M)-th column of A is exactly zero +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, KD + DOUBLE PRECISION BIGNUM, RCMAX, RCMIN, SMLNUM + COMPLEX*16 ZDUM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGBEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + KD = KU + 1 + DO 30 J = 1, N + DO 20 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + R( I ) = MAX( R( I ), CABS1( AB( KD+I-J, J ) ) ) + 20 CONTINUE + 30 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)) +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + KD = KU + 1 + DO 90 J = 1, N + DO 80 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + C( J ) = MAX( C( J ), CABS1( AB( KD+I-J, J ) )*R( I ) ) + 80 CONTINUE + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)) +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of ZGBEQU +* + END diff --git a/costa/native/external/lapack/zgbrfs.f b/costa/native/external/lapack/zgbrfs.f new file mode 100644 index 000000000..df93efce8 --- /dev/null +++ b/costa/native/external/lapack/zgbrfs.f @@ -0,0 +1,361 @@ + SUBROUTINE ZGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, + $ IPIV, B, LDB, X, LDX, FERR, BERR, WORK, RWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZGBRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is banded, and provides +* error bounds and backward error estimates for the solution. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AB (input) COMPLEX*16 array, dimension (LDAB,N) +* The original band matrix A, stored in rows 1 to KL+KU+1. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KL+KU+1. +* +* AFB (input) COMPLEX*16 array, dimension (LDAFB,N) +* Details of the LU factorization of the band matrix A, as +* computed by ZGBTRF. U is stored as an upper triangular band +* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +* the multipliers used during the factorization are stored in +* rows KL+KU+2 to 2*KL+KU+1. +* +* LDAFB (input) INTEGER +* The leading dimension of the array AFB. LDAFB >= 2*KL*KU+1. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from ZGBTRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* B (input) COMPLEX*16 array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) COMPLEX*16 array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by ZGBTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANSN, TRANST + INTEGER COUNT, I, J, K, KASE, KK, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZGBMV, ZGBTRS, ZLACON +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -7 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -9 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = MIN( KL+KU+2, N+1 ) + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL ZGBMV( TRANS, N, N, KL, KU, -CONE, AB, LDAB, X( 1, J ), 1, + $ CONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(op(A))*abs(X) + abs(B). +* + IF( NOTRAN ) THEN + DO 50 K = 1, N + KK = KU + 1 - K + XK = CABS1( X( K, J ) ) + DO 40 I = MAX( 1, K-KU ), MIN( N, K+KL ) + RWORK( I ) = RWORK( I ) + CABS1( AB( KK+I, K ) )*XK + 40 CONTINUE + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + KK = KU + 1 - K + DO 60 I = MAX( 1, K-KU ), MIN( N, K+KL ) + S = S + CABS1( AB( KK+I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL ZGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV, WORK, N, + $ INFO ) + CALL ZAXPY( N, CONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL ZGBTRS( TRANST, N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL ZGBTRS( TRANSN, N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of ZGBRFS +* + END diff --git a/costa/native/external/lapack/zgbsv.f b/costa/native/external/lapack/zgbsv.f new file mode 100644 index 000000000..aef9dd3da --- /dev/null +++ b/costa/native/external/lapack/zgbsv.f @@ -0,0 +1,143 @@ + SUBROUTINE ZGBSV( N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AB( LDAB, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZGBSV computes the solution to a complex system of linear equations +* A * X = B, where A is a band matrix of order N with KL subdiagonals +* and KU superdiagonals, and X and B are N-by-NRHS matrices. +* +* The LU decomposition with partial pivoting and row interchanges is +* used to factor A as A = L * U, where L is a product of permutation +* and unit lower triangular matrices with KL subdiagonals, and U is +* upper triangular with KL+KU superdiagonals. The factored form of A +* is then used to solve the system of equations A * X = B. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input/output) COMPLEX*16 array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows KL+1 to +* 2*KL+KU+1; rows 1 to KL of the array need not be set. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(KL+KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+KL) +* On exit, details of the factorization: U is stored as an +* upper triangular band matrix with KL+KU superdiagonals in +* rows 1 to KL+KU+1, and the multipliers used during the +* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +* See below for further details. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (output) INTEGER array, dimension (N) +* The pivot indices that define the permutation matrix P; +* row i of the matrix was interchanged with row IPIV(i). +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and the solution has not been computed. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* M = N = 6, KL = 2, KU = 1: +* +* On entry: On exit: +* +* * * * + + + * * * u14 u25 u36 +* * * + + + + * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +* a31 a42 a53 a64 * * m31 m42 m53 m64 * * +* +* Array elements marked * are not used by the routine; elements marked +* + need not be set on entry, but are required by the routine to store +* elements of U because of fill-in resulting from the row interchanges. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL XERBLA, ZGBTRF, ZGBTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( KL.LT.0 ) THEN + INFO = -2 + ELSE IF( KU.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGBSV ', -INFO ) + RETURN + END IF +* +* Compute the LU factorization of the band matrix A. +* + CALL ZGBTRF( N, N, KL, KU, AB, LDAB, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZGBTRS( 'No transpose', N, KL, KU, NRHS, AB, LDAB, IPIV, + $ B, LDB, INFO ) + END IF + RETURN +* +* End of ZGBSV +* + END diff --git a/costa/native/external/lapack/zgbsvx.f b/costa/native/external/lapack/zgbsvx.f new file mode 100644 index 000000000..1fa863285 --- /dev/null +++ b/costa/native/external/lapack/zgbsvx.f @@ -0,0 +1,518 @@ + SUBROUTINE ZGBSVX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, + $ LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, + $ RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), C( * ), FERR( * ), R( * ), + $ RWORK( * ) + COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZGBSVX uses the LU factorization to compute the solution to a complex +* system of linear equations A * X = B, A**T * X = B, or A**H * X = B, +* where A is a band matrix of order N with KL subdiagonals and KU +* superdiagonals, and X and B are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed by this subroutine: +* +* 1. If FACT = 'E', real scaling factors are computed to equilibrate +* the system: +* TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +* TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +* TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +* Whether or not the system will be equilibrated depends on the +* scaling of the matrix A, but if equilibration is used, A is +* overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +* or diag(C)*B (if TRANS = 'T' or 'C'). +* +* 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the +* matrix A (after equilibration if FACT = 'E') as +* A = L * U, +* where L is a product of permutation and unit lower triangular +* matrices with KL subdiagonals, and U is upper triangular with +* KL+KU superdiagonals. +* +* 3. If some U(i,i)=0, so that U is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 4. The system of equations is solved for X using the factored form +* of A. +* +* 5. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* 6. If equilibration was used, the matrix X is premultiplied by +* diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +* that it solves the original system before equilibration. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of the matrix A is +* supplied on entry, and if not, whether the matrix A should be +* equilibrated before it is factored. +* = 'F': On entry, AFB and IPIV contain the factored form of +* A. If EQUED is not 'N', the matrix A has been +* equilibrated with scaling factors given by R and C. +* AB, AFB, and IPIV are not modified. +* = 'N': The matrix A will be copied to AFB and factored. +* = 'E': The matrix A will be equilibrated if necessary, then +* copied to AFB and factored. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations. +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AB (input/output) COMPLEX*16 array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +* +* If FACT = 'F' and EQUED is not 'N', then A must have been +* equilibrated by the scaling factors in R and/or C. AB is not +* modified if FACT = 'F' or 'N', or if FACT = 'E' and +* EQUED = 'N' on exit. +* +* On exit, if EQUED .ne. 'N', A is scaled as follows: +* EQUED = 'R': A := diag(R) * A +* EQUED = 'C': A := A * diag(C) +* EQUED = 'B': A := diag(R) * A * diag(C). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KL+KU+1. +* +* AFB (input or output) COMPLEX*16 array, dimension (LDAFB,N) +* If FACT = 'F', then AFB is an input argument and on entry +* contains details of the LU factorization of the band matrix +* A, as computed by ZGBTRF. U is stored as an upper triangular +* band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, +* and the multipliers used during the factorization are stored +* in rows KL+KU+2 to 2*KL+KU+1. If EQUED .ne. 'N', then AFB is +* the factored form of the equilibrated matrix A. +* +* If FACT = 'N', then AFB is an output argument and on exit +* returns details of the LU factorization of A. +* +* If FACT = 'E', then AFB is an output argument and on exit +* returns details of the LU factorization of the equilibrated +* matrix A (see the description of AB for the form of the +* equilibrated matrix). +* +* LDAFB (input) INTEGER +* The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains the pivot indices from the factorization A = L*U +* as computed by ZGBTRF; row i of the matrix was interchanged +* with row IPIV(i). +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains the pivot indices from the factorization A = L*U +* of the original matrix A. +* +* If FACT = 'E', then IPIV is an output argument and on exit +* contains the pivot indices from the factorization A = L*U +* of the equilibrated matrix A. +* +* EQUED (input or output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration (always true if FACT = 'N'). +* = 'R': Row equilibration, i.e., A has been premultiplied by +* diag(R). +* = 'C': Column equilibration, i.e., A has been postmultiplied +* by diag(C). +* = 'B': Both row and column equilibration, i.e., A has been +* replaced by diag(R) * A * diag(C). +* EQUED is an input argument if FACT = 'F'; otherwise, it is an +* output argument. +* +* R (input or output) DOUBLE PRECISION array, dimension (N) +* The row scale factors for A. If EQUED = 'R' or 'B', A is +* multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +* is not accessed. R is an input argument if FACT = 'F'; +* otherwise, R is an output argument. If FACT = 'F' and +* EQUED = 'R' or 'B', each element of R must be positive. +* +* C (input or output) DOUBLE PRECISION array, dimension (N) +* The column scale factors for A. If EQUED = 'C' or 'B', A is +* multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +* is not accessed. C is an input argument if FACT = 'F'; +* otherwise, C is an output argument. If FACT = 'F' and +* EQUED = 'C' or 'B', each element of C must be positive. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, +* if EQUED = 'N', B is not modified; +* if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +* diag(R)*B; +* if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +* overwritten by diag(C)*B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) COMPLEX*16 array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X +* to the original system of equations. Note that A and B are +* modified on exit if EQUED .ne. 'N', and the solution to the +* equilibrated system is inv(diag(C))*X if TRANS = 'N' and +* EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' +* and EQUED = 'R' or 'B'. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) DOUBLE PRECISION +* The estimate of the reciprocal condition number of the matrix +* A after equilibration (if done). If RCOND is less than the +* machine precision (in particular, if RCOND = 0), the matrix +* is singular to working precision. This condition is +* indicated by a return code of INFO > 0. +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace/output) DOUBLE PRECISION array, dimension (N) +* On exit, RWORK(1) contains the reciprocal pivot growth +* factor norm(A)/norm(U). The "max absolute element" norm is +* used. If RWORK(1) is much less than 1, then the stability +* of the LU factorization of the (equilibrated) matrix A +* could be poor. This also means that the solution X, condition +* estimator RCOND, and forward error bound FERR could be +* unreliable. If factorization fails with 0 0: if INFO = i, and i is +* <= N: U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, so the solution and error bounds +* could not be computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + CHARACTER NORM + INTEGER I, INFEQU, J, J1, J2 + DOUBLE PRECISION AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN, + $ ROWCND, RPVGRW, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANGB, ZLANTB + EXTERNAL LSAME, DLAMCH, ZLANGB, ZLANTB +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZGBCON, ZGBEQU, ZGBRFS, ZGBTRF, + $ ZGBTRS, ZLACPY, ZLAQGB +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KL.LT.0 ) THEN + INFO = -4 + ELSE IF( KU.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -8 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -10 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -12 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -13 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -14 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -16 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -18 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGBSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL ZGBEQU( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL ZLAQGB( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = R( I )*B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( COLEQU ) THEN + DO 60 J = 1, NRHS + DO 50 I = 1, N + B( I, J ) = C( I )*B( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of the band matrix A. +* + DO 70 J = 1, N + J1 = MAX( J-KU, 1 ) + J2 = MIN( J+KL, N ) + CALL ZCOPY( J2-J1+1, AB( KU+1-J+J1, J ), 1, + $ AFB( KL+KU+1-J+J1, J ), 1 ) + 70 CONTINUE +* + CALL ZGBTRF( N, N, KL, KU, AFB, LDAFB, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) THEN +* +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + ANORM = ZERO + DO 90 J = 1, INFO + DO 80 I = MAX( KU+2-J, 1 ), + $ MIN( N+KU+1-J, KL+KU+1 ) + ANORM = MAX( ANORM, ABS( AB( I, J ) ) ) + 80 CONTINUE + 90 CONTINUE + RPVGRW = ZLANTB( 'M', 'U', 'N', INFO, + $ MIN( INFO-1, KL+KU ), AFB( MAX( 1, + $ KL+KU+2-INFO ), 1 ), LDAFB, RWORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = ANORM / RPVGRW + END IF + RWORK( 1 ) = RPVGRW + RCOND = ZERO + END IF + RETURN + END IF + END IF +* +* Compute the norm of the matrix A and the +* reciprocal pivot growth factor RPVGRW. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = ZLANGB( NORM, N, KL, KU, AB, LDAB, RWORK ) + RPVGRW = ZLANTB( 'M', 'U', 'N', N, KL+KU, AFB, LDAFB, RWORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = ZLANGB( 'M', N, KL, KU, AB, LDAB, RWORK ) / RPVGRW + END IF +* +* Compute the reciprocal of the condition number of A. +* + CALL ZGBCON( NORM, N, KL, KU, AFB, LDAFB, IPIV, ANORM, RCOND, + $ WORK, RWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution matrix X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZGBTRS( TRANS, N, KL, KU, NRHS, AFB, LDAFB, IPIV, X, LDX, + $ INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL ZGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, IPIV, + $ B, LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( NOTRAN ) THEN + IF( COLEQU ) THEN + DO 110 J = 1, NRHS + DO 100 I = 1, N + X( I, J ) = C( I )*X( I, J ) + 100 CONTINUE + 110 CONTINUE + DO 120 J = 1, NRHS + FERR( J ) = FERR( J ) / COLCND + 120 CONTINUE + END IF + ELSE IF( ROWEQU ) THEN + DO 140 J = 1, NRHS + DO 130 I = 1, N + X( I, J ) = R( I )*X( I, J ) + 130 CONTINUE + 140 CONTINUE + DO 150 J = 1, NRHS + FERR( J ) = FERR( J ) / ROWCND + 150 CONTINUE + END IF +* + RWORK( 1 ) = RPVGRW + RETURN +* +* End of ZGBSVX +* + END diff --git a/costa/native/external/lapack/zgbtf2.f b/costa/native/external/lapack/zgbtf2.f new file mode 100644 index 000000000..ca7fe23ad --- /dev/null +++ b/costa/native/external/lapack/zgbtf2.f @@ -0,0 +1,203 @@ + SUBROUTINE ZGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* ZGBTF2 computes an LU factorization of a complex m-by-n band matrix +* A using partial pivoting with row interchanges. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input/output) COMPLEX*16 array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows KL+1 to +* 2*KL+KU+1; rows 1 to KL of the array need not be set. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +* +* On exit, details of the factorization: U is stored as an +* upper triangular band matrix with KL+KU superdiagonals in +* rows 1 to KL+KU+1, and the multipliers used during the +* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +* See below for further details. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* M = N = 6, KL = 2, KU = 1: +* +* On entry: On exit: +* +* * * * + + + * * * u14 u25 u36 +* * * + + + + * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +* a31 a42 a53 a64 * * m31 m42 m53 m64 * * +* +* Array elements marked * are not used by the routine; elements marked +* + need not be set on entry, but are required by the routine to store +* elements of U, because of fill-in resulting from the row +* interchanges. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, JP, JU, KM, KV +* .. +* .. External Functions .. + INTEGER IZAMAX + EXTERNAL IZAMAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGERU, ZSCAL, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* KV is the number of superdiagonals in the factor U, allowing for +* fill-in. +* + KV = KU + KL +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KV+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGBTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Gaussian elimination with partial pivoting +* +* Set fill-in elements in columns KU+2 to KV to zero. +* + DO 20 J = KU + 2, MIN( KV, N ) + DO 10 I = KV - J + 2, KL + AB( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* JU is the index of the last column affected by the current stage +* of the factorization. +* + JU = 1 +* + DO 40 J = 1, MIN( M, N ) +* +* Set fill-in elements in column J+KV to zero. +* + IF( J+KV.LE.N ) THEN + DO 30 I = 1, KL + AB( I, J+KV ) = ZERO + 30 CONTINUE + END IF +* +* Find pivot and test for singularity. KM is the number of +* subdiagonal elements in the current column. +* + KM = MIN( KL, M-J ) + JP = IZAMAX( KM+1, AB( KV+1, J ), 1 ) + IPIV( J ) = JP + J - 1 + IF( AB( KV+JP, J ).NE.ZERO ) THEN + JU = MAX( JU, MIN( J+KU+JP-1, N ) ) +* +* Apply interchange to columns J to JU. +* + IF( JP.NE.1 ) + $ CALL ZSWAP( JU-J+1, AB( KV+JP, J ), LDAB-1, + $ AB( KV+1, J ), LDAB-1 ) + IF( KM.GT.0 ) THEN +* +* Compute multipliers. +* + CALL ZSCAL( KM, ONE / AB( KV+1, J ), AB( KV+2, J ), 1 ) +* +* Update trailing submatrix within the band. +* + IF( JU.GT.J ) + $ CALL ZGERU( KM, JU-J, -ONE, AB( KV+2, J ), 1, + $ AB( KV, J+1 ), LDAB-1, AB( KV+1, J+1 ), + $ LDAB-1 ) + END IF + ELSE +* +* If pivot is zero, set INFO to the index of the pivot +* unless a zero pivot has already been found. +* + IF( INFO.EQ.0 ) + $ INFO = J + END IF + 40 CONTINUE + RETURN +* +* End of ZGBTF2 +* + END diff --git a/costa/native/external/lapack/zgbtrf.f b/costa/native/external/lapack/zgbtrf.f new file mode 100644 index 000000000..b76a481c5 --- /dev/null +++ b/costa/native/external/lapack/zgbtrf.f @@ -0,0 +1,443 @@ + SUBROUTINE ZGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* ZGBTRF computes an LU factorization of a complex m-by-n band matrix A +* using partial pivoting with row interchanges. +* +* This is the blocked version of the algorithm, calling Level 3 BLAS. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input/output) COMPLEX*16 array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows KL+1 to +* 2*KL+KU+1; rows 1 to KL of the array need not be set. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +* +* On exit, details of the factorization: U is stored as an +* upper triangular band matrix with KL+KU superdiagonals in +* rows 1 to KL+KU+1, and the multipliers used during the +* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +* See below for further details. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* M = N = 6, KL = 2, KU = 1: +* +* On entry: On exit: +* +* * * * + + + * * * u14 u25 u36 +* * * + + + + * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +* a31 a42 a53 a64 * * m31 m42 m53 m64 * * +* +* Array elements marked * are not used by the routine; elements marked +* + need not be set on entry, but are required by the routine to store +* elements of U because of fill-in resulting from the row interchanges. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) + INTEGER NBMAX, LDWORK + PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 ) +* .. +* .. Local Scalars .. + INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP, + $ JU, K2, KM, KV, NB, NW + COMPLEX*16 TEMP +* .. +* .. Local Arrays .. + COMPLEX*16 WORK13( LDWORK, NBMAX ), + $ WORK31( LDWORK, NBMAX ) +* .. +* .. External Functions .. + INTEGER ILAENV, IZAMAX + EXTERNAL ILAENV, IZAMAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZGBTF2, ZGEMM, ZGERU, ZLASWP, + $ ZSCAL, ZSWAP, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* KV is the number of superdiagonals in the factor U, allowing for +* fill-in +* + KV = KU + KL +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KV+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGBTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment +* + NB = ILAENV( 1, 'ZGBTRF', ' ', M, N, KL, KU ) +* +* The block size must not exceed the limit set by the size of the +* local arrays WORK13 and WORK31. +* + NB = MIN( NB, NBMAX ) +* + IF( NB.LE.1 .OR. NB.GT.KL ) THEN +* +* Use unblocked code +* + CALL ZGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) + ELSE +* +* Use blocked code +* +* Zero the superdiagonal elements of the work array WORK13 +* + DO 20 J = 1, NB + DO 10 I = 1, J - 1 + WORK13( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Zero the subdiagonal elements of the work array WORK31 +* + DO 40 J = 1, NB + DO 30 I = J + 1, NB + WORK31( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Gaussian elimination with partial pivoting +* +* Set fill-in elements in columns KU+2 to KV to zero +* + DO 60 J = KU + 2, MIN( KV, N ) + DO 50 I = KV - J + 2, KL + AB( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE +* +* JU is the index of the last column affected by the current +* stage of the factorization +* + JU = 1 +* + DO 180 J = 1, MIN( M, N ), NB + JB = MIN( NB, MIN( M, N )-J+1 ) +* +* The active part of the matrix is partitioned +* +* A11 A12 A13 +* A21 A22 A23 +* A31 A32 A33 +* +* Here A11, A21 and A31 denote the current block of JB columns +* which is about to be factorized. The number of rows in the +* partitioning are JB, I2, I3 respectively, and the numbers +* of columns are JB, J2, J3. The superdiagonal elements of A13 +* and the subdiagonal elements of A31 lie outside the band. +* + I2 = MIN( KL-JB, M-J-JB+1 ) + I3 = MIN( JB, M-J-KL+1 ) +* +* J2 and J3 are computed after JU has been updated. +* +* Factorize the current block of JB columns +* + DO 80 JJ = J, J + JB - 1 +* +* Set fill-in elements in column JJ+KV to zero +* + IF( JJ+KV.LE.N ) THEN + DO 70 I = 1, KL + AB( I, JJ+KV ) = ZERO + 70 CONTINUE + END IF +* +* Find pivot and test for singularity. KM is the number of +* subdiagonal elements in the current column. +* + KM = MIN( KL, M-JJ ) + JP = IZAMAX( KM+1, AB( KV+1, JJ ), 1 ) + IPIV( JJ ) = JP + JJ - J + IF( AB( KV+JP, JJ ).NE.ZERO ) THEN + JU = MAX( JU, MIN( JJ+KU+JP-1, N ) ) + IF( JP.NE.1 ) THEN +* +* Apply interchange to columns J to J+JB-1 +* + IF( JP+JJ-1.LT.J+KL ) THEN +* + CALL ZSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1, + $ AB( KV+JP+JJ-J, J ), LDAB-1 ) + ELSE +* +* The interchange affects columns J to JJ-1 of A31 +* which are stored in the work array WORK31 +* + CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) + CALL ZSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1, + $ AB( KV+JP, JJ ), LDAB-1 ) + END IF + END IF +* +* Compute multipliers +* + CALL ZSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ), + $ 1 ) +* +* Update trailing submatrix within the band and within +* the current block. JM is the index of the last column +* which needs to be updated. +* + JM = MIN( JU, J+JB-1 ) + IF( JM.GT.JJ ) + $ CALL ZGERU( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1, + $ AB( KV, JJ+1 ), LDAB-1, + $ AB( KV+1, JJ+1 ), LDAB-1 ) + ELSE +* +* If pivot is zero, set INFO to the index of the pivot +* unless a zero pivot has already been found. +* + IF( INFO.EQ.0 ) + $ INFO = JJ + END IF +* +* Copy current column of A31 into the work array WORK31 +* + NW = MIN( JJ-J+1, I3 ) + IF( NW.GT.0 ) + $ CALL ZCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1, + $ WORK31( 1, JJ-J+1 ), 1 ) + 80 CONTINUE + IF( J+JB.LE.N ) THEN +* +* Apply the row interchanges to the other blocks. +* + J2 = MIN( JU-J+1, KV ) - JB + J3 = MAX( 0, JU-J-KV+1 ) +* +* Use ZLASWP to apply the row interchanges to A12, A22, and +* A32. +* + CALL ZLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB, + $ IPIV( J ), 1 ) +* +* Adjust the pivot indices. +* + DO 90 I = J, J + JB - 1 + IPIV( I ) = IPIV( I ) + J - 1 + 90 CONTINUE +* +* Apply the row interchanges to A13, A23, and A33 +* columnwise. +* + K2 = J - 1 + JB + J2 + DO 110 I = 1, J3 + JJ = K2 + I + DO 100 II = J + I - 1, J + JB - 1 + IP = IPIV( II ) + IF( IP.NE.II ) THEN + TEMP = AB( KV+1+II-JJ, JJ ) + AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ ) + AB( KV+1+IP-JJ, JJ ) = TEMP + END IF + 100 CONTINUE + 110 CONTINUE +* +* Update the relevant part of the trailing submatrix +* + IF( J2.GT.0 ) THEN +* +* Update A12 +* + CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, J2, ONE, AB( KV+1, J ), LDAB-1, + $ AB( KV+1-JB, J+JB ), LDAB-1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A22 +* + CALL ZGEMM( 'No transpose', 'No transpose', I2, J2, + $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, + $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, + $ AB( KV+1, J+JB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Update A32 +* + CALL ZGEMM( 'No transpose', 'No transpose', I3, J2, + $ JB, -ONE, WORK31, LDWORK, + $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, + $ AB( KV+KL+1-JB, J+JB ), LDAB-1 ) + END IF + END IF +* + IF( J3.GT.0 ) THEN +* +* Copy the lower triangle of A13 into the work array +* WORK13 +* + DO 130 JJ = 1, J3 + DO 120 II = JJ, JB + WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 ) + 120 CONTINUE + 130 CONTINUE +* +* Update A13 in the work array +* + CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, J3, ONE, AB( KV+1, J ), LDAB-1, + $ WORK13, LDWORK ) +* + IF( I2.GT.0 ) THEN +* +* Update A23 +* + CALL ZGEMM( 'No transpose', 'No transpose', I2, J3, + $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, + $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ), + $ LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Update A33 +* + CALL ZGEMM( 'No transpose', 'No transpose', I3, J3, + $ JB, -ONE, WORK31, LDWORK, WORK13, + $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 ) + END IF +* +* Copy the lower triangle of A13 back into place +* + DO 150 JJ = 1, J3 + DO 140 II = JJ, JB + AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ ) + 140 CONTINUE + 150 CONTINUE + END IF + ELSE +* +* Adjust the pivot indices. +* + DO 160 I = J, J + JB - 1 + IPIV( I ) = IPIV( I ) + J - 1 + 160 CONTINUE + END IF +* +* Partially undo the interchanges in the current block to +* restore the upper triangular form of A31 and copy the upper +* triangle of A31 back into place +* + DO 170 JJ = J + JB - 1, J, -1 + JP = IPIV( JJ ) - JJ + 1 + IF( JP.NE.1 ) THEN +* +* Apply interchange to columns J to JJ-1 +* + IF( JP+JJ-1.LT.J+KL ) THEN +* +* The interchange does not affect A31 +* + CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ AB( KV+JP+JJ-J, J ), LDAB-1 ) + ELSE +* +* The interchange does affect A31 +* + CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) + END IF + END IF +* +* Copy the current column of A31 back into place +* + NW = MIN( I3, JJ-J+1 ) + IF( NW.GT.0 ) + $ CALL ZCOPY( NW, WORK31( 1, JJ-J+1 ), 1, + $ AB( KV+KL+1-JJ+J, JJ ), 1 ) + 170 CONTINUE + 180 CONTINUE + END IF +* + RETURN +* +* End of ZGBTRF +* + END diff --git a/costa/native/external/lapack/zgbtrs.f b/costa/native/external/lapack/zgbtrs.f new file mode 100644 index 000000000..49984e89e --- /dev/null +++ b/costa/native/external/lapack/zgbtrs.f @@ -0,0 +1,215 @@ + SUBROUTINE ZGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AB( LDAB, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZGBTRS solves a system of linear equations +* A * X = B, A**T * X = B, or A**H * X = B +* with a general band matrix A using the LU factorization computed +* by ZGBTRF. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations. +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input) COMPLEX*16 array, dimension (LDAB,N) +* Details of the LU factorization of the band matrix A, as +* computed by ZGBTRF. U is stored as an upper triangular band +* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +* the multipliers used during the factorization are stored in +* rows KL+KU+2 to 2*KL+KU+1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= N, row i of the matrix was +* interchanged with row IPIV(i). +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LNOTI, NOTRAN + INTEGER I, J, KD, L, LM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMV, ZGERU, ZLACGV, ZSWAP, ZTBSV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.( 2*KL+KU+1 ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + KD = KU + KL + 1 + LNOTI = KL.GT.0 +* + IF( NOTRAN ) THEN +* +* Solve A*X = B. +* +* Solve L*X = B, overwriting B with X. +* +* L is represented as a product of permutations and unit lower +* triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1), +* where each transformation L(i) is a rank-one modification of +* the identity matrix. +* + IF( LNOTI ) THEN + DO 10 J = 1, N - 1 + LM = MIN( KL, N-J ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL ZSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + CALL ZGERU( LM, NRHS, -ONE, AB( KD+1, J ), 1, B( J, 1 ), + $ LDB, B( J+1, 1 ), LDB ) + 10 CONTINUE + END IF +* + DO 20 I = 1, NRHS +* +* Solve U*X = B, overwriting B with X. +* + CALL ZTBSV( 'Upper', 'No transpose', 'Non-unit', N, KL+KU, + $ AB, LDAB, B( 1, I ), 1 ) + 20 CONTINUE +* + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Solve A**T * X = B. +* + DO 30 I = 1, NRHS +* +* Solve U**T * X = B, overwriting B with X. +* + CALL ZTBSV( 'Upper', 'Transpose', 'Non-unit', N, KL+KU, AB, + $ LDAB, B( 1, I ), 1 ) + 30 CONTINUE +* +* Solve L**T * X = B, overwriting B with X. +* + IF( LNOTI ) THEN + DO 40 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + CALL ZGEMV( 'Transpose', LM, NRHS, -ONE, B( J+1, 1 ), + $ LDB, AB( KD+1, J ), 1, ONE, B( J, 1 ), LDB ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL ZSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + 40 CONTINUE + END IF +* + ELSE +* +* Solve A**H * X = B. +* + DO 50 I = 1, NRHS +* +* Solve U**H * X = B, overwriting B with X. +* + CALL ZTBSV( 'Upper', 'Conjugate transpose', 'Non-unit', N, + $ KL+KU, AB, LDAB, B( 1, I ), 1 ) + 50 CONTINUE +* +* Solve L**H * X = B, overwriting B with X. +* + IF( LNOTI ) THEN + DO 60 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + CALL ZLACGV( NRHS, B( J, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', LM, NRHS, -ONE, + $ B( J+1, 1 ), LDB, AB( KD+1, J ), 1, ONE, + $ B( J, 1 ), LDB ) + CALL ZLACGV( NRHS, B( J, 1 ), LDB ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL ZSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + 60 CONTINUE + END IF + END IF + RETURN +* +* End of ZGBTRS +* + END diff --git a/costa/native/external/lapack/zgebak.f b/costa/native/external/lapack/zgebak.f new file mode 100644 index 000000000..e87616905 --- /dev/null +++ b/costa/native/external/lapack/zgebak.f @@ -0,0 +1,190 @@ + SUBROUTINE ZGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOB, SIDE + INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION SCALE( * ) + COMPLEX*16 V( LDV, * ) +* .. +* +* Purpose +* ======= +* +* ZGEBAK forms the right or left eigenvectors of a complex general +* matrix by backward transformation on the computed eigenvectors of the +* balanced matrix output by ZGEBAL. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the type of backward transformation required: +* = 'N', do nothing, return immediately; +* = 'P', do backward transformation for permutation only; +* = 'S', do backward transformation for scaling only; +* = 'B', do backward transformations for both permutation and +* scaling. +* JOB must be the same as the argument JOB supplied to ZGEBAL. +* +* SIDE (input) CHARACTER*1 +* = 'R': V contains right eigenvectors; +* = 'L': V contains left eigenvectors. +* +* N (input) INTEGER +* The number of rows of the matrix V. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* The integers ILO and IHI determined by ZGEBAL. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* SCALE (input) DOUBLE PRECISION array, dimension (N) +* Details of the permutation and scaling factors, as returned +* by ZGEBAL. +* +* M (input) INTEGER +* The number of columns of the matrix V. M >= 0. +* +* V (input/output) COMPLEX*16 array, dimension (LDV,M) +* On entry, the matrix of right or left eigenvectors to be +* transformed, as returned by ZHSEIN or ZTREVC. +* On exit, V is overwritten by the transformed eigenvectors. +* +* LDV (input) INTEGER +* The leading dimension of the array V. LDV >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFTV, RIGHTV + INTEGER I, II, K + DOUBLE PRECISION S +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and Test the input parameters +* + RIGHTV = LSAME( SIDE, 'R' ) + LEFTV = LSAME( SIDE, 'L' ) +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -7 + ELSE IF( LDV.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEBAK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( M.EQ.0 ) + $ RETURN + IF( LSAME( JOB, 'N' ) ) + $ RETURN +* + IF( ILO.EQ.IHI ) + $ GO TO 30 +* +* Backward balance +* + IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN +* + IF( RIGHTV ) THEN + DO 10 I = ILO, IHI + S = SCALE( I ) + CALL ZDSCAL( M, S, V( I, 1 ), LDV ) + 10 CONTINUE + END IF +* + IF( LEFTV ) THEN + DO 20 I = ILO, IHI + S = ONE / SCALE( I ) + CALL ZDSCAL( M, S, V( I, 1 ), LDV ) + 20 CONTINUE + END IF +* + END IF +* +* Backward permutation +* +* For I = ILO-1 step -1 until 1, +* IHI+1 step 1 until N do -- +* + 30 CONTINUE + IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN + IF( RIGHTV ) THEN + DO 40 II = 1, N + I = II + IF( I.GE.ILO .AND. I.LE.IHI ) + $ GO TO 40 + IF( I.LT.ILO ) + $ I = ILO - II + K = SCALE( I ) + IF( K.EQ.I ) + $ GO TO 40 + CALL ZSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 40 CONTINUE + END IF +* + IF( LEFTV ) THEN + DO 50 II = 1, N + I = II + IF( I.GE.ILO .AND. I.LE.IHI ) + $ GO TO 50 + IF( I.LT.ILO ) + $ I = ILO - II + K = SCALE( I ) + IF( K.EQ.I ) + $ GO TO 50 + CALL ZSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 50 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZGEBAK +* + END diff --git a/costa/native/external/lapack/zgebal.f b/costa/native/external/lapack/zgebal.f new file mode 100644 index 000000000..562abc7e3 --- /dev/null +++ b/costa/native/external/lapack/zgebal.f @@ -0,0 +1,331 @@ + SUBROUTINE ZGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION SCALE( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZGEBAL balances a general complex matrix A. This involves, first, +* permuting A by a similarity transformation to isolate eigenvalues +* in the first 1 to ILO-1 and last IHI+1 to N elements on the +* diagonal; and second, applying a diagonal similarity transformation +* to rows and columns ILO to IHI to make the rows and columns as +* close in norm as possible. Both steps are optional. +* +* Balancing may reduce the 1-norm of the matrix, and improve the +* accuracy of the computed eigenvalues and/or eigenvectors. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the operations to be performed on A: +* = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 +* for i = 1,...,N; +* = 'P': permute only; +* = 'S': scale only; +* = 'B': both permute and scale. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the input matrix A. +* On exit, A is overwritten by the balanced matrix. +* If JOB = 'N', A is not referenced. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* ILO (output) INTEGER +* IHI (output) INTEGER +* ILO and IHI are set to integers such that on exit +* A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. +* If JOB = 'N' or 'S', ILO = 1 and IHI = N. +* +* SCALE (output) DOUBLE PRECISION array, dimension (N) +* Details of the permutations and scaling factors applied to +* A. If P(j) is the index of the row and column interchanged +* with row and column j and D(j) is the scaling factor +* applied to row and column j, then +* SCALE(j) = P(j) for j = 1,...,ILO-1 +* = D(j) for j = ILO,...,IHI +* = P(j) for j = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The permutations consist of row and column interchanges which put +* the matrix in the form +* +* ( T1 X Y ) +* P A P = ( 0 B Z ) +* ( 0 0 T2 ) +* +* where T1 and T2 are upper triangular matrices whose eigenvalues lie +* along the diagonal. The column indices ILO and IHI mark the starting +* and ending columns of the submatrix B. Balancing consists of applying +* a diagonal similarity transformation inv(D) * B * D to make the +* 1-norms of each row of B and its corresponding column nearly equal. +* The output matrix is +* +* ( T1 X*D Y ) +* ( 0 inv(D)*B*D inv(D)*Z ). +* ( 0 0 T2 ) +* +* Information about the permutations P and the diagonal matrix D is +* returned in the vector SCALE. +* +* This subroutine is based on the EISPACK routine CBAL. +* +* Modified by Tzu-Yi Chen, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION SCLFAC + PARAMETER ( SCLFAC = 0.8D+1 ) + DOUBLE PRECISION FACTOR + PARAMETER ( FACTOR = 0.95D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOCONV + INTEGER I, ICA, IEXC, IRA, J, K, L, M + DOUBLE PRECISION C, CA, F, G, R, RA, S, SFMAX1, SFMAX2, SFMIN1, + $ SFMIN2 + COMPLEX*16 CDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IZAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEBAL', -INFO ) + RETURN + END IF +* + K = 1 + L = N +* + IF( N.EQ.0 ) + $ GO TO 210 +* + IF( LSAME( JOB, 'N' ) ) THEN + DO 10 I = 1, N + SCALE( I ) = ONE + 10 CONTINUE + GO TO 210 + END IF +* + IF( LSAME( JOB, 'S' ) ) + $ GO TO 120 +* +* Permutation to isolate eigenvalues if possible +* + GO TO 50 +* +* Row and column exchange. +* + 20 CONTINUE + SCALE( M ) = J + IF( J.EQ.M ) + $ GO TO 30 +* + CALL ZSWAP( L, A( 1, J ), 1, A( 1, M ), 1 ) + CALL ZSWAP( N-K+1, A( J, K ), LDA, A( M, K ), LDA ) +* + 30 CONTINUE + GO TO ( 40, 80 )IEXC +* +* Search for rows isolating an eigenvalue and push them down. +* + 40 CONTINUE + IF( L.EQ.1 ) + $ GO TO 210 + L = L - 1 +* + 50 CONTINUE + DO 70 J = L, 1, -1 +* + DO 60 I = 1, L + IF( I.EQ.J ) + $ GO TO 60 + IF( DBLE( A( J, I ) ).NE.ZERO .OR. DIMAG( A( J, I ) ).NE. + $ ZERO )GO TO 70 + 60 CONTINUE +* + M = L + IEXC = 1 + GO TO 20 + 70 CONTINUE +* + GO TO 90 +* +* Search for columns isolating an eigenvalue and push them left. +* + 80 CONTINUE + K = K + 1 +* + 90 CONTINUE + DO 110 J = K, L +* + DO 100 I = K, L + IF( I.EQ.J ) + $ GO TO 100 + IF( DBLE( A( I, J ) ).NE.ZERO .OR. DIMAG( A( I, J ) ).NE. + $ ZERO )GO TO 110 + 100 CONTINUE +* + M = K + IEXC = 2 + GO TO 20 + 110 CONTINUE +* + 120 CONTINUE + DO 130 I = K, L + SCALE( I ) = ONE + 130 CONTINUE +* + IF( LSAME( JOB, 'P' ) ) + $ GO TO 210 +* +* Balance the submatrix in rows K to L. +* +* Iterative loop for norm reduction +* + SFMIN1 = DLAMCH( 'S' ) / DLAMCH( 'P' ) + SFMAX1 = ONE / SFMIN1 + SFMIN2 = SFMIN1*SCLFAC + SFMAX2 = ONE / SFMIN2 + 140 CONTINUE + NOCONV = .FALSE. +* + DO 200 I = K, L + C = ZERO + R = ZERO +* + DO 150 J = K, L + IF( J.EQ.I ) + $ GO TO 150 + C = C + CABS1( A( J, I ) ) + R = R + CABS1( A( I, J ) ) + 150 CONTINUE + ICA = IZAMAX( L, A( 1, I ), 1 ) + CA = ABS( A( ICA, I ) ) + IRA = IZAMAX( N-K+1, A( I, K ), LDA ) + RA = ABS( A( I, IRA+K-1 ) ) +* +* Guard against zero C or R due to underflow. +* + IF( C.EQ.ZERO .OR. R.EQ.ZERO ) + $ GO TO 200 + G = R / SCLFAC + F = ONE + S = C + R + 160 CONTINUE + IF( C.GE.G .OR. MAX( F, C, CA ).GE.SFMAX2 .OR. + $ MIN( R, G, RA ).LE.SFMIN2 )GO TO 170 + F = F*SCLFAC + C = C*SCLFAC + CA = CA*SCLFAC + R = R / SCLFAC + G = G / SCLFAC + RA = RA / SCLFAC + GO TO 160 +* + 170 CONTINUE + G = C / SCLFAC + 180 CONTINUE + IF( G.LT.R .OR. MAX( R, RA ).GE.SFMAX2 .OR. + $ MIN( F, C, G, CA ).LE.SFMIN2 )GO TO 190 + F = F / SCLFAC + C = C / SCLFAC + G = G / SCLFAC + CA = CA / SCLFAC + R = R*SCLFAC + RA = RA*SCLFAC + GO TO 180 +* +* Now balance. +* + 190 CONTINUE + IF( ( C+R ).GE.FACTOR*S ) + $ GO TO 200 + IF( F.LT.ONE .AND. SCALE( I ).LT.ONE ) THEN + IF( F*SCALE( I ).LE.SFMIN1 ) + $ GO TO 200 + END IF + IF( F.GT.ONE .AND. SCALE( I ).GT.ONE ) THEN + IF( SCALE( I ).GE.SFMAX1 / F ) + $ GO TO 200 + END IF + G = ONE / F + SCALE( I ) = SCALE( I )*F + NOCONV = .TRUE. +* + CALL ZDSCAL( N-K+1, G, A( I, K ), LDA ) + CALL ZDSCAL( L, F, A( 1, I ), 1 ) +* + 200 CONTINUE +* + IF( NOCONV ) + $ GO TO 140 +* + 210 CONTINUE + ILO = K + IHI = L +* + RETURN +* +* End of ZGEBAL +* + END diff --git a/costa/native/external/lapack/zgebd2.f b/costa/native/external/lapack/zgebd2.f new file mode 100644 index 000000000..5552fcf17 --- /dev/null +++ b/costa/native/external/lapack/zgebd2.f @@ -0,0 +1,249 @@ + SUBROUTINE ZGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + COMPLEX*16 A( LDA, * ), TAUP( * ), TAUQ( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGEBD2 reduces a complex general m by n matrix A to upper or lower +* real bidiagonal form B by a unitary transformation: Q' * A * P = B. +* +* If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows in the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns in the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the m by n general matrix to be reduced. +* On exit, +* if m >= n, the diagonal and the first superdiagonal are +* overwritten with the upper bidiagonal matrix B; the +* elements below the diagonal, with the array TAUQ, represent +* the unitary matrix Q as a product of elementary +* reflectors, and the elements above the first superdiagonal, +* with the array TAUP, represent the unitary matrix P as +* a product of elementary reflectors; +* if m < n, the diagonal and the first subdiagonal are +* overwritten with the lower bidiagonal matrix B; the +* elements below the first subdiagonal, with the array TAUQ, +* represent the unitary matrix Q as a product of +* elementary reflectors, and the elements above the diagonal, +* with the array TAUP, represent the unitary matrix P as +* a product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* D (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The diagonal elements of the bidiagonal matrix B: +* D(i) = A(i,i). +* +* E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) +* The off-diagonal elements of the bidiagonal matrix B: +* if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; +* if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. +* +* TAUQ (output) COMPLEX*16 array dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix Q. See Further Details. +* +* TAUP (output) COMPLEX*16 array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix P. See Further Details. +* +* WORK (workspace) COMPLEX*16 array, dimension (max(M,N)) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrices Q and P are represented as products of elementary +* reflectors: +* +* If m >= n, +* +* Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are complex scalars, and v and u are complex +* vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in +* A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in +* A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* If m < n, +* +* Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are complex scalars, v and u are complex vectors; +* v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); +* u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); +* tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* The contents of A on exit are illustrated by the following examples: +* +* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +* +* ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +* ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +* ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +* ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +* ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +* ( v1 v2 v3 v4 v5 ) +* +* where d and e denote diagonal and off-diagonal elements of B, vi +* denotes an element of the vector defining H(i), and ui an element of +* the vector defining G(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACGV, ZLARF, ZLARFG +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.LT.0 ) THEN + CALL XERBLA( 'ZGEBD2', -INFO ) + RETURN + END IF +* + IF( M.GE.N ) THEN +* +* Reduce to upper bidiagonal form +* + DO 10 I = 1, N +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + ALPHA = A( I, I ) + CALL ZLARFG( M-I+1, ALPHA, A( MIN( I+1, M ), I ), 1, + $ TAUQ( I ) ) + D( I ) = ALPHA + A( I, I ) = ONE +* +* Apply H(i)' to A(i:m,i+1:n) from the left +* + CALL ZLARF( 'Left', M-I+1, N-I, A( I, I ), 1, + $ DCONJG( TAUQ( I ) ), A( I, I+1 ), LDA, WORK ) + A( I, I ) = D( I ) +* + IF( I.LT.N ) THEN +* +* Generate elementary reflector G(i) to annihilate +* A(i,i+2:n) +* + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + ALPHA = A( I, I+1 ) + CALL ZLARFG( N-I, ALPHA, A( I, MIN( I+2, N ) ), LDA, + $ TAUP( I ) ) + E( I ) = ALPHA + A( I, I+1 ) = ONE +* +* Apply G(i) to A(i+1:m,i+1:n) from the right +* + CALL ZLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA, + $ TAUP( I ), A( I+1, I+1 ), LDA, WORK ) + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + A( I, I+1 ) = E( I ) + ELSE + TAUP( I ) = ZERO + END IF + 10 CONTINUE + ELSE +* +* Reduce to lower bidiagonal form +* + DO 20 I = 1, M +* +* Generate elementary reflector G(i) to annihilate A(i,i+1:n) +* + CALL ZLACGV( N-I+1, A( I, I ), LDA ) + ALPHA = A( I, I ) + CALL ZLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA, + $ TAUP( I ) ) + D( I ) = ALPHA + A( I, I ) = ONE +* +* Apply G(i) to A(i+1:m,i:n) from the right +* + CALL ZLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAUP( I ), + $ A( MIN( I+1, M ), I ), LDA, WORK ) + CALL ZLACGV( N-I+1, A( I, I ), LDA ) + A( I, I ) = D( I ) +* + IF( I.LT.M ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:m,i) +* + ALPHA = A( I+1, I ) + CALL ZLARFG( M-I, ALPHA, A( MIN( I+2, M ), I ), 1, + $ TAUQ( I ) ) + E( I ) = ALPHA + A( I+1, I ) = ONE +* +* Apply H(i)' to A(i+1:m,i+1:n) from the left +* + CALL ZLARF( 'Left', M-I, N-I, A( I+1, I ), 1, + $ DCONJG( TAUQ( I ) ), A( I+1, I+1 ), LDA, + $ WORK ) + A( I+1, I ) = E( I ) + ELSE + TAUQ( I ) = ZERO + END IF + 20 CONTINUE + END IF + RETURN +* +* End of ZGEBD2 +* + END diff --git a/costa/native/external/lapack/zgebrd.f b/costa/native/external/lapack/zgebrd.f new file mode 100644 index 000000000..34b4ba6be --- /dev/null +++ b/costa/native/external/lapack/zgebrd.f @@ -0,0 +1,269 @@ + SUBROUTINE ZGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + COMPLEX*16 A( LDA, * ), TAUP( * ), TAUQ( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGEBRD reduces a general complex M-by-N matrix A to upper or lower +* bidiagonal form B by a unitary transformation: Q**H * A * P = B. +* +* If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows in the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns in the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M-by-N general matrix to be reduced. +* On exit, +* if m >= n, the diagonal and the first superdiagonal are +* overwritten with the upper bidiagonal matrix B; the +* elements below the diagonal, with the array TAUQ, represent +* the unitary matrix Q as a product of elementary +* reflectors, and the elements above the first superdiagonal, +* with the array TAUP, represent the unitary matrix P as +* a product of elementary reflectors; +* if m < n, the diagonal and the first subdiagonal are +* overwritten with the lower bidiagonal matrix B; the +* elements below the first subdiagonal, with the array TAUQ, +* represent the unitary matrix Q as a product of +* elementary reflectors, and the elements above the diagonal, +* with the array TAUP, represent the unitary matrix P as +* a product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* D (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The diagonal elements of the bidiagonal matrix B: +* D(i) = A(i,i). +* +* E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) +* The off-diagonal elements of the bidiagonal matrix B: +* if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; +* if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. +* +* TAUQ (output) COMPLEX*16 array dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix Q. See Further Details. +* +* TAUP (output) COMPLEX*16 array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix P. See Further Details. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,M,N). +* For optimum performance LWORK >= (M+N)*NB, where NB +* is the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrices Q and P are represented as products of elementary +* reflectors: +* +* If m >= n, +* +* Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are complex scalars, and v and u are complex +* vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in +* A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in +* A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* If m < n, +* +* Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are complex scalars, and v and u are complex +* vectors; v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in +* A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in +* A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* The contents of A on exit are illustrated by the following examples: +* +* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +* +* ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +* ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +* ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +* ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +* ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +* ( v1 v2 v3 v4 v5 ) +* +* where d and e denote diagonal and off-diagonal elements of B, vi +* denotes an element of the vector defining H(i), and ui an element of +* the vector defining G(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB, + $ NBMIN, NX + DOUBLE PRECISION WS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEBD2, ZGEMM, ZLABRD +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB = MAX( 1, ILAENV( 1, 'ZGEBRD', ' ', M, N, -1, -1 ) ) + LWKOPT = ( M+N )*NB + WORK( 1 ) = DBLE( LWKOPT ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M, N ) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + IF( INFO.LT.0 ) THEN + CALL XERBLA( 'ZGEBRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + MINMN = MIN( M, N ) + IF( MINMN.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + WS = MAX( M, N ) + LDWRKX = M + LDWRKY = N +* + IF( NB.GT.1 .AND. NB.LT.MINMN ) THEN +* +* Set the crossover point NX. +* + NX = MAX( NB, ILAENV( 3, 'ZGEBRD', ' ', M, N, -1, -1 ) ) +* +* Determine when to switch from blocked to unblocked code. +* + IF( NX.LT.MINMN ) THEN + WS = ( M+N )*NB + IF( LWORK.LT.WS ) THEN +* +* Not enough work space for the optimal NB, consider using +* a smaller block size. +* + NBMIN = ILAENV( 2, 'ZGEBRD', ' ', M, N, -1, -1 ) + IF( LWORK.GE.( M+N )*NBMIN ) THEN + NB = LWORK / ( M+N ) + ELSE + NB = 1 + NX = MINMN + END IF + END IF + END IF + ELSE + NX = MINMN + END IF +* + DO 30 I = 1, MINMN - NX, NB +* +* Reduce rows and columns i:i+ib-1 to bidiagonal form and return +* the matrices X and Y which are needed to update the unreduced +* part of the matrix +* + CALL ZLABRD( M-I+1, N-I+1, NB, A( I, I ), LDA, D( I ), E( I ), + $ TAUQ( I ), TAUP( I ), WORK, LDWRKX, + $ WORK( LDWRKX*NB+1 ), LDWRKY ) +* +* Update the trailing submatrix A(i+ib:m,i+ib:n), using +* an update of the form A := A - V*Y' - X*U' +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', M-I-NB+1, + $ N-I-NB+1, NB, -ONE, A( I+NB, I ), LDA, + $ WORK( LDWRKX*NB+NB+1 ), LDWRKY, ONE, + $ A( I+NB, I+NB ), LDA ) + CALL ZGEMM( 'No transpose', 'No transpose', M-I-NB+1, N-I-NB+1, + $ NB, -ONE, WORK( NB+1 ), LDWRKX, A( I, I+NB ), LDA, + $ ONE, A( I+NB, I+NB ), LDA ) +* +* Copy diagonal and off-diagonal elements of B back into A +* + IF( M.GE.N ) THEN + DO 10 J = I, I + NB - 1 + A( J, J ) = D( J ) + A( J, J+1 ) = E( J ) + 10 CONTINUE + ELSE + DO 20 J = I, I + NB - 1 + A( J, J ) = D( J ) + A( J+1, J ) = E( J ) + 20 CONTINUE + END IF + 30 CONTINUE +* +* Use unblocked code to reduce the remainder of the matrix +* + CALL ZGEBD2( M-I+1, N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAUQ( I ), TAUP( I ), WORK, IINFO ) + WORK( 1 ) = WS + RETURN +* +* End of ZGEBRD +* + END diff --git a/costa/native/external/lapack/zgecon.f b/costa/native/external/lapack/zgecon.f new file mode 100644 index 000000000..0a09b0b11 --- /dev/null +++ b/costa/native/external/lapack/zgecon.f @@ -0,0 +1,189 @@ + SUBROUTINE ZGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, RWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGECON estimates the reciprocal of the condition number of a general +* complex matrix A, in either the 1-norm or the infinity-norm, using +* the LU factorization computed by ZGETRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The factors L and U from the factorization A = P*L*U +* as computed by ZGETRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* ANORM (input) DOUBLE PRECISION +* If NORM = '1' or 'O', the 1-norm of the original matrix A. +* If NORM = 'I', the infinity-norm of the original matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ONENRM + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + DOUBLE PRECISION AINVNM, SCALE, SL, SMLNUM, SU + COMPLEX*16 ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IZAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDRSCL, ZLACON, ZLATRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGECON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + CALL ZLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, N, A, + $ LDA, WORK, SL, RWORK, INFO ) +* +* Multiply by inv(U). +* + CALL ZLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SU, RWORK( N+1 ), INFO ) + ELSE +* +* Multiply by inv(U'). +* + CALL ZLATRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, A, LDA, WORK, SU, RWORK( N+1 ), + $ INFO ) +* +* Multiply by inv(L'). +* + CALL ZLATRS( 'Lower', 'Conjugate transpose', 'Unit', NORMIN, + $ N, A, LDA, WORK, SL, RWORK, INFO ) + END IF +* +* Divide X by 1/(SL*SU) if doing so will not cause overflow. +* + SCALE = SL*SU + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = IZAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL ZDRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of ZGECON +* + END diff --git a/costa/native/external/lapack/zgeequ.f b/costa/native/external/lapack/zgeequ.f new file mode 100644 index 000000000..809e5f095 --- /dev/null +++ b/costa/native/external/lapack/zgeequ.f @@ -0,0 +1,234 @@ + SUBROUTINE ZGEEQU( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N + DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ), R( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZGEEQU computes row and column scalings intended to equilibrate an +* M-by-N matrix A and reduce its condition number. R returns the row +* scale factors and C the column scale factors, chosen to try to make +* the largest element in each row and column of the matrix B with +* elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. +* +* R(i) and C(j) are restricted to be between SMLNUM = smallest safe +* number and BIGNUM = largest safe number. Use of these scaling +* factors is not guaranteed to reduce the condition number of A but +* works well in practice. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The M-by-N matrix whose equilibration factors are +* to be computed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* R (output) DOUBLE PRECISION array, dimension (M) +* If INFO = 0 or INFO > M, R contains the row scale factors +* for A. +* +* C (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, C contains the column scale factors for A. +* +* ROWCND (output) DOUBLE PRECISION +* If INFO = 0 or INFO > M, ROWCND contains the ratio of the +* smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +* AMAX is neither too large nor too small, it is not worth +* scaling by R. +* +* COLCND (output) DOUBLE PRECISION +* If INFO = 0, COLCND contains the ratio of the smallest +* C(i) to the largest C(i). If COLCND >= 0.1, it is not +* worth scaling by C. +* +* AMAX (output) DOUBLE PRECISION +* Absolute value of largest matrix element. If AMAX is very +* close to overflow or very close to underflow, the matrix +* should be scaled. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= M: the i-th row of A is exactly zero +* > M: the (i-M)-th column of A is exactly zero +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION BIGNUM, RCMAX, RCMIN, SMLNUM + COMPLEX*16 ZDUM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + DO 30 J = 1, N + DO 20 I = 1, M + R( I ) = MAX( R( I ), CABS1( A( I, J ) ) ) + 20 CONTINUE + 30 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)) +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + DO 90 J = 1, N + DO 80 I = 1, M + C( J ) = MAX( C( J ), CABS1( A( I, J ) )*R( I ) ) + 80 CONTINUE + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)) +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of ZGEEQU +* + END diff --git a/costa/native/external/lapack/zgees.f b/costa/native/external/lapack/zgees.f new file mode 100644 index 000000000..0f5d0b64b --- /dev/null +++ b/costa/native/external/lapack/zgees.f @@ -0,0 +1,322 @@ + SUBROUTINE ZGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, W, VS, + $ LDVS, WORK, LWORK, RWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVS, SORT + INTEGER INFO, LDA, LDVS, LWORK, N, SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), VS( LDVS, * ), W( * ), WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELECT + EXTERNAL SELECT +* .. +* +* Purpose +* ======= +* +* ZGEES computes for an N-by-N complex nonsymmetric matrix A, the +* eigenvalues, the Schur form T, and, optionally, the matrix of Schur +* vectors Z. This gives the Schur factorization A = Z*T*(Z**H). +* +* Optionally, it also orders the eigenvalues on the diagonal of the +* Schur form so that selected eigenvalues are at the top left. +* The leading columns of Z then form an orthonormal basis for the +* invariant subspace corresponding to the selected eigenvalues. +* +* A complex matrix is in Schur form if it is upper triangular. +* +* Arguments +* ========= +* +* JOBVS (input) CHARACTER*1 +* = 'N': Schur vectors are not computed; +* = 'V': Schur vectors are computed. +* +* SORT (input) CHARACTER*1 +* Specifies whether or not to order the eigenvalues on the +* diagonal of the Schur form. +* = 'N': Eigenvalues are not ordered: +* = 'S': Eigenvalues are ordered (see SELECT). +* +* SELECT (input) LOGICAL FUNCTION of one COMPLEX*16 argument +* SELECT must be declared EXTERNAL in the calling subroutine. +* If SORT = 'S', SELECT is used to select eigenvalues to order +* to the top left of the Schur form. +* IF SORT = 'N', SELECT is not referenced. +* The eigenvalue W(j) is selected if SELECT(W(j)) is true. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the N-by-N matrix A. +* On exit, A has been overwritten by its Schur form T. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* SDIM (output) INTEGER +* If SORT = 'N', SDIM = 0. +* If SORT = 'S', SDIM = number of eigenvalues for which +* SELECT is true. +* +* W (output) COMPLEX*16 array, dimension (N) +* W contains the computed eigenvalues, in the same order that +* they appear on the diagonal of the output Schur form T. +* +* VS (output) COMPLEX*16 array, dimension (LDVS,N) +* If JOBVS = 'V', VS contains the unitary matrix Z of Schur +* vectors. +* If JOBVS = 'N', VS is not referenced. +* +* LDVS (input) INTEGER +* The leading dimension of the array VS. LDVS >= 1; if +* JOBVS = 'V', LDVS >= N. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,2*N). +* For good performance, LWORK must generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* BWORK (workspace) LOGICAL array, dimension (N) +* Not referenced if SORT = 'N'. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, and i is +* <= N: the QR algorithm failed to compute all the +* eigenvalues; elements 1:ILO-1 and i+1:N of W +* contain those eigenvalues which have converged; +* if JOBVS = 'V', VS contains the matrix which +* reduces A to its partially converged Schur form. +* = N+1: the eigenvalues could not be reordered because +* some eigenvalues were too close to separate (the +* problem is very ill-conditioned); +* = N+2: after reordering, roundoff changed values of +* some complex eigenvalues so that leading +* eigenvalues in the Schur form no longer satisfy +* SELECT = .TRUE.. This could also be caused by +* underflow due to scaling. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTST, WANTVS + INTEGER HSWORK, I, IBAL, ICOND, IERR, IEVAL, IHI, ILO, + $ ITAU, IWRK, K, MAXB, MAXWRK, MINWRK + DOUBLE PRECISION ANRM, BIGNUM, CSCALE, EPS, S, SEP, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZGEBAK, ZGEBAL, ZGEHRD, ZHSEQR, + $ ZLACPY, ZLASCL, ZTRSEN, ZUNGHR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVS = LSAME( JOBVS, 'V' ) + WANTST = LSAME( SORT, 'S' ) + IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN + INFO = -10 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to real +* workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by ZHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MAXWRK = N + N*ILAENV( 1, 'ZGEHRD', ' ', N, 1, N, 0 ) + MINWRK = MAX( 1, 2*N ) + IF( .NOT.WANTVS ) THEN + MAXB = MAX( ILAENV( 8, 'ZHSEQR', 'SN', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'ZHSEQR', 'SN', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, HSWORK, 1 ) + ELSE + MAXWRK = MAX( MAXWRK, N+( N-1 )* + $ ILAENV( 1, 'ZUNGHR', ' ', N, 1, N, -1 ) ) + MAXB = MAX( ILAENV( 8, 'ZHSEQR', 'EN', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'ZHSEQR', 'EN', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, HSWORK, 1 ) + END IF + WORK( 1 ) = MAXWRK + END IF + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEES ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL ZLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (CWorkspace: none) +* (RWorkspace: need N) +* + IBAL = 1 + CALL ZGEBAL( 'P', N, A, LDA, ILO, IHI, RWORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + ITAU = 1 + IWRK = N + ITAU + CALL ZGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVS ) THEN +* +* Copy Householder vectors to VS +* + CALL ZLACPY( 'L', N, N, A, LDA, VS, LDVS ) +* +* Generate unitary matrix in VS +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL ZUNGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) + END IF +* + SDIM = 0 +* +* Perform QR iteration, accumulating Schur vectors in VS if desired +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL ZHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, W, VS, LDVS, + $ WORK( IWRK ), LWORK-IWRK+1, IEVAL ) + IF( IEVAL.GT.0 ) + $ INFO = IEVAL +* +* Sort eigenvalues if desired +* + IF( WANTST .AND. INFO.EQ.0 ) THEN + IF( SCALEA ) + $ CALL ZLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, W, N, IERR ) + DO 10 I = 1, N + BWORK( I ) = SELECT( W( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues and transform Schur vectors +* (CWorkspace: none) +* (RWorkspace: none) +* + CALL ZTRSEN( 'N', JOBVS, BWORK, N, A, LDA, VS, LDVS, W, SDIM, + $ S, SEP, WORK( IWRK ), LWORK-IWRK+1, ICOND ) + END IF +* + IF( WANTVS ) THEN +* +* Undo balancing +* (CWorkspace: none) +* (RWorkspace: need N) +* + CALL ZGEBAK( 'P', 'R', N, ILO, IHI, RWORK( IBAL ), N, VS, LDVS, + $ IERR ) + END IF +* + IF( SCALEA ) THEN +* +* Undo scaling for the Schur form of A +* + CALL ZLASCL( 'U', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR ) + CALL ZCOPY( N, A, LDA+1, W, 1 ) + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of ZGEES +* + END diff --git a/costa/native/external/lapack/zgeesx.f b/costa/native/external/lapack/zgeesx.f new file mode 100644 index 000000000..1b9482ccf --- /dev/null +++ b/costa/native/external/lapack/zgeesx.f @@ -0,0 +1,371 @@ + SUBROUTINE ZGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM, W, + $ VS, LDVS, RCONDE, RCONDV, WORK, LWORK, RWORK, + $ BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVS, SENSE, SORT + INTEGER INFO, LDA, LDVS, LWORK, N, SDIM + DOUBLE PRECISION RCONDE, RCONDV +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), VS( LDVS, * ), W( * ), WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELECT + EXTERNAL SELECT +* .. +* +* Purpose +* ======= +* +* ZGEESX computes for an N-by-N complex nonsymmetric matrix A, the +* eigenvalues, the Schur form T, and, optionally, the matrix of Schur +* vectors Z. This gives the Schur factorization A = Z*T*(Z**H). +* +* Optionally, it also orders the eigenvalues on the diagonal of the +* Schur form so that selected eigenvalues are at the top left; +* computes a reciprocal condition number for the average of the +* selected eigenvalues (RCONDE); and computes a reciprocal condition +* number for the right invariant subspace corresponding to the +* selected eigenvalues (RCONDV). The leading columns of Z form an +* orthonormal basis for this invariant subspace. +* +* For further explanation of the reciprocal condition numbers RCONDE +* and RCONDV, see Section 4.10 of the LAPACK Users' Guide (where +* these quantities are called s and sep respectively). +* +* A complex matrix is in Schur form if it is upper triangular. +* +* Arguments +* ========= +* +* JOBVS (input) CHARACTER*1 +* = 'N': Schur vectors are not computed; +* = 'V': Schur vectors are computed. +* +* SORT (input) CHARACTER*1 +* Specifies whether or not to order the eigenvalues on the +* diagonal of the Schur form. +* = 'N': Eigenvalues are not ordered; +* = 'S': Eigenvalues are ordered (see SELECT). +* +* SELECT (input) LOGICAL FUNCTION of one COMPLEX*16 argument +* SELECT must be declared EXTERNAL in the calling subroutine. +* If SORT = 'S', SELECT is used to select eigenvalues to order +* to the top left of the Schur form. +* If SORT = 'N', SELECT is not referenced. +* An eigenvalue W(j) is selected if SELECT(W(j)) is true. +* +* SENSE (input) CHARACTER*1 +* Determines which reciprocal condition numbers are computed. +* = 'N': None are computed; +* = 'E': Computed for average of selected eigenvalues only; +* = 'V': Computed for selected right invariant subspace only; +* = 'B': Computed for both. +* If SENSE = 'E', 'V' or 'B', SORT must equal 'S'. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA, N) +* On entry, the N-by-N matrix A. +* On exit, A is overwritten by its Schur form T. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* SDIM (output) INTEGER +* If SORT = 'N', SDIM = 0. +* If SORT = 'S', SDIM = number of eigenvalues for which +* SELECT is true. +* +* W (output) COMPLEX*16 array, dimension (N) +* W contains the computed eigenvalues, in the same order +* that they appear on the diagonal of the output Schur form T. +* +* VS (output) COMPLEX*16 array, dimension (LDVS,N) +* If JOBVS = 'V', VS contains the unitary matrix Z of Schur +* vectors. +* If JOBVS = 'N', VS is not referenced. +* +* LDVS (input) INTEGER +* The leading dimension of the array VS. LDVS >= 1, and if +* JOBVS = 'V', LDVS >= N. +* +* RCONDE (output) DOUBLE PRECISION +* If SENSE = 'E' or 'B', RCONDE contains the reciprocal +* condition number for the average of the selected eigenvalues. +* Not referenced if SENSE = 'N' or 'V'. +* +* RCONDV (output) DOUBLE PRECISION +* If SENSE = 'V' or 'B', RCONDV contains the reciprocal +* condition number for the selected right invariant subspace. +* Not referenced if SENSE = 'N' or 'E'. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,2*N). +* Also, if SENSE = 'E' or 'V' or 'B', LWORK >= 2*SDIM*(N-SDIM), +* where SDIM is the number of selected eigenvalues computed by +* this routine. Note that 2*SDIM*(N-SDIM) <= N*N/2. +* For good performance, LWORK must generally be larger. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* BWORK (workspace) LOGICAL array, dimension (N) +* Not referenced if SORT = 'N'. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, and i is +* <= N: the QR algorithm failed to compute all the +* eigenvalues; elements 1:ILO-1 and i+1:N of W +* contain those eigenvalues which have converged; if +* JOBVS = 'V', VS contains the transformation which +* reduces A to its partially converged Schur form. +* = N+1: the eigenvalues could not be reordered because some +* eigenvalues were too close to separate (the problem +* is very ill-conditioned); +* = N+2: after reordering, roundoff changed values of some +* complex eigenvalues so that leading eigenvalues in +* the Schur form no longer satisfy SELECT=.TRUE. This +* could also be caused by underflow due to scaling. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL SCALEA, WANTSB, WANTSE, WANTSN, WANTST, WANTSV, + $ WANTVS + INTEGER HSWORK, I, IBAL, ICOND, IERR, IEVAL, IHI, ILO, + $ ITAU, IWRK, K, MAXB, MAXWRK, MINWRK + DOUBLE PRECISION ANRM, BIGNUM, CSCALE, EPS, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DLASCL, XERBLA, ZCOPY, ZGEBAK, ZGEBAL, ZGEHRD, + $ ZHSEQR, ZLACPY, ZLASCL, ZTRSEN, ZUNGHR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + WANTVS = LSAME( JOBVS, 'V' ) + WANTST = LSAME( SORT, 'S' ) + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) + IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR. + $ ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of real workspace needed at that point in the +* code, as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to real +* workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by ZHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case. +* If SENSE = 'E', 'V' or 'B', then the amount of workspace needed +* depends on SDIM, which is computed by the routine ZTRSEN later +* in the code.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 ) ) THEN + MAXWRK = N + N*ILAENV( 1, 'ZGEHRD', ' ', N, 1, N, 0 ) + MINWRK = MAX( 1, 2*N ) + IF( .NOT.WANTVS ) THEN + MAXB = MAX( ILAENV( 8, 'ZHSEQR', 'SN', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'ZHSEQR', 'SN', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, HSWORK, 1 ) + ELSE + MAXWRK = MAX( MAXWRK, N+( N-1 )* + $ ILAENV( 1, 'ZUNGHR', ' ', N, 1, N, -1 ) ) + MAXB = MAX( ILAENV( 8, 'ZHSEQR', 'SV', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'ZHSEQR', 'SV', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, HSWORK, 1 ) + END IF + WORK( 1 ) = MAXWRK + END IF + IF( LWORK.LT.MINWRK ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEESX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL ZLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* +* Permute the matrix to make it more nearly triangular +* (CWorkspace: none) +* (RWorkspace: need N) +* + IBAL = 1 + CALL ZGEBAL( 'P', N, A, LDA, ILO, IHI, RWORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + ITAU = 1 + IWRK = N + ITAU + CALL ZGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVS ) THEN +* +* Copy Householder vectors to VS +* + CALL ZLACPY( 'L', N, N, A, LDA, VS, LDVS ) +* +* Generate unitary matrix in VS +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL ZUNGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) + END IF +* + SDIM = 0 +* +* Perform QR iteration, accumulating Schur vectors in VS if desired +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL ZHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, W, VS, LDVS, + $ WORK( IWRK ), LWORK-IWRK+1, IEVAL ) + IF( IEVAL.GT.0 ) + $ INFO = IEVAL +* +* Sort eigenvalues if desired +* + IF( WANTST .AND. INFO.EQ.0 ) THEN + IF( SCALEA ) + $ CALL ZLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, W, N, IERR ) + DO 10 I = 1, N + BWORK( I ) = SELECT( W( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues, transform Schur vectors, and compute +* reciprocal condition numbers +* (CWorkspace: if SENSE is not 'N', need 2*SDIM*(N-SDIM) +* otherwise, need none ) +* (RWorkspace: none) +* + CALL ZTRSEN( SENSE, JOBVS, BWORK, N, A, LDA, VS, LDVS, W, SDIM, + $ RCONDE, RCONDV, WORK( IWRK ), LWORK-IWRK+1, + $ ICOND ) + IF( .NOT.WANTSN ) + $ MAXWRK = MAX( MAXWRK, 2*SDIM*( N-SDIM ) ) + IF( ICOND.EQ.-14 ) THEN +* +* Not enough complex workspace +* + INFO = -15 + END IF + END IF +* + IF( WANTVS ) THEN +* +* Undo balancing +* (CWorkspace: none) +* (RWorkspace: need N) +* + CALL ZGEBAK( 'P', 'R', N, ILO, IHI, RWORK( IBAL ), N, VS, LDVS, + $ IERR ) + END IF +* + IF( SCALEA ) THEN +* +* Undo scaling for the Schur form of A +* + CALL ZLASCL( 'U', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR ) + CALL ZCOPY( N, A, LDA+1, W, 1 ) + IF( ( WANTSV .OR. WANTSB ) .AND. INFO.EQ.0 ) THEN + DUM( 1 ) = RCONDV + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR ) + RCONDV = DUM( 1 ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of ZGEESX +* + END diff --git a/costa/native/external/lapack/zgeev.f b/costa/native/external/lapack/zgeev.f new file mode 100644 index 000000000..42aa63368 --- /dev/null +++ b/costa/native/external/lapack/zgeev.f @@ -0,0 +1,391 @@ + SUBROUTINE ZGEEV( JOBVL, JOBVR, N, A, LDA, W, VL, LDVL, VR, LDVR, + $ WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), + $ W( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGEEV computes for an N-by-N complex nonsymmetric matrix A, the +* eigenvalues and, optionally, the left and/or right eigenvectors. +* +* The right eigenvector v(j) of A satisfies +* A * v(j) = lambda(j) * v(j) +* where lambda(j) is its eigenvalue. +* The left eigenvector u(j) of A satisfies +* u(j)**H * A = lambda(j) * u(j)**H +* where u(j)**H denotes the conjugate transpose of u(j). +* +* The computed eigenvectors are normalized to have Euclidean norm +* equal to 1 and largest component real. +* +* Arguments +* ========= +* +* JOBVL (input) CHARACTER*1 +* = 'N': left eigenvectors of A are not computed; +* = 'V': left eigenvectors of are computed. +* +* JOBVR (input) CHARACTER*1 +* = 'N': right eigenvectors of A are not computed; +* = 'V': right eigenvectors of A are computed. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the N-by-N matrix A. +* On exit, A has been overwritten. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* W (output) COMPLEX*16 array, dimension (N) +* W contains the computed eigenvalues. +* +* VL (output) COMPLEX*16 array, dimension (LDVL,N) +* If JOBVL = 'V', the left eigenvectors u(j) are stored one +* after another in the columns of VL, in the same order +* as their eigenvalues. +* If JOBVL = 'N', VL is not referenced. +* u(j) = VL(:,j), the j-th column of VL. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. LDVL >= 1; if +* JOBVL = 'V', LDVL >= N. +* +* VR (output) COMPLEX*16 array, dimension (LDVR,N) +* If JOBVR = 'V', the right eigenvectors v(j) are stored one +* after another in the columns of VR, in the same order +* as their eigenvalues. +* If JOBVR = 'N', VR is not referenced. +* v(j) = VR(:,j), the j-th column of VR. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. LDVR >= 1; if +* JOBVR = 'V', LDVR >= N. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,2*N). +* For good performance, LWORK must generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the QR algorithm failed to compute all the +* eigenvalues, and no eigenvectors have been computed; +* elements and i+1:N of W contain eigenvalues which have +* converged. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTVL, WANTVR + CHARACTER SIDE + INTEGER HSWORK, I, IBAL, IERR, IHI, ILO, IRWORK, ITAU, + $ IWRK, K, MAXB, MAXWRK, MINWRK, NOUT + DOUBLE PRECISION ANRM, BIGNUM, CSCALE, EPS, SCL, SMLNUM + COMPLEX*16 TMP +* .. +* .. Local Arrays .. + LOGICAL SELECT( 1 ) + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZGEBAK, ZGEBAL, ZGEHRD, ZHSEQR, + $ ZLACPY, ZLASCL, ZSCAL, ZTREVC, ZUNGHR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX, ILAENV + DOUBLE PRECISION DLAMCH, DZNRM2, ZLANGE + EXTERNAL LSAME, IDAMAX, ILAENV, DLAMCH, DZNRM2, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVL = LSAME( JOBVL, 'V' ) + WANTVR = LSAME( JOBVR, 'V' ) + IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN + INFO = -10 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to real +* workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by ZHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MAXWRK = N + N*ILAENV( 1, 'ZGEHRD', ' ', N, 1, N, 0 ) + IF( ( .NOT.WANTVL ) .AND. ( .NOT.WANTVR ) ) THEN + MINWRK = MAX( 1, 2*N ) + MAXB = MAX( ILAENV( 8, 'ZHSEQR', 'EN', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'ZHSEQR', 'EN', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, HSWORK ) + ELSE + MINWRK = MAX( 1, 2*N ) + MAXWRK = MAX( MAXWRK, N+( N-1 )* + $ ILAENV( 1, 'ZUNGHR', ' ', N, 1, N, -1 ) ) + MAXB = MAX( ILAENV( 8, 'ZHSEQR', 'SV', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'ZHSEQR', 'SV', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, HSWORK, 2*N ) + END IF + WORK( 1 ) = MAXWRK + END IF + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL ZLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Balance the matrix +* (CWorkspace: none) +* (RWorkspace: need N) +* + IBAL = 1 + CALL ZGEBAL( 'B', N, A, LDA, ILO, IHI, RWORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + ITAU = 1 + IWRK = ITAU + N + CALL ZGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVL ) THEN +* +* Want left eigenvectors +* Copy Householder vectors to VL +* + SIDE = 'L' + CALL ZLACPY( 'L', N, N, A, LDA, VL, LDVL ) +* +* Generate unitary matrix in VL +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL ZUNGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VL +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL ZHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VL, LDVL, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + IF( WANTVR ) THEN +* +* Want left and right eigenvectors +* Copy Schur vectors to VR +* + SIDE = 'B' + CALL ZLACPY( 'F', N, N, VL, LDVL, VR, LDVR ) + END IF +* + ELSE IF( WANTVR ) THEN +* +* Want right eigenvectors +* Copy Householder vectors to VR +* + SIDE = 'R' + CALL ZLACPY( 'L', N, N, A, LDA, VR, LDVR ) +* +* Generate unitary matrix in VR +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL ZUNGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VR +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL ZHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + ELSE +* +* Compute eigenvalues only +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL ZHSEQR( 'E', 'N', N, ILO, IHI, A, LDA, W, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) + END IF +* +* If INFO > 0 from ZHSEQR, then quit +* + IF( INFO.GT.0 ) + $ GO TO 50 +* + IF( WANTVL .OR. WANTVR ) THEN +* +* Compute left and/or right eigenvectors +* (CWorkspace: need 2*N) +* (RWorkspace: need 2*N) +* + IRWORK = IBAL + N + CALL ZTREVC( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ N, NOUT, WORK( IWRK ), RWORK( IRWORK ), IERR ) + END IF +* + IF( WANTVL ) THEN +* +* Undo balancing of left eigenvectors +* (CWorkspace: none) +* (RWorkspace: need N) +* + CALL ZGEBAK( 'B', 'L', N, ILO, IHI, RWORK( IBAL ), N, VL, LDVL, + $ IERR ) +* +* Normalize left eigenvectors and make largest component real +* + DO 20 I = 1, N + SCL = ONE / DZNRM2( N, VL( 1, I ), 1 ) + CALL ZDSCAL( N, SCL, VL( 1, I ), 1 ) + DO 10 K = 1, N + RWORK( IRWORK+K-1 ) = DBLE( VL( K, I ) )**2 + + $ DIMAG( VL( K, I ) )**2 + 10 CONTINUE + K = IDAMAX( N, RWORK( IRWORK ), 1 ) + TMP = DCONJG( VL( K, I ) ) / SQRT( RWORK( IRWORK+K-1 ) ) + CALL ZSCAL( N, TMP, VL( 1, I ), 1 ) + VL( K, I ) = DCMPLX( DBLE( VL( K, I ) ), ZERO ) + 20 CONTINUE + END IF +* + IF( WANTVR ) THEN +* +* Undo balancing of right eigenvectors +* (CWorkspace: none) +* (RWorkspace: need N) +* + CALL ZGEBAK( 'B', 'R', N, ILO, IHI, RWORK( IBAL ), N, VR, LDVR, + $ IERR ) +* +* Normalize right eigenvectors and make largest component real +* + DO 40 I = 1, N + SCL = ONE / DZNRM2( N, VR( 1, I ), 1 ) + CALL ZDSCAL( N, SCL, VR( 1, I ), 1 ) + DO 30 K = 1, N + RWORK( IRWORK+K-1 ) = DBLE( VR( K, I ) )**2 + + $ DIMAG( VR( K, I ) )**2 + 30 CONTINUE + K = IDAMAX( N, RWORK( IRWORK ), 1 ) + TMP = DCONJG( VR( K, I ) ) / SQRT( RWORK( IRWORK+K-1 ) ) + CALL ZSCAL( N, TMP, VR( 1, I ), 1 ) + VR( K, I ) = DCMPLX( DBLE( VR( K, I ) ), ZERO ) + 40 CONTINUE + END IF +* +* Undo scaling if necessary +* + 50 CONTINUE + IF( SCALEA ) THEN + CALL ZLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, W( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + IF( INFO.GT.0 ) THEN + CALL ZLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, W, N, IERR ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of ZGEEV +* + END diff --git a/costa/native/external/lapack/zgeevx.f b/costa/native/external/lapack/zgeevx.f new file mode 100644 index 000000000..d8b0f45d2 --- /dev/null +++ b/costa/native/external/lapack/zgeevx.f @@ -0,0 +1,521 @@ + SUBROUTINE ZGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, W, VL, + $ LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, RCONDE, + $ RCONDV, WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER BALANC, JOBVL, JOBVR, SENSE + INTEGER IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N + DOUBLE PRECISION ABNRM +* .. +* .. Array Arguments .. + DOUBLE PRECISION RCONDE( * ), RCONDV( * ), RWORK( * ), + $ SCALE( * ) + COMPLEX*16 A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), + $ W( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGEEVX computes for an N-by-N complex nonsymmetric matrix A, the +* eigenvalues and, optionally, the left and/or right eigenvectors. +* +* Optionally also, it computes a balancing transformation to improve +* the conditioning of the eigenvalues and eigenvectors (ILO, IHI, +* SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues +* (RCONDE), and reciprocal condition numbers for the right +* eigenvectors (RCONDV). +* +* The right eigenvector v(j) of A satisfies +* A * v(j) = lambda(j) * v(j) +* where lambda(j) is its eigenvalue. +* The left eigenvector u(j) of A satisfies +* u(j)**H * A = lambda(j) * u(j)**H +* where u(j)**H denotes the conjugate transpose of u(j). +* +* The computed eigenvectors are normalized to have Euclidean norm +* equal to 1 and largest component real. +* +* Balancing a matrix means permuting the rows and columns to make it +* more nearly upper triangular, and applying a diagonal similarity +* transformation D * A * D**(-1), where D is a diagonal matrix, to +* make its rows and columns closer in norm and the condition numbers +* of its eigenvalues and eigenvectors smaller. The computed +* reciprocal condition numbers correspond to the balanced matrix. +* Permuting rows and columns will not change the condition numbers +* (in exact arithmetic) but diagonal scaling will. For further +* explanation of balancing, see section 4.10.2 of the LAPACK +* Users' Guide. +* +* Arguments +* ========= +* +* BALANC (input) CHARACTER*1 +* Indicates how the input matrix should be diagonally scaled +* and/or permuted to improve the conditioning of its +* eigenvalues. +* = 'N': Do not diagonally scale or permute; +* = 'P': Perform permutations to make the matrix more nearly +* upper triangular. Do not diagonally scale; +* = 'S': Diagonally scale the matrix, ie. replace A by +* D*A*D**(-1), where D is a diagonal matrix chosen +* to make the rows and columns of A more equal in +* norm. Do not permute; +* = 'B': Both diagonally scale and permute A. +* +* Computed reciprocal condition numbers will be for the matrix +* after balancing and/or permuting. Permuting does not change +* condition numbers (in exact arithmetic), but balancing does. +* +* JOBVL (input) CHARACTER*1 +* = 'N': left eigenvectors of A are not computed; +* = 'V': left eigenvectors of A are computed. +* If SENSE = 'E' or 'B', JOBVL must = 'V'. +* +* JOBVR (input) CHARACTER*1 +* = 'N': right eigenvectors of A are not computed; +* = 'V': right eigenvectors of A are computed. +* If SENSE = 'E' or 'B', JOBVR must = 'V'. +* +* SENSE (input) CHARACTER*1 +* Determines which reciprocal condition numbers are computed. +* = 'N': None are computed; +* = 'E': Computed for eigenvalues only; +* = 'V': Computed for right eigenvectors only; +* = 'B': Computed for eigenvalues and right eigenvectors. +* +* If SENSE = 'E' or 'B', both left and right eigenvectors +* must also be computed (JOBVL = 'V' and JOBVR = 'V'). +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the N-by-N matrix A. +* On exit, A has been overwritten. If JOBVL = 'V' or +* JOBVR = 'V', A contains the Schur form of the balanced +* version of the matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* W (output) COMPLEX*16 array, dimension (N) +* W contains the computed eigenvalues. +* +* VL (output) COMPLEX*16 array, dimension (LDVL,N) +* If JOBVL = 'V', the left eigenvectors u(j) are stored one +* after another in the columns of VL, in the same order +* as their eigenvalues. +* If JOBVL = 'N', VL is not referenced. +* u(j) = VL(:,j), the j-th column of VL. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. LDVL >= 1; if +* JOBVL = 'V', LDVL >= N. +* +* VR (output) COMPLEX*16 array, dimension (LDVR,N) +* If JOBVR = 'V', the right eigenvectors v(j) are stored one +* after another in the columns of VR, in the same order +* as their eigenvalues. +* If JOBVR = 'N', VR is not referenced. +* v(j) = VR(:,j), the j-th column of VR. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. LDVR >= 1; if +* JOBVR = 'V', LDVR >= N. +* +* ILO,IHI (output) INTEGER +* ILO and IHI are integer values determined when A was +* balanced. The balanced A(i,j) = 0 if I > J and +* J = 1,...,ILO-1 or I = IHI+1,...,N. +* +* SCALE (output) DOUBLE PRECISION array, dimension (N) +* Details of the permutations and scaling factors applied +* when balancing A. If P(j) is the index of the row and column +* interchanged with row and column j, and D(j) is the scaling +* factor applied to row and column j, then +* SCALE(J) = P(J), for J = 1,...,ILO-1 +* = D(J), for J = ILO,...,IHI +* = P(J) for J = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* ABNRM (output) DOUBLE PRECISION +* The one-norm of the balanced matrix (the maximum +* of the sum of absolute values of elements of any column). +* +* RCONDE (output) DOUBLE PRECISION array, dimension (N) +* RCONDE(j) is the reciprocal condition number of the j-th +* eigenvalue. +* +* RCONDV (output) DOUBLE PRECISION array, dimension (N) +* RCONDV(j) is the reciprocal condition number of the j-th +* right eigenvector. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. If SENSE = 'N' or 'E', +* LWORK >= max(1,2*N), and if SENSE = 'V' or 'B', +* LWORK >= N*N+2*N. +* For good performance, LWORK must generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the QR algorithm failed to compute all the +* eigenvalues, and no eigenvectors or condition numbers +* have been computed; elements 1:ILO-1 and i+1:N of W +* contain eigenvalues which have converged. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTVL, WANTVR, WNTSNB, WNTSNE, + $ WNTSNN, WNTSNV + CHARACTER JOB, SIDE + INTEGER HSWORK, I, ICOND, IERR, ITAU, IWRK, K, MAXB, + $ MAXWRK, MINWRK, NOUT + DOUBLE PRECISION ANRM, BIGNUM, CSCALE, EPS, SCL, SMLNUM + COMPLEX*16 TMP +* .. +* .. Local Arrays .. + LOGICAL SELECT( 1 ) + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DLASCL, XERBLA, ZDSCAL, ZGEBAK, ZGEBAL, ZGEHRD, + $ ZHSEQR, ZLACPY, ZLASCL, ZSCAL, ZTREVC, ZTRSNA, + $ ZUNGHR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX, ILAENV + DOUBLE PRECISION DLAMCH, DZNRM2, ZLANGE + EXTERNAL LSAME, IDAMAX, ILAENV, DLAMCH, DZNRM2, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVL = LSAME( JOBVL, 'V' ) + WANTVR = LSAME( JOBVR, 'V' ) + WNTSNN = LSAME( SENSE, 'N' ) + WNTSNE = LSAME( SENSE, 'E' ) + WNTSNV = LSAME( SENSE, 'V' ) + WNTSNB = LSAME( SENSE, 'B' ) + IF( .NOT.( LSAME( BALANC, 'N' ) .OR. LSAME( BALANC, + $ 'S' ) .OR. LSAME( BALANC, 'P' ) .OR. LSAME( BALANC, 'B' ) ) ) + $ THEN + INFO = -1 + ELSE IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( .NOT.( WNTSNN .OR. WNTSNE .OR. WNTSNB .OR. WNTSNV ) .OR. + $ ( ( WNTSNE .OR. WNTSNB ) .AND. .NOT.( WANTVL .AND. + $ WANTVR ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN + INFO = -10 + ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN + INFO = -12 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to real +* workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by ZHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MAXWRK = N + N*ILAENV( 1, 'ZGEHRD', ' ', N, 1, N, 0 ) + IF( ( .NOT.WANTVL ) .AND. ( .NOT.WANTVR ) ) THEN + MINWRK = MAX( 1, 2*N ) + IF( .NOT.( WNTSNN .OR. WNTSNE ) ) + $ MINWRK = MAX( MINWRK, N*N+2*N ) + MAXB = MAX( ILAENV( 8, 'ZHSEQR', 'SN', N, 1, N, -1 ), 2 ) + IF( WNTSNN ) THEN + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'ZHSEQR', 'EN', N, + $ 1, N, -1 ) ) ) + ELSE + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'ZHSEQR', 'SN', N, + $ 1, N, -1 ) ) ) + END IF + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, 1, HSWORK ) + IF( .NOT.( WNTSNN .OR. WNTSNE ) ) + $ MAXWRK = MAX( MAXWRK, N*N+2*N ) + ELSE + MINWRK = MAX( 1, 2*N ) + IF( .NOT.( WNTSNN .OR. WNTSNE ) ) + $ MINWRK = MAX( MINWRK, N*N+2*N ) + MAXB = MAX( ILAENV( 8, 'ZHSEQR', 'SN', N, 1, N, -1 ), 2 ) + K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'ZHSEQR', 'EN', N, 1, + $ N, -1 ) ) ) + HSWORK = MAX( K*( K+2 ), 2*N ) + MAXWRK = MAX( MAXWRK, 1, HSWORK ) + MAXWRK = MAX( MAXWRK, N+( N-1 )* + $ ILAENV( 1, 'ZUNGHR', ' ', N, 1, N, -1 ) ) + IF( .NOT.( WNTSNN .OR. WNTSNE ) ) + $ MAXWRK = MAX( MAXWRK, N*N+2*N ) + MAXWRK = MAX( MAXWRK, 2*N, 1 ) + END IF + WORK( 1 ) = MAXWRK + END IF + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ICOND = 0 + ANRM = ZLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL ZLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Balance the matrix and compute ABNRM +* + CALL ZGEBAL( BALANC, N, A, LDA, ILO, IHI, SCALE, IERR ) + ABNRM = ZLANGE( '1', N, N, A, LDA, DUM ) + IF( SCALEA ) THEN + DUM( 1 ) = ABNRM + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR ) + ABNRM = DUM( 1 ) + END IF +* +* Reduce to upper Hessenberg form +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + ITAU = 1 + IWRK = ITAU + N + CALL ZGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVL ) THEN +* +* Want left eigenvectors +* Copy Householder vectors to VL +* + SIDE = 'L' + CALL ZLACPY( 'L', N, N, A, LDA, VL, LDVL ) +* +* Generate unitary matrix in VL +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL ZUNGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VL +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL ZHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VL, LDVL, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + IF( WANTVR ) THEN +* +* Want left and right eigenvectors +* Copy Schur vectors to VR +* + SIDE = 'B' + CALL ZLACPY( 'F', N, N, VL, LDVL, VR, LDVR ) + END IF +* + ELSE IF( WANTVR ) THEN +* +* Want right eigenvectors +* Copy Householder vectors to VR +* + SIDE = 'R' + CALL ZLACPY( 'L', N, N, A, LDA, VR, LDVR ) +* +* Generate unitary matrix in VR +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL ZUNGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VR +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL ZHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + ELSE +* +* Compute eigenvalues only +* If condition numbers desired, compute Schur form +* + IF( WNTSNN ) THEN + JOB = 'E' + ELSE + JOB = 'S' + END IF +* +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL ZHSEQR( JOB, 'N', N, ILO, IHI, A, LDA, W, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) + END IF +* +* If INFO > 0 from ZHSEQR, then quit +* + IF( INFO.GT.0 ) + $ GO TO 50 +* + IF( WANTVL .OR. WANTVR ) THEN +* +* Compute left and/or right eigenvectors +* (CWorkspace: need 2*N) +* (RWorkspace: need N) +* + CALL ZTREVC( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ N, NOUT, WORK( IWRK ), RWORK, IERR ) + END IF +* +* Compute condition numbers if desired +* (CWorkspace: need N*N+2*N unless SENSE = 'E') +* (RWorkspace: need 2*N unless SENSE = 'E') +* + IF( .NOT.WNTSNN ) THEN + CALL ZTRSNA( SENSE, 'A', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ RCONDE, RCONDV, N, NOUT, WORK( IWRK ), N, RWORK, + $ ICOND ) + END IF +* + IF( WANTVL ) THEN +* +* Undo balancing of left eigenvectors +* + CALL ZGEBAK( BALANC, 'L', N, ILO, IHI, SCALE, N, VL, LDVL, + $ IERR ) +* +* Normalize left eigenvectors and make largest component real +* + DO 20 I = 1, N + SCL = ONE / DZNRM2( N, VL( 1, I ), 1 ) + CALL ZDSCAL( N, SCL, VL( 1, I ), 1 ) + DO 10 K = 1, N + RWORK( K ) = DBLE( VL( K, I ) )**2 + + $ DIMAG( VL( K, I ) )**2 + 10 CONTINUE + K = IDAMAX( N, RWORK, 1 ) + TMP = DCONJG( VL( K, I ) ) / SQRT( RWORK( K ) ) + CALL ZSCAL( N, TMP, VL( 1, I ), 1 ) + VL( K, I ) = DCMPLX( DBLE( VL( K, I ) ), ZERO ) + 20 CONTINUE + END IF +* + IF( WANTVR ) THEN +* +* Undo balancing of right eigenvectors +* + CALL ZGEBAK( BALANC, 'R', N, ILO, IHI, SCALE, N, VR, LDVR, + $ IERR ) +* +* Normalize right eigenvectors and make largest component real +* + DO 40 I = 1, N + SCL = ONE / DZNRM2( N, VR( 1, I ), 1 ) + CALL ZDSCAL( N, SCL, VR( 1, I ), 1 ) + DO 30 K = 1, N + RWORK( K ) = DBLE( VR( K, I ) )**2 + + $ DIMAG( VR( K, I ) )**2 + 30 CONTINUE + K = IDAMAX( N, RWORK, 1 ) + TMP = DCONJG( VR( K, I ) ) / SQRT( RWORK( K ) ) + CALL ZSCAL( N, TMP, VR( 1, I ), 1 ) + VR( K, I ) = DCMPLX( DBLE( VR( K, I ) ), ZERO ) + 40 CONTINUE + END IF +* +* Undo scaling if necessary +* + 50 CONTINUE + IF( SCALEA ) THEN + CALL ZLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, W( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + IF( INFO.EQ.0 ) THEN + IF( ( WNTSNV .OR. WNTSNB ) .AND. ICOND.EQ.0 ) + $ CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, RCONDV, N, + $ IERR ) + ELSE + CALL ZLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, W, N, IERR ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of ZGEEVX +* + END diff --git a/costa/native/external/lapack/zgegs.f b/costa/native/external/lapack/zgegs.f new file mode 100644 index 000000000..b5c53101a --- /dev/null +++ b/costa/native/external/lapack/zgegs.f @@ -0,0 +1,442 @@ + SUBROUTINE ZGEGS( JOBVSL, JOBVSR, N, A, LDA, B, LDB, ALPHA, BETA, + $ VSL, LDVSL, VSR, LDVSR, WORK, LWORK, RWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine ZGGES. +* +* ZGEGS computes for a pair of N-by-N complex nonsymmetric matrices A, +* B: the generalized eigenvalues (alpha, beta), the complex Schur +* form (A, B), and optionally left and/or right Schur vectors +* (VSL and VSR). +* +* (If only the generalized eigenvalues are needed, use the driver ZGEGV +* instead.) +* +* A generalized eigenvalue for a pair of matrices (A,B) is, roughly +* speaking, a scalar w or a ratio alpha/beta = w, such that A - w*B +* is singular. It is usually represented as the pair (alpha,beta), +* as there is a reasonable interpretation for beta=0, and even for +* both being zero. A good beginning reference is the book, "Matrix +* Computations", by G. Golub & C. van Loan (Johns Hopkins U. Press) +* +* The (generalized) Schur form of a pair of matrices is the result of +* multiplying both matrices on the left by one unitary matrix and +* both on the right by another unitary matrix, these two unitary +* matrices being chosen so as to bring the pair of matrices into +* upper triangular form with the diagonal elements of B being +* non-negative real numbers (this is also called complex Schur form.) +* +* The left and right Schur vectors are the columns of VSL and VSR, +* respectively, where VSL and VSR are the unitary matrices +* which reduce A and B to Schur form: +* +* Schur form of (A,B) = ( (VSL)**H A (VSR), (VSL)**H B (VSR) ) +* +* Arguments +* ========= +* +* JOBVSL (input) CHARACTER*1 +* = 'N': do not compute the left Schur vectors; +* = 'V': compute the left Schur vectors. +* +* JOBVSR (input) CHARACTER*1 +* = 'N': do not compute the right Schur vectors; +* = 'V': compute the right Schur vectors. +* +* N (input) INTEGER +* The order of the matrices A, B, VSL, and VSR. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA, N) +* On entry, the first of the pair of matrices whose generalized +* eigenvalues and (optionally) Schur vectors are to be +* computed. +* On exit, the generalized Schur form of A. +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) COMPLEX*16 array, dimension (LDB, N) +* On entry, the second of the pair of matrices whose +* generalized eigenvalues and (optionally) Schur vectors are +* to be computed. +* On exit, the generalized Schur form of B. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* ALPHA (output) COMPLEX*16 array, dimension (N) +* BETA (output) COMPLEX*16 array, dimension (N) +* On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the +* generalized eigenvalues. ALPHA(j), j=1,...,N and BETA(j), +* j=1,...,N are the diagonals of the complex Schur form (A,B) +* output by ZGEGS. The BETA(j) will be non-negative real. +* +* Note: the quotients ALPHA(j)/BETA(j) may easily over- or +* underflow, and BETA(j) may even be zero. Thus, the user +* should avoid naively computing the ratio alpha/beta. +* However, ALPHA will be always less than and usually +* comparable with norm(A) in magnitude, and BETA always less +* than and usually comparable with norm(B). +* +* VSL (output) COMPLEX*16 array, dimension (LDVSL,N) +* If JOBVSL = 'V', VSL will contain the left Schur vectors. +* (See "Purpose", above.) +* Not referenced if JOBVSL = 'N'. +* +* LDVSL (input) INTEGER +* The leading dimension of the matrix VSL. LDVSL >= 1, and +* if JOBVSL = 'V', LDVSL >= N. +* +* VSR (output) COMPLEX*16 array, dimension (LDVSR,N) +* If JOBVSR = 'V', VSR will contain the right Schur vectors. +* (See "Purpose", above.) +* Not referenced if JOBVSR = 'N'. +* +* LDVSR (input) INTEGER +* The leading dimension of the matrix VSR. LDVSR >= 1, and +* if JOBVSR = 'V', LDVSR >= N. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,2*N). +* For good performance, LWORK must generally be larger. +* To compute the optimal value of LWORK, call ILAENV to get +* blocksizes (for ZGEQRF, ZUNMQR, and CUNGQR.) Then compute: +* NB -- MAX of the blocksizes for ZGEQRF, ZUNMQR, and CUNGQR; +* the optimal LWORK is N*(NB+1). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* =1,...,N: +* The QZ iteration failed. (A,B) are not in Schur +* form, but ALPHA(j) and BETA(j) should be correct for +* j=INFO+1,...,N. +* > N: errors that usually indicate LAPACK problems: +* =N+1: error return from ZGGBAL +* =N+2: error return from ZGEQRF +* =N+3: error return from ZUNMQR +* =N+4: error return from ZUNGQR +* =N+5: error return from ZGGHRD +* =N+6: error return from ZHGEQZ (other than failed +* iteration) +* =N+7: error return from ZGGBAK (computing VSL) +* =N+8: error return from ZGGBAK (computing VSR) +* =N+9: error return from ZLASCL (various places) +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILVSL, ILVSR, LQUERY + INTEGER ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT, ILO, + $ IRIGHT, IROWS, IRWORK, ITAU, IWORK, LOPT, + $ LWKMIN, LWKOPT, NB, NB1, NB2, NB3 + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SAFMIN, SMLNUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQRF, ZGGBAK, ZGGBAL, ZGGHRD, ZHGEQZ, + $ ZLACPY, ZLASCL, ZLASET, ZUNGQR, ZUNMQR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* +* Test the input arguments +* + LWKMIN = MAX( 2*N, 1 ) + LWKOPT = LWKMIN + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -11 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF +* + IF( INFO.EQ.0 ) THEN + NB1 = ILAENV( 1, 'ZGEQRF', ' ', N, N, -1, -1 ) + NB2 = ILAENV( 1, 'ZUNMQR', ' ', N, N, N, -1 ) + NB3 = ILAENV( 1, 'ZUNGQR', ' ', N, N, N, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LOPT = N*( NB+1 ) + WORK( 1 ) = LOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEGS ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'E' )*DLAMCH( 'B' ) + SAFMIN = DLAMCH( 'S' ) + SMLNUM = N*SAFMIN / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF +* + IF( ILASCL ) THEN + CALL ZLASCL( 'G', -1, -1, ANRM, ANRMTO, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = ZLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF +* + IF( ILBSCL ) THEN + CALL ZLASCL( 'G', -1, -1, BNRM, BNRMTO, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* +* Permute the matrix to make it more nearly triangular +* + ILEFT = 1 + IRIGHT = N + 1 + IRWORK = IRIGHT + N + IWORK = 1 + CALL ZGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWORK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 1 + GO TO 10 + END IF +* +* Reduce B to triangular form, and initialize VSL and/or VSR +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = IWORK + IWORK = ITAU + IROWS + CALL ZGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 2 + GO TO 10 + END IF +* + CALL ZUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ), + $ LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 3 + GO TO 10 + END IF +* + IF( ILVSL ) THEN + CALL ZLASET( 'Full', N, N, CZERO, CONE, VSL, LDVSL ) + CALL ZLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + CALL ZUNGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK, + $ IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 4 + GO TO 10 + END IF + END IF +* + IF( ILVSR ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* + CALL ZGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 5 + GO TO 10 + END IF +* +* Perform QZ algorithm, computing Schur vectors if desired +* + IWORK = ITAU + CALL ZHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK( IWORK ), + $ LWORK+1-IWORK, RWORK( IRWORK ), IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN + INFO = IINFO + ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN + INFO = IINFO - N + ELSE + INFO = N + 6 + END IF + GO TO 10 + END IF +* +* Apply permutation to VSL and VSR +* + IF( ILVSL ) THEN + CALL ZGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSL, LDVSL, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 7 + GO TO 10 + END IF + END IF + IF( ILVSR ) THEN + CALL ZGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSR, LDVSR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 8 + GO TO 10 + END IF + END IF +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL ZLASCL( 'U', -1, -1, ANRMTO, ANRM, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + CALL ZLASCL( 'G', -1, -1, ANRMTO, ANRM, N, 1, ALPHA, N, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* + IF( ILBSCL ) THEN + CALL ZLASCL( 'U', -1, -1, BNRMTO, BNRM, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + CALL ZLASCL( 'G', -1, -1, BNRMTO, BNRM, N, 1, BETA, N, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* + 10 CONTINUE + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZGEGS +* + END diff --git a/costa/native/external/lapack/zgegv.f b/costa/native/external/lapack/zgegv.f new file mode 100644 index 000000000..ce0d56594 --- /dev/null +++ b/costa/native/external/lapack/zgegv.f @@ -0,0 +1,591 @@ + SUBROUTINE ZGEGV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, + $ VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine ZGGEV. +* +* ZGEGV computes for a pair of N-by-N complex nonsymmetric matrices A +* and B, the generalized eigenvalues (alpha, beta), and optionally, +* the left and/or right generalized eigenvectors (VL and VR). +* +* A generalized eigenvalue for a pair of matrices (A,B) is, roughly +* speaking, a scalar w or a ratio alpha/beta = w, such that A - w*B +* is singular. It is usually represented as the pair (alpha,beta), +* as there is a reasonable interpretation for beta=0, and even for +* both being zero. A good beginning reference is the book, "Matrix +* Computations", by G. Golub & C. van Loan (Johns Hopkins U. Press) +* +* A right generalized eigenvector corresponding to a generalized +* eigenvalue w for a pair of matrices (A,B) is a vector r such +* that (A - w B) r = 0 . A left generalized eigenvector is a vector +* l such that l**H * (A - w B) = 0, where l**H is the +* conjugate-transpose of l. +* +* Note: this routine performs "full balancing" on A and B -- see +* "Further Details", below. +* +* Arguments +* ========= +* +* JOBVL (input) CHARACTER*1 +* = 'N': do not compute the left generalized eigenvectors; +* = 'V': compute the left generalized eigenvectors. +* +* JOBVR (input) CHARACTER*1 +* = 'N': do not compute the right generalized eigenvectors; +* = 'V': compute the right generalized eigenvectors. +* +* N (input) INTEGER +* The order of the matrices A, B, VL, and VR. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA, N) +* On entry, the first of the pair of matrices whose +* generalized eigenvalues and (optionally) generalized +* eigenvectors are to be computed. +* On exit, the contents will have been destroyed. (For a +* description of the contents of A on exit, see "Further +* Details", below.) +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) COMPLEX*16 array, dimension (LDB, N) +* On entry, the second of the pair of matrices whose +* generalized eigenvalues and (optionally) generalized +* eigenvectors are to be computed. +* On exit, the contents will have been destroyed. (For a +* description of the contents of B on exit, see "Further +* Details", below.) +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* ALPHA (output) COMPLEX*16 array, dimension (N) +* BETA (output) COMPLEX*16 array, dimension (N) +* On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the +* generalized eigenvalues. +* +* Note: the quotients ALPHA(j)/BETA(j) may easily over- or +* underflow, and BETA(j) may even be zero. Thus, the user +* should avoid naively computing the ratio alpha/beta. +* However, ALPHA will be always less than and usually +* comparable with norm(A) in magnitude, and BETA always less +* than and usually comparable with norm(B). +* +* VL (output) COMPLEX*16 array, dimension (LDVL,N) +* If JOBVL = 'V', the left generalized eigenvectors. (See +* "Purpose", above.) +* Each eigenvector will be scaled so the largest component +* will have abs(real part) + abs(imag. part) = 1, *except* +* that for eigenvalues with alpha=beta=0, a zero vector will +* be returned as the corresponding eigenvector. +* Not referenced if JOBVL = 'N'. +* +* LDVL (input) INTEGER +* The leading dimension of the matrix VL. LDVL >= 1, and +* if JOBVL = 'V', LDVL >= N. +* +* VR (output) COMPLEX*16 array, dimension (LDVR,N) +* If JOBVR = 'V', the right generalized eigenvectors. (See +* "Purpose", above.) +* Each eigenvector will be scaled so the largest component +* will have abs(real part) + abs(imag. part) = 1, *except* +* that for eigenvalues with alpha=beta=0, a zero vector will +* be returned as the corresponding eigenvector. +* Not referenced if JOBVR = 'N'. +* +* LDVR (input) INTEGER +* The leading dimension of the matrix VR. LDVR >= 1, and +* if JOBVR = 'V', LDVR >= N. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,2*N). +* For good performance, LWORK must generally be larger. +* To compute the optimal value of LWORK, call ILAENV to get +* blocksizes (for ZGEQRF, ZUNMQR, and CUNGQR.) Then compute: +* NB -- MAX of the blocksizes for ZGEQRF, ZUNMQR, and CUNGQR; +* The optimal LWORK is MAX( 2*N, N*(NB+1) ). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace/output) DOUBLE PRECISION array, dimension (8*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* =1,...,N: +* The QZ iteration failed. No eigenvectors have been +* calculated, but ALPHA(j) and BETA(j) should be +* correct for j=INFO+1,...,N. +* > N: errors that usually indicate LAPACK problems: +* =N+1: error return from ZGGBAL +* =N+2: error return from ZGEQRF +* =N+3: error return from ZUNMQR +* =N+4: error return from ZUNGQR +* =N+5: error return from ZGGHRD +* =N+6: error return from ZHGEQZ (other than failed +* iteration) +* =N+7: error return from ZTGEVC +* =N+8: error return from ZGGBAK (computing VL) +* =N+9: error return from ZGGBAK (computing VR) +* =N+10: error return from ZLASCL (various calls) +* +* Further Details +* =============== +* +* Balancing +* --------- +* +* This driver calls ZGGBAL to both permute and scale rows and columns +* of A and B. The permutations PL and PR are chosen so that PL*A*PR +* and PL*B*R will be upper triangular except for the diagonal blocks +* A(i:j,i:j) and B(i:j,i:j), with i and j as close together as +* possible. The diagonal scaling matrices DL and DR are chosen so +* that the pair DL*PL*A*PR*DR, DL*PL*B*PR*DR have elements close to +* one (except for the elements that start out zero.) +* +* After the eigenvalues and eigenvectors of the balanced matrices +* have been computed, ZGGBAK transforms the eigenvectors back to what +* they would have been (in perfect arithmetic) if they had not been +* balanced. +* +* Contents of A and B on Exit +* -------- -- - --- - -- ---- +* +* If any eigenvectors are computed (either JOBVL='V' or JOBVR='V' or +* both), then on exit the arrays A and B will contain the complex Schur +* form[*] of the "balanced" versions of A and B. If no eigenvectors +* are computed, then only the diagonal blocks will be correct. +* +* [*] In other words, upper triangular form. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ILIMIT, ILV, ILVL, ILVR, LQUERY + CHARACTER CHTEMP + INTEGER ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT, ILO, + $ IN, IRIGHT, IROWS, IRWORK, ITAU, IWORK, JC, JR, + $ LOPT, LWKMIN, LWKOPT, NB, NB1, NB2, NB3 + DOUBLE PRECISION ABSAI, ABSAR, ABSB, ANRM, ANRM1, ANRM2, BNRM, + $ BNRM1, BNRM2, EPS, SAFMAX, SAFMIN, SALFAI, + $ SALFAR, SBETA, SCALE, TEMP + COMPLEX*16 X +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQRF, ZGGBAK, ZGGBAL, ZGGHRD, ZHGEQZ, + $ ZLACPY, ZLASCL, ZLASET, ZTGEVC, ZUNGQR, ZUNMQR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DIMAG, INT, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION ABS1 +* .. +* .. Statement Function definitions .. + ABS1( X ) = ABS( DBLE( X ) ) + ABS( DIMAG( X ) ) +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* +* Test the input arguments +* + LWKMIN = MAX( 2*N, 1 ) + LWKOPT = LWKMIN + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -11 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF +* + IF( INFO.EQ.0 ) THEN + NB1 = ILAENV( 1, 'ZGEQRF', ' ', N, N, -1, -1 ) + NB2 = ILAENV( 1, 'ZUNMQR', ' ', N, N, N, -1 ) + NB3 = ILAENV( 1, 'ZUNGQR', ' ', N, N, N, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LOPT = MAX( 2*N, N*( NB+1 ) ) + WORK( 1 ) = LOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEGV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'E' )*DLAMCH( 'B' ) + SAFMIN = DLAMCH( 'S' ) + SAFMIN = SAFMIN + SAFMIN + SAFMAX = ONE / SAFMIN +* +* Scale A +* + ANRM = ZLANGE( 'M', N, N, A, LDA, RWORK ) + ANRM1 = ANRM + ANRM2 = ONE + IF( ANRM.LT.ONE ) THEN + IF( SAFMAX*ANRM.LT.ONE ) THEN + ANRM1 = SAFMIN + ANRM2 = SAFMAX*ANRM + END IF + END IF +* + IF( ANRM.GT.ZERO ) THEN + CALL ZLASCL( 'G', -1, -1, ANRM, ONE, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 10 + RETURN + END IF + END IF +* +* Scale B +* + BNRM = ZLANGE( 'M', N, N, B, LDB, RWORK ) + BNRM1 = BNRM + BNRM2 = ONE + IF( BNRM.LT.ONE ) THEN + IF( SAFMAX*BNRM.LT.ONE ) THEN + BNRM1 = SAFMIN + BNRM2 = SAFMAX*BNRM + END IF + END IF +* + IF( BNRM.GT.ZERO ) THEN + CALL ZLASCL( 'G', -1, -1, BNRM, ONE, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 10 + RETURN + END IF + END IF +* +* Permute the matrix to make it more nearly triangular +* Also "balance" the matrix. +* + ILEFT = 1 + IRIGHT = N + 1 + IRWORK = IRIGHT + N + CALL ZGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWORK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 1 + GO TO 80 + END IF +* +* Reduce B to triangular form, and initialize VL and/or VR +* + IROWS = IHI + 1 - ILO + IF( ILV ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = 1 + IWORK = ITAU + IROWS + CALL ZGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 2 + GO TO 80 + END IF +* + CALL ZUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ), + $ LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 3 + GO TO 80 + END IF +* + IF( ILVL ) THEN + CALL ZLASET( 'Full', N, N, CZERO, CONE, VL, LDVL ) + CALL ZLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + CALL ZUNGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK, + $ IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 4 + GO TO 80 + END IF + END IF +* + IF( ILVR ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* + IF( ILV ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL ZGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IINFO ) + ELSE + CALL ZGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IINFO ) + END IF + IF( IINFO.NE.0 ) THEN + INFO = N + 5 + GO TO 80 + END IF +* +* Perform QZ algorithm +* + IWORK = ITAU + IF( ILV ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF + CALL ZHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK( IWORK ), + $ LWORK+1-IWORK, RWORK( IRWORK ), IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN + INFO = IINFO + ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN + INFO = IINFO - N + ELSE + INFO = N + 6 + END IF + GO TO 80 + END IF +* + IF( ILV ) THEN +* +* Compute Eigenvectors +* + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF +* + CALL ZTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, N, IN, WORK( IWORK ), RWORK( IRWORK ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 7 + GO TO 80 + END IF +* +* Undo balancing on VL and VR, rescale +* + IF( ILVL ) THEN + CALL ZGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VL, LDVL, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 8 + GO TO 80 + END IF + DO 30 JC = 1, N + TEMP = ZERO + DO 10 JR = 1, N + TEMP = MAX( TEMP, ABS1( VL( JR, JC ) ) ) + 10 CONTINUE + IF( TEMP.LT.SAFMIN ) + $ GO TO 30 + TEMP = ONE / TEMP + DO 20 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 20 CONTINUE + 30 CONTINUE + END IF + IF( ILVR ) THEN + CALL ZGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VR, LDVR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + GO TO 80 + END IF + DO 60 JC = 1, N + TEMP = ZERO + DO 40 JR = 1, N + TEMP = MAX( TEMP, ABS1( VR( JR, JC ) ) ) + 40 CONTINUE + IF( TEMP.LT.SAFMIN ) + $ GO TO 60 + TEMP = ONE / TEMP + DO 50 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 50 CONTINUE + 60 CONTINUE + END IF +* +* End of eigenvector calculation +* + END IF +* +* Undo scaling in alpha, beta +* +* Note: this does not give the alpha and beta for the unscaled +* problem. +* +* Un-scaling is limited to avoid underflow in alpha and beta +* if they are significant. +* + DO 70 JC = 1, N + ABSAR = ABS( DBLE( ALPHA( JC ) ) ) + ABSAI = ABS( DIMAG( ALPHA( JC ) ) ) + ABSB = ABS( DBLE( BETA( JC ) ) ) + SALFAR = ANRM*DBLE( ALPHA( JC ) ) + SALFAI = ANRM*DIMAG( ALPHA( JC ) ) + SBETA = BNRM*DBLE( BETA( JC ) ) + ILIMIT = .FALSE. + SCALE = ONE +* +* Check for significant underflow in imaginary part of ALPHA +* + IF( ABS( SALFAI ).LT.SAFMIN .AND. ABSAI.GE. + $ MAX( SAFMIN, EPS*ABSAR, EPS*ABSB ) ) THEN + ILIMIT = .TRUE. + SCALE = ( SAFMIN / ANRM1 ) / MAX( SAFMIN, ANRM2*ABSAI ) + END IF +* +* Check for significant underflow in real part of ALPHA +* + IF( ABS( SALFAR ).LT.SAFMIN .AND. ABSAR.GE. + $ MAX( SAFMIN, EPS*ABSAI, EPS*ABSB ) ) THEN + ILIMIT = .TRUE. + SCALE = MAX( SCALE, ( SAFMIN / ANRM1 ) / + $ MAX( SAFMIN, ANRM2*ABSAR ) ) + END IF +* +* Check for significant underflow in BETA +* + IF( ABS( SBETA ).LT.SAFMIN .AND. ABSB.GE. + $ MAX( SAFMIN, EPS*ABSAR, EPS*ABSAI ) ) THEN + ILIMIT = .TRUE. + SCALE = MAX( SCALE, ( SAFMIN / BNRM1 ) / + $ MAX( SAFMIN, BNRM2*ABSB ) ) + END IF +* +* Check for possible overflow when limiting scaling +* + IF( ILIMIT ) THEN + TEMP = ( SCALE*SAFMIN )*MAX( ABS( SALFAR ), ABS( SALFAI ), + $ ABS( SBETA ) ) + IF( TEMP.GT.ONE ) + $ SCALE = SCALE / TEMP + IF( SCALE.LT.ONE ) + $ ILIMIT = .FALSE. + END IF +* +* Recompute un-scaled ALPHA, BETA if necessary. +* + IF( ILIMIT ) THEN + SALFAR = ( SCALE*DBLE( ALPHA( JC ) ) )*ANRM + SALFAI = ( SCALE*DIMAG( ALPHA( JC ) ) )*ANRM + SBETA = ( SCALE*BETA( JC ) )*BNRM + END IF + ALPHA( JC ) = DCMPLX( SALFAR, SALFAI ) + BETA( JC ) = SBETA + 70 CONTINUE +* + 80 CONTINUE + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZGEGV +* + END diff --git a/costa/native/external/lapack/zgehd2.f b/costa/native/external/lapack/zgehd2.f new file mode 100644 index 000000000..c2505b36d --- /dev/null +++ b/costa/native/external/lapack/zgehd2.f @@ -0,0 +1,149 @@ + SUBROUTINE ZGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGEHD2 reduces a complex general matrix A to upper Hessenberg form H +* by a unitary similarity transformation: Q' * A * Q = H . +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that A is already upper triangular in rows +* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +* set by a previous call to ZGEBAL; otherwise they should be +* set to 1 and N respectively. See Further Details. +* 1 <= ILO <= IHI <= max(1,N). +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the n by n general matrix to be reduced. +* On exit, the upper triangle and the first subdiagonal of A +* are overwritten with the upper Hessenberg matrix H, and the +* elements below the first subdiagonal, with the array TAU, +* represent the unitary matrix Q as a product of elementary +* reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (output) COMPLEX*16 array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) COMPLEX*16 array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of (ihi-ilo) elementary +* reflectors +* +* Q = H(ilo) H(ilo+1) . . . H(ihi-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on +* exit in A(i+2:ihi,i), and tau in TAU(i). +* +* The contents of A are illustrated by the following example, with +* n = 7, ilo = 2 and ihi = 6: +* +* on entry, on exit, +* +* ( a a a a a a a ) ( a a h h h h a ) +* ( a a a a a a ) ( a h h h h a ) +* ( a a a a a a ) ( h h h h h h ) +* ( a a a a a a ) ( v2 h h h h h ) +* ( a a a a a a ) ( v2 v3 h h h h ) +* ( a a a a a a ) ( v2 v3 v4 h h h ) +* ( a ) ( a ) +* +* where a denotes an element of the original matrix A, h denotes a +* modified element of the upper Hessenberg matrix H, and vi denotes an +* element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARF, ZLARFG +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEHD2', -INFO ) + RETURN + END IF +* + DO 10 I = ILO, IHI - 1 +* +* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) +* + ALPHA = A( I+1, I ) + CALL ZLARFG( IHI-I, ALPHA, A( MIN( I+2, N ), I ), 1, TAU( I ) ) + A( I+1, I ) = ONE +* +* Apply H(i) to A(1:ihi,i+1:ihi) from the right +* + CALL ZLARF( 'Right', IHI, IHI-I, A( I+1, I ), 1, TAU( I ), + $ A( 1, I+1 ), LDA, WORK ) +* +* Apply H(i)' to A(i+1:ihi,i+1:n) from the left +* + CALL ZLARF( 'Left', IHI-I, N-I, A( I+1, I ), 1, + $ DCONJG( TAU( I ) ), A( I+1, I+1 ), LDA, WORK ) +* + A( I+1, I ) = ALPHA + 10 CONTINUE +* + RETURN +* +* End of ZGEHD2 +* + END diff --git a/costa/native/external/lapack/zgehrd.f b/costa/native/external/lapack/zgehrd.f new file mode 100644 index 000000000..d1c02bb20 --- /dev/null +++ b/costa/native/external/lapack/zgehrd.f @@ -0,0 +1,254 @@ + SUBROUTINE ZGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGEHRD reduces a complex general matrix A to upper Hessenberg form H +* by a unitary similarity transformation: Q' * A * Q = H . +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that A is already upper triangular in rows +* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +* set by a previous call to ZGEBAL; otherwise they should be +* set to 1 and N respectively. See Further Details. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the N-by-N general matrix to be reduced. +* On exit, the upper triangle and the first subdiagonal of A +* are overwritten with the upper Hessenberg matrix H, and the +* elements below the first subdiagonal, with the array TAU, +* represent the unitary matrix Q as a product of elementary +* reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (output) COMPLEX*16 array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to +* zero. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of (ihi-ilo) elementary +* reflectors +* +* Q = H(ilo) H(ilo+1) . . . H(ihi-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on +* exit in A(i+2:ihi,i), and tau in TAU(i). +* +* The contents of A are illustrated by the following example, with +* n = 7, ilo = 2 and ihi = 6: +* +* on entry, on exit, +* +* ( a a a a a a a ) ( a a h h h h a ) +* ( a a a a a a ) ( a h h h h a ) +* ( a a a a a a ) ( h h h h h h ) +* ( a a a a a a ) ( v2 h h h h h ) +* ( a a a a a a ) ( v2 v3 h h h h ) +* ( a a a a a a ) ( v2 v3 v4 h h h ) +* ( a ) ( a ) +* +* where a denotes an element of the original matrix A, h denotes a +* modified element of the upper Hessenberg matrix H, and vi denotes an +* element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, LDWORK, LWKOPT, NB, NBMIN, + $ NH, NX + COMPLEX*16 EI +* .. +* .. Local Arrays .. + COMPLEX*16 T( LDT, NBMAX ) +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEHD2, ZGEMM, ZLAHRD, ZLARFB +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB = MIN( NBMAX, ILAENV( 1, 'ZGEHRD', ' ', N, ILO, IHI, -1 ) ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEHRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero +* + DO 10 I = 1, ILO - 1 + TAU( I ) = ZERO + 10 CONTINUE + DO 20 I = MAX( 1, IHI ), N - 1 + TAU( I ) = ZERO + 20 CONTINUE +* +* Quick return if possible +* + NH = IHI - ILO + 1 + IF( NH.LE.1 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + IWS = 1 + IF( NB.GT.1 .AND. NB.LT.NH ) THEN +* +* Determine when to cross over from blocked to unblocked code +* (last block is always handled by unblocked code). +* + NX = MAX( NB, ILAENV( 3, 'ZGEHRD', ' ', N, ILO, IHI, -1 ) ) + IF( NX.LT.NH ) THEN +* +* Determine if workspace is large enough for blocked code. +* + IWS = N*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code. +* + NBMIN = MAX( 2, ILAENV( 2, 'ZGEHRD', ' ', N, ILO, IHI, + $ -1 ) ) + IF( LWORK.GE.N*NBMIN ) THEN + NB = LWORK / N + ELSE + NB = 1 + END IF + END IF + END IF + END IF + LDWORK = N +* + IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN +* +* Use unblocked code below +* + I = ILO +* + ELSE +* +* Use blocked code +* + DO 30 I = ILO, IHI - 1 - NX, NB + IB = MIN( NB, IHI-I ) +* +* Reduce columns i:i+ib-1 to Hessenberg form, returning the +* matrices V and T of the block reflector H = I - V*T*V' +* which performs the reduction, and also the matrix Y = A*V*T +* + CALL ZLAHRD( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT, + $ WORK, LDWORK ) +* +* Apply the block reflector H to A(1:ihi,i+ib:ihi) from the +* right, computing A := A - Y * V'. V(i+ib,ib-1) must be set +* to 1. +* + EI = A( I+IB, I+IB-1 ) + A( I+IB, I+IB-1 ) = ONE + CALL ZGEMM( 'No transpose', 'Conjugate transpose', IHI, + $ IHI-I-IB+1, IB, -ONE, WORK, LDWORK, + $ A( I+IB, I ), LDA, ONE, A( 1, I+IB ), LDA ) + A( I+IB, I+IB-1 ) = EI +* +* Apply the block reflector H to A(i+1:ihi,i+ib:n) from the +* left +* + CALL ZLARFB( 'Left', 'Conjugate transpose', 'Forward', + $ 'Columnwise', IHI-I, N-I-IB+1, IB, A( I+1, I ), + $ LDA, T, LDT, A( I+1, I+IB ), LDA, WORK, + $ LDWORK ) + 30 CONTINUE + END IF +* +* Use unblocked code to reduce the rest of the matrix +* + CALL ZGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO ) + WORK( 1 ) = IWS +* + RETURN +* +* End of ZGEHRD +* + END diff --git a/costa/native/external/lapack/zgelq2.f b/costa/native/external/lapack/zgelq2.f new file mode 100644 index 000000000..fd516f373 --- /dev/null +++ b/costa/native/external/lapack/zgelq2.f @@ -0,0 +1,124 @@ + SUBROUTINE ZGELQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGELQ2 computes an LQ factorization of a complex m by n matrix A: +* A = L * Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the m by n matrix A. +* On exit, the elements on and below the diagonal of the array +* contain the m by min(m,n) lower trapezoidal matrix L (L is +* lower triangular if m <= n); the elements above the diagonal, +* with the array TAU, represent the unitary matrix Q as a +* product of elementary reflectors (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX*16 array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) COMPLEX*16 array, dimension (M) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(k)' . . . H(2)' H(1)', where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in +* A(i,i+1:n), and tau in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACGV, ZLARF, ZLARFG +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGELQ2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i,i+1:n) +* + CALL ZLACGV( N-I+1, A( I, I ), LDA ) + ALPHA = A( I, I ) + CALL ZLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA, + $ TAU( I ) ) + IF( I.LT.M ) THEN +* +* Apply H(i) to A(i+1:m,i:n) from the right +* + A( I, I ) = ONE + CALL ZLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ), + $ A( I+1, I ), LDA, WORK ) + END IF + A( I, I ) = ALPHA + CALL ZLACGV( N-I+1, A( I, I ), LDA ) + 10 CONTINUE + RETURN +* +* End of ZGELQ2 +* + END diff --git a/costa/native/external/lapack/zgelqf.f b/costa/native/external/lapack/zgelqf.f new file mode 100644 index 000000000..0ee3476be --- /dev/null +++ b/costa/native/external/lapack/zgelqf.f @@ -0,0 +1,196 @@ + SUBROUTINE ZGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGELQF computes an LQ factorization of a complex M-by-N matrix A: +* A = L * Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the elements on and below the diagonal of the array +* contain the m-by-min(m,n) lower trapezoidal matrix L (L is +* lower triangular if m <= n); the elements above the diagonal, +* with the array TAU, represent the unitary matrix Q as a +* product of elementary reflectors (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX*16 array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(k)' . . . H(2)' H(1)', where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in +* A(i,i+1:n), and tau in TAU(i). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGELQ2, ZLARFB, ZLARFT +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGELQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'ZGELQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZGELQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the LQ factorization of the current block +* A(i:i+ib-1,i:n) +* + CALL ZGELQ2( IB, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.M ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL ZLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i+ib:m,i:n) from the right +* + CALL ZLARFB( 'Right', 'No transpose', 'Forward', + $ 'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ), + $ LDA, WORK, LDWORK, A( I+IB, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) + $ CALL ZGELQ2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of ZGELQF +* + END diff --git a/costa/native/external/lapack/zgels.f b/costa/native/external/lapack/zgels.f new file mode 100644 index 000000000..c3b56d630 --- /dev/null +++ b/costa/native/external/lapack/zgels.f @@ -0,0 +1,405 @@ + SUBROUTINE ZGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGELS solves overdetermined or underdetermined complex linear systems +* involving an M-by-N matrix A, or its conjugate-transpose, using a QR +* or LQ factorization of A. It is assumed that A has full rank. +* +* The following options are provided: +* +* 1. If TRANS = 'N' and m >= n: find the least squares solution of +* an overdetermined system, i.e., solve the least squares problem +* minimize || B - A*X ||. +* +* 2. If TRANS = 'N' and m < n: find the minimum norm solution of +* an underdetermined system A * X = B. +* +* 3. If TRANS = 'C' and m >= n: find the minimum norm solution of +* an undetermined system A**H * X = B. +* +* 4. If TRANS = 'C' and m < n: find the least squares solution of +* an overdetermined system, i.e., solve the least squares problem +* minimize || B - A**H * X ||. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution +* matrix X. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER +* = 'N': the linear system involves A; +* = 'C': the linear system involves A**H. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of +* columns of the matrices B and X. NRHS >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* if M >= N, A is overwritten by details of its QR +* factorization as returned by ZGEQRF; +* if M < N, A is overwritten by details of its LQ +* factorization as returned by ZGELQF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the matrix B of right hand side vectors, stored +* columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +* if TRANS = 'C'. +* On exit, B is overwritten by the solution vectors, stored +* columnwise: +* if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +* squares solution vectors; the residual sum of squares for the +* solution in each column is given by the sum of squares of +* elements N+1 to M in that column; +* if TRANS = 'N' and m < n, rows 1 to N of B contain the +* minimum norm solution vectors; +* if TRANS = 'C' and m >= n, rows 1 to M of B contain the +* minimum norm solution vectors; +* if TRANS = 'C' and m < n, rows 1 to M of B contain the +* least squares solution vectors; the residual sum of squares +* for the solution in each column is given by the sum of +* squares of elements M+1 to N in that column. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= MAX(1,M,N). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* LWORK >= max( 1, MN + max( MN, NRHS ) ). +* For optimal performance, +* LWORK >= max( 1, MN + max( MN, NRHS )*NB ). +* where MN = min(M,N) and NB is the optimum block size. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TPSD + INTEGER BROW, I, IASCL, IBSCL, J, MN, NB, SCLLEN, WSIZE + DOUBLE PRECISION ANRM, BIGNUM, BNRM, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGELQF, ZGEQRF, ZLASCL, ZLASET, ZTRSM, + $ ZUNMLQ, ZUNMQR +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'C' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, MN+MAX( MN, NRHS ) ) .AND. .NOT.LQUERY ) + $ THEN + INFO = -10 + END IF +* +* Figure out optimal block size +* + IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN +* + TPSD = .TRUE. + IF( LSAME( TRANS, 'N' ) ) + $ TPSD = .FALSE. +* + IF( M.GE.N ) THEN + NB = ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 ) + IF( TPSD ) THEN + NB = MAX( NB, ILAENV( 1, 'ZUNMQR', 'LN', M, NRHS, N, + $ -1 ) ) + ELSE + NB = MAX( NB, ILAENV( 1, 'ZUNMQR', 'LC', M, NRHS, N, + $ -1 ) ) + END IF + ELSE + NB = ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 ) + IF( TPSD ) THEN + NB = MAX( NB, ILAENV( 1, 'ZUNMLQ', 'LC', N, NRHS, M, + $ -1 ) ) + ELSE + NB = MAX( NB, ILAENV( 1, 'ZUNMLQ', 'LN', N, NRHS, M, + $ -1 ) ) + END IF + END IF +* + WSIZE = MAX( 1, MN+MAX( MN, NRHS )*NB ) + WORK( 1 ) = DBLE( WSIZE ) +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGELS ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL ZLASET( 'Full', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL ZLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + GO TO 50 + END IF +* + BROW = M + IF( TPSD ) + $ BROW = N + BNRM = ZLANGE( 'M', BROW, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF( M.GE.N ) THEN +* +* compute QR factorization of A +* + CALL ZGEQRF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least N, optimally N*NB +* + IF( .NOT.TPSD ) THEN +* +* Least-Squares Problem min || A * X - B || +* +* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) +* + CALL ZUNMQR( 'Left', 'Conjugate transpose', M, NRHS, N, A, + $ LDA, WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* +* B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL ZTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, CONE, A, LDA, B, LDB ) +* + SCLLEN = N +* + ELSE +* +* Overdetermined system of equations A' * X = B +* +* B(1:N,1:NRHS) := inv(R') * B(1:N,1:NRHS) +* + CALL ZTRSM( 'Left', 'Upper', 'Conjugate transpose', + $ 'Non-unit', N, NRHS, CONE, A, LDA, B, LDB ) +* +* B(N+1:M,1:NRHS) = ZERO +* + DO 20 J = 1, NRHS + DO 10 I = N + 1, M + B( I, J ) = CZERO + 10 CONTINUE + 20 CONTINUE +* +* B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) +* + CALL ZUNMQR( 'Left', 'No transpose', M, NRHS, N, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* + SCLLEN = M +* + END IF +* + ELSE +* +* Compute LQ factorization of A +* + CALL ZGELQF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least M, optimally M*NB. +* + IF( .NOT.TPSD ) THEN +* +* underdetermined system of equations A * X = B +* +* B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', M, + $ NRHS, CONE, A, LDA, B, LDB ) +* +* B(M+1:N,1:NRHS) = 0 +* + DO 40 J = 1, NRHS + DO 30 I = M + 1, N + B( I, J ) = CZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Q(1:N,:)' * B(1:M,1:NRHS) +* + CALL ZUNMLQ( 'Left', 'Conjugate transpose', N, NRHS, M, A, + $ LDA, WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* + SCLLEN = N +* + ELSE +* +* overdetermined system min || A' * X - B || +* +* B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) +* + CALL ZUNMLQ( 'Left', 'No transpose', N, NRHS, M, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* +* B(1:M,1:NRHS) := inv(L') * B(1:M,1:NRHS) +* + CALL ZTRSM( 'Left', 'Lower', 'Conjugate transpose', + $ 'Non-unit', M, NRHS, CONE, A, LDA, B, LDB ) +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + 50 CONTINUE + WORK( 1 ) = DBLE( WSIZE ) +* + RETURN +* +* End of ZGELS +* + END diff --git a/costa/native/external/lapack/zgelsd.f b/costa/native/external/lapack/zgelsd.f new file mode 100644 index 000000000..890964a74 --- /dev/null +++ b/costa/native/external/lapack/zgelsd.f @@ -0,0 +1,545 @@ + SUBROUTINE ZGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, + $ WORK, LWORK, RWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ), S( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGELSD computes the minimum-norm solution to a real linear least +* squares problem: +* minimize 2-norm(| b - A*x |) +* using the singular value decomposition (SVD) of A. A is an M-by-N +* matrix which may be rank-deficient. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution +* matrix X. +* +* The problem is solved in three steps: +* (1) Reduce the coefficient matrix A to bidiagonal form with +* Householder tranformations, reducing the original problem +* into a "bidiagonal least squares problem" (BLS) +* (2) Solve the BLS using a divide and conquer approach. +* (3) Apply back all the Householder tranformations to solve +* the original least squares problem. +* +* The effective rank of A is determined by treating as zero those +* singular values which are less than RCOND times the largest singular +* value. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A has been destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the M-by-NRHS right hand side matrix B. +* On exit, B is overwritten by the N-by-NRHS solution matrix X. +* If m >= n and RANK = n, the residual sum-of-squares for +* the solution in the i-th column is given by the sum of +* squares of elements n+1:m in that column. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M,N). +* +* S (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The singular values of A in decreasing order. +* The condition number of A in the 2-norm = S(1)/S(min(m,n)). +* +* RCOND (input) DOUBLE PRECISION +* RCOND is used to determine the effective rank of A. +* Singular values S(i) <= RCOND*S(1) are treated as zero. +* If RCOND < 0, machine precision is used instead. +* +* RANK (output) INTEGER +* The effective rank of A, i.e., the number of singular values +* which are greater than RCOND*S(1). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK must be at least 1. +* The exact minimum amount of workspace needed depends on M, +* N and NRHS. As long as LWORK is at least +* 2 * N + N * NRHS +* if M is greater than or equal to N or +* 2 * M + M * NRHS +* if M is less than N, the code will execute correctly. +* For good performance, LWORK should generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension at least +* 10*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + +* (SMLSIZ+1)**2 +* if M is greater than or equal to N or +* 10*M + 2*M*SMLSIZ + 8*M*NLVL + 3*SMLSIZ*NRHS + +* (SMLSIZ+1)**2 +* if M is less than N, the code will execute correctly. +* SMLSIZ is returned by ILAENV and is equal to the maximum +* size of the subproblems at the bottom of the computation +* tree (usually about 25), and +* NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) +* +* IWORK (workspace) INTEGER array, dimension (LIWORK) +* LIWORK >= 3 * MINMN * NLVL + 11 * MINMN, +* where MINMN = MIN( M,N ). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: the algorithm for computing the SVD failed to converge; +* if INFO = i, i off-diagonal elements of an intermediate +* bidiagonal form did not converge to zero. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER IASCL, IBSCL, IE, IL, ITAU, ITAUP, ITAUQ, + $ LDWORK, MAXMN, MAXWRK, MINMN, MINWRK, MM, + $ MNTHR, NRWORK, NWORK, SMLSIZ + DOUBLE PRECISION ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, DLASCL, DLASET, XERBLA, ZGEBRD, ZGELQF, + $ ZGEQRF, ZLACPY, ZLALSD, ZLASCL, ZLASET, ZUNMBR, + $ ZUNMLQ, ZUNMQR +* .. +* .. External Functions .. + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC DCMPLX, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + MNTHR = ILAENV( 6, 'ZGELSD', ' ', M, N, NRHS, -1 ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN + INFO = -7 + END IF +* + SMLSIZ = ILAENV( 9, 'ZGELSD', ' ', 0, 0, 0, 0 ) +* +* Compute workspace. +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + MINWRK = 1 + IF( INFO.EQ.0 ) THEN + MAXWRK = 0 + MM = M + IF( M.GE.N .AND. M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns. +* + MM = N + MAXWRK = MAX( MAXWRK, N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, + $ -1 ) ) + MAXWRK = MAX( MAXWRK, NRHS*ILAENV( 1, 'ZUNMQR', 'LC', M, + $ NRHS, N, -1 ) ) + END IF + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined. +* + MAXWRK = MAX( MAXWRK, 2*N+( MM+N )* + $ ILAENV( 1, 'ZGEBRD', ' ', MM, N, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N+NRHS* + $ ILAENV( 1, 'ZUNMBR', 'QLC', MM, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N+( N-1 )* + $ ILAENV( 1, 'ZUNMBR', 'PLN', N, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N+N*NRHS ) + MINWRK = MAX( 2*N+MM, 2*N+N*NRHS ) + END IF + IF( N.GT.M ) THEN + IF( N.GE.MNTHR ) THEN +* +* Path 2a - underdetermined, with many more columns +* than rows. +* + MAXWRK = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 ) + MAXWRK = MAX( MAXWRK, M*M+4*M+2*M* + $ ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M+4*M+NRHS* + $ ILAENV( 1, 'ZUNMBR', 'QLC', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M+4*M+( M-1 )* + $ ILAENV( 1, 'ZUNMLQ', 'LC', N, NRHS, M, -1 ) ) + IF( NRHS.GT.1 ) THEN + MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS ) + ELSE + MAXWRK = MAX( MAXWRK, M*M+2*M ) + END IF + MAXWRK = MAX( MAXWRK, M*M+4*M+M*NRHS ) + ELSE +* +* Path 2 - underdetermined. +* + MAXWRK = 2*M + ( N+M )*ILAENV( 1, 'ZGEBRD', ' ', M, N, + $ -1, -1 ) + MAXWRK = MAX( MAXWRK, 2*M+NRHS* + $ ILAENV( 1, 'ZUNMBR', 'QLC', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'ZUNMBR', 'PLN', N, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M+M*NRHS ) + END IF + MINWRK = MAX( 2*M+N, 2*M+M*NRHS ) + END IF + MINWRK = MIN( MINWRK, MAXWRK ) + WORK( 1 ) = DCMPLX( MAXWRK, 0 ) + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGELSD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + GO TO 10 + END IF +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters. +* + EPS = DLAMCH( 'P' ) + SFMIN = DLAMCH( 'S' ) + SMLNUM = SFMIN / EPS + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A if max entry outside range [SMLNUM,BIGNUM]. +* + ANRM = ZLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM. +* + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL ZLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 ) + RANK = 0 + GO TO 10 + END IF +* +* Scale B if max entry outside range [SMLNUM,BIGNUM]. +* + BNRM = ZLANGE( 'M', M, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM. +* + CALL ZLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM. +* + CALL ZLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* If M < N make sure B(M+1:N,:) = 0 +* + IF( M.LT.N ) + $ CALL ZLASET( 'F', N-M, NRHS, CZERO, CZERO, B( M+1, 1 ), LDB ) +* +* Overdetermined case. +* + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined. +* + MM = M + IF( M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns +* + MM = N + ITAU = 1 + NWORK = ITAU + N +* +* Compute A=Q*R. +* (RWorkspace: need N) +* (CWorkspace: need N, prefer N*NB) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Multiply B by transpose(Q). +* (RWorkspace: need N) +* (CWorkspace: need NRHS, prefer NRHS*NB) +* + CALL ZUNMQR( 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Zero out below R. +* + IF( N.GT.1 ) THEN + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), + $ LDA ) + END IF + END IF +* + ITAUQ = 1 + ITAUP = ITAUQ + N + NWORK = ITAUP + N + IE = 1 + NRWORK = IE + N +* +* Bidiagonalize R in A. +* (RWorkspace: need N) +* (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB) +* + CALL ZGEBRD( MM, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of R. +* (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB) +* + CALL ZUNMBR( 'Q', 'L', 'C', MM, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL ZLALSD( 'U', SMLSIZ, N, NRHS, S, RWORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), RWORK( NRWORK ), + $ IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of R. +* + CALL ZUNMBR( 'P', 'L', 'N', N, NRHS, N, A, LDA, WORK( ITAUP ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+ + $ MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN +* +* Path 2a - underdetermined, with many more columns than rows +* and sufficient workspace for an efficient algorithm. +* + LDWORK = M + IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ), + $ M*LDA+M+M*NRHS ) )LDWORK = LDA + ITAU = 1 + NWORK = M + 1 +* +* Compute A=L*Q. +* (CWorkspace: need 2*M, prefer M+M*NB) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) + IL = NWORK +* +* Copy L to WORK(IL), zeroing out above its diagonal. +* + CALL ZLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, WORK( IL+LDWORK ), + $ LDWORK ) + ITAUQ = IL + LDWORK*M + ITAUP = ITAUQ + M + NWORK = ITAUP + M + IE = 1 + NRWORK = IE + M +* +* Bidiagonalize L in WORK(IL). +* (RWorkspace: need M) +* (CWorkspace: need M*M+4*M, prefer M*M+4*M+2*M*NB) +* + CALL ZGEBRD( M, M, WORK( IL ), LDWORK, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of L. +* (CWorkspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) +* + CALL ZUNMBR( 'Q', 'L', 'C', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUQ ), B, LDB, WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL ZLALSD( 'U', SMLSIZ, M, NRHS, S, RWORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), RWORK( NRWORK ), + $ IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of L. +* + CALL ZUNMBR( 'P', 'L', 'N', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUP ), B, LDB, WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Zero out below first M rows of B. +* + CALL ZLASET( 'F', N-M, NRHS, CZERO, CZERO, B( M+1, 1 ), LDB ) + NWORK = ITAU + M +* +* Multiply transpose(Q) by B. +* (CWorkspace: need NRHS, prefer NRHS*NB) +* + CALL ZUNMLQ( 'L', 'C', N, NRHS, M, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + ELSE +* +* Path 2 - remaining underdetermined cases. +* + ITAUQ = 1 + ITAUP = ITAUQ + M + NWORK = ITAUP + M + IE = 1 + NRWORK = IE + M +* +* Bidiagonalize A. +* (RWorkspace: need M) +* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) +* + CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors. +* (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB) +* + CALL ZUNMBR( 'Q', 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL ZLALSD( 'L', SMLSIZ, M, NRHS, S, RWORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), RWORK( NRWORK ), + $ IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of A. +* + CALL ZUNMBR( 'P', 'L', 'N', N, NRHS, M, A, LDA, WORK( ITAUP ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + END IF +* +* Undo scaling. +* + IF( IASCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 10 CONTINUE + WORK( 1 ) = DCMPLX( MAXWRK, 0 ) + RETURN +* +* End of ZGELSD +* + END diff --git a/costa/native/external/lapack/zgelss.f b/costa/native/external/lapack/zgelss.f new file mode 100644 index 000000000..aa75b5081 --- /dev/null +++ b/costa/native/external/lapack/zgelss.f @@ -0,0 +1,639 @@ + SUBROUTINE ZGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, + $ WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), S( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGELSS computes the minimum norm solution to a complex linear +* least squares problem: +* +* Minimize 2-norm(| b - A*x |). +* +* using the singular value decomposition (SVD) of A. A is an M-by-N +* matrix which may be rank-deficient. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix +* X. +* +* The effective rank of A is determined by treating as zero those +* singular values which are less than RCOND times the largest singular +* value. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the first min(m,n) rows of A are overwritten with +* its right singular vectors, stored rowwise. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the M-by-NRHS right hand side matrix B. +* On exit, B is overwritten by the N-by-NRHS solution matrix X. +* If m >= n and RANK = n, the residual sum-of-squares for +* the solution in the i-th column is given by the sum of +* squares of elements n+1:m in that column. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M,N). +* +* S (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The singular values of A in decreasing order. +* The condition number of A in the 2-norm = S(1)/S(min(m,n)). +* +* RCOND (input) DOUBLE PRECISION +* RCOND is used to determine the effective rank of A. +* Singular values S(i) <= RCOND*S(1) are treated as zero. +* If RCOND < 0, machine precision is used instead. +* +* RANK (output) INTEGER +* The effective rank of A, i.e., the number of singular values +* which are greater than RCOND*S(1). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1, and also: +* LWORK >= 2*min(M,N) + max(M,N,NRHS) +* For good performance, LWORK should generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (5*min(M,N)) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: the algorithm for computing the SVD failed to converge; +* if INFO = i, i off-diagonal elements of an intermediate +* bidiagonal form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER BL, CHUNK, I, IASCL, IBSCL, IE, IL, IRWORK, + $ ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN, + $ MAXWRK, MINMN, MINWRK, MM, MNTHR + DOUBLE PRECISION ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR +* .. +* .. Local Arrays .. + COMPLEX*16 VDUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, DLASCL, DLASET, XERBLA, ZBDSQR, ZCOPY, + $ ZDRSCL, ZGEBRD, ZGELQF, ZGEMM, ZGEMV, ZGEQRF, + $ ZLACPY, ZLASCL, ZLASET, ZUNGBR, ZUNMBR, ZUNMLQ, + $ ZUNMQR +* .. +* .. External Functions .. + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + MNTHR = ILAENV( 6, 'ZGELSS', ' ', M, N, NRHS, -1 ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace refers +* to real workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MAXWRK = 0 + MM = M + IF( M.GE.N .AND. M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns +* +* Space needed for ZBDSQR is BDSPAC = 5*N +* + MM = N + MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'ZGEQRF', ' ', M, N, + $ -1, -1 ) ) + MAXWRK = MAX( MAXWRK, N+NRHS* + $ ILAENV( 1, 'ZUNMQR', 'LC', M, NRHS, N, -1 ) ) + END IF + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined +* +* Space needed for ZBDSQR is BDSPC = 7*N+12 +* + MAXWRK = MAX( MAXWRK, 2*N+( MM+N )* + $ ILAENV( 1, 'ZGEBRD', ' ', MM, N, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N+NRHS* + $ ILAENV( 1, 'ZUNMBR', 'QLC', MM, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N+( N-1 )* + $ ILAENV( 1, 'ZUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, N*NRHS ) + MINWRK = 2*N + MAX( NRHS, M ) + END IF + IF( N.GT.M ) THEN + MINWRK = 2*M + MAX( NRHS, N ) + IF( N.GE.MNTHR ) THEN +* +* Path 2a - underdetermined, with many more columns +* than rows +* +* Space needed for ZBDSQR is BDSPAC = 5*M +* + MAXWRK = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 ) + MAXWRK = MAX( MAXWRK, 3*M+M*M+2*M* + $ ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*M+M*M+NRHS* + $ ILAENV( 1, 'ZUNMBR', 'QLC', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*M+M*M+( M-1 )* + $ ILAENV( 1, 'ZUNGBR', 'P', M, M, M, -1 ) ) + IF( NRHS.GT.1 ) THEN + MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS ) + ELSE + MAXWRK = MAX( MAXWRK, M*M+2*M ) + END IF + MAXWRK = MAX( MAXWRK, M+NRHS* + $ ILAENV( 1, 'ZUNMLQ', 'LC', N, NRHS, M, -1 ) ) + ELSE +* +* Path 2 - underdetermined +* +* Space needed for ZBDSQR is BDSPAC = 5*M +* + MAXWRK = 2*M + ( N+M )*ILAENV( 1, 'ZGEBRD', ' ', M, N, + $ -1, -1 ) + MAXWRK = MAX( MAXWRK, 2*M+NRHS* + $ ILAENV( 1, 'ZUNMBR', 'QLC', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'ZUNGBR', 'P', M, N, M, -1 ) ) + MAXWRK = MAX( MAXWRK, N*NRHS ) + END IF + END IF + MINWRK = MAX( MINWRK, 1 ) + MAXWRK = MAX( MINWRK, MAXWRK ) + WORK( 1 ) = MAXWRK + END IF +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) + $ INFO = -12 + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGELSS', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + EPS = DLAMCH( 'P' ) + SFMIN = DLAMCH( 'S' ) + SMLNUM = SFMIN / EPS + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL ZLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, MINMN ) + RANK = 0 + GO TO 70 + END IF +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = ZLANGE( 'M', M, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Overdetermined case +* + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined +* + MM = M + IF( M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns +* + MM = N + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Multiply B by transpose(Q) +* (CWorkspace: need N+NRHS, prefer N+NRHS*NB) +* (RWorkspace: none) +* + CALL ZUNMQR( 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Zero out below R +* + IF( N.GT.1 ) + $ CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), + $ LDA ) + END IF +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in A +* (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( MM, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of R +* (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB) +* (RWorkspace: none) +* + CALL ZUNMBR( 'Q', 'L', 'C', MM, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors of R in A +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: none) +* + CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration +* multiply B by transpose of left singular vectors +* compute right singular vectors in A +* (CWorkspace: none) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, N, 0, NRHS, S, RWORK( IE ), A, LDA, VDUM, + $ 1, B, LDB, RWORK( IRWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 10 I = 1, N + IF( S( I ).GT.THR ) THEN + CALL ZDRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL ZLASET( 'F', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB ) + END IF + 10 CONTINUE +* +* Multiply B by right singular vectors +* (CWorkspace: need N, prefer N*NRHS) +* (RWorkspace: none) +* + IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN + CALL ZGEMM( 'C', 'N', N, NRHS, N, CONE, A, LDA, B, LDB, + $ CZERO, WORK, LDB ) + CALL ZLACPY( 'G', N, NRHS, WORK, LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = LWORK / N + DO 20 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL ZGEMM( 'C', 'N', N, BL, N, CONE, A, LDA, B( 1, I ), + $ LDB, CZERO, WORK, N ) + CALL ZLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB ) + 20 CONTINUE + ELSE + CALL ZGEMV( 'C', N, N, CONE, A, LDA, B, 1, CZERO, WORK, 1 ) + CALL ZCOPY( N, WORK, 1, B, 1 ) + END IF +* + ELSE IF( N.GE.MNTHR .AND. LWORK.GE.3*M+M*M+MAX( M, NRHS, N-2*M ) ) + $ THEN +* +* Underdetermined case, M much less than N +* +* Path 2a - underdetermined, with many more columns than rows +* and sufficient workspace for an efficient algorithm +* + LDWORK = M + IF( LWORK.GE.3*M+M*LDA+MAX( M, NRHS, N-2*M ) ) + $ LDWORK = LDA + ITAU = 1 + IWORK = M + 1 +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: none) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) + IL = IWORK +* +* Copy L to WORK(IL), zeroing out above it +* + CALL ZLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, WORK( IL+LDWORK ), + $ LDWORK ) + IE = 1 + ITAUQ = IL + LDWORK*M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL) +* (CWorkspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, WORK( IL ), LDWORK, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of L +* (CWorkspace: need M*M+3*M+NRHS, prefer M*M+3*M+NRHS*NB) +* (RWorkspace: none) +* + CALL ZUNMBR( 'Q', 'L', 'C', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUQ ), B, LDB, WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors of R in WORK(IL) +* (CWorkspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) +* (RWorkspace: none) +* + CALL ZUNGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right singular +* vectors of L in WORK(IL) and multiplying B by transpose of +* left singular vectors +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, M, 0, NRHS, S, RWORK( IE ), WORK( IL ), + $ LDWORK, A, LDA, B, LDB, RWORK( IRWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 30 I = 1, M + IF( S( I ).GT.THR ) THEN + CALL ZDRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL ZLASET( 'F', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB ) + END IF + 30 CONTINUE + IWORK = IL + M*LDWORK +* +* Multiply B by right singular vectors of L in WORK(IL) +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NRHS) +* (RWorkspace: none) +* + IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN + CALL ZGEMM( 'C', 'N', M, NRHS, M, CONE, WORK( IL ), LDWORK, + $ B, LDB, CZERO, WORK( IWORK ), LDB ) + CALL ZLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = ( LWORK-IWORK+1 ) / M + DO 40 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL ZGEMM( 'C', 'N', M, BL, M, CONE, WORK( IL ), LDWORK, + $ B( 1, I ), LDB, CZERO, WORK( IWORK ), N ) + CALL ZLACPY( 'G', M, BL, WORK( IWORK ), N, B( 1, I ), + $ LDB ) + 40 CONTINUE + ELSE + CALL ZGEMV( 'C', M, M, CONE, WORK( IL ), LDWORK, B( 1, 1 ), + $ 1, CZERO, WORK( IWORK ), 1 ) + CALL ZCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 ) + END IF +* +* Zero out below first M rows of B +* + CALL ZLASET( 'F', N-M, NRHS, CZERO, CZERO, B( M+1, 1 ), LDB ) + IWORK = ITAU + M +* +* Multiply transpose(Q) by B +* (CWorkspace: need M+NRHS, prefer M+NHRS*NB) +* (RWorkspace: none) +* + CALL ZUNMLQ( 'L', 'C', N, NRHS, M, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* + ELSE +* +* Path 2 - remaining underdetermined cases +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (CWorkspace: need 3*M, prefer 2*M+(M+N)*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors +* (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB) +* (RWorkspace: none) +* + CALL ZUNMBR( 'Q', 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors in A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: none) +* + CALL ZUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, +* computing right singular vectors of A in A and +* multiplying B by transpose of left singular vectors +* (CWorkspace: none) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'L', M, N, 0, NRHS, S, RWORK( IE ), A, LDA, VDUM, + $ 1, B, LDB, RWORK( IRWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 50 I = 1, M + IF( S( I ).GT.THR ) THEN + CALL ZDRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL ZLASET( 'F', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB ) + END IF + 50 CONTINUE +* +* Multiply B by right singular vectors of A +* (CWorkspace: need N, prefer N*NRHS) +* (RWorkspace: none) +* + IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN + CALL ZGEMM( 'C', 'N', N, NRHS, M, CONE, A, LDA, B, LDB, + $ CZERO, WORK, LDB ) + CALL ZLACPY( 'G', N, NRHS, WORK, LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = LWORK / N + DO 60 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL ZGEMM( 'C', 'N', N, BL, M, CONE, A, LDA, B( 1, I ), + $ LDB, CZERO, WORK, N ) + CALL ZLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB ) + 60 CONTINUE + ELSE + CALL ZGEMV( 'C', M, N, CONE, A, LDA, B, 1, CZERO, WORK, 1 ) + CALL ZCOPY( N, WORK, 1, B, 1 ) + END IF + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF + 70 CONTINUE + WORK( 1 ) = MAXWRK + RETURN +* +* End of ZGELSS +* + END diff --git a/costa/native/external/lapack/zgelsx.f b/costa/native/external/lapack/zgelsx.f new file mode 100644 index 000000000..dd324d652 --- /dev/null +++ b/costa/native/external/lapack/zgelsx.f @@ -0,0 +1,358 @@ + SUBROUTINE ZGELSX( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, + $ WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, M, N, NRHS, RANK + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine ZGELSY. +* +* ZGELSX computes the minimum-norm solution to a complex linear least +* squares problem: +* minimize || A * X - B || +* using a complete orthogonal factorization of A. A is an M-by-N +* matrix which may be rank-deficient. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution +* matrix X. +* +* The routine first computes a QR factorization with column pivoting: +* A * P = Q * [ R11 R12 ] +* [ 0 R22 ] +* with R11 defined as the largest leading submatrix whose estimated +* condition number is less than 1/RCOND. The order of R11, RANK, +* is the effective rank of A. +* +* Then, R22 is considered to be negligible, and R12 is annihilated +* by unitary transformations from the right, arriving at the +* complete orthogonal factorization: +* A * P = Q * [ T11 0 ] * Z +* [ 0 0 ] +* The minimum-norm solution is then +* X = P * Z' [ inv(T11)*Q1'*B ] +* [ 0 ] +* where Q1 consists of the first RANK columns of Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of +* columns of matrices B and X. NRHS >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A has been overwritten by details of its +* complete orthogonal factorization. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the M-by-NRHS right hand side matrix B. +* On exit, the N-by-NRHS solution matrix X. +* If m >= n and RANK = n, the residual sum-of-squares for +* the solution in the i-th column is given by the sum of +* squares of elements N+1:M in that column. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M,N). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(i) .ne. 0, the i-th column of A is an +* initial column, otherwise it is a free column. Before +* the QR factorization of A, all initial columns are +* permuted to the leading positions; only the remaining +* free columns are moved as a result of column pivoting +* during the factorization. +* On exit, if JPVT(i) = k, then the i-th column of A*P +* was the k-th column of A. +* +* RCOND (input) DOUBLE PRECISION +* RCOND is used to determine the effective rank of A, which +* is defined as the order of the largest leading triangular +* submatrix R11 in the QR factorization with pivoting of A, +* whose estimated condition number < 1/RCOND. +* +* RANK (output) INTEGER +* The effective rank of A, i.e., the order of the submatrix +* R11. This is the same as the order of the submatrix T11 +* in the complete orthogonal factorization of A. +* +* WORK (workspace) COMPLEX*16 array, dimension +* (min(M,N) + max( N, 2*min(M,N)+NRHS )), +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IMAX, IMIN + PARAMETER ( IMAX = 1, IMIN = 2 ) + DOUBLE PRECISION ZERO, ONE, DONE, NTDONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, DONE = ZERO, + $ NTDONE = ONE ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, IASCL, IBSCL, ISMAX, ISMIN, J, K, MN + DOUBLE PRECISION ANRM, BIGNUM, BNRM, SMAX, SMAXPR, SMIN, SMINPR, + $ SMLNUM + COMPLEX*16 C1, C2, S1, S2, T1, T2 +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQPF, ZLAIC1, ZLASCL, ZLASET, ZLATZM, + $ ZTRSM, ZTZRQF, ZUNM2R +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DCONJG, MAX, MIN +* .. +* .. Executable Statements .. +* + MN = MIN( M, N ) + ISMIN = MN + 1 + ISMAX = 2*MN + 1 +* +* Test the input arguments. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -7 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGELSX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max elements outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL ZLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + RANK = 0 + GO TO 100 + END IF +* + BNRM = ZLANGE( 'M', M, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Compute QR factorization with column pivoting of A: +* A * P = Q * R +* + CALL ZGEQPF( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ), RWORK, + $ INFO ) +* +* complex workspace MN+N. Real workspace 2*N. Details of Householder +* rotations stored in WORK(1:MN). +* +* Determine RANK using incremental condition estimation +* + WORK( ISMIN ) = CONE + WORK( ISMAX ) = CONE + SMAX = ABS( A( 1, 1 ) ) + SMIN = SMAX + IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN + RANK = 0 + CALL ZLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + GO TO 100 + ELSE + RANK = 1 + END IF +* + 10 CONTINUE + IF( RANK.LT.MN ) THEN + I = RANK + 1 + CALL ZLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ), + $ A( I, I ), SMINPR, S1, C1 ) + CALL ZLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ), + $ A( I, I ), SMAXPR, S2, C2 ) +* + IF( SMAXPR*RCOND.LE.SMINPR ) THEN + DO 20 I = 1, RANK + WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 ) + WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 ) + 20 CONTINUE + WORK( ISMIN+RANK ) = C1 + WORK( ISMAX+RANK ) = C2 + SMIN = SMINPR + SMAX = SMAXPR + RANK = RANK + 1 + GO TO 10 + END IF + END IF +* +* Logically partition R = [ R11 R12 ] +* [ 0 R22 ] +* where R11 = R(1:RANK,1:RANK) +* +* [R11,R12] = [ T11, 0 ] * Y +* + IF( RANK.LT.N ) + $ CALL ZTZRQF( RANK, N, A, LDA, WORK( MN+1 ), INFO ) +* +* Details of Householder rotations stored in WORK(MN+1:2*MN) +* +* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) +* + CALL ZUNM2R( 'Left', 'Conjugate transpose', M, NRHS, MN, A, LDA, + $ WORK( 1 ), B, LDB, WORK( 2*MN+1 ), INFO ) +* +* workspace NRHS +* +* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) +* + CALL ZTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK, + $ NRHS, CONE, A, LDA, B, LDB ) +* + DO 40 I = RANK + 1, N + DO 30 J = 1, NRHS + B( I, J ) = CZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS) +* + IF( RANK.LT.N ) THEN + DO 50 I = 1, RANK + CALL ZLATZM( 'Left', N-RANK+1, NRHS, A( I, RANK+1 ), LDA, + $ DCONJG( WORK( MN+I ) ), B( I, 1 ), + $ B( RANK+1, 1 ), LDB, WORK( 2*MN+1 ) ) + 50 CONTINUE + END IF +* +* workspace NRHS +* +* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) +* + DO 90 J = 1, NRHS + DO 60 I = 1, N + WORK( 2*MN+I ) = NTDONE + 60 CONTINUE + DO 80 I = 1, N + IF( WORK( 2*MN+I ).EQ.NTDONE ) THEN + IF( JPVT( I ).NE.I ) THEN + K = I + T1 = B( K, J ) + T2 = B( JPVT( K ), J ) + 70 CONTINUE + B( JPVT( K ), J ) = T1 + WORK( 2*MN+K ) = DONE + T1 = T2 + K = JPVT( K ) + T2 = B( JPVT( K ), J ) + IF( JPVT( K ).NE.I ) + $ GO TO 70 + B( I, J ) = T1 + WORK( 2*MN+K ) = DONE + END IF + END IF + 80 CONTINUE + 90 CONTINUE +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL ZLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL ZLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 100 CONTINUE +* + RETURN +* +* End of ZGELSX +* + END diff --git a/costa/native/external/lapack/zgelsy.f b/costa/native/external/lapack/zgelsy.f new file mode 100644 index 000000000..28c84971d --- /dev/null +++ b/costa/native/external/lapack/zgelsy.f @@ -0,0 +1,386 @@ + SUBROUTINE ZGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, + $ WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGELSY computes the minimum-norm solution to a complex linear least +* squares problem: +* minimize || A * X - B || +* using a complete orthogonal factorization of A. A is an M-by-N +* matrix which may be rank-deficient. +* +* Several right hand side vectors b and solution vectors x can be +* handled in a single call; they are stored as the columns of the +* M-by-NRHS right hand side matrix B and the N-by-NRHS solution +* matrix X. +* +* The routine first computes a QR factorization with column pivoting: +* A * P = Q * [ R11 R12 ] +* [ 0 R22 ] +* with R11 defined as the largest leading submatrix whose estimated +* condition number is less than 1/RCOND. The order of R11, RANK, +* is the effective rank of A. +* +* Then, R22 is considered to be negligible, and R12 is annihilated +* by unitary transformations from the right, arriving at the +* complete orthogonal factorization: +* A * P = Q * [ T11 0 ] * Z +* [ 0 0 ] +* The minimum-norm solution is then +* X = P * Z' [ inv(T11)*Q1'*B ] +* [ 0 ] +* where Q1 consists of the first RANK columns of Q. +* +* This routine is basically identical to the original xGELSX except +* three differences: +* o The permutation of matrix B (the right hand side) is faster and +* more simple. +* o The call to the subroutine xGEQPF has been substituted by the +* the call to the subroutine xGEQP3. This subroutine is a Blas-3 +* version of the QR factorization with column pivoting. +* o Matrix B (the right hand side) is updated with Blas-3. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of +* columns of matrices B and X. NRHS >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A has been overwritten by details of its +* complete orthogonal factorization. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the M-by-NRHS right hand side matrix B. +* On exit, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M,N). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +* to the front of AP, otherwise column i is a free column. +* On exit, if JPVT(i) = k, then the i-th column of A*P +* was the k-th column of A. +* +* RCOND (input) DOUBLE PRECISION +* RCOND is used to determine the effective rank of A, which +* is defined as the order of the largest leading triangular +* submatrix R11 in the QR factorization with pivoting of A, +* whose estimated condition number < 1/RCOND. +* +* RANK (output) INTEGER +* The effective rank of A, i.e., the order of the submatrix +* R11. This is the same as the order of the submatrix T11 +* in the complete orthogonal factorization of A. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* The unblocked strategy requires that: +* LWORK >= MN + MAX( 2*MN, N+1, MN+NRHS ) +* where MN = min(M,N). +* The block algorithm requires that: +* LWORK >= MN + MAX( 2*MN, NB*(N+1), MN+MN*NB, MN+NB*NRHS ) +* where NB is an upper bound on the blocksize returned +* by ILAENV for the routines ZGEQP3, ZTZRZF, CTZRQF, ZUNMQR, +* and ZUNMRZ. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IMAX, IMIN + PARAMETER ( IMAX = 1, IMIN = 2 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IASCL, IBSCL, ISMAX, ISMIN, J, LWKOPT, MN, + $ NB, NB1, NB2, NB3, NB4 + DOUBLE PRECISION ANRM, BIGNUM, BNRM, SMAX, SMAXPR, SMIN, SMINPR, + $ SMLNUM, WSIZE + COMPLEX*16 C1, C2, S1, S2 +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, XERBLA, ZCOPY, ZGEQP3, ZLAIC1, ZLASCL, + $ ZLASET, ZTRSM, ZTZRZF, ZUNMQR, ZUNMRZ +* .. +* .. External Functions .. + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, MAX, MIN +* .. +* .. Executable Statements .. +* + MN = MIN( M, N ) + ISMIN = MN + 1 + ISMAX = 2*MN + 1 +* +* Test the input arguments. +* + INFO = 0 + NB1 = ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'ZGERQF', ' ', M, N, -1, -1 ) + NB3 = ILAENV( 1, 'ZUNMQR', ' ', M, N, NRHS, -1 ) + NB4 = ILAENV( 1, 'ZUNMRQ', ' ', M, N, NRHS, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKOPT = MAX( 1, MN+2*N+NB*( N+1 ), 2*MN+NB*NRHS ) + WORK( 1 ) = DCMPLX( LWKOPT ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.( MN+MAX( 2*MN, N+1, MN+NRHS ) ) .AND. .NOT. + $ LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGELSY', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max entries outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL ZLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + RANK = 0 + GO TO 70 + END IF +* + BNRM = ZLANGE( 'M', M, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Compute QR factorization with column pivoting of A: +* A * P = Q * R +* + CALL ZGEQP3( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ), + $ LWORK-MN, RWORK, INFO ) + WSIZE = MN + DBLE( WORK( MN+1 ) ) +* +* complex workspace: MN+NB*(N+1). real workspace 2*N. +* Details of Householder rotations stored in WORK(1:MN). +* +* Determine RANK using incremental condition estimation +* + WORK( ISMIN ) = CONE + WORK( ISMAX ) = CONE + SMAX = ABS( A( 1, 1 ) ) + SMIN = SMAX + IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN + RANK = 0 + CALL ZLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + GO TO 70 + ELSE + RANK = 1 + END IF +* + 10 CONTINUE + IF( RANK.LT.MN ) THEN + I = RANK + 1 + CALL ZLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ), + $ A( I, I ), SMINPR, S1, C1 ) + CALL ZLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ), + $ A( I, I ), SMAXPR, S2, C2 ) +* + IF( SMAXPR*RCOND.LE.SMINPR ) THEN + DO 20 I = 1, RANK + WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 ) + WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 ) + 20 CONTINUE + WORK( ISMIN+RANK ) = C1 + WORK( ISMAX+RANK ) = C2 + SMIN = SMINPR + SMAX = SMAXPR + RANK = RANK + 1 + GO TO 10 + END IF + END IF +* +* complex workspace: 3*MN. +* +* Logically partition R = [ R11 R12 ] +* [ 0 R22 ] +* where R11 = R(1:RANK,1:RANK) +* +* [R11,R12] = [ T11, 0 ] * Y +* + IF( RANK.LT.N ) + $ CALL ZTZRZF( RANK, N, A, LDA, WORK( MN+1 ), WORK( 2*MN+1 ), + $ LWORK-2*MN, INFO ) +* +* complex workspace: 2*MN. +* Details of Householder rotations stored in WORK(MN+1:2*MN) +* +* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) +* + CALL ZUNMQR( 'Left', 'Conjugate transpose', M, NRHS, MN, A, LDA, + $ WORK( 1 ), B, LDB, WORK( 2*MN+1 ), LWORK-2*MN, INFO ) + WSIZE = MAX( WSIZE, 2*MN+DBLE( WORK( 2*MN+1 ) ) ) +* +* complex workspace: 2*MN+NB*NRHS. +* +* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) +* + CALL ZTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK, + $ NRHS, CONE, A, LDA, B, LDB ) +* + DO 40 J = 1, NRHS + DO 30 I = RANK + 1, N + B( I, J ) = CZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS) +* + IF( RANK.LT.N ) THEN + CALL ZUNMRZ( 'Left', 'Conjugate transpose', N, NRHS, RANK, + $ N-RANK, A, LDA, WORK( MN+1 ), B, LDB, + $ WORK( 2*MN+1 ), LWORK-2*MN, INFO ) + END IF +* +* complex workspace: 2*MN+NRHS. +* +* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) +* + DO 60 J = 1, NRHS + DO 50 I = 1, N + WORK( JPVT( I ) ) = B( I, J ) + 50 CONTINUE + CALL ZCOPY( N, WORK( 1 ), 1, B( 1, J ), 1 ) + 60 CONTINUE +* +* complex workspace: N. +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL ZLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL ZLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 70 CONTINUE + WORK( 1 ) = DCMPLX( LWKOPT ) +* + RETURN +* +* End of ZGELSY +* + END diff --git a/costa/native/external/lapack/zgeql2.f b/costa/native/external/lapack/zgeql2.f new file mode 100644 index 000000000..e408312ea --- /dev/null +++ b/costa/native/external/lapack/zgeql2.f @@ -0,0 +1,122 @@ + SUBROUTINE ZGEQL2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGEQL2 computes a QL factorization of a complex m by n matrix A: +* A = Q * L. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the m by n matrix A. +* On exit, if m >= n, the lower triangle of the subarray +* A(m-n+1:m,1:n) contains the n by n lower triangular matrix L; +* if m <= n, the elements on and below the (n-m)-th +* superdiagonal contain the m by n lower trapezoidal matrix L; +* the remaining elements, with the array TAU, represent the +* unitary matrix Q as a product of elementary reflectors +* (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX*16 array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) COMPLEX*16 array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(k) . . . H(2) H(1), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in +* A(1:m-k+i-1,n-k+i), and tau in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARF, ZLARFG +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEQL2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = K, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* A(1:m-k+i-1,n-k+i) +* + ALPHA = A( M-K+I, N-K+I ) + CALL ZLARFG( M-K+I, ALPHA, A( 1, N-K+I ), 1, TAU( I ) ) +* +* Apply H(i)' to A(1:m-k+i,1:n-k+i-1) from the left +* + A( M-K+I, N-K+I ) = ONE + CALL ZLARF( 'Left', M-K+I, N-K+I-1, A( 1, N-K+I ), 1, + $ DCONJG( TAU( I ) ), A, LDA, WORK ) + A( M-K+I, N-K+I ) = ALPHA + 10 CONTINUE + RETURN +* +* End of ZGEQL2 +* + END diff --git a/costa/native/external/lapack/zgeqlf.f b/costa/native/external/lapack/zgeqlf.f new file mode 100644 index 000000000..f57dd8d32 --- /dev/null +++ b/costa/native/external/lapack/zgeqlf.f @@ -0,0 +1,205 @@ + SUBROUTINE ZGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGEQLF computes a QL factorization of a complex M-by-N matrix A: +* A = Q * L. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, +* if m >= n, the lower triangle of the subarray +* A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L; +* if m <= n, the elements on and below the (n-m)-th +* superdiagonal contain the M-by-N lower trapezoidal matrix L; +* the remaining elements, with the array TAU, represent the +* unitary matrix Q as a product of elementary reflectors +* (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX*16 array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(k) . . . H(2) H(1), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in +* A(1:m-k+i-1,n-k+i), and tau in TAU(i). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT, + $ MU, NB, NBMIN, NU, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQL2, ZLARFB, ZLARFT +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'ZGEQLF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEQLF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'ZGEQLF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZGEQLF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially. +* The last kk columns are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* + DO 10 I = K - KK + KI + 1, K - KK + 1, -NB + IB = MIN( K-I+1, NB ) +* +* Compute the QL factorization of the current block +* A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1) +* + CALL ZGEQL2( M-K+I+IB-1, IB, A( 1, N-K+I ), LDA, TAU( I ), + $ WORK, IINFO ) + IF( N-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL ZLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H' to A(1:m-k+i+ib-1,1:n-k+i-1) from the left +* + CALL ZLARFB( 'Left', 'Conjugate transpose', 'Backward', + $ 'Columnwise', M-K+I+IB-1, N-K+I-1, IB, + $ A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + MU = M - K + I + NB - 1 + NU = N - K + I + NB - 1 + ELSE + MU = M + NU = N + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 .AND. NU.GT.0 ) + $ CALL ZGEQL2( MU, NU, A, LDA, TAU, WORK, IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of ZGEQLF +* + END diff --git a/costa/native/external/lapack/zgeqp3.f b/costa/native/external/lapack/zgeqp3.f new file mode 100644 index 000000000..ffb709a5d --- /dev/null +++ b/costa/native/external/lapack/zgeqp3.f @@ -0,0 +1,285 @@ + SUBROUTINE ZGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, RWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGEQP3 computes a QR factorization with column pivoting of a +* matrix A: A*P = Q*R using Level 3 BLAS. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the upper triangle of the array contains the +* min(M,N)-by-N upper trapezoidal matrix R; the elements below +* the diagonal, together with the array TAU, represent the +* unitary matrix Q as a product of min(M,N) elementary +* reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(J).ne.0, the J-th column of A is permuted +* to the front of A*P (a leading column); if JPVT(J)=0, +* the J-th column of A is a free column. +* On exit, if JPVT(J)=K, then the J-th column of A*P was the +* the K-th column of A. +* +* TAU (output) COMPLEX*16 array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO=0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= N+1. +* For optimal performance LWORK >= ( N+1 )*NB, where NB +* is the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a real/complex scalar, and v is a real/complex vector +* with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in +* A(i+1:m,i), and tau in TAU(i). +* +* Based on contributions by +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* X. Sun, Computer Science Dept., Duke University, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER INB, INBMIN, IXOVER + PARAMETER ( INB = 1, INBMIN = 2, IXOVER = 3 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER FJB, IWS, J, JB, LWKOPT, MINMN, MINWS, NA, NB, + $ NBMIN, NFXD, NX, SM, SMINMN, SN, TOPBMN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQRF, ZLAQP2, ZLAQPS, ZSWAP, ZUNMQR +* .. +* .. External Functions .. + INTEGER ILAENV + DOUBLE PRECISION DZNRM2 + EXTERNAL ILAENV, DZNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* + IWS = N + 1 + MINMN = MIN( M, N ) +* +* Test input arguments +* ==================== +* + INFO = 0 + NB = ILAENV( INB, 'ZGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = ( N+1 )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( ( LWORK.LT.IWS ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEQP3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( MINMN.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Move initial columns up front. +* + NFXD = 1 + DO 10 J = 1, N + IF( JPVT( J ).NE.0 ) THEN + IF( J.NE.NFXD ) THEN + CALL ZSWAP( M, A( 1, J ), 1, A( 1, NFXD ), 1 ) + JPVT( J ) = JPVT( NFXD ) + JPVT( NFXD ) = J + ELSE + JPVT( J ) = J + END IF + NFXD = NFXD + 1 + ELSE + JPVT( J ) = J + END IF + 10 CONTINUE + NFXD = NFXD - 1 +* +* Factorize fixed columns +* ======================= +* +* Compute the QR factorization of fixed columns and update +* remaining columns. +* + IF( NFXD.GT.0 ) THEN + NA = MIN( M, NFXD ) +*CC CALL ZGEQR2( M, NA, A, LDA, TAU, WORK, INFO ) + CALL ZGEQRF( M, NA, A, LDA, TAU, WORK, LWORK, INFO ) + IWS = MAX( IWS, INT( WORK( 1 ) ) ) + IF( NA.LT.N ) THEN +*CC CALL ZUNM2R( 'Left', 'Conjugate Transpose', M, N-NA, +*CC $ NA, A, LDA, TAU, A( 1, NA+1 ), LDA, WORK, +*CC $ INFO ) + CALL ZUNMQR( 'Left', 'Conjugate Transpose', M, N-NA, NA, A, + $ LDA, TAU, A( 1, NA+1 ), LDA, WORK, LWORK, + $ INFO ) + IWS = MAX( IWS, INT( WORK( 1 ) ) ) + END IF + END IF +* +* Factorize free columns +* ====================== +* + IF( NFXD.LT.MINMN ) THEN +* + SM = M - NFXD + SN = N - NFXD + SMINMN = MINMN - NFXD +* +* Determine the block size. +* + NB = ILAENV( INB, 'ZGEQRF', ' ', SM, SN, -1, -1 ) + NBMIN = 2 + NX = 0 +* + IF( ( NB.GT.1 ) .AND. ( NB.LT.SMINMN ) ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( IXOVER, 'ZGEQRF', ' ', SM, SN, -1, + $ -1 ) ) +* +* + IF( NX.LT.SMINMN ) THEN +* +* Determine if workspace is large enough for blocked code. +* + MINWS = ( SN+1 )*NB + IWS = MAX( IWS, MINWS ) + IF( LWORK.LT.MINWS ) THEN +* +* Not enough workspace to use optimal NB: Reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / ( SN+1 ) + NBMIN = MAX( 2, ILAENV( INBMIN, 'ZGEQRF', ' ', SM, SN, + $ -1, -1 ) ) +* +* + END IF + END IF + END IF +* +* Initialize partial column norms. The first N elements of work +* store the exact column norms. +* + DO 20 J = NFXD + 1, N + RWORK( J ) = DZNRM2( SM, A( NFXD+1, J ), 1 ) + RWORK( N+J ) = RWORK( J ) + 20 CONTINUE +* + IF( ( NB.GE.NBMIN ) .AND. ( NB.LT.SMINMN ) .AND. + $ ( NX.LT.SMINMN ) ) THEN +* +* Use blocked code initially. +* + J = NFXD + 1 +* +* Compute factorization: while loop. +* +* + TOPBMN = MINMN - NX + 30 CONTINUE + IF( J.LE.TOPBMN ) THEN + JB = MIN( NB, TOPBMN-J+1 ) +* +* Factorize JB columns among columns J:N. +* + CALL ZLAQPS( M, N-J+1, J-1, JB, FJB, A( 1, J ), LDA, + $ JPVT( J ), TAU( J ), RWORK( J ), + $ RWORK( N+J ), WORK( 1 ), WORK( JB+1 ), + $ N-J+1 ) +* + J = J + FJB + GO TO 30 + END IF + ELSE + J = NFXD + 1 + END IF +* +* Use unblocked code to factor the last or only block. +* +* + IF( J.LE.MINMN ) + $ CALL ZLAQP2( M, N-J+1, J-1, A( 1, J ), LDA, JPVT( J ), + $ TAU( J ), RWORK( J ), RWORK( N+J ), WORK( 1 ) ) +* + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of ZGEQP3 +* + END diff --git a/costa/native/external/lapack/zgeqpf.f b/costa/native/external/lapack/zgeqpf.f new file mode 100644 index 000000000..fa1ed602a --- /dev/null +++ b/costa/native/external/lapack/zgeqpf.f @@ -0,0 +1,225 @@ + SUBROUTINE ZGEQPF( M, N, A, LDA, JPVT, TAU, WORK, RWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine ZGEQP3. +* +* ZGEQPF computes a QR factorization with column pivoting of a +* complex M-by-N matrix A: A*P = Q*R. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0 +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the upper triangle of the array contains the +* min(M,N)-by-N upper triangular matrix R; the elements +* below the diagonal, together with the array TAU, +* represent the unitary matrix Q as a product of +* min(m,n) elementary reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +* to the front of A*P (a leading column); if JPVT(i) = 0, +* the i-th column of A is a free column. +* On exit, if JPVT(i) = k, then the i-th column of A*P +* was the k-th column of A. +* +* TAU (output) COMPLEX*16 array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace) COMPLEX*16 array, dimension (N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(n) +* +* Each H(i) has the form +* +* H = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i). +* +* The matrix P is represented in jpvt as follows: If +* jpvt(j) = i +* then the jth column of P is the ith canonical unit vector. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, ITEMP, J, MA, MN, PVT + DOUBLE PRECISION TEMP, TEMP2 + COMPLEX*16 AII +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQR2, ZLARF, ZLARFG, ZSWAP, ZUNM2R +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DCMPLX, DCONJG, MAX, MIN, SQRT +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DZNRM2 + EXTERNAL IDAMAX, DZNRM2 +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEQPF', -INFO ) + RETURN + END IF +* + MN = MIN( M, N ) +* +* Move initial columns up front +* + ITEMP = 1 + DO 10 I = 1, N + IF( JPVT( I ).NE.0 ) THEN + IF( I.NE.ITEMP ) THEN + CALL ZSWAP( M, A( 1, I ), 1, A( 1, ITEMP ), 1 ) + JPVT( I ) = JPVT( ITEMP ) + JPVT( ITEMP ) = I + ELSE + JPVT( I ) = I + END IF + ITEMP = ITEMP + 1 + ELSE + JPVT( I ) = I + END IF + 10 CONTINUE + ITEMP = ITEMP - 1 +* +* Compute the QR factorization and update remaining columns +* + IF( ITEMP.GT.0 ) THEN + MA = MIN( ITEMP, M ) + CALL ZGEQR2( M, MA, A, LDA, TAU, WORK, INFO ) + IF( MA.LT.N ) THEN + CALL ZUNM2R( 'Left', 'Conjugate transpose', M, N-MA, MA, A, + $ LDA, TAU, A( 1, MA+1 ), LDA, WORK, INFO ) + END IF + END IF +* + IF( ITEMP.LT.MN ) THEN +* +* Initialize partial column norms. The first n elements of +* work store the exact column norms. +* + DO 20 I = ITEMP + 1, N + RWORK( I ) = DZNRM2( M-ITEMP, A( ITEMP+1, I ), 1 ) + RWORK( N+I ) = RWORK( I ) + 20 CONTINUE +* +* Compute factorization +* + DO 40 I = ITEMP + 1, MN +* +* Determine ith pivot column and swap if necessary +* + PVT = ( I-1 ) + IDAMAX( N-I+1, RWORK( I ), 1 ) +* + IF( PVT.NE.I ) THEN + CALL ZSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( I ) + JPVT( I ) = ITEMP + RWORK( PVT ) = RWORK( I ) + RWORK( N+PVT ) = RWORK( N+I ) + END IF +* +* Generate elementary reflector H(i) +* + AII = A( I, I ) + CALL ZLARFG( M-I+1, AII, A( MIN( I+1, M ), I ), 1, + $ TAU( I ) ) + A( I, I ) = AII +* + IF( I.LT.N ) THEN +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + AII = A( I, I ) + A( I, I ) = DCMPLX( ONE ) + CALL ZLARF( 'Left', M-I+1, N-I, A( I, I ), 1, + $ DCONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK ) + A( I, I ) = AII + END IF +* +* Update partial column norms +* + DO 30 J = I + 1, N + IF( RWORK( J ).NE.ZERO ) THEN + TEMP = ONE - ( ABS( A( I, J ) ) / RWORK( J ) )**2 + TEMP = MAX( TEMP, ZERO ) + TEMP2 = ONE + 0.05D0*TEMP* + $ ( RWORK( J ) / RWORK( N+J ) )**2 + IF( TEMP2.EQ.ONE ) THEN + IF( M-I.GT.0 ) THEN + RWORK( J ) = DZNRM2( M-I, A( I+1, J ), 1 ) + RWORK( N+J ) = RWORK( J ) + ELSE + RWORK( J ) = ZERO + RWORK( N+J ) = ZERO + END IF + ELSE + RWORK( J ) = RWORK( J )*SQRT( TEMP ) + END IF + END IF + 30 CONTINUE +* + 40 CONTINUE + END IF + RETURN +* +* End of ZGEQPF +* + END diff --git a/costa/native/external/lapack/zgeqr2.f b/costa/native/external/lapack/zgeqr2.f new file mode 100644 index 000000000..a4f226eca --- /dev/null +++ b/costa/native/external/lapack/zgeqr2.f @@ -0,0 +1,122 @@ + SUBROUTINE ZGEQR2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGEQR2 computes a QR factorization of a complex m by n matrix A: +* A = Q * R. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the m by n matrix A. +* On exit, the elements on and above the diagonal of the array +* contain the min(m,n) by n upper trapezoidal matrix R (R is +* upper triangular if m >= n); the elements below the diagonal, +* with the array TAU, represent the unitary matrix Q as a +* product of elementary reflectors (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX*16 array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) COMPLEX*16 array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +* and tau in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARF, ZLARFG +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEQR2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + CALL ZLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAU( I ) ) + IF( I.LT.N ) THEN +* +* Apply H(i)' to A(i:m,i+1:n) from the left +* + ALPHA = A( I, I ) + A( I, I ) = ONE + CALL ZLARF( 'Left', M-I+1, N-I, A( I, I ), 1, + $ DCONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK ) + A( I, I ) = ALPHA + END IF + 10 CONTINUE + RETURN +* +* End of ZGEQR2 +* + END diff --git a/costa/native/external/lapack/zgeqrf.f b/costa/native/external/lapack/zgeqrf.f new file mode 100644 index 000000000..d621c1e0d --- /dev/null +++ b/costa/native/external/lapack/zgeqrf.f @@ -0,0 +1,197 @@ + SUBROUTINE ZGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGEQRF computes a QR factorization of a complex M-by-N matrix A: +* A = Q * R. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the elements on and above the diagonal of the array +* contain the min(M,N)-by-N upper trapezoidal matrix R (R is +* upper triangular if m >= n); the elements below the diagonal, +* with the array TAU, represent the unitary matrix Q as a +* product of min(m,n) elementary reflectors (see Further +* Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX*16 array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +* and tau in TAU(i). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQR2, ZLARFB, ZLARFT +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'ZGEQRF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZGEQRF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the QR factorization of the current block +* A(i:m,i:i+ib-1) +* + CALL ZGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL ZLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H' to A(i:m,i+ib:n) from the left +* + CALL ZLARFB( 'Left', 'Conjugate transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) + $ CALL ZGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of ZGEQRF +* + END diff --git a/costa/native/external/lapack/zgerfs.f b/costa/native/external/lapack/zgerfs.f new file mode 100644 index 000000000..aa2f0418d --- /dev/null +++ b/costa/native/external/lapack/zgerfs.f @@ -0,0 +1,341 @@ + SUBROUTINE ZGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, + $ X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZGERFS improves the computed solution to a system of linear +* equations and provides error bounds and backward error estimates for +* the solution. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The original N-by-N matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input) COMPLEX*16 array, dimension (LDAF,N) +* The factors L and U from the factorization A = P*L*U +* as computed by ZGETRF. +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from ZGETRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* B (input) COMPLEX*16 array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) COMPLEX*16 array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by ZGETRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANSN, TRANST + INTEGER COUNT, I, J, K, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZGEMV, ZGETRS, ZLACON +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGERFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL ZGEMV( TRANS, N, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK, + $ 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(op(A))*abs(X) + abs(B). +* + IF( NOTRAN ) THEN + DO 50 K = 1, N + XK = CABS1( X( K, J ) ) + DO 40 I = 1, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + 40 CONTINUE + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + DO 60 I = 1, N + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL ZGETRS( TRANS, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + CALL ZAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL ZGETRS( TRANST, N, 1, AF, LDAF, IPIV, WORK, N, + $ INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL ZGETRS( TRANSN, N, 1, AF, LDAF, IPIV, WORK, N, + $ INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of ZGERFS +* + END diff --git a/costa/native/external/lapack/zgerq2.f b/costa/native/external/lapack/zgerq2.f new file mode 100644 index 000000000..6e709f69f --- /dev/null +++ b/costa/native/external/lapack/zgerq2.f @@ -0,0 +1,124 @@ + SUBROUTINE ZGERQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGERQ2 computes an RQ factorization of a complex m by n matrix A: +* A = R * Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the m by n matrix A. +* On exit, if m <= n, the upper triangle of the subarray +* A(1:m,n-m+1:n) contains the m by m upper triangular matrix R; +* if m >= n, the elements on and above the (m-n)-th subdiagonal +* contain the m by n upper trapezoidal matrix R; the remaining +* elements, with the array TAU, represent the unitary matrix +* Q as a product of elementary reflectors (see Further +* Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX*16 array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace) COMPLEX*16 array, dimension (M) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1)' H(2)' . . . H(k)', where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on +* exit in A(m-k+i,1:n-k+i-1), and tau in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACGV, ZLARF, ZLARFG +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGERQ2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = K, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* A(m-k+i,1:n-k+i-1) +* + CALL ZLACGV( N-K+I, A( M-K+I, 1 ), LDA ) + ALPHA = A( M-K+I, N-K+I ) + CALL ZLARFG( N-K+I, ALPHA, A( M-K+I, 1 ), LDA, TAU( I ) ) +* +* Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right +* + A( M-K+I, N-K+I ) = ONE + CALL ZLARF( 'Right', M-K+I-1, N-K+I, A( M-K+I, 1 ), LDA, + $ TAU( I ), A, LDA, WORK ) + A( M-K+I, N-K+I ) = ALPHA + CALL ZLACGV( N-K+I-1, A( M-K+I, 1 ), LDA ) + 10 CONTINUE + RETURN +* +* End of ZGERQ2 +* + END diff --git a/costa/native/external/lapack/zgerqf.f b/costa/native/external/lapack/zgerqf.f new file mode 100644 index 000000000..e213e8cb3 --- /dev/null +++ b/costa/native/external/lapack/zgerqf.f @@ -0,0 +1,205 @@ + SUBROUTINE ZGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGERQF computes an RQ factorization of a complex M-by-N matrix A: +* A = R * Q. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, +* if m <= n, the upper triangle of the subarray +* A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R; +* if m >= n, the elements on and above the (m-n)-th subdiagonal +* contain the M-by-N upper trapezoidal matrix R; +* the remaining elements, with the array TAU, represent the +* unitary matrix Q as a product of min(m,n) elementary +* reflectors (see Further Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX*16 array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1)' H(2)' . . . H(k)', where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on +* exit in A(m-k+i,1:n-k+i-1), and tau in TAU(i). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT, + $ MU, NB, NBMIN, NU, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGERQ2, ZLARFB, ZLARFT +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'ZGERQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGERQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'ZGERQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZGERQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially. +* The last kk rows are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* + DO 10 I = K - KK + KI + 1, K - KK + 1, -NB + IB = MIN( K-I+1, NB ) +* +* Compute the RQ factorization of the current block +* A(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1) +* + CALL ZGERQ2( IB, N-K+I+IB-1, A( M-K+I, 1 ), LDA, TAU( I ), + $ WORK, IINFO ) + IF( M-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL ZLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, + $ A( M-K+I, 1 ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right +* + CALL ZLARFB( 'Right', 'No transpose', 'Backward', + $ 'Rowwise', M-K+I-1, N-K+I+IB-1, IB, + $ A( M-K+I, 1 ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + MU = M - K + I + NB - 1 + NU = N - K + I + NB - 1 + ELSE + MU = M + NU = N + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 .AND. NU.GT.0 ) + $ CALL ZGERQ2( MU, NU, A, LDA, TAU, WORK, IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of ZGERQF +* + END diff --git a/costa/native/external/lapack/zgesc2.f b/costa/native/external/lapack/zgesc2.f new file mode 100644 index 000000000..ce4375287 --- /dev/null +++ b/costa/native/external/lapack/zgesc2.f @@ -0,0 +1,134 @@ + SUBROUTINE ZGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER LDA, N + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + COMPLEX*16 A( LDA, * ), RHS( * ) +* .. +* +* Purpose +* ======= +* +* ZGESC2 solves a system of linear equations +* +* A * X = scale* RHS +* +* with a general N-by-N matrix A using the LU factorization with +* complete pivoting computed by ZGETC2. +* +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of columns of the matrix A. +* +* A (input) COMPLEX*16 array, dimension (LDA, N) +* On entry, the LU part of the factorization of the n-by-n +* matrix A computed by ZGETC2: A = P * L * U * Q +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1, N). +* +* RHS (input/output) COMPLEX*16 array, dimension N. +* On entry, the right hand side vector b. +* On exit, the solution vector X. +* +* IPIV (iput) INTEGER array, dimension (N). +* The pivot indices; for 1 <= i <= N, row i of the +* matrix has been interchanged with row IPIV(i). +* +* JPIV (iput) INTEGER array, dimension (N). +* The pivot indices; for 1 <= j <= N, column j of the +* matrix has been interchanged with column JPIV(j). +* +* SCALE (output) DOUBLE PRECISION +* On exit, SCALE contains the scale factor. SCALE is chosen +* 0 <= SCALE <= 1 to prevent owerflow in the solution. +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION BIGNUM, EPS, SMLNUM + COMPLEX*16 TEMP +* .. +* .. External Subroutines .. + EXTERNAL ZLASWP, ZSCAL +* .. +* .. External Functions .. + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL IZAMAX, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX +* .. +* .. Executable Statements .. +* +* Set constant to control overflow +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Apply permutations IPIV to RHS +* + CALL ZLASWP( 1, RHS, LDA, 1, N-1, IPIV, 1 ) +* +* Solve for L part +* + DO 20 I = 1, N - 1 + DO 10 J = I + 1, N + RHS( J ) = RHS( J ) - A( J, I )*RHS( I ) + 10 CONTINUE + 20 CONTINUE +* +* Solve for U part +* + SCALE = ONE +* +* Check for scaling +* + I = IZAMAX( N, RHS, 1 ) + IF( TWO*SMLNUM*ABS( RHS( I ) ).GT.ABS( A( N, N ) ) ) THEN + TEMP = DCMPLX( ONE / TWO, ZERO ) / ABS( RHS( I ) ) + CALL ZSCAL( N, TEMP, RHS( 1 ), 1 ) + SCALE = SCALE*DBLE( TEMP ) + END IF + DO 40 I = N, 1, -1 + TEMP = DCMPLX( ONE, ZERO ) / A( I, I ) + RHS( I ) = RHS( I )*TEMP + DO 30 J = I + 1, N + RHS( I ) = RHS( I ) - RHS( J )*( A( I, J )*TEMP ) + 30 CONTINUE + 40 CONTINUE +* +* Apply permutations JPIV to the solution (RHS) +* + CALL ZLASWP( 1, RHS, LDA, 1, N-1, JPIV, -1 ) + RETURN +* +* End of ZGESC2 +* + END diff --git a/costa/native/external/lapack/zgesdd.f b/costa/native/external/lapack/zgesdd.f new file mode 100644 index 000000000..8954ce496 --- /dev/null +++ b/costa/native/external/lapack/zgesdd.f @@ -0,0 +1,1950 @@ + SUBROUTINE ZGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, + $ LWORK, RWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ + INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ), S( * ) + COMPLEX*16 A( LDA, * ), U( LDU, * ), VT( LDVT, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGESDD computes the singular value decomposition (SVD) of a complex +* M-by-N matrix A, optionally computing the left and/or right singular +* vectors, by using divide-and-conquer method. The SVD is written +* +* A = U * SIGMA * conjugate-transpose(V) +* +* where SIGMA is an M-by-N matrix which is zero except for its +* min(m,n) diagonal elements, U is an M-by-M unitary matrix, and +* V is an N-by-N unitary matrix. The diagonal elements of SIGMA +* are the singular values of A; they are real and non-negative, and +* are returned in descending order. The first min(m,n) columns of +* U and V are the left and right singular vectors of A. +* +* Note that the routine returns VT = V**H, not V. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* Specifies options for computing all or part of the matrix U: +* = 'A': all M columns of U and all N rows of V**H are +* returned in the arrays U and VT; +* = 'S': the first min(M,N) columns of U and the first +* min(M,N) rows of V**H are returned in the arrays U +* and VT; +* = 'O': If M >= N, the first N columns of U are overwritten +* on the array A and all rows of V**H are returned in +* the array VT; +* otherwise, all columns of U are returned in the +* array U and the first M rows of V**H are overwritten +* in the array VT; +* = 'N': no columns of U or rows of V**H are computed. +* +* M (input) INTEGER +* The number of rows of the input matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the input matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, +* if JOBZ = 'O', A is overwritten with the first N columns +* of U (the left singular vectors, stored +* columnwise) if M >= N; +* A is overwritten with the first M rows +* of V**H (the right singular vectors, stored +* rowwise) otherwise. +* if JOBZ .ne. 'O', the contents of A are destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* S (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The singular values of A, sorted so that S(i) >= S(i+1). +* +* U (output) COMPLEX*16 array, dimension (LDU,UCOL) +* UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; +* UCOL = min(M,N) if JOBZ = 'S'. +* If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M +* unitary matrix U; +* if JOBZ = 'S', U contains the first min(M,N) columns of U +* (the left singular vectors, stored columnwise); +* if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= 1; if +* JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. +* +* VT (output) COMPLEX*16 array, dimension (LDVT,N) +* If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the +* N-by-N unitary matrix V**H; +* if JOBZ = 'S', VT contains the first min(M,N) rows of +* V**H (the right singular vectors, stored rowwise); +* if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. LDVT >= 1; if +* JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; +* if JOBZ = 'S', LDVT >= min(M,N). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1. +* if JOBZ = 'N', LWORK >= 2*min(M,N)+max(M,N). +* if JOBZ = 'O', +* LWORK >= 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N). +* if JOBZ = 'S' or 'A', +* LWORK >= min(M,N)*min(M,N)+2*min(M,N)+max(M,N). +* For good performance, LWORK should generally be larger. +* If LWORK < 0 but other input arguments are legal, WORK(1) +* returns the optimal LWORK. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (LRWORK) +* If JOBZ = 'N', LRWORK >= 7*min(M,N). +* Otherwise, LRWORK >= 5*min(M,N)*min(M,N) + 5*min(M,N) +* +* IWORK (workspace) INTEGER array, dimension (8*min(M,N)) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: The updating process of DBDSDC did not converge. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS + INTEGER BLK, CHUNK, I, IE, IERR, IL, IR, IRU, IRVT, + $ ISCL, ITAU, ITAUP, ITAUQ, IU, IVT, LDWKVT, + $ LDWRKL, LDWRKR, LDWRKU, MAXWRK, MINMN, MINWRK, + $ MNTHR1, MNTHR2, NRWORK, NWORK, WRKBL + DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DBDSDC, DLASCL, XERBLA, ZGEBRD, ZGELQF, ZGEMM, + $ ZGEQRF, ZLACP2, ZLACPY, ZLACRM, ZLARCM, ZLASCL, + $ ZLASET, ZUNGBR, ZUNGLQ, ZUNGQR, ZUNMBR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL DLAMCH, ILAENV, LSAME, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + MNTHR1 = INT( MINMN*17.0D0 / 9.0D0 ) + MNTHR2 = INT( MINMN*5.0D0 / 3.0D0 ) + WNTQA = LSAME( JOBZ, 'A' ) + WNTQS = LSAME( JOBZ, 'S' ) + WNTQAS = WNTQA .OR. WNTQS + WNTQO = LSAME( JOBZ, 'O' ) + WNTQN = LSAME( JOBZ, 'N' ) + MINWRK = 1 + MAXWRK = 1 + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.( WNTQA .OR. WNTQS .OR. WNTQO .OR. WNTQN ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDU.LT.1 .OR. ( WNTQAS .AND. LDU.LT.M ) .OR. + $ ( WNTQO .AND. M.LT.N .AND. LDU.LT.M ) ) THEN + INFO = -8 + ELSE IF( LDVT.LT.1 .OR. ( WNTQA .AND. LDVT.LT.N ) .OR. + $ ( WNTQS .AND. LDVT.LT.MINMN ) .OR. + $ ( WNTQO .AND. M.GE.N .AND. LDVT.LT.N ) ) THEN + INFO = -10 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to +* real workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 .AND. M.GT.0 .AND. N.GT.0 ) THEN + IF( M.GE.N ) THEN +* +* There is no complex work space needed for bidiagonal SVD +* The real work space needed for bidiagonal SVD is BDSPAC, +* BDSPAC = 3*N*N + 4*N +* + IF( M.GE.MNTHR1 ) THEN + IF( WNTQN ) THEN +* +* Path 1 (M much larger than N, JOBZ='N') +* + WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, + $ -1 ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) ) + MAXWRK = WRKBL + MINWRK = 3*N + ELSE IF( WNTQO ) THEN +* +* Path 2 (M much larger than N, JOBZ='O') +* + WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'ZUNGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'ZUNMBR', 'QLN', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'ZUNMBR', 'PRC', N, N, N, -1 ) ) + MAXWRK = M*N + N*N + WRKBL + MINWRK = 2*N*N + 3*N + ELSE IF( WNTQS ) THEN +* +* Path 3 (M much larger than N, JOBZ='S') +* + WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'ZUNGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'ZUNMBR', 'QLN', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'ZUNMBR', 'PRC', N, N, N, -1 ) ) + MAXWRK = N*N + WRKBL + MINWRK = N*N + 3*N + ELSE IF( WNTQA ) THEN +* +* Path 4 (M much larger than N, JOBZ='A') +* + WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'ZUNGQR', ' ', M, + $ M, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'ZUNMBR', 'QLN', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'ZUNMBR', 'PRC', N, N, N, -1 ) ) + MAXWRK = N*N + WRKBL + MINWRK = N*N + 2*N + M + END IF + ELSE IF( M.GE.MNTHR2 ) THEN +* +* Path 5 (M much larger than N, but not as much as MNTHR1) +* + MAXWRK = 2*N + ( M+N )*ILAENV( 1, 'ZGEBRD', ' ', M, N, + $ -1, -1 ) + MINWRK = 2*N + M + IF( WNTQO ) THEN + MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'ZUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'ZUNGBR', 'Q', M, N, N, -1 ) ) + MAXWRK = MAXWRK + M*N + MINWRK = MINWRK + N*N + ELSE IF( WNTQS ) THEN + MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'ZUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'ZUNGBR', 'Q', M, N, N, -1 ) ) + ELSE IF( WNTQA ) THEN + MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'ZUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N+M* + $ ILAENV( 1, 'ZUNGBR', 'Q', M, M, N, -1 ) ) + END IF + ELSE +* +* Path 6 (M at least N, but not much larger) +* + MAXWRK = 2*N + ( M+N )*ILAENV( 1, 'ZGEBRD', ' ', M, N, + $ -1, -1 ) + MINWRK = 2*N + M + IF( WNTQO ) THEN + MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'ZUNMBR', 'PRC', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'ZUNMBR', 'QLN', M, N, N, -1 ) ) + MAXWRK = MAXWRK + M*N + MINWRK = MINWRK + N*N + ELSE IF( WNTQS ) THEN + MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'ZUNMBR', 'PRC', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'ZUNMBR', 'QLN', M, N, N, -1 ) ) + ELSE IF( WNTQA ) THEN + MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'ZUNGBR', 'PRC', N, N, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N+M* + $ ILAENV( 1, 'ZUNGBR', 'QLN', M, M, N, -1 ) ) + END IF + END IF + ELSE +* +* There is no complex work space needed for bidiagonal SVD +* The real work space needed for bidiagonal SVD is BDSPAC, +* BDSPAC = 3*M*M + 4*M +* + IF( N.GE.MNTHR1 ) THEN + IF( WNTQN ) THEN +* +* Path 1t (N much larger than M, JOBZ='N') +* + MAXWRK = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, + $ -1 ) + MAXWRK = MAX( MAXWRK, 2*M+2*M* + $ ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) ) + MINWRK = 3*M + ELSE IF( WNTQO ) THEN +* +* Path 2t (N much larger than M, JOBZ='O') +* + WRKBL = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'ZUNGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'ZUNMBR', 'PRC', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'ZUNMBR', 'QLN', M, M, M, -1 ) ) + MAXWRK = M*N + M*M + WRKBL + MINWRK = 2*M*M + 3*M + ELSE IF( WNTQS ) THEN +* +* Path 3t (N much larger than M, JOBZ='S') +* + WRKBL = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'ZUNGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'ZUNMBR', 'PRC', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'ZUNMBR', 'QLN', M, M, M, -1 ) ) + MAXWRK = M*M + WRKBL + MINWRK = M*M + 3*M + ELSE IF( WNTQA ) THEN +* +* Path 4t (N much larger than M, JOBZ='A') +* + WRKBL = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'ZUNGLQ', ' ', N, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'ZUNMBR', 'PRC', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'ZUNMBR', 'QLN', M, M, M, -1 ) ) + MAXWRK = M*M + WRKBL + MINWRK = M*M + 2*M + N + END IF + ELSE IF( N.GE.MNTHR2 ) THEN +* +* Path 5t (N much larger than M, but not as much as MNTHR1) +* + MAXWRK = 2*M + ( M+N )*ILAENV( 1, 'ZGEBRD', ' ', M, N, + $ -1, -1 ) + MINWRK = 2*M + N + IF( WNTQO ) THEN + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'ZUNGBR', 'P', M, N, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'ZUNGBR', 'Q', M, M, N, -1 ) ) + MAXWRK = MAXWRK + M*N + MINWRK = MINWRK + M*M + ELSE IF( WNTQS ) THEN + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'ZUNGBR', 'P', M, N, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'ZUNGBR', 'Q', M, M, N, -1 ) ) + ELSE IF( WNTQA ) THEN + MAXWRK = MAX( MAXWRK, 2*M+N* + $ ILAENV( 1, 'ZUNGBR', 'P', N, N, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'ZUNGBR', 'Q', M, M, N, -1 ) ) + END IF + ELSE +* +* Path 6t (N greater than M, but not much larger) +* + MAXWRK = 2*M + ( M+N )*ILAENV( 1, 'ZGEBRD', ' ', M, N, + $ -1, -1 ) + MINWRK = 2*M + N + IF( WNTQO ) THEN + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'ZUNMBR', 'PRC', M, N, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'ZUNMBR', 'QLN', M, M, N, -1 ) ) + MAXWRK = MAXWRK + M*N + MINWRK = MINWRK + M*M + ELSE IF( WNTQS ) THEN + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'ZUNGBR', 'PRC', M, N, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'ZUNGBR', 'QLN', M, M, N, -1 ) ) + ELSE IF( WNTQA ) THEN + MAXWRK = MAX( MAXWRK, 2*M+N* + $ ILAENV( 1, 'ZUNGBR', 'PRC', N, N, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'ZUNGBR', 'QLN', M, M, N, -1 ) ) + END IF + END IF + END IF + MAXWRK = MAX( MAXWRK, MINWRK ) + WORK( 1 ) = MAXWRK + END IF +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGESDD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + IF( LWORK.GE.1 ) + $ WORK( 1 ) = ONE + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce using the QR +* decomposition (if sufficient workspace available) +* + IF( M.GE.MNTHR1 ) THEN +* + IF( WNTQN ) THEN +* +* Path 1 (M much larger than N, JOBZ='N') +* No singular vectors to be computed +* + ITAU = 1 + NWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: need 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Zero out below R +* + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), + $ LDA ) + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + NRWORK = IE + N +* +* Perform bidiagonal SVD, compute singular values only +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL DBDSDC( 'U', 'N', N, S, RWORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) +* + ELSE IF( WNTQO ) THEN +* +* Path 2 (M much larger than N, JOBZ='O') +* N left singular vectors to be overwritten on A and +* N right singular vectors to be computed in VT +* + IU = 1 +* +* WORK(IU) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + IF( LWORK.GE.M*N+N*N+3*N ) THEN +* +* WORK(IR) is M by N +* + LDWRKR = M + ELSE + LDWRKR = ( LWORK-N*N-3*N ) / N + END IF + ITAU = IR + LDWRKR*N + NWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer M*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy R to WORK( IR ), zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, WORK( IR+1 ), + $ LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer M*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of R in WORK(IRU) and computing right singular vectors +* of R in WORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRU = IE + N + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix WORK(IU) +* Overwrite WORK(IU) by the left singular vectors of R +* (CWorkspace: need 2*N*N+3*N, prefer M*N+N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZLACP2( 'F', N, N, RWORK( IRU ), N, WORK( IU ), + $ LDWRKU ) + CALL ZUNMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IU ), LDWRKU, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by the right singular vectors of R +* (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL ZUNMBR( 'P', 'R', 'C', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in WORK(IR) and copying to A +* (CWorkspace: need 2*N*N, prefer N*N+M*N) +* (RWorkspace: 0) +* + DO 10 I = 1, M, LDWRKR + CHUNK = MIN( M-I+1, LDWRKR ) + CALL ZGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ), + $ LDA, WORK( IU ), LDWRKU, CZERO, + $ WORK( IR ), LDWRKR ) + CALL ZLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR, + $ A( I, 1 ), LDA ) + 10 CONTINUE +* + ELSE IF( WNTQS ) THEN +* +* Path 3 (M much larger than N, JOBZ='S') +* N left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IR = 1 +* +* WORK(IR) is N by N +* + LDWRKR = N + ITAU = IR + LDWRKR*N + NWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, WORK( IR+1 ), + $ LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRU = IE + N + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of R +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZLACP2( 'F', N, N, RWORK( IRU ), N, U, LDU ) + CALL ZUNMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of R +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL ZUNMBR( 'P', 'R', 'C', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in U +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL ZLACPY( 'F', N, N, U, LDU, WORK( IR ), LDWRKR ) + CALL ZGEMM( 'N', 'N', M, N, N, CONE, A, LDA, WORK( IR ), + $ LDWRKR, CZERO, U, LDU ) +* + ELSE IF( WNTQA ) THEN +* +* Path 4 (M much larger than N, JOBZ='A') +* M left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IU = 1 +* +* WORK(IU) is N by N +* + LDWRKU = N + ITAU = IU + LDWRKU*N + NWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need N+M, prefer N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Produce R in A, zeroing out below it +* + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), + $ LDA ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IRU = IE + N + IRVT = IRU + N*N + NRWORK = IRVT + N*N +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix WORK(IU) +* Overwrite WORK(IU) by left singular vectors of R +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZLACP2( 'F', N, N, RWORK( IRU ), N, WORK( IU ), + $ LDWRKU ) + CALL ZUNMBR( 'Q', 'L', 'N', N, N, N, A, LDA, + $ WORK( ITAUQ ), WORK( IU ), LDWRKU, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of R +* (CWorkspace: need 3*N, prefer 2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL ZUNMBR( 'P', 'R', 'C', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, N, CONE, U, LDU, WORK( IU ), + $ LDWRKU, CZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL ZLACPY( 'F', M, N, A, LDA, U, LDU ) +* + END IF +* + ELSE IF( M.GE.MNTHR2 ) THEN +* +* MNTHR2 <= M < MNTHR1 +* +* Path 5 (M much larger than N, but not as much as MNTHR1) +* Reduce to bidiagonal form without QR decomposition, use +* ZUNGBR and matrix multiplication to compute singular vectors +* + IE = 1 + NRWORK = IE + N + ITAUQ = 1 + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize A +* (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IF( WNTQN ) THEN +* +* Compute singular values only +* (Cworkspace: 0) +* (Rworkspace: need BDSPAC) +* + CALL DBDSDC( 'U', 'N', N, S, RWORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN + IU = NWORK + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N +* +* Copy A to VT, generate P**H +* (Cworkspace: need 2*N, prefer N+N*NB) +* (Rworkspace: 0) +* + CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Generate Q in A +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* + IF( LWORK.GE.M*N+3*N ) THEN +* +* WORK( IU ) is M by N +* + LDWRKU = M + ELSE +* +* WORK(IU) is LDWRKU by N +* + LDWRKU = ( LWORK-3*N ) / N + END IF + NWORK = IU + LDWRKU*N +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply real matrix RWORK(IRVT) by P**H in VT, +* storing the result in WORK(IU), copying to VT +* (Cworkspace: need 0) +* (Rworkspace: need 3*N*N) +* + CALL ZLARCM( N, N, RWORK( IRVT ), N, VT, LDVT, + $ WORK( IU ), LDWRKU, RWORK( NRWORK ) ) + CALL ZLACPY( 'F', N, N, WORK( IU ), LDWRKU, VT, LDVT ) +* +* Multiply Q in A by real matrix RWORK(IRU), storing the +* result in WORK(IU), copying to A +* (CWorkspace: need N*N, prefer M*N) +* (Rworkspace: need 3*N*N, prefer N*N+2*M*N) +* + NRWORK = IRVT + DO 20 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL ZLACRM( CHUNK, N, A( I, 1 ), LDA, RWORK( IRU ), + $ N, WORK( IU ), LDWRKU, RWORK( NRWORK ) ) + CALL ZLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 20 CONTINUE +* + ELSE IF( WNTQS ) THEN +* +* Copy A to VT, generate P**H +* (Cworkspace: need 2*N, prefer N+N*NB) +* (Rworkspace: 0) +* + CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy A to U, generate Q +* (Cworkspace: need 2*N, prefer N+N*NB) +* (Rworkspace: 0) +* + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) + CALL ZUNGBR( 'Q', M, N, N, U, LDU, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply real matrix RWORK(IRVT) by P**H in VT, +* storing the result in A, copying to VT +* (Cworkspace: need 0) +* (Rworkspace: need 3*N*N) +* + CALL ZLARCM( N, N, RWORK( IRVT ), N, VT, LDVT, A, LDA, + $ RWORK( NRWORK ) ) + CALL ZLACPY( 'F', N, N, A, LDA, VT, LDVT ) +* +* Multiply Q in U by real matrix RWORK(IRU), storing the +* result in A, copying to U +* (CWorkspace: need 0) +* (Rworkspace: need N*N+2*M*N) +* + NRWORK = IRVT + CALL ZLACRM( M, N, U, LDU, RWORK( IRU ), N, A, LDA, + $ RWORK( NRWORK ) ) + CALL ZLACPY( 'F', M, N, A, LDA, U, LDU ) + ELSE +* +* Copy A to VT, generate P**H +* (Cworkspace: need 2*N, prefer N+N*NB) +* (Rworkspace: 0) +* + CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy A to U, generate Q +* (Cworkspace: need 2*N, prefer N+N*NB) +* (Rworkspace: 0) +* + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) + CALL ZUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply real matrix RWORK(IRVT) by P**H in VT, +* storing the result in A, copying to VT +* (Cworkspace: need 0) +* (Rworkspace: need 3*N*N) +* + CALL ZLARCM( N, N, RWORK( IRVT ), N, VT, LDVT, A, LDA, + $ RWORK( NRWORK ) ) + CALL ZLACPY( 'F', N, N, A, LDA, VT, LDVT ) +* +* Multiply Q in U by real matrix RWORK(IRU), storing the +* result in A, copying to U +* (CWorkspace: 0) +* (Rworkspace: need 3*N*N) +* + NRWORK = IRVT + CALL ZLACRM( M, N, U, LDU, RWORK( IRU ), N, A, LDA, + $ RWORK( NRWORK ) ) + CALL ZLACPY( 'F', M, N, A, LDA, U, LDU ) + END IF +* + ELSE +* +* M .LT. MNTHR2 +* +* Path 6 (M at least N, but not much larger) +* Reduce to bidiagonal form without QR decomposition +* Use ZUNMBR to compute singular vectors +* + IE = 1 + NRWORK = IE + N + ITAUQ = 1 + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize A +* (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IF( WNTQN ) THEN +* +* Compute singular values only +* (Cworkspace: 0) +* (Rworkspace: need BDSPAC) +* + CALL DBDSDC( 'U', 'N', N, S, RWORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN + IU = NWORK + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N + IF( LWORK.GE.M*N+3*N ) THEN +* +* WORK( IU ) is M by N +* + LDWRKU = M + ELSE +* +* WORK( IU ) is LDWRKU by N +* + LDWRKU = ( LWORK-3*N ) / N + END IF + NWORK = IU + LDWRKU*N +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of A +* (Cworkspace: need 2*N, prefer N+N*NB) +* (Rworkspace: need 0) +* + CALL ZLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL ZUNMBR( 'P', 'R', 'C', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* + IF( LWORK.GE.M*N+3*N ) THEN +* +* Copy real matrix RWORK(IRU) to complex matrix WORK(IU) +* Overwrite WORK(IU) by left singular vectors of A, copying +* to A +* (Cworkspace: need M*N+2*N, prefer M*N+N+N*NB) +* (Rworkspace: need 0) +* + CALL ZLASET( 'F', M, N, CZERO, CZERO, WORK( IU ), + $ LDWRKU ) + CALL ZLACP2( 'F', N, N, RWORK( IRU ), N, WORK( IU ), + $ LDWRKU ) + CALL ZUNMBR( 'Q', 'L', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), WORK( IU ), LDWRKU, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + CALL ZLACPY( 'F', M, N, WORK( IU ), LDWRKU, A, LDA ) + ELSE +* +* Generate Q in A +* (Cworkspace: need 2*N, prefer N+N*NB) +* (Rworkspace: need 0) +* + CALL ZUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by real matrix RWORK(IRU), storing the +* result in WORK(IU), copying to A +* (CWorkspace: need N*N, prefer M*N) +* (Rworkspace: need 3*N*N, prefer N*N+2*M*N) +* + NRWORK = IRVT + DO 30 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL ZLACRM( CHUNK, N, A( I, 1 ), LDA, + $ RWORK( IRU ), N, WORK( IU ), LDWRKU, + $ RWORK( NRWORK ) ) + CALL ZLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 30 CONTINUE + END IF +* + ELSE IF( WNTQS ) THEN +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of A +* (CWorkspace: need 3*N, prefer 2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZLASET( 'F', M, N, CZERO, CZERO, U, LDU ) + CALL ZLACP2( 'F', N, N, RWORK( IRU ), N, U, LDU ) + CALL ZUNMBR( 'Q', 'L', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of A +* (CWorkspace: need 3*N, prefer 2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL ZUNMBR( 'P', 'R', 'C', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + ELSE +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Set the right corner of U to identity matrix +* + CALL ZLASET( 'F', M, M, CZERO, CZERO, U, LDU ) + CALL ZLASET( 'F', M-N, M-N, CZERO, CONE, U( N+1, N+1 ), + $ LDU ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of A +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL ZLACP2( 'F', N, N, RWORK( IRU ), N, U, LDU ) + CALL ZUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of A +* (CWorkspace: need 3*N, prefer 2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL ZUNMBR( 'P', 'R', 'C', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + END IF +* + END IF +* + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce using the LQ decomposition +* (if sufficient workspace available) +* + IF( N.GE.MNTHR1 ) THEN +* + IF( WNTQN ) THEN +* +* Path 1t (N much larger than M, JOBZ='N') +* No singular vectors to be computed +* + ITAU = 1 + NWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Zero out above L +* + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ), + $ LDA ) + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + NRWORK = IE + M +* +* Perform bidiagonal SVD, compute singular values only +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL DBDSDC( 'U', 'N', M, S, RWORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) +* + ELSE IF( WNTQO ) THEN +* +* Path 2t (N much larger than M, JOBZ='O') +* M right singular vectors to be overwritten on A and +* M left singular vectors to be computed in U +* + IVT = 1 + LDWKVT = M +* +* WORK(IVT) is M by M +* + IL = IVT + LDWKVT*M + IF( LWORK.GE.M*N+M*M+3*M ) THEN +* +* WORK(IL) M by N +* + LDWRKL = M + CHUNK = N + ELSE +* +* WORK(IL) is M by CHUNK +* + LDWRKL = M + CHUNK = ( LWORK-M*M-3*M ) / M + END IF + ITAU = IL + LDWRKL*CHUNK + NWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy L to WORK(IL), zeroing about above it +* + CALL ZLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IL+LDWRKL ), LDWRKL ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, WORK( IL ), LDWRKL, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRU = IE + M + IRVT = IRU + M*M + NRWORK = IRVT + M*M + CALL DBDSDC( 'U', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix WORK(IU) +* Overwrite WORK(IU) by the left singular vectors of L +* (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL ZUNMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) +* Overwrite WORK(IVT) by the right singular vectors of L +* (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZLACP2( 'F', M, M, RWORK( IRVT ), M, WORK( IVT ), + $ LDWKVT ) + CALL ZUNMBR( 'P', 'R', 'C', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUP ), WORK( IVT ), LDWKVT, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply right singular vectors of L in WORK(IL) by Q +* in A, storing result in WORK(IL) and copying to A +* (CWorkspace: need 2*M*M, prefer M*M+M*N)) +* (RWorkspace: 0) +* + DO 40 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL ZGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IVT ), M, + $ A( 1, I ), LDA, CZERO, WORK( IL ), + $ LDWRKL ) + CALL ZLACPY( 'F', M, BLK, WORK( IL ), LDWRKL, + $ A( 1, I ), LDA ) + 40 CONTINUE +* + ELSE IF( WNTQS ) THEN +* +* Path 3t (N much larger than M, JOBZ='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IL = 1 +* +* WORK(IL) is M by M +* + LDWRKL = M + ITAU = IL + LDWRKL*M + NWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy L to WORK(IL), zeroing out above it +* + CALL ZLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IL+LDWRKL ), LDWRKL ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, WORK( IL ), LDWRKL, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRU = IE + M + IRVT = IRU + M*M + NRWORK = IRVT + M*M + CALL DBDSDC( 'U', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of L +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL ZUNMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by left singular vectors of L +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZLACP2( 'F', M, M, RWORK( IRVT ), M, VT, LDVT ) + CALL ZUNMBR( 'P', 'R', 'C', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy VT to WORK(IL), multiply right singular vectors of L +* in WORK(IL) by Q in A, storing result in VT +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL ZLACPY( 'F', M, M, VT, LDVT, WORK( IL ), LDWRKL ) + CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IL ), LDWRKL, + $ A, LDA, CZERO, VT, LDVT ) +* + ELSE IF( WNTQA ) THEN +* +* Path 9t (N much larger than M, JOBZ='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IVT = 1 +* +* WORK(IVT) is M by M +* + LDWKVT = M + ITAU = IVT + LDWKVT*M + NWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need M+N, prefer M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Produce L in A, zeroing out above it +* + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ), + $ LDA ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in A +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRU = IE + M + IRVT = IRU + M*M + NRWORK = IRVT + M*M + CALL DBDSDC( 'U', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of L +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL ZUNMBR( 'Q', 'L', 'N', M, M, M, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) +* Overwrite WORK(IVT) by right singular vectors of L +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZLACP2( 'F', M, M, RWORK( IRVT ), M, WORK( IVT ), + $ LDWKVT ) + CALL ZUNMBR( 'P', 'R', 'C', M, M, M, A, LDA, + $ WORK( ITAUP ), WORK( IVT ), LDWKVT, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply right singular vectors of L in WORK(IVT) by +* Q in VT, storing result in A +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IVT ), LDWKVT, + $ VT, LDVT, CZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + END IF +* + ELSE IF( N.GE.MNTHR2 ) THEN +* +* MNTHR2 <= N < MNTHR1 +* +* Path 5t (N much larger than M, but not as much as MNTHR1) +* Reduce to bidiagonal form without QR decomposition, use +* ZUNGBR and matrix multiplication to compute singular vectors +* +* + IE = 1 + NRWORK = IE + M + ITAUQ = 1 + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize A +* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) +* (RWorkspace: M) +* + CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) +* + IF( WNTQN ) THEN +* +* Compute singular values only +* (Cworkspace: 0) +* (Rworkspace: need BDSPAC) +* + CALL DBDSDC( 'L', 'N', M, S, RWORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M + IVT = NWORK +* +* Copy A to U, generate Q +* (Cworkspace: need 2*M, prefer M+M*NB) +* (Rworkspace: 0) +* + CALL ZLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL ZUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Generate P**H in A +* (Cworkspace: need 2*M, prefer M+M*NB) +* (Rworkspace: 0) +* + CALL ZUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* + LDWKVT = M + IF( LWORK.GE.M*N+3*M ) THEN +* +* WORK( IVT ) is M by N +* + NWORK = IVT + LDWKVT*N + CHUNK = N + ELSE +* +* WORK( IVT ) is M by CHUNK +* + CHUNK = ( LWORK-3*M ) / M + NWORK = IVT + LDWKVT*CHUNK + END IF +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + CALL DBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply Q in U by real matrix RWORK(IRVT) +* storing the result in WORK(IVT), copying to U +* (Cworkspace: need 0) +* (Rworkspace: need 2*M*M) +* + CALL ZLACRM( M, M, U, LDU, RWORK( IRU ), M, WORK( IVT ), + $ LDWKVT, RWORK( NRWORK ) ) + CALL ZLACPY( 'F', M, M, WORK( IVT ), LDWKVT, U, LDU ) +* +* Multiply RWORK(IRVT) by P**H in A, storing the +* result in WORK(IVT), copying to A +* (CWorkspace: need M*M, prefer M*N) +* (Rworkspace: need 2*M*M, prefer 2*M*N) +* + NRWORK = IRU + DO 50 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL ZLARCM( M, BLK, RWORK( IRVT ), M, A( 1, I ), LDA, + $ WORK( IVT ), LDWKVT, RWORK( NRWORK ) ) + CALL ZLACPY( 'F', M, BLK, WORK( IVT ), LDWKVT, + $ A( 1, I ), LDA ) + 50 CONTINUE + ELSE IF( WNTQS ) THEN +* +* Copy A to U, generate Q +* (Cworkspace: need 2*M, prefer M+M*NB) +* (Rworkspace: 0) +* + CALL ZLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL ZUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy A to VT, generate P**H +* (Cworkspace: need 2*M, prefer M+M*NB) +* (Rworkspace: 0) +* + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) + CALL ZUNGBR( 'P', M, N, M, VT, LDVT, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M + CALL DBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply Q in U by real matrix RWORK(IRU), storing the +* result in A, copying to U +* (CWorkspace: need 0) +* (Rworkspace: need 3*M*M) +* + CALL ZLACRM( M, M, U, LDU, RWORK( IRU ), M, A, LDA, + $ RWORK( NRWORK ) ) + CALL ZLACPY( 'F', M, M, A, LDA, U, LDU ) +* +* Multiply real matrix RWORK(IRVT) by P**H in VT, +* storing the result in A, copying to VT +* (Cworkspace: need 0) +* (Rworkspace: need M*M+2*M*N) +* + NRWORK = IRU + CALL ZLARCM( M, N, RWORK( IRVT ), M, VT, LDVT, A, LDA, + $ RWORK( NRWORK ) ) + CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT ) + ELSE +* +* Copy A to U, generate Q +* (Cworkspace: need 2*M, prefer M+M*NB) +* (Rworkspace: 0) +* + CALL ZLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL ZUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy A to VT, generate P**H +* (Cworkspace: need 2*M, prefer M+M*NB) +* (Rworkspace: 0) +* + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) + CALL ZUNGBR( 'P', N, N, M, VT, LDVT, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M + CALL DBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply Q in U by real matrix RWORK(IRU), storing the +* result in A, copying to U +* (CWorkspace: need 0) +* (Rworkspace: need 3*M*M) +* + CALL ZLACRM( M, M, U, LDU, RWORK( IRU ), M, A, LDA, + $ RWORK( NRWORK ) ) + CALL ZLACPY( 'F', M, M, A, LDA, U, LDU ) +* +* Multiply real matrix RWORK(IRVT) by P**H in VT, +* storing the result in A, copying to VT +* (Cworkspace: need 0) +* (Rworkspace: need M*M+2*M*N) +* + CALL ZLARCM( M, N, RWORK( IRVT ), M, VT, LDVT, A, LDA, + $ RWORK( NRWORK ) ) + CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT ) + END IF +* + ELSE +* +* N .LT. MNTHR2 +* +* Path 6t (N greater than M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* Use ZUNMBR to compute singular vectors +* + IE = 1 + NRWORK = IE + M + ITAUQ = 1 + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize A +* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) +* (RWorkspace: M) +* + CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IF( WNTQN ) THEN +* +* Compute singular values only +* (Cworkspace: 0) +* (Rworkspace: need BDSPAC) +* + CALL DBDSDC( 'L', 'N', M, S, RWORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN + LDWKVT = M + IVT = NWORK + IF( LWORK.GE.M*N+3*M ) THEN +* +* WORK( IVT ) is M by N +* + CALL ZLASET( 'F', M, N, CZERO, CZERO, WORK( IVT ), + $ LDWKVT ) + NWORK = IVT + LDWKVT*N + ELSE +* +* WORK( IVT ) is M by CHUNK +* + CHUNK = ( LWORK-3*M ) / M + NWORK = IVT + LDWKVT*CHUNK + END IF +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M + CALL DBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of A +* (Cworkspace: need 2*M, prefer M+M*NB) +* (Rworkspace: need 0) +* + CALL ZLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL ZUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* + IF( LWORK.GE.M*N+3*M ) THEN +* +* Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) +* Overwrite WORK(IVT) by right singular vectors of A, +* copying to A +* (Cworkspace: need M*N+2*M, prefer M*N+M+M*NB) +* (Rworkspace: need 0) +* + CALL ZLACP2( 'F', M, M, RWORK( IRVT ), M, WORK( IVT ), + $ LDWKVT ) + CALL ZUNMBR( 'P', 'R', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), WORK( IVT ), LDWKVT, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + CALL ZLACPY( 'F', M, N, WORK( IVT ), LDWKVT, A, LDA ) + ELSE +* +* Generate P**H in A +* (Cworkspace: need 2*M, prefer M+M*NB) +* (Rworkspace: need 0) +* + CALL ZUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by real matrix RWORK(IRU), storing the +* result in WORK(IU), copying to A +* (CWorkspace: need M*M, prefer M*N) +* (Rworkspace: need 3*M*M, prefer M*M+2*M*N) +* + NRWORK = IRU + DO 60 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL ZLARCM( M, BLK, RWORK( IRVT ), M, A( 1, I ), + $ LDA, WORK( IVT ), LDWKVT, + $ RWORK( NRWORK ) ) + CALL ZLACPY( 'F', M, BLK, WORK( IVT ), LDWKVT, + $ A( 1, I ), LDA ) + 60 CONTINUE + END IF + ELSE IF( WNTQS ) THEN +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M + CALL DBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: M*M) +* + CALL ZLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL ZUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: M*M) +* + CALL ZLASET( 'F', M, N, CZERO, CZERO, VT, LDVT ) + CALL ZLACP2( 'F', M, M, RWORK( IRVT ), M, VT, LDVT ) + CALL ZUNMBR( 'P', 'R', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + ELSE +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M +* + CALL DBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: M*M) +* + CALL ZLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL ZUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Set the right corner of VT to identity matrix +* + CALL ZLASET( 'F', N-M, N-M, CZERO, CONE, VT( M+1, M+1 ), + $ LDVT ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of A +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: M*M) +* + CALL ZLASET( 'F', N, N, CZERO, CZERO, VT, LDVT ) + CALL ZLACP2( 'F', M, M, RWORK( IRVT ), M, VT, LDVT ) + CALL ZUNMBR( 'P', 'R', 'C', N, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + END IF +* + END IF +* + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( ANRM.LT.SMLNUM ) + $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of ZGESDD +* + END diff --git a/costa/native/external/lapack/zgesv.f b/costa/native/external/lapack/zgesv.f new file mode 100644 index 000000000..4eeb90d4c --- /dev/null +++ b/costa/native/external/lapack/zgesv.f @@ -0,0 +1,108 @@ + SUBROUTINE ZGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZGESV computes the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +* +* The LU decomposition with partial pivoting and row interchanges is +* used to factor A as +* A = P * L * U, +* where P is a permutation matrix, L is unit lower triangular, and U is +* upper triangular. The factored form of A is then used to solve the +* system of equations A * X = B. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the N-by-N coefficient matrix A. +* On exit, the factors L and U from the factorization +* A = P*L*U; the unit diagonal elements of L are not stored. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* The pivot indices that define the permutation matrix P; +* row i of the matrix was interchanged with row IPIV(i). +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS matrix of right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, so the solution could not be computed. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL XERBLA, ZGETRF, ZGETRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGESV ', -INFO ) + RETURN + END IF +* +* Compute the LU factorization of A. +* + CALL ZGETRF( N, N, A, LDA, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, B, LDB, + $ INFO ) + END IF + RETURN +* +* End of ZGESV +* + END diff --git a/costa/native/external/lapack/zgesvd.f b/costa/native/external/lapack/zgesvd.f new file mode 100644 index 000000000..181be23ab --- /dev/null +++ b/costa/native/external/lapack/zgesvd.f @@ -0,0 +1,3618 @@ + SUBROUTINE ZGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, + $ WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBU, JOBVT + INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), S( * ) + COMPLEX*16 A( LDA, * ), U( LDU, * ), VT( LDVT, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGESVD computes the singular value decomposition (SVD) of a complex +* M-by-N matrix A, optionally computing the left and/or right singular +* vectors. The SVD is written +* +* A = U * SIGMA * conjugate-transpose(V) +* +* where SIGMA is an M-by-N matrix which is zero except for its +* min(m,n) diagonal elements, U is an M-by-M unitary matrix, and +* V is an N-by-N unitary matrix. The diagonal elements of SIGMA +* are the singular values of A; they are real and non-negative, and +* are returned in descending order. The first min(m,n) columns of +* U and V are the left and right singular vectors of A. +* +* Note that the routine returns V**H, not V. +* +* Arguments +* ========= +* +* JOBU (input) CHARACTER*1 +* Specifies options for computing all or part of the matrix U: +* = 'A': all M columns of U are returned in array U: +* = 'S': the first min(m,n) columns of U (the left singular +* vectors) are returned in the array U; +* = 'O': the first min(m,n) columns of U (the left singular +* vectors) are overwritten on the array A; +* = 'N': no columns of U (no left singular vectors) are +* computed. +* +* JOBVT (input) CHARACTER*1 +* Specifies options for computing all or part of the matrix +* V**H: +* = 'A': all N rows of V**H are returned in the array VT; +* = 'S': the first min(m,n) rows of V**H (the right singular +* vectors) are returned in the array VT; +* = 'O': the first min(m,n) rows of V**H (the right singular +* vectors) are overwritten on the array A; +* = 'N': no rows of V**H (no right singular vectors) are +* computed. +* +* JOBVT and JOBU cannot both be 'O'. +* +* M (input) INTEGER +* The number of rows of the input matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the input matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, +* if JOBU = 'O', A is overwritten with the first min(m,n) +* columns of U (the left singular vectors, +* stored columnwise); +* if JOBVT = 'O', A is overwritten with the first min(m,n) +* rows of V**H (the right singular vectors, +* stored rowwise); +* if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A +* are destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* S (output) DOUBLE PRECISION array, dimension (min(M,N)) +* The singular values of A, sorted so that S(i) >= S(i+1). +* +* U (output) COMPLEX*16 array, dimension (LDU,UCOL) +* (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. +* If JOBU = 'A', U contains the M-by-M unitary matrix U; +* if JOBU = 'S', U contains the first min(m,n) columns of U +* (the left singular vectors, stored columnwise); +* if JOBU = 'N' or 'O', U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= 1; if +* JOBU = 'S' or 'A', LDU >= M. +* +* VT (output) COMPLEX*16 array, dimension (LDVT,N) +* If JOBVT = 'A', VT contains the N-by-N unitary matrix +* V**H; +* if JOBVT = 'S', VT contains the first min(m,n) rows of +* V**H (the right singular vectors, stored rowwise); +* if JOBVT = 'N' or 'O', VT is not referenced. +* +* LDVT (input) INTEGER +* The leading dimension of the array VT. LDVT >= 1; if +* JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1. +* LWORK >= 2*MIN(M,N)+MAX(M,N). +* For good performance, LWORK should generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (5*min(M,N)) +* On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the +* unconverged superdiagonal elements of an upper bidiagonal +* matrix B whose diagonal is in S (not necessarily sorted). +* B satisfies A = U * B * VT, so it has the same singular +* values as A, and singular vectors related by U and VT. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if ZBDSQR did not converge, INFO specifies how many +* superdiagonals of an intermediate bidiagonal form B +* did not converge to zero. See the description of RWORK +* above for details. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS, + $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS + INTEGER BLK, CHUNK, I, IE, IERR, IR, IRWORK, ISCL, + $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU, + $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU, + $ NRVT, WRKBL + DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUM( 1 ) + COMPLEX*16 CDUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DLASCL, XERBLA, ZBDSQR, ZGEBRD, ZGELQF, ZGEMM, + $ ZGEQRF, ZLACPY, ZLASCL, ZLASET, ZUNGBR, ZUNGLQ, + $ ZUNGQR, ZUNMBR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + MNTHR = ILAENV( 6, 'ZGESVD', JOBU // JOBVT, M, N, 0, 0 ) + WNTUA = LSAME( JOBU, 'A' ) + WNTUS = LSAME( JOBU, 'S' ) + WNTUAS = WNTUA .OR. WNTUS + WNTUO = LSAME( JOBU, 'O' ) + WNTUN = LSAME( JOBU, 'N' ) + WNTVA = LSAME( JOBVT, 'A' ) + WNTVS = LSAME( JOBVT, 'S' ) + WNTVAS = WNTVA .OR. WNTVS + WNTVO = LSAME( JOBVT, 'O' ) + WNTVN = LSAME( JOBVT, 'N' ) + MINWRK = 1 + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR. + $ ( WNTVO .AND. WNTUO ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN + INFO = -9 + ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR. + $ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to +* real workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) .AND. M.GT.0 .AND. + $ N.GT.0 ) THEN + IF( M.GE.N ) THEN +* +* Space needed for ZBDSQR is BDSPAC = 5*N +* + IF( M.GE.MNTHR ) THEN + IF( WNTUN ) THEN +* +* Path 1 (M much larger than N, JOBU='N') +* + MAXWRK = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, + $ -1 ) + MAXWRK = MAX( MAXWRK, 2*N+2*N* + $ ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) ) + IF( WNTVO .OR. WNTVAS ) + $ MAXWRK = MAX( MAXWRK, 2*N+( N-1 )* + $ ILAENV( 1, 'ZUNGBR', 'P', N, N, N, -1 ) ) + MINWRK = 3*N + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTUO .AND. WNTVN ) THEN +* +* Path 2 (M much larger than N, JOBU='O', JOBVT='N') +* + WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'ZUNGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'ZUNGBR', 'Q', N, N, N, -1 ) ) + MAXWRK = MAX( N*N+WRKBL, N*N+M*N ) + MINWRK = 2*N + M + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTUO .AND. WNTVAS ) THEN +* +* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or +* 'A') +* + WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'ZUNGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'ZUNGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+( N-1 )* + $ ILAENV( 1, 'ZUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = MAX( N*N+WRKBL, N*N+M*N ) + MINWRK = 2*N + M + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTUS .AND. WNTVN ) THEN +* +* Path 4 (M much larger than N, JOBU='S', JOBVT='N') +* + WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'ZUNGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'ZUNGBR', 'Q', N, N, N, -1 ) ) + MAXWRK = N*N + WRKBL + MINWRK = 2*N + M + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTUS .AND. WNTVO ) THEN +* +* Path 5 (M much larger than N, JOBU='S', JOBVT='O') +* + WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'ZUNGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'ZUNGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+( N-1 )* + $ ILAENV( 1, 'ZUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = 2*N*N + WRKBL + MINWRK = 2*N + M + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTUS .AND. WNTVAS ) THEN +* +* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or +* 'A') +* + WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'ZUNGQR', ' ', M, + $ N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'ZUNGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+( N-1 )* + $ ILAENV( 1, 'ZUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = N*N + WRKBL + MINWRK = 2*N + M + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTUA .AND. WNTVN ) THEN +* +* Path 7 (M much larger than N, JOBU='A', JOBVT='N') +* + WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'ZUNGQR', ' ', M, + $ M, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'ZUNGBR', 'Q', N, N, N, -1 ) ) + MAXWRK = N*N + WRKBL + MINWRK = 2*N + M + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTUA .AND. WNTVO ) THEN +* +* Path 8 (M much larger than N, JOBU='A', JOBVT='O') +* + WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'ZUNGQR', ' ', M, + $ M, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'ZUNGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+( N-1 )* + $ ILAENV( 1, 'ZUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = 2*N*N + WRKBL + MINWRK = 2*N + M + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTUA .AND. WNTVAS ) THEN +* +* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or +* 'A') +* + WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'ZUNGQR', ' ', M, + $ M, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+2*N* + $ ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+N* + $ ILAENV( 1, 'ZUNGBR', 'Q', N, N, N, -1 ) ) + WRKBL = MAX( WRKBL, 2*N+( N-1 )* + $ ILAENV( 1, 'ZUNGBR', 'P', N, N, N, -1 ) ) + MAXWRK = N*N + WRKBL + MINWRK = 2*N + M + MAXWRK = MAX( MINWRK, MAXWRK ) + END IF + ELSE +* +* Path 10 (M at least N, but not much larger) +* + MAXWRK = 2*N + ( M+N )*ILAENV( 1, 'ZGEBRD', ' ', M, N, + $ -1, -1 ) + IF( WNTUS .OR. WNTUO ) + $ MAXWRK = MAX( MAXWRK, 2*N+N* + $ ILAENV( 1, 'ZUNGBR', 'Q', M, N, N, -1 ) ) + IF( WNTUA ) + $ MAXWRK = MAX( MAXWRK, 2*N+M* + $ ILAENV( 1, 'ZUNGBR', 'Q', M, M, N, -1 ) ) + IF( .NOT.WNTVN ) + $ MAXWRK = MAX( MAXWRK, 2*N+( N-1 )* + $ ILAENV( 1, 'ZUNGBR', 'P', N, N, N, -1 ) ) + MINWRK = 2*N + M + MAXWRK = MAX( MINWRK, MAXWRK ) + END IF + ELSE +* +* Space needed for ZBDSQR is BDSPAC = 5*M +* + IF( N.GE.MNTHR ) THEN + IF( WNTVN ) THEN +* +* Path 1t(N much larger than M, JOBVT='N') +* + MAXWRK = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, + $ -1 ) + MAXWRK = MAX( MAXWRK, 2*M+2*M* + $ ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) ) + IF( WNTUO .OR. WNTUAS ) + $ MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'ZUNGBR', 'Q', M, M, M, -1 ) ) + MINWRK = 3*M + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTVO .AND. WNTUN ) THEN +* +* Path 2t(N much larger than M, JOBU='N', JOBVT='O') +* + WRKBL = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'ZUNGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'ZUNGBR', 'P', M, M, M, -1 ) ) + MAXWRK = MAX( M*M+WRKBL, M*M+M*N ) + MINWRK = 2*M + N + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTVO .AND. WNTUAS ) THEN +* +* Path 3t(N much larger than M, JOBU='S' or 'A', +* JOBVT='O') +* + WRKBL = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'ZUNGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'ZUNGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'ZUNGBR', 'Q', M, M, M, -1 ) ) + MAXWRK = MAX( M*M+WRKBL, M*M+M*N ) + MINWRK = 2*M + N + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTVS .AND. WNTUN ) THEN +* +* Path 4t(N much larger than M, JOBU='N', JOBVT='S') +* + WRKBL = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'ZUNGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'ZUNGBR', 'P', M, M, M, -1 ) ) + MAXWRK = M*M + WRKBL + MINWRK = 2*M + N + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTVS .AND. WNTUO ) THEN +* +* Path 5t(N much larger than M, JOBU='O', JOBVT='S') +* + WRKBL = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'ZUNGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'ZUNGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'ZUNGBR', 'Q', M, M, M, -1 ) ) + MAXWRK = 2*M*M + WRKBL + MINWRK = 2*M + N + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTVS .AND. WNTUAS ) THEN +* +* Path 6t(N much larger than M, JOBU='S' or 'A', +* JOBVT='S') +* + WRKBL = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'ZUNGLQ', ' ', M, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'ZUNGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'ZUNGBR', 'Q', M, M, M, -1 ) ) + MAXWRK = M*M + WRKBL + MINWRK = 2*M + N + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTVA .AND. WNTUN ) THEN +* +* Path 7t(N much larger than M, JOBU='N', JOBVT='A') +* + WRKBL = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'ZUNGLQ', ' ', N, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'ZUNGBR', 'P', M, M, M, -1 ) ) + MAXWRK = M*M + WRKBL + MINWRK = 2*M + N + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTVA .AND. WNTUO ) THEN +* +* Path 8t(N much larger than M, JOBU='O', JOBVT='A') +* + WRKBL = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'ZUNGLQ', ' ', N, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'ZUNGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'ZUNGBR', 'Q', M, M, M, -1 ) ) + MAXWRK = 2*M*M + WRKBL + MINWRK = 2*M + N + MAXWRK = MAX( MINWRK, MAXWRK ) + ELSE IF( WNTVA .AND. WNTUAS ) THEN +* +* Path 9t(N much larger than M, JOBU='S' or 'A', +* JOBVT='A') +* + WRKBL = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 ) + WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'ZUNGLQ', ' ', N, + $ N, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+2*M* + $ ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+( M-1 )* + $ ILAENV( 1, 'ZUNGBR', 'P', M, M, M, -1 ) ) + WRKBL = MAX( WRKBL, 2*M+M* + $ ILAENV( 1, 'ZUNGBR', 'Q', M, M, M, -1 ) ) + MAXWRK = M*M + WRKBL + MINWRK = 2*M + N + MAXWRK = MAX( MINWRK, MAXWRK ) + END IF + ELSE +* +* Path 10t(N greater than M, but not much larger) +* + MAXWRK = 2*M + ( M+N )*ILAENV( 1, 'ZGEBRD', ' ', M, N, + $ -1, -1 ) + IF( WNTVS .OR. WNTVO ) + $ MAXWRK = MAX( MAXWRK, 2*M+M* + $ ILAENV( 1, 'ZUNGBR', 'P', M, N, M, -1 ) ) + IF( WNTVA ) + $ MAXWRK = MAX( MAXWRK, 2*M+N* + $ ILAENV( 1, 'ZUNGBR', 'P', N, N, M, -1 ) ) + IF( .NOT.WNTUN ) + $ MAXWRK = MAX( MAXWRK, 2*M+( M-1 )* + $ ILAENV( 1, 'ZUNGBR', 'Q', M, M, M, -1 ) ) + MINWRK = 2*M + N + MAXWRK = MAX( MINWRK, MAXWRK ) + END IF + END IF + WORK( 1 ) = MAXWRK + END IF +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGESVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + IF( LWORK.GE.1 ) + $ WORK( 1 ) = ONE + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce using the QR +* decomposition (if sufficient workspace available) +* + IF( M.GE.MNTHR ) THEN +* + IF( WNTUN ) THEN +* +* Path 1 (M much larger than N, JOBU='N') +* No left singular vectors to be computed +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: need 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Zero out below R +* + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), + $ LDA ) + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + NCVT = 0 + IF( WNTVO .OR. WNTVAS ) THEN +* +* If right singular vectors desired, generate P'. +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + NCVT = N + END IF + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in A if desired +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, NCVT, 0, 0, S, RWORK( IE ), A, LDA, + $ CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO ) +* +* If right singular vectors desired in VT, copy them there +* + IF( WNTVAS ) + $ CALL ZLACPY( 'F', N, N, A, LDA, VT, LDVT ) +* + ELSE IF( WNTUO .AND. WNTVN ) THEN +* +* Path 2 (M much larger than N, JOBU='O', JOBVT='N') +* N left singular vectors to be overwritten on A and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN +* +* WORK(IU) is LDA by N, WORK(IR) is LDA by N +* + LDWRKU = LDA + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN +* +* WORK(IU) is LDA by N, WORK(IR) is N by N +* + LDWRKU = LDA + LDWRKR = N + ELSE +* +* WORK(IU) is LDWRKU by N, WORK(IR) is N by N +* + LDWRKU = ( LWORK-N*N ) / N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IR) and zero out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing R +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: need 0) +* + CALL ZUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, 1, + $ WORK( IR ), LDWRKR, CDUM, 1, + $ RWORK( IRWORK ), INFO ) + IU = ITAUQ +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in WORK(IU) and copying to A +* (CWorkspace: need N*N+N, prefer N*N+M*N) +* (RWorkspace: 0) +* + DO 10 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL ZGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ), + $ LDA, WORK( IR ), LDWRKR, CZERO, + $ WORK( IU ), LDWRKU ) + CALL ZLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 10 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize A +* (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) +* (RWorkspace: N) +* + CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing A +* (CWorkspace: need 3*N, prefer 2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, 1, + $ A, LDA, CDUM, 1, RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUO .AND. WNTVAS ) THEN +* +* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') +* N left singular vectors to be overwritten on A and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + LDWRKR = N + ELSE +* +* WORK(IU) is LDWRKU by N and WORK(IR) is N by N +* + LDWRKU = ( LWORK-N*N ) / N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, VT( 2, 1 ), + $ LDVT ) +* +* Generate Q in A +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT, copying result to WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, VT, LDVT, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR ) +* +* Generate left vectors bidiagonalizing R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in VT +* (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) and computing right +* singular vectors of R in VT +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT, + $ LDVT, WORK( IR ), LDWRKR, CDUM, 1, + $ RWORK( IRWORK ), INFO ) + IU = ITAUQ +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in WORK(IU) and copying to A +* (CWorkspace: need N*N+N, prefer N*N+M*N) +* (RWorkspace: 0) +* + DO 20 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL ZGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ), + $ LDA, WORK( IR ), LDWRKR, CZERO, + $ WORK( IU ), LDWRKU ) + CALL ZLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 20 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, VT( 2, 1 ), + $ LDVT ) +* +* Generate Q in A +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: N) +* + CALL ZGEBRD( N, N, VT, LDVT, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in A by left vectors bidiagonalizing R +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), A, LDA, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in VT +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUS ) THEN +* + IF( WNTVN ) THEN +* +* Path 4 (M much larger than N, JOBU='S', JOBVT='N') +* N left singular vectors to be computed in U and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IR) is LDA by N +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is N by N +* + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, WORK( IR ), LDWRKR, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, + $ 1, WORK( IR ), LDWRKR, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in U +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, N, CONE, A, LDA, + $ WORK( IR ), LDWRKR, CZERO, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ A( 2, 1 ), LDA ) +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left vectors bidiagonalizing R +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, + $ 1, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVO ) THEN +* +* Path 5 (M much larger than N, JOBU='S', JOBVT='O') +* N left singular vectors to be computed in U and +* N right singular vectors to be overwritten on A +* + IF( LWORK.GE.2*N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = N + ELSE +* +* WORK(IU) is N by N and WORK(IR) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IU+1 ), LDWRKU ) +* +* Generate Q in A +* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to +* WORK(IR) +* (CWorkspace: need 2*N*N+3*N, +* prefer 2*N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', N, N, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need 2*N*N+3*N-1, +* prefer 2*N*N+2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in WORK(IR) +* (CWorkspace: need 2*N*N) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, WORK( IU ), + $ LDWRKU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in U +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, N, CONE, A, LDA, + $ WORK( IU ), LDWRKU, CZERO, U, LDU ) +* +* Copy right singular vectors of R to A +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL ZLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ A( 2, 1 ), LDA ) +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left vectors bidiagonalizing R +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in A +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A, + $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVAS ) THEN +* +* Path 6 (M much larger than N, JOBU='S', JOBVT='S' +* or 'A') +* N left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is N by N +* + LDWRKU = N + END IF + ITAU = IU + LDWRKU*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IU+1 ), LDWRKU ) +* +* Generate Q in A +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to VT +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, + $ LDVT ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (CWorkspace: need N*N+3*N-1, +* prefer N*N+2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in VT +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT, + $ LDVT, WORK( IU ), LDWRKU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in U +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, N, CONE, A, LDA, + $ WORK( IU ), LDWRKU, CZERO, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ VT( 2, 1 ), LDVT ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, VT, LDVT, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in VT +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + END IF +* + ELSE IF( WNTUA ) THEN +* + IF( WNTVN ) THEN +* +* Path 7 (M much larger than N, JOBU='A', JOBVT='N') +* M left singular vectors to be computed in U and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IR) is LDA by N +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is N by N +* + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in U +* (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, WORK( IR ), LDWRKR, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, + $ 1, WORK( IR ), LDWRKR, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IR), storing result in A +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, N, CONE, U, LDU, + $ WORK( IR ), LDWRKR, CZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL ZLACPY( 'F', M, N, A, LDA, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need N+M, prefer N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ A( 2, 1 ), LDA ) +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in A +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, + $ 1, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVO ) THEN +* +* Path 8 (M much larger than N, JOBU='A', JOBVT='O') +* M left singular vectors to be computed in U and +* N right singular vectors to be overwritten on A +* + IF( LWORK.GE.2*N*N+MAX( N+M, 3*N ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = N + ELSE +* +* WORK(IU) is N by N and WORK(IR) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IU+1 ), LDWRKU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to +* WORK(IR) +* (CWorkspace: need 2*N*N+3*N, +* prefer 2*N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', N, N, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need 2*N*N+3*N-1, +* prefer 2*N*N+2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in WORK(IR) +* (CWorkspace: need 2*N*N) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, WORK( IU ), + $ LDWRKU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, N, CONE, U, LDU, + $ WORK( IU ), LDWRKU, CZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL ZLACPY( 'F', M, N, A, LDA, U, LDU ) +* +* Copy right singular vectors of R from WORK(IR) to A +* + CALL ZLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need N+M, prefer N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ A( 2, 1 ), LDA ) +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in A +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in A +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A, + $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVAS ) THEN +* +* Path 9 (M much larger than N, JOBU='A', JOBVT='S' +* or 'A') +* M left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is N by N +* + LDWRKU = N + END IF + ITAU = IU + LDWRKU*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IU+1 ), LDWRKU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to VT +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, + $ LDVT ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (CWorkspace: need N*N+3*N-1, +* prefer N*N+2*N+(N-1)*NB) +* (RWorkspace: need 0) +* + CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in VT +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT, + $ LDVT, WORK( IU ), LDWRKU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, N, CONE, U, LDU, + $ WORK( IU ), LDWRKU, CZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL ZLACPY( 'F', M, N, A, LDA, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need N+M, prefer N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R from A to VT, zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ VT( 2, 1 ), LDVT ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, VT, LDVT, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in VT +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* M .LT. MNTHR +* +* Path 10 (M at least N, but not much larger) +* Reduce to bidiagonal form without QR decomposition +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize A +* (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUAS ) THEN +* +* If left singular vectors desired in U, copy result to U +* and generate left bidiagonalizing vectors in U +* (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB) +* (RWorkspace: 0) +* + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) + IF( WNTUS ) + $ NCU = N + IF( WNTUA ) + $ NCU = M + CALL ZUNGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVAS ) THEN +* +* If right singular vectors desired in VT, copy result to +* VT and generate right bidiagonalizing vectors in VT +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTUO ) THEN +* +* If left singular vectors desired in A, generate left +* bidiagonalizing vectors in A +* (CWorkspace: need 3*N, prefer 2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVO ) THEN +* +* If right singular vectors desired in A, generate right +* bidiagonalizing vectors in A +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IRWORK = IE + N + IF( WNTUAS .OR. WNTUO ) + $ NRU = M + IF( WNTUN ) + $ NRU = 0 + IF( WNTVAS .OR. WNTVO ) + $ NCVT = N + IF( WNTVN ) + $ NCVT = 0 + IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), A, + $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + ELSE +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in A and computing right singular +* vectors in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + END IF +* + END IF +* + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce using the LQ decomposition (if +* sufficient workspace available) +* + IF( N.GE.MNTHR ) THEN +* + IF( WNTVN ) THEN +* +* Path 1t(N much larger than M, JOBVT='N') +* No right singular vectors to be computed +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Zero out above L +* + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ), + $ LDA ) + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUO .OR. WNTUAS ) THEN +* +* If left singular vectors desired, generate Q +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IRWORK = IE + M + NRU = 0 + IF( WNTUO .OR. WNTUAS ) + $ NRU = M +* +* Perform bidiagonal QR iteration, computing left singular +* vectors of A in A if desired +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, 0, NRU, 0, S, RWORK( IE ), CDUM, 1, + $ A, LDA, CDUM, 1, RWORK( IRWORK ), INFO ) +* +* If left singular vectors desired in U, copy them there +* + IF( WNTUAS ) + $ CALL ZLACPY( 'F', M, M, A, LDA, U, LDU ) +* + ELSE IF( WNTVO .AND. WNTUN ) THEN +* +* Path 2t(N much larger than M, JOBU='N', JOBVT='O') +* M right singular vectors to be overwritten on A and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is M by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = M + ELSE +* +* WORK(IU) is M by CHUNK and WORK(IR) is M by M +* + LDWRKU = M + CHUNK = ( LWORK-M*M ) / M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IR) and zero out above it +* + CALL ZLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, WORK( IR ), LDWRKR, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing L +* (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) + IU = ITAUQ +* +* Multiply right singular vectors of L in WORK(IR) by Q +* in A, storing result in WORK(IU) and copying to A +* (CWorkspace: need M*M+M, prefer M*M+M*N) +* (RWorkspace: 0) +* + DO 30 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL ZGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ), + $ LDWRKR, A( 1, I ), LDA, CZERO, + $ WORK( IU ), LDWRKU ) + CALL ZLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, + $ A( 1, I ), LDA ) + 30 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'L', M, N, 0, 0, S, RWORK( IE ), A, LDA, + $ CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTVO .AND. WNTUAS ) THEN +* +* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') +* M right singular vectors to be overwritten on A and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is M by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = M + ELSE +* +* WORK(IU) is M by CHUNK and WORK(IR) is M by M +* + LDWRKU = M + CHUNK = ( LWORK-M*M ) / M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing about above it +* + CALL ZLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ), + $ LDU ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U, copying result to WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, U, LDU, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR ) +* +* Generate right vectors bidiagonalizing L in WORK(IR) +* (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing L in U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U, and computing right +* singular vectors of L in WORK(IR) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) + IU = ITAUQ +* +* Multiply right singular vectors of L in WORK(IR) by Q +* in A, storing result in WORK(IU) and copying to A +* (CWorkspace: need M*M+M, prefer M*M+M*N)) +* (RWorkspace: 0) +* + DO 40 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL ZGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ), + $ LDWRKR, A( 1, I ), LDA, CZERO, + $ WORK( IU ), LDWRKU ) + CALL ZLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, + $ A( 1, I ), LDA ) + 40 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL ZLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ), + $ LDU ) +* +* Generate Q in A +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, U, LDU, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in A +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'P', 'L', 'C', M, N, M, U, LDU, + $ WORK( ITAUP ), A, LDA, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing L in U +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), A, LDA, + $ U, LDU, CDUM, 1, RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTVS ) THEN +* + IF( WNTUN ) THEN +* +* Path 4t(N much larger than M, JOBU='N', JOBVT='S') +* M right singular vectors to be computed in VT and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IR) is LDA by M +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is M by M +* + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IR), zeroing out above it +* + CALL ZLACPY( 'L', M, M, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, WORK( IR ), LDWRKR, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing L in +* WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IR) by +* Q in A, storing result in VT +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ), + $ LDWRKR, A, LDA, CZERO, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy result to VT +* + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ A( 1, 2 ), LDA ) +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT, + $ LDVT, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUO ) THEN +* +* Path 5t(N much larger than M, JOBU='O', JOBVT='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be overwritten on A +* + IF( LWORK.GE.2*M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is LDA by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is M by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = M + ELSE +* +* WORK(IU) is M by M and WORK(IR) is M by M +* + LDWRKU = M + IR = IU + LDWRKU*M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out below it +* + CALL ZLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) +* +* Generate Q in A +* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to +* WORK(IR) +* (CWorkspace: need 2*M*M+3*M, +* prefer 2*M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, M, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need 2*M*M+3*M-1, +* prefer 2*M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in WORK(IR) and computing +* right singular vectors of L in WORK(IU) +* (CWorkspace: need 2*M*M) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IU ), LDWRKU, WORK( IR ), + $ LDWRKR, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in A, storing result in VT +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), + $ LDWRKU, A, LDA, CZERO, VT, LDVT ) +* +* Copy left singular vectors of L to A +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL ZLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ A( 1, 2 ), LDA ) +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors of L in A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUAS ) THEN +* +* Path 6t(N much larger than M, JOBU='S' or 'A', +* JOBVT='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is LDA by M +* + LDWRKU = M + END IF + ITAU = IU + LDWRKU*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL ZLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, + $ LDU ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need M*M+3*M-1, +* prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U and computing right +* singular vectors of L in WORK(IU) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IU ), LDWRKU, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in A, storing result in VT +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), + $ LDWRKU, A, LDA, CZERO, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL ZLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ U( 1, 2 ), LDU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, U, LDU, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in U by Q +* in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'P', 'L', 'C', M, N, M, U, LDU, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + END IF +* + ELSE IF( WNTVA ) THEN +* + IF( WNTUN ) THEN +* +* Path 7t(N much larger than M, JOBU='N', JOBVT='A') +* N right singular vectors to be computed in VT and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IR) is LDA by M +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is M by M +* + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Copy L to WORK(IR), zeroing out above it +* + CALL ZLACPY( 'L', M, M, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in VT +* (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, WORK( IR ), LDWRKR, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need M*M+3*M-1, +* prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IR) by +* Q in VT, storing result in A +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ), + $ LDWRKR, VT, LDVT, CZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need M+N, prefer M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ A( 1, 2 ), LDA ) +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in A by Q +* in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT, + $ LDVT, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUO ) THEN +* +* Path 8t(N much larger than M, JOBU='O', JOBVT='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be overwritten on A +* + IF( LWORK.GE.2*M*M+MAX( N+M, 3*M ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is LDA by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is M by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = M + ELSE +* +* WORK(IU) is M by M and WORK(IR) is M by M +* + LDWRKU = M + IR = IU + LDWRKU*M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL ZLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to +* WORK(IR) +* (CWorkspace: need 2*M*M+3*M, +* prefer 2*M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, M, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need 2*M*M+3*M-1, +* prefer 2*M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in WORK(IR) and computing +* right singular vectors of L in WORK(IU) +* (CWorkspace: need 2*M*M) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IU ), LDWRKU, WORK( IR ), + $ LDWRKR, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in VT, storing result in A +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), + $ LDWRKU, VT, LDVT, CZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* +* Copy left singular vectors of A from WORK(IR) to A +* + CALL ZLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need M+N, prefer M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ A( 1, 2 ), LDA ) +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in A by Q +* in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUAS ) THEN +* +* Path 9t(N much larger than M, JOBU='S' or 'A', +* JOBVT='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IU) is LDA by M +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is M by M +* + LDWRKU = M + END IF + ITAU = IU + LDWRKU*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL ZLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, + $ LDU ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U and computing right +* singular vectors of L in WORK(IU) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IU ), LDWRKU, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in VT, storing result in A +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), + $ LDWRKU, VT, LDVT, CZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need M+N, prefer M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL ZLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ U( 1, 2 ), LDU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, U, LDU, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in U by Q +* in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'P', 'L', 'C', M, N, M, U, LDU, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* N .LT. MNTHR +* +* Path 10t(N greater than M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) +* (RWorkspace: M) +* + CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUAS ) THEN +* +* If left singular vectors desired in U, copy result to U +* and generate left bidiagonalizing vectors in U +* (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL ZLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL ZUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVAS ) THEN +* +* If right singular vectors desired in VT, copy result to +* VT and generate right bidiagonalizing vectors in VT +* (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB) +* (RWorkspace: 0) +* + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) + IF( WNTVA ) + $ NRVT = N + IF( WNTVS ) + $ NRVT = M + CALL ZUNGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTUO ) THEN +* +* If left singular vectors desired in A, generate left +* bidiagonalizing vectors in A +* (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVO ) THEN +* +* If right singular vectors desired in A, generate right +* bidiagonalizing vectors in A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IRWORK = IE + M + IF( WNTUAS .OR. WNTUO ) + $ NRU = M + IF( WNTUN ) + $ NRU = 0 + IF( WNTVAS .OR. WNTVO ) + $ NCVT = N + IF( WNTVN ) + $ NCVT = 0 + IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), A, + $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + ELSE +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in A and computing right singular +* vectors in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + END IF +* + END IF +* + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM ) + $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, + $ RWORK( IE ), MINMN, IERR ) + IF( ANRM.LT.SMLNUM ) + $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM ) + $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, + $ RWORK( IE ), MINMN, IERR ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of ZGESVD +* + END diff --git a/costa/native/external/lapack/zgesvx.f b/costa/native/external/lapack/zgesvx.f new file mode 100644 index 000000000..0df63b3f3 --- /dev/null +++ b/costa/native/external/lapack/zgesvx.f @@ -0,0 +1,484 @@ + SUBROUTINE ZGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), C( * ), FERR( * ), R( * ), + $ RWORK( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZGESVX uses the LU factorization to compute the solution to a complex +* system of linear equations +* A * X = B, +* where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'E', real scaling factors are computed to equilibrate +* the system: +* TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +* TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +* TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +* Whether or not the system will be equilibrated depends on the +* scaling of the matrix A, but if equilibration is used, A is +* overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +* or diag(C)*B (if TRANS = 'T' or 'C'). +* +* 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the +* matrix A (after equilibration if FACT = 'E') as +* A = P * L * U, +* where P is a permutation matrix, L is a unit lower triangular +* matrix, and U is upper triangular. +* +* 3. If some U(i,i)=0, so that U is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 4. The system of equations is solved for X using the factored form +* of A. +* +* 5. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* 6. If equilibration was used, the matrix X is premultiplied by +* diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +* that it solves the original system before equilibration. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of the matrix A is +* supplied on entry, and if not, whether the matrix A should be +* equilibrated before it is factored. +* = 'F': On entry, AF and IPIV contain the factored form of A. +* If EQUED is not 'N', the matrix A has been +* equilibrated with scaling factors given by R and C. +* A, AF, and IPIV are not modified. +* = 'N': The matrix A will be copied to AF and factored. +* = 'E': The matrix A will be equilibrated if necessary, then +* copied to AF and factored. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the N-by-N matrix A. If FACT = 'F' and EQUED is +* not 'N', then A must have been equilibrated by the scaling +* factors in R and/or C. A is not modified if FACT = 'F' or +* 'N', or if FACT = 'E' and EQUED = 'N' on exit. +* +* On exit, if EQUED .ne. 'N', A is scaled as follows: +* EQUED = 'R': A := diag(R) * A +* EQUED = 'C': A := A * diag(C) +* EQUED = 'B': A := diag(R) * A * diag(C). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input or output) COMPLEX*16 array, dimension (LDAF,N) +* If FACT = 'F', then AF is an input argument and on entry +* contains the factors L and U from the factorization +* A = P*L*U as computed by ZGETRF. If EQUED .ne. 'N', then +* AF is the factored form of the equilibrated matrix A. +* +* If FACT = 'N', then AF is an output argument and on exit +* returns the factors L and U from the factorization A = P*L*U +* of the original matrix A. +* +* If FACT = 'E', then AF is an output argument and on exit +* returns the factors L and U from the factorization A = P*L*U +* of the equilibrated matrix A (see the description of A for +* the form of the equilibrated matrix). +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains the pivot indices from the factorization A = P*L*U +* as computed by ZGETRF; row i of the matrix was interchanged +* with row IPIV(i). +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains the pivot indices from the factorization A = P*L*U +* of the original matrix A. +* +* If FACT = 'E', then IPIV is an output argument and on exit +* contains the pivot indices from the factorization A = P*L*U +* of the equilibrated matrix A. +* +* EQUED (input or output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration (always true if FACT = 'N'). +* = 'R': Row equilibration, i.e., A has been premultiplied by +* diag(R). +* = 'C': Column equilibration, i.e., A has been postmultiplied +* by diag(C). +* = 'B': Both row and column equilibration, i.e., A has been +* replaced by diag(R) * A * diag(C). +* EQUED is an input argument if FACT = 'F'; otherwise, it is an +* output argument. +* +* R (input or output) DOUBLE PRECISION array, dimension (N) +* The row scale factors for A. If EQUED = 'R' or 'B', A is +* multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +* is not accessed. R is an input argument if FACT = 'F'; +* otherwise, R is an output argument. If FACT = 'F' and +* EQUED = 'R' or 'B', each element of R must be positive. +* +* C (input or output) DOUBLE PRECISION array, dimension (N) +* The column scale factors for A. If EQUED = 'C' or 'B', A is +* multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +* is not accessed. C is an input argument if FACT = 'F'; +* otherwise, C is an output argument. If FACT = 'F' and +* EQUED = 'C' or 'B', each element of C must be positive. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, +* if EQUED = 'N', B is not modified; +* if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +* diag(R)*B; +* if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +* overwritten by diag(C)*B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) COMPLEX*16 array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X +* to the original system of equations. Note that A and B are +* modified on exit if EQUED .ne. 'N', and the solution to the +* equilibrated system is inv(diag(C))*X if TRANS = 'N' and +* EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' +* and EQUED = 'R' or 'B'. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) DOUBLE PRECISION +* The estimate of the reciprocal condition number of the matrix +* A after equilibration (if done). If RCOND is less than the +* machine precision (in particular, if RCOND = 0), the matrix +* is singular to working precision. This condition is +* indicated by a return code of INFO > 0. +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace/output) DOUBLE PRECISION array, dimension (2*N) +* On exit, RWORK(1) contains the reciprocal pivot growth +* factor norm(A)/norm(U). The "max absolute element" norm is +* used. If RWORK(1) is much less than 1, then the stability +* of the LU factorization of the (equilibrated) matrix A +* could be poor. This also means that the solution X, condition +* estimator RCOND, and forward error bound FERR could be +* unreliable. If factorization fails with 0 0: if INFO = i, and i is +* <= N: U(i,i) is exactly zero. The factorization has +* been completed, but the factor U is exactly +* singular, so the solution and error bounds +* could not be computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + CHARACTER NORM + INTEGER I, INFEQU, J + DOUBLE PRECISION AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN, + $ ROWCND, RPVGRW, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANGE, ZLANTR + EXTERNAL LSAME, DLAMCH, ZLANGE, ZLANTR +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGECON, ZGEEQU, ZGERFS, ZGETRF, ZGETRS, + $ ZLACPY, ZLAQGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -12 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGESVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL ZGEEQU( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL ZLAQGE( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = R( I )*B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( COLEQU ) THEN + DO 60 J = 1, NRHS + DO 50 I = 1, N + B( I, J ) = C( I )*B( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of A. +* + CALL ZLACPY( 'Full', N, N, A, LDA, AF, LDAF ) + CALL ZGETRF( N, N, AF, LDAF, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) THEN +* +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + RPVGRW = ZLANTR( 'M', 'U', 'N', INFO, INFO, AF, LDAF, + $ RWORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = ZLANGE( 'M', N, INFO, A, LDA, RWORK ) / + $ RPVGRW + END IF + RWORK( 1 ) = RPVGRW + RCOND = ZERO + END IF + RETURN + END IF + END IF +* +* Compute the norm of the matrix A and the +* reciprocal pivot growth factor RPVGRW. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = ZLANGE( NORM, N, N, A, LDA, RWORK ) + RPVGRW = ZLANTR( 'M', 'U', 'N', N, N, AF, LDAF, RWORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = ZLANGE( 'M', N, N, A, LDA, RWORK ) / RPVGRW + END IF +* +* Compute the reciprocal of the condition number of A. +* + CALL ZGECON( NORM, N, AF, LDAF, ANORM, RCOND, WORK, RWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution matrix X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZGETRS( TRANS, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL ZGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( NOTRAN ) THEN + IF( COLEQU ) THEN + DO 80 J = 1, NRHS + DO 70 I = 1, N + X( I, J ) = C( I )*X( I, J ) + 70 CONTINUE + 80 CONTINUE + DO 90 J = 1, NRHS + FERR( J ) = FERR( J ) / COLCND + 90 CONTINUE + END IF + ELSE IF( ROWEQU ) THEN + DO 110 J = 1, NRHS + DO 100 I = 1, N + X( I, J ) = R( I )*X( I, J ) + 100 CONTINUE + 110 CONTINUE + DO 120 J = 1, NRHS + FERR( J ) = FERR( J ) / ROWCND + 120 CONTINUE + END IF +* + RWORK( 1 ) = RPVGRW + RETURN +* +* End of ZGESVX +* + END diff --git a/costa/native/external/lapack/zgetc2.f b/costa/native/external/lapack/zgetc2.f new file mode 100644 index 000000000..645d8a1a0 --- /dev/null +++ b/costa/native/external/lapack/zgetc2.f @@ -0,0 +1,146 @@ + SUBROUTINE ZGETC2( N, A, LDA, IPIV, JPIV, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZGETC2 computes an LU factorization, using complete pivoting, of the +* n-by-n matrix A. The factorization has the form A = P * L * U * Q, +* where P and Q are permutation matrices, L is lower triangular with +* unit diagonal elements and U is upper triangular. +* +* This is a level 1 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA, N) +* On entry, the n-by-n matrix to be factored. +* On exit, the factors L and U from the factorization +* A = P*L*U*Q; the unit diagonal elements of L are not stored. +* If U(k, k) appears to be less than SMIN, U(k, k) is given the +* value of SMIN, giving a nonsingular perturbed system. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1, N). +* +* IPIV (output) INTEGER array, dimension (N). +* The pivot indices; for 1 <= i <= N, row i of the +* matrix has been interchanged with row IPIV(i). +* +* JPIV (output) INTEGER array, dimension (N). +* The pivot indices; for 1 <= j <= N, column j of the +* matrix has been interchanged with column JPIV(j). +* +* INFO (output) INTEGER +* = 0: successful exit +* > 0: if INFO = k, U(k, k) is likely to produce overflow if +* one tries to solve for x in Ax = b. So U is perturbed +* to avoid the overflow. +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IP, IPV, J, JP, JPV + DOUBLE PRECISION BIGNUM, EPS, SMIN, SMLNUM, XMAX +* .. +* .. External Subroutines .. + EXTERNAL ZGERU, ZSWAP +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DCMPLX, MAX +* .. +* .. Executable Statements .. +* +* Set constants to control overflow +* + INFO = 0 + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Factorize A using complete pivoting. +* Set pivots less than SMIN to SMIN +* + DO 40 I = 1, N - 1 +* +* Find max element in matrix A +* + XMAX = ZERO + DO 20 IP = I, N + DO 10 JP = I, N + IF( ABS( A( IP, JP ) ).GE.XMAX ) THEN + XMAX = ABS( A( IP, JP ) ) + IPV = IP + JPV = JP + END IF + 10 CONTINUE + 20 CONTINUE + IF( I.EQ.1 ) + $ SMIN = MAX( EPS*XMAX, SMLNUM ) +* +* Swap rows +* + IF( IPV.NE.I ) + $ CALL ZSWAP( N, A( IPV, 1 ), LDA, A( I, 1 ), LDA ) + IPIV( I ) = IPV +* +* Swap columns +* + IF( JPV.NE.I ) + $ CALL ZSWAP( N, A( 1, JPV ), 1, A( 1, I ), 1 ) + JPIV( I ) = JPV +* +* Check for singularity +* + IF( ABS( A( I, I ) ).LT.SMIN ) THEN + INFO = I + A( I, I ) = DCMPLX( SMIN, ZERO ) + END IF + DO 30 J = I + 1, N + A( J, I ) = A( J, I ) / A( I, I ) + 30 CONTINUE + CALL ZGERU( N-I, N-I, -DCMPLX( ONE ), A( I+1, I ), 1, + $ A( I, I+1 ), LDA, A( I+1, I+1 ), LDA ) + 40 CONTINUE +* + IF( ABS( A( N, N ) ).LT.SMIN ) THEN + INFO = N + A( N, N ) = DCMPLX( SMIN, ZERO ) + END IF + RETURN +* +* End of ZGETC2 +* + END diff --git a/costa/native/external/lapack/zgetf2.f b/costa/native/external/lapack/zgetf2.f new file mode 100644 index 000000000..4cbdb446b --- /dev/null +++ b/costa/native/external/lapack/zgetf2.f @@ -0,0 +1,136 @@ + SUBROUTINE ZGETF2( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZGETF2 computes an LU factorization of a general m-by-n matrix A +* using partial pivoting with row interchanges. +* +* The factorization has the form +* A = P * L * U +* where P is a permutation matrix, L is lower triangular with unit +* diagonal elements (lower trapezoidal if m > n), and U is upper +* triangular (upper trapezoidal if m < n). +* +* This is the right-looking Level 2 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the m by n matrix to be factored. +* On exit, the factors L and U from the factorization +* A = P*L*U; the unit diagonal elements of L are not stored. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, U(k,k) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER J, JP +* .. +* .. External Functions .. + INTEGER IZAMAX + EXTERNAL IZAMAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGERU, ZSCAL, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGETF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + DO 10 J = 1, MIN( M, N ) +* +* Find pivot and test for singularity. +* + JP = J - 1 + IZAMAX( M-J+1, A( J, J ), 1 ) + IPIV( J ) = JP + IF( A( JP, J ).NE.ZERO ) THEN +* +* Apply the interchange to columns 1:N. +* + IF( JP.NE.J ) + $ CALL ZSWAP( N, A( J, 1 ), LDA, A( JP, 1 ), LDA ) +* +* Compute elements J+1:M of J-th column. +* + IF( J.LT.M ) + $ CALL ZSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 ) +* + ELSE IF( INFO.EQ.0 ) THEN +* + INFO = J + END IF +* + IF( J.LT.MIN( M, N ) ) THEN +* +* Update trailing submatrix. +* + CALL ZGERU( M-J, N-J, -ONE, A( J+1, J ), 1, A( J, J+1 ), + $ LDA, A( J+1, J+1 ), LDA ) + END IF + 10 CONTINUE + RETURN +* +* End of ZGETF2 +* + END diff --git a/costa/native/external/lapack/zgetrf.f b/costa/native/external/lapack/zgetrf.f new file mode 100644 index 000000000..6f48c8d56 --- /dev/null +++ b/costa/native/external/lapack/zgetrf.f @@ -0,0 +1,160 @@ + SUBROUTINE ZGETRF( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZGETRF computes an LU factorization of a general M-by-N matrix A +* using partial pivoting with row interchanges. +* +* The factorization has the form +* A = P * L * U +* where P is a permutation matrix, L is lower triangular with unit +* diagonal elements (lower trapezoidal if m > n), and U is upper +* triangular (upper trapezoidal if m < n). +* +* This is the right-looking Level 3 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M-by-N matrix to be factored. +* On exit, the factors L and U from the factorization +* A = P*L*U; the unit diagonal elements of L are not stored. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* IPIV (output) INTEGER array, dimension (min(M,N)) +* The pivot indices; for 1 <= i <= min(M,N), row i of the +* matrix was interchanged with row IPIV(i). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO, J, JB, NB +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMM, ZGETF2, ZLASWP, ZTRSM +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'ZGETRF', ' ', M, N, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN +* +* Use unblocked code. +* + CALL ZGETF2( M, N, A, LDA, IPIV, INFO ) + ELSE +* +* Use blocked code. +* + DO 20 J = 1, MIN( M, N ), NB + JB = MIN( MIN( M, N )-J+1, NB ) +* +* Factor diagonal and subdiagonal blocks and test for exact +* singularity. +* + CALL ZGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO ) +* +* Adjust INFO and the pivot indices. +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + J - 1 + DO 10 I = J, MIN( M, J+JB-1 ) + IPIV( I ) = J - 1 + IPIV( I ) + 10 CONTINUE +* +* Apply interchanges to columns 1:J-1. +* + CALL ZLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 ) +* + IF( J+JB.LE.N ) THEN +* +* Apply interchanges to columns J+JB:N. +* + CALL ZLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1, + $ IPIV, 1 ) +* +* Compute block row of U. +* + CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB, + $ N-J-JB+1, ONE, A( J, J ), LDA, A( J, J+JB ), + $ LDA ) + IF( J+JB.LE.M ) THEN +* +* Update trailing submatrix. +* + CALL ZGEMM( 'No transpose', 'No transpose', M-J-JB+1, + $ N-J-JB+1, JB, -ONE, A( J+JB, J ), LDA, + $ A( J, J+JB ), LDA, ONE, A( J+JB, J+JB ), + $ LDA ) + END IF + END IF + 20 CONTINUE + END IF + RETURN +* +* End of ZGETRF +* + END diff --git a/costa/native/external/lapack/zgetri.f b/costa/native/external/lapack/zgetri.f new file mode 100644 index 000000000..1eb4eb7f1 --- /dev/null +++ b/costa/native/external/lapack/zgetri.f @@ -0,0 +1,194 @@ + SUBROUTINE ZGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGETRI computes the inverse of a matrix using the LU factorization +* computed by ZGETRF. +* +* This method inverts U and then computes inv(A) by solving the system +* inv(A)*L = inv(U) for inv(A). +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the factors L and U from the factorization +* A = P*L*U as computed by ZGETRF. +* On exit, if INFO = 0, the inverse of the original matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from ZGETRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO=0, then WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimal performance LWORK >= N*NB, where NB is +* the optimal blocksize returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is +* singular and its inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB, + $ NBMIN, NN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMM, ZGEMV, ZSWAP, ZTRSM, ZTRTRI +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NB = ILAENV( 1, 'ZGETRI', ' ', N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGETRI', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form inv(U). If INFO > 0 from ZTRTRI, then U is singular, +* and the inverse is not computed. +* + CALL ZTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = MAX( LDWORK*NB, 1 ) + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZGETRI', ' ', N, -1, -1, -1 ) ) + END IF + ELSE + IWS = N + END IF +* +* Solve the equation inv(A)*L = inv(U) for inv(A). +* + IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + DO 20 J = N, 1, -1 +* +* Copy current column of L to WORK and replace with zeros. +* + DO 10 I = J + 1, N + WORK( I ) = A( I, J ) + A( I, J ) = ZERO + 10 CONTINUE +* +* Compute current column of inv(A). +* + IF( J.LT.N ) + $ CALL ZGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ), + $ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 ) + 20 CONTINUE + ELSE +* +* Use blocked code. +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 50 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) +* +* Copy current block column of L to WORK and replace with +* zeros. +* + DO 40 JJ = J, J + JB - 1 + DO 30 I = JJ + 1, N + WORK( I+( JJ-J )*LDWORK ) = A( I, JJ ) + A( I, JJ ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Compute current block column of inv(A). +* + IF( J+JB.LE.N ) + $ CALL ZGEMM( 'No transpose', 'No transpose', N, JB, + $ N-J-JB+1, -ONE, A( 1, J+JB ), LDA, + $ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA ) + CALL ZTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, + $ ONE, WORK( J ), LDWORK, A( 1, J ), LDA ) + 50 CONTINUE + END IF +* +* Apply column interchanges. +* + DO 60 J = N - 1, 1, -1 + JP = IPIV( J ) + IF( JP.NE.J ) + $ CALL ZSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 ) + 60 CONTINUE +* + WORK( 1 ) = IWS + RETURN +* +* End of ZGETRI +* + END diff --git a/costa/native/external/lapack/zgetrs.f b/costa/native/external/lapack/zgetrs.f new file mode 100644 index 000000000..508484618 --- /dev/null +++ b/costa/native/external/lapack/zgetrs.f @@ -0,0 +1,150 @@ + SUBROUTINE ZGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZGETRS solves a system of linear equations +* A * X = B, A**T * X = B, or A**H * X = B +* with a general N-by-N matrix A using the LU factorization computed +* by ZGETRF. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The factors L and U from the factorization A = P*L*U +* as computed by ZGETRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from ZGETRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLASWP, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGETRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( NOTRAN ) THEN +* +* Solve A * X = B. +* +* Apply row interchanges to the right hand sides. +* + CALL ZLASWP( NRHS, B, LDB, 1, N, IPIV, 1 ) +* +* Solve L*X = B, overwriting B with X. +* + CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) +* +* Solve U*X = B, overwriting B with X. +* + CALL ZTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) + ELSE +* +* Solve A**T * X = B or A**H * X = B. +* +* Solve U'*X = B, overwriting B with X. +* + CALL ZTRSM( 'Left', 'Upper', TRANS, 'Non-unit', N, NRHS, ONE, + $ A, LDA, B, LDB ) +* +* Solve L'*X = B, overwriting B with X. +* + CALL ZTRSM( 'Left', 'Lower', TRANS, 'Unit', N, NRHS, ONE, A, + $ LDA, B, LDB ) +* +* Apply row interchanges to the solution vectors. +* + CALL ZLASWP( NRHS, B, LDB, 1, N, IPIV, -1 ) + END IF +* + RETURN +* +* End of ZGETRS +* + END diff --git a/costa/native/external/lapack/zggbak.f b/costa/native/external/lapack/zggbak.f new file mode 100644 index 000000000..4e5e5dca4 --- /dev/null +++ b/costa/native/external/lapack/zggbak.f @@ -0,0 +1,216 @@ + SUBROUTINE ZGGBAK( JOB, SIDE, N, ILO, IHI, LSCALE, RSCALE, M, V, + $ LDV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOB, SIDE + INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION LSCALE( * ), RSCALE( * ) + COMPLEX*16 V( LDV, * ) +* .. +* +* Purpose +* ======= +* +* ZGGBAK forms the right or left eigenvectors of a complex generalized +* eigenvalue problem A*x = lambda*B*x, by backward transformation on +* the computed eigenvectors of the balanced pair of matrices output by +* ZGGBAL. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the type of backward transformation required: +* = 'N': do nothing, return immediately; +* = 'P': do backward transformation for permutation only; +* = 'S': do backward transformation for scaling only; +* = 'B': do backward transformations for both permutation and +* scaling. +* JOB must be the same as the argument JOB supplied to ZGGBAL. +* +* SIDE (input) CHARACTER*1 +* = 'R': V contains right eigenvectors; +* = 'L': V contains left eigenvectors. +* +* N (input) INTEGER +* The number of rows of the matrix V. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* The integers ILO and IHI determined by ZGGBAL. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* LSCALE (input) DOUBLE PRECISION array, dimension (N) +* Details of the permutations and/or scaling factors applied +* to the left side of A and B, as returned by ZGGBAL. +* +* RSCALE (input) DOUBLE PRECISION array, dimension (N) +* Details of the permutations and/or scaling factors applied +* to the right side of A and B, as returned by ZGGBAL. +* +* M (input) INTEGER +* The number of columns of the matrix V. M >= 0. +* +* V (input/output) COMPLEX*16 array, dimension (LDV,M) +* On entry, the matrix of right or left eigenvectors to be +* transformed, as returned by ZTGEVC. +* On exit, V is overwritten by the transformed eigenvectors. +* +* LDV (input) INTEGER +* The leading dimension of the matrix V. LDV >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* See R.C. Ward, Balancing the generalized eigenvalue problem, +* SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFTV, RIGHTV + INTEGER I, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + RIGHTV = LSAME( SIDE, 'R' ) + LEFTV = LSAME( SIDE, 'L' ) +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 ) THEN + INFO = -4 + ELSE IF( IHI.LT.ILO .OR. IHI.GT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -6 + ELSE IF( LDV.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGBAK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( M.EQ.0 ) + $ RETURN + IF( LSAME( JOB, 'N' ) ) + $ RETURN +* + IF( ILO.EQ.IHI ) + $ GO TO 30 +* +* Backward balance +* + IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN +* +* Backward transformation on right eigenvectors +* + IF( RIGHTV ) THEN + DO 10 I = ILO, IHI + CALL ZDSCAL( M, RSCALE( I ), V( I, 1 ), LDV ) + 10 CONTINUE + END IF +* +* Backward transformation on left eigenvectors +* + IF( LEFTV ) THEN + DO 20 I = ILO, IHI + CALL ZDSCAL( M, LSCALE( I ), V( I, 1 ), LDV ) + 20 CONTINUE + END IF + END IF +* +* Backward permutation +* + 30 CONTINUE + IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN +* +* Backward permutation on right eigenvectors +* + IF( RIGHTV ) THEN + IF( ILO.EQ.1 ) + $ GO TO 50 + DO 40 I = ILO - 1, 1, -1 + K = RSCALE( I ) + IF( K.EQ.I ) + $ GO TO 40 + CALL ZSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 40 CONTINUE +* + 50 CONTINUE + IF( IHI.EQ.N ) + $ GO TO 70 + DO 60 I = IHI + 1, N + K = RSCALE( I ) + IF( K.EQ.I ) + $ GO TO 60 + CALL ZSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 60 CONTINUE + END IF +* +* Backward permutation on left eigenvectors +* + 70 CONTINUE + IF( LEFTV ) THEN + IF( ILO.EQ.1 ) + $ GO TO 90 + DO 80 I = ILO - 1, 1, -1 + K = LSCALE( I ) + IF( K.EQ.I ) + $ GO TO 80 + CALL ZSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 80 CONTINUE +* + 90 CONTINUE + IF( IHI.EQ.N ) + $ GO TO 110 + DO 100 I = IHI + 1, N + K = LSCALE( I ) + IF( K.EQ.I ) + $ GO TO 100 + CALL ZSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 100 CONTINUE + END IF + END IF +* + 110 CONTINUE +* + RETURN +* +* End of ZGGBAK +* + END diff --git a/costa/native/external/lapack/zggbal.f b/costa/native/external/lapack/zggbal.f new file mode 100644 index 000000000..f557e4425 --- /dev/null +++ b/costa/native/external/lapack/zggbal.f @@ -0,0 +1,474 @@ + SUBROUTINE ZGGBAL( JOB, N, A, LDA, B, LDB, ILO, IHI, LSCALE, + $ RSCALE, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER IHI, ILO, INFO, LDA, LDB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION LSCALE( * ), RSCALE( * ), WORK( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZGGBAL balances a pair of general complex matrices (A,B). This +* involves, first, permuting A and B by similarity transformations to +* isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N +* elements on the diagonal; and second, applying a diagonal similarity +* transformation to rows and columns ILO to IHI to make the rows +* and columns as close in norm as possible. Both steps are optional. +* +* Balancing may reduce the 1-norm of the matrices, and improve the +* accuracy of the computed eigenvalues and/or eigenvectors in the +* generalized eigenvalue problem A*x = lambda*B*x. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies the operations to be performed on A and B: +* = 'N': none: simply set ILO = 1, IHI = N, LSCALE(I) = 1.0 +* and RSCALE(I) = 1.0 for i=1,...,N; +* = 'P': permute only; +* = 'S': scale only; +* = 'B': both permute and scale. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the input matrix A. +* On exit, A is overwritten by the balanced matrix. +* If JOB = 'N', A is not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX*16 array, dimension (LDB,N) +* On entry, the input matrix B. +* On exit, B is overwritten by the balanced matrix. +* If JOB = 'N', B is not referenced. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* ILO (output) INTEGER +* IHI (output) INTEGER +* ILO and IHI are set to integers such that on exit +* A(i,j) = 0 and B(i,j) = 0 if i > j and +* j = 1,...,ILO-1 or i = IHI+1,...,N. +* If JOB = 'N' or 'S', ILO = 1 and IHI = N. +* +* LSCALE (output) DOUBLE PRECISION array, dimension (N) +* Details of the permutations and scaling factors applied +* to the left side of A and B. If P(j) is the index of the +* row interchanged with row j, and D(j) is the scaling factor +* applied to row j, then +* LSCALE(j) = P(j) for J = 1,...,ILO-1 +* = D(j) for J = ILO,...,IHI +* = P(j) for J = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* RSCALE (output) DOUBLE PRECISION array, dimension (N) +* Details of the permutations and scaling factors applied +* to the right side of A and B. If P(j) is the index of the +* column interchanged with column j, and D(j) is the scaling +* factor applied to column j, then +* RSCALE(j) = P(j) for J = 1,...,ILO-1 +* = D(j) for J = ILO,...,IHI +* = P(j) for J = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (6*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* See R.C. WARD, Balancing the generalized eigenvalue problem, +* SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION THREE, SCLFAC + PARAMETER ( THREE = 3.0D+0, SCLFAC = 1.0D+1 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, ICAB, IFLOW, IP1, IR, IRAB, IT, J, JC, JP1, + $ K, KOUNT, L, LCAB, LM1, LRAB, LSFMAX, LSFMIN, + $ M, NR, NRP2 + DOUBLE PRECISION ALPHA, BASL, BETA, CAB, CMAX, COEF, COEF2, + $ COEF5, COR, EW, EWC, GAMMA, PGAMMA, RAB, SFMAX, + $ SFMIN, SUM, T, TA, TB, TC + COMPLEX*16 CDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DDOT, DLAMCH + EXTERNAL LSAME, IZAMAX, DDOT, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DSCAL, XERBLA, ZDSCAL, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, INT, LOG10, MAX, MIN, SIGN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGBAL', -INFO ) + RETURN + END IF +* + K = 1 + L = N +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( JOB, 'N' ) ) THEN + ILO = 1 + IHI = N + DO 10 I = 1, N + LSCALE( I ) = ONE + RSCALE( I ) = ONE + 10 CONTINUE + RETURN + END IF +* + IF( K.EQ.L ) THEN + ILO = 1 + IHI = 1 + LSCALE( 1 ) = ONE + RSCALE( 1 ) = ONE + RETURN + END IF +* + IF( LSAME( JOB, 'S' ) ) + $ GO TO 190 +* + GO TO 30 +* +* Permute the matrices A and B to isolate the eigenvalues. +* +* Find row with one nonzero in columns 1 through L +* + 20 CONTINUE + L = LM1 + IF( L.NE.1 ) + $ GO TO 30 +* + RSCALE( 1 ) = 1 + LSCALE( 1 ) = 1 + GO TO 190 +* + 30 CONTINUE + LM1 = L - 1 + DO 80 I = L, 1, -1 + DO 40 J = 1, LM1 + JP1 = J + 1 + IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO ) + $ GO TO 50 + 40 CONTINUE + J = L + GO TO 70 +* + 50 CONTINUE + DO 60 J = JP1, L + IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO ) + $ GO TO 80 + 60 CONTINUE + J = JP1 - 1 +* + 70 CONTINUE + M = L + IFLOW = 1 + GO TO 160 + 80 CONTINUE + GO TO 100 +* +* Find column with one nonzero in rows K through N +* + 90 CONTINUE + K = K + 1 +* + 100 CONTINUE + DO 150 J = K, L + DO 110 I = K, LM1 + IP1 = I + 1 + IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO ) + $ GO TO 120 + 110 CONTINUE + I = L + GO TO 140 + 120 CONTINUE + DO 130 I = IP1, L + IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO ) + $ GO TO 150 + 130 CONTINUE + I = IP1 - 1 + 140 CONTINUE + M = K + IFLOW = 2 + GO TO 160 + 150 CONTINUE + GO TO 190 +* +* Permute rows M and I +* + 160 CONTINUE + LSCALE( M ) = I + IF( I.EQ.M ) + $ GO TO 170 + CALL ZSWAP( N-K+1, A( I, K ), LDA, A( M, K ), LDA ) + CALL ZSWAP( N-K+1, B( I, K ), LDB, B( M, K ), LDB ) +* +* Permute columns M and J +* + 170 CONTINUE + RSCALE( M ) = J + IF( J.EQ.M ) + $ GO TO 180 + CALL ZSWAP( L, A( 1, J ), 1, A( 1, M ), 1 ) + CALL ZSWAP( L, B( 1, J ), 1, B( 1, M ), 1 ) +* + 180 CONTINUE + GO TO ( 20, 90 )IFLOW +* + 190 CONTINUE + ILO = K + IHI = L +* + IF( ILO.EQ.IHI ) + $ RETURN +* + IF( LSAME( JOB, 'P' ) ) + $ RETURN +* +* Balance the submatrix in rows ILO to IHI. +* + NR = IHI - ILO + 1 + DO 200 I = ILO, IHI + RSCALE( I ) = ZERO + LSCALE( I ) = ZERO +* + WORK( I ) = ZERO + WORK( I+N ) = ZERO + WORK( I+2*N ) = ZERO + WORK( I+3*N ) = ZERO + WORK( I+4*N ) = ZERO + WORK( I+5*N ) = ZERO + 200 CONTINUE +* +* Compute right side vector in resulting linear equations +* + BASL = LOG10( SCLFAC ) + DO 240 I = ILO, IHI + DO 230 J = ILO, IHI + IF( A( I, J ).EQ.CZERO ) THEN + TA = ZERO + GO TO 210 + END IF + TA = LOG10( CABS1( A( I, J ) ) ) / BASL +* + 210 CONTINUE + IF( B( I, J ).EQ.CZERO ) THEN + TB = ZERO + GO TO 220 + END IF + TB = LOG10( CABS1( B( I, J ) ) ) / BASL +* + 220 CONTINUE + WORK( I+4*N ) = WORK( I+4*N ) - TA - TB + WORK( J+5*N ) = WORK( J+5*N ) - TA - TB + 230 CONTINUE + 240 CONTINUE +* + COEF = ONE / DBLE( 2*NR ) + COEF2 = COEF*COEF + COEF5 = HALF*COEF2 + NRP2 = NR + 2 + BETA = ZERO + IT = 1 +* +* Start generalized conjugate gradient iteration +* + 250 CONTINUE +* + GAMMA = DDOT( NR, WORK( ILO+4*N ), 1, WORK( ILO+4*N ), 1 ) + + $ DDOT( NR, WORK( ILO+5*N ), 1, WORK( ILO+5*N ), 1 ) +* + EW = ZERO + EWC = ZERO + DO 260 I = ILO, IHI + EW = EW + WORK( I+4*N ) + EWC = EWC + WORK( I+5*N ) + 260 CONTINUE +* + GAMMA = COEF*GAMMA - COEF2*( EW**2+EWC**2 ) - COEF5*( EW-EWC )**2 + IF( GAMMA.EQ.ZERO ) + $ GO TO 350 + IF( IT.NE.1 ) + $ BETA = GAMMA / PGAMMA + T = COEF5*( EWC-THREE*EW ) + TC = COEF5*( EW-THREE*EWC ) +* + CALL DSCAL( NR, BETA, WORK( ILO ), 1 ) + CALL DSCAL( NR, BETA, WORK( ILO+N ), 1 ) +* + CALL DAXPY( NR, COEF, WORK( ILO+4*N ), 1, WORK( ILO+N ), 1 ) + CALL DAXPY( NR, COEF, WORK( ILO+5*N ), 1, WORK( ILO ), 1 ) +* + DO 270 I = ILO, IHI + WORK( I ) = WORK( I ) + TC + WORK( I+N ) = WORK( I+N ) + T + 270 CONTINUE +* +* Apply matrix to vector +* + DO 300 I = ILO, IHI + KOUNT = 0 + SUM = ZERO + DO 290 J = ILO, IHI + IF( A( I, J ).EQ.CZERO ) + $ GO TO 280 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( J ) + 280 CONTINUE + IF( B( I, J ).EQ.CZERO ) + $ GO TO 290 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( J ) + 290 CONTINUE + WORK( I+2*N ) = DBLE( KOUNT )*WORK( I+N ) + SUM + 300 CONTINUE +* + DO 330 J = ILO, IHI + KOUNT = 0 + SUM = ZERO + DO 320 I = ILO, IHI + IF( A( I, J ).EQ.CZERO ) + $ GO TO 310 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( I+N ) + 310 CONTINUE + IF( B( I, J ).EQ.CZERO ) + $ GO TO 320 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( I+N ) + 320 CONTINUE + WORK( J+3*N ) = DBLE( KOUNT )*WORK( J ) + SUM + 330 CONTINUE +* + SUM = DDOT( NR, WORK( ILO+N ), 1, WORK( ILO+2*N ), 1 ) + + $ DDOT( NR, WORK( ILO ), 1, WORK( ILO+3*N ), 1 ) + ALPHA = GAMMA / SUM +* +* Determine correction to current iteration +* + CMAX = ZERO + DO 340 I = ILO, IHI + COR = ALPHA*WORK( I+N ) + IF( ABS( COR ).GT.CMAX ) + $ CMAX = ABS( COR ) + LSCALE( I ) = LSCALE( I ) + COR + COR = ALPHA*WORK( I ) + IF( ABS( COR ).GT.CMAX ) + $ CMAX = ABS( COR ) + RSCALE( I ) = RSCALE( I ) + COR + 340 CONTINUE + IF( CMAX.LT.HALF ) + $ GO TO 350 +* + CALL DAXPY( NR, -ALPHA, WORK( ILO+2*N ), 1, WORK( ILO+4*N ), 1 ) + CALL DAXPY( NR, -ALPHA, WORK( ILO+3*N ), 1, WORK( ILO+5*N ), 1 ) +* + PGAMMA = GAMMA + IT = IT + 1 + IF( IT.LE.NRP2 ) + $ GO TO 250 +* +* End generalized conjugate gradient iteration +* + 350 CONTINUE + SFMIN = DLAMCH( 'S' ) + SFMAX = ONE / SFMIN + LSFMIN = INT( LOG10( SFMIN ) / BASL+ONE ) + LSFMAX = INT( LOG10( SFMAX ) / BASL ) + DO 360 I = ILO, IHI + IRAB = IZAMAX( N-ILO+1, A( I, ILO ), LDA ) + RAB = ABS( A( I, IRAB+ILO-1 ) ) + IRAB = IZAMAX( N-ILO+1, B( I, ILO ), LDA ) + RAB = MAX( RAB, ABS( B( I, IRAB+ILO-1 ) ) ) + LRAB = INT( LOG10( RAB+SFMIN ) / BASL+ONE ) + IR = LSCALE( I ) + SIGN( HALF, LSCALE( I ) ) + IR = MIN( MAX( IR, LSFMIN ), LSFMAX, LSFMAX-LRAB ) + LSCALE( I ) = SCLFAC**IR + ICAB = IZAMAX( IHI, A( 1, I ), 1 ) + CAB = ABS( A( ICAB, I ) ) + ICAB = IZAMAX( IHI, B( 1, I ), 1 ) + CAB = MAX( CAB, ABS( B( ICAB, I ) ) ) + LCAB = INT( LOG10( CAB+SFMIN ) / BASL+ONE ) + JC = RSCALE( I ) + SIGN( HALF, RSCALE( I ) ) + JC = MIN( MAX( JC, LSFMIN ), LSFMAX, LSFMAX-LCAB ) + RSCALE( I ) = SCLFAC**JC + 360 CONTINUE +* +* Row scaling of matrices A and B +* + DO 370 I = ILO, IHI + CALL ZDSCAL( N-ILO+1, LSCALE( I ), A( I, ILO ), LDA ) + CALL ZDSCAL( N-ILO+1, LSCALE( I ), B( I, ILO ), LDB ) + 370 CONTINUE +* +* Column scaling of matrices A and B +* + DO 380 J = ILO, IHI + CALL ZDSCAL( IHI, RSCALE( J ), A( 1, J ), 1 ) + CALL ZDSCAL( IHI, RSCALE( J ), B( 1, J ), 1 ) + 380 CONTINUE +* + RETURN +* +* End of ZGGBAL +* + END diff --git a/costa/native/external/lapack/zgges.f b/costa/native/external/lapack/zgges.f new file mode 100644 index 000000000..b10ff4231 --- /dev/null +++ b/costa/native/external/lapack/zgges.f @@ -0,0 +1,475 @@ + SUBROUTINE ZGGES( JOBVSL, JOBVSR, SORT, DELCTG, N, A, LDA, B, LDB, + $ SDIM, ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK, + $ LWORK, RWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR, SORT + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), + $ WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL DELCTG + EXTERNAL DELCTG +* .. +* +* Purpose +* ======= +* +* ZGGES computes for a pair of N-by-N complex nonsymmetric matrices +* (A,B), the generalized eigenvalues, the generalized complex Schur +* form (S, T), and optionally left and/or right Schur vectors (VSL +* and VSR). This gives the generalized Schur factorization +* +* (A,B) = ( (VSL)*S*(VSR)**H, (VSL)*T*(VSR)**H ) +* +* where (VSR)**H is the conjugate-transpose of VSR. +* +* Optionally, it also orders the eigenvalues so that a selected cluster +* of eigenvalues appears in the leading diagonal blocks of the upper +* triangular matrix S and the upper triangular matrix T. The leading +* columns of VSL and VSR then form an unitary basis for the +* corresponding left and right eigenspaces (deflating subspaces). +* +* (If only the generalized eigenvalues are needed, use the driver +* ZGGEV instead, which is faster.) +* +* A generalized eigenvalue for a pair of matrices (A,B) is a scalar w +* or a ratio alpha/beta = w, such that A - w*B is singular. It is +* usually represented as the pair (alpha,beta), as there is a +* reasonable interpretation for beta=0, and even for both being zero. +* +* A pair of matrices (S,T) is in generalized complex Schur form if S +* and T are upper triangular and, in addition, the diagonal elements +* of T are non-negative real numbers. +* +* Arguments +* ========= +* +* JOBVSL (input) CHARACTER*1 +* = 'N': do not compute the left Schur vectors; +* = 'V': compute the left Schur vectors. +* +* JOBVSR (input) CHARACTER*1 +* = 'N': do not compute the right Schur vectors; +* = 'V': compute the right Schur vectors. +* +* SORT (input) CHARACTER*1 +* Specifies whether or not to order the eigenvalues on the +* diagonal of the generalized Schur form. +* = 'N': Eigenvalues are not ordered; +* = 'S': Eigenvalues are ordered (see DELZTG). +* +* DELZTG (input) LOGICAL FUNCTION of two COMPLEX*16 arguments +* DELZTG must be declared EXTERNAL in the calling subroutine. +* If SORT = 'N', DELZTG is not referenced. +* If SORT = 'S', DELZTG is used to select eigenvalues to sort +* to the top left of the Schur form. +* An eigenvalue ALPHA(j)/BETA(j) is selected if +* DELZTG(ALPHA(j),BETA(j)) is true. +* +* Note that a selected complex eigenvalue may no longer satisfy +* DELZTG(ALPHA(j),BETA(j)) = .TRUE. after ordering, since +* ordering may change the value of complex eigenvalues +* (especially if the eigenvalue is ill-conditioned), in this +* case INFO is set to N+2 (See INFO below). +* +* N (input) INTEGER +* The order of the matrices A, B, VSL, and VSR. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA, N) +* On entry, the first of the pair of matrices. +* On exit, A has been overwritten by its generalized Schur +* form S. +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) COMPLEX*16 array, dimension (LDB, N) +* On entry, the second of the pair of matrices. +* On exit, B has been overwritten by its generalized Schur +* form T. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* SDIM (output) INTEGER +* If SORT = 'N', SDIM = 0. +* If SORT = 'S', SDIM = number of eigenvalues (after sorting) +* for which DELZTG is true. +* +* ALPHA (output) COMPLEX*16 array, dimension (N) +* BETA (output) COMPLEX*16 array, dimension (N) +* On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the +* generalized eigenvalues. ALPHA(j), j=1,...,N and BETA(j), +* j=1,...,N are the diagonals of the complex Schur form (A,B) +* output by ZGGES. The BETA(j) will be non-negative real. +* +* Note: the quotients ALPHA(j)/BETA(j) may easily over- or +* underflow, and BETA(j) may even be zero. Thus, the user +* should avoid naively computing the ratio alpha/beta. +* However, ALPHA will be always less than and usually +* comparable with norm(A) in magnitude, and BETA always less +* than and usually comparable with norm(B). +* +* VSL (output) COMPLEX*16 array, dimension (LDVSL,N) +* If JOBVSL = 'V', VSL will contain the left Schur vectors. +* Not referenced if JOBVSL = 'N'. +* +* LDVSL (input) INTEGER +* The leading dimension of the matrix VSL. LDVSL >= 1, and +* if JOBVSL = 'V', LDVSL >= N. +* +* VSR (output) COMPLEX*16 array, dimension (LDVSR,N) +* If JOBVSR = 'V', VSR will contain the right Schur vectors. +* Not referenced if JOBVSR = 'N'. +* +* LDVSR (input) INTEGER +* The leading dimension of the matrix VSR. LDVSR >= 1, and +* if JOBVSR = 'V', LDVSR >= N. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,2*N). +* For good performance, LWORK must generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (8*N) +* +* BWORK (workspace) LOGICAL array, dimension (N) +* Not referenced if SORT = 'N'. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* =1,...,N: +* The QZ iteration failed. (A,B) are not in Schur +* form, but ALPHA(j) and BETA(j) should be correct for +* j=INFO+1,...,N. +* > N: =N+1: other than QZ iteration failed in ZHGEQZ +* =N+2: after reordering, roundoff changed values of +* some complex eigenvalues so that leading +* eigenvalues in the Generalized Schur form no +* longer satisfy DELZTG=.TRUE. This could also +* be caused due to scaling. +* =N+3: reordering falied in ZTGSEN. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL, + $ LQUERY, WANTST + INTEGER I, ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, + $ ILO, IRIGHT, IROWS, IRWRK, ITAU, IWRK, LWKMIN, + $ LWKOPT + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PVSL, + $ PVSR, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + DOUBLE PRECISION DIF( 2 ) +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQRF, ZGGBAK, ZGGBAL, ZGGHRD, ZHGEQZ, + $ ZLACPY, ZLASCL, ZLASET, ZTGSEN, ZUNGQR, ZUNMQR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* + WANTST = LSAME( SORT, 'S' ) +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -14 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -16 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + LWKMIN = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + LWKMIN = MAX( 1, 2*N ) + LWKOPT = N + N*ILAENV( 1, 'ZGEQRF', ' ', N, 1, N, 0 ) + IF( ILVSL ) THEN + LWKOPT = MAX( LWKOPT, N+N*ILAENV( 1, 'ZUNGQR', ' ', N, 1, N, + $ -1 ) ) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) + $ INFO = -18 +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGES ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + WORK( 1 ) = LWKOPT + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF +* + IF( ILASCL ) + $ CALL ZLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = ZLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF +* + IF( ILBSCL ) + $ CALL ZLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (Real Workspace: need 6*N) +* + ILEFT = 1 + IRIGHT = N + 1 + IRWRK = IRIGHT + N + CALL ZGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Complex Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = 1 + IWRK = ITAU + IROWS + CALL ZGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* (Complex Workspace: need N, prefer N*NB) +* + CALL ZUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VSL +* (Complex Workspace: need N, prefer N*NB) +* + IF( ILVSL ) THEN + CALL ZLASET( 'Full', N, N, CZERO, CONE, VSL, LDVSL ) + CALL ZLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + CALL ZUNGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VSR +* + IF( ILVSR ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + CALL ZGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IERR ) +* + SDIM = 0 +* +* Perform QZ algorithm, computing Schur vectors if desired +* (Complex Workspace: need N) +* (Real Workspace: need N) +* + IWRK = ITAU + CALL ZHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK( IWRK ), + $ LWORK+1-IWRK, RWORK( IRWRK ), IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 30 + END IF +* +* Sort eigenvalues ALPHA/BETA if desired +* (Workspace: none needed) +* + IF( WANTST ) THEN +* +* Undo scaling on eigenvalues before selecting +* + IF( ILASCL ) + $ CALL ZLASCL( 'G', 0, 0, ANRM, ANRMTO, N, 1, ALPHA, N, IERR ) + IF( ILBSCL ) + $ CALL ZLASCL( 'G', 0, 0, BNRM, BNRMTO, N, 1, BETA, N, IERR ) +* +* Select eigenvalues +* + DO 10 I = 1, N + BWORK( I ) = DELCTG( ALPHA( I ), BETA( I ) ) + 10 CONTINUE +* + CALL ZTGSEN( 0, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, ALPHA, + $ BETA, VSL, LDVSL, VSR, LDVSR, SDIM, PVSL, PVSR, + $ DIF, WORK( IWRK ), LWORK-IWRK+1, IDUM, 1, IERR ) + IF( IERR.EQ.1 ) + $ INFO = N + 3 +* + END IF +* +* Apply back-permutation to VSL and VSR +* (Workspace: none needed) +* + IF( ILVSL ) + $ CALL ZGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSL, LDVSL, IERR ) + IF( ILVSR ) + $ CALL ZGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSR, LDVSR, IERR ) +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL ZLASCL( 'U', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR ) + CALL ZLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL ZLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR ) + CALL ZLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + IF( WANTST ) THEN +* +* Check if reordering is correct +* + LASTSL = .TRUE. + SDIM = 0 + DO 20 I = 1, N + CURSL = DELCTG( ALPHA( I ), BETA( I ) ) + IF( CURSL ) + $ SDIM = SDIM + 1 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + LASTSL = CURSL + 20 CONTINUE +* + END IF +* + 30 CONTINUE +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZGGES +* + END diff --git a/costa/native/external/lapack/zggesx.f b/costa/native/external/lapack/zggesx.f new file mode 100644 index 000000000..89c22433d --- /dev/null +++ b/costa/native/external/lapack/zggesx.f @@ -0,0 +1,544 @@ + SUBROUTINE ZGGESX( JOBVSL, JOBVSR, SORT, DELCTG, SENSE, N, A, LDA, + $ B, LDB, SDIM, ALPHA, BETA, VSL, LDVSL, VSR, + $ LDVSR, RCONDE, RCONDV, WORK, LWORK, RWORK, + $ IWORK, LIWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR, SENSE, SORT + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LIWORK, LWORK, N, + $ SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + INTEGER IWORK( * ) + DOUBLE PRECISION RCONDE( 2 ), RCONDV( 2 ), RWORK( * ) + COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), + $ WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL DELCTG + EXTERNAL DELCTG +* .. +* +* Purpose +* ======= +* +* ZGGESX computes for a pair of N-by-N complex nonsymmetric matrices +* (A,B), the generalized eigenvalues, the complex Schur form (S,T), +* and, optionally, the left and/or right matrices of Schur vectors (VSL +* and VSR). This gives the generalized Schur factorization +* +* (A,B) = ( (VSL) S (VSR)**H, (VSL) T (VSR)**H ) +* +* where (VSR)**H is the conjugate-transpose of VSR. +* +* Optionally, it also orders the eigenvalues so that a selected cluster +* of eigenvalues appears in the leading diagonal blocks of the upper +* triangular matrix S and the upper triangular matrix T; computes +* a reciprocal condition number for the average of the selected +* eigenvalues (RCONDE); and computes a reciprocal condition number for +* the right and left deflating subspaces corresponding to the selected +* eigenvalues (RCONDV). The leading columns of VSL and VSR then form +* an orthonormal basis for the corresponding left and right eigenspaces +* (deflating subspaces). +* +* A generalized eigenvalue for a pair of matrices (A,B) is a scalar w +* or a ratio alpha/beta = w, such that A - w*B is singular. It is +* usually represented as the pair (alpha,beta), as there is a +* reasonable interpretation for beta=0 or for both being zero. +* +* A pair of matrices (S,T) is in generalized complex Schur form if T is +* upper triangular with non-negative diagonal and S is upper +* triangular. +* +* Arguments +* ========= +* +* JOBVSL (input) CHARACTER*1 +* = 'N': do not compute the left Schur vectors; +* = 'V': compute the left Schur vectors. +* +* JOBVSR (input) CHARACTER*1 +* = 'N': do not compute the right Schur vectors; +* = 'V': compute the right Schur vectors. +* +* SORT (input) CHARACTER*1 +* Specifies whether or not to order the eigenvalues on the +* diagonal of the generalized Schur form. +* = 'N': Eigenvalues are not ordered; +* = 'S': Eigenvalues are ordered (see DELZTG). +* +* DELZTG (input) LOGICAL FUNCTION of two COMPLEX*16 arguments +* DELZTG must be declared EXTERNAL in the calling subroutine. +* If SORT = 'N', DELZTG is not referenced. +* If SORT = 'S', DELZTG is used to select eigenvalues to sort +* to the top left of the Schur form. +* Note that a selected complex eigenvalue may no longer satisfy +* DELZTG(ALPHA(j),BETA(j)) = .TRUE. after ordering, since +* ordering may change the value of complex eigenvalues +* (especially if the eigenvalue is ill-conditioned), in this +* case INFO is set to N+3 see INFO below). +* +* SENSE (input) CHARACTER +* Determines which reciprocal condition numbers are computed. +* = 'N' : None are computed; +* = 'E' : Computed for average of selected eigenvalues only; +* = 'V' : Computed for selected deflating subspaces only; +* = 'B' : Computed for both. +* If SENSE = 'E', 'V', or 'B', SORT must equal 'S'. +* +* N (input) INTEGER +* The order of the matrices A, B, VSL, and VSR. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA, N) +* On entry, the first of the pair of matrices. +* On exit, A has been overwritten by its generalized Schur +* form S. +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) COMPLEX*16 array, dimension (LDB, N) +* On entry, the second of the pair of matrices. +* On exit, B has been overwritten by its generalized Schur +* form T. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* SDIM (output) INTEGER +* If SORT = 'N', SDIM = 0. +* If SORT = 'S', SDIM = number of eigenvalues (after sorting) +* for which DELZTG is true. +* +* ALPHA (output) COMPLEX*16 array, dimension (N) +* BETA (output) COMPLEX*16 array, dimension (N) +* On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the +* generalized eigenvalues. ALPHA(j) and BETA(j),j=1,...,N are +* the diagonals of the complex Schur form (S,T). BETA(j) will +* be non-negative real. +* +* Note: the quotients ALPHA(j)/BETA(j) may easily over- or +* underflow, and BETA(j) may even be zero. Thus, the user +* should avoid naively computing the ratio alpha/beta. +* However, ALPHA will be always less than and usually +* comparable with norm(A) in magnitude, and BETA always less +* than and usually comparable with norm(B). +* +* VSL (output) COMPLEX*16 array, dimension (LDVSL,N) +* If JOBVSL = 'V', VSL will contain the left Schur vectors. +* Not referenced if JOBVSL = 'N'. +* +* LDVSL (input) INTEGER +* The leading dimension of the matrix VSL. LDVSL >=1, and +* if JOBVSL = 'V', LDVSL >= N. +* +* VSR (output) COMPLEX*16 array, dimension (LDVSR,N) +* If JOBVSR = 'V', VSR will contain the right Schur vectors. +* Not referenced if JOBVSR = 'N'. +* +* LDVSR (input) INTEGER +* The leading dimension of the matrix VSR. LDVSR >= 1, and +* if JOBVSR = 'V', LDVSR >= N. +* +* RCONDE (output) DOUBLE PRECISION array, dimension ( 2 ) +* If SENSE = 'E' or 'B', RCONDE(1) and RCONDE(2) contain the +* reciprocal condition numbers for the average of the selected +* eigenvalues. +* Not referenced if SENSE = 'N' or 'V'. +* +* RCONDV (output) DOUBLE PRECISION array, dimension ( 2 ) +* If SENSE = 'V' or 'B', RCONDV(1) and RCONDV(2) contain the +* reciprocal condition number for the selected deflating +* subspaces. +* Not referenced if SENSE = 'N' or 'E'. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 2*N. +* If SENSE = 'E', 'V', or 'B', +* LWORK >= MAX(2*N, 2*SDIM*(N-SDIM)). +* +* RWORK (workspace) DOUBLE PRECISION array, dimension ( 8*N ) +* Real workspace. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* Not referenced if SENSE = 'N'. +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array WORK. LIWORK >= N+2. +* +* BWORK (workspace) LOGICAL array, dimension (N) +* Not referenced if SORT = 'N'. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* = 1,...,N: +* The QZ iteration failed. (A,B) are not in Schur +* form, but ALPHA(j) and BETA(j) should be correct for +* j=INFO+1,...,N. +* > N: =N+1: other than QZ iteration failed in ZHGEQZ +* =N+2: after reordering, roundoff changed values of +* some complex eigenvalues so that leading +* eigenvalues in the Generalized Schur form no +* longer satisfy DELZTG=.TRUE. This could also +* be caused due to scaling. +* =N+3: reordering failed in ZTGSEN. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL, + $ WANTSB, WANTSE, WANTSN, WANTST, WANTSV + INTEGER I, ICOLS, IERR, IHI, IJOB, IJOBVL, IJOBVR, + $ ILEFT, ILO, IRIGHT, IROWS, IRWRK, ITAU, IWRK, + $ LIWMIN, MAXWRK, MINWRK + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PL, + $ PR, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION DIF( 2 ) +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQRF, ZGGBAK, ZGGBAL, ZGGHRD, ZHGEQZ, + $ ZLACPY, ZLASCL, ZLASET, ZTGSEN, ZUNGQR, ZUNMQR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* + WANTST = LSAME( SORT, 'S' ) + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) + IF( WANTSN ) THEN + IJOB = 0 + IWORK( 1 ) = 1 + ELSE IF( WANTSE ) THEN + IJOB = 1 + ELSE IF( WANTSV ) THEN + IJOB = 2 + ELSE IF( WANTSB ) THEN + IJOB = 4 + END IF +* +* Test the input arguments +* + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR. + $ ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -15 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -17 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. LWORK.GE.1 ) THEN + MINWRK = MAX( 1, 2*N ) + MAXWRK = N + N*ILAENV( 1, 'ZGEQRF', ' ', N, 1, N, 0 ) + IF( ILVSL ) THEN + MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'ZUNGQR', ' ', N, 1, N, + $ -1 ) ) + END IF + WORK( 1 ) = MAXWRK + END IF + IF( .NOT.WANTSN ) THEN + LIWMIN = N + 2 + ELSE + LIWMIN = 1 + END IF + IWORK( 1 ) = LIWMIN +* + IF( INFO.EQ.0 .AND. LWORK.LT.MINWRK ) THEN + INFO = -21 + ELSE IF( INFO.EQ.0 .AND. IJOB.GE.1 ) THEN + IF( LIWORK.LT.LIWMIN ) + $ INFO = -24 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGESX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL ZLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = ZLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL ZLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (Real Workspace: need 6*N) +* + ILEFT = 1 + IRIGHT = N + 1 + IRWRK = IRIGHT + N + CALL ZGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Complex Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = 1 + IWRK = ITAU + IROWS + CALL ZGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the unitary transformation to matrix A +* (Complex Workspace: need N, prefer N*NB) +* + CALL ZUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VSL +* (Complex Workspace: need N, prefer N*NB) +* + IF( ILVSL ) THEN + CALL ZLASET( 'Full', N, N, CZERO, CONE, VSL, LDVSL ) + CALL ZLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + CALL ZUNGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VSR +* + IF( ILVSR ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + CALL ZGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IERR ) +* + SDIM = 0 +* +* Perform QZ algorithm, computing Schur vectors if desired +* (Complex Workspace: need N) +* (Real Workspace: need N) +* + IWRK = ITAU + CALL ZHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK( IWRK ), + $ LWORK+1-IWRK, RWORK( IRWRK ), IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 40 + END IF +* +* Sort eigenvalues ALPHA/BETA and compute the reciprocal of +* condition number(s) +* + IF( WANTST ) THEN +* +* Undo scaling on eigenvalues before DELZTGing +* + IF( ILASCL ) + $ CALL ZLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) + IF( ILBSCL ) + $ CALL ZLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* +* Select eigenvalues +* + DO 10 I = 1, N + BWORK( I ) = DELCTG( ALPHA( I ), BETA( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues, transform Generalized Schur vectors, and +* compute reciprocal condition numbers +* (Complex Workspace: If IJOB >= 1, need MAX(1, 2*SDIM*(N-SDIM)) +* otherwise, need 1 ) +* + CALL ZTGSEN( IJOB, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, + $ ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, SDIM, PL, PR, + $ DIF, WORK( IWRK ), LWORK-IWRK+1, IWORK, LIWORK, + $ IERR ) +* + IF( IJOB.GE.1 ) + $ MAXWRK = MAX( MAXWRK, 2*SDIM*( N-SDIM ) ) + IF( IERR.EQ.-21 ) THEN +* +* not enough complex workspace +* + INFO = -21 + ELSE + RCONDE( 1 ) = PL + RCONDE( 2 ) = PL + RCONDV( 1 ) = DIF( 1 ) + RCONDV( 2 ) = DIF( 2 ) + IF( IERR.EQ.1 ) + $ INFO = N + 3 + END IF +* + END IF +* +* Apply permutation to VSL and VSR +* (Workspace: none needed) +* + IF( ILVSL ) + $ CALL ZGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSL, LDVSL, IERR ) +* + IF( ILVSR ) + $ CALL ZGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSR, LDVSR, IERR ) +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL ZLASCL( 'U', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR ) + CALL ZLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL ZLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR ) + CALL ZLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + 20 CONTINUE +* + IF( WANTST ) THEN +* +* Check if reordering is correct +* + LASTSL = .TRUE. + SDIM = 0 + DO 30 I = 1, N + CURSL = DELCTG( ALPHA( I ), BETA( I ) ) + IF( CURSL ) + $ SDIM = SDIM + 1 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + LASTSL = CURSL + 30 CONTINUE +* + END IF +* + 40 CONTINUE +* + WORK( 1 ) = MAXWRK + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of ZGGESX +* + END diff --git a/costa/native/external/lapack/zggev.f b/costa/native/external/lapack/zggev.f new file mode 100644 index 000000000..33228cb99 --- /dev/null +++ b/costa/native/external/lapack/zggev.f @@ -0,0 +1,448 @@ + SUBROUTINE ZGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, + $ VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGGEV computes for a pair of N-by-N complex nonsymmetric matrices +* (A,B), the generalized eigenvalues, and optionally, the left and/or +* right generalized eigenvectors. +* +* A generalized eigenvalue for a pair of matrices (A,B) is a scalar +* lambda or a ratio alpha/beta = lambda, such that A - lambda*B is +* singular. It is usually represented as the pair (alpha,beta), as +* there is a reasonable interpretation for beta=0, and even for both +* being zero. +* +* The right generalized eigenvector v(j) corresponding to the +* generalized eigenvalue lambda(j) of (A,B) satisfies +* +* A * v(j) = lambda(j) * B * v(j). +* +* The left generalized eigenvector u(j) corresponding to the +* generalized eigenvalues lambda(j) of (A,B) satisfies +* +* u(j)**H * A = lambda(j) * u(j)**H * B +* +* where u(j)**H is the conjugate-transpose of u(j). +* +* Arguments +* ========= +* +* JOBVL (input) CHARACTER*1 +* = 'N': do not compute the left generalized eigenvectors; +* = 'V': compute the left generalized eigenvectors. +* +* JOBVR (input) CHARACTER*1 +* = 'N': do not compute the right generalized eigenvectors; +* = 'V': compute the right generalized eigenvectors. +* +* N (input) INTEGER +* The order of the matrices A, B, VL, and VR. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA, N) +* On entry, the matrix A in the pair (A,B). +* On exit, A has been overwritten. +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) COMPLEX*16 array, dimension (LDB, N) +* On entry, the matrix B in the pair (A,B). +* On exit, B has been overwritten. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* ALPHA (output) COMPLEX*16 array, dimension (N) +* BETA (output) COMPLEX*16 array, dimension (N) +* On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the +* generalized eigenvalues. +* +* Note: the quotients ALPHA(j)/BETA(j) may easily over- or +* underflow, and BETA(j) may even be zero. Thus, the user +* should avoid naively computing the ratio alpha/beta. +* However, ALPHA will be always less than and usually +* comparable with norm(A) in magnitude, and BETA always less +* than and usually comparable with norm(B). +* +* VL (output) COMPLEX*16 array, dimension (LDVL,N) +* If JOBVL = 'V', the left generalized eigenvectors u(j) are +* stored one after another in the columns of VL, in the same +* order as their eigenvalues. +* Each eigenvector will be scaled so the largest component +* will have abs(real part) + abs(imag. part) = 1. +* Not referenced if JOBVL = 'N'. +* +* LDVL (input) INTEGER +* The leading dimension of the matrix VL. LDVL >= 1, and +* if JOBVL = 'V', LDVL >= N. +* +* VR (output) COMPLEX*16 array, dimension (LDVR,N) +* If JOBVR = 'V', the right generalized eigenvectors v(j) are +* stored one after another in the columns of VR, in the same +* order as their eigenvalues. +* Each eigenvector will be scaled so the largest component +* will have abs(real part) + abs(imag. part) = 1. +* Not referenced if JOBVR = 'N'. +* +* LDVR (input) INTEGER +* The leading dimension of the matrix VR. LDVR >= 1, and +* if JOBVR = 'V', LDVR >= N. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,2*N). +* For good performance, LWORK must generally be larger. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace/output) DOUBLE PRECISION array, dimension (8*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* =1,...,N: +* The QZ iteration failed. No eigenvectors have been +* calculated, but ALPHA(j) and BETA(j) should be +* correct for j=INFO+1,...,N. +* > N: =N+1: other then QZ iteration failed in DHGEQZ, +* =N+2: error return from DTGEVC. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY + CHARACTER CHTEMP + INTEGER ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, ILO, + $ IN, IRIGHT, IROWS, IRWRK, ITAU, IWRK, JC, JR, + $ LWKMIN, LWKOPT + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SMLNUM, TEMP + COMPLEX*16 X +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQRF, ZGGBAK, ZGGBAL, ZGGHRD, ZHGEQZ, + $ ZLACPY, ZLASCL, ZLASET, ZTGEVC, ZUNGQR, ZUNMQR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION ABS1 +* .. +* .. Statement Function definitions .. + ABS1( X ) = ABS( DBLE( X ) ) + ABS( DIMAG( X ) ) +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -11 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -13 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. The workspace is +* computed assuming ILO = 1 and IHI = N, the worst case.) +* + LWKMIN = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + LWKOPT = N + N*ILAENV( 1, 'ZGEQRF', ' ', N, 1, N, 0 ) + LWKMIN = MAX( 1, 2*N ) + WORK( 1 ) = LWKOPT + END IF +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) + $ INFO = -15 +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + WORK( 1 ) = LWKOPT + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'E' )*DLAMCH( 'B' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL ZLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = ZLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL ZLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrices A, B to isolate eigenvalues if possible +* (Real Workspace: need 6*N) +* + ILEFT = 1 + IRIGHT = N + 1 + IRWRK = IRIGHT + N + CALL ZGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Complex Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + IF( ILV ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = 1 + IWRK = ITAU + IROWS + CALL ZGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* (Complex Workspace: need N, prefer N*NB) +* + CALL ZUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VL +* (Complex Workspace: need N, prefer N*NB) +* + IF( ILVL ) THEN + CALL ZLASET( 'Full', N, N, CZERO, CONE, VL, LDVL ) + CALL ZLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + CALL ZUNGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VR +* + IF( ILVR ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* + IF( ILV ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL ZGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IERR ) + ELSE + CALL ZGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR ) + END IF +* +* Perform QZ algorithm (Compute eigenvalues, and optionally, the +* Schur form and Schur vectors) +* (Complex Workspace: need N) +* (Real Workspace: need N) +* + IWRK = ITAU + IF( ILV ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF + CALL ZHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK( IWRK ), + $ LWORK+1-IWRK, RWORK( IRWRK ), IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 70 + END IF +* +* Compute Eigenvectors +* (Real Workspace: need 2*N) +* (Complex Workspace: need 2*N) +* + IF( ILV ) THEN + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF +* + CALL ZTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, N, IN, WORK( IWRK ), RWORK( IRWRK ), + $ IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 70 + END IF +* +* Undo balancing on VL and VR and normalization +* (Workspace: none needed) +* + IF( ILVL ) THEN + CALL ZGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VL, LDVL, IERR ) + DO 30 JC = 1, N + TEMP = ZERO + DO 10 JR = 1, N + TEMP = MAX( TEMP, ABS1( VL( JR, JC ) ) ) + 10 CONTINUE + IF( TEMP.LT.SMLNUM ) + $ GO TO 30 + TEMP = ONE / TEMP + DO 20 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 20 CONTINUE + 30 CONTINUE + END IF + IF( ILVR ) THEN + CALL ZGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VR, LDVR, IERR ) + DO 60 JC = 1, N + TEMP = ZERO + DO 40 JR = 1, N + TEMP = MAX( TEMP, ABS1( VR( JR, JC ) ) ) + 40 CONTINUE + IF( TEMP.LT.SMLNUM ) + $ GO TO 60 + TEMP = ONE / TEMP + DO 50 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 50 CONTINUE + 60 CONTINUE + END IF + END IF +* +* Undo scaling if necessary +* + IF( ILASCL ) + $ CALL ZLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) +* + IF( ILBSCL ) + $ CALL ZLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* + 70 CONTINUE + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZGGEV +* + END diff --git a/costa/native/external/lapack/zggevx.f b/costa/native/external/lapack/zggevx.f new file mode 100644 index 000000000..e98e7bee3 --- /dev/null +++ b/costa/native/external/lapack/zggevx.f @@ -0,0 +1,640 @@ + SUBROUTINE ZGGEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, B, LDB, + $ ALPHA, BETA, VL, LDVL, VR, LDVR, ILO, IHI, + $ LSCALE, RSCALE, ABNRM, BBNRM, RCONDE, RCONDV, + $ WORK, LWORK, RWORK, IWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER BALANC, JOBVL, JOBVR, SENSE + INTEGER IHI, ILO, INFO, LDA, LDB, LDVL, LDVR, LWORK, N + DOUBLE PRECISION ABNRM, BBNRM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + INTEGER IWORK( * ) + DOUBLE PRECISION LSCALE( * ), RCONDE( * ), RCONDV( * ), + $ RSCALE( * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGGEVX computes for a pair of N-by-N complex nonsymmetric matrices +* (A,B) the generalized eigenvalues, and optionally, the left and/or +* right generalized eigenvectors. +* +* Optionally, it also computes a balancing transformation to improve +* the conditioning of the eigenvalues and eigenvectors (ILO, IHI, +* LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for +* the eigenvalues (RCONDE), and reciprocal condition numbers for the +* right eigenvectors (RCONDV). +* +* A generalized eigenvalue for a pair of matrices (A,B) is a scalar +* lambda or a ratio alpha/beta = lambda, such that A - lambda*B is +* singular. It is usually represented as the pair (alpha,beta), as +* there is a reasonable interpretation for beta=0, and even for both +* being zero. +* +* The right eigenvector v(j) corresponding to the eigenvalue lambda(j) +* of (A,B) satisfies +* A * v(j) = lambda(j) * B * v(j) . +* The left eigenvector u(j) corresponding to the eigenvalue lambda(j) +* of (A,B) satisfies +* u(j)**H * A = lambda(j) * u(j)**H * B. +* where u(j)**H is the conjugate-transpose of u(j). +* +* +* Arguments +* ========= +* +* BALANC (input) CHARACTER*1 +* Specifies the balance option to be performed: +* = 'N': do not diagonally scale or permute; +* = 'P': permute only; +* = 'S': scale only; +* = 'B': both permute and scale. +* Computed reciprocal condition numbers will be for the +* matrices after permuting and/or balancing. Permuting does +* not change condition numbers (in exact arithmetic), but +* balancing does. +* +* JOBVL (input) CHARACTER*1 +* = 'N': do not compute the left generalized eigenvectors; +* = 'V': compute the left generalized eigenvectors. +* +* JOBVR (input) CHARACTER*1 +* = 'N': do not compute the right generalized eigenvectors; +* = 'V': compute the right generalized eigenvectors. +* +* SENSE (input) CHARACTER*1 +* Determines which reciprocal condition numbers are computed. +* = 'N': none are computed; +* = 'E': computed for eigenvalues only; +* = 'V': computed for eigenvectors only; +* = 'B': computed for eigenvalues and eigenvectors. +* +* N (input) INTEGER +* The order of the matrices A, B, VL, and VR. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA, N) +* On entry, the matrix A in the pair (A,B). +* On exit, A has been overwritten. If JOBVL='V' or JOBVR='V' +* or both, then A contains the first part of the complex Schur +* form of the "balanced" versions of the input A and B. +* +* LDA (input) INTEGER +* The leading dimension of A. LDA >= max(1,N). +* +* B (input/output) COMPLEX*16 array, dimension (LDB, N) +* On entry, the matrix B in the pair (A,B). +* On exit, B has been overwritten. If JOBVL='V' or JOBVR='V' +* or both, then B contains the second part of the complex +* Schur form of the "balanced" versions of the input A and B. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB >= max(1,N). +* +* ALPHA (output) COMPLEX*16 array, dimension (N) +* BETA (output) COMPLEX*16 array, dimension (N) +* On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the generalized +* eigenvalues. +* +* Note: the quotient ALPHA(j)/BETA(j) ) may easily over- or +* underflow, and BETA(j) may even be zero. Thus, the user +* should avoid naively computing the ratio ALPHA/BETA. +* However, ALPHA will be always less than and usually +* comparable with norm(A) in magnitude, and BETA always less +* than and usually comparable with norm(B). +* +* VL (output) COMPLEX*16 array, dimension (LDVL,N) +* If JOBVL = 'V', the left generalized eigenvectors u(j) are +* stored one after another in the columns of VL, in the same +* order as their eigenvalues. +* Each eigenvector will be scaled so the largest component +* will have abs(real part) + abs(imag. part) = 1. +* Not referenced if JOBVL = 'N'. +* +* LDVL (input) INTEGER +* The leading dimension of the matrix VL. LDVL >= 1, and +* if JOBVL = 'V', LDVL >= N. +* +* VR (output) COMPLEX*16 array, dimension (LDVR,N) +* If JOBVR = 'V', the right generalized eigenvectors v(j) are +* stored one after another in the columns of VR, in the same +* order as their eigenvalues. +* Each eigenvector will be scaled so the largest component +* will have abs(real part) + abs(imag. part) = 1. +* Not referenced if JOBVR = 'N'. +* +* LDVR (input) INTEGER +* The leading dimension of the matrix VR. LDVR >= 1, and +* if JOBVR = 'V', LDVR >= N. +* +* ILO,IHI (output) INTEGER +* ILO and IHI are integer values such that on exit +* A(i,j) = 0 and B(i,j) = 0 if i > j and +* j = 1,...,ILO-1 or i = IHI+1,...,N. +* If BALANC = 'N' or 'S', ILO = 1 and IHI = N. +* +* LSCALE (output) DOUBLE PRECISION array, dimension (N) +* Details of the permutations and scaling factors applied +* to the left side of A and B. If PL(j) is the index of the +* row interchanged with row j, and DL(j) is the scaling +* factor applied to row j, then +* LSCALE(j) = PL(j) for j = 1,...,ILO-1 +* = DL(j) for j = ILO,...,IHI +* = PL(j) for j = IHI+1,...,N. +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* RSCALE (output) DOUBLE PRECISION array, dimension (N) +* Details of the permutations and scaling factors applied +* to the right side of A and B. If PR(j) is the index of the +* column interchanged with column j, and DR(j) is the scaling +* factor applied to column j, then +* RSCALE(j) = PR(j) for j = 1,...,ILO-1 +* = DR(j) for j = ILO,...,IHI +* = PR(j) for j = IHI+1,...,N +* The order in which the interchanges are made is N to IHI+1, +* then 1 to ILO-1. +* +* ABNRM (output) DOUBLE PRECISION +* The one-norm of the balanced matrix A. +* +* BBNRM (output) DOUBLE PRECISION +* The one-norm of the balanced matrix B. +* +* RCONDE (output) DOUBLE PRECISION array, dimension (N) +* If SENSE = 'E' or 'B', the reciprocal condition numbers of +* the selected eigenvalues, stored in consecutive elements of +* the array. +* If SENSE = 'V', RCONDE is not referenced. +* +* RCONDV (output) DOUBLE PRECISION array, dimension (N) +* If JOB = 'V' or 'B', the estimated reciprocal condition +* numbers of the selected eigenvectors, stored in consecutive +* elements of the array. If the eigenvalues cannot be reordered +* to compute RCONDV(j), RCONDV(j) is set to 0; this can only +* occur when the true value would be very small anyway. +* If SENSE = 'E', RCONDV is not referenced. +* Not referenced if JOB = 'E'. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,2*N). +* If SENSE = 'N' or 'E', LWORK >= 2*N. +* If SENSE = 'V' or 'B', LWORK >= 2*N*N+2*N. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (6*N) +* Real workspace. +* +* IWORK (workspace) INTEGER array, dimension (N+2) +* If SENSE = 'E', IWORK is not referenced. +* +* BWORK (workspace) LOGICAL array, dimension (N) +* If SENSE = 'N', BWORK is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* = 1,...,N: +* The QZ iteration failed. No eigenvectors have been +* calculated, but ALPHA(j) and BETA(j) should be correct +* for j=INFO+1,...,N. +* > N: =N+1: other than QZ iteration failed in ZHGEQZ. +* =N+2: error return from ZTGEVC. +* +* Further Details +* =============== +* +* Balancing a matrix pair (A,B) includes, first, permuting rows and +* columns to isolate eigenvalues, second, applying diagonal similarity +* transformation to the rows and columns to make the rows and columns +* as close in norm as possible. The computed reciprocal condition +* numbers correspond to the balanced matrix. Permuting rows and columns +* will not change the condition numbers (in exact arithmetic) but +* diagonal scaling will. For further explanation of balancing, see +* section 4.11.1.2 of LAPACK Users' Guide. +* +* An approximate error bound on the chordal distance between the i-th +* computed generalized eigenvalue w and the corresponding exact +* eigenvalue lambda is +* +* chord(w, lambda) <= EPS * norm(ABNRM, BBNRM) / RCONDE(I) +* +* An approximate error bound for the angle between the i-th computed +* eigenvector VL(i) or VR(i) is given by +* +* EPS * norm(ABNRM, BBNRM) / DIF(i). +* +* For further explanation of the reciprocal condition numbers RCONDE +* and RCONDV, see section 4.11 of LAPACK User's Guide. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY, + $ WANTSB, WANTSE, WANTSN, WANTSV + CHARACTER CHTEMP + INTEGER I, ICOLS, IERR, IJOBVL, IJOBVR, IN, IROWS, + $ ITAU, IWRK, IWRK1, J, JC, JR, M, MAXWRK, MINWRK + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SMLNUM, TEMP + COMPLEX*16 X +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DLASCL, XERBLA, ZGEQRF, ZGGBAK, ZGGBAL, ZGGHRD, + $ ZHGEQZ, ZLACPY, ZLASCL, ZLASET, ZTGEVC, ZTGSNA, + $ ZUNGQR, ZUNMQR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION ABS1 +* .. +* .. Statement Function definitions .. + ABS1( X ) = ABS( DBLE( X ) ) + ABS( DIMAG( X ) ) +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( BALANC, 'N' ) .OR. LSAME( BALANC, + $ 'S' ) .OR. LSAME( BALANC, 'P' ) .OR. LSAME( BALANC, 'B' ) ) ) + $ THEN + INFO = -1 + ELSE IF( IJOBVL.LE.0 ) THEN + INFO = -2 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -3 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSB .OR. WANTSV ) ) + $ THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -13 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -15 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. The workspace is +* computed assuming ILO = 1 and IHI = N, the worst case.) +* + MINWRK = 1 + IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN + MAXWRK = N + N*ILAENV( 1, 'ZGEQRF', ' ', N, 1, N, 0 ) + IF( WANTSE ) THEN + MINWRK = MAX( 1, 2*N ) + ELSE IF( WANTSV .OR. WANTSB ) THEN + MINWRK = 2*N*N + 2*N + MAXWRK = MAX( MAXWRK, 2*N*N+2*N ) + END IF + WORK( 1 ) = MAXWRK + END IF +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -25 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL ZLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = ZLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL ZLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute and/or balance the matrix pair (A,B) +* (Real Workspace: need 6*N) +* + CALL ZGGBAL( BALANC, N, A, LDA, B, LDB, ILO, IHI, LSCALE, RSCALE, + $ RWORK, IERR ) +* +* Compute ABNRM and BBNRM +* + ABNRM = ZLANGE( '1', N, N, A, LDA, RWORK( 1 ) ) + IF( ILASCL ) THEN + RWORK( 1 ) = ABNRM + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, 1, 1, RWORK( 1 ), 1, + $ IERR ) + ABNRM = RWORK( 1 ) + END IF +* + BBNRM = ZLANGE( '1', N, N, B, LDB, RWORK( 1 ) ) + IF( ILBSCL ) THEN + RWORK( 1 ) = BBNRM + CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, 1, 1, RWORK( 1 ), 1, + $ IERR ) + BBNRM = RWORK( 1 ) + END IF +* +* Reduce B to triangular form (QR decomposition of B) +* (Complex Workspace: need N, prefer N*NB ) +* + IROWS = IHI + 1 - ILO + IF( ILV .OR. .NOT.WANTSN ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = 1 + IWRK = ITAU + IROWS + CALL ZGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the unitary transformation to A +* (Complex Workspace: need N, prefer N*NB) +* + CALL ZUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VL and/or VR +* (Workspace: need N, prefer N*NB) +* + IF( ILVL ) THEN + CALL ZLASET( 'Full', N, N, CZERO, CONE, VL, LDVL ) + CALL ZLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + CALL ZUNGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* + IF( ILVR ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + IF( ILV .OR. .NOT.WANTSN ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL ZGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IERR ) + ELSE + CALL ZGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR ) + END IF +* +* Perform QZ algorithm (Compute eigenvalues, and optionally, the +* Schur forms and Schur vectors) +* (Complex Workspace: need N) +* (Real Workspace: need N) +* + IWRK = ITAU + IF( ILV .OR. .NOT.WANTSN ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF +* + CALL ZHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK( IWRK ), + $ LWORK+1-IWRK, RWORK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 90 + END IF +* +* Compute Eigenvectors and estimate condition numbers if desired +* ZTGEVC: (Complex Workspace: need 2*N ) +* (Real Workspace: need 2*N ) +* ZTGSNA: (Complex Workspace: need 2*N*N if SENSE='V' or 'B') +* (Integer Workspace: need N+2 ) +* + IF( ILV .OR. .NOT.WANTSN ) THEN + IF( ILV ) THEN + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF +* + CALL ZTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, N, IN, WORK( IWRK ), RWORK, + $ IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 90 + END IF + END IF +* + IF( .NOT.WANTSN ) THEN +* +* compute eigenvectors (DTGEVC) and estimate condition +* numbers (DTGSNA). Note that the definition of the condition +* number is not invariant under transformation (u,v) to +* (Q*u, Z*v), where (u,v) are eigenvectors of the generalized +* Schur form (S,T), Q and Z are orthogonal matrices. In order +* to avoid using extra 2*N*N workspace, we have to +* re-calculate eigenvectors and estimate the condition numbers +* one at a time. +* + DO 20 I = 1, N +* + DO 10 J = 1, N + BWORK( J ) = .FALSE. + 10 CONTINUE + BWORK( I ) = .TRUE. +* + IWRK = N + 1 + IWRK1 = IWRK + N +* + IF( WANTSE .OR. WANTSB ) THEN + CALL ZTGEVC( 'B', 'S', BWORK, N, A, LDA, B, LDB, + $ WORK( 1 ), N, WORK( IWRK ), N, 1, M, + $ WORK( IWRK1 ), RWORK, IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 90 + END IF + END IF +* + CALL ZTGSNA( SENSE, 'S', BWORK, N, A, LDA, B, LDB, + $ WORK( 1 ), N, WORK( IWRK ), N, RCONDE( I ), + $ RCONDV( I ), 1, M, WORK( IWRK1 ), + $ LWORK-IWRK1+1, IWORK, IERR ) +* + 20 CONTINUE + END IF + END IF +* +* Undo balancing on VL and VR and normalization +* (Workspace: none needed) +* + IF( ILVL ) THEN + CALL ZGGBAK( BALANC, 'L', N, ILO, IHI, LSCALE, RSCALE, N, VL, + $ LDVL, IERR ) +* + DO 50 JC = 1, N + TEMP = ZERO + DO 30 JR = 1, N + TEMP = MAX( TEMP, ABS1( VL( JR, JC ) ) ) + 30 CONTINUE + IF( TEMP.LT.SMLNUM ) + $ GO TO 50 + TEMP = ONE / TEMP + DO 40 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 40 CONTINUE + 50 CONTINUE + END IF +* + IF( ILVR ) THEN + CALL ZGGBAK( BALANC, 'R', N, ILO, IHI, LSCALE, RSCALE, N, VR, + $ LDVR, IERR ) + DO 80 JC = 1, N + TEMP = ZERO + DO 60 JR = 1, N + TEMP = MAX( TEMP, ABS1( VR( JR, JC ) ) ) + 60 CONTINUE + IF( TEMP.LT.SMLNUM ) + $ GO TO 80 + TEMP = ONE / TEMP + DO 70 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 70 CONTINUE + 80 CONTINUE + END IF +* +* Undo scaling if necessary +* + IF( ILASCL ) + $ CALL ZLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) +* + IF( ILBSCL ) + $ CALL ZLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* + 90 CONTINUE + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of ZGGEVX +* + END diff --git a/costa/native/external/lapack/zggglm.f b/costa/native/external/lapack/zggglm.f new file mode 100644 index 000000000..42b91c16a --- /dev/null +++ b/costa/native/external/lapack/zggglm.f @@ -0,0 +1,213 @@ + SUBROUTINE ZGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), D( * ), WORK( * ), + $ X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZGGGLM solves a general Gauss-Markov linear model (GLM) problem: +* +* minimize || y ||_2 subject to d = A*x + B*y +* x +* +* where A is an N-by-M matrix, B is an N-by-P matrix, and d is a +* given N-vector. It is assumed that M <= N <= M+P, and +* +* rank(A) = M and rank( A B ) = N. +* +* Under these assumptions, the constrained equation is always +* consistent, and there is a unique solution x and a minimal 2-norm +* solution y, which is obtained using a generalized QR factorization +* of A and B. +* +* In particular, if matrix B is square nonsingular, then the problem +* GLM is equivalent to the following weighted linear least squares +* problem +* +* minimize || inv(B)*(d-A*x) ||_2 +* x +* +* where inv(B) denotes the inverse of B. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of rows of the matrices A and B. N >= 0. +* +* M (input) INTEGER +* The number of columns of the matrix A. 0 <= M <= N. +* +* P (input) INTEGER +* The number of columns of the matrix B. P >= N-M. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,M) +* On entry, the N-by-M matrix A. +* On exit, A is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX*16 array, dimension (LDB,P) +* On entry, the N-by-P matrix B. +* On exit, B is destroyed. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* D (input/output) COMPLEX*16 array, dimension (N) +* On entry, D is the left hand side of the GLM equation. +* On exit, D is destroyed. +* +* X (output) COMPLEX*16 array, dimension (M) +* Y (output) COMPLEX*16 array, dimension (P) +* On exit, X and Y are the solutions of the GLM problem. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N+M+P). +* For optimum performance, LWORK >= M+min(N,P)+max(N,P)*NB, +* where NB is an upper bound for the optimal blocksizes for +* ZGEQRF, CGERQF, ZUNMQR and CUNMRQ. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* =================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, LOPT, LWKOPT, NB, NB1, NB2, NB3, NB4, NP +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZGEMV, ZGGQRF, ZTRSV, ZUNMQR, + $ ZUNMRQ +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NP = MIN( N, P ) + NB1 = ILAENV( 1, 'ZGEQRF', ' ', N, M, -1, -1 ) + NB2 = ILAENV( 1, 'ZGERQF', ' ', N, M, -1, -1 ) + NB3 = ILAENV( 1, 'ZUNMQR', ' ', N, M, P, -1 ) + NB4 = ILAENV( 1, 'ZUNMRQ', ' ', N, M, P, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKOPT = M + NP + MAX( N, P )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 .OR. M.GT.N ) THEN + INFO = -2 + ELSE IF( P.LT.0 .OR. P.LT.N-M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.MAX( 1, N+M+P ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGGLM', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the GQR factorization of matrices A and B: +* +* Q'*A = ( R11 ) M, Q'*B*Z' = ( T11 T12 ) M +* ( 0 ) N-M ( 0 T22 ) N-M +* M M+P-N N-M +* +* where R11 and T22 are upper triangular, and Q and Z are +* unitary. +* + CALL ZGGQRF( N, M, P, A, LDA, WORK, B, LDB, WORK( M+1 ), + $ WORK( M+NP+1 ), LWORK-M-NP, INFO ) + LOPT = WORK( M+NP+1 ) +* +* Update left-hand-side vector d = Q'*d = ( d1 ) M +* ( d2 ) N-M +* + CALL ZUNMQR( 'Left', 'Conjugate transpose', N, 1, M, A, LDA, WORK, + $ D, MAX( 1, N ), WORK( M+NP+1 ), LWORK-M-NP, INFO ) + LOPT = MAX( LOPT, INT( WORK( M+NP+1 ) ) ) +* +* Solve T22*y2 = d2 for y2 +* + CALL ZTRSV( 'Upper', 'No transpose', 'Non unit', N-M, + $ B( M+1, M+P-N+1 ), LDB, D( M+1 ), 1 ) + CALL ZCOPY( N-M, D( M+1 ), 1, Y( M+P-N+1 ), 1 ) +* +* Set y1 = 0 +* + DO 10 I = 1, M + P - N + Y( I ) = CZERO + 10 CONTINUE +* +* Update d1 = d1 - T12*y2 +* + CALL ZGEMV( 'No transpose', M, N-M, -CONE, B( 1, M+P-N+1 ), LDB, + $ Y( M+P-N+1 ), 1, CONE, D, 1 ) +* +* Solve triangular system: R11*x = d1 +* + CALL ZTRSV( 'Upper', 'No Transpose', 'Non unit', M, A, LDA, D, 1 ) +* +* Copy D to X +* + CALL ZCOPY( M, D, 1, X, 1 ) +* +* Backward transformation y = Z'*y +* + CALL ZUNMRQ( 'Left', 'Conjugate transpose', P, 1, NP, + $ B( MAX( 1, N-P+1 ), 1 ), LDB, WORK( M+1 ), Y, + $ MAX( 1, P ), WORK( M+NP+1 ), LWORK-M-NP, INFO ) + WORK( 1 ) = M + NP + MAX( LOPT, INT( WORK( M+NP+1 ) ) ) +* + RETURN +* +* End of ZGGGLM +* + END diff --git a/costa/native/external/lapack/zgghrd.f b/costa/native/external/lapack/zgghrd.f new file mode 100644 index 000000000..fdaf5187e --- /dev/null +++ b/costa/native/external/lapack/zgghrd.f @@ -0,0 +1,256 @@ + SUBROUTINE ZGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, + $ LDQ, Z, LDZ, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, COMPZ + INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZGGHRD reduces a pair of complex matrices (A,B) to generalized upper +* Hessenberg form using unitary transformations, where A is a +* general matrix and B is upper triangular: Q' * A * Z = H and +* Q' * B * Z = T, where H is upper Hessenberg, T is upper triangular, +* and Q and Z are unitary, and ' means conjugate transpose. +* +* The unitary matrices Q and Z are determined as products of Givens +* rotations. They may either be formed explicitly, or they may be +* postmultiplied into input matrices Q1 and Z1, so that +* +* Q1 * A * Z1' = (Q1*Q) * H * (Z1*Z)' +* Q1 * B * Z1' = (Q1*Q) * T * (Z1*Z)' +* +* Arguments +* ========= +* +* COMPQ (input) CHARACTER*1 +* = 'N': do not compute Q; +* = 'I': Q is initialized to the unit matrix, and the +* unitary matrix Q is returned; +* = 'V': Q must contain a unitary matrix Q1 on entry, +* and the product Q1*Q is returned. +* +* COMPZ (input) CHARACTER*1 +* = 'N': do not compute Q; +* = 'I': Q is initialized to the unit matrix, and the +* unitary matrix Q is returned; +* = 'V': Q must contain a unitary matrix Q1 on entry, +* and the product Q1*Q is returned. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that A is already upper triangular in rows and +* columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally set +* by a previous call to ZGGBAL; otherwise they should be set +* to 1 and N respectively. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA, N) +* On entry, the N-by-N general matrix to be reduced. +* On exit, the upper triangle and the first subdiagonal of A +* are overwritten with the upper Hessenberg matrix H, and the +* rest is set to zero. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX*16 array, dimension (LDB, N) +* On entry, the N-by-N upper triangular matrix B. +* On exit, the upper triangular matrix T = Q' B Z. The +* elements below the diagonal are set to zero. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* Q (input/output) COMPLEX*16 array, dimension (LDQ, N) +* If COMPQ='N': Q is not referenced. +* If COMPQ='I': on entry, Q need not be set, and on exit it +* contains the unitary matrix Q, where Q' +* is the product of the Givens transformations +* which are applied to A and B on the left. +* If COMPQ='V': on entry, Q must contain a unitary matrix +* Q1, and on exit this is overwritten by Q1*Q. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. +* LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. +* +* Z (input/output) COMPLEX*16 array, dimension (LDZ, N) +* If COMPZ='N': Z is not referenced. +* If COMPZ='I': on entry, Z need not be set, and on exit it +* contains the unitary matrix Z, which is +* the product of the Givens transformations +* which are applied to A and B on the right. +* If COMPZ='V': on entry, Z must contain a unitary matrix +* Z1, and on exit this is overwritten by Z1*Z. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. +* LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* This routine reduces A to Hessenberg and B to triangular form by +* an unblocked reduction, as described in _Matrix_Computations_, +* by Golub and van Loan (Johns Hopkins Press). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CONE, CZERO + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ), + $ CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ILQ, ILZ + INTEGER ICOMPQ, ICOMPZ, JCOL, JROW + DOUBLE PRECISION C + COMPLEX*16 CTEMP, S +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARTG, ZLASET, ZROT +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Decode COMPQ +* + IF( LSAME( COMPQ, 'N' ) ) THEN + ILQ = .FALSE. + ICOMPQ = 1 + ELSE IF( LSAME( COMPQ, 'V' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 2 + ELSE IF( LSAME( COMPQ, 'I' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 3 + ELSE + ICOMPQ = 0 + END IF +* +* Decode COMPZ +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ILZ = .FALSE. + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 2 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 3 + ELSE + ICOMPZ = 0 + END IF +* +* Test the input parameters. +* + INFO = 0 + IF( ICOMPQ.LE.0 ) THEN + INFO = -1 + ELSE IF( ICOMPZ.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 ) THEN + INFO = -4 + ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( ( ILQ .AND. LDQ.LT.N ) .OR. LDQ.LT.1 ) THEN + INFO = -11 + ELSE IF( ( ILZ .AND. LDZ.LT.N ) .OR. LDZ.LT.1 ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGHRD', -INFO ) + RETURN + END IF +* +* Initialize Q and Z if desired. +* + IF( ICOMPQ.EQ.3 ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, Q, LDQ ) + IF( ICOMPZ.EQ.3 ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, Z, LDZ ) +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* +* Zero out lower triangle of B +* + DO 20 JCOL = 1, N - 1 + DO 10 JROW = JCOL + 1, N + B( JROW, JCOL ) = CZERO + 10 CONTINUE + 20 CONTINUE +* +* Reduce A and B +* + DO 40 JCOL = ILO, IHI - 2 +* + DO 30 JROW = IHI, JCOL + 2, -1 +* +* Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL) +* + CTEMP = A( JROW-1, JCOL ) + CALL ZLARTG( CTEMP, A( JROW, JCOL ), C, S, + $ A( JROW-1, JCOL ) ) + A( JROW, JCOL ) = CZERO + CALL ZROT( N-JCOL, A( JROW-1, JCOL+1 ), LDA, + $ A( JROW, JCOL+1 ), LDA, C, S ) + CALL ZROT( N+2-JROW, B( JROW-1, JROW-1 ), LDB, + $ B( JROW, JROW-1 ), LDB, C, S ) + IF( ILQ ) + $ CALL ZROT( N, Q( 1, JROW-1 ), 1, Q( 1, JROW ), 1, C, + $ DCONJG( S ) ) +* +* Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1) +* + CTEMP = B( JROW, JROW ) + CALL ZLARTG( CTEMP, B( JROW, JROW-1 ), C, S, + $ B( JROW, JROW ) ) + B( JROW, JROW-1 ) = CZERO + CALL ZROT( IHI, A( 1, JROW ), 1, A( 1, JROW-1 ), 1, C, S ) + CALL ZROT( JROW-1, B( 1, JROW ), 1, B( 1, JROW-1 ), 1, C, + $ S ) + IF( ILZ ) + $ CALL ZROT( N, Z( 1, JROW ), 1, Z( 1, JROW-1 ), 1, C, S ) + 30 CONTINUE + 40 CONTINUE +* + RETURN +* +* End of ZGGHRD +* + END diff --git a/costa/native/external/lapack/zgglse.f b/costa/native/external/lapack/zgglse.f new file mode 100644 index 000000000..b8787a80f --- /dev/null +++ b/costa/native/external/lapack/zgglse.f @@ -0,0 +1,218 @@ + SUBROUTINE ZGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), C( * ), D( * ), + $ WORK( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* ZGGLSE solves the linear equality-constrained least squares (LSE) +* problem: +* +* minimize || c - A*x ||_2 subject to B*x = d +* +* where A is an M-by-N matrix, B is a P-by-N matrix, c is a given +* M-vector, and d is a given P-vector. It is assumed that +* P <= N <= M+P, and +* +* rank(B) = P and rank( ( A ) ) = N. +* ( ( B ) ) +* +* These conditions ensure that the LSE problem has a unique solution, +* which is obtained using a GRQ factorization of the matrices B and A. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrices A and B. N >= 0. +* +* P (input) INTEGER +* The number of rows of the matrix B. 0 <= P <= N <= M+P. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) COMPLEX*16 array, dimension (LDB,N) +* On entry, the P-by-N matrix B. +* On exit, B is destroyed. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,P). +* +* C (input/output) COMPLEX*16 array, dimension (M) +* On entry, C contains the right hand side vector for the +* least squares part of the LSE problem. +* On exit, the residual sum of squares for the solution +* is given by the sum of squares of elements N-P+1 to M of +* vector C. +* +* D (input/output) COMPLEX*16 array, dimension (P) +* On entry, D contains the right hand side vector for the +* constrained equation. +* On exit, D is destroyed. +* +* X (output) COMPLEX*16 array, dimension (N) +* On exit, X is the solution of the LSE problem. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M+N+P). +* For optimum performance LWORK >= P+min(M,N)+max(M,N)*NB, +* where NB is an upper bound for the optimal blocksizes for +* ZGEQRF, CGERQF, ZUNMQR and CUNMRQ. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKOPT, MN, NB, NB1, NB2, NB3, NB4, NR +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZGEMV, ZGGRQF, ZTRMV, + $ ZTRSV, ZUNMQR, ZUNMRQ +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + MN = MIN( M, N ) + NB1 = ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'ZGERQF', ' ', M, N, -1, -1 ) + NB3 = ILAENV( 1, 'ZUNMQR', ' ', M, N, P, -1 ) + NB4 = ILAENV( 1, 'ZUNMRQ', ' ', M, N, P, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKOPT = P + MN + MAX( M, N )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( P.LT.0 .OR. P.GT.N .OR. P.LT.N-M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.MAX( 1, M+N+P ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGLSE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the GRQ factorization of matrices B and A: +* +* B*Q' = ( 0 T12 ) P Z'*A*Q' = ( R11 R12 ) N-P +* N-P P ( 0 R22 ) M+P-N +* N-P P +* +* where T12 and R11 are upper triangular, and Q and Z are +* unitary. +* + CALL ZGGRQF( P, M, N, B, LDB, WORK, A, LDA, WORK( P+1 ), + $ WORK( P+MN+1 ), LWORK-P-MN, INFO ) + LOPT = WORK( P+MN+1 ) +* +* Update c = Z'*c = ( c1 ) N-P +* ( c2 ) M+P-N +* + CALL ZUNMQR( 'Left', 'Conjugate Transpose', M, 1, MN, A, LDA, + $ WORK( P+1 ), C, MAX( 1, M ), WORK( P+MN+1 ), + $ LWORK-P-MN, INFO ) + LOPT = MAX( LOPT, INT( WORK( P+MN+1 ) ) ) +* +* Solve T12*x2 = d for x2 +* + CALL ZTRSV( 'Upper', 'No transpose', 'Non unit', P, B( 1, N-P+1 ), + $ LDB, D, 1 ) +* +* Update c1 +* + CALL ZGEMV( 'No transpose', N-P, P, -CONE, A( 1, N-P+1 ), LDA, D, + $ 1, CONE, C, 1 ) +* +* Sovle R11*x1 = c1 for x1 +* + CALL ZTRSV( 'Upper', 'No transpose', 'Non unit', N-P, A, LDA, C, + $ 1 ) +* +* Put the solutions in X +* + CALL ZCOPY( N-P, C, 1, X, 1 ) + CALL ZCOPY( P, D, 1, X( N-P+1 ), 1 ) +* +* Compute the residual vector: +* + IF( M.LT.N ) THEN + NR = M + P - N + CALL ZGEMV( 'No transpose', NR, N-M, -CONE, A( N-P+1, M+1 ), + $ LDA, D( NR+1 ), 1, CONE, C( N-P+1 ), 1 ) + ELSE + NR = P + END IF + CALL ZTRMV( 'Upper', 'No transpose', 'Non unit', NR, + $ A( N-P+1, N-P+1 ), LDA, D, 1 ) + CALL ZAXPY( NR, -CONE, D, 1, C( N-P+1 ), 1 ) +* +* Backward transformation x = Q'*x +* + CALL ZUNMRQ( 'Left', 'Conjugate Transpose', N, 1, P, B, LDB, + $ WORK( 1 ), X, N, WORK( P+MN+1 ), LWORK-P-MN, INFO ) + WORK( 1 ) = P + MN + MAX( LOPT, INT( WORK( P+MN+1 ) ) ) +* + RETURN +* +* End of ZGGLSE +* + END diff --git a/costa/native/external/lapack/zggqrf.f b/costa/native/external/lapack/zggqrf.f new file mode 100644 index 000000000..43bfdb7e7 --- /dev/null +++ b/costa/native/external/lapack/zggqrf.f @@ -0,0 +1,212 @@ + SUBROUTINE ZGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGGQRF computes a generalized QR factorization of an N-by-M matrix A +* and an N-by-P matrix B: +* +* A = Q*R, B = Q*T*Z, +* +* where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix, +* and R and T assume one of the forms: +* +* if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, +* ( 0 ) N-M N M-N +* M +* +* where R11 is upper triangular, and +* +* if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, +* P-N N ( T21 ) P +* P +* +* where T12 or T21 is upper triangular. +* +* In particular, if B is square and nonsingular, the GQR factorization +* of A and B implicitly gives the QR factorization of inv(B)*A: +* +* inv(B)*A = Z'*(inv(T)*R) +* +* where inv(B) denotes the inverse of the matrix B, and Z' denotes the +* conjugate transpose of matrix Z. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of rows of the matrices A and B. N >= 0. +* +* M (input) INTEGER +* The number of columns of the matrix A. M >= 0. +* +* P (input) INTEGER +* The number of columns of the matrix B. P >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,M) +* On entry, the N-by-M matrix A. +* On exit, the elements on and above the diagonal of the array +* contain the min(N,M)-by-M upper trapezoidal matrix R (R is +* upper triangular if N >= M); the elements below the diagonal, +* with the array TAUA, represent the unitary matrix Q as a +* product of min(N,M) elementary reflectors (see Further +* Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAUA (output) COMPLEX*16 array, dimension (min(N,M)) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix Q (see Further Details). +* +* B (input/output) COMPLEX*16 array, dimension (LDB,P) +* On entry, the N-by-P matrix B. +* On exit, if N <= P, the upper triangle of the subarray +* B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; +* if N > P, the elements on and above the (N-P)-th subdiagonal +* contain the N-by-P upper trapezoidal matrix T; the remaining +* elements, with the array TAUB, represent the unitary +* matrix Z as a product of elementary reflectors (see Further +* Details). +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* TAUB (output) COMPLEX*16 array, dimension (min(N,P)) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix Z (see Further Details). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N,M,P). +* For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3), +* where NB1 is the optimal blocksize for the QR factorization +* of an N-by-M matrix, NB2 is the optimal blocksize for the +* RQ factorization of an N-by-P matrix, and NB3 is the optimal +* blocksize for a call of ZUNMQR. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(n,m). +* +* Each H(i) has the form +* +* H(i) = I - taua * v * v' +* +* where taua is a complex scalar, and v is a complex vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), +* and taua in TAUA(i). +* To form Q explicitly, use LAPACK subroutine ZUNGQR. +* To use Q to update another matrix, use LAPACK subroutine ZUNMQR. +* +* The matrix Z is represented as a product of elementary reflectors +* +* Z = H(1) H(2) . . . H(k), where k = min(n,p). +* +* Each H(i) has the form +* +* H(i) = I - taub * v * v' +* +* where taub is a complex scalar, and v is a complex vector with +* v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in +* B(n-k+i,1:p-k+i-1), and taub in TAUB(i). +* To form Z explicitly, use LAPACK subroutine ZUNGRQ. +* To use Z to update another matrix, use LAPACK subroutine ZUNMRQ. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKOPT, NB, NB1, NB2, NB3 +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQRF, ZGERQF, ZUNMQR +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB1 = ILAENV( 1, 'ZGEQRF', ' ', N, M, -1, -1 ) + NB2 = ILAENV( 1, 'ZGERQF', ' ', N, P, -1, -1 ) + NB3 = ILAENV( 1, 'ZUNMQR', ' ', N, M, P, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LWKOPT = MAX( N, M, P )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( P.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, N, M, P ) .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* QR factorization of N-by-M matrix A: A = Q*R +* + CALL ZGEQRF( N, M, A, LDA, TAUA, WORK, LWORK, INFO ) + LOPT = WORK( 1 ) +* +* Update B := Q'*B. +* + CALL ZUNMQR( 'Left', 'Conjugate Transpose', N, P, MIN( N, M ), A, + $ LDA, TAUA, B, LDB, WORK, LWORK, INFO ) + LOPT = MAX( LOPT, INT( WORK( 1 ) ) ) +* +* RQ factorization of N-by-P matrix B: B = T*Z. +* + CALL ZGERQF( N, P, B, LDB, TAUB, WORK, LWORK, INFO ) + WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) ) +* + RETURN +* +* End of ZGGQRF +* + END diff --git a/costa/native/external/lapack/zggrqf.f b/costa/native/external/lapack/zggrqf.f new file mode 100644 index 000000000..8323a80f1 --- /dev/null +++ b/costa/native/external/lapack/zggrqf.f @@ -0,0 +1,212 @@ + SUBROUTINE ZGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGGRQF computes a generalized RQ factorization of an M-by-N matrix A +* and a P-by-N matrix B: +* +* A = R*Q, B = Z*T*Q, +* +* where Q is an N-by-N unitary matrix, Z is a P-by-P unitary +* matrix, and R and T assume one of the forms: +* +* if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, +* N-M M ( R21 ) N +* N +* +* where R12 or R21 is upper triangular, and +* +* if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, +* ( 0 ) P-N P N-P +* N +* +* where T11 is upper triangular. +* +* In particular, if B is square and nonsingular, the GRQ factorization +* of A and B implicitly gives the RQ factorization of A*inv(B): +* +* A*inv(B) = (R*inv(T))*Z' +* +* where inv(B) denotes the inverse of the matrix B, and Z' denotes the +* conjugate transpose of the matrix Z. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* P (input) INTEGER +* The number of rows of the matrix B. P >= 0. +* +* N (input) INTEGER +* The number of columns of the matrices A and B. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, if M <= N, the upper triangle of the subarray +* A(1:M,N-M+1:N) contains the M-by-M upper triangular matrix R; +* if M > N, the elements on and above the (M-N)-th subdiagonal +* contain the M-by-N upper trapezoidal matrix R; the remaining +* elements, with the array TAUA, represent the unitary +* matrix Q as a product of elementary reflectors (see Further +* Details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAUA (output) COMPLEX*16 array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix Q (see Further Details). +* +* B (input/output) COMPLEX*16 array, dimension (LDB,N) +* On entry, the P-by-N matrix B. +* On exit, the elements on and above the diagonal of the array +* contain the min(P,N)-by-N upper trapezoidal matrix T (T is +* upper triangular if P >= N); the elements below the diagonal, +* with the array TAUB, represent the unitary matrix Z as a +* product of elementary reflectors (see Further Details). +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,P). +* +* TAUB (output) COMPLEX*16 array, dimension (min(P,N)) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix Z (see Further Details). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N,M,P). +* For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3), +* where NB1 is the optimal blocksize for the RQ factorization +* of an M-by-N matrix, NB2 is the optimal blocksize for the +* QR factorization of a P-by-N matrix, and NB3 is the optimal +* blocksize for a call of ZUNMRQ. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO=-i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of elementary reflectors +* +* Q = H(1) H(2) . . . H(k), where k = min(m,n). +* +* Each H(i) has the form +* +* H(i) = I - taua * v * v' +* +* where taua is a complex scalar, and v is a complex vector with +* v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in +* A(m-k+i,1:n-k+i-1), and taua in TAUA(i). +* To form Q explicitly, use LAPACK subroutine ZUNGRQ. +* To use Q to update another matrix, use LAPACK subroutine ZUNMRQ. +* +* The matrix Z is represented as a product of elementary reflectors +* +* Z = H(1) H(2) . . . H(k), where k = min(p,n). +* +* Each H(i) has the form +* +* H(i) = I - taub * v * v' +* +* where taub is a complex scalar, and v is a complex vector with +* v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in B(i+1:p,i), +* and taub in TAUB(i). +* To form Z explicitly, use LAPACK subroutine ZUNGQR. +* To use Z to update another matrix, use LAPACK subroutine ZUNMQR. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKOPT, NB, NB1, NB2, NB3 +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQRF, ZGERQF, ZUNMRQ +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB1 = ILAENV( 1, 'ZGERQF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'ZGEQRF', ' ', P, N, -1, -1 ) + NB3 = ILAENV( 1, 'ZUNMRQ', ' ', M, N, P, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LWKOPT = MAX( N, M, P )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( P.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, M, P, N ) .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGRQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* RQ factorization of M-by-N matrix A: A = R*Q +* + CALL ZGERQF( M, N, A, LDA, TAUA, WORK, LWORK, INFO ) + LOPT = WORK( 1 ) +* +* Update B := B*Q' +* + CALL ZUNMRQ( 'Right', 'Conjugate Transpose', P, N, MIN( M, N ), + $ A( MAX( 1, M-N+1 ), 1 ), LDA, TAUA, B, LDB, WORK, + $ LWORK, INFO ) + LOPT = MAX( LOPT, INT( WORK( 1 ) ) ) +* +* QR factorization of P-by-N matrix B: B = Z*T +* + CALL ZGEQRF( P, N, B, LDB, TAUB, WORK, LWORK, INFO ) + WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) ) +* + RETURN +* +* End of ZGGRQF +* + END diff --git a/costa/native/external/lapack/zggsvd.f b/costa/native/external/lapack/zggsvd.f new file mode 100644 index 000000000..41a0368ad --- /dev/null +++ b/costa/native/external/lapack/zggsvd.f @@ -0,0 +1,334 @@ + SUBROUTINE ZGGSVD( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, + $ LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, + $ RWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION ALPHA( * ), BETA( * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGGSVD computes the generalized singular value decomposition (GSVD) +* of an M-by-N complex matrix A and P-by-N complex matrix B: +* +* U'*A*Q = D1*( 0 R ), V'*B*Q = D2*( 0 R ) +* +* where U, V and Q are unitary matrices, and Z' means the conjugate +* transpose of Z. Let K+L = the effective numerical rank of the +* matrix (A',B')', then R is a (K+L)-by-(K+L) nonsingular upper +* triangular matrix, D1 and D2 are M-by-(K+L) and P-by-(K+L) "diagonal" +* matrices and of the following structures, respectively: +* +* If M-K-L >= 0, +* +* K L +* D1 = K ( I 0 ) +* L ( 0 C ) +* M-K-L ( 0 0 ) +* +* K L +* D2 = L ( 0 S ) +* P-L ( 0 0 ) +* +* N-K-L K L +* ( 0 R ) = K ( 0 R11 R12 ) +* L ( 0 0 R22 ) +* where +* +* C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), +* S = diag( BETA(K+1), ... , BETA(K+L) ), +* C**2 + S**2 = I. +* +* R is stored in A(1:K+L,N-K-L+1:N) on exit. +* +* If M-K-L < 0, +* +* K M-K K+L-M +* D1 = K ( I 0 0 ) +* M-K ( 0 C 0 ) +* +* K M-K K+L-M +* D2 = M-K ( 0 S 0 ) +* K+L-M ( 0 0 I ) +* P-L ( 0 0 0 ) +* +* N-K-L K M-K K+L-M +* ( 0 R ) = K ( 0 R11 R12 R13 ) +* M-K ( 0 0 R22 R23 ) +* K+L-M ( 0 0 0 R33 ) +* +* where +* +* C = diag( ALPHA(K+1), ... , ALPHA(M) ), +* S = diag( BETA(K+1), ... , BETA(M) ), +* C**2 + S**2 = I. +* +* (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored +* ( 0 R22 R23 ) +* in B(M-K+1:L,N+M-K-L+1:N) on exit. +* +* The routine computes C, S, R, and optionally the unitary +* transformation matrices U, V and Q. +* +* In particular, if B is an N-by-N nonsingular matrix, then the GSVD of +* A and B implicitly gives the SVD of A*inv(B): +* A*inv(B) = U*(D1*inv(D2))*V'. +* If ( A',B')' has orthnormal columns, then the GSVD of A and B is also +* equal to the CS decomposition of A and B. Furthermore, the GSVD can +* be used to derive the solution of the eigenvalue problem: +* A'*A x = lambda* B'*B x. +* In some literature, the GSVD of A and B is presented in the form +* U'*A*X = ( 0 D1 ), V'*B*X = ( 0 D2 ) +* where U and V are orthogonal and X is nonsingular, and D1 and D2 are +* ``diagonal''. The former GSVD form can be converted to the latter +* form by taking the nonsingular matrix X as +* +* X = Q*( I 0 ) +* ( 0 inv(R) ) +* +* Arguments +* ========= +* +* JOBU (input) CHARACTER*1 +* = 'U': Unitary matrix U is computed; +* = 'N': U is not computed. +* +* JOBV (input) CHARACTER*1 +* = 'V': Unitary matrix V is computed; +* = 'N': V is not computed. +* +* JOBQ (input) CHARACTER*1 +* = 'Q': Unitary matrix Q is computed; +* = 'N': Q is not computed. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrices A and B. N >= 0. +* +* P (input) INTEGER +* The number of rows of the matrix B. P >= 0. +* +* K (output) INTEGER +* L (output) INTEGER +* On exit, K and L specify the dimension of the subblocks +* described in Purpose. +* K + L = effective numerical rank of (A',B')'. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A contains the triangular matrix R, or part of R. +* See Purpose for details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) COMPLEX*16 array, dimension (LDB,N) +* On entry, the P-by-N matrix B. +* On exit, B contains part of the triangular matrix R if +* M-K-L < 0. See Purpose for details. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,P). +* +* ALPHA (output) DOUBLE PRECISION array, dimension (N) +* BETA (output) DOUBLE PRECISION array, dimension (N) +* On exit, ALPHA and BETA contain the generalized singular +* value pairs of A and B; +* ALPHA(1:K) = 1, +* BETA(1:K) = 0, +* and if M-K-L >= 0, +* ALPHA(K+1:K+L) = C, +* BETA(K+1:K+L) = S, +* or if M-K-L < 0, +* ALPHA(K+1:M)= C, ALPHA(M+1:K+L)= 0 +* BETA(K+1:M) = S, BETA(M+1:K+L) = 1 +* and +* ALPHA(K+L+1:N) = 0 +* BETA(K+L+1:N) = 0 +* +* U (output) COMPLEX*16 array, dimension (LDU,M) +* If JOBU = 'U', U contains the M-by-M unitary matrix U. +* If JOBU = 'N', U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= max(1,M) if +* JOBU = 'U'; LDU >= 1 otherwise. +* +* V (output) COMPLEX*16 array, dimension (LDV,P) +* If JOBV = 'V', V contains the P-by-P unitary matrix V. +* If JOBV = 'N', V is not referenced. +* +* LDV (input) INTEGER +* The leading dimension of the array V. LDV >= max(1,P) if +* JOBV = 'V'; LDV >= 1 otherwise. +* +* Q (output) COMPLEX*16 array, dimension (LDQ,N) +* If JOBQ = 'Q', Q contains the N-by-N unitary matrix Q. +* If JOBQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N) if +* JOBQ = 'Q'; LDQ >= 1 otherwise. +* +* WORK (workspace) COMPLEX*16 array, dimension (max(3*N,M,P)+N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (2*N) +* +* IWORK (workspace/output) INTEGER array, dimension (N) +* On exit, IWORK stores the sorting information. More +* precisely, the following loop will sort ALPHA +* for I = K+1, min(M,K+L) +* swap ALPHA(I) and ALPHA(IWORK(I)) +* endfor +* such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N). +* +* INFO (output)INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, the Jacobi-type procedure failed to +* converge. For further details, see subroutine ZTGSJA. +* +* Internal Parameters +* =================== +* +* TOLA DOUBLE PRECISION +* TOLB DOUBLE PRECISION +* TOLA and TOLB are the thresholds to determine the effective +* rank of (A',B')'. Generally, they are set to +* TOLA = MAX(M,N)*norm(A)*MAZHEPS, +* TOLB = MAX(P,N)*norm(B)*MAZHEPS. +* The size of TOLA and TOLB may affect the size of backward +* errors of the decomposition. +* +* Further Details +* =============== +* +* 2-96 Based on modifications by +* Ming Gu and Huan Ren, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL WANTQ, WANTU, WANTV + INTEGER I, IBND, ISUB, J, NCYCLE + DOUBLE PRECISION ANORM, BNORM, SMAX, TEMP, TOLA, TOLB, ULP, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, DLAMCH, ZLANGE +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, XERBLA, ZGGSVP, ZTGSJA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( P.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -12 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGSVD', -INFO ) + RETURN + END IF +* +* Compute the Frobenius norm of matrices A and B +* + ANORM = ZLANGE( '1', M, N, A, LDA, RWORK ) + BNORM = ZLANGE( '1', P, N, B, LDB, RWORK ) +* +* Get machine precision and set up threshold for determining +* the effective numerical rank of the matrices A and B. +* + ULP = DLAMCH( 'Precision' ) + UNFL = DLAMCH( 'Safe Minimum' ) + TOLA = MAX( M, N )*MAX( ANORM, UNFL )*ULP + TOLB = MAX( P, N )*MAX( BNORM, UNFL )*ULP +* + CALL ZGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, TOLA, + $ TOLB, K, L, U, LDU, V, LDV, Q, LDQ, IWORK, RWORK, + $ WORK, WORK( N+1 ), INFO ) +* +* Compute the GSVD of two upper "triangular" matrices +* + CALL ZTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, LDB, + $ TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, + $ WORK, NCYCLE, INFO ) +* +* Sort the singular values and store the pivot indices in IWORK +* Copy ALPHA to RWORK, then sort ALPHA in RWORK +* + CALL DCOPY( N, ALPHA, 1, RWORK, 1 ) + IBND = MIN( L, M-K ) + DO 20 I = 1, IBND +* +* Scan for largest ALPHA(K+I) +* + ISUB = I + SMAX = RWORK( K+I ) + DO 10 J = I + 1, IBND + TEMP = RWORK( K+J ) + IF( TEMP.GT.SMAX ) THEN + ISUB = J + SMAX = TEMP + END IF + 10 CONTINUE + IF( ISUB.NE.I ) THEN + RWORK( K+ISUB ) = RWORK( K+I ) + RWORK( K+I ) = SMAX + IWORK( K+I ) = K + ISUB + ELSE + IWORK( K+I ) = K + I + END IF + 20 CONTINUE +* + RETURN +* +* End of ZGGSVD +* + END diff --git a/costa/native/external/lapack/zggsvp.f b/costa/native/external/lapack/zggsvp.f new file mode 100644 index 000000000..843d1e9f9 --- /dev/null +++ b/costa/native/external/lapack/zggsvp.f @@ -0,0 +1,403 @@ + SUBROUTINE ZGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, + $ TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, + $ IWORK, RWORK, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P + DOUBLE PRECISION TOLA, TOLB +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGGSVP computes unitary matrices U, V and Q such that +* +* N-K-L K L +* U'*A*Q = K ( 0 A12 A13 ) if M-K-L >= 0; +* L ( 0 0 A23 ) +* M-K-L ( 0 0 0 ) +* +* N-K-L K L +* = K ( 0 A12 A13 ) if M-K-L < 0; +* M-K ( 0 0 A23 ) +* +* N-K-L K L +* V'*B*Q = L ( 0 0 B13 ) +* P-L ( 0 0 0 ) +* +* where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular +* upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, +* otherwise A23 is (M-K)-by-L upper trapezoidal. K+L = the effective +* numerical rank of the (M+P)-by-N matrix (A',B')'. Z' denotes the +* conjugate transpose of Z. +* +* This decomposition is the preprocessing step for computing the +* Generalized Singular Value Decomposition (GSVD), see subroutine +* ZGGSVD. +* +* Arguments +* ========= +* +* JOBU (input) CHARACTER*1 +* = 'U': Unitary matrix U is computed; +* = 'N': U is not computed. +* +* JOBV (input) CHARACTER*1 +* = 'V': Unitary matrix V is computed; +* = 'N': V is not computed. +* +* JOBQ (input) CHARACTER*1 +* = 'Q': Unitary matrix Q is computed; +* = 'N': Q is not computed. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* P (input) INTEGER +* The number of rows of the matrix B. P >= 0. +* +* N (input) INTEGER +* The number of columns of the matrices A and B. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A contains the triangular (or trapezoidal) matrix +* described in the Purpose section. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) COMPLEX*16 array, dimension (LDB,N) +* On entry, the P-by-N matrix B. +* On exit, B contains the triangular matrix described in +* the Purpose section. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,P). +* +* TOLA (input) DOUBLE PRECISION +* TOLB (input) DOUBLE PRECISION +* TOLA and TOLB are the thresholds to determine the effective +* numerical rank of matrix B and a subblock of A. Generally, +* they are set to +* TOLA = MAX(M,N)*norm(A)*MAZHEPS, +* TOLB = MAX(P,N)*norm(B)*MAZHEPS. +* The size of TOLA and TOLB may affect the size of backward +* errors of the decomposition. +* +* K (output) INTEGER +* L (output) INTEGER +* On exit, K and L specify the dimension of the subblocks +* described in Purpose section. +* K + L = effective numerical rank of (A',B')'. +* +* U (output) COMPLEX*16 array, dimension (LDU,M) +* If JOBU = 'U', U contains the unitary matrix U. +* If JOBU = 'N', U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= max(1,M) if +* JOBU = 'U'; LDU >= 1 otherwise. +* +* V (output) COMPLEX*16 array, dimension (LDV,M) +* If JOBV = 'V', V contains the unitary matrix V. +* If JOBV = 'N', V is not referenced. +* +* LDV (input) INTEGER +* The leading dimension of the array V. LDV >= max(1,P) if +* JOBV = 'V'; LDV >= 1 otherwise. +* +* Q (output) COMPLEX*16 array, dimension (LDQ,N) +* If JOBQ = 'Q', Q contains the unitary matrix Q. +* If JOBQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N) if +* JOBQ = 'Q'; LDQ >= 1 otherwise. +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (2*N) +* +* TAU (workspace) COMPLEX*16 array, dimension (N) +* +* WORK (workspace) COMPLEX*16 array, dimension (max(3*N,M,P)) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* The subroutine uses LAPACK subroutine ZGEQPF for the QR factorization +* with column pivoting to detect the effective numerical rank of the +* a matrix. It may be replaced by a better rank determination strategy. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL FORWRD, WANTQ, WANTU, WANTV + INTEGER I, J + COMPLEX*16 T +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQPF, ZGEQR2, ZGERQ2, ZLACPY, ZLAPMT, + $ ZLASET, ZUNG2R, ZUNM2R, ZUNMR2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( T ) = ABS( DBLE( T ) ) + ABS( DIMAG( T ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) + FORWRD = .TRUE. +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( P.LT.0 ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -10 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGSVP', -INFO ) + RETURN + END IF +* +* QR with column pivoting of B: B*P = V*( S11 S12 ) +* ( 0 0 ) +* + DO 10 I = 1, N + IWORK( I ) = 0 + 10 CONTINUE + CALL ZGEQPF( P, N, B, LDB, IWORK, TAU, WORK, RWORK, INFO ) +* +* Update A := A*P +* + CALL ZLAPMT( FORWRD, M, N, A, LDA, IWORK ) +* +* Determine the effective rank of matrix B. +* + L = 0 + DO 20 I = 1, MIN( P, N ) + IF( CABS1( B( I, I ) ).GT.TOLB ) + $ L = L + 1 + 20 CONTINUE +* + IF( WANTV ) THEN +* +* Copy the details of V, and form V. +* + CALL ZLASET( 'Full', P, P, CZERO, CZERO, V, LDV ) + IF( P.GT.1 ) + $ CALL ZLACPY( 'Lower', P-1, N, B( 2, 1 ), LDB, V( 2, 1 ), + $ LDV ) + CALL ZUNG2R( P, P, MIN( P, N ), V, LDV, TAU, WORK, INFO ) + END IF +* +* Clean up B +* + DO 40 J = 1, L - 1 + DO 30 I = J + 1, L + B( I, J ) = CZERO + 30 CONTINUE + 40 CONTINUE + IF( P.GT.L ) + $ CALL ZLASET( 'Full', P-L, N, CZERO, CZERO, B( L+1, 1 ), LDB ) +* + IF( WANTQ ) THEN +* +* Set Q = I and Update Q := Q*P +* + CALL ZLASET( 'Full', N, N, CZERO, CONE, Q, LDQ ) + CALL ZLAPMT( FORWRD, N, N, Q, LDQ, IWORK ) + END IF +* + IF( P.GE.L .AND. N.NE.L ) THEN +* +* RQ factorization of ( S11 S12 ) = ( 0 S12 )*Z +* + CALL ZGERQ2( L, N, B, LDB, TAU, WORK, INFO ) +* +* Update A := A*Z' +* + CALL ZUNMR2( 'Right', 'Conjugate transpose', M, N, L, B, LDB, + $ TAU, A, LDA, WORK, INFO ) + IF( WANTQ ) THEN +* +* Update Q := Q*Z' +* + CALL ZUNMR2( 'Right', 'Conjugate transpose', N, N, L, B, + $ LDB, TAU, Q, LDQ, WORK, INFO ) + END IF +* +* Clean up B +* + CALL ZLASET( 'Full', L, N-L, CZERO, CZERO, B, LDB ) + DO 60 J = N - L + 1, N + DO 50 I = J - N + L + 1, L + B( I, J ) = CZERO + 50 CONTINUE + 60 CONTINUE +* + END IF +* +* Let N-L L +* A = ( A11 A12 ) M, +* +* then the following does the complete QR decomposition of A11: +* +* A11 = U*( 0 T12 )*P1' +* ( 0 0 ) +* + DO 70 I = 1, N - L + IWORK( I ) = 0 + 70 CONTINUE + CALL ZGEQPF( M, N-L, A, LDA, IWORK, TAU, WORK, RWORK, INFO ) +* +* Determine the effective rank of A11 +* + K = 0 + DO 80 I = 1, MIN( M, N-L ) + IF( CABS1( A( I, I ) ).GT.TOLA ) + $ K = K + 1 + 80 CONTINUE +* +* Update A12 := U'*A12, where A12 = A( 1:M, N-L+1:N ) +* + CALL ZUNM2R( 'Left', 'Conjugate transpose', M, L, MIN( M, N-L ), + $ A, LDA, TAU, A( 1, N-L+1 ), LDA, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Copy the details of U, and form U +* + CALL ZLASET( 'Full', M, M, CZERO, CZERO, U, LDU ) + IF( M.GT.1 ) + $ CALL ZLACPY( 'Lower', M-1, N-L, A( 2, 1 ), LDA, U( 2, 1 ), + $ LDU ) + CALL ZUNG2R( M, M, MIN( M, N-L ), U, LDU, TAU, WORK, INFO ) + END IF +* + IF( WANTQ ) THEN +* +* Update Q( 1:N, 1:N-L ) = Q( 1:N, 1:N-L )*P1 +* + CALL ZLAPMT( FORWRD, N, N-L, Q, LDQ, IWORK ) + END IF +* +* Clean up A: set the strictly lower triangular part of +* A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0. +* + DO 100 J = 1, K - 1 + DO 90 I = J + 1, K + A( I, J ) = CZERO + 90 CONTINUE + 100 CONTINUE + IF( M.GT.K ) + $ CALL ZLASET( 'Full', M-K, N-L, CZERO, CZERO, A( K+1, 1 ), LDA ) +* + IF( N-L.GT.K ) THEN +* +* RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1 +* + CALL ZGERQ2( K, N-L, A, LDA, TAU, WORK, INFO ) +* + IF( WANTQ ) THEN +* +* Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1' +* + CALL ZUNMR2( 'Right', 'Conjugate transpose', N, N-L, K, A, + $ LDA, TAU, Q, LDQ, WORK, INFO ) + END IF +* +* Clean up A +* + CALL ZLASET( 'Full', K, N-L-K, CZERO, CZERO, A, LDA ) + DO 120 J = N - L - K + 1, N - L + DO 110 I = J - N + L + K + 1, K + A( I, J ) = CZERO + 110 CONTINUE + 120 CONTINUE +* + END IF +* + IF( M.GT.K ) THEN +* +* QR factorization of A( K+1:M,N-L+1:N ) +* + CALL ZGEQR2( M-K, L, A( K+1, N-L+1 ), LDA, TAU, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Update U(:,K+1:M) := U(:,K+1:M)*U1 +* + CALL ZUNM2R( 'Right', 'No transpose', M, M-K, MIN( M-K, L ), + $ A( K+1, N-L+1 ), LDA, TAU, U( 1, K+1 ), LDU, + $ WORK, INFO ) + END IF +* +* Clean up +* + DO 140 J = N - L + 1, N + DO 130 I = J - N + K + L + 1, M + A( I, J ) = CZERO + 130 CONTINUE + 140 CONTINUE +* + END IF +* + RETURN +* +* End of ZGGSVP +* + END diff --git a/costa/native/external/lapack/zgtcon.f b/costa/native/external/lapack/zgtcon.f new file mode 100644 index 000000000..a66f5b0d9 --- /dev/null +++ b/costa/native/external/lapack/zgtcon.f @@ -0,0 +1,167 @@ + SUBROUTINE ZGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 D( * ), DL( * ), DU( * ), DU2( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGTCON estimates the reciprocal of the condition number of a complex +* tridiagonal matrix A using the LU factorization as computed by +* ZGTTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* DL (input) COMPLEX*16 array, dimension (N-1) +* The (n-1) multipliers that define the matrix L from the +* LU factorization of A as computed by ZGTTRF. +* +* D (input) COMPLEX*16 array, dimension (N) +* The n diagonal elements of the upper triangular matrix U from +* the LU factorization of A. +* +* DU (input) COMPLEX*16 array, dimension (N-1) +* The (n-1) elements of the first superdiagonal of U. +* +* DU2 (input) COMPLEX*16 array, dimension (N-2) +* The (n-2) elements of the second superdiagonal of U. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* ANORM (input) DOUBLE PRECISION +* If NORM = '1' or 'O', the 1-norm of the original matrix A. +* If NORM = 'I', the infinity-norm of the original matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ONENRM + INTEGER I, KASE, KASE1 + DOUBLE PRECISION AINVNM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGTTRS, ZLACON +* .. +* .. Intrinsic Functions .. + INTRINSIC DCMPLX +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGTCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* +* Check that D(1:N) is non-zero. +* + DO 10 I = 1, N + IF( D( I ).EQ.DCMPLX( ZERO ) ) + $ RETURN + 10 CONTINUE +* + AINVNM = ZERO + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 20 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(U)*inv(L). +* + CALL ZGTTRS( 'No transpose', N, 1, DL, D, DU, DU2, IPIV, + $ WORK, N, INFO ) + ELSE +* +* Multiply by inv(L')*inv(U'). +* + CALL ZGTTRS( 'Conjugate transpose', N, 1, DL, D, DU, DU2, + $ IPIV, WORK, N, INFO ) + END IF + GO TO 20 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of ZGTCON +* + END diff --git a/costa/native/external/lapack/zgtrfs.f b/costa/native/external/lapack/zgtrfs.f new file mode 100644 index 000000000..abf97ae47 --- /dev/null +++ b/costa/native/external/lapack/zgtrfs.f @@ -0,0 +1,369 @@ + SUBROUTINE ZGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, + $ IPIV, B, LDB, X, LDX, FERR, BERR, WORK, RWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 B( LDB, * ), D( * ), DF( * ), DL( * ), + $ DLF( * ), DU( * ), DU2( * ), DUF( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZGTRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is tridiagonal, and provides +* error bounds and backward error estimates for the solution. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input) COMPLEX*16 array, dimension (N-1) +* The (n-1) subdiagonal elements of A. +* +* D (input) COMPLEX*16 array, dimension (N) +* The diagonal elements of A. +* +* DU (input) COMPLEX*16 array, dimension (N-1) +* The (n-1) superdiagonal elements of A. +* +* DLF (input) COMPLEX*16 array, dimension (N-1) +* The (n-1) multipliers that define the matrix L from the +* LU factorization of A as computed by ZGTTRF. +* +* DF (input) COMPLEX*16 array, dimension (N) +* The n diagonal elements of the upper triangular matrix U from +* the LU factorization of A. +* +* DUF (input) COMPLEX*16 array, dimension (N-1) +* The (n-1) elements of the first superdiagonal of U. +* +* DU2 (input) COMPLEX*16 array, dimension (N-2) +* The (n-2) elements of the second superdiagonal of U. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* B (input) COMPLEX*16 array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) COMPLEX*16 array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by ZGTTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANSN, TRANST + INTEGER COUNT, I, J, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN + COMPLEX*16 ZDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZGTTRS, ZLACON, ZLAGTM +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DIMAG, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGTRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = 4 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 110 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL ZLAGTM( TRANS, N, 1, -ONE, DL, D, DU, X( 1, J ), LDX, ONE, + $ WORK, N ) +* +* Compute abs(op(A))*abs(x) + abs(b) for use in the backward +* error bound. +* + IF( NOTRAN ) THEN + IF( N.EQ.1 ) THEN + RWORK( 1 ) = CABS1( B( 1, J ) ) + + $ CABS1( D( 1 ) )*CABS1( X( 1, J ) ) + ELSE + RWORK( 1 ) = CABS1( B( 1, J ) ) + + $ CABS1( D( 1 ) )*CABS1( X( 1, J ) ) + + $ CABS1( DU( 1 ) )*CABS1( X( 2, J ) ) + DO 30 I = 2, N - 1 + RWORK( I ) = CABS1( B( I, J ) ) + + $ CABS1( DL( I-1 ) )*CABS1( X( I-1, J ) ) + + $ CABS1( D( I ) )*CABS1( X( I, J ) ) + + $ CABS1( DU( I ) )*CABS1( X( I+1, J ) ) + 30 CONTINUE + RWORK( N ) = CABS1( B( N, J ) ) + + $ CABS1( DL( N-1 ) )*CABS1( X( N-1, J ) ) + + $ CABS1( D( N ) )*CABS1( X( N, J ) ) + END IF + ELSE + IF( N.EQ.1 ) THEN + RWORK( 1 ) = CABS1( B( 1, J ) ) + + $ CABS1( D( 1 ) )*CABS1( X( 1, J ) ) + ELSE + RWORK( 1 ) = CABS1( B( 1, J ) ) + + $ CABS1( D( 1 ) )*CABS1( X( 1, J ) ) + + $ CABS1( DL( 1 ) )*CABS1( X( 2, J ) ) + DO 40 I = 2, N - 1 + RWORK( I ) = CABS1( B( I, J ) ) + + $ CABS1( DU( I-1 ) )*CABS1( X( I-1, J ) ) + + $ CABS1( D( I ) )*CABS1( X( I, J ) ) + + $ CABS1( DL( I ) )*CABS1( X( I+1, J ) ) + 40 CONTINUE + RWORK( N ) = CABS1( B( N, J ) ) + + $ CABS1( DU( N-1 ) )*CABS1( X( N-1, J ) ) + + $ CABS1( D( N ) )*CABS1( X( N, J ) ) + END IF + END IF +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + S = ZERO + DO 50 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 50 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL ZGTTRS( TRANS, N, 1, DLF, DF, DUF, DU2, IPIV, WORK, N, + $ INFO ) + CALL ZAXPY( N, DCMPLX( ONE ), WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 60 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 60 CONTINUE +* + KASE = 0 + 70 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL ZGTTRS( TRANST, N, 1, DLF, DF, DUF, DU2, IPIV, WORK, + $ N, INFO ) + DO 80 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 80 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 90 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 90 CONTINUE + CALL ZGTTRS( TRANSN, N, 1, DLF, DF, DUF, DU2, IPIV, WORK, + $ N, INFO ) + END IF + GO TO 70 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 100 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 100 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 110 CONTINUE +* + RETURN +* +* End of ZGTRFS +* + END diff --git a/costa/native/external/lapack/zgtsv.f b/costa/native/external/lapack/zgtsv.f new file mode 100644 index 000000000..f6223f32f --- /dev/null +++ b/costa/native/external/lapack/zgtsv.f @@ -0,0 +1,174 @@ + SUBROUTINE ZGTSV( N, NRHS, DL, D, DU, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 B( LDB, * ), D( * ), DL( * ), DU( * ) +* .. +* +* Purpose +* ======= +* +* ZGTSV solves the equation +* +* A*X = B, +* +* where A is an N-by-N tridiagonal matrix, by Gaussian elimination with +* partial pivoting. +* +* Note that the equation A'*X = B may be solved by interchanging the +* order of the arguments DU and DL. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input/output) COMPLEX*16 array, dimension (N-1) +* On entry, DL must contain the (n-1) subdiagonal elements of +* A. +* On exit, DL is overwritten by the (n-2) elements of the +* second superdiagonal of the upper triangular matrix U from +* the LU factorization of A, in DL(1), ..., DL(n-2). +* +* D (input/output) COMPLEX*16 array, dimension (N) +* On entry, D must contain the diagonal elements of A. +* On exit, D is overwritten by the n diagonal elements of U. +* +* DU (input/output) COMPLEX*16 array, dimension (N-1) +* On entry, DU must contain the (n-1) superdiagonal elements +* of A. +* On exit, DU is overwritten by the (n-1) elements of the first +* superdiagonal of U. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero, and the solution +* has not been computed. The factorization has not been +* completed unless i = N. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER J, K + COMPLEX*16 MULT, TEMP, ZDUM +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGTSV ', -INFO ) + RETURN + END IF +* + IF( N.EQ.0 ) + $ RETURN +* + DO 30 K = 1, N - 1 + IF( DL( K ).EQ.ZERO ) THEN +* +* Subdiagonal is zero, no elimination is required. +* + IF( D( K ).EQ.ZERO ) THEN +* +* Diagonal is zero: set INFO = K and return; a unique +* solution can not be found. +* + INFO = K + RETURN + END IF + ELSE IF( CABS1( D( K ) ).GE.CABS1( DL( K ) ) ) THEN +* +* No row interchange required +* + MULT = DL( K ) / D( K ) + D( K+1 ) = D( K+1 ) - MULT*DU( K ) + DO 10 J = 1, NRHS + B( K+1, J ) = B( K+1, J ) - MULT*B( K, J ) + 10 CONTINUE + IF( K.LT.( N-1 ) ) + $ DL( K ) = ZERO + ELSE +* +* Interchange rows K and K+1 +* + MULT = D( K ) / DL( K ) + D( K ) = DL( K ) + TEMP = D( K+1 ) + D( K+1 ) = DU( K ) - MULT*TEMP + IF( K.LT.( N-1 ) ) THEN + DL( K ) = DU( K+1 ) + DU( K+1 ) = -MULT*DL( K ) + END IF + DU( K ) = TEMP + DO 20 J = 1, NRHS + TEMP = B( K, J ) + B( K, J ) = B( K+1, J ) + B( K+1, J ) = TEMP - MULT*B( K+1, J ) + 20 CONTINUE + END IF + 30 CONTINUE + IF( D( N ).EQ.ZERO ) THEN + INFO = N + RETURN + END IF +* +* Back solve with the matrix U from the factorization. +* + DO 50 J = 1, NRHS + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / D( N-1 ) + DO 40 K = N - 2, 1, -1 + B( K, J ) = ( B( K, J )-DU( K )*B( K+1, J )-DL( K )* + $ B( K+2, J ) ) / D( K ) + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of ZGTSV +* + END diff --git a/costa/native/external/lapack/zgtsvx.f b/costa/native/external/lapack/zgtsvx.f new file mode 100644 index 000000000..94a483b82 --- /dev/null +++ b/costa/native/external/lapack/zgtsvx.f @@ -0,0 +1,294 @@ + SUBROUTINE ZGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, + $ DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER FACT, TRANS + INTEGER INFO, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 B( LDB, * ), D( * ), DF( * ), DL( * ), + $ DLF( * ), DU( * ), DU2( * ), DUF( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZGTSVX uses the LU factorization to compute the solution to a complex +* system of linear equations A * X = B, A**T * X = B, or A**H * X = B, +* where A is a tridiagonal matrix of order N and X and B are N-by-NRHS +* matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'N', the LU decomposition is used to factor the matrix A +* as A = L * U, where L is a product of permutation and unit lower +* bidiagonal matrices and U is upper triangular with nonzeros in +* only the main diagonal and first two superdiagonals. +* +* 2. If some U(i,i)=0, so that U is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 3. The system of equations is solved for X using the factored form +* of A. +* +* 4. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of A has been +* supplied on entry. +* = 'F': DLF, DF, DUF, DU2, and IPIV contain the factored form +* of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV will not +* be modified. +* = 'N': The matrix will be copied to DLF, DF, and DUF +* and factored. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input) COMPLEX*16 array, dimension (N-1) +* The (n-1) subdiagonal elements of A. +* +* D (input) COMPLEX*16 array, dimension (N) +* The n diagonal elements of A. +* +* DU (input) COMPLEX*16 array, dimension (N-1) +* The (n-1) superdiagonal elements of A. +* +* DLF (input or output) COMPLEX*16 array, dimension (N-1) +* If FACT = 'F', then DLF is an input argument and on entry +* contains the (n-1) multipliers that define the matrix L from +* the LU factorization of A as computed by ZGTTRF. +* +* If FACT = 'N', then DLF is an output argument and on exit +* contains the (n-1) multipliers that define the matrix L from +* the LU factorization of A. +* +* DF (input or output) COMPLEX*16 array, dimension (N) +* If FACT = 'F', then DF is an input argument and on entry +* contains the n diagonal elements of the upper triangular +* matrix U from the LU factorization of A. +* +* If FACT = 'N', then DF is an output argument and on exit +* contains the n diagonal elements of the upper triangular +* matrix U from the LU factorization of A. +* +* DUF (input or output) COMPLEX*16 array, dimension (N-1) +* If FACT = 'F', then DUF is an input argument and on entry +* contains the (n-1) elements of the first superdiagonal of U. +* +* If FACT = 'N', then DUF is an output argument and on exit +* contains the (n-1) elements of the first superdiagonal of U. +* +* DU2 (input or output) COMPLEX*16 array, dimension (N-2) +* If FACT = 'F', then DU2 is an input argument and on entry +* contains the (n-2) elements of the second superdiagonal of +* U. +* +* If FACT = 'N', then DU2 is an output argument and on exit +* contains the (n-2) elements of the second superdiagonal of +* U. +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains the pivot indices from the LU factorization of A as +* computed by ZGTTRF. +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains the pivot indices from the LU factorization of A; +* row i of the matrix was interchanged with row IPIV(i). +* IPIV(i) will always be either i or i+1; IPIV(i) = i indicates +* a row interchange was not required. +* +* B (input) COMPLEX*16 array, dimension (LDB,NRHS) +* The N-by-NRHS right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) COMPLEX*16 array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) DOUBLE PRECISION +* The estimate of the reciprocal condition number of the matrix +* A. If RCOND is less than the machine precision (in +* particular, if RCOND = 0), the matrix is singular to working +* precision. This condition is indicated by a return code of +* INFO > 0. +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: U(i,i) is exactly zero. The factorization +* has not been completed unless i = N, but the +* factor U is exactly singular, so the solution +* and error bounds could not be computed. +* RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT, NOTRAN + CHARACTER NORM + DOUBLE PRECISION ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANGT + EXTERNAL LSAME, DLAMCH, ZLANGT +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZGTCON, ZGTRFS, ZGTTRF, ZGTTRS, + $ ZLACPY +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGTSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the LU factorization of A. +* + CALL ZCOPY( N, D, 1, DF, 1 ) + IF( N.GT.1 ) THEN + CALL ZCOPY( N-1, DL, 1, DLF, 1 ) + CALL ZCOPY( N-1, DU, 1, DUF, 1 ) + END IF + CALL ZGTTRF( N, DLF, DF, DUF, DU2, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = ZLANGT( NORM, N, DL, D, DU ) +* +* Compute the reciprocal of the condition number of A. +* + CALL ZGTCON( NORM, N, DLF, DF, DUF, DU2, IPIV, ANORM, RCOND, WORK, + $ INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution vectors X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZGTTRS( TRANS, N, NRHS, DLF, DF, DUF, DU2, IPIV, X, LDX, + $ INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL ZGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, IPIV, + $ B, LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* + RETURN +* +* End of ZGTSVX +* + END diff --git a/costa/native/external/lapack/zgttrf.f b/costa/native/external/lapack/zgttrf.f new file mode 100644 index 000000000..0bc59b6e5 --- /dev/null +++ b/costa/native/external/lapack/zgttrf.f @@ -0,0 +1,175 @@ + SUBROUTINE ZGTTRF( N, DL, D, DU, DU2, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* Purpose +* ======= +* +* ZGTTRF computes an LU factorization of a complex tridiagonal matrix A +* using elimination with partial pivoting and row interchanges. +* +* The factorization has the form +* A = L * U +* where L is a product of permutation and unit lower bidiagonal +* matrices and U is upper triangular with nonzeros in only the main +* diagonal and first two superdiagonals. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. +* +* DL (input/output) COMPLEX*16 array, dimension (N-1) +* On entry, DL must contain the (n-1) sub-diagonal elements of +* A. +* +* On exit, DL is overwritten by the (n-1) multipliers that +* define the matrix L from the LU factorization of A. +* +* D (input/output) COMPLEX*16 array, dimension (N) +* On entry, D must contain the diagonal elements of A. +* +* On exit, D is overwritten by the n diagonal elements of the +* upper triangular matrix U from the LU factorization of A. +* +* DU (input/output) COMPLEX*16 array, dimension (N-1) +* On entry, DU must contain the (n-1) super-diagonal elements +* of A. +* +* On exit, DU is overwritten by the (n-1) elements of the first +* super-diagonal of U. +* +* DU2 (output) COMPLEX*16 array, dimension (N-2) +* On exit, DU2 is overwritten by the (n-2) elements of the +* second super-diagonal of U. +* +* IPIV (output) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, U(k,k) is exactly zero. The factorization +* has been completed, but the factor U is exactly +* singular, and division by zero will occur if it is used +* to solve a system of equations. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX*16 FACT, TEMP, ZDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'ZGTTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Initialize IPIV(i) = i and DU2(i) = 0 +* + DO 10 I = 1, N + IPIV( I ) = I + 10 CONTINUE + DO 20 I = 1, N - 2 + DU2( I ) = ZERO + 20 CONTINUE +* + DO 30 I = 1, N - 2 + IF( CABS1( D( I ) ).GE.CABS1( DL( I ) ) ) THEN +* +* No row interchange required, eliminate DL(I) +* + IF( CABS1( D( I ) ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + DL( I ) = FACT + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + END IF + ELSE +* +* Interchange rows I and I+1, eliminate DL(I) +* + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + DL( I ) = FACT + TEMP = DU( I ) + DU( I ) = D( I+1 ) + D( I+1 ) = TEMP - FACT*D( I+1 ) + DU2( I ) = DU( I+1 ) + DU( I+1 ) = -FACT*DU( I+1 ) + IPIV( I ) = I + 1 + END IF + 30 CONTINUE + IF( N.GT.1 ) THEN + I = N - 1 + IF( CABS1( D( I ) ).GE.CABS1( DL( I ) ) ) THEN + IF( CABS1( D( I ) ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + DL( I ) = FACT + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + END IF + ELSE + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + DL( I ) = FACT + TEMP = DU( I ) + DU( I ) = D( I+1 ) + D( I+1 ) = TEMP - FACT*D( I+1 ) + IPIV( I ) = I + 1 + END IF + END IF +* +* Check for a zero on the diagonal of U. +* + DO 40 I = 1, N + IF( CABS1( D( I ) ).EQ.ZERO ) THEN + INFO = I + GO TO 50 + END IF + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of ZGTTRF +* + END diff --git a/costa/native/external/lapack/zgttrs.f b/costa/native/external/lapack/zgttrs.f new file mode 100644 index 000000000..e62367c9b --- /dev/null +++ b/costa/native/external/lapack/zgttrs.f @@ -0,0 +1,143 @@ + SUBROUTINE ZGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* Purpose +* ======= +* +* ZGTTRS solves one of the systems of equations +* A * X = B, A**T * X = B, or A**H * X = B, +* with a tridiagonal matrix A using the LU factorization computed +* by ZGTTRF. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER +* Specifies the form of the system of equations. +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The order of the matrix A. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input) COMPLEX*16 array, dimension (N-1) +* The (n-1) multipliers that define the matrix L from the +* LU factorization of A. +* +* D (input) COMPLEX*16 array, dimension (N) +* The n diagonal elements of the upper triangular matrix U from +* the LU factorization of A. +* +* DU (input) COMPLEX*16 array, dimension (N-1) +* The (n-1) elements of the first super-diagonal of U. +* +* DU2 (input) COMPLEX*16 array, dimension (N-2) +* The (n-2) elements of the second super-diagonal of U. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the matrix of right hand side vectors B. +* On exit, B is overwritten by the solution vectors X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL NOTRAN + INTEGER ITRANS, J, JB, NB +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGTTS2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOTRAN = ( TRANS.EQ.'N' .OR. TRANS.EQ.'n' ) + IF( .NOT.NOTRAN .AND. .NOT.( TRANS.EQ.'T' .OR. TRANS.EQ. + $ 't' ) .AND. .NOT.( TRANS.EQ.'C' .OR. TRANS.EQ.'c' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGTTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Decode TRANS +* + IF( NOTRAN ) THEN + ITRANS = 0 + ELSE IF( TRANS.EQ.'T' .OR. TRANS.EQ.'t' ) THEN + ITRANS = 1 + ELSE + ITRANS = 2 + END IF +* +* Determine the number of right-hand sides to solve at a time. +* + IF( NRHS.EQ.1 ) THEN + NB = 1 + ELSE + NB = MAX( 1, ILAENV( 1, 'ZGTTRS', TRANS, N, NRHS, -1, -1 ) ) + END IF +* + IF( NB.GE.NRHS ) THEN + CALL ZGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) + ELSE + DO 10 J = 1, NRHS, NB + JB = MIN( NRHS-J+1, NB ) + CALL ZGTTS2( ITRANS, N, JB, DL, D, DU, DU2, IPIV, B( 1, J ), + $ LDB ) + 10 CONTINUE + END IF +* +* End of ZGTTRS +* + END diff --git a/costa/native/external/lapack/zgtts2.f b/costa/native/external/lapack/zgtts2.f new file mode 100644 index 000000000..adf7077b7 --- /dev/null +++ b/costa/native/external/lapack/zgtts2.f @@ -0,0 +1,272 @@ + SUBROUTINE ZGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER ITRANS, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* Purpose +* ======= +* +* ZGTTS2 solves one of the systems of equations +* A * X = B, A**T * X = B, or A**H * X = B, +* with a tridiagonal matrix A using the LU factorization computed +* by ZGTTRF. +* +* Arguments +* ========= +* +* ITRANS (input) INTEGER +* Specifies the form of the system of equations. +* = 0: A * X = B (No transpose) +* = 1: A**T * X = B (Transpose) +* = 2: A**H * X = B (Conjugate transpose) +* +* N (input) INTEGER +* The order of the matrix A. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* DL (input) COMPLEX*16 array, dimension (N-1) +* The (n-1) multipliers that define the matrix L from the +* LU factorization of A. +* +* D (input) COMPLEX*16 array, dimension (N) +* The n diagonal elements of the upper triangular matrix U from +* the LU factorization of A. +* +* DU (input) COMPLEX*16 array, dimension (N-1) +* The (n-1) elements of the first super-diagonal of U. +* +* DU2 (input) COMPLEX*16 array, dimension (N-2) +* The (n-2) elements of the second super-diagonal of U. +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices; for 1 <= i <= n, row i of the matrix was +* interchanged with row IPIV(i). IPIV(i) will always be either +* i or i+1; IPIV(i) = i indicates a row interchange was not +* required. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the matrix of right hand side vectors B. +* On exit, B is overwritten by the solution vectors X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J + COMPLEX*16 TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( ITRANS.EQ.0 ) THEN +* +* Solve A*X = B using the LU factorization of A, +* overwriting each right hand side vector with its solution. +* + IF( NRHS.LE.1 ) THEN + J = 1 + 10 CONTINUE +* +* Solve L*x = b. +* + DO 20 I = 1, N - 1 + IF( IPIV( I ).EQ.I ) THEN + B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J ) + ELSE + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - DL( I )*B( I, J ) + END IF + 20 CONTINUE +* +* Solve U*x = b. +* + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 30 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )* + $ B( I+2, J ) ) / D( I ) + 30 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 10 + END IF + ELSE + DO 60 J = 1, NRHS +* +* Solve L*x = b. +* + DO 40 I = 1, N - 1 + IF( IPIV( I ).EQ.I ) THEN + B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J ) + ELSE + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - DL( I )*B( I, J ) + END IF + 40 CONTINUE +* +* Solve U*x = b. +* + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 50 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )* + $ B( I+2, J ) ) / D( I ) + 50 CONTINUE + 60 CONTINUE + END IF + ELSE IF( ITRANS.EQ.1 ) THEN +* +* Solve A**T * X = B. +* + IF( NRHS.LE.1 ) THEN + J = 1 + 70 CONTINUE +* +* Solve U**T * x = b. +* + B( 1, J ) = B( 1, J ) / D( 1 ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) + DO 80 I = 3, N + B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )-DU2( I-2 )* + $ B( I-2, J ) ) / D( I ) + 80 CONTINUE +* +* Solve L**T * x = b. +* + DO 90 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - DL( I )*B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - DL( I )*TEMP + B( I, J ) = TEMP + END IF + 90 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 70 + END IF + ELSE + DO 120 J = 1, NRHS +* +* Solve U**T * x = b. +* + B( 1, J ) = B( 1, J ) / D( 1 ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) + DO 100 I = 3, N + B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )- + $ DU2( I-2 )*B( I-2, J ) ) / D( I ) + 100 CONTINUE +* +* Solve L**T * x = b. +* + DO 110 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - DL( I )*B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - DL( I )*TEMP + B( I, J ) = TEMP + END IF + 110 CONTINUE + 120 CONTINUE + END IF + ELSE +* +* Solve A**H * X = B. +* + IF( NRHS.LE.1 ) THEN + J = 1 + 130 CONTINUE +* +* Solve U**H * x = b. +* + B( 1, J ) = B( 1, J ) / DCONJG( D( 1 ) ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DCONJG( DU( 1 ) )*B( 1, J ) ) / + $ DCONJG( D( 2 ) ) + DO 140 I = 3, N + B( I, J ) = ( B( I, J )-DCONJG( DU( I-1 ) )*B( I-1, J )- + $ DCONJG( DU2( I-2 ) )*B( I-2, J ) ) / + $ DCONJG( D( I ) ) + 140 CONTINUE +* +* Solve L**H * x = b. +* + DO 150 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - DCONJG( DL( I ) )*B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - DCONJG( DL( I ) )*TEMP + B( I, J ) = TEMP + END IF + 150 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 130 + END IF + ELSE + DO 180 J = 1, NRHS +* +* Solve U**H * x = b. +* + B( 1, J ) = B( 1, J ) / DCONJG( D( 1 ) ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DCONJG( DU( 1 ) )*B( 1, J ) ) + $ / DCONJG( D( 2 ) ) + DO 160 I = 3, N + B( I, J ) = ( B( I, J )-DCONJG( DU( I-1 ) )* + $ B( I-1, J )-DCONJG( DU2( I-2 ) )* + $ B( I-2, J ) ) / DCONJG( D( I ) ) + 160 CONTINUE +* +* Solve L**H * x = b. +* + DO 170 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - DCONJG( DL( I ) )* + $ B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - DCONJG( DL( I ) )*TEMP + B( I, J ) = TEMP + END IF + 170 CONTINUE + 180 CONTINUE + END IF + END IF +* +* End of ZGTTS2 +* + END diff --git a/costa/native/external/lapack/zhbev.f b/costa/native/external/lapack/zhbev.f new file mode 100644 index 000000000..70672c9b3 --- /dev/null +++ b/costa/native/external/lapack/zhbev.f @@ -0,0 +1,209 @@ + SUBROUTINE ZHBEV( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, + $ RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZHBEV computes all the eigenvalues and, optionally, eigenvectors of +* a complex Hermitian band matrix A. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) COMPLEX*16 array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, AB is overwritten by values generated during the +* reduction to tridiagonal form. If UPLO = 'U', the first +* superdiagonal and the diagonal of the tridiagonal matrix T +* are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +* the diagonal and first subdiagonal of T are returned in the +* first two rows of AB. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD + 1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) COMPLEX*16 array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +* eigenvectors of the matrix A, with the i-th column of Z +* holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) COMPLEX*16 array, dimension (N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (max(1,3*N-2)) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, WANTZ + INTEGER IINFO, IMAX, INDE, INDRWK, ISCALE + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHB + EXTERNAL LSAME, DLAMCH, ZLANHB +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTERF, XERBLA, ZHBTRD, ZLASCL, ZSTEQR +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHBEV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( LOWER ) THEN + W( 1 ) = AB( 1, 1 ) + ELSE + W( 1 ) = AB( KD+1, 1 ) + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = ZLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL ZLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL ZLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call ZHBTRD to reduce Hermitian band matrix to tridiagonal form. +* + INDE = 1 + CALL ZHBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, RWORK( INDE ), Z, + $ LDZ, WORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call ZSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + INDRWK = INDE + N + CALL ZSTEQR( JOBZ, N, W, RWORK( INDE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + RETURN +* +* End of ZHBEV +* + END diff --git a/costa/native/external/lapack/zhbevd.f b/costa/native/external/lapack/zhbevd.f new file mode 100644 index 000000000..f9dcce634 --- /dev/null +++ b/costa/native/external/lapack/zhbevd.f @@ -0,0 +1,298 @@ + SUBROUTINE ZHBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, + $ LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZHBEVD computes all the eigenvalues and, optionally, eigenvectors of +* a complex Hermitian band matrix A. If eigenvectors are desired, it +* uses a divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) COMPLEX*16 array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, AB is overwritten by values generated during the +* reduction to tridiagonal form. If UPLO = 'U', the first +* superdiagonal and the diagonal of the tridiagonal matrix T +* are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +* the diagonal and first subdiagonal of T are returned in the +* first two rows of AB. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD + 1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) COMPLEX*16 array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +* eigenvectors of the matrix A, with the i-th column of Z +* holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If N <= 1, LWORK must be at least 1. +* If JOBZ = 'N' and N > 1, LWORK must be at least N. +* If JOBZ = 'V' and N > 1, LWORK must be at least 2*N**2. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace/output) DOUBLE PRECISION array, +* dimension (LRWORK) +* On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +* +* LRWORK (input) INTEGER +* The dimension of array RWORK. +* If N <= 1, LRWORK must be at least 1. +* If JOBZ = 'N' and N > 1, LRWORK must be at least N. +* If JOBZ = 'V' and N > 1, LRWORK must be at least +* 1 + 5*N + 2*N**2. +* +* If LRWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the RWORK array, +* returns this value as the first entry of the RWORK array, and +* no error message related to LRWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of array IWORK. +* If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +* If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N . +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDWK2, INDWRK, ISCALE, + $ LIWMIN, LLRWK, LLWK2, LRWMIN, LWMIN + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHB + EXTERNAL LSAME, DLAMCH, ZLANHB +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTERF, XERBLA, ZGEMM, ZHBTRD, ZLACPY, + $ ZLASCL, ZSTEDC +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LWMIN = 1 + LRWMIN = 1 + LIWMIN = 1 + ELSE + IF( WANTZ ) THEN + LWMIN = 2*N**2 + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + LRWMIN = N + LIWMIN = 1 + END IF + END IF + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHBEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AB( 1, 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = ZLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL ZLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL ZLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call ZHBTRD to reduce Hermitian band matrix to tridiagonal form. +* + INDE = 1 + INDWRK = INDE + N + INDWK2 = 1 + N*N + LLWK2 = LWORK - INDWK2 + 1 + LLRWK = LRWORK - INDWRK + 1 + CALL ZHBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, RWORK( INDE ), Z, + $ LDZ, WORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call ZSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, WORK( INDWK2 ), + $ LLWK2, RWORK( INDWRK ), LLRWK, IWORK, LIWORK, + $ INFO ) + CALL ZGEMM( 'N', 'N', N, N, N, CONE, Z, LDZ, WORK, N, CZERO, + $ WORK( INDWK2 ), N ) + CALL ZLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of ZHBEVD +* + END diff --git a/costa/native/external/lapack/zhbevx.f b/costa/native/external/lapack/zhbevx.f new file mode 100644 index 000000000..3905a980a --- /dev/null +++ b/costa/native/external/lapack/zhbevx.f @@ -0,0 +1,417 @@ + SUBROUTINE ZHBEVX( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, LDQ, VL, + $ VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, RWORK, + $ IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AB( LDAB, * ), Q( LDQ, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZHBEVX computes selected eigenvalues and, optionally, eigenvectors +* of a complex Hermitian band matrix A. Eigenvalues and eigenvectors +* can be selected by specifying either a range of values or a range of +* indices for the desired eigenvalues. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found; +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found; +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) COMPLEX*16 array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, AB is overwritten by values generated during the +* reduction to tridiagonal form. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD + 1. +* +* Q (output) COMPLEX*16 array, dimension (LDQ, N) +* If JOBZ = 'V', the N-by-N unitary matrix used in the +* reduction to tridiagonal form. +* If JOBZ = 'N', the array Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. If JOBZ = 'V', then +* LDQ >= max(1,N). +* +* VL (input) DOUBLE PRECISION +* VU (input) DOUBLE PRECISION +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) DOUBLE PRECISION +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing AB to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*DLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*DLAMCH('S'). +* +* See "Computing Small Singular Values of Bidiagonal Matrices +* with Guaranteed High Relative Accuracy," by Demmel and +* Kahan, LAPACK Working Note #3. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* The first M elements contain the selected eigenvalues in +* ascending order. +* +* Z (output) COMPLEX*16 array, dimension (LDZ, max(1,M)) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If an eigenvector fails to converge, then that column of Z +* contains the latest approximation to the eigenvector, and the +* index of the eigenvector is returned in IFAIL. +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) COMPLEX*16 array, dimension (N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (7*N) +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, then i eigenvectors failed to converge. +* Their indices are stored in array IFAIL. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWK, INDRWK, INDWRK, ISCALE, ITMP1, + $ J, JJ, NSPLIT + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU + COMPLEX*16 CTMP1 +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHB + EXTERNAL LSAME, DLAMCH, ZLANHB +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZCOPY, + $ ZGEMV, ZHBTRD, ZLACPY, ZLASCL, ZSTEIN, ZSTEQR, + $ ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LOWER = LSAME( UPLO, 'L' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( WANTZ .AND. LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -11 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -13 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -18 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHBEVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + M = 1 + IF( LOWER ) THEN + CTMP1 = AB( 1, 1 ) + ELSE + CTMP1 = AB( KD+1, 1 ) + END IF + TMP1 = DBLE( CTMP1 ) + IF( VALEIG ) THEN + IF( .NOT.( VL.LT.TMP1 .AND. VU.GE.TMP1 ) ) + $ M = 0 + END IF + IF( M.EQ.1 ) THEN + W( 1 ) = CTMP1 + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + END IF + ANRM = ZLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL ZLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL ZLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call ZHBTRD to reduce Hermitian band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N + INDWRK = 1 + CALL ZHBTRD( JOBZ, UPLO, N, KD, AB, LDAB, RWORK( INDD ), + $ RWORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call DSTERF or ZSTEQR. If this fails for some +* eigenvalue, then try DSTEBZ. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ ( ABSTOL.LE.ZERO ) ) THEN + CALL DCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL DSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL ZLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL ZSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL ZSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by ZSTEIN. +* + DO 20 J = 1, M + CALL ZCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL ZGEMV( 'N', N, N, CONE, Q, LDQ, WORK, 1, CZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* + RETURN +* +* End of ZHBEVX +* + END diff --git a/costa/native/external/lapack/zhbgst.f b/costa/native/external/lapack/zhbgst.f new file mode 100644 index 000000000..d0ddf2915 --- /dev/null +++ b/costa/native/external/lapack/zhbgst.f @@ -0,0 +1,1378 @@ + SUBROUTINE ZHBGST( VECT, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, X, + $ LDX, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO, VECT + INTEGER INFO, KA, KB, LDAB, LDBB, LDX, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 AB( LDAB, * ), BB( LDBB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZHBGST reduces a complex Hermitian-definite banded generalized +* eigenproblem A*x = lambda*B*x to standard form C*y = lambda*y, +* such that C has the same bandwidth as A. +* +* B must have been previously factorized as S**H*S by ZPBSTF, using a +* split Cholesky factorization. A is overwritten by C = X**H*A*X, where +* X = S**(-1)*Q and Q is a unitary matrix chosen to preserve the +* bandwidth of A. +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* = 'N': do not form the transformation matrix X; +* = 'V': form X. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* KA (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KA >= 0. +* +* KB (input) INTEGER +* The number of superdiagonals of the matrix B if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KA >= KB >= 0. +* +* AB (input/output) COMPLEX*16 array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first ka+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +* +* On exit, the transformed matrix X**H*A*X, stored in the same +* format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KA+1. +* +* BB (input) COMPLEX*16 array, dimension (LDBB,N) +* The banded factor S from the split Cholesky factorization of +* B, as returned by ZPBSTF, stored in the first kb+1 rows of +* the array. +* +* LDBB (input) INTEGER +* The leading dimension of the array BB. LDBB >= KB+1. +* +* X (output) COMPLEX*16 array, dimension (LDX,N) +* If VECT = 'V', the n-by-n matrix X. +* If VECT = 'N', the array X is not referenced. +* +* LDX (input) INTEGER +* The leading dimension of the array X. +* LDX >= max(1,N) if VECT = 'V'; LDX >= 1 otherwise. +* +* WORK (workspace) COMPLEX*16 array, dimension (N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + DOUBLE PRECISION ONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ), ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPDATE, UPPER, WANTX + INTEGER I, I0, I1, I2, INCA, J, J1, J1T, J2, J2T, K, + $ KA1, KB1, KBT, L, M, NR, NRT, NX + DOUBLE PRECISION BII + COMPLEX*16 RA, RA1, T +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZGERC, ZGERU, ZLACGV, ZLAR2V, + $ ZLARGV, ZLARTG, ZLARTV, ZLASET, ZROT +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTX = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + KA1 = KA + 1 + KB1 = KB + 1 + INFO = 0 + IF( .NOT.WANTX .AND. .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDX.LT.1 .OR. WANTX .AND. LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHBGST', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + INCA = LDAB*KA1 +* +* Initialize X to the unit matrix, if needed +* + IF( WANTX ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, X, LDX ) +* +* Set M to the splitting point m. It must be the same value as is +* used in ZPBSTF. The chosen value allows the arrays WORK and RWORK +* to be of dimension (N). +* + M = ( N+KB ) / 2 +* +* The routine works in two phases, corresponding to the two halves +* of the split Cholesky factorization of B as S**H*S where +* +* S = ( U ) +* ( M L ) +* +* with U upper triangular of order m, and L lower triangular of +* order n-m. S has the same bandwidth as B. +* +* S is treated as a product of elementary matrices: +* +* S = S(m)*S(m-1)*...*S(2)*S(1)*S(m+1)*S(m+2)*...*S(n-1)*S(n) +* +* where S(i) is determined by the i-th row of S. +* +* In phase 1, the index i takes the values n, n-1, ... , m+1; +* in phase 2, it takes the values 1, 2, ... , m. +* +* For each value of i, the current matrix A is updated by forming +* inv(S(i))**H*A*inv(S(i)). This creates a triangular bulge outside +* the band of A. The bulge is then pushed down toward the bottom of +* A in phase 1, and up toward the top of A in phase 2, by applying +* plane rotations. +* +* There are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1 +* of them are linearly independent, so annihilating a bulge requires +* only 2*kb-1 plane rotations. The rotations are divided into a 1st +* set of kb-1 rotations, and a 2nd set of kb rotations. +* +* Wherever possible, rotations are generated and applied in vector +* operations of length NR between the indices J1 and J2 (sometimes +* replaced by modified values NRT, J1T or J2T). +* +* The real cosines and complex sines of the rotations are stored in +* the arrays RWORK and WORK, those of the 1st set in elements +* 2:m-kb-1, and those of the 2nd set in elements m-kb+1:n. +* +* The bulges are not formed explicitly; nonzero elements outside the +* band are created only when they are required for generating new +* rotations; they are stored in the array WORK, in positions where +* they are later overwritten by the sines of the rotations which +* annihilate them. +* +* **************************** Phase 1 ***************************** +* +* The logical structure of this phase is: +* +* UPDATE = .TRUE. +* DO I = N, M + 1, -1 +* use S(i) to update A and create a new bulge +* apply rotations to push all bulges KA positions downward +* END DO +* UPDATE = .FALSE. +* DO I = M + KA + 1, N - 1 +* apply rotations to push all bulges KA positions downward +* END DO +* +* To avoid duplicating code, the two loops are merged. +* + UPDATE = .TRUE. + I = N + 1 + 10 CONTINUE + IF( UPDATE ) THEN + I = I - 1 + KBT = MIN( KB, I-1 ) + I0 = I - 1 + I1 = MIN( N, I+KA ) + I2 = I - KBT + KA1 + IF( I.LT.M+1 ) THEN + UPDATE = .FALSE. + I = I + 1 + I0 = M + IF( KA.EQ.0 ) + $ GO TO 480 + GO TO 10 + END IF + ELSE + I = I + KA + IF( I.GT.N-1 ) + $ GO TO 480 + END IF +* + IF( UPPER ) THEN +* +* Transform A, working with the upper triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**H * A * inv(S(i)) +* + BII = DBLE( BB( KB1, I ) ) + AB( KA1, I ) = ( DBLE( AB( KA1, I ) ) / BII ) / BII + DO 20 J = I + 1, I1 + AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII + 20 CONTINUE + DO 30 J = MAX( 1, I-KA ), I - 1 + AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII + 30 CONTINUE + DO 60 K = I - KBT, I - 1 + DO 40 J = I - KBT, K + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ BB( J-I+KB1, I )* + $ DCONJG( AB( K-I+KA1, I ) ) - + $ DCONJG( BB( K-I+KB1, I ) )* + $ AB( J-I+KA1, I ) + + $ DBLE( AB( KA1, I ) )* + $ BB( J-I+KB1, I )* + $ DCONJG( BB( K-I+KB1, I ) ) + 40 CONTINUE + DO 50 J = MAX( 1, I-KA ), I - KBT - 1 + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ DCONJG( BB( K-I+KB1, I ) )* + $ AB( J-I+KA1, I ) + 50 CONTINUE + 60 CONTINUE + DO 80 J = I, I1 + DO 70 K = MAX( J-KA, I-KBT ), I - 1 + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ BB( K-I+KB1, I )*AB( I-J+KA1, J ) + 70 CONTINUE + 80 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL ZDSCAL( N-M, ONE / BII, X( M+1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL ZGERC( N-M, KBT, -CONE, X( M+1, I ), 1, + $ BB( KB1-KBT, I ), 1, X( M+1, I-KBT ), + $ LDX ) + END IF +* +* store a(i,i1) in RA1 for use in next loop over K +* + RA1 = AB( I-I1+KA1, I1 ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions down toward the bottom of the +* band +* + DO 130 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN +* +* generate rotation to annihilate a(i,i-k+ka+1) +* + CALL ZLARTG( AB( K+1, I-K+KA ), RA1, + $ RWORK( I-K+KA-M ), WORK( I-K+KA-M ), RA ) +* +* create nonzero element a(i-k,i-k+ka+1) outside the +* band and store it in WORK(i-k) +* + T = -BB( KB1-K, I )*RA1 + WORK( I-K ) = RWORK( I-K+KA-M )*T - + $ DCONJG( WORK( I-K+KA-M ) )* + $ AB( 1, I-K+KA ) + AB( 1, I-K+KA ) = WORK( I-K+KA-M )*T + + $ RWORK( I-K+KA-M )*AB( 1, I-K+KA ) + RA1 = RA + END IF + END IF + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MAX( J2, I+2*KA-K+1 ) + ELSE + J2T = J2 + END IF + NRT = ( N-J2T+KA ) / KA1 + DO 90 J = J2T, J1, KA1 +* +* create nonzero element a(j-ka,j+1) outside the band +* and store it in WORK(j-m) +* + WORK( J-M ) = WORK( J-M )*AB( 1, J+1 ) + AB( 1, J+1 ) = RWORK( J-M )*AB( 1, J+1 ) + 90 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL ZLARGV( NRT, AB( 1, J2T ), INCA, WORK( J2T-M ), KA1, + $ RWORK( J2T-M ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the right +* + DO 100 L = 1, KA - 1 + CALL ZLARTV( NR, AB( KA1-L, J2 ), INCA, + $ AB( KA-L, J2+1 ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 100 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL ZLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ), + $ AB( KA, J2+1 ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) +* + CALL ZLACGV( NR, WORK( J2-M ), KA1 ) + END IF +* +* start applying rotations in 1st set from the left +* + DO 110 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 110 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 120 J = J2, J1, KA1 + CALL ZROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ RWORK( J-M ), DCONJG( WORK( J-M ) ) ) + 120 CONTINUE + END IF + 130 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.LE.N .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i-kbt,i-kbt+ka+1) outside the +* band and store it in WORK(i-kbt) +* + WORK( I-KBT ) = -BB( KB1-KBT, I )*RA1 + END IF + END IF +* + DO 170 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1 + ELSE + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + END IF +* +* finish applying rotations in 2nd set from the left +* + DO 140 L = KB - K, 1, -1 + NRT = ( N-J2+KA+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( L, J2-L+1 ), INCA, + $ AB( L+1, J2-L+1 ), INCA, RWORK( J2-KA ), + $ WORK( J2-KA ), KA1 ) + 140 CONTINUE + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + DO 150 J = J1, J2, -KA1 + WORK( J ) = WORK( J-KA ) + RWORK( J ) = RWORK( J-KA ) + 150 CONTINUE + DO 160 J = J2, J1, KA1 +* +* create nonzero element a(j-ka,j+1) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( 1, J+1 ) + AB( 1, J+1 ) = RWORK( J )*AB( 1, J+1 ) + 160 CONTINUE + IF( UPDATE ) THEN + IF( I-K.LT.N-KA .AND. K.LE.KBT ) + $ WORK( I-K+KA ) = WORK( I-K ) + END IF + 170 CONTINUE +* + DO 210 K = KB, 1, -1 + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL ZLARGV( NR, AB( 1, J2 ), INCA, WORK( J2 ), KA1, + $ RWORK( J2 ), KA1 ) +* +* apply rotations in 2nd set from the right +* + DO 180 L = 1, KA - 1 + CALL ZLARTV( NR, AB( KA1-L, J2 ), INCA, + $ AB( KA-L, J2+1 ), INCA, RWORK( J2 ), + $ WORK( J2 ), KA1 ) + 180 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL ZLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ), + $ AB( KA, J2+1 ), INCA, RWORK( J2 ), + $ WORK( J2 ), KA1 ) +* + CALL ZLACGV( NR, WORK( J2 ), KA1 ) + END IF +* +* start applying rotations in 2nd set from the left +* + DO 190 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, RWORK( J2 ), + $ WORK( J2 ), KA1 ) + 190 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 200 J = J2, J1, KA1 + CALL ZROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ RWORK( J ), DCONJG( WORK( J ) ) ) + 200 CONTINUE + END IF + 210 CONTINUE +* + DO 230 K = 1, KB - 1 + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 +* +* finish applying rotations in 1st set from the left +* + DO 220 L = KB - K, 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 220 CONTINUE + 230 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 240 J = N - 1, I2 + KA, -1 + RWORK( J-M ) = RWORK( J-KA-M ) + WORK( J-M ) = WORK( J-KA-M ) + 240 CONTINUE + END IF +* + ELSE +* +* Transform A, working with the lower triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**H * A * inv(S(i)) +* + BII = DBLE( BB( 1, I ) ) + AB( 1, I ) = ( DBLE( AB( 1, I ) ) / BII ) / BII + DO 250 J = I + 1, I1 + AB( J-I+1, I ) = AB( J-I+1, I ) / BII + 250 CONTINUE + DO 260 J = MAX( 1, I-KA ), I - 1 + AB( I-J+1, J ) = AB( I-J+1, J ) / BII + 260 CONTINUE + DO 290 K = I - KBT, I - 1 + DO 270 J = I - KBT, K + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ BB( I-J+1, J )*DCONJG( AB( I-K+1, + $ K ) ) - DCONJG( BB( I-K+1, K ) )* + $ AB( I-J+1, J ) + DBLE( AB( 1, I ) )* + $ BB( I-J+1, J )*DCONJG( BB( I-K+1, + $ K ) ) + 270 CONTINUE + DO 280 J = MAX( 1, I-KA ), I - KBT - 1 + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ DCONJG( BB( I-K+1, K ) )* + $ AB( I-J+1, J ) + 280 CONTINUE + 290 CONTINUE + DO 310 J = I, I1 + DO 300 K = MAX( J-KA, I-KBT ), I - 1 + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ BB( I-K+1, K )*AB( J-I+1, I ) + 300 CONTINUE + 310 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL ZDSCAL( N-M, ONE / BII, X( M+1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL ZGERU( N-M, KBT, -CONE, X( M+1, I ), 1, + $ BB( KBT+1, I-KBT ), LDBB-1, + $ X( M+1, I-KBT ), LDX ) + END IF +* +* store a(i1,i) in RA1 for use in next loop over K +* + RA1 = AB( I1-I+1, I ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions down toward the bottom of the +* band +* + DO 360 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN +* +* generate rotation to annihilate a(i-k+ka+1,i) +* + CALL ZLARTG( AB( KA1-K, I ), RA1, RWORK( I-K+KA-M ), + $ WORK( I-K+KA-M ), RA ) +* +* create nonzero element a(i-k+ka+1,i-k) outside the +* band and store it in WORK(i-k) +* + T = -BB( K+1, I-K )*RA1 + WORK( I-K ) = RWORK( I-K+KA-M )*T - + $ DCONJG( WORK( I-K+KA-M ) )* + $ AB( KA1, I-K ) + AB( KA1, I-K ) = WORK( I-K+KA-M )*T + + $ RWORK( I-K+KA-M )*AB( KA1, I-K ) + RA1 = RA + END IF + END IF + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MAX( J2, I+2*KA-K+1 ) + ELSE + J2T = J2 + END IF + NRT = ( N-J2T+KA ) / KA1 + DO 320 J = J2T, J1, KA1 +* +* create nonzero element a(j+1,j-ka) outside the band +* and store it in WORK(j-m) +* + WORK( J-M ) = WORK( J-M )*AB( KA1, J-KA+1 ) + AB( KA1, J-KA+1 ) = RWORK( J-M )*AB( KA1, J-KA+1 ) + 320 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL ZLARGV( NRT, AB( KA1, J2T-KA ), INCA, WORK( J2T-M ), + $ KA1, RWORK( J2T-M ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the left +* + DO 330 L = 1, KA - 1 + CALL ZLARTV( NR, AB( L+1, J2-L ), INCA, + $ AB( L+2, J2-L ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 330 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL ZLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ), + $ INCA, RWORK( J2-M ), WORK( J2-M ), KA1 ) +* + CALL ZLACGV( NR, WORK( J2-M ), KA1 ) + END IF +* +* start applying rotations in 1st set from the right +* + DO 340 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 340 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 350 J = J2, J1, KA1 + CALL ZROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ RWORK( J-M ), WORK( J-M ) ) + 350 CONTINUE + END IF + 360 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.LE.N .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i-kbt+ka+1,i-kbt) outside the +* band and store it in WORK(i-kbt) +* + WORK( I-KBT ) = -BB( KBT+1, I-KBT )*RA1 + END IF + END IF +* + DO 400 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1 + ELSE + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + END IF +* +* finish applying rotations in 2nd set from the right +* + DO 370 L = KB - K, 1, -1 + NRT = ( N-J2+KA+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( KA1-L+1, J2-KA ), INCA, + $ AB( KA1-L, J2-KA+1 ), INCA, + $ RWORK( J2-KA ), WORK( J2-KA ), KA1 ) + 370 CONTINUE + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + DO 380 J = J1, J2, -KA1 + WORK( J ) = WORK( J-KA ) + RWORK( J ) = RWORK( J-KA ) + 380 CONTINUE + DO 390 J = J2, J1, KA1 +* +* create nonzero element a(j+1,j-ka) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( KA1, J-KA+1 ) + AB( KA1, J-KA+1 ) = RWORK( J )*AB( KA1, J-KA+1 ) + 390 CONTINUE + IF( UPDATE ) THEN + IF( I-K.LT.N-KA .AND. K.LE.KBT ) + $ WORK( I-K+KA ) = WORK( I-K ) + END IF + 400 CONTINUE +* + DO 440 K = KB, 1, -1 + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL ZLARGV( NR, AB( KA1, J2-KA ), INCA, WORK( J2 ), KA1, + $ RWORK( J2 ), KA1 ) +* +* apply rotations in 2nd set from the left +* + DO 410 L = 1, KA - 1 + CALL ZLARTV( NR, AB( L+1, J2-L ), INCA, + $ AB( L+2, J2-L ), INCA, RWORK( J2 ), + $ WORK( J2 ), KA1 ) + 410 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL ZLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ), + $ INCA, RWORK( J2 ), WORK( J2 ), KA1 ) +* + CALL ZLACGV( NR, WORK( J2 ), KA1 ) + END IF +* +* start applying rotations in 2nd set from the right +* + DO 420 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, RWORK( J2 ), + $ WORK( J2 ), KA1 ) + 420 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 430 J = J2, J1, KA1 + CALL ZROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ RWORK( J ), WORK( J ) ) + 430 CONTINUE + END IF + 440 CONTINUE +* + DO 460 K = 1, KB - 1 + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 +* +* finish applying rotations in 1st set from the right +* + DO 450 L = KB - K, 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 450 CONTINUE + 460 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 470 J = N - 1, I2 + KA, -1 + RWORK( J-M ) = RWORK( J-KA-M ) + WORK( J-M ) = WORK( J-KA-M ) + 470 CONTINUE + END IF +* + END IF +* + GO TO 10 +* + 480 CONTINUE +* +* **************************** Phase 2 ***************************** +* +* The logical structure of this phase is: +* +* UPDATE = .TRUE. +* DO I = 1, M +* use S(i) to update A and create a new bulge +* apply rotations to push all bulges KA positions upward +* END DO +* UPDATE = .FALSE. +* DO I = M - KA - 1, 2, -1 +* apply rotations to push all bulges KA positions upward +* END DO +* +* To avoid duplicating code, the two loops are merged. +* + UPDATE = .TRUE. + I = 0 + 490 CONTINUE + IF( UPDATE ) THEN + I = I + 1 + KBT = MIN( KB, M-I ) + I0 = I + 1 + I1 = MAX( 1, I-KA ) + I2 = I + KBT - KA1 + IF( I.GT.M ) THEN + UPDATE = .FALSE. + I = I - 1 + I0 = M + 1 + IF( KA.EQ.0 ) + $ RETURN + GO TO 490 + END IF + ELSE + I = I - KA + IF( I.LT.2 ) + $ RETURN + END IF +* + IF( I.LT.M-KBT ) THEN + NX = M + ELSE + NX = N + END IF +* + IF( UPPER ) THEN +* +* Transform A, working with the upper triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**H * A * inv(S(i)) +* + BII = DBLE( BB( KB1, I ) ) + AB( KA1, I ) = ( DBLE( AB( KA1, I ) ) / BII ) / BII + DO 500 J = I1, I - 1 + AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII + 500 CONTINUE + DO 510 J = I + 1, MIN( N, I+KA ) + AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII + 510 CONTINUE + DO 540 K = I + 1, I + KBT + DO 520 J = K, I + KBT + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ BB( I-J+KB1, J )* + $ DCONJG( AB( I-K+KA1, K ) ) - + $ DCONJG( BB( I-K+KB1, K ) )* + $ AB( I-J+KA1, J ) + + $ DBLE( AB( KA1, I ) )* + $ BB( I-J+KB1, J )* + $ DCONJG( BB( I-K+KB1, K ) ) + 520 CONTINUE + DO 530 J = I + KBT + 1, MIN( N, I+KA ) + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ DCONJG( BB( I-K+KB1, K ) )* + $ AB( I-J+KA1, J ) + 530 CONTINUE + 540 CONTINUE + DO 560 J = I1, I + DO 550 K = I + 1, MIN( J+KA, I+KBT ) + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ BB( I-K+KB1, K )*AB( J-I+KA1, I ) + 550 CONTINUE + 560 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL ZDSCAL( NX, ONE / BII, X( 1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL ZGERU( NX, KBT, -CONE, X( 1, I ), 1, + $ BB( KB, I+1 ), LDBB-1, X( 1, I+1 ), LDX ) + END IF +* +* store a(i1,i) in RA1 for use in next loop over K +* + RA1 = AB( I1-I+KA1, I ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions up toward the top of the band +* + DO 610 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN +* +* generate rotation to annihilate a(i+k-ka-1,i) +* + CALL ZLARTG( AB( K+1, I ), RA1, RWORK( I+K-KA ), + $ WORK( I+K-KA ), RA ) +* +* create nonzero element a(i+k-ka-1,i+k) outside the +* band and store it in WORK(m-kb+i+k) +* + T = -BB( KB1-K, I+K )*RA1 + WORK( M-KB+I+K ) = RWORK( I+K-KA )*T - + $ DCONJG( WORK( I+K-KA ) )* + $ AB( 1, I+K ) + AB( 1, I+K ) = WORK( I+K-KA )*T + + $ RWORK( I+K-KA )*AB( 1, I+K ) + RA1 = RA + END IF + END IF + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MIN( J2, I-2*KA+K-1 ) + ELSE + J2T = J2 + END IF + NRT = ( J2T+KA-1 ) / KA1 + DO 570 J = J1, J2T, KA1 +* +* create nonzero element a(j-1,j+ka) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( 1, J+KA-1 ) + AB( 1, J+KA-1 ) = RWORK( J )*AB( 1, J+KA-1 ) + 570 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL ZLARGV( NRT, AB( 1, J1+KA ), INCA, WORK( J1 ), KA1, + $ RWORK( J1 ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the left +* + DO 580 L = 1, KA - 1 + CALL ZLARTV( NR, AB( KA1-L, J1+L ), INCA, + $ AB( KA-L, J1+L ), INCA, RWORK( J1 ), + $ WORK( J1 ), KA1 ) + 580 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL ZLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ), + $ AB( KA, J1 ), INCA, RWORK( J1 ), WORK( J1 ), + $ KA1 ) +* + CALL ZLACGV( NR, WORK( J1 ), KA1 ) + END IF +* +* start applying rotations in 1st set from the right +* + DO 590 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, RWORK( J1T ), + $ WORK( J1T ), KA1 ) + 590 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 600 J = J1, J2, KA1 + CALL ZROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ RWORK( J ), WORK( J ) ) + 600 CONTINUE + END IF + 610 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.GT.0 .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i+kbt-ka-1,i+kbt) outside the +* band and store it in WORK(m-kb+i+kbt) +* + WORK( M-KB+I+KBT ) = -BB( KB1-KBT, I+KBT )*RA1 + END IF + END IF +* + DO 650 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1 + ELSE + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + END IF +* +* finish applying rotations in 2nd set from the right +* + DO 620 L = KB - K, 1, -1 + NRT = ( J2+KA+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( L, J1T+KA ), INCA, + $ AB( L+1, J1T+KA-1 ), INCA, + $ RWORK( M-KB+J1T+KA ), + $ WORK( M-KB+J1T+KA ), KA1 ) + 620 CONTINUE + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + DO 630 J = J1, J2, KA1 + WORK( M-KB+J ) = WORK( M-KB+J+KA ) + RWORK( M-KB+J ) = RWORK( M-KB+J+KA ) + 630 CONTINUE + DO 640 J = J1, J2, KA1 +* +* create nonzero element a(j-1,j+ka) outside the band +* and store it in WORK(m-kb+j) +* + WORK( M-KB+J ) = WORK( M-KB+J )*AB( 1, J+KA-1 ) + AB( 1, J+KA-1 ) = RWORK( M-KB+J )*AB( 1, J+KA-1 ) + 640 CONTINUE + IF( UPDATE ) THEN + IF( I+K.GT.KA1 .AND. K.LE.KBT ) + $ WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K ) + END IF + 650 CONTINUE +* + DO 690 K = KB, 1, -1 + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL ZLARGV( NR, AB( 1, J1+KA ), INCA, WORK( M-KB+J1 ), + $ KA1, RWORK( M-KB+J1 ), KA1 ) +* +* apply rotations in 2nd set from the left +* + DO 660 L = 1, KA - 1 + CALL ZLARTV( NR, AB( KA1-L, J1+L ), INCA, + $ AB( KA-L, J1+L ), INCA, RWORK( M-KB+J1 ), + $ WORK( M-KB+J1 ), KA1 ) + 660 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL ZLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ), + $ AB( KA, J1 ), INCA, RWORK( M-KB+J1 ), + $ WORK( M-KB+J1 ), KA1 ) +* + CALL ZLACGV( NR, WORK( M-KB+J1 ), KA1 ) + END IF +* +* start applying rotations in 2nd set from the right +* + DO 670 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, + $ RWORK( M-KB+J1T ), WORK( M-KB+J1T ), + $ KA1 ) + 670 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 680 J = J1, J2, KA1 + CALL ZROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ RWORK( M-KB+J ), WORK( M-KB+J ) ) + 680 CONTINUE + END IF + 690 CONTINUE +* + DO 710 K = 1, KB - 1 + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 +* +* finish applying rotations in 1st set from the right +* + DO 700 L = KB - K, 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, RWORK( J1T ), + $ WORK( J1T ), KA1 ) + 700 CONTINUE + 710 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 720 J = 2, I2 - KA + RWORK( J ) = RWORK( J+KA ) + WORK( J ) = WORK( J+KA ) + 720 CONTINUE + END IF +* + ELSE +* +* Transform A, working with the lower triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**H * A * inv(S(i)) +* + BII = DBLE( BB( 1, I ) ) + AB( 1, I ) = ( DBLE( AB( 1, I ) ) / BII ) / BII + DO 730 J = I1, I - 1 + AB( I-J+1, J ) = AB( I-J+1, J ) / BII + 730 CONTINUE + DO 740 J = I + 1, MIN( N, I+KA ) + AB( J-I+1, I ) = AB( J-I+1, I ) / BII + 740 CONTINUE + DO 770 K = I + 1, I + KBT + DO 750 J = K, I + KBT + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ BB( J-I+1, I )*DCONJG( AB( K-I+1, + $ I ) ) - DCONJG( BB( K-I+1, I ) )* + $ AB( J-I+1, I ) + DBLE( AB( 1, I ) )* + $ BB( J-I+1, I )*DCONJG( BB( K-I+1, + $ I ) ) + 750 CONTINUE + DO 760 J = I + KBT + 1, MIN( N, I+KA ) + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ DCONJG( BB( K-I+1, I ) )* + $ AB( J-I+1, I ) + 760 CONTINUE + 770 CONTINUE + DO 790 J = I1, I + DO 780 K = I + 1, MIN( J+KA, I+KBT ) + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ BB( K-I+1, I )*AB( I-J+1, J ) + 780 CONTINUE + 790 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL ZDSCAL( NX, ONE / BII, X( 1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL ZGERC( NX, KBT, -CONE, X( 1, I ), 1, BB( 2, I ), + $ 1, X( 1, I+1 ), LDX ) + END IF +* +* store a(i,i1) in RA1 for use in next loop over K +* + RA1 = AB( I-I1+1, I1 ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions up toward the top of the band +* + DO 840 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN +* +* generate rotation to annihilate a(i,i+k-ka-1) +* + CALL ZLARTG( AB( KA1-K, I+K-KA ), RA1, + $ RWORK( I+K-KA ), WORK( I+K-KA ), RA ) +* +* create nonzero element a(i+k,i+k-ka-1) outside the +* band and store it in WORK(m-kb+i+k) +* + T = -BB( K+1, I )*RA1 + WORK( M-KB+I+K ) = RWORK( I+K-KA )*T - + $ DCONJG( WORK( I+K-KA ) )* + $ AB( KA1, I+K-KA ) + AB( KA1, I+K-KA ) = WORK( I+K-KA )*T + + $ RWORK( I+K-KA )*AB( KA1, I+K-KA ) + RA1 = RA + END IF + END IF + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MIN( J2, I-2*KA+K-1 ) + ELSE + J2T = J2 + END IF + NRT = ( J2T+KA-1 ) / KA1 + DO 800 J = J1, J2T, KA1 +* +* create nonzero element a(j+ka,j-1) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( KA1, J-1 ) + AB( KA1, J-1 ) = RWORK( J )*AB( KA1, J-1 ) + 800 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL ZLARGV( NRT, AB( KA1, J1 ), INCA, WORK( J1 ), KA1, + $ RWORK( J1 ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the right +* + DO 810 L = 1, KA - 1 + CALL ZLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ), + $ INCA, RWORK( J1 ), WORK( J1 ), KA1 ) + 810 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL ZLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ), + $ AB( 2, J1-1 ), INCA, RWORK( J1 ), + $ WORK( J1 ), KA1 ) +* + CALL ZLACGV( NR, WORK( J1 ), KA1 ) + END IF +* +* start applying rotations in 1st set from the left +* + DO 820 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ RWORK( J1T ), WORK( J1T ), KA1 ) + 820 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 830 J = J1, J2, KA1 + CALL ZROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ RWORK( J ), DCONJG( WORK( J ) ) ) + 830 CONTINUE + END IF + 840 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.GT.0 .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i+kbt,i+kbt-ka-1) outside the +* band and store it in WORK(m-kb+i+kbt) +* + WORK( M-KB+I+KBT ) = -BB( KBT+1, I )*RA1 + END IF + END IF +* + DO 880 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1 + ELSE + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + END IF +* +* finish applying rotations in 2nd set from the left +* + DO 850 L = KB - K, 1, -1 + NRT = ( J2+KA+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( KA1-L+1, J1T+L-1 ), INCA, + $ AB( KA1-L, J1T+L-1 ), INCA, + $ RWORK( M-KB+J1T+KA ), + $ WORK( M-KB+J1T+KA ), KA1 ) + 850 CONTINUE + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + DO 860 J = J1, J2, KA1 + WORK( M-KB+J ) = WORK( M-KB+J+KA ) + RWORK( M-KB+J ) = RWORK( M-KB+J+KA ) + 860 CONTINUE + DO 870 J = J1, J2, KA1 +* +* create nonzero element a(j+ka,j-1) outside the band +* and store it in WORK(m-kb+j) +* + WORK( M-KB+J ) = WORK( M-KB+J )*AB( KA1, J-1 ) + AB( KA1, J-1 ) = RWORK( M-KB+J )*AB( KA1, J-1 ) + 870 CONTINUE + IF( UPDATE ) THEN + IF( I+K.GT.KA1 .AND. K.LE.KBT ) + $ WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K ) + END IF + 880 CONTINUE +* + DO 920 K = KB, 1, -1 + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL ZLARGV( NR, AB( KA1, J1 ), INCA, WORK( M-KB+J1 ), + $ KA1, RWORK( M-KB+J1 ), KA1 ) +* +* apply rotations in 2nd set from the right +* + DO 890 L = 1, KA - 1 + CALL ZLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ), + $ INCA, RWORK( M-KB+J1 ), WORK( M-KB+J1 ), + $ KA1 ) + 890 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL ZLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ), + $ AB( 2, J1-1 ), INCA, RWORK( M-KB+J1 ), + $ WORK( M-KB+J1 ), KA1 ) +* + CALL ZLACGV( NR, WORK( M-KB+J1 ), KA1 ) + END IF +* +* start applying rotations in 2nd set from the left +* + DO 900 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ RWORK( M-KB+J1T ), WORK( M-KB+J1T ), + $ KA1 ) + 900 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 910 J = J1, J2, KA1 + CALL ZROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ RWORK( M-KB+J ), DCONJG( WORK( M-KB+J ) ) ) + 910 CONTINUE + END IF + 920 CONTINUE +* + DO 940 K = 1, KB - 1 + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 +* +* finish applying rotations in 1st set from the left +* + DO 930 L = KB - K, 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ RWORK( J1T ), WORK( J1T ), KA1 ) + 930 CONTINUE + 940 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 950 J = 2, I2 - KA + RWORK( J ) = RWORK( J+KA ) + WORK( J ) = WORK( J+KA ) + 950 CONTINUE + END IF +* + END IF +* + GO TO 490 +* +* End of ZHBGST +* + END diff --git a/costa/native/external/lapack/zhbgv.f b/costa/native/external/lapack/zhbgv.f new file mode 100644 index 000000000..9fc18ace7 --- /dev/null +++ b/costa/native/external/lapack/zhbgv.f @@ -0,0 +1,192 @@ + SUBROUTINE ZHBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, + $ LDZ, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AB( LDAB, * ), BB( LDBB, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZHBGV computes all the eigenvalues, and optionally, the eigenvectors +* of a complex generalized Hermitian-definite banded eigenproblem, of +* the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian +* and banded, and B is also positive definite. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* KA (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KA >= 0. +* +* KB (input) INTEGER +* The number of superdiagonals of the matrix B if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KB >= 0. +* +* AB (input/output) COMPLEX*16 array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first ka+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +* +* On exit, the contents of AB are destroyed. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KA+1. +* +* BB (input/output) COMPLEX*16 array, dimension (LDBB, N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix B, stored in the first kb+1 rows of the array. The +* j-th column of B is stored in the j-th column of the array BB +* as follows: +* if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +* if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +* +* On exit, the factor S from the split Cholesky factorization +* B = S**H*S, as returned by ZPBSTF. +* +* LDBB (input) INTEGER +* The leading dimension of the array BB. LDBB >= KB+1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) COMPLEX*16 array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +* eigenvectors, with the i-th column of Z holding the +* eigenvector associated with W(i). The eigenvectors are +* normalized so that Z**H*B*Z = I. +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= N. +* +* WORK (workspace) COMPLEX*16 array, dimension (N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (3*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is: +* <= N: the algorithm failed to converge: +* i off-diagonal elements of an intermediate +* tridiagonal form did not converge to zero; +* > N: if INFO = N + i, for 1 <= i <= N, then ZPBSTF +* returned INFO = i: B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, WANTZ + CHARACTER VECT + INTEGER IINFO, INDE, INDWRK +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSTERF, XERBLA, ZHBGST, ZHBTRD, ZPBSTF, ZSTEQR +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHBGV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a split Cholesky factorization of B. +* + CALL ZPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + INDE = 1 + INDWRK = INDE + N + CALL ZHBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ, + $ WORK, RWORK( INDWRK ), IINFO ) +* +* Reduce to tridiagonal form. +* + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL ZHBTRD( VECT, UPLO, N, KA, AB, LDAB, W, RWORK( INDE ), Z, + $ LDZ, WORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call ZSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL ZSTEQR( JOBZ, N, W, RWORK( INDE ), Z, LDZ, + $ RWORK( INDWRK ), INFO ) + END IF + RETURN +* +* End of ZHBGV +* + END diff --git a/costa/native/external/lapack/zhbgvd.f b/costa/native/external/lapack/zhbgvd.f new file mode 100644 index 000000000..446b61408 --- /dev/null +++ b/costa/native/external/lapack/zhbgvd.f @@ -0,0 +1,295 @@ + SUBROUTINE ZHBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, + $ Z, LDZ, WORK, LWORK, RWORK, LRWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, LIWORK, LRWORK, + $ LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AB( LDAB, * ), BB( LDBB, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZHBGVD computes all the eigenvalues, and optionally, the eigenvectors +* of a complex generalized Hermitian-definite banded eigenproblem, of +* the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian +* and banded, and B is also positive definite. If eigenvectors are +* desired, it uses a divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* KA (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KA >= 0. +* +* KB (input) INTEGER +* The number of superdiagonals of the matrix B if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KB >= 0. +* +* AB (input/output) COMPLEX*16 array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first ka+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +* +* On exit, the contents of AB are destroyed. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KA+1. +* +* BB (input/output) COMPLEX*16 array, dimension (LDBB, N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix B, stored in the first kb+1 rows of the array. The +* j-th column of B is stored in the j-th column of the array BB +* as follows: +* if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +* if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +* +* On exit, the factor S from the split Cholesky factorization +* B = S**H*S, as returned by ZPBSTF. +* +* LDBB (input) INTEGER +* The leading dimension of the array BB. LDBB >= KB+1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) COMPLEX*16 array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +* eigenvectors, with the i-th column of Z holding the +* eigenvector associated with W(i). The eigenvectors are +* normalized so that Z**H*B*Z = I. +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= N. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO=0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If N <= 1, LWORK >= 1. +* If JOBZ = 'N' and N > 1, LWORK >= N. +* If JOBZ = 'V' and N > 1, LWORK >= 2*N**2. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace/output) DOUBLE PRECISION array, dimension (LRWORK) +* On exit, if INFO=0, RWORK(1) returns the optimal LRWORK. +* +* LRWORK (input) INTEGER +* The dimension of array RWORK. +* If N <= 1, LRWORK >= 1. +* If JOBZ = 'N' and N > 1, LRWORK >= N. +* If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2. +* +* If LRWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the RWORK array, +* returns this value as the first entry of the RWORK array, and +* no error message related to LRWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO=0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of array IWORK. +* If JOBZ = 'N' or N <= 1, LIWORK >= 1. +* If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is: +* <= N: the algorithm failed to converge: +* i off-diagonal elements of an intermediate +* tridiagonal form did not converge to zero; +* > N: if INFO = N + i, for 1 <= i <= N, then ZPBSTF +* returned INFO = i: B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CONE, CZERO + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ), + $ CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER VECT + INTEGER IINFO, INDE, INDWK2, INDWRK, LIWMIN, LLRWK, + $ LLWK2, LRWMIN, LWMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSTERF, XERBLA, ZGEMM, ZHBGST, ZHBTRD, ZLACPY, + $ ZPBSTF, ZSTEDC +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LWMIN = 1 + LRWMIN = 1 + LIWMIN = 1 + ELSE + IF( WANTZ ) THEN + LWMIN = 2*N**2 + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + LRWMIN = N + LIWMIN = 1 + END IF + END IF + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -12 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -16 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHBGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a split Cholesky factorization of B. +* + CALL ZPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + INDE = 1 + INDWRK = INDE + N + INDWK2 = 1 + N*N + LLWK2 = LWORK - INDWK2 + 2 + LLRWK = LRWORK - INDWRK + 2 + CALL ZHBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ, + $ WORK, RWORK( INDWRK ), IINFO ) +* +* Reduce Hermitian band matrix to tridiagonal form. +* + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL ZHBTRD( VECT, UPLO, N, KA, AB, LDAB, W, RWORK( INDE ), Z, + $ LDZ, WORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call ZSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, WORK( INDWK2 ), + $ LLWK2, RWORK( INDWRK ), LLRWK, IWORK, LIWORK, + $ INFO ) + CALL ZGEMM( 'N', 'N', N, N, N, CONE, Z, LDZ, WORK, N, CZERO, + $ WORK( INDWK2 ), N ) + CALL ZLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of ZHBGVD +* + END diff --git a/costa/native/external/lapack/zhbgvx.f b/costa/native/external/lapack/zhbgvx.f new file mode 100644 index 000000000..d12fbd994 --- /dev/null +++ b/costa/native/external/lapack/zhbgvx.f @@ -0,0 +1,374 @@ + SUBROUTINE ZHBGVX( JOBZ, RANGE, UPLO, N, KA, KB, AB, LDAB, BB, + $ LDBB, Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, + $ LDZ, WORK, RWORK, IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KA, KB, LDAB, LDBB, LDQ, LDZ, M, + $ N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AB( LDAB, * ), BB( LDBB, * ), Q( LDQ, * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZHBGVX computes all the eigenvalues, and optionally, the eigenvectors +* of a complex generalized Hermitian-definite banded eigenproblem, of +* the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian +* and banded, and B is also positive definite. Eigenvalues and +* eigenvectors can be selected by specifying either all eigenvalues, +* a range of values or a range of indices for the desired eigenvalues. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found; +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found; +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* KA (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KA >= 0. +* +* KB (input) INTEGER +* The number of superdiagonals of the matrix B if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KB >= 0. +* +* AB (input/output) COMPLEX*16 array, dimension (LDAB, N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first ka+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +* +* On exit, the contents of AB are destroyed. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KA+1. +* +* BB (input/output) COMPLEX*16 array, dimension (LDBB, N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix B, stored in the first kb+1 rows of the array. The +* j-th column of B is stored in the j-th column of the array BB +* as follows: +* if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +* if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +* +* On exit, the factor S from the split Cholesky factorization +* B = S**H*S, as returned by ZPBSTF. +* +* LDBB (input) INTEGER +* The leading dimension of the array BB. LDBB >= KB+1. +* +* Q (output) COMPLEX*16 array, dimension (LDQ, N) +* If JOBZ = 'V', the n-by-n matrix used in the reduction of +* A*x = (lambda)*B*x to standard form, i.e. C*x = (lambda)*x, +* and consequently C to tridiagonal form. +* If JOBZ = 'N', the array Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. If JOBZ = 'N', +* LDQ >= 1. If JOBZ = 'V', LDQ >= max(1,N). +* +* VL (input) DOUBLE PRECISION +* VU (input) DOUBLE PRECISION +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) DOUBLE PRECISION +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing AP to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*DLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*DLAMCH('S'). +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) COMPLEX*16 array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +* eigenvectors, with the i-th column of Z holding the +* eigenvector associated with W(i). The eigenvectors are +* normalized so that Z**H*B*Z = I. +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= N. +* +* WORK (workspace) COMPLEX*16 array, dimension (N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (7*N) +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is: +* <= N: then i eigenvectors failed to converge. Their +* indices are stored in array IFAIL. +* > N: if INFO = N + i, for 1 <= i <= N, then ZPBSTF +* returned INFO = i: B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, UPPER, VALEIG, WANTZ + CHARACTER ORDER, VECT + INTEGER I, IINFO, INDD, INDE, INDEE, INDIBL, INDISP, + $ INDIWK, INDRWK, INDWRK, ITMP1, J, JJ, NSPLIT + DOUBLE PRECISION TMP1 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSTEBZ, DSTERF, XERBLA, ZCOPY, ZGEMV, + $ ZHBGST, ZHBTRD, ZLACPY, ZPBSTF, ZSTEIN, ZSTEQR, + $ ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KA.LT.0 ) THEN + INFO = -5 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -8 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -10 + ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN + INFO = -12 + ELSE IF( INDEIG .AND. IL.LT.1 ) THEN + INFO = -13 + ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN + INFO = -14 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -19 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHBGVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a split Cholesky factorization of B. +* + CALL ZPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + CALL ZHBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Q, LDQ, + $ WORK, RWORK, IINFO ) +* +* Solve the standard eigenvalue problem. +* Reduce Hermitian band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N + INDWRK = 1 + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL ZHBTRD( VECT, UPLO, N, KA, AB, LDAB, RWORK( INDD ), + $ RWORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call DSTERF or ZSTEQR. If this fails for some +* eigenvalue, then try DSTEBZ. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ ( ABSTOL.LE.ZERO ) ) THEN + CALL DCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL ZLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL ZSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, +* call ZSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL ZSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by ZSTEIN. +* + DO 20 J = 1, M + CALL ZCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL ZGEMV( 'N', N, N, CONE, Q, LDQ, WORK, 1, CZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* + 30 CONTINUE +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* + RETURN +* +* End of ZHBGVX +* + END diff --git a/costa/native/external/lapack/zhbtrd.f b/costa/native/external/lapack/zhbtrd.f new file mode 100644 index 000000000..a4743c280 --- /dev/null +++ b/costa/native/external/lapack/zhbtrd.f @@ -0,0 +1,589 @@ + SUBROUTINE ZHBTRD( VECT, UPLO, N, KD, AB, LDAB, D, E, Q, LDQ, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO, VECT + INTEGER INFO, KD, LDAB, LDQ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + COMPLEX*16 AB( LDAB, * ), Q( LDQ, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZHBTRD reduces a complex Hermitian band matrix A to real symmetric +* tridiagonal form T by a unitary similarity transformation: +* Q**H * A * Q = T. +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* = 'N': do not form Q; +* = 'V': form Q; +* = 'U': update a matrix X, by forming X*Q. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) COMPLEX*16 array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* On exit, the diagonal elements of AB are overwritten by the +* diagonal elements of the tridiagonal matrix T; if KD > 0, the +* elements on the first superdiagonal (if UPLO = 'U') or the +* first subdiagonal (if UPLO = 'L') are overwritten by the +* off-diagonal elements of T; the rest of AB is overwritten by +* values generated during the reduction. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* D (output) DOUBLE PRECISION array, dimension (N) +* The diagonal elements of the tridiagonal matrix T. +* +* E (output) DOUBLE PRECISION array, dimension (N-1) +* The off-diagonal elements of the tridiagonal matrix T: +* E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. +* +* Q (input/output) COMPLEX*16 array, dimension (LDQ,N) +* On entry, if VECT = 'U', then Q must contain an N-by-N +* matrix X; if VECT = 'N' or 'V', then Q need not be set. +* +* On exit: +* if VECT = 'V', Q contains the N-by-N unitary matrix Q; +* if VECT = 'U', Q contains the product X*Q; +* if VECT = 'N', the array Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. +* LDQ >= 1, and LDQ >= N if VECT = 'V' or 'U'. +* +* WORK (workspace) COMPLEX*16 array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Modified by Linda Kaufman, Bell Labs. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL INITQ, UPPER, WANTQ + INTEGER I, I2, IBL, INCA, INCX, IQAEND, IQB, IQEND, J, + $ J1, J1END, J1INC, J2, JEND, JIN, JINC, K, KD1, + $ KDM1, KDN, L, LAST, LEND, NQ, NR, NRT + DOUBLE PRECISION ABST + COMPLEX*16 T, TEMP +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACGV, ZLAR2V, ZLARGV, ZLARTG, ZLARTV, + $ ZLASET, ZROT, ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INITQ = LSAME( VECT, 'V' ) + WANTQ = INITQ .OR. LSAME( VECT, 'U' ) + UPPER = LSAME( UPLO, 'U' ) + KD1 = KD + 1 + KDM1 = KD - 1 + INCX = LDAB - 1 + IQEND = 1 +* + INFO = 0 + IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD1 ) THEN + INFO = -6 + ELSE IF( LDQ.LT.MAX( 1, N ) .AND. WANTQ ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHBTRD', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Initialize Q to the unit matrix, if needed +* + IF( INITQ ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, Q, LDQ ) +* +* Wherever possible, plane rotations are generated and applied in +* vector operations of length NR over the index set J1:J2:KD1. +* +* The real cosines and complex sines of the plane rotations are +* stored in the arrays D and WORK. +* + INCA = KD1*LDAB + KDN = MIN( N-1, KD ) + IF( UPPER ) THEN +* + IF( KD.GT.1 ) THEN +* +* Reduce to complex Hermitian tridiagonal form, working with +* the upper triangle +* + NR = 0 + J1 = KDN + 2 + J2 = 1 +* + AB( KD1, 1 ) = DBLE( AB( KD1, 1 ) ) + DO 90 I = 1, N - 2 +* +* Reduce i-th row of matrix to tridiagonal form +* + DO 80 K = KDN + 1, 2, -1 + J1 = J1 + KDN + J2 = J2 + KDN +* + IF( NR.GT.0 ) THEN +* +* generate plane rotations to annihilate nonzero +* elements which have been created outside the band +* + CALL ZLARGV( NR, AB( 1, J1-1 ), INCA, WORK( J1 ), + $ KD1, D( J1 ), KD1 ) +* +* apply rotations from the right +* +* +* Dependent on the the number of diagonals either +* ZLARTV or ZROT is used +* + IF( NR.GE.2*KD-1 ) THEN + DO 10 L = 1, KD - 1 + CALL ZLARTV( NR, AB( L+1, J1-1 ), INCA, + $ AB( L, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) + 10 CONTINUE +* + ELSE + JEND = J1 + ( NR-1 )*KD1 + DO 20 JINC = J1, JEND, KD1 + CALL ZROT( KDM1, AB( 2, JINC-1 ), 1, + $ AB( 1, JINC ), 1, D( JINC ), + $ WORK( JINC ) ) + 20 CONTINUE + END IF + END IF +* +* + IF( K.GT.2 ) THEN + IF( K.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i,i+k-1) +* within the band +* + CALL ZLARTG( AB( KD-K+3, I+K-2 ), + $ AB( KD-K+2, I+K-1 ), D( I+K-1 ), + $ WORK( I+K-1 ), TEMP ) + AB( KD-K+3, I+K-2 ) = TEMP +* +* apply rotation from the right +* + CALL ZROT( K-3, AB( KD-K+4, I+K-2 ), 1, + $ AB( KD-K+3, I+K-1 ), 1, D( I+K-1 ), + $ WORK( I+K-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KDN - 1 + END IF +* +* apply plane rotations from both sides to diagonal +* blocks +* + IF( NR.GT.0 ) + $ CALL ZLAR2V( NR, AB( KD1, J1-1 ), AB( KD1, J1 ), + $ AB( KD, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) +* +* apply plane rotations from the left +* + CALL ZLACGV( NR, WORK( J1 ), KD1 ) + IF( NR.GT.0 ) THEN + IF( 2*KD-1.LT.NR ) THEN +* +* Dependent on the the number of diagonals either +* ZLARTV or ZROT is used +* + DO 30 L = 1, KD - 1 + IF( J2+L.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( KD-L, J1+L ), INCA, + $ AB( KD-L+1, J1+L ), INCA, + $ D( J1 ), WORK( J1 ), KD1 ) + 30 CONTINUE + ELSE + J1END = J1 + KD1*( NR-2 ) + IF( J1END.GE.J1 ) THEN + DO 40 JIN = J1, J1END, KD1 + CALL ZROT( KD-1, AB( KD-1, JIN+1 ), INCX, + $ AB( KD, JIN+1 ), INCX, + $ D( JIN ), WORK( JIN ) ) + 40 CONTINUE + END IF + LEND = MIN( KDM1, N-J2 ) + LAST = J1END + KD1 + IF( LEND.GT.0 ) + $ CALL ZROT( LEND, AB( KD-1, LAST+1 ), INCX, + $ AB( KD, LAST+1 ), INCX, D( LAST ), + $ WORK( LAST ) ) + END IF + END IF +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + IF( INITQ ) THEN +* +* take advantage of the fact that Q was +* initially the Identity matrix +* + IQEND = MAX( IQEND, J2 ) + I2 = MAX( 0, K-3 ) + IQAEND = 1 + I*KD + IF( K.EQ.2 ) + $ IQAEND = IQAEND + KD + IQAEND = MIN( IQAEND, IQEND ) + DO 50 J = J1, J2, KD1 + IBL = I - I2 / KDM1 + I2 = I2 + 1 + IQB = MAX( 1, J-IBL ) + NQ = 1 + IQAEND - IQB + IQAEND = MIN( IQAEND+KD, IQEND ) + CALL ZROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ), + $ 1, D( J ), DCONJG( WORK( J ) ) ) + 50 CONTINUE + ELSE +* + DO 60 J = J1, J2, KD1 + CALL ZROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ D( J ), DCONJG( WORK( J ) ) ) + 60 CONTINUE + END IF +* + END IF +* + IF( J2+KDN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KDN - 1 + END IF +* + DO 70 J = J1, J2, KD1 +* +* create nonzero element a(j-1,j+kd) outside the band +* and store it in WORK +* + WORK( J+KD ) = WORK( J )*AB( 1, J+KD ) + AB( 1, J+KD ) = D( J )*AB( 1, J+KD ) + 70 CONTINUE + 80 CONTINUE + 90 CONTINUE + END IF +* + IF( KD.GT.0 ) THEN +* +* make off-diagonal elements real and copy them to E +* + DO 100 I = 1, N - 1 + T = AB( KD, I+1 ) + ABST = ABS( T ) + AB( KD, I+1 ) = ABST + E( I ) = ABST + IF( ABST.NE.ZERO ) THEN + T = T / ABST + ELSE + T = CONE + END IF + IF( I.LT.N-1 ) + $ AB( KD, I+2 ) = AB( KD, I+2 )*T + IF( WANTQ ) THEN + CALL ZSCAL( N, DCONJG( T ), Q( 1, I+1 ), 1 ) + END IF + 100 CONTINUE + ELSE +* +* set E to zero if original matrix was diagonal +* + DO 110 I = 1, N - 1 + E( I ) = ZERO + 110 CONTINUE + END IF +* +* copy diagonal elements to D +* + DO 120 I = 1, N + D( I ) = AB( KD1, I ) + 120 CONTINUE +* + ELSE +* + IF( KD.GT.1 ) THEN +* +* Reduce to complex Hermitian tridiagonal form, working with +* the lower triangle +* + NR = 0 + J1 = KDN + 2 + J2 = 1 +* + AB( 1, 1 ) = DBLE( AB( 1, 1 ) ) + DO 210 I = 1, N - 2 +* +* Reduce i-th column of matrix to tridiagonal form +* + DO 200 K = KDN + 1, 2, -1 + J1 = J1 + KDN + J2 = J2 + KDN +* + IF( NR.GT.0 ) THEN +* +* generate plane rotations to annihilate nonzero +* elements which have been created outside the band +* + CALL ZLARGV( NR, AB( KD1, J1-KD1 ), INCA, + $ WORK( J1 ), KD1, D( J1 ), KD1 ) +* +* apply plane rotations from one side +* +* +* Dependent on the the number of diagonals either +* ZLARTV or ZROT is used +* + IF( NR.GT.2*KD-1 ) THEN + DO 130 L = 1, KD - 1 + CALL ZLARTV( NR, AB( KD1-L, J1-KD1+L ), INCA, + $ AB( KD1-L+1, J1-KD1+L ), INCA, + $ D( J1 ), WORK( J1 ), KD1 ) + 130 CONTINUE + ELSE + JEND = J1 + KD1*( NR-1 ) + DO 140 JINC = J1, JEND, KD1 + CALL ZROT( KDM1, AB( KD, JINC-KD ), INCX, + $ AB( KD1, JINC-KD ), INCX, + $ D( JINC ), WORK( JINC ) ) + 140 CONTINUE + END IF +* + END IF +* + IF( K.GT.2 ) THEN + IF( K.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i+k-1,i) +* within the band +* + CALL ZLARTG( AB( K-1, I ), AB( K, I ), + $ D( I+K-1 ), WORK( I+K-1 ), TEMP ) + AB( K-1, I ) = TEMP +* +* apply rotation from the left +* + CALL ZROT( K-3, AB( K-2, I+1 ), LDAB-1, + $ AB( K-1, I+1 ), LDAB-1, D( I+K-1 ), + $ WORK( I+K-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KDN - 1 + END IF +* +* apply plane rotations from both sides to diagonal +* blocks +* + IF( NR.GT.0 ) + $ CALL ZLAR2V( NR, AB( 1, J1-1 ), AB( 1, J1 ), + $ AB( 2, J1-1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) +* +* apply plane rotations from the right +* +* +* Dependent on the the number of diagonals either +* ZLARTV or ZROT is used +* + CALL ZLACGV( NR, WORK( J1 ), KD1 ) + IF( NR.GT.0 ) THEN + IF( NR.GT.2*KD-1 ) THEN + DO 150 L = 1, KD - 1 + IF( J2+L.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( L+2, J1-1 ), INCA, + $ AB( L+1, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) + 150 CONTINUE + ELSE + J1END = J1 + KD1*( NR-2 ) + IF( J1END.GE.J1 ) THEN + DO 160 J1INC = J1, J1END, KD1 + CALL ZROT( KDM1, AB( 3, J1INC-1 ), 1, + $ AB( 2, J1INC ), 1, D( J1INC ), + $ WORK( J1INC ) ) + 160 CONTINUE + END IF + LEND = MIN( KDM1, N-J2 ) + LAST = J1END + KD1 + IF( LEND.GT.0 ) + $ CALL ZROT( LEND, AB( 3, LAST-1 ), 1, + $ AB( 2, LAST ), 1, D( LAST ), + $ WORK( LAST ) ) + END IF + END IF +* +* +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + IF( INITQ ) THEN +* +* take advantage of the fact that Q was +* initially the Identity matrix +* + IQEND = MAX( IQEND, J2 ) + I2 = MAX( 0, K-3 ) + IQAEND = 1 + I*KD + IF( K.EQ.2 ) + $ IQAEND = IQAEND + KD + IQAEND = MIN( IQAEND, IQEND ) + DO 170 J = J1, J2, KD1 + IBL = I - I2 / KDM1 + I2 = I2 + 1 + IQB = MAX( 1, J-IBL ) + NQ = 1 + IQAEND - IQB + IQAEND = MIN( IQAEND+KD, IQEND ) + CALL ZROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ), + $ 1, D( J ), WORK( J ) ) + 170 CONTINUE + ELSE +* + DO 180 J = J1, J2, KD1 + CALL ZROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ D( J ), WORK( J ) ) + 180 CONTINUE + END IF + END IF +* + IF( J2+KDN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KDN - 1 + END IF +* + DO 190 J = J1, J2, KD1 +* +* create nonzero element a(j+kd,j-1) outside the +* band and store it in WORK +* + WORK( J+KD ) = WORK( J )*AB( KD1, J ) + AB( KD1, J ) = D( J )*AB( KD1, J ) + 190 CONTINUE + 200 CONTINUE + 210 CONTINUE + END IF +* + IF( KD.GT.0 ) THEN +* +* make off-diagonal elements real and copy them to E +* + DO 220 I = 1, N - 1 + T = AB( 2, I ) + ABST = ABS( T ) + AB( 2, I ) = ABST + E( I ) = ABST + IF( ABST.NE.ZERO ) THEN + T = T / ABST + ELSE + T = CONE + END IF + IF( I.LT.N-1 ) + $ AB( 2, I+1 ) = AB( 2, I+1 )*T + IF( WANTQ ) THEN + CALL ZSCAL( N, T, Q( 1, I+1 ), 1 ) + END IF + 220 CONTINUE + ELSE +* +* set E to zero if original matrix was diagonal +* + DO 230 I = 1, N - 1 + E( I ) = ZERO + 230 CONTINUE + END IF +* +* copy diagonal elements to D +* + DO 240 I = 1, N + D( I ) = AB( 1, I ) + 240 CONTINUE + END IF +* + RETURN +* +* End of ZHBTRD +* + END diff --git a/costa/native/external/lapack/zhecon.f b/costa/native/external/lapack/zhecon.f new file mode 100644 index 000000000..d65ee3a2e --- /dev/null +++ b/costa/native/external/lapack/zhecon.f @@ -0,0 +1,159 @@ + SUBROUTINE ZHECON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZHECON estimates the reciprocal of the condition number of a complex +* Hermitian matrix A using the factorization A = U*D*U**H or +* A = L*D*L**H computed by ZHETRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**H; +* = 'L': Lower triangular, form is A = L*D*L**H. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by ZHETRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by ZHETRF. +* +* ANORM (input) DOUBLE PRECISION +* The 1-norm of the original matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + DOUBLE PRECISION AINVNM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHETRS, ZLACON +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHECON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L') or inv(U*D*U'). +* + CALL ZHETRS( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of ZHECON +* + END diff --git a/costa/native/external/lapack/zheev.f b/costa/native/external/lapack/zheev.f new file mode 100644 index 000000000..240758636 --- /dev/null +++ b/costa/native/external/lapack/zheev.f @@ -0,0 +1,220 @@ + SUBROUTINE ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZHEEV computes all eigenvalues and, optionally, eigenvectors of a +* complex Hermitian matrix A. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA, N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* On exit, if JOBZ = 'V', then if INFO = 0, A contains the +* orthonormal eigenvectors of the matrix A. +* If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +* or the upper triangle (if UPLO='U') of A, including the +* diagonal, is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,2*N-1). +* For optimal efficiency, LWORK >= (NB+1)*N, +* where NB is the blocksize for ZHETRD returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (max(1, 3*N-2)) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE, + $ LLWORK, LOPT, LWKOPT, NB + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANHE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANHE +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTERF, XERBLA, ZHETRD, ZLASCL, ZSTEQR, + $ ZUNGTR +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( 1, ( NB+1 )*N ) + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + WORK( 1 ) = 3 + IF( WANTZ ) + $ A( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = ZLANHE( 'M', UPLO, N, A, LDA, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL ZLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. +* + INDE = 1 + INDTAU = 1 + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + CALL ZHETRD( UPLO, N, A, LDA, W, RWORK( INDE ), WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + LOPT = N + WORK( INDWRK ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* ZUNGTR to generate the unitary matrix, then call ZSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL ZUNGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ), + $ LLWORK, IINFO ) + INDWRK = INDE + N + CALL ZSTEQR( JOBZ, N, W, RWORK( INDE ), A, LDA, + $ RWORK( INDWRK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal complex workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZHEEV +* + END diff --git a/costa/native/external/lapack/zheevd.f b/costa/native/external/lapack/zheevd.f new file mode 100644 index 000000000..c8b5bfb1a --- /dev/null +++ b/costa/native/external/lapack/zheevd.f @@ -0,0 +1,296 @@ + SUBROUTINE ZHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, + $ LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZHEEVD computes all eigenvalues and, optionally, eigenvectors of a +* complex Hermitian matrix A. If eigenvectors are desired, it uses a +* divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA, N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* On exit, if JOBZ = 'V', then if INFO = 0, A contains the +* orthonormal eigenvectors of the matrix A. +* If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +* or the upper triangle (if UPLO='U') of A, including the +* diagonal, is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. +* If N <= 1, LWORK must be at least 1. +* If JOBZ = 'N' and N > 1, LWORK must be at least N + 1. +* If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace/output) DOUBLE PRECISION array, +* dimension (LRWORK) +* On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +* +* LRWORK (input) INTEGER +* The dimension of the array RWORK. +* If N <= 1, LRWORK must be at least 1. +* If JOBZ = 'N' and N > 1, LRWORK must be at least N. +* If JOBZ = 'V' and N > 1, LRWORK must be at least +* 1 + 5*N + 2*N**2. +* +* If LRWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the RWORK array, +* returns this value as the first entry of the RWORK array, and +* no error message related to LRWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If N <= 1, LIWORK must be at least 1. +* If JOBZ = 'N' and N > 1, LIWORK must be at least 1. +* If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDRWK, INDTAU, INDWK2, + $ INDWRK, ISCALE, LIOPT, LIWMIN, LLRWK, LLWORK, + $ LLWRK2, LOPT, LROPT, LRWMIN, LWMIN + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHE + EXTERNAL LSAME, DLAMCH, ZLANHE +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTERF, XERBLA, ZHETRD, ZLACPY, ZLASCL, + $ ZSTEDC, ZUNMTR +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, INT, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LWMIN = 1 + LRWMIN = 1 + LIWMIN = 1 + LOPT = LWMIN + LROPT = LRWMIN + LIOPT = LIWMIN + ELSE + IF( WANTZ ) THEN + LWMIN = 2*N + N*N + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + 1 + LRWMIN = N + LIWMIN = 1 + END IF + LOPT = LWMIN + LROPT = LRWMIN + LIOPT = LIWMIN + END IF + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LOPT + RWORK( 1 ) = LROPT + IWORK( 1 ) = LIOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + IF( WANTZ ) + $ A( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = ZLANHE( 'M', UPLO, N, A, LDA, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL ZLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. +* + INDE = 1 + INDTAU = 1 + INDWRK = INDTAU + N + INDRWK = INDE + N + INDWK2 = INDWRK + N*N + LLWORK = LWORK - INDWRK + 1 + LLWRK2 = LWORK - INDWK2 + 1 + LLRWK = LRWORK - INDRWK + 1 + CALL ZHETRD( UPLO, N, A, LDA, W, RWORK( INDE ), WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + LOPT = MAX( DBLE( LOPT ), DBLE( N )+DBLE( WORK( INDWRK ) ) ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* ZSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the +* tridiagonal matrix, then call ZUNMTR to multiply it to the +* Householder transformations represented as Householder vectors in +* A. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, RWORK( INDRWK ), LLRWK, + $ IWORK, LIWORK, INFO ) + CALL ZUNMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ), + $ WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO ) + CALL ZLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA ) + LOPT = MAX( LOPT, N+N**2+INT( WORK( INDWK2 ) ) ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + WORK( 1 ) = LOPT + RWORK( 1 ) = LROPT + IWORK( 1 ) = LIOPT +* + RETURN +* +* End of ZHEEVD +* + END diff --git a/costa/native/external/lapack/zheevr.f b/costa/native/external/lapack/zheevr.f new file mode 100644 index 000000000..557aedac5 --- /dev/null +++ b/costa/native/external/lapack/zheevr.f @@ -0,0 +1,522 @@ + SUBROUTINE ZHEEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, + $ RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 20, 2000 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LRWORK, LWORK, + $ M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZHEEVR computes selected eigenvalues and, optionally, eigenvectors +* of a complex Hermitian matrix T. Eigenvalues and eigenvectors can +* be selected by specifying either a range of values or a range of +* indices for the desired eigenvalues. +* +* Whenever possible, ZHEEVR calls ZSTEGR to compute the +* eigenspectrum using Relatively Robust Representations. ZSTEGR +* computes eigenvalues by the dqds algorithm, while orthogonal +* eigenvectors are computed from various "good" L D L^T representations +* (also known as Relatively Robust Representations). Gram-Schmidt +* orthogonalization is avoided as far as possible. More specifically, +* the various steps of the algorithm are as follows. For the i-th +* unreduced block of T, +* (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T +* is a relatively robust representation, +* (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high +* relative accuracy by the dqds algorithm, +* (c) If there is a cluster of close eigenvalues, "choose" sigma_i +* close to the cluster, and go to step (a), +* (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, +* compute the corresponding eigenvector by forming a +* rank-revealing twisted factorization. +* The desired accuracy of the output can be specified by the input +* parameter ABSTOL. +* +* For more details, see "A new O(n^2) algorithm for the symmetric +* tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, +* Computer Science Division Technical Report No. UCB//CSD-97-971, +* UC Berkeley, May 1997. +* +* +* Note 1 : ZHEEVR calls ZSTEGR when the full spectrum is requested +* on machines which conform to the ieee-754 floating point standard. +* ZHEEVR calls DSTEBZ and ZSTEIN on non-ieee machines and +* when partial spectrum requests are made. +* +* Normal execution of ZSTEGR may create NaNs and infinities and +* hence may abort due to a floating point exception in environments +* which do not handle NaNs and infinities in the ieee standard default +* manner. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +********** For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and +********** ZSTEIN are called +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA, N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* On exit, the lower triangle (if UPLO='L') or the upper +* triangle (if UPLO='U') of A, including the diagonal, is +* destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* VL (input) DOUBLE PRECISION +* VU (input) DOUBLE PRECISION +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) DOUBLE PRECISION +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing A to tridiagonal form. +* +* See "Computing Small Singular Values of Bidiagonal Matrices +* with Guaranteed High Relative Accuracy," by Demmel and +* Kahan, LAPACK Working Note #3. +* +* If high relative accuracy is important, set ABSTOL to +* DLAMCH( 'Safe minimum' ). Doing so will guarantee that +* eigenvalues are computed to high relative accuracy when +* possible in future releases. The current code does not +* make any guarantees about high relative accuracy, but +* furutre releases will. See J. Barlow and J. Demmel, +* "Computing Accurate Eigensystems of Scaled Diagonally +* Dominant Matrices", LAPACK Working Note #7, for a discussion +* of which matrices define their eigenvalues to high relative +* accuracy. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* The first M elements contain the selected eigenvalues in +* ascending order. +* +* Z (output) COMPLEX*16 array, dimension (LDZ, max(1,M)) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* ISUPPZ (output) INTEGER ARRAY, dimension ( 2*max(1,M) ) +* The support of the eigenvectors in Z, i.e., the indices +* indicating the nonzero elements in Z. The i-th eigenvector +* is nonzero only in elements ISUPPZ( 2*i-1 ) through +* ISUPPZ( 2*i ). +********** Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,2*N). +* For optimal efficiency, LWORK >= (NB+1)*N, +* where NB is the max of the blocksize for ZHETRD and for +* ZUNMTR as returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace/output) DOUBLE PRECISION array, dimension (LRWORK) +* On exit, if INFO = 0, RWORK(1) returns the optimal +* (and minimal) LRWORK. +* +* LRWORK (input) INTEGER +* The length of the array RWORK. LRWORK >= max(1,24*N). +* +* If LRWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the RWORK array, returns +* this value as the first entry of the RWORK array, and no error +* message related to LRWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal +* (and minimal) LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. LIWORK >= max(1,10*N). +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: Internal error +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* Ken Stanley, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IEEEOK, IINFO, IMAX, INDIBL, INDIFL, INDISP, + $ INDIWO, INDRD, INDRDD, INDRE, INDREE, INDRWK, + $ INDTAU, INDWK, INDWKN, ISCALE, ITMP1, J, JJ, + $ LIWMIN, LLWORK, LLWRKN, LRWMIN, LWKOPT, LWMIN, + $ NB, NSPLIT + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANSY + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANSY +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZDSCAL, + $ ZHETRD, ZSTEGR, ZSTEIN, ZSWAP, ZUNMTR +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IEEEOK = ILAENV( 10, 'ZHEEVR', 'N', 1, 2, 3, 4 ) +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LRWORK.EQ.-1 ) .OR. + $ ( LIWORK.EQ.-1 ) ) +* + LRWMIN = MAX( 1, 24*N ) + LIWMIN = MAX( 1, 10*N ) + LWMIN = MAX( 1, 2*N ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -22 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 ) + NB = MAX( NB, ILAENV( 1, 'ZUNMTR', UPLO, N, -1, -1, -1 ) ) + LWKOPT = MAX( ( NB+1 )*N, LWMIN ) + WORK( 1 ) = LWKOPT + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEEVR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + WORK( 1 ) = 7 + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = DBLE( A( 1, 1 ) ) + ELSE + IF( VL.LT.DBLE( A( 1, 1 ) ) .AND. VU.GE.DBLE( A( 1, 1 ) ) ) + $ THEN + M = 1 + W( 1 ) = DBLE( A( 1, 1 ) ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + VLL = VL + VUU = VU + ANRM = ZLANSY( 'M', UPLO, N, A, LDA, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL ZDSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL ZDSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. +* + INDTAU = 1 + INDWK = INDTAU + N +* + INDRE = 1 + INDRD = INDRE + N + INDREE = INDRD + N + INDRDD = INDREE + N + INDRWK = INDRDD + N + LLWORK = LWORK - INDWK + 1 + CALL ZHETRD( UPLO, N, A, LDA, RWORK( INDRD ), RWORK( INDRE ), + $ WORK( INDTAU ), WORK( INDWK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired +* then call DSTERF or ZSTEGR and ZUNMTR. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ IEEEOK.EQ.1 ) THEN + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N, RWORK( INDRD ), 1, W, 1 ) + CALL DCOPY( N-1, RWORK( INDRE ), 1, RWORK( INDREE ), 1 ) + CALL DSTERF( N, W, RWORK( INDREE ), INFO ) + ELSE + CALL DCOPY( N-1, RWORK( INDRE ), 1, RWORK( INDREE ), 1 ) + CALL DCOPY( N, RWORK( INDRD ), 1, RWORK( INDRDD ), 1 ) +* + CALL ZSTEGR( JOBZ, 'A', N, RWORK( INDRDD ), + $ RWORK( INDREE ), VL, VU, IL, IU, ABSTOL, M, W, + $ Z, LDZ, ISUPPZ, RWORK( INDRWK ), LWORK, IWORK, + $ LIWORK, INFO ) +* +* +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by ZSTEIN. +* + IF( WANTZ .AND. INFO.EQ.0 ) THEN + INDWKN = INDWK + LLWRKN = LWORK - INDWKN + 1 + CALL ZUNMTR( 'L', UPLO, 'N', N, M, A, LDA, + $ WORK( INDTAU ), Z, LDZ, WORK( INDWKN ), + $ LLWRKN, IINFO ) + END IF + END IF +* +* + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN. +* Also call DSTEBZ and ZSTEIN if CSTEGR fails. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIFL = 1 + INDIBL = INDIFL + N + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDRD ), RWORK( INDRE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL ZSTEIN( N, RWORK( INDRD ), RWORK( INDRE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWO ), IWORK( INDIFL ), + $ INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by ZSTEIN. +* + INDWKN = INDWK + LLWRKN = LWORK - INDWKN + 1 + CALL ZUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWKOPT + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of ZHEEVR +* + END diff --git a/costa/native/external/lapack/zheevx.f b/costa/native/external/lapack/zheevx.f new file mode 100644 index 000000000..6797df556 --- /dev/null +++ b/costa/native/external/lapack/zheevx.f @@ -0,0 +1,426 @@ + SUBROUTINE ZHEEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, WORK, LWORK, RWORK, + $ IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZHEEVX computes selected eigenvalues and, optionally, eigenvectors +* of a complex Hermitian matrix A. Eigenvalues and eigenvectors can +* be selected by specifying either a range of values or a range of +* indices for the desired eigenvalues. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA, N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* On exit, the lower triangle (if UPLO='L') or the upper +* triangle (if UPLO='U') of A, including the diagonal, is +* destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* VL (input) DOUBLE PRECISION +* VU (input) DOUBLE PRECISION +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) DOUBLE PRECISION +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing A to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*DLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*DLAMCH('S'). +* +* See "Computing Small Singular Values of Bidiagonal Matrices +* with Guaranteed High Relative Accuracy," by Demmel and +* Kahan, LAPACK Working Note #3. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* On normal exit, the first M elements contain the selected +* eigenvalues in ascending order. +* +* Z (output) COMPLEX*16 array, dimension (LDZ, max(1,M)) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If an eigenvector fails to converge, then that column of Z +* contains the latest approximation to the eigenvector, and the +* index of the eigenvector is returned in IFAIL. +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,2*N-1). +* For optimal efficiency, LWORK >= (NB+1)*N, +* where NB is the max of the blocksize for ZHETRD and for +* ZUNMTR as returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (7*N) +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, then i eigenvectors failed to converge. +* Their indices are stored in array IFAIL. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWK, INDRWK, INDTAU, INDWRK, ISCALE, + $ ITMP1, J, JJ, LLWORK, LOPT, LWKOPT, NB, NSPLIT + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANHE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANHE +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZDSCAL, + $ ZHETRD, ZLACPY, ZSTEIN, ZSTEQR, ZSWAP, ZUNGTR, + $ ZUNMTR +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + ELSE IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -17 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 ) + NB = MAX( NB, ILAENV( 1, 'ZUNMTR', UPLO, N, -1, -1, -1 ) ) + LWKOPT = ( NB+1 )*N + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + WORK( 1 ) = 1 + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + ELSE IF( VALEIG ) THEN + IF( VL.LT.DBLE( A( 1, 1 ) ) .AND. VU.GE.DBLE( A( 1, 1 ) ) ) + $ THEN + M = 1 + W( 1 ) = A( 1, 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + VLL = VL + VUU = VU + ANRM = ZLANHE( 'M', UPLO, N, A, LDA, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL ZDSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL ZDSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N + INDTAU = 1 + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + CALL ZHETRD( UPLO, N, A, LDA, RWORK( INDD ), RWORK( INDE ), + $ WORK( INDTAU ), WORK( INDWRK ), LLWORK, IINFO ) + LOPT = N + WORK( INDWRK ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal to +* zero, then call DSTERF or ZUNGTR and ZSTEQR. If this fails for +* some eigenvalue, then try DSTEBZ. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ ( ABSTOL.LE.ZERO ) ) THEN + CALL DCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL DSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL ZLACPY( 'A', N, N, A, LDA, Z, LDZ ) + CALL ZUNGTR( UPLO, N, Z, LDZ, WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL ZSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + IFAIL( I ) = 0 + 30 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 40 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL ZSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by ZSTEIN. +* + CALL ZUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWRK ), LLWORK, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 40 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 60 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 50 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 60 CONTINUE + END IF +* +* Set WORK(1) to optimal complex workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZHEEVX +* + END diff --git a/costa/native/external/lapack/zhegs2.f b/costa/native/external/lapack/zhegs2.f new file mode 100644 index 000000000..63a5ef387 --- /dev/null +++ b/costa/native/external/lapack/zhegs2.f @@ -0,0 +1,225 @@ + SUBROUTINE ZHEGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZHEGS2 reduces a complex Hermitian-definite generalized +* eigenproblem to standard form. +* +* If ITYPE = 1, the problem is A*x = lambda*B*x, +* and A is overwritten by inv(U')*A*inv(U) or inv(L)*A*inv(L') +* +* If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +* B*A*x = lambda*x, and A is overwritten by U*A*U` or L'*A*L. +* +* B must have been previously factorized as U'*U or L*L' by ZPOTRF. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* = 1: compute inv(U')*A*inv(U) or inv(L)*A*inv(L'); +* = 2 or 3: compute U*A*U' or L'*A*L. +* +* UPLO (input) CHARACTER +* Specifies whether the upper or lower triangular part of the +* Hermitian matrix A is stored, and how B has been factorized. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* n by n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n by n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the transformed matrix, stored in the +* same format as A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input) COMPLEX*16 array, dimension (LDB,N) +* The triangular factor from the Cholesky factorization of B, +* as returned by ZPOTRF. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, HALF + PARAMETER ( ONE = 1.0D+0, HALF = 0.5D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K + DOUBLE PRECISION AKK, BKK + COMPLEX*16 CT +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZDSCAL, ZHER2, ZLACGV, ZTRMV, + $ ZTRSV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEGS2', -INFO ) + RETURN + END IF +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U')*A*inv(U) +* + DO 10 K = 1, N +* +* Update the upper triangle of A(k:n,k:n) +* + AKK = A( K, K ) + BKK = B( K, K ) + AKK = AKK / BKK**2 + A( K, K ) = AKK + IF( K.LT.N ) THEN + CALL ZDSCAL( N-K, ONE / BKK, A( K, K+1 ), LDA ) + CT = -HALF*AKK + CALL ZLACGV( N-K, A( K, K+1 ), LDA ) + CALL ZLACGV( N-K, B( K, K+1 ), LDB ) + CALL ZAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ), + $ LDA ) + CALL ZHER2( UPLO, N-K, -CONE, A( K, K+1 ), LDA, + $ B( K, K+1 ), LDB, A( K+1, K+1 ), LDA ) + CALL ZAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ), + $ LDA ) + CALL ZLACGV( N-K, B( K, K+1 ), LDB ) + CALL ZTRSV( UPLO, 'Conjugate transpose', 'Non-unit', + $ N-K, B( K+1, K+1 ), LDB, A( K, K+1 ), + $ LDA ) + CALL ZLACGV( N-K, A( K, K+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L') +* + DO 20 K = 1, N +* +* Update the lower triangle of A(k:n,k:n) +* + AKK = A( K, K ) + BKK = B( K, K ) + AKK = AKK / BKK**2 + A( K, K ) = AKK + IF( K.LT.N ) THEN + CALL ZDSCAL( N-K, ONE / BKK, A( K+1, K ), 1 ) + CT = -HALF*AKK + CALL ZAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 ) + CALL ZHER2( UPLO, N-K, -CONE, A( K+1, K ), 1, + $ B( K+1, K ), 1, A( K+1, K+1 ), LDA ) + CALL ZAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 ) + CALL ZTRSV( UPLO, 'No transpose', 'Non-unit', N-K, + $ B( K+1, K+1 ), LDB, A( K+1, K ), 1 ) + END IF + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U' +* + DO 30 K = 1, N +* +* Update the upper triangle of A(1:k,1:k) +* + AKK = A( K, K ) + BKK = B( K, K ) + CALL ZTRMV( UPLO, 'No transpose', 'Non-unit', K-1, B, + $ LDB, A( 1, K ), 1 ) + CT = HALF*AKK + CALL ZAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 ) + CALL ZHER2( UPLO, K-1, CONE, A( 1, K ), 1, B( 1, K ), 1, + $ A, LDA ) + CALL ZAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 ) + CALL ZDSCAL( K-1, BKK, A( 1, K ), 1 ) + A( K, K ) = AKK*BKK**2 + 30 CONTINUE + ELSE +* +* Compute L'*A*L +* + DO 40 K = 1, N +* +* Update the lower triangle of A(1:k,1:k) +* + AKK = A( K, K ) + BKK = B( K, K ) + CALL ZLACGV( K-1, A( K, 1 ), LDA ) + CALL ZTRMV( UPLO, 'Conjugate transpose', 'Non-unit', K-1, + $ B, LDB, A( K, 1 ), LDA ) + CT = HALF*AKK + CALL ZLACGV( K-1, B( K, 1 ), LDB ) + CALL ZAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA ) + CALL ZHER2( UPLO, K-1, CONE, A( K, 1 ), LDA, B( K, 1 ), + $ LDB, A, LDA ) + CALL ZAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA ) + CALL ZLACGV( K-1, B( K, 1 ), LDB ) + CALL ZDSCAL( K-1, BKK, A( K, 1 ), LDA ) + CALL ZLACGV( K-1, A( K, 1 ), LDA ) + A( K, K ) = AKK*BKK**2 + 40 CONTINUE + END IF + END IF + RETURN +* +* End of ZHEGS2 +* + END diff --git a/costa/native/external/lapack/zhegst.f b/costa/native/external/lapack/zhegst.f new file mode 100644 index 000000000..dfaa47af3 --- /dev/null +++ b/costa/native/external/lapack/zhegst.f @@ -0,0 +1,260 @@ + SUBROUTINE ZHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZHEGST reduces a complex Hermitian-definite generalized +* eigenproblem to standard form. +* +* If ITYPE = 1, the problem is A*x = lambda*B*x, +* and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) +* +* If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +* B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. +* +* B must have been previously factorized as U**H*U or L*L**H by ZPOTRF. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H); +* = 2 or 3: compute U*A*U**H or L**H*A*L. +* +* UPLO (input) CHARACTER +* = 'U': Upper triangle of A is stored and B is factored as +* U**H*U; +* = 'L': Lower triangle of A is stored and B is factored as +* L*L**H. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the transformed matrix, stored in the +* same format as A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input) COMPLEX*16 array, dimension (LDB,N) +* The triangular factor from the Cholesky factorization of B, +* as returned by ZPOTRF. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + COMPLEX*16 CONE, HALF + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ), + $ HALF = ( 0.5D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K, KB, NB +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHEGS2, ZHEMM, ZHER2K, ZTRMM, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEGST', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'ZHEGST', UPLO, N, -1, -1, -1 ) +* + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL ZHEGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + ELSE +* +* Use blocked code +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U')*A*inv(U) +* + DO 10 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the upper triangle of A(k:n,k:n) +* + CALL ZHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + IF( K+KB.LE.N ) THEN + CALL ZTRSM( 'Left', UPLO, 'Conjugate transpose', + $ 'Non-unit', KB, N-K-KB+1, CONE, + $ B( K, K ), LDB, A( K, K+KB ), LDA ) + CALL ZHEMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, + $ A( K, K ), LDA, B( K, K+KB ), LDB, + $ CONE, A( K, K+KB ), LDA ) + CALL ZHER2K( UPLO, 'Conjugate transpose', N-K-KB+1, + $ KB, -CONE, A( K, K+KB ), LDA, + $ B( K, K+KB ), LDB, ONE, + $ A( K+KB, K+KB ), LDA ) + CALL ZHEMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, + $ A( K, K ), LDA, B( K, K+KB ), LDB, + $ CONE, A( K, K+KB ), LDA ) + CALL ZTRSM( 'Right', UPLO, 'No transpose', + $ 'Non-unit', KB, N-K-KB+1, CONE, + $ B( K+KB, K+KB ), LDB, A( K, K+KB ), + $ LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L') +* + DO 20 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the lower triangle of A(k:n,k:n) +* + CALL ZHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + IF( K+KB.LE.N ) THEN + CALL ZTRSM( 'Right', UPLO, 'Conjugate transpose', + $ 'Non-unit', N-K-KB+1, KB, CONE, + $ B( K, K ), LDB, A( K+KB, K ), LDA ) + CALL ZHEMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, + $ A( K, K ), LDA, B( K+KB, K ), LDB, + $ CONE, A( K+KB, K ), LDA ) + CALL ZHER2K( UPLO, 'No transpose', N-K-KB+1, KB, + $ -CONE, A( K+KB, K ), LDA, + $ B( K+KB, K ), LDB, ONE, + $ A( K+KB, K+KB ), LDA ) + CALL ZHEMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, + $ A( K, K ), LDA, B( K+KB, K ), LDB, + $ CONE, A( K+KB, K ), LDA ) + CALL ZTRSM( 'Left', UPLO, 'No transpose', + $ 'Non-unit', N-K-KB+1, KB, CONE, + $ B( K+KB, K+KB ), LDB, A( K+KB, K ), + $ LDA ) + END IF + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U' +* + DO 30 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) +* + CALL ZTRMM( 'Left', UPLO, 'No transpose', 'Non-unit', + $ K-1, KB, CONE, B, LDB, A( 1, K ), LDA ) + CALL ZHEMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ), + $ LDA, B( 1, K ), LDB, CONE, A( 1, K ), + $ LDA ) + CALL ZHER2K( UPLO, 'No transpose', K-1, KB, CONE, + $ A( 1, K ), LDA, B( 1, K ), LDB, ONE, A, + $ LDA ) + CALL ZHEMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ), + $ LDA, B( 1, K ), LDB, CONE, A( 1, K ), + $ LDA ) + CALL ZTRMM( 'Right', UPLO, 'Conjugate transpose', + $ 'Non-unit', K-1, KB, CONE, B( K, K ), LDB, + $ A( 1, K ), LDA ) + CALL ZHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + 30 CONTINUE + ELSE +* +* Compute L'*A*L +* + DO 40 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) +* + CALL ZTRMM( 'Right', UPLO, 'No transpose', 'Non-unit', + $ KB, K-1, CONE, B, LDB, A( K, 1 ), LDA ) + CALL ZHEMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ), + $ LDA, B( K, 1 ), LDB, CONE, A( K, 1 ), + $ LDA ) + CALL ZHER2K( UPLO, 'Conjugate transpose', K-1, KB, + $ CONE, A( K, 1 ), LDA, B( K, 1 ), LDB, + $ ONE, A, LDA ) + CALL ZHEMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ), + $ LDA, B( K, 1 ), LDB, CONE, A( K, 1 ), + $ LDA ) + CALL ZTRMM( 'Left', UPLO, 'Conjugate transpose', + $ 'Non-unit', KB, K-1, CONE, B( K, K ), LDB, + $ A( K, 1 ), LDA ) + CALL ZHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + 40 CONTINUE + END IF + END IF + END IF + RETURN +* +* End of ZHEGST +* + END diff --git a/costa/native/external/lapack/zhegv.f b/costa/native/external/lapack/zhegv.f new file mode 100644 index 000000000..3bfc95b61 --- /dev/null +++ b/costa/native/external/lapack/zhegv.f @@ -0,0 +1,229 @@ + SUBROUTINE ZHEGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, + $ LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZHEGV computes all the eigenvalues, and optionally, the eigenvectors +* of a complex generalized Hermitian-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +* Here A and B are assumed to be Hermitian and B is also +* positive definite. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA, N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* +* On exit, if JOBZ = 'V', then if INFO = 0, A contains the +* matrix Z of eigenvectors. The eigenvectors are normalized +* as follows: +* if ITYPE = 1 or 2, Z**H*B*Z = I; +* if ITYPE = 3, Z**H*inv(B)*Z = I. +* If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +* or the lower triangle (if UPLO='L') of A, including the +* diagonal, is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX*16 array, dimension (LDB, N) +* On entry, the Hermitian positive definite matrix B. +* If UPLO = 'U', the leading N-by-N upper triangular part of B +* contains the upper triangular part of the matrix B. +* If UPLO = 'L', the leading N-by-N lower triangular part of B +* contains the lower triangular part of the matrix B. +* +* On exit, if INFO <= N, the part of B containing the matrix is +* overwritten by the triangular factor U or L from the Cholesky +* factorization B = U**H*U or B = L*L**H. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,2*N-1). +* For optimal efficiency, LWORK >= (NB+1)*N, +* where NB is the blocksize for ZHETRD returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (max(1, 3*N-2)) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: ZPOTRF or ZHEEV returned an error code: +* <= N: if INFO = i, ZHEEV failed to converge; +* i off-diagonal elements of an intermediate +* tridiagonal form did not converge to zero; +* > N: if INFO = N + i, for 1 <= i <= N, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER LWKOPT, NB, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHEEV, ZHEGST, ZPOTRF, ZTRMM, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 ) + LWKOPT = ( NB+1 )*N + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEGV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL ZPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL ZHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + CALL ZTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + CALL ZTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZHEGV +* + END diff --git a/costa/native/external/lapack/zhegvd.f b/costa/native/external/lapack/zhegvd.f new file mode 100644 index 000000000..5362a33e8 --- /dev/null +++ b/costa/native/external/lapack/zhegvd.f @@ -0,0 +1,297 @@ + SUBROUTINE ZHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, + $ LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZHEGVD computes all the eigenvalues, and optionally, the eigenvectors +* of a complex generalized Hermitian-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +* B are assumed to be Hermitian and B is also positive definite. +* If eigenvectors are desired, it uses a divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA, N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* +* On exit, if JOBZ = 'V', then if INFO = 0, A contains the +* matrix Z of eigenvectors. The eigenvectors are normalized +* as follows: +* if ITYPE = 1 or 2, Z**H*B*Z = I; +* if ITYPE = 3, Z**H*inv(B)*Z = I. +* If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +* or the lower triangle (if UPLO='L') of A, including the +* diagonal, is destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX*16 array, dimension (LDB, N) +* On entry, the Hermitian matrix B. If UPLO = 'U', the +* leading N-by-N upper triangular part of B contains the +* upper triangular part of the matrix B. If UPLO = 'L', +* the leading N-by-N lower triangular part of B contains +* the lower triangular part of the matrix B. +* +* On exit, if INFO <= N, the part of B containing the matrix is +* overwritten by the triangular factor U or L from the Cholesky +* factorization B = U**H*U or B = L*L**H. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. +* If N <= 1, LWORK >= 1. +* If JOBZ = 'N' and N > 1, LWORK >= N + 1. +* If JOBZ = 'V' and N > 1, LWORK >= 2*N + N**2. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace/output) DOUBLE PRECISION array, dimension (LRWORK) +* On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +* +* LRWORK (input) INTEGER +* The dimension of the array RWORK. +* If N <= 1, LRWORK >= 1. +* If JOBZ = 'N' and N > 1, LRWORK >= N. +* If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2. +* +* If LRWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the RWORK array, +* returns this value as the first entry of the RWORK array, and +* no error message related to LRWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If N <= 1, LIWORK >= 1. +* If JOBZ = 'N' and N > 1, LIWORK >= 1. +* If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: ZPOTRF or ZHEEVD returned an error code: +* <= N: if INFO = i, ZHEEVD failed to converge; +* i off-diagonal elements of an intermediate +* tridiagonal form did not converge to zero; +* > N: if INFO = N + i, for 1 <= i <= N, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER LIOPT, LIWMIN, LOPT, LROPT, LRWMIN, LWMIN, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHEEVD, ZHEGST, ZPOTRF, ZTRMM, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LWMIN = 1 + LRWMIN = 1 + LIWMIN = 1 + LOPT = LWMIN + LROPT = LRWMIN + LIOPT = LIWMIN + ELSE + IF( WANTZ ) THEN + LWMIN = 2*N + N*N + LRWMIN = 1 + 5*N + 2*N*N + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + 1 + LRWMIN = N + LIWMIN = 1 + END IF + LOPT = LWMIN + LROPT = LRWMIN + LIOPT = LIWMIN + END IF + IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LOPT + RWORK( 1 ) = LROPT + IWORK( 1 ) = LIOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL ZPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL ZHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL ZHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, LRWORK, + $ IWORK, LIWORK, INFO ) + LOPT = MAX( DBLE( LOPT ), DBLE( WORK( 1 ) ) ) + LROPT = MAX( DBLE( LROPT ), DBLE( RWORK( 1 ) ) ) + LIOPT = MAX( DBLE( LIOPT ), DBLE( IWORK( 1 ) ) ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + CALL ZTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, CONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + CALL ZTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, CONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LOPT + RWORK( 1 ) = LROPT + IWORK( 1 ) = LIOPT +* + RETURN +* +* End of ZHEGVD +* + END diff --git a/costa/native/external/lapack/zhegvx.f b/costa/native/external/lapack/zhegvx.f new file mode 100644 index 000000000..c5b67aec1 --- /dev/null +++ b/costa/native/external/lapack/zhegvx.f @@ -0,0 +1,330 @@ + SUBROUTINE ZHEGVX( ITYPE, JOBZ, RANGE, UPLO, N, A, LDA, B, LDB, + $ VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, + $ LWORK, RWORK, IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, ITYPE, IU, LDA, LDB, LDZ, LWORK, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZHEGVX computes selected eigenvalues, and optionally, eigenvectors +* of a complex generalized Hermitian-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +* B are assumed to be Hermitian and B is also positive definite. +* Eigenvalues and eigenvectors can be selected by specifying either a +* range of values or a range of indices for the desired eigenvalues. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +** +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA, N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of A contains the +* upper triangular part of the matrix A. If UPLO = 'L', +* the leading N-by-N lower triangular part of A contains +* the lower triangular part of the matrix A. +* +* On exit, the lower triangle (if UPLO='L') or the upper +* triangle (if UPLO='U') of A, including the diagonal, is +* destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX*16 array, dimension (LDB, N) +* On entry, the Hermitian matrix B. If UPLO = 'U', the +* leading N-by-N upper triangular part of B contains the +* upper triangular part of the matrix B. If UPLO = 'L', +* the leading N-by-N lower triangular part of B contains +* the lower triangular part of the matrix B. +* +* On exit, if INFO <= N, the part of B containing the matrix is +* overwritten by the triangular factor U or L from the Cholesky +* factorization B = U**H*U or B = L*L**H. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* VL (input) DOUBLE PRECISION +* VU (input) DOUBLE PRECISION +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) DOUBLE PRECISION +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing A to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*DLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*DLAMCH('S'). +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* The first M elements contain the selected +* eigenvalues in ascending order. +* +* Z (output) COMPLEX*16 array, dimension (LDZ, max(1,M)) +* If JOBZ = 'N', then Z is not referenced. +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* The eigenvectors are normalized as follows: +* if ITYPE = 1 or 2, Z**T*B*Z = I; +* if ITYPE = 3, Z**T*inv(B)*Z = I. +* +* If an eigenvector fails to converge, then that column of Z +* contains the latest approximation to the eigenvector, and the +* index of the eigenvector is returned in IFAIL. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of the array WORK. LWORK >= max(1,2*N-1). +* For optimal efficiency, LWORK >= (NB+1)*N, +* where NB is the blocksize for ZHETRD returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (7*N) +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: ZPOTRF or ZHEEVX returned an error code: +* <= N: if INFO = i, ZHEEVX failed to converge; +* i eigenvectors failed to converge. Their indices +* are stored in array IFAIL. +* > N: if INFO = N + i, for 1 <= i <= N, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ + CHARACTER TRANS + INTEGER LOPT, LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHEEVX, ZHEGST, ZPOTRF, ZTRMM, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -3 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( VALEIG .AND. N.GT.0 ) THEN + IF( VU.LE.VL ) + $ INFO = -11 + ELSE IF( INDEIG .AND. IL.LT.1 ) THEN + INFO = -12 + ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN + INFO = -13 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -18 + ELSE IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 ) + LWKOPT = ( NB+1 )*N + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEGVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Form a Cholesky factorization of B. +* + CALL ZPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL ZHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL ZHEEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, ABSTOL, + $ M, W, Z, LDZ, WORK, LWORK, RWORK, IWORK, IFAIL, + $ INFO ) + LOPT = WORK( 1 ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + IF( INFO.GT.0 ) + $ M = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + CALL ZTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, M, CONE, B, + $ LDB, Z, LDZ ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + CALL ZTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, M, CONE, B, + $ LDB, Z, LDZ ) + END IF + END IF +* +* Set WORK(1) to optimal complex workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZHEGVX +* + END diff --git a/costa/native/external/lapack/zherfs.f b/costa/native/external/lapack/zherfs.f new file mode 100644 index 000000000..456841f1d --- /dev/null +++ b/costa/native/external/lapack/zherfs.f @@ -0,0 +1,339 @@ + SUBROUTINE ZHERFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, + $ X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZHERFS improves the computed solution to a system of linear +* equations when the coefficient matrix is Hermitian indefinite, and +* provides error bounds and backward error estimates for the solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The Hermitian matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input) COMPLEX*16 array, dimension (LDAF,N) +* The factored form of the matrix A. AF contains the block +* diagonal matrix D and the multipliers used to obtain the +* factor U or L from the factorization A = U*D*U**H or +* A = L*D*L**H as computed by ZHETRF. +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by ZHETRF. +* +* B (input) COMPLEX*16 array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) COMPLEX*16 array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by ZHETRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZHEMV, ZHETRS, ZLACON +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHERFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL ZHEMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 40 CONTINUE + RWORK( K ) = RWORK( K ) + ABS( DBLE( A( K, K ) ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + ABS( DBLE( A( K, K ) ) )*XK + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL ZHETRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + CALL ZAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL ZHETRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL ZHETRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of ZHERFS +* + END diff --git a/costa/native/external/lapack/zhesv.f b/costa/native/external/lapack/zhesv.f new file mode 100644 index 000000000..18fa775c8 --- /dev/null +++ b/costa/native/external/lapack/zhesv.f @@ -0,0 +1,171 @@ + SUBROUTINE ZHESV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZHESV computes the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS +* matrices. +* +* The diagonal pivoting method is used to factor A as +* A = U * D * U**H, if UPLO = 'U', or +* A = L * D * L**H, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is Hermitian and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then +* used to solve the system of equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the block diagonal matrix D and the +* multipliers used to obtain the factor U or L from the +* factorization A = U*D*U**H or A = L*D*L**H as computed by +* ZHETRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D, as +* determined by ZHETRF. If IPIV(k) > 0, then rows and columns +* k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +* diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +* then rows and columns k-1 and -IPIV(k) were interchanged and +* D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +* IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +* -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +* diagonal block. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of WORK. LWORK >= 1, and for best performance +* LWORK >= N*NB, where NB is the optimal blocksize for +* ZHETRF. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, so the solution could not be computed. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHETRF, ZHETRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'ZHETRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHESV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL ZHETRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZHETRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZHESV +* + END diff --git a/costa/native/external/lapack/zhesvx.f b/costa/native/external/lapack/zhesvx.f new file mode 100644 index 000000000..399f14864 --- /dev/null +++ b/costa/native/external/lapack/zhesvx.f @@ -0,0 +1,299 @@ + SUBROUTINE ZHESVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, + $ LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK, + $ RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZHESVX uses the diagonal pivoting factorization to compute the +* solution to a complex system of linear equations A * X = B, +* where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS +* matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'N', the diagonal pivoting method is used to factor A. +* The form of the factorization is +* A = U * D * U**H, if UPLO = 'U', or +* A = L * D * L**H, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is Hermitian and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. +* +* 2. If some D(i,i)=0, so that D is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 3. The system of equations is solved for X using the factored form +* of A. +* +* 4. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of A has been +* supplied on entry. +* = 'F': On entry, AF and IPIV contain the factored form +* of A. A, AF and IPIV will not be modified. +* = 'N': The matrix A will be copied to AF and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The Hermitian matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input or output) COMPLEX*16 array, dimension (LDAF,N) +* If FACT = 'F', then AF is an input argument and on entry +* contains the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**H or A = L*D*L**H as computed by ZHETRF. +* +* If FACT = 'N', then AF is an output argument and on exit +* returns the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**H or A = L*D*L**H. +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains details of the interchanges and the block structure +* of D, as determined by ZHETRF. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains details of the interchanges and the block structure +* of D, as determined by ZHETRF. +* +* B (input) COMPLEX*16 array, dimension (LDB,NRHS) +* The N-by-NRHS right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) COMPLEX*16 array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) DOUBLE PRECISION +* The estimate of the reciprocal condition number of the matrix +* A. If RCOND is less than the machine precision (in +* particular, if RCOND = 0), the matrix is singular to working +* precision. This condition is indicated by a return code of +* INFO > 0. +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of WORK. LWORK >= 2*N, and for best performance +* LWORK >= N*NB, where NB is the optimal blocksize for +* ZHETRF. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: D(i,i) is exactly zero. The factorization +* has been completed but the factor D is exactly +* singular, so the solution and error bounds could +* not be computed. RCOND = 0 is returned. +* = N+1: D is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOFACT + INTEGER LWKOPT, NB + DOUBLE PRECISION ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANHE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANHE +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHECON, ZHERFS, ZHETRF, ZHETRS, ZLACPY +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'ZHETRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHESVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL ZLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL ZHETRF( UPLO, N, AF, LDAF, IPIV, WORK, LWORK, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = ZLANHE( 'I', UPLO, N, A, LDA, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL ZHECON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution vectors X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZHETRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL ZHERFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZHESVX +* + END diff --git a/costa/native/external/lapack/zhetd2.f b/costa/native/external/lapack/zhetd2.f new file mode 100644 index 000000000..cbf4b3187 --- /dev/null +++ b/costa/native/external/lapack/zhetd2.f @@ -0,0 +1,259 @@ + SUBROUTINE ZHETD2( UPLO, N, A, LDA, D, E, TAU, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + COMPLEX*16 A( LDA, * ), TAU( * ) +* .. +* +* Purpose +* ======= +* +* ZHETD2 reduces a complex Hermitian matrix A to real symmetric +* tridiagonal form T by a unitary similarity transformation: +* Q' * A * Q = T. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* Hermitian matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* On exit, if UPLO = 'U', the diagonal and first superdiagonal +* of A are overwritten by the corresponding elements of the +* tridiagonal matrix T, and the elements above the first +* superdiagonal, with the array TAU, represent the unitary +* matrix Q as a product of elementary reflectors; if UPLO +* = 'L', the diagonal and first subdiagonal of A are over- +* written by the corresponding elements of the tridiagonal +* matrix T, and the elements below the first subdiagonal, with +* the array TAU, represent the unitary matrix Q as a product +* of elementary reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* D (output) DOUBLE PRECISION array, dimension (N) +* The diagonal elements of the tridiagonal matrix T: +* D(i) = A(i,i). +* +* E (output) DOUBLE PRECISION array, dimension (N-1) +* The off-diagonal elements of the tridiagonal matrix T: +* E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +* +* TAU (output) COMPLEX*16 array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* If UPLO = 'U', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(n-1) . . . H(2) H(1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +* A(1:i-1,i+1), and tau in TAU(i). +* +* If UPLO = 'L', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(1) H(2) . . . H(n-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +* and tau in TAU(i). +* +* The contents of A on exit are illustrated by the following examples +* with n = 5: +* +* if UPLO = 'U': if UPLO = 'L': +* +* ( d e v2 v3 v4 ) ( d ) +* ( d e v3 v4 ) ( e d ) +* ( d e v4 ) ( v1 e d ) +* ( d e ) ( v1 v2 e d ) +* ( d ) ( v1 v2 v3 e d ) +* +* where d and e denote diagonal and off-diagonal elements of T, and vi +* denotes an element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO, HALF + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ), + $ HALF = ( 0.5D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + COMPLEX*16 ALPHA, TAUI +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZHEMV, ZHER2, ZLARFG +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETD2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A +* + A( N, N ) = DBLE( A( N, N ) ) + DO 10 I = N - 1, 1, -1 +* +* Generate elementary reflector H(i) = I - tau * v * v' +* to annihilate A(1:i-1,i+1) +* + ALPHA = A( I, I+1 ) + CALL ZLARFG( I, ALPHA, A( 1, I+1 ), 1, TAUI ) + E( I ) = ALPHA +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(1:i,1:i) +* + A( I, I+1 ) = ONE +* +* Compute x := tau * A * v storing x in TAU(1:i) +* + CALL ZHEMV( UPLO, I, TAUI, A, LDA, A( 1, I+1 ), 1, ZERO, + $ TAU, 1 ) +* +* Compute w := x - 1/2 * tau * (x'*v) * v +* + ALPHA = -HALF*TAUI*ZDOTC( I, TAU, 1, A( 1, I+1 ), 1 ) + CALL ZAXPY( I, ALPHA, A( 1, I+1 ), 1, TAU, 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w' - w * v' +* + CALL ZHER2( UPLO, I, -ONE, A( 1, I+1 ), 1, TAU, 1, A, + $ LDA ) +* + ELSE + A( I, I ) = DBLE( A( I, I ) ) + END IF + A( I, I+1 ) = E( I ) + D( I+1 ) = A( I+1, I+1 ) + TAU( I ) = TAUI + 10 CONTINUE + D( 1 ) = A( 1, 1 ) + ELSE +* +* Reduce the lower triangle of A +* + A( 1, 1 ) = DBLE( A( 1, 1 ) ) + DO 20 I = 1, N - 1 +* +* Generate elementary reflector H(i) = I - tau * v * v' +* to annihilate A(i+2:n,i) +* + ALPHA = A( I+1, I ) + CALL ZLARFG( N-I, ALPHA, A( MIN( I+2, N ), I ), 1, TAUI ) + E( I ) = ALPHA +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(i+1:n,i+1:n) +* + A( I+1, I ) = ONE +* +* Compute x := tau * A * v storing y in TAU(i:n-1) +* + CALL ZHEMV( UPLO, N-I, TAUI, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, TAU( I ), 1 ) +* +* Compute w := x - 1/2 * tau * (x'*v) * v +* + ALPHA = -HALF*TAUI*ZDOTC( N-I, TAU( I ), 1, A( I+1, I ), + $ 1 ) + CALL ZAXPY( N-I, ALPHA, A( I+1, I ), 1, TAU( I ), 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w' - w * v' +* + CALL ZHER2( UPLO, N-I, -ONE, A( I+1, I ), 1, TAU( I ), 1, + $ A( I+1, I+1 ), LDA ) +* + ELSE + A( I+1, I+1 ) = DBLE( A( I+1, I+1 ) ) + END IF + A( I+1, I ) = E( I ) + D( I ) = A( I, I ) + TAU( I ) = TAUI + 20 CONTINUE + D( N ) = A( N, N ) + END IF +* + RETURN +* +* End of ZHETD2 +* + END diff --git a/costa/native/external/lapack/zhetf2.f b/costa/native/external/lapack/zhetf2.f new file mode 100644 index 000000000..7a0fb8b11 --- /dev/null +++ b/costa/native/external/lapack/zhetf2.f @@ -0,0 +1,546 @@ + SUBROUTINE ZHETF2( UPLO, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZHETF2 computes the factorization of a complex Hermitian matrix A +* using the Bunch-Kaufman diagonal pivoting method: +* +* A = U*D*U' or A = L*D*L' +* +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, U' is the conjugate transpose of U, and D is +* Hermitian and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* Hermitian matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L (see below for further details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, D(k,k) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, and division by zero will occur if it +* is used to solve a system of equations. +* +* Further Details +* =============== +* +* 1-96 - Based on modifications by +* J. Lewis, Boeing Computer Services Company +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* If UPLO = 'U', then A = U*D*U', where +* U = P(n)*U(n)* ... *P(k)U(k)* ..., +* i.e., U is a product of terms P(k)*U(k), where k decreases from n to +* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I v 0 ) k-s +* U(k) = ( 0 I 0 ) s +* ( 0 0 I ) n-k +* k-s s n-k +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +* and A(k,k), and v overwrites A(1:k-2,k-1:k). +* +* If UPLO = 'L', then A = L*D*L', where +* L = P(1)*L(1)* ... *P(k)*L(k)* ..., +* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I 0 0 ) k-1 +* L(k) = ( 0 I 0 ) s +* ( 0 v I ) n-k-s+1 +* k-1 s n-k-s+1 +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KK, KP, KSTEP + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, D, D11, D22, R1, ROWMAX, + $ TT + COMPLEX*16 D12, D21, T, WK, WKM1, WKP1, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAPY2 + EXTERNAL LSAME, IZAMAX, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZHER, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETF2', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U' using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 90 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( DBLE( A( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = IZAMAX( K-1, A( 1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = DBLE( A( K, K ) ) + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = IMAX + IZAMAX( K-IMAX, A( IMAX, IMAX+1 ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + IF( IMAX.GT.1 ) THEN + JMAX = IZAMAX( IMAX-1, A( 1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( DBLE( A( IMAX, IMAX ) ) ).GE.ALPHA*ROWMAX ) + $ THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL ZSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) + DO 20 J = KP + 1, KK - 1 + T = DCONJG( A( J, KK ) ) + A( J, KK ) = DCONJG( A( KP, J ) ) + A( KP, J ) = T + 20 CONTINUE + A( KP, KK ) = DCONJG( A( KP, KK ) ) + R1 = DBLE( A( KK, KK ) ) + A( KK, KK ) = DBLE( A( KP, KP ) ) + A( KP, KP ) = R1 + IF( KSTEP.EQ.2 ) THEN + A( K, K ) = DBLE( A( K, K ) ) + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + ELSE + A( K, K ) = DBLE( A( K, K ) ) + IF( KSTEP.EQ.2 ) + $ A( K-1, K-1 ) = DBLE( A( K-1, K-1 ) ) + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)' +* + R1 = ONE / DBLE( A( K, K ) ) + CALL ZHER( UPLO, K-1, -R1, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL ZDSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )' +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )' +* + IF( K.GT.2 ) THEN +* + D = DLAPY2( DBLE( A( K-1, K ) ), + $ DIMAG( A( K-1, K ) ) ) + D22 = DBLE( A( K-1, K-1 ) ) / D + D11 = DBLE( A( K, K ) ) / D + TT = ONE / ( D11*D22-ONE ) + D12 = A( K-1, K ) / D + D = TT / D +* + DO 40 J = K - 2, 1, -1 + WKM1 = D*( D11*A( J, K-1 )-DCONJG( D12 )* + $ A( J, K ) ) + WK = D*( D22*A( J, K )-D12*A( J, K-1 ) ) + DO 30 I = J, 1, -1 + A( I, J ) = A( I, J ) - A( I, K )*DCONJG( WK ) - + $ A( I, K-1 )*DCONJG( WKM1 ) + 30 CONTINUE + A( J, K ) = WK + A( J, K-1 ) = WKM1 + A( J, J ) = DCMPLX( DBLE( A( J, J ) ), 0.0D+0 ) + 40 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L' using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 50 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 90 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( DBLE( A( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + IZAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = DBLE( A( K, K ) ) + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = K - 1 + IZAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + IZAMAX( N-IMAX, A( IMAX+1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( DBLE( A( IMAX, IMAX ) ) ).GE.ALPHA*ROWMAX ) + $ THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) + DO 60 J = KK + 1, KP - 1 + T = DCONJG( A( J, KK ) ) + A( J, KK ) = DCONJG( A( KP, J ) ) + A( KP, J ) = T + 60 CONTINUE + A( KP, KK ) = DCONJG( A( KP, KK ) ) + R1 = DBLE( A( KK, KK ) ) + A( KK, KK ) = DBLE( A( KP, KP ) ) + A( KP, KP ) = R1 + IF( KSTEP.EQ.2 ) THEN + A( K, K ) = DBLE( A( K, K ) ) + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + ELSE + A( K, K ) = DBLE( A( K, K ) ) + IF( KSTEP.EQ.2 ) + $ A( K+1, K+1 ) = DBLE( A( K+1, K+1 ) ) + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)' +* + R1 = ONE / DBLE( A( K, K ) ) + CALL ZHER( UPLO, N-K, -R1, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column K +* + CALL ZDSCAL( N-K, R1, A( K+1, K ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k) +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )' +* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )' +* +* where L(k) and L(k+1) are the k-th and (k+1)-th +* columns of L +* + D = DLAPY2( DBLE( A( K+1, K ) ), + $ DIMAG( A( K+1, K ) ) ) + D11 = DBLE( A( K+1, K+1 ) ) / D + D22 = DBLE( A( K, K ) ) / D + TT = ONE / ( D11*D22-ONE ) + D21 = A( K+1, K ) / D + D = TT / D +* + DO 80 J = K + 2, N + WK = D*( D11*A( J, K )-D21*A( J, K+1 ) ) + WKP1 = D*( D22*A( J, K+1 )-DCONJG( D21 )* + $ A( J, K ) ) + DO 70 I = J, N + A( I, J ) = A( I, J ) - A( I, K )*DCONJG( WK ) - + $ A( I, K+1 )*DCONJG( WKP1 ) + 70 CONTINUE + A( J, K ) = WK + A( J, K+1 ) = WKP1 + A( J, J ) = DCMPLX( DBLE( A( J, J ) ), 0.0D+0 ) + 80 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 50 +* + END IF +* + 90 CONTINUE + RETURN +* +* End of ZHETF2 +* + END diff --git a/costa/native/external/lapack/zhetrd.f b/costa/native/external/lapack/zhetrd.f new file mode 100644 index 000000000..bc310c090 --- /dev/null +++ b/costa/native/external/lapack/zhetrd.f @@ -0,0 +1,297 @@ + SUBROUTINE ZHETRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZHETRD reduces a complex Hermitian matrix A to real symmetric +* tridiagonal form T by a unitary similarity transformation: +* Q**H * A * Q = T. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* On exit, if UPLO = 'U', the diagonal and first superdiagonal +* of A are overwritten by the corresponding elements of the +* tridiagonal matrix T, and the elements above the first +* superdiagonal, with the array TAU, represent the unitary +* matrix Q as a product of elementary reflectors; if UPLO +* = 'L', the diagonal and first subdiagonal of A are over- +* written by the corresponding elements of the tridiagonal +* matrix T, and the elements below the first subdiagonal, with +* the array TAU, represent the unitary matrix Q as a product +* of elementary reflectors. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* D (output) DOUBLE PRECISION array, dimension (N) +* The diagonal elements of the tridiagonal matrix T: +* D(i) = A(i,i). +* +* E (output) DOUBLE PRECISION array, dimension (N-1) +* The off-diagonal elements of the tridiagonal matrix T: +* E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +* +* TAU (output) COMPLEX*16 array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1. +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* If UPLO = 'U', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(n-1) . . . H(2) H(1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +* A(1:i-1,i+1), and tau in TAU(i). +* +* If UPLO = 'L', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(1) H(2) . . . H(n-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +* and tau in TAU(i). +* +* The contents of A on exit are illustrated by the following examples +* with n = 5: +* +* if UPLO = 'U': if UPLO = 'L': +* +* ( d e v2 v3 v4 ) ( d ) +* ( d e v3 v4 ) ( e d ) +* ( d e v4 ) ( v1 e d ) +* ( d e ) ( v1 v2 e d ) +* ( d ) ( v1 v2 v3 e d ) +* +* where d and e denote diagonal and off-diagonal elements of T, and vi +* denotes an element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, IWS, J, KK, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHER2K, ZHETD2, ZLATRD +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. +* + NB = ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NX = N + IWS = 1 + IF( NB.GT.1 .AND. NB.LT.N ) THEN +* +* Determine when to cross over from blocked to unblocked code +* (last block is always handled by unblocked code). +* + NX = MAX( NB, ILAENV( 3, 'ZHETRD', UPLO, N, -1, -1, -1 ) ) + IF( NX.LT.N ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code by setting NX = N. +* + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = ILAENV( 2, 'ZHETRD', UPLO, N, -1, -1, -1 ) + IF( NB.LT.NBMIN ) + $ NX = N + END IF + ELSE + NX = N + END IF + ELSE + NB = 1 + END IF +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A. +* Columns 1:kk are handled by the unblocked method. +* + KK = N - ( ( N-NX+NB-1 ) / NB )*NB + DO 20 I = N - NB + 1, KK + 1, -NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL ZLATRD( UPLO, I+NB-1, NB, A, LDA, E, TAU, WORK, + $ LDWORK ) +* +* Update the unreduced submatrix A(1:i-1,1:i-1), using an +* update of the form: A := A - V*W' - W*V' +* + CALL ZHER2K( UPLO, 'No transpose', I-1, NB, -CONE, + $ A( 1, I ), LDA, WORK, LDWORK, ONE, A, LDA ) +* +* Copy superdiagonal elements back into A, and diagonal +* elements into D +* + DO 10 J = I, I + NB - 1 + A( J-1, J ) = E( J-1 ) + D( J ) = A( J, J ) + 10 CONTINUE + 20 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL ZHETD2( UPLO, KK, A, LDA, D, E, TAU, IINFO ) + ELSE +* +* Reduce the lower triangle of A +* + DO 40 I = 1, N - NX, NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL ZLATRD( UPLO, N-I+1, NB, A( I, I ), LDA, E( I ), + $ TAU( I ), WORK, LDWORK ) +* +* Update the unreduced submatrix A(i+nb:n,i+nb:n), using +* an update of the form: A := A - V*W' - W*V' +* + CALL ZHER2K( UPLO, 'No transpose', N-I-NB+1, NB, -CONE, + $ A( I+NB, I ), LDA, WORK( NB+1 ), LDWORK, ONE, + $ A( I+NB, I+NB ), LDA ) +* +* Copy subdiagonal elements back into A, and diagonal +* elements into D +* + DO 30 J = I, I + NB - 1 + A( J+1, J ) = E( J ) + D( J ) = A( J, J ) + 30 CONTINUE + 40 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL ZHETD2( UPLO, N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAU( I ), IINFO ) + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZHETRD +* + END diff --git a/costa/native/external/lapack/zhetrf.f b/costa/native/external/lapack/zhetrf.f new file mode 100644 index 000000000..562655b11 --- /dev/null +++ b/costa/native/external/lapack/zhetrf.f @@ -0,0 +1,282 @@ + SUBROUTINE ZHETRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZHETRF computes the factorization of a complex Hermitian matrix A +* using the Bunch-Kaufman diagonal pivoting method. The form of the +* factorization is +* +* A = U*D*U**H or A = L*D*L**H +* +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is Hermitian and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. +* +* This is the blocked version of the algorithm, calling Level 3 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L (see below for further details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of WORK. LWORK >=1. For best performance +* LWORK >= N*NB, where NB is the block size returned by ILAENV. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, and division by zero will occur if it +* is used to solve a system of equations. +* +* Further Details +* =============== +* +* If UPLO = 'U', then A = U*D*U', where +* U = P(n)*U(n)* ... *P(k)U(k)* ..., +* i.e., U is a product of terms P(k)*U(k), where k decreases from n to +* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I v 0 ) k-s +* U(k) = ( 0 I 0 ) s +* ( 0 0 I ) n-k +* k-s s n-k +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +* and A(k,k), and v overwrites A(1:k-2,k-1:k). +* +* If UPLO = 'L', then A = L*D*L', where +* L = P(1)*L(1)* ... *P(k)*L(k)* ..., +* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I 0 0 ) k-1 +* L(k) = ( 0 I 0 ) s +* ( 0 v I ) n-k-s+1 +* k-1 s n-k-s+1 +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHETF2, ZLAHEF +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'ZHETRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'ZHETRF', UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U' using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by ZLAHEF; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 40 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL ZLAHEF( UPLO, K, NB, KB, A, LDA, IPIV, WORK, N, IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL ZHETF2( UPLO, K, A, LDA, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L' using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by ZLAHEF; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL ZLAHEF( UPLO, N-K+1, NB, KB, A( K, K ), LDA, IPIV( K ), + $ WORK, N, IINFO ) + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL ZHETF2( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), IINFO ) + KB = N - K + 1 + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO 30 J = K, K + KB - 1 + IF( IPIV( J ).GT.0 ) THEN + IPIV( J ) = IPIV( J ) + K - 1 + ELSE + IPIV( J ) = IPIV( J ) - K + 1 + END IF + 30 CONTINUE +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* + END IF +* + 40 CONTINUE + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZHETRF +* + END diff --git a/costa/native/external/lapack/zhetri.f b/costa/native/external/lapack/zhetri.f new file mode 100644 index 000000000..4115937fb --- /dev/null +++ b/costa/native/external/lapack/zhetri.f @@ -0,0 +1,328 @@ + SUBROUTINE ZHETRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZHETRI computes the inverse of a complex Hermitian indefinite matrix +* A using the factorization A = U*D*U**H or A = L*D*L**H computed by +* ZHETRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**H; +* = 'L': Lower triangular, form is A = L*D*L**H. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the block diagonal matrix D and the multipliers +* used to obtain the factor U or L as computed by ZHETRF. +* +* On exit, if INFO = 0, the (Hermitian) inverse of the original +* matrix. If UPLO = 'U', the upper triangular part of the +* inverse is formed and the part of A below the diagonal is not +* referenced; if UPLO = 'L' the lower triangular part of the +* inverse is formed and the part of A above the diagonal is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by ZHETRF. +* +* WORK (workspace) COMPLEX*16 array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +* inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + COMPLEX*16 CONE, ZERO + PARAMETER ( ONE = 1.0D+0, CONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP, KSTEP + DOUBLE PRECISION AK, AKP1, D, T + COMPLEX*16 AKKP1, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZHEMV, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / DBLE( A( K, K ) ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL ZCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL ZHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - DBLE( ZDOTC( K-1, WORK, 1, A( 1, + $ K ), 1 ) ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K+1 ) ) + AK = DBLE( A( K, K ) ) / T + AKP1 = DBLE( A( K+1, K+1 ) ) / T + AKKP1 = A( K, K+1 ) / T + D = T*( AK*AKP1-ONE ) + A( K, K ) = AKP1 / D + A( K+1, K+1 ) = AK / D + A( K, K+1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL ZCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL ZHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - DBLE( ZDOTC( K-1, WORK, 1, A( 1, + $ K ), 1 ) ) + A( K, K+1 ) = A( K, K+1 ) - + $ ZDOTC( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 ) + CALL ZCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 ) + CALL ZHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K+1 ), 1 ) + A( K+1, K+1 ) = A( K+1, K+1 ) - + $ DBLE( ZDOTC( K-1, WORK, 1, A( 1, K+1 ), + $ 1 ) ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + CALL ZSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + DO 40 J = KP + 1, K - 1 + TEMP = DCONJG( A( J, K ) ) + A( J, K ) = DCONJG( A( KP, J ) ) + A( KP, J ) = TEMP + 40 CONTINUE + A( KP, K ) = DCONJG( A( KP, K ) ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K+1 ) + A( K, K+1 ) = A( KP, K+1 ) + A( KP, K+1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + GO TO 30 + 50 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 60 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / DBLE( A( K, K ) ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL ZCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL ZHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK, + $ 1, ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - DBLE( ZDOTC( N-K, WORK, 1, + $ A( K+1, K ), 1 ) ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K-1 ) ) + AK = DBLE( A( K-1, K-1 ) ) / T + AKP1 = DBLE( A( K, K ) ) / T + AKKP1 = A( K, K-1 ) / T + D = T*( AK*AKP1-ONE ) + A( K-1, K-1 ) = AKP1 / D + A( K, K ) = AK / D + A( K, K-1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL ZCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL ZHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK, + $ 1, ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - DBLE( ZDOTC( N-K, WORK, 1, + $ A( K+1, K ), 1 ) ) + A( K, K-1 ) = A( K, K-1 ) - + $ ZDOTC( N-K, A( K+1, K ), 1, A( K+1, K-1 ), + $ 1 ) + CALL ZCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 ) + CALL ZHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK, + $ 1, ZERO, A( K+1, K-1 ), 1 ) + A( K-1, K-1 ) = A( K-1, K-1 ) - + $ DBLE( ZDOTC( N-K, WORK, 1, A( K+1, K-1 ), + $ 1 ) ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + DO 70 J = K + 1, KP - 1 + TEMP = DCONJG( A( J, K ) ) + A( J, K ) = DCONJG( A( KP, J ) ) + A( KP, J ) = TEMP + 70 CONTINUE + A( KP, K ) = DCONJG( A( KP, K ) ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K-1 ) + A( K, K-1 ) = A( KP, K-1 ) + A( KP, K-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + GO TO 60 + 80 CONTINUE + END IF +* + RETURN +* +* End of ZHETRI +* + END diff --git a/costa/native/external/lapack/zhetrs.f b/costa/native/external/lapack/zhetrs.f new file mode 100644 index 000000000..dd416f35d --- /dev/null +++ b/costa/native/external/lapack/zhetrs.f @@ -0,0 +1,394 @@ + SUBROUTINE ZHETRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZHETRS solves a system of linear equations A*X = B with a complex +* Hermitian matrix A using the factorization A = U*D*U**H or +* A = L*D*L**H computed by ZHETRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**H; +* = 'L': Lower triangular, form is A = L*D*L**H. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by ZHETRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by ZHETRF. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP + DOUBLE PRECISION S + COMPLEX*16 AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZGEMV, ZGERU, ZLACGV, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCONJG, MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U'. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL ZGERU( K-1, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + S = DBLE( ONE ) / DBLE( A( K, K ) ) + CALL ZDSCAL( NRHS, S, B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL ZSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL ZGERU( K-2, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL ZGERU( K-2, NRHS, -ONE, A( 1, K-1 ), 1, B( K-1, 1 ), + $ LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K-1, K ) + AKM1 = A( K-1, K-1 ) / AKM1K + AK = A( K, K ) / DCONJG( AKM1K ) + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / DCONJG( AKM1K ) + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U'*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U'(K)), where U(K) is the transformation +* stored in column K of A. +* + IF( K.GT.1 ) THEN + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + END IF +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U'(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.GT.1 ) THEN + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) +* + CALL ZLACGV( NRHS, B( K+1, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K+1 ), 1, ONE, B( K+1, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K+1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L'. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL ZGERU( N-K, NRHS, -ONE, A( K+1, K ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + S = DBLE( ONE ) / DBLE( A( K, K ) ) + CALL ZDSCAL( NRHS, S, B( K, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL ZSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL ZGERU( N-K-1, NRHS, -ONE, A( K+2, K ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL ZGERU( N-K-1, NRHS, -ONE, A( K+2, K+1 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K+1, K ) + AKM1 = A( K, K ) / DCONJG( AKM1K ) + AK = A( K+1, K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / DCONJG( AKM1K ) + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L'*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L'(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) THEN + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, A( K+1, K ), 1, ONE, + $ B( K, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + END IF +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L'(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, A( K+1, K ), 1, ONE, + $ B( K, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) +* + CALL ZLACGV( NRHS, B( K-1, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, A( K+1, K-1 ), 1, ONE, + $ B( K-1, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K-1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of ZHETRS +* + END diff --git a/costa/native/external/lapack/zhgeqz.f b/costa/native/external/lapack/zhgeqz.f new file mode 100644 index 000000000..1e29d9882 --- /dev/null +++ b/costa/native/external/lapack/zhgeqz.f @@ -0,0 +1,735 @@ + SUBROUTINE ZHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, Q, LDQ, Z, LDZ, WORK, LWORK, + $ RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, COMPZ, JOB + INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), Q( LDQ, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZHGEQZ implements a single-shift version of the QZ +* method for finding the generalized eigenvalues w(i)=ALPHA(i)/BETA(i) +* of the equation +* +* det( A - w(i) B ) = 0 +* +* If JOB='S', then the pair (A,B) is simultaneously +* reduced to Schur form (i.e., A and B are both upper triangular) by +* applying one unitary tranformation (usually called Q) on the left and +* another (usually called Z) on the right. The diagonal elements of +* A are then ALPHA(1),...,ALPHA(N), and of B are BETA(1),...,BETA(N). +* +* If JOB='S' and COMPQ and COMPZ are 'V' or 'I', then the unitary +* transformations used to reduce (A,B) are accumulated into the arrays +* Q and Z s.t.: +* +* Q(in) A(in) Z(in)* = Q(out) A(out) Z(out)* +* Q(in) B(in) Z(in)* = Q(out) B(out) Z(out)* +* +* Ref: C.B. Moler & G.W. Stewart, "An Algorithm for Generalized Matrix +* Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), +* pp. 241--256. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* = 'E': compute only ALPHA and BETA. A and B will not +* necessarily be put into generalized Schur form. +* = 'S': put A and B into generalized Schur form, as well +* as computing ALPHA and BETA. +* +* COMPQ (input) CHARACTER*1 +* = 'N': do not modify Q. +* = 'V': multiply the array Q on the right by the conjugate +* transpose of the unitary tranformation that is +* applied to the left side of A and B to reduce them +* to Schur form. +* = 'I': like COMPQ='V', except that Q will be initialized to +* the identity first. +* +* COMPZ (input) CHARACTER*1 +* = 'N': do not modify Z. +* = 'V': multiply the array Z on the right by the unitary +* tranformation that is applied to the right side of +* A and B to reduce them to Schur form. +* = 'I': like COMPZ='V', except that Z will be initialized to +* the identity first. +* +* N (input) INTEGER +* The order of the matrices A, B, Q, and Z. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that A is already upper triangular in rows and +* columns 1:ILO-1 and IHI+1:N. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA, N) +* On entry, the N-by-N upper Hessenberg matrix A. Elements +* below the subdiagonal must be zero. +* If JOB='S', then on exit A and B will have been +* simultaneously reduced to upper triangular form. +* If JOB='E', then on exit A will have been destroyed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max( 1, N ). +* +* B (input/output) COMPLEX*16 array, dimension (LDB, N) +* On entry, the N-by-N upper triangular matrix B. Elements +* below the diagonal must be zero. +* If JOB='S', then on exit A and B will have been +* simultaneously reduced to upper triangular form. +* If JOB='E', then on exit B will have been destroyed. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max( 1, N ). +* +* ALPHA (output) COMPLEX*16 array, dimension (N) +* The diagonal elements of A when the pair (A,B) has been +* reduced to Schur form. ALPHA(i)/BETA(i) i=1,...,N +* are the generalized eigenvalues. +* +* BETA (output) COMPLEX*16 array, dimension (N) +* The diagonal elements of B when the pair (A,B) has been +* reduced to Schur form. ALPHA(i)/BETA(i) i=1,...,N +* are the generalized eigenvalues. A and B are normalized +* so that BETA(1),...,BETA(N) are non-negative real numbers. +* +* Q (input/output) COMPLEX*16 array, dimension (LDQ, N) +* If COMPQ='N', then Q will not be referenced. +* If COMPQ='V' or 'I', then the conjugate transpose of the +* unitary transformations which are applied to A and B on +* the left will be applied to the array Q on the right. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= 1. +* If COMPQ='V' or 'I', then LDQ >= N. +* +* Z (input/output) COMPLEX*16 array, dimension (LDZ, N) +* If COMPZ='N', then Z will not be referenced. +* If COMPZ='V' or 'I', then the unitary transformations which +* are applied to A and B on the right will be applied to the +* array Z on the right. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1. +* If COMPZ='V' or 'I', then LDZ >= N. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO >= 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* = 1,...,N: the QZ iteration did not converge. (A,B) is not +* in Schur form, but ALPHA(i) and BETA(i), +* i=INFO+1,...,N should be correct. +* = N+1,...,2*N: the shift calculation failed. (A,B) is not +* in Schur form, but ALPHA(i) and BETA(i), +* i=INFO-N+1,...,N should be correct. +* > 2*N: various "impossible" errors. +* +* Further Details +* =============== +* +* We assume that complex ABS works as long as its value is less than +* overflow. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION HALF + PARAMETER ( HALF = 0.5D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ILAZR2, ILAZRO, ILQ, ILSCHR, ILZ, LQUERY + INTEGER ICOMPQ, ICOMPZ, IFIRST, IFRSTM, IITER, ILAST, + $ ILASTM, IN, ISCHUR, ISTART, J, JC, JCH, JITER, + $ JR, MAXIT + DOUBLE PRECISION ABSB, ANORM, ASCALE, ATOL, BNORM, BSCALE, BTOL, + $ C, SAFMIN, TEMP, TEMP2, TEMPR, ULP + COMPLEX*16 ABI22, AD11, AD12, AD21, AD22, CTEMP, CTEMP2, + $ CTEMP3, ESHIFT, RTDISC, S, SHIFT, SIGNBC, T, + $ U12, X +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHS + EXTERNAL LSAME, DLAMCH, ZLANHS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARTG, ZLASET, ZROT, ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN, + $ SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION ABS1 +* .. +* .. Statement Function definitions .. + ABS1( X ) = ABS( DBLE( X ) ) + ABS( DIMAG( X ) ) +* .. +* .. Executable Statements .. +* +* Decode JOB, COMPQ, COMPZ +* + IF( LSAME( JOB, 'E' ) ) THEN + ILSCHR = .FALSE. + ISCHUR = 1 + ELSE IF( LSAME( JOB, 'S' ) ) THEN + ILSCHR = .TRUE. + ISCHUR = 2 + ELSE + ISCHUR = 0 + END IF +* + IF( LSAME( COMPQ, 'N' ) ) THEN + ILQ = .FALSE. + ICOMPQ = 1 + ELSE IF( LSAME( COMPQ, 'V' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 2 + ELSE IF( LSAME( COMPQ, 'I' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 3 + ELSE + ICOMPQ = 0 + END IF +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ILZ = .FALSE. + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 2 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 3 + ELSE + ICOMPZ = 0 + END IF +* +* Check Argument Values +* + INFO = 0 + WORK( 1 ) = MAX( 1, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( ISCHUR.EQ.0 ) THEN + INFO = -1 + ELSE IF( ICOMPQ.EQ.0 ) THEN + INFO = -2 + ELSE IF( ICOMPZ.EQ.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( ILO.LT.1 ) THEN + INFO = -5 + ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN + INFO = -6 + ELSE IF( LDA.LT.N ) THEN + INFO = -8 + ELSE IF( LDB.LT.N ) THEN + INFO = -10 + ELSE IF( LDQ.LT.1 .OR. ( ILQ .AND. LDQ.LT.N ) ) THEN + INFO = -14 + ELSE IF( LDZ.LT.1 .OR. ( ILZ .AND. LDZ.LT.N ) ) THEN + INFO = -16 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHGEQZ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* +* WORK( 1 ) = CMPLX( 1 ) + IF( N.LE.0 ) THEN + WORK( 1 ) = DCMPLX( 1 ) + RETURN + END IF +* +* Initialize Q and Z +* + IF( ICOMPQ.EQ.3 ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, Q, LDQ ) + IF( ICOMPZ.EQ.3 ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, Z, LDZ ) +* +* Machine Constants +* + IN = IHI + 1 - ILO + SAFMIN = DLAMCH( 'S' ) + ULP = DLAMCH( 'E' )*DLAMCH( 'B' ) + ANORM = ZLANHS( 'F', IN, A( ILO, ILO ), LDA, RWORK ) + BNORM = ZLANHS( 'F', IN, B( ILO, ILO ), LDB, RWORK ) + ATOL = MAX( SAFMIN, ULP*ANORM ) + BTOL = MAX( SAFMIN, ULP*BNORM ) + ASCALE = ONE / MAX( SAFMIN, ANORM ) + BSCALE = ONE / MAX( SAFMIN, BNORM ) +* +* +* Set Eigenvalues IHI+1:N +* + DO 10 J = IHI + 1, N + ABSB = ABS( B( J, J ) ) + IF( ABSB.GT.SAFMIN ) THEN + SIGNBC = DCONJG( B( J, J ) / ABSB ) + B( J, J ) = ABSB + IF( ILSCHR ) THEN + CALL ZSCAL( J-1, SIGNBC, B( 1, J ), 1 ) + CALL ZSCAL( J, SIGNBC, A( 1, J ), 1 ) + ELSE + A( J, J ) = A( J, J )*SIGNBC + END IF + IF( ILZ ) + $ CALL ZSCAL( N, SIGNBC, Z( 1, J ), 1 ) + ELSE + B( J, J ) = CZERO + END IF + ALPHA( J ) = A( J, J ) + BETA( J ) = B( J, J ) + 10 CONTINUE +* +* If IHI < ILO, skip QZ steps +* + IF( IHI.LT.ILO ) + $ GO TO 190 +* +* MAIN QZ ITERATION LOOP +* +* Initialize dynamic indices +* +* Eigenvalues ILAST+1:N have been found. +* Column operations modify rows IFRSTM:whatever +* Row operations modify columns whatever:ILASTM +* +* If only eigenvalues are being computed, then +* IFRSTM is the row of the last splitting row above row ILAST; +* this is always at least ILO. +* IITER counts iterations since the last eigenvalue was found, +* to tell when to use an extraordinary shift. +* MAXIT is the maximum number of QZ sweeps allowed. +* + ILAST = IHI + IF( ILSCHR ) THEN + IFRSTM = 1 + ILASTM = N + ELSE + IFRSTM = ILO + ILASTM = IHI + END IF + IITER = 0 + ESHIFT = CZERO + MAXIT = 30*( IHI-ILO+1 ) +* + DO 170 JITER = 1, MAXIT +* +* Check for too many iterations. +* + IF( JITER.GT.MAXIT ) + $ GO TO 180 +* +* Split the matrix if possible. +* +* Two tests: +* 1: A(j,j-1)=0 or j=ILO +* 2: B(j,j)=0 +* +* Special case: j=ILAST +* + IF( ILAST.EQ.ILO ) THEN + GO TO 60 + ELSE + IF( ABS1( A( ILAST, ILAST-1 ) ).LE.ATOL ) THEN + A( ILAST, ILAST-1 ) = CZERO + GO TO 60 + END IF + END IF +* + IF( ABS( B( ILAST, ILAST ) ).LE.BTOL ) THEN + B( ILAST, ILAST ) = CZERO + GO TO 50 + END IF +* +* General case: j= 0. +* +* AP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by ZHPTRF, stored as a +* packed triangular matrix. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by ZHPTRF. +* +* ANORM (input) DOUBLE PRECISION +* The 1-norm of the original matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IP, KASE + DOUBLE PRECISION AINVNM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHPTRS, ZLACON +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + IP = N*( N+1 ) / 2 + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP - I + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + IP = 1 + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP + N - I + 1 + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L') or inv(U*D*U'). +* + CALL ZHPTRS( UPLO, N, 1, AP, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of ZHPCON +* + END diff --git a/costa/native/external/lapack/zhpev.f b/costa/native/external/lapack/zhpev.f new file mode 100644 index 000000000..110af19ef --- /dev/null +++ b/costa/native/external/lapack/zhpev.f @@ -0,0 +1,197 @@ + SUBROUTINE ZHPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, RWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZHPEV computes all the eigenvalues and, optionally, eigenvectors of a +* complex Hermitian matrix in packed storage. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, AP is overwritten by values generated during the +* reduction to tridiagonal form. If UPLO = 'U', the diagonal +* and first superdiagonal of the tridiagonal matrix T overwrite +* the corresponding elements of A, and if UPLO = 'L', the +* diagonal and first subdiagonal of T overwrite the +* corresponding elements of A. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) COMPLEX*16 array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +* eigenvectors of the matrix A, with the i-th column of Z +* holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) COMPLEX*16 array, dimension (max(1, 2*N-1)) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (max(1, 3*N-2)) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL WANTZ + INTEGER IINFO, IMAX, INDE, INDRWK, INDTAU, INDWRK, + $ ISCALE + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHP + EXTERNAL LSAME, DLAMCH, ZLANHP +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTERF, XERBLA, ZDSCAL, ZHPTRD, ZSTEQR, + $ ZUPGTR +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -7 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPEV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AP( 1 ) + RWORK( 1 ) = 1 + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = ZLANHP( 'M', UPLO, N, AP, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL ZDSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + END IF +* +* Call ZHPTRD to reduce Hermitian packed matrix to tridiagonal form. +* + INDE = 1 + INDTAU = 1 + CALL ZHPTRD( UPLO, N, AP, W, RWORK( INDE ), WORK( INDTAU ), + $ IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* ZUPGTR to generate the orthogonal matrix, then call ZSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + INDWRK = INDTAU + N + CALL ZUPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + INDRWK = INDE + N + CALL ZSTEQR( JOBZ, N, W, RWORK( INDE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + RETURN +* +* End of ZHPEV +* + END diff --git a/costa/native/external/lapack/zhpevd.f b/costa/native/external/lapack/zhpevd.f new file mode 100644 index 000000000..e4dbf766e --- /dev/null +++ b/costa/native/external/lapack/zhpevd.f @@ -0,0 +1,280 @@ + SUBROUTINE ZHPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, + $ RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZHPEVD computes all the eigenvalues and, optionally, eigenvectors of +* a complex Hermitian matrix A in packed storage. If eigenvectors are +* desired, it uses a divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, AP is overwritten by values generated during the +* reduction to tridiagonal form. If UPLO = 'U', the diagonal +* and first superdiagonal of the tridiagonal matrix T overwrite +* the corresponding elements of A, and if UPLO = 'L', the +* diagonal and first subdiagonal of T overwrite the +* corresponding elements of A. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) COMPLEX*16 array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +* eigenvectors of the matrix A, with the i-th column of Z +* holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of array WORK. +* If N <= 1, LWORK must be at least 1. +* If JOBZ = 'N' and N > 1, LWORK must be at least N. +* If JOBZ = 'V' and N > 1, LWORK must be at least 2*N. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace/output) DOUBLE PRECISION array, +* dimension (LRWORK) +* On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +* +* LRWORK (input) INTEGER +* The dimension of array RWORK. +* If N <= 1, LRWORK must be at least 1. +* If JOBZ = 'N' and N > 1, LRWORK must be at least N. +* If JOBZ = 'V' and N > 1, LRWORK must be at least +* 1 + 5*N + 2*N**2. +* +* If LRWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the RWORK array, +* returns this value as the first entry of the RWORK array, and +* no error message related to LRWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of array IWORK. +* If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +* If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the algorithm failed to converge; i +* off-diagonal elements of an intermediate tridiagonal +* form did not converge to zero. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDRWK, INDTAU, INDWRK, + $ ISCALE, LIWMIN, LLRWK, LLWRK, LRWMIN, LWMIN + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHP + EXTERNAL LSAME, DLAMCH, ZLANHP +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTERF, XERBLA, ZDSCAL, ZHPTRD, ZSTEDC, + $ ZUPMTR +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LWMIN = 1 + LIWMIN = 1 + LRWMIN = 1 + ELSE + IF( WANTZ ) THEN + LWMIN = 2*N + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + LRWMIN = N + LIWMIN = 1 + END IF + END IF + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -9 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPEVD', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AP( 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = ZLANHP( 'M', UPLO, N, AP, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL ZDSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + END IF +* +* Call ZHPTRD to reduce Hermitian packed matrix to tridiagonal form. +* + INDE = 1 + INDTAU = 1 + INDRWK = INDE + N + INDWRK = INDTAU + N + LLWRK = LWORK - INDWRK + 1 + LLRWK = LRWORK - INDRWK + 1 + CALL ZHPTRD( UPLO, N, AP, W, RWORK( INDE ), WORK( INDTAU ), + $ IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* ZUPGTR to generate the orthogonal matrix, then call ZSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL ZSTEDC( 'I', N, W, RWORK( INDE ), Z, LDZ, WORK( INDWRK ), + $ LLWRK, RWORK( INDRWK ), LLRWK, IWORK, LIWORK, + $ INFO ) + CALL ZUPMTR( 'L', UPLO, 'N', N, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of ZHPEVD +* + END diff --git a/costa/native/external/lapack/zhpevx.f b/costa/native/external/lapack/zhpevx.f new file mode 100644 index 000000000..ffda9fb7a --- /dev/null +++ b/costa/native/external/lapack/zhpevx.f @@ -0,0 +1,384 @@ + SUBROUTINE ZHPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, WORK, RWORK, IWORK, + $ IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDZ, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZHPEVX computes selected eigenvalues and, optionally, eigenvectors +* of a complex Hermitian matrix A in packed storage. +* Eigenvalues/vectors can be selected by specifying either a range of +* values or a range of indices for the desired eigenvalues. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found; +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found; +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, AP is overwritten by values generated during the +* reduction to tridiagonal form. If UPLO = 'U', the diagonal +* and first superdiagonal of the tridiagonal matrix T overwrite +* the corresponding elements of A, and if UPLO = 'L', the +* diagonal and first subdiagonal of T overwrite the +* corresponding elements of A. +* +* VL (input) DOUBLE PRECISION +* VU (input) DOUBLE PRECISION +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) DOUBLE PRECISION +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing AP to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*DLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*DLAMCH('S'). +* +* See "Computing Small Singular Values of Bidiagonal Matrices +* with Guaranteed High Relative Accuracy," by Demmel and +* Kahan, LAPACK Working Note #3. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the selected eigenvalues in ascending order. +* +* Z (output) COMPLEX*16 array, dimension (LDZ, max(1,M)) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If an eigenvector fails to converge, then that column of Z +* contains the latest approximation to the eigenvector, and +* the index of the eigenvector is returned in IFAIL. +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (7*N) +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, then i eigenvectors failed to converge. +* Their indices are stored in array IFAIL. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWK, INDRWK, INDTAU, INDWRK, ISCALE, + $ ITMP1, J, JJ, NSPLIT + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHP + EXTERNAL LSAME, DLAMCH, ZLANHP +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZDSCAL, + $ ZHPTRD, ZSTEIN, ZSTEQR, ZSWAP, ZUPGTR, ZUPMTR +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) ) + $ THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -7 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -9 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -14 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPEVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = AP( 1 ) + ELSE + IF( VL.LT.DBLE( AP( 1 ) ) .AND. VU.GE.DBLE( AP( 1 ) ) ) THEN + M = 1 + W( 1 ) = AP( 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + END IF + ANRM = ZLANHP( 'M', UPLO, N, AP, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL ZDSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call ZHPTRD to reduce Hermitian packed matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N + INDTAU = 1 + INDWRK = INDTAU + N + CALL ZHPTRD( UPLO, N, AP, RWORK( INDD ), RWORK( INDE ), + $ WORK( INDTAU ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call DSTERF or ZUPGTR and ZSTEQR. If this fails +* for some eigenvalue, then try DSTEBZ. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ ( ABSTOL.LE.ZERO ) ) THEN + CALL DCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL DSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL ZUPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL ZSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 20 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL ZSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by ZSTEIN. +* + INDWRK = INDTAU + N + CALL ZUPMTR( 'L', UPLO, 'N', N, M, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 20 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 40 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 30 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 30 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 40 CONTINUE + END IF +* + RETURN +* +* End of ZHPEVX +* + END diff --git a/costa/native/external/lapack/zhpgst.f b/costa/native/external/lapack/zhpgst.f new file mode 100644 index 000000000..69aeec055 --- /dev/null +++ b/costa/native/external/lapack/zhpgst.f @@ -0,0 +1,216 @@ + SUBROUTINE ZHPGST( ITYPE, UPLO, N, AP, BP, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, N +* .. +* .. Array Arguments .. + COMPLEX*16 AP( * ), BP( * ) +* .. +* +* Purpose +* ======= +* +* ZHPGST reduces a complex Hermitian-definite generalized +* eigenproblem to standard form, using packed storage. +* +* If ITYPE = 1, the problem is A*x = lambda*B*x, +* and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) +* +* If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +* B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. +* +* B must have been previously factorized as U**H*U or L*L**H by ZPPTRF. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H); +* = 2 or 3: compute U*A*U**H or L**H*A*L. +* +* UPLO (input) CHARACTER +* = 'U': Upper triangle of A is stored and B is factored as +* U**H*U; +* = 'L': Lower triangle of A is stored and B is factored as +* L*L**H. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* AP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, if INFO = 0, the transformed matrix, stored in the +* same format as A. +* +* BP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The triangular factor from the Cholesky factorization of B, +* stored in the same format as A, as returned by ZPPTRF. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, HALF + PARAMETER ( ONE = 1.0D+0, HALF = 0.5D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, J1, J1J1, JJ, K, K1, K1K1, KK + DOUBLE PRECISION AJJ, AKK, BJJ, BKK + COMPLEX*16 CT +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZDSCAL, ZHPMV, ZHPR2, ZTPMV, + $ ZTPSV +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPGST', -INFO ) + RETURN + END IF +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U')*A*inv(U) +* +* J1 and JJ are the indices of A(1,j) and A(j,j) +* + JJ = 0 + DO 10 J = 1, N + J1 = JJ + 1 + JJ = JJ + J +* +* Compute the j-th column of the upper triangle of A +* + AP( JJ ) = DBLE( AP( JJ ) ) + BJJ = BP( JJ ) + CALL ZTPSV( UPLO, 'Conjugate transpose', 'Non-unit', J, + $ BP, AP( J1 ), 1 ) + CALL ZHPMV( UPLO, J-1, -CONE, AP, BP( J1 ), 1, CONE, + $ AP( J1 ), 1 ) + CALL ZDSCAL( J-1, ONE / BJJ, AP( J1 ), 1 ) + AP( JJ ) = ( AP( JJ )-ZDOTC( J-1, AP( J1 ), 1, BP( J1 ), + $ 1 ) ) / BJJ + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L') +* +* KK and K1K1 are the indices of A(k,k) and A(k+1,k+1) +* + KK = 1 + DO 20 K = 1, N + K1K1 = KK + N - K + 1 +* +* Update the lower triangle of A(k:n,k:n) +* + AKK = AP( KK ) + BKK = BP( KK ) + AKK = AKK / BKK**2 + AP( KK ) = AKK + IF( K.LT.N ) THEN + CALL ZDSCAL( N-K, ONE / BKK, AP( KK+1 ), 1 ) + CT = -HALF*AKK + CALL ZAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 ) + CALL ZHPR2( UPLO, N-K, -CONE, AP( KK+1 ), 1, + $ BP( KK+1 ), 1, AP( K1K1 ) ) + CALL ZAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 ) + CALL ZTPSV( UPLO, 'No transpose', 'Non-unit', N-K, + $ BP( K1K1 ), AP( KK+1 ), 1 ) + END IF + KK = K1K1 + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U' +* +* K1 and KK are the indices of A(1,k) and A(k,k) +* + KK = 0 + DO 30 K = 1, N + K1 = KK + 1 + KK = KK + K +* +* Update the upper triangle of A(1:k,1:k) +* + AKK = AP( KK ) + BKK = BP( KK ) + CALL ZTPMV( UPLO, 'No transpose', 'Non-unit', K-1, BP, + $ AP( K1 ), 1 ) + CT = HALF*AKK + CALL ZAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 ) + CALL ZHPR2( UPLO, K-1, CONE, AP( K1 ), 1, BP( K1 ), 1, + $ AP ) + CALL ZAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 ) + CALL ZDSCAL( K-1, BKK, AP( K1 ), 1 ) + AP( KK ) = AKK*BKK**2 + 30 CONTINUE + ELSE +* +* Compute L'*A*L +* +* JJ and J1J1 are the indices of A(j,j) and A(j+1,j+1) +* + JJ = 1 + DO 40 J = 1, N + J1J1 = JJ + N - J + 1 +* +* Compute the j-th column of the lower triangle of A +* + AJJ = AP( JJ ) + BJJ = BP( JJ ) + AP( JJ ) = AJJ*BJJ + ZDOTC( N-J, AP( JJ+1 ), 1, + $ BP( JJ+1 ), 1 ) + CALL ZDSCAL( N-J, BJJ, AP( JJ+1 ), 1 ) + CALL ZHPMV( UPLO, N-J, CONE, AP( J1J1 ), BP( JJ+1 ), 1, + $ CONE, AP( JJ+1 ), 1 ) + CALL ZTPMV( UPLO, 'Conjugate transpose', 'Non-unit', + $ N-J+1, BP( JJ ), AP( JJ ), 1 ) + JJ = J1J1 + 40 CONTINUE + END IF + END IF + RETURN +* +* End of ZHPGST +* + END diff --git a/costa/native/external/lapack/zhpgv.f b/costa/native/external/lapack/zhpgv.f new file mode 100644 index 000000000..ed5589ad1 --- /dev/null +++ b/costa/native/external/lapack/zhpgv.f @@ -0,0 +1,197 @@ + SUBROUTINE ZHPGV( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, + $ RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AP( * ), BP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZHPGV computes all the eigenvalues and, optionally, the eigenvectors +* of a complex generalized Hermitian-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +* Here A and B are assumed to be Hermitian, stored in packed format, +* and B is also positive definite. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* AP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the contents of AP are destroyed. +* +* BP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* B, packed columnwise in a linear array. The j-th column of B +* is stored in the array BP as follows: +* if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +* if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +* +* On exit, the triangular factor U or L from the Cholesky +* factorization B = U**H*U or B = L*L**H, in the same storage +* format as B. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) COMPLEX*16 array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +* eigenvectors. The eigenvectors are normalized as follows: +* if ITYPE = 1 or 2, Z**H*B*Z = I; +* if ITYPE = 3, Z**H*inv(B)*Z = I. +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) COMPLEX*16 array, dimension (max(1, 2*N-1)) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (max(1, 3*N-2)) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: ZPPTRF or ZHPEV returned an error code: +* <= N: if INFO = i, ZHPEV failed to converge; +* i off-diagonal elements of an intermediate +* tridiagonal form did not convergeto zero; +* > N: if INFO = N + i, for 1 <= i <= n, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, WANTZ + CHARACTER TRANS + INTEGER J, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHPEV, ZHPGST, ZPPTRF, ZTPMV, ZTPSV +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPGV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL ZPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL ZHPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL ZHPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, RWORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + DO 10 J = 1, NEIG + CALL ZTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, NEIG + CALL ZTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF + RETURN +* +* End of ZHPGV +* + END diff --git a/costa/native/external/lapack/zhpgvd.f b/costa/native/external/lapack/zhpgvd.f new file mode 100644 index 000000000..56878c3e4 --- /dev/null +++ b/costa/native/external/lapack/zhpgvd.f @@ -0,0 +1,291 @@ + SUBROUTINE ZHPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, + $ LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AP( * ), BP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZHPGVD computes all the eigenvalues and, optionally, the eigenvectors +* of a complex generalized Hermitian-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +* B are assumed to be Hermitian, stored in packed format, and B is also +* positive definite. +* If eigenvectors are desired, it uses a divide and conquer algorithm. +* +* The divide and conquer algorithm makes very mild assumptions about +* floating point arithmetic. It will work on machines with a guard +* digit in add/subtract, or on those binary machines without guard +* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +* Cray-2. It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* AP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the contents of AP are destroyed. +* +* BP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* B, packed columnwise in a linear array. The j-th column of B +* is stored in the array BP as follows: +* if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +* if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +* +* On exit, the triangular factor U or L from the Cholesky +* factorization B = U**H*U or B = L*L**H, in the same storage +* format as B. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, the eigenvalues in ascending order. +* +* Z (output) COMPLEX*16 array, dimension (LDZ, N) +* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +* eigenvectors. The eigenvectors are normalized as follows: +* if ITYPE = 1 or 2, Z**H*B*Z = I; +* if ITYPE = 3, Z**H*inv(B)*Z = I. +* If JOBZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of array WORK. +* If N <= 1, LWORK >= 1. +* If JOBZ = 'N' and N > 1, LWORK >= N. +* If JOBZ = 'V' and N > 1, LWORK >= 2*N. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (LRWORK) +* On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +* +* LRWORK (input) INTEGER +* The dimension of array RWORK. +* If N <= 1, LRWORK >= 1. +* If JOBZ = 'N' and N > 1, LRWORK >= N. +* If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2. +* +* If LRWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the RWORK array, +* returns this value as the first entry of the RWORK array, and +* no error message related to LRWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of array IWORK. +* If JOBZ = 'N' or N <= 1, LIWORK >= 1. +* If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: ZPPTRF or ZHPEVD returned an error code: +* <= N: if INFO = i, ZHPEVD failed to converge; +* i off-diagonal elements of an intermediate +* tridiagonal form did not convergeto zero; +* > N: if INFO = N + i, for 1 <= i <= n, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER J, LIWMIN, LRWMIN, LWMIN, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHPEVD, ZHPGST, ZPPTRF, ZTPMV, ZTPSV +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LWMIN = 1 + LIWMIN = 1 + LRWMIN = 1 + ELSE + IF( WANTZ ) THEN + LWMIN = 2*N + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + LRWMIN = N + LIWMIN = 1 + END IF + END IF + IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL ZPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL ZHPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL ZHPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, RWORK, + $ LRWORK, IWORK, LIWORK, INFO ) + LWMIN = MAX( DBLE( LWMIN ), DBLE( WORK( 1 ) ) ) + LRWMIN = MAX( DBLE( LRWMIN ), DBLE( RWORK( 1 ) ) ) + LIWMIN = MAX( DBLE( LIWMIN ), DBLE( IWORK( 1 ) ) ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + DO 10 J = 1, NEIG + CALL ZTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, NEIG + CALL ZTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of ZHPGVD +* + END diff --git a/costa/native/external/lapack/zhpgvx.f b/costa/native/external/lapack/zhpgvx.f new file mode 100644 index 000000000..f3c9d8b3c --- /dev/null +++ b/costa/native/external/lapack/zhpgvx.f @@ -0,0 +1,284 @@ + SUBROUTINE ZHPGVX( ITYPE, JOBZ, RANGE, UPLO, N, AP, BP, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, WORK, RWORK, + $ IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, ITYPE, IU, LDZ, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AP( * ), BP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZHPGVX computes selected eigenvalues and, optionally, eigenvectors +* of a complex generalized Hermitian-definite eigenproblem, of the form +* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +* B are assumed to be Hermitian, stored in packed format, and B is also +* positive definite. Eigenvalues and eigenvectors can be selected by +* specifying either a range of values or a range of indices for the +* desired eigenvalues. +* +* Arguments +* ========= +* +* ITYPE (input) INTEGER +* Specifies the problem type to be solved: +* = 1: A*x = (lambda)*B*x +* = 2: A*B*x = (lambda)*x +* = 3: B*A*x = (lambda)*x +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found; +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found; +* = 'I': the IL-th through IU-th eigenvalues will be found. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangles of A and B are stored; +* = 'L': Lower triangles of A and B are stored. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* AP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the contents of AP are destroyed. +* +* BP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* B, packed columnwise in a linear array. The j-th column of B +* is stored in the array BP as follows: +* if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +* if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +* +* On exit, the triangular factor U or L from the Cholesky +* factorization B = U**H*U or B = L*L**H, in the same storage +* format as B. +* +* VL (input) DOUBLE PRECISION +* VU (input) DOUBLE PRECISION +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) DOUBLE PRECISION +* The absolute error tolerance for the eigenvalues. +* An approximate eigenvalue is accepted as converged +* when it is determined to lie in an interval [a,b] +* of width less than or equal to +* +* ABSTOL + EPS * max( |a|,|b| ) , +* +* where EPS is the machine precision. If ABSTOL is less than +* or equal to zero, then EPS*|T| will be used in its place, +* where |T| is the 1-norm of the tridiagonal matrix obtained +* by reducing AP to tridiagonal form. +* +* Eigenvalues will be computed most accurately when ABSTOL is +* set to twice the underflow threshold 2*DLAMCH('S'), not zero. +* If this routine returns with INFO>0, indicating that some +* eigenvectors did not converge, try setting ABSTOL to +* 2*DLAMCH('S'). +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* On normal exit, the first M elements contain the selected +* eigenvalues in ascending order. +* +* Z (output) COMPLEX*16 array, dimension (LDZ, N) +* If JOBZ = 'N', then Z is not referenced. +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix A +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* The eigenvectors are normalized as follows: +* if ITYPE = 1 or 2, Z**H*B*Z = I; +* if ITYPE = 3, Z**H*inv(B)*Z = I. +* +* If an eigenvector fails to converge, then that column of Z +* contains the latest approximation to the eigenvector, and the +* index of the eigenvector is returned in IFAIL. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (7*N) +* +* IWORK (workspace) INTEGER array, dimension (5*N) +* +* IFAIL (output) INTEGER array, dimension (N) +* If JOBZ = 'V', then if INFO = 0, the first M elements of +* IFAIL are zero. If INFO > 0, then IFAIL contains the +* indices of the eigenvectors that failed to converge. +* If JOBZ = 'N', then IFAIL is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: ZPPTRF or ZHPEVX returned an error code: +* <= N: if INFO = i, ZHPEVX failed to converge; +* i eigenvectors failed to converge. Their indices +* are stored in array IFAIL. +* > N: if INFO = N + i, for 1 <= i <= n, then the leading +* minor of order i of B is not positive definite. +* The factorization of B could not be completed and +* no eigenvalues or eigenvectors were computed. +* +* Further Details +* =============== +* +* Based on contributions by +* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, UPPER, VALEIG, WANTZ + CHARACTER TRANS + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHPEVX, ZHPGST, ZPPTRF, ZTPMV, ZTPSV +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -3 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN + INFO = -9 + ELSE IF( INDEIG .AND. IL.LT.1 ) THEN + INFO = -10 + ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN + INFO = -11 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPGVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL ZPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL ZHPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL ZHPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, ABSTOL, M, + $ W, Z, LDZ, WORK, RWORK, IWORK, IFAIL, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + IF( INFO.GT.0 ) + $ M = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + DO 10 J = 1, M + CALL ZTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U'*y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, M + CALL ZTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHPGVX +* + END diff --git a/costa/native/external/lapack/zhprfs.f b/costa/native/external/lapack/zhprfs.f new file mode 100644 index 000000000..4e822cfaa --- /dev/null +++ b/costa/native/external/lapack/zhprfs.f @@ -0,0 +1,337 @@ + SUBROUTINE ZHPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, + $ FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZHPRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is Hermitian indefinite +* and packed, and provides error bounds and backward error estimates +* for the solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The upper or lower triangle of the Hermitian matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* AFP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The factored form of the matrix A. AFP contains the block +* diagonal matrix D and the multipliers used to obtain the +* factor U or L from the factorization A = U*D*U**H or +* A = L*D*L**H as computed by ZHPTRF, stored as a packed +* triangular matrix. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by ZHPTRF. +* +* B (input) COMPLEX*16 array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) COMPLEX*16 array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by ZHPTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IK, J, K, KASE, KK, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZHPMV, ZHPTRS, ZLACON +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL ZHPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + KK = 1 + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + IK = KK + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 40 CONTINUE + RWORK( K ) = RWORK( K ) + ABS( DBLE( AP( KK+K-1 ) ) )* + $ XK + S + KK = KK + K + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + ABS( DBLE( AP( KK ) ) )*XK + IK = KK + 1 + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + KK = KK + ( N-K+1 ) + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL ZHPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + CALL ZAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL ZHPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL ZHPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of ZHPRFS +* + END diff --git a/costa/native/external/lapack/zhpsv.f b/costa/native/external/lapack/zhpsv.f new file mode 100644 index 000000000..7c06670cd --- /dev/null +++ b/costa/native/external/lapack/zhpsv.f @@ -0,0 +1,149 @@ + SUBROUTINE ZHPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZHPSV computes the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N Hermitian matrix stored in packed format and X +* and B are N-by-NRHS matrices. +* +* The diagonal pivoting method is used to factor A as +* A = U * D * U**H, if UPLO = 'U', or +* A = L * D * L**H, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, D is Hermitian and block diagonal with 1-by-1 +* and 2-by-2 diagonal blocks. The factored form of A is then used to +* solve the system of equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**H or A = L*D*L**H as computed by ZHPTRF, stored as +* a packed triangular matrix in the same storage format as A. +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D, as +* determined by ZHPTRF. If IPIV(k) > 0, then rows and columns +* k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +* diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +* then rows and columns k-1 and -IPIV(k) were interchanged and +* D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +* IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +* -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +* diagonal block. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, so the solution could not be +* computed. +* +* Further Details +* =============== +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the Hermitian matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = conjg(aji)) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHPTRF, ZHPTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPSV ', -INFO ) + RETURN + END IF +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL ZHPTRF( UPLO, N, AP, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZHPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* + END IF + RETURN +* +* End of ZHPSV +* + END diff --git a/costa/native/external/lapack/zhpsvx.f b/costa/native/external/lapack/zhpsvx.f new file mode 100644 index 000000000..54303daff --- /dev/null +++ b/costa/native/external/lapack/zhpsvx.f @@ -0,0 +1,279 @@ + SUBROUTINE ZHPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, + $ LDX, RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZHPSVX uses the diagonal pivoting factorization A = U*D*U**H or +* A = L*D*L**H to compute the solution to a complex system of linear +* equations A * X = B, where A is an N-by-N Hermitian matrix stored +* in packed format and X and B are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'N', the diagonal pivoting method is used to factor A as +* A = U * D * U**H, if UPLO = 'U', or +* A = L * D * L**H, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices and D is Hermitian and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. +* +* 2. If some D(i,i)=0, so that D is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 3. The system of equations is solved for X using the factored form +* of A. +* +* 4. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of A has been +* supplied on entry. +* = 'F': On entry, AFP and IPIV contain the factored form of +* A. AFP and IPIV will not be modified. +* = 'N': The matrix A will be copied to AFP and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The upper or lower triangle of the Hermitian matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* +* AFP (input or output) COMPLEX*16 array, dimension (N*(N+1)/2) +* If FACT = 'F', then AFP is an input argument and on entry +* contains the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**H or A = L*D*L**H as computed by ZHPTRF, stored as +* a packed triangular matrix in the same storage format as A. +* +* If FACT = 'N', then AFP is an output argument and on exit +* contains the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**H or A = L*D*L**H as computed by ZHPTRF, stored as +* a packed triangular matrix in the same storage format as A. +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains details of the interchanges and the block structure +* of D, as determined by ZHPTRF. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains details of the interchanges and the block structure +* of D, as determined by ZHPTRF. +* +* B (input) COMPLEX*16 array, dimension (LDB,NRHS) +* The N-by-NRHS right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) COMPLEX*16 array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) DOUBLE PRECISION +* The estimate of the reciprocal condition number of the matrix +* A. If RCOND is less than the machine precision (in +* particular, if RCOND = 0), the matrix is singular to working +* precision. This condition is indicated by a return code of +* INFO > 0. +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: D(i,i) is exactly zero. The factorization +* has been completed but the factor D is exactly +* singular, so the solution and error bounds could +* not be computed. RCOND = 0 is returned. +* = N+1: D is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* Further Details +* =============== +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the Hermitian matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = conjg(aji)) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT + DOUBLE PRECISION ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHP + EXTERNAL LSAME, DLAMCH, ZLANHP +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZHPCON, ZHPRFS, ZHPTRF, ZHPTRS, + $ ZLACPY +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL ZCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 ) + CALL ZHPTRF( UPLO, N, AFP, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = ZLANHP( 'I', UPLO, N, AP, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL ZHPCON( UPLO, N, AFP, IPIV, ANORM, RCOND, WORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution vectors X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZHPTRS( UPLO, N, NRHS, AFP, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL ZHPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, FERR, + $ BERR, WORK, RWORK, INFO ) +* + RETURN +* +* End of ZHPSVX +* + END diff --git a/costa/native/external/lapack/zhptrd.f b/costa/native/external/lapack/zhptrd.f new file mode 100644 index 000000000..00536f6b5 --- /dev/null +++ b/costa/native/external/lapack/zhptrd.f @@ -0,0 +1,238 @@ + SUBROUTINE ZHPTRD( UPLO, N, AP, D, E, TAU, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + COMPLEX*16 AP( * ), TAU( * ) +* .. +* +* Purpose +* ======= +* +* ZHPTRD reduces a complex Hermitian matrix A stored in packed form to +* real symmetric tridiagonal form T by a unitary similarity +* transformation: Q**H * A * Q = T. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* On exit, if UPLO = 'U', the diagonal and first superdiagonal +* of A are overwritten by the corresponding elements of the +* tridiagonal matrix T, and the elements above the first +* superdiagonal, with the array TAU, represent the unitary +* matrix Q as a product of elementary reflectors; if UPLO +* = 'L', the diagonal and first subdiagonal of A are over- +* written by the corresponding elements of the tridiagonal +* matrix T, and the elements below the first subdiagonal, with +* the array TAU, represent the unitary matrix Q as a product +* of elementary reflectors. See Further Details. +* +* D (output) DOUBLE PRECISION array, dimension (N) +* The diagonal elements of the tridiagonal matrix T: +* D(i) = A(i,i). +* +* E (output) DOUBLE PRECISION array, dimension (N-1) +* The off-diagonal elements of the tridiagonal matrix T: +* E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +* +* TAU (output) COMPLEX*16 array, dimension (N-1) +* The scalar factors of the elementary reflectors (see Further +* Details). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* If UPLO = 'U', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(n-1) . . . H(2) H(1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in AP, +* overwriting A(1:i-1,i+1), and tau is stored in TAU(i). +* +* If UPLO = 'L', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(1) H(2) . . . H(n-1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in AP, +* overwriting A(i+2:n,i), and tau is stored in TAU(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO, HALF + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ), + $ HALF = ( 0.5D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, I1, I1I1, II + COMPLEX*16 ALPHA, TAUI +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZHPMV, ZHPR2, ZLARFG +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPTRD', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A. +* I1 is the index in AP of A(1,I+1). +* + I1 = N*( N-1 ) / 2 + 1 + AP( I1+N-1 ) = DBLE( AP( I1+N-1 ) ) + DO 10 I = N - 1, 1, -1 +* +* Generate elementary reflector H(i) = I - tau * v * v' +* to annihilate A(1:i-1,i+1) +* + ALPHA = AP( I1+I-1 ) + CALL ZLARFG( I, ALPHA, AP( I1 ), 1, TAUI ) + E( I ) = ALPHA +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(1:i,1:i) +* + AP( I1+I-1 ) = ONE +* +* Compute y := tau * A * v storing y in TAU(1:i) +* + CALL ZHPMV( UPLO, I, TAUI, AP, AP( I1 ), 1, ZERO, TAU, + $ 1 ) +* +* Compute w := y - 1/2 * tau * (y'*v) * v +* + ALPHA = -HALF*TAUI*ZDOTC( I, TAU, 1, AP( I1 ), 1 ) + CALL ZAXPY( I, ALPHA, AP( I1 ), 1, TAU, 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w' - w * v' +* + CALL ZHPR2( UPLO, I, -ONE, AP( I1 ), 1, TAU, 1, AP ) +* + END IF + AP( I1+I-1 ) = E( I ) + D( I+1 ) = AP( I1+I ) + TAU( I ) = TAUI + I1 = I1 - I + 10 CONTINUE + D( 1 ) = AP( 1 ) + ELSE +* +* Reduce the lower triangle of A. II is the index in AP of +* A(i,i) and I1I1 is the index of A(i+1,i+1). +* + II = 1 + AP( 1 ) = DBLE( AP( 1 ) ) + DO 20 I = 1, N - 1 + I1I1 = II + N - I + 1 +* +* Generate elementary reflector H(i) = I - tau * v * v' +* to annihilate A(i+2:n,i) +* + ALPHA = AP( II+1 ) + CALL ZLARFG( N-I, ALPHA, AP( II+2 ), 1, TAUI ) + E( I ) = ALPHA +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(i+1:n,i+1:n) +* + AP( II+1 ) = ONE +* +* Compute y := tau * A * v storing y in TAU(i:n-1) +* + CALL ZHPMV( UPLO, N-I, TAUI, AP( I1I1 ), AP( II+1 ), 1, + $ ZERO, TAU( I ), 1 ) +* +* Compute w := y - 1/2 * tau * (y'*v) * v +* + ALPHA = -HALF*TAUI*ZDOTC( N-I, TAU( I ), 1, AP( II+1 ), + $ 1 ) + CALL ZAXPY( N-I, ALPHA, AP( II+1 ), 1, TAU( I ), 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w' - w * v' +* + CALL ZHPR2( UPLO, N-I, -ONE, AP( II+1 ), 1, TAU( I ), 1, + $ AP( I1I1 ) ) +* + END IF + AP( II+1 ) = E( I ) + D( I ) = AP( II ) + TAU( I ) = TAUI + II = I1I1 + 20 CONTINUE + D( N ) = AP( II ) + END IF +* + RETURN +* +* End of ZHPTRD +* + END diff --git a/costa/native/external/lapack/zhptrf.f b/costa/native/external/lapack/zhptrf.f new file mode 100644 index 000000000..08ff6b66d --- /dev/null +++ b/costa/native/external/lapack/zhptrf.f @@ -0,0 +1,582 @@ + SUBROUTINE ZHPTRF( UPLO, N, AP, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AP( * ) +* .. +* +* Purpose +* ======= +* +* ZHPTRF computes the factorization of a complex Hermitian packed +* matrix A using the Bunch-Kaufman diagonal pivoting method: +* +* A = U*D*U**H or A = L*D*L**H +* +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is Hermitian and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L, stored as a packed triangular +* matrix overwriting A (see below for further details). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, and division by zero will occur if it +* is used to solve a system of equations. +* +* Further Details +* =============== +* +* 5-96 - Based on modifications by J. Lewis, Boeing Computer Services +* Company +* +* If UPLO = 'U', then A = U*D*U', where +* U = P(n)*U(n)* ... *P(k)U(k)* ..., +* i.e., U is a product of terms P(k)*U(k), where k decreases from n to +* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I v 0 ) k-s +* U(k) = ( 0 I 0 ) s +* ( 0 0 I ) n-k +* k-s s n-k +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +* and A(k,k), and v overwrites A(1:k-2,k-1:k). +* +* If UPLO = 'L', then A = L*D*L', where +* L = P(1)*L(1)* ... *P(k)*L(k)* ..., +* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I 0 0 ) k-1 +* L(k) = ( 0 I 0 ) s +* ( 0 v I ) n-k-s+1 +* k-1 s n-k-s+1 +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KC, KK, KNC, KP, KPC, + $ KSTEP, KX, NPP + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, D, D11, D22, R1, ROWMAX, + $ TT + COMPLEX*16 D12, D21, T, WK, WKM1, WKP1, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAPY2 + EXTERNAL LSAME, IZAMAX, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZHPR, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPTRF', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U' using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + KC = ( N-1 )*N / 2 + 1 + 10 CONTINUE + KNC = KC +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( DBLE( AP( KC+K-1 ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = IZAMAX( K-1, AP( KC ), 1 ) + COLMAX = CABS1( AP( KC+IMAX-1 ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + AP( KC+K-1 ) = DBLE( AP( KC+K-1 ) ) + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + ROWMAX = ZERO + JMAX = IMAX + KX = IMAX*( IMAX+1 ) / 2 + IMAX + DO 20 J = IMAX + 1, K + IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = CABS1( AP( KX ) ) + JMAX = J + END IF + KX = KX + J + 20 CONTINUE + KPC = ( IMAX-1 )*IMAX / 2 + 1 + IF( IMAX.GT.1 ) THEN + JMAX = IZAMAX( IMAX-1, AP( KPC ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( DBLE( AP( KPC+IMAX-1 ) ) ).GE.ALPHA* + $ ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL ZSWAP( KP-1, AP( KNC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 30 J = KP + 1, KK - 1 + KX = KX + J - 1 + T = DCONJG( AP( KNC+J-1 ) ) + AP( KNC+J-1 ) = DCONJG( AP( KX ) ) + AP( KX ) = T + 30 CONTINUE + AP( KX+KK-1 ) = DCONJG( AP( KX+KK-1 ) ) + R1 = DBLE( AP( KNC+KK-1 ) ) + AP( KNC+KK-1 ) = DBLE( AP( KPC+KP-1 ) ) + AP( KPC+KP-1 ) = R1 + IF( KSTEP.EQ.2 ) THEN + AP( KC+K-1 ) = DBLE( AP( KC+K-1 ) ) + T = AP( KC+K-2 ) + AP( KC+K-2 ) = AP( KC+KP-1 ) + AP( KC+KP-1 ) = T + END IF + ELSE + AP( KC+K-1 ) = DBLE( AP( KC+K-1 ) ) + IF( KSTEP.EQ.2 ) + $ AP( KC-1 ) = DBLE( AP( KC-1 ) ) + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)' +* + R1 = ONE / DBLE( AP( KC+K-1 ) ) + CALL ZHPR( UPLO, K-1, -R1, AP( KC ), 1, AP ) +* +* Store U(k) in column k +* + CALL ZDSCAL( K-1, R1, AP( KC ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )' +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )' +* + IF( K.GT.2 ) THEN +* + D = DLAPY2( DBLE( AP( K-1+( K-1 )*K / 2 ) ), + $ DIMAG( AP( K-1+( K-1 )*K / 2 ) ) ) + D22 = DBLE( AP( K-1+( K-2 )*( K-1 ) / 2 ) ) / D + D11 = DBLE( AP( K+( K-1 )*K / 2 ) ) / D + TT = ONE / ( D11*D22-ONE ) + D12 = AP( K-1+( K-1 )*K / 2 ) / D + D = TT / D +* + DO 50 J = K - 2, 1, -1 + WKM1 = D*( D11*AP( J+( K-2 )*( K-1 ) / 2 )- + $ DCONJG( D12 )*AP( J+( K-1 )*K / 2 ) ) + WK = D*( D22*AP( J+( K-1 )*K / 2 )-D12* + $ AP( J+( K-2 )*( K-1 ) / 2 ) ) + DO 40 I = J, 1, -1 + AP( I+( J-1 )*J / 2 ) = AP( I+( J-1 )*J / 2 ) - + $ AP( I+( K-1 )*K / 2 )*DCONJG( WK ) - + $ AP( I+( K-2 )*( K-1 ) / 2 )*DCONJG( WKM1 ) + 40 CONTINUE + AP( J+( K-1 )*K / 2 ) = WK + AP( J+( K-2 )*( K-1 ) / 2 ) = WKM1 + AP( J+( J-1 )*J / 2 ) = DCMPLX( DBLE( AP( J+( J- + $ 1 )*J / 2 ) ), 0.0D+0 ) + 50 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + KC = KNC - K + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L' using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + KC = 1 + NPP = N*( N+1 ) / 2 + 60 CONTINUE + KNC = KC +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( DBLE( AP( KC ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + IZAMAX( N-K, AP( KC+1 ), 1 ) + COLMAX = CABS1( AP( KC+IMAX-K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + AP( KC ) = DBLE( AP( KC ) ) + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + ROWMAX = ZERO + KX = KC + IMAX - K + DO 70 J = K, IMAX - 1 + IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = CABS1( AP( KX ) ) + JMAX = J + END IF + KX = KX + N - J + 70 CONTINUE + KPC = NPP - ( N-IMAX+1 )*( N-IMAX+2 ) / 2 + 1 + IF( IMAX.LT.N ) THEN + JMAX = IMAX + IZAMAX( N-IMAX, AP( KPC+1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( DBLE( AP( KPC ) ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC + N - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, AP( KNC+KP-KK+1 ), 1, AP( KPC+1 ), + $ 1 ) + KX = KNC + KP - KK + DO 80 J = KK + 1, KP - 1 + KX = KX + N - J + 1 + T = DCONJG( AP( KNC+J-KK ) ) + AP( KNC+J-KK ) = DCONJG( AP( KX ) ) + AP( KX ) = T + 80 CONTINUE + AP( KNC+KP-KK ) = DCONJG( AP( KNC+KP-KK ) ) + R1 = DBLE( AP( KNC ) ) + AP( KNC ) = DBLE( AP( KPC ) ) + AP( KPC ) = R1 + IF( KSTEP.EQ.2 ) THEN + AP( KC ) = DBLE( AP( KC ) ) + T = AP( KC+1 ) + AP( KC+1 ) = AP( KC+KP-K ) + AP( KC+KP-K ) = T + END IF + ELSE + AP( KC ) = DBLE( AP( KC ) ) + IF( KSTEP.EQ.2 ) + $ AP( KNC ) = DBLE( AP( KNC ) ) + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)' +* + R1 = ONE / DBLE( AP( KC ) ) + CALL ZHPR( UPLO, N-K, -R1, AP( KC+1 ), 1, + $ AP( KC+N-K+1 ) ) +* +* Store L(k) in column K +* + CALL ZDSCAL( N-K, R1, AP( KC+1 ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k): columns K and K+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )' +* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )' +* +* where L(k) and L(k+1) are the k-th and (k+1)-th +* columns of L +* + D = DLAPY2( DBLE( AP( K+1+( K-1 )*( 2*N-K ) / 2 ) ), + $ DIMAG( AP( K+1+( K-1 )*( 2*N-K ) / 2 ) ) ) + D11 = DBLE( AP( K+1+K*( 2*N-K-1 ) / 2 ) ) / D + D22 = DBLE( AP( K+( K-1 )*( 2*N-K ) / 2 ) ) / D + TT = ONE / ( D11*D22-ONE ) + D21 = AP( K+1+( K-1 )*( 2*N-K ) / 2 ) / D + D = TT / D +* + DO 100 J = K + 2, N + WK = D*( D11*AP( J+( K-1 )*( 2*N-K ) / 2 )-D21* + $ AP( J+K*( 2*N-K-1 ) / 2 ) ) + WKP1 = D*( D22*AP( J+K*( 2*N-K-1 ) / 2 )- + $ DCONJG( D21 )*AP( J+( K-1 )*( 2*N-K ) / + $ 2 ) ) + DO 90 I = J, N + AP( I+( J-1 )*( 2*N-J ) / 2 ) = AP( I+( J-1 )* + $ ( 2*N-J ) / 2 ) - AP( I+( K-1 )*( 2*N-K ) / + $ 2 )*DCONJG( WK ) - AP( I+K*( 2*N-K-1 ) / 2 )* + $ DCONJG( WKP1 ) + 90 CONTINUE + AP( J+( K-1 )*( 2*N-K ) / 2 ) = WK + AP( J+K*( 2*N-K-1 ) / 2 ) = WKP1 + AP( J+( J-1 )*( 2*N-J ) / 2 ) + $ = DCMPLX( DBLE( AP( J+( J-1 )*( 2*N-J ) / 2 ) ), + $ 0.0D+0 ) + 100 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + KC = KNC + N - K + 2 + GO TO 60 +* + END IF +* + 110 CONTINUE + RETURN +* +* End of ZHPTRF +* + END diff --git a/costa/native/external/lapack/zhptri.f b/costa/native/external/lapack/zhptri.f new file mode 100644 index 000000000..5873b61b1 --- /dev/null +++ b/costa/native/external/lapack/zhptri.f @@ -0,0 +1,344 @@ + SUBROUTINE ZHPTRI( UPLO, N, AP, IPIV, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZHPTRI computes the inverse of a complex Hermitian indefinite matrix +* A in packed storage using the factorization A = U*D*U**H or +* A = L*D*L**H computed by ZHPTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**H; +* = 'L': Lower triangular, form is A = L*D*L**H. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the block diagonal matrix D and the multipliers +* used to obtain the factor U or L as computed by ZHPTRF, +* stored as a packed triangular matrix. +* +* On exit, if INFO = 0, the (Hermitian) inverse of the original +* matrix, stored as a packed triangular matrix. The j-th column +* of inv(A) is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j; +* if UPLO = 'L', +* AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by ZHPTRF. +* +* WORK (workspace) COMPLEX*16 array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +* inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + COMPLEX*16 CONE, ZERO + PARAMETER ( ONE = 1.0D+0, CONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KCNEXT, KP, KPC, KSTEP, KX, NPP + DOUBLE PRECISION AK, AKP1, D, T + COMPLEX*16 AKKP1, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZHPMV, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + KP = N*( N+1 ) / 2 + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP - INFO + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + KP = 1 + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP + N - INFO + 1 + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + KCNEXT = KC + K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC+K-1 ) = ONE / DBLE( AP( KC+K-1 ) ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL ZCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL ZHPMV( UPLO, K-1, -CONE, AP, WORK, 1, ZERO, + $ AP( KC ), 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ DBLE( ZDOTC( K-1, WORK, 1, AP( KC ), 1 ) ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( AP( KCNEXT+K-1 ) ) + AK = DBLE( AP( KC+K-1 ) ) / T + AKP1 = DBLE( AP( KCNEXT+K ) ) / T + AKKP1 = AP( KCNEXT+K-1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KC+K-1 ) = AKP1 / D + AP( KCNEXT+K ) = AK / D + AP( KCNEXT+K-1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL ZCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL ZHPMV( UPLO, K-1, -CONE, AP, WORK, 1, ZERO, + $ AP( KC ), 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ DBLE( ZDOTC( K-1, WORK, 1, AP( KC ), 1 ) ) + AP( KCNEXT+K-1 ) = AP( KCNEXT+K-1 ) - + $ ZDOTC( K-1, AP( KC ), 1, AP( KCNEXT ), + $ 1 ) + CALL ZCOPY( K-1, AP( KCNEXT ), 1, WORK, 1 ) + CALL ZHPMV( UPLO, K-1, -CONE, AP, WORK, 1, ZERO, + $ AP( KCNEXT ), 1 ) + AP( KCNEXT+K ) = AP( KCNEXT+K ) - + $ DBLE( ZDOTC( K-1, WORK, 1, AP( KCNEXT ), + $ 1 ) ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT + K + 1 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + KPC = ( KP-1 )*KP / 2 + 1 + CALL ZSWAP( KP-1, AP( KC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 40 J = KP + 1, K - 1 + KX = KX + J - 1 + TEMP = DCONJG( AP( KC+J-1 ) ) + AP( KC+J-1 ) = DCONJG( AP( KX ) ) + AP( KX ) = TEMP + 40 CONTINUE + AP( KC+KP-1 ) = DCONJG( AP( KC+KP-1 ) ) + TEMP = AP( KC+K-1 ) + AP( KC+K-1 ) = AP( KPC+KP-1 ) + AP( KPC+KP-1 ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC+K+K-1 ) + AP( KC+K+K-1 ) = AP( KC+K+KP-1 ) + AP( KC+K+KP-1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + KC = KCNEXT + GO TO 30 + 50 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + NPP = N*( N+1 ) / 2 + K = N + KC = NPP + 60 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 80 +* + KCNEXT = KC - ( N-K+2 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC ) = ONE / DBLE( AP( KC ) ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL ZCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL ZHPMV( UPLO, N-K, -CONE, AP( KC+N-K+1 ), WORK, 1, + $ ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - DBLE( ZDOTC( N-K, WORK, 1, + $ AP( KC+1 ), 1 ) ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( AP( KCNEXT+1 ) ) + AK = DBLE( AP( KCNEXT ) ) / T + AKP1 = DBLE( AP( KC ) ) / T + AKKP1 = AP( KCNEXT+1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KCNEXT ) = AKP1 / D + AP( KC ) = AK / D + AP( KCNEXT+1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL ZCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL ZHPMV( UPLO, N-K, -CONE, AP( KC+( N-K+1 ) ), WORK, + $ 1, ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - DBLE( ZDOTC( N-K, WORK, 1, + $ AP( KC+1 ), 1 ) ) + AP( KCNEXT+1 ) = AP( KCNEXT+1 ) - + $ ZDOTC( N-K, AP( KC+1 ), 1, + $ AP( KCNEXT+2 ), 1 ) + CALL ZCOPY( N-K, AP( KCNEXT+2 ), 1, WORK, 1 ) + CALL ZHPMV( UPLO, N-K, -CONE, AP( KC+( N-K+1 ) ), WORK, + $ 1, ZERO, AP( KCNEXT+2 ), 1 ) + AP( KCNEXT ) = AP( KCNEXT ) - + $ DBLE( ZDOTC( N-K, WORK, 1, AP( KCNEXT+2 ), + $ 1 ) ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT - ( N-K+3 ) + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + KPC = NPP - ( N-KP+1 )*( N-KP+2 ) / 2 + 1 + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, AP( KC+KP-K+1 ), 1, AP( KPC+1 ), 1 ) + KX = KC + KP - K + DO 70 J = K + 1, KP - 1 + KX = KX + N - J + 1 + TEMP = DCONJG( AP( KC+J-K ) ) + AP( KC+J-K ) = DCONJG( AP( KX ) ) + AP( KX ) = TEMP + 70 CONTINUE + AP( KC+KP-K ) = DCONJG( AP( KC+KP-K ) ) + TEMP = AP( KC ) + AP( KC ) = AP( KPC ) + AP( KPC ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC-N+K-1 ) + AP( KC-N+K-1 ) = AP( KC-N+KP-1 ) + AP( KC-N+KP-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + KC = KCNEXT + GO TO 60 + 80 CONTINUE + END IF +* + RETURN +* +* End of ZHPTRI +* + END diff --git a/costa/native/external/lapack/zhptrs.f b/costa/native/external/lapack/zhptrs.f new file mode 100644 index 000000000..c2ed8233e --- /dev/null +++ b/costa/native/external/lapack/zhptrs.f @@ -0,0 +1,402 @@ + SUBROUTINE ZHPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZHPTRS solves a system of linear equations A*X = B with a complex +* Hermitian matrix A stored in packed format using the factorization +* A = U*D*U**H or A = L*D*L**H computed by ZHPTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**H; +* = 'L': Lower triangular, form is A = L*D*L**H. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by ZHPTRF, stored as a +* packed triangular matrix. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by ZHPTRF. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KP + DOUBLE PRECISION S + COMPLEX*16 AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZGEMV, ZGERU, ZLACGV, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCONJG, MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U'. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + KC = KC - K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL ZGERU( K-1, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + S = DBLE( ONE ) / DBLE( AP( KC+K-1 ) ) + CALL ZDSCAL( NRHS, S, B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL ZSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL ZGERU( K-2, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL ZGERU( K-2, NRHS, -ONE, AP( KC-( K-1 ) ), 1, + $ B( K-1, 1 ), LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+K-2 ) + AKM1 = AP( KC-1 ) / AKM1K + AK = AP( KC+K-1 ) / DCONJG( AKM1K ) + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / DCONJG( AKM1K ) + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + KC = KC - K + 1 + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U'*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U'(K)), where U(K) is the transformation +* stored in column K of A. +* + IF( K.GT.1 ) THEN + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, AP( KC ), 1, ONE, B( K, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + END IF +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + K + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U'(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.GT.1 ) THEN + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, AP( KC ), 1, ONE, B( K, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) +* + CALL ZLACGV( NRHS, B( K+1, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, AP( KC+K ), 1, ONE, B( K+1, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K+1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + 2*K + 1 + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L'. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL ZGERU( N-K, NRHS, -ONE, AP( KC+1 ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + S = DBLE( ONE ) / DBLE( AP( KC ) ) + CALL ZDSCAL( NRHS, S, B( K, 1 ), LDB ) + KC = KC + N - K + 1 + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL ZSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL ZGERU( N-K-1, NRHS, -ONE, AP( KC+2 ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL ZGERU( N-K-1, NRHS, -ONE, AP( KC+N-K+2 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+1 ) + AKM1 = AP( KC ) / DCONJG( AKM1K ) + AK = AP( KC+N-K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / DCONJG( AKM1K ) + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + KC = KC + 2*( N-K ) + 1 + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L'*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + KC = KC - ( N-K+1 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L'(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) THEN + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, AP( KC+1 ), 1, ONE, + $ B( K, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + END IF +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L'(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, AP( KC+1 ), 1, ONE, + $ B( K, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) +* + CALL ZLACGV( NRHS, B( K-1, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, AP( KC-( N-K ) ), 1, ONE, + $ B( K-1, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K-1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC - ( N-K+2 ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of ZHPTRS +* + END diff --git a/costa/native/external/lapack/zhsein.f b/costa/native/external/lapack/zhsein.f new file mode 100644 index 000000000..c6f38150c --- /dev/null +++ b/costa/native/external/lapack/zhsein.f @@ -0,0 +1,351 @@ + SUBROUTINE ZHSEIN( SIDE, EIGSRC, INITV, SELECT, N, H, LDH, W, VL, + $ LDVL, VR, LDVR, MM, M, WORK, RWORK, IFAILL, + $ IFAILR, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER EIGSRC, INITV, SIDE + INTEGER INFO, LDH, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IFAILL( * ), IFAILR( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 H( LDH, * ), VL( LDVL, * ), VR( LDVR, * ), + $ W( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZHSEIN uses inverse iteration to find specified right and/or left +* eigenvectors of a complex upper Hessenberg matrix H. +* +* The right eigenvector x and the left eigenvector y of the matrix H +* corresponding to an eigenvalue w are defined by: +* +* H * x = w * x, y**h * H = w * y**h +* +* where y**h denotes the conjugate transpose of the vector y. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'R': compute right eigenvectors only; +* = 'L': compute left eigenvectors only; +* = 'B': compute both right and left eigenvectors. +* +* EIGSRC (input) CHARACTER*1 +* Specifies the source of eigenvalues supplied in W: +* = 'Q': the eigenvalues were found using ZHSEQR; thus, if +* H has zero subdiagonal elements, and so is +* block-triangular, then the j-th eigenvalue can be +* assumed to be an eigenvalue of the block containing +* the j-th row/column. This property allows ZHSEIN to +* perform inverse iteration on just one diagonal block. +* = 'N': no assumptions are made on the correspondence +* between eigenvalues and diagonal blocks. In this +* case, ZHSEIN must always perform inverse iteration +* using the whole matrix H. +* +* INITV (input) CHARACTER*1 +* = 'N': no initial vectors are supplied; +* = 'U': user-supplied initial vectors are stored in the arrays +* VL and/or VR. +* +* SELECT (input) LOGICAL array, dimension (N) +* Specifies the eigenvectors to be computed. To select the +* eigenvector corresponding to the eigenvalue W(j), +* SELECT(j) must be set to .TRUE.. +* +* N (input) INTEGER +* The order of the matrix H. N >= 0. +* +* H (input) COMPLEX*16 array, dimension (LDH,N) +* The upper Hessenberg matrix H. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH >= max(1,N). +* +* W (input/output) COMPLEX*16 array, dimension (N) +* On entry, the eigenvalues of H. +* On exit, the real parts of W may have been altered since +* close eigenvalues are perturbed slightly in searching for +* independent eigenvectors. +* +* VL (input/output) COMPLEX*16 array, dimension (LDVL,MM) +* On entry, if INITV = 'U' and SIDE = 'L' or 'B', VL must +* contain starting vectors for the inverse iteration for the +* left eigenvectors; the starting vector for each eigenvector +* must be in the same column in which the eigenvector will be +* stored. +* On exit, if SIDE = 'L' or 'B', the left eigenvectors +* specified by SELECT will be stored consecutively in the +* columns of VL, in the same order as their eigenvalues. +* If SIDE = 'R', VL is not referenced. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. +* LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise. +* +* VR (input/output) COMPLEX*16 array, dimension (LDVR,MM) +* On entry, if INITV = 'U' and SIDE = 'R' or 'B', VR must +* contain starting vectors for the inverse iteration for the +* right eigenvectors; the starting vector for each eigenvector +* must be in the same column in which the eigenvector will be +* stored. +* On exit, if SIDE = 'R' or 'B', the right eigenvectors +* specified by SELECT will be stored consecutively in the +* columns of VR, in the same order as their eigenvalues. +* If SIDE = 'L', VR is not referenced. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. +* LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise. +* +* MM (input) INTEGER +* The number of columns in the arrays VL and/or VR. MM >= M. +* +* M (output) INTEGER +* The number of columns in the arrays VL and/or VR required to +* store the eigenvectors (= the number of .TRUE. elements in +* SELECT). +* +* WORK (workspace) COMPLEX*16 array, dimension (N*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* IFAILL (output) INTEGER array, dimension (MM) +* If SIDE = 'L' or 'B', IFAILL(i) = j > 0 if the left +* eigenvector in the i-th column of VL (corresponding to the +* eigenvalue w(j)) failed to converge; IFAILL(i) = 0 if the +* eigenvector converged satisfactorily. +* If SIDE = 'R', IFAILL is not referenced. +* +* IFAILR (output) INTEGER array, dimension (MM) +* If SIDE = 'R' or 'B', IFAILR(i) = j > 0 if the right +* eigenvector in the i-th column of VR (corresponding to the +* eigenvalue w(j)) failed to converge; IFAILR(i) = 0 if the +* eigenvector converged satisfactorily. +* If SIDE = 'L', IFAILR is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, i is the number of eigenvectors which +* failed to converge; see IFAILL and IFAILR for further +* details. +* +* Further Details +* =============== +* +* Each eigenvector is normalized so that the element of largest +* magnitude has magnitude 1; here the magnitude of a complex number +* (x,y) is taken to be |x|+|y|. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION RZERO + PARAMETER ( RZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL BOTHV, FROMQR, LEFTV, NOINIT, RIGHTV + INTEGER I, IINFO, K, KL, KLN, KR, KS, LDWORK + DOUBLE PRECISION EPS3, HNORM, SMLNUM, ULP, UNFL + COMPLEX*16 CDUM, WK +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHS + EXTERNAL LSAME, DLAMCH, ZLANHS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLAEIN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters. +* + BOTHV = LSAME( SIDE, 'B' ) + RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV + LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV +* + FROMQR = LSAME( EIGSRC, 'Q' ) +* + NOINIT = LSAME( INITV, 'N' ) +* +* Set M to the number of columns required to store the selected +* eigenvectors. +* + M = 0 + DO 10 K = 1, N + IF( SELECT( K ) ) + $ M = M + 1 + 10 CONTINUE +* + INFO = 0 + IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -1 + ELSE IF( .NOT.FROMQR .AND. .NOT.LSAME( EIGSRC, 'N' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOINIT .AND. .NOT.LSAME( INITV, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDH.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN + INFO = -10 + ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN + INFO = -12 + ELSE IF( MM.LT.M ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHSEIN', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* Set machine-dependent constants. +* + UNFL = DLAMCH( 'Safe minimum' ) + ULP = DLAMCH( 'Precision' ) + SMLNUM = UNFL*( N / ULP ) +* + LDWORK = N +* + KL = 1 + KLN = 0 + IF( FROMQR ) THEN + KR = 0 + ELSE + KR = N + END IF + KS = 1 +* + DO 100 K = 1, N + IF( SELECT( K ) ) THEN +* +* Compute eigenvector(s) corresponding to W(K). +* + IF( FROMQR ) THEN +* +* If affiliation of eigenvalues is known, check whether +* the matrix splits. +* +* Determine KL and KR such that 1 <= KL <= K <= KR <= N +* and H(KL,KL-1) and H(KR+1,KR) are zero (or KL = 1 or +* KR = N). +* +* Then inverse iteration can be performed with the +* submatrix H(KL:N,KL:N) for a left eigenvector, and with +* the submatrix H(1:KR,1:KR) for a right eigenvector. +* + DO 20 I = K, KL + 1, -1 + IF( H( I, I-1 ).EQ.ZERO ) + $ GO TO 30 + 20 CONTINUE + 30 CONTINUE + KL = I + IF( K.GT.KR ) THEN + DO 40 I = K, N - 1 + IF( H( I+1, I ).EQ.ZERO ) + $ GO TO 50 + 40 CONTINUE + 50 CONTINUE + KR = I + END IF + END IF +* + IF( KL.NE.KLN ) THEN + KLN = KL +* +* Compute infinity-norm of submatrix H(KL:KR,KL:KR) if it +* has not ben computed before. +* + HNORM = ZLANHS( 'I', KR-KL+1, H( KL, KL ), LDH, RWORK ) + IF( HNORM.GT.RZERO ) THEN + EPS3 = HNORM*ULP + ELSE + EPS3 = SMLNUM + END IF + END IF +* +* Perturb eigenvalue if it is close to any previous +* selected eigenvalues affiliated to the submatrix +* H(KL:KR,KL:KR). Close roots are modified by EPS3. +* + WK = W( K ) + 60 CONTINUE + DO 70 I = K - 1, KL, -1 + IF( SELECT( I ) .AND. CABS1( W( I )-WK ).LT.EPS3 ) THEN + WK = WK + EPS3 + GO TO 60 + END IF + 70 CONTINUE + W( K ) = WK +* + IF( LEFTV ) THEN +* +* Compute left eigenvector. +* + CALL ZLAEIN( .FALSE., NOINIT, N-KL+1, H( KL, KL ), LDH, + $ WK, VL( KL, KS ), WORK, LDWORK, RWORK, EPS3, + $ SMLNUM, IINFO ) + IF( IINFO.GT.0 ) THEN + INFO = INFO + 1 + IFAILL( KS ) = K + ELSE + IFAILL( KS ) = 0 + END IF + DO 80 I = 1, KL - 1 + VL( I, KS ) = ZERO + 80 CONTINUE + END IF + IF( RIGHTV ) THEN +* +* Compute right eigenvector. +* + CALL ZLAEIN( .TRUE., NOINIT, KR, H, LDH, WK, VR( 1, KS ), + $ WORK, LDWORK, RWORK, EPS3, SMLNUM, IINFO ) + IF( IINFO.GT.0 ) THEN + INFO = INFO + 1 + IFAILR( KS ) = K + ELSE + IFAILR( KS ) = 0 + END IF + DO 90 I = KR + 1, N + VR( I, KS ) = ZERO + 90 CONTINUE + END IF + KS = KS + 1 + END IF + 100 CONTINUE +* + RETURN +* +* End of ZHSEIN +* + END diff --git a/costa/native/external/lapack/zhseqr.f b/costa/native/external/lapack/zhseqr.f new file mode 100644 index 000000000..17274a8f6 --- /dev/null +++ b/costa/native/external/lapack/zhseqr.f @@ -0,0 +1,474 @@ + SUBROUTINE ZHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, W, Z, LDZ, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER COMPZ, JOB + INTEGER IHI, ILO, INFO, LDH, LDZ, LWORK, N +* .. +* .. Array Arguments .. + COMPLEX*16 H( LDH, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZHSEQR computes the eigenvalues of a complex upper Hessenberg +* matrix H, and, optionally, the matrices T and Z from the Schur +* decomposition H = Z T Z**H, where T is an upper triangular matrix +* (the Schur form), and Z is the unitary matrix of Schur vectors. +* +* Optionally Z may be postmultiplied into an input unitary matrix Q, +* so that this routine can give the Schur factorization of a matrix A +* which has been reduced to the Hessenberg form H by the unitary +* matrix Q: A = Q*H*Q**H = (QZ)*T*(QZ)**H. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* = 'E': compute eigenvalues only; +* = 'S': compute eigenvalues and the Schur form T. +* +* COMPZ (input) CHARACTER*1 +* = 'N': no Schur vectors are computed; +* = 'I': Z is initialized to the unit matrix and the matrix Z +* of Schur vectors of H is returned; +* = 'V': Z must contain an unitary matrix Q on entry, and +* the product Q*Z is returned. +* +* N (input) INTEGER +* The order of the matrix H. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that H is already upper triangular in rows +* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +* set by a previous call to ZGEBAL, and then passed to CGEHRD +* when the matrix output by ZGEBAL is reduced to Hessenberg +* form. Otherwise ILO and IHI should be set to 1 and N +* respectively. +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* H (input/output) COMPLEX*16 array, dimension (LDH,N) +* On entry, the upper Hessenberg matrix H. +* On exit, if JOB = 'S', H contains the upper triangular matrix +* T from the Schur decomposition (the Schur form). If +* JOB = 'E', the contents of H are unspecified on exit. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH >= max(1,N). +* +* W (output) COMPLEX*16 array, dimension (N) +* The computed eigenvalues. If JOB = 'S', the eigenvalues are +* stored in the same order as on the diagonal of the Schur form +* returned in H, with W(i) = H(i,i). +* +* Z (input/output) COMPLEX*16 array, dimension (LDZ,N) +* If COMPZ = 'N': Z is not referenced. +* If COMPZ = 'I': on entry, Z need not be set, and on exit, Z +* contains the unitary matrix Z of the Schur vectors of H. +* If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, +* which is assumed to be equal to the unit matrix except for +* the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. +* Normally Q is the unitary matrix generated by ZUNGHR after +* the call to ZGEHRD which formed the Hessenberg matrix H. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. +* LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, ZHSEQR failed to compute all the +* eigenvalues in a total of 30*(IHI-ILO+1) iterations; +* elements 1:ilo-1 and i+1:n of W contain those +* eigenvalues which have been successfully computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION RZERO, RONE, CONST + PARAMETER ( RZERO = 0.0D+0, RONE = 1.0D+0, + $ CONST = 1.5D+0 ) + INTEGER NSMAX, LDS + PARAMETER ( NSMAX = 15, LDS = NSMAX ) +* .. +* .. Local Scalars .. + LOGICAL INITZ, LQUERY, WANTT, WANTZ + INTEGER I, I1, I2, IERR, II, ITEMP, ITN, ITS, J, K, L, + $ MAXB, NH, NR, NS, NV + DOUBLE PRECISION OVFL, RTEMP, SMLNUM, TST1, ULP, UNFL + COMPLEX*16 CDUM, TAU, TEMP +* .. +* .. Local Arrays .. + DOUBLE PRECISION RWORK( 1 ) + COMPLEX*16 S( LDS, NSMAX ), V( NSMAX+1 ), VV( NSMAX+1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV, IZAMAX + DOUBLE PRECISION DLAMCH, DLAPY2, ZLANHS + EXTERNAL LSAME, ILAENV, IZAMAX, DLAMCH, DLAPY2, ZLANHS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZDSCAL, ZGEMV, ZLACPY, ZLAHQR, + $ ZLARFG, ZLARFX, ZLASET, ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTT = LSAME( JOB, 'S' ) + INITZ = LSAME( COMPZ, 'I' ) + WANTZ = INITZ .OR. LSAME( COMPZ, 'V' ) +* + INFO = 0 + WORK( 1 ) = MAX( 1, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( JOB, 'E' ) .AND. .NOT.WANTT ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPZ, 'N' ) .AND. .NOT.WANTZ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -5 + ELSE IF( LDH.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDZ.LT.1 .OR. WANTZ .AND. LDZ.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHSEQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Initialize Z, if necessary +* + IF( INITZ ) + $ CALL ZLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* +* Store the eigenvalues isolated by ZGEBAL. +* + DO 10 I = 1, ILO - 1 + W( I ) = H( I, I ) + 10 CONTINUE + DO 20 I = IHI + 1, N + W( I ) = H( I, I ) + 20 CONTINUE +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN + IF( ILO.EQ.IHI ) THEN + W( ILO ) = H( ILO, ILO ) + RETURN + END IF +* +* Set rows and columns ILO to IHI to zero below the first +* subdiagonal. +* + DO 40 J = ILO, IHI - 2 + DO 30 I = J + 2, N + H( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + NH = IHI - ILO + 1 +* +* I1 and I2 are the indices of the first row and last column of H +* to which transformations must be applied. If eigenvalues only are +* being computed, I1 and I2 are re-set inside the main loop. +* + IF( WANTT ) THEN + I1 = 1 + I2 = N + ELSE + I1 = ILO + I2 = IHI + END IF +* +* Ensure that the subdiagonal elements are real. +* + DO 50 I = ILO + 1, IHI + TEMP = H( I, I-1 ) + IF( DIMAG( TEMP ).NE.RZERO ) THEN + RTEMP = DLAPY2( DBLE( TEMP ), DIMAG( TEMP ) ) + H( I, I-1 ) = RTEMP + TEMP = TEMP / RTEMP + IF( I2.GT.I ) + $ CALL ZSCAL( I2-I, DCONJG( TEMP ), H( I, I+1 ), LDH ) + CALL ZSCAL( I-I1, TEMP, H( I1, I ), 1 ) + IF( I.LT.IHI ) + $ H( I+1, I ) = TEMP*H( I+1, I ) + IF( WANTZ ) + $ CALL ZSCAL( NH, TEMP, Z( ILO, I ), 1 ) + END IF + 50 CONTINUE +* +* Determine the order of the multi-shift QR algorithm to be used. +* + NS = ILAENV( 4, 'ZHSEQR', JOB // COMPZ, N, ILO, IHI, -1 ) + MAXB = ILAENV( 8, 'ZHSEQR', JOB // COMPZ, N, ILO, IHI, -1 ) + IF( NS.LE.1 .OR. NS.GT.NH .OR. MAXB.GE.NH ) THEN +* +* Use the standard double-shift algorithm +* + CALL ZLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILO, IHI, Z, + $ LDZ, INFO ) + RETURN + END IF + MAXB = MAX( 2, MAXB ) + NS = MIN( NS, MAXB, NSMAX ) +* +* Now 1 < NS <= MAXB < NH. +* +* Set machine-dependent constants for the stopping criterion. +* If norm(H) <= sqrt(OVFL), overflow should not occur. +* + UNFL = DLAMCH( 'Safe minimum' ) + OVFL = RONE / UNFL + CALL DLABAD( UNFL, OVFL ) + ULP = DLAMCH( 'Precision' ) + SMLNUM = UNFL*( NH / ULP ) +* +* ITN is the total number of multiple-shift QR iterations allowed. +* + ITN = 30*NH +* +* The main loop begins here. I is the loop index and decreases from +* IHI to ILO in steps of at most MAXB. Each iteration of the loop +* works with the active submatrix in rows and columns L to I. +* Eigenvalues I+1 to IHI have already converged. Either L = ILO, or +* H(L,L-1) is negligible so that the matrix splits. +* + I = IHI + 60 CONTINUE + IF( I.LT.ILO ) + $ GO TO 180 +* +* Perform multiple-shift QR iterations on rows and columns ILO to I +* until a submatrix of order at most MAXB splits off at the bottom +* because a subdiagonal element has become negligible. +* + L = ILO + DO 160 ITS = 0, ITN +* +* Look for a single small subdiagonal element. +* + DO 70 K = I, L + 1, -1 + TST1 = CABS1( H( K-1, K-1 ) ) + CABS1( H( K, K ) ) + IF( TST1.EQ.RZERO ) + $ TST1 = ZLANHS( '1', I-L+1, H( L, L ), LDH, RWORK ) + IF( ABS( DBLE( H( K, K-1 ) ) ).LE.MAX( ULP*TST1, SMLNUM ) ) + $ GO TO 80 + 70 CONTINUE + 80 CONTINUE + L = K + IF( L.GT.ILO ) THEN +* +* H(L,L-1) is negligible. +* + H( L, L-1 ) = ZERO + END IF +* +* Exit from loop if a submatrix of order <= MAXB has split off. +* + IF( L.GE.I-MAXB+1 ) + $ GO TO 170 +* +* Now the active submatrix is in rows and columns L to I. If +* eigenvalues only are being computed, only the active submatrix +* need be transformed. +* + IF( .NOT.WANTT ) THEN + I1 = L + I2 = I + END IF +* + IF( ITS.EQ.20 .OR. ITS.EQ.30 ) THEN +* +* Exceptional shifts. +* + DO 90 II = I - NS + 1, I + W( II ) = CONST*( ABS( DBLE( H( II, II-1 ) ) )+ + $ ABS( DBLE( H( II, II ) ) ) ) + 90 CONTINUE + ELSE +* +* Use eigenvalues of trailing submatrix of order NS as shifts. +* + CALL ZLACPY( 'Full', NS, NS, H( I-NS+1, I-NS+1 ), LDH, S, + $ LDS ) + CALL ZLAHQR( .FALSE., .FALSE., NS, 1, NS, S, LDS, + $ W( I-NS+1 ), 1, NS, Z, LDZ, IERR ) + IF( IERR.GT.0 ) THEN +* +* If ZLAHQR failed to compute all NS eigenvalues, use the +* unconverged diagonal elements as the remaining shifts. +* + DO 100 II = 1, IERR + W( I-NS+II ) = S( II, II ) + 100 CONTINUE + END IF + END IF +* +* Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) +* where G is the Hessenberg submatrix H(L:I,L:I) and w is +* the vector of shifts (stored in W). The result is +* stored in the local array V. +* + V( 1 ) = ONE + DO 110 II = 2, NS + 1 + V( II ) = ZERO + 110 CONTINUE + NV = 1 + DO 130 J = I - NS + 1, I + CALL ZCOPY( NV+1, V, 1, VV, 1 ) + CALL ZGEMV( 'No transpose', NV+1, NV, ONE, H( L, L ), LDH, + $ VV, 1, -W( J ), V, 1 ) + NV = NV + 1 +* +* Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, +* reset it to the unit vector. +* + ITEMP = IZAMAX( NV, V, 1 ) + RTEMP = CABS1( V( ITEMP ) ) + IF( RTEMP.EQ.RZERO ) THEN + V( 1 ) = ONE + DO 120 II = 2, NV + V( II ) = ZERO + 120 CONTINUE + ELSE + RTEMP = MAX( RTEMP, SMLNUM ) + CALL ZDSCAL( NV, RONE / RTEMP, V, 1 ) + END IF + 130 CONTINUE +* +* Multiple-shift QR step +* + DO 150 K = L, I - 1 +* +* The first iteration of this loop determines a reflection G +* from the vector V and applies it from left and right to H, +* thus creating a nonzero bulge below the subdiagonal. +* +* Each subsequent iteration determines a reflection G to +* restore the Hessenberg form in the (K-1)th column, and thus +* chases the bulge one step toward the bottom of the active +* submatrix. NR is the order of G. +* + NR = MIN( NS+1, I-K+1 ) + IF( K.GT.L ) + $ CALL ZCOPY( NR, H( K, K-1 ), 1, V, 1 ) + CALL ZLARFG( NR, V( 1 ), V( 2 ), 1, TAU ) + IF( K.GT.L ) THEN + H( K, K-1 ) = V( 1 ) + DO 140 II = K + 1, I + H( II, K-1 ) = ZERO + 140 CONTINUE + END IF + V( 1 ) = ONE +* +* Apply G' from the left to transform the rows of the matrix +* in columns K to I2. +* + CALL ZLARFX( 'Left', NR, I2-K+1, V, DCONJG( TAU ), + $ H( K, K ), LDH, WORK ) +* +* Apply G from the right to transform the columns of the +* matrix in rows I1 to min(K+NR,I). +* + CALL ZLARFX( 'Right', MIN( K+NR, I )-I1+1, NR, V, TAU, + $ H( I1, K ), LDH, WORK ) +* + IF( WANTZ ) THEN +* +* Accumulate transformations in the matrix Z +* + CALL ZLARFX( 'Right', NH, NR, V, TAU, Z( ILO, K ), LDZ, + $ WORK ) + END IF + 150 CONTINUE +* +* Ensure that H(I,I-1) is real. +* + TEMP = H( I, I-1 ) + IF( DIMAG( TEMP ).NE.RZERO ) THEN + RTEMP = DLAPY2( DBLE( TEMP ), DIMAG( TEMP ) ) + H( I, I-1 ) = RTEMP + TEMP = TEMP / RTEMP + IF( I2.GT.I ) + $ CALL ZSCAL( I2-I, DCONJG( TEMP ), H( I, I+1 ), LDH ) + CALL ZSCAL( I-I1, TEMP, H( I1, I ), 1 ) + IF( WANTZ ) THEN + CALL ZSCAL( NH, TEMP, Z( ILO, I ), 1 ) + END IF + END IF +* + 160 CONTINUE +* +* Failure to converge in remaining number of iterations +* + INFO = I + RETURN +* + 170 CONTINUE +* +* A submatrix of order <= MAXB in rows and columns L to I has split +* off. Use the double-shift QR algorithm to handle it. +* + CALL ZLAHQR( WANTT, WANTZ, N, L, I, H, LDH, W, ILO, IHI, Z, LDZ, + $ INFO ) + IF( INFO.GT.0 ) + $ RETURN +* +* Decrement number of remaining iterations, and return to start of +* the main loop with a new value of I. +* + ITN = ITN - ITS + I = L - 1 + GO TO 60 +* + 180 CONTINUE + WORK( 1 ) = MAX( 1, N ) + RETURN +* +* End of ZHSEQR +* + END diff --git a/costa/native/external/lapack/zlabrd.f b/costa/native/external/lapack/zlabrd.f new file mode 100644 index 000000000..45ee265ec --- /dev/null +++ b/costa/native/external/lapack/zlabrd.f @@ -0,0 +1,329 @@ + SUBROUTINE ZLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y, + $ LDY ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER LDA, LDX, LDY, M, N, NB +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + COMPLEX*16 A( LDA, * ), TAUP( * ), TAUQ( * ), X( LDX, * ), + $ Y( LDY, * ) +* .. +* +* Purpose +* ======= +* +* ZLABRD reduces the first NB rows and columns of a complex general +* m by n matrix A to upper or lower real bidiagonal form by a unitary +* transformation Q' * A * P, and returns the matrices X and Y which +* are needed to apply the transformation to the unreduced part of A. +* +* If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower +* bidiagonal form. +* +* This is an auxiliary routine called by ZGEBRD +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows in the matrix A. +* +* N (input) INTEGER +* The number of columns in the matrix A. +* +* NB (input) INTEGER +* The number of leading rows and columns of A to be reduced. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the m by n general matrix to be reduced. +* On exit, the first NB rows and columns of the matrix are +* overwritten; the rest of the array is unchanged. +* If m >= n, elements on and below the diagonal in the first NB +* columns, with the array TAUQ, represent the unitary +* matrix Q as a product of elementary reflectors; and +* elements above the diagonal in the first NB rows, with the +* array TAUP, represent the unitary matrix P as a product +* of elementary reflectors. +* If m < n, elements below the diagonal in the first NB +* columns, with the array TAUQ, represent the unitary +* matrix Q as a product of elementary reflectors, and +* elements on and above the diagonal in the first NB rows, +* with the array TAUP, represent the unitary matrix P as +* a product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* D (output) DOUBLE PRECISION array, dimension (NB) +* The diagonal elements of the first NB rows and columns of +* the reduced matrix. D(i) = A(i,i). +* +* E (output) DOUBLE PRECISION array, dimension (NB) +* The off-diagonal elements of the first NB rows and columns of +* the reduced matrix. +* +* TAUQ (output) COMPLEX*16 array dimension (NB) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix Q. See Further Details. +* +* TAUP (output) COMPLEX*16 array, dimension (NB) +* The scalar factors of the elementary reflectors which +* represent the unitary matrix P. See Further Details. +* +* X (output) COMPLEX*16 array, dimension (LDX,NB) +* The m-by-nb matrix X required to update the unreduced part +* of A. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,M). +* +* Y (output) COMPLEX*16 array, dimension (LDY,NB) +* The n-by-nb matrix Y required to update the unreduced part +* of A. +* +* LDY (output) INTEGER +* The leading dimension of the array Y. LDY >= max(1,N). +* +* Further Details +* =============== +* +* The matrices Q and P are represented as products of elementary +* reflectors: +* +* Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) +* +* Each H(i) and G(i) has the form: +* +* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +* +* where tauq and taup are complex scalars, and v and u are complex +* vectors. +* +* If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in +* A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in +* A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in +* A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in +* A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +* +* The elements of the vectors v and u together form the m-by-nb matrix +* V and the nb-by-n matrix U' which are needed, with X and Y, to apply +* the transformation to the unreduced part of the matrix, using a block +* update of the form: A := A - V*Y' - X*U'. +* +* The contents of A on exit are illustrated by the following examples +* with nb = 2: +* +* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +* +* ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) +* ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) +* ( v1 v2 a a a ) ( v1 1 a a a a ) +* ( v1 v2 a a a ) ( v1 v2 a a a a ) +* ( v1 v2 a a a ) ( v1 v2 a a a a ) +* ( v1 v2 a a a ) +* +* where a denotes an element of the original matrix which is unchanged, +* vi denotes an element of the vector defining H(i), and ui an element +* of the vector defining G(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL ZGEMV, ZLACGV, ZLARFG, ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( M.GE.N ) THEN +* +* Reduce to upper bidiagonal form +* + DO 10 I = 1, NB +* +* Update A(i:m,i) +* + CALL ZLACGV( I-1, Y( I, 1 ), LDY ) + CALL ZGEMV( 'No transpose', M-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, Y( I, 1 ), LDY, ONE, A( I, I ), 1 ) + CALL ZLACGV( I-1, Y( I, 1 ), LDY ) + CALL ZGEMV( 'No transpose', M-I+1, I-1, -ONE, X( I, 1 ), + $ LDX, A( 1, I ), 1, ONE, A( I, I ), 1 ) +* +* Generate reflection Q(i) to annihilate A(i+1:m,i) +* + ALPHA = A( I, I ) + CALL ZLARFG( M-I+1, ALPHA, A( MIN( I+1, M ), I ), 1, + $ TAUQ( I ) ) + D( I ) = ALPHA + IF( I.LT.N ) THEN + A( I, I ) = ONE +* +* Compute Y(i+1:n,i) +* + CALL ZGEMV( 'Conjugate transpose', M-I+1, N-I, ONE, + $ A( I, I+1 ), LDA, A( I, I ), 1, ZERO, + $ Y( I+1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', M-I+1, I-1, ONE, + $ A( I, 1 ), LDA, A( I, I ), 1, ZERO, + $ Y( 1, I ), 1 ) + CALL ZGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ), + $ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', M-I+1, I-1, ONE, + $ X( I, 1 ), LDX, A( I, I ), 1, ZERO, + $ Y( 1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', I-1, N-I, -ONE, + $ A( 1, I+1 ), LDA, Y( 1, I ), 1, ONE, + $ Y( I+1, I ), 1 ) + CALL ZSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 ) +* +* Update A(i,i+1:n) +* + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + CALL ZLACGV( I, A( I, 1 ), LDA ) + CALL ZGEMV( 'No transpose', N-I, I, -ONE, Y( I+1, 1 ), + $ LDY, A( I, 1 ), LDA, ONE, A( I, I+1 ), LDA ) + CALL ZLACGV( I, A( I, 1 ), LDA ) + CALL ZLACGV( I-1, X( I, 1 ), LDX ) + CALL ZGEMV( 'Conjugate transpose', I-1, N-I, -ONE, + $ A( 1, I+1 ), LDA, X( I, 1 ), LDX, ONE, + $ A( I, I+1 ), LDA ) + CALL ZLACGV( I-1, X( I, 1 ), LDX ) +* +* Generate reflection P(i) to annihilate A(i,i+2:n) +* + ALPHA = A( I, I+1 ) + CALL ZLARFG( N-I, ALPHA, A( I, MIN( I+2, N ) ), LDA, + $ TAUP( I ) ) + E( I ) = ALPHA + A( I, I+1 ) = ONE +* +* Compute X(i+1:m,i) +* + CALL ZGEMV( 'No transpose', M-I, N-I, ONE, A( I+1, I+1 ), + $ LDA, A( I, I+1 ), LDA, ZERO, X( I+1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', N-I, I, ONE, + $ Y( I+1, 1 ), LDY, A( I, I+1 ), LDA, ZERO, + $ X( 1, I ), 1 ) + CALL ZGEMV( 'No transpose', M-I, I, -ONE, A( I+1, 1 ), + $ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL ZGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 ) + CALL ZGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ), + $ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL ZSCAL( M-I, TAUP( I ), X( I+1, I ), 1 ) + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Reduce to lower bidiagonal form +* + DO 20 I = 1, NB +* +* Update A(i,i:n) +* + CALL ZLACGV( N-I+1, A( I, I ), LDA ) + CALL ZLACGV( I-1, A( I, 1 ), LDA ) + CALL ZGEMV( 'No transpose', N-I+1, I-1, -ONE, Y( I, 1 ), + $ LDY, A( I, 1 ), LDA, ONE, A( I, I ), LDA ) + CALL ZLACGV( I-1, A( I, 1 ), LDA ) + CALL ZLACGV( I-1, X( I, 1 ), LDX ) + CALL ZGEMV( 'Conjugate transpose', I-1, N-I+1, -ONE, + $ A( 1, I ), LDA, X( I, 1 ), LDX, ONE, A( I, I ), + $ LDA ) + CALL ZLACGV( I-1, X( I, 1 ), LDX ) +* +* Generate reflection P(i) to annihilate A(i,i+1:n) +* + ALPHA = A( I, I ) + CALL ZLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA, + $ TAUP( I ) ) + D( I ) = ALPHA + IF( I.LT.M ) THEN + A( I, I ) = ONE +* +* Compute X(i+1:m,i) +* + CALL ZGEMV( 'No transpose', M-I, N-I+1, ONE, A( I+1, I ), + $ LDA, A( I, I ), LDA, ZERO, X( I+1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', N-I+1, I-1, ONE, + $ Y( I, 1 ), LDY, A( I, I ), LDA, ZERO, + $ X( 1, I ), 1 ) + CALL ZGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL ZGEMV( 'No transpose', I-1, N-I+1, ONE, A( 1, I ), + $ LDA, A( I, I ), LDA, ZERO, X( 1, I ), 1 ) + CALL ZGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ), + $ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL ZSCAL( M-I, TAUP( I ), X( I+1, I ), 1 ) + CALL ZLACGV( N-I+1, A( I, I ), LDA ) +* +* Update A(i+1:m,i) +* + CALL ZLACGV( I-1, Y( I, 1 ), LDY ) + CALL ZGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, Y( I, 1 ), LDY, ONE, A( I+1, I ), 1 ) + CALL ZLACGV( I-1, Y( I, 1 ), LDY ) + CALL ZGEMV( 'No transpose', M-I, I, -ONE, X( I+1, 1 ), + $ LDX, A( 1, I ), 1, ONE, A( I+1, I ), 1 ) +* +* Generate reflection Q(i) to annihilate A(i+2:m,i) +* + ALPHA = A( I+1, I ) + CALL ZLARFG( M-I, ALPHA, A( MIN( I+2, M ), I ), 1, + $ TAUQ( I ) ) + E( I ) = ALPHA + A( I+1, I ) = ONE +* +* Compute Y(i+1:n,i) +* + CALL ZGEMV( 'Conjugate transpose', M-I, N-I, ONE, + $ A( I+1, I+1 ), LDA, A( I+1, I ), 1, ZERO, + $ Y( I+1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', M-I, I-1, ONE, + $ A( I+1, 1 ), LDA, A( I+1, I ), 1, ZERO, + $ Y( 1, I ), 1 ) + CALL ZGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ), + $ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', M-I, I, ONE, + $ X( I+1, 1 ), LDX, A( I+1, I ), 1, ZERO, + $ Y( 1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', I, N-I, -ONE, + $ A( 1, I+1 ), LDA, Y( 1, I ), 1, ONE, + $ Y( I+1, I ), 1 ) + CALL ZSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 ) + ELSE + CALL ZLACGV( N-I+1, A( I, I ), LDA ) + END IF + 20 CONTINUE + END IF + RETURN +* +* End of ZLABRD +* + END diff --git a/costa/native/external/lapack/zlacgv.f b/costa/native/external/lapack/zlacgv.f new file mode 100644 index 000000000..032f8c69c --- /dev/null +++ b/costa/native/external/lapack/zlacgv.f @@ -0,0 +1,61 @@ + SUBROUTINE ZLACGV( N, X, INCX ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + INTEGER INCX, N +* .. +* .. Array Arguments .. + COMPLEX*16 X( * ) +* .. +* +* Purpose +* ======= +* +* ZLACGV conjugates a complex vector of length N. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The length of the vector X. N >= 0. +* +* X (input/output) COMPLEX*16 array, dimension +* (1+(N-1)*abs(INCX)) +* On entry, the vector of length N to be conjugated. +* On exit, X is overwritten with conjg(X). +* +* INCX (input) INTEGER +* The spacing between successive elements of X. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IOFF +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* + IF( INCX.EQ.1 ) THEN + DO 10 I = 1, N + X( I ) = DCONJG( X( I ) ) + 10 CONTINUE + ELSE + IOFF = 1 + IF( INCX.LT.0 ) + $ IOFF = 1 - ( N-1 )*INCX + DO 20 I = 1, N + X( IOFF ) = DCONJG( X( IOFF ) ) + IOFF = IOFF + INCX + 20 CONTINUE + END IF + RETURN +* +* End of ZLACGV +* + END diff --git a/costa/native/external/lapack/zlacon.f b/costa/native/external/lapack/zlacon.f new file mode 100644 index 000000000..32c3c6788 --- /dev/null +++ b/costa/native/external/lapack/zlacon.f @@ -0,0 +1,211 @@ + SUBROUTINE ZLACON( N, V, X, EST, KASE ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER KASE, N + DOUBLE PRECISION EST +* .. +* .. Array Arguments .. + COMPLEX*16 V( N ), X( N ) +* .. +* +* Purpose +* ======= +* +* ZLACON estimates the 1-norm of a square, complex matrix A. +* Reverse communication is used for evaluating matrix-vector products. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. N >= 1. +* +* V (workspace) COMPLEX*16 array, dimension (N) +* On the final return, V = A*W, where EST = norm(V)/norm(W) +* (W is not returned). +* +* X (input/output) COMPLEX*16 array, dimension (N) +* On an intermediate return, X should be overwritten by +* A * X, if KASE=1, +* A' * X, if KASE=2, +* where A' is the conjugate transpose of A, and ZLACON must be +* re-called with all the other parameters unchanged. +* +* EST (output) DOUBLE PRECISION +* An estimate (a lower bound) for norm(A). +* +* KASE (input/output) INTEGER +* On the initial call to ZLACON, KASE should be 0. +* On an intermediate return, KASE will be 1 or 2, indicating +* whether X should be overwritten by A * X or A' * X. +* On the final return from ZLACON, KASE will again be 0. +* +* Further Details +* ======= ======= +* +* Contributed by Nick Higham, University of Manchester. +* Originally named CONEST, dated March 16, 1988. +* +* Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of +* a real or complex matrix, with applications to condition estimation", +* ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. +* +* Last modified: April, 1999 +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ONE, TWO + PARAMETER ( ONE = 1.0D0, TWO = 2.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, ITER, J, JLAST, JUMP + DOUBLE PRECISION ABSXI, ALTSGN, ESTOLD, SAFMIN, TEMP +* .. +* .. External Functions .. + INTEGER IZMAX1 + DOUBLE PRECISION DLAMCH, DZSUM1 + EXTERNAL IZMAX1, DLAMCH, DZSUM1 +* .. +* .. External Subroutines .. + EXTERNAL ZCOPY +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DIMAG +* .. +* .. Save statement .. + SAVE +* .. +* .. Executable Statements .. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + IF( KASE.EQ.0 ) THEN + DO 10 I = 1, N + X( I ) = DCMPLX( ONE / DBLE( N ) ) + 10 CONTINUE + KASE = 1 + JUMP = 1 + RETURN + END IF +* + GO TO ( 20, 40, 70, 90, 120 )JUMP +* +* ................ ENTRY (JUMP = 1) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. +* + 20 CONTINUE + IF( N.EQ.1 ) THEN + V( 1 ) = X( 1 ) + EST = ABS( V( 1 ) ) +* ... QUIT + GO TO 130 + END IF + EST = DZSUM1( N, X, 1 ) +* + DO 30 I = 1, N + ABSXI = ABS( X( I ) ) + IF( ABSXI.GT.SAFMIN ) THEN + X( I ) = DCMPLX( DBLE( X( I ) ) / ABSXI, + $ DIMAG( X( I ) ) / ABSXI ) + ELSE + X( I ) = CONE + END IF + 30 CONTINUE + KASE = 2 + JUMP = 2 + RETURN +* +* ................ ENTRY (JUMP = 2) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY ZTRANS(A)*X. +* + 40 CONTINUE + J = IZMAX1( N, X, 1 ) + ITER = 2 +* +* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. +* + 50 CONTINUE + DO 60 I = 1, N + X( I ) = CZERO + 60 CONTINUE + X( J ) = CONE + KASE = 1 + JUMP = 3 + RETURN +* +* ................ ENTRY (JUMP = 3) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 70 CONTINUE + CALL ZCOPY( N, X, 1, V, 1 ) + ESTOLD = EST + EST = DZSUM1( N, V, 1 ) +* +* TEST FOR CYCLING. + IF( EST.LE.ESTOLD ) + $ GO TO 100 +* + DO 80 I = 1, N + ABSXI = ABS( X( I ) ) + IF( ABSXI.GT.SAFMIN ) THEN + X( I ) = DCMPLX( DBLE( X( I ) ) / ABSXI, + $ DIMAG( X( I ) ) / ABSXI ) + ELSE + X( I ) = CONE + END IF + 80 CONTINUE + KASE = 2 + JUMP = 4 + RETURN +* +* ................ ENTRY (JUMP = 4) +* X HAS BEEN OVERWRITTEN BY ZTRANS(A)*X. +* + 90 CONTINUE + JLAST = J + J = IZMAX1( N, X, 1 ) + IF( ( ABS( X( JLAST ) ).NE.ABS( X( J ) ) ) .AND. + $ ( ITER.LT.ITMAX ) ) THEN + ITER = ITER + 1 + GO TO 50 + END IF +* +* ITERATION COMPLETE. FINAL STAGE. +* + 100 CONTINUE + ALTSGN = ONE + DO 110 I = 1, N + X( I ) = DCMPLX( ALTSGN*( ONE+DBLE( I-1 ) / DBLE( N-1 ) ) ) + ALTSGN = -ALTSGN + 110 CONTINUE + KASE = 1 + JUMP = 5 + RETURN +* +* ................ ENTRY (JUMP = 5) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 120 CONTINUE + TEMP = TWO*( DZSUM1( N, X, 1 ) / DBLE( 3*N ) ) + IF( TEMP.GT.EST ) THEN + CALL ZCOPY( N, X, 1, V, 1 ) + EST = TEMP + END IF +* + 130 CONTINUE + KASE = 0 + RETURN +* +* End of ZLACON +* + END diff --git a/costa/native/external/lapack/zlacp2.f b/costa/native/external/lapack/zlacp2.f new file mode 100644 index 000000000..a01057fb1 --- /dev/null +++ b/costa/native/external/lapack/zlacp2.f @@ -0,0 +1,92 @@ + SUBROUTINE ZLACP2( UPLO, M, N, A, LDA, B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDB, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) + COMPLEX*16 B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZLACP2 copies all or part of a real two-dimensional matrix A to a +* complex matrix B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies the part of the matrix A to be copied to B. +* = 'U': Upper triangular part +* = 'L': Lower triangular part +* Otherwise: All of the matrix A +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The m by n matrix A. If UPLO = 'U', only the upper trapezium +* is accessed; if UPLO = 'L', only the lower trapezium is +* accessed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (output) COMPLEX*16 array, dimension (LDB,N) +* On exit, B = A in the locations specified by UPLO. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( J, M ) + B( I, J ) = A( I, J ) + 10 CONTINUE + 20 CONTINUE +* + ELSE IF( LSAME( UPLO, 'L' ) ) THEN + DO 40 J = 1, N + DO 30 I = J, M + B( I, J ) = A( I, J ) + 30 CONTINUE + 40 CONTINUE +* + ELSE + DO 60 J = 1, N + DO 50 I = 1, M + B( I, J ) = A( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + RETURN +* +* End of ZLACP2 +* + END diff --git a/costa/native/external/lapack/zlacpy.f b/costa/native/external/lapack/zlacpy.f new file mode 100644 index 000000000..b0d264ce2 --- /dev/null +++ b/costa/native/external/lapack/zlacpy.f @@ -0,0 +1,91 @@ + SUBROUTINE ZLACPY( UPLO, M, N, A, LDA, B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDB, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZLACPY copies all or part of a two-dimensional matrix A to another +* matrix B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies the part of the matrix A to be copied to B. +* = 'U': Upper triangular part +* = 'L': Lower triangular part +* Otherwise: All of the matrix A +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The m by n matrix A. If UPLO = 'U', only the upper trapezium +* is accessed; if UPLO = 'L', only the lower trapezium is +* accessed. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (output) COMPLEX*16 array, dimension (LDB,N) +* On exit, B = A in the locations specified by UPLO. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,M). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( J, M ) + B( I, J ) = A( I, J ) + 10 CONTINUE + 20 CONTINUE +* + ELSE IF( LSAME( UPLO, 'L' ) ) THEN + DO 40 J = 1, N + DO 30 I = J, M + B( I, J ) = A( I, J ) + 30 CONTINUE + 40 CONTINUE +* + ELSE + DO 60 J = 1, N + DO 50 I = 1, M + B( I, J ) = A( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + RETURN +* +* End of ZLACPY +* + END diff --git a/costa/native/external/lapack/zlacrm.f b/costa/native/external/lapack/zlacrm.f new file mode 100644 index 000000000..951916541 --- /dev/null +++ b/costa/native/external/lapack/zlacrm.f @@ -0,0 +1,111 @@ + SUBROUTINE ZLACRM( M, N, A, LDA, B, LDB, C, LDC, RWORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER LDA, LDB, LDC, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION B( LDB, * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* ZLACRM performs a very simple matrix-matrix multiplication: +* C := A * B, +* where A is M by N and complex; B is N by N and real; +* C is M by N and complex. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A and of the matrix C. +* M >= 0. +* +* N (input) INTEGER +* The number of columns and rows of the matrix B and +* the number of columns of the matrix C. +* N >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA, N) +* A contains the M by N matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >=max(1,M). +* +* B (input) DOUBLE PRECISION array, dimension (LDB, N) +* B contains the N by N matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >=max(1,N). +* +* C (input) COMPLEX*16 array, dimension (LDC, N) +* C contains the M by N matrix C. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >=max(1,N). +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (2*M*N) +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, DIMAG +* .. +* .. External Subroutines .. + EXTERNAL DGEMM +* .. +* .. Executable Statements .. +* +* Quick return if possible. +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN +* + DO 20 J = 1, N + DO 10 I = 1, M + RWORK( ( J-1 )*M+I ) = DBLE( A( I, J ) ) + 10 CONTINUE + 20 CONTINUE +* + L = M*N + 1 + CALL DGEMM( 'N', 'N', M, N, N, ONE, RWORK, M, B, LDB, ZERO, + $ RWORK( L ), M ) + DO 40 J = 1, N + DO 30 I = 1, M + C( I, J ) = RWORK( L+( J-1 )*M+I-1 ) + 30 CONTINUE + 40 CONTINUE +* + DO 60 J = 1, N + DO 50 I = 1, M + RWORK( ( J-1 )*M+I ) = DIMAG( A( I, J ) ) + 50 CONTINUE + 60 CONTINUE + CALL DGEMM( 'N', 'N', M, N, N, ONE, RWORK, M, B, LDB, ZERO, + $ RWORK( L ), M ) + DO 80 J = 1, N + DO 70 I = 1, M + C( I, J ) = DCMPLX( DBLE( C( I, J ) ), + $ RWORK( L+( J-1 )*M+I-1 ) ) + 70 CONTINUE + 80 CONTINUE +* + RETURN +* +* End of ZLACRM +* + END diff --git a/costa/native/external/lapack/zlacrt.f b/costa/native/external/lapack/zlacrt.f new file mode 100644 index 000000000..174683600 --- /dev/null +++ b/costa/native/external/lapack/zlacrt.f @@ -0,0 +1,91 @@ + SUBROUTINE ZLACRT( N, CX, INCX, CY, INCY, C, S ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + INTEGER INCX, INCY, N + COMPLEX*16 C, S +* .. +* .. Array Arguments .. + COMPLEX*16 CX( * ), CY( * ) +* .. +* +* Purpose +* ======= +* +* ZLACRT performs the operation +* +* ( c s )( x ) ==> ( x ) +* ( -s c )( y ) ( y ) +* +* where c and s are complex and the vectors x and y are complex. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of elements in the vectors CX and CY. +* +* CX (input/output) COMPLEX*16 array, dimension (N) +* On input, the vector x. +* On output, CX is overwritten with c*x + s*y. +* +* INCX (input) INTEGER +* The increment between successive values of CX. INCX <> 0. +* +* CY (input/output) COMPLEX*16 array, dimension (N) +* On input, the vector y. +* On output, CY is overwritten with -s*x + c*y. +* +* INCY (input) INTEGER +* The increment between successive values of CY. INCY <> 0. +* +* C (input) COMPLEX*16 +* S (input) COMPLEX*16 +* C and S define the matrix +* [ C S ]. +* [ -S C ] +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IX, IY + COMPLEX*16 CTEMP +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) + $ RETURN + IF( INCX.EQ.1 .AND. INCY.EQ.1 ) + $ GO TO 20 +* +* Code for unequal increments or equal increments not equal to 1 +* + IX = 1 + IY = 1 + IF( INCX.LT.0 ) + $ IX = ( -N+1 )*INCX + 1 + IF( INCY.LT.0 ) + $ IY = ( -N+1 )*INCY + 1 + DO 10 I = 1, N + CTEMP = C*CX( IX ) + S*CY( IY ) + CY( IY ) = C*CY( IY ) - S*CX( IX ) + CX( IX ) = CTEMP + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + RETURN +* +* Code for both increments equal to 1 +* + 20 CONTINUE + DO 30 I = 1, N + CTEMP = C*CX( I ) + S*CY( I ) + CY( I ) = C*CY( I ) - S*CX( I ) + CX( I ) = CTEMP + 30 CONTINUE + RETURN + END diff --git a/costa/native/external/lapack/zladiv.f b/costa/native/external/lapack/zladiv.f new file mode 100644 index 000000000..31f3aacb9 --- /dev/null +++ b/costa/native/external/lapack/zladiv.f @@ -0,0 +1,47 @@ + DOUBLE COMPLEX FUNCTION ZLADIV( X, Y ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + COMPLEX*16 X, Y +* .. +* +* Purpose +* ======= +* +* ZLADIV := X / Y, where X and Y are complex. The computation of X / Y +* will not overflow on an intermediary step unless the results +* overflows. +* +* Arguments +* ========= +* +* X (input) COMPLEX*16 +* Y (input) COMPLEX*16 +* The complex scalars X and Y. +* +* ===================================================================== +* +* .. Local Scalars .. + DOUBLE PRECISION ZI, ZR +* .. +* .. External Subroutines .. + EXTERNAL DLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, DIMAG +* .. +* .. Executable Statements .. +* + CALL DLADIV( DBLE( X ), DIMAG( X ), DBLE( Y ), DIMAG( Y ), ZR, + $ ZI ) + ZLADIV = DCMPLX( ZR, ZI ) +* + RETURN +* +* End of ZLADIV +* + END diff --git a/costa/native/external/lapack/zlaed0.f b/costa/native/external/lapack/zlaed0.f new file mode 100644 index 000000000..68ecbd8e5 --- /dev/null +++ b/costa/native/external/lapack/zlaed0.f @@ -0,0 +1,289 @@ + SUBROUTINE ZLAED0( QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS, RWORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDQ, LDQS, N, QSIZ +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), RWORK( * ) + COMPLEX*16 Q( LDQ, * ), QSTORE( LDQS, * ) +* .. +* +* Purpose +* ======= +* +* Using the divide and conquer method, ZLAED0 computes all eigenvalues +* of a symmetric tridiagonal matrix which is one diagonal block of +* those from reducing a dense or band Hermitian matrix and +* corresponding eigenvectors of the dense or band matrix. +* +* Arguments +* ========= +* +* QSIZ (input) INTEGER +* The dimension of the unitary matrix used to reduce +* the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. +* +* N (input) INTEGER +* The dimension of the symmetric tridiagonal matrix. N >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the diagonal elements of the tridiagonal matrix. +* On exit, the eigenvalues in ascending order. +* +* E (input/output) DOUBLE PRECISION array, dimension (N-1) +* On entry, the off-diagonal elements of the tridiagonal matrix. +* On exit, E has been destroyed. +* +* Q (input/output) COMPLEX*16 array, dimension (LDQ,N) +* On entry, Q must contain an QSIZ x N matrix whose columns +* unitarily orthonormal. It is a part of the unitary matrix +* that reduces the full dense Hermitian matrix to a +* (reducible) symmetric tridiagonal matrix. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* IWORK (workspace) INTEGER array, +* the dimension of IWORK must be at least +* 6 + 6*N + 5*N*lg N +* ( lg( N ) = smallest integer k +* such that 2^k >= N ) +* +* RWORK (workspace) DOUBLE PRECISION array, +* dimension (1 + 3*N + 2*N*lg N + 3*N**2) +* ( lg( N ) = smallest integer k +* such that 2^k >= N ) +* +* QSTORE (workspace) COMPLEX*16 array, dimension (LDQS, N) +* Used to store parts of +* the eigenvector matrix when the updating matrix multiplies +* take place. +* +* LDQS (input) INTEGER +* The leading dimension of the array QSTORE. +* LDQS >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: The algorithm failed to compute an eigenvalue while +* working on the submatrix lying in rows and columns +* INFO/(N+1) through mod(INFO,N+1). +* +* ===================================================================== +* +* Warning: N could be as big as QSIZ! +* +* .. Parameters .. + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.D+0 ) +* .. +* .. Local Scalars .. + INTEGER CURLVL, CURPRB, CURR, I, IGIVCL, IGIVNM, + $ IGIVPT, INDXQ, IPERM, IPRMPT, IQ, IQPTR, IWREM, + $ J, K, LGN, LL, MATSIZ, MSD2, SMLSIZ, SMM1, + $ SPM1, SPM2, SUBMAT, SUBPBS, TLVLS + DOUBLE PRECISION TEMP +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSTEQR, XERBLA, ZCOPY, ZLACRM, ZLAED7 +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, LOG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* +* IF( ICOMPQ .LT. 0 .OR. ICOMPQ .GT. 2 ) THEN +* INFO = -1 +* ELSE IF( ( ICOMPQ .EQ. 1 ) .AND. ( QSIZ .LT. MAX( 0, N ) ) ) +* $ THEN + IF( QSIZ.LT.MAX( 0, N ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDQS.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLAED0', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + SMLSIZ = ILAENV( 9, 'ZLAED0', ' ', 0, 0, 0, 0 ) +* +* Determine the size and placement of the submatrices, and save in +* the leading elements of IWORK. +* + IWORK( 1 ) = N + SUBPBS = 1 + TLVLS = 0 + 10 CONTINUE + IF( IWORK( SUBPBS ).GT.SMLSIZ ) THEN + DO 20 J = SUBPBS, 1, -1 + IWORK( 2*J ) = ( IWORK( J )+1 ) / 2 + IWORK( 2*J-1 ) = IWORK( J ) / 2 + 20 CONTINUE + TLVLS = TLVLS + 1 + SUBPBS = 2*SUBPBS + GO TO 10 + END IF + DO 30 J = 2, SUBPBS + IWORK( J ) = IWORK( J ) + IWORK( J-1 ) + 30 CONTINUE +* +* Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 +* using rank-1 modifications (cuts). +* + SPM1 = SUBPBS - 1 + DO 40 I = 1, SPM1 + SUBMAT = IWORK( I ) + 1 + SMM1 = SUBMAT - 1 + D( SMM1 ) = D( SMM1 ) - ABS( E( SMM1 ) ) + D( SUBMAT ) = D( SUBMAT ) - ABS( E( SMM1 ) ) + 40 CONTINUE +* + INDXQ = 4*N + 3 +* +* Set up workspaces for eigenvalues only/accumulate new vectors +* routine +* + TEMP = LOG( DBLE( N ) ) / LOG( TWO ) + LGN = INT( TEMP ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IPRMPT = INDXQ + N + 1 + IPERM = IPRMPT + N*LGN + IQPTR = IPERM + N*LGN + IGIVPT = IQPTR + N + 2 + IGIVCL = IGIVPT + N*LGN +* + IGIVNM = 1 + IQ = IGIVNM + 2*N*LGN + IWREM = IQ + N**2 + 1 +* Initialize pointers + DO 50 I = 0, SUBPBS + IWORK( IPRMPT+I ) = 1 + IWORK( IGIVPT+I ) = 1 + 50 CONTINUE + IWORK( IQPTR ) = 1 +* +* Solve each submatrix eigenproblem at the bottom of the divide and +* conquer tree. +* + CURR = 0 + DO 70 I = 0, SPM1 + IF( I.EQ.0 ) THEN + SUBMAT = 1 + MATSIZ = IWORK( 1 ) + ELSE + SUBMAT = IWORK( I ) + 1 + MATSIZ = IWORK( I+1 ) - IWORK( I ) + END IF + LL = IQ - 1 + IWORK( IQPTR+CURR ) + CALL DSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ), + $ RWORK( LL ), MATSIZ, RWORK, INFO ) + CALL ZLACRM( QSIZ, MATSIZ, Q( 1, SUBMAT ), LDQ, RWORK( LL ), + $ MATSIZ, QSTORE( 1, SUBMAT ), LDQS, + $ RWORK( IWREM ) ) + IWORK( IQPTR+CURR+1 ) = IWORK( IQPTR+CURR ) + MATSIZ**2 + CURR = CURR + 1 + IF( INFO.GT.0 ) THEN + INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1 + RETURN + END IF + K = 1 + DO 60 J = SUBMAT, IWORK( I+1 ) + IWORK( INDXQ+J ) = K + K = K + 1 + 60 CONTINUE + 70 CONTINUE +* +* Successively merge eigensystems of adjacent submatrices +* into eigensystem for the corresponding larger matrix. +* +* while ( SUBPBS > 1 ) +* + CURLVL = 1 + 80 CONTINUE + IF( SUBPBS.GT.1 ) THEN + SPM2 = SUBPBS - 2 + DO 90 I = 0, SPM2, 2 + IF( I.EQ.0 ) THEN + SUBMAT = 1 + MATSIZ = IWORK( 2 ) + MSD2 = IWORK( 1 ) + CURPRB = 0 + ELSE + SUBMAT = IWORK( I ) + 1 + MATSIZ = IWORK( I+2 ) - IWORK( I ) + MSD2 = MATSIZ / 2 + CURPRB = CURPRB + 1 + END IF +* +* Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) +* into an eigensystem of size MATSIZ. ZLAED7 handles the case +* when the eigenvectors of a full or band Hermitian matrix (which +* was reduced to tridiagonal form) are desired. +* +* I am free to use Q as a valuable working space until Loop 150. +* + CALL ZLAED7( MATSIZ, MSD2, QSIZ, TLVLS, CURLVL, CURPRB, + $ D( SUBMAT ), QSTORE( 1, SUBMAT ), LDQS, + $ E( SUBMAT+MSD2-1 ), IWORK( INDXQ+SUBMAT ), + $ RWORK( IQ ), IWORK( IQPTR ), IWORK( IPRMPT ), + $ IWORK( IPERM ), IWORK( IGIVPT ), + $ IWORK( IGIVCL ), RWORK( IGIVNM ), + $ Q( 1, SUBMAT ), RWORK( IWREM ), + $ IWORK( SUBPBS+1 ), INFO ) + IF( INFO.GT.0 ) THEN + INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1 + RETURN + END IF + IWORK( I / 2+1 ) = IWORK( I+2 ) + 90 CONTINUE + SUBPBS = SUBPBS / 2 + CURLVL = CURLVL + 1 + GO TO 80 + END IF +* +* end while +* +* Re-merge the eigenvalues/vectors which were deflated at the final +* merge step. +* + DO 100 I = 1, N + J = IWORK( INDXQ+I ) + RWORK( I ) = D( J ) + CALL ZCOPY( QSIZ, QSTORE( 1, J ), 1, Q( 1, I ), 1 ) + 100 CONTINUE + CALL DCOPY( N, RWORK, 1, D, 1 ) +* + RETURN +* +* End of ZLAED0 +* + END diff --git a/costa/native/external/lapack/zlaed7.f b/costa/native/external/lapack/zlaed7.f new file mode 100644 index 000000000..86f5b1111 --- /dev/null +++ b/costa/native/external/lapack/zlaed7.f @@ -0,0 +1,267 @@ + SUBROUTINE ZLAED7( N, CUTPNT, QSIZ, TLVLS, CURLVL, CURPBM, D, Q, + $ LDQ, RHO, INDXQ, QSTORE, QPTR, PRMPTR, PERM, + $ GIVPTR, GIVCOL, GIVNUM, WORK, RWORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER CURLVL, CURPBM, CUTPNT, INFO, LDQ, N, QSIZ, + $ TLVLS + DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), GIVPTR( * ), INDXQ( * ), + $ IWORK( * ), PERM( * ), PRMPTR( * ), QPTR( * ) + DOUBLE PRECISION D( * ), GIVNUM( 2, * ), QSTORE( * ), RWORK( * ) + COMPLEX*16 Q( LDQ, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZLAED7 computes the updated eigensystem of a diagonal +* matrix after modification by a rank-one symmetric matrix. This +* routine is used only for the eigenproblem which requires all +* eigenvalues and optionally eigenvectors of a dense or banded +* Hermitian matrix that has been reduced to tridiagonal form. +* +* T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) +* +* where Z = Q'u, u is a vector of length N with ones in the +* CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. +* +* The eigenvectors of the original matrix are stored in Q, and the +* eigenvalues are in D. The algorithm consists of three stages: +* +* The first stage consists of deflating the size of the problem +* when there are multiple eigenvalues or if there is a zero in +* the Z vector. For each such occurence the dimension of the +* secular equation problem is reduced by one. This stage is +* performed by the routine DLAED2. +* +* The second stage consists of calculating the updated +* eigenvalues. This is done by finding the roots of the secular +* equation via the routine DLAED4 (as called by SLAED3). +* This routine also calculates the eigenvectors of the current +* problem. +* +* The final stage consists of computing the updated eigenvectors +* directly using the updated eigenvalues. The eigenvectors for +* the current problem are multiplied with the eigenvectors from +* the overall problem. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The dimension of the symmetric tridiagonal matrix. N >= 0. +* +* CUTPNT (input) INTEGER +* Contains the location of the last eigenvalue in the leading +* sub-matrix. min(1,N) <= CUTPNT <= N. +* +* QSIZ (input) INTEGER +* The dimension of the unitary matrix used to reduce +* the full matrix to tridiagonal form. QSIZ >= N. +* +* TLVLS (input) INTEGER +* The total number of merging levels in the overall divide and +* conquer tree. +* +* CURLVL (input) INTEGER +* The current level in the overall merge routine, +* 0 <= curlvl <= tlvls. +* +* CURPBM (input) INTEGER +* The current problem in the current level in the overall +* merge routine (counting from upper left to lower right). +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the eigenvalues of the rank-1-perturbed matrix. +* On exit, the eigenvalues of the repaired matrix. +* +* Q (input/output) COMPLEX*16 array, dimension (LDQ,N) +* On entry, the eigenvectors of the rank-1-perturbed matrix. +* On exit, the eigenvectors of the repaired tridiagonal matrix. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* RHO (input) DOUBLE PRECISION +* Contains the subdiagonal element used to create the rank-1 +* modification. +* +* INDXQ (output) INTEGER array, dimension (N) +* This contains the permutation which will reintegrate the +* subproblem just solved back into sorted order, +* ie. D( INDXQ( I = 1, N ) ) will be in ascending order. +* +* IWORK (workspace) INTEGER array, dimension (4*N) +* +* RWORK (workspace) DOUBLE PRECISION array, +* dimension (3*N+2*QSIZ*N) +* +* WORK (workspace) COMPLEX*16 array, dimension (QSIZ*N) +* +* QSTORE (input/output) DOUBLE PRECISION array, dimension (N**2+1) +* Stores eigenvectors of submatrices encountered during +* divide and conquer, packed together. QPTR points to +* beginning of the submatrices. +* +* QPTR (input/output) INTEGER array, dimension (N+2) +* List of indices pointing to beginning of submatrices stored +* in QSTORE. The submatrices are numbered starting at the +* bottom left of the divide and conquer tree, from left to +* right and bottom to top. +* +* PRMPTR (input) INTEGER array, dimension (N lg N) +* Contains a list of pointers which indicate where in PERM a +* level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) +* indicates the size of the permutation and also the size of +* the full, non-deflated problem. +* +* PERM (input) INTEGER array, dimension (N lg N) +* Contains the permutations (from deflation and sorting) to be +* applied to each eigenblock. +* +* GIVPTR (input) INTEGER array, dimension (N lg N) +* Contains a list of pointers which indicate where in GIVCOL a +* level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) +* indicates the number of Givens rotations. +* +* GIVCOL (input) INTEGER array, dimension (2, N lg N) +* Each pair of numbers indicates a pair of columns to take place +* in a Givens rotation. +* +* GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) +* Each number indicates the S value to be used in the +* corresponding Givens rotation. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = 1, an eigenvalue did not converge +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER COLTYP, CURR, I, IDLMDA, IND1, IND2, INDX, + $ INDXC, INDXP, IQ, IW, IZ, K, N1, N2, PTR +* .. +* .. External Subroutines .. + EXTERNAL DLAED9, DLAEDA, DLAMRG, XERBLA, ZLACRM, ZLAED8 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* +* IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN +* INFO = -1 +* ELSE IF( N.LT.0 ) THEN + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( MIN( 1, N ).GT.CUTPNT .OR. N.LT.CUTPNT ) THEN + INFO = -2 + ELSE IF( QSIZ.LT.N ) THEN + INFO = -3 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLAED7', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* The following values are for bookkeeping purposes only. They are +* integer pointers which indicate the portion of the workspace +* used by a particular array in DLAED2 and SLAED3. +* + IZ = 1 + IDLMDA = IZ + N + IW = IDLMDA + N + IQ = IW + N +* + INDX = 1 + INDXC = INDX + N + COLTYP = INDXC + N + INDXP = COLTYP + N +* +* Form the z-vector which consists of the last row of Q_1 and the +* first row of Q_2. +* + PTR = 1 + 2**TLVLS + DO 10 I = 1, CURLVL - 1 + PTR = PTR + 2**( TLVLS-I ) + 10 CONTINUE + CURR = PTR + CURPBM + CALL DLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR, + $ GIVCOL, GIVNUM, QSTORE, QPTR, RWORK( IZ ), + $ RWORK( IZ+N ), INFO ) +* +* When solving the final problem, we no longer need the stored data, +* so we will overwrite the data from this level onto the previously +* used storage space. +* + IF( CURLVL.EQ.TLVLS ) THEN + QPTR( CURR ) = 1 + PRMPTR( CURR ) = 1 + GIVPTR( CURR ) = 1 + END IF +* +* Sort and Deflate eigenvalues. +* + CALL ZLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, RWORK( IZ ), + $ RWORK( IDLMDA ), WORK, QSIZ, RWORK( IW ), + $ IWORK( INDXP ), IWORK( INDX ), INDXQ, + $ PERM( PRMPTR( CURR ) ), GIVPTR( CURR+1 ), + $ GIVCOL( 1, GIVPTR( CURR ) ), + $ GIVNUM( 1, GIVPTR( CURR ) ), INFO ) + PRMPTR( CURR+1 ) = PRMPTR( CURR ) + N + GIVPTR( CURR+1 ) = GIVPTR( CURR+1 ) + GIVPTR( CURR ) +* +* Solve Secular Equation. +* + IF( K.NE.0 ) THEN + CALL DLAED9( K, 1, K, N, D, RWORK( IQ ), K, RHO, + $ RWORK( IDLMDA ), RWORK( IW ), + $ QSTORE( QPTR( CURR ) ), K, INFO ) + CALL ZLACRM( QSIZ, K, WORK, QSIZ, QSTORE( QPTR( CURR ) ), K, Q, + $ LDQ, RWORK( IQ ) ) + QPTR( CURR+1 ) = QPTR( CURR ) + K**2 + IF( INFO.NE.0 ) THEN + RETURN + END IF +* +* Prepare the INDXQ sorting premutation. +* + N1 = K + N2 = N - K + IND1 = 1 + IND2 = N + CALL DLAMRG( N1, N2, D, 1, -1, INDXQ ) + ELSE + QPTR( CURR+1 ) = QPTR( CURR ) + DO 20 I = 1, N + INDXQ( I ) = I + 20 CONTINUE + END IF +* + RETURN +* +* End of ZLAED7 +* + END diff --git a/costa/native/external/lapack/zlaed8.f b/costa/native/external/lapack/zlaed8.f new file mode 100644 index 000000000..c485d5729 --- /dev/null +++ b/costa/native/external/lapack/zlaed8.f @@ -0,0 +1,364 @@ + SUBROUTINE ZLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA, + $ Q2, LDQ2, W, INDXP, INDX, INDXQ, PERM, GIVPTR, + $ GIVCOL, GIVNUM, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, +* Courant Institute, NAG Ltd., and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER CUTPNT, GIVPTR, INFO, K, LDQ, LDQ2, N, QSIZ + DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), INDX( * ), INDXP( * ), + $ INDXQ( * ), PERM( * ) + DOUBLE PRECISION D( * ), DLAMDA( * ), GIVNUM( 2, * ), W( * ), + $ Z( * ) + COMPLEX*16 Q( LDQ, * ), Q2( LDQ2, * ) +* .. +* +* Purpose +* ======= +* +* ZLAED8 merges the two sets of eigenvalues together into a single +* sorted set. Then it tries to deflate the size of the problem. +* There are two ways in which deflation can occur: when two or more +* eigenvalues are close together or if there is a tiny element in the +* Z vector. For each such occurrence the order of the related secular +* equation problem is reduced by one. +* +* Arguments +* ========= +* +* K (output) INTEGER +* Contains the number of non-deflated eigenvalues. +* This is the order of the related secular equation. +* +* N (input) INTEGER +* The dimension of the symmetric tridiagonal matrix. N >= 0. +* +* QSIZ (input) INTEGER +* The dimension of the unitary matrix used to reduce +* the dense or band matrix to tridiagonal form. +* QSIZ >= N if ICOMPQ = 1. +* +* Q (input/output) COMPLEX*16 array, dimension (LDQ,N) +* On entry, Q contains the eigenvectors of the partially solved +* system which has been previously updated in matrix +* multiplies with other partially solved eigensystems. +* On exit, Q contains the trailing (N-K) updated eigenvectors +* (those which were deflated) in its last N-K columns. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max( 1, N ). +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, D contains the eigenvalues of the two submatrices to +* be combined. On exit, D contains the trailing (N-K) updated +* eigenvalues (those which were deflated) sorted into increasing +* order. +* +* RHO (input/output) DOUBLE PRECISION +* Contains the off diagonal element associated with the rank-1 +* cut which originally split the two submatrices which are now +* being recombined. RHO is modified during the computation to +* the value required by DLAED3. +* +* CUTPNT (input) INTEGER +* Contains the location of the last eigenvalue in the leading +* sub-matrix. MIN(1,N) <= CUTPNT <= N. +* +* Z (input) DOUBLE PRECISION array, dimension (N) +* On input this vector contains the updating vector (the last +* row of the first sub-eigenvector matrix and the first row of +* the second sub-eigenvector matrix). The contents of Z are +* destroyed during the updating process. +* +* DLAMDA (output) DOUBLE PRECISION array, dimension (N) +* Contains a copy of the first K eigenvalues which will be used +* by DLAED3 to form the secular equation. +* +* Q2 (output) COMPLEX*16 array, dimension (LDQ2,N) +* If ICOMPQ = 0, Q2 is not referenced. Otherwise, +* Contains a copy of the first K eigenvectors which will be used +* by DLAED7 in a matrix multiply (DGEMM) to update the new +* eigenvectors. +* +* LDQ2 (input) INTEGER +* The leading dimension of the array Q2. LDQ2 >= max( 1, N ). +* +* W (output) DOUBLE PRECISION array, dimension (N) +* This will hold the first k values of the final +* deflation-altered z-vector and will be passed to DLAED3. +* +* INDXP (workspace) INTEGER array, dimension (N) +* This will contain the permutation used to place deflated +* values of D at the end of the array. On output INDXP(1:K) +* points to the nondeflated D-values and INDXP(K+1:N) +* points to the deflated eigenvalues. +* +* INDX (workspace) INTEGER array, dimension (N) +* This will contain the permutation used to sort the contents of +* D into ascending order. +* +* INDXQ (input) INTEGER array, dimension (N) +* This contains the permutation which separately sorts the two +* sub-problems in D into ascending order. Note that elements in +* the second half of this permutation must first have CUTPNT +* added to their values in order to be accurate. +* +* PERM (output) INTEGER array, dimension (N) +* Contains the permutations (from deflation and sorting) to be +* applied to each eigenblock. +* +* GIVPTR (output) INTEGER +* Contains the number of Givens rotations which took place in +* this subproblem. +* +* GIVCOL (output) INTEGER array, dimension (2, N) +* Each pair of numbers indicates a pair of columns to take place +* in a Givens rotation. +* +* GIVNUM (output) DOUBLE PRECISION array, dimension (2, N) +* Each number indicates the S value to be used in the +* corresponding Givens rotation. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION MONE, ZERO, ONE, TWO, EIGHT + PARAMETER ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0, + $ TWO = 2.0D0, EIGHT = 8.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I, IMAX, J, JLAM, JMAX, JP, K2, N1, N1P1, N2 + DOUBLE PRECISION C, EPS, S, T, TAU, TOL +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DLAPY2 + EXTERNAL IDAMAX, DLAMCH, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLAMRG, DSCAL, XERBLA, ZCOPY, ZDROT, + $ ZLACPY +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( QSIZ.LT.N ) THEN + INFO = -3 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( CUTPNT.LT.MIN( 1, N ) .OR. CUTPNT.GT.N ) THEN + INFO = -8 + ELSE IF( LDQ2.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLAED8', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + N1 = CUTPNT + N2 = N - N1 + N1P1 = N1 + 1 +* + IF( RHO.LT.ZERO ) THEN + CALL DSCAL( N2, MONE, Z( N1P1 ), 1 ) + END IF +* +* Normalize z so that norm(z) = 1 +* + T = ONE / SQRT( TWO ) + DO 10 J = 1, N + INDX( J ) = J + 10 CONTINUE + CALL DSCAL( N, T, Z, 1 ) + RHO = ABS( TWO*RHO ) +* +* Sort the eigenvalues into increasing order +* + DO 20 I = CUTPNT + 1, N + INDXQ( I ) = INDXQ( I ) + CUTPNT + 20 CONTINUE + DO 30 I = 1, N + DLAMDA( I ) = D( INDXQ( I ) ) + W( I ) = Z( INDXQ( I ) ) + 30 CONTINUE + I = 1 + J = CUTPNT + 1 + CALL DLAMRG( N1, N2, DLAMDA, 1, 1, INDX ) + DO 40 I = 1, N + D( I ) = DLAMDA( INDX( I ) ) + Z( I ) = W( INDX( I ) ) + 40 CONTINUE +* +* Calculate the allowable deflation tolerance +* + IMAX = IDAMAX( N, Z, 1 ) + JMAX = IDAMAX( N, D, 1 ) + EPS = DLAMCH( 'Epsilon' ) + TOL = EIGHT*EPS*ABS( D( JMAX ) ) +* +* If the rank-1 modifier is small enough, no more needs to be done +* -- except to reorganize Q so that its columns correspond with the +* elements in D. +* + IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN + K = 0 + DO 50 J = 1, N + PERM( J ) = INDXQ( INDX( J ) ) + CALL ZCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 ) + 50 CONTINUE + CALL ZLACPY( 'A', QSIZ, N, Q2( 1, 1 ), LDQ2, Q( 1, 1 ), LDQ ) + RETURN + END IF +* +* If there are multiple eigenvalues then the problem deflates. Here +* the number of equal eigenvalues are found. As each equal +* eigenvalue is found, an elementary reflector is computed to rotate +* the corresponding eigensubspace so that the corresponding +* components of Z are zero in this new basis. +* + K = 0 + GIVPTR = 0 + K2 = N + 1 + DO 60 J = 1, N + IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + INDXP( K2 ) = J + IF( J.EQ.N ) + $ GO TO 100 + ELSE + JLAM = J + GO TO 70 + END IF + 60 CONTINUE + 70 CONTINUE + J = J + 1 + IF( J.GT.N ) + $ GO TO 90 + IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + INDXP( K2 ) = J + ELSE +* +* Check if eigenvalues are close enough to allow deflation. +* + S = Z( JLAM ) + C = Z( J ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = DLAPY2( C, S ) + T = D( J ) - D( JLAM ) + C = C / TAU + S = -S / TAU + IF( ABS( T*C*S ).LE.TOL ) THEN +* +* Deflation is possible. +* + Z( J ) = TAU + Z( JLAM ) = ZERO +* +* Record the appropriate Givens rotation +* + GIVPTR = GIVPTR + 1 + GIVCOL( 1, GIVPTR ) = INDXQ( INDX( JLAM ) ) + GIVCOL( 2, GIVPTR ) = INDXQ( INDX( J ) ) + GIVNUM( 1, GIVPTR ) = C + GIVNUM( 2, GIVPTR ) = S + CALL ZDROT( QSIZ, Q( 1, INDXQ( INDX( JLAM ) ) ), 1, + $ Q( 1, INDXQ( INDX( J ) ) ), 1, C, S ) + T = D( JLAM )*C*C + D( J )*S*S + D( J ) = D( JLAM )*S*S + D( J )*C*C + D( JLAM ) = T + K2 = K2 - 1 + I = 1 + 80 CONTINUE + IF( K2+I.LE.N ) THEN + IF( D( JLAM ).LT.D( INDXP( K2+I ) ) ) THEN + INDXP( K2+I-1 ) = INDXP( K2+I ) + INDXP( K2+I ) = JLAM + I = I + 1 + GO TO 80 + ELSE + INDXP( K2+I-1 ) = JLAM + END IF + ELSE + INDXP( K2+I-1 ) = JLAM + END IF + JLAM = J + ELSE + K = K + 1 + W( K ) = Z( JLAM ) + DLAMDA( K ) = D( JLAM ) + INDXP( K ) = JLAM + JLAM = J + END IF + END IF + GO TO 70 + 90 CONTINUE +* +* Record the last eigenvalue. +* + K = K + 1 + W( K ) = Z( JLAM ) + DLAMDA( K ) = D( JLAM ) + INDXP( K ) = JLAM +* + 100 CONTINUE +* +* Sort the eigenvalues and corresponding eigenvectors into DLAMDA +* and Q2 respectively. The eigenvalues/vectors which were not +* deflated go into the first K slots of DLAMDA and Q2 respectively, +* while those which were deflated go into the last N - K slots. +* + DO 110 J = 1, N + JP = INDXP( J ) + DLAMDA( J ) = D( JP ) + PERM( J ) = INDXQ( INDX( JP ) ) + CALL ZCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 ) + 110 CONTINUE +* +* The deflated eigenvalues and their corresponding vectors go back +* into the last N - K slots of D and Q respectively. +* + IF( K.LT.N ) THEN + CALL DCOPY( N-K, DLAMDA( K+1 ), 1, D( K+1 ), 1 ) + CALL ZLACPY( 'A', QSIZ, N-K, Q2( 1, K+1 ), LDQ2, Q( 1, K+1 ), + $ LDQ ) + END IF +* + RETURN +* +* End of ZLAED8 +* + END diff --git a/costa/native/external/lapack/zlaein.f b/costa/native/external/lapack/zlaein.f new file mode 100644 index 000000000..577ec1186 --- /dev/null +++ b/costa/native/external/lapack/zlaein.f @@ -0,0 +1,264 @@ + SUBROUTINE ZLAEIN( RIGHTV, NOINIT, N, H, LDH, W, V, B, LDB, RWORK, + $ EPS3, SMLNUM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + LOGICAL NOINIT, RIGHTV + INTEGER INFO, LDB, LDH, N + DOUBLE PRECISION EPS3, SMLNUM + COMPLEX*16 W +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 B( LDB, * ), H( LDH, * ), V( * ) +* .. +* +* Purpose +* ======= +* +* ZLAEIN uses inverse iteration to find a right or left eigenvector +* corresponding to the eigenvalue W of a complex upper Hessenberg +* matrix H. +* +* Arguments +* ========= +* +* RIGHTV (input) LOGICAL +* = .TRUE. : compute right eigenvector; +* = .FALSE.: compute left eigenvector. +* +* NOINIT (input) LOGICAL +* = .TRUE. : no initial vector supplied in V +* = .FALSE.: initial vector supplied in V. +* +* N (input) INTEGER +* The order of the matrix H. N >= 0. +* +* H (input) COMPLEX*16 array, dimension (LDH,N) +* The upper Hessenberg matrix H. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH >= max(1,N). +* +* W (input) COMPLEX*16 +* The eigenvalue of H whose corresponding right or left +* eigenvector is to be computed. +* +* V (input/output) COMPLEX*16 array, dimension (N) +* On entry, if NOINIT = .FALSE., V must contain a starting +* vector for inverse iteration; otherwise V need not be set. +* On exit, V contains the computed eigenvector, normalized so +* that the component of largest magnitude has magnitude 1; here +* the magnitude of a complex number (x,y) is taken to be +* |x| + |y|. +* +* B (workspace) COMPLEX*16 array, dimension (LDB,N) +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* EPS3 (input) DOUBLE PRECISION +* A small machine-dependent value which is used to perturb +* close eigenvalues, and to replace zero pivots. +* +* SMLNUM (input) DOUBLE PRECISION +* A machine-dependent value close to the underflow threshold. +* +* INFO (output) INTEGER +* = 0: successful exit +* = 1: inverse iteration did not converge; V is set to the +* last iterate. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, TENTH + PARAMETER ( ONE = 1.0D+0, TENTH = 1.0D-1 ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + CHARACTER NORMIN, TRANS + INTEGER I, IERR, ITS, J + DOUBLE PRECISION GROWTO, NRMSML, ROOTN, RTEMP, SCALE, VNORM + COMPLEX*16 CDUM, EI, EJ, TEMP, X +* .. +* .. External Functions .. + INTEGER IZAMAX + DOUBLE PRECISION DZASUM, DZNRM2 + COMPLEX*16 ZLADIV + EXTERNAL IZAMAX, DZASUM, DZNRM2, ZLADIV +* .. +* .. External Subroutines .. + EXTERNAL ZDSCAL, ZLATRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* GROWTO is the threshold used in the acceptance test for an +* eigenvector. +* + ROOTN = SQRT( DBLE( N ) ) + GROWTO = TENTH / ROOTN + NRMSML = MAX( ONE, EPS3*ROOTN )*SMLNUM +* +* Form B = H - W*I (except that the subdiagonal elements are not +* stored). +* + DO 20 J = 1, N + DO 10 I = 1, J - 1 + B( I, J ) = H( I, J ) + 10 CONTINUE + B( J, J ) = H( J, J ) - W + 20 CONTINUE +* + IF( NOINIT ) THEN +* +* Initialize V. +* + DO 30 I = 1, N + V( I ) = EPS3 + 30 CONTINUE + ELSE +* +* Scale supplied initial vector. +* + VNORM = DZNRM2( N, V, 1 ) + CALL ZDSCAL( N, ( EPS3*ROOTN ) / MAX( VNORM, NRMSML ), V, 1 ) + END IF +* + IF( RIGHTV ) THEN +* +* LU decomposition with partial pivoting of B, replacing zero +* pivots by EPS3. +* + DO 60 I = 1, N - 1 + EI = H( I+1, I ) + IF( CABS1( B( I, I ) ).LT.CABS1( EI ) ) THEN +* +* Interchange rows and eliminate. +* + X = ZLADIV( B( I, I ), EI ) + B( I, I ) = EI + DO 40 J = I + 1, N + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - X*TEMP + B( I, J ) = TEMP + 40 CONTINUE + ELSE +* +* Eliminate without interchange. +* + IF( B( I, I ).EQ.ZERO ) + $ B( I, I ) = EPS3 + X = ZLADIV( EI, B( I, I ) ) + IF( X.NE.ZERO ) THEN + DO 50 J = I + 1, N + B( I+1, J ) = B( I+1, J ) - X*B( I, J ) + 50 CONTINUE + END IF + END IF + 60 CONTINUE + IF( B( N, N ).EQ.ZERO ) + $ B( N, N ) = EPS3 +* + TRANS = 'N' +* + ELSE +* +* UL decomposition with partial pivoting of B, replacing zero +* pivots by EPS3. +* + DO 90 J = N, 2, -1 + EJ = H( J, J-1 ) + IF( CABS1( B( J, J ) ).LT.CABS1( EJ ) ) THEN +* +* Interchange columns and eliminate. +* + X = ZLADIV( B( J, J ), EJ ) + B( J, J ) = EJ + DO 70 I = 1, J - 1 + TEMP = B( I, J-1 ) + B( I, J-1 ) = B( I, J ) - X*TEMP + B( I, J ) = TEMP + 70 CONTINUE + ELSE +* +* Eliminate without interchange. +* + IF( B( J, J ).EQ.ZERO ) + $ B( J, J ) = EPS3 + X = ZLADIV( EJ, B( J, J ) ) + IF( X.NE.ZERO ) THEN + DO 80 I = 1, J - 1 + B( I, J-1 ) = B( I, J-1 ) - X*B( I, J ) + 80 CONTINUE + END IF + END IF + 90 CONTINUE + IF( B( 1, 1 ).EQ.ZERO ) + $ B( 1, 1 ) = EPS3 +* + TRANS = 'C' +* + END IF +* + NORMIN = 'N' + DO 110 ITS = 1, N +* +* Solve U*x = scale*v for a right eigenvector +* or U'*x = scale*v for a left eigenvector, +* overwriting x on v. +* + CALL ZLATRS( 'Upper', TRANS, 'Nonunit', NORMIN, N, B, LDB, V, + $ SCALE, RWORK, IERR ) + NORMIN = 'Y' +* +* Test for sufficient growth in the norm of v. +* + VNORM = DZASUM( N, V, 1 ) + IF( VNORM.GE.GROWTO*SCALE ) + $ GO TO 120 +* +* Choose new orthogonal starting vector and try again. +* + RTEMP = EPS3 / ( ROOTN+ONE ) + V( 1 ) = EPS3 + DO 100 I = 2, N + V( I ) = RTEMP + 100 CONTINUE + V( N-ITS+1 ) = V( N-ITS+1 ) - EPS3*ROOTN + 110 CONTINUE +* +* Failure to find eigenvector in N iterations. +* + INFO = 1 +* + 120 CONTINUE +* +* Normalize eigenvector. +* + I = IZAMAX( N, V, 1 ) + CALL ZDSCAL( N, ONE / CABS1( V( I ) ), V, 1 ) +* + RETURN +* +* End of ZLAEIN +* + END diff --git a/costa/native/external/lapack/zlaesy.f b/costa/native/external/lapack/zlaesy.f new file mode 100644 index 000000000..d947e150d --- /dev/null +++ b/costa/native/external/lapack/zlaesy.f @@ -0,0 +1,153 @@ + SUBROUTINE ZLAESY( A, B, C, RT1, RT2, EVSCAL, CS1, SN1 ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + COMPLEX*16 A, B, C, CS1, EVSCAL, RT1, RT2, SN1 +* .. +* +* Purpose +* ======= +* +* ZLAESY computes the eigendecomposition of a 2-by-2 symmetric matrix +* ( ( A, B );( B, C ) ) +* provided the norm of the matrix of eigenvectors is larger than +* some threshold value. +* +* RT1 is the eigenvalue of larger absolute value, and RT2 of +* smaller absolute value. If the eigenvectors are computed, then +* on return ( CS1, SN1 ) is the unit eigenvector for RT1, hence +* +* [ CS1 SN1 ] . [ A B ] . [ CS1 -SN1 ] = [ RT1 0 ] +* [ -SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ] +* +* Arguments +* ========= +* +* A (input) COMPLEX*16 +* The ( 1, 1 ) element of input matrix. +* +* B (input) COMPLEX*16 +* The ( 1, 2 ) element of input matrix. The ( 2, 1 ) element +* is also given by B, since the 2-by-2 matrix is symmetric. +* +* C (input) COMPLEX*16 +* The ( 2, 2 ) element of input matrix. +* +* RT1 (output) COMPLEX*16 +* The eigenvalue of larger modulus. +* +* RT2 (output) COMPLEX*16 +* The eigenvalue of smaller modulus. +* +* EVSCAL (output) COMPLEX*16 +* The complex value by which the eigenvector matrix was scaled +* to make it orthonormal. If EVSCAL is zero, the eigenvectors +* were not computed. This means one of two things: the 2-by-2 +* matrix could not be diagonalized, or the norm of the matrix +* of eigenvectors before scaling was larger than the threshold +* value THRESH (set below). +* +* CS1 (output) COMPLEX*16 +* SN1 (output) COMPLEX*16 +* If EVSCAL .NE. 0, ( CS1, SN1 ) is the unit right eigenvector +* for RT1. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D0, 0.0D0 ) ) + DOUBLE PRECISION HALF + PARAMETER ( HALF = 0.5D0 ) + DOUBLE PRECISION THRESH + PARAMETER ( THRESH = 0.1D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION BABS, EVNORM, TABS, Z + COMPLEX*16 S, T, TMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* +* Special case: The matrix is actually diagonal. +* To avoid divide by zero later, we treat this case separately. +* + IF( ABS( B ).EQ.ZERO ) THEN + RT1 = A + RT2 = C + IF( ABS( RT1 ).LT.ABS( RT2 ) ) THEN + TMP = RT1 + RT1 = RT2 + RT2 = TMP + CS1 = ZERO + SN1 = ONE + ELSE + CS1 = ONE + SN1 = ZERO + END IF + ELSE +* +* Compute the eigenvalues and eigenvectors. +* The characteristic equation is +* lambda **2 - (A+C) lambda + (A*C - B*B) +* and we solve it using the quadratic formula. +* + S = ( A+C )*HALF + T = ( A-C )*HALF +* +* Take the square root carefully to avoid over/under flow. +* + BABS = ABS( B ) + TABS = ABS( T ) + Z = MAX( BABS, TABS ) + IF( Z.GT.ZERO ) + $ T = Z*SQRT( ( T / Z )**2+( B / Z )**2 ) +* +* Compute the two eigenvalues. RT1 and RT2 are exchanged +* if necessary so that RT1 will have the greater magnitude. +* + RT1 = S + T + RT2 = S - T + IF( ABS( RT1 ).LT.ABS( RT2 ) ) THEN + TMP = RT1 + RT1 = RT2 + RT2 = TMP + END IF +* +* Choose CS1 = 1 and SN1 to satisfy the first equation, then +* scale the components of this eigenvector so that the matrix +* of eigenvectors X satisfies X * X' = I . (No scaling is +* done if the norm of the eigenvalue matrix is less than THRESH.) +* + SN1 = ( RT1-A ) / B + TABS = ABS( SN1 ) + IF( TABS.GT.ONE ) THEN + T = TABS*SQRT( ( ONE / TABS )**2+( SN1 / TABS )**2 ) + ELSE + T = SQRT( CONE+SN1*SN1 ) + END IF + EVNORM = ABS( T ) + IF( EVNORM.GE.THRESH ) THEN + EVSCAL = CONE / T + CS1 = EVSCAL + SN1 = SN1*EVSCAL + ELSE + EVSCAL = ZERO + END IF + END IF + RETURN +* +* End of ZLAESY +* + END diff --git a/costa/native/external/lapack/zlaev2.f b/costa/native/external/lapack/zlaev2.f new file mode 100644 index 000000000..372566512 --- /dev/null +++ b/costa/native/external/lapack/zlaev2.f @@ -0,0 +1,96 @@ + SUBROUTINE ZLAEV2( A, B, C, RT1, RT2, CS1, SN1 ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + DOUBLE PRECISION CS1, RT1, RT2 + COMPLEX*16 A, B, C, SN1 +* .. +* +* Purpose +* ======= +* +* ZLAEV2 computes the eigendecomposition of a 2-by-2 Hermitian matrix +* [ A B ] +* [ CONJG(B) C ]. +* On return, RT1 is the eigenvalue of larger absolute value, RT2 is the +* eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right +* eigenvector for RT1, giving the decomposition +* +* [ CS1 CONJG(SN1) ] [ A B ] [ CS1 -CONJG(SN1) ] = [ RT1 0 ] +* [-SN1 CS1 ] [ CONJG(B) C ] [ SN1 CS1 ] [ 0 RT2 ]. +* +* Arguments +* ========= +* +* A (input) COMPLEX*16 +* The (1,1) element of the 2-by-2 matrix. +* +* B (input) COMPLEX*16 +* The (1,2) element and the conjugate of the (2,1) element of +* the 2-by-2 matrix. +* +* C (input) COMPLEX*16 +* The (2,2) element of the 2-by-2 matrix. +* +* RT1 (output) DOUBLE PRECISION +* The eigenvalue of larger absolute value. +* +* RT2 (output) DOUBLE PRECISION +* The eigenvalue of smaller absolute value. +* +* CS1 (output) DOUBLE PRECISION +* SN1 (output) COMPLEX*16 +* The vector (CS1, SN1) is a unit right eigenvector for RT1. +* +* Further Details +* =============== +* +* RT1 is accurate to a few ulps barring over/underflow. +* +* RT2 may be inaccurate if there is massive cancellation in the +* determinant A*C-B*B; higher precision or correctly rounded or +* correctly truncated arithmetic would be needed to compute RT2 +* accurately in all cases. +* +* CS1 and SN1 are accurate to a few ulps barring over/underflow. +* +* Overflow is possible only if RT1 is within a factor of 5 of overflow. +* Underflow is harmless if the input data is 0 or exceeds +* underflow_threshold / macheps. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION T + COMPLEX*16 W +* .. +* .. External Subroutines .. + EXTERNAL DLAEV2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG +* .. +* .. Executable Statements .. +* + IF( ABS( B ).EQ.ZERO ) THEN + W = ONE + ELSE + W = DCONJG( B ) / ABS( B ) + END IF + CALL DLAEV2( DBLE( A ), ABS( B ), DBLE( C ), RT1, RT2, CS1, T ) + SN1 = W*T + RETURN +* +* End of ZLAEV2 +* + END diff --git a/costa/native/external/lapack/zlags2.f b/costa/native/external/lapack/zlags2.f new file mode 100644 index 000000000..ba3f96c0d --- /dev/null +++ b/costa/native/external/lapack/zlags2.f @@ -0,0 +1,309 @@ + SUBROUTINE ZLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, CSV, + $ SNV, CSQ, SNQ ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + LOGICAL UPPER + DOUBLE PRECISION A1, A3, B1, B3, CSQ, CSU, CSV + COMPLEX*16 A2, B2, SNQ, SNU, SNV +* .. +* +* Purpose +* ======= +* +* ZLAGS2 computes 2-by-2 unitary matrices U, V and Q, such +* that if ( UPPER ) then +* +* U'*A*Q = U'*( A1 A2 )*Q = ( x 0 ) +* ( 0 A3 ) ( x x ) +* and +* V'*B*Q = V'*( B1 B2 )*Q = ( x 0 ) +* ( 0 B3 ) ( x x ) +* +* or if ( .NOT.UPPER ) then +* +* U'*A*Q = U'*( A1 0 )*Q = ( x x ) +* ( A2 A3 ) ( 0 x ) +* and +* V'*B*Q = V'*( B1 0 )*Q = ( x x ) +* ( B2 B3 ) ( 0 x ) +* where +* +* U = ( CSU SNU ), V = ( CSV SNV ), +* ( -CONJG(SNU) CSU ) ( -CONJG(SNV) CSV ) +* +* Q = ( CSQ SNQ ) +* ( -CONJG(SNQ) CSQ ) +* +* Z' denotes the conjugate transpose of Z. +* +* The rows of the transformed A and B are parallel. Moreover, if the +* input 2-by-2 matrix A is not zero, then the transformed (1,1) entry +* of A is not zero. If the input matrices A and B are both not zero, +* then the transformed (2,2) element of B is not zero, except when the +* first rows of input A and B are parallel and the second rows are +* zero. +* +* Arguments +* ========= +* +* UPPER (input) LOGICAL +* = .TRUE.: the input matrices A and B are upper triangular. +* = .FALSE.: the input matrices A and B are lower triangular. +* +* A1 (input) DOUBLE PRECISION +* A2 (input) COMPLEX*16 +* A3 (input) DOUBLE PRECISION +* On entry, A1, A2 and A3 are elements of the input 2-by-2 +* upper (lower) triangular matrix A. +* +* B1 (input) DOUBLE PRECISION +* B2 (input) COMPLEX*16 +* B3 (input) DOUBLE PRECISION +* On entry, B1, B2 and B3 are elements of the input 2-by-2 +* upper (lower) triangular matrix B. +* +* CSU (output) DOUBLE PRECISION +* SNU (output) COMPLEX*16 +* The desired unitary matrix U. +* +* CSV (output) DOUBLE PRECISION +* SNV (output) COMPLEX*16 +* The desired unitary matrix V. +* +* CSQ (output) DOUBLE PRECISION +* SNQ (output) COMPLEX*16 +* The desired unitary matrix Q. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION A, AUA11, AUA12, AUA21, AUA22, AVB12, AVB11, + $ AVB21, AVB22, CSL, CSR, D, FB, FC, S1, S2, + $ SNL, SNR, UA11R, UA22R, VB11R, VB22R + COMPLEX*16 B, C, D1, R, T, UA11, UA12, UA21, UA22, VB11, + $ VB12, VB21, VB22 +* .. +* .. External Subroutines .. + EXTERNAL DLASV2, ZLARTG +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG +* .. +* .. Statement Functions .. + DOUBLE PRECISION ABS1 +* .. +* .. Statement Function definitions .. + ABS1( T ) = ABS( DBLE( T ) ) + ABS( DIMAG( T ) ) +* .. +* .. Executable Statements .. +* + IF( UPPER ) THEN +* +* Input matrices A and B are upper triangular matrices +* +* Form matrix C = A*adj(B) = ( a b ) +* ( 0 d ) +* + A = A1*B3 + D = A3*B1 + B = A2*B1 - A1*B2 + FB = ABS( B ) +* +* Transform complex 2-by-2 matrix C to real matrix by unitary +* diagonal matrix diag(1,D1). +* + D1 = ONE + IF( FB.NE.ZERO ) + $ D1 = B / FB +* +* The SVD of real 2 by 2 triangular C +* +* ( CSL -SNL )*( A B )*( CSR SNR ) = ( R 0 ) +* ( SNL CSL ) ( 0 D ) ( -SNR CSR ) ( 0 T ) +* + CALL DLASV2( A, FB, D, S1, S2, SNR, CSR, SNL, CSL ) +* + IF( ABS( CSL ).GE.ABS( SNL ) .OR. ABS( CSR ).GE.ABS( SNR ) ) + $ THEN +* +* Compute the (1,1) and (1,2) elements of U'*A and V'*B, +* and (1,2) element of |U|'*|A| and |V|'*|B|. +* + UA11R = CSL*A1 + UA12 = CSL*A2 + D1*SNL*A3 +* + VB11R = CSR*B1 + VB12 = CSR*B2 + D1*SNR*B3 +* + AUA12 = ABS( CSL )*ABS1( A2 ) + ABS( SNL )*ABS( A3 ) + AVB12 = ABS( CSR )*ABS1( B2 ) + ABS( SNR )*ABS( B3 ) +* +* zero (1,2) elements of U'*A and V'*B +* + IF( ( ABS( UA11R )+ABS1( UA12 ) ).EQ.ZERO ) THEN + CALL ZLARTG( -DCMPLX( VB11R ), DCONJG( VB12 ), CSQ, SNQ, + $ R ) + ELSE IF( ( ABS( VB11R )+ABS1( VB12 ) ).EQ.ZERO ) THEN + CALL ZLARTG( -DCMPLX( UA11R ), DCONJG( UA12 ), CSQ, SNQ, + $ R ) + ELSE IF( AUA12 / ( ABS( UA11R )+ABS1( UA12 ) ).LE.AVB12 / + $ ( ABS( VB11R )+ABS1( VB12 ) ) ) THEN + CALL ZLARTG( -DCMPLX( UA11R ), DCONJG( UA12 ), CSQ, SNQ, + $ R ) + ELSE + CALL ZLARTG( -DCMPLX( VB11R ), DCONJG( VB12 ), CSQ, SNQ, + $ R ) + END IF +* + CSU = CSL + SNU = -D1*SNL + CSV = CSR + SNV = -D1*SNR +* + ELSE +* +* Compute the (2,1) and (2,2) elements of U'*A and V'*B, +* and (2,2) element of |U|'*|A| and |V|'*|B|. +* + UA21 = -DCONJG( D1 )*SNL*A1 + UA22 = -DCONJG( D1 )*SNL*A2 + CSL*A3 +* + VB21 = -DCONJG( D1 )*SNR*B1 + VB22 = -DCONJG( D1 )*SNR*B2 + CSR*B3 +* + AUA22 = ABS( SNL )*ABS1( A2 ) + ABS( CSL )*ABS( A3 ) + AVB22 = ABS( SNR )*ABS1( B2 ) + ABS( CSR )*ABS( B3 ) +* +* zero (2,2) elements of U'*A and V'*B, and then swap. +* + IF( ( ABS1( UA21 )+ABS1( UA22 ) ).EQ.ZERO ) THEN + CALL ZLARTG( -DCONJG( VB21 ), DCONJG( VB22 ), CSQ, SNQ, + $ R ) + ELSE IF( ( ABS1( VB21 )+ABS( VB22 ) ).EQ.ZERO ) THEN + CALL ZLARTG( -DCONJG( UA21 ), DCONJG( UA22 ), CSQ, SNQ, + $ R ) + ELSE IF( AUA22 / ( ABS1( UA21 )+ABS1( UA22 ) ).LE.AVB22 / + $ ( ABS1( VB21 )+ABS1( VB22 ) ) ) THEN + CALL ZLARTG( -DCONJG( UA21 ), DCONJG( UA22 ), CSQ, SNQ, + $ R ) + ELSE + CALL ZLARTG( -DCONJG( VB21 ), DCONJG( VB22 ), CSQ, SNQ, + $ R ) + END IF +* + CSU = SNL + SNU = D1*CSL + CSV = SNR + SNV = D1*CSR +* + END IF +* + ELSE +* +* Input matrices A and B are lower triangular matrices +* +* Form matrix C = A*adj(B) = ( a 0 ) +* ( c d ) +* + A = A1*B3 + D = A3*B1 + C = A2*B3 - A3*B2 + FC = ABS( C ) +* +* Transform complex 2-by-2 matrix C to real matrix by unitary +* diagonal matrix diag(d1,1). +* + D1 = ONE + IF( FC.NE.ZERO ) + $ D1 = C / FC +* +* The SVD of real 2 by 2 triangular C +* +* ( CSL -SNL )*( A 0 )*( CSR SNR ) = ( R 0 ) +* ( SNL CSL ) ( C D ) ( -SNR CSR ) ( 0 T ) +* + CALL DLASV2( A, FC, D, S1, S2, SNR, CSR, SNL, CSL ) +* + IF( ABS( CSR ).GE.ABS( SNR ) .OR. ABS( CSL ).GE.ABS( SNL ) ) + $ THEN +* +* Compute the (2,1) and (2,2) elements of U'*A and V'*B, +* and (2,1) element of |U|'*|A| and |V|'*|B|. +* + UA21 = -D1*SNR*A1 + CSR*A2 + UA22R = CSR*A3 +* + VB21 = -D1*SNL*B1 + CSL*B2 + VB22R = CSL*B3 +* + AUA21 = ABS( SNR )*ABS( A1 ) + ABS( CSR )*ABS1( A2 ) + AVB21 = ABS( SNL )*ABS( B1 ) + ABS( CSL )*ABS1( B2 ) +* +* zero (2,1) elements of U'*A and V'*B. +* + IF( ( ABS1( UA21 )+ABS( UA22R ) ).EQ.ZERO ) THEN + CALL ZLARTG( DCMPLX( VB22R ), VB21, CSQ, SNQ, R ) + ELSE IF( ( ABS1( VB21 )+ABS( VB22R ) ).EQ.ZERO ) THEN + CALL ZLARTG( DCMPLX( UA22R ), UA21, CSQ, SNQ, R ) + ELSE IF( AUA21 / ( ABS1( UA21 )+ABS( UA22R ) ).LE.AVB21 / + $ ( ABS1( VB21 )+ABS( VB22R ) ) ) THEN + CALL ZLARTG( DCMPLX( UA22R ), UA21, CSQ, SNQ, R ) + ELSE + CALL ZLARTG( DCMPLX( VB22R ), VB21, CSQ, SNQ, R ) + END IF +* + CSU = CSR + SNU = -DCONJG( D1 )*SNR + CSV = CSL + SNV = -DCONJG( D1 )*SNL +* + ELSE +* +* Compute the (1,1) and (1,2) elements of U'*A and V'*B, +* and (1,1) element of |U|'*|A| and |V|'*|B|. +* + UA11 = CSR*A1 + DCONJG( D1 )*SNR*A2 + UA12 = DCONJG( D1 )*SNR*A3 +* + VB11 = CSL*B1 + DCONJG( D1 )*SNL*B2 + VB12 = DCONJG( D1 )*SNL*B3 +* + AUA11 = ABS( CSR )*ABS( A1 ) + ABS( SNR )*ABS1( A2 ) + AVB11 = ABS( CSL )*ABS( B1 ) + ABS( SNL )*ABS1( B2 ) +* +* zero (1,1) elements of U'*A and V'*B, and then swap. +* + IF( ( ABS1( UA11 )+ABS1( UA12 ) ).EQ.ZERO ) THEN + CALL ZLARTG( VB12, VB11, CSQ, SNQ, R ) + ELSE IF( ( ABS1( VB11 )+ABS1( VB12 ) ).EQ.ZERO ) THEN + CALL ZLARTG( UA12, UA11, CSQ, SNQ, R ) + ELSE IF( AUA11 / ( ABS1( UA11 )+ABS1( UA12 ) ).LE.AVB11 / + $ ( ABS1( VB11 )+ABS1( VB12 ) ) ) THEN + CALL ZLARTG( UA12, UA11, CSQ, SNQ, R ) + ELSE + CALL ZLARTG( VB12, VB11, CSQ, SNQ, R ) + END IF +* + CSU = SNR + SNU = DCONJG( D1 )*CSR + CSV = SNL + SNV = DCONJG( D1 )*CSL +* + END IF +* + END IF +* + RETURN +* +* End of ZLAGS2 +* + END diff --git a/costa/native/external/lapack/zlagtm.f b/costa/native/external/lapack/zlagtm.f new file mode 100644 index 000000000..2a2fb6d68 --- /dev/null +++ b/costa/native/external/lapack/zlagtm.f @@ -0,0 +1,234 @@ + SUBROUTINE ZLAGTM( TRANS, N, NRHS, ALPHA, DL, D, DU, X, LDX, BETA, + $ B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER LDB, LDX, N, NRHS + DOUBLE PRECISION ALPHA, BETA +* .. +* .. Array Arguments .. + COMPLEX*16 B( LDB, * ), D( * ), DL( * ), DU( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZLAGTM performs a matrix-vector product of the form +* +* B := alpha * A * X + beta * B +* +* where A is a tridiagonal matrix of order N, B and X are N by NRHS +* matrices, and alpha and beta are real scalars, each of which may be +* 0., 1., or -1. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER +* Specifies the operation applied to A. +* = 'N': No transpose, B := alpha * A * X + beta * B +* = 'T': Transpose, B := alpha * A**T * X + beta * B +* = 'C': Conjugate transpose, B := alpha * A**H * X + beta * B +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices X and B. +* +* ALPHA (input) DOUBLE PRECISION +* The scalar alpha. ALPHA must be 0., 1., or -1.; otherwise, +* it is assumed to be 0. +* +* DL (input) COMPLEX*16 array, dimension (N-1) +* The (n-1) sub-diagonal elements of T. +* +* D (input) COMPLEX*16 array, dimension (N) +* The diagonal elements of T. +* +* DU (input) COMPLEX*16 array, dimension (N-1) +* The (n-1) super-diagonal elements of T. +* +* X (input) COMPLEX*16 array, dimension (LDX,NRHS) +* The N by NRHS matrix X. +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(N,1). +* +* BETA (input) DOUBLE PRECISION +* The scalar beta. BETA must be 0., 1., or -1.; otherwise, +* it is assumed to be 1. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the N by NRHS matrix B. +* On exit, B is overwritten by the matrix expression +* B := alpha * A * X + beta * B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(N,1). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) + $ RETURN +* +* Multiply B by BETA if BETA.NE.1. +* + IF( BETA.EQ.ZERO ) THEN + DO 20 J = 1, NRHS + DO 10 I = 1, N + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE IF( BETA.EQ.-ONE ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = -B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF +* + IF( ALPHA.EQ.ONE ) THEN + IF( LSAME( TRANS, 'N' ) ) THEN +* +* Compute B := B + A*X +* + DO 60 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + + $ DU( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) + DL( N-1 )*X( N-1, J ) + + $ D( N )*X( N, J ) + DO 50 I = 2, N - 1 + B( I, J ) = B( I, J ) + DL( I-1 )*X( I-1, J ) + + $ D( I )*X( I, J ) + DU( I )*X( I+1, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Compute B := B + A**T * X +* + DO 80 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + + $ DL( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) + DU( N-1 )*X( N-1, J ) + + $ D( N )*X( N, J ) + DO 70 I = 2, N - 1 + B( I, J ) = B( I, J ) + DU( I-1 )*X( I-1, J ) + + $ D( I )*X( I, J ) + DL( I )*X( I+1, J ) + 70 CONTINUE + END IF + 80 CONTINUE + ELSE IF( LSAME( TRANS, 'C' ) ) THEN +* +* Compute B := B + A**H * X +* + DO 100 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) + DCONJG( D( 1 ) )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) + DCONJG( D( 1 ) )*X( 1, J ) + + $ DCONJG( DL( 1 ) )*X( 2, J ) + B( N, J ) = B( N, J ) + DCONJG( DU( N-1 ) )* + $ X( N-1, J ) + DCONJG( D( N ) )*X( N, J ) + DO 90 I = 2, N - 1 + B( I, J ) = B( I, J ) + DCONJG( DU( I-1 ) )* + $ X( I-1, J ) + DCONJG( D( I ) )* + $ X( I, J ) + DCONJG( DL( I ) )* + $ X( I+1, J ) + 90 CONTINUE + END IF + 100 CONTINUE + END IF + ELSE IF( ALPHA.EQ.-ONE ) THEN + IF( LSAME( TRANS, 'N' ) ) THEN +* +* Compute B := B - A*X +* + DO 120 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) - + $ DU( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) - DL( N-1 )*X( N-1, J ) - + $ D( N )*X( N, J ) + DO 110 I = 2, N - 1 + B( I, J ) = B( I, J ) - DL( I-1 )*X( I-1, J ) - + $ D( I )*X( I, J ) - DU( I )*X( I+1, J ) + 110 CONTINUE + END IF + 120 CONTINUE + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Compute B := B - A'*X +* + DO 140 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) - + $ DL( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) - DU( N-1 )*X( N-1, J ) - + $ D( N )*X( N, J ) + DO 130 I = 2, N - 1 + B( I, J ) = B( I, J ) - DU( I-1 )*X( I-1, J ) - + $ D( I )*X( I, J ) - DL( I )*X( I+1, J ) + 130 CONTINUE + END IF + 140 CONTINUE + ELSE IF( LSAME( TRANS, 'C' ) ) THEN +* +* Compute B := B - A'*X +* + DO 160 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) - DCONJG( D( 1 ) )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) - DCONJG( D( 1 ) )*X( 1, J ) - + $ DCONJG( DL( 1 ) )*X( 2, J ) + B( N, J ) = B( N, J ) - DCONJG( DU( N-1 ) )* + $ X( N-1, J ) - DCONJG( D( N ) )*X( N, J ) + DO 150 I = 2, N - 1 + B( I, J ) = B( I, J ) - DCONJG( DU( I-1 ) )* + $ X( I-1, J ) - DCONJG( D( I ) )* + $ X( I, J ) - DCONJG( DL( I ) )* + $ X( I+1, J ) + 150 CONTINUE + END IF + 160 CONTINUE + END IF + END IF + RETURN +* +* End of ZLAGTM +* + END diff --git a/costa/native/external/lapack/zlahef.f b/costa/native/external/lapack/zlahef.f new file mode 100644 index 000000000..7520c57b4 --- /dev/null +++ b/costa/native/external/lapack/zlahef.f @@ -0,0 +1,648 @@ + SUBROUTINE ZLAHEF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), W( LDW, * ) +* .. +* +* Purpose +* ======= +* +* ZLAHEF computes a partial factorization of a complex Hermitian +* matrix A using the Bunch-Kaufman diagonal pivoting method. The +* partial factorization has the form: +* +* A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +* ( 0 U22 ) ( 0 D ) ( U12' U22' ) +* +* A = ( L11 0 ) ( D 0 ) ( L11' L21' ) if UPLO = 'L' +* ( L21 I ) ( 0 A22 ) ( 0 I ) +* +* where the order of D is at most NB. The actual order is returned in +* the argument KB, and is either NB or NB-1, or N if N <= NB. +* Note that U' denotes the conjugate transpose of U. +* +* ZLAHEF is an auxiliary routine called by ZHETRF. It uses blocked code +* (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or +* A22 (if UPLO = 'L'). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* Hermitian matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NB (input) INTEGER +* The maximum number of columns of the matrix A that should be +* factored. NB should be at least 2 to allow for 2-by-2 pivot +* blocks. +* +* KB (output) INTEGER +* The number of columns of A that were actually factored. +* KB is either NB-1 or NB, or N if N <= NB. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* On exit, A contains details of the partial factorization. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If UPLO = 'U', only the last KB elements of IPIV are set; +* if UPLO = 'L', only the first KB elements are set. +* +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* W (workspace) COMPLEX*16 array, dimension (LDW,NB) +* +* LDW (input) INTEGER +* The leading dimension of the array W. LDW >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* > 0: if INFO = k, D(k,k) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER IMAX, J, JB, JJ, JMAX, JP, K, KK, KKW, KP, + $ KSTEP, KW + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, R1, ROWMAX, T + COMPLEX*16 D11, D21, D22, Z +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + EXTERNAL LSAME, IZAMAX +* .. +* .. External Subroutines .. + EXTERNAL ZCOPY, ZDSCAL, ZGEMM, ZGEMV, ZLACGV, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( DBLE( Z ) ) + ABS( DIMAG( Z ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 (note that conjg(W) is actually stored) +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* +* KW is the column of W which corresponds to column K of A +* + K = N + 10 CONTINUE + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* +* Copy column K of A to column KW of W and update it +* + CALL ZCOPY( K-1, A( 1, K ), 1, W( 1, KW ), 1 ) + W( K, KW ) = DBLE( A( K, K ) ) + IF( K.LT.N ) THEN + CALL ZGEMV( 'No transpose', K, N-K, -CONE, A( 1, K+1 ), LDA, + $ W( K, KW+1 ), LDW, CONE, W( 1, KW ), 1 ) + W( K, KW ) = DBLE( W( K, KW ) ) + END IF +* + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( DBLE( W( K, KW ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = IZAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = CABS1( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = DBLE( A( K, K ) ) + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* Copy column IMAX to column KW-1 of W and update it +* + CALL ZCOPY( IMAX-1, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 ) + W( IMAX, KW-1 ) = DBLE( A( IMAX, IMAX ) ) + CALL ZCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) + CALL ZLACGV( K-IMAX, W( IMAX+1, KW-1 ), 1 ) + IF( K.LT.N ) THEN + CALL ZGEMV( 'No transpose', K, N-K, -CONE, + $ A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW, + $ CONE, W( 1, KW-1 ), 1 ) + W( IMAX, KW-1 ) = DBLE( W( IMAX, KW-1 ) ) + END IF +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = IMAX + IZAMAX( K-IMAX, W( IMAX+1, KW-1 ), 1 ) + ROWMAX = CABS1( W( JMAX, KW-1 ) ) + IF( IMAX.GT.1 ) THEN + JMAX = IZAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, KW-1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( DBLE( W( IMAX, KW-1 ) ) ).GE.ALPHA*ROWMAX ) + $ THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW +* + CALL ZCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + KKW = NB + KK - N +* +* Updated column KP is already stored in column KKW of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, KP ) = DBLE( A( KK, KK ) ) + CALL ZCOPY( KK-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + CALL ZLACGV( KK-1-KP, A( KP, KP+1 ), LDA ) + CALL ZCOPY( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last KK columns of A and W +* + IF( KK.LT.N ) + $ CALL ZSWAP( N-KK, A( KK, KK+1 ), LDA, A( KP, KK+1 ), + $ LDA ) + CALL ZSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column KW of W now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Store U(k) in column k of A +* + CALL ZCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + R1 = ONE / DBLE( A( K, K ) ) + CALL ZDSCAL( K-1, R1, A( 1, K ), 1 ) +* +* Conjugate W(k) +* + CALL ZLACGV( K-1, W( 1, KW ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns KW and KW-1 of W now +* hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* + IF( K.GT.2 ) THEN +* +* Store U(k) and U(k-1) in columns k and k-1 of A +* + D21 = W( K-1, KW ) + D11 = W( K, KW ) / DCONJG( D21 ) + D22 = W( K-1, KW-1 ) / D21 + T = ONE / ( DBLE( D11*D22 )-ONE ) + D21 = T / D21 + DO 20 J = 1, K - 2 + A( J, K-1 ) = D21*( D11*W( J, KW-1 )-W( J, KW ) ) + A( J, K ) = DCONJG( D21 )* + $ ( D22*W( J, KW )-W( J, KW-1 ) ) + 20 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = W( K-1, KW ) + A( K, K ) = W( K, KW ) +* +* Conjugate W(k) and W(k-1) +* + CALL ZLACGV( K-1, W( 1, KW ), 1 ) + CALL ZLACGV( K-2, W( 1, KW-1 ), 1 ) + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12' = A11 - U12*W' +* +* computing blocks of NB columns at a time (note that conjg(W) is +* actually stored) +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + A( JJ, JJ ) = DBLE( A( JJ, JJ ) ) + CALL ZGEMV( 'No transpose', JJ-J+1, N-K, -CONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, CONE, + $ A( J, JJ ), 1 ) + A( JJ, JJ ) = DBLE( A( JJ, JJ ) ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + CALL ZGEMM( 'No transpose', 'Transpose', J-1, JB, N-K, + $ -CONE, A( 1, K+1 ), LDA, W( J, KW+1 ), LDW, + $ CONE, A( 1, J ), LDA ) + 50 CONTINUE +* +* Put U12 in standard form by partially undoing the interchanges +* in columns k+1:n +* + J = K + 1 + 60 CONTINUE + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP + J = J + 1 + END IF + J = J + 1 + IF( JP.NE.JJ .AND. J.LE.N ) + $ CALL ZSWAP( N-J+1, A( JP, J ), LDA, A( JJ, J ), LDA ) + IF( J.LE.N ) + $ GO TO 60 +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 (note that conjg(W) is actually stored) +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* +* Copy column K of A to column K of W and update it +* + W( K, K ) = DBLE( A( K, K ) ) + IF( K.LT.N ) + $ CALL ZCOPY( N-K, A( K+1, K ), 1, W( K+1, K ), 1 ) + CALL ZGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), LDA, + $ W( K, 1 ), LDW, CONE, W( K, K ), 1 ) + W( K, K ) = DBLE( W( K, K ) ) +* + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( DBLE( W( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + IZAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = CABS1( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = DBLE( A( K, K ) ) + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* Copy column IMAX to column K+1 of W and update it +* + CALL ZCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1 ) + CALL ZLACGV( IMAX-K, W( K, K+1 ), 1 ) + W( IMAX, K+1 ) = DBLE( A( IMAX, IMAX ) ) + IF( IMAX.LT.N ) + $ CALL ZCOPY( N-IMAX, A( IMAX+1, IMAX ), 1, + $ W( IMAX+1, K+1 ), 1 ) + CALL ZGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), + $ LDA, W( IMAX, 1 ), LDW, CONE, W( K, K+1 ), + $ 1 ) + W( IMAX, K+1 ) = DBLE( W( IMAX, K+1 ) ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = K - 1 + IZAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = CABS1( W( JMAX, K+1 ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + IZAMAX( N-IMAX, W( IMAX+1, K+1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, K+1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( DBLE( W( IMAX, K+1 ) ) ).GE.ALPHA*ROWMAX ) + $ THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K +* + CALL ZCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 +* +* Updated column KP is already stored in column KK of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, KP ) = DBLE( A( KK, KK ) ) + CALL ZCOPY( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + CALL ZLACGV( KP-KK-1, A( KP, KK+1 ), LDA ) + IF( KP.LT.N ) + $ CALL ZCOPY( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) +* +* Interchange rows KK and KP in first KK columns of A and W +* + CALL ZSWAP( KK-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL ZSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* +* Store L(k) in column k of A +* + CALL ZCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN + R1 = ONE / DBLE( A( K, K ) ) + CALL ZDSCAL( N-K, R1, A( K+1, K ), 1 ) +* +* Conjugate W(k) +* + CALL ZLACGV( N-K, W( K+1, K ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Store L(k) and L(k+1) in columns k and k+1 of A +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / DCONJG( D21 ) + T = ONE / ( DBLE( D11*D22 )-ONE ) + D21 = T / D21 + DO 80 J = K + 2, N + A( J, K ) = DCONJG( D21 )* + $ ( D11*W( J, K )-W( J, K+1 ) ) + A( J, K+1 ) = D21*( D22*W( J, K+1 )-W( J, K ) ) + 80 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = W( K+1, K ) + A( K+1, K+1 ) = W( K+1, K+1 ) +* +* Conjugate W(k) and W(k+1) +* + CALL ZLACGV( N-K, W( K+1, K ), 1 ) + CALL ZLACGV( N-K-1, W( K+2, K+1 ), 1 ) + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21' = A22 - L21*W' +* +* computing blocks of NB columns at a time (note that conjg(W) is +* actually stored) +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + A( JJ, JJ ) = DBLE( A( JJ, JJ ) ) + CALL ZGEMV( 'No transpose', J+JB-JJ, K-1, -CONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, CONE, + $ A( JJ, JJ ), 1 ) + A( JJ, JJ ) = DBLE( A( JJ, JJ ) ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL ZGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -CONE, A( J+JB, 1 ), LDA, W( J, 1 ), + $ LDW, CONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Put L21 in standard form by partially undoing the interchanges +* in columns 1:k-1 +* + J = K - 1 + 120 CONTINUE + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP + J = J - 1 + END IF + J = J - 1 + IF( JP.NE.JJ .AND. J.GE.1 ) + $ CALL ZSWAP( J, A( JP, 1 ), LDA, A( JJ, 1 ), LDA ) + IF( J.GE.1 ) + $ GO TO 120 +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF + RETURN +* +* End of ZLAHEF +* + END diff --git a/costa/native/external/lapack/zlahqr.f b/costa/native/external/lapack/zlahqr.f new file mode 100644 index 000000000..5fbd9e55c --- /dev/null +++ b/costa/native/external/lapack/zlahqr.f @@ -0,0 +1,384 @@ + SUBROUTINE ZLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, + $ IHIZ, Z, LDZ, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + LOGICAL WANTT, WANTZ + INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, N +* .. +* .. Array Arguments .. + COMPLEX*16 H( LDH, * ), W( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZLAHQR is an auxiliary routine called by ZHSEQR to update the +* eigenvalues and Schur decomposition already computed by ZHSEQR, by +* dealing with the Hessenberg submatrix in rows and columns ILO to IHI. +* +* Arguments +* ========= +* +* WANTT (input) LOGICAL +* = .TRUE. : the full Schur form T is required; +* = .FALSE.: only eigenvalues are required. +* +* WANTZ (input) LOGICAL +* = .TRUE. : the matrix of Schur vectors Z is required; +* = .FALSE.: Schur vectors are not required. +* +* N (input) INTEGER +* The order of the matrix H. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* It is assumed that H is already upper triangular in rows and +* columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1). +* ZLAHQR works primarily with the Hessenberg submatrix in rows +* and columns ILO to IHI, but applies transformations to all of +* H if WANTT is .TRUE.. +* 1 <= ILO <= max(1,IHI); IHI <= N. +* +* H (input/output) COMPLEX*16 array, dimension (LDH,N) +* On entry, the upper Hessenberg matrix H. +* On exit, if WANTT is .TRUE., H is upper triangular in rows +* and columns ILO:IHI, with any 2-by-2 diagonal blocks in +* standard form. If WANTT is .FALSE., the contents of H are +* unspecified on exit. +* +* LDH (input) INTEGER +* The leading dimension of the array H. LDH >= max(1,N). +* +* W (output) COMPLEX*16 array, dimension (N) +* The computed eigenvalues ILO to IHI are stored in the +* corresponding elements of W. If WANTT is .TRUE., the +* eigenvalues are stored in the same order as on the diagonal +* of the Schur form returned in H, with W(i) = H(i,i). +* +* ILOZ (input) INTEGER +* IHIZ (input) INTEGER +* Specify the rows of Z to which transformations must be +* applied if WANTZ is .TRUE.. +* 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. +* +* Z (input/output) COMPLEX*16 array, dimension (LDZ,N) +* If WANTZ is .TRUE., on entry Z must contain the current +* matrix Z of transformations accumulated by ZHSEQR, and on +* exit Z has been updated; transformations are applied only to +* the submatrix Z(ILOZ:IHIZ,ILO:IHI). +* If WANTZ is .FALSE., Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* > 0: if INFO = i, ZLAHQR failed to compute all the +* eigenvalues ILO to IHI in a total of 30*(IHI-ILO+1) +* iterations; elements i+1:ihi of W contain those +* eigenvalues which have been successfully computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION RZERO, HALF + PARAMETER ( RZERO = 0.0D+0, HALF = 0.5D+0 ) + DOUBLE PRECISION DAT1 + PARAMETER ( DAT1 = 0.75D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, I2, ITN, ITS, J, K, L, M, NH, NZ + DOUBLE PRECISION H10, H21, RTEMP, S, SMLNUM, T2, TST1, ULP + COMPLEX*16 CDUM, H11, H11S, H22, SUM, T, T1, TEMP, U, V2, + $ X, Y +* .. +* .. Local Arrays .. + DOUBLE PRECISION RWORK( 1 ) + COMPLEX*16 V( 2 ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, ZLANHS + COMPLEX*16 ZLADIV + EXTERNAL DLAMCH, ZLANHS, ZLADIV +* .. +* .. External Subroutines .. + EXTERNAL ZCOPY, ZLARFG, ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( ILO.EQ.IHI ) THEN + W( ILO ) = H( ILO, ILO ) + RETURN + END IF +* + NH = IHI - ILO + 1 + NZ = IHIZ - ILOZ + 1 +* +* Set machine-dependent constants for the stopping criterion. +* If norm(H) <= sqrt(OVFL), overflow should not occur. +* + ULP = DLAMCH( 'Precision' ) + SMLNUM = DLAMCH( 'Safe minimum' ) / ULP +* +* I1 and I2 are the indices of the first row and last column of H +* to which transformations must be applied. If eigenvalues only are +* being computed, I1 and I2 are set inside the main loop. +* + IF( WANTT ) THEN + I1 = 1 + I2 = N + END IF +* +* ITN is the total number of QR iterations allowed. +* + ITN = 30*NH +* +* The main loop begins here. I is the loop index and decreases from +* IHI to ILO in steps of 1. Each iteration of the loop works +* with the active submatrix in rows and columns L to I. +* Eigenvalues I+1 to IHI have already converged. Either L = ILO, or +* H(L,L-1) is negligible so that the matrix splits. +* + I = IHI + 10 CONTINUE + IF( I.LT.ILO ) + $ GO TO 130 +* +* Perform QR iterations on rows and columns ILO to I until a +* submatrix of order 1 splits off at the bottom because a +* subdiagonal element has become negligible. +* + L = ILO + DO 110 ITS = 0, ITN +* +* Look for a single small subdiagonal element. +* + DO 20 K = I, L + 1, -1 + TST1 = CABS1( H( K-1, K-1 ) ) + CABS1( H( K, K ) ) + IF( TST1.EQ.RZERO ) + $ TST1 = ZLANHS( '1', I-L+1, H( L, L ), LDH, RWORK ) + IF( ABS( DBLE( H( K, K-1 ) ) ).LE.MAX( ULP*TST1, SMLNUM ) ) + $ GO TO 30 + 20 CONTINUE + 30 CONTINUE + L = K + IF( L.GT.ILO ) THEN +* +* H(L,L-1) is negligible +* + H( L, L-1 ) = ZERO + END IF +* +* Exit from loop if a submatrix of order 1 has split off. +* + IF( L.GE.I ) + $ GO TO 120 +* +* Now the active submatrix is in rows and columns L to I. If +* eigenvalues only are being computed, only the active submatrix +* need be transformed. +* + IF( .NOT.WANTT ) THEN + I1 = L + I2 = I + END IF +* + IF( ITS.EQ.10 .OR. ITS.EQ.20 ) THEN +* +* Exceptional shift. +* + S = DAT1*ABS( DBLE( H( I, I-1 ) ) ) + T = S + H( I, I ) + ELSE +* +* Wilkinson's shift. +* + T = H( I, I ) + U = H( I-1, I )*DBLE( H( I, I-1 ) ) + IF( U.NE.ZERO ) THEN + X = HALF*( H( I-1, I-1 )-T ) + Y = SQRT( X*X+U ) + IF( DBLE( X )*DBLE( Y )+DIMAG( X )*DIMAG( Y ).LT.RZERO ) + $ Y = -Y + T = T - ZLADIV( U, ( X+Y ) ) + END IF + END IF +* +* Look for two consecutive small subdiagonal elements. +* + DO 40 M = I - 1, L + 1, -1 +* +* Determine the effect of starting the single-shift QR +* iteration at row M, and see if this would make H(M,M-1) +* negligible. +* + H11 = H( M, M ) + H22 = H( M+1, M+1 ) + H11S = H11 - T + H21 = H( M+1, M ) + S = CABS1( H11S ) + ABS( H21 ) + H11S = H11S / S + H21 = H21 / S + V( 1 ) = H11S + V( 2 ) = H21 + H10 = H( M, M-1 ) + TST1 = CABS1( H11S )*( CABS1( H11 )+CABS1( H22 ) ) + IF( ABS( H10*H21 ).LE.ULP*TST1 ) + $ GO TO 50 + 40 CONTINUE + H11 = H( L, L ) + H22 = H( L+1, L+1 ) + H11S = H11 - T + H21 = H( L+1, L ) + S = CABS1( H11S ) + ABS( H21 ) + H11S = H11S / S + H21 = H21 / S + V( 1 ) = H11S + V( 2 ) = H21 + 50 CONTINUE +* +* Single-shift QR step +* + DO 100 K = M, I - 1 +* +* The first iteration of this loop determines a reflection G +* from the vector V and applies it from left and right to H, +* thus creating a nonzero bulge below the subdiagonal. +* +* Each subsequent iteration determines a reflection G to +* restore the Hessenberg form in the (K-1)th column, and thus +* chases the bulge one step toward the bottom of the active +* submatrix. +* +* V(2) is always real before the call to ZLARFG, and hence +* after the call T2 ( = T1*V(2) ) is also real. +* + IF( K.GT.M ) + $ CALL ZCOPY( 2, H( K, K-1 ), 1, V, 1 ) + CALL ZLARFG( 2, V( 1 ), V( 2 ), 1, T1 ) + IF( K.GT.M ) THEN + H( K, K-1 ) = V( 1 ) + H( K+1, K-1 ) = ZERO + END IF + V2 = V( 2 ) + T2 = DBLE( T1*V2 ) +* +* Apply G from the left to transform the rows of the matrix +* in columns K to I2. +* + DO 60 J = K, I2 + SUM = DCONJG( T1 )*H( K, J ) + T2*H( K+1, J ) + H( K, J ) = H( K, J ) - SUM + H( K+1, J ) = H( K+1, J ) - SUM*V2 + 60 CONTINUE +* +* Apply G from the right to transform the columns of the +* matrix in rows I1 to min(K+2,I). +* + DO 70 J = I1, MIN( K+2, I ) + SUM = T1*H( J, K ) + T2*H( J, K+1 ) + H( J, K ) = H( J, K ) - SUM + H( J, K+1 ) = H( J, K+1 ) - SUM*DCONJG( V2 ) + 70 CONTINUE +* + IF( WANTZ ) THEN +* +* Accumulate transformations in the matrix Z +* + DO 80 J = ILOZ, IHIZ + SUM = T1*Z( J, K ) + T2*Z( J, K+1 ) + Z( J, K ) = Z( J, K ) - SUM + Z( J, K+1 ) = Z( J, K+1 ) - SUM*DCONJG( V2 ) + 80 CONTINUE + END IF +* + IF( K.EQ.M .AND. M.GT.L ) THEN +* +* If the QR step was started at row M > L because two +* consecutive small subdiagonals were found, then extra +* scaling must be performed to ensure that H(M,M-1) remains +* real. +* + TEMP = ONE - T1 + TEMP = TEMP / ABS( TEMP ) + H( M+1, M ) = H( M+1, M )*DCONJG( TEMP ) + IF( M+2.LE.I ) + $ H( M+2, M+1 ) = H( M+2, M+1 )*TEMP + DO 90 J = M, I + IF( J.NE.M+1 ) THEN + IF( I2.GT.J ) + $ CALL ZSCAL( I2-J, TEMP, H( J, J+1 ), LDH ) + CALL ZSCAL( J-I1, DCONJG( TEMP ), H( I1, J ), 1 ) + IF( WANTZ ) THEN + CALL ZSCAL( NZ, DCONJG( TEMP ), Z( ILOZ, J ), + $ 1 ) + END IF + END IF + 90 CONTINUE + END IF + 100 CONTINUE +* +* Ensure that H(I,I-1) is real. +* + TEMP = H( I, I-1 ) + IF( DIMAG( TEMP ).NE.RZERO ) THEN + RTEMP = ABS( TEMP ) + H( I, I-1 ) = RTEMP + TEMP = TEMP / RTEMP + IF( I2.GT.I ) + $ CALL ZSCAL( I2-I, DCONJG( TEMP ), H( I, I+1 ), LDH ) + CALL ZSCAL( I-I1, TEMP, H( I1, I ), 1 ) + IF( WANTZ ) THEN + CALL ZSCAL( NZ, TEMP, Z( ILOZ, I ), 1 ) + END IF + END IF +* + 110 CONTINUE +* +* Failure to converge in remaining number of iterations +* + INFO = I + RETURN +* + 120 CONTINUE +* +* H(I,I-1) is negligible: one eigenvalue has converged. +* + W( I ) = H( I, I ) +* +* Decrement number of remaining iterations, and return to start of +* the main loop with new value of I. +* + ITN = ITN - ITS + I = L - 1 + GO TO 10 +* + 130 CONTINUE + RETURN +* +* End of ZLAHQR +* + END diff --git a/costa/native/external/lapack/zlahrd.f b/costa/native/external/lapack/zlahrd.f new file mode 100644 index 000000000..4f39378c2 --- /dev/null +++ b/costa/native/external/lapack/zlahrd.f @@ -0,0 +1,212 @@ + SUBROUTINE ZLAHRD( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), T( LDT, NB ), TAU( NB ), + $ Y( LDY, NB ) +* .. +* +* Purpose +* ======= +* +* ZLAHRD reduces the first NB columns of a complex general n-by-(n-k+1) +* matrix A so that elements below the k-th subdiagonal are zero. The +* reduction is performed by a unitary similarity transformation +* Q' * A * Q. The routine returns the matrices V and T which determine +* Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. +* +* This is an auxiliary routine called by ZGEHRD. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. +* +* K (input) INTEGER +* The offset for the reduction. Elements below the k-th +* subdiagonal in the first NB columns are reduced to zero. +* +* NB (input) INTEGER +* The number of columns to be reduced. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N-K+1) +* On entry, the n-by-(n-k+1) general matrix A. +* On exit, the elements on and above the k-th subdiagonal in +* the first NB columns are overwritten with the corresponding +* elements of the reduced matrix; the elements below the k-th +* subdiagonal, with the array TAU, represent the matrix Q as a +* product of elementary reflectors. The other columns of A are +* unchanged. See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (output) COMPLEX*16 array, dimension (NB) +* The scalar factors of the elementary reflectors. See Further +* Details. +* +* T (output) COMPLEX*16 array, dimension (LDT,NB) +* The upper triangular matrix T. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= NB. +* +* Y (output) COMPLEX*16 array, dimension (LDY,NB) +* The n-by-nb matrix Y. +* +* LDY (input) INTEGER +* The leading dimension of the array Y. LDY >= max(1,N). +* +* Further Details +* =============== +* +* The matrix Q is represented as a product of nb elementary reflectors +* +* Q = H(1) H(2) . . . H(nb). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in +* A(i+k+1:n,i), and tau in TAU(i). +* +* The elements of the vectors v together form the (n-k+1)-by-nb matrix +* V which is needed, with T and Y, to apply the transformation to the +* unreduced part of the matrix, using an update of the form: +* A := (I - V*T*V') * (A - Y*V'). +* +* The contents of A on exit are illustrated by the following example +* with n = 7, k = 3 and nb = 2: +* +* ( a h a a a ) +* ( a h a a a ) +* ( a h a a a ) +* ( h h a a a ) +* ( v1 h a a a ) +* ( v1 v2 a a a ) +* ( v1 v2 a a a ) +* +* where a denotes an element of the original matrix A, h denotes a +* modified element of the upper Hessenberg matrix H, and vi denotes an +* element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX*16 EI +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZCOPY, ZGEMV, ZLACGV, ZLARFG, ZSCAL, + $ ZTRMV +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, NB + IF( I.GT.1 ) THEN +* +* Update A(1:n,i) +* +* Compute i-th column of A - Y * V' +* + CALL ZLACGV( I-1, A( K+I-1, 1 ), LDA ) + CALL ZGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, + $ A( K+I-1, 1 ), LDA, ONE, A( 1, I ), 1 ) + CALL ZLACGV( I-1, A( K+I-1, 1 ), LDA ) +* +* Apply I - V * T' * V' to this column (call it b) from the +* left, using the last column of T as workspace +* +* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) +* ( V2 ) ( b2 ) +* +* where V1 is unit lower triangular +* +* w := V1' * b1 +* + CALL ZCOPY( I-1, A( K+1, I ), 1, T( 1, NB ), 1 ) + CALL ZTRMV( 'Lower', 'Conjugate transpose', 'Unit', I-1, + $ A( K+1, 1 ), LDA, T( 1, NB ), 1 ) +* +* w := w + V2'*b2 +* + CALL ZGEMV( 'Conjugate transpose', N-K-I+1, I-1, ONE, + $ A( K+I, 1 ), LDA, A( K+I, I ), 1, ONE, + $ T( 1, NB ), 1 ) +* +* w := T'*w +* + CALL ZTRMV( 'Upper', 'Conjugate transpose', 'Non-unit', I-1, + $ T, LDT, T( 1, NB ), 1 ) +* +* b2 := b2 - V2*w +* + CALL ZGEMV( 'No transpose', N-K-I+1, I-1, -ONE, A( K+I, 1 ), + $ LDA, T( 1, NB ), 1, ONE, A( K+I, I ), 1 ) +* +* b1 := b1 - V1*w +* + CALL ZTRMV( 'Lower', 'No transpose', 'Unit', I-1, + $ A( K+1, 1 ), LDA, T( 1, NB ), 1 ) + CALL ZAXPY( I-1, -ONE, T( 1, NB ), 1, A( K+1, I ), 1 ) +* + A( K+I-1, I-1 ) = EI + END IF +* +* Generate the elementary reflector H(i) to annihilate +* A(k+i+1:n,i) +* + EI = A( K+I, I ) + CALL ZLARFG( N-K-I+1, EI, A( MIN( K+I+1, N ), I ), 1, + $ TAU( I ) ) + A( K+I, I ) = ONE +* +* Compute Y(1:n,i) +* + CALL ZGEMV( 'No transpose', N, N-K-I+1, ONE, A( 1, I+1 ), LDA, + $ A( K+I, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', N-K-I+1, I-1, ONE, + $ A( K+I, 1 ), LDA, A( K+I, I ), 1, ZERO, T( 1, I ), + $ 1 ) + CALL ZGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, T( 1, I ), 1, + $ ONE, Y( 1, I ), 1 ) + CALL ZSCAL( N, TAU( I ), Y( 1, I ), 1 ) +* +* Compute T(1:i,i) +* + CALL ZSCAL( I-1, -TAU( I ), T( 1, I ), 1 ) + CALL ZTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, LDT, + $ T( 1, I ), 1 ) + T( I, I ) = TAU( I ) +* + 10 CONTINUE + A( K+NB, NB ) = EI +* + RETURN +* +* End of ZLAHRD +* + END diff --git a/costa/native/external/lapack/zlaic1.f b/costa/native/external/lapack/zlaic1.f new file mode 100644 index 000000000..297445d3b --- /dev/null +++ b/costa/native/external/lapack/zlaic1.f @@ -0,0 +1,296 @@ + SUBROUTINE ZLAIC1( JOB, J, X, SEST, W, GAMMA, SESTPR, S, C ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER J, JOB + DOUBLE PRECISION SEST, SESTPR + COMPLEX*16 C, GAMMA, S +* .. +* .. Array Arguments .. + COMPLEX*16 W( J ), X( J ) +* .. +* +* Purpose +* ======= +* +* ZLAIC1 applies one step of incremental condition estimation in +* its simplest version: +* +* Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j +* lower triangular matrix L, such that +* twonorm(L*x) = sest +* Then ZLAIC1 computes sestpr, s, c such that +* the vector +* [ s*x ] +* xhat = [ c ] +* is an approximate singular vector of +* [ L 0 ] +* Lhat = [ w' gamma ] +* in the sense that +* twonorm(Lhat*xhat) = sestpr. +* +* Depending on JOB, an estimate for the largest or smallest singular +* value is computed. +* +* Note that [s c]' and sestpr**2 is an eigenpair of the system +* +* diag(sest*sest, 0) + [alpha gamma] * [ conjg(alpha) ] +* [ conjg(gamma) ] +* +* where alpha = conjg(x)'*w. +* +* Arguments +* ========= +* +* JOB (input) INTEGER +* = 1: an estimate for the largest singular value is computed. +* = 2: an estimate for the smallest singular value is computed. +* +* J (input) INTEGER +* Length of X and W +* +* X (input) COMPLEX*16 array, dimension (J) +* The j-vector x. +* +* SEST (input) DOUBLE PRECISION +* Estimated singular value of j by j matrix L +* +* W (input) COMPLEX*16 array, dimension (J) +* The j-vector w. +* +* GAMMA (input) COMPLEX*16 +* The diagonal element gamma. +* +* SESTPR (output) DOUBLE PRECISION +* Estimated singular value of (j+1) by (j+1) matrix Lhat. +* +* S (output) COMPLEX*16 +* Sine needed in forming xhat. +* +* C (output) COMPLEX*16 +* Cosine needed in forming xhat. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 ) + DOUBLE PRECISION HALF, FOUR + PARAMETER ( HALF = 0.5D0, FOUR = 4.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION ABSALP, ABSEST, ABSGAM, B, EPS, NORMA, S1, S2, + $ SCL, T, TEST, TMP, ZETA1, ZETA2 + COMPLEX*16 ALPHA, COSINE, SINE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DCONJG, MAX, SQRT +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + COMPLEX*16 ZDOTC + EXTERNAL DLAMCH, ZDOTC +* .. +* .. Executable Statements .. +* + EPS = DLAMCH( 'Epsilon' ) + ALPHA = ZDOTC( J, X, 1, W, 1 ) +* + ABSALP = ABS( ALPHA ) + ABSGAM = ABS( GAMMA ) + ABSEST = ABS( SEST ) +* + IF( JOB.EQ.1 ) THEN +* +* Estimating largest singular value +* +* special cases +* + IF( SEST.EQ.ZERO ) THEN + S1 = MAX( ABSGAM, ABSALP ) + IF( S1.EQ.ZERO ) THEN + S = ZERO + C = ONE + SESTPR = ZERO + ELSE + S = ALPHA / S1 + C = GAMMA / S1 + TMP = SQRT( S*DCONJG( S )+C*DCONJG( C ) ) + S = S / TMP + C = C / TMP + SESTPR = S1*TMP + END IF + RETURN + ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN + S = ONE + C = ZERO + TMP = MAX( ABSEST, ABSALP ) + S1 = ABSEST / TMP + S2 = ABSALP / TMP + SESTPR = TMP*SQRT( S1*S1+S2*S2 ) + RETURN + ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN + S1 = ABSGAM + S2 = ABSEST + IF( S1.LE.S2 ) THEN + S = ONE + C = ZERO + SESTPR = S2 + ELSE + S = ZERO + C = ONE + SESTPR = S1 + END IF + RETURN + ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN + S1 = ABSGAM + S2 = ABSALP + IF( S1.LE.S2 ) THEN + TMP = S1 / S2 + SCL = SQRT( ONE+TMP*TMP ) + SESTPR = S2*SCL + S = ( ALPHA / S2 ) / SCL + C = ( GAMMA / S2 ) / SCL + ELSE + TMP = S2 / S1 + SCL = SQRT( ONE+TMP*TMP ) + SESTPR = S1*SCL + S = ( ALPHA / S1 ) / SCL + C = ( GAMMA / S1 ) / SCL + END IF + RETURN + ELSE +* +* normal case +* + ZETA1 = ABSALP / ABSEST + ZETA2 = ABSGAM / ABSEST +* + B = ( ONE-ZETA1*ZETA1-ZETA2*ZETA2 )*HALF + C = ZETA1*ZETA1 + IF( B.GT.ZERO ) THEN + T = C / ( B+SQRT( B*B+C ) ) + ELSE + T = SQRT( B*B+C ) - B + END IF +* + SINE = -( ALPHA / ABSEST ) / T + COSINE = -( GAMMA / ABSEST ) / ( ONE+T ) + TMP = SQRT( SINE*DCONJG( SINE )+COSINE*DCONJG( COSINE ) ) + S = SINE / TMP + C = COSINE / TMP + SESTPR = SQRT( T+ONE )*ABSEST + RETURN + END IF +* + ELSE IF( JOB.EQ.2 ) THEN +* +* Estimating smallest singular value +* +* special cases +* + IF( SEST.EQ.ZERO ) THEN + SESTPR = ZERO + IF( MAX( ABSGAM, ABSALP ).EQ.ZERO ) THEN + SINE = ONE + COSINE = ZERO + ELSE + SINE = -DCONJG( GAMMA ) + COSINE = DCONJG( ALPHA ) + END IF + S1 = MAX( ABS( SINE ), ABS( COSINE ) ) + S = SINE / S1 + C = COSINE / S1 + TMP = SQRT( S*DCONJG( S )+C*DCONJG( C ) ) + S = S / TMP + C = C / TMP + RETURN + ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN + S = ZERO + C = ONE + SESTPR = ABSGAM + RETURN + ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN + S1 = ABSGAM + S2 = ABSEST + IF( S1.LE.S2 ) THEN + S = ZERO + C = ONE + SESTPR = S1 + ELSE + S = ONE + C = ZERO + SESTPR = S2 + END IF + RETURN + ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN + S1 = ABSGAM + S2 = ABSALP + IF( S1.LE.S2 ) THEN + TMP = S1 / S2 + SCL = SQRT( ONE+TMP*TMP ) + SESTPR = ABSEST*( TMP / SCL ) + S = -( DCONJG( GAMMA ) / S2 ) / SCL + C = ( DCONJG( ALPHA ) / S2 ) / SCL + ELSE + TMP = S2 / S1 + SCL = SQRT( ONE+TMP*TMP ) + SESTPR = ABSEST / SCL + S = -( DCONJG( GAMMA ) / S1 ) / SCL + C = ( DCONJG( ALPHA ) / S1 ) / SCL + END IF + RETURN + ELSE +* +* normal case +* + ZETA1 = ABSALP / ABSEST + ZETA2 = ABSGAM / ABSEST +* + NORMA = MAX( ONE+ZETA1*ZETA1+ZETA1*ZETA2, + $ ZETA1*ZETA2+ZETA2*ZETA2 ) +* +* See if root is closer to zero or to ONE +* + TEST = ONE + TWO*( ZETA1-ZETA2 )*( ZETA1+ZETA2 ) + IF( TEST.GE.ZERO ) THEN +* +* root is close to zero, compute directly +* + B = ( ZETA1*ZETA1+ZETA2*ZETA2+ONE )*HALF + C = ZETA2*ZETA2 + T = C / ( B+SQRT( ABS( B*B-C ) ) ) + SINE = ( ALPHA / ABSEST ) / ( ONE-T ) + COSINE = -( GAMMA / ABSEST ) / T + SESTPR = SQRT( T+FOUR*EPS*EPS*NORMA )*ABSEST + ELSE +* +* root is closer to ONE, shift by that amount +* + B = ( ZETA2*ZETA2+ZETA1*ZETA1-ONE )*HALF + C = ZETA1*ZETA1 + IF( B.GE.ZERO ) THEN + T = -C / ( B+SQRT( B*B+C ) ) + ELSE + T = B - SQRT( B*B+C ) + END IF + SINE = -( ALPHA / ABSEST ) / T + COSINE = -( GAMMA / ABSEST ) / ( ONE+T ) + SESTPR = SQRT( ONE+T+FOUR*EPS*EPS*NORMA )*ABSEST + END IF + TMP = SQRT( SINE*DCONJG( SINE )+COSINE*DCONJG( COSINE ) ) + S = SINE / TMP + C = COSINE / TMP + RETURN +* + END IF + END IF + RETURN +* +* End of ZLAIC1 +* + END diff --git a/costa/native/external/lapack/zlals0.f b/costa/native/external/lapack/zlals0.f new file mode 100644 index 000000000..2abee26bb --- /dev/null +++ b/costa/native/external/lapack/zlals0.f @@ -0,0 +1,434 @@ + SUBROUTINE ZLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX, + $ PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, + $ POLES, DIFL, DIFR, Z, K, C, S, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* December 1, 1999 +* +* .. Scalar Arguments .. + INTEGER GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL, + $ LDGNUM, NL, NR, NRHS, SQRE + DOUBLE PRECISION C, S +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), PERM( * ) + DOUBLE PRECISION DIFL( * ), DIFR( LDGNUM, * ), + $ GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ), + $ RWORK( * ), Z( * ) + COMPLEX*16 B( LDB, * ), BX( LDBX, * ) +* .. +* +* Purpose +* ======= +* +* ZLALS0 applies back the multiplying factors of either the left or the +* right singular vector matrix of a diagonal matrix appended by a row +* to the right hand side matrix B in solving the least squares problem +* using the divide-and-conquer SVD approach. +* +* For the left singular vector matrix, three types of orthogonal +* matrices are involved: +* +* (1L) Givens rotations: the number of such rotations is GIVPTR; the +* pairs of columns/rows they were applied to are stored in GIVCOL; +* and the C- and S-values of these rotations are stored in GIVNUM. +* +* (2L) Permutation. The (NL+1)-st row of B is to be moved to the first +* row, and for J=2:N, PERM(J)-th row of B is to be moved to the +* J-th row. +* +* (3L) The left singular vector matrix of the remaining matrix. +* +* For the right singular vector matrix, four types of orthogonal +* matrices are involved: +* +* (1R) The right singular vector matrix of the remaining matrix. +* +* (2R) If SQRE = 1, one extra Givens rotation to generate the right +* null space. +* +* (3R) The inverse transformation of (2L). +* +* (4R) The inverse transformation of (1L). +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether singular vectors are to be computed in +* factored form: +* = 0: Left singular vector matrix. +* = 1: Right singular vector matrix. +* +* NL (input) INTEGER +* The row dimension of the upper block. NL >= 1. +* +* NR (input) INTEGER +* The row dimension of the lower block. NR >= 1. +* +* SQRE (input) INTEGER +* = 0: the lower block is an NR-by-NR square matrix. +* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +* +* The bidiagonal matrix has row dimension N = NL + NR + 1, +* and column dimension M = N + SQRE. +* +* NRHS (input) INTEGER +* The number of columns of B and BX. NRHS must be at least 1. +* +* B (input/output) COMPLEX*16 array, dimension ( LDB, NRHS ) +* On input, B contains the right hand sides of the least +* squares problem in rows 1 through M. On output, B contains +* the solution X in rows 1 through N. +* +* LDB (input) INTEGER +* The leading dimension of B. LDB must be at least +* max(1,MAX( M, N ) ). +* +* BX (workspace) COMPLEX*16 array, dimension ( LDBX, NRHS ) +* +* LDBX (input) INTEGER +* The leading dimension of BX. +* +* PERM (input) INTEGER array, dimension ( N ) +* The permutations (from deflation and sorting) applied +* to the two blocks. +* +* GIVPTR (input) INTEGER +* The number of Givens rotations which took place in this +* subproblem. +* +* GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) +* Each pair of numbers indicates a pair of rows/columns +* involved in a Givens rotation. +* +* LDGCOL (input) INTEGER +* The leading dimension of GIVCOL, must be at least N. +* +* GIVNUM (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) +* Each number indicates the C or S value used in the +* corresponding Givens rotation. +* +* LDGNUM (input) INTEGER +* The leading dimension of arrays DIFR, POLES and +* GIVNUM, must be at least K. +* +* POLES (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) +* On entry, POLES(1:K, 1) contains the new singular +* values obtained from solving the secular equation, and +* POLES(1:K, 2) is an array containing the poles in the secular +* equation. +* +* DIFL (input) DOUBLE PRECISION array, dimension ( K ). +* On entry, DIFL(I) is the distance between I-th updated +* (undeflated) singular value and the I-th (undeflated) old +* singular value. +* +* DIFR (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ). +* On entry, DIFR(I, 1) contains the distances between I-th +* updated (undeflated) singular value and the I+1-th +* (undeflated) old singular value. And DIFR(I, 2) is the +* normalizing factor for the I-th right singular vector. +* +* Z (input) DOUBLE PRECISION array, dimension ( K ) +* Contain the components of the deflation-adjusted updating row +* vector. +* +* K (input) INTEGER +* Contains the dimension of the non-deflated matrix, +* This is the order of the related secular equation. 1 <= K <=N. +* +* C (input) DOUBLE PRECISION +* C contains garbage if SQRE =0 and the C-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* S (input) DOUBLE PRECISION +* S contains garbage if SQRE =0 and the S-value of a Givens +* rotation related to the right null space if SQRE = 1. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension +* ( K*(1+NRHS) + 2*NRHS ) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO, NEGONE + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0, NEGONE = -1.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, JCOL, JROW, M, N, NLP1 + DOUBLE PRECISION DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, TEMP +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, XERBLA, ZCOPY, ZDROT, ZDSCAL, ZLACPY, + $ ZLASCL +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3, DNRM2 + EXTERNAL DLAMC3, DNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, DIMAG +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( NL.LT.1 ) THEN + INFO = -2 + ELSE IF( NR.LT.1 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + END IF +* + N = NL + NR + 1 +* + IF( NRHS.LT.1 ) THEN + INFO = -5 + ELSE IF( LDB.LT.N ) THEN + INFO = -7 + ELSE IF( LDBX.LT.N ) THEN + INFO = -9 + ELSE IF( GIVPTR.LT.0 ) THEN + INFO = -11 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -13 + ELSE IF( LDGNUM.LT.N ) THEN + INFO = -15 + ELSE IF( K.LT.1 ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLALS0', -INFO ) + RETURN + END IF +* + M = N + SQRE + NLP1 = NL + 1 +* + IF( ICOMPQ.EQ.0 ) THEN +* +* Apply back orthogonal transformations from the left. +* +* Step (1L): apply back the Givens rotations performed. +* + DO 10 I = 1, GIVPTR + CALL ZDROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB, + $ B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ), + $ GIVNUM( I, 1 ) ) + 10 CONTINUE +* +* Step (2L): permute rows of B. +* + CALL ZCOPY( NRHS, B( NLP1, 1 ), LDB, BX( 1, 1 ), LDBX ) + DO 20 I = 2, N + CALL ZCOPY( NRHS, B( PERM( I ), 1 ), LDB, BX( I, 1 ), LDBX ) + 20 CONTINUE +* +* Step (3L): apply the inverse of the left singular vector +* matrix to BX. +* + IF( K.EQ.1 ) THEN + CALL ZCOPY( NRHS, BX, LDBX, B, LDB ) + IF( Z( 1 ).LT.ZERO ) THEN + CALL ZDSCAL( NRHS, NEGONE, B, LDB ) + END IF + ELSE + DO 100 J = 1, K + DIFLJ = DIFL( J ) + DJ = POLES( J, 1 ) + DSIGJ = -POLES( J, 2 ) + IF( J.LT.K ) THEN + DIFRJ = -DIFR( J, 1 ) + DSIGJP = -POLES( J+1, 2 ) + END IF + IF( ( Z( J ).EQ.ZERO ) .OR. ( POLES( J, 2 ).EQ.ZERO ) ) + $ THEN + RWORK( J ) = ZERO + ELSE + RWORK( J ) = -POLES( J, 2 )*Z( J ) / DIFLJ / + $ ( POLES( J, 2 )+DJ ) + END IF + DO 30 I = 1, J - 1 + IF( ( Z( I ).EQ.ZERO ) .OR. + $ ( POLES( I, 2 ).EQ.ZERO ) ) THEN + RWORK( I ) = ZERO + ELSE + RWORK( I ) = POLES( I, 2 )*Z( I ) / + $ ( DLAMC3( POLES( I, 2 ), DSIGJ )- + $ DIFLJ ) / ( POLES( I, 2 )+DJ ) + END IF + 30 CONTINUE + DO 40 I = J + 1, K + IF( ( Z( I ).EQ.ZERO ) .OR. + $ ( POLES( I, 2 ).EQ.ZERO ) ) THEN + RWORK( I ) = ZERO + ELSE + RWORK( I ) = POLES( I, 2 )*Z( I ) / + $ ( DLAMC3( POLES( I, 2 ), DSIGJP )+ + $ DIFRJ ) / ( POLES( I, 2 )+DJ ) + END IF + 40 CONTINUE + RWORK( 1 ) = NEGONE + TEMP = DNRM2( K, RWORK, 1 ) +* +* Since B and BX are complex, the following call to DGEMV +* is performed in two steps (real and imaginary parts). +* +* CALL DGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO, +* $ B( J, 1 ), LDB ) +* + I = K + NRHS*2 + DO 60 JCOL = 1, NRHS + DO 50 JROW = 1, K + I = I + 1 + RWORK( I ) = DBLE( BX( JROW, JCOL ) ) + 50 CONTINUE + 60 CONTINUE + CALL DGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K, + $ RWORK( 1 ), 1, ZERO, RWORK( 1+K ), 1 ) + I = K + NRHS*2 + DO 80 JCOL = 1, NRHS + DO 70 JROW = 1, K + I = I + 1 + RWORK( I ) = DIMAG( BX( JROW, JCOL ) ) + 70 CONTINUE + 80 CONTINUE + CALL DGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K, + $ RWORK( 1 ), 1, ZERO, RWORK( 1+K+NRHS ), 1 ) + DO 90 JCOL = 1, NRHS + B( J, JCOL ) = DCMPLX( RWORK( JCOL+K ), + $ RWORK( JCOL+K+NRHS ) ) + 90 CONTINUE + CALL ZLASCL( 'G', 0, 0, TEMP, ONE, 1, NRHS, B( J, 1 ), + $ LDB, INFO ) + 100 CONTINUE + END IF +* +* Move the deflated rows of BX to B also. +* + IF( K.LT.MAX( M, N ) ) + $ CALL ZLACPY( 'A', N-K, NRHS, BX( K+1, 1 ), LDBX, + $ B( K+1, 1 ), LDB ) + ELSE +* +* Apply back the right orthogonal transformations. +* +* Step (1R): apply back the new right singular vector matrix +* to B. +* + IF( K.EQ.1 ) THEN + CALL ZCOPY( NRHS, B, LDB, BX, LDBX ) + ELSE + DO 180 J = 1, K + DSIGJ = POLES( J, 2 ) + IF( Z( J ).EQ.ZERO ) THEN + RWORK( J ) = ZERO + ELSE + RWORK( J ) = -Z( J ) / DIFL( J ) / + $ ( DSIGJ+POLES( J, 1 ) ) / DIFR( J, 2 ) + END IF + DO 110 I = 1, J - 1 + IF( Z( J ).EQ.ZERO ) THEN + RWORK( I ) = ZERO + ELSE + RWORK( I ) = Z( J ) / ( DLAMC3( DSIGJ, -POLES( I+1, + $ 2 ) )-DIFR( I, 1 ) ) / + $ ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 ) + END IF + 110 CONTINUE + DO 120 I = J + 1, K + IF( Z( J ).EQ.ZERO ) THEN + RWORK( I ) = ZERO + ELSE + RWORK( I ) = Z( J ) / ( DLAMC3( DSIGJ, -POLES( I, + $ 2 ) )-DIFL( I ) ) / + $ ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 ) + END IF + 120 CONTINUE +* +* Since B and BX are complex, the following call to DGEMV +* is performed in two steps (real and imaginary parts). +* +* CALL DGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO, +* $ BX( J, 1 ), LDBX ) +* + I = K + NRHS*2 + DO 140 JCOL = 1, NRHS + DO 130 JROW = 1, K + I = I + 1 + RWORK( I ) = DBLE( B( JROW, JCOL ) ) + 130 CONTINUE + 140 CONTINUE + CALL DGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K, + $ RWORK( 1 ), 1, ZERO, RWORK( 1+K ), 1 ) + I = K + NRHS*2 + DO 160 JCOL = 1, NRHS + DO 150 JROW = 1, K + I = I + 1 + RWORK( I ) = DIMAG( B( JROW, JCOL ) ) + 150 CONTINUE + 160 CONTINUE + CALL DGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K, + $ RWORK( 1 ), 1, ZERO, RWORK( 1+K+NRHS ), 1 ) + DO 170 JCOL = 1, NRHS + BX( J, JCOL ) = DCMPLX( RWORK( JCOL+K ), + $ RWORK( JCOL+K+NRHS ) ) + 170 CONTINUE + 180 CONTINUE + END IF +* +* Step (2R): if SQRE = 1, apply back the rotation that is +* related to the right null space of the subproblem. +* + IF( SQRE.EQ.1 ) THEN + CALL ZCOPY( NRHS, B( M, 1 ), LDB, BX( M, 1 ), LDBX ) + CALL ZDROT( NRHS, BX( 1, 1 ), LDBX, BX( M, 1 ), LDBX, C, S ) + END IF + IF( K.LT.MAX( M, N ) ) + $ CALL ZLACPY( 'A', N-K, NRHS, B( K+1, 1 ), LDB, BX( K+1, 1 ), + $ LDBX ) +* +* Step (3R): permute rows of B. +* + CALL ZCOPY( NRHS, BX( 1, 1 ), LDBX, B( NLP1, 1 ), LDB ) + IF( SQRE.EQ.1 ) THEN + CALL ZCOPY( NRHS, BX( M, 1 ), LDBX, B( M, 1 ), LDB ) + END IF + DO 190 I = 2, N + CALL ZCOPY( NRHS, BX( I, 1 ), LDBX, B( PERM( I ), 1 ), LDB ) + 190 CONTINUE +* +* Step (4R): apply back the Givens rotations performed. +* + DO 200 I = GIVPTR, 1, -1 + CALL ZDROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB, + $ B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ), + $ -GIVNUM( I, 1 ) ) + 200 CONTINUE + END IF +* + RETURN +* +* End of ZLALS0 +* + END diff --git a/costa/native/external/lapack/zlalsa.f b/costa/native/external/lapack/zlalsa.f new file mode 100644 index 000000000..79d796da4 --- /dev/null +++ b/costa/native/external/lapack/zlalsa.f @@ -0,0 +1,504 @@ + SUBROUTINE ZLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U, + $ LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR, + $ GIVCOL, LDGCOL, PERM, GIVNUM, C, S, RWORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS, + $ SMLSIZ +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ), + $ K( * ), PERM( LDGCOL, * ) + DOUBLE PRECISION C( * ), DIFL( LDU, * ), DIFR( LDU, * ), + $ GIVNUM( LDU, * ), POLES( LDU, * ), RWORK( * ), + $ S( * ), U( LDU, * ), VT( LDU, * ), Z( LDU, * ) + COMPLEX*16 B( LDB, * ), BX( LDBX, * ) +* .. +* +* Purpose +* ======= +* +* ZLALSA is an itermediate step in solving the least squares problem +* by computing the SVD of the coefficient matrix in compact form (The +* singular vectors are computed as products of simple orthorgonal +* matrices.). +* +* If ICOMPQ = 0, ZLALSA applies the inverse of the left singular vector +* matrix of an upper bidiagonal matrix to the right hand side; and if +* ICOMPQ = 1, ZLALSA applies the right singular vector matrix to the +* right hand side. The singular vector matrices were generated in +* compact form by ZLALSA. +* +* Arguments +* ========= +* +* ICOMPQ (input) INTEGER +* Specifies whether the left or the right singular vector +* matrix is involved. +* = 0: Left singular vector matrix +* = 1: Right singular vector matrix +* +* SMLSIZ (input) INTEGER +* The maximum size of the subproblems at the bottom of the +* computation tree. +* +* N (input) INTEGER +* The row and column dimensions of the upper bidiagonal matrix. +* +* NRHS (input) INTEGER +* The number of columns of B and BX. NRHS must be at least 1. +* +* B (input) COMPLEX*16 array, dimension ( LDB, NRHS ) +* On input, B contains the right hand sides of the least +* squares problem in rows 1 through M. On output, B contains +* the solution X in rows 1 through N. +* +* LDB (input) INTEGER +* The leading dimension of B in the calling subprogram. +* LDB must be at least max(1,MAX( M, N ) ). +* +* BX (output) COMPLEX*16 array, dimension ( LDBX, NRHS ) +* On exit, the result of applying the left or right singular +* vector matrix to B. +* +* LDBX (input) INTEGER +* The leading dimension of BX. +* +* U (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ). +* On entry, U contains the left singular vector matrices of all +* subproblems at the bottom level. +* +* LDU (input) INTEGER, LDU = > N. +* The leading dimension of arrays U, VT, DIFL, DIFR, +* POLES, GIVNUM, and Z. +* +* VT (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ). +* On entry, VT' contains the right singular vector matrices of +* all subproblems at the bottom level. +* +* K (input) INTEGER array, dimension ( N ). +* +* DIFL (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). +* where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. +* +* DIFR (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). +* On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record +* distances between singular values on the I-th level and +* singular values on the (I -1)-th level, and DIFR(*, 2 * I) +* record the normalizing factors of the right singular vectors +* matrices of subproblems on I-th level. +* +* Z (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). +* On entry, Z(1, I) contains the components of the deflation- +* adjusted updating row vector for subproblems on the I-th +* level. +* +* POLES (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). +* On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old +* singular values involved in the secular equations on the I-th +* level. +* +* GIVPTR (input) INTEGER array, dimension ( N ). +* On entry, GIVPTR( I ) records the number of Givens +* rotations performed on the I-th problem on the computation +* tree. +* +* GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). +* On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the +* locations of Givens rotations performed on the I-th level on +* the computation tree. +* +* LDGCOL (input) INTEGER, LDGCOL = > N. +* The leading dimension of arrays GIVCOL and PERM. +* +* PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). +* On entry, PERM(*, I) records permutations done on the I-th +* level of the computation tree. +* +* GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). +* On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- +* values of Givens rotations performed on the I-th level on the +* computation tree. +* +* C (input) DOUBLE PRECISION array, dimension ( N ). +* On entry, if the I-th subproblem is not square, +* C( I ) contains the C-value of a Givens rotation related to +* the right null space of the I-th subproblem. +* +* S (input) DOUBLE PRECISION array, dimension ( N ). +* On entry, if the I-th subproblem is not square, +* S( I ) contains the S-value of a Givens rotation related to +* the right null space of the I-th subproblem. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension at least +* max ( N, (SMLSZ+1)*NRHS*3 ). +* +* IWORK (workspace) INTEGER array. +* The dimension must be at least 3 * N +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, IC, IM1, INODE, J, JCOL, JIMAG, JREAL, + $ JROW, LF, LL, LVL, LVL2, ND, NDB1, NDIML, + $ NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, SQRE +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DLASDT, XERBLA, ZCOPY, ZLALS0 +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, DIMAG +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( SMLSIZ.LT.3 ) THEN + INFO = -2 + ELSE IF( N.LT.SMLSIZ ) THEN + INFO = -3 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -4 + ELSE IF( LDB.LT.N ) THEN + INFO = -6 + ELSE IF( LDBX.LT.N ) THEN + INFO = -8 + ELSE IF( LDU.LT.N ) THEN + INFO = -10 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -19 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLALSA', -INFO ) + RETURN + END IF +* +* Book-keeping and setting up the computation tree. +* + INODE = 1 + NDIML = INODE + N + NDIMR = NDIML + N +* + CALL DLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ), + $ IWORK( NDIMR ), SMLSIZ ) +* +* The following code applies back the left singular vector factors. +* For applying back the right singular vector factors, go to 170. +* + IF( ICOMPQ.EQ.1 ) THEN + GO TO 170 + END IF +* +* The nodes on the bottom level of the tree were solved +* by DLASDQ. The corresponding left and right singular vector +* matrices are in explicit form. First apply back the left +* singular vector matrices. +* + NDB1 = ( ND+1 ) / 2 + DO 130 I = NDB1, ND +* +* IC : center row of each node +* NL : number of rows of left subproblem +* NR : number of rows of right subproblem +* NLF: starting row of the left subproblem +* NRF: starting row of the right subproblem +* + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NR = IWORK( NDIMR+I1 ) + NLF = IC - NL + NRF = IC + 1 +* +* Since B and BX are complex, the following call to DGEMM +* is performed in two steps (real and imaginary parts). +* +* CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, +* $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) +* + J = NL*NRHS*2 + DO 20 JCOL = 1, NRHS + DO 10 JROW = NLF, NLF + NL - 1 + J = J + 1 + RWORK( J ) = DBLE( B( JROW, JCOL ) ) + 10 CONTINUE + 20 CONTINUE + CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, + $ RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1 ), NL ) + J = NL*NRHS*2 + DO 40 JCOL = 1, NRHS + DO 30 JROW = NLF, NLF + NL - 1 + J = J + 1 + RWORK( J ) = DIMAG( B( JROW, JCOL ) ) + 30 CONTINUE + 40 CONTINUE + CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, + $ RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1+NL*NRHS ), + $ NL ) + JREAL = 0 + JIMAG = NL*NRHS + DO 60 JCOL = 1, NRHS + DO 50 JROW = NLF, NLF + NL - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 50 CONTINUE + 60 CONTINUE +* +* Since B and BX are complex, the following call to DGEMM +* is performed in two steps (real and imaginary parts). +* +* CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, +* $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) +* + J = NR*NRHS*2 + DO 80 JCOL = 1, NRHS + DO 70 JROW = NRF, NRF + NR - 1 + J = J + 1 + RWORK( J ) = DBLE( B( JROW, JCOL ) ) + 70 CONTINUE + 80 CONTINUE + CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, + $ RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1 ), NR ) + J = NR*NRHS*2 + DO 100 JCOL = 1, NRHS + DO 90 JROW = NRF, NRF + NR - 1 + J = J + 1 + RWORK( J ) = DIMAG( B( JROW, JCOL ) ) + 90 CONTINUE + 100 CONTINUE + CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, + $ RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1+NR*NRHS ), + $ NR ) + JREAL = 0 + JIMAG = NR*NRHS + DO 120 JCOL = 1, NRHS + DO 110 JROW = NRF, NRF + NR - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 110 CONTINUE + 120 CONTINUE +* + 130 CONTINUE +* +* Next copy the rows of B that correspond to unchanged rows +* in the bidiagonal matrix to BX. +* + DO 140 I = 1, ND + IC = IWORK( INODE+I-1 ) + CALL ZCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX ) + 140 CONTINUE +* +* Finally go through the left singular vector matrices of all +* the other subproblems bottom-up on the tree. +* + J = 2**NLVL + SQRE = 0 +* + DO 160 LVL = NLVL, 1, -1 + LVL2 = 2*LVL - 1 +* +* find the first node LF and last node LL on +* the current level LVL +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 150 I = LF, LL + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + J = J - 1 + CALL ZLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX, + $ B( NLF, 1 ), LDB, PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ), + $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ), + $ Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK, + $ INFO ) + 150 CONTINUE + 160 CONTINUE + GO TO 330 +* +* ICOMPQ = 1: applying back the right singular vector factors. +* + 170 CONTINUE +* +* First now go through the right singular vector matrices of all +* the tree nodes top-down. +* + J = 0 + DO 190 LVL = 1, NLVL + LVL2 = 2*LVL - 1 +* +* Find the first node LF and last node LL on +* the current level LVL. +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 180 I = LL, LF, -1 + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + IF( I.EQ.LL ) THEN + SQRE = 0 + ELSE + SQRE = 1 + END IF + J = J + 1 + CALL ZLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB, + $ BX( NLF, 1 ), LDBX, PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ), + $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ), + $ Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK, + $ INFO ) + 180 CONTINUE + 190 CONTINUE +* +* The nodes on the bottom level of the tree were solved +* by DLASDQ. The corresponding right singular vector +* matrices are in explicit form. Apply them back. +* + NDB1 = ( ND+1 ) / 2 + DO 320 I = NDB1, ND + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NR = IWORK( NDIMR+I1 ) + NLP1 = NL + 1 + IF( I.EQ.ND ) THEN + NRP1 = NR + ELSE + NRP1 = NR + 1 + END IF + NLF = IC - NL + NRF = IC + 1 +* +* Since B and BX are complex, the following call to DGEMM is +* performed in two steps (real and imaginary parts). +* +* CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, +* $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) +* + J = NLP1*NRHS*2 + DO 210 JCOL = 1, NRHS + DO 200 JROW = NLF, NLF + NLP1 - 1 + J = J + 1 + RWORK( J ) = DBLE( B( JROW, JCOL ) ) + 200 CONTINUE + 210 CONTINUE + CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, + $ RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO, RWORK( 1 ), + $ NLP1 ) + J = NLP1*NRHS*2 + DO 230 JCOL = 1, NRHS + DO 220 JROW = NLF, NLF + NLP1 - 1 + J = J + 1 + RWORK( J ) = DIMAG( B( JROW, JCOL ) ) + 220 CONTINUE + 230 CONTINUE + CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, + $ RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO, + $ RWORK( 1+NLP1*NRHS ), NLP1 ) + JREAL = 0 + JIMAG = NLP1*NRHS + DO 250 JCOL = 1, NRHS + DO 240 JROW = NLF, NLF + NLP1 - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 240 CONTINUE + 250 CONTINUE +* +* Since B and BX are complex, the following call to DGEMM is +* performed in two steps (real and imaginary parts). +* +* CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, +* $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) +* + J = NRP1*NRHS*2 + DO 270 JCOL = 1, NRHS + DO 260 JROW = NRF, NRF + NRP1 - 1 + J = J + 1 + RWORK( J ) = DBLE( B( JROW, JCOL ) ) + 260 CONTINUE + 270 CONTINUE + CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, + $ RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO, RWORK( 1 ), + $ NRP1 ) + J = NRP1*NRHS*2 + DO 290 JCOL = 1, NRHS + DO 280 JROW = NRF, NRF + NRP1 - 1 + J = J + 1 + RWORK( J ) = DIMAG( B( JROW, JCOL ) ) + 280 CONTINUE + 290 CONTINUE + CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, + $ RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO, + $ RWORK( 1+NRP1*NRHS ), NRP1 ) + JREAL = 0 + JIMAG = NRP1*NRHS + DO 310 JCOL = 1, NRHS + DO 300 JROW = NRF, NRF + NRP1 - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 300 CONTINUE + 310 CONTINUE +* + 320 CONTINUE +* + 330 CONTINUE +* + RETURN +* +* End of ZLALSA +* + END diff --git a/costa/native/external/lapack/zlalsd.f b/costa/native/external/lapack/zlalsd.f new file mode 100644 index 000000000..da700d100 --- /dev/null +++ b/costa/native/external/lapack/zlalsd.f @@ -0,0 +1,599 @@ + SUBROUTINE ZLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND, + $ RANK, WORK, RWORK, IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS, RANK, SMLSIZ + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), RWORK( * ) + COMPLEX*16 B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZLALSD uses the singular value decomposition of A to solve the least +* squares problem of finding X to minimize the Euclidean norm of each +* column of A*X-B, where A is N-by-N upper bidiagonal, and X and B +* are N-by-NRHS. The solution X overwrites B. +* +* The singular values of A smaller than RCOND times the largest +* singular value are treated as zero in solving the least squares +* problem; in this case a minimum norm solution is returned. +* The actual singular values are returned in D in ascending order. +* +* This code makes very mild assumptions about floating point +* arithmetic. It will work on machines with a guard digit in +* add/subtract, or on those binary machines without guard digits +* which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. +* It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': D and E define an upper bidiagonal matrix. +* = 'L': D and E define a lower bidiagonal matrix. +* +* SMLSIZ (input) INTEGER +* The maximum size of the subproblems at the bottom of the +* computation tree. +* +* N (input) INTEGER +* The dimension of the bidiagonal matrix. N >= 0. +* +* NRHS (input) INTEGER +* The number of columns of B. NRHS must be at least 1. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry D contains the main diagonal of the bidiagonal +* matrix. On exit, if INFO = 0, D contains its singular values. +* +* E (input) DOUBLE PRECISION array, dimension (N-1) +* Contains the super-diagonal entries of the bidiagonal matrix. +* On exit, E has been destroyed. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On input, B contains the right hand sides of the least +* squares problem. On output, B contains the solution X. +* +* LDB (input) INTEGER +* The leading dimension of B in the calling subprogram. +* LDB must be at least max(1,N). +* +* RCOND (input) DOUBLE PRECISION +* The singular values of A less than or equal to RCOND times +* the largest singular value are treated as zero in solving +* the least squares problem. If RCOND is negative, +* machine precision is used instead. +* For example, if diag(S)*X=B were the least squares problem, +* where diag(S) is a diagonal matrix of singular values, the +* solution would be X(i) = B(i) / S(i) if S(i) is greater than +* RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to +* RCOND*max(S). +* +* RANK (output) INTEGER +* The number of singular values of A greater than RCOND times +* the largest singular value. +* +* WORK (workspace) COMPLEX*16 array, dimension at least +* (N * NRHS). +* +* RWORK (workspace) DOUBLE PRECISION array, dimension at least +* (9*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + (SMLSIZ+1)**2), +* where +* NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) +* +* IWORK (workspace) INTEGER array, dimension at least +* (3*N*NLVL + 11*N). +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: The algorithm failed to compute an singular value while +* working on the submatrix lying in rows and columns +* INFO/(N+1) through MOD(INFO,N+1). +* +* Further Details +* =============== +* +* Based on contributions by +* Ming Gu and Ren-Cang Li, Computer Science Division, University of +* California at Berkeley, USA +* Osni Marques, LBNL/NERSC, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + INTEGER BX, BXST, C, DIFL, DIFR, GIVCOL, GIVNUM, + $ GIVPTR, I, ICMPQ1, ICMPQ2, IRWB, IRWIB, IRWRB, + $ IRWU, IRWVT, IRWWRK, IWK, J, JCOL, JIMAG, + $ JREAL, JROW, K, NLVL, NM1, NRWORK, NSIZE, NSUB, + $ PERM, POLES, S, SIZEI, SMLSZP, SQRE, ST, ST1, + $ U, VT, Z + DOUBLE PRECISION CS, EPS, ORGNRM, R, SN, TOL +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL IDAMAX, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DLARTG, DLASCL, DLASDA, DLASDQ, DLASET, + $ DLASRT, XERBLA, ZCOPY, ZDROT, ZLACPY, ZLALSA, + $ ZLASCL, ZLASET +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DIMAG, INT, LOG, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -4 + ELSE IF( ( LDB.LT.1 ) .OR. ( LDB.LT.N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLALSD', -INFO ) + RETURN + END IF +* + EPS = DLAMCH( 'Epsilon' ) +* +* Set up the tolerance. +* + IF( ( RCOND.LE.ZERO ) .OR. ( RCOND.GE.ONE ) ) THEN + RCOND = EPS + END IF +* + RANK = 0 +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + IF( D( 1 ).EQ.ZERO ) THEN + CALL ZLASET( 'A', 1, NRHS, CZERO, CZERO, B, LDB ) + ELSE + RANK = 1 + CALL ZLASCL( 'G', 0, 0, D( 1 ), ONE, 1, NRHS, B, LDB, INFO ) + D( 1 ) = ABS( D( 1 ) ) + END IF + RETURN + END IF +* +* Rotate the matrix if it is lower bidiagonal. +* + IF( UPLO.EQ.'L' ) THEN + DO 10 I = 1, N - 1 + CALL DLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( NRHS.EQ.1 ) THEN + CALL ZDROT( 1, B( I, 1 ), 1, B( I+1, 1 ), 1, CS, SN ) + ELSE + RWORK( I*2-1 ) = CS + RWORK( I*2 ) = SN + END IF + 10 CONTINUE + IF( NRHS.GT.1 ) THEN + DO 30 I = 1, NRHS + DO 20 J = 1, N - 1 + CS = RWORK( J*2-1 ) + SN = RWORK( J*2 ) + CALL ZDROT( 1, B( J, I ), 1, B( J+1, I ), 1, CS, SN ) + 20 CONTINUE + 30 CONTINUE + END IF + END IF +* +* Scale. +* + NM1 = N - 1 + ORGNRM = DLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) THEN + CALL ZLASET( 'A', N, NRHS, CZERO, CZERO, B, LDB ) + RETURN + END IF +* + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO ) + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, INFO ) +* +* If N is smaller than the minimum divide size SMLSIZ, then solve +* the problem with another solver. +* + IF( N.LE.SMLSIZ ) THEN + IRWU = 1 + IRWVT = IRWU + N*N + IRWWRK = IRWVT + N*N + IRWRB = IRWWRK + IRWIB = IRWRB + N*NRHS + IRWB = IRWIB + N*NRHS + CALL DLASET( 'A', N, N, ZERO, ONE, RWORK( IRWU ), N ) + CALL DLASET( 'A', N, N, ZERO, ONE, RWORK( IRWVT ), N ) + CALL DLASDQ( 'U', 0, N, N, N, 0, D, E, RWORK( IRWVT ), N, + $ RWORK( IRWU ), N, RWORK( IRWWRK ), 1, + $ RWORK( IRWWRK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF +* +* In the real version, B is passed to DLASDQ and multiplied +* internally by Q'. Here B is complex and that product is +* computed below in two steps (real and imaginary parts). +* + J = IRWB - 1 + DO 50 JCOL = 1, NRHS + DO 40 JROW = 1, N + J = J + 1 + RWORK( J ) = DBLE( B( JROW, JCOL ) ) + 40 CONTINUE + 50 CONTINUE + CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWU ), N, + $ RWORK( IRWB ), N, ZERO, RWORK( IRWRB ), N ) + J = IRWB - 1 + DO 70 JCOL = 1, NRHS + DO 60 JROW = 1, N + J = J + 1 + RWORK( J ) = DIMAG( B( JROW, JCOL ) ) + 60 CONTINUE + 70 CONTINUE + CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWU ), N, + $ RWORK( IRWB ), N, ZERO, RWORK( IRWIB ), N ) + JREAL = IRWRB - 1 + JIMAG = IRWIB - 1 + DO 90 JCOL = 1, NRHS + DO 80 JROW = 1, N + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + B( JROW, JCOL ) = DCMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 80 CONTINUE + 90 CONTINUE +* + TOL = RCOND*ABS( D( IDAMAX( N, D, 1 ) ) ) + DO 100 I = 1, N + IF( D( I ).LE.TOL ) THEN + CALL ZLASET( 'A', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB ) + ELSE + CALL ZLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, B( I, 1 ), + $ LDB, INFO ) + RANK = RANK + 1 + END IF + 100 CONTINUE +* +* Since B is complex, the following call to DGEMM is performed +* in two steps (real and imaginary parts). That is for V * B +* (in the real version of the code V' is stored in WORK). +* +* CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO, +* $ WORK( NWORK ), N ) +* + J = IRWB - 1 + DO 120 JCOL = 1, NRHS + DO 110 JROW = 1, N + J = J + 1 + RWORK( J ) = DBLE( B( JROW, JCOL ) ) + 110 CONTINUE + 120 CONTINUE + CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWVT ), N, + $ RWORK( IRWB ), N, ZERO, RWORK( IRWRB ), N ) + J = IRWB - 1 + DO 140 JCOL = 1, NRHS + DO 130 JROW = 1, N + J = J + 1 + RWORK( J ) = DIMAG( B( JROW, JCOL ) ) + 130 CONTINUE + 140 CONTINUE + CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWVT ), N, + $ RWORK( IRWB ), N, ZERO, RWORK( IRWIB ), N ) + JREAL = IRWRB - 1 + JIMAG = IRWIB - 1 + DO 160 JCOL = 1, NRHS + DO 150 JROW = 1, N + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + B( JROW, JCOL ) = DCMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 150 CONTINUE + 160 CONTINUE +* +* Unscale. +* + CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) + CALL DLASRT( 'D', N, D, INFO ) + CALL ZLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO ) +* + RETURN + END IF +* +* Book-keeping and setting up some constants. +* + NLVL = INT( LOG( DBLE( N ) / DBLE( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1 +* + SMLSZP = SMLSIZ + 1 +* + U = 1 + VT = 1 + SMLSIZ*N + DIFL = VT + SMLSZP*N + DIFR = DIFL + NLVL*N + Z = DIFR + NLVL*N*2 + C = Z + NLVL*N + S = C + N + POLES = S + N + GIVNUM = POLES + 2*NLVL*N + NRWORK = GIVNUM + 2*NLVL*N + BX = 1 +* + IRWRB = NRWORK + IRWIB = IRWRB + SMLSIZ*NRHS + IRWB = IRWIB + SMLSIZ*NRHS +* + SIZEI = 1 + N + K = SIZEI + N + GIVPTR = K + N + PERM = GIVPTR + N + GIVCOL = PERM + NLVL*N + IWK = GIVCOL + NLVL*N*2 +* + ST = 1 + SQRE = 0 + ICMPQ1 = 1 + ICMPQ2 = 0 + NSUB = 0 +* + DO 170 I = 1, N + IF( ABS( D( I ) ).LT.EPS ) THEN + D( I ) = SIGN( EPS, D( I ) ) + END IF + 170 CONTINUE +* + DO 240 I = 1, NM1 + IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN + NSUB = NSUB + 1 + IWORK( NSUB ) = ST +* +* Subproblem found. First determine its size and then +* apply divide and conquer on it. +* + IF( I.LT.NM1 ) THEN +* +* A subproblem with E(I) small for I < NM1. +* + NSIZE = I - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + ELSE IF( ABS( E( I ) ).GE.EPS ) THEN +* +* A subproblem with E(NM1) not too small but I = NM1. +* + NSIZE = N - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + ELSE +* +* A subproblem with E(NM1) small. This implies an +* 1-by-1 subproblem at D(N), which is not solved +* explicitly. +* + NSIZE = I - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + NSUB = NSUB + 1 + IWORK( NSUB ) = N + IWORK( SIZEI+NSUB-1 ) = 1 + CALL ZCOPY( NRHS, B( N, 1 ), LDB, WORK( BX+NM1 ), N ) + END IF + ST1 = ST - 1 + IF( NSIZE.EQ.1 ) THEN +* +* This is a 1-by-1 subproblem and is not solved +* explicitly. +* + CALL ZCOPY( NRHS, B( ST, 1 ), LDB, WORK( BX+ST1 ), N ) + ELSE IF( NSIZE.LE.SMLSIZ ) THEN +* +* This is a small subproblem and is solved by DLASDQ. +* + CALL DLASET( 'A', NSIZE, NSIZE, ZERO, ONE, + $ RWORK( VT+ST1 ), N ) + CALL DLASET( 'A', NSIZE, NSIZE, ZERO, ONE, + $ RWORK( U+ST1 ), N ) + CALL DLASDQ( 'U', 0, NSIZE, NSIZE, NSIZE, 0, D( ST ), + $ E( ST ), RWORK( VT+ST1 ), N, RWORK( U+ST1 ), + $ N, RWORK( NRWORK ), 1, RWORK( NRWORK ), + $ INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF +* +* In the real version, B is passed to DLASDQ and multiplied +* internally by Q'. Here B is complex and that product is +* computed below in two steps (real and imaginary parts). +* + J = IRWB - 1 + DO 190 JCOL = 1, NRHS + DO 180 JROW = ST, ST + NSIZE - 1 + J = J + 1 + RWORK( J ) = DBLE( B( JROW, JCOL ) ) + 180 CONTINUE + 190 CONTINUE + CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( U+ST1 ), N, RWORK( IRWB ), NSIZE, + $ ZERO, RWORK( IRWRB ), NSIZE ) + J = IRWB - 1 + DO 210 JCOL = 1, NRHS + DO 200 JROW = ST, ST + NSIZE - 1 + J = J + 1 + RWORK( J ) = DIMAG( B( JROW, JCOL ) ) + 200 CONTINUE + 210 CONTINUE + CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( U+ST1 ), N, RWORK( IRWB ), NSIZE, + $ ZERO, RWORK( IRWIB ), NSIZE ) + JREAL = IRWRB - 1 + JIMAG = IRWIB - 1 + DO 230 JCOL = 1, NRHS + DO 220 JROW = ST, ST + NSIZE - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + B( JROW, JCOL ) = DCMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 220 CONTINUE + 230 CONTINUE +* + CALL ZLACPY( 'A', NSIZE, NRHS, B( ST, 1 ), LDB, + $ WORK( BX+ST1 ), N ) + ELSE +* +* A large problem. Solve it using divide and conquer. +* + CALL DLASDA( ICMPQ1, SMLSIZ, NSIZE, SQRE, D( ST ), + $ E( ST ), RWORK( U+ST1 ), N, RWORK( VT+ST1 ), + $ IWORK( K+ST1 ), RWORK( DIFL+ST1 ), + $ RWORK( DIFR+ST1 ), RWORK( Z+ST1 ), + $ RWORK( POLES+ST1 ), IWORK( GIVPTR+ST1 ), + $ IWORK( GIVCOL+ST1 ), N, IWORK( PERM+ST1 ), + $ RWORK( GIVNUM+ST1 ), RWORK( C+ST1 ), + $ RWORK( S+ST1 ), RWORK( NRWORK ), + $ IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + BXST = BX + ST1 + CALL ZLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, B( ST, 1 ), + $ LDB, WORK( BXST ), N, RWORK( U+ST1 ), N, + $ RWORK( VT+ST1 ), IWORK( K+ST1 ), + $ RWORK( DIFL+ST1 ), RWORK( DIFR+ST1 ), + $ RWORK( Z+ST1 ), RWORK( POLES+ST1 ), + $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N, + $ IWORK( PERM+ST1 ), RWORK( GIVNUM+ST1 ), + $ RWORK( C+ST1 ), RWORK( S+ST1 ), + $ RWORK( NRWORK ), IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + END IF + ST = I + 1 + END IF + 240 CONTINUE +* +* Apply the singular values and treat the tiny ones as zero. +* + TOL = RCOND*ABS( D( IDAMAX( N, D, 1 ) ) ) +* + DO 250 I = 1, N +* +* Some of the elements in D can be negative because 1-by-1 +* subproblems were not solved explicitly. +* + IF( ABS( D( I ) ).LE.TOL ) THEN + CALL ZLASET( 'A', 1, NRHS, CZERO, CZERO, WORK( BX+I-1 ), N ) + ELSE + RANK = RANK + 1 + CALL ZLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, + $ WORK( BX+I-1 ), N, INFO ) + END IF + D( I ) = ABS( D( I ) ) + 250 CONTINUE +* +* Now apply back the right singular vectors. +* + ICMPQ2 = 1 + DO 320 I = 1, NSUB + ST = IWORK( I ) + ST1 = ST - 1 + NSIZE = IWORK( SIZEI+I-1 ) + BXST = BX + ST1 + IF( NSIZE.EQ.1 ) THEN + CALL ZCOPY( NRHS, WORK( BXST ), N, B( ST, 1 ), LDB ) + ELSE IF( NSIZE.LE.SMLSIZ ) THEN +* +* Since B and BX are complex, the following call to DGEMM +* is performed in two steps (real and imaginary parts). +* +* CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, +* $ RWORK( VT+ST1 ), N, RWORK( BXST ), N, ZERO, +* $ B( ST, 1 ), LDB ) +* + J = BXST - N - 1 + JREAL = IRWB - 1 + DO 270 JCOL = 1, NRHS + J = J + N + DO 260 JROW = 1, NSIZE + JREAL = JREAL + 1 + RWORK( JREAL ) = DBLE( WORK( J+JROW ) ) + 260 CONTINUE + 270 CONTINUE + CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( VT+ST1 ), N, RWORK( IRWB ), NSIZE, ZERO, + $ RWORK( IRWRB ), NSIZE ) + J = BXST - N - 1 + JIMAG = IRWB - 1 + DO 290 JCOL = 1, NRHS + J = J + N + DO 280 JROW = 1, NSIZE + JIMAG = JIMAG + 1 + RWORK( JIMAG ) = DIMAG( WORK( J+JROW ) ) + 280 CONTINUE + 290 CONTINUE + CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( VT+ST1 ), N, RWORK( IRWB ), NSIZE, ZERO, + $ RWORK( IRWIB ), NSIZE ) + JREAL = IRWRB - 1 + JIMAG = IRWIB - 1 + DO 310 JCOL = 1, NRHS + DO 300 JROW = ST, ST + NSIZE - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + B( JROW, JCOL ) = DCMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 300 CONTINUE + 310 CONTINUE + ELSE + CALL ZLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, WORK( BXST ), N, + $ B( ST, 1 ), LDB, RWORK( U+ST1 ), N, + $ RWORK( VT+ST1 ), IWORK( K+ST1 ), + $ RWORK( DIFL+ST1 ), RWORK( DIFR+ST1 ), + $ RWORK( Z+ST1 ), RWORK( POLES+ST1 ), + $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N, + $ IWORK( PERM+ST1 ), RWORK( GIVNUM+ST1 ), + $ RWORK( C+ST1 ), RWORK( S+ST1 ), + $ RWORK( NRWORK ), IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + END IF + 320 CONTINUE +* +* Unscale and sort the singular values. +* + CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) + CALL DLASRT( 'D', N, D, INFO ) + CALL ZLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO ) +* + RETURN +* +* End of ZLALSD +* + END diff --git a/costa/native/external/lapack/zlangb.f b/costa/native/external/lapack/zlangb.f new file mode 100644 index 000000000..112c63bec --- /dev/null +++ b/costa/native/external/lapack/zlangb.f @@ -0,0 +1,155 @@ + DOUBLE PRECISION FUNCTION ZLANGB( NORM, N, KL, KU, AB, LDAB, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER KL, KU, LDAB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* ZLANGB returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of an +* n by n band matrix A, with kl sub-diagonals and ku super-diagonals. +* +* Description +* =========== +* +* ZLANGB returns the value +* +* ZLANGB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in ZLANGB as described +* above. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, ZLANGB is +* set to zero. +* +* KL (input) INTEGER +* The number of sub-diagonals of the matrix A. KL >= 0. +* +* KU (input) INTEGER +* The number of super-diagonals of the matrix A. KU >= 0. +* +* AB (input) COMPLEX*16 array, dimension (LDAB,N) +* The band matrix A, stored in rows 1 to KL+KU+1. The j-th +* column of A is stored in the j-th column of the array AB as +* follows: +* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KL+KU+1. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), +* where LWORK >= N when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, K, L + DOUBLE PRECISION SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 30 CONTINUE + VALUE = MAX( VALUE, SUM ) + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, N + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + K = KU + 1 - J + DO 60 I = MAX( 1, J-KU ), MIN( N, J+KL ) + WORK( I ) = WORK( I ) + ABS( AB( K+I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + L = MAX( 1, J-KU ) + K = KU + 1 - J + L + CALL ZLASSQ( MIN( N, J+KL )-L+1, AB( K, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANGB = VALUE + RETURN +* +* End of ZLANGB +* + END diff --git a/costa/native/external/lapack/zlange.f b/costa/native/external/lapack/zlange.f new file mode 100644 index 000000000..754f0f779 --- /dev/null +++ b/costa/native/external/lapack/zlange.f @@ -0,0 +1,146 @@ + DOUBLE PRECISION FUNCTION ZLANGE( NORM, M, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZLANGE returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* complex matrix A. +* +* Description +* =========== +* +* ZLANGE returns the value +* +* ZLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in ZLANGE as described +* above. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. When M = 0, +* ZLANGE is set to zero. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. When N = 0, +* ZLANGE is set to zero. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The m by n matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(M,1). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), +* where LWORK >= M when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( MIN( M, N ).EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = 1, M + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = 1, M + SUM = SUM + ABS( A( I, J ) ) + 30 CONTINUE + VALUE = MAX( VALUE, SUM ) + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, M + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + DO 60 I = 1, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, M + VALUE = MAX( VALUE, WORK( I ) ) + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + CALL ZLASSQ( M, A( 1, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANGE = VALUE + RETURN +* +* End of ZLANGE +* + END diff --git a/costa/native/external/lapack/zlangt.f b/costa/native/external/lapack/zlangt.f new file mode 100644 index 000000000..e6c053831 --- /dev/null +++ b/costa/native/external/lapack/zlangt.f @@ -0,0 +1,142 @@ + DOUBLE PRECISION FUNCTION ZLANGT( NORM, N, DL, D, DU ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER N +* .. +* .. Array Arguments .. + COMPLEX*16 D( * ), DL( * ), DU( * ) +* .. +* +* Purpose +* ======= +* +* ZLANGT returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* complex tridiagonal matrix A. +* +* Description +* =========== +* +* ZLANGT returns the value +* +* ZLANGT = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in ZLANGT as described +* above. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, ZLANGT is +* set to zero. +* +* DL (input) COMPLEX*16 array, dimension (N-1) +* The (n-1) sub-diagonal elements of A. +* +* D (input) COMPLEX*16 array, dimension (N) +* The diagonal elements of A. +* +* DU (input) COMPLEX*16 array, dimension (N-1) +* The (n-1) super-diagonal elements of A. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION ANORM, SCALE, SUM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + ANORM = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + ANORM = ABS( D( N ) ) + DO 10 I = 1, N - 1 + ANORM = MAX( ANORM, ABS( DL( I ) ) ) + ANORM = MAX( ANORM, ABS( D( I ) ) ) + ANORM = MAX( ANORM, ABS( DU( I ) ) ) + 10 CONTINUE + ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN +* +* Find norm1(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = MAX( ABS( D( 1 ) )+ABS( DL( 1 ) ), + $ ABS( D( N ) )+ABS( DU( N-1 ) ) ) + DO 20 I = 2, N - 1 + ANORM = MAX( ANORM, ABS( D( I ) )+ABS( DL( I ) )+ + $ ABS( DU( I-1 ) ) ) + 20 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = MAX( ABS( D( 1 ) )+ABS( DU( 1 ) ), + $ ABS( D( N ) )+ABS( DL( N-1 ) ) ) + DO 30 I = 2, N - 1 + ANORM = MAX( ANORM, ABS( D( I ) )+ABS( DU( I ) )+ + $ ABS( DL( I-1 ) ) ) + 30 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + CALL ZLASSQ( N, D, 1, SCALE, SUM ) + IF( N.GT.1 ) THEN + CALL ZLASSQ( N-1, DL, 1, SCALE, SUM ) + CALL ZLASSQ( N-1, DU, 1, SCALE, SUM ) + END IF + ANORM = SCALE*SQRT( SUM ) + END IF +* + ZLANGT = ANORM + RETURN +* +* End of ZLANGT +* + END diff --git a/costa/native/external/lapack/zlanhb.f b/costa/native/external/lapack/zlanhb.f new file mode 100644 index 000000000..a30f9ce51 --- /dev/null +++ b/costa/native/external/lapack/zlanhb.f @@ -0,0 +1,202 @@ + DOUBLE PRECISION FUNCTION ZLANHB( NORM, UPLO, N, K, AB, LDAB, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER K, LDAB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* ZLANHB returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of an +* n by n hermitian band matrix A, with k super-diagonals. +* +* Description +* =========== +* +* ZLANHB returns the value +* +* ZLANHB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in ZLANHB as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* band matrix A is supplied. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, ZLANHB is +* set to zero. +* +* K (input) INTEGER +* The number of super-diagonals or sub-diagonals of the +* band matrix A. K >= 0. +* +* AB (input) COMPLEX*16 array, dimension (LDAB,N) +* The upper or lower triangle of the hermitian band matrix A, +* stored in the first K+1 rows of AB. The j-th column of A is +* stored in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). +* Note that the imaginary parts of the diagonal elements need +* not be set and are assumed to be zero. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= K+1. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), +* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +* WORK is not referenced. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L + DOUBLE PRECISION ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = MAX( K+2-J, 1 ), K + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 10 CONTINUE + VALUE = MAX( VALUE, ABS( DBLE( AB( K+1, J ) ) ) ) + 20 CONTINUE + ELSE + DO 40 J = 1, N + VALUE = MAX( VALUE, ABS( DBLE( AB( 1, J ) ) ) ) + DO 30 I = 2, MIN( N+1-J, K+1 ) + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is hermitian). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + L = K + 1 - J + DO 50 I = MAX( 1, J-K ), J - 1 + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( DBLE( AB( K+1, J ) ) ) + 60 CONTINUE + DO 70 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( DBLE( AB( 1, J ) ) ) + L = 1 - J + DO 90 I = J + 1, MIN( N, J+K ) + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + VALUE = MAX( VALUE, SUM ) + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( K.GT.0 ) THEN + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL ZLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ), + $ 1, SCALE, SUM ) + 110 CONTINUE + L = K + 1 + ELSE + DO 120 J = 1, N - 1 + CALL ZLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, + $ SUM ) + 120 CONTINUE + L = 1 + END IF + SUM = 2*SUM + ELSE + L = 1 + END IF + DO 130 J = 1, N + IF( DBLE( AB( L, J ) ).NE.ZERO ) THEN + ABSA = ABS( DBLE( AB( L, J ) ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + 130 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANHB = VALUE + RETURN +* +* End of ZLANHB +* + END diff --git a/costa/native/external/lapack/zlanhe.f b/costa/native/external/lapack/zlanhe.f new file mode 100644 index 000000000..deda1052e --- /dev/null +++ b/costa/native/external/lapack/zlanhe.f @@ -0,0 +1,188 @@ + DOUBLE PRECISION FUNCTION ZLANHE( NORM, UPLO, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZLANHE returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* complex hermitian matrix A. +* +* Description +* =========== +* +* ZLANHE returns the value +* +* ZLANHE = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in ZLANHE as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* hermitian matrix A is to be referenced. +* = 'U': Upper triangular part of A is referenced +* = 'L': Lower triangular part of A is referenced +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, ZLANHE is +* set to zero. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The hermitian matrix A. If UPLO = 'U', the leading n by n +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading n by n lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. Note that the imaginary parts of the diagonal +* elements need not be set and are assumed to be zero. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(N,1). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), +* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +* WORK is not referenced. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, J - 1 + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + VALUE = MAX( VALUE, ABS( DBLE( A( J, J ) ) ) ) + 20 CONTINUE + ELSE + DO 40 J = 1, N + VALUE = MAX( VALUE, ABS( DBLE( A( J, J ) ) ) ) + DO 30 I = J + 1, N + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is hermitian). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( DBLE( A( J, J ) ) ) + 60 CONTINUE + DO 70 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( DBLE( A( J, J ) ) ) + DO 90 I = J + 1, N + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + VALUE = MAX( VALUE, SUM ) + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL ZLASSQ( J-1, A( 1, J ), 1, SCALE, SUM ) + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL ZLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM ) + 120 CONTINUE + END IF + SUM = 2*SUM + DO 130 I = 1, N + IF( DBLE( A( I, I ) ).NE.ZERO ) THEN + ABSA = ABS( DBLE( A( I, I ) ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + 130 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANHE = VALUE + RETURN +* +* End of ZLANHE +* + END diff --git a/costa/native/external/lapack/zlanhp.f b/costa/native/external/lapack/zlanhp.f new file mode 100644 index 000000000..3c5f4cde6 --- /dev/null +++ b/costa/native/external/lapack/zlanhp.f @@ -0,0 +1,202 @@ + DOUBLE PRECISION FUNCTION ZLANHP( NORM, UPLO, N, AP, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 AP( * ) +* .. +* +* Purpose +* ======= +* +* ZLANHP returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* complex hermitian matrix A, supplied in packed form. +* +* Description +* =========== +* +* ZLANHP returns the value +* +* ZLANHP = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in ZLANHP as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* hermitian matrix A is supplied. +* = 'U': Upper triangular part of A is supplied +* = 'L': Lower triangular part of A is supplied +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, ZLANHP is +* set to zero. +* +* AP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The upper or lower triangle of the hermitian matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* Note that the imaginary parts of the diagonal elements need +* not be set and are assumed to be zero. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), +* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +* WORK is not referenced. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, K + DOUBLE PRECISION ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + K = 0 + DO 20 J = 1, N + DO 10 I = K + 1, K + J - 1 + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 10 CONTINUE + K = K + J + VALUE = MAX( VALUE, ABS( DBLE( AP( K ) ) ) ) + 20 CONTINUE + ELSE + K = 1 + DO 40 J = 1, N + VALUE = MAX( VALUE, ABS( DBLE( AP( K ) ) ) ) + DO 30 I = K + 1, K + N - J + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 30 CONTINUE + K = K + N - J + 1 + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is hermitian). +* + VALUE = ZERO + K = 1 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 50 CONTINUE + WORK( J ) = SUM + ABS( DBLE( AP( K ) ) ) + K = K + 1 + 60 CONTINUE + DO 70 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( DBLE( AP( K ) ) ) + K = K + 1 + DO 90 I = J + 1, N + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 90 CONTINUE + VALUE = MAX( VALUE, SUM ) + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + K = 2 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL ZLASSQ( J-1, AP( K ), 1, SCALE, SUM ) + K = K + J + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL ZLASSQ( N-J, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 120 CONTINUE + END IF + SUM = 2*SUM + K = 1 + DO 130 I = 1, N + IF( DBLE( AP( K ) ).NE.ZERO ) THEN + ABSA = ABS( DBLE( AP( K ) ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + IF( LSAME( UPLO, 'U' ) ) THEN + K = K + I + 1 + ELSE + K = K + N - I + 1 + END IF + 130 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANHP = VALUE + RETURN +* +* End of ZLANHP +* + END diff --git a/costa/native/external/lapack/zlanhs.f b/costa/native/external/lapack/zlanhs.f new file mode 100644 index 000000000..2499c5119 --- /dev/null +++ b/costa/native/external/lapack/zlanhs.f @@ -0,0 +1,143 @@ + DOUBLE PRECISION FUNCTION ZLANHS( NORM, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZLANHS returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* Hessenberg matrix A. +* +* Description +* =========== +* +* ZLANHS returns the value +* +* ZLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in ZLANHS as described +* above. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, ZLANHS is +* set to zero. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The n by n upper Hessenberg matrix A; the part of A below the +* first sub-diagonal is not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(N,1). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), +* where LWORK >= N when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = 1, MIN( N, J+1 ) + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = 1, MIN( N, J+1 ) + SUM = SUM + ABS( A( I, J ) ) + 30 CONTINUE + VALUE = MAX( VALUE, SUM ) + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, N + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + DO 60 I = 1, MIN( N, J+1 ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + CALL ZLASSQ( MIN( N, J+1 ), A( 1, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANHS = VALUE + RETURN +* +* End of ZLANHS +* + END diff --git a/costa/native/external/lapack/zlanht.f b/costa/native/external/lapack/zlanht.f new file mode 100644 index 000000000..8fca28266 --- /dev/null +++ b/costa/native/external/lapack/zlanht.f @@ -0,0 +1,126 @@ + DOUBLE PRECISION FUNCTION ZLANHT( NORM, N, D, E ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ) + COMPLEX*16 E( * ) +* .. +* +* Purpose +* ======= +* +* ZLANHT returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* complex Hermitian tridiagonal matrix A. +* +* Description +* =========== +* +* ZLANHT returns the value +* +* ZLANHT = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in ZLANHT as described +* above. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, ZLANHT is +* set to zero. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The diagonal elements of A. +* +* E (input) COMPLEX*16 array, dimension (N-1) +* The (n-1) sub-diagonal or super-diagonal elements of A. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION ANORM, SCALE, SUM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ, ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + ANORM = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + ANORM = ABS( D( N ) ) + DO 10 I = 1, N - 1 + ANORM = MAX( ANORM, ABS( D( I ) ) ) + ANORM = MAX( ANORM, ABS( E( I ) ) ) + 10 CONTINUE + ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' .OR. + $ LSAME( NORM, 'I' ) ) THEN +* +* Find norm1(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = MAX( ABS( D( 1 ) )+ABS( E( 1 ) ), + $ ABS( E( N-1 ) )+ABS( D( N ) ) ) + DO 20 I = 2, N - 1 + ANORM = MAX( ANORM, ABS( D( I ) )+ABS( E( I ) )+ + $ ABS( E( I-1 ) ) ) + 20 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( N.GT.1 ) THEN + CALL ZLASSQ( N-1, E, 1, SCALE, SUM ) + SUM = 2*SUM + END IF + CALL DLASSQ( N, D, 1, SCALE, SUM ) + ANORM = SCALE*SQRT( SUM ) + END IF +* + ZLANHT = ANORM + RETURN +* +* End of ZLANHT +* + END diff --git a/costa/native/external/lapack/zlansb.f b/costa/native/external/lapack/zlansb.f new file mode 100644 index 000000000..ffb1b1367 --- /dev/null +++ b/costa/native/external/lapack/zlansb.f @@ -0,0 +1,188 @@ + DOUBLE PRECISION FUNCTION ZLANSB( NORM, UPLO, N, K, AB, LDAB, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER K, LDAB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* ZLANSB returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of an +* n by n symmetric band matrix A, with k super-diagonals. +* +* Description +* =========== +* +* ZLANSB returns the value +* +* ZLANSB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in ZLANSB as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* band matrix A is supplied. +* = 'U': Upper triangular part is supplied +* = 'L': Lower triangular part is supplied +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, ZLANSB is +* set to zero. +* +* K (input) INTEGER +* The number of super-diagonals or sub-diagonals of the +* band matrix A. K >= 0. +* +* AB (input) COMPLEX*16 array, dimension (LDAB,N) +* The upper or lower triangle of the symmetric band matrix A, +* stored in the first K+1 rows of AB. The j-th column of A is +* stored in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= K+1. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), +* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +* WORK is not referenced. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L + DOUBLE PRECISION ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = MAX( K+2-J, 1 ), K + 1 + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = 1, MIN( N+1-J, K+1 ) + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + L = K + 1 - J + DO 50 I = MAX( 1, J-K ), J - 1 + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( AB( K+1, J ) ) + 60 CONTINUE + DO 70 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( AB( 1, J ) ) + L = 1 - J + DO 90 I = J + 1, MIN( N, J+K ) + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + VALUE = MAX( VALUE, SUM ) + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( K.GT.0 ) THEN + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL ZLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ), + $ 1, SCALE, SUM ) + 110 CONTINUE + L = K + 1 + ELSE + DO 120 J = 1, N - 1 + CALL ZLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, + $ SUM ) + 120 CONTINUE + L = 1 + END IF + SUM = 2*SUM + ELSE + L = 1 + END IF + CALL ZLASSQ( N, AB( L, 1 ), LDAB, SCALE, SUM ) + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANSB = VALUE + RETURN +* +* End of ZLANSB +* + END diff --git a/costa/native/external/lapack/zlansp.f b/costa/native/external/lapack/zlansp.f new file mode 100644 index 000000000..9f70a691b --- /dev/null +++ b/costa/native/external/lapack/zlansp.f @@ -0,0 +1,207 @@ + DOUBLE PRECISION FUNCTION ZLANSP( NORM, UPLO, N, AP, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 AP( * ) +* .. +* +* Purpose +* ======= +* +* ZLANSP returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* complex symmetric matrix A, supplied in packed form. +* +* Description +* =========== +* +* ZLANSP returns the value +* +* ZLANSP = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in ZLANSP as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is supplied. +* = 'U': Upper triangular part of A is supplied +* = 'L': Lower triangular part of A is supplied +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, ZLANSP is +* set to zero. +* +* AP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The upper or lower triangle of the symmetric matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), +* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +* WORK is not referenced. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, K + DOUBLE PRECISION ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + K = 1 + DO 20 J = 1, N + DO 10 I = K, K + J - 1 + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 10 CONTINUE + K = K + J + 20 CONTINUE + ELSE + K = 1 + DO 40 J = 1, N + DO 30 I = K, K + N - J + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 30 CONTINUE + K = K + N - J + 1 + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + K = 1 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 50 CONTINUE + WORK( J ) = SUM + ABS( AP( K ) ) + K = K + 1 + 60 CONTINUE + DO 70 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( AP( K ) ) + K = K + 1 + DO 90 I = J + 1, N + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 90 CONTINUE + VALUE = MAX( VALUE, SUM ) + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + K = 2 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL ZLASSQ( J-1, AP( K ), 1, SCALE, SUM ) + K = K + J + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL ZLASSQ( N-J, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 120 CONTINUE + END IF + SUM = 2*SUM + K = 1 + DO 130 I = 1, N + IF( DBLE( AP( K ) ).NE.ZERO ) THEN + ABSA = ABS( DBLE( AP( K ) ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + IF( DIMAG( AP( K ) ).NE.ZERO ) THEN + ABSA = ABS( DIMAG( AP( K ) ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + IF( LSAME( UPLO, 'U' ) ) THEN + K = K + I + 1 + ELSE + K = K + N - I + 1 + END IF + 130 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANSP = VALUE + RETURN +* +* End of ZLANSP +* + END diff --git a/costa/native/external/lapack/zlansy.f b/costa/native/external/lapack/zlansy.f new file mode 100644 index 000000000..571783614 --- /dev/null +++ b/costa/native/external/lapack/zlansy.f @@ -0,0 +1,175 @@ + DOUBLE PRECISION FUNCTION ZLANSY( NORM, UPLO, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZLANSY returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* complex symmetric matrix A. +* +* Description +* =========== +* +* ZLANSY returns the value +* +* ZLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in ZLANSY as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is to be referenced. +* = 'U': Upper triangular part of A is referenced +* = 'L': Lower triangular part of A is referenced +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, ZLANSY is +* set to zero. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The symmetric matrix A. If UPLO = 'U', the leading n by n +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading n by n lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(N,1). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), +* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +* WORK is not referenced. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, J + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = J, N + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( A( J, J ) ) + 60 CONTINUE + DO 70 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( A( J, J ) ) + DO 90 I = J + 1, N + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + VALUE = MAX( VALUE, SUM ) + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL ZLASSQ( J-1, A( 1, J ), 1, SCALE, SUM ) + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL ZLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM ) + 120 CONTINUE + END IF + SUM = 2*SUM + CALL ZLASSQ( N, A, LDA+1, SCALE, SUM ) + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANSY = VALUE + RETURN +* +* End of ZLANSY +* + END diff --git a/costa/native/external/lapack/zlantb.f b/costa/native/external/lapack/zlantb.f new file mode 100644 index 000000000..b6ffac6c3 --- /dev/null +++ b/costa/native/external/lapack/zlantb.f @@ -0,0 +1,286 @@ + DOUBLE PRECISION FUNCTION ZLANTB( NORM, UPLO, DIAG, N, K, AB, + $ LDAB, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER K, LDAB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* ZLANTB returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of an +* n by n triangular band matrix A, with ( k + 1 ) diagonals. +* +* Description +* =========== +* +* ZLANTB returns the value +* +* ZLANTB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in ZLANTB as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, ZLANTB is +* set to zero. +* +* K (input) INTEGER +* The number of super-diagonals of the matrix A if UPLO = 'U', +* or the number of sub-diagonals of the matrix A if UPLO = 'L'. +* K >= 0. +* +* AB (input) COMPLEX*16 array, dimension (LDAB,N) +* The upper or lower triangular band matrix A, stored in the +* first k+1 rows of AB. The j-th column of A is stored +* in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). +* Note that when DIAG = 'U', the elements of the array AB +* corresponding to the diagonal elements of the matrix A are +* not referenced, but are assumed to be one. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= K+1. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), +* where LWORK >= N when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J, L + DOUBLE PRECISION SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = MAX( K+2-J, 1 ), K + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = 2, MIN( N+1-J, K+1 ) + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = MAX( K+2-J, 1 ), K + 1 + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = 1, MIN( N+1-J, K+1 ) + VALUE = MAX( VALUE, ABS( AB( I, J ) ) ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 90 I = MAX( K+2-J, 1 ), K + SUM = SUM + ABS( AB( I, J ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = MAX( K+2-J, 1 ), K + 1 + SUM = SUM + ABS( AB( I, J ) ) + 100 CONTINUE + END IF + VALUE = MAX( VALUE, SUM ) + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = 2, MIN( N+1-J, K+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = 1, MIN( N+1-J, K+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 130 CONTINUE + END IF + VALUE = MAX( VALUE, SUM ) + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, N + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + L = K + 1 - J + DO 160 I = MAX( 1, J-K ), J - 1 + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 160 CONTINUE + 170 CONTINUE + ELSE + DO 180 I = 1, N + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + L = K + 1 - J + DO 190 I = MAX( 1, J-K ), J + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 230 J = 1, N + L = 1 - J + DO 220 I = J + 1, MIN( N, J+K ) + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 220 CONTINUE + 230 CONTINUE + ELSE + DO 240 I = 1, N + WORK( I ) = ZERO + 240 CONTINUE + DO 260 J = 1, N + L = 1 - J + DO 250 I = J, MIN( N, J+K ) + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 250 CONTINUE + 260 CONTINUE + END IF + END IF + DO 270 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 270 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + IF( K.GT.0 ) THEN + DO 280 J = 2, N + CALL ZLASSQ( MIN( J-1, K ), + $ AB( MAX( K+2-J, 1 ), J ), 1, SCALE, + $ SUM ) + 280 CONTINUE + END IF + ELSE + SCALE = ZERO + SUM = ONE + DO 290 J = 1, N + CALL ZLASSQ( MIN( J, K+1 ), AB( MAX( K+2-J, 1 ), J ), + $ 1, SCALE, SUM ) + 290 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + IF( K.GT.0 ) THEN + DO 300 J = 1, N - 1 + CALL ZLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, + $ SUM ) + 300 CONTINUE + END IF + ELSE + SCALE = ZERO + SUM = ONE + DO 310 J = 1, N + CALL ZLASSQ( MIN( N-J+1, K+1 ), AB( 1, J ), 1, SCALE, + $ SUM ) + 310 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANTB = VALUE + RETURN +* +* End of ZLANTB +* + END diff --git a/costa/native/external/lapack/zlantp.f b/costa/native/external/lapack/zlantp.f new file mode 100644 index 000000000..e2483d8d2 --- /dev/null +++ b/costa/native/external/lapack/zlantp.f @@ -0,0 +1,287 @@ + DOUBLE PRECISION FUNCTION ZLANTP( NORM, UPLO, DIAG, N, AP, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 AP( * ) +* .. +* +* Purpose +* ======= +* +* ZLANTP returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* triangular matrix A, supplied in packed form. +* +* Description +* =========== +* +* ZLANTP returns the value +* +* ZLANTP = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in ZLANTP as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. When N = 0, ZLANTP is +* set to zero. +* +* AP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The upper or lower triangular matrix A, packed columnwise in +* a linear array. The j-th column of A is stored in the array +* AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* Note that when DIAG = 'U', the elements of the array AP +* corresponding to the diagonal elements of the matrix A are +* not referenced, but are assumed to be one. +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), +* where LWORK >= N when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J, K + DOUBLE PRECISION SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + K = 1 + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = K, K + J - 2 + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 10 CONTINUE + K = K + J + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = K + 1, K + N - J + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 30 CONTINUE + K = K + N - J + 1 + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = K, K + J - 1 + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 50 CONTINUE + K = K + J + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = K, K + N - J + VALUE = MAX( VALUE, ABS( AP( I ) ) ) + 70 CONTINUE + K = K + N - J + 1 + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + K = 1 + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 90 I = K, K + J - 2 + SUM = SUM + ABS( AP( I ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = K, K + J - 1 + SUM = SUM + ABS( AP( I ) ) + 100 CONTINUE + END IF + K = K + J + VALUE = MAX( VALUE, SUM ) + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = K + 1, K + N - J + SUM = SUM + ABS( AP( I ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = K, K + N - J + SUM = SUM + ABS( AP( I ) ) + 130 CONTINUE + END IF + K = K + N - J + 1 + VALUE = MAX( VALUE, SUM ) + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + K = 1 + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, N + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + DO 160 I = 1, J - 1 + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 160 CONTINUE + K = K + 1 + 170 CONTINUE + ELSE + DO 180 I = 1, N + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + DO 190 I = 1, J + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 230 J = 1, N + K = K + 1 + DO 220 I = J + 1, N + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 220 CONTINUE + 230 CONTINUE + ELSE + DO 240 I = 1, N + WORK( I ) = ZERO + 240 CONTINUE + DO 260 J = 1, N + DO 250 I = J, N + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 250 CONTINUE + 260 CONTINUE + END IF + END IF + VALUE = ZERO + DO 270 I = 1, N + VALUE = MAX( VALUE, WORK( I ) ) + 270 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + K = 2 + DO 280 J = 2, N + CALL ZLASSQ( J-1, AP( K ), 1, SCALE, SUM ) + K = K + J + 280 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + K = 1 + DO 290 J = 1, N + CALL ZLASSQ( J, AP( K ), 1, SCALE, SUM ) + K = K + J + 290 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + K = 2 + DO 300 J = 1, N - 1 + CALL ZLASSQ( N-J, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 300 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + K = 1 + DO 310 J = 1, N + CALL ZLASSQ( N-J+1, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 310 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANTP = VALUE + RETURN +* +* End of ZLANTP +* + END diff --git a/costa/native/external/lapack/zlantr.f b/costa/native/external/lapack/zlantr.f new file mode 100644 index 000000000..1cb7efce2 --- /dev/null +++ b/costa/native/external/lapack/zlantr.f @@ -0,0 +1,278 @@ + DOUBLE PRECISION FUNCTION ZLANTR( NORM, UPLO, DIAG, M, N, A, LDA, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZLANTR returns the value of the one norm, or the Frobenius norm, or +* the infinity norm, or the element of largest absolute value of a +* trapezoidal or triangular matrix A. +* +* Description +* =========== +* +* ZLANTR returns the value +* +* ZLANTR = ( max(abs(A(i,j))), NORM = 'M' or 'm' +* ( +* ( norm1(A), NORM = '1', 'O' or 'o' +* ( +* ( normI(A), NORM = 'I' or 'i' +* ( +* ( normF(A), NORM = 'F', 'f', 'E' or 'e' +* +* where norm1 denotes the one norm of a matrix (maximum column sum), +* normI denotes the infinity norm of a matrix (maximum row sum) and +* normF denotes the Frobenius norm of a matrix (square root of sum of +* squares). Note that max(abs(A(i,j))) is not a matrix norm. +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies the value to be returned in ZLANTR as described +* above. +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower trapezoidal. +* = 'U': Upper trapezoidal +* = 'L': Lower trapezoidal +* Note that A is triangular instead of trapezoidal if M = N. +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A has unit diagonal. +* = 'N': Non-unit diagonal +* = 'U': Unit diagonal +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0, and if +* UPLO = 'U', M <= N. When M = 0, ZLANTR is set to zero. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0, and if +* UPLO = 'L', N <= M. When N = 0, ZLANTR is set to zero. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The trapezoidal matrix A (A is triangular if M = N). +* If UPLO = 'U', the leading m by n upper trapezoidal part of +* the array A contains the upper trapezoidal matrix, and the +* strictly lower triangular part of A is not referenced. +* If UPLO = 'L', the leading m by n lower trapezoidal part of +* the array A contains the lower trapezoidal matrix, and the +* strictly upper triangular part of A is not referenced. Note +* that when DIAG = 'U', the diagonal elements of A are not +* referenced and are assumed to be one. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(M,1). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), +* where LWORK >= M when NORM = 'I'; otherwise, WORK is not +* referenced. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J + DOUBLE PRECISION SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( MIN( M, N ).EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( M, J-1 ) + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = J + 1, M + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = 1, MIN( M, J ) + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = J, M + VALUE = MAX( VALUE, ABS( A( I, J ) ) ) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( ( UDIAG ) .AND. ( J.LE.M ) ) THEN + SUM = ONE + DO 90 I = 1, J - 1 + SUM = SUM + ABS( A( I, J ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = 1, MIN( M, J ) + SUM = SUM + ABS( A( I, J ) ) + 100 CONTINUE + END IF + VALUE = MAX( VALUE, SUM ) + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = J + 1, M + SUM = SUM + ABS( A( I, J ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = J, M + SUM = SUM + ABS( A( I, J ) ) + 130 CONTINUE + END IF + VALUE = MAX( VALUE, SUM ) + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, M + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + DO 160 I = 1, MIN( M, J-1 ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 160 CONTINUE + 170 CONTINUE + ELSE + DO 180 I = 1, M + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + DO 190 I = 1, MIN( M, J ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 220 I = N + 1, M + WORK( I ) = ZERO + 220 CONTINUE + DO 240 J = 1, N + DO 230 I = J + 1, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 230 CONTINUE + 240 CONTINUE + ELSE + DO 250 I = 1, M + WORK( I ) = ZERO + 250 CONTINUE + DO 270 J = 1, N + DO 260 I = J, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 260 CONTINUE + 270 CONTINUE + END IF + END IF + VALUE = ZERO + DO 280 I = 1, M + VALUE = MAX( VALUE, WORK( I ) ) + 280 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = MIN( M, N ) + DO 290 J = 2, N + CALL ZLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM ) + 290 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + DO 300 J = 1, N + CALL ZLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM ) + 300 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = MIN( M, N ) + DO 310 J = 1, N + CALL ZLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE, + $ SUM ) + 310 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + DO 320 J = 1, N + CALL ZLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM ) + 320 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANTR = VALUE + RETURN +* +* End of ZLANTR +* + END diff --git a/costa/native/external/lapack/zlapll.f b/costa/native/external/lapack/zlapll.f new file mode 100644 index 000000000..014085eaf --- /dev/null +++ b/costa/native/external/lapack/zlapll.f @@ -0,0 +1,104 @@ + SUBROUTINE ZLAPLL( N, X, INCX, Y, INCY, SSMIN ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INCX, INCY, N + DOUBLE PRECISION SSMIN +* .. +* .. Array Arguments .. + COMPLEX*16 X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* Given two column vectors X and Y, let +* +* A = ( X Y ). +* +* The subroutine first computes the QR factorization of A = Q*R, +* and then computes the SVD of the 2-by-2 upper triangular matrix R. +* The smaller singular value of R is returned in SSMIN, which is used +* as the measurement of the linear dependency of the vectors X and Y. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The length of the vectors X and Y. +* +* X (input/output) COMPLEX*16 array, dimension (1+(N-1)*INCX) +* On entry, X contains the N-vector X. +* On exit, X is overwritten. +* +* INCX (input) INTEGER +* The increment between successive elements of X. INCX > 0. +* +* Y (input/output) COMPLEX*16 array, dimension (1+(N-1)*INCY) +* On entry, Y contains the N-vector Y. +* On exit, Y is overwritten. +* +* INCY (input) INTEGER +* The increment between successive elements of Y. INCY > 0. +* +* SSMIN (output) DOUBLE PRECISION +* The smallest singular value of the N-by-2 matrix A = ( X Y ). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION SSMAX + COMPLEX*16 A11, A12, A22, C, TAU +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DCONJG +* .. +* .. External Functions .. + COMPLEX*16 ZDOTC + EXTERNAL ZDOTC +* .. +* .. External Subroutines .. + EXTERNAL DLAS2, ZAXPY, ZLARFG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + SSMIN = ZERO + RETURN + END IF +* +* Compute the QR factorization of the N-by-2 matrix ( X Y ) +* + CALL ZLARFG( N, X( 1 ), X( 1+INCX ), INCX, TAU ) + A11 = X( 1 ) + X( 1 ) = CONE +* + C = -DCONJG( TAU )*ZDOTC( N, X, INCX, Y, INCY ) + CALL ZAXPY( N, C, X, INCX, Y, INCY ) +* + CALL ZLARFG( N-1, Y( 1+INCY ), Y( 1+2*INCY ), INCY, TAU ) +* + A12 = Y( 1 ) + A22 = Y( 1+INCY ) +* +* Compute the SVD of 2-by-2 Upper triangular matrix. +* + CALL DLAS2( ABS( A11 ), ABS( A12 ), ABS( A22 ), SSMIN, SSMAX ) +* + RETURN +* +* End of ZLAPLL +* + END diff --git a/costa/native/external/lapack/zlapmt.f b/costa/native/external/lapack/zlapmt.f new file mode 100644 index 000000000..c0efd5a8c --- /dev/null +++ b/costa/native/external/lapack/zlapmt.f @@ -0,0 +1,135 @@ + SUBROUTINE ZLAPMT( FORWRD, M, N, X, LDX, K ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + LOGICAL FORWRD + INTEGER LDX, M, N +* .. +* .. Array Arguments .. + INTEGER K( * ) + COMPLEX*16 X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZLAPMT rearranges the columns of the M by N matrix X as specified +* by the permutation K(1),K(2),...,K(N) of the integers 1,...,N. +* If FORWRD = .TRUE., forward permutation: +* +* X(*,K(J)) is moved X(*,J) for J = 1,2,...,N. +* +* If FORWRD = .FALSE., backward permutation: +* +* X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N. +* +* Arguments +* ========= +* +* FORWRD (input) LOGICAL +* = .TRUE., forward permutation +* = .FALSE., backward permutation +* +* M (input) INTEGER +* The number of rows of the matrix X. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix X. N >= 0. +* +* X (input/output) COMPLEX*16 array, dimension (LDX,N) +* On entry, the M by N matrix X. +* On exit, X contains the permuted matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X, LDX >= MAX(1,M). +* +* K (input) INTEGER array, dimension (N) +* On entry, K contains the permutation vector. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, II, IN, J + COMPLEX*16 TEMP +* .. +* .. Executable Statements .. +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, N + K( I ) = -K( I ) + 10 CONTINUE +* + IF( FORWRD ) THEN +* +* Forward permutation +* + DO 50 I = 1, N +* + IF( K( I ).GT.0 ) + $ GO TO 40 +* + J = I + K( J ) = -K( J ) + IN = K( J ) +* + 20 CONTINUE + IF( K( IN ).GT.0 ) + $ GO TO 40 +* + DO 30 II = 1, M + TEMP = X( II, J ) + X( II, J ) = X( II, IN ) + X( II, IN ) = TEMP + 30 CONTINUE +* + K( IN ) = -K( IN ) + J = IN + IN = K( IN ) + GO TO 20 +* + 40 CONTINUE +* + 50 CONTINUE +* + ELSE +* +* Backward permutation +* + DO 90 I = 1, N +* + IF( K( I ).GT.0 ) + $ GO TO 80 +* + K( I ) = -K( I ) + J = K( I ) + 60 CONTINUE + IF( J.EQ.I ) + $ GO TO 80 +* + DO 70 II = 1, M + TEMP = X( II, I ) + X( II, I ) = X( II, J ) + X( II, J ) = TEMP + 70 CONTINUE +* + K( J ) = -K( J ) + J = K( J ) + GO TO 60 +* + 80 CONTINUE +* + 90 CONTINUE +* + END IF +* + RETURN +* +* End of ZLAPMT +* + END diff --git a/costa/native/external/lapack/zlaqgb.f b/costa/native/external/lapack/zlaqgb.f new file mode 100644 index 000000000..fdce88bd4 --- /dev/null +++ b/costa/native/external/lapack/zlaqgb.f @@ -0,0 +1,170 @@ + SUBROUTINE ZLAQGB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER EQUED + INTEGER KL, KU, LDAB, M, N + DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ), R( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* ZLAQGB equilibrates a general M by N band matrix A with KL +* subdiagonals and KU superdiagonals using the row and scaling factors +* in the vectors R and C. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals within the band of A. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals within the band of A. KU >= 0. +* +* AB (input/output) COMPLEX*16 array, dimension (LDAB,N) +* On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +* The j-th column of A is stored in the j-th column of the +* array AB as follows: +* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +* +* On exit, the equilibrated matrix, in the same storage format +* as A. See EQUED for the form of the equilibrated matrix. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDA >= KL+KU+1. +* +* R (output) DOUBLE PRECISION array, dimension (M) +* The row scale factors for A. +* +* C (output) DOUBLE PRECISION array, dimension (N) +* The column scale factors for A. +* +* ROWCND (output) DOUBLE PRECISION +* Ratio of the smallest R(i) to the largest R(i). +* +* COLCND (output) DOUBLE PRECISION +* Ratio of the smallest C(i) to the largest C(i). +* +* AMAX (input) DOUBLE PRECISION +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration +* = 'R': Row equilibration, i.e., A has been premultiplied by +* diag(R). +* = 'C': Column equilibration, i.e., A has been postmultiplied +* by diag(C). +* = 'B': Both row and column equilibration, i.e., A has been +* replaced by diag(R) * A * diag(C). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if row or column scaling +* should be done based on the ratio of the row or column scaling +* factors. If ROWCND < THRESH, row scaling is done, and if +* COLCND < THRESH, column scaling is done. +* +* LARGE and SMALL are threshold values used to decide if row scaling +* should be done based on the absolute size of the largest matrix +* element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) + $ THEN +* +* No row scaling +* + IF( COLCND.GE.THRESH ) THEN +* +* No column scaling +* + EQUED = 'N' + ELSE +* +* Column scaling +* + DO 20 J = 1, N + CJ = C( J ) + DO 10 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = CJ*AB( KU+1+I-J, J ) + 10 CONTINUE + 20 CONTINUE + EQUED = 'C' + END IF + ELSE IF( COLCND.GE.THRESH ) THEN +* +* Row scaling, no column scaling +* + DO 40 J = 1, N + DO 30 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = R( I )*AB( KU+1+I-J, J ) + 30 CONTINUE + 40 CONTINUE + EQUED = 'R' + ELSE +* +* Row and column scaling +* + DO 60 J = 1, N + CJ = C( J ) + DO 50 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = CJ*R( I )*AB( KU+1+I-J, J ) + 50 CONTINUE + 60 CONTINUE + EQUED = 'B' + END IF +* + RETURN +* +* End of ZLAQGB +* + END diff --git a/costa/native/external/lapack/zlaqge.f b/costa/native/external/lapack/zlaqge.f new file mode 100644 index 000000000..162b38b4b --- /dev/null +++ b/costa/native/external/lapack/zlaqge.f @@ -0,0 +1,156 @@ + SUBROUTINE ZLAQGE( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER EQUED + INTEGER LDA, M, N + DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ), R( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZLAQGE equilibrates a general M by N matrix A using the row and +* scaling factors in the vectors R and C. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M by N matrix A. +* On exit, the equilibrated matrix. See EQUED for the form of +* the equilibrated matrix. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(M,1). +* +* R (input) DOUBLE PRECISION array, dimension (M) +* The row scale factors for A. +* +* C (input) DOUBLE PRECISION array, dimension (N) +* The column scale factors for A. +* +* ROWCND (input) DOUBLE PRECISION +* Ratio of the smallest R(i) to the largest R(i). +* +* COLCND (input) DOUBLE PRECISION +* Ratio of the smallest C(i) to the largest C(i). +* +* AMAX (input) DOUBLE PRECISION +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration +* = 'R': Row equilibration, i.e., A has been premultiplied by +* diag(R). +* = 'C': Column equilibration, i.e., A has been postmultiplied +* by diag(C). +* = 'B': Both row and column equilibration, i.e., A has been +* replaced by diag(R) * A * diag(C). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if row or column scaling +* should be done based on the ratio of the row or column scaling +* factors. If ROWCND < THRESH, row scaling is done, and if +* COLCND < THRESH, column scaling is done. +* +* LARGE and SMALL are threshold values used to decide if row scaling +* should be done based on the absolute size of the largest matrix +* element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) + $ THEN +* +* No row scaling +* + IF( COLCND.GE.THRESH ) THEN +* +* No column scaling +* + EQUED = 'N' + ELSE +* +* Column scaling +* + DO 20 J = 1, N + CJ = C( J ) + DO 10 I = 1, M + A( I, J ) = CJ*A( I, J ) + 10 CONTINUE + 20 CONTINUE + EQUED = 'C' + END IF + ELSE IF( COLCND.GE.THRESH ) THEN +* +* Row scaling, no column scaling +* + DO 40 J = 1, N + DO 30 I = 1, M + A( I, J ) = R( I )*A( I, J ) + 30 CONTINUE + 40 CONTINUE + EQUED = 'R' + ELSE +* +* Row and column scaling +* + DO 60 J = 1, N + CJ = C( J ) + DO 50 I = 1, M + A( I, J ) = CJ*R( I )*A( I, J ) + 50 CONTINUE + 60 CONTINUE + EQUED = 'B' + END IF +* + RETURN +* +* End of ZLAQGE +* + END diff --git a/costa/native/external/lapack/zlaqhb.f b/costa/native/external/lapack/zlaqhb.f new file mode 100644 index 000000000..c891c3a67 --- /dev/null +++ b/costa/native/external/lapack/zlaqhb.f @@ -0,0 +1,152 @@ + SUBROUTINE ZLAQHB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER KD, LDAB, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION S( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* ZLAQHB equilibrates a symmetric band matrix A using the scaling +* factors in the vector S. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of super-diagonals of the matrix A if UPLO = 'U', +* or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) COMPLEX*16 array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U'*U or A = L*L' of the band +* matrix A, in the same storage format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* S (output) DOUBLE PRECISION array, dimension (N) +* The scale factors for A. +* +* SCOND (input) DOUBLE PRECISION +* Ratio of the smallest S(i) to the largest S(i). +* +* AMAX (input) DOUBLE PRECISION +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies whether or not equilibration was done. +* = 'N': No equilibration. +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if scaling should be done +* based on the ratio of the scaling factors. If SCOND < THRESH, +* scaling is done. +* +* LARGE and SMALL are threshold values used to decide if scaling should +* be done based on the absolute size of the largest matrix element. +* If AMAX > LARGE or AMAX < SMALL, scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored in band format. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = MAX( 1, J-KD ), J - 1 + AB( KD+1+I-J, J ) = CJ*S( I )*AB( KD+1+I-J, J ) + 10 CONTINUE + AB( KD+1, J ) = CJ*CJ*DBLE( AB( KD+1, J ) ) + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + AB( 1, J ) = CJ*CJ*DBLE( AB( 1, J ) ) + DO 30 I = J + 1, MIN( N, J+KD ) + AB( 1+I-J, J ) = CJ*S( I )*AB( 1+I-J, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of ZLAQHB +* + END diff --git a/costa/native/external/lapack/zlaqhe.f b/costa/native/external/lapack/zlaqhe.f new file mode 100644 index 000000000..3eb2a093b --- /dev/null +++ b/costa/native/external/lapack/zlaqhe.f @@ -0,0 +1,148 @@ + SUBROUTINE ZLAQHE( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER LDA, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION S( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZLAQHE equilibrates a Hermitian matrix A using the scaling factors +* in the vector S. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* Hermitian matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* n by n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n by n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if EQUED = 'Y', the equilibrated matrix: +* diag(S) * A * diag(S). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(N,1). +* +* S (input) DOUBLE PRECISION array, dimension (N) +* The scale factors for A. +* +* SCOND (input) DOUBLE PRECISION +* Ratio of the smallest S(i) to the largest S(i). +* +* AMAX (input) DOUBLE PRECISION +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies whether or not equilibration was done. +* = 'N': No equilibration. +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if scaling should be done +* based on the ratio of the scaling factors. If SCOND < THRESH, +* scaling is done. +* +* LARGE and SMALL are threshold values used to decide if scaling should +* be done based on the absolute size of the largest matrix element. +* If AMAX > LARGE or AMAX < SMALL, scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J - 1 + A( I, J ) = CJ*S( I )*A( I, J ) + 10 CONTINUE + A( J, J ) = CJ*CJ*DBLE( A( J, J ) ) + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + A( J, J ) = CJ*CJ*DBLE( A( J, J ) ) + DO 30 I = J + 1, N + A( I, J ) = CJ*S( I )*A( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of ZLAQHE +* + END diff --git a/costa/native/external/lapack/zlaqhp.f b/costa/native/external/lapack/zlaqhp.f new file mode 100644 index 000000000..07a2039f4 --- /dev/null +++ b/costa/native/external/lapack/zlaqhp.f @@ -0,0 +1,147 @@ + SUBROUTINE ZLAQHP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION S( * ) + COMPLEX*16 AP( * ) +* .. +* +* Purpose +* ======= +* +* ZLAQHP equilibrates a Hermitian matrix A using the scaling factors +* in the vector S. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* Hermitian matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the equilibrated matrix: diag(S) * A * diag(S), in +* the same storage format as A. +* +* S (input) DOUBLE PRECISION array, dimension (N) +* The scale factors for A. +* +* SCOND (input) DOUBLE PRECISION +* Ratio of the smallest S(i) to the largest S(i). +* +* AMAX (input) DOUBLE PRECISION +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies whether or not equilibration was done. +* = 'N': No equilibration. +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if scaling should be done +* based on the ratio of the scaling factors. If SCOND < THRESH, +* scaling is done. +* +* LARGE and SMALL are threshold values used to decide if scaling should +* be done based on the absolute size of the largest matrix element. +* If AMAX > LARGE or AMAX < SMALL, scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, JC + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + JC = 1 + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J - 1 + AP( JC+I-1 ) = CJ*S( I )*AP( JC+I-1 ) + 10 CONTINUE + AP( JC+J-1 ) = CJ*CJ*DBLE( AP( JC+J-1 ) ) + JC = JC + J + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + JC = 1 + DO 40 J = 1, N + CJ = S( J ) + AP( JC ) = CJ*CJ*DBLE( AP( JC ) ) + DO 30 I = J + 1, N + AP( JC+I-J ) = CJ*S( I )*AP( JC+I-J ) + 30 CONTINUE + JC = JC + N - J + 1 + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of ZLAQHP +* + END diff --git a/costa/native/external/lapack/zlaqp2.f b/costa/native/external/lapack/zlaqp2.f new file mode 100644 index 000000000..b7386e1ee --- /dev/null +++ b/costa/native/external/lapack/zlaqp2.f @@ -0,0 +1,170 @@ + SUBROUTINE ZLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER LDA, M, N, OFFSET +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION VN1( * ), VN2( * ) + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZLAQP2 computes a QR factorization with column pivoting of +* the block A(OFFSET+1:M,1:N). +* The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* OFFSET (input) INTEGER +* The number of rows of the matrix A that must be pivoted +* but no factorized. OFFSET >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, the upper triangle of block A(OFFSET+1:M,1:N) is +* the triangular factor obtained; the elements in block +* A(OFFSET+1:M,1:N) below the diagonal, together with the +* array TAU, represent the orthogonal matrix Q as a product of +* elementary reflectors. Block A(1:OFFSET,1:N) has been +* accordingly pivoted, but no factorized. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +* to the front of A*P (a leading column); if JPVT(i) = 0, +* the i-th column of A is a free column. +* On exit, if JPVT(i) = k, then the i-th column of A*P +* was the k-th column of A. +* +* TAU (output) COMPLEX*16 array, dimension (min(M,N)) +* The scalar factors of the elementary reflectors. +* +* VN1 (input/output) DOUBLE PRECISION array, dimension (N) +* The vector with the partial column norms. +* +* VN2 (input/output) DOUBLE PRECISION array, dimension (N) +* The vector with the exact column norms. +* +* WORK (workspace) COMPLEX*16 array, dimension (N) +* +* Further Details +* =============== +* +* Based on contributions by +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* X. Sun, Computer Science Dept., Duke University, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + COMPLEX*16 CONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, ITEMP, J, MN, OFFPI, PVT + DOUBLE PRECISION TEMP, TEMP2 + COMPLEX*16 AII +* .. +* .. External Subroutines .. + EXTERNAL ZLARF, ZLARFG, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DCONJG, MAX, MIN, SQRT +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DZNRM2 + EXTERNAL IDAMAX, DZNRM2 +* .. +* .. Executable Statements .. +* + MN = MIN( M-OFFSET, N ) +* +* Compute factorization. +* + DO 20 I = 1, MN +* + OFFPI = OFFSET + I +* +* Determine ith pivot column and swap if necessary. +* + PVT = ( I-1 ) + IDAMAX( N-I+1, VN1( I ), 1 ) +* + IF( PVT.NE.I ) THEN + CALL ZSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( I ) + JPVT( I ) = ITEMP + VN1( PVT ) = VN1( I ) + VN2( PVT ) = VN2( I ) + END IF +* +* Generate elementary reflector H(i). +* + IF( OFFPI.LT.M ) THEN + CALL ZLARFG( M-OFFPI+1, A( OFFPI, I ), A( OFFPI+1, I ), 1, + $ TAU( I ) ) + ELSE + CALL ZLARFG( 1, A( M, I ), A( M, I ), 1, TAU( I ) ) + END IF +* + IF( I.LT.N ) THEN +* +* Apply H(i)' to A(offset+i:m,i+1:n) from the left. +* + AII = A( OFFPI, I ) + A( OFFPI, I ) = CONE + CALL ZLARF( 'Left', M-OFFPI+1, N-I, A( OFFPI, I ), 1, + $ DCONJG( TAU( I ) ), A( OFFPI, I+1 ), LDA, + $ WORK( 1 ) ) + A( OFFPI, I ) = AII + END IF +* +* Update partial column norms. +* + DO 10 J = I + 1, N + IF( VN1( J ).NE.ZERO ) THEN + TEMP = ONE - ( ABS( A( OFFPI, J ) ) / VN1( J ) )**2 + TEMP = MAX( TEMP, ZERO ) + TEMP2 = ONE + 0.05D0*TEMP*( VN1( J ) / VN2( J ) )**2 + IF( TEMP2.EQ.ONE ) THEN + IF( OFFPI.LT.M ) THEN + VN1( J ) = DZNRM2( M-OFFPI, A( OFFPI+1, J ), 1 ) + VN2( J ) = VN1( J ) + ELSE + VN1( J ) = ZERO + VN2( J ) = ZERO + END IF + ELSE + VN1( J ) = VN1( J )*SQRT( TEMP ) + END IF + END IF + 10 CONTINUE +* + 20 CONTINUE +* + RETURN +* +* End of ZLAQP2 +* + END diff --git a/costa/native/external/lapack/zlaqps.f b/costa/native/external/lapack/zlaqps.f new file mode 100644 index 000000000..27b0d8baa --- /dev/null +++ b/costa/native/external/lapack/zlaqps.f @@ -0,0 +1,260 @@ + SUBROUTINE ZLAQPS( M, N, OFFSET, NB, KB, A, LDA, JPVT, TAU, VN1, + $ VN2, AUXV, F, LDF ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER KB, LDA, LDF, M, N, NB, OFFSET +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION VN1( * ), VN2( * ) + COMPLEX*16 A( LDA, * ), AUXV( * ), F( LDF, * ), TAU( * ) +* .. +* +* Purpose +* ======= +* +* ZLAQPS computes a step of QR factorization with column pivoting +* of a complex M-by-N matrix A by using Blas-3. It tries to factorize +* NB columns from A starting from the row OFFSET+1, and updates all +* of the matrix with Blas-3 xGEMM. +* +* In some cases, due to catastrophic cancellations, it cannot +* factorize NB columns. Hence, the actual number of factorized +* columns is returned in KB. +* +* Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0 +* +* OFFSET (input) INTEGER +* The number of rows of A that have been factorized in +* previous steps. +* +* NB (input) INTEGER +* The number of columns to factorize. +* +* KB (output) INTEGER +* The number of columns actually factorized. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, block A(OFFSET+1:M,1:KB) is the triangular +* factor obtained and block A(1:OFFSET,1:N) has been +* accordingly pivoted, but no factorized. +* The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has +* been updated. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* JPVT (input/output) INTEGER array, dimension (N) +* JPVT(I) = K <==> Column K of the full matrix A has been +* permuted into position I in AP. +* +* TAU (output) COMPLEX*16 array, dimension (KB) +* The scalar factors of the elementary reflectors. +* +* VN1 (input/output) DOUBLE PRECISION array, dimension (N) +* The vector with the partial column norms. +* +* VN2 (input/output) DOUBLE PRECISION array, dimension (N) +* The vector with the exact column norms. +* +* AUXV (input/output) COMPLEX*16 array, dimension (NB) +* Auxiliar vector. +* +* F (input/output) COMPLEX*16 array, dimension (LDF,NB) +* Matrix F' = L*Y'*A. +* +* LDF (input) INTEGER +* The leading dimension of the array F. LDF >= max(1,N). +* +* Further Details +* =============== +* +* Based on contributions by +* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +* X. Sun, Computer Science Dept., Duke University, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + COMPLEX*16 CZERO, CONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, + $ CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER ITEMP, J, K, LASTRK, LSTICC, PVT, RK + DOUBLE PRECISION TEMP, TEMP2 + COMPLEX*16 AKK +* .. +* .. External Subroutines .. + EXTERNAL ZGEMM, ZGEMV, ZLARFG, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG, MAX, MIN, NINT, SQRT +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DZNRM2 + EXTERNAL IDAMAX, DZNRM2 +* .. +* .. Executable Statements .. +* + LASTRK = MIN( M, N+OFFSET ) + LSTICC = 0 + K = 0 +* +* Beginning of while loop. +* + 10 CONTINUE + IF( ( K.LT.NB ) .AND. ( LSTICC.EQ.0 ) ) THEN + K = K + 1 + RK = OFFSET + K +* +* Determine ith pivot column and swap if necessary +* + PVT = ( K-1 ) + IDAMAX( N-K+1, VN1( K ), 1 ) + IF( PVT.NE.K ) THEN + CALL ZSWAP( M, A( 1, PVT ), 1, A( 1, K ), 1 ) + CALL ZSWAP( K-1, F( PVT, 1 ), LDF, F( K, 1 ), LDF ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( K ) + JPVT( K ) = ITEMP + VN1( PVT ) = VN1( K ) + VN2( PVT ) = VN2( K ) + END IF +* +* Apply previous Householder reflectors to column K: +* A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)'. +* + IF( K.GT.1 ) THEN +*CC CALL ZGEMM( 'No transpose', 'Conjugate transpose', +*CC $ M-RK+1, 1, K-1, -CONE, A( RK, 1 ), LDA, +*CC $ F( K, 1 ), LDF, CONE, A( RK, K ), LDA ) + DO 20 J = 1, K - 1 + F( K, J ) = DCONJG( F( K, J ) ) + 20 CONTINUE + CALL ZGEMV( 'No transpose', M-RK+1, K-1, -CONE, A( RK, 1 ), + $ LDA, F( K, 1 ), LDF, CONE, A( RK, K ), 1 ) + DO 30 J = 1, K - 1 + F( K, J ) = DCONJG( F( K, J ) ) + 30 CONTINUE + END IF +* +* Generate elementary reflector H(k). +* + IF( RK.LT.M ) THEN + CALL ZLARFG( M-RK+1, A( RK, K ), A( RK+1, K ), 1, TAU( K ) ) + ELSE + CALL ZLARFG( 1, A( RK, K ), A( RK, K ), 1, TAU( K ) ) + END IF +* + AKK = A( RK, K ) + A( RK, K ) = CONE +* +* Compute Kth column of F: +* +* Compute F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)'*A(RK:M,K). +* + IF( K.LT.N ) THEN + CALL ZGEMV( 'Conjugate transpose', M-RK+1, N-K, TAU( K ), + $ A( RK, K+1 ), LDA, A( RK, K ), 1, CZERO, + $ F( K+1, K ), 1 ) + END IF +* +* Padding F(1:K,K) with zeros. +* + DO 40 J = 1, K + F( J, K ) = CZERO + 40 CONTINUE +* +* Incremental updating of F: +* F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)' +* *A(RK:M,K). +* + IF( K.GT.1 ) THEN + CALL ZGEMV( 'Conjugate transpose', M-RK+1, K-1, -TAU( K ), + $ A( RK, 1 ), LDA, A( RK, K ), 1, CZERO, + $ AUXV( 1 ), 1 ) +* + CALL ZGEMV( 'No transpose', N, K-1, CONE, F( 1, 1 ), LDF, + $ AUXV( 1 ), 1, CONE, F( 1, K ), 1 ) + END IF +* +* Update the current row of A: +* A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)'. +* + IF( K.LT.N ) THEN + CALL ZGEMM( 'No transpose', 'Conjugate transpose', 1, N-K, + $ K, -CONE, A( RK, 1 ), LDA, F( K+1, 1 ), LDF, + $ CONE, A( RK, K+1 ), LDA ) + END IF +* +* Update partial column norms. +* + IF( RK.LT.LASTRK ) THEN + DO 50 J = K + 1, N + IF( VN1( J ).NE.ZERO ) THEN + TEMP = ABS( A( RK, J ) ) / VN1( J ) + TEMP = MAX( ZERO, ( ONE+TEMP )*( ONE-TEMP ) ) + TEMP2 = ONE + 0.05D0*TEMP*( VN1( J ) / VN2( J ) )**2 + IF( TEMP2.EQ.ONE ) THEN + VN2( J ) = DBLE( LSTICC ) + LSTICC = J + ELSE + VN1( J ) = VN1( J )*SQRT( TEMP ) + END IF + END IF + 50 CONTINUE + END IF +* + A( RK, K ) = AKK +* +* End of while loop. +* + GO TO 10 + END IF + KB = K + RK = OFFSET + KB +* +* Apply the block reflector to the rest of the matrix: +* A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) - +* A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)'. +* + IF( KB.LT.MIN( N, M-OFFSET ) ) THEN + CALL ZGEMM( 'No transpose', 'Conjugate transpose', M-RK, N-KB, + $ KB, -CONE, A( RK+1, 1 ), LDA, F( KB+1, 1 ), LDF, + $ CONE, A( RK+1, KB+1 ), LDA ) + END IF +* +* Recomputation of difficult columns. +* + 60 CONTINUE + IF( LSTICC.GT.0 ) THEN + ITEMP = NINT( VN2( LSTICC ) ) + VN1( LSTICC ) = DZNRM2( M-RK, A( RK+1, LSTICC ), 1 ) + VN2( LSTICC ) = VN1( LSTICC ) + LSTICC = ITEMP + GO TO 60 + END IF +* + RETURN +* +* End of ZLAQPS +* + END diff --git a/costa/native/external/lapack/zlaqsb.f b/costa/native/external/lapack/zlaqsb.f new file mode 100644 index 000000000..5fd31ab66 --- /dev/null +++ b/costa/native/external/lapack/zlaqsb.f @@ -0,0 +1,150 @@ + SUBROUTINE ZLAQSB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER KD, LDAB, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION S( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* ZLAQSB equilibrates a symmetric band matrix A using the scaling +* factors in the vector S. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of super-diagonals of the matrix A if UPLO = 'U', +* or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) COMPLEX*16 array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the symmetric band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U'*U or A = L*L' of the band +* matrix A, in the same storage format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* S (output) DOUBLE PRECISION array, dimension (N) +* The scale factors for A. +* +* SCOND (input) DOUBLE PRECISION +* Ratio of the smallest S(i) to the largest S(i). +* +* AMAX (input) DOUBLE PRECISION +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies whether or not equilibration was done. +* = 'N': No equilibration. +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if scaling should be done +* based on the ratio of the scaling factors. If SCOND < THRESH, +* scaling is done. +* +* LARGE and SMALL are threshold values used to decide if scaling should +* be done based on the absolute size of the largest matrix element. +* If AMAX > LARGE or AMAX < SMALL, scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored in band format. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = MAX( 1, J-KD ), J + AB( KD+1+I-J, J ) = CJ*S( I )*AB( KD+1+I-J, J ) + 10 CONTINUE + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, MIN( N, J+KD ) + AB( 1+I-J, J ) = CJ*S( I )*AB( 1+I-J, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of ZLAQSB +* + END diff --git a/costa/native/external/lapack/zlaqsp.f b/costa/native/external/lapack/zlaqsp.f new file mode 100644 index 000000000..f554a51aa --- /dev/null +++ b/costa/native/external/lapack/zlaqsp.f @@ -0,0 +1,142 @@ + SUBROUTINE ZLAQSP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION S( * ) + COMPLEX*16 AP( * ) +* .. +* +* Purpose +* ======= +* +* ZLAQSP equilibrates a symmetric matrix A using the scaling factors +* in the vector S. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the equilibrated matrix: diag(S) * A * diag(S), in +* the same storage format as A. +* +* S (input) DOUBLE PRECISION array, dimension (N) +* The scale factors for A. +* +* SCOND (input) DOUBLE PRECISION +* Ratio of the smallest S(i) to the largest S(i). +* +* AMAX (input) DOUBLE PRECISION +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies whether or not equilibration was done. +* = 'N': No equilibration. +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if scaling should be done +* based on the ratio of the scaling factors. If SCOND < THRESH, +* scaling is done. +* +* LARGE and SMALL are threshold values used to decide if scaling should +* be done based on the absolute size of the largest matrix element. +* If AMAX > LARGE or AMAX < SMALL, scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, JC + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + JC = 1 + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J + AP( JC+I-1 ) = CJ*S( I )*AP( JC+I-1 ) + 10 CONTINUE + JC = JC + J + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + JC = 1 + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, N + AP( JC+I-J ) = CJ*S( I )*AP( JC+I-J ) + 30 CONTINUE + JC = JC + N - J + 1 + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of ZLAQSP +* + END diff --git a/costa/native/external/lapack/zlaqsy.f b/costa/native/external/lapack/zlaqsy.f new file mode 100644 index 000000000..b7b987e54 --- /dev/null +++ b/costa/native/external/lapack/zlaqsy.f @@ -0,0 +1,143 @@ + SUBROUTINE ZLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER LDA, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION S( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZLAQSY equilibrates a symmetric matrix A using the scaling factors +* in the vector S. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* n by n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n by n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if EQUED = 'Y', the equilibrated matrix: +* diag(S) * A * diag(S). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(N,1). +* +* S (input) DOUBLE PRECISION array, dimension (N) +* The scale factors for A. +* +* SCOND (input) DOUBLE PRECISION +* Ratio of the smallest S(i) to the largest S(i). +* +* AMAX (input) DOUBLE PRECISION +* Absolute value of largest matrix entry. +* +* EQUED (output) CHARACTER*1 +* Specifies whether or not equilibration was done. +* = 'N': No equilibration. +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* +* Internal Parameters +* =================== +* +* THRESH is a threshold value used to decide if scaling should be done +* based on the ratio of the scaling factors. If SCOND < THRESH, +* scaling is done. +* +* LARGE and SMALL are threshold values used to decide if scaling should +* be done based on the absolute size of the largest matrix element. +* If AMAX > LARGE or AMAX < SMALL, scaling is done. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J + A( I, J ) = CJ*S( I )*A( I, J ) + 10 CONTINUE + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, N + A( I, J ) = CJ*S( I )*A( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of ZLAQSY +* + END diff --git a/costa/native/external/lapack/zlar1v.f b/costa/native/external/lapack/zlar1v.f new file mode 100644 index 000000000..5e5d29d01 --- /dev/null +++ b/costa/native/external/lapack/zlar1v.f @@ -0,0 +1,328 @@ + SUBROUTINE ZLAR1V( N, B1, BN, SIGMA, D, L, LD, LLD, GERSCH, Z, + $ ZTZ, MINGMA, R, ISUPPZ, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER B1, BN, N, R + DOUBLE PRECISION MINGMA, SIGMA, ZTZ +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ) + DOUBLE PRECISION D( * ), GERSCH( * ), L( * ), LD( * ), LLD( * ), + $ WORK( * ) + COMPLEX*16 Z( * ) +* .. +* +* Purpose +* ======= +* +* ZLAR1V computes the (scaled) r-th column of the inverse of +* the sumbmatrix in rows B1 through BN of the tridiagonal matrix +* L D L^T - sigma I. The following steps accomplish this computation : +* (a) Stationary qd transform, L D L^T - sigma I = L(+) D(+) L(+)^T, +* (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T, +* (c) Computation of the diagonal elements of the inverse of +* L D L^T - sigma I by combining the above transforms, and choosing +* r as the index where the diagonal of the inverse is (one of the) +* largest in magnitude. +* (d) Computation of the (scaled) r-th column of the inverse using the +* twisted factorization obtained by combining the top part of the +* the stationary and the bottom part of the progressive transform. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix L D L^T. +* +* B1 (input) INTEGER +* First index of the submatrix of L D L^T. +* +* BN (input) INTEGER +* Last index of the submatrix of L D L^T. +* +* SIGMA (input) DOUBLE PRECISION +* The shift. Initially, when R = 0, SIGMA should be a good +* approximation to an eigenvalue of L D L^T. +* +* L (input) DOUBLE PRECISION array, dimension (N-1) +* The (n-1) subdiagonal elements of the unit bidiagonal matrix +* L, in elements 1 to N-1. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the diagonal matrix D. +* +* LD (input) DOUBLE PRECISION array, dimension (N-1) +* The n-1 elements L(i)*D(i). +* +* LLD (input) DOUBLE PRECISION array, dimension (N-1) +* The n-1 elements L(i)*L(i)*D(i). +* +* GERSCH (input) DOUBLE PRECISION array, dimension (2*N) +* The n Gerschgorin intervals. These are used to restrict +* the initial search for R, when R is input as 0. +* +* Z (output) COMPLEX*16 array, dimension (N) +* The (scaled) r-th column of the inverse. Z(R) is returned +* to be 1. +* +* ZTZ (output) DOUBLE PRECISION +* The square of the norm of Z. +* +* MINGMA (output) DOUBLE PRECISION +* The reciprocal of the largest (in magnitude) diagonal +* element of the inverse of L D L^T - sigma I. +* +* R (input/output) INTEGER +* Initially, R should be input to be 0 and is then output as +* the index where the diagonal element of the inverse is +* largest in magnitude. In later iterations, this same value +* of R should be input. +* +* ISUPPZ (output) INTEGER array, dimension (2) +* The support of the vector in Z, i.e., the vector Z is +* nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* Ken Stanley, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER BLKSIZ + PARAMETER ( BLKSIZ = 32 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL SAWNAN + INTEGER FROM, I, INDP, INDS, INDUMN, J, R1, R2, TO + DOUBLE PRECISION DMINUS, DPLUS, EPS, S, TMP +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* + EPS = DLAMCH( 'Precision' ) + IF( R.EQ.0 ) THEN +* +* Eliminate the top and bottom indices from the possible values +* of R where the desired eigenvector is largest in magnitude. +* + R1 = B1 + DO 10 I = B1, BN + IF( SIGMA.GE.GERSCH( 2*I-1 ) .OR. SIGMA.LE.GERSCH( 2*I ) ) + $ THEN + R1 = I + GO TO 20 + END IF + 10 CONTINUE + 20 CONTINUE + R2 = BN + DO 30 I = BN, B1, -1 + IF( SIGMA.GE.GERSCH( 2*I-1 ) .OR. SIGMA.LE.GERSCH( 2*I ) ) + $ THEN + R2 = I + GO TO 40 + END IF + 30 CONTINUE + 40 CONTINUE + ELSE + R1 = R + R2 = R + END IF +* + INDUMN = N + INDS = 2*N + 1 + INDP = 3*N + 1 + SAWNAN = .FALSE. +* +* Compute the stationary transform (using the differential form) +* untill the index R2 +* + IF( B1.EQ.1 ) THEN + WORK( INDS ) = ZERO + ELSE + WORK( INDS ) = LLD( B1-1 ) + END IF + S = WORK( INDS ) - SIGMA + DO 50 I = B1, R2 - 1 + DPLUS = D( I ) + S + WORK( I ) = LD( I ) / DPLUS + WORK( INDS+I ) = S*WORK( I )*L( I ) + S = WORK( INDS+I ) - SIGMA + 50 CONTINUE +* + IF( .NOT.( S.GT.ZERO .OR. S.LT.ONE ) ) THEN +* +* Run a slower version of the above loop if a NaN is detected +* + SAWNAN = .TRUE. + J = B1 + 1 + 60 CONTINUE + IF( WORK( INDS+J ).GT.ZERO .OR. WORK( INDS+J ).LT.ONE ) THEN + J = J + 1 + GO TO 60 + END IF + WORK( INDS+J ) = LLD( J ) + S = WORK( INDS+J ) - SIGMA + DO 70 I = J + 1, R2 - 1 + DPLUS = D( I ) + S + WORK( I ) = LD( I ) / DPLUS + IF( WORK( I ).EQ.ZERO ) THEN + WORK( INDS+I ) = LLD( I ) + ELSE + WORK( INDS+I ) = S*WORK( I )*L( I ) + END IF + S = WORK( INDS+I ) - SIGMA + 70 CONTINUE + END IF + WORK( INDP+BN-1 ) = D( BN ) - SIGMA + DO 80 I = BN - 1, R1, -1 + DMINUS = LLD( I ) + WORK( INDP+I ) + TMP = D( I ) / DMINUS + WORK( INDUMN+I ) = L( I )*TMP + WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - SIGMA + 80 CONTINUE + TMP = WORK( INDP+R1-1 ) + IF( .NOT.( TMP.GT.ZERO .OR. TMP.LT.ONE ) ) THEN +* +* Run a slower version of the above loop if a NaN is detected +* + SAWNAN = .TRUE. + J = BN - 3 + 90 CONTINUE + IF( WORK( INDP+J ).GT.ZERO .OR. WORK( INDP+J ).LT.ONE ) THEN + J = J - 1 + GO TO 90 + END IF + WORK( INDP+J ) = D( J+1 ) - SIGMA + DO 100 I = J, R1, -1 + DMINUS = LLD( I ) + WORK( INDP+I ) + TMP = D( I ) / DMINUS + WORK( INDUMN+I ) = L( I )*TMP + IF( TMP.EQ.ZERO ) THEN + WORK( INDP+I-1 ) = D( I ) - SIGMA + ELSE + WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - SIGMA + END IF + 100 CONTINUE + END IF +* +* Find the index (from R1 to R2) of the largest (in magnitude) +* diagonal element of the inverse +* + MINGMA = WORK( INDS+R1-1 ) + WORK( INDP+R1-1 ) + IF( MINGMA.EQ.ZERO ) + $ MINGMA = EPS*WORK( INDS+R1-1 ) + R = R1 + DO 110 I = R1, R2 - 1 + TMP = WORK( INDS+I ) + WORK( INDP+I ) + IF( TMP.EQ.ZERO ) + $ TMP = EPS*WORK( INDS+I ) + IF( ABS( TMP ).LT.ABS( MINGMA ) ) THEN + MINGMA = TMP + R = I + 1 + END IF + 110 CONTINUE +* +* Compute the (scaled) r-th column of the inverse +* + ISUPPZ( 1 ) = B1 + ISUPPZ( 2 ) = BN + Z( R ) = CONE + ZTZ = ONE + IF( .NOT.SAWNAN ) THEN + FROM = R - 1 + TO = MAX( R-BLKSIZ, B1 ) + 120 CONTINUE + IF( FROM.GE.B1 ) THEN + DO 130 I = FROM, TO, -1 + Z( I ) = -( WORK( I )*Z( I+1 ) ) + ZTZ = ZTZ + DBLE( Z( I )*Z( I ) ) + 130 CONTINUE + IF( ABS( Z( TO ) ).LE.EPS .AND. ABS( Z( TO+1 ) ).LE.EPS ) + $ THEN + ISUPPZ( 1 ) = TO + 2 + ELSE + FROM = TO - 1 + TO = MAX( TO-BLKSIZ, B1 ) + GO TO 120 + END IF + END IF + FROM = R + 1 + TO = MIN( R+BLKSIZ, BN ) + 140 CONTINUE + IF( FROM.LE.BN ) THEN + DO 150 I = FROM, TO + Z( I ) = -( WORK( INDUMN+I-1 )*Z( I-1 ) ) + ZTZ = ZTZ + DBLE( Z( I )*Z( I ) ) + 150 CONTINUE + IF( ABS( Z( TO ) ).LE.EPS .AND. ABS( Z( TO-1 ) ).LE.EPS ) + $ THEN + ISUPPZ( 2 ) = TO - 2 + ELSE + FROM = TO + 1 + TO = MIN( TO+BLKSIZ, BN ) + GO TO 140 + END IF + END IF + ELSE + DO 160 I = R - 1, B1, -1 + IF( Z( I+1 ).EQ.ZERO ) THEN + Z( I ) = -( LD( I+1 ) / LD( I ) )*Z( I+2 ) + ELSE IF( ABS( Z( I+1 ) ).LE.EPS .AND. ABS( Z( I+2 ) ).LE. + $ EPS ) THEN + ISUPPZ( 1 ) = I + 3 + GO TO 170 + ELSE + Z( I ) = -( WORK( I )*Z( I+1 ) ) + END IF + ZTZ = ZTZ + DBLE( Z( I )*Z( I ) ) + 160 CONTINUE + 170 CONTINUE + DO 180 I = R, BN - 1 + IF( Z( I ).EQ.ZERO ) THEN + Z( I+1 ) = -( LD( I-1 ) / LD( I ) )*Z( I-1 ) + ELSE IF( ABS( Z( I ) ).LE.EPS .AND. ABS( Z( I-1 ) ).LE.EPS ) + $ THEN + ISUPPZ( 2 ) = I - 2 + GO TO 190 + ELSE + Z( I+1 ) = -( WORK( INDUMN+I )*Z( I ) ) + END IF + ZTZ = ZTZ + DBLE( Z( I+1 )*Z( I+1 ) ) + 180 CONTINUE + 190 CONTINUE + END IF + DO 200 I = B1, ISUPPZ( 1 ) - 3 + Z( I ) = ZERO + 200 CONTINUE + DO 210 I = ISUPPZ( 2 ) + 3, BN + Z( I ) = ZERO + 210 CONTINUE +* + RETURN +* +* End of ZLAR1V +* + END diff --git a/costa/native/external/lapack/zlar2v.f b/costa/native/external/lapack/zlar2v.f new file mode 100644 index 000000000..b11458f65 --- /dev/null +++ b/costa/native/external/lapack/zlar2v.f @@ -0,0 +1,98 @@ + SUBROUTINE ZLAR2V( N, X, Y, Z, INCX, C, S, INCC ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ) + COMPLEX*16 S( * ), X( * ), Y( * ), Z( * ) +* .. +* +* Purpose +* ======= +* +* ZLAR2V applies a vector of complex plane rotations with real cosines +* from both sides to a sequence of 2-by-2 complex Hermitian matrices, +* defined by the elements of the vectors x, y and z. For i = 1,2,...,n +* +* ( x(i) z(i) ) := +* ( conjg(z(i)) y(i) ) +* +* ( c(i) conjg(s(i)) ) ( x(i) z(i) ) ( c(i) -conjg(s(i)) ) +* ( -s(i) c(i) ) ( conjg(z(i)) y(i) ) ( s(i) c(i) ) +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of plane rotations to be applied. +* +* X (input/output) COMPLEX*16 array, dimension (1+(N-1)*INCX) +* The vector x; the elements of x are assumed to be real. +* +* Y (input/output) COMPLEX*16 array, dimension (1+(N-1)*INCX) +* The vector y; the elements of y are assumed to be real. +* +* Z (input/output) COMPLEX*16 array, dimension (1+(N-1)*INCX) +* The vector z. +* +* INCX (input) INTEGER +* The increment between elements of X, Y and Z. INCX > 0. +* +* C (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) +* The cosines of the plane rotations. +* +* S (input) COMPLEX*16 array, dimension (1+(N-1)*INCC) +* The sines of the plane rotations. +* +* INCC (input) INTEGER +* The increment between elements of C and S. INCC > 0. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IC, IX + DOUBLE PRECISION CI, SII, SIR, T1I, T1R, T5, T6, XI, YI, ZII, + $ ZIR + COMPLEX*16 SI, T2, T3, T4, ZI +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, DCONJG, DIMAG +* .. +* .. Executable Statements .. +* + IX = 1 + IC = 1 + DO 10 I = 1, N + XI = DBLE( X( IX ) ) + YI = DBLE( Y( IX ) ) + ZI = Z( IX ) + ZIR = DBLE( ZI ) + ZII = DIMAG( ZI ) + CI = C( IC ) + SI = S( IC ) + SIR = DBLE( SI ) + SII = DIMAG( SI ) + T1R = SIR*ZIR - SII*ZII + T1I = SIR*ZII + SII*ZIR + T2 = CI*ZI + T3 = T2 - DCONJG( SI )*XI + T4 = DCONJG( T2 ) + SI*YI + T5 = CI*XI + T1R + T6 = CI*YI - T1R + X( IX ) = CI*T5 + ( SIR*DBLE( T4 )+SII*DIMAG( T4 ) ) + Y( IX ) = CI*T6 - ( SIR*DBLE( T3 )-SII*DIMAG( T3 ) ) + Z( IX ) = CI*T3 + DCONJG( SI )*DCMPLX( T6, T1I ) + IX = IX + INCX + IC = IC + INCC + 10 CONTINUE + RETURN +* +* End of ZLAR2V +* + END diff --git a/costa/native/external/lapack/zlarcm.f b/costa/native/external/lapack/zlarcm.f new file mode 100644 index 000000000..ea86f80f6 --- /dev/null +++ b/costa/native/external/lapack/zlarcm.f @@ -0,0 +1,111 @@ + SUBROUTINE ZLARCM( M, N, A, LDA, B, LDB, C, LDC, RWORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER LDA, LDB, LDC, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), RWORK( * ) + COMPLEX*16 B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* ZLARCM performs a very simple matrix-matrix multiplication: +* C := A * B, +* where A is M by M and real; B is M by N and complex; +* C is M by N and complex. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A and of the matrix C. +* M >= 0. +* +* N (input) INTEGER +* The number of columns and rows of the matrix B and +* the number of columns of the matrix C. +* N >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA, M) +* A contains the M by M matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >=max(1,M). +* +* B (input) DOUBLE PRECISION array, dimension (LDB, N) +* B contains the M by N matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >=max(1,M). +* +* C (input) COMPLEX*16 array, dimension (LDC, N) +* C contains the M by N matrix C. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >=max(1,M). +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (2*M*N) +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, DIMAG +* .. +* .. External Subroutines .. + EXTERNAL DGEMM +* .. +* .. Executable Statements .. +* +* Quick return if possible. +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN +* + DO 20 J = 1, N + DO 10 I = 1, M + RWORK( ( J-1 )*M+I ) = DBLE( B( I, J ) ) + 10 CONTINUE + 20 CONTINUE +* + L = M*N + 1 + CALL DGEMM( 'N', 'N', M, N, M, ONE, A, LDA, RWORK, M, ZERO, + $ RWORK( L ), M ) + DO 40 J = 1, N + DO 30 I = 1, M + C( I, J ) = RWORK( L+( J-1 )*M+I-1 ) + 30 CONTINUE + 40 CONTINUE +* + DO 60 J = 1, N + DO 50 I = 1, M + RWORK( ( J-1 )*M+I ) = DIMAG( B( I, J ) ) + 50 CONTINUE + 60 CONTINUE + CALL DGEMM( 'N', 'N', M, N, M, ONE, A, LDA, RWORK, M, ZERO, + $ RWORK( L ), M ) + DO 80 J = 1, N + DO 70 I = 1, M + C( I, J ) = DCMPLX( DBLE( C( I, J ) ), + $ RWORK( L+( J-1 )*M+I-1 ) ) + 70 CONTINUE + 80 CONTINUE +* + RETURN +* +* End of ZLARCM +* + END diff --git a/costa/native/external/lapack/zlarf.f b/costa/native/external/lapack/zlarf.f new file mode 100644 index 000000000..f63bd070e --- /dev/null +++ b/costa/native/external/lapack/zlarf.f @@ -0,0 +1,121 @@ + SUBROUTINE ZLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + COMPLEX*16 TAU +* .. +* .. Array Arguments .. + COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZLARF applies a complex elementary reflector H to a complex M-by-N +* matrix C, from either the left or the right. H is represented in the +* form +* +* H = I - tau * v * v' +* +* where tau is a complex scalar and v is a complex vector. +* +* If tau = 0, then H is taken to be the unit matrix. +* +* To apply H' (the conjugate transpose of H), supply conjg(tau) instead +* tau. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': form H * C +* = 'R': form C * H +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* V (input) COMPLEX*16 array, dimension +* (1 + (M-1)*abs(INCV)) if SIDE = 'L' +* or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +* The vector v in the representation of H. V is not used if +* TAU = 0. +* +* INCV (input) INTEGER +* The increment between elements of v. INCV <> 0. +* +* TAU (input) COMPLEX*16 +* The value tau in the representation of H. +* +* C (input/output) COMPLEX*16 array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by the matrix H * C if SIDE = 'L', +* or C * H if SIDE = 'R'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX*16 array, dimension +* (N) if SIDE = 'L' +* or (M) if SIDE = 'R' +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. External Subroutines .. + EXTERNAL ZGEMV, ZGERC +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C +* + IF( TAU.NE.ZERO ) THEN +* +* w := C' * v +* + CALL ZGEMV( 'Conjugate transpose', M, N, ONE, C, LDC, V, + $ INCV, ZERO, WORK, 1 ) +* +* C := C - v * w' +* + CALL ZGERC( M, N, -TAU, V, INCV, WORK, 1, C, LDC ) + END IF + ELSE +* +* Form C * H +* + IF( TAU.NE.ZERO ) THEN +* +* w := C * v +* + CALL ZGEMV( 'No transpose', M, N, ONE, C, LDC, V, INCV, + $ ZERO, WORK, 1 ) +* +* C := C - w * v' +* + CALL ZGERC( M, N, -TAU, WORK, 1, V, INCV, C, LDC ) + END IF + END IF + RETURN +* +* End of ZLARF +* + END diff --git a/costa/native/external/lapack/zlarfb.f b/costa/native/external/lapack/zlarfb.f new file mode 100644 index 000000000..e21692a7d --- /dev/null +++ b/costa/native/external/lapack/zlarfb.f @@ -0,0 +1,609 @@ + SUBROUTINE ZLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, + $ T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* Purpose +* ======= +* +* ZLARFB applies a complex block reflector H or its transpose H' to a +* complex M-by-N matrix C, from either the left or the right. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply H or H' from the Left +* = 'R': apply H or H' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply H (No transpose) +* = 'C': apply H' (Conjugate transpose) +* +* DIRECT (input) CHARACTER*1 +* Indicates how H is formed from a product of elementary +* reflectors +* = 'F': H = H(1) H(2) . . . H(k) (Forward) +* = 'B': H = H(k) . . . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Indicates how the vectors which define the elementary +* reflectors are stored: +* = 'C': Columnwise +* = 'R': Rowwise +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* K (input) INTEGER +* The order of the matrix T (= the number of elementary +* reflectors whose product defines the block reflector). +* +* V (input) COMPLEX*16 array, dimension +* (LDV,K) if STOREV = 'C' +* (LDV,M) if STOREV = 'R' and SIDE = 'L' +* (LDV,N) if STOREV = 'R' and SIDE = 'R' +* The matrix V. See further details. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); +* if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); +* if STOREV = 'R', LDV >= K. +* +* T (input) COMPLEX*16 array, dimension (LDT,K) +* The triangular K-by-K matrix T in the representation of the +* block reflector. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* C (input/output) COMPLEX*16 array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by H*C or H'*C or C*H or C*H'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX*16 array, dimension (LDWORK,K) +* +* LDWORK (input) INTEGER +* The leading dimension of the array WORK. +* If SIDE = 'L', LDWORK >= max(1,N); +* if SIDE = 'R', LDWORK >= max(1,M). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZCOPY, ZGEMM, ZLACGV, ZTRMM +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( STOREV, 'C' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 ) (first K rows) +* ( V2 ) +* where V1 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) +* +* W := C1' +* + DO 10 J = 1, K + CALL ZCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL ZLACGV( N, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W := W * V1 +* + CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N, + $ K, ONE, V, LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C2'*V2 +* + CALL ZGEMM( 'Conjugate transpose', 'No transpose', N, + $ K, M-K, ONE, C( K+1, 1 ), LDC, + $ V( K+1, 1 ), LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL ZTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W' +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2 * W' +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', + $ M-K, N, K, -ONE, V( K+1, 1 ), LDV, WORK, + $ LDWORK, ONE, C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1' +* + CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', N, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W' +* + DO 30 J = 1, K + DO 20 I = 1, N + C( J, I ) = C( J, I ) - DCONJG( WORK( I, J ) ) + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C1 +* + DO 40 J = 1, K + CALL ZCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W := W * V1 +* + CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M, + $ K, ONE, V, LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C2 * V2 +* + CALL ZGEMM( 'No transpose', 'No transpose', M, K, N-K, + $ ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL ZTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V' +* + IF( N.GT.K ) THEN +* +* C2 := C2 - W * V2' +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', M, + $ N-K, K, -ONE, WORK, LDWORK, V( K+1, 1 ), + $ LDV, ONE, C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1' +* + CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', M, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 60 J = 1, K + DO 50 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + ELSE +* +* Let V = ( V1 ) +* ( V2 ) (last K rows) +* where V2 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) +* +* W := C2' +* + DO 70 J = 1, K + CALL ZCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL ZLACGV( N, WORK( 1, J ), 1 ) + 70 CONTINUE +* +* W := W * V2 +* + CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N, + $ K, ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1'*V1 +* + CALL ZGEMM( 'Conjugate transpose', 'No transpose', N, + $ K, M-K, ONE, C, LDC, V, LDV, ONE, WORK, + $ LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL ZTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W' +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1 * W' +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', + $ M-K, N, K, -ONE, V, LDV, WORK, LDWORK, + $ ONE, C, LDC ) + END IF +* +* W := W * V2' +* + CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', N, K, ONE, V( M-K+1, 1 ), LDV, WORK, + $ LDWORK ) +* +* C2 := C2 - W' +* + DO 90 J = 1, K + DO 80 I = 1, N + C( M-K+J, I ) = C( M-K+J, I ) - + $ DCONJG( WORK( I, J ) ) + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C2 +* + DO 100 J = 1, K + CALL ZCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 100 CONTINUE +* +* W := W * V2 +* + CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M, + $ K, ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1 +* + CALL ZGEMM( 'No transpose', 'No transpose', M, K, N-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL ZTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V' +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1' +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', M, + $ N-K, K, -ONE, WORK, LDWORK, V, LDV, ONE, + $ C, LDC ) + END IF +* +* W := W * V2' +* + CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', M, K, ONE, V( N-K+1, 1 ), LDV, WORK, + $ LDWORK ) +* +* C2 := C2 - W +* + DO 120 J = 1, K + DO 110 I = 1, M + C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J ) + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* + ELSE IF( LSAME( STOREV, 'R' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 V2 ) (V1: first K columns) +* where V1 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) +* +* W := C1' +* + DO 130 J = 1, K + CALL ZCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL ZLACGV( N, WORK( 1, J ), 1 ) + 130 CONTINUE +* +* W := W * V1' +* + CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', N, K, ONE, V, LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C2'*V2' +* + CALL ZGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', N, K, M-K, ONE, + $ C( K+1, 1 ), LDC, V( 1, K+1 ), LDV, ONE, + $ WORK, LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL ZTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V' * W' +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2' * W' +* + CALL ZGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', M-K, N, K, -ONE, + $ V( 1, K+1 ), LDV, WORK, LDWORK, ONE, + $ C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N, + $ K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W' +* + DO 150 J = 1, K + DO 140 I = 1, N + C( J, I ) = C( J, I ) - DCONJG( WORK( I, J ) ) + 140 CONTINUE + 150 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) +* +* W := C1 +* + DO 160 J = 1, K + CALL ZCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 160 CONTINUE +* +* W := W * V1' +* + CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', M, K, ONE, V, LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C2 * V2' +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', M, + $ K, N-K, ONE, C( 1, K+1 ), LDC, + $ V( 1, K+1 ), LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL ZTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( N.GT.K ) THEN +* +* C2 := C2 - W * V2 +* + CALL ZGEMM( 'No transpose', 'No transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V( 1, K+1 ), LDV, ONE, + $ C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M, + $ K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 180 J = 1, K + DO 170 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 170 CONTINUE + 180 CONTINUE +* + END IF +* + ELSE +* +* Let V = ( V1 V2 ) (V2: last K columns) +* where V2 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C where C = ( C1 ) +* ( C2 ) +* +* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) +* +* W := C2' +* + DO 190 J = 1, K + CALL ZCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL ZLACGV( N, WORK( 1, J ), 1 ) + 190 CONTINUE +* +* W := W * V2' +* + CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', N, K, ONE, V( 1, M-K+1 ), LDV, WORK, + $ LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1'*V1' +* + CALL ZGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', N, K, M-K, ONE, C, + $ LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T' or W * T +* + CALL ZTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V' * W' +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1' * W' +* + CALL ZGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', M-K, N, K, -ONE, V, + $ LDV, WORK, LDWORK, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N, + $ K, ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK ) +* +* C2 := C2 - W' +* + DO 210 J = 1, K + DO 200 I = 1, N + C( M-K+J, I ) = C( M-K+J, I ) - + $ DCONJG( WORK( I, J ) ) + 200 CONTINUE + 210 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) +* +* W := C2 +* + DO 220 J = 1, K + CALL ZCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 220 CONTINUE +* +* W := W * V2' +* + CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', M, K, ONE, V( 1, N-K+1 ), LDV, WORK, + $ LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1' +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', M, + $ K, N-K, ONE, C, LDC, V, LDV, ONE, WORK, + $ LDWORK ) + END IF +* +* W := W * T or W * T' +* + CALL ZTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1 +* + CALL ZGEMM( 'No transpose', 'No transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M, + $ K, ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 240 J = 1, K + DO 230 I = 1, M + C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J ) + 230 CONTINUE + 240 CONTINUE +* + END IF +* + END IF + END IF +* + RETURN +* +* End of ZLARFB +* + END diff --git a/costa/native/external/lapack/zlarfg.f b/costa/native/external/lapack/zlarfg.f new file mode 100644 index 000000000..243549400 --- /dev/null +++ b/costa/native/external/lapack/zlarfg.f @@ -0,0 +1,146 @@ + SUBROUTINE ZLARFG( N, ALPHA, X, INCX, TAU ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INCX, N + COMPLEX*16 ALPHA, TAU +* .. +* .. Array Arguments .. + COMPLEX*16 X( * ) +* .. +* +* Purpose +* ======= +* +* ZLARFG generates a complex elementary reflector H of order n, such +* that +* +* H' * ( alpha ) = ( beta ), H' * H = I. +* ( x ) ( 0 ) +* +* where alpha and beta are scalars, with beta real, and x is an +* (n-1)-element complex vector. H is represented in the form +* +* H = I - tau * ( 1 ) * ( 1 v' ) , +* ( v ) +* +* where tau is a complex scalar and v is a complex (n-1)-element +* vector. Note that H is not hermitian. +* +* If the elements of x are all zero and alpha is real, then tau = 0 +* and H is taken to be the unit matrix. +* +* Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1 . +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the elementary reflector. +* +* ALPHA (input/output) COMPLEX*16 +* On entry, the value alpha. +* On exit, it is overwritten with the value beta. +* +* X (input/output) COMPLEX*16 array, dimension +* (1+(N-2)*abs(INCX)) +* On entry, the vector x. +* On exit, it is overwritten with the vector v. +* +* INCX (input) INTEGER +* The increment between elements of X. INCX > 0. +* +* TAU (output) COMPLEX*16 +* The value tau. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER J, KNT + DOUBLE PRECISION ALPHI, ALPHR, BETA, RSAFMN, SAFMIN, XNORM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLAPY3, DZNRM2 + COMPLEX*16 ZLADIV + EXTERNAL DLAMCH, DLAPY3, DZNRM2, ZLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DIMAG, SIGN +* .. +* .. External Subroutines .. + EXTERNAL ZDSCAL, ZSCAL +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + TAU = ZERO + RETURN + END IF +* + XNORM = DZNRM2( N-1, X, INCX ) + ALPHR = DBLE( ALPHA ) + ALPHI = DIMAG( ALPHA ) +* + IF( XNORM.EQ.ZERO .AND. ALPHI.EQ.ZERO ) THEN +* +* H = I +* + TAU = ZERO + ELSE +* +* general case +* + BETA = -SIGN( DLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) + SAFMIN = DLAMCH( 'S' ) / DLAMCH( 'E' ) + RSAFMN = ONE / SAFMIN +* + IF( ABS( BETA ).LT.SAFMIN ) THEN +* +* XNORM, BETA may be inaccurate; scale X and recompute them +* + KNT = 0 + 10 CONTINUE + KNT = KNT + 1 + CALL ZDSCAL( N-1, RSAFMN, X, INCX ) + BETA = BETA*RSAFMN + ALPHI = ALPHI*RSAFMN + ALPHR = ALPHR*RSAFMN + IF( ABS( BETA ).LT.SAFMIN ) + $ GO TO 10 +* +* New BETA is at most 1, at least SAFMIN +* + XNORM = DZNRM2( N-1, X, INCX ) + ALPHA = DCMPLX( ALPHR, ALPHI ) + BETA = -SIGN( DLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) + TAU = DCMPLX( ( BETA-ALPHR ) / BETA, -ALPHI / BETA ) + ALPHA = ZLADIV( DCMPLX( ONE ), ALPHA-BETA ) + CALL ZSCAL( N-1, ALPHA, X, INCX ) +* +* If ALPHA is subnormal, it may lose relative accuracy +* + ALPHA = BETA + DO 20 J = 1, KNT + ALPHA = ALPHA*SAFMIN + 20 CONTINUE + ELSE + TAU = DCMPLX( ( BETA-ALPHR ) / BETA, -ALPHI / BETA ) + ALPHA = ZLADIV( DCMPLX( ONE ), ALPHA-BETA ) + CALL ZSCAL( N-1, ALPHA, X, INCX ) + ALPHA = BETA + END IF + END IF +* + RETURN +* +* End of ZLARFG +* + END diff --git a/costa/native/external/lapack/zlarft.f b/costa/native/external/lapack/zlarft.f new file mode 100644 index 000000000..cd26a1e6e --- /dev/null +++ b/costa/native/external/lapack/zlarft.f @@ -0,0 +1,225 @@ + SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* Purpose +* ======= +* +* ZLARFT forms the triangular factor T of a complex block reflector H +* of order n, which is defined as a product of k elementary reflectors. +* +* If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +* +* If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +* +* If STOREV = 'C', the vector which defines the elementary reflector +* H(i) is stored in the i-th column of the array V, and +* +* H = I - V * T * V' +* +* If STOREV = 'R', the vector which defines the elementary reflector +* H(i) is stored in the i-th row of the array V, and +* +* H = I - V' * T * V +* +* Arguments +* ========= +* +* DIRECT (input) CHARACTER*1 +* Specifies the order in which the elementary reflectors are +* multiplied to form the block reflector: +* = 'F': H = H(1) H(2) . . . H(k) (Forward) +* = 'B': H = H(k) . . . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Specifies how the vectors which define the elementary +* reflectors are stored (see also Further Details): +* = 'C': columnwise +* = 'R': rowwise +* +* N (input) INTEGER +* The order of the block reflector H. N >= 0. +* +* K (input) INTEGER +* The order of the triangular factor T (= the number of +* elementary reflectors). K >= 1. +* +* V (input/output) COMPLEX*16 array, dimension +* (LDV,K) if STOREV = 'C' +* (LDV,N) if STOREV = 'R' +* The matrix V. See further details. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +* +* TAU (input) COMPLEX*16 array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i). +* +* T (output) COMPLEX*16 array, dimension (LDT,K) +* The k by k triangular factor T of the block reflector. +* If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +* lower triangular. The rest of the array is not used. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* Further Details +* =============== +* +* The shape of the matrix V and the storage of the vectors which define +* the H(i) is best illustrated by the following example with n = 5 and +* k = 3. The elements equal to 1 are not stored; the corresponding +* array elements are modified but restored on exit. The rest of the +* array is not used. +* +* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +* +* V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +* ( v1 1 ) ( 1 v2 v2 v2 ) +* ( v1 v2 1 ) ( 1 v3 v3 ) +* ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* +* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +* +* V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +* ( v1 v2 v3 ) ( v2 v2 v2 1 ) +* ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +* ( 1 v3 ) +* ( 1 ) +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J + COMPLEX*16 VII +* .. +* .. External Subroutines .. + EXTERNAL ZGEMV, ZLACGV, ZTRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 20 I = 1, K + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO 10 J = 1, I + T( J, I ) = ZERO + 10 CONTINUE + ELSE +* +* general case +* + VII = V( I, I ) + V( I, I ) = ONE + IF( LSAME( STOREV, 'C' ) ) THEN +* +* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) +* + CALL ZGEMV( 'Conjugate transpose', N-I+1, I-1, + $ -TAU( I ), V( I, 1 ), LDV, V( I, I ), 1, + $ ZERO, T( 1, I ), 1 ) + ELSE +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' +* + IF( I.LT.N ) + $ CALL ZLACGV( N-I, V( I, I+1 ), LDV ) + CALL ZGEMV( 'No transpose', I-1, N-I+1, -TAU( I ), + $ V( 1, I ), LDV, V( I, I ), LDV, ZERO, + $ T( 1, I ), 1 ) + IF( I.LT.N ) + $ CALL ZLACGV( N-I, V( I, I+1 ), LDV ) + END IF + V( I, I ) = VII +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL ZTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + END IF + 20 CONTINUE + ELSE + DO 40 I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO 30 J = I, K + T( J, I ) = ZERO + 30 CONTINUE + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN + VII = V( N-K+I, I ) + V( N-K+I, I ) = ONE +* +* T(i+1:k,i) := +* - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) +* + CALL ZGEMV( 'Conjugate transpose', N-K+I, K-I, + $ -TAU( I ), V( 1, I+1 ), LDV, V( 1, I ), + $ 1, ZERO, T( I+1, I ), 1 ) + V( N-K+I, I ) = VII + ELSE + VII = V( I, N-K+I ) + V( I, N-K+I ) = ONE +* +* T(i+1:k,i) := +* - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' +* + CALL ZLACGV( N-K+I-1, V( I, 1 ), LDV ) + CALL ZGEMV( 'No transpose', K-I, N-K+I, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO, + $ T( I+1, I ), 1 ) + CALL ZLACGV( N-K+I-1, V( I, 1 ), LDV ) + V( I, N-K+I ) = VII + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL ZTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + END IF + T( I, I ) = TAU( I ) + END IF + 40 CONTINUE + END IF + RETURN +* +* End of ZLARFT +* + END diff --git a/costa/native/external/lapack/zlarfx.f b/costa/native/external/lapack/zlarfx.f new file mode 100644 index 000000000..8627cb65e --- /dev/null +++ b/costa/native/external/lapack/zlarfx.f @@ -0,0 +1,642 @@ + SUBROUTINE ZLARFX( SIDE, M, N, V, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER LDC, M, N + COMPLEX*16 TAU +* .. +* .. Array Arguments .. + COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZLARFX applies a complex elementary reflector H to a complex m by n +* matrix C, from either the left or the right. H is represented in the +* form +* +* H = I - tau * v * v' +* +* where tau is a complex scalar and v is a complex vector. +* +* If tau = 0, then H is taken to be the unit matrix +* +* This version uses inline code if H has order < 11. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': form H * C +* = 'R': form C * H +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* V (input) COMPLEX*16 array, dimension (M) if SIDE = 'L' +* or (N) if SIDE = 'R' +* The vector v in the representation of H. +* +* TAU (input) COMPLEX*16 +* The value tau in the representation of H. +* +* C (input/output) COMPLEX*16 array, dimension (LDC,N) +* On entry, the m by n matrix C. +* On exit, C is overwritten by the matrix H * C if SIDE = 'L', +* or C * H if SIDE = 'R'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDA >= max(1,M). +* +* WORK (workspace) COMPLEX*16 array, dimension (N) if SIDE = 'L' +* or (M) if SIDE = 'R' +* WORK is not referenced if H has order < 11. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER J + COMPLEX*16 SUM, T1, T10, T2, T3, T4, T5, T6, T7, T8, T9, + $ V1, V10, V2, V3, V4, V5, V6, V7, V8, V9 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZGEMV, ZGERC +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* + IF( TAU.EQ.ZERO ) + $ RETURN + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C, where H has order m. +* + GO TO ( 10, 30, 50, 70, 90, 110, 130, 150, + $ 170, 190 )M +* +* Code for general M +* +* w := C'*v +* + CALL ZGEMV( 'Conjugate transpose', M, N, ONE, C, LDC, V, 1, + $ ZERO, WORK, 1 ) +* +* C := C - tau * v * w' +* + CALL ZGERC( M, N, -TAU, V, 1, WORK, 1, C, LDC ) + GO TO 410 + 10 CONTINUE +* +* Special code for 1 x 1 Householder +* + T1 = ONE - TAU*V( 1 )*DCONJG( V( 1 ) ) + DO 20 J = 1, N + C( 1, J ) = T1*C( 1, J ) + 20 CONTINUE + GO TO 410 + 30 CONTINUE +* +* Special code for 2 x 2 Householder +* + V1 = DCONJG( V( 1 ) ) + T1 = TAU*DCONJG( V1 ) + V2 = DCONJG( V( 2 ) ) + T2 = TAU*DCONJG( V2 ) + DO 40 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + 40 CONTINUE + GO TO 410 + 50 CONTINUE +* +* Special code for 3 x 3 Householder +* + V1 = DCONJG( V( 1 ) ) + T1 = TAU*DCONJG( V1 ) + V2 = DCONJG( V( 2 ) ) + T2 = TAU*DCONJG( V2 ) + V3 = DCONJG( V( 3 ) ) + T3 = TAU*DCONJG( V3 ) + DO 60 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + 60 CONTINUE + GO TO 410 + 70 CONTINUE +* +* Special code for 4 x 4 Householder +* + V1 = DCONJG( V( 1 ) ) + T1 = TAU*DCONJG( V1 ) + V2 = DCONJG( V( 2 ) ) + T2 = TAU*DCONJG( V2 ) + V3 = DCONJG( V( 3 ) ) + T3 = TAU*DCONJG( V3 ) + V4 = DCONJG( V( 4 ) ) + T4 = TAU*DCONJG( V4 ) + DO 80 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + 80 CONTINUE + GO TO 410 + 90 CONTINUE +* +* Special code for 5 x 5 Householder +* + V1 = DCONJG( V( 1 ) ) + T1 = TAU*DCONJG( V1 ) + V2 = DCONJG( V( 2 ) ) + T2 = TAU*DCONJG( V2 ) + V3 = DCONJG( V( 3 ) ) + T3 = TAU*DCONJG( V3 ) + V4 = DCONJG( V( 4 ) ) + T4 = TAU*DCONJG( V4 ) + V5 = DCONJG( V( 5 ) ) + T5 = TAU*DCONJG( V5 ) + DO 100 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + 100 CONTINUE + GO TO 410 + 110 CONTINUE +* +* Special code for 6 x 6 Householder +* + V1 = DCONJG( V( 1 ) ) + T1 = TAU*DCONJG( V1 ) + V2 = DCONJG( V( 2 ) ) + T2 = TAU*DCONJG( V2 ) + V3 = DCONJG( V( 3 ) ) + T3 = TAU*DCONJG( V3 ) + V4 = DCONJG( V( 4 ) ) + T4 = TAU*DCONJG( V4 ) + V5 = DCONJG( V( 5 ) ) + T5 = TAU*DCONJG( V5 ) + V6 = DCONJG( V( 6 ) ) + T6 = TAU*DCONJG( V6 ) + DO 120 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + 120 CONTINUE + GO TO 410 + 130 CONTINUE +* +* Special code for 7 x 7 Householder +* + V1 = DCONJG( V( 1 ) ) + T1 = TAU*DCONJG( V1 ) + V2 = DCONJG( V( 2 ) ) + T2 = TAU*DCONJG( V2 ) + V3 = DCONJG( V( 3 ) ) + T3 = TAU*DCONJG( V3 ) + V4 = DCONJG( V( 4 ) ) + T4 = TAU*DCONJG( V4 ) + V5 = DCONJG( V( 5 ) ) + T5 = TAU*DCONJG( V5 ) + V6 = DCONJG( V( 6 ) ) + T6 = TAU*DCONJG( V6 ) + V7 = DCONJG( V( 7 ) ) + T7 = TAU*DCONJG( V7 ) + DO 140 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + 140 CONTINUE + GO TO 410 + 150 CONTINUE +* +* Special code for 8 x 8 Householder +* + V1 = DCONJG( V( 1 ) ) + T1 = TAU*DCONJG( V1 ) + V2 = DCONJG( V( 2 ) ) + T2 = TAU*DCONJG( V2 ) + V3 = DCONJG( V( 3 ) ) + T3 = TAU*DCONJG( V3 ) + V4 = DCONJG( V( 4 ) ) + T4 = TAU*DCONJG( V4 ) + V5 = DCONJG( V( 5 ) ) + T5 = TAU*DCONJG( V5 ) + V6 = DCONJG( V( 6 ) ) + T6 = TAU*DCONJG( V6 ) + V7 = DCONJG( V( 7 ) ) + T7 = TAU*DCONJG( V7 ) + V8 = DCONJG( V( 8 ) ) + T8 = TAU*DCONJG( V8 ) + DO 160 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + 160 CONTINUE + GO TO 410 + 170 CONTINUE +* +* Special code for 9 x 9 Householder +* + V1 = DCONJG( V( 1 ) ) + T1 = TAU*DCONJG( V1 ) + V2 = DCONJG( V( 2 ) ) + T2 = TAU*DCONJG( V2 ) + V3 = DCONJG( V( 3 ) ) + T3 = TAU*DCONJG( V3 ) + V4 = DCONJG( V( 4 ) ) + T4 = TAU*DCONJG( V4 ) + V5 = DCONJG( V( 5 ) ) + T5 = TAU*DCONJG( V5 ) + V6 = DCONJG( V( 6 ) ) + T6 = TAU*DCONJG( V6 ) + V7 = DCONJG( V( 7 ) ) + T7 = TAU*DCONJG( V7 ) + V8 = DCONJG( V( 8 ) ) + T8 = TAU*DCONJG( V8 ) + V9 = DCONJG( V( 9 ) ) + T9 = TAU*DCONJG( V9 ) + DO 180 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + C( 9, J ) = C( 9, J ) - SUM*T9 + 180 CONTINUE + GO TO 410 + 190 CONTINUE +* +* Special code for 10 x 10 Householder +* + V1 = DCONJG( V( 1 ) ) + T1 = TAU*DCONJG( V1 ) + V2 = DCONJG( V( 2 ) ) + T2 = TAU*DCONJG( V2 ) + V3 = DCONJG( V( 3 ) ) + T3 = TAU*DCONJG( V3 ) + V4 = DCONJG( V( 4 ) ) + T4 = TAU*DCONJG( V4 ) + V5 = DCONJG( V( 5 ) ) + T5 = TAU*DCONJG( V5 ) + V6 = DCONJG( V( 6 ) ) + T6 = TAU*DCONJG( V6 ) + V7 = DCONJG( V( 7 ) ) + T7 = TAU*DCONJG( V7 ) + V8 = DCONJG( V( 8 ) ) + T8 = TAU*DCONJG( V8 ) + V9 = DCONJG( V( 9 ) ) + T9 = TAU*DCONJG( V9 ) + V10 = DCONJG( V( 10 ) ) + T10 = TAU*DCONJG( V10 ) + DO 200 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) + + $ V10*C( 10, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + C( 9, J ) = C( 9, J ) - SUM*T9 + C( 10, J ) = C( 10, J ) - SUM*T10 + 200 CONTINUE + GO TO 410 + ELSE +* +* Form C * H, where H has order n. +* + GO TO ( 210, 230, 250, 270, 290, 310, 330, 350, + $ 370, 390 )N +* +* Code for general N +* +* w := C * v +* + CALL ZGEMV( 'No transpose', M, N, ONE, C, LDC, V, 1, ZERO, + $ WORK, 1 ) +* +* C := C - tau * w * v' +* + CALL ZGERC( M, N, -TAU, WORK, 1, V, 1, C, LDC ) + GO TO 410 + 210 CONTINUE +* +* Special code for 1 x 1 Householder +* + T1 = ONE - TAU*V( 1 )*DCONJG( V( 1 ) ) + DO 220 J = 1, M + C( J, 1 ) = T1*C( J, 1 ) + 220 CONTINUE + GO TO 410 + 230 CONTINUE +* +* Special code for 2 x 2 Householder +* + V1 = V( 1 ) + T1 = TAU*DCONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*DCONJG( V2 ) + DO 240 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + 240 CONTINUE + GO TO 410 + 250 CONTINUE +* +* Special code for 3 x 3 Householder +* + V1 = V( 1 ) + T1 = TAU*DCONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*DCONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*DCONJG( V3 ) + DO 260 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + 260 CONTINUE + GO TO 410 + 270 CONTINUE +* +* Special code for 4 x 4 Householder +* + V1 = V( 1 ) + T1 = TAU*DCONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*DCONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*DCONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*DCONJG( V4 ) + DO 280 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + 280 CONTINUE + GO TO 410 + 290 CONTINUE +* +* Special code for 5 x 5 Householder +* + V1 = V( 1 ) + T1 = TAU*DCONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*DCONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*DCONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*DCONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*DCONJG( V5 ) + DO 300 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + 300 CONTINUE + GO TO 410 + 310 CONTINUE +* +* Special code for 6 x 6 Householder +* + V1 = V( 1 ) + T1 = TAU*DCONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*DCONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*DCONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*DCONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*DCONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*DCONJG( V6 ) + DO 320 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + 320 CONTINUE + GO TO 410 + 330 CONTINUE +* +* Special code for 7 x 7 Householder +* + V1 = V( 1 ) + T1 = TAU*DCONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*DCONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*DCONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*DCONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*DCONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*DCONJG( V6 ) + V7 = V( 7 ) + T7 = TAU*DCONJG( V7 ) + DO 340 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + 340 CONTINUE + GO TO 410 + 350 CONTINUE +* +* Special code for 8 x 8 Householder +* + V1 = V( 1 ) + T1 = TAU*DCONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*DCONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*DCONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*DCONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*DCONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*DCONJG( V6 ) + V7 = V( 7 ) + T7 = TAU*DCONJG( V7 ) + V8 = V( 8 ) + T8 = TAU*DCONJG( V8 ) + DO 360 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + 360 CONTINUE + GO TO 410 + 370 CONTINUE +* +* Special code for 9 x 9 Householder +* + V1 = V( 1 ) + T1 = TAU*DCONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*DCONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*DCONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*DCONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*DCONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*DCONJG( V6 ) + V7 = V( 7 ) + T7 = TAU*DCONJG( V7 ) + V8 = V( 8 ) + T8 = TAU*DCONJG( V8 ) + V9 = V( 9 ) + T9 = TAU*DCONJG( V9 ) + DO 380 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + C( J, 9 ) = C( J, 9 ) - SUM*T9 + 380 CONTINUE + GO TO 410 + 390 CONTINUE +* +* Special code for 10 x 10 Householder +* + V1 = V( 1 ) + T1 = TAU*DCONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*DCONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*DCONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*DCONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*DCONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*DCONJG( V6 ) + V7 = V( 7 ) + T7 = TAU*DCONJG( V7 ) + V8 = V( 8 ) + T8 = TAU*DCONJG( V8 ) + V9 = V( 9 ) + T9 = TAU*DCONJG( V9 ) + V10 = V( 10 ) + T10 = TAU*DCONJG( V10 ) + DO 400 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) + + $ V10*C( J, 10 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + C( J, 9 ) = C( J, 9 ) - SUM*T9 + C( J, 10 ) = C( J, 10 ) - SUM*T10 + 400 CONTINUE + GO TO 410 + END IF + 410 CONTINUE + RETURN +* +* End of ZLARFX +* + END diff --git a/costa/native/external/lapack/zlargv.f b/costa/native/external/lapack/zlargv.f new file mode 100644 index 000000000..f51ee9b95 --- /dev/null +++ b/costa/native/external/lapack/zlargv.f @@ -0,0 +1,225 @@ + SUBROUTINE ZLARGV( N, X, INCX, Y, INCY, C, INCC ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ) + COMPLEX*16 X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZLARGV generates a vector of complex plane rotations with real +* cosines, determined by elements of the complex vectors x and y. +* For i = 1,2,...,n +* +* ( c(i) s(i) ) ( x(i) ) = ( r(i) ) +* ( -conjg(s(i)) c(i) ) ( y(i) ) = ( 0 ) +* +* where c(i)**2 + ABS(s(i))**2 = 1 +* +* The following conventions are used (these are the same as in ZLARTG, +* but differ from the BLAS1 routine ZROTG): +* If y(i)=0, then c(i)=1 and s(i)=0. +* If x(i)=0, then c(i)=0 and s(i) is chosen so that r(i) is real. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of plane rotations to be generated. +* +* X (input/output) COMPLEX*16 array, dimension (1+(N-1)*INCX) +* On entry, the vector x. +* On exit, x(i) is overwritten by r(i), for i = 1,...,n. +* +* INCX (input) INTEGER +* The increment between elements of X. INCX > 0. +* +* Y (input/output) COMPLEX*16 array, dimension (1+(N-1)*INCY) +* On entry, the vector y. +* On exit, the sines of the plane rotations. +* +* INCY (input) INTEGER +* The increment between elements of Y. INCY > 0. +* +* C (output) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) +* The cosines of the plane rotations. +* +* INCC (input) INTEGER +* The increment between elements of C. INCC > 0. +* +* Further Details +* ======= ======= +* +* 6-6-96 - Modified with a new algorithm by W. Kahan and J. Demmel +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION TWO, ONE, ZERO + PARAMETER ( TWO = 2.0D+0, ONE = 1.0D+0, ZERO = 0.0D+0 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL FIRST + INTEGER COUNT, I, IC, IX, IY, J + DOUBLE PRECISION CS, D, DI, DR, EPS, F2, F2S, G2, G2S, SAFMIN, + $ SAFMN2, SAFMX2, SCALE + COMPLEX*16 F, FF, FS, G, GS, R, SN +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLAPY2 + EXTERNAL DLAMCH, DLAPY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, INT, LOG, + $ MAX, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION ABS1, ABSSQ +* .. +* .. Save statement .. + SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / +* .. +* .. Statement Function definitions .. + ABS1( FF ) = MAX( ABS( DBLE( FF ) ), ABS( DIMAG( FF ) ) ) + ABSSQ( FF ) = DBLE( FF )**2 + DIMAG( FF )**2 +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + FIRST = .FALSE. + SAFMIN = DLAMCH( 'S' ) + EPS = DLAMCH( 'E' ) + SAFMN2 = DLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) / + $ LOG( DLAMCH( 'B' ) ) / TWO ) + SAFMX2 = ONE / SAFMN2 + END IF + IX = 1 + IY = 1 + IC = 1 + DO 60 I = 1, N + F = X( IX ) + G = Y( IY ) +* +* Use identical algorithm as in ZLARTG +* + SCALE = MAX( ABS1( F ), ABS1( G ) ) + FS = F + GS = G + COUNT = 0 + IF( SCALE.GE.SAFMX2 ) THEN + 10 CONTINUE + COUNT = COUNT + 1 + FS = FS*SAFMN2 + GS = GS*SAFMN2 + SCALE = SCALE*SAFMN2 + IF( SCALE.GE.SAFMX2 ) + $ GO TO 10 + ELSE IF( SCALE.LE.SAFMN2 ) THEN + IF( G.EQ.CZERO ) THEN + CS = ONE + SN = CZERO + R = F + GO TO 50 + END IF + 20 CONTINUE + COUNT = COUNT - 1 + FS = FS*SAFMX2 + GS = GS*SAFMX2 + SCALE = SCALE*SAFMX2 + IF( SCALE.LE.SAFMN2 ) + $ GO TO 20 + END IF + F2 = ABSSQ( FS ) + G2 = ABSSQ( GS ) + IF( F2.LE.MAX( G2, ONE )*SAFMIN ) THEN +* +* This is a rare case: F is very small. +* + IF( F.EQ.CZERO ) THEN + CS = ZERO + R = DLAPY2( DBLE( G ), DIMAG( G ) ) +* Do complex/real division explicitly with two real +* divisions + D = DLAPY2( DBLE( GS ), DIMAG( GS ) ) + SN = DCMPLX( DBLE( GS ) / D, -DIMAG( GS ) / D ) + GO TO 50 + END IF + F2S = DLAPY2( DBLE( FS ), DIMAG( FS ) ) +* G2 and G2S are accurate +* G2 is at least SAFMIN, and G2S is at least SAFMN2 + G2S = SQRT( G2 ) +* Error in CS from underflow in F2S is at most +* UNFL / SAFMN2 .lt. sqrt(UNFL*EPS) .lt. EPS +* If MAX(G2,ONE)=G2, then F2 .lt. G2*SAFMIN, +* and so CS .lt. sqrt(SAFMIN) +* If MAX(G2,ONE)=ONE, then F2 .lt. SAFMIN +* and so CS .lt. sqrt(SAFMIN)/SAFMN2 = sqrt(EPS) +* Therefore, CS = F2S/G2S / sqrt( 1 + (F2S/G2S)**2 ) = F2S/G2S + CS = F2S / G2S +* Make sure abs(FF) = 1 +* Do complex/real division explicitly with 2 real divisions + IF( ABS1( F ).GT.ONE ) THEN + D = DLAPY2( DBLE( F ), DIMAG( F ) ) + FF = DCMPLX( DBLE( F ) / D, DIMAG( F ) / D ) + ELSE + DR = SAFMX2*DBLE( F ) + DI = SAFMX2*DIMAG( F ) + D = DLAPY2( DR, DI ) + FF = DCMPLX( DR / D, DI / D ) + END IF + SN = FF*DCMPLX( DBLE( GS ) / G2S, -DIMAG( GS ) / G2S ) + R = CS*F + SN*G + ELSE +* +* This is the most common case. +* Neither F2 nor F2/G2 are less than SAFMIN +* F2S cannot overflow, and it is accurate +* + F2S = SQRT( ONE+G2 / F2 ) +* Do the F2S(real)*FS(complex) multiply with two real +* multiplies + R = DCMPLX( F2S*DBLE( FS ), F2S*DIMAG( FS ) ) + CS = ONE / F2S + D = F2 + G2 +* Do complex/real division explicitly with two real divisions + SN = DCMPLX( DBLE( R ) / D, DIMAG( R ) / D ) + SN = SN*DCONJG( GS ) + IF( COUNT.NE.0 ) THEN + IF( COUNT.GT.0 ) THEN + DO 30 J = 1, COUNT + R = R*SAFMX2 + 30 CONTINUE + ELSE + DO 40 J = 1, -COUNT + R = R*SAFMN2 + 40 CONTINUE + END IF + END IF + END IF + 50 CONTINUE + C( IC ) = CS + Y( IY ) = SN + X( IX ) = R + IC = IC + INCC + IY = IY + INCY + IX = IX + INCX + 60 CONTINUE + RETURN +* +* End of ZLARGV +* + END diff --git a/costa/native/external/lapack/zlarnv.f b/costa/native/external/lapack/zlarnv.f new file mode 100644 index 000000000..3cad52f03 --- /dev/null +++ b/costa/native/external/lapack/zlarnv.f @@ -0,0 +1,131 @@ + SUBROUTINE ZLARNV( IDIST, ISEED, N, X ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER IDIST, N +* .. +* .. Array Arguments .. + INTEGER ISEED( 4 ) + COMPLEX*16 X( * ) +* .. +* +* Purpose +* ======= +* +* ZLARNV returns a vector of n random complex numbers from a uniform or +* normal distribution. +* +* Arguments +* ========= +* +* IDIST (input) INTEGER +* Specifies the distribution of the random numbers: +* = 1: real and imaginary parts each uniform (0,1) +* = 2: real and imaginary parts each uniform (-1,1) +* = 3: real and imaginary parts each normal (0,1) +* = 4: uniformly distributed on the disc abs(z) < 1 +* = 5: uniformly distributed on the circle abs(z) = 1 +* +* ISEED (input/output) INTEGER array, dimension (4) +* On entry, the seed of the random number generator; the array +* elements must be between 0 and 4095, and ISEED(4) must be +* odd. +* On exit, the seed is updated. +* +* N (input) INTEGER +* The number of random numbers to be generated. +* +* X (output) COMPLEX*16 array, dimension (N) +* The generated random numbers. +* +* Further Details +* =============== +* +* This routine calls the auxiliary routine DLARUV to generate random +* real numbers from a uniform (0,1) distribution, in batches of up to +* 128 using vectorisable code. The Box-Muller method is used to +* transform numbers from a uniform to a normal distribution. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) + INTEGER LV + PARAMETER ( LV = 128 ) + DOUBLE PRECISION TWOPI + PARAMETER ( TWOPI = 6.2831853071795864769252867663D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IL, IV +* .. +* .. Local Arrays .. + DOUBLE PRECISION U( LV ) +* .. +* .. Intrinsic Functions .. + INTRINSIC DCMPLX, EXP, LOG, MIN, SQRT +* .. +* .. External Subroutines .. + EXTERNAL DLARUV +* .. +* .. Executable Statements .. +* + DO 60 IV = 1, N, LV / 2 + IL = MIN( LV / 2, N-IV+1 ) +* +* Call DLARUV to generate 2*IL real numbers from a uniform (0,1) +* distribution (2*IL <= LV) +* + CALL DLARUV( ISEED, 2*IL, U ) +* + IF( IDIST.EQ.1 ) THEN +* +* Copy generated numbers +* + DO 10 I = 1, IL + X( IV+I-1 ) = DCMPLX( U( 2*I-1 ), U( 2*I ) ) + 10 CONTINUE + ELSE IF( IDIST.EQ.2 ) THEN +* +* Convert generated numbers to uniform (-1,1) distribution +* + DO 20 I = 1, IL + X( IV+I-1 ) = DCMPLX( TWO*U( 2*I-1 )-ONE, + $ TWO*U( 2*I )-ONE ) + 20 CONTINUE + ELSE IF( IDIST.EQ.3 ) THEN +* +* Convert generated numbers to normal (0,1) distribution +* + DO 30 I = 1, IL + X( IV+I-1 ) = SQRT( -TWO*LOG( U( 2*I-1 ) ) )* + $ EXP( DCMPLX( ZERO, TWOPI*U( 2*I ) ) ) + 30 CONTINUE + ELSE IF( IDIST.EQ.4 ) THEN +* +* Convert generated numbers to complex numbers uniformly +* distributed on the unit disk +* + DO 40 I = 1, IL + X( IV+I-1 ) = SQRT( U( 2*I-1 ) )* + $ EXP( DCMPLX( ZERO, TWOPI*U( 2*I ) ) ) + 40 CONTINUE + ELSE IF( IDIST.EQ.5 ) THEN +* +* Convert generated numbers to complex numbers uniformly +* distributed on the unit circle +* + DO 50 I = 1, IL + X( IV+I-1 ) = EXP( DCMPLX( ZERO, TWOPI*U( 2*I ) ) ) + 50 CONTINUE + END IF + 60 CONTINUE + RETURN +* +* End of ZLARNV +* + END diff --git a/costa/native/external/lapack/zlarrv.f b/costa/native/external/lapack/zlarrv.f new file mode 100644 index 000000000..d44fd7293 --- /dev/null +++ b/costa/native/external/lapack/zlarrv.f @@ -0,0 +1,437 @@ + SUBROUTINE ZLARRV( N, D, L, ISPLIT, M, W, IBLOCK, GERSCH, TOL, Z, + $ LDZ, ISUPPZ, WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDZ, M, N + DOUBLE PRECISION TOL +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), ISPLIT( * ), ISUPPZ( * ), + $ IWORK( * ) + DOUBLE PRECISION D( * ), GERSCH( * ), L( * ), W( * ), WORK( * ) + COMPLEX*16 Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZLARRV computes the eigenvectors of the tridiagonal matrix +* T = L D L^T given L, D and the eigenvalues of L D L^T. +* The input eigenvalues should have high relative accuracy with +* respect to the entries of L and D. The desired accuracy of the +* output can be specified by the input parameter TOL. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the n diagonal elements of the diagonal matrix D. +* On exit, D may be overwritten. +* +* L (input/output) DOUBLE PRECISION array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the unit +* bidiagonal matrix L in elements 1 to N-1 of L. L(N) need +* not be set. On exit, L is overwritten. +* +* ISPLIT (input) INTEGER array, dimension (N) +* The splitting points, at which T breaks up into submatrices. +* The first submatrix consists of rows/columns 1 to +* ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 +* through ISPLIT( 2 ), etc. +* +* TOL (input) DOUBLE PRECISION +* The absolute error tolerance for the +* eigenvalues/eigenvectors. +* Errors in the input eigenvalues must be bounded by TOL. +* The eigenvectors output have residual norms +* bounded by TOL, and the dot products between different +* eigenvectors are bounded by TOL. TOL must be at least +* N*EPS*|T|, where EPS is the machine precision and |T| is +* the 1-norm of the tridiagonal matrix. +* +* M (input) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (input) DOUBLE PRECISION array, dimension (N) +* The first M elements of W contain the eigenvalues for +* which eigenvectors are to be computed. The eigenvalues +* should be grouped by split-off block and ordered from +* smallest to largest within the block ( The output array +* W from DLARRE is expected here ). +* Errors in W must be bounded by TOL (see above). +* +* IBLOCK (input) INTEGER array, dimension (N) +* The submatrix indices associated with the corresponding +* eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to +* the first submatrix from the top, =2 if W(i) belongs to +* the second submatrix, etc. +* +* Z (output) COMPLEX*16 array, dimension (LDZ, max(1,M) ) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix T +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* ISUPPZ (output) INTEGER ARRAY, dimension ( 2*max(1,M) ) +* The support of the eigenvectors in Z, i.e., the indices +* indicating the nonzero elements in Z. The i-th eigenvector +* is nonzero only in elements ISUPPZ( 2*i-1 ) through +* ISUPPZ( 2*i ). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (13*N) +* +* IWORK (workspace) INTEGER array, dimension (6*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = 1, internal error in DLARRB +* if INFO = 2, internal error in ZSTEIN +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* Ken Stanley, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MGSSIZ + PARAMETER ( MGSSIZ = 20 ) + DOUBLE PRECISION ZERO, ONE, FOUR + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, FOUR = 4.0D0 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL MGSCLS + INTEGER I, IBEGIN, IEND, IINDC1, IINDC2, IINDR, IINDWK, + $ IINFO, IM, IN, INDERR, INDGAP, INDIN1, INDIN2, + $ INDLD, INDLLD, INDWRK, ITER, ITMP1, ITMP2, J, + $ JBLK, K, KTOT, LSBDPT, MAXITR, NCLUS, NDEPTH, + $ NDONE, NEWCLS, NEWFRS, NEWFTT, NEWLST, NEWSIZ, + $ NSPLIT, OLDCLS, OLDFST, OLDIEN, OLDLST, OLDNCL, + $ P, Q + DOUBLE PRECISION EPS, GAP, LAMBDA, MGSTOL, MINGMA, MINRGP, + $ NRMINV, RELGAP, RELTOL, RESID, RQCORR, SIGMA, + $ TMP1, ZTZ +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DZNRM2 + COMPLEX*16 ZDOTU + EXTERNAL DLAMCH, DZNRM2, ZDOTU +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLARRB, DLARRF, ZAXPY, ZDSCAL, ZLAR1V, + $ ZLASET, ZSTEIN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, MAX, MIN, SQRT +* .. +* .. Local Arrays .. + INTEGER TEMP( 1 ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INDERR = N + 1 + INDLD = 2*N + INDLLD = 3*N + INDGAP = 4*N + INDIN1 = 5*N + 1 + INDIN2 = 6*N + 1 + INDWRK = 7*N + 1 +* + IINDR = N + IINDC1 = 2*N + IINDC2 = 3*N + IINDWK = 4*N + 1 +* + EPS = DLAMCH( 'Precision' ) +* + DO 10 I = 1, 2*N + IWORK( I ) = 0 + 10 CONTINUE + DO 20 I = 1, M + WORK( INDERR+I-1 ) = EPS*ABS( W( I ) ) + 20 CONTINUE + CALL ZLASET( 'Full', N, N, CZERO, CZERO, Z, LDZ ) + MGSTOL = 5.0D0*EPS +* + NSPLIT = IBLOCK( M ) + IBEGIN = 1 + DO 190 JBLK = 1, NSPLIT + IEND = ISPLIT( JBLK ) +* +* Find the eigenvectors of the submatrix indexed IBEGIN +* through IEND. +* + IF( IBEGIN.EQ.IEND ) THEN + Z( IBEGIN, IBEGIN ) = ONE + ISUPPZ( 2*IBEGIN-1 ) = IBEGIN + ISUPPZ( 2*IBEGIN ) = IBEGIN + IBEGIN = IEND + 1 + GO TO 190 + END IF + OLDIEN = IBEGIN - 1 + IN = IEND - OLDIEN + RELTOL = MIN( 1.0D-2, ONE / DBLE( IN ) ) + IM = IN + CALL DCOPY( IM, W( IBEGIN ), 1, WORK, 1 ) + DO 30 I = 1, IN - 1 + WORK( INDGAP+I ) = WORK( I+1 ) - WORK( I ) + 30 CONTINUE + WORK( INDGAP+IN ) = MAX( ABS( WORK( IN ) ), EPS ) + NDONE = 0 +* + NDEPTH = 0 + LSBDPT = 1 + NCLUS = 1 + IWORK( IINDC1+1 ) = 1 + IWORK( IINDC1+2 ) = IN +* +* While( NDONE.LT.IM ) do +* + 40 CONTINUE + IF( NDONE.LT.IM ) THEN + OLDNCL = NCLUS + NCLUS = 0 + LSBDPT = 1 - LSBDPT + DO 170 I = 1, OLDNCL + IF( LSBDPT.EQ.0 ) THEN + OLDCLS = IINDC1 + NEWCLS = IINDC2 + ELSE + OLDCLS = IINDC2 + NEWCLS = IINDC1 + END IF +* +* If NDEPTH > 1, retrieve the relatively robust +* representation (RRR) and perform limited bisection +* (if necessary) to get approximate eigenvalues. +* + J = OLDCLS + 2*I + OLDFST = IWORK( J-1 ) + OLDLST = IWORK( J ) + IF( NDEPTH.GT.0 ) THEN + J = OLDIEN + OLDFST + DO 50 K = 1, IN + D( IBEGIN+K-1 ) = DBLE( Z( IBEGIN+K-1, + $ OLDIEN+OLDFST ) ) + L( IBEGIN+K-1 ) = DBLE( Z( IBEGIN+K-1, + $ OLDIEN+OLDFST+1 ) ) + 50 CONTINUE + SIGMA = L( IEND ) + END IF + K = IBEGIN + DO 60 J = 1, IN - 1 + WORK( INDLD+J ) = D( K )*L( K ) + WORK( INDLLD+J ) = WORK( INDLD+J )*L( K ) + K = K + 1 + 60 CONTINUE + IF( NDEPTH.GT.0 ) THEN + CALL DLARRB( IN, D( IBEGIN ), L( IBEGIN ), + $ WORK( INDLD+1 ), WORK( INDLLD+1 ), + $ OLDFST, OLDLST, SIGMA, RELTOL, WORK, + $ WORK( INDGAP+1 ), WORK( INDERR ), + $ WORK( INDWRK ), IWORK( IINDWK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 1 + RETURN + END IF + END IF +* +* Classify eigenvalues of the current representation (RRR) +* as (i) isolated, (ii) loosely clustered or (iii) tightly +* clustered +* + NEWFRS = OLDFST + DO 160 J = OLDFST, OLDLST + IF( J.EQ.OLDLST .OR. WORK( INDGAP+J ).GE.RELTOL* + $ ABS( WORK( J ) ) ) THEN + NEWLST = J + ELSE +* +* continue (to the next loop) +* + RELGAP = WORK( INDGAP+J ) / ABS( WORK( J ) ) + IF( J.EQ.NEWFRS ) THEN + MINRGP = RELGAP + ELSE + MINRGP = MIN( MINRGP, RELGAP ) + END IF + GO TO 160 + END IF + NEWSIZ = NEWLST - NEWFRS + 1 + MAXITR = 10 + NEWFTT = OLDIEN + NEWFRS + IF( NEWSIZ.GT.1 ) THEN + MGSCLS = NEWSIZ.LE.MGSSIZ .AND. MINRGP.GE.MGSTOL + IF( .NOT.MGSCLS ) THEN + DO 70 K = 1, IN + WORK( INDIN1+K-1 ) = DBLE( Z( IBEGIN+K-1, + $ NEWFTT ) ) + WORK( INDIN2+K-1 ) = DBLE( Z( IBEGIN+K-1, + $ NEWFTT+1 ) ) + 70 CONTINUE + CALL DLARRF( IN, D( IBEGIN ), L( IBEGIN ), + $ WORK( INDLD+1 ), WORK( INDLLD+1 ), + $ NEWFRS, NEWLST, WORK, + $ WORK( INDIN1 ), WORK( INDIN2 ), + $ WORK( INDWRK ), IWORK( IINDWK ), + $ INFO ) + IF( INFO.EQ.0 ) THEN + NCLUS = NCLUS + 1 + K = NEWCLS + 2*NCLUS + IWORK( K-1 ) = NEWFRS + IWORK( K ) = NEWLST + ELSE + INFO = 0 + IF( MINRGP.GE.MGSTOL ) THEN + MGSCLS = .TRUE. + ELSE +* +* Call ZSTEIN to process this tight cluster. +* This happens only if MINRGP <= MGSTOL +* and DLARRF returns INFO = 1. The latter +* means that a new RRR to "break" the +* cluster could not be found. +* + WORK( INDWRK ) = D( IBEGIN ) + DO 80 K = 1, IN - 1 + WORK( INDWRK+K ) = D( IBEGIN+K ) + + $ WORK( INDLLD+K ) + 80 CONTINUE + DO 90 K = 1, NEWSIZ + IWORK( IINDWK+K-1 ) = 1 + 90 CONTINUE + DO 100 K = NEWFRS, NEWLST + ISUPPZ( 2*( IBEGIN+K )-3 ) = 1 + ISUPPZ( 2*( IBEGIN+K )-2 ) = IN + 100 CONTINUE + TEMP( 1 ) = IN + CALL ZSTEIN( IN, WORK( INDWRK ), + $ WORK( INDLD+1 ), NEWSIZ, + $ WORK( NEWFRS ), + $ IWORK( IINDWK ), TEMP( 1 ), + $ Z( IBEGIN, NEWFTT ), LDZ, + $ WORK( INDWRK+IN ), + $ IWORK( IINDWK+IN ), + $ IWORK( IINDWK+2*IN ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 2 + RETURN + END IF + NDONE = NDONE + NEWSIZ + END IF + END IF + END IF + ELSE + MGSCLS = .FALSE. + END IF + IF( NEWSIZ.EQ.1 .OR. MGSCLS ) THEN + KTOT = NEWFTT + DO 120 K = NEWFRS, NEWLST + ITER = 0 + 110 CONTINUE + LAMBDA = WORK( K ) + CALL ZLAR1V( IN, 1, IN, LAMBDA, D( IBEGIN ), + $ L( IBEGIN ), WORK( INDLD+1 ), + $ WORK( INDLLD+1 ), + $ GERSCH( 2*OLDIEN+1 ), + $ Z( IBEGIN, KTOT ), ZTZ, MINGMA, + $ IWORK( IINDR+KTOT ), + $ ISUPPZ( 2*KTOT-1 ), + $ WORK( INDWRK ) ) + TMP1 = ONE / ZTZ + NRMINV = SQRT( TMP1 ) + RESID = ABS( MINGMA )*NRMINV + RQCORR = MINGMA*TMP1 + IF( K.EQ.IN ) THEN + GAP = WORK( INDGAP+K-1 ) + ELSE IF( K.EQ.1 ) THEN + GAP = WORK( INDGAP+K ) + ELSE + GAP = MIN( WORK( INDGAP+K-1 ), + $ WORK( INDGAP+K ) ) + END IF + ITER = ITER + 1 + IF( RESID.GT.TOL*GAP .AND. ABS( RQCORR ).GT. + $ FOUR*EPS*ABS( LAMBDA ) ) THEN + WORK( K ) = LAMBDA + RQCORR + IF( ITER.LT.MAXITR ) THEN + GO TO 110 + END IF + END IF + IWORK( KTOT ) = 1 + IF( NEWSIZ.EQ.1 ) + $ NDONE = NDONE + 1 + CALL ZDSCAL( IN, NRMINV, Z( IBEGIN, KTOT ), 1 ) + KTOT = KTOT + 1 + 120 CONTINUE + IF( NEWSIZ.GT.1 ) THEN + ITMP1 = ISUPPZ( 2*NEWFTT-1 ) + ITMP2 = ISUPPZ( 2*NEWFTT ) + KTOT = OLDIEN + NEWLST + DO 140 P = NEWFTT + 1, KTOT + DO 130 Q = NEWFTT, P - 1 + TMP1 = -ZDOTU( IN, Z( IBEGIN, P ), 1, + $ Z( IBEGIN, Q ), 1 ) + CALL ZAXPY( IN, DCMPLX( TMP1, ZERO ), + $ Z( IBEGIN, Q ), 1, + $ Z( IBEGIN, P ), 1 ) + 130 CONTINUE + TMP1 = ONE / DZNRM2( IN, Z( IBEGIN, P ), 1 ) + CALL ZDSCAL( IN, TMP1, Z( IBEGIN, P ), 1 ) + ITMP1 = MIN( ITMP1, ISUPPZ( 2*P-1 ) ) + ITMP2 = MAX( ITMP2, ISUPPZ( 2*P ) ) + 140 CONTINUE + DO 150 P = NEWFTT, KTOT + ISUPPZ( 2*P-1 ) = ITMP1 + ISUPPZ( 2*P ) = ITMP2 + 150 CONTINUE + NDONE = NDONE + NEWSIZ + END IF + END IF + NEWFRS = J + 1 + 160 CONTINUE + 170 CONTINUE + NDEPTH = NDEPTH + 1 + GO TO 40 + END IF + J = 2*IBEGIN + DO 180 I = IBEGIN, IEND + ISUPPZ( J-1 ) = ISUPPZ( J-1 ) + OLDIEN + ISUPPZ( J ) = ISUPPZ( J ) + OLDIEN + J = J + 2 + 180 CONTINUE + IBEGIN = IEND + 1 + 190 CONTINUE +* + RETURN +* +* End of ZLARRV +* + END diff --git a/costa/native/external/lapack/zlartg.f b/costa/native/external/lapack/zlartg.f new file mode 100644 index 000000000..95f9dc0b6 --- /dev/null +++ b/costa/native/external/lapack/zlartg.f @@ -0,0 +1,193 @@ + SUBROUTINE ZLARTG( F, G, CS, SN, R ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + DOUBLE PRECISION CS + COMPLEX*16 F, G, R, SN +* .. +* +* Purpose +* ======= +* +* ZLARTG generates a plane rotation so that +* +* [ CS SN ] [ F ] [ R ] +* [ __ ] . [ ] = [ ] where CS**2 + |SN|**2 = 1. +* [ -SN CS ] [ G ] [ 0 ] +* +* This is a faster version of the BLAS1 routine ZROTG, except for +* the following differences: +* F and G are unchanged on return. +* If G=0, then CS=1 and SN=0. +* If F=0, then CS=0 and SN is chosen so that R is real. +* +* Arguments +* ========= +* +* F (input) COMPLEX*16 +* The first component of vector to be rotated. +* +* G (input) COMPLEX*16 +* The second component of vector to be rotated. +* +* CS (output) DOUBLE PRECISION +* The cosine of the rotation. +* +* SN (output) COMPLEX*16 +* The sine of the rotation. +* +* R (output) COMPLEX*16 +* The nonzero component of the rotated vector. +* +* Further Details +* ======= ======= +* +* 3-5-96 - Modified with a new algorithm by W. Kahan and J. Demmel +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION TWO, ONE, ZERO + PARAMETER ( TWO = 2.0D+0, ONE = 1.0D+0, ZERO = 0.0D+0 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL FIRST + INTEGER COUNT, I + DOUBLE PRECISION D, DI, DR, EPS, F2, F2S, G2, G2S, SAFMIN, + $ SAFMN2, SAFMX2, SCALE + COMPLEX*16 FF, FS, GS +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLAPY2 + EXTERNAL DLAMCH, DLAPY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, INT, LOG, + $ MAX, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION ABS1, ABSSQ +* .. +* .. Save statement .. + SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / +* .. +* .. Statement Function definitions .. + ABS1( FF ) = MAX( ABS( DBLE( FF ) ), ABS( DIMAG( FF ) ) ) + ABSSQ( FF ) = DBLE( FF )**2 + DIMAG( FF )**2 +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + FIRST = .FALSE. + SAFMIN = DLAMCH( 'S' ) + EPS = DLAMCH( 'E' ) + SAFMN2 = DLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) / + $ LOG( DLAMCH( 'B' ) ) / TWO ) + SAFMX2 = ONE / SAFMN2 + END IF + SCALE = MAX( ABS1( F ), ABS1( G ) ) + FS = F + GS = G + COUNT = 0 + IF( SCALE.GE.SAFMX2 ) THEN + 10 CONTINUE + COUNT = COUNT + 1 + FS = FS*SAFMN2 + GS = GS*SAFMN2 + SCALE = SCALE*SAFMN2 + IF( SCALE.GE.SAFMX2 ) + $ GO TO 10 + ELSE IF( SCALE.LE.SAFMN2 ) THEN + IF( G.EQ.CZERO ) THEN + CS = ONE + SN = CZERO + R = F + RETURN + END IF + 20 CONTINUE + COUNT = COUNT - 1 + FS = FS*SAFMX2 + GS = GS*SAFMX2 + SCALE = SCALE*SAFMX2 + IF( SCALE.LE.SAFMN2 ) + $ GO TO 20 + END IF + F2 = ABSSQ( FS ) + G2 = ABSSQ( GS ) + IF( F2.LE.MAX( G2, ONE )*SAFMIN ) THEN +* +* This is a rare case: F is very small. +* + IF( F.EQ.CZERO ) THEN + CS = ZERO + R = DLAPY2( DBLE( G ), DIMAG( G ) ) +* Do complex/real division explicitly with two real divisions + D = DLAPY2( DBLE( GS ), DIMAG( GS ) ) + SN = DCMPLX( DBLE( GS ) / D, -DIMAG( GS ) / D ) + RETURN + END IF + F2S = DLAPY2( DBLE( FS ), DIMAG( FS ) ) +* G2 and G2S are accurate +* G2 is at least SAFMIN, and G2S is at least SAFMN2 + G2S = SQRT( G2 ) +* Error in CS from underflow in F2S is at most +* UNFL / SAFMN2 .lt. sqrt(UNFL*EPS) .lt. EPS +* If MAX(G2,ONE)=G2, then F2 .lt. G2*SAFMIN, +* and so CS .lt. sqrt(SAFMIN) +* If MAX(G2,ONE)=ONE, then F2 .lt. SAFMIN +* and so CS .lt. sqrt(SAFMIN)/SAFMN2 = sqrt(EPS) +* Therefore, CS = F2S/G2S / sqrt( 1 + (F2S/G2S)**2 ) = F2S/G2S + CS = F2S / G2S +* Make sure abs(FF) = 1 +* Do complex/real division explicitly with 2 real divisions + IF( ABS1( F ).GT.ONE ) THEN + D = DLAPY2( DBLE( F ), DIMAG( F ) ) + FF = DCMPLX( DBLE( F ) / D, DIMAG( F ) / D ) + ELSE + DR = SAFMX2*DBLE( F ) + DI = SAFMX2*DIMAG( F ) + D = DLAPY2( DR, DI ) + FF = DCMPLX( DR / D, DI / D ) + END IF + SN = FF*DCMPLX( DBLE( GS ) / G2S, -DIMAG( GS ) / G2S ) + R = CS*F + SN*G + ELSE +* +* This is the most common case. +* Neither F2 nor F2/G2 are less than SAFMIN +* F2S cannot overflow, and it is accurate +* + F2S = SQRT( ONE+G2 / F2 ) +* Do the F2S(real)*FS(complex) multiply with two real multiplies + R = DCMPLX( F2S*DBLE( FS ), F2S*DIMAG( FS ) ) + CS = ONE / F2S + D = F2 + G2 +* Do complex/real division explicitly with two real divisions + SN = DCMPLX( DBLE( R ) / D, DIMAG( R ) / D ) + SN = SN*DCONJG( GS ) + IF( COUNT.NE.0 ) THEN + IF( COUNT.GT.0 ) THEN + DO 30 I = 1, COUNT + R = R*SAFMX2 + 30 CONTINUE + ELSE + DO 40 I = 1, -COUNT + R = R*SAFMN2 + 40 CONTINUE + END IF + END IF + END IF + RETURN +* +* End of ZLARTG +* + END diff --git a/costa/native/external/lapack/zlartv.f b/costa/native/external/lapack/zlartv.f new file mode 100644 index 000000000..736cc7a4b --- /dev/null +++ b/costa/native/external/lapack/zlartv.f @@ -0,0 +1,79 @@ + SUBROUTINE ZLARTV( N, X, INCX, Y, INCY, C, S, INCC ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ) + COMPLEX*16 S( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZLARTV applies a vector of complex plane rotations with real cosines +* to elements of the complex vectors x and y. For i = 1,2,...,n +* +* ( x(i) ) := ( c(i) s(i) ) ( x(i) ) +* ( y(i) ) ( -conjg(s(i)) c(i) ) ( y(i) ) +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of plane rotations to be applied. +* +* X (input/output) COMPLEX*16 array, dimension (1+(N-1)*INCX) +* The vector x. +* +* INCX (input) INTEGER +* The increment between elements of X. INCX > 0. +* +* Y (input/output) COMPLEX*16 array, dimension (1+(N-1)*INCY) +* The vector y. +* +* INCY (input) INTEGER +* The increment between elements of Y. INCY > 0. +* +* C (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) +* The cosines of the plane rotations. +* +* S (input) COMPLEX*16 array, dimension (1+(N-1)*INCC) +* The sines of the plane rotations. +* +* INCC (input) INTEGER +* The increment between elements of C and S. INCC > 0. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IC, IX, IY + COMPLEX*16 XI, YI +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* + IX = 1 + IY = 1 + IC = 1 + DO 10 I = 1, N + XI = X( IX ) + YI = Y( IY ) + X( IX ) = C( IC )*XI + S( IC )*YI + Y( IY ) = C( IC )*YI - DCONJG( S( IC ) )*XI + IX = IX + INCX + IY = IY + INCY + IC = IC + INCC + 10 CONTINUE + RETURN +* +* End of ZLARTV +* + END diff --git a/costa/native/external/lapack/zlarz.f b/costa/native/external/lapack/zlarz.f new file mode 100644 index 000000000..661358e8a --- /dev/null +++ b/costa/native/external/lapack/zlarz.f @@ -0,0 +1,158 @@ + SUBROUTINE ZLARZ( SIDE, M, N, L, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, L, LDC, M, N + COMPLEX*16 TAU +* .. +* .. Array Arguments .. + COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZLARZ applies a complex elementary reflector H to a complex +* M-by-N matrix C, from either the left or the right. H is represented +* in the form +* +* H = I - tau * v * v' +* +* where tau is a complex scalar and v is a complex vector. +* +* If tau = 0, then H is taken to be the unit matrix. +* +* To apply H' (the conjugate transpose of H), supply conjg(tau) instead +* tau. +* +* H is a product of k elementary reflectors as returned by ZTZRZF. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': form H * C +* = 'R': form C * H +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* L (input) INTEGER +* The number of entries of the vector V containing +* the meaningful part of the Householder vectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* V (input) COMPLEX*16 array, dimension (1+(L-1)*abs(INCV)) +* The vector v in the representation of H as returned by +* ZTZRZF. V is not used if TAU = 0. +* +* INCV (input) INTEGER +* The increment between elements of v. INCV <> 0. +* +* TAU (input) COMPLEX*16 +* The value tau in the representation of H. +* +* C (input/output) COMPLEX*16 array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by the matrix H * C if SIDE = 'L', +* or C * H if SIDE = 'R'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX*16 array, dimension +* (N) if SIDE = 'L' +* or (M) if SIDE = 'R' +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZCOPY, ZGEMV, ZGERC, ZGERU, ZLACGV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C +* + IF( TAU.NE.ZERO ) THEN +* +* w( 1:n ) = conjg( C( 1, 1:n ) ) +* + CALL ZCOPY( N, C, LDC, WORK, 1 ) + CALL ZLACGV( N, WORK, 1 ) +* +* w( 1:n ) = conjg( w( 1:n ) + C( m-l+1:m, 1:n )' * v( 1:l ) ) +* + CALL ZGEMV( 'Conjugate transpose', L, N, ONE, C( M-L+1, 1 ), + $ LDC, V, INCV, ONE, WORK, 1 ) + CALL ZLACGV( N, WORK, 1 ) +* +* C( 1, 1:n ) = C( 1, 1:n ) - tau * w( 1:n ) +* + CALL ZAXPY( N, -TAU, WORK, 1, C, LDC ) +* +* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... +* tau * v( 1:l ) * conjg( w( 1:n )' ) +* + CALL ZGERU( L, N, -TAU, V, INCV, WORK, 1, C( M-L+1, 1 ), + $ LDC ) + END IF +* + ELSE +* +* Form C * H +* + IF( TAU.NE.ZERO ) THEN +* +* w( 1:m ) = C( 1:m, 1 ) +* + CALL ZCOPY( M, C, 1, WORK, 1 ) +* +* w( 1:m ) = w( 1:m ) + C( 1:m, n-l+1:n, 1:n ) * v( 1:l ) +* + CALL ZGEMV( 'No transpose', M, L, ONE, C( 1, N-L+1 ), LDC, + $ V, INCV, ONE, WORK, 1 ) +* +* C( 1:m, 1 ) = C( 1:m, 1 ) - tau * w( 1:m ) +* + CALL ZAXPY( M, -TAU, WORK, 1, C, 1 ) +* +* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... +* tau * w( 1:m ) * v( 1:l )' +* + CALL ZGERC( M, L, -TAU, WORK, 1, V, INCV, C( 1, N-L+1 ), + $ LDC ) +* + END IF +* + END IF +* + RETURN +* +* End of ZLARZ +* + END diff --git a/costa/native/external/lapack/zlarzb.f b/costa/native/external/lapack/zlarzb.f new file mode 100644 index 000000000..eba455112 --- /dev/null +++ b/costa/native/external/lapack/zlarzb.f @@ -0,0 +1,235 @@ + SUBROUTINE ZLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, + $ LDV, T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* December 1, 1999 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, L, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* Purpose +* ======= +* +* ZLARZB applies a complex block reflector H or its transpose H**H +* to a complex distributed M-by-N C from the left or the right. +* +* Currently, only STOREV = 'R' and DIRECT = 'B' are supported. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply H or H' from the Left +* = 'R': apply H or H' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply H (No transpose) +* = 'C': apply H' (Conjugate transpose) +* +* DIRECT (input) CHARACTER*1 +* Indicates how H is formed from a product of elementary +* reflectors +* = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) +* = 'B': H = H(k) . . . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Indicates how the vectors which define the elementary +* reflectors are stored: +* = 'C': Columnwise (not supported yet) +* = 'R': Rowwise +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* K (input) INTEGER +* The order of the matrix T (= the number of elementary +* reflectors whose product defines the block reflector). +* +* L (input) INTEGER +* The number of columns of the matrix V containing the +* meaningful part of the Householder reflectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* V (input) COMPLEX*16 array, dimension (LDV,NV). +* If STOREV = 'C', NV = K; if STOREV = 'R', NV = L. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C', LDV >= L; if STOREV = 'R', LDV >= K. +* +* T (input) COMPLEX*16 array, dimension (LDT,K) +* The triangular K-by-K matrix T in the representation of the +* block reflector. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* C (input/output) COMPLEX*16 array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by H*C or H'*C or C*H or C*H'. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX*16 array, dimension (LDWORK,K) +* +* LDWORK (input) INTEGER +* The leading dimension of the array WORK. +* If SIDE = 'L', LDWORK >= max(1,N); +* if SIDE = 'R', LDWORK >= max(1,M). +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, INFO, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZGEMM, ZLACGV, ZTRMM +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* +* Check for currently supported options +* + INFO = 0 + IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLARZB', -INFO ) + RETURN + END IF +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H' * C +* +* W( 1:n, 1:k ) = conjg( C( 1:k, 1:n )' ) +* + DO 10 J = 1, K + CALL ZCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W( 1:n, 1:k ) = W( 1:n, 1:k ) + ... +* conjg( C( m-l+1:m, 1:n )' ) * V( 1:k, 1:l )' +* + IF( L.GT.0 ) + $ CALL ZGEMM( 'Transpose', 'Conjugate transpose', N, K, L, + $ ONE, C( M-L+1, 1 ), LDC, V, LDV, ONE, WORK, + $ LDWORK ) +* +* W( 1:n, 1:k ) = W( 1:n, 1:k ) * T' or W( 1:m, 1:k ) * T +* + CALL ZTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, ONE, T, + $ LDT, WORK, LDWORK ) +* +* C( 1:k, 1:n ) = C( 1:k, 1:n ) - conjg( W( 1:n, 1:k )' ) +* + DO 30 J = 1, N + DO 20 I = 1, K + C( I, J ) = C( I, J ) - WORK( J, I ) + 20 CONTINUE + 30 CONTINUE +* +* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... +* conjg( V( 1:k, 1:l )' ) * conjg( W( 1:n, 1:k )' ) +* + IF( L.GT.0 ) + $ CALL ZGEMM( 'Transpose', 'Transpose', L, N, K, -ONE, V, LDV, + $ WORK, LDWORK, ONE, C( M-L+1, 1 ), LDC ) +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' +* +* W( 1:m, 1:k ) = C( 1:m, 1:k ) +* + DO 40 J = 1, K + CALL ZCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W( 1:m, 1:k ) = W( 1:m, 1:k ) + ... +* C( 1:m, n-l+1:n ) * conjg( V( 1:k, 1:l )' ) +* + IF( L.GT.0 ) + $ CALL ZGEMM( 'No transpose', 'Transpose', M, K, L, ONE, + $ C( 1, N-L+1 ), LDC, V, LDV, ONE, WORK, LDWORK ) +* +* W( 1:m, 1:k ) = W( 1:m, 1:k ) * conjg( T ) or +* W( 1:m, 1:k ) * conjg( T' ) +* + DO 50 J = 1, K + CALL ZLACGV( K-J+1, T( J, J ), 1 ) + 50 CONTINUE + CALL ZTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, ONE, T, + $ LDT, WORK, LDWORK ) + DO 60 J = 1, K + CALL ZLACGV( K-J+1, T( J, J ), 1 ) + 60 CONTINUE +* +* C( 1:m, 1:k ) = C( 1:m, 1:k ) - W( 1:m, 1:k ) +* + DO 80 J = 1, K + DO 70 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 70 CONTINUE + 80 CONTINUE +* +* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... +* W( 1:m, 1:k ) * conjg( V( 1:k, 1:l ) ) +* + DO 90 J = 1, L + CALL ZLACGV( K, V( 1, J ), 1 ) + 90 CONTINUE + IF( L.GT.0 ) + $ CALL ZGEMM( 'No transpose', 'No transpose', M, L, K, -ONE, + $ WORK, LDWORK, V, LDV, ONE, C( 1, N-L+1 ), LDC ) + DO 100 J = 1, L + CALL ZLACGV( K, V( 1, J ), 1 ) + 100 CONTINUE +* + END IF +* + RETURN +* +* End of ZLARZB +* + END diff --git a/costa/native/external/lapack/zlarzt.f b/costa/native/external/lapack/zlarzt.f new file mode 100644 index 000000000..e6bfed887 --- /dev/null +++ b/costa/native/external/lapack/zlarzt.f @@ -0,0 +1,187 @@ + SUBROUTINE ZLARZT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* Purpose +* ======= +* +* ZLARZT forms the triangular factor T of a complex block reflector +* H of order > n, which is defined as a product of k elementary +* reflectors. +* +* If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +* +* If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +* +* If STOREV = 'C', the vector which defines the elementary reflector +* H(i) is stored in the i-th column of the array V, and +* +* H = I - V * T * V' +* +* If STOREV = 'R', the vector which defines the elementary reflector +* H(i) is stored in the i-th row of the array V, and +* +* H = I - V' * T * V +* +* Currently, only STOREV = 'R' and DIRECT = 'B' are supported. +* +* Arguments +* ========= +* +* DIRECT (input) CHARACTER*1 +* Specifies the order in which the elementary reflectors are +* multiplied to form the block reflector: +* = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) +* = 'B': H = H(k) . . . H(2) H(1) (Backward) +* +* STOREV (input) CHARACTER*1 +* Specifies how the vectors which define the elementary +* reflectors are stored (see also Further Details): +* = 'C': columnwise (not supported yet) +* = 'R': rowwise +* +* N (input) INTEGER +* The order of the block reflector H. N >= 0. +* +* K (input) INTEGER +* The order of the triangular factor T (= the number of +* elementary reflectors). K >= 1. +* +* V (input/output) COMPLEX*16 array, dimension +* (LDV,K) if STOREV = 'C' +* (LDV,N) if STOREV = 'R' +* The matrix V. See further details. +* +* LDV (input) INTEGER +* The leading dimension of the array V. +* If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +* +* TAU (input) COMPLEX*16 array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i). +* +* T (output) COMPLEX*16 array, dimension (LDT,K) +* The k by k triangular factor T of the block reflector. +* If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +* lower triangular. The rest of the array is not used. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= K. +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* The shape of the matrix V and the storage of the vectors which define +* the H(i) is best illustrated by the following example with n = 5 and +* k = 3. The elements equal to 1 are not stored; the corresponding +* array elements are modified but restored on exit. The rest of the +* array is not used. +* +* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +* +* ______V_____ +* ( v1 v2 v3 ) / \ +* ( v1 v2 v3 ) ( v1 v1 v1 v1 v1 . . . . 1 ) +* V = ( v1 v2 v3 ) ( v2 v2 v2 v2 v2 . . . 1 ) +* ( v1 v2 v3 ) ( v3 v3 v3 v3 v3 . . 1 ) +* ( v1 v2 v3 ) +* . . . +* . . . +* 1 . . +* 1 . +* 1 +* +* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +* +* ______V_____ +* 1 / \ +* . 1 ( 1 . . . . v1 v1 v1 v1 v1 ) +* . . 1 ( . 1 . . . v2 v2 v2 v2 v2 ) +* . . . ( . . 1 . . v3 v3 v3 v3 v3 ) +* . . . +* ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* V = ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* ( v1 v2 v3 ) +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMV, ZLACGV, ZTRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Check for currently supported options +* + INFO = 0 + IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLARZT', -INFO ) + RETURN + END IF +* + DO 20 I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO 10 J = I, K + T( J, I ) = ZERO + 10 CONTINUE + ELSE +* +* general case +* + IF( I.LT.K ) THEN +* +* T(i+1:k,i) = - tau(i) * V(i+1:k,1:n) * V(i,1:n)' +* + CALL ZLACGV( N, V( I, 1 ), LDV ) + CALL ZGEMV( 'No transpose', K-I, N, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO, + $ T( I+1, I ), 1 ) + CALL ZLACGV( N, V( I, 1 ), LDV ) +* +* T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL ZTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + END IF + T( I, I ) = TAU( I ) + END IF + 20 CONTINUE + RETURN +* +* End of ZLARZT +* + END diff --git a/costa/native/external/lapack/zlascl.f b/costa/native/external/lapack/zlascl.f new file mode 100644 index 000000000..c67328602 --- /dev/null +++ b/costa/native/external/lapack/zlascl.f @@ -0,0 +1,268 @@ + SUBROUTINE ZLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER TYPE + INTEGER INFO, KL, KU, LDA, M, N + DOUBLE PRECISION CFROM, CTO +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZLASCL multiplies the M by N complex matrix A by the real scalar +* CTO/CFROM. This is done without over/underflow as long as the final +* result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that +* A may be full, upper triangular, lower triangular, upper Hessenberg, +* or banded. +* +* Arguments +* ========= +* +* TYPE (input) CHARACTER*1 +* TYPE indices the storage type of the input matrix. +* = 'G': A is a full matrix. +* = 'L': A is a lower triangular matrix. +* = 'U': A is an upper triangular matrix. +* = 'H': A is an upper Hessenberg matrix. +* = 'B': A is a symmetric band matrix with lower bandwidth KL +* and upper bandwidth KU and with the only the lower +* half stored. +* = 'Q': A is a symmetric band matrix with lower bandwidth KL +* and upper bandwidth KU and with the only the upper +* half stored. +* = 'Z': A is a band matrix with lower bandwidth KL and upper +* bandwidth KU. +* +* KL (input) INTEGER +* The lower bandwidth of A. Referenced only if TYPE = 'B', +* 'Q' or 'Z'. +* +* KU (input) INTEGER +* The upper bandwidth of A. Referenced only if TYPE = 'B', +* 'Q' or 'Z'. +* +* CFROM (input) DOUBLE PRECISION +* CTO (input) DOUBLE PRECISION +* The matrix A is multiplied by CTO/CFROM. A(I,J) is computed +* without over/underflow if the final result CTO*A(I,J)/CFROM +* can be represented without over/underflow. CFROM must be +* nonzero. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,M) +* The matrix to be multiplied by CTO/CFROM. See TYPE for the +* storage type. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* INFO (output) INTEGER +* 0 - successful exit +* <0 - if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER I, ITYPE, J, K1, K2, K3, K4 + DOUBLE PRECISION BIGNUM, CFROM1, CFROMC, CTO1, CTOC, MUL, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 +* + IF( LSAME( TYPE, 'G' ) ) THEN + ITYPE = 0 + ELSE IF( LSAME( TYPE, 'L' ) ) THEN + ITYPE = 1 + ELSE IF( LSAME( TYPE, 'U' ) ) THEN + ITYPE = 2 + ELSE IF( LSAME( TYPE, 'H' ) ) THEN + ITYPE = 3 + ELSE IF( LSAME( TYPE, 'B' ) ) THEN + ITYPE = 4 + ELSE IF( LSAME( TYPE, 'Q' ) ) THEN + ITYPE = 5 + ELSE IF( LSAME( TYPE, 'Z' ) ) THEN + ITYPE = 6 + ELSE + ITYPE = -1 + END IF +* + IF( ITYPE.EQ.-1 ) THEN + INFO = -1 + ELSE IF( CFROM.EQ.ZERO ) THEN + INFO = -4 + ELSE IF( M.LT.0 ) THEN + INFO = -6 + ELSE IF( N.LT.0 .OR. ( ITYPE.EQ.4 .AND. N.NE.M ) .OR. + $ ( ITYPE.EQ.5 .AND. N.NE.M ) ) THEN + INFO = -7 + ELSE IF( ITYPE.LE.3 .AND. LDA.LT.MAX( 1, M ) ) THEN + INFO = -9 + ELSE IF( ITYPE.GE.4 ) THEN + IF( KL.LT.0 .OR. KL.GT.MAX( M-1, 0 ) ) THEN + INFO = -2 + ELSE IF( KU.LT.0 .OR. KU.GT.MAX( N-1, 0 ) .OR. + $ ( ( ITYPE.EQ.4 .OR. ITYPE.EQ.5 ) .AND. KL.NE.KU ) ) + $ THEN + INFO = -3 + ELSE IF( ( ITYPE.EQ.4 .AND. LDA.LT.KL+1 ) .OR. + $ ( ITYPE.EQ.5 .AND. LDA.LT.KU+1 ) .OR. + $ ( ITYPE.EQ.6 .AND. LDA.LT.2*KL+KU+1 ) ) THEN + INFO = -9 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLASCL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* + CFROMC = CFROM + CTOC = CTO +* + 10 CONTINUE + CFROM1 = CFROMC*SMLNUM + CTO1 = CTOC / BIGNUM + IF( ABS( CFROM1 ).GT.ABS( CTOC ) .AND. CTOC.NE.ZERO ) THEN + MUL = SMLNUM + DONE = .FALSE. + CFROMC = CFROM1 + ELSE IF( ABS( CTO1 ).GT.ABS( CFROMC ) ) THEN + MUL = BIGNUM + DONE = .FALSE. + CTOC = CTO1 + ELSE + MUL = CTOC / CFROMC + DONE = .TRUE. + END IF +* + IF( ITYPE.EQ.0 ) THEN +* +* Full matrix +* + DO 30 J = 1, N + DO 20 I = 1, M + A( I, J ) = A( I, J )*MUL + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( ITYPE.EQ.1 ) THEN +* +* Lower triangular matrix +* + DO 50 J = 1, N + DO 40 I = J, M + A( I, J ) = A( I, J )*MUL + 40 CONTINUE + 50 CONTINUE +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Upper triangular matrix +* + DO 70 J = 1, N + DO 60 I = 1, MIN( J, M ) + A( I, J ) = A( I, J )*MUL + 60 CONTINUE + 70 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* Upper Hessenberg matrix +* + DO 90 J = 1, N + DO 80 I = 1, MIN( J+1, M ) + A( I, J ) = A( I, J )*MUL + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Lower half of a symmetric band matrix +* + K3 = KL + 1 + K4 = N + 1 + DO 110 J = 1, N + DO 100 I = 1, MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 100 CONTINUE + 110 CONTINUE +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Upper half of a symmetric band matrix +* + K1 = KU + 2 + K3 = KU + 1 + DO 130 J = 1, N + DO 120 I = MAX( K1-J, 1 ), K3 + A( I, J ) = A( I, J )*MUL + 120 CONTINUE + 130 CONTINUE +* + ELSE IF( ITYPE.EQ.6 ) THEN +* +* Band matrix +* + K1 = KL + KU + 2 + K2 = KL + 1 + K3 = 2*KL + KU + 1 + K4 = KL + KU + 1 + M + DO 150 J = 1, N + DO 140 I = MAX( K1-J, K2 ), MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 140 CONTINUE + 150 CONTINUE +* + END IF +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of ZLASCL +* + END diff --git a/costa/native/external/lapack/zlaset.f b/costa/native/external/lapack/zlaset.f new file mode 100644 index 000000000..1039ac802 --- /dev/null +++ b/costa/native/external/lapack/zlaset.f @@ -0,0 +1,115 @@ + SUBROUTINE ZLASET( UPLO, M, N, ALPHA, BETA, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, M, N + COMPLEX*16 ALPHA, BETA +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZLASET initializes a 2-D array A to BETA on the diagonal and +* ALPHA on the offdiagonals. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies the part of the matrix A to be set. +* = 'U': Upper triangular part is set. The lower triangle +* is unchanged. +* = 'L': Lower triangular part is set. The upper triangle +* is unchanged. +* Otherwise: All of the matrix A is set. +* +* M (input) INTEGER +* On entry, M specifies the number of rows of A. +* +* N (input) INTEGER +* On entry, N specifies the number of columns of A. +* +* ALPHA (input) COMPLEX*16 +* All the offdiagonal array elements are set to ALPHA. +* +* BETA (input) COMPLEX*16 +* All the diagonal array elements are set to BETA. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the m by n matrix A. +* On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j; +* A(i,i) = BETA , 1 <= i <= min(m,n) +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Set the diagonal to BETA and the strictly upper triangular +* part of the array to ALPHA. +* + DO 20 J = 2, N + DO 10 I = 1, MIN( J-1, M ) + A( I, J ) = ALPHA + 10 CONTINUE + 20 CONTINUE + DO 30 I = 1, MIN( N, M ) + A( I, I ) = BETA + 30 CONTINUE +* + ELSE IF( LSAME( UPLO, 'L' ) ) THEN +* +* Set the diagonal to BETA and the strictly lower triangular +* part of the array to ALPHA. +* + DO 50 J = 1, MIN( M, N ) + DO 40 I = J + 1, M + A( I, J ) = ALPHA + 40 CONTINUE + 50 CONTINUE + DO 60 I = 1, MIN( N, M ) + A( I, I ) = BETA + 60 CONTINUE +* + ELSE +* +* Set the array to BETA on the diagonal and ALPHA on the +* offdiagonal. +* + DO 80 J = 1, N + DO 70 I = 1, M + A( I, J ) = ALPHA + 70 CONTINUE + 80 CONTINUE + DO 90 I = 1, MIN( M, N ) + A( I, I ) = BETA + 90 CONTINUE + END IF +* + RETURN +* +* End of ZLASET +* + END diff --git a/costa/native/external/lapack/zlasr.f b/costa/native/external/lapack/zlasr.f new file mode 100644 index 000000000..1fb08d1d6 --- /dev/null +++ b/costa/native/external/lapack/zlasr.f @@ -0,0 +1,325 @@ + SUBROUTINE ZLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, PIVOT, SIDE + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ), S( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZLASR performs the transformation +* +* A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) +* +* A := A*P', when SIDE = 'R' or 'r' ( Right-hand side ) +* +* where A is an m by n complex matrix and P is an orthogonal matrix, +* consisting of a sequence of plane rotations determined by the +* parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l' +* and z = n when SIDE = 'R' or 'r' ): +* +* When DIRECT = 'F' or 'f' ( Forward sequence ) then +* +* P = P( z - 1 )*...*P( 2 )*P( 1 ), +* +* and when DIRECT = 'B' or 'b' ( Backward sequence ) then +* +* P = P( 1 )*P( 2 )*...*P( z - 1 ), +* +* where P( k ) is a plane rotation matrix for the following planes: +* +* when PIVOT = 'V' or 'v' ( Variable pivot ), +* the plane ( k, k + 1 ) +* +* when PIVOT = 'T' or 't' ( Top pivot ), +* the plane ( 1, k + 1 ) +* +* when PIVOT = 'B' or 'b' ( Bottom pivot ), +* the plane ( k, z ) +* +* c( k ) and s( k ) must contain the cosine and sine that define the +* matrix P( k ). The two by two plane rotation part of the matrix +* P( k ), R( k ), is assumed to be of the form +* +* R( k ) = ( c( k ) s( k ) ). +* ( -s( k ) c( k ) ) +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* Specifies whether the plane rotation matrix P is applied to +* A on the left or the right. +* = 'L': Left, compute A := P*A +* = 'R': Right, compute A:= A*P' +* +* DIRECT (input) CHARACTER*1 +* Specifies whether P is a forward or backward sequence of +* plane rotations. +* = 'F': Forward, P = P( z - 1 )*...*P( 2 )*P( 1 ) +* = 'B': Backward, P = P( 1 )*P( 2 )*...*P( z - 1 ) +* +* PIVOT (input) CHARACTER*1 +* Specifies the plane for which P(k) is a plane rotation +* matrix. +* = 'V': Variable pivot, the plane (k,k+1) +* = 'T': Top pivot, the plane (1,k+1) +* = 'B': Bottom pivot, the plane (k,z) +* +* M (input) INTEGER +* The number of rows of the matrix A. If m <= 1, an immediate +* return is effected. +* +* N (input) INTEGER +* The number of columns of the matrix A. If n <= 1, an +* immediate return is effected. +* +* C, S (input) DOUBLE PRECISION arrays, dimension +* (M-1) if SIDE = 'L' +* (N-1) if SIDE = 'R' +* c(k) and s(k) contain the cosine and sine that define the +* matrix P(k). The two by two plane rotation part of the +* matrix P(k), R(k), is assumed to be of the form +* R( k ) = ( c( k ) s( k ) ). +* ( -s( k ) c( k ) ) +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* The m by n matrix A. On exit, A is overwritten by P*A if +* SIDE = 'R' or by A*P' if SIDE = 'L'. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J + DOUBLE PRECISION CTEMP, STEMP + COMPLEX*16 TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.( LSAME( SIDE, 'L' ) .OR. LSAME( SIDE, 'R' ) ) ) THEN + INFO = 1 + ELSE IF( .NOT.( LSAME( PIVOT, 'V' ) .OR. LSAME( PIVOT, + $ 'T' ) .OR. LSAME( PIVOT, 'B' ) ) ) THEN + INFO = 2 + ELSE IF( .NOT.( LSAME( DIRECT, 'F' ) .OR. LSAME( DIRECT, 'B' ) ) ) + $ THEN + INFO = 3 + ELSE IF( M.LT.0 ) THEN + INFO = 4 + ELSE IF( N.LT.0 ) THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = 9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLASR ', INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form P * A +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 20 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 10 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 40 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 30 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 30 CONTINUE + END IF + 40 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 60 J = 2, M + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 50 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 80 J = M, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 70 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 70 CONTINUE + END IF + 80 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 100 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 90 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 90 CONTINUE + END IF + 100 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 120 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 110 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 110 CONTINUE + END IF + 120 CONTINUE + END IF + END IF + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form A * P' +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 140 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 130 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 130 CONTINUE + END IF + 140 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 160 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 150 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 150 CONTINUE + END IF + 160 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 180 J = 2, N + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 170 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 170 CONTINUE + END IF + 180 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 200 J = N, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 190 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 190 CONTINUE + END IF + 200 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 220 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 210 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 210 CONTINUE + END IF + 220 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 240 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 230 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 230 CONTINUE + END IF + 240 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of ZLASR +* + END diff --git a/costa/native/external/lapack/zlassq.f b/costa/native/external/lapack/zlassq.f new file mode 100644 index 000000000..541c207ca --- /dev/null +++ b/costa/native/external/lapack/zlassq.f @@ -0,0 +1,102 @@ + SUBROUTINE ZLASSQ( N, X, INCX, SCALE, SUMSQ ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INCX, N + DOUBLE PRECISION SCALE, SUMSQ +* .. +* .. Array Arguments .. + COMPLEX*16 X( * ) +* .. +* +* Purpose +* ======= +* +* ZLASSQ returns the values scl and ssq such that +* +* ( scl**2 )*ssq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, +* +* where x( i ) = abs( X( 1 + ( i - 1 )*INCX ) ). The value of sumsq is +* assumed to be at least unity and the value of ssq will then satisfy +* +* 1.0 .le. ssq .le. ( sumsq + 2*n ). +* +* scale is assumed to be non-negative and scl returns the value +* +* scl = max( scale, abs( real( x( i ) ) ), abs( aimag( x( i ) ) ) ), +* i +* +* scale and sumsq must be supplied in SCALE and SUMSQ respectively. +* SCALE and SUMSQ are overwritten by scl and ssq respectively. +* +* The routine makes only one pass through the vector X. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of elements to be used from the vector X. +* +* X (input) COMPLEX*16 array, dimension (N) +* The vector x as described above. +* x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. +* +* INCX (input) INTEGER +* The increment between successive values of the vector X. +* INCX > 0. +* +* SCALE (input/output) DOUBLE PRECISION +* On entry, the value scale in the equation above. +* On exit, SCALE is overwritten with the value scl . +* +* SUMSQ (input/output) DOUBLE PRECISION +* On entry, the value sumsq in the equation above. +* On exit, SUMSQ is overwritten with the value ssq . +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER IX + DOUBLE PRECISION TEMP1 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG +* .. +* .. Executable Statements .. +* + IF( N.GT.0 ) THEN + DO 10 IX = 1, 1 + ( N-1 )*INCX, INCX + IF( DBLE( X( IX ) ).NE.ZERO ) THEN + TEMP1 = ABS( DBLE( X( IX ) ) ) + IF( SCALE.LT.TEMP1 ) THEN + SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2 + SCALE = TEMP1 + ELSE + SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 + END IF + END IF + IF( DIMAG( X( IX ) ).NE.ZERO ) THEN + TEMP1 = ABS( DIMAG( X( IX ) ) ) + IF( SCALE.LT.TEMP1 ) THEN + SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2 + SCALE = TEMP1 + ELSE + SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 + END IF + END IF + 10 CONTINUE + END IF +* + RETURN +* +* End of ZLASSQ +* + END diff --git a/costa/native/external/lapack/zlaswp.f b/costa/native/external/lapack/zlaswp.f new file mode 100644 index 000000000..0749250bc --- /dev/null +++ b/costa/native/external/lapack/zlaswp.f @@ -0,0 +1,120 @@ + SUBROUTINE ZLASWP( N, A, LDA, K1, K2, IPIV, INCX ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INCX, K1, K2, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZLASWP performs a series of row interchanges on the matrix A. +* One row interchange is initiated for each of rows K1 through K2 of A. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of columns of the matrix A. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the matrix of column dimension N to which the row +* interchanges will be applied. +* On exit, the permuted matrix. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* +* K1 (input) INTEGER +* The first element of IPIV for which a row interchange will +* be done. +* +* K2 (input) INTEGER +* The last element of IPIV for which a row interchange will +* be done. +* +* IPIV (input) INTEGER array, dimension (M*abs(INCX)) +* The vector of pivot indices. Only the elements in positions +* K1 through K2 of IPIV are accessed. +* IPIV(K) = L implies rows K and L are to be interchanged. +* +* INCX (input) INTEGER +* The increment between successive values of IPIV. If IPIV +* is negative, the pivots are applied in reverse order. +* +* Further Details +* =============== +* +* Modified by +* R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, I1, I2, INC, IP, IX, IX0, J, K, N32 + COMPLEX*16 TEMP +* .. +* .. Executable Statements .. +* +* Interchange row I with row IPIV(I) for each of rows K1 through K2. +* + IF( INCX.GT.0 ) THEN + IX0 = K1 + I1 = K1 + I2 = K2 + INC = 1 + ELSE IF( INCX.LT.0 ) THEN + IX0 = 1 + ( 1-K2 )*INCX + I1 = K2 + I2 = K1 + INC = -1 + ELSE + RETURN + END IF +* + N32 = ( N / 32 )*32 + IF( N32.NE.0 ) THEN + DO 30 J = 1, N32, 32 + IX = IX0 + DO 20 I = I1, I2, INC + IP = IPIV( IX ) + IF( IP.NE.I ) THEN + DO 10 K = J, J + 31 + TEMP = A( I, K ) + A( I, K ) = A( IP, K ) + A( IP, K ) = TEMP + 10 CONTINUE + END IF + IX = IX + INCX + 20 CONTINUE + 30 CONTINUE + END IF + IF( N32.NE.N ) THEN + N32 = N32 + 1 + IX = IX0 + DO 50 I = I1, I2, INC + IP = IPIV( IX ) + IF( IP.NE.I ) THEN + DO 40 K = N32, N + TEMP = A( I, K ) + A( I, K ) = A( IP, K ) + A( IP, K ) = TEMP + 40 CONTINUE + END IF + IX = IX + INCX + 50 CONTINUE + END IF +* + RETURN +* +* End of ZLASWP +* + END diff --git a/costa/native/external/lapack/zlasyf.f b/costa/native/external/lapack/zlasyf.f new file mode 100644 index 000000000..a7f89e74e --- /dev/null +++ b/costa/native/external/lapack/zlasyf.f @@ -0,0 +1,598 @@ + SUBROUTINE ZLASYF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), W( LDW, * ) +* .. +* +* Purpose +* ======= +* +* ZLASYF computes a partial factorization of a complex symmetric matrix +* A using the Bunch-Kaufman diagonal pivoting method. The partial +* factorization has the form: +* +* A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +* ( 0 U22 ) ( 0 D ) ( U12' U22' ) +* +* A = ( L11 0 ) ( D 0 ) ( L11' L21' ) if UPLO = 'L' +* ( L21 I ) ( 0 A22 ) ( 0 I ) +* +* where the order of D is at most NB. The actual order is returned in +* the argument KB, and is either NB or NB-1, or N if N <= NB. +* Note that U' denotes the transpose of U. +* +* ZLASYF is an auxiliary routine called by ZSYTRF. It uses blocked code +* (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or +* A22 (if UPLO = 'L'). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NB (input) INTEGER +* The maximum number of columns of the matrix A that should be +* factored. NB should be at least 2 to allow for 2-by-2 pivot +* blocks. +* +* KB (output) INTEGER +* The number of columns of A that were actually factored. +* KB is either NB-1 or NB, or N if N <= NB. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* On exit, A contains details of the partial factorization. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If UPLO = 'U', only the last KB elements of IPIV are set; +* if UPLO = 'L', only the first KB elements are set. +* +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* W (workspace) COMPLEX*16 array, dimension (LDW,NB) +* +* LDW (input) INTEGER +* The leading dimension of the array W. LDW >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* > 0: if INFO = k, D(k,k) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER IMAX, J, JB, JJ, JMAX, JP, K, KK, KKW, KP, + $ KSTEP, KW + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, ROWMAX + COMPLEX*16 D11, D21, D22, R1, T, Z +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + EXTERNAL LSAME, IZAMAX +* .. +* .. External Subroutines .. + EXTERNAL ZCOPY, ZGEMM, ZGEMV, ZSCAL, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, MIN, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( DBLE( Z ) ) + ABS( DIMAG( Z ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* +* KW is the column of W which corresponds to column K of A +* + K = N + 10 CONTINUE + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* +* Copy column K of A to column KW of W and update it +* + CALL ZCOPY( K, A( 1, K ), 1, W( 1, KW ), 1 ) + IF( K.LT.N ) + $ CALL ZGEMV( 'No transpose', K, N-K, -CONE, A( 1, K+1 ), LDA, + $ W( K, KW+1 ), LDW, CONE, W( 1, KW ), 1 ) +* + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( W( K, KW ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = IZAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = CABS1( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* Copy column IMAX to column KW-1 of W and update it +* + CALL ZCOPY( IMAX, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 ) + CALL ZCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) + IF( K.LT.N ) + $ CALL ZGEMV( 'No transpose', K, N-K, -CONE, + $ A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW, + $ CONE, W( 1, KW-1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = IMAX + IZAMAX( K-IMAX, W( IMAX+1, KW-1 ), 1 ) + ROWMAX = CABS1( W( JMAX, KW-1 ) ) + IF( IMAX.GT.1 ) THEN + JMAX = IZAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, KW-1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( W( IMAX, KW-1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW +* + CALL ZCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + KKW = NB + KK - N +* +* Updated column KP is already stored in column KKW of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL ZCOPY( K-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + CALL ZCOPY( KP, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last KK columns of A and W +* + CALL ZSWAP( N-KK+1, A( KK, KK ), LDA, A( KP, KK ), LDA ) + CALL ZSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column KW of W now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Store U(k) in column k of A +* + CALL ZCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + R1 = CONE / A( K, K ) + CALL ZSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns KW and KW-1 of W now +* hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* + IF( K.GT.2 ) THEN +* +* Store U(k) and U(k-1) in columns k and k-1 of A +* + D21 = W( K-1, KW ) + D11 = W( K, KW ) / D21 + D22 = W( K-1, KW-1 ) / D21 + T = CONE / ( D11*D22-CONE ) + D21 = T / D21 + DO 20 J = 1, K - 2 + A( J, K-1 ) = D21*( D11*W( J, KW-1 )-W( J, KW ) ) + A( J, K ) = D21*( D22*W( J, KW )-W( J, KW-1 ) ) + 20 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = W( K-1, KW ) + A( K, K ) = W( K, KW ) + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12' = A11 - U12*W' +* +* computing blocks of NB columns at a time +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + CALL ZGEMV( 'No transpose', JJ-J+1, N-K, -CONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, CONE, + $ A( J, JJ ), 1 ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + CALL ZGEMM( 'No transpose', 'Transpose', J-1, JB, N-K, + $ -CONE, A( 1, K+1 ), LDA, W( J, KW+1 ), LDW, + $ CONE, A( 1, J ), LDA ) + 50 CONTINUE +* +* Put U12 in standard form by partially undoing the interchanges +* in columns k+1:n +* + J = K + 1 + 60 CONTINUE + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP + J = J + 1 + END IF + J = J + 1 + IF( JP.NE.JJ .AND. J.LE.N ) + $ CALL ZSWAP( N-J+1, A( JP, J ), LDA, A( JJ, J ), LDA ) + IF( J.LE.N ) + $ GO TO 60 +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* +* Copy column K of A to column K of W and update it +* + CALL ZCOPY( N-K+1, A( K, K ), 1, W( K, K ), 1 ) + CALL ZGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), LDA, + $ W( K, 1 ), LDW, CONE, W( K, K ), 1 ) +* + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( W( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + IZAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = CABS1( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* Copy column IMAX to column K+1 of W and update it +* + CALL ZCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1 ) + CALL ZCOPY( N-IMAX+1, A( IMAX, IMAX ), 1, W( IMAX, K+1 ), + $ 1 ) + CALL ZGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), + $ LDA, W( IMAX, 1 ), LDW, CONE, W( K, K+1 ), + $ 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = K - 1 + IZAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = CABS1( W( JMAX, K+1 ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + IZAMAX( N-IMAX, W( IMAX+1, K+1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, K+1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( W( IMAX, K+1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K +* + CALL ZCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 +* +* Updated column KP is already stored in column KK of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL ZCOPY( KP-K-1, A( K+1, KK ), 1, A( KP, K+1 ), LDA ) + CALL ZCOPY( N-KP+1, A( KP, KK ), 1, A( KP, KP ), 1 ) +* +* Interchange rows KK and KP in first KK columns of A and W +* + CALL ZSWAP( KK, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL ZSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* +* Store L(k) in column k of A +* + CALL ZCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN + R1 = CONE / A( K, K ) + CALL ZSCAL( N-K, R1, A( K+1, K ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Store L(k) and L(k+1) in columns k and k+1 of A +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / D21 + T = CONE / ( D11*D22-CONE ) + D21 = T / D21 + DO 80 J = K + 2, N + A( J, K ) = D21*( D11*W( J, K )-W( J, K+1 ) ) + A( J, K+1 ) = D21*( D22*W( J, K+1 )-W( J, K ) ) + 80 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = W( K+1, K ) + A( K+1, K+1 ) = W( K+1, K+1 ) + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21' = A22 - L21*W' +* +* computing blocks of NB columns at a time +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + CALL ZGEMV( 'No transpose', J+JB-JJ, K-1, -CONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, CONE, + $ A( JJ, JJ ), 1 ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL ZGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -CONE, A( J+JB, 1 ), LDA, W( J, 1 ), + $ LDW, CONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Put L21 in standard form by partially undoing the interchanges +* in columns 1:k-1 +* + J = K - 1 + 120 CONTINUE + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP + J = J - 1 + END IF + J = J - 1 + IF( JP.NE.JJ .AND. J.GE.1 ) + $ CALL ZSWAP( J, A( JP, 1 ), LDA, A( JJ, 1 ), LDA ) + IF( J.GE.1 ) + $ GO TO 120 +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF + RETURN +* +* End of ZLASYF +* + END diff --git a/costa/native/external/lapack/zlatbs.f b/costa/native/external/lapack/zlatbs.f new file mode 100644 index 000000000..f389f9f78 --- /dev/null +++ b/costa/native/external/lapack/zlatbs.f @@ -0,0 +1,909 @@ + SUBROUTINE ZLATBS( UPLO, TRANS, DIAG, NORMIN, N, KD, AB, LDAB, X, + $ SCALE, CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, KD, LDAB, N + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + DOUBLE PRECISION CNORM( * ) + COMPLEX*16 AB( LDAB, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* ZLATBS solves one of the triangular systems +* +* A * x = s*b, A**T * x = s*b, or A**H * x = s*b, +* +* with scaling to prevent overflow, where A is an upper or lower +* triangular band matrix. Here A' denotes the transpose of A, x and b +* are n-element vectors, and s is a scaling factor, usually less than +* or equal to 1, chosen so that the components of x will be less than +* the overflow threshold. If the unscaled problem will not cause +* overflow, the Level 2 BLAS routine ZTBSV is called. If the matrix A +* is singular (A(j,j) = 0 for some j), then s is set to 0 and a +* non-trivial solution to A*x = 0 is returned. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* TRANS (input) CHARACTER*1 +* Specifies the operation applied to A. +* = 'N': Solve A * x = s*b (No transpose) +* = 'T': Solve A**T * x = s*b (Transpose) +* = 'C': Solve A**H * x = s*b (Conjugate transpose) +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* NORMIN (input) CHARACTER*1 +* Specifies whether CNORM has been set or not. +* = 'Y': CNORM contains the column norms on entry +* = 'N': CNORM is not set on entry. On exit, the norms will +* be computed and stored in CNORM. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of subdiagonals or superdiagonals in the +* triangular matrix A. KD >= 0. +* +* AB (input) COMPLEX*16 array, dimension (LDAB,N) +* The upper or lower triangular band matrix A, stored in the +* first KD+1 rows of the array. The j-th column of A is stored +* in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* X (input/output) COMPLEX*16 array, dimension (N) +* On entry, the right hand side b of the triangular system. +* On exit, X is overwritten by the solution vector x. +* +* SCALE (output) DOUBLE PRECISION +* The scaling factor s for the triangular system +* A * x = s*b, A**T * x = s*b, or A**H * x = s*b. +* If SCALE = 0, the matrix A is singular or badly scaled, and +* the vector x is an exact or approximate solution to A*x = 0. +* +* CNORM (input or output) DOUBLE PRECISION array, dimension (N) +* +* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +* contains the norm of the off-diagonal part of the j-th column +* of A. If TRANS = 'N', CNORM(j) must be greater than or equal +* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +* must be greater than or equal to the 1-norm. +* +* If NORMIN = 'N', CNORM is an output argument and CNORM(j) +* returns the 1-norm of the offdiagonal part of the j-th column +* of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* Further Details +* ======= ======= +* +* A rough bound on x is computed; if that is less than overflow, ZTBSV +* is called, otherwise, specific code is used which checks for possible +* overflow or divide-by-zero at every operation. +* +* A columnwise scheme is used for solving A*x = b. The basic algorithm +* if A is lower triangular is +* +* x[1:n] := b[1:n] +* for j = 1, ..., n +* x(j) := x(j) / A(j,j) +* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +* end +* +* Define bounds on the components of x after j iterations of the loop: +* M(j) = bound on x[1:j] +* G(j) = bound on x[j+1:n] +* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +* +* Then for iteration j+1 we have +* M(j+1) <= G(j) / | A(j+1,j+1) | +* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +* +* where CNORM(j+1) is greater than or equal to the infinity-norm of +* column j+1 of A, not counting the diagonal. Hence +* +* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +* 1<=i<=j +* and +* +* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +* 1<=i< j +* +* Since |x(j)| <= M(j), we use the Level 2 BLAS routine ZTBSV if the +* reciprocal of the largest M(j), j=1,..,n, is larger than +* max(underflow, 1/overflow). +* +* The bound on x(j) is also used to determine when a step in the +* columnwise method can be performed without fear of overflow. If +* the computed bound is greater than a large constant, x is scaled to +* prevent overflow, but if the bound overflows, x is set to 0, x(j) to +* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +* +* Similarly, a row-wise scheme is used to solve A**T *x = b or +* A**H *x = b. The basic algorithm for A upper triangular is +* +* for j = 1, ..., n +* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +* end +* +* We simultaneously compute two bounds +* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +* M(j) = bound on x(i), 1<=i<=j +* +* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +* Then the bound on x(j) is +* +* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +* +* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +* 1<=i<=j +* +* and we can safely call ZTBSV if 1/M(n) and 1/G(n) are both greater +* than max(underflow, 1/overflow). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0, + $ TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, J, JFIRST, JINC, JLAST, JLEN, MAIND + DOUBLE PRECISION BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL, + $ XBND, XJ, XMAX + COMPLEX*16 CSUMJ, TJJS, USCAL, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX, IZAMAX + DOUBLE PRECISION DLAMCH, DZASUM + COMPLEX*16 ZDOTC, ZDOTU, ZLADIV + EXTERNAL LSAME, IDAMAX, IZAMAX, DLAMCH, DZASUM, ZDOTC, + $ ZDOTU, ZLADIV +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, XERBLA, ZAXPY, ZDSCAL, ZTBSV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1, CABS2 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) + CABS2( ZDUM ) = ABS( DBLE( ZDUM ) / 2.D0 ) + + $ ABS( DIMAG( ZDUM ) / 2.D0 ) +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( KD.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLATBS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM / DLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + DO 10 J = 1, N + JLEN = MIN( KD, J-1 ) + CNORM( J ) = DZASUM( JLEN, AB( KD+1-JLEN, J ), 1 ) + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + DO 20 J = 1, N + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) THEN + CNORM( J ) = DZASUM( JLEN, AB( 2, J ), 1 ) + ELSE + CNORM( J ) = ZERO + END IF + 20 CONTINUE + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM/2. +* + IMAX = IDAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM*HALF ) THEN + TSCAL = ONE + ELSE + TSCAL = HALF / ( SMLNUM*TMAX ) + CALL DSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine ZTBSV can be used. +* + XMAX = ZERO + DO 30 J = 1, N + XMAX = MAX( XMAX, CABS2( X( J ) ) ) + 30 CONTINUE + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + MAIND = KD + 1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + MAIND = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 60 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* + TJJS = AB( MAIND, J ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = G(j-1) / abs(A(j,j)) +* + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF +* + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + 40 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 50 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 50 CONTINUE + END IF + 60 CONTINUE +* + ELSE +* +* Compute the growth in A**T * x = b or A**H * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + MAIND = KD + 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + MAIND = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 90 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* + TJJS = AB( MAIND, J ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF + 70 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 80 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 80 CONTINUE + END IF + 90 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL ZTBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM*HALF ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = ( BIGNUM*HALF ) / XMAX + CALL ZDSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + ELSE + XMAX = XMAX*TWO + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + DO 120 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 110 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 100 I = 1, N + X( I ) = ZERO + 100 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 110 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL ZDSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) - +* x(j)* A(max(1,j-kd):j-1,j) +* + JLEN = MIN( KD, J-1 ) + CALL ZAXPY( JLEN, -X( J )*TSCAL, + $ AB( KD+1-JLEN, J ), 1, X( J-JLEN ), 1 ) + I = IZAMAX( J-1, X, 1 ) + XMAX = CABS1( X( I ) ) + END IF + ELSE IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:min(j+kd,n)) := x(j+1:min(j+kd,n)) - +* x(j) * A(j+1:min(j+kd,n),j) +* + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) + $ CALL ZAXPY( JLEN, -X( J )*TSCAL, AB( 2, J ), 1, + $ X( J+1 ), 1 ) + I = J + IZAMAX( N-J, X( J+1 ), 1 ) + XMAX = CABS1( X( I ) ) + END IF + 120 CONTINUE +* + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Solve A**T * x = b +* + DO 170 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = ZLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.DCMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call ZDOTU to perform the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + CSUMJ = ZDOTU( JLEN, AB( KD+1-JLEN, J ), 1, + $ X( J-JLEN ), 1 ) + ELSE + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.1 ) + $ CSUMJ = ZDOTU( JLEN, AB( 2, J ), 1, X( J+1 ), + $ 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + DO 130 I = 1, JLEN + CSUMJ = CSUMJ + ( AB( KD+I-JLEN, J )*USCAL )* + $ X( J-JLEN-1+I ) + 130 CONTINUE + ELSE + JLEN = MIN( KD, N-J ) + DO 140 I = 1, JLEN + CSUMJ = CSUMJ + ( AB( I+1, J )*USCAL )*X( J+I ) + 140 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.DCMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 160 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**T *x = 0. +* + DO 150 I = 1, N + X( I ) = ZERO + 150 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 160 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = ZLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + 170 CONTINUE +* + ELSE +* +* Solve A**H * x = b +* + DO 220 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = DCONJG( AB( MAIND, J ) )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = ZLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.DCMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call ZDOTC to perform the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + CSUMJ = ZDOTC( JLEN, AB( KD+1-JLEN, J ), 1, + $ X( J-JLEN ), 1 ) + ELSE + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.1 ) + $ CSUMJ = ZDOTC( JLEN, AB( 2, J ), 1, X( J+1 ), + $ 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + DO 180 I = 1, JLEN + CSUMJ = CSUMJ + ( DCONJG( AB( KD+I-JLEN, J ) )* + $ USCAL )*X( J-JLEN-1+I ) + 180 CONTINUE + ELSE + JLEN = MIN( KD, N-J ) + DO 190 I = 1, JLEN + CSUMJ = CSUMJ + ( DCONJG( AB( I+1, J ) )*USCAL ) + $ *X( J+I ) + 190 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.DCMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = DCONJG( AB( MAIND, J ) )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 210 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**H *x = 0. +* + DO 200 I = 1, N + X( I ) = ZERO + 200 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 210 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = ZLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + 220 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL DSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of ZLATBS +* + END diff --git a/costa/native/external/lapack/zlatdf.f b/costa/native/external/lapack/zlatdf.f new file mode 100644 index 000000000..3316ca4fb --- /dev/null +++ b/costa/native/external/lapack/zlatdf.f @@ -0,0 +1,242 @@ + SUBROUTINE ZLATDF( IJOB, N, Z, LDZ, RHS, RDSUM, RDSCAL, IPIV, + $ JPIV ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER IJOB, LDZ, N + DOUBLE PRECISION RDSCAL, RDSUM +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + COMPLEX*16 RHS( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZLATDF computes the contribution to the reciprocal Dif-estimate +* by solving for x in Z * x = b, where b is chosen such that the norm +* of x is as large as possible. It is assumed that LU decomposition +* of Z has been computed by ZGETC2. On entry RHS = f holds the +* contribution from earlier solved sub-systems, and on return RHS = x. +* +* The factorization of Z returned by ZGETC2 has the form +* Z = P * L * U * Q, where P and Q are permutation matrices. L is lower +* triangular with unit diagonal elements and U is upper triangular. +* +* Arguments +* ========= +* +* IJOB (input) INTEGER +* IJOB = 2: First compute an approximative null-vector e +* of Z using ZGECON, e is normalized and solve for +* Zx = +-e - f with the sign giving the greater value of +* 2-norm(x). About 5 times as expensive as Default. +* IJOB .ne. 2: Local look ahead strategy where +* all entries of the r.h.s. b is choosen as either +1 or +* -1. Default. +* +* N (input) INTEGER +* The number of columns of the matrix Z. +* +* Z (input) DOUBLE PRECISION array, dimension (LDZ, N) +* On entry, the LU part of the factorization of the n-by-n +* matrix Z computed by ZGETC2: Z = P * L * U * Q +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDA >= max(1, N). +* +* RHS (input/output) DOUBLE PRECISION array, dimension (N). +* On entry, RHS contains contributions from other subsystems. +* On exit, RHS contains the solution of the subsystem with +* entries according to the value of IJOB (see above). +* +* RDSUM (input/output) DOUBLE PRECISION +* On entry, the sum of squares of computed contributions to +* the Dif-estimate under computation by ZTGSYL, where the +* scaling factor RDSCAL (see below) has been factored out. +* On exit, the corresponding sum of squares updated with the +* contributions from the current sub-system. +* If TRANS = 'T' RDSUM is not touched. +* NOTE: RDSUM only makes sense when ZTGSY2 is called by CTGSYL. +* +* RDSCAL (input/output) DOUBLE PRECISION +* On entry, scaling factor used to prevent overflow in RDSUM. +* On exit, RDSCAL is updated w.r.t. the current contributions +* in RDSUM. +* If TRANS = 'T', RDSCAL is not touched. +* NOTE: RDSCAL only makes sense when ZTGSY2 is called by +* ZTGSYL. +* +* IPIV (input) INTEGER array, dimension (N). +* The pivot indices; for 1 <= i <= N, row i of the +* matrix has been interchanged with row IPIV(i). +* +* JPIV (input) INTEGER array, dimension (N). +* The pivot indices; for 1 <= j <= N, column j of the +* matrix has been interchanged with column JPIV(j). +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* This routine is a further developed implementation of algorithm +* BSOLVE in [1] using complete pivoting in the LU factorization. +* +* [1] Bo Kagstrom and Lars Westin, +* Generalized Schur Methods with Condition Estimators for +* Solving the Generalized Sylvester Equation, IEEE Transactions +* on Automatic Control, Vol. 34, No. 7, July 1989, pp 745-751. +* +* [2] Peter Poromaa, +* On Efficient and Robust Estimators for the Separation +* between two Regular Matrix Pairs with Applications in +* Condition Estimation. Report UMINF-95.05, Department of +* Computing Science, Umea University, S-901 87 Umea, Sweden, +* 1995. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXDIM + PARAMETER ( MAXDIM = 2 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J, K + DOUBLE PRECISION RTEMP, SCALE, SMINU, SPLUS + COMPLEX*16 BM, BP, PMONE, TEMP +* .. +* .. Local Arrays .. + DOUBLE PRECISION RWORK( MAXDIM ) + COMPLEX*16 WORK( 4*MAXDIM ), XM( MAXDIM ), XP( MAXDIM ) +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZCOPY, ZGECON, ZGESC2, ZLASSQ, ZLASWP, + $ ZSCAL +* .. +* .. External Functions .. + DOUBLE PRECISION DZASUM + COMPLEX*16 ZDOTC + EXTERNAL DZASUM, ZDOTC +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, SQRT +* .. +* .. Executable Statements .. +* + IF( IJOB.NE.2 ) THEN +* +* Apply permutations IPIV to RHS +* + CALL ZLASWP( 1, RHS, LDZ, 1, N-1, IPIV, 1 ) +* +* Solve for L-part choosing RHS either to +1 or -1. +* + PMONE = -CONE + DO 10 J = 1, N - 1 + BP = RHS( J ) + CONE + BM = RHS( J ) - CONE + SPLUS = ONE +* +* Lockahead for L- part RHS(1:N-1) = +-1 +* SPLUS and SMIN computed more efficiently than in BSOLVE[1]. +* + SPLUS = SPLUS + DBLE( ZDOTC( N-J, Z( J+1, J ), 1, Z( J+1, + $ J ), 1 ) ) + SMINU = DBLE( ZDOTC( N-J, Z( J+1, J ), 1, RHS( J+1 ), 1 ) ) + SPLUS = SPLUS*DBLE( RHS( J ) ) + IF( SPLUS.GT.SMINU ) THEN + RHS( J ) = BP + ELSE IF( SMINU.GT.SPLUS ) THEN + RHS( J ) = BM + ELSE +* +* In this case the updating sums are equal and we can +* choose RHS(J) +1 or -1. The first time this happens we +* choose -1, thereafter +1. This is a simple way to get +* good estimates of matrices like Byers well-known example +* (see [1]). (Not done in BSOLVE.) +* + RHS( J ) = RHS( J ) + PMONE + PMONE = CONE + END IF +* +* Compute the remaining r.h.s. +* + TEMP = -RHS( J ) + CALL ZAXPY( N-J, TEMP, Z( J+1, J ), 1, RHS( J+1 ), 1 ) + 10 CONTINUE +* +* Solve for U- part, lockahead for RHS(N) = +-1. This is not done +* In BSOLVE and will hopefully give us a better estimate because +* any ill-conditioning of the original matrix is transfered to U +* and not to L. U(N, N) is an approximation to sigma_min(LU). +* + CALL ZCOPY( N-1, RHS, 1, WORK, 1 ) + WORK( N ) = RHS( N ) + CONE + RHS( N ) = RHS( N ) - CONE + SPLUS = ZERO + SMINU = ZERO + DO 30 I = N, 1, -1 + TEMP = CONE / Z( I, I ) + WORK( I ) = WORK( I )*TEMP + RHS( I ) = RHS( I )*TEMP + DO 20 K = I + 1, N + WORK( I ) = WORK( I ) - WORK( K )*( Z( I, K )*TEMP ) + RHS( I ) = RHS( I ) - RHS( K )*( Z( I, K )*TEMP ) + 20 CONTINUE + SPLUS = SPLUS + ABS( WORK( I ) ) + SMINU = SMINU + ABS( RHS( I ) ) + 30 CONTINUE + IF( SPLUS.GT.SMINU ) + $ CALL ZCOPY( N, WORK, 1, RHS, 1 ) +* +* Apply the permutations JPIV to the computed solution (RHS) +* + CALL ZLASWP( 1, RHS, LDZ, 1, N-1, JPIV, -1 ) +* +* Compute the sum of squares +* + CALL ZLASSQ( N, RHS, 1, RDSCAL, RDSUM ) + RETURN + END IF +* +* ENTRY IJOB = 2 +* +* Compute approximate nullvector XM of Z +* + CALL ZGECON( 'I', N, Z, LDZ, ONE, RTEMP, WORK, RWORK, INFO ) + CALL ZCOPY( N, WORK( N+1 ), 1, XM, 1 ) +* +* Compute RHS +* + CALL ZLASWP( 1, XM, LDZ, 1, N-1, IPIV, -1 ) + TEMP = CONE / SQRT( ZDOTC( N, XM, 1, XM, 1 ) ) + CALL ZSCAL( N, TEMP, XM, 1 ) + CALL ZCOPY( N, XM, 1, XP, 1 ) + CALL ZAXPY( N, CONE, RHS, 1, XP, 1 ) + CALL ZAXPY( N, -CONE, XM, 1, RHS, 1 ) + CALL ZGESC2( N, Z, LDZ, RHS, IPIV, JPIV, SCALE ) + CALL ZGESC2( N, Z, LDZ, XP, IPIV, JPIV, SCALE ) + IF( DZASUM( N, XP, 1 ).GT.DZASUM( N, RHS, 1 ) ) + $ CALL ZCOPY( N, XP, 1, RHS, 1 ) +* +* Compute the sum of squares +* + CALL ZLASSQ( N, RHS, 1, RDSCAL, RDSUM ) + RETURN +* +* End of ZLATDF +* + END diff --git a/costa/native/external/lapack/zlatps.f b/costa/native/external/lapack/zlatps.f new file mode 100644 index 000000000..0b26e585a --- /dev/null +++ b/costa/native/external/lapack/zlatps.f @@ -0,0 +1,895 @@ + SUBROUTINE ZLATPS( UPLO, TRANS, DIAG, NORMIN, N, AP, X, SCALE, + $ CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, N + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + DOUBLE PRECISION CNORM( * ) + COMPLEX*16 AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* ZLATPS solves one of the triangular systems +* +* A * x = s*b, A**T * x = s*b, or A**H * x = s*b, +* +* with scaling to prevent overflow, where A is an upper or lower +* triangular matrix stored in packed form. Here A**T denotes the +* transpose of A, A**H denotes the conjugate transpose of A, x and b +* are n-element vectors, and s is a scaling factor, usually less than +* or equal to 1, chosen so that the components of x will be less than +* the overflow threshold. If the unscaled problem will not cause +* overflow, the Level 2 BLAS routine ZTPSV is called. If the matrix A +* is singular (A(j,j) = 0 for some j), then s is set to 0 and a +* non-trivial solution to A*x = 0 is returned. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* TRANS (input) CHARACTER*1 +* Specifies the operation applied to A. +* = 'N': Solve A * x = s*b (No transpose) +* = 'T': Solve A**T * x = s*b (Transpose) +* = 'C': Solve A**H * x = s*b (Conjugate transpose) +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* NORMIN (input) CHARACTER*1 +* Specifies whether CNORM has been set or not. +* = 'Y': CNORM contains the column norms on entry +* = 'N': CNORM is not set on entry. On exit, the norms will +* be computed and stored in CNORM. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The upper or lower triangular matrix A, packed columnwise in +* a linear array. The j-th column of A is stored in the array +* AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* X (input/output) COMPLEX*16 array, dimension (N) +* On entry, the right hand side b of the triangular system. +* On exit, X is overwritten by the solution vector x. +* +* SCALE (output) DOUBLE PRECISION +* The scaling factor s for the triangular system +* A * x = s*b, A**T * x = s*b, or A**H * x = s*b. +* If SCALE = 0, the matrix A is singular or badly scaled, and +* the vector x is an exact or approximate solution to A*x = 0. +* +* CNORM (input or output) DOUBLE PRECISION array, dimension (N) +* +* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +* contains the norm of the off-diagonal part of the j-th column +* of A. If TRANS = 'N', CNORM(j) must be greater than or equal +* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +* must be greater than or equal to the 1-norm. +* +* If NORMIN = 'N', CNORM is an output argument and CNORM(j) +* returns the 1-norm of the offdiagonal part of the j-th column +* of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* Further Details +* ======= ======= +* +* A rough bound on x is computed; if that is less than overflow, ZTPSV +* is called, otherwise, specific code is used which checks for possible +* overflow or divide-by-zero at every operation. +* +* A columnwise scheme is used for solving A*x = b. The basic algorithm +* if A is lower triangular is +* +* x[1:n] := b[1:n] +* for j = 1, ..., n +* x(j) := x(j) / A(j,j) +* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +* end +* +* Define bounds on the components of x after j iterations of the loop: +* M(j) = bound on x[1:j] +* G(j) = bound on x[j+1:n] +* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +* +* Then for iteration j+1 we have +* M(j+1) <= G(j) / | A(j+1,j+1) | +* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +* +* where CNORM(j+1) is greater than or equal to the infinity-norm of +* column j+1 of A, not counting the diagonal. Hence +* +* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +* 1<=i<=j +* and +* +* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +* 1<=i< j +* +* Since |x(j)| <= M(j), we use the Level 2 BLAS routine ZTPSV if the +* reciprocal of the largest M(j), j=1,..,n, is larger than +* max(underflow, 1/overflow). +* +* The bound on x(j) is also used to determine when a step in the +* columnwise method can be performed without fear of overflow. If +* the computed bound is greater than a large constant, x is scaled to +* prevent overflow, but if the bound overflows, x is set to 0, x(j) to +* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +* +* Similarly, a row-wise scheme is used to solve A**T *x = b or +* A**H *x = b. The basic algorithm for A upper triangular is +* +* for j = 1, ..., n +* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +* end +* +* We simultaneously compute two bounds +* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +* M(j) = bound on x(i), 1<=i<=j +* +* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +* Then the bound on x(j) is +* +* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +* +* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +* 1<=i<=j +* +* and we can safely call ZTPSV if 1/M(n) and 1/G(n) are both greater +* than max(underflow, 1/overflow). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0, + $ TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, IP, J, JFIRST, JINC, JLAST, JLEN + DOUBLE PRECISION BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL, + $ XBND, XJ, XMAX + COMPLEX*16 CSUMJ, TJJS, USCAL, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX, IZAMAX + DOUBLE PRECISION DLAMCH, DZASUM + COMPLEX*16 ZDOTC, ZDOTU, ZLADIV + EXTERNAL LSAME, IDAMAX, IZAMAX, DLAMCH, DZASUM, ZDOTC, + $ ZDOTU, ZLADIV +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, XERBLA, ZAXPY, ZDSCAL, ZTPSV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1, CABS2 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) + CABS2( ZDUM ) = ABS( DBLE( ZDUM ) / 2.D0 ) + + $ ABS( DIMAG( ZDUM ) / 2.D0 ) +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLATPS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM / DLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + IP = 1 + DO 10 J = 1, N + CNORM( J ) = DZASUM( J-1, AP( IP ), 1 ) + IP = IP + J + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + IP = 1 + DO 20 J = 1, N - 1 + CNORM( J ) = DZASUM( N-J, AP( IP+1 ), 1 ) + IP = IP + N - J + 1 + 20 CONTINUE + CNORM( N ) = ZERO + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM/2. +* + IMAX = IDAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM*HALF ) THEN + TSCAL = ONE + ELSE + TSCAL = HALF / ( SMLNUM*TMAX ) + CALL DSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine ZTPSV can be used. +* + XMAX = ZERO + DO 30 J = 1, N + XMAX = MAX( XMAX, CABS2( X( J ) ) ) + 30 CONTINUE + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 60 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = N + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* + TJJS = AP( IP ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = G(j-1) / abs(A(j,j)) +* + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF +* + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + IP = IP + JINC*JLEN + JLEN = JLEN - 1 + 40 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 50 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 50 CONTINUE + END IF + 60 CONTINUE +* + ELSE +* +* Compute the growth in A**T * x = b or A**H * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 90 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = 1 + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* + TJJS = AP( IP ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF + JLEN = JLEN + 1 + IP = IP + JINC*JLEN + 70 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 80 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 80 CONTINUE + END IF + 90 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL ZTPSV( UPLO, TRANS, DIAG, N, AP, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM*HALF ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = ( BIGNUM*HALF ) / XMAX + CALL ZDSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + ELSE + XMAX = XMAX*TWO + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + IP = JFIRST*( JFIRST+1 ) / 2 + DO 120 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 110 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 100 I = 1, N + X( I ) = ZERO + 100 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 110 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL ZDSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +* + CALL ZAXPY( J-1, -X( J )*TSCAL, AP( IP-J+1 ), 1, X, + $ 1 ) + I = IZAMAX( J-1, X, 1 ) + XMAX = CABS1( X( I ) ) + END IF + IP = IP - J + ELSE + IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +* + CALL ZAXPY( N-J, -X( J )*TSCAL, AP( IP+1 ), 1, + $ X( J+1 ), 1 ) + I = J + IZAMAX( N-J, X( J+1 ), 1 ) + XMAX = CABS1( X( I ) ) + END IF + IP = IP + N - J + 1 + END IF + 120 CONTINUE +* + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Solve A**T * x = b +* + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = 1 + DO 170 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = ZLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.DCMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call ZDOTU to perform the dot product. +* + IF( UPPER ) THEN + CSUMJ = ZDOTU( J-1, AP( IP-J+1 ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + CSUMJ = ZDOTU( N-J, AP( IP+1 ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 130 I = 1, J - 1 + CSUMJ = CSUMJ + ( AP( IP-J+I )*USCAL )*X( I ) + 130 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 140 I = 1, N - J + CSUMJ = CSUMJ + ( AP( IP+I )*USCAL )*X( J+I ) + 140 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.DCMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 160 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**T *x = 0. +* + DO 150 I = 1, N + X( I ) = ZERO + 150 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 160 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = ZLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + JLEN = JLEN + 1 + IP = IP + JINC*JLEN + 170 CONTINUE +* + ELSE +* +* Solve A**H * x = b +* + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = 1 + DO 220 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = DCONJG( AP( IP ) )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = ZLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.DCMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call ZDOTC to perform the dot product. +* + IF( UPPER ) THEN + CSUMJ = ZDOTC( J-1, AP( IP-J+1 ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + CSUMJ = ZDOTC( N-J, AP( IP+1 ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 180 I = 1, J - 1 + CSUMJ = CSUMJ + ( DCONJG( AP( IP-J+I ) )*USCAL ) + $ *X( I ) + 180 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 190 I = 1, N - J + CSUMJ = CSUMJ + ( DCONJG( AP( IP+I ) )*USCAL )* + $ X( J+I ) + 190 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.DCMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = DCONJG( AP( IP ) )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 210 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**H *x = 0. +* + DO 200 I = 1, N + X( I ) = ZERO + 200 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 210 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = ZLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + JLEN = JLEN + 1 + IP = IP + JINC*JLEN + 220 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL DSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of ZLATPS +* + END diff --git a/costa/native/external/lapack/zlatrd.f b/costa/native/external/lapack/zlatrd.f new file mode 100644 index 000000000..8b0369fcc --- /dev/null +++ b/costa/native/external/lapack/zlatrd.f @@ -0,0 +1,280 @@ + SUBROUTINE ZLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDW, N, NB +* .. +* .. Array Arguments .. + DOUBLE PRECISION E( * ) + COMPLEX*16 A( LDA, * ), TAU( * ), W( LDW, * ) +* .. +* +* Purpose +* ======= +* +* ZLATRD reduces NB rows and columns of a complex Hermitian matrix A to +* Hermitian tridiagonal form by a unitary similarity +* transformation Q' * A * Q, and returns the matrices V and W which are +* needed to apply the transformation to the unreduced part of A. +* +* If UPLO = 'U', ZLATRD reduces the last NB rows and columns of a +* matrix, of which the upper triangle is supplied; +* if UPLO = 'L', ZLATRD reduces the first NB rows and columns of a +* matrix, of which the lower triangle is supplied. +* +* This is an auxiliary routine called by ZHETRD. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER +* Specifies whether the upper or lower triangular part of the +* Hermitian matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. +* +* NB (input) INTEGER +* The number of rows and columns to be reduced. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* On exit: +* if UPLO = 'U', the last NB columns have been reduced to +* tridiagonal form, with the diagonal elements overwriting +* the diagonal elements of A; the elements above the diagonal +* with the array TAU, represent the unitary matrix Q as a +* product of elementary reflectors; +* if UPLO = 'L', the first NB columns have been reduced to +* tridiagonal form, with the diagonal elements overwriting +* the diagonal elements of A; the elements below the diagonal +* with the array TAU, represent the unitary matrix Q as a +* product of elementary reflectors. +* See Further Details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* E (output) DOUBLE PRECISION array, dimension (N-1) +* If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal +* elements of the last NB columns of the reduced matrix; +* if UPLO = 'L', E(1:nb) contains the subdiagonal elements of +* the first NB columns of the reduced matrix. +* +* TAU (output) COMPLEX*16 array, dimension (N-1) +* The scalar factors of the elementary reflectors, stored in +* TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. +* See Further Details. +* +* W (output) COMPLEX*16 array, dimension (LDW,NB) +* The n-by-nb matrix W required to update the unreduced part +* of A. +* +* LDW (input) INTEGER +* The leading dimension of the array W. LDW >= max(1,N). +* +* Further Details +* =============== +* +* If UPLO = 'U', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(n) H(n-1) . . . H(n-nb+1). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), +* and tau in TAU(i-1). +* +* If UPLO = 'L', the matrix Q is represented as a product of elementary +* reflectors +* +* Q = H(1) H(2) . . . H(nb). +* +* Each H(i) has the form +* +* H(i) = I - tau * v * v' +* +* where tau is a complex scalar, and v is a complex vector with +* v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), +* and tau in TAU(i). +* +* The elements of the vectors v together form the n-by-nb matrix V +* which is needed, with W, to apply the transformation to the unreduced +* part of the matrix, using a Hermitian rank-2k update of the form: +* A := A - V*W' - W*V'. +* +* The contents of A on exit are illustrated by the following examples +* with n = 5 and nb = 2: +* +* if UPLO = 'U': if UPLO = 'L': +* +* ( a a a v4 v5 ) ( d ) +* ( a a v4 v5 ) ( 1 d ) +* ( a 1 v5 ) ( v1 1 a ) +* ( d 1 ) ( v1 v2 a a ) +* ( d ) ( v1 v2 a a a ) +* +* where d denotes a diagonal element of the reduced matrix, a denotes +* an element of the original matrix that is unchanged, and vi denotes +* an element of the vector defining H(i). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE, HALF + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ), + $ HALF = ( 0.5D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, IW + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZGEMV, ZHEMV, ZLACGV, ZLARFG, ZSCAL +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Reduce last NB columns of upper triangle +* + DO 10 I = N, N - NB + 1, -1 + IW = I - N + NB + IF( I.LT.N ) THEN +* +* Update A(1:i,i) +* + A( I, I ) = DBLE( A( I, I ) ) + CALL ZLACGV( N-I, W( I, IW+1 ), LDW ) + CALL ZGEMV( 'No transpose', I, N-I, -ONE, A( 1, I+1 ), + $ LDA, W( I, IW+1 ), LDW, ONE, A( 1, I ), 1 ) + CALL ZLACGV( N-I, W( I, IW+1 ), LDW ) + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + CALL ZGEMV( 'No transpose', I, N-I, -ONE, W( 1, IW+1 ), + $ LDW, A( I, I+1 ), LDA, ONE, A( 1, I ), 1 ) + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + A( I, I ) = DBLE( A( I, I ) ) + END IF + IF( I.GT.1 ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(1:i-2,i) +* + ALPHA = A( I-1, I ) + CALL ZLARFG( I-1, ALPHA, A( 1, I ), 1, TAU( I-1 ) ) + E( I-1 ) = ALPHA + A( I-1, I ) = ONE +* +* Compute W(1:i-1,i) +* + CALL ZHEMV( 'Upper', I-1, ONE, A, LDA, A( 1, I ), 1, + $ ZERO, W( 1, IW ), 1 ) + IF( I.LT.N ) THEN + CALL ZGEMV( 'Conjugate transpose', I-1, N-I, ONE, + $ W( 1, IW+1 ), LDW, A( 1, I ), 1, ZERO, + $ W( I+1, IW ), 1 ) + CALL ZGEMV( 'No transpose', I-1, N-I, -ONE, + $ A( 1, I+1 ), LDA, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + CALL ZGEMV( 'Conjugate transpose', I-1, N-I, ONE, + $ A( 1, I+1 ), LDA, A( 1, I ), 1, ZERO, + $ W( I+1, IW ), 1 ) + CALL ZGEMV( 'No transpose', I-1, N-I, -ONE, + $ W( 1, IW+1 ), LDW, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + END IF + CALL ZSCAL( I-1, TAU( I-1 ), W( 1, IW ), 1 ) + ALPHA = -HALF*TAU( I-1 )*ZDOTC( I-1, W( 1, IW ), 1, + $ A( 1, I ), 1 ) + CALL ZAXPY( I-1, ALPHA, A( 1, I ), 1, W( 1, IW ), 1 ) + END IF +* + 10 CONTINUE + ELSE +* +* Reduce first NB columns of lower triangle +* + DO 20 I = 1, NB +* +* Update A(i:n,i) +* + A( I, I ) = DBLE( A( I, I ) ) + CALL ZLACGV( I-1, W( I, 1 ), LDW ) + CALL ZGEMV( 'No transpose', N-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, W( I, 1 ), LDW, ONE, A( I, I ), 1 ) + CALL ZLACGV( I-1, W( I, 1 ), LDW ) + CALL ZLACGV( I-1, A( I, 1 ), LDA ) + CALL ZGEMV( 'No transpose', N-I+1, I-1, -ONE, W( I, 1 ), + $ LDW, A( I, 1 ), LDA, ONE, A( I, I ), 1 ) + CALL ZLACGV( I-1, A( I, 1 ), LDA ) + A( I, I ) = DBLE( A( I, I ) ) + IF( I.LT.N ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:n,i) +* + ALPHA = A( I+1, I ) + CALL ZLARFG( N-I, ALPHA, A( MIN( I+2, N ), I ), 1, + $ TAU( I ) ) + E( I ) = ALPHA + A( I+1, I ) = ONE +* +* Compute W(i+1:n,i) +* + CALL ZHEMV( 'Lower', N-I, ONE, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, W( I+1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', N-I, I-1, ONE, + $ W( I+1, 1 ), LDW, A( I+1, I ), 1, ZERO, + $ W( 1, I ), 1 ) + CALL ZGEMV( 'No transpose', N-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', N-I, I-1, ONE, + $ A( I+1, 1 ), LDA, A( I+1, I ), 1, ZERO, + $ W( 1, I ), 1 ) + CALL ZGEMV( 'No transpose', N-I, I-1, -ONE, W( I+1, 1 ), + $ LDW, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL ZSCAL( N-I, TAU( I ), W( I+1, I ), 1 ) + ALPHA = -HALF*TAU( I )*ZDOTC( N-I, W( I+1, I ), 1, + $ A( I+1, I ), 1 ) + CALL ZAXPY( N-I, ALPHA, A( I+1, I ), 1, W( I+1, I ), 1 ) + END IF +* + 20 CONTINUE + END IF +* + RETURN +* +* End of ZLATRD +* + END diff --git a/costa/native/external/lapack/zlatrs.f b/costa/native/external/lapack/zlatrs.f new file mode 100644 index 000000000..4ae39729a --- /dev/null +++ b/costa/native/external/lapack/zlatrs.f @@ -0,0 +1,880 @@ + SUBROUTINE ZLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE, + $ CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1992 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + DOUBLE PRECISION CNORM( * ) + COMPLEX*16 A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* ZLATRS solves one of the triangular systems +* +* A * x = s*b, A**T * x = s*b, or A**H * x = s*b, +* +* with scaling to prevent overflow. Here A is an upper or lower +* triangular matrix, A**T denotes the transpose of A, A**H denotes the +* conjugate transpose of A, x and b are n-element vectors, and s is a +* scaling factor, usually less than or equal to 1, chosen so that the +* components of x will be less than the overflow threshold. If the +* unscaled problem will not cause overflow, the Level 2 BLAS routine +* ZTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j), +* then s is set to 0 and a non-trivial solution to A*x = 0 is returned. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* TRANS (input) CHARACTER*1 +* Specifies the operation applied to A. +* = 'N': Solve A * x = s*b (No transpose) +* = 'T': Solve A**T * x = s*b (Transpose) +* = 'C': Solve A**H * x = s*b (Conjugate transpose) +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* NORMIN (input) CHARACTER*1 +* Specifies whether CNORM has been set or not. +* = 'Y': CNORM contains the column norms on entry +* = 'N': CNORM is not set on entry. On exit, the norms will +* be computed and stored in CNORM. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading n by n +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. If UPLO = 'L', the leading n by n lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max (1,N). +* +* X (input/output) COMPLEX*16 array, dimension (N) +* On entry, the right hand side b of the triangular system. +* On exit, X is overwritten by the solution vector x. +* +* SCALE (output) DOUBLE PRECISION +* The scaling factor s for the triangular system +* A * x = s*b, A**T * x = s*b, or A**H * x = s*b. +* If SCALE = 0, the matrix A is singular or badly scaled, and +* the vector x is an exact or approximate solution to A*x = 0. +* +* CNORM (input or output) DOUBLE PRECISION array, dimension (N) +* +* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +* contains the norm of the off-diagonal part of the j-th column +* of A. If TRANS = 'N', CNORM(j) must be greater than or equal +* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +* must be greater than or equal to the 1-norm. +* +* If NORMIN = 'N', CNORM is an output argument and CNORM(j) +* returns the 1-norm of the offdiagonal part of the j-th column +* of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* Further Details +* ======= ======= +* +* A rough bound on x is computed; if that is less than overflow, ZTRSV +* is called, otherwise, specific code is used which checks for possible +* overflow or divide-by-zero at every operation. +* +* A columnwise scheme is used for solving A*x = b. The basic algorithm +* if A is lower triangular is +* +* x[1:n] := b[1:n] +* for j = 1, ..., n +* x(j) := x(j) / A(j,j) +* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +* end +* +* Define bounds on the components of x after j iterations of the loop: +* M(j) = bound on x[1:j] +* G(j) = bound on x[j+1:n] +* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +* +* Then for iteration j+1 we have +* M(j+1) <= G(j) / | A(j+1,j+1) | +* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +* +* where CNORM(j+1) is greater than or equal to the infinity-norm of +* column j+1 of A, not counting the diagonal. Hence +* +* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +* 1<=i<=j +* and +* +* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +* 1<=i< j +* +* Since |x(j)| <= M(j), we use the Level 2 BLAS routine ZTRSV if the +* reciprocal of the largest M(j), j=1,..,n, is larger than +* max(underflow, 1/overflow). +* +* The bound on x(j) is also used to determine when a step in the +* columnwise method can be performed without fear of overflow. If +* the computed bound is greater than a large constant, x is scaled to +* prevent overflow, but if the bound overflows, x is set to 0, x(j) to +* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +* +* Similarly, a row-wise scheme is used to solve A**T *x = b or +* A**H *x = b. The basic algorithm for A upper triangular is +* +* for j = 1, ..., n +* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +* end +* +* We simultaneously compute two bounds +* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +* M(j) = bound on x(i), 1<=i<=j +* +* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +* Then the bound on x(j) is +* +* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +* +* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +* 1<=i<=j +* +* and we can safely call ZTRSV if 1/M(n) and 1/G(n) are both greater +* than max(underflow, 1/overflow). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0, + $ TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, J, JFIRST, JINC, JLAST + DOUBLE PRECISION BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL, + $ XBND, XJ, XMAX + COMPLEX*16 CSUMJ, TJJS, USCAL, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX, IZAMAX + DOUBLE PRECISION DLAMCH, DZASUM + COMPLEX*16 ZDOTC, ZDOTU, ZLADIV + EXTERNAL LSAME, IDAMAX, IZAMAX, DLAMCH, DZASUM, ZDOTC, + $ ZDOTU, ZLADIV +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, XERBLA, ZAXPY, ZDSCAL, ZTRSV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1, CABS2 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) + CABS2( ZDUM ) = ABS( DBLE( ZDUM ) / 2.D0 ) + + $ ABS( DIMAG( ZDUM ) / 2.D0 ) +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLATRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM / DLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + DO 10 J = 1, N + CNORM( J ) = DZASUM( J-1, A( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + DO 20 J = 1, N - 1 + CNORM( J ) = DZASUM( N-J, A( J+1, J ), 1 ) + 20 CONTINUE + CNORM( N ) = ZERO + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM/2. +* + IMAX = IDAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM*HALF ) THEN + TSCAL = ONE + ELSE + TSCAL = HALF / ( SMLNUM*TMAX ) + CALL DSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine ZTRSV can be used. +* + XMAX = ZERO + DO 30 J = 1, N + XMAX = MAX( XMAX, CABS2( X( J ) ) ) + 30 CONTINUE + XBND = XMAX +* + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 60 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* + TJJS = A( J, J ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = G(j-1) / abs(A(j,j)) +* + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF +* + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + 40 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 50 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 50 CONTINUE + END IF + 60 CONTINUE +* + ELSE +* +* Compute the growth in A**T * x = b or A**H * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 90 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* + TJJS = A( J, J ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF + 70 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 80 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 80 CONTINUE + END IF + 90 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL ZTRSV( UPLO, TRANS, DIAG, N, A, LDA, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM*HALF ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = ( BIGNUM*HALF ) / XMAX + CALL ZDSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + ELSE + XMAX = XMAX*TWO + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + DO 120 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 110 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 100 I = 1, N + X( I ) = ZERO + 100 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 110 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL ZDSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +* + CALL ZAXPY( J-1, -X( J )*TSCAL, A( 1, J ), 1, X, + $ 1 ) + I = IZAMAX( J-1, X, 1 ) + XMAX = CABS1( X( I ) ) + END IF + ELSE + IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +* + CALL ZAXPY( N-J, -X( J )*TSCAL, A( J+1, J ), 1, + $ X( J+1 ), 1 ) + I = J + IZAMAX( N-J, X( J+1 ), 1 ) + XMAX = CABS1( X( I ) ) + END IF + END IF + 120 CONTINUE +* + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Solve A**T * x = b +* + DO 170 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = ZLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.DCMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call ZDOTU to perform the dot product. +* + IF( UPPER ) THEN + CSUMJ = ZDOTU( J-1, A( 1, J ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + CSUMJ = ZDOTU( N-J, A( J+1, J ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 130 I = 1, J - 1 + CSUMJ = CSUMJ + ( A( I, J )*USCAL )*X( I ) + 130 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 140 I = J + 1, N + CSUMJ = CSUMJ + ( A( I, J )*USCAL )*X( I ) + 140 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.DCMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 160 + END IF +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**T *x = 0. +* + DO 150 I = 1, N + X( I ) = ZERO + 150 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 160 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = ZLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + 170 CONTINUE +* + ELSE +* +* Solve A**H * x = b +* + DO 220 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = DCONJG( A( J, J ) )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = ZLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.DCMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call ZDOTC to perform the dot product. +* + IF( UPPER ) THEN + CSUMJ = ZDOTC( J-1, A( 1, J ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + CSUMJ = ZDOTC( N-J, A( J+1, J ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 180 I = 1, J - 1 + CSUMJ = CSUMJ + ( DCONJG( A( I, J ) )*USCAL )* + $ X( I ) + 180 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 190 I = J + 1, N + CSUMJ = CSUMJ + ( DCONJG( A( I, J ) )*USCAL )* + $ X( I ) + 190 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.DCMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = DCONJG( A( J, J ) )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 210 + END IF +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**H *x = 0. +* + DO 200 I = 1, N + X( I ) = ZERO + 200 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 210 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = ZLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + 220 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL DSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of ZLATRS +* + END diff --git a/costa/native/external/lapack/zlatrz.f b/costa/native/external/lapack/zlatrz.f new file mode 100644 index 000000000..5e82f3d20 --- /dev/null +++ b/costa/native/external/lapack/zlatrz.f @@ -0,0 +1,134 @@ + SUBROUTINE ZLATRZ( M, N, L, A, LDA, TAU, WORK ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER L, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZLATRZ factors the M-by-(M+L) complex upper trapezoidal matrix +* [ A1 A2 ] = [ A(1:M,1:M) A(1:M,N-L+1:N) ] as ( R 0 ) * Z by means +* of unitary transformations, where Z is an (M+L)-by-(M+L) unitary +* matrix and, R and A1 are M-by-M upper triangular matrices. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* L (input) INTEGER +* The number of columns of the matrix A containing the +* meaningful part of the Householder vectors. N-M >= L >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the leading M-by-N upper trapezoidal part of the +* array A must contain the matrix to be factorized. +* On exit, the leading M-by-M upper triangular part of A +* contains the upper triangular matrix R, and elements N-L+1 to +* N of the first M rows of A, with the array TAU, represent the +* unitary matrix Z as a product of M elementary reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX*16 array, dimension (M) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace) COMPLEX*16 array, dimension (M) +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* The factorization is obtained by Householder's method. The kth +* transformation matrix, Z( k ), which is used to introduce zeros into +* the ( m - k + 1 )th row of A, is given in the form +* +* Z( k ) = ( I 0 ), +* ( 0 T( k ) ) +* +* where +* +* T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), +* ( 0 ) +* ( z( k ) ) +* +* tau is a scalar and z( k ) is an l element vector. tau and z( k ) +* are chosen to annihilate the elements of the kth row of A2. +* +* The scalar tau is returned in the kth element of TAU and the vector +* u( k ) in the kth row of A2, such that the elements of z( k ) are +* in a( k, l + 1 ), ..., a( k, n ). The elements of R are returned in +* the upper triangular part of A1. +* +* Z is given by +* +* Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL ZLACGV, ZLARFG, ZLARZ +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.EQ.0 ) THEN + RETURN + ELSE IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + RETURN + END IF +* + DO 20 I = M, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* [ A(i,i) A(i,n-l+1:n) ] +* + CALL ZLACGV( L, A( I, N-L+1 ), LDA ) + ALPHA = DCONJG( A( I, I ) ) + CALL ZLARFG( L+1, ALPHA, A( I, N-L+1 ), LDA, TAU( I ) ) + TAU( I ) = DCONJG( TAU( I ) ) +* +* Apply H(i) to A(1:i-1,i:n) from the right +* + CALL ZLARZ( 'Right', I-1, N-I+1, L, A( I, N-L+1 ), LDA, + $ DCONJG( TAU( I ) ), A( 1, I ), LDA, WORK ) + A( I, I ) = DCONJG( ALPHA ) +* + 20 CONTINUE +* + RETURN +* +* End of ZLATRZ +* + END diff --git a/costa/native/external/lapack/zlatzm.f b/costa/native/external/lapack/zlatzm.f new file mode 100644 index 000000000..e81223f45 --- /dev/null +++ b/costa/native/external/lapack/zlatzm.f @@ -0,0 +1,147 @@ + SUBROUTINE ZLATZM( SIDE, M, N, V, INCV, TAU, C1, C2, LDC, WORK ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + COMPLEX*16 TAU +* .. +* .. Array Arguments .. + COMPLEX*16 C1( LDC, * ), C2( LDC, * ), V( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine ZUNMRZ. +* +* ZLATZM applies a Householder matrix generated by ZTZRQF to a matrix. +* +* Let P = I - tau*u*u', u = ( 1 ), +* ( v ) +* where v is an (m-1) vector if SIDE = 'L', or a (n-1) vector if +* SIDE = 'R'. +* +* If SIDE equals 'L', let +* C = [ C1 ] 1 +* [ C2 ] m-1 +* n +* Then C is overwritten by P*C. +* +* If SIDE equals 'R', let +* C = [ C1, C2 ] m +* 1 n-1 +* Then C is overwritten by C*P. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': form P * C +* = 'R': form C * P +* +* M (input) INTEGER +* The number of rows of the matrix C. +* +* N (input) INTEGER +* The number of columns of the matrix C. +* +* V (input) COMPLEX*16 array, dimension +* (1 + (M-1)*abs(INCV)) if SIDE = 'L' +* (1 + (N-1)*abs(INCV)) if SIDE = 'R' +* The vector v in the representation of P. V is not used +* if TAU = 0. +* +* INCV (input) INTEGER +* The increment between elements of v. INCV <> 0 +* +* TAU (input) COMPLEX*16 +* The value tau in the representation of P. +* +* C1 (input/output) COMPLEX*16 array, dimension +* (LDC,N) if SIDE = 'L' +* (M,1) if SIDE = 'R' +* On entry, the n-vector C1 if SIDE = 'L', or the m-vector C1 +* if SIDE = 'R'. +* +* On exit, the first row of P*C if SIDE = 'L', or the first +* column of C*P if SIDE = 'R'. +* +* C2 (input/output) COMPLEX*16 array, dimension +* (LDC, N) if SIDE = 'L' +* (LDC, N-1) if SIDE = 'R' +* On entry, the (m - 1) x n matrix C2 if SIDE = 'L', or the +* m x (n - 1) matrix C2 if SIDE = 'R'. +* +* On exit, rows 2:m of P*C if SIDE = 'L', or columns 2:m of C*P +* if SIDE = 'R'. +* +* LDC (input) INTEGER +* The leading dimension of the arrays C1 and C2. +* LDC >= max(1,M). +* +* WORK (workspace) COMPLEX*16 array, dimension +* (N) if SIDE = 'L' +* (M) if SIDE = 'R' +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZCOPY, ZGEMV, ZGERC, ZGERU, ZLACGV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( ( MIN( M, N ).EQ.0 ) .OR. ( TAU.EQ.ZERO ) ) + $ RETURN +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* w := conjg( C1 + v' * C2 ) +* + CALL ZCOPY( N, C1, LDC, WORK, 1 ) + CALL ZLACGV( N, WORK, 1 ) + CALL ZGEMV( 'Conjugate transpose', M-1, N, ONE, C2, LDC, V, + $ INCV, ONE, WORK, 1 ) +* +* [ C1 ] := [ C1 ] - tau* [ 1 ] * w' +* [ C2 ] [ C2 ] [ v ] +* + CALL ZLACGV( N, WORK, 1 ) + CALL ZAXPY( N, -TAU, WORK, 1, C1, LDC ) + CALL ZGERU( M-1, N, -TAU, V, INCV, WORK, 1, C2, LDC ) +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* w := C1 + C2 * v +* + CALL ZCOPY( M, C1, 1, WORK, 1 ) + CALL ZGEMV( 'No transpose', M, N-1, ONE, C2, LDC, V, INCV, ONE, + $ WORK, 1 ) +* +* [ C1, C2 ] := [ C1, C2 ] - tau* w * [ 1 , v'] +* + CALL ZAXPY( M, -TAU, WORK, 1, C1, 1 ) + CALL ZGERC( M, N-1, -TAU, WORK, 1, V, INCV, C2, LDC ) + END IF +* + RETURN +* +* End of ZLATZM +* + END diff --git a/costa/native/external/lapack/zlauu2.f b/costa/native/external/lapack/zlauu2.f new file mode 100644 index 000000000..99502aac6 --- /dev/null +++ b/costa/native/external/lapack/zlauu2.f @@ -0,0 +1,144 @@ + SUBROUTINE ZLAUU2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZLAUU2 computes the product U * U' or L' * L, where the triangular +* factor U or L is stored in the upper or lower triangular part of +* the array A. +* +* If UPLO = 'U' or 'u' then the upper triangle of the result is stored, +* overwriting the factor U in A. +* If UPLO = 'L' or 'l' then the lower triangle of the result is stored, +* overwriting the factor L in A. +* +* This is the unblocked form of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the triangular factor stored in the array A +* is upper or lower triangular: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the triangular factor U or L. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the triangular factor U or L. +* On exit, if UPLO = 'U', the upper triangle of A is +* overwritten with the upper triangle of the product U * U'; +* if UPLO = 'L', the lower triangle of A is overwritten with +* the lower triangle of the product L' * L. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + DOUBLE PRECISION AII +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZGEMV, ZLACGV +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLAUU2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the product U * U'. +* + DO 10 I = 1, N + AII = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = AII*AII + DBLE( ZDOTC( N-I, A( I, I+1 ), LDA, + $ A( I, I+1 ), LDA ) ) + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + CALL ZGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( I, I+1 ), LDA, DCMPLX( AII ), + $ A( 1, I ), 1 ) + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + ELSE + CALL ZDSCAL( I, AII, A( 1, I ), 1 ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the product L' * L. +* + DO 20 I = 1, N + AII = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = AII*AII + DBLE( ZDOTC( N-I, A( I+1, I ), 1, + $ A( I+1, I ), 1 ) ) + CALL ZLACGV( I-1, A( I, 1 ), LDA ) + CALL ZGEMV( 'Conjugate transpose', N-I, I-1, ONE, + $ A( I+1, 1 ), LDA, A( I+1, I ), 1, + $ DCMPLX( AII ), A( I, 1 ), LDA ) + CALL ZLACGV( I-1, A( I, 1 ), LDA ) + ELSE + CALL ZDSCAL( I, AII, A( I, 1 ), LDA ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of ZLAUU2 +* + END diff --git a/costa/native/external/lapack/zlauum.f b/costa/native/external/lapack/zlauum.f new file mode 100644 index 000000000..07fbf5e87 --- /dev/null +++ b/costa/native/external/lapack/zlauum.f @@ -0,0 +1,161 @@ + SUBROUTINE ZLAUUM( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZLAUUM computes the product U * U' or L' * L, where the triangular +* factor U or L is stored in the upper or lower triangular part of +* the array A. +* +* If UPLO = 'U' or 'u' then the upper triangle of the result is stored, +* overwriting the factor U in A. +* If UPLO = 'L' or 'l' then the lower triangle of the result is stored, +* overwriting the factor L in A. +* +* This is the blocked form of the algorithm, calling Level 3 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the triangular factor stored in the array A +* is upper or lower triangular: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the triangular factor U or L. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the triangular factor U or L. +* On exit, if UPLO = 'U', the upper triangle of A is +* overwritten with the upper triangle of the product U * U'; +* if UPLO = 'L', the lower triangle of A is overwritten with +* the lower triangle of the product L' * L. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMM, ZHERK, ZLAUU2, ZTRMM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLAUUM', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'ZLAUUM', UPLO, N, -1, -1, -1 ) +* + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL ZLAUU2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code +* + IF( UPPER ) THEN +* +* Compute the product U * U'. +* + DO 10 I = 1, N, NB + IB = MIN( NB, N-I+1 ) + CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Non-unit', I-1, IB, CONE, A( I, I ), LDA, + $ A( 1, I ), LDA ) + CALL ZLAUU2( 'Upper', IB, A( I, I ), LDA, INFO ) + IF( I+IB.LE.N ) THEN + CALL ZGEMM( 'No transpose', 'Conjugate transpose', + $ I-1, IB, N-I-IB+1, CONE, A( 1, I+IB ), + $ LDA, A( I, I+IB ), LDA, CONE, A( 1, I ), + $ LDA ) + CALL ZHERK( 'Upper', 'No transpose', IB, N-I-IB+1, + $ ONE, A( I, I+IB ), LDA, ONE, A( I, I ), + $ LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute the product L' * L. +* + DO 20 I = 1, N, NB + IB = MIN( NB, N-I+1 ) + CALL ZTRMM( 'Left', 'Lower', 'Conjugate transpose', + $ 'Non-unit', IB, I-1, CONE, A( I, I ), LDA, + $ A( I, 1 ), LDA ) + CALL ZLAUU2( 'Lower', IB, A( I, I ), LDA, INFO ) + IF( I+IB.LE.N ) THEN + CALL ZGEMM( 'Conjugate transpose', 'No transpose', IB, + $ I-1, N-I-IB+1, CONE, A( I+IB, I ), LDA, + $ A( I+IB, 1 ), LDA, CONE, A( I, 1 ), LDA ) + CALL ZHERK( 'Lower', 'Conjugate transpose', IB, + $ N-I-IB+1, ONE, A( I+IB, I ), LDA, ONE, + $ A( I, I ), LDA ) + END IF + 20 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZLAUUM +* + END diff --git a/costa/native/external/lapack/zpbcon.f b/costa/native/external/lapack/zpbcon.f new file mode 100644 index 000000000..4c5f15cf8 --- /dev/null +++ b/costa/native/external/lapack/zpbcon.f @@ -0,0 +1,194 @@ + SUBROUTINE ZPBCON( UPLO, N, KD, AB, LDAB, ANORM, RCOND, WORK, + $ RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 AB( LDAB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZPBCON estimates the reciprocal of the condition number (in the +* 1-norm) of a complex Hermitian positive definite band matrix using +* the Cholesky factorization A = U**H*U or A = L*L**H computed by +* ZPBTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular factor stored in AB; +* = 'L': Lower triangular factor stored in AB. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +* +* AB (input) COMPLEX*16 array, dimension (LDAB,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**H*U or A = L*L**H of the band matrix A, stored in the +* first KD+1 rows of the array. The j-th column of U or L is +* stored in the j-th column of the array AB as follows: +* if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; +* if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* ANORM (input) DOUBLE PRECISION +* The 1-norm (or infinity-norm) of the Hermitian band matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + DOUBLE PRECISION AINVNM, SCALE, SCALEL, SCALEU, SMLNUM + COMPLEX*16 ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IZAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDRSCL, ZLACON, ZLATBS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U'). +* + CALL ZLATBS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, KD, AB, LDAB, WORK, SCALEL, RWORK, + $ INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL ZLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEU, RWORK, INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL ZLATBS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L'). +* + CALL ZLATBS( 'Lower', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, KD, AB, LDAB, WORK, SCALEU, RWORK, + $ INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = IZAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL ZDRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE +* + RETURN +* +* End of ZPBCON +* + END diff --git a/costa/native/external/lapack/zpbequ.f b/costa/native/external/lapack/zpbequ.f new file mode 100644 index 000000000..9e1bcb1c6 --- /dev/null +++ b/costa/native/external/lapack/zpbequ.f @@ -0,0 +1,168 @@ + SUBROUTINE ZPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION S( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* ZPBEQU computes row and column scalings intended to equilibrate a +* Hermitian positive definite band matrix A and reduce its condition +* number (with respect to the two-norm). S contains the scale factors, +* S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with +* elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This +* choice of S puts the condition number of B within a factor N of the +* smallest possible condition number over all possible diagonal +* scalings. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular of A is stored; +* = 'L': Lower triangular of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input) COMPLEX*16 array, dimension (LDAB,N) +* The upper or lower triangle of the Hermitian band matrix A, +* stored in the first KD+1 rows of the array. The j-th column +* of A is stored in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array A. LDAB >= KD+1. +* +* S (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, S contains the scale factors for A. +* +* SCOND (output) DOUBLE PRECISION +* If INFO = 0, S contains the ratio of the smallest S(i) to +* the largest S(i). If SCOND >= 0.1 and AMAX is neither too +* large nor too small, it is not worth scaling by S. +* +* AMAX (output) DOUBLE PRECISION +* Absolute value of largest matrix element. If AMAX is very +* close to overflow or very close to underflow, the matrix +* should be scaled. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, the i-th diagonal element is nonpositive. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, J + DOUBLE PRECISION SMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPBEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* + IF( UPPER ) THEN + J = KD + 1 + ELSE + J = 1 + END IF +* +* Initialize SMIN and AMAX. +* + S( 1 ) = DBLE( AB( J, 1 ) ) + SMIN = S( 1 ) + AMAX = S( 1 ) +* +* Find the minimum and maximum diagonal elements. +* + DO 10 I = 2, N + S( I ) = DBLE( AB( J, I ) ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 20 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 20 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 30 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 30 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of ZPBEQU +* + END diff --git a/costa/native/external/lapack/zpbrfs.f b/costa/native/external/lapack/zpbrfs.f new file mode 100644 index 000000000..43a37d26b --- /dev/null +++ b/costa/native/external/lapack/zpbrfs.f @@ -0,0 +1,342 @@ + SUBROUTINE ZPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, + $ LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZPBRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is Hermitian positive definite +* and banded, and provides error bounds and backward error estimates +* for the solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) +* The upper or lower triangle of the Hermitian band matrix A, +* stored in the first KD+1 rows of the array. The j-th column +* of A is stored in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* AFB (input) COMPLEX*16 array, dimension (LDAFB,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**H*U or A = L*L**H of the band matrix A as computed by +* ZPBTRF, in the same storage format as A (see AB). +* +* LDAFB (input) INTEGER +* The leading dimension of the array AFB. LDAFB >= KD+1. +* +* B (input) COMPLEX*16 array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) COMPLEX*16 array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by ZPBTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, L, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZHBMV, ZLACON, ZPBTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDAFB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = MIN( N+1, 2*KD+2 ) + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL ZHBMV( UPLO, N, KD, -ONE, AB, LDAB, X( 1, J ), 1, ONE, + $ WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + L = KD + 1 - K + DO 40 I = MAX( 1, K-KD ), K - 1 + RWORK( I ) = RWORK( I ) + CABS1( AB( L+I, K ) )*XK + S = S + CABS1( AB( L+I, K ) )*CABS1( X( I, J ) ) + 40 CONTINUE + RWORK( K ) = RWORK( K ) + ABS( DBLE( AB( KD+1, K ) ) )* + $ XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + ABS( DBLE( AB( 1, K ) ) )*XK + L = 1 - K + DO 60 I = K + 1, MIN( N, K+KD ) + RWORK( I ) = RWORK( I ) + CABS1( AB( L+I, K ) )*XK + S = S + CABS1( AB( L+I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL ZPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK, N, INFO ) + CALL ZAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL ZPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL ZPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of ZPBRFS +* + END diff --git a/costa/native/external/lapack/zpbstf.f b/costa/native/external/lapack/zpbstf.f new file mode 100644 index 000000000..11d004e16 --- /dev/null +++ b/costa/native/external/lapack/zpbstf.f @@ -0,0 +1,264 @@ + SUBROUTINE ZPBSTF( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + COMPLEX*16 AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* ZPBSTF computes a split Cholesky factorization of a complex +* Hermitian positive definite band matrix A. +* +* This routine is designed to be used in conjunction with ZHBGST. +* +* The factorization has the form A = S**H*S where S is a band matrix +* of the same bandwidth as A and the following structure: +* +* S = ( U ) +* ( M L ) +* +* where U is upper triangular of order m = (n+kd)/2, and L is lower +* triangular of order n-m. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) COMPLEX*16 array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first kd+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, if INFO = 0, the factor S from the split Cholesky +* factorization A = S**H*S. See Further Details. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the factorization could not be completed, +* because the updated element a(i,i) was negative; the +* matrix A is not positive definite. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 7, KD = 2: +* +* S = ( s11 s12 s13 ) +* ( s22 s23 s24 ) +* ( s33 s34 ) +* ( s44 ) +* ( s53 s54 s55 ) +* ( s64 s65 s66 ) +* ( s75 s76 s77 ) +* +* If UPLO = 'U', the array AB holds: +* +* on entry: on exit: +* +* * * a13 a24 a35 a46 a57 * * s13 s24 s53' s64' s75' +* * a12 a23 a34 a45 a56 a67 * s12 s23 s34 s54' s65' s76' +* a11 a22 a33 a44 a55 a66 a77 s11 s22 s33 s44 s55 s66 s77 +* +* If UPLO = 'L', the array AB holds: +* +* on entry: on exit: +* +* a11 a22 a33 a44 a55 a66 a77 s11 s22 s33 s44 s55 s66 s77 +* a21 a32 a43 a54 a65 a76 * s12' s23' s34' s54 s65 s76 * +* a31 a42 a53 a64 a64 * * s13' s24' s53 s64 s75 * * +* +* Array elements marked * are not used by the routine; s12' denotes +* conjg(s12); the diagonal elements of S are real. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, KLD, KM, M + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZHER, ZLACGV +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPBSTF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + KLD = MAX( 1, LDAB-1 ) +* +* Set the splitting point m. +* + M = ( N+KD ) / 2 +* + IF( UPPER ) THEN +* +* Factorize A(m+1:n,m+1:n) as L**H*L, and update A(1:m,1:m). +* + DO 10 J = N, M + 1, -1 +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = DBLE( AB( KD+1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( KD+1, J ) = AJJ + GO TO 50 + END IF + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ + KM = MIN( J-1, KD ) +* +* Compute elements j-km:j-1 of the j-th column and update the +* the leading submatrix within the band. +* + CALL ZDSCAL( KM, ONE / AJJ, AB( KD+1-KM, J ), 1 ) + CALL ZHER( 'Upper', KM, -ONE, AB( KD+1-KM, J ), 1, + $ AB( KD+1, J-KM ), KLD ) + 10 CONTINUE +* +* Factorize the updated submatrix A(1:m,1:m) as U**H*U. +* + DO 20 J = 1, M +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = DBLE( AB( KD+1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( KD+1, J ) = AJJ + GO TO 50 + END IF + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ + KM = MIN( KD, M-J ) +* +* Compute elements j+1:j+km of the j-th row and update the +* trailing submatrix within the band. +* + IF( KM.GT.0 ) THEN + CALL ZDSCAL( KM, ONE / AJJ, AB( KD, J+1 ), KLD ) + CALL ZLACGV( KM, AB( KD, J+1 ), KLD ) + CALL ZHER( 'Upper', KM, -ONE, AB( KD, J+1 ), KLD, + $ AB( KD+1, J+1 ), KLD ) + CALL ZLACGV( KM, AB( KD, J+1 ), KLD ) + END IF + 20 CONTINUE + ELSE +* +* Factorize A(m+1:n,m+1:n) as L**H*L, and update A(1:m,1:m). +* + DO 30 J = N, M + 1, -1 +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = DBLE( AB( 1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( 1, J ) = AJJ + GO TO 50 + END IF + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ + KM = MIN( J-1, KD ) +* +* Compute elements j-km:j-1 of the j-th row and update the +* trailing submatrix within the band. +* + CALL ZDSCAL( KM, ONE / AJJ, AB( KM+1, J-KM ), KLD ) + CALL ZLACGV( KM, AB( KM+1, J-KM ), KLD ) + CALL ZHER( 'Lower', KM, -ONE, AB( KM+1, J-KM ), KLD, + $ AB( 1, J-KM ), KLD ) + CALL ZLACGV( KM, AB( KM+1, J-KM ), KLD ) + 30 CONTINUE +* +* Factorize the updated submatrix A(1:m,1:m) as U**H*U. +* + DO 40 J = 1, M +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = DBLE( AB( 1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( 1, J ) = AJJ + GO TO 50 + END IF + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ + KM = MIN( KD, M-J ) +* +* Compute elements j+1:j+km of the j-th column and update the +* trailing submatrix within the band. +* + IF( KM.GT.0 ) THEN + CALL ZDSCAL( KM, ONE / AJJ, AB( 2, J ), 1 ) + CALL ZHER( 'Lower', KM, -ONE, AB( 2, J ), 1, + $ AB( 1, J+1 ), KLD ) + END IF + 40 CONTINUE + END IF + RETURN +* + 50 CONTINUE + INFO = J + RETURN +* +* End of ZPBSTF +* + END diff --git a/costa/native/external/lapack/zpbsv.f b/costa/native/external/lapack/zpbsv.f new file mode 100644 index 000000000..b93d171a7 --- /dev/null +++ b/costa/native/external/lapack/zpbsv.f @@ -0,0 +1,152 @@ + SUBROUTINE ZPBSV( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 AB( LDAB, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZPBSV computes the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N Hermitian positive definite band matrix and X +* and B are N-by-NRHS matrices. +* +* The Cholesky decomposition is used to factor A as +* A = U**H * U, if UPLO = 'U', or +* A = L * L**H, if UPLO = 'L', +* where U is an upper triangular band matrix, and L is a lower +* triangular band matrix, with the same number of superdiagonals or +* subdiagonals as A. The factored form of A is then used to solve the +* system of equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input/output) COMPLEX*16 array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(N,j+KD). +* See below for further details. +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U**H*U or A = L*L**H of the band +* matrix A, in the same storage format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i of A is not +* positive definite, so the factorization could not be +* completed, and the solution has not been computed. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 6, KD = 2, and UPLO = 'U': +* +* On entry: On exit: +* +* * * a13 a24 a35 a46 * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* +* Similarly, if UPLO = 'L' the format of A is as follows: +* +* On entry: On exit: +* +* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +* a31 a42 a53 a64 * * l31 l42 l53 l64 * * +* +* Array elements marked * are not used by the routine. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZPBTRF, ZPBTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPBSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + CALL ZPBTRF( UPLO, N, KD, AB, LDAB, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* + END IF + RETURN +* +* End of ZPBSV +* + END diff --git a/costa/native/external/lapack/zpbsvx.f b/costa/native/external/lapack/zpbsvx.f new file mode 100644 index 000000000..4d5be185a --- /dev/null +++ b/costa/native/external/lapack/zpbsvx.f @@ -0,0 +1,423 @@ + SUBROUTINE ZPBSVX( FACT, UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, + $ EQUED, S, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ), S( * ) + COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZPBSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to +* compute the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N Hermitian positive definite band matrix and X +* and B are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'E', real scaling factors are computed to equilibrate +* the system: +* diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +* Whether or not the system will be equilibrated depends on the +* scaling of the matrix A, but if equilibration is used, A is +* overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +* +* 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +* factor the matrix A (after equilibration if FACT = 'E') as +* A = U**H * U, if UPLO = 'U', or +* A = L * L**H, if UPLO = 'L', +* where U is an upper triangular band matrix, and L is a lower +* triangular band matrix. +* +* 3. If the leading i-by-i principal minor is not positive definite, +* then the routine returns with INFO = i. Otherwise, the factored +* form of A is used to estimate the condition number of the matrix +* A. If the reciprocal of the condition number is less than machine +* precision, INFO = N+1 is returned as a warning, but the routine +* still goes on to solve for X and compute error bounds as +* described below. +* +* 4. The system of equations is solved for X using the factored form +* of A. +* +* 5. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* 6. If equilibration was used, the matrix X is premultiplied by +* diag(S) so that it solves the original system before +* equilibration. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of the matrix A is +* supplied on entry, and if not, whether the matrix A should be +* equilibrated before it is factored. +* = 'F': On entry, AFB contains the factored form of A. +* If EQUED = 'Y', the matrix A has been equilibrated +* with scaling factors given by S. AB and AFB will not +* be modified. +* = 'N': The matrix A will be copied to AFB and factored. +* = 'E': The matrix A will be equilibrated if necessary, then +* copied to AFB and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right-hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AB (input/output) COMPLEX*16 array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first KD+1 rows of the array, except +* if FACT = 'F' and EQUED = 'Y', then A must contain the +* equilibrated matrix diag(S)*A*diag(S). The j-th column of A +* is stored in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(N,j+KD). +* See below for further details. +* +* On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +* diag(S)*A*diag(S). +* +* LDAB (input) INTEGER +* The leading dimension of the array A. LDAB >= KD+1. +* +* AFB (input or output) COMPLEX*16 array, dimension (LDAFB,N) +* If FACT = 'F', then AFB is an input argument and on entry +* contains the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H of the band matrix +* A, in the same storage format as A (see AB). If EQUED = 'Y', +* then AFB is the factored form of the equilibrated matrix A. +* +* If FACT = 'N', then AFB is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H. +* +* If FACT = 'E', then AFB is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H of the equilibrated +* matrix A (see the description of A for the form of the +* equilibrated matrix). +* +* LDAFB (input) INTEGER +* The leading dimension of the array AFB. LDAFB >= KD+1. +* +* EQUED (input or output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration (always true if FACT = 'N'). +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* EQUED is an input argument if FACT = 'F'; otherwise, it is an +* output argument. +* +* S (input or output) DOUBLE PRECISION array, dimension (N) +* The scale factors for A; not accessed if EQUED = 'N'. S is +* an input argument if FACT = 'F'; otherwise, S is an output +* argument. If FACT = 'F' and EQUED = 'Y', each element of S +* must be positive. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +* B is overwritten by diag(S) * B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) COMPLEX*16 array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +* the original system of equations. Note that if EQUED = 'Y', +* A and B are modified on exit, and the solution to the +* equilibrated system is inv(diag(S))*X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) DOUBLE PRECISION +* The estimate of the reciprocal condition number of the matrix +* A after equilibration (if done). If RCOND is less than the +* machine precision (in particular, if RCOND = 0), the matrix +* is singular to working precision. This condition is +* indicated by a return code of INFO > 0. +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: the leading minor of order i of A is +* not positive definite, so the factorization +* could not be completed, and the solution has not +* been computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 6, KD = 2, and UPLO = 'U': +* +* Two-dimensional storage of the Hermitian matrix A: +* +* a11 a12 a13 +* a22 a23 a24 +* a33 a34 a35 +* a44 a45 a46 +* a55 a56 +* (aij=conjg(aji)) a66 +* +* Band storage of the upper triangle of A: +* +* * * a13 a24 a35 a46 +* * a12 a23 a34 a45 a56 +* a11 a22 a33 a44 a55 a66 +* +* Similarly, if UPLO = 'L' the format of A is as follows: +* +* a11 a22 a33 a44 a55 a66 +* a21 a32 a43 a54 a65 * +* a31 a42 a53 a64 * * +* +* Array elements marked * are not used by the routine. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU, UPPER + INTEGER I, INFEQU, J, J1, J2 + DOUBLE PRECISION AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHB + EXTERNAL LSAME, DLAMCH, ZLANHB +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZLACPY, ZLAQHB, ZPBCON, ZPBEQU, + $ ZPBRFS, ZPBTRF, ZPBTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + UPPER = LSAME( UPLO, 'U' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( LDAFB.LT.KD+1 ) THEN + INFO = -9 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPBSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL ZPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL ZLAQHB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + IF( UPPER ) THEN + DO 40 J = 1, N + J1 = MAX( J-KD, 1 ) + CALL ZCOPY( J-J1+1, AB( KD+1-J+J1, J ), 1, + $ AFB( KD+1-J+J1, J ), 1 ) + 40 CONTINUE + ELSE + DO 50 J = 1, N + J2 = MIN( J+KD, N ) + CALL ZCOPY( J2-J+1, AB( 1, J ), 1, AFB( 1, J ), 1 ) + 50 CONTINUE + END IF +* + CALL ZPBTRF( UPLO, N, KD, AFB, LDAFB, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = ZLANHB( '1', UPLO, N, KD, AB, LDAB, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL ZPBCON( UPLO, N, KD, AFB, LDAFB, ANORM, RCOND, WORK, RWORK, + $ INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution matrix X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZPBTRS( UPLO, N, KD, NRHS, AFB, LDAFB, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL ZPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 70 J = 1, NRHS + DO 60 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 60 CONTINUE + 70 CONTINUE + DO 80 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 80 CONTINUE + END IF +* + RETURN +* +* End of ZPBSVX +* + END diff --git a/costa/native/external/lapack/zpbtf2.f b/costa/native/external/lapack/zpbtf2.f new file mode 100644 index 000000000..2b0d5a7a4 --- /dev/null +++ b/costa/native/external/lapack/zpbtf2.f @@ -0,0 +1,201 @@ + SUBROUTINE ZPBTF2( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + COMPLEX*16 AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* ZPBTF2 computes the Cholesky factorization of a complex Hermitian +* positive definite band matrix A. +* +* The factorization has the form +* A = U' * U , if UPLO = 'U', or +* A = L * L', if UPLO = 'L', +* where U is an upper triangular matrix, U' is the conjugate transpose +* of U, and L is lower triangular. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* Hermitian matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of super-diagonals of the matrix A if UPLO = 'U', +* or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) COMPLEX*16 array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U'*U or A = L*L' of the band +* matrix A, in the same storage format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, the leading minor of order k is not +* positive definite, and the factorization could not be +* completed. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 6, KD = 2, and UPLO = 'U': +* +* On entry: On exit: +* +* * * a13 a24 a35 a46 * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* +* Similarly, if UPLO = 'L' the format of A is as follows: +* +* On entry: On exit: +* +* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +* a31 a42 a53 a64 * * l31 l42 l53 l64 * * +* +* Array elements marked * are not used by the routine. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, KLD, KN + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZHER, ZLACGV +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPBTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + KLD = MAX( 1, LDAB-1 ) +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = DBLE( AB( KD+1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( KD+1, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ +* +* Compute elements J+1:J+KN of row J and update the +* trailing submatrix within the band. +* + KN = MIN( KD, N-J ) + IF( KN.GT.0 ) THEN + CALL ZDSCAL( KN, ONE / AJJ, AB( KD, J+1 ), KLD ) + CALL ZLACGV( KN, AB( KD, J+1 ), KLD ) + CALL ZHER( 'Upper', KN, -ONE, AB( KD, J+1 ), KLD, + $ AB( KD+1, J+1 ), KLD ) + CALL ZLACGV( KN, AB( KD, J+1 ), KLD ) + END IF + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = DBLE( AB( 1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( 1, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ +* +* Compute elements J+1:J+KN of column J and update the +* trailing submatrix within the band. +* + KN = MIN( KD, N-J ) + IF( KN.GT.0 ) THEN + CALL ZDSCAL( KN, ONE / AJJ, AB( 2, J ), 1 ) + CALL ZHER( 'Lower', KN, -ONE, AB( 2, J ), 1, + $ AB( 1, J+1 ), KLD ) + END IF + 20 CONTINUE + END IF + RETURN +* + 30 CONTINUE + INFO = J + RETURN +* +* End of ZPBTF2 +* + END diff --git a/costa/native/external/lapack/zpbtrf.f b/costa/native/external/lapack/zpbtrf.f new file mode 100644 index 000000000..1f7f66e97 --- /dev/null +++ b/costa/native/external/lapack/zpbtrf.f @@ -0,0 +1,372 @@ + SUBROUTINE ZPBTRF( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + COMPLEX*16 AB( LDAB, * ) +* .. +* +* Purpose +* ======= +* +* ZPBTRF computes the Cholesky factorization of a complex Hermitian +* positive definite band matrix A. +* +* The factorization has the form +* A = U**H * U, if UPLO = 'U', or +* A = L * L**H, if UPLO = 'L', +* where U is an upper triangular matrix and L is lower triangular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* AB (input/output) COMPLEX*16 array, dimension (LDAB,N) +* On entry, the upper or lower triangle of the Hermitian band +* matrix A, stored in the first KD+1 rows of the array. The +* j-th column of A is stored in the j-th column of the array AB +* as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U**H*U or A = L*L**H of the band +* matrix A, in the same storage format as A. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i is not +* positive definite, and the factorization could not be +* completed. +* +* Further Details +* =============== +* +* The band storage scheme is illustrated by the following example, when +* N = 6, KD = 2, and UPLO = 'U': +* +* On entry: On exit: +* +* * * a13 a24 a35 a46 * * u13 u24 u35 u46 +* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +* +* Similarly, if UPLO = 'L' the format of A is as follows: +* +* On entry: On exit: +* +* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +* a31 a42 a53 a64 * * l31 l42 l53 l64 * * +* +* Array elements marked * are not used by the routine. +* +* Contributed by +* Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989 +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) + INTEGER NBMAX, LDWORK + PARAMETER ( NBMAX = 32, LDWORK = NBMAX+1 ) +* .. +* .. Local Scalars .. + INTEGER I, I2, I3, IB, II, J, JJ, NB +* .. +* .. Local Arrays .. + COMPLEX*16 WORK( LDWORK, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMM, ZHERK, ZPBTF2, ZPOTF2, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.LSAME( UPLO, 'U' ) ) .AND. + $ ( .NOT.LSAME( UPLO, 'L' ) ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPBTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment +* + NB = ILAENV( 1, 'ZPBTRF', UPLO, N, KD, -1, -1 ) +* +* The block size must not exceed the semi-bandwidth KD, and must not +* exceed the limit set by the size of the local array WORK. +* + NB = MIN( NB, NBMAX ) +* + IF( NB.LE.1 .OR. NB.GT.KD ) THEN +* +* Use unblocked code +* + CALL ZPBTF2( UPLO, N, KD, AB, LDAB, INFO ) + ELSE +* +* Use blocked code +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Compute the Cholesky factorization of a Hermitian band +* matrix, given the upper triangle of the matrix in band +* storage. +* +* Zero the upper triangle of the work array. +* + DO 20 J = 1, NB + DO 10 I = 1, J - 1 + WORK( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Process the band matrix one diagonal block at a time. +* + DO 70 I = 1, N, NB + IB = MIN( NB, N-I+1 ) +* +* Factorize the diagonal block +* + CALL ZPOTF2( UPLO, IB, AB( KD+1, I ), LDAB-1, II ) + IF( II.NE.0 ) THEN + INFO = I + II - 1 + GO TO 150 + END IF + IF( I+IB.LE.N ) THEN +* +* Update the relevant part of the trailing submatrix. +* If A11 denotes the diagonal block which has just been +* factorized, then we need to update the remaining +* blocks in the diagram: +* +* A11 A12 A13 +* A22 A23 +* A33 +* +* The numbers of rows and columns in the partitioning +* are IB, I2, I3 respectively. The blocks A12, A22 and +* A23 are empty if IB = KD. The upper triangle of A13 +* lies outside the band. +* + I2 = MIN( KD-IB, N-I-IB+1 ) + I3 = MIN( IB, N-I-KD+1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A12 +* + CALL ZTRSM( 'Left', 'Upper', 'Conjugate transpose', + $ 'Non-unit', IB, I2, CONE, + $ AB( KD+1, I ), LDAB-1, + $ AB( KD+1-IB, I+IB ), LDAB-1 ) +* +* Update A22 +* + CALL ZHERK( 'Upper', 'Conjugate transpose', I2, IB, + $ -ONE, AB( KD+1-IB, I+IB ), LDAB-1, ONE, + $ AB( KD+1, I+IB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Copy the lower triangle of A13 into the work array. +* + DO 40 JJ = 1, I3 + DO 30 II = JJ, IB + WORK( II, JJ ) = AB( II-JJ+1, JJ+I+KD-1 ) + 30 CONTINUE + 40 CONTINUE +* +* Update A13 (in the work array). +* + CALL ZTRSM( 'Left', 'Upper', 'Conjugate transpose', + $ 'Non-unit', IB, I3, CONE, + $ AB( KD+1, I ), LDAB-1, WORK, LDWORK ) +* +* Update A23 +* + IF( I2.GT.0 ) + $ CALL ZGEMM( 'Conjugate transpose', + $ 'No transpose', I2, I3, IB, -CONE, + $ AB( KD+1-IB, I+IB ), LDAB-1, WORK, + $ LDWORK, CONE, AB( 1+IB, I+KD ), + $ LDAB-1 ) +* +* Update A33 +* + CALL ZHERK( 'Upper', 'Conjugate transpose', I3, IB, + $ -ONE, WORK, LDWORK, ONE, + $ AB( KD+1, I+KD ), LDAB-1 ) +* +* Copy the lower triangle of A13 back into place. +* + DO 60 JJ = 1, I3 + DO 50 II = JJ, IB + AB( II-JJ+1, JJ+I+KD-1 ) = WORK( II, JJ ) + 50 CONTINUE + 60 CONTINUE + END IF + END IF + 70 CONTINUE + ELSE +* +* Compute the Cholesky factorization of a Hermitian band +* matrix, given the lower triangle of the matrix in band +* storage. +* +* Zero the lower triangle of the work array. +* + DO 90 J = 1, NB + DO 80 I = J + 1, NB + WORK( I, J ) = ZERO + 80 CONTINUE + 90 CONTINUE +* +* Process the band matrix one diagonal block at a time. +* + DO 140 I = 1, N, NB + IB = MIN( NB, N-I+1 ) +* +* Factorize the diagonal block +* + CALL ZPOTF2( UPLO, IB, AB( 1, I ), LDAB-1, II ) + IF( II.NE.0 ) THEN + INFO = I + II - 1 + GO TO 150 + END IF + IF( I+IB.LE.N ) THEN +* +* Update the relevant part of the trailing submatrix. +* If A11 denotes the diagonal block which has just been +* factorized, then we need to update the remaining +* blocks in the diagram: +* +* A11 +* A21 A22 +* A31 A32 A33 +* +* The numbers of rows and columns in the partitioning +* are IB, I2, I3 respectively. The blocks A21, A22 and +* A32 are empty if IB = KD. The lower triangle of A31 +* lies outside the band. +* + I2 = MIN( KD-IB, N-I-IB+1 ) + I3 = MIN( IB, N-I-KD+1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A21 +* + CALL ZTRSM( 'Right', 'Lower', + $ 'Conjugate transpose', 'Non-unit', I2, + $ IB, CONE, AB( 1, I ), LDAB-1, + $ AB( 1+IB, I ), LDAB-1 ) +* +* Update A22 +* + CALL ZHERK( 'Lower', 'No transpose', I2, IB, -ONE, + $ AB( 1+IB, I ), LDAB-1, ONE, + $ AB( 1, I+IB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Copy the upper triangle of A31 into the work array. +* + DO 110 JJ = 1, IB + DO 100 II = 1, MIN( JJ, I3 ) + WORK( II, JJ ) = AB( KD+1-JJ+II, JJ+I-1 ) + 100 CONTINUE + 110 CONTINUE +* +* Update A31 (in the work array). +* + CALL ZTRSM( 'Right', 'Lower', + $ 'Conjugate transpose', 'Non-unit', I3, + $ IB, CONE, AB( 1, I ), LDAB-1, WORK, + $ LDWORK ) +* +* Update A32 +* + IF( I2.GT.0 ) + $ CALL ZGEMM( 'No transpose', + $ 'Conjugate transpose', I3, I2, IB, + $ -CONE, WORK, LDWORK, AB( 1+IB, I ), + $ LDAB-1, CONE, AB( 1+KD-IB, I+IB ), + $ LDAB-1 ) +* +* Update A33 +* + CALL ZHERK( 'Lower', 'No transpose', I3, IB, -ONE, + $ WORK, LDWORK, ONE, AB( 1, I+KD ), + $ LDAB-1 ) +* +* Copy the upper triangle of A31 back into place. +* + DO 130 JJ = 1, IB + DO 120 II = 1, MIN( JJ, I3 ) + AB( KD+1-JJ+II, JJ+I-1 ) = WORK( II, JJ ) + 120 CONTINUE + 130 CONTINUE + END IF + END IF + 140 CONTINUE + END IF + END IF + RETURN +* + 150 CONTINUE + RETURN +* +* End of ZPBTRF +* + END diff --git a/costa/native/external/lapack/zpbtrs.f b/costa/native/external/lapack/zpbtrs.f new file mode 100644 index 000000000..ff8124db3 --- /dev/null +++ b/costa/native/external/lapack/zpbtrs.f @@ -0,0 +1,146 @@ + SUBROUTINE ZPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 AB( LDAB, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZPBTRS solves a system of linear equations A*X = B with a Hermitian +* positive definite band matrix A using the Cholesky factorization +* A = U**H*U or A = L*L**H computed by ZPBTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular factor stored in AB; +* = 'L': Lower triangular factor stored in AB. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals of the matrix A if UPLO = 'U', +* or the number of subdiagonals if UPLO = 'L'. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input) COMPLEX*16 array, dimension (LDAB,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**H*U or A = L*L**H of the band matrix A, stored in the +* first KD+1 rows of the array. The j-th column of U or L is +* stored in the j-th column of the array AB as follows: +* if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; +* if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZTBSV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U'*U. +* + DO 10 J = 1, NRHS +* +* Solve U'*X = B, overwriting B with X. +* + CALL ZTBSV( 'Upper', 'Conjugate transpose', 'Non-unit', N, + $ KD, AB, LDAB, B( 1, J ), 1 ) +* +* Solve U*X = B, overwriting B with X. +* + CALL ZTBSV( 'Upper', 'No transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* Solve A*X = B where A = L*L'. +* + DO 20 J = 1, NRHS +* +* Solve L*X = B, overwriting B with X. +* + CALL ZTBSV( 'Lower', 'No transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) +* +* Solve L'*X = B, overwriting B with X. +* + CALL ZTBSV( 'Lower', 'Conjugate transpose', 'Non-unit', N, + $ KD, AB, LDAB, B( 1, J ), 1 ) + 20 CONTINUE + END IF +* + RETURN +* +* End of ZPBTRS +* + END diff --git a/costa/native/external/lapack/zpocon.f b/costa/native/external/lapack/zpocon.f new file mode 100644 index 000000000..3ab984a83 --- /dev/null +++ b/costa/native/external/lapack/zpocon.f @@ -0,0 +1,180 @@ + SUBROUTINE ZPOCON( UPLO, N, A, LDA, ANORM, RCOND, WORK, RWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZPOCON estimates the reciprocal of the condition number (in the +* 1-norm) of a complex Hermitian positive definite matrix using the +* Cholesky factorization A = U**H*U or A = L*L**H computed by ZPOTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**H*U or A = L*L**H, as computed by ZPOTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* ANORM (input) DOUBLE PRECISION +* The 1-norm (or infinity-norm) of the Hermitian matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + DOUBLE PRECISION AINVNM, SCALE, SCALEL, SCALEU, SMLNUM + COMPLEX*16 ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IZAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDRSCL, ZLACON, ZLATRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPOCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of inv(A). +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U'). +* + CALL ZLATRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, A, LDA, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL ZLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SCALEU, RWORK, INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL ZLATRS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L'). +* + CALL ZLATRS( 'Lower', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, A, LDA, WORK, SCALEU, RWORK, INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = IZAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL ZDRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of ZPOCON +* + END diff --git a/costa/native/external/lapack/zpoequ.f b/costa/native/external/lapack/zpoequ.f new file mode 100644 index 000000000..65b1ca714 --- /dev/null +++ b/costa/native/external/lapack/zpoequ.f @@ -0,0 +1,138 @@ + SUBROUTINE ZPOEQU( N, A, LDA, S, SCOND, AMAX, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION S( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZPOEQU computes row and column scalings intended to equilibrate a +* Hermitian positive definite matrix A and reduce its condition number +* (with respect to the two-norm). S contains the scale factors, +* S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with +* elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This +* choice of S puts the condition number of B within a factor N of the +* smallest possible condition number over all possible diagonal +* scalings. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The N-by-N Hermitian positive definite matrix whose scaling +* factors are to be computed. Only the diagonal elements of A +* are referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* S (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, S contains the scale factors for A. +* +* SCOND (output) DOUBLE PRECISION +* If INFO = 0, S contains the ratio of the smallest S(i) to +* the largest S(i). If SCOND >= 0.1 and AMAX is neither too +* large nor too small, it is not worth scaling by S. +* +* AMAX (output) DOUBLE PRECISION +* Absolute value of largest matrix element. If AMAX is very +* close to overflow or very close to underflow, the matrix +* should be scaled. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the i-th diagonal element is nonpositive. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION SMIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPOEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* +* Find the minimum and maximum diagonal elements. +* + S( 1 ) = DBLE( A( 1, 1 ) ) + SMIN = S( 1 ) + AMAX = S( 1 ) + DO 10 I = 2, N + S( I ) = DBLE( A( I, I ) ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 20 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 20 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 30 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 30 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of ZPOEQU +* + END diff --git a/costa/native/external/lapack/zporfs.f b/costa/native/external/lapack/zporfs.f new file mode 100644 index 000000000..7c394da8b --- /dev/null +++ b/costa/native/external/lapack/zporfs.f @@ -0,0 +1,333 @@ + SUBROUTINE ZPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZPORFS improves the computed solution to a system of linear +* equations when the coefficient matrix is Hermitian positive definite, +* and provides error bounds and backward error estimates for the +* solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The Hermitian matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input) COMPLEX*16 array, dimension (LDAF,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**H*U or A = L*L**H, as computed by ZPOTRF. +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* B (input) COMPLEX*16 array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) COMPLEX*16 array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by ZPOTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ==================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZHEMV, ZLACON, ZPOTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPORFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL ZHEMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 40 CONTINUE + RWORK( K ) = RWORK( K ) + ABS( DBLE( A( K, K ) ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + ABS( DBLE( A( K, K ) ) )*XK + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL ZPOTRS( UPLO, N, 1, AF, LDAF, WORK, N, INFO ) + CALL ZAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL ZPOTRS( UPLO, N, 1, AF, LDAF, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL ZPOTRS( UPLO, N, 1, AF, LDAF, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of ZPORFS +* + END diff --git a/costa/native/external/lapack/zposv.f b/costa/native/external/lapack/zposv.f new file mode 100644 index 000000000..2d69554fd --- /dev/null +++ b/costa/native/external/lapack/zposv.f @@ -0,0 +1,122 @@ + SUBROUTINE ZPOSV( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZPOSV computes the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N Hermitian positive definite matrix and X and B +* are N-by-NRHS matrices. +* +* The Cholesky decomposition is used to factor A as +* A = U**H* U, if UPLO = 'U', or +* A = L * L**H, if UPLO = 'L', +* where U is an upper triangular matrix and L is a lower triangular +* matrix. The factored form of A is then used to solve the system of +* equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i of A is not +* positive definite, so the factorization could not be +* completed, and the solution has not been computed. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZPOTRF, ZPOTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPOSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + CALL ZPOTRF( UPLO, N, A, LDA, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* + END IF + RETURN +* +* End of ZPOSV +* + END diff --git a/costa/native/external/lapack/zposvx.f b/costa/native/external/lapack/zposvx.f new file mode 100644 index 000000000..098f52a3a --- /dev/null +++ b/costa/native/external/lapack/zposvx.f @@ -0,0 +1,378 @@ + SUBROUTINE ZPOSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED, + $ S, B, LDB, X, LDX, RCOND, FERR, BERR, WORK, + $ RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ), S( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZPOSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to +* compute the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N Hermitian positive definite matrix and X and B +* are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'E', real scaling factors are computed to equilibrate +* the system: +* diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +* Whether or not the system will be equilibrated depends on the +* scaling of the matrix A, but if equilibration is used, A is +* overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +* +* 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +* factor the matrix A (after equilibration if FACT = 'E') as +* A = U**H* U, if UPLO = 'U', or +* A = L * L**H, if UPLO = 'L', +* where U is an upper triangular matrix and L is a lower triangular +* matrix. +* +* 3. If the leading i-by-i principal minor is not positive definite, +* then the routine returns with INFO = i. Otherwise, the factored +* form of A is used to estimate the condition number of the matrix +* A. If the reciprocal of the condition number is less than machine +* precision, INFO = N+1 is returned as a warning, but the routine +* still goes on to solve for X and compute error bounds as +* described below. +* +* 4. The system of equations is solved for X using the factored form +* of A. +* +* 5. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* 6. If equilibration was used, the matrix X is premultiplied by +* diag(S) so that it solves the original system before +* equilibration. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of the matrix A is +* supplied on entry, and if not, whether the matrix A should be +* equilibrated before it is factored. +* = 'F': On entry, AF contains the factored form of A. +* If EQUED = 'Y', the matrix A has been equilibrated +* with scaling factors given by S. A and AF will not +* be modified. +* = 'N': The matrix A will be copied to AF and factored. +* = 'E': The matrix A will be equilibrated if necessary, then +* copied to AF and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the Hermitian matrix A, except if FACT = 'F' and +* EQUED = 'Y', then A must contain the equilibrated matrix +* diag(S)*A*diag(S). If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. A is not modified if +* FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. +* +* On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +* diag(S)*A*diag(S). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input or output) COMPLEX*16 array, dimension (LDAF,N) +* If FACT = 'F', then AF is an input argument and on entry +* contains the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H, in the same storage +* format as A. If EQUED .ne. 'N', then AF is the factored form +* of the equilibrated matrix diag(S)*A*diag(S). +* +* If FACT = 'N', then AF is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H of the original +* matrix A. +* +* If FACT = 'E', then AF is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H of the equilibrated +* matrix A (see the description of A for the form of the +* equilibrated matrix). +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* EQUED (input or output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration (always true if FACT = 'N'). +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* EQUED is an input argument if FACT = 'F'; otherwise, it is an +* output argument. +* +* S (input or output) DOUBLE PRECISION array, dimension (N) +* The scale factors for A; not accessed if EQUED = 'N'. S is +* an input argument if FACT = 'F'; otherwise, S is an output +* argument. If FACT = 'F' and EQUED = 'Y', each element of S +* must be positive. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS righthand side matrix B. +* On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +* B is overwritten by diag(S) * B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) COMPLEX*16 array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +* the original system of equations. Note that if EQUED = 'Y', +* A and B are modified on exit, and the solution to the +* equilibrated system is inv(diag(S))*X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) DOUBLE PRECISION +* The estimate of the reciprocal condition number of the matrix +* A after equilibration (if done). If RCOND is less than the +* machine precision (in particular, if RCOND = 0), the matrix +* is singular to working precision. This condition is +* indicated by a return code of INFO > 0. +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: the leading minor of order i of A is +* not positive definite, so the factorization +* could not be completed, and the solution has not +* been computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER I, INFEQU, J + DOUBLE PRECISION AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHE + EXTERNAL LSAME, DLAMCH, ZLANHE +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACPY, ZLAQHE, ZPOCON, ZPOEQU, ZPORFS, + $ ZPOTRF, ZPOTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -9 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -10 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPOSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL ZPOEQU( N, A, LDA, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL ZLAQHE( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + CALL ZLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL ZPOTRF( UPLO, N, AF, LDAF, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = ZLANHE( '1', UPLO, N, A, LDA, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL ZPOCON( UPLO, N, AF, LDAF, ANORM, RCOND, WORK, RWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution matrix X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZPOTRS( UPLO, N, NRHS, AF, LDAF, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL ZPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, LDX, + $ FERR, BERR, WORK, RWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 50 J = 1, NRHS + DO 40 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 40 CONTINUE + 50 CONTINUE + DO 60 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 60 CONTINUE + END IF +* + RETURN +* +* End of ZPOSVX +* + END diff --git a/costa/native/external/lapack/zpotf2.f b/costa/native/external/lapack/zpotf2.f new file mode 100644 index 000000000..771db483e --- /dev/null +++ b/costa/native/external/lapack/zpotf2.f @@ -0,0 +1,175 @@ + SUBROUTINE ZPOTF2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZPOTF2 computes the Cholesky factorization of a complex Hermitian +* positive definite matrix A. +* +* The factorization has the form +* A = U' * U , if UPLO = 'U', or +* A = L * L', if UPLO = 'L', +* where U is an upper triangular matrix and L is lower triangular. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* Hermitian matrix A is stored. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* n by n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n by n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the factor U or L from the Cholesky +* factorization A = U'*U or A = L*L'. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, the leading minor of order k is not +* positive definite, and the factorization could not be +* completed. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZGEMV, ZLACGV +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPOTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = DBLE( A( J, J ) ) - ZDOTC( J-1, A( 1, J ), 1, + $ A( 1, J ), 1 ) + IF( AJJ.LE.ZERO ) THEN + A( J, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of row J. +* + IF( J.LT.N ) THEN + CALL ZLACGV( J-1, A( 1, J ), 1 ) + CALL ZGEMV( 'Transpose', J-1, N-J, -CONE, A( 1, J+1 ), + $ LDA, A( 1, J ), 1, CONE, A( J, J+1 ), LDA ) + CALL ZLACGV( J-1, A( 1, J ), 1 ) + CALL ZDSCAL( N-J, ONE / AJJ, A( J, J+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = DBLE( A( J, J ) ) - ZDOTC( J-1, A( J, 1 ), LDA, + $ A( J, 1 ), LDA ) + IF( AJJ.LE.ZERO ) THEN + A( J, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of column J. +* + IF( J.LT.N ) THEN + CALL ZLACGV( J-1, A( J, 1 ), LDA ) + CALL ZGEMV( 'No transpose', N-J, J-1, -CONE, A( J+1, 1 ), + $ LDA, A( J, 1 ), LDA, CONE, A( J+1, J ), 1 ) + CALL ZLACGV( J-1, A( J, 1 ), LDA ) + CALL ZDSCAL( N-J, ONE / AJJ, A( J+1, J ), 1 ) + END IF + 20 CONTINUE + END IF + GO TO 40 +* + 30 CONTINUE + INFO = J +* + 40 CONTINUE + RETURN +* +* End of ZPOTF2 +* + END diff --git a/costa/native/external/lapack/zpotrf.f b/costa/native/external/lapack/zpotrf.f new file mode 100644 index 000000000..e46514d63 --- /dev/null +++ b/costa/native/external/lapack/zpotrf.f @@ -0,0 +1,187 @@ + SUBROUTINE ZPOTRF( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZPOTRF computes the Cholesky factorization of a complex Hermitian +* positive definite matrix A. +* +* The factorization has the form +* A = U**H * U, if UPLO = 'U', or +* A = L * L**H, if UPLO = 'L', +* where U is an upper triangular matrix and L is lower triangular. +* +* This is the block version of the algorithm, calling Level 3 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the Hermitian matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i is not +* positive definite, and the factorization could not be +* completed. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + COMPLEX*16 CONE + PARAMETER ( ONE = 1.0D+0, CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMM, ZHERK, ZPOTF2, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPOTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'ZPOTRF', UPLO, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + CALL ZPOTF2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code. +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + CALL ZHERK( 'Upper', 'Conjugate transpose', JB, J-1, + $ -ONE, A( 1, J ), LDA, ONE, A( J, J ), LDA ) + CALL ZPOTF2( 'Upper', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( J+JB.LE.N ) THEN +* +* Compute the current block row. +* + CALL ZGEMM( 'Conjugate transpose', 'No transpose', JB, + $ N-J-JB+1, J-1, -CONE, A( 1, J ), LDA, + $ A( 1, J+JB ), LDA, CONE, A( J, J+JB ), + $ LDA ) + CALL ZTRSM( 'Left', 'Upper', 'Conjugate transpose', + $ 'Non-unit', JB, N-J-JB+1, CONE, A( J, J ), + $ LDA, A( J, J+JB ), LDA ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + CALL ZHERK( 'Lower', 'No transpose', JB, J-1, -ONE, + $ A( J, 1 ), LDA, ONE, A( J, J ), LDA ) + CALL ZPOTF2( 'Lower', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( J+JB.LE.N ) THEN +* +* Compute the current block column. +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', + $ N-J-JB+1, JB, J-1, -CONE, A( J+JB, 1 ), + $ LDA, A( J, 1 ), LDA, CONE, A( J+JB, J ), + $ LDA ) + CALL ZTRSM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Non-unit', N-J-JB+1, JB, CONE, A( J, J ), + $ LDA, A( J+JB, J ), LDA ) + END IF + 20 CONTINUE + END IF + END IF + GO TO 40 +* + 30 CONTINUE + INFO = INFO + J - 1 +* + 40 CONTINUE + RETURN +* +* End of ZPOTRF +* + END diff --git a/costa/native/external/lapack/zpotri.f b/costa/native/external/lapack/zpotri.f new file mode 100644 index 000000000..06fe7a483 --- /dev/null +++ b/costa/native/external/lapack/zpotri.f @@ -0,0 +1,97 @@ + SUBROUTINE ZPOTRI( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZPOTRI computes the inverse of a complex Hermitian positive definite +* matrix A using the Cholesky factorization A = U**H*U or A = L*L**H +* computed by ZPOTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H, as computed by +* ZPOTRF. +* On exit, the upper or lower triangle of the (Hermitian) +* inverse of A, overwriting the input factor U or L. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the (i,i) element of the factor U or L is +* zero, and the inverse could not be computed. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLAUUM, ZTRTRI +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPOTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Invert the triangular Cholesky factor U or L. +* + CALL ZTRTRI( UPLO, 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* +* Form inv(U)*inv(U)' or inv(L)'*inv(L). +* + CALL ZLAUUM( UPLO, N, A, LDA, INFO ) +* + RETURN +* +* End of ZPOTRI +* + END diff --git a/costa/native/external/lapack/zpotrs.f b/costa/native/external/lapack/zpotrs.f new file mode 100644 index 000000000..cf6e825be --- /dev/null +++ b/costa/native/external/lapack/zpotrs.f @@ -0,0 +1,133 @@ + SUBROUTINE ZPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZPOTRS solves a system of linear equations A*X = B with a Hermitian +* positive definite matrix A using the Cholesky factorization +* A = U**H*U or A = L*L**H computed by ZPOTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The triangular factor U or L from the Cholesky factorization +* A = U**H*U or A = L*L**H, as computed by ZPOTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPOTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U'*U. +* +* Solve U'*X = B, overwriting B with X. +* + CALL ZTRSM( 'Left', 'Upper', 'Conjugate transpose', 'Non-unit', + $ N, NRHS, ONE, A, LDA, B, LDB ) +* +* Solve U*X = B, overwriting B with X. +* + CALL ZTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) + ELSE +* +* Solve A*X = B where A = L*L'. +* +* Solve L*X = B, overwriting B with X. +* + CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) +* +* Solve L'*X = B, overwriting B with X. +* + CALL ZTRSM( 'Left', 'Lower', 'Conjugate transpose', 'Non-unit', + $ N, NRHS, ONE, A, LDA, B, LDB ) + END IF +* + RETURN +* +* End of ZPOTRS +* + END diff --git a/costa/native/external/lapack/zppcon.f b/costa/native/external/lapack/zppcon.f new file mode 100644 index 000000000..9c955ae9f --- /dev/null +++ b/costa/native/external/lapack/zppcon.f @@ -0,0 +1,179 @@ + SUBROUTINE ZPPCON( UPLO, N, AP, ANORM, RCOND, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 AP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZPPCON estimates the reciprocal of the condition number (in the +* 1-norm) of a complex Hermitian positive definite packed matrix using +* the Cholesky factorization A = U**H*U or A = L*L**H computed by +* ZPPTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The triangular factor U or L from the Cholesky factorization +* A = U**H*U or A = L*L**H, packed columnwise in a linear +* array. The j-th column of U or L is stored in the array AP +* as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +* +* ANORM (input) DOUBLE PRECISION +* The 1-norm (or infinity-norm) of the Hermitian matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + DOUBLE PRECISION AINVNM, SCALE, SCALEL, SCALEU, SMLNUM + COMPLEX*16 ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IZAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDRSCL, ZLACON, ZLATPS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U'). +* + CALL ZLATPS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, AP, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL ZLATPS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEU, RWORK, INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL ZLATPS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L'). +* + CALL ZLATPS( 'Lower', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, AP, WORK, SCALEU, RWORK, INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = IZAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL ZDRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of ZPPCON +* + END diff --git a/costa/native/external/lapack/zppequ.f b/costa/native/external/lapack/zppequ.f new file mode 100644 index 000000000..da40eb4c3 --- /dev/null +++ b/costa/native/external/lapack/zppequ.f @@ -0,0 +1,170 @@ + SUBROUTINE ZPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION S( * ) + COMPLEX*16 AP( * ) +* .. +* +* Purpose +* ======= +* +* ZPPEQU computes row and column scalings intended to equilibrate a +* Hermitian positive definite matrix A in packed storage and reduce +* its condition number (with respect to the two-norm). S contains the +* scale factors, S(i)=1/sqrt(A(i,i)), chosen so that the scaled matrix +* B with elements B(i,j)=S(i)*A(i,j)*S(j) has ones on the diagonal. +* This choice of S puts the condition number of B within a factor N of +* the smallest possible condition number over all possible diagonal +* scalings. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The upper or lower triangle of the Hermitian matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* S (output) DOUBLE PRECISION array, dimension (N) +* If INFO = 0, S contains the scale factors for A. +* +* SCOND (output) DOUBLE PRECISION +* If INFO = 0, S contains the ratio of the smallest S(i) to +* the largest S(i). If SCOND >= 0.1 and AMAX is neither too +* large nor too small, it is not worth scaling by S. +* +* AMAX (output) DOUBLE PRECISION +* Absolute value of largest matrix element. If AMAX is very +* close to overflow or very close to underflow, the matrix +* should be scaled. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the i-th diagonal element is nonpositive. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, JJ + DOUBLE PRECISION SMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPPEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* +* Initialize SMIN and AMAX. +* + S( 1 ) = DBLE( AP( 1 ) ) + SMIN = S( 1 ) + AMAX = S( 1 ) +* + IF( UPPER ) THEN +* +* UPLO = 'U': Upper triangle of A is stored. +* Find the minimum and maximum diagonal elements. +* + JJ = 1 + DO 10 I = 2, N + JJ = JJ + I + S( I ) = DBLE( AP( JJ ) ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + ELSE +* +* UPLO = 'L': Lower triangle of A is stored. +* Find the minimum and maximum diagonal elements. +* + JJ = 1 + DO 20 I = 2, N + JJ = JJ + N - I + 2 + S( I ) = DBLE( AP( JJ ) ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 20 CONTINUE + END IF +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 30 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 30 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 40 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 40 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of ZPPEQU +* + END diff --git a/costa/native/external/lapack/zpprfs.f b/costa/native/external/lapack/zpprfs.f new file mode 100644 index 000000000..e455e8920 --- /dev/null +++ b/costa/native/external/lapack/zpprfs.f @@ -0,0 +1,331 @@ + SUBROUTINE ZPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, + $ BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZPPRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is Hermitian positive definite +* and packed, and provides error bounds and backward error estimates +* for the solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The upper or lower triangle of the Hermitian matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* AFP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The triangular factor U or L from the Cholesky factorization +* A = U**H*U or A = L*L**H, as computed by DPPTRF/ZPPTRF, +* packed columnwise in a linear array in the same format as A +* (see AP). +* +* B (input) COMPLEX*16 array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) COMPLEX*16 array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by ZPPTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ==================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IK, J, K, KASE, KK, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZHPMV, ZLACON, ZPPTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL ZHPMV( UPLO, N, -CONE, AP, X( 1, J ), 1, CONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + KK = 1 + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + IK = KK + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 40 CONTINUE + RWORK( K ) = RWORK( K ) + ABS( DBLE( AP( KK+K-1 ) ) )* + $ XK + S + KK = KK + K + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + ABS( DBLE( AP( KK ) ) )*XK + IK = KK + 1 + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + KK = KK + ( N-K+1 ) + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL ZPPTRS( UPLO, N, 1, AFP, WORK, N, INFO ) + CALL ZAXPY( N, CONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL ZPPTRS( UPLO, N, 1, AFP, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL ZPPTRS( UPLO, N, 1, AFP, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of ZPPRFS +* + END diff --git a/costa/native/external/lapack/zppsv.f b/costa/native/external/lapack/zppsv.f new file mode 100644 index 000000000..707f93257 --- /dev/null +++ b/costa/native/external/lapack/zppsv.f @@ -0,0 +1,134 @@ + SUBROUTINE ZPPSV( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZPPSV computes the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N Hermitian positive definite matrix stored in +* packed format and X and B are N-by-NRHS matrices. +* +* The Cholesky decomposition is used to factor A as +* A = U**H* U, if UPLO = 'U', or +* A = L * L**H, if UPLO = 'L', +* where U is an upper triangular matrix and L is a lower triangular +* matrix. The factored form of A is then used to solve the system of +* equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* +* On exit, if INFO = 0, the factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H, in the same storage +* format as A. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i of A is not +* positive definite, so the factorization could not be +* completed, and the solution has not been computed. +* +* Further Details +* =============== +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the Hermitian matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = conjg(aji)) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZPPTRF, ZPPTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPPSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + CALL ZPPTRF( UPLO, N, AP, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO ) +* + END IF + RETURN +* +* End of ZPPSV +* + END diff --git a/costa/native/external/lapack/zppsvx.f b/costa/native/external/lapack/zppsvx.f new file mode 100644 index 000000000..b2307fbea --- /dev/null +++ b/costa/native/external/lapack/zppsvx.f @@ -0,0 +1,383 @@ + SUBROUTINE ZPPSVX( FACT, UPLO, N, NRHS, AP, AFP, EQUED, S, B, LDB, + $ X, LDX, RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ), S( * ) + COMPLEX*16 AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZPPSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to +* compute the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N Hermitian positive definite matrix stored in +* packed format and X and B are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'E', real scaling factors are computed to equilibrate +* the system: +* diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +* Whether or not the system will be equilibrated depends on the +* scaling of the matrix A, but if equilibration is used, A is +* overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +* +* 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +* factor the matrix A (after equilibration if FACT = 'E') as +* A = U'* U , if UPLO = 'U', or +* A = L * L', if UPLO = 'L', +* where U is an upper triangular matrix, L is a lower triangular +* matrix, and ' indicates conjugate transpose. +* +* 3. If the leading i-by-i principal minor is not positive definite, +* then the routine returns with INFO = i. Otherwise, the factored +* form of A is used to estimate the condition number of the matrix +* A. If the reciprocal of the condition number is less than machine +* precision, INFO = N+1 is returned as a warning, but the routine +* still goes on to solve for X and compute error bounds as +* described below. +* +* 4. The system of equations is solved for X using the factored form +* of A. +* +* 5. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* 6. If equilibration was used, the matrix X is premultiplied by +* diag(S) so that it solves the original system before +* equilibration. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of the matrix A is +* supplied on entry, and if not, whether the matrix A should be +* equilibrated before it is factored. +* = 'F': On entry, AFP contains the factored form of A. +* If EQUED = 'Y', the matrix A has been equilibrated +* with scaling factors given by S. AP and AFP will not +* be modified. +* = 'N': The matrix A will be copied to AFP and factored. +* = 'E': The matrix A will be equilibrated if necessary, then +* copied to AFP and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array, except if FACT = 'F' +* and EQUED = 'Y', then A must contain the equilibrated matrix +* diag(S)*A*diag(S). The j-th column of A is stored in the +* array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. A is not modified if +* FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. +* +* On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +* diag(S)*A*diag(S). +* +* AFP (input or output) COMPLEX*16 array, dimension (N*(N+1)/2) +* If FACT = 'F', then AFP is an input argument and on entry +* contains the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H, in the same storage +* format as A. If EQUED .ne. 'N', then AFP is the factored +* form of the equilibrated matrix A. +* +* If FACT = 'N', then AFP is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H of the original +* matrix A. +* +* If FACT = 'E', then AFP is an output argument and on exit +* returns the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H of the equilibrated +* matrix A (see the description of AP for the form of the +* equilibrated matrix). +* +* EQUED (input or output) CHARACTER*1 +* Specifies the form of equilibration that was done. +* = 'N': No equilibration (always true if FACT = 'N'). +* = 'Y': Equilibration was done, i.e., A has been replaced by +* diag(S) * A * diag(S). +* EQUED is an input argument if FACT = 'F'; otherwise, it is an +* output argument. +* +* S (input or output) DOUBLE PRECISION array, dimension (N) +* The scale factors for A; not accessed if EQUED = 'N'. S is +* an input argument if FACT = 'F'; otherwise, S is an output +* argument. If FACT = 'F' and EQUED = 'Y', each element of S +* must be positive. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +* B is overwritten by diag(S) * B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) COMPLEX*16 array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +* the original system of equations. Note that if EQUED = 'Y', +* A and B are modified on exit, and the solution to the +* equilibrated system is inv(diag(S))*X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) DOUBLE PRECISION +* The estimate of the reciprocal condition number of the matrix +* A after equilibration (if done). If RCOND is less than the +* machine precision (in particular, if RCOND = 0), the matrix +* is singular to working precision. This condition is +* indicated by a return code of INFO > 0. +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: the leading minor of order i of A is +* not positive definite, so the factorization +* could not be completed, and the solution has not +* been computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* Further Details +* =============== +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the Hermitian matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = conjg(aji)) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER I, INFEQU, J + DOUBLE PRECISION AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHP + EXTERNAL LSAME, DLAMCH, ZLANHP +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZLACPY, ZLAQHP, ZPPCON, ZPPEQU, + $ ZPPRFS, ZPPTRF, ZPPTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -7 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -8 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPPSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL ZPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL ZLAQHP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U'*U or A = L*L'. +* + CALL ZCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 ) + CALL ZPPTRF( UPLO, N, AFP, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = ZLANHP( 'I', UPLO, N, AP, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL ZPPCON( UPLO, N, AFP, ANORM, RCOND, WORK, RWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution matrix X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZPPTRS( UPLO, N, NRHS, AFP, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL ZPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, BERR, + $ WORK, RWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 50 J = 1, NRHS + DO 40 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 40 CONTINUE + 50 CONTINUE + DO 60 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 60 CONTINUE + END IF +* + RETURN +* +* End of ZPPSVX +* + END diff --git a/costa/native/external/lapack/zpptrf.f b/costa/native/external/lapack/zpptrf.f new file mode 100644 index 000000000..8f7bb0c42 --- /dev/null +++ b/costa/native/external/lapack/zpptrf.f @@ -0,0 +1,179 @@ + SUBROUTINE ZPPTRF( UPLO, N, AP, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + COMPLEX*16 AP( * ) +* .. +* +* Purpose +* ======= +* +* ZPPTRF computes the Cholesky factorization of a complex Hermitian +* positive definite matrix A stored in packed format. +* +* The factorization has the form +* A = U**H * U, if UPLO = 'U', or +* A = L * L**H, if UPLO = 'L', +* where U is an upper triangular matrix and L is lower triangular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the Hermitian matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* +* On exit, if INFO = 0, the triangular factor U or L from the +* Cholesky factorization A = U**H*U or A = L*L**H, in the same +* storage format as A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i is not +* positive definite, and the factorization could not be +* completed. +* +* Further Details +* =============== +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the Hermitian matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = conjg(aji)) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JC, JJ + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZHPR, ZTPSV +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPPTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + JJ = 0 + DO 10 J = 1, N + JC = JJ + 1 + JJ = JJ + J +* +* Compute elements 1:J-1 of column J. +* + IF( J.GT.1 ) + $ CALL ZTPSV( 'Upper', 'Conjugate transpose', 'Non-unit', + $ J-1, AP, AP( JC ), 1 ) +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = DBLE( AP( JJ ) ) - ZDOTC( J-1, AP( JC ), 1, AP( JC ), + $ 1 ) + IF( AJJ.LE.ZERO ) THEN + AP( JJ ) = AJJ + GO TO 30 + END IF + AP( JJ ) = SQRT( AJJ ) + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + JJ = 1 + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = DBLE( AP( JJ ) ) + IF( AJJ.LE.ZERO ) THEN + AP( JJ ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + AP( JJ ) = AJJ +* +* Compute elements J+1:N of column J and update the trailing +* submatrix. +* + IF( J.LT.N ) THEN + CALL ZDSCAL( N-J, ONE / AJJ, AP( JJ+1 ), 1 ) + CALL ZHPR( 'Lower', N-J, -ONE, AP( JJ+1 ), 1, + $ AP( JJ+N-J+1 ) ) + JJ = JJ + N - J + 1 + END IF + 20 CONTINUE + END IF + GO TO 40 +* + 30 CONTINUE + INFO = J +* + 40 CONTINUE + RETURN +* +* End of ZPPTRF +* + END diff --git a/costa/native/external/lapack/zpptri.f b/costa/native/external/lapack/zpptri.f new file mode 100644 index 000000000..4e82f2b9e --- /dev/null +++ b/costa/native/external/lapack/zpptri.f @@ -0,0 +1,131 @@ + SUBROUTINE ZPPTRI( UPLO, N, AP, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + COMPLEX*16 AP( * ) +* .. +* +* Purpose +* ======= +* +* ZPPTRI computes the inverse of a complex Hermitian positive definite +* matrix A using the Cholesky factorization A = U**H*U or A = L*L**H +* computed by ZPPTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular factor is stored in AP; +* = 'L': Lower triangular factor is stored in AP. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the triangular factor U or L from the Cholesky +* factorization A = U**H*U or A = L*L**H, packed columnwise as +* a linear array. The j-th column of U or L is stored in the +* array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +* +* On exit, the upper or lower triangle of the (Hermitian) +* inverse of A, overwriting the input factor U or L. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the (i,i) element of the factor U or L is +* zero, and the inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JC, JJ, JJN + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZHPR, ZTPMV, ZTPTRI +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPPTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Invert the triangular Cholesky factor U or L. +* + CALL ZTPTRI( UPLO, 'Non-unit', N, AP, INFO ) + IF( INFO.GT.0 ) + $ RETURN + IF( UPPER ) THEN +* +* Compute the product inv(U) * inv(U)'. +* + JJ = 0 + DO 10 J = 1, N + JC = JJ + 1 + JJ = JJ + J + IF( J.GT.1 ) + $ CALL ZHPR( 'Upper', J-1, ONE, AP( JC ), 1, AP ) + AJJ = AP( JJ ) + CALL ZDSCAL( J, AJJ, AP( JC ), 1 ) + 10 CONTINUE +* + ELSE +* +* Compute the product inv(L)' * inv(L). +* + JJ = 1 + DO 20 J = 1, N + JJN = JJ + N - J + 1 + AP( JJ ) = DBLE( ZDOTC( N-J+1, AP( JJ ), 1, AP( JJ ), 1 ) ) + IF( J.LT.N ) + $ CALL ZTPMV( 'Lower', 'Conjugate transpose', 'Non-unit', + $ N-J, AP( JJN ), AP( JJ+1 ), 1 ) + JJ = JJN + 20 CONTINUE + END IF +* + RETURN +* +* End of ZPPTRI +* + END diff --git a/costa/native/external/lapack/zpptrs.f b/costa/native/external/lapack/zpptrs.f new file mode 100644 index 000000000..6aa056708 --- /dev/null +++ b/costa/native/external/lapack/zpptrs.f @@ -0,0 +1,135 @@ + SUBROUTINE ZPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZPPTRS solves a system of linear equations A*X = B with a Hermitian +* positive definite matrix A in packed storage using the Cholesky +* factorization A = U**H*U or A = L*L**H computed by ZPPTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The triangular factor U or L from the Cholesky factorization +* A = U**H*U or A = L*L**H, packed columnwise in a linear +* array. The j-th column of U or L is stored in the array AP +* as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZTPSV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U'*U. +* + DO 10 I = 1, NRHS +* +* Solve U'*X = B, overwriting B with X. +* + CALL ZTPSV( 'Upper', 'Conjugate transpose', 'Non-unit', N, + $ AP, B( 1, I ), 1 ) +* +* Solve U*X = B, overwriting B with X. +* + CALL ZTPSV( 'Upper', 'No transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) + 10 CONTINUE + ELSE +* +* Solve A*X = B where A = L*L'. +* + DO 20 I = 1, NRHS +* +* Solve L*Y = B, overwriting B with X. +* + CALL ZTPSV( 'Lower', 'No transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) +* +* Solve L'*X = Y, overwriting B with X. +* + CALL ZTPSV( 'Lower', 'Conjugate transpose', 'Non-unit', N, + $ AP, B( 1, I ), 1 ) + 20 CONTINUE + END IF +* + RETURN +* +* End of ZPPTRS +* + END diff --git a/costa/native/external/lapack/zptcon.f b/costa/native/external/lapack/zptcon.f new file mode 100644 index 000000000..8905e07f8 --- /dev/null +++ b/costa/native/external/lapack/zptcon.f @@ -0,0 +1,151 @@ + SUBROUTINE ZPTCON( N, D, E, ANORM, RCOND, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), RWORK( * ) + COMPLEX*16 E( * ) +* .. +* +* Purpose +* ======= +* +* ZPTCON computes the reciprocal of the condition number (in the +* 1-norm) of a complex Hermitian positive definite tridiagonal matrix +* using the factorization A = L*D*L**H or A = U**H*D*U computed by +* ZPTTRF. +* +* Norm(inv(A)) is computed by a direct method, and the reciprocal of +* the condition number is computed as +* RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the diagonal matrix D from the +* factorization of A, as computed by ZPTTRF. +* +* E (input) COMPLEX*16 array, dimension (N-1) +* The (n-1) off-diagonal elements of the unit bidiagonal factor +* U or L from the factorization of A, as computed by ZPTTRF. +* +* ANORM (input) DOUBLE PRECISION +* The 1-norm of the original matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is the +* 1-norm of inv(A) computed in this routine. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The method used is described in Nicholas J. Higham, "Efficient +* Algorithms for Computing the Condition Number of a Tridiagonal +* Matrix", SIAM J. Sci. Stat. Comput., Vol. 7, No. 1, January 1986. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IX + DOUBLE PRECISION AINVNM +* .. +* .. External Functions .. + INTEGER IDAMAX + EXTERNAL IDAMAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPTCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* +* Check that D(1:N) is positive. +* + DO 10 I = 1, N + IF( D( I ).LE.ZERO ) + $ RETURN + 10 CONTINUE +* +* Solve M(A) * x = e, where M(A) = (m(i,j)) is given by +* +* m(i,j) = abs(A(i,j)), i = j, +* m(i,j) = -abs(A(i,j)), i .ne. j, +* +* and e = [ 1, 1, ..., 1 ]'. Note M(A) = M(L)*D*M(L)'. +* +* Solve M(L) * x = e. +* + RWORK( 1 ) = ONE + DO 20 I = 2, N + RWORK( I ) = ONE + RWORK( I-1 )*ABS( E( I-1 ) ) + 20 CONTINUE +* +* Solve D * M(L)' * x = b. +* + RWORK( N ) = RWORK( N ) / D( N ) + DO 30 I = N - 1, 1, -1 + RWORK( I ) = RWORK( I ) / D( I ) + RWORK( I+1 )*ABS( E( I ) ) + 30 CONTINUE +* +* Compute AINVNM = max(x(i)), 1<=i<=n. +* + IX = IDAMAX( N, RWORK, 1 ) + AINVNM = ABS( RWORK( IX ) ) +* +* Compute the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of ZPTCON +* + END diff --git a/costa/native/external/lapack/zpteqr.f b/costa/native/external/lapack/zpteqr.f new file mode 100644 index 000000000..19a305943 --- /dev/null +++ b/costa/native/external/lapack/zpteqr.f @@ -0,0 +1,191 @@ + SUBROUTINE ZPTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ), WORK( * ) + COMPLEX*16 Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZPTEQR computes all eigenvalues and, optionally, eigenvectors of a +* symmetric positive definite tridiagonal matrix by first factoring the +* matrix using DPTTRF and then calling ZBDSQR to compute the singular +* values of the bidiagonal factor. +* +* This routine computes the eigenvalues of the positive definite +* tridiagonal matrix to high relative accuracy. This means that if the +* eigenvalues range over many orders of magnitude in size, then the +* small eigenvalues and corresponding eigenvectors will be computed +* more accurately than, for example, with the standard QR method. +* +* The eigenvectors of a full or band positive definite Hermitian matrix +* can also be found if ZHETRD, ZHPTRD, or ZHBTRD has been used to +* reduce this matrix to tridiagonal form. (The reduction to +* tridiagonal form, however, may preclude the possibility of obtaining +* high relative accuracy in the small eigenvalues of the original +* matrix, if these eigenvalues range over many orders of magnitude.) +* +* Arguments +* ========= +* +* COMPZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only. +* = 'V': Compute eigenvectors of original Hermitian +* matrix also. Array Z contains the unitary matrix +* used to reduce the original matrix to tridiagonal +* form. +* = 'I': Compute eigenvectors of tridiagonal matrix also. +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix. +* On normal exit, D contains the eigenvalues, in descending +* order. +* +* E (input/output) DOUBLE PRECISION array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix. +* On exit, E has been destroyed. +* +* Z (input/output) COMPLEX*16 array, dimension (LDZ, N) +* On entry, if COMPZ = 'V', the unitary matrix used in the +* reduction to tridiagonal form. +* On exit, if COMPZ = 'V', the orthonormal eigenvectors of the +* original Hermitian matrix; +* if COMPZ = 'I', the orthonormal eigenvectors of the +* tridiagonal matrix. +* If INFO > 0 on exit, Z contains the eigenvectors associated +* with only the stored eigenvalues. +* If COMPZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* COMPZ = 'V' or 'I', LDZ >= max(1,N). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: if INFO = i, and i is: +* <= N the Cholesky factorization of the matrix could +* not be performed because the i-th principal minor +* was not positive definite. +* > N the SVD algorithm failed to converge; +* if INFO = N+i, i off-diagonal elements of the +* bidiagonal factor did not converge to zero. +* +* ==================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DPTTRF, XERBLA, ZBDSQR, ZLASET +* .. +* .. Local Arrays .. + COMPLEX*16 C( 1, 1 ), VT( 1, 1 ) +* .. +* .. Local Scalars .. + INTEGER I, ICOMPZ, NRU +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPTEQR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ICOMPZ.GT.0 ) + $ Z( 1, 1 ) = CONE + RETURN + END IF + IF( ICOMPZ.EQ.2 ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, Z, LDZ ) +* +* Call DPTTRF to factor the matrix. +* + CALL DPTTRF( N, D, E, INFO ) + IF( INFO.NE.0 ) + $ RETURN + DO 10 I = 1, N + D( I ) = SQRT( D( I ) ) + 10 CONTINUE + DO 20 I = 1, N - 1 + E( I ) = E( I )*D( I ) + 20 CONTINUE +* +* Call ZBDSQR to compute the singular values/vectors of the +* bidiagonal factor. +* + IF( ICOMPZ.GT.0 ) THEN + NRU = N + ELSE + NRU = 0 + END IF + CALL ZBDSQR( 'Lower', N, 0, NRU, 0, D, E, VT, 1, Z, LDZ, C, 1, + $ WORK, INFO ) +* +* Square the singular values. +* + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + D( I ) = D( I )*D( I ) + 30 CONTINUE + ELSE + INFO = N + INFO + END IF +* + RETURN +* +* End of ZPTEQR +* + END diff --git a/costa/native/external/lapack/zptrfs.f b/costa/native/external/lapack/zptrfs.f new file mode 100644 index 000000000..98381b64e --- /dev/null +++ b/costa/native/external/lapack/zptrfs.f @@ -0,0 +1,367 @@ + SUBROUTINE ZPTRFS( UPLO, N, NRHS, D, E, DF, EF, B, LDB, X, LDX, + $ FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION BERR( * ), D( * ), DF( * ), FERR( * ), + $ RWORK( * ) + COMPLEX*16 B( LDB, * ), E( * ), EF( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZPTRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is Hermitian positive definite +* and tridiagonal, and provides error bounds and backward error +* estimates for the solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the superdiagonal or the subdiagonal of the +* tridiagonal matrix A is stored and the form of the +* factorization: +* = 'U': E is the superdiagonal of A, and A = U**H*D*U; +* = 'L': E is the subdiagonal of A, and A = L*D*L**H. +* (The two forms are equivalent if A is real.) +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The n real diagonal elements of the tridiagonal matrix A. +* +* E (input) COMPLEX*16 array, dimension (N-1) +* The (n-1) off-diagonal elements of the tridiagonal matrix A +* (see UPLO). +* +* DF (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the diagonal matrix D from +* the factorization computed by ZPTTRF. +* +* EF (input) COMPLEX*16 array, dimension (N-1) +* The (n-1) off-diagonal elements of the unit bidiagonal +* factor U or L from the factorization computed by ZPTTRF +* (see UPLO). +* +* B (input) COMPLEX*16 array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) COMPLEX*16 array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by ZPTTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IX, J, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN + COMPLEX*16 BI, CX, DX, EX, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IDAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZPTTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPTRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = 4 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 100 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X. Also compute +* abs(A)*abs(x) + abs(b) for use in the backward error bound. +* + IF( UPPER ) THEN + IF( N.EQ.1 ) THEN + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + WORK( 1 ) = BI - DX + RWORK( 1 ) = CABS1( BI ) + CABS1( DX ) + ELSE + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + EX = E( 1 )*X( 2, J ) + WORK( 1 ) = BI - DX - EX + RWORK( 1 ) = CABS1( BI ) + CABS1( DX ) + + $ CABS1( E( 1 ) )*CABS1( X( 2, J ) ) + DO 30 I = 2, N - 1 + BI = B( I, J ) + CX = DCONJG( E( I-1 ) )*X( I-1, J ) + DX = D( I )*X( I, J ) + EX = E( I )*X( I+1, J ) + WORK( I ) = BI - CX - DX - EX + RWORK( I ) = CABS1( BI ) + + $ CABS1( E( I-1 ) )*CABS1( X( I-1, J ) ) + + $ CABS1( DX ) + CABS1( E( I ) )* + $ CABS1( X( I+1, J ) ) + 30 CONTINUE + BI = B( N, J ) + CX = DCONJG( E( N-1 ) )*X( N-1, J ) + DX = D( N )*X( N, J ) + WORK( N ) = BI - CX - DX + RWORK( N ) = CABS1( BI ) + CABS1( E( N-1 ) )* + $ CABS1( X( N-1, J ) ) + CABS1( DX ) + END IF + ELSE + IF( N.EQ.1 ) THEN + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + WORK( 1 ) = BI - DX + RWORK( 1 ) = CABS1( BI ) + CABS1( DX ) + ELSE + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + EX = DCONJG( E( 1 ) )*X( 2, J ) + WORK( 1 ) = BI - DX - EX + RWORK( 1 ) = CABS1( BI ) + CABS1( DX ) + + $ CABS1( E( 1 ) )*CABS1( X( 2, J ) ) + DO 40 I = 2, N - 1 + BI = B( I, J ) + CX = E( I-1 )*X( I-1, J ) + DX = D( I )*X( I, J ) + EX = DCONJG( E( I ) )*X( I+1, J ) + WORK( I ) = BI - CX - DX - EX + RWORK( I ) = CABS1( BI ) + + $ CABS1( E( I-1 ) )*CABS1( X( I-1, J ) ) + + $ CABS1( DX ) + CABS1( E( I ) )* + $ CABS1( X( I+1, J ) ) + 40 CONTINUE + BI = B( N, J ) + CX = E( N-1 )*X( N-1, J ) + DX = D( N )*X( N, J ) + WORK( N ) = BI - CX - DX + RWORK( N ) = CABS1( BI ) + CABS1( E( N-1 ) )* + $ CABS1( X( N-1, J ) ) + CABS1( DX ) + END IF + END IF +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + S = ZERO + DO 50 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 50 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL ZPTTRS( UPLO, N, 1, DF, EF, WORK, N, INFO ) + CALL ZAXPY( N, DCMPLX( ONE ), WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* + DO 60 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 60 CONTINUE + IX = IDAMAX( N, RWORK, 1 ) + FERR( J ) = RWORK( IX ) +* +* Estimate the norm of inv(A). +* +* Solve M(A) * x = e, where M(A) = (m(i,j)) is given by +* +* m(i,j) = abs(A(i,j)), i = j, +* m(i,j) = -abs(A(i,j)), i .ne. j, +* +* and e = [ 1, 1, ..., 1 ]'. Note M(A) = M(L)*D*M(L)'. +* +* Solve M(L) * x = e. +* + RWORK( 1 ) = ONE + DO 70 I = 2, N + RWORK( I ) = ONE + RWORK( I-1 )*ABS( EF( I-1 ) ) + 70 CONTINUE +* +* Solve D * M(L)' * x = b. +* + RWORK( N ) = RWORK( N ) / DF( N ) + DO 80 I = N - 1, 1, -1 + RWORK( I ) = RWORK( I ) / DF( I ) + + $ RWORK( I+1 )*ABS( EF( I ) ) + 80 CONTINUE +* +* Compute norm(inv(A)) = max(x(i)), 1<=i<=n. +* + IX = IDAMAX( N, RWORK, 1 ) + FERR( J ) = FERR( J )*ABS( RWORK( IX ) ) +* +* Normalize error. +* + LSTRES = ZERO + DO 90 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 90 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 100 CONTINUE +* + RETURN +* +* End of ZPTRFS +* + END diff --git a/costa/native/external/lapack/zptsv.f b/costa/native/external/lapack/zptsv.f new file mode 100644 index 000000000..2949d70e4 --- /dev/null +++ b/costa/native/external/lapack/zptsv.f @@ -0,0 +1,101 @@ + SUBROUTINE ZPTSV( N, NRHS, D, E, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 25, 1997 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ) + COMPLEX*16 B( LDB, * ), E( * ) +* .. +* +* Purpose +* ======= +* +* ZPTSV computes the solution to a complex system of linear equations +* A*X = B, where A is an N-by-N Hermitian positive definite tridiagonal +* matrix, and X and B are N-by-NRHS matrices. +* +* A is factored as A = L*D*L**H, and the factored form of A is then +* used to solve the system of equations. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* A. On exit, the n diagonal elements of the diagonal matrix +* D from the factorization A = L*D*L**H. +* +* E (input/output) COMPLEX*16 array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix A. On exit, the (n-1) subdiagonal elements of the +* unit bidiagonal factor L from the L*D*L**H factorization of +* A. E can also be regarded as the superdiagonal of the unit +* bidiagonal factor U from the U**H*D*U factorization of A. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,N) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the leading minor of order i is not +* positive definite, and the solution has not been +* computed. The factorization has not been completed +* unless i = N. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL XERBLA, ZPTTRF, ZPTTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPTSV ', -INFO ) + RETURN + END IF +* +* Compute the L*D*L' (or U'*D*U) factorization of A. +* + CALL ZPTTRF( N, D, E, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZPTTRS( 'Lower', N, NRHS, D, E, B, LDB, INFO ) + END IF + RETURN +* +* End of ZPTSV +* + END diff --git a/costa/native/external/lapack/zptsvx.f b/costa/native/external/lapack/zptsvx.f new file mode 100644 index 000000000..eeb5bcb1f --- /dev/null +++ b/costa/native/external/lapack/zptsvx.f @@ -0,0 +1,238 @@ + SUBROUTINE ZPTSVX( FACT, N, NRHS, D, E, DF, EF, B, LDB, X, LDX, + $ RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER FACT + INTEGER INFO, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION BERR( * ), D( * ), DF( * ), FERR( * ), + $ RWORK( * ) + COMPLEX*16 B( LDB, * ), E( * ), EF( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZPTSVX uses the factorization A = L*D*L**H to compute the solution +* to a complex system of linear equations A*X = B, where A is an +* N-by-N Hermitian positive definite tridiagonal matrix and X and B +* are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'N', the matrix A is factored as A = L*D*L**H, where L +* is a unit lower bidiagonal matrix and D is diagonal. The +* factorization can also be regarded as having the form +* A = U**H*D*U. +* +* 2. If the leading i-by-i principal minor is not positive definite, +* then the routine returns with INFO = i. Otherwise, the factored +* form of A is used to estimate the condition number of the matrix +* A. If the reciprocal of the condition number is less than machine +* precision, INFO = N+1 is returned as a warning, but the routine +* still goes on to solve for X and compute error bounds as +* described below. +* +* 3. The system of equations is solved for X using the factored form +* of A. +* +* 4. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of the matrix +* A is supplied on entry. +* = 'F': On entry, DF and EF contain the factored form of A. +* D, E, DF, and EF will not be modified. +* = 'N': The matrix A will be copied to DF and EF and +* factored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the tridiagonal matrix A. +* +* E (input) COMPLEX*16 array, dimension (N-1) +* The (n-1) subdiagonal elements of the tridiagonal matrix A. +* +* DF (input or output) DOUBLE PRECISION array, dimension (N) +* If FACT = 'F', then DF is an input argument and on entry +* contains the n diagonal elements of the diagonal matrix D +* from the L*D*L**H factorization of A. +* If FACT = 'N', then DF is an output argument and on exit +* contains the n diagonal elements of the diagonal matrix D +* from the L*D*L**H factorization of A. +* +* EF (input or output) COMPLEX*16 array, dimension (N-1) +* If FACT = 'F', then EF is an input argument and on entry +* contains the (n-1) subdiagonal elements of the unit +* bidiagonal factor L from the L*D*L**H factorization of A. +* If FACT = 'N', then EF is an output argument and on exit +* contains the (n-1) subdiagonal elements of the unit +* bidiagonal factor L from the L*D*L**H factorization of A. +* +* B (input) COMPLEX*16 array, dimension (LDB,NRHS) +* The N-by-NRHS right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) COMPLEX*16 array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal condition number of the matrix A. If RCOND +* is less than the machine precision (in particular, if +* RCOND = 0), the matrix is singular to working precision. +* This condition is indicated by a return code of INFO > 0. +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in any +* element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: the leading minor of order i of A is +* not positive definite, so the factorization +* could not be completed, and the solution has not +* been computed. RCOND = 0 is returned. +* = N+1: U is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT + DOUBLE PRECISION ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHT + EXTERNAL LSAME, DLAMCH, ZLANHT +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, XERBLA, ZCOPY, ZLACPY, ZPTCON, ZPTRFS, + $ ZPTTRF, ZPTTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPTSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the L*D*L' (or U'*D*U) factorization of A. +* + CALL DCOPY( N, D, 1, DF, 1 ) + IF( N.GT.1 ) + $ CALL ZCOPY( N-1, E, 1, EF, 1 ) + CALL ZPTTRF( N, DF, EF, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = ZLANHT( '1', N, D, E ) +* +* Compute the reciprocal of the condition number of A. +* + CALL ZPTCON( N, DF, EF, ANORM, RCOND, RWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution vectors X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZPTTRS( 'Lower', N, NRHS, DF, EF, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL ZPTRFS( 'Lower', N, NRHS, D, E, DF, EF, B, LDB, X, LDX, FERR, + $ BERR, WORK, RWORK, INFO ) +* + RETURN +* +* End of ZPTSVX +* + END diff --git a/costa/native/external/lapack/zpttrf.f b/costa/native/external/lapack/zpttrf.f new file mode 100644 index 000000000..92ddc9871 --- /dev/null +++ b/costa/native/external/lapack/zpttrf.f @@ -0,0 +1,169 @@ + SUBROUTINE ZPTTRF( N, D, E, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ) + COMPLEX*16 E( * ) +* .. +* +* Purpose +* ======= +* +* ZPTTRF computes the L*D*L' factorization of a complex Hermitian +* positive definite tridiagonal matrix A. The factorization may also +* be regarded as having the form A = U'*D*U. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* A. On exit, the n diagonal elements of the diagonal matrix +* D from the L*D*L' factorization of A. +* +* E (input/output) COMPLEX*16 array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix A. On exit, the (n-1) subdiagonal elements of the +* unit bidiagonal factor L from the L*D*L' factorization of A. +* E can also be regarded as the superdiagonal of the unit +* bidiagonal factor U from the U'*D*U factorization of A. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, the leading minor of order k is not +* positive definite; if k < N, the factorization could not +* be completed, while if k = N, the factorization was +* completed, but D(N) = 0. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, I4 + DOUBLE PRECISION EII, EIR, F, G +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, DIMAG, MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'ZPTTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the L*D*L' (or U'*D*U) factorization of A. +* + I4 = MOD( N-1, 4 ) + DO 10 I = 1, I4 + IF( D( I ).LE.ZERO ) THEN + INFO = I + GO TO 30 + END IF + EIR = DBLE( E( I ) ) + EII = DIMAG( E( I ) ) + F = EIR / D( I ) + G = EII / D( I ) + E( I ) = DCMPLX( F, G ) + D( I+1 ) = D( I+1 ) - F*EIR - G*EII + 10 CONTINUE +* + DO 20 I = I4 + 1, N - 4, 4 +* +* Drop out of the loop if d(i) <= 0: the matrix is not positive +* definite. +* + IF( D( I ).LE.ZERO ) THEN + INFO = I + GO TO 30 + END IF +* +* Solve for e(i) and d(i+1). +* + EIR = DBLE( E( I ) ) + EII = DIMAG( E( I ) ) + F = EIR / D( I ) + G = EII / D( I ) + E( I ) = DCMPLX( F, G ) + D( I+1 ) = D( I+1 ) - F*EIR - G*EII +* + IF( D( I+1 ).LE.ZERO ) THEN + INFO = I + 1 + GO TO 30 + END IF +* +* Solve for e(i+1) and d(i+2). +* + EIR = DBLE( E( I+1 ) ) + EII = DIMAG( E( I+1 ) ) + F = EIR / D( I+1 ) + G = EII / D( I+1 ) + E( I+1 ) = DCMPLX( F, G ) + D( I+2 ) = D( I+2 ) - F*EIR - G*EII +* + IF( D( I+2 ).LE.ZERO ) THEN + INFO = I + 2 + GO TO 30 + END IF +* +* Solve for e(i+2) and d(i+3). +* + EIR = DBLE( E( I+2 ) ) + EII = DIMAG( E( I+2 ) ) + F = EIR / D( I+2 ) + G = EII / D( I+2 ) + E( I+2 ) = DCMPLX( F, G ) + D( I+3 ) = D( I+3 ) - F*EIR - G*EII +* + IF( D( I+3 ).LE.ZERO ) THEN + INFO = I + 3 + GO TO 30 + END IF +* +* Solve for e(i+3) and d(i+4). +* + EIR = DBLE( E( I+3 ) ) + EII = DIMAG( E( I+3 ) ) + F = EIR / D( I+3 ) + G = EII / D( I+3 ) + E( I+3 ) = DCMPLX( F, G ) + D( I+4 ) = D( I+4 ) - F*EIR - G*EII + 20 CONTINUE +* +* Check d(n) for positive definiteness. +* + IF( D( N ).LE.ZERO ) + $ INFO = N +* + 30 CONTINUE + RETURN +* +* End of ZPTTRF +* + END diff --git a/costa/native/external/lapack/zpttrs.f b/costa/native/external/lapack/zpttrs.f new file mode 100644 index 000000000..2dbbe2e7e --- /dev/null +++ b/costa/native/external/lapack/zpttrs.f @@ -0,0 +1,136 @@ + SUBROUTINE ZPTTRS( UPLO, N, NRHS, D, E, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ) + COMPLEX*16 B( LDB, * ), E( * ) +* .. +* +* Purpose +* ======= +* +* ZPTTRS solves a tridiagonal system of the form +* A * X = B +* using the factorization A = U'*D*U or A = L*D*L' computed by ZPTTRF. +* D is a diagonal matrix specified in the vector D, U (or L) is a unit +* bidiagonal matrix whose superdiagonal (subdiagonal) is specified in +* the vector E, and X and B are N by NRHS matrices. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies the form of the factorization and whether the +* vector E is the superdiagonal of the upper bidiagonal factor +* U or the subdiagonal of the lower bidiagonal factor L. +* = 'U': A = U'*D*U, E is the superdiagonal of U +* = 'L': A = L*D*L', E is the subdiagonal of L +* +* N (input) INTEGER +* The order of the tridiagonal matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the diagonal matrix D from the +* factorization A = U'*D*U or A = L*D*L'. +* +* E (input) COMPLEX*16 array, dimension (N-1) +* If UPLO = 'U', the (n-1) superdiagonal elements of the unit +* bidiagonal factor U from the factorization A = U'*D*U. +* If UPLO = 'L', the (n-1) subdiagonal elements of the unit +* bidiagonal factor L from the factorization A = L*D*L'. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the right hand side vectors B for the system of +* linear equations. +* On exit, the solution vectors, X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER IUPLO, J, JB, NB +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZPTTS2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + UPPER = ( UPLO.EQ.'U' .OR. UPLO.EQ.'u' ) + IF( .NOT.UPPER .AND. .NOT.( UPLO.EQ.'L' .OR. UPLO.EQ.'l' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPTTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Determine the number of right-hand sides to solve at a time. +* + IF( NRHS.EQ.1 ) THEN + NB = 1 + ELSE + NB = MAX( 1, ILAENV( 1, 'ZPTTRS', UPLO, N, NRHS, -1, -1 ) ) + END IF +* +* Decode UPLO +* + IF( UPPER ) THEN + IUPLO = 1 + ELSE + IUPLO = 0 + END IF +* + IF( NB.GE.NRHS ) THEN + CALL ZPTTS2( IUPLO, N, NRHS, D, E, B, LDB ) + ELSE + DO 10 J = 1, NRHS, NB + JB = MIN( NRHS-J+1, NB ) + CALL ZPTTS2( IUPLO, N, JB, D, E, B( 1, J ), LDB ) + 10 CONTINUE + END IF +* + RETURN +* +* End of ZPTTRS +* + END diff --git a/costa/native/external/lapack/zptts2.f b/costa/native/external/lapack/zptts2.f new file mode 100644 index 000000000..4d0d38df3 --- /dev/null +++ b/costa/native/external/lapack/zptts2.f @@ -0,0 +1,177 @@ + SUBROUTINE ZPTTS2( IUPLO, N, NRHS, D, E, B, LDB ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER IUPLO, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ) + COMPLEX*16 B( LDB, * ), E( * ) +* .. +* +* Purpose +* ======= +* +* ZPTTS2 solves a tridiagonal system of the form +* A * X = B +* using the factorization A = U'*D*U or A = L*D*L' computed by ZPTTRF. +* D is a diagonal matrix specified in the vector D, U (or L) is a unit +* bidiagonal matrix whose superdiagonal (subdiagonal) is specified in +* the vector E, and X and B are N by NRHS matrices. +* +* Arguments +* ========= +* +* IUPLO (input) INTEGER +* Specifies the form of the factorization and whether the +* vector E is the superdiagonal of the upper bidiagonal factor +* U or the subdiagonal of the lower bidiagonal factor L. +* = 1: A = U'*D*U, E is the superdiagonal of U +* = 0: A = L*D*L', E is the subdiagonal of L +* +* N (input) INTEGER +* The order of the tridiagonal matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the diagonal matrix D from the +* factorization A = U'*D*U or A = L*D*L'. +* +* E (input) COMPLEX*16 array, dimension (N-1) +* If IUPLO = 1, the (n-1) superdiagonal elements of the unit +* bidiagonal factor U from the factorization A = U'*D*U. +* If IUPLO = 0, the (n-1) subdiagonal elements of the unit +* bidiagonal factor L from the factorization A = L*D*L'. +* +* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) +* On entry, the right hand side vectors B for the system of +* linear equations. +* On exit, the solution vectors, X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Subroutines .. + EXTERNAL ZDSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + IF( N.EQ.1 ) + $ CALL ZDSCAL( NRHS, 1.D0 / D( 1 ), B, LDB ) + RETURN + END IF +* + IF( IUPLO.EQ.1 ) THEN +* +* Solve A * X = B using the factorization A = U'*D*U, +* overwriting each right hand side vector with its solution. +* + IF( NRHS.LE.2 ) THEN + J = 1 + 10 CONTINUE +* +* Solve U' * x = b. +* + DO 20 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*DCONJG( E( I-1 ) ) + 20 CONTINUE +* +* Solve D * U * x = b. +* + DO 30 I = 1, N + B( I, J ) = B( I, J ) / D( I ) + 30 CONTINUE + DO 40 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) - B( I+1, J )*E( I ) + 40 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 10 + END IF + ELSE + DO 70 J = 1, NRHS +* +* Solve U' * x = b. +* + DO 50 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*DCONJG( E( I-1 ) ) + 50 CONTINUE +* +* Solve D * U * x = b. +* + B( N, J ) = B( N, J ) / D( N ) + DO 60 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) / D( I ) - B( I+1, J )*E( I ) + 60 CONTINUE + 70 CONTINUE + END IF + ELSE +* +* Solve A * X = B using the factorization A = L*D*L', +* overwriting each right hand side vector with its solution. +* + IF( NRHS.LE.2 ) THEN + J = 1 + 80 CONTINUE +* +* Solve L * x = b. +* + DO 90 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 ) + 90 CONTINUE +* +* Solve D * L' * x = b. +* + DO 100 I = 1, N + B( I, J ) = B( I, J ) / D( I ) + 100 CONTINUE + DO 110 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) - B( I+1, J )*DCONJG( E( I ) ) + 110 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 80 + END IF + ELSE + DO 140 J = 1, NRHS +* +* Solve L * x = b. +* + DO 120 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 ) + 120 CONTINUE +* +* Solve D * L' * x = b. +* + B( N, J ) = B( N, J ) / D( N ) + DO 130 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) / D( I ) - + $ B( I+1, J )*DCONJG( E( I ) ) + 130 CONTINUE + 140 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZPTTS2 +* + END diff --git a/costa/native/external/lapack/zrot.f b/costa/native/external/lapack/zrot.f new file mode 100644 index 000000000..993fc0dd9 --- /dev/null +++ b/costa/native/external/lapack/zrot.f @@ -0,0 +1,92 @@ + SUBROUTINE ZROT( N, CX, INCX, CY, INCY, C, S ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + INTEGER INCX, INCY, N + DOUBLE PRECISION C + COMPLEX*16 S +* .. +* .. Array Arguments .. + COMPLEX*16 CX( * ), CY( * ) +* .. +* +* Purpose +* ======= +* +* ZROT applies a plane rotation, where the cos (C) is real and the +* sin (S) is complex, and the vectors CX and CY are complex. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of elements in the vectors CX and CY. +* +* CX (input/output) COMPLEX*16 array, dimension (N) +* On input, the vector X. +* On output, CX is overwritten with C*X + S*Y. +* +* INCX (input) INTEGER +* The increment between successive values of CY. INCX <> 0. +* +* CY (input/output) COMPLEX*16 array, dimension (N) +* On input, the vector Y. +* On output, CY is overwritten with -CONJG(S)*X + C*Y. +* +* INCY (input) INTEGER +* The increment between successive values of CY. INCX <> 0. +* +* C (input) DOUBLE PRECISION +* S (input) COMPLEX*16 +* C and S define a rotation +* [ C S ] +* [ -conjg(S) C ] +* where C*C + S*CONJG(S) = 1.0. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IX, IY + COMPLEX*16 STEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) + $ RETURN + IF( INCX.EQ.1 .AND. INCY.EQ.1 ) + $ GO TO 20 +* +* Code for unequal increments or equal increments not equal to 1 +* + IX = 1 + IY = 1 + IF( INCX.LT.0 ) + $ IX = ( -N+1 )*INCX + 1 + IF( INCY.LT.0 ) + $ IY = ( -N+1 )*INCY + 1 + DO 10 I = 1, N + STEMP = C*CX( IX ) + S*CY( IY ) + CY( IY ) = C*CY( IY ) - DCONJG( S )*CX( IX ) + CX( IX ) = STEMP + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + RETURN +* +* Code for both increments equal to 1 +* + 20 CONTINUE + DO 30 I = 1, N + STEMP = C*CX( I ) + S*CY( I ) + CY( I ) = C*CY( I ) - DCONJG( S )*CX( I ) + CX( I ) = STEMP + 30 CONTINUE + RETURN + END diff --git a/costa/native/external/lapack/zspcon.f b/costa/native/external/lapack/zspcon.f new file mode 100644 index 000000000..0ff3dce48 --- /dev/null +++ b/costa/native/external/lapack/zspcon.f @@ -0,0 +1,155 @@ + SUBROUTINE ZSPCON( UPLO, N, AP, IPIV, ANORM, RCOND, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZSPCON estimates the reciprocal of the condition number (in the +* 1-norm) of a complex symmetric packed matrix A using the +* factorization A = U*D*U**T or A = L*D*L**T computed by ZSPTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by ZSPTRF, stored as a +* packed triangular matrix. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by ZSPTRF. +* +* ANORM (input) DOUBLE PRECISION +* The 1-norm of the original matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IP, KASE + DOUBLE PRECISION AINVNM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACON, ZSPTRS +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + IP = N*( N+1 ) / 2 + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP - I + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + IP = 1 + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP + N - I + 1 + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L') or inv(U*D*U'). +* + CALL ZSPTRS( UPLO, N, 1, AP, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of ZSPCON +* + END diff --git a/costa/native/external/lapack/zspmv.f b/costa/native/external/lapack/zspmv.f new file mode 100644 index 000000000..8ad09f486 --- /dev/null +++ b/costa/native/external/lapack/zspmv.f @@ -0,0 +1,265 @@ + SUBROUTINE ZSPMV( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCX, INCY, N + COMPLEX*16 ALPHA, BETA +* .. +* .. Array Arguments .. + COMPLEX*16 AP( * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZSPMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n symmetric matrix, supplied in packed form. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1 +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* AP - COMPLEX*16 array, dimension at least +* ( ( N*( N + 1 ) )/2 ). +* Before entry, with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. +* Before entry, with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. +* Unchanged on exit. +* +* X - COMPLEX*16 array, dimension at least +* ( 1 + ( N - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the N- +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX*16 +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - COMPLEX*16 array, dimension at least +* ( 1 + ( N - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. On exit, Y is overwritten by the updated +* vector y. +* +* INCY - INTEGER +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY + COMPLEX*16 TEMP1, TEMP2 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = 1 + ELSE IF( N.LT.0 ) THEN + INFO = 2 + ELSE IF( INCX.EQ.0 ) THEN + INFO = 6 + ELSE IF( INCY.EQ.0 ) THEN + INFO = 9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSPMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ) .OR. ( ( ALPHA.EQ.ZERO ) .AND. ( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 ) THEN + KX = 1 + ELSE + KX = 1 - ( N-1 )*INCX + END IF + IF( INCY.GT.0 ) THEN + KY = 1 + ELSE + KY = 1 - ( N-1 )*INCY + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE ) THEN + IF( INCY.EQ.1 ) THEN + IF( BETA.EQ.ZERO ) THEN + DO 10 I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20 I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO ) THEN + DO 30 I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40 I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + KK = 1 + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Form y when AP contains the upper triangle. +* + IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN + DO 60 J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + K = KK + DO 50 I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( I ) + K = K + 1 + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*AP( KK+J-1 ) + ALPHA*TEMP2 + KK = KK + J + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80 J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70 K = KK, KK + J - 2 + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*AP( KK+J-1 ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + J + 80 CONTINUE + END IF + ELSE +* +* Form y when AP contains the lower triangle. +* + IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN + DO 100 J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*AP( KK ) + K = KK + 1 + DO 90 I = J + 1, N + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( I ) + K = K + 1 + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + KK = KK + ( N-J+1 ) + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120 J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*AP( KK ) + IX = JX + IY = JY + DO 110 K = KK + 1, KK + N - J + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + ( N-J+1 ) + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZSPMV +* + END diff --git a/costa/native/external/lapack/zspr.f b/costa/native/external/lapack/zspr.f new file mode 100644 index 000000000..be29e6ad8 --- /dev/null +++ b/costa/native/external/lapack/zspr.f @@ -0,0 +1,214 @@ + SUBROUTINE ZSPR( UPLO, N, ALPHA, X, INCX, AP ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCX, N + COMPLEX*16 ALPHA +* .. +* .. Array Arguments .. + COMPLEX*16 AP( * ), X( * ) +* .. +* +* Purpose +* ======= +* +* ZSPR performs the symmetric rank 1 operation +* +* A := alpha*x*conjg( x' ) + A, +* +* where alpha is a complex scalar, x is an n element vector and A is an +* n by n symmetric matrix, supplied in packed form. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1 +* On entry, UPLO specifies whether the upper or lower +* triangular part of the matrix A is supplied in the packed +* array AP as follows: +* +* UPLO = 'U' or 'u' The upper triangular part of A is +* supplied in AP. +* +* UPLO = 'L' or 'l' The lower triangular part of A is +* supplied in AP. +* +* Unchanged on exit. +* +* N - INTEGER +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX*16 array, dimension at least +* ( 1 + ( N - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the N- +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* AP - COMPLEX*16 array, dimension at least +* ( ( N*( N + 1 ) )/2 ). +* Before entry, with UPLO = 'U' or 'u', the array AP must +* contain the upper triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +* and a( 2, 2 ) respectively, and so on. On exit, the array +* AP is overwritten by the upper triangular part of the +* updated matrix. +* Before entry, with UPLO = 'L' or 'l', the array AP must +* contain the lower triangular part of the symmetric matrix +* packed sequentially, column by column, so that AP( 1 ) +* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +* and a( 3, 1 ) respectively, and so on. On exit, the array +* AP is overwritten by the lower triangular part of the +* updated matrix. +* Note that the imaginary parts of the diagonal elements need +* not be set, they are assumed to be zero, and on exit they +* are set to zero. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, IX, J, JX, K, KK, KX + COMPLEX*16 TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = 1 + ELSE IF( N.LT.0 ) THEN + INFO = 2 + ELSE IF( INCX.EQ.0 ) THEN + INFO = 5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSPR ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ) .OR. ( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 ) THEN + KX = 1 - ( N-1 )*INCX + ELSE IF( INCX.NE.1 ) THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* + KK = 1 + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Form A when upper triangle is stored in AP. +* + IF( INCX.EQ.1 ) THEN + DO 20 J = 1, N + IF( X( J ).NE.ZERO ) THEN + TEMP = ALPHA*X( J ) + K = KK + DO 10 I = 1, J - 1 + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 10 CONTINUE + AP( KK+J-1 ) = AP( KK+J-1 ) + X( J )*TEMP + ELSE + AP( KK+J-1 ) = AP( KK+J-1 ) + END IF + KK = KK + J + 20 CONTINUE + ELSE + JX = KX + DO 40 J = 1, N + IF( X( JX ).NE.ZERO ) THEN + TEMP = ALPHA*X( JX ) + IX = KX + DO 30 K = KK, KK + J - 2 + AP( K ) = AP( K ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + AP( KK+J-1 ) = AP( KK+J-1 ) + X( JX )*TEMP + ELSE + AP( KK+J-1 ) = AP( KK+J-1 ) + END IF + JX = JX + INCX + KK = KK + J + 40 CONTINUE + END IF + ELSE +* +* Form A when lower triangle is stored in AP. +* + IF( INCX.EQ.1 ) THEN + DO 60 J = 1, N + IF( X( J ).NE.ZERO ) THEN + TEMP = ALPHA*X( J ) + AP( KK ) = AP( KK ) + TEMP*X( J ) + K = KK + 1 + DO 50 I = J + 1, N + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 50 CONTINUE + ELSE + AP( KK ) = AP( KK ) + END IF + KK = KK + N - J + 1 + 60 CONTINUE + ELSE + JX = KX + DO 80 J = 1, N + IF( X( JX ).NE.ZERO ) THEN + TEMP = ALPHA*X( JX ) + AP( KK ) = AP( KK ) + TEMP*X( JX ) + IX = JX + DO 70 K = KK + 1, KK + N - J + IX = IX + INCX + AP( K ) = AP( K ) + X( IX )*TEMP + 70 CONTINUE + ELSE + AP( KK ) = AP( KK ) + END IF + JX = JX + INCX + KK = KK + N - J + 1 + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZSPR +* + END diff --git a/costa/native/external/lapack/zsprfs.f b/costa/native/external/lapack/zsprfs.f new file mode 100644 index 000000000..edb9cd811 --- /dev/null +++ b/costa/native/external/lapack/zsprfs.f @@ -0,0 +1,336 @@ + SUBROUTINE ZSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, + $ FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZSPRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is symmetric indefinite +* and packed, and provides error bounds and backward error estimates +* for the solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The upper or lower triangle of the symmetric matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* AFP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The factored form of the matrix A. AFP contains the block +* diagonal matrix D and the multipliers used to obtain the +* factor U or L from the factorization A = U*D*U**T or +* A = L*D*L**T as computed by ZSPTRF, stored as a packed +* triangular matrix. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by ZSPTRF. +* +* B (input) COMPLEX*16 array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) COMPLEX*16 array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by ZSPTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IK, J, K, KASE, KK, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZLACON, ZSPMV, ZSPTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL ZSPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + KK = 1 + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + IK = KK + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 40 CONTINUE + RWORK( K ) = RWORK( K ) + CABS1( AP( KK+K-1 ) )*XK + S + KK = KK + K + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + CABS1( AP( KK ) )*XK + IK = KK + 1 + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + KK = KK + ( N-K+1 ) + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL ZSPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + CALL ZAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL ZSPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL ZSPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of ZSPRFS +* + END diff --git a/costa/native/external/lapack/zspsv.f b/costa/native/external/lapack/zspsv.f new file mode 100644 index 000000000..8f37f0784 --- /dev/null +++ b/costa/native/external/lapack/zspsv.f @@ -0,0 +1,149 @@ + SUBROUTINE ZSPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZSPSV computes the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N symmetric matrix stored in packed format and X +* and B are N-by-NRHS matrices. +* +* The diagonal pivoting method is used to factor A as +* A = U * D * U**T, if UPLO = 'U', or +* A = L * D * L**T, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, D is symmetric and block diagonal with 1-by-1 +* and 2-by-2 diagonal blocks. The factored form of A is then used to +* solve the system of equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**T or A = L*D*L**T as computed by ZSPTRF, stored as +* a packed triangular matrix in the same storage format as A. +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D, as +* determined by ZSPTRF. If IPIV(k) > 0, then rows and columns +* k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +* diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +* then rows and columns k-1 and -IPIV(k) were interchanged and +* D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +* IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +* -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +* diagonal block. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, so the solution could not be +* computed. +* +* Further Details +* =============== +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the symmetric matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = aji) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZSPTRF, ZSPTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSPSV ', -INFO ) + RETURN + END IF +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL ZSPTRF( UPLO, N, AP, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* + END IF + RETURN +* +* End of ZSPSV +* + END diff --git a/costa/native/external/lapack/zspsvx.f b/costa/native/external/lapack/zspsvx.f new file mode 100644 index 000000000..d2eea7e5a --- /dev/null +++ b/costa/native/external/lapack/zspsvx.f @@ -0,0 +1,279 @@ + SUBROUTINE ZSPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, + $ LDX, RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZSPSVX uses the diagonal pivoting factorization A = U*D*U**T or +* A = L*D*L**T to compute the solution to a complex system of linear +* equations A * X = B, where A is an N-by-N symmetric matrix stored +* in packed format and X and B are N-by-NRHS matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'N', the diagonal pivoting method is used to factor A as +* A = U * D * U**T, if UPLO = 'U', or +* A = L * D * L**T, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices and D is symmetric and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. +* +* 2. If some D(i,i)=0, so that D is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 3. The system of equations is solved for X using the factored form +* of A. +* +* 4. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of A has been +* supplied on entry. +* = 'F': On entry, AFP and IPIV contain the factored form +* of A. AP, AFP and IPIV will not be modified. +* = 'N': The matrix A will be copied to AFP and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The upper or lower triangle of the symmetric matrix A, packed +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* +* AFP (input or output) COMPLEX*16 array, dimension (N*(N+1)/2) +* If FACT = 'F', then AFP is an input argument and on entry +* contains the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**T or A = L*D*L**T as computed by ZSPTRF, stored as +* a packed triangular matrix in the same storage format as A. +* +* If FACT = 'N', then AFP is an output argument and on exit +* contains the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**T or A = L*D*L**T as computed by ZSPTRF, stored as +* a packed triangular matrix in the same storage format as A. +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains details of the interchanges and the block structure +* of D, as determined by ZSPTRF. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains details of the interchanges and the block structure +* of D, as determined by ZSPTRF. +* +* B (input) COMPLEX*16 array, dimension (LDB,NRHS) +* The N-by-NRHS right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) COMPLEX*16 array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) DOUBLE PRECISION +* The estimate of the reciprocal condition number of the matrix +* A. If RCOND is less than the machine precision (in +* particular, if RCOND = 0), the matrix is singular to working +* precision. This condition is indicated by a return code of +* INFO > 0. +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: D(i,i) is exactly zero. The factorization +* has been completed but the factor D is exactly +* singular, so the solution and error bounds could +* not be computed. RCOND = 0 is returned. +* = N+1: D is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* Further Details +* =============== +* +* The packed storage scheme is illustrated by the following example +* when N = 4, UPLO = 'U': +* +* Two-dimensional storage of the symmetric matrix A: +* +* a11 a12 a13 a14 +* a22 a23 a24 +* a33 a34 (aij = aji) +* a44 +* +* Packed storage of the upper triangle of A: +* +* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT + DOUBLE PRECISION ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANSP + EXTERNAL LSAME, DLAMCH, ZLANSP +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZLACPY, ZSPCON, ZSPRFS, ZSPTRF, + $ ZSPTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSPSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL ZCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 ) + CALL ZSPTRF( UPLO, N, AFP, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = ZLANSP( 'I', UPLO, N, AP, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL ZSPCON( UPLO, N, AFP, IPIV, ANORM, RCOND, WORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution vectors X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZSPTRS( UPLO, N, NRHS, AFP, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL ZSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, FERR, + $ BERR, WORK, RWORK, INFO ) +* + RETURN +* +* End of ZSPSVX +* + END diff --git a/costa/native/external/lapack/zsptrf.f b/costa/native/external/lapack/zsptrf.f new file mode 100644 index 000000000..69a81901e --- /dev/null +++ b/costa/native/external/lapack/zsptrf.f @@ -0,0 +1,556 @@ + SUBROUTINE ZSPTRF( UPLO, N, AP, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AP( * ) +* .. +* +* Purpose +* ======= +* +* ZSPTRF computes the factorization of a complex symmetric matrix A +* stored in packed format using the Bunch-Kaufman diagonal pivoting +* method: +* +* A = U*D*U**T or A = L*D*L**T +* +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is symmetric and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangle of the symmetric matrix +* A, packed columnwise in a linear array. The j-th column of A +* is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L, stored as a packed triangular +* matrix overwriting A (see below for further details). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, and division by zero will occur if it +* is used to solve a system of equations. +* +* Further Details +* =============== +* +* 5-96 - Based on modifications by J. Lewis, Boeing Computer Services +* Company +* +* If UPLO = 'U', then A = U*D*U', where +* U = P(n)*U(n)* ... *P(k)U(k)* ..., +* i.e., U is a product of terms P(k)*U(k), where k decreases from n to +* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I v 0 ) k-s +* U(k) = ( 0 I 0 ) s +* ( 0 0 I ) n-k +* k-s s n-k +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +* and A(k,k), and v overwrites A(1:k-2,k-1:k). +* +* If UPLO = 'L', then A = L*D*L', where +* L = P(1)*L(1)* ... *P(k)*L(k)* ..., +* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I 0 0 ) k-1 +* L(k) = ( 0 I 0 ) s +* ( 0 v I ) n-k-s+1 +* k-1 s n-k-s+1 +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KC, KK, KNC, KP, KPC, + $ KSTEP, KX, NPP + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, ROWMAX + COMPLEX*16 D11, D12, D21, D22, R1, T, WK, WKM1, WKP1, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + EXTERNAL LSAME, IZAMAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZSCAL, ZSPR, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSPTRF', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U' using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + KC = ( N-1 )*N / 2 + 1 + 10 CONTINUE + KNC = KC +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( AP( KC+K-1 ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = IZAMAX( K-1, AP( KC ), 1 ) + COLMAX = CABS1( AP( KC+IMAX-1 ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + ROWMAX = ZERO + JMAX = IMAX + KX = IMAX*( IMAX+1 ) / 2 + IMAX + DO 20 J = IMAX + 1, K + IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = CABS1( AP( KX ) ) + JMAX = J + END IF + KX = KX + J + 20 CONTINUE + KPC = ( IMAX-1 )*IMAX / 2 + 1 + IF( IMAX.GT.1 ) THEN + JMAX = IZAMAX( IMAX-1, AP( KPC ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( AP( KPC+IMAX-1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL ZSWAP( KP-1, AP( KNC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 30 J = KP + 1, KK - 1 + KX = KX + J - 1 + T = AP( KNC+J-1 ) + AP( KNC+J-1 ) = AP( KX ) + AP( KX ) = T + 30 CONTINUE + T = AP( KNC+KK-1 ) + AP( KNC+KK-1 ) = AP( KPC+KP-1 ) + AP( KPC+KP-1 ) = T + IF( KSTEP.EQ.2 ) THEN + T = AP( KC+K-2 ) + AP( KC+K-2 ) = AP( KC+KP-1 ) + AP( KC+KP-1 ) = T + END IF + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)' +* + R1 = CONE / AP( KC+K-1 ) + CALL ZSPR( UPLO, K-1, -R1, AP( KC ), 1, AP ) +* +* Store U(k) in column k +* + CALL ZSCAL( K-1, R1, AP( KC ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )' +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )' +* + IF( K.GT.2 ) THEN +* + D12 = AP( K-1+( K-1 )*K / 2 ) + D22 = AP( K-1+( K-2 )*( K-1 ) / 2 ) / D12 + D11 = AP( K+( K-1 )*K / 2 ) / D12 + T = CONE / ( D11*D22-CONE ) + D12 = T / D12 +* + DO 50 J = K - 2, 1, -1 + WKM1 = D12*( D11*AP( J+( K-2 )*( K-1 ) / 2 )- + $ AP( J+( K-1 )*K / 2 ) ) + WK = D12*( D22*AP( J+( K-1 )*K / 2 )- + $ AP( J+( K-2 )*( K-1 ) / 2 ) ) + DO 40 I = J, 1, -1 + AP( I+( J-1 )*J / 2 ) = AP( I+( J-1 )*J / 2 ) - + $ AP( I+( K-1 )*K / 2 )*WK - + $ AP( I+( K-2 )*( K-1 ) / 2 )*WKM1 + 40 CONTINUE + AP( J+( K-1 )*K / 2 ) = WK + AP( J+( K-2 )*( K-1 ) / 2 ) = WKM1 + 50 CONTINUE +* + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + KC = KNC - K + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L' using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + KC = 1 + NPP = N*( N+1 ) / 2 + 60 CONTINUE + KNC = KC +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( AP( KC ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + IZAMAX( N-K, AP( KC+1 ), 1 ) + COLMAX = CABS1( AP( KC+IMAX-K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + ROWMAX = ZERO + KX = KC + IMAX - K + DO 70 J = K, IMAX - 1 + IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = CABS1( AP( KX ) ) + JMAX = J + END IF + KX = KX + N - J + 70 CONTINUE + KPC = NPP - ( N-IMAX+1 )*( N-IMAX+2 ) / 2 + 1 + IF( IMAX.LT.N ) THEN + JMAX = IMAX + IZAMAX( N-IMAX, AP( KPC+1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( AP( KPC ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC + N - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, AP( KNC+KP-KK+1 ), 1, AP( KPC+1 ), + $ 1 ) + KX = KNC + KP - KK + DO 80 J = KK + 1, KP - 1 + KX = KX + N - J + 1 + T = AP( KNC+J-KK ) + AP( KNC+J-KK ) = AP( KX ) + AP( KX ) = T + 80 CONTINUE + T = AP( KNC ) + AP( KNC ) = AP( KPC ) + AP( KPC ) = T + IF( KSTEP.EQ.2 ) THEN + T = AP( KC+1 ) + AP( KC+1 ) = AP( KC+KP-K ) + AP( KC+KP-K ) = T + END IF + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)' +* + R1 = CONE / AP( KC ) + CALL ZSPR( UPLO, N-K, -R1, AP( KC+1 ), 1, + $ AP( KC+N-K+1 ) ) +* +* Store L(k) in column K +* + CALL ZSCAL( N-K, R1, AP( KC+1 ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k): columns K and K+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )' +* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )' +* +* where L(k) and L(k+1) are the k-th and (k+1)-th +* columns of L +* + D21 = AP( K+1+( K-1 )*( 2*N-K ) / 2 ) + D11 = AP( K+1+K*( 2*N-K-1 ) / 2 ) / D21 + D22 = AP( K+( K-1 )*( 2*N-K ) / 2 ) / D21 + T = CONE / ( D11*D22-CONE ) + D21 = T / D21 +* + DO 100 J = K + 2, N + WK = D21*( D11*AP( J+( K-1 )*( 2*N-K ) / 2 )- + $ AP( J+K*( 2*N-K-1 ) / 2 ) ) + WKP1 = D21*( D22*AP( J+K*( 2*N-K-1 ) / 2 )- + $ AP( J+( K-1 )*( 2*N-K ) / 2 ) ) + DO 90 I = J, N + AP( I+( J-1 )*( 2*N-J ) / 2 ) = AP( I+( J-1 )* + $ ( 2*N-J ) / 2 ) - AP( I+( K-1 )*( 2*N-K ) / + $ 2 )*WK - AP( I+K*( 2*N-K-1 ) / 2 )*WKP1 + 90 CONTINUE + AP( J+( K-1 )*( 2*N-K ) / 2 ) = WK + AP( J+K*( 2*N-K-1 ) / 2 ) = WKP1 + 100 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + KC = KNC + N - K + 2 + GO TO 60 +* + END IF +* + 110 CONTINUE + RETURN +* +* End of ZSPTRF +* + END diff --git a/costa/native/external/lapack/zsptri.f b/costa/native/external/lapack/zsptri.f new file mode 100644 index 000000000..eb5dec827 --- /dev/null +++ b/costa/native/external/lapack/zsptri.f @@ -0,0 +1,338 @@ + SUBROUTINE ZSPTRI( UPLO, N, AP, IPIV, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZSPTRI computes the inverse of a complex symmetric indefinite matrix +* A in packed storage using the factorization A = U*D*U**T or +* A = L*D*L**T computed by ZSPTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the block diagonal matrix D and the multipliers +* used to obtain the factor U or L as computed by ZSPTRF, +* stored as a packed triangular matrix. +* +* On exit, if INFO = 0, the (symmetric) inverse of the original +* matrix, stored as a packed triangular matrix. The j-th column +* of inv(A) is stored in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j; +* if UPLO = 'L', +* AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by ZSPTRF. +* +* WORK (workspace) COMPLEX*16 array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +* inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KCNEXT, KP, KPC, KSTEP, KX, NPP + COMPLEX*16 AK, AKKP1, AKP1, D, T, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTU + EXTERNAL LSAME, ZDOTU +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZSPMV, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSPTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + KP = N*( N+1 ) / 2 + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP - INFO + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + KP = 1 + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP + N - INFO + 1 + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + KCNEXT = KC + K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC+K-1 ) = ONE / AP( KC+K-1 ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL ZCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL ZSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ), + $ 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ ZDOTU( K-1, WORK, 1, AP( KC ), 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = AP( KCNEXT+K-1 ) + AK = AP( KC+K-1 ) / T + AKP1 = AP( KCNEXT+K ) / T + AKKP1 = AP( KCNEXT+K-1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KC+K-1 ) = AKP1 / D + AP( KCNEXT+K ) = AK / D + AP( KCNEXT+K-1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL ZCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL ZSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ), + $ 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ ZDOTU( K-1, WORK, 1, AP( KC ), 1 ) + AP( KCNEXT+K-1 ) = AP( KCNEXT+K-1 ) - + $ ZDOTU( K-1, AP( KC ), 1, AP( KCNEXT ), + $ 1 ) + CALL ZCOPY( K-1, AP( KCNEXT ), 1, WORK, 1 ) + CALL ZSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, + $ AP( KCNEXT ), 1 ) + AP( KCNEXT+K ) = AP( KCNEXT+K ) - + $ ZDOTU( K-1, WORK, 1, AP( KCNEXT ), 1 ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT + K + 1 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + KPC = ( KP-1 )*KP / 2 + 1 + CALL ZSWAP( KP-1, AP( KC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 40 J = KP + 1, K - 1 + KX = KX + J - 1 + TEMP = AP( KC+J-1 ) + AP( KC+J-1 ) = AP( KX ) + AP( KX ) = TEMP + 40 CONTINUE + TEMP = AP( KC+K-1 ) + AP( KC+K-1 ) = AP( KPC+KP-1 ) + AP( KPC+KP-1 ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC+K+K-1 ) + AP( KC+K+K-1 ) = AP( KC+K+KP-1 ) + AP( KC+K+KP-1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + KC = KCNEXT + GO TO 30 + 50 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + NPP = N*( N+1 ) / 2 + K = N + KC = NPP + 60 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 80 +* + KCNEXT = KC - ( N-K+2 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC ) = ONE / AP( KC ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL ZCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL ZSPMV( UPLO, N-K, -ONE, AP( KC+N-K+1 ), WORK, 1, + $ ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - ZDOTU( N-K, WORK, 1, AP( KC+1 ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = AP( KCNEXT+1 ) + AK = AP( KCNEXT ) / T + AKP1 = AP( KC ) / T + AKKP1 = AP( KCNEXT+1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KCNEXT ) = AKP1 / D + AP( KC ) = AK / D + AP( KCNEXT+1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL ZCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL ZSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1, + $ ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - ZDOTU( N-K, WORK, 1, AP( KC+1 ), + $ 1 ) + AP( KCNEXT+1 ) = AP( KCNEXT+1 ) - + $ ZDOTU( N-K, AP( KC+1 ), 1, + $ AP( KCNEXT+2 ), 1 ) + CALL ZCOPY( N-K, AP( KCNEXT+2 ), 1, WORK, 1 ) + CALL ZSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1, + $ ZERO, AP( KCNEXT+2 ), 1 ) + AP( KCNEXT ) = AP( KCNEXT ) - + $ ZDOTU( N-K, WORK, 1, AP( KCNEXT+2 ), 1 ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT - ( N-K+3 ) + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + KPC = NPP - ( N-KP+1 )*( N-KP+2 ) / 2 + 1 + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, AP( KC+KP-K+1 ), 1, AP( KPC+1 ), 1 ) + KX = KC + KP - K + DO 70 J = K + 1, KP - 1 + KX = KX + N - J + 1 + TEMP = AP( KC+J-K ) + AP( KC+J-K ) = AP( KX ) + AP( KX ) = TEMP + 70 CONTINUE + TEMP = AP( KC ) + AP( KC ) = AP( KPC ) + AP( KPC ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC-N+K-1 ) + AP( KC-N+K-1 ) = AP( KC-N+KP-1 ) + AP( KC-N+KP-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + KC = KCNEXT + GO TO 60 + 80 CONTINUE + END IF +* + RETURN +* +* End of ZSPTRI +* + END diff --git a/costa/native/external/lapack/zsptrs.f b/costa/native/external/lapack/zsptrs.f new file mode 100644 index 000000000..3a621a63a --- /dev/null +++ b/costa/native/external/lapack/zsptrs.f @@ -0,0 +1,378 @@ + SUBROUTINE ZSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZSPTRS solves a system of linear equations A*X = B with a complex +* symmetric matrix A stored in packed format using the factorization +* A = U*D*U**T or A = L*D*L**T computed by ZSPTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by ZSPTRF, stored as a +* packed triangular matrix. +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by ZSPTRF. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KP + COMPLEX*16 AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMV, ZGERU, ZSCAL, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U'. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + KC = KC - K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL ZGERU( K-1, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL ZSCAL( NRHS, ONE / AP( KC+K-1 ), B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL ZSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL ZGERU( K-2, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL ZGERU( K-2, NRHS, -ONE, AP( KC-( K-1 ) ), 1, + $ B( K-1, 1 ), LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+K-2 ) + AKM1 = AP( KC-1 ) / AKM1K + AK = AP( KC+K-1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / AKM1K + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + KC = KC - K + 1 + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U'*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U'(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL ZGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ), + $ 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + K + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U'(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + CALL ZGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ), + $ 1, ONE, B( K, 1 ), LDB ) + CALL ZGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, + $ AP( KC+K ), 1, ONE, B( K+1, 1 ), LDB ) +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + 2*K + 1 + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L'. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL ZGERU( N-K, NRHS, -ONE, AP( KC+1 ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL ZSCAL( NRHS, ONE / AP( KC ), B( K, 1 ), LDB ) + KC = KC + N - K + 1 + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL ZSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL ZGERU( N-K-1, NRHS, -ONE, AP( KC+2 ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL ZGERU( N-K-1, NRHS, -ONE, AP( KC+N-K+2 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+1 ) + AKM1 = AP( KC ) / AKM1K + AK = AP( KC+N-K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / AKM1K + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + KC = KC + 2*( N-K ) + 1 + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L'*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + KC = KC - ( N-K+1 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L'(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL ZGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L'(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL ZGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB ) + CALL ZGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC-( N-K ) ), 1, ONE, B( K-1, 1 ), + $ LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC - ( N-K+2 ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of ZSPTRS +* + END diff --git a/costa/native/external/lapack/zstedc.f b/costa/native/external/lapack/zstedc.f new file mode 100644 index 000000000..c992efd73 --- /dev/null +++ b/costa/native/external/lapack/zstedc.f @@ -0,0 +1,390 @@ + SUBROUTINE ZSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, RWORK, + $ LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), RWORK( * ) + COMPLEX*16 WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZSTEDC computes all eigenvalues and, optionally, eigenvectors of a +* symmetric tridiagonal matrix using the divide and conquer method. +* The eigenvectors of a full or band complex Hermitian matrix can also +* be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this +* matrix to tridiagonal form. +* +* This code makes very mild assumptions about floating point +* arithmetic. It will work on machines with a guard digit in +* add/subtract, or on those binary machines without guard digits +* which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. +* It could conceivably fail on hexadecimal or decimal machines +* without guard digits, but we know of none. See DLAED3 for details. +* +* Arguments +* ========= +* +* COMPZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only. +* = 'I': Compute eigenvectors of tridiagonal matrix also. +* = 'V': Compute eigenvectors of original Hermitian matrix +* also. On entry, Z contains the unitary matrix used +* to reduce the original matrix to tridiagonal form. +* +* N (input) INTEGER +* The dimension of the symmetric tridiagonal matrix. N >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the diagonal elements of the tridiagonal matrix. +* On exit, if INFO = 0, the eigenvalues in ascending order. +* +* E (input/output) DOUBLE PRECISION array, dimension (N-1) +* On entry, the subdiagonal elements of the tridiagonal matrix. +* On exit, E has been destroyed. +* +* Z (input/output) COMPLEX*16 array, dimension (LDZ,N) +* On entry, if COMPZ = 'V', then Z contains the unitary +* matrix used in the reduction to tridiagonal form. +* On exit, if INFO = 0, then if COMPZ = 'V', Z contains the +* orthonormal eigenvectors of the original Hermitian matrix, +* and if COMPZ = 'I', Z contains the orthonormal eigenvectors +* of the symmetric tridiagonal matrix. +* If COMPZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1. +* If eigenvectors are desired, then LDZ >= max(1,N). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1. +* If COMPZ = 'V' and N > 1, LWORK must be at least N*N. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace/output) DOUBLE PRECISION array, +* dimension (LRWORK) +* On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +* +* LRWORK (input) INTEGER +* The dimension of the array RWORK. +* If COMPZ = 'N' or N <= 1, LRWORK must be at least 1. +* If COMPZ = 'V' and N > 1, LRWORK must be at least +* 1 + 3*N + 2*N*lg N + 3*N**2 , +* where lg( N ) = smallest integer k such +* that 2**k >= N. +* If COMPZ = 'I' and N > 1, LRWORK must be at least +* 1 + 4*N + 2*N**2 . +* +* If LRWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the RWORK array, +* returns this value as the first entry of the RWORK array, and +* no error message related to LRWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. +* If COMPZ = 'N' or N <= 1, LIWORK must be at least 1. +* If COMPZ = 'V' or N > 1, LIWORK must be at least +* 6 + 6*N + 5*N*lg N. +* If COMPZ = 'I' or N > 1, LIWORK must be at least +* 3 + 5*N . +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* > 0: The algorithm failed to compute an eigenvalue while +* working on the submatrix lying in rows and columns +* INFO/(N+1) through mod(INFO,N+1). +* +* Further Details +* =============== +* +* Based on contributions by +* Jeff Rutter, Computer Science Division, University of California +* at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER END, I, ICOMPZ, II, J, K, LGN, LIWMIN, LL, + $ LRWMIN, LWMIN, M, SMLSIZ, START + DOUBLE PRECISION EPS, ORGNRM, P, TINY +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL LSAME, ILAENV, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DLASCL, DLASET, DSTEDC, DSTEQR, DSTERF, XERBLA, + $ ZLACPY, ZLACRM, ZLAED0, ZSTEQR, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, LOG, MAX, MOD, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( N.LE.1 .OR. ICOMPZ.LE.0 ) THEN + LWMIN = 1 + LIWMIN = 1 + LRWMIN = 1 + ELSE + LGN = INT( LOG( DBLE( N ) ) / LOG( TWO ) ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( ICOMPZ.EQ.1 ) THEN + LWMIN = N*N + LRWMIN = 1 + 3*N + 2*N*LGN + 3*N**2 + LIWMIN = 6 + 6*N + 5*N*LGN + ELSE IF( ICOMPZ.EQ.2 ) THEN + LWMIN = 1 + LRWMIN = 1 + 4*N + 2*N**2 + LIWMIN = 3 + 5*N + END IF + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSTEDC', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( N.EQ.1 ) THEN + IF( ICOMPZ.NE.0 ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* + SMLSIZ = ILAENV( 9, 'ZSTEDC', ' ', 0, 0, 0, 0 ) +* +* If the following conditional clause is removed, then the routine +* will use the Divide and Conquer routine to compute only the +* eigenvalues, which requires (3N + 3N**2) real workspace and +* (2 + 5N + 2N lg(N)) integer workspace. +* Since on many architectures DSTERF is much faster than any other +* algorithm for finding eigenvalues only, it is used here +* as the default. +* +* If COMPZ = 'N', use DSTERF to compute the eigenvalues. +* + IF( ICOMPZ.EQ.0 ) THEN + CALL DSTERF( N, D, E, INFO ) + RETURN + END IF +* +* If N is smaller than the minimum divide size (SMLSIZ+1), then +* solve the problem with another solver. +* + IF( N.LE.SMLSIZ ) THEN + IF( ICOMPZ.EQ.0 ) THEN + CALL DSTERF( N, D, E, INFO ) + RETURN + ELSE IF( ICOMPZ.EQ.2 ) THEN + CALL ZSTEQR( 'I', N, D, E, Z, LDZ, RWORK, INFO ) + RETURN + ELSE + CALL ZSTEQR( 'V', N, D, E, Z, LDZ, RWORK, INFO ) + RETURN + END IF + END IF +* +* If COMPZ = 'I', we simply call DSTEDC instead. +* + IF( ICOMPZ.EQ.2 ) THEN + CALL DLASET( 'Full', N, N, ZERO, ONE, RWORK, N ) + LL = N*N + 1 + CALL DSTEDC( 'I', N, D, E, RWORK, N, RWORK( LL ), LRWORK-LL+1, + $ IWORK, LIWORK, INFO ) + DO 20 J = 1, N + DO 10 I = 1, N + Z( I, J ) = RWORK( ( J-1 )*N+I ) + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* From now on, only option left to be handled is COMPZ = 'V', +* i.e. ICOMPZ = 1. +* +* Scale. +* + ORGNRM = DLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) + $ RETURN +* + EPS = DLAMCH( 'Epsilon' ) +* + START = 1 +* +* while ( START <= N ) +* + 30 CONTINUE + IF( START.LE.N ) THEN +* +* Let END be the position of the next subdiagonal entry such that +* E( END ) <= TINY or END = N if no such subdiagonal exists. The +* matrix identified by the elements between START and END +* constitutes an independent sub-problem. +* + END = START + 40 CONTINUE + IF( END.LT.N ) THEN + TINY = EPS*SQRT( ABS( D( END ) ) )*SQRT( ABS( D( END+1 ) ) ) + IF( ABS( E( END ) ).GT.TINY ) THEN + END = END + 1 + GO TO 40 + END IF + END IF +* +* (Sub) Problem determined. Compute its size and solve it. +* + M = END - START + 1 + IF( M.GT.SMLSIZ ) THEN + INFO = SMLSIZ +* +* Scale. +* + ORGNRM = DLANST( 'M', M, D( START ), E( START ) ) + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, M, 1, D( START ), M, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, M-1, 1, E( START ), + $ M-1, INFO ) +* + CALL ZLAED0( N, M, D( START ), E( START ), Z( 1, START ), + $ LDZ, WORK, N, RWORK, IWORK, INFO ) + IF( INFO.GT.0 ) THEN + INFO = ( INFO / ( M+1 )+START-1 )*( N+1 ) + + $ MOD( INFO, ( M+1 ) ) + START - 1 + RETURN + END IF +* +* Scale back. +* + CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, M, 1, D( START ), M, + $ INFO ) +* + ELSE + CALL DSTEQR( 'I', M, D( START ), E( START ), RWORK, M, + $ RWORK( M*M+1 ), INFO ) + CALL ZLACRM( N, M, Z( 1, START ), LDZ, RWORK, M, WORK, N, + $ RWORK( M*M+1 ) ) + CALL ZLACPY( 'A', N, M, WORK, N, Z( 1, START ), LDZ ) + IF( INFO.GT.0 ) THEN + INFO = START*( N+1 ) + END + RETURN + END IF + END IF +* + START = END + 1 + GO TO 30 + END IF +* +* endwhile +* +* If the problem split any number of times, then the eigenvalues +* will not be properly ordered. Here we permute the eigenvalues +* (and the associated eigenvectors) into ascending order. +* + IF( M.NE.N ) THEN +* +* Use Selection Sort to minimize swaps of eigenvectors +* + DO 60 II = 2, N + I = II - 1 + K = I + P = D( I ) + DO 50 J = II, N + IF( D( J ).LT.P ) THEN + K = J + P = D( J ) + END IF + 50 CONTINUE + IF( K.NE.I ) THEN + D( K ) = D( I ) + D( I ) = P + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 ) + END IF + 60 CONTINUE + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of ZSTEDC +* + END diff --git a/costa/native/external/lapack/zstegr.f b/costa/native/external/lapack/zstegr.f new file mode 100644 index 000000000..5188f76a0 --- /dev/null +++ b/costa/native/external/lapack/zstegr.f @@ -0,0 +1,405 @@ + SUBROUTINE ZSTEGR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL, + $ M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE + INTEGER IL, INFO, IU, LDZ, LIWORK, LWORK, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ) + COMPLEX*16 Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZSTEGR computes selected eigenvalues and, optionally, eigenvectors +* of a real symmetric tridiagonal matrix T. Eigenvalues and +* eigenvectors can be selected by specifying either a range of values +* or a range of indices for the desired eigenvalues. The eigenvalues +* are computed by the dqds algorithm, while orthogonal eigenvectors are +* computed from various ``good'' L D L^T representations (also known as +* Relatively Robust Representations). Gram-Schmidt orthogonalization is +* avoided as far as possible. More specifically, the various steps of +* the algorithm are as follows. For the i-th unreduced block of T, +* (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T +* is a relatively robust representation, +* (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high +* relative accuracy by the dqds algorithm, +* (c) If there is a cluster of close eigenvalues, "choose" sigma_i +* close to the cluster, and go to step (a), +* (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, +* compute the corresponding eigenvector by forming a +* rank-revealing twisted factorization. +* The desired accuracy of the output can be specified by the input +* parameter ABSTOL. +* +* For more details, see "A new O(n^2) algorithm for the symmetric +* tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, +* Computer Science Division Technical Report No. UCB/CSD-97-971, +* UC Berkeley, May 1997. +* +* Note 1 : Currently ZSTEGR is only set up to find ALL the n +* eigenvalues and eigenvectors of T in O(n^2) time +* Note 2 : Currently the routine ZSTEIN is called when an appropriate +* sigma_i cannot be chosen in step (c) above. ZSTEIN invokes modified +* Gram-Schmidt when eigenvalues are close. +* Note 3 : ZSTEGR works only on machines which follow ieee-754 +* floating-point standard in their handling of infinities and NaNs. +* Normal execution of ZSTEGR may create NaNs and infinities and hence +* may abort due to a floating point exception in environments which +* do not conform to the ieee standard. +* +* Arguments +* ========= +* +* JOBZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only; +* = 'V': Compute eigenvalues and eigenvectors. +* +* RANGE (input) CHARACTER*1 +* = 'A': all eigenvalues will be found. +* = 'V': all eigenvalues in the half-open interval (VL,VU] +* will be found. +* = 'I': the IL-th through IU-th eigenvalues will be found. +********** Only RANGE = 'A' is currently supported ********************* +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the n diagonal elements of the tridiagonal matrix +* T. On exit, D is overwritten. +* +* E (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix T in elements 1 to N-1 of E; E(N) need not be set. +* On exit, E is overwritten. +* +* VL (input) DOUBLE PRECISION +* VU (input) DOUBLE PRECISION +* If RANGE='V', the lower and upper bounds of the interval to +* be searched for eigenvalues. VL < VU. +* Not referenced if RANGE = 'A' or 'I'. +* +* IL (input) INTEGER +* IU (input) INTEGER +* If RANGE='I', the indices (in ascending order) of the +* smallest and largest eigenvalues to be returned. +* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +* Not referenced if RANGE = 'A' or 'V'. +* +* ABSTOL (input) DOUBLE PRECISION +* The absolute error tolerance for the +* eigenvalues/eigenvectors. IF JOBZ = 'V', the eigenvalues and +* eigenvectors output have residual norms bounded by ABSTOL, +* and the dot products between different eigenvectors are +* bounded by ABSTOL. If ABSTOL is less than N*EPS*|T|, then +* N*EPS*|T| will be used in its place, where EPS is the +* machine precision and |T| is the 1-norm of the tridiagonal +* matrix. The eigenvalues are computed to an accuracy of +* EPS*|T| irrespective of ABSTOL. If high relative accuracy +* is important, set ABSTOL to DLAMCH( 'Safe minimum' ). +* See Barlow and Demmel "Computing Accurate Eigensystems of +* Scaled Diagonally Dominant Matrices", LAPACK Working Note #7 +* for a discussion of which matrices define their eigenvalues +* to high relative accuracy. +* +* M (output) INTEGER +* The total number of eigenvalues found. 0 <= M <= N. +* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +* +* W (output) DOUBLE PRECISION array, dimension (N) +* The first M elements contain the selected eigenvalues in +* ascending order. +* +* Z (output) COMPLEX*16 array, dimension (LDZ, max(1,M) ) +* If JOBZ = 'V', then if INFO = 0, the first M columns of Z +* contain the orthonormal eigenvectors of the matrix T +* corresponding to the selected eigenvalues, with the i-th +* column of Z holding the eigenvector associated with W(i). +* If JOBZ = 'N', then Z is not referenced. +* Note: the user must ensure that at least max(1,M) columns are +* supplied in the array Z; if RANGE = 'V', the exact value of M +* is not known in advance and an upper bound must be used. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* JOBZ = 'V', LDZ >= max(1,N). +* +* ISUPPZ (output) INTEGER ARRAY, dimension ( 2*max(1,M) ) +* The support of the eigenvectors in Z, i.e., the indices +* indicating the nonzero elements in Z. The i-th eigenvector +* is nonzero only in elements ISUPPZ( 2*i-1 ) through +* ISUPPZ( 2*i ). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal +* (and minimal) LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,18*N) +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER array, dimension (LIWORK) +* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. LIWORK >= max(1,10*N) +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = 1, internal error in DLARRE, +* if INFO = 2, internal error in ZLARRV. +* +* Further Details +* =============== +* +* Based on contributions by +* Inderjit Dhillon, IBM Almaden, USA +* Osni Marques, LBNL/NERSC, USA +* Ken Stanley, Computer Science Division, University of +* California at Berkeley, USA +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ + INTEGER I, IBEGIN, IEND, IINDBL, IINDWK, IINFO, IINSPL, + $ INDGRS, INDWOF, INDWRK, ITMP, J, JJ, LIWMIN, + $ LWMIN, NSPLIT + DOUBLE PRECISION BIGNUM, EPS, RMAX, RMIN, SAFMIN, SCALE, SMLNUM, + $ THRESH, TMP, TNRM, TOL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL LSAME, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DLARRE, DSCAL, XERBLA, ZLARRV, ZLASET, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) ) + LWMIN = 18*N + LIWMIN = 10*N +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 +* +* The following two lines need to be removed once the +* RANGE = 'V' and RANGE = 'I' options are provided. +* + ELSE IF( VALEIG .OR. INDEIG ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN + INFO = -7 + ELSE IF( INDEIG .AND. IL.LT.1 ) THEN + INFO = -8 +* The following change should be made in DSTEVX also, otherwise +* IL can be specified as N+1 and IU as N. +* ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN + ELSE IF( INDEIG .AND. ( IU.LT.IL .OR. IU.GT.N ) ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -14 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -17 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSTEGR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = D( 1 ) + ELSE + IF( VL.LT.D( 1 ) .AND. VU.GE.D( 1 ) ) THEN + M = 1 + W( 1 ) = D( 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + SCALE = ONE + TNRM = DLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + SCALE = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + SCALE = RMAX / TNRM + END IF + IF( SCALE.NE.ONE ) THEN + CALL DSCAL( N, SCALE, D, 1 ) + CALL DSCAL( N-1, SCALE, E, 1 ) + TNRM = TNRM*SCALE + END IF + INDGRS = 1 + INDWOF = 2*N + 1 + INDWRK = 3*N + 1 +* + IINSPL = 1 + IINDBL = N + 1 + IINDWK = 2*N + 1 +* + CALL ZLASET( 'Full', N, N, CZERO, CZERO, Z, LDZ ) +* +* Compute the desired eigenvalues of the tridiagonal after splitting +* into smaller subblocks if the corresponding of-diagonal elements +* are small +* + THRESH = EPS*TNRM + CALL DLARRE( N, D, E, THRESH, NSPLIT, IWORK( IINSPL ), M, W, + $ WORK( INDWOF ), WORK( INDGRS ), WORK( INDWRK ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 1 + RETURN + END IF +* + IF( WANTZ ) THEN +* +* Compute the desired eigenvectors corresponding to the computed +* eigenvalues +* + TOL = MAX( ABSTOL, DBLE( N )*THRESH ) + IBEGIN = 1 + DO 20 I = 1, NSPLIT + IEND = IWORK( IINSPL+I-1 ) + DO 10 J = IBEGIN, IEND + IWORK( IINDBL+J-1 ) = I + 10 CONTINUE + IBEGIN = IEND + 1 + 20 CONTINUE +* + CALL ZLARRV( N, D, E, IWORK( IINSPL ), M, W, IWORK( IINDBL ), + $ WORK( INDGRS ), TOL, Z, LDZ, ISUPPZ, + $ WORK( INDWRK ), IWORK( IINDWK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 2 + RETURN + END IF +* + END IF +* + IBEGIN = 1 + DO 40 I = 1, NSPLIT + IEND = IWORK( IINSPL+I-1 ) + DO 30 J = IBEGIN, IEND + W( J ) = W( J ) + WORK( INDWOF+I-1 ) + 30 CONTINUE + IBEGIN = IEND + 1 + 40 CONTINUE +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( SCALE.NE.ONE ) THEN + CALL DSCAL( M, ONE / SCALE, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( NSPLIT.GT.1 ) THEN + DO 60 J = 1, M - 1 + I = 0 + TMP = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP ) THEN + I = JJ + TMP = W( JJ ) + END IF + 50 CONTINUE + IF( I.NE.0 ) THEN + W( I ) = W( J ) + W( J ) = TMP + IF( WANTZ ) THEN + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + ITMP = ISUPPZ( 2*I-1 ) + ISUPPZ( 2*I-1 ) = ISUPPZ( 2*J-1 ) + ISUPPZ( 2*J-1 ) = ITMP + ITMP = ISUPPZ( 2*I ) + ISUPPZ( 2*I ) = ISUPPZ( 2*J ) + ISUPPZ( 2*J ) = ITMP + END IF + END IF + 60 CONTINUE + END IF +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of ZSTEGR +* + END diff --git a/costa/native/external/lapack/zstein.f b/costa/native/external/lapack/zstein.f new file mode 100644 index 000000000..e921df06e --- /dev/null +++ b/costa/native/external/lapack/zstein.f @@ -0,0 +1,377 @@ + SUBROUTINE ZSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, Z, LDZ, WORK, + $ IWORK, IFAIL, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDZ, M, N +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), IFAIL( * ), ISPLIT( * ), + $ IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ) + COMPLEX*16 Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZSTEIN computes the eigenvectors of a real symmetric tridiagonal +* matrix T corresponding to specified eigenvalues, using inverse +* iteration. +* +* The maximum number of iterations allowed for each eigenvector is +* specified by an internal parameter MAXITS (currently set to 5). +* +* Although the eigenvectors are real, they are stored in a complex +* array, which may be passed to ZUNMTR or ZUPMTR for back +* transformation to the eigenvectors of a complex Hermitian matrix +* which was reduced to tridiagonal form. +* +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input) DOUBLE PRECISION array, dimension (N) +* The n diagonal elements of the tridiagonal matrix T. +* +* E (input) DOUBLE PRECISION array, dimension (N) +* The (n-1) subdiagonal elements of the tridiagonal matrix +* T, stored in elements 1 to N-1; E(N) need not be set. +* +* M (input) INTEGER +* The number of eigenvectors to be found. 0 <= M <= N. +* +* W (input) DOUBLE PRECISION array, dimension (N) +* The first M elements of W contain the eigenvalues for +* which eigenvectors are to be computed. The eigenvalues +* should be grouped by split-off block and ordered from +* smallest to largest within the block. ( The output array +* W from DSTEBZ with ORDER = 'B' is expected here. ) +* +* IBLOCK (input) INTEGER array, dimension (N) +* The submatrix indices associated with the corresponding +* eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to +* the first submatrix from the top, =2 if W(i) belongs to +* the second submatrix, etc. ( The output array IBLOCK +* from DSTEBZ is expected here. ) +* +* ISPLIT (input) INTEGER array, dimension (N) +* The splitting points, at which T breaks up into submatrices. +* The first submatrix consists of rows/columns 1 to +* ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 +* through ISPLIT( 2 ), etc. +* ( The output array ISPLIT from DSTEBZ is expected here. ) +* +* Z (output) COMPLEX*16 array, dimension (LDZ, M) +* The computed eigenvectors. The eigenvector associated +* with the eigenvalue W(i) is stored in the i-th column of +* Z. Any vector which fails to converge is set to its current +* iterate after MAXITS iterations. +* The imaginary parts of the eigenvectors are set to zero. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= max(1,N). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (5*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* IFAIL (output) INTEGER array, dimension (M) +* On normal exit, all elements of IFAIL are zero. +* If one or more eigenvectors fail to converge after +* MAXITS iterations, then their indices are stored in +* array IFAIL. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, then i eigenvectors failed to converge +* in MAXITS iterations. Their indices are stored in +* array IFAIL. +* +* Internal Parameters +* =================== +* +* MAXITS INTEGER, default = 5 +* The maximum number of iterations performed. +* +* EXTRA INTEGER, default = 2 +* The number of iterations performed after norm growth +* criterion is satisfied, should be at least 1. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION ZERO, ONE, TEN, ODM3, ODM1 + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TEN = 1.0D+1, + $ ODM3 = 1.0D-3, ODM1 = 1.0D-1 ) + INTEGER MAXITS, EXTRA + PARAMETER ( MAXITS = 5, EXTRA = 2 ) +* .. +* .. Local Scalars .. + INTEGER B1, BLKSIZ, BN, GPIND, I, IINFO, INDRV1, + $ INDRV2, INDRV3, INDRV4, INDRV5, ITS, J, J1, + $ JBLK, JMAX, JR, NBLK, NRMCHK + DOUBLE PRECISION DTPCRT, EPS, EPS1, NRM, ONENRM, ORTOL, PERTOL, + $ SCL, SEP, TOL, XJ, XJM, ZTR +* .. +* .. Local Arrays .. + INTEGER ISEED( 4 ) +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DASUM, DLAMCH, DNRM2 + EXTERNAL IDAMAX, DASUM, DLAMCH, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLAGTF, DLAGTS, DLARNV, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + DO 10 I = 1, M + IFAIL( I ) = 0 + 10 CONTINUE +* + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 .OR. M.GT.N ) THEN + INFO = -4 + ELSE IF( LDZ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + DO 20 J = 2, M + IF( IBLOCK( J ).LT.IBLOCK( J-1 ) ) THEN + INFO = -6 + GO TO 30 + END IF + IF( IBLOCK( J ).EQ.IBLOCK( J-1 ) .AND. W( J ).LT.W( J-1 ) ) + $ THEN + INFO = -5 + GO TO 30 + END IF + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSTEIN', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + EPS = DLAMCH( 'Precision' ) +* +* Initialize seed for random number generator DLARNV. +* + DO 40 I = 1, 4 + ISEED( I ) = 1 + 40 CONTINUE +* +* Initialize pointers. +* + INDRV1 = 0 + INDRV2 = INDRV1 + N + INDRV3 = INDRV2 + N + INDRV4 = INDRV3 + N + INDRV5 = INDRV4 + N +* +* Compute eigenvectors of matrix blocks. +* + J1 = 1 + DO 180 NBLK = 1, IBLOCK( M ) +* +* Find starting and ending indices of block nblk. +* + IF( NBLK.EQ.1 ) THEN + B1 = 1 + ELSE + B1 = ISPLIT( NBLK-1 ) + 1 + END IF + BN = ISPLIT( NBLK ) + BLKSIZ = BN - B1 + 1 + IF( BLKSIZ.EQ.1 ) + $ GO TO 60 + GPIND = B1 +* +* Compute reorthogonalization criterion and stopping criterion. +* + ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) ) + ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) ) + DO 50 I = B1 + 1, BN - 1 + ONENRM = MAX( ONENRM, ABS( D( I ) )+ABS( E( I-1 ) )+ + $ ABS( E( I ) ) ) + 50 CONTINUE + ORTOL = ODM3*ONENRM +* + DTPCRT = SQRT( ODM1 / BLKSIZ ) +* +* Loop through eigenvalues of block nblk. +* + 60 CONTINUE + JBLK = 0 + DO 170 J = J1, M + IF( IBLOCK( J ).NE.NBLK ) THEN + J1 = J + GO TO 180 + END IF + JBLK = JBLK + 1 + XJ = W( J ) +* +* Skip all the work if the block size is one. +* + IF( BLKSIZ.EQ.1 ) THEN + WORK( INDRV1+1 ) = ONE + GO TO 140 + END IF +* +* If eigenvalues j and j-1 are too close, add a relatively +* small perturbation. +* + IF( JBLK.GT.1 ) THEN + EPS1 = ABS( EPS*XJ ) + PERTOL = TEN*EPS1 + SEP = XJ - XJM + IF( SEP.LT.PERTOL ) + $ XJ = XJM + PERTOL + END IF +* + ITS = 0 + NRMCHK = 0 +* +* Get random starting vector. +* + CALL DLARNV( 2, ISEED, BLKSIZ, WORK( INDRV1+1 ) ) +* +* Copy the matrix T so it won't be destroyed in factorization. +* + CALL DCOPY( BLKSIZ, D( B1 ), 1, WORK( INDRV4+1 ), 1 ) + CALL DCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV2+2 ), 1 ) + CALL DCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV3+1 ), 1 ) +* +* Compute LU factors with partial pivoting ( PT = LU ) +* + TOL = ZERO + CALL DLAGTF( BLKSIZ, WORK( INDRV4+1 ), XJ, WORK( INDRV2+2 ), + $ WORK( INDRV3+1 ), TOL, WORK( INDRV5+1 ), IWORK, + $ IINFO ) +* +* Update iteration count. +* + 70 CONTINUE + ITS = ITS + 1 + IF( ITS.GT.MAXITS ) + $ GO TO 120 +* +* Normalize and scale the righthand side vector Pb. +* + SCL = BLKSIZ*ONENRM*MAX( EPS, + $ ABS( WORK( INDRV4+BLKSIZ ) ) ) / + $ DASUM( BLKSIZ, WORK( INDRV1+1 ), 1 ) + CALL DSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 ) +* +* Solve the system LU = Pb. +* + CALL DLAGTS( -1, BLKSIZ, WORK( INDRV4+1 ), WORK( INDRV2+2 ), + $ WORK( INDRV3+1 ), WORK( INDRV5+1 ), IWORK, + $ WORK( INDRV1+1 ), TOL, IINFO ) +* +* Reorthogonalize by modified Gram-Schmidt if eigenvalues are +* close enough. +* + IF( JBLK.EQ.1 ) + $ GO TO 110 + IF( ABS( XJ-XJM ).GT.ORTOL ) + $ GPIND = J + IF( GPIND.NE.J ) THEN + DO 100 I = GPIND, J - 1 + ZTR = ZERO + DO 80 JR = 1, BLKSIZ + ZTR = ZTR + WORK( INDRV1+JR )* + $ DBLE( Z( B1-1+JR, I ) ) + 80 CONTINUE + DO 90 JR = 1, BLKSIZ + WORK( INDRV1+JR ) = WORK( INDRV1+JR ) - + $ ZTR*DBLE( Z( B1-1+JR, I ) ) + 90 CONTINUE + 100 CONTINUE + END IF +* +* Check the infinity norm of the iterate. +* + 110 CONTINUE + JMAX = IDAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) + NRM = ABS( WORK( INDRV1+JMAX ) ) +* +* Continue for additional iterations after norm reaches +* stopping criterion. +* + IF( NRM.LT.DTPCRT ) + $ GO TO 70 + NRMCHK = NRMCHK + 1 + IF( NRMCHK.LT.EXTRA+1 ) + $ GO TO 70 +* + GO TO 130 +* +* If stopping criterion was not satisfied, update info and +* store eigenvector number in array ifail. +* + 120 CONTINUE + INFO = INFO + 1 + IFAIL( INFO ) = J +* +* Accept iterate as jth eigenvector. +* + 130 CONTINUE + SCL = ONE / DNRM2( BLKSIZ, WORK( INDRV1+1 ), 1 ) + JMAX = IDAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) + IF( WORK( INDRV1+JMAX ).LT.ZERO ) + $ SCL = -SCL + CALL DSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 ) + 140 CONTINUE + DO 150 I = 1, N + Z( I, J ) = CZERO + 150 CONTINUE + DO 160 I = 1, BLKSIZ + Z( B1+I-1, J ) = DCMPLX( WORK( INDRV1+I ), ZERO ) + 160 CONTINUE +* +* Save the shift to check eigenvalue spacing at next +* iteration. +* + XJM = XJ +* + 170 CONTINUE + 180 CONTINUE +* + RETURN +* +* End of ZSTEIN +* + END diff --git a/costa/native/external/lapack/zsteqr.f b/costa/native/external/lapack/zsteqr.f new file mode 100644 index 000000000..8dc609e4e --- /dev/null +++ b/costa/native/external/lapack/zsteqr.f @@ -0,0 +1,504 @@ + SUBROUTINE ZSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ), WORK( * ) + COMPLEX*16 Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZSTEQR computes all eigenvalues and, optionally, eigenvectors of a +* symmetric tridiagonal matrix using the implicit QL or QR method. +* The eigenvectors of a full or band complex Hermitian matrix can also +* be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this +* matrix to tridiagonal form. +* +* Arguments +* ========= +* +* COMPZ (input) CHARACTER*1 +* = 'N': Compute eigenvalues only. +* = 'V': Compute eigenvalues and eigenvectors of the original +* Hermitian matrix. On entry, Z must contain the +* unitary matrix used to reduce the original matrix +* to tridiagonal form. +* = 'I': Compute eigenvalues and eigenvectors of the +* tridiagonal matrix. Z is initialized to the identity +* matrix. +* +* N (input) INTEGER +* The order of the matrix. N >= 0. +* +* D (input/output) DOUBLE PRECISION array, dimension (N) +* On entry, the diagonal elements of the tridiagonal matrix. +* On exit, if INFO = 0, the eigenvalues in ascending order. +* +* E (input/output) DOUBLE PRECISION array, dimension (N-1) +* On entry, the (n-1) subdiagonal elements of the tridiagonal +* matrix. +* On exit, E has been destroyed. +* +* Z (input/output) COMPLEX*16 array, dimension (LDZ, N) +* On entry, if COMPZ = 'V', then Z contains the unitary +* matrix used in the reduction to tridiagonal form. +* On exit, if INFO = 0, then if COMPZ = 'V', Z contains the +* orthonormal eigenvectors of the original Hermitian matrix, +* and if COMPZ = 'I', Z contains the orthonormal eigenvectors +* of the symmetric tridiagonal matrix. +* If COMPZ = 'N', then Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1, and if +* eigenvectors are desired, then LDZ >= max(1,N). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) +* If COMPZ = 'N', then WORK is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: the algorithm has failed to find all the eigenvalues in +* a total of 30*N iterations; if INFO = i, then i +* elements of E have not converged to zero; on exit, D +* and E contain the elements of a symmetric tridiagonal +* matrix which is unitarily similar to the original +* matrix. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, THREE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ THREE = 3.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) +* .. +* .. Local Scalars .. + INTEGER I, ICOMPZ, II, ISCALE, J, JTOT, K, L, L1, LEND, + $ LENDM1, LENDP1, LENDSV, LM1, LSV, M, MM, MM1, + $ NM1, NMAXIT + DOUBLE PRECISION ANORM, B, C, EPS, EPS2, F, G, P, R, RT1, RT2, + $ S, SAFMAX, SAFMIN, SSFMAX, SSFMIN, TST +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANST, DLAPY2 + EXTERNAL LSAME, DLAMCH, DLANST, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL DLAE2, DLAEV2, DLARTG, DLASCL, DLASRT, XERBLA, + $ ZLASET, ZLASR, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSTEQR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ICOMPZ.EQ.2 ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Determine the unit roundoff and over/underflow thresholds. +* + EPS = DLAMCH( 'E' ) + EPS2 = EPS**2 + SAFMIN = DLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + SSFMAX = SQRT( SAFMAX ) / THREE + SSFMIN = SQRT( SAFMIN ) / EPS2 +* +* Compute the eigenvalues and eigenvectors of the tridiagonal +* matrix. +* + IF( ICOMPZ.EQ.2 ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, Z, LDZ ) +* + NMAXIT = N*MAXIT + JTOT = 0 +* +* Determine where the matrix splits and choose QL or QR iteration +* for each block, according to whether top or bottom diagonal +* element is smaller. +* + L1 = 1 + NM1 = N - 1 +* + 10 CONTINUE + IF( L1.GT.N ) + $ GO TO 160 + IF( L1.GT.1 ) + $ E( L1-1 ) = ZERO + IF( L1.LE.NM1 ) THEN + DO 20 M = L1, NM1 + TST = ABS( E( M ) ) + IF( TST.EQ.ZERO ) + $ GO TO 30 + IF( TST.LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+ + $ 1 ) ) ) )*EPS ) THEN + E( M ) = ZERO + GO TO 30 + END IF + 20 CONTINUE + END IF + M = N +* + 30 CONTINUE + L = L1 + LSV = L + LEND = M + LENDSV = LEND + L1 = M + 1 + IF( LEND.EQ.L ) + $ GO TO 10 +* +* Scale submatrix in rows and columns L to LEND +* + ANORM = DLANST( 'I', LEND-L+1, D( L ), E( L ) ) + ISCALE = 0 + IF( ANORM.EQ.ZERO ) + $ GO TO 10 + IF( ANORM.GT.SSFMAX ) THEN + ISCALE = 1 + CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, + $ INFO ) + ELSE IF( ANORM.LT.SSFMIN ) THEN + ISCALE = 2 + CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, + $ INFO ) + END IF +* +* Choose between QL and QR iteration +* + IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN + LEND = LSV + L = LENDSV + END IF +* + IF( LEND.GT.L ) THEN +* +* QL Iteration +* +* Look for small subdiagonal element. +* + 40 CONTINUE + IF( L.NE.LEND ) THEN + LENDM1 = LEND - 1 + DO 50 M = L, LENDM1 + TST = ABS( E( M ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M+1 ) )+ + $ SAFMIN )GO TO 60 + 50 CONTINUE + END IF +* + M = LEND +* + 60 CONTINUE + IF( M.LT.LEND ) + $ E( M ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 80 +* +* If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 +* to compute its eigensystem. +* + IF( M.EQ.L+1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL DLAEV2( D( L ), E( L ), D( L+1 ), RT1, RT2, C, S ) + WORK( L ) = C + WORK( N-1+L ) = S + CALL ZLASR( 'R', 'V', 'B', N, 2, WORK( L ), + $ WORK( N-1+L ), Z( 1, L ), LDZ ) + ELSE + CALL DLAE2( D( L ), E( L ), D( L+1 ), RT1, RT2 ) + END IF + D( L ) = RT1 + D( L+1 ) = RT2 + E( L ) = ZERO + L = L + 2 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L+1 )-P ) / ( TWO*E( L ) ) + R = DLAPY2( G, ONE ) + G = D( M ) - P + ( E( L ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + MM1 = M - 1 + DO 70 I = MM1, L, -1 + F = S*E( I ) + B = C*E( I ) + CALL DLARTG( G, F, C, S, R ) + IF( I.NE.M-1 ) + $ E( I+1 ) = R + G = D( I+1 ) - P + R = ( D( I )-G )*S + TWO*C*B + P = S*R + D( I+1 ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = -S + END IF +* + 70 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = M - L + 1 + CALL ZLASR( 'R', 'V', 'B', N, MM, WORK( L ), WORK( N-1+L ), + $ Z( 1, L ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( L ) = G + GO TO 40 +* +* Eigenvalue found. +* + 80 CONTINUE + D( L ) = P +* + L = L + 1 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 +* + ELSE +* +* QR Iteration +* +* Look for small superdiagonal element. +* + 90 CONTINUE + IF( L.NE.LEND ) THEN + LENDP1 = LEND + 1 + DO 100 M = L, LENDP1, -1 + TST = ABS( E( M-1 ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M-1 ) )+ + $ SAFMIN )GO TO 110 + 100 CONTINUE + END IF +* + M = LEND +* + 110 CONTINUE + IF( M.GT.LEND ) + $ E( M-1 ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 130 +* +* If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 +* to compute its eigensystem. +* + IF( M.EQ.L-1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL DLAEV2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2, C, S ) + WORK( M ) = C + WORK( N-1+M ) = S + CALL ZLASR( 'R', 'V', 'F', N, 2, WORK( M ), + $ WORK( N-1+M ), Z( 1, L-1 ), LDZ ) + ELSE + CALL DLAE2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2 ) + END IF + D( L-1 ) = RT1 + D( L ) = RT2 + E( L-1 ) = ZERO + L = L - 2 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L-1 )-P ) / ( TWO*E( L-1 ) ) + R = DLAPY2( G, ONE ) + G = D( M ) - P + ( E( L-1 ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + LM1 = L - 1 + DO 120 I = M, LM1 + F = S*E( I ) + B = C*E( I ) + CALL DLARTG( G, F, C, S, R ) + IF( I.NE.M ) + $ E( I-1 ) = R + G = D( I ) - P + R = ( D( I+1 )-G )*S + TWO*C*B + P = S*R + D( I ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = S + END IF +* + 120 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = L - M + 1 + CALL ZLASR( 'R', 'V', 'F', N, MM, WORK( M ), WORK( N-1+M ), + $ Z( 1, M ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( LM1 ) = G + GO TO 90 +* +* Eigenvalue found. +* + 130 CONTINUE + D( L ) = P +* + L = L - 1 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 +* + END IF +* +* Undo scaling if necessary +* + 140 CONTINUE + IF( ISCALE.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + ELSE IF( ISCALE.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + END IF +* +* Check for no convergence to an eigenvalue after a total +* of N*MAXIT iterations. +* + IF( JTOT.EQ.NMAXIT ) THEN + DO 150 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 150 CONTINUE + RETURN + END IF + GO TO 10 +* +* Order eigenvalues and eigenvectors. +* + 160 CONTINUE + IF( ICOMPZ.EQ.0 ) THEN +* +* Use Quick Sort +* + CALL DLASRT( 'I', N, D, INFO ) +* + ELSE +* +* Use Selection Sort to minimize swaps of eigenvectors +* + DO 180 II = 2, N + I = II - 1 + K = I + P = D( I ) + DO 170 J = II, N + IF( D( J ).LT.P ) THEN + K = J + P = D( J ) + END IF + 170 CONTINUE + IF( K.NE.I ) THEN + D( K ) = D( I ) + D( I ) = P + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 ) + END IF + 180 CONTINUE + END IF + RETURN +* +* End of ZSTEQR +* + END diff --git a/costa/native/external/lapack/zsycon.f b/costa/native/external/lapack/zsycon.f new file mode 100644 index 000000000..bc8a95e6a --- /dev/null +++ b/costa/native/external/lapack/zsycon.f @@ -0,0 +1,159 @@ + SUBROUTINE ZSYCON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZSYCON estimates the reciprocal of the condition number (in the +* 1-norm) of a complex symmetric matrix A using the factorization +* A = U*D*U**T or A = L*D*L**T computed by ZSYTRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by ZSYTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by ZSYTRF. +* +* ANORM (input) DOUBLE PRECISION +* The 1-norm of the original matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +* estimate of the 1-norm of inv(A) computed in this routine. +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + DOUBLE PRECISION AINVNM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACON, ZSYTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L') or inv(U*D*U'). +* + CALL ZSYTRS( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of ZSYCON +* + END diff --git a/costa/native/external/lapack/zsymv.f b/costa/native/external/lapack/zsymv.f new file mode 100644 index 000000000..10451fc0a --- /dev/null +++ b/costa/native/external/lapack/zsymv.f @@ -0,0 +1,265 @@ + SUBROUTINE ZSYMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCX, INCY, LDA, N + COMPLEX*16 ALPHA, BETA +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ), Y( * ) +* .. +* +* Purpose +* ======= +* +* ZSYMV performs the matrix-vector operation +* +* y := alpha*A*x + beta*y, +* +* where alpha and beta are scalars, x and y are n element vectors and +* A is an n by n symmetric matrix. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1 +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* A - COMPLEX*16 array, dimension ( LDA, N ) +* Before entry, with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of A is not referenced. +* Before entry, with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of A is not referenced. +* Unchanged on exit. +* +* LDA - INTEGER +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, N ). +* Unchanged on exit. +* +* X - COMPLEX*16 array, dimension at least +* ( 1 + ( N - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the N- +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* BETA - COMPLEX*16 +* On entry, BETA specifies the scalar beta. When BETA is +* supplied as zero then Y need not be set on input. +* Unchanged on exit. +* +* Y - COMPLEX*16 array, dimension at least +* ( 1 + ( N - 1 )*abs( INCY ) ). +* Before entry, the incremented array Y must contain the n +* element vector y. On exit, Y is overwritten by the updated +* vector y. +* +* INCY - INTEGER +* On entry, INCY specifies the increment for the elements of +* Y. INCY must not be zero. +* Unchanged on exit. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY + COMPLEX*16 TEMP1, TEMP2 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = 1 + ELSE IF( N.LT.0 ) THEN + INFO = 2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = 5 + ELSE IF( INCX.EQ.0 ) THEN + INFO = 7 + ELSE IF( INCY.EQ.0 ) THEN + INFO = 10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ) .OR. ( ( ALPHA.EQ.ZERO ) .AND. ( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 ) THEN + KX = 1 + ELSE + KX = 1 - ( N-1 )*INCX + END IF + IF( INCY.GT.0 ) THEN + KY = 1 + ELSE + KY = 1 - ( N-1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE ) THEN + IF( INCY.EQ.1 ) THEN + IF( BETA.EQ.ZERO ) THEN + DO 10 I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20 I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO ) THEN + DO 30 I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40 I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Form y when A is stored in upper triangle. +* + IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN + DO 60 J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + DO 50 I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( I ) + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*A( J, J ) + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80 J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70 I = 1, J - 1 + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y when A is stored in lower triangle. +* + IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN + DO 100 J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*A( J, J ) + DO 90 I = J + 1, N + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( I ) + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120 J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + IX = JX + IY = JY + DO 110 I = J + 1, N + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZSYMV +* + END diff --git a/costa/native/external/lapack/zsyr.f b/costa/native/external/lapack/zsyr.f new file mode 100644 index 000000000..69f522642 --- /dev/null +++ b/costa/native/external/lapack/zsyr.f @@ -0,0 +1,199 @@ + SUBROUTINE ZSYR( UPLO, N, ALPHA, X, INCX, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* October 31, 1992 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCX, LDA, N + COMPLEX*16 ALPHA +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ) +* .. +* +* Purpose +* ======= +* +* ZSYR performs the symmetric rank 1 operation +* +* A := alpha*x*( x' ) + A, +* +* where alpha is a complex scalar, x is an n element vector and A is an +* n by n symmetric matrix. +* +* Arguments +* ========== +* +* UPLO - CHARACTER*1 +* On entry, UPLO specifies whether the upper or lower +* triangular part of the array A is to be referenced as +* follows: +* +* UPLO = 'U' or 'u' Only the upper triangular part of A +* is to be referenced. +* +* UPLO = 'L' or 'l' Only the lower triangular part of A +* is to be referenced. +* +* Unchanged on exit. +* +* N - INTEGER +* On entry, N specifies the order of the matrix A. +* N must be at least zero. +* Unchanged on exit. +* +* ALPHA - COMPLEX*16 +* On entry, ALPHA specifies the scalar alpha. +* Unchanged on exit. +* +* X - COMPLEX*16 array, dimension at least +* ( 1 + ( N - 1 )*abs( INCX ) ). +* Before entry, the incremented array X must contain the N- +* element vector x. +* Unchanged on exit. +* +* INCX - INTEGER +* On entry, INCX specifies the increment for the elements of +* X. INCX must not be zero. +* Unchanged on exit. +* +* A - COMPLEX*16 array, dimension ( LDA, N ) +* Before entry, with UPLO = 'U' or 'u', the leading n by n +* upper triangular part of the array A must contain the upper +* triangular part of the symmetric matrix and the strictly +* lower triangular part of A is not referenced. On exit, the +* upper triangular part of the array A is overwritten by the +* upper triangular part of the updated matrix. +* Before entry, with UPLO = 'L' or 'l', the leading n by n +* lower triangular part of the array A must contain the lower +* triangular part of the symmetric matrix and the strictly +* upper triangular part of A is not referenced. On exit, the +* lower triangular part of the array A is overwritten by the +* lower triangular part of the updated matrix. +* +* LDA - INTEGER +* On entry, LDA specifies the first dimension of A as declared +* in the calling (sub) program. LDA must be at least +* max( 1, N ). +* Unchanged on exit. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, IX, J, JX, KX + COMPLEX*16 TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = 1 + ELSE IF( N.LT.0 ) THEN + INFO = 2 + ELSE IF( INCX.EQ.0 ) THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = 7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYR ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ) .OR. ( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 ) THEN + KX = 1 - ( N-1 )*INCX + ELSE IF( INCX.NE.1 ) THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Form A when A is stored in upper triangle. +* + IF( INCX.EQ.1 ) THEN + DO 20 J = 1, N + IF( X( J ).NE.ZERO ) THEN + TEMP = ALPHA*X( J ) + DO 10 I = 1, J + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40 J = 1, N + IF( X( JX ).NE.ZERO ) THEN + TEMP = ALPHA*X( JX ) + IX = KX + DO 30 I = 1, J + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in lower triangle. +* + IF( INCX.EQ.1 ) THEN + DO 60 J = 1, N + IF( X( J ).NE.ZERO ) THEN + TEMP = ALPHA*X( J ) + DO 50 I = J, N + A( I, J ) = A( I, J ) + X( I )*TEMP + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80 J = 1, N + IF( X( JX ).NE.ZERO ) THEN + TEMP = ALPHA*X( JX ) + IX = JX + DO 70 I = J, N + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZSYR +* + END diff --git a/costa/native/external/lapack/zsyrfs.f b/costa/native/external/lapack/zsyrfs.f new file mode 100644 index 000000000..62fc4ea76 --- /dev/null +++ b/costa/native/external/lapack/zsyrfs.f @@ -0,0 +1,339 @@ + SUBROUTINE ZSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, + $ X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZSYRFS improves the computed solution to a system of linear +* equations when the coefficient matrix is symmetric indefinite, and +* provides error bounds and backward error estimates for the solution. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The symmetric matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input) COMPLEX*16 array, dimension (LDAF,N) +* The factored form of the matrix A. AF contains the block +* diagonal matrix D and the multipliers used to obtain the +* factor U or L from the factorization A = U*D*U**T or +* A = L*D*L**T as computed by ZSYTRF. +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by ZSYTRF. +* +* B (input) COMPLEX*16 array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input/output) COMPLEX*16 array, dimension (LDX,NRHS) +* On entry, the solution matrix X, as computed by ZSYTRS. +* On exit, the improved solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Internal Parameters +* =================== +* +* ITMAX is the maximum number of steps of iterative refinement. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZLACON, ZSYMV, ZSYTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL ZSYMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 40 CONTINUE + RWORK( K ) = RWORK( K ) + CABS1( A( K, K ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + CABS1( A( K, K ) )*XK + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL ZSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + CALL ZAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACON to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A'). +* + CALL ZSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL ZSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of ZSYRFS +* + END diff --git a/costa/native/external/lapack/zsysv.f b/costa/native/external/lapack/zsysv.f new file mode 100644 index 000000000..38a59a8a1 --- /dev/null +++ b/costa/native/external/lapack/zsysv.f @@ -0,0 +1,171 @@ + SUBROUTINE ZSYSV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZSYSV computes the solution to a complex system of linear equations +* A * X = B, +* where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +* matrices. +* +* The diagonal pivoting method is used to factor A as +* A = U * D * U**T, if UPLO = 'U', or +* A = L * D * L**T, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is symmetric and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then +* used to solve the system of equations A * X = B. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, if INFO = 0, the block diagonal matrix D and the +* multipliers used to obtain the factor U or L from the +* factorization A = U*D*U**T or A = L*D*L**T as computed by +* ZSYTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D, as +* determined by ZSYTRF. If IPIV(k) > 0, then rows and columns +* k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +* diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +* then rows and columns k-1 and -IPIV(k) were interchanged and +* D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +* IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +* -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +* diagonal block. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the N-by-NRHS right hand side matrix B. +* On exit, if INFO = 0, the N-by-NRHS solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of WORK. LWORK >= 1, and for best performance +* LWORK >= N*NB, where NB is the optimal blocksize for +* ZSYTRF. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, so the solution could not be computed. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZSYTRF, ZSYTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'ZSYTRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYSV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL ZSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZSYSV +* + END diff --git a/costa/native/external/lapack/zsysvx.f b/costa/native/external/lapack/zsysvx.f new file mode 100644 index 000000000..a6a8ff948 --- /dev/null +++ b/costa/native/external/lapack/zsysvx.f @@ -0,0 +1,299 @@ + SUBROUTINE ZSYSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, + $ LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK, + $ RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZSYSVX uses the diagonal pivoting factorization to compute the +* solution to a complex system of linear equations A * X = B, +* where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +* matrices. +* +* Error bounds on the solution and a condition estimate are also +* provided. +* +* Description +* =========== +* +* The following steps are performed: +* +* 1. If FACT = 'N', the diagonal pivoting method is used to factor A. +* The form of the factorization is +* A = U * D * U**T, if UPLO = 'U', or +* A = L * D * L**T, if UPLO = 'L', +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is symmetric and block diagonal with +* 1-by-1 and 2-by-2 diagonal blocks. +* +* 2. If some D(i,i)=0, so that D is exactly singular, then the routine +* returns with INFO = i. Otherwise, the factored form of A is used +* to estimate the condition number of the matrix A. If the +* reciprocal of the condition number is less than machine precision, +* INFO = N+1 is returned as a warning, but the routine still goes on +* to solve for X and compute error bounds as described below. +* +* 3. The system of equations is solved for X using the factored form +* of A. +* +* 4. Iterative refinement is applied to improve the computed solution +* matrix and calculate error bounds and backward error estimates +* for it. +* +* Arguments +* ========= +* +* FACT (input) CHARACTER*1 +* Specifies whether or not the factored form of A has been +* supplied on entry. +* = 'F': On entry, AF and IPIV contain the factored form +* of A. A, AF and IPIV will not be modified. +* = 'N': The matrix A will be copied to AF and factored. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The number of linear equations, i.e., the order of the +* matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The symmetric matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of A contains the upper triangular part +* of the matrix A, and the strictly lower triangular part of A +* is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of A contains the lower triangular part of +* the matrix A, and the strictly upper triangular part of A is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* AF (input or output) COMPLEX*16 array, dimension (LDAF,N) +* If FACT = 'F', then AF is an input argument and on entry +* contains the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**T or A = L*D*L**T as computed by ZSYTRF. +* +* If FACT = 'N', then AF is an output argument and on exit +* returns the block diagonal matrix D and the multipliers used +* to obtain the factor U or L from the factorization +* A = U*D*U**T or A = L*D*L**T. +* +* LDAF (input) INTEGER +* The leading dimension of the array AF. LDAF >= max(1,N). +* +* IPIV (input or output) INTEGER array, dimension (N) +* If FACT = 'F', then IPIV is an input argument and on entry +* contains details of the interchanges and the block structure +* of D, as determined by ZSYTRF. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* If FACT = 'N', then IPIV is an output argument and on exit +* contains details of the interchanges and the block structure +* of D, as determined by ZSYTRF. +* +* B (input) COMPLEX*16 array, dimension (LDB,NRHS) +* The N-by-NRHS right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (output) COMPLEX*16 array, dimension (LDX,NRHS) +* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* RCOND (output) DOUBLE PRECISION +* The estimate of the reciprocal condition number of the matrix +* A. If RCOND is less than the machine precision (in +* particular, if RCOND = 0), the matrix is singular to working +* precision. This condition is indicated by a return code of +* INFO > 0. +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of WORK. LWORK >= 2*N, and for best performance +* LWORK >= N*NB, where NB is the optimal blocksize for +* ZSYTRF. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, and i is +* <= N: D(i,i) is exactly zero. The factorization +* has been completed but the factor D is exactly +* singular, so the solution and error bounds could +* not be computed. RCOND = 0 is returned. +* = N+1: D is nonsingular, but RCOND is less than machine +* precision, meaning that the matrix is singular +* to working precision. Nevertheless, the +* solution and error bounds are computed because +* there are a number of situations where the +* computed solution can be more accurate than the +* value of RCOND would suggest. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOFACT + INTEGER LWKOPT, NB + DOUBLE PRECISION ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANSY + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANSY +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACPY, ZSYCON, ZSYRFS, ZSYTRF, ZSYTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'ZSYTRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYSVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U' or A = L*D*L'. +* + CALL ZLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL ZSYTRF( UPLO, N, AF, LDAF, IPIV, WORK, LWORK, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN + IF( INFO.GT.0 ) + $ RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = ZLANSY( 'I', UPLO, N, A, LDA, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL ZSYCON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* +* Compute the solution vectors X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZSYTRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL ZSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZSYSVX +* + END diff --git a/costa/native/external/lapack/zsytf2.f b/costa/native/external/lapack/zsytf2.f new file mode 100644 index 000000000..0ef31eae9 --- /dev/null +++ b/costa/native/external/lapack/zsytf2.f @@ -0,0 +1,515 @@ + SUBROUTINE ZSYTF2( UPLO, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZSYTF2 computes the factorization of a complex symmetric matrix A +* using the Bunch-Kaufman diagonal pivoting method: +* +* A = U*D*U' or A = L*D*L' +* +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, U' is the transpose of U, and D is symmetric and +* block diagonal with 1-by-1 and 2-by-2 diagonal blocks. +* +* This is the unblocked version of the algorithm, calling Level 2 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the upper or lower triangular part of the +* symmetric matrix A is stored: +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* n-by-n upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n-by-n lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L (see below for further details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* > 0: if INFO = k, D(k,k) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, and division by zero will occur if it +* is used to solve a system of equations. +* +* Further Details +* =============== +* +* 1-96 - Based on modifications by J. Lewis, Boeing Computer Services +* Company +* +* If UPLO = 'U', then A = U*D*U', where +* U = P(n)*U(n)* ... *P(k)U(k)* ..., +* i.e., U is a product of terms P(k)*U(k), where k decreases from n to +* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I v 0 ) k-s +* U(k) = ( 0 I 0 ) s +* ( 0 0 I ) n-k +* k-s s n-k +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +* and A(k,k), and v overwrites A(1:k-2,k-1:k). +* +* If UPLO = 'L', then A = L*D*L', where +* L = P(1)*L(1)* ... *P(k)*L(k)* ..., +* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I 0 0 ) k-1 +* L(k) = ( 0 I 0 ) s +* ( 0 v I ) n-k-s+1 +* k-1 s n-k-s+1 +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KK, KP, KSTEP + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, ROWMAX + COMPLEX*16 D11, D12, D21, D22, R1, T, WK, WKM1, WKP1, Z +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + EXTERNAL LSAME, IZAMAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZSCAL, ZSWAP, ZSYR +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( DBLE( Z ) ) + ABS( DIMAG( Z ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTF2', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U' using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 70 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = IZAMAX( K-1, A( 1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = IMAX + IZAMAX( K-IMAX, A( IMAX, IMAX+1 ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + IF( IMAX.GT.1 ) THEN + JMAX = IZAMAX( IMAX-1, A( 1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL ZSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) + CALL ZSWAP( KK-KP-1, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)' +* + R1 = CONE / A( K, K ) + CALL ZSYR( UPLO, K-1, -R1, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL ZSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )' +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )' +* + IF( K.GT.2 ) THEN +* + D12 = A( K-1, K ) + D22 = A( K-1, K-1 ) / D12 + D11 = A( K, K ) / D12 + T = CONE / ( D11*D22-CONE ) + D12 = T / D12 +* + DO 30 J = K - 2, 1, -1 + WKM1 = D12*( D11*A( J, K-1 )-A( J, K ) ) + WK = D12*( D22*A( J, K )-A( J, K-1 ) ) + DO 20 I = J, 1, -1 + A( I, J ) = A( I, J ) - A( I, K )*WK - + $ A( I, K-1 )*WKM1 + 20 CONTINUE + A( J, K ) = WK + A( J, K-1 ) = WKM1 + 30 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L' using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 70 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + IZAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = K - 1 + IZAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + IZAMAX( N-IMAX, A( IMAX+1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) + CALL ZSWAP( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)' +* + R1 = CONE / A( K, K ) + CALL ZSYR( UPLO, N-K, -R1, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column K +* + CALL ZSCAL( N-K, R1, A( K+1, K ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k) +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )' +* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )' +* +* where L(k) and L(k+1) are the k-th and (k+1)-th +* columns of L +* + D21 = A( K+1, K ) + D11 = A( K+1, K+1 ) / D21 + D22 = A( K, K ) / D21 + T = CONE / ( D11*D22-CONE ) + D21 = T / D21 +* + DO 60 J = K + 2, N + WK = D21*( D11*A( J, K )-A( J, K+1 ) ) + WKP1 = D21*( D22*A( J, K+1 )-A( J, K ) ) + DO 50 I = J, N + A( I, J ) = A( I, J ) - A( I, K )*WK - + $ A( I, K+1 )*WKP1 + 50 CONTINUE + A( J, K ) = WK + A( J, K+1 ) = WKP1 + 60 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 40 +* + END IF +* + 70 CONTINUE + RETURN +* +* End of ZSYTF2 +* + END diff --git a/costa/native/external/lapack/zsytrf.f b/costa/native/external/lapack/zsytrf.f new file mode 100644 index 000000000..bb2fc730c --- /dev/null +++ b/costa/native/external/lapack/zsytrf.f @@ -0,0 +1,287 @@ + SUBROUTINE ZSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZSYTRF computes the factorization of a complex symmetric matrix A +* using the Bunch-Kaufman diagonal pivoting method. The form of the +* factorization is +* +* A = U*D*U**T or A = L*D*L**T +* +* where U (or L) is a product of permutation and unit upper (lower) +* triangular matrices, and D is symmetric and block diagonal with +* with 1-by-1 and 2-by-2 diagonal blocks. +* +* This is the blocked version of the algorithm, calling Level 3 BLAS. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A is stored; +* = 'L': Lower triangle of A is stored. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the symmetric matrix A. If UPLO = 'U', the leading +* N-by-N upper triangular part of A contains the upper +* triangular part of the matrix A, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of A contains the lower +* triangular part of the matrix A, and the strictly upper +* triangular part of A is not referenced. +* +* On exit, the block diagonal matrix D and the multipliers used +* to obtain the factor U or L (see below for further details). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (output) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D. +* If IPIV(k) > 0, then rows and columns k and IPIV(k) were +* interchanged and D(k,k) is a 1-by-1 diagonal block. +* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The length of WORK. LWORK >=1. For best performance +* LWORK >= N*NB, where NB is the block size returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) is exactly zero. The factorization +* has been completed, but the block diagonal matrix D is +* exactly singular, and division by zero will occur if it +* is used to solve a system of equations. +* +* Further Details +* =============== +* +* If UPLO = 'U', then A = U*D*U', where +* U = P(n)*U(n)* ... *P(k)U(k)* ..., +* i.e., U is a product of terms P(k)*U(k), where k decreases from n to +* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I v 0 ) k-s +* U(k) = ( 0 I 0 ) s +* ( 0 0 I ) n-k +* k-s s n-k +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +* and A(k,k), and v overwrites A(1:k-2,k-1:k). +* +* If UPLO = 'L', then A = L*D*L', where +* L = P(1)*L(1)* ... *P(k)*L(k)* ..., +* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +* that if the diagonal block D(k) is of order s (s = 1 or 2), then +* +* ( I 0 0 ) k-1 +* L(k) = ( 0 I 0 ) s +* ( 0 v I ) n-k-s+1 +* k-1 s n-k-s+1 +* +* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLASYF, ZSYTF2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'ZSYTRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'ZSYTRF', UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U' using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by ZLASYF; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 40 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL ZLASYF( UPLO, K, NB, KB, A, LDA, IPIV, WORK, N, IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL ZSYTF2( UPLO, K, A, LDA, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L' using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by ZLASYF; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL ZLASYF( UPLO, N-K+1, NB, KB, A( K, K ), LDA, IPIV( K ), + $ WORK, N, IINFO ) + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL ZSYTF2( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), IINFO ) + KB = N - K + 1 + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO 30 J = K, K + KB - 1 + IF( IPIV( J ).GT.0 ) THEN + IPIV( J ) = IPIV( J ) + K - 1 + ELSE + IPIV( J ) = IPIV( J ) - K + 1 + END IF + 30 CONTINUE +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* + END IF +* + 40 CONTINUE + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZSYTRF +* + END diff --git a/costa/native/external/lapack/zsytri.f b/costa/native/external/lapack/zsytri.f new file mode 100644 index 000000000..4fca2a71f --- /dev/null +++ b/costa/native/external/lapack/zsytri.f @@ -0,0 +1,314 @@ + SUBROUTINE ZSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZSYTRI computes the inverse of a complex symmetric indefinite matrix +* A using the factorization A = U*D*U**T or A = L*D*L**T computed by +* ZSYTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the block diagonal matrix D and the multipliers +* used to obtain the factor U or L as computed by ZSYTRF. +* +* On exit, if INFO = 0, the (symmetric) inverse of the original +* matrix. If UPLO = 'U', the upper triangular part of the +* inverse is formed and the part of A below the diagonal is not +* referenced; if UPLO = 'L' the lower triangular part of the +* inverse is formed and the part of A above the diagonal is +* not referenced. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by ZSYTRF. +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +* inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K, KP, KSTEP + COMPLEX*16 AK, AKKP1, AKP1, D, T, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTU + EXTERNAL LSAME, ZDOTU +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZSWAP, ZSYMV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL ZCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL ZSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - ZDOTU( K-1, WORK, 1, A( 1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = A( K, K+1 ) + AK = A( K, K ) / T + AKP1 = A( K+1, K+1 ) / T + AKKP1 = A( K, K+1 ) / T + D = T*( AK*AKP1-ONE ) + A( K, K ) = AKP1 / D + A( K+1, K+1 ) = AK / D + A( K, K+1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL ZCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL ZSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - ZDOTU( K-1, WORK, 1, A( 1, K ), + $ 1 ) + A( K, K+1 ) = A( K, K+1 ) - + $ ZDOTU( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 ) + CALL ZCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 ) + CALL ZSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K+1 ), 1 ) + A( K+1, K+1 ) = A( K+1, K+1 ) - + $ ZDOTU( K-1, WORK, 1, A( 1, K+1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + CALL ZSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + CALL ZSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K+1 ) + A( K, K+1 ) = A( KP, K+1 ) + A( KP, K+1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + GO TO 30 + 40 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L'. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 50 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 60 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL ZCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL ZSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - ZDOTU( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = A( K, K-1 ) + AK = A( K-1, K-1 ) / T + AKP1 = A( K, K ) / T + AKKP1 = A( K, K-1 ) / T + D = T*( AK*AKP1-ONE ) + A( K-1, K-1 ) = AKP1 / D + A( K, K ) = AK / D + A( K, K-1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL ZCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL ZSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - ZDOTU( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + A( K, K-1 ) = A( K, K-1 ) - + $ ZDOTU( N-K, A( K+1, K ), 1, A( K+1, K-1 ), + $ 1 ) + CALL ZCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 ) + CALL ZSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K-1 ), 1 ) + A( K-1, K-1 ) = A( K-1, K-1 ) - + $ ZDOTU( N-K, WORK, 1, A( K+1, K-1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + CALL ZSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K-1 ) + A( K, K-1 ) = A( KP, K-1 ) + A( KP, K-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + GO TO 50 + 60 CONTINUE + END IF +* + RETURN +* +* End of ZSYTRI +* + END diff --git a/costa/native/external/lapack/zsytrs.f b/costa/native/external/lapack/zsytrs.f new file mode 100644 index 000000000..b0cc85938 --- /dev/null +++ b/costa/native/external/lapack/zsytrs.f @@ -0,0 +1,370 @@ + SUBROUTINE ZSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZSYTRS solves a system of linear equations A*X = B with a complex +* symmetric matrix A using the factorization A = U*D*U**T or +* A = L*D*L**T computed by ZSYTRF. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the details of the factorization are stored +* as an upper or lower triangular matrix. +* = 'U': Upper triangular, form is A = U*D*U**T; +* = 'L': Lower triangular, form is A = L*D*L**T. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The block diagonal matrix D and the multipliers used to +* obtain the factor U or L as computed by ZSYTRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* Details of the interchanges and the block structure of D +* as determined by ZSYTRF. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP + COMPLEX*16 AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMV, ZGERU, ZSCAL, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U'. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL ZGERU( K-1, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL ZSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL ZSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL ZGERU( K-2, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL ZGERU( K-2, NRHS, -ONE, A( 1, K-1 ), 1, B( K-1, 1 ), + $ LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K-1, K ) + AKM1 = A( K-1, K-1 ) / AKM1K + AK = A( K, K ) / AKM1K + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / AKM1K + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U'*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U'(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL ZGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ), + $ 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U'(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + CALL ZGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ), + $ 1, ONE, B( K, 1 ), LDB ) + CALL ZGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, + $ A( 1, K+1 ), 1, ONE, B( K+1, 1 ), LDB ) +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L'. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL ZGERU( N-K, NRHS, -ONE, A( K+1, K ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL ZSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL ZSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL ZGERU( N-K-1, NRHS, -ONE, A( K+2, K ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL ZGERU( N-K-1, NRHS, -ONE, A( K+2, K+1 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K+1, K ) + AKM1 = A( K, K ) / AKM1K + AK = A( K+1, K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / AKM1K + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L'*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L'(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL ZGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L'(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL ZGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL ZGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K-1 ), 1, ONE, B( K-1, 1 ), + $ LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of ZSYTRS +* + END diff --git a/costa/native/external/lapack/ztbcon.f b/costa/native/external/lapack/ztbcon.f new file mode 100644 index 000000000..38dcc896f --- /dev/null +++ b/costa/native/external/lapack/ztbcon.f @@ -0,0 +1,205 @@ + SUBROUTINE ZTBCON( NORM, UPLO, DIAG, N, KD, AB, LDAB, RCOND, WORK, + $ RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, KD, LDAB, N + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 AB( LDAB, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZTBCON estimates the reciprocal of the condition number of a +* triangular band matrix A, in either the 1-norm or the infinity-norm. +* +* The norm of A is computed and an estimate is obtained for +* norm(inv(A)), then the reciprocal of the condition number is +* computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals or subdiagonals of the +* triangular band matrix A. KD >= 0. +* +* AB (input) COMPLEX*16 array, dimension (LDAB,N) +* The upper or lower triangular band matrix A, stored in the +* first kd+1 rows of the array. The j-th column of A is stored +* in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* If DIAG = 'U', the diagonal elements of A are not referenced +* and are assumed to be 1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + DOUBLE PRECISION AINVNM, ANORM, SCALE, SMLNUM, XNORM + COMPLEX*16 ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH, ZLANTB + EXTERNAL LSAME, IZAMAX, DLAMCH, ZLANTB +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDRSCL, ZLACON, ZLATBS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = DLAMCH( 'Safe minimum' )*DBLE( MAX( N, 1 ) ) +* +* Compute the 1-norm of the triangular matrix A or A'. +* + ANORM = ZLANTB( NORM, UPLO, DIAG, N, KD, AB, LDAB, RWORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the 1-norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL ZLATBS( UPLO, 'No transpose', DIAG, NORMIN, N, KD, + $ AB, LDAB, WORK, SCALE, RWORK, INFO ) + ELSE +* +* Multiply by inv(A'). +* + CALL ZLATBS( UPLO, 'Conjugate transpose', DIAG, NORMIN, + $ N, KD, AB, LDAB, WORK, SCALE, RWORK, INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = IZAMAX( N, WORK, 1 ) + XNORM = CABS1( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL ZDRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of ZTBCON +* + END diff --git a/costa/native/external/lapack/ztbrfs.f b/costa/native/external/lapack/ztbrfs.f new file mode 100644 index 000000000..c1197909b --- /dev/null +++ b/costa/native/external/lapack/ztbrfs.f @@ -0,0 +1,393 @@ + SUBROUTINE ZTBRFS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, + $ LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, KD, LDAB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 AB( LDAB, * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZTBRFS provides error bounds and backward error estimates for the +* solution to a system of linear equations with a triangular band +* coefficient matrix. +* +* The solution matrix X must be computed by ZTBTRS or some other +* means before entering this routine. ZTBRFS does not do iterative +* refinement because doing so cannot improve the backward error. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals or subdiagonals of the +* triangular band matrix A. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AB (input) COMPLEX*16 array, dimension (LDAB,N) +* The upper or lower triangular band matrix A, stored in the +* first kd+1 rows of the array. The j-th column of A is stored +* in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* If DIAG = 'U', the diagonal elements of A are not referenced +* and are assumed to be 1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* B (input) COMPLEX*16 array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input) COMPLEX*16 array, dimension (LDX,NRHS) +* The solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANSN, TRANST + INTEGER I, J, K, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZLACON, ZTBMV, ZTBSV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = KD + 2 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL ZCOPY( N, X( 1, J ), 1, WORK, 1 ) + CALL ZTBMV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, WORK, 1 ) + CALL ZAXPY( N, -ONE, B( 1, J ), 1, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = CABS1( X( K, J ) ) + DO 30 I = MAX( 1, K-KD ), K + RWORK( I ) = RWORK( I ) + + $ CABS1( AB( KD+1+I-K, K ) )*XK + 30 CONTINUE + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = CABS1( X( K, J ) ) + DO 50 I = MAX( 1, K-KD ), K - 1 + RWORK( I ) = RWORK( I ) + + $ CABS1( AB( KD+1+I-K, K ) )*XK + 50 CONTINUE + RWORK( K ) = RWORK( K ) + XK + 60 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = CABS1( X( K, J ) ) + DO 70 I = K, MIN( N, K+KD ) + RWORK( I ) = RWORK( I ) + + $ CABS1( AB( 1+I-K, K ) )*XK + 70 CONTINUE + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = CABS1( X( K, J ) ) + DO 90 I = K + 1, MIN( N, K+KD ) + RWORK( I ) = RWORK( I ) + + $ CABS1( AB( 1+I-K, K ) )*XK + 90 CONTINUE + RWORK( K ) = RWORK( K ) + XK + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A**H)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = MAX( 1, K-KD ), K + S = S + CABS1( AB( KD+1+I-K, K ) )* + $ CABS1( X( I, J ) ) + 110 CONTINUE + RWORK( K ) = RWORK( K ) + S + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = CABS1( X( K, J ) ) + DO 130 I = MAX( 1, K-KD ), K - 1 + S = S + CABS1( AB( KD+1+I-K, K ) )* + $ CABS1( X( I, J ) ) + 130 CONTINUE + RWORK( K ) = RWORK( K ) + S + 140 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, MIN( N, K+KD ) + S = S + CABS1( AB( 1+I-K, K ) )* + $ CABS1( X( I, J ) ) + 150 CONTINUE + RWORK( K ) = RWORK( K ) + S + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = CABS1( X( K, J ) ) + DO 170 I = K + 1, MIN( N, K+KD ) + S = S + CABS1( AB( 1+I-K, K ) )* + $ CABS1( X( I, J ) ) + 170 CONTINUE + RWORK( K ) = RWORK( K ) + S + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL ZTBSV( UPLO, TRANST, DIAG, N, KD, AB, LDAB, WORK, + $ 1 ) + DO 220 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 230 CONTINUE + CALL ZTBSV( UPLO, TRANSN, DIAG, N, KD, AB, LDAB, WORK, + $ 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of ZTBRFS +* + END diff --git a/costa/native/external/lapack/ztbtrs.f b/costa/native/external/lapack/ztbtrs.f new file mode 100644 index 000000000..c432d54df --- /dev/null +++ b/costa/native/external/lapack/ztbtrs.f @@ -0,0 +1,163 @@ + SUBROUTINE ZTBTRS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, + $ LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 AB( LDAB, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZTBTRS solves a triangular system of the form +* +* A * X = B, A**T * X = B, or A**H * X = B, +* +* where A is a triangular band matrix of order N, and B is an +* N-by-NRHS matrix. A check is made to verify that A is nonsingular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* KD (input) INTEGER +* The number of superdiagonals or subdiagonals of the +* triangular band matrix A. KD >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AB (input) COMPLEX*16 array, dimension (LDAB,N) +* The upper or lower triangular band matrix A, stored in the +* first kd+1 rows of AB. The j-th column of A is stored +* in the j-th column of the array AB as follows: +* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +* If DIAG = 'U', the diagonal elements of A are not referenced +* and are assumed to be 1. +* +* LDAB (input) INTEGER +* The leading dimension of the array AB. LDAB >= KD+1. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, if INFO = 0, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the i-th diagonal element of A is zero, +* indicating that the matrix is singular and the +* solutions X have not been computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZTBSV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOUNIT = LSAME( DIAG, 'N' ) + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + DO 10 INFO = 1, N + IF( AB( KD+1, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE + DO 20 INFO = 1, N + IF( AB( 1, INFO ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF + END IF + INFO = 0 +* +* Solve A * X = B, A**T * X = B, or A**H * X = B. +* + DO 30 J = 1, NRHS + CALL ZTBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, B( 1, J ), 1 ) + 30 CONTINUE +* + RETURN +* +* End of ZTBTRS +* + END diff --git a/costa/native/external/lapack/ztgevc.f b/costa/native/external/lapack/ztgevc.f new file mode 100644 index 000000000..65febe3ec --- /dev/null +++ b/costa/native/external/lapack/ztgevc.f @@ -0,0 +1,632 @@ + SUBROUTINE ZTGEVC( SIDE, HOWMNY, SELECT, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, MM, M, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDA, LDB, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* +* Purpose +* ======= +* +* ZTGEVC computes some or all of the right and/or left generalized +* eigenvectors of a pair of complex upper triangular matrices (A,B). +* +* The right generalized eigenvector x and the left generalized +* eigenvector y of (A,B) corresponding to a generalized eigenvalue +* w are defined by: +* +* (A - wB) * x = 0 and y**H * (A - wB) = 0 +* +* where y**H denotes the conjugate tranpose of y. +* +* If an eigenvalue w is determined by zero diagonal elements of both A +* and B, a unit vector is returned as the corresponding eigenvector. +* +* If all eigenvectors are requested, the routine may either return +* the matrices X and/or Y of right or left eigenvectors of (A,B), or +* the products Z*X and/or Q*Y, where Z and Q are input unitary +* matrices. If (A,B) was obtained from the generalized Schur +* factorization of an original pair of matrices +* (A0,B0) = (Q*A*Z**H,Q*B*Z**H), +* then Z*X and Q*Y are the matrices of right or left eigenvectors of +* A. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'R': compute right eigenvectors only; +* = 'L': compute left eigenvectors only; +* = 'B': compute both right and left eigenvectors. +* +* HOWMNY (input) CHARACTER*1 +* = 'A': compute all right and/or left eigenvectors; +* = 'B': compute all right and/or left eigenvectors, and +* backtransform them using the input matrices supplied +* in VR and/or VL; +* = 'S': compute selected right and/or left eigenvectors, +* specified by the logical array SELECT. +* +* SELECT (input) LOGICAL array, dimension (N) +* If HOWMNY='S', SELECT specifies the eigenvectors to be +* computed. +* If HOWMNY='A' or 'B', SELECT is not referenced. +* To select the eigenvector corresponding to the j-th +* eigenvalue, SELECT(j) must be set to .TRUE.. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The upper triangular matrix A. +* +* LDA (input) INTEGER +* The leading dimension of array A. LDA >= max(1,N). +* +* B (input) COMPLEX*16 array, dimension (LDB,N) +* The upper triangular matrix B. B must have real diagonal +* elements. +* +* LDB (input) INTEGER +* The leading dimension of array B. LDB >= max(1,N). +* +* VL (input/output) COMPLEX*16 array, dimension (LDVL,MM) +* On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +* contain an N-by-N matrix Q (usually the unitary matrix Q +* of left Schur vectors returned by ZHGEQZ). +* On exit, if SIDE = 'L' or 'B', VL contains: +* if HOWMNY = 'A', the matrix Y of left eigenvectors of (A,B); +* if HOWMNY = 'B', the matrix Q*Y; +* if HOWMNY = 'S', the left eigenvectors of (A,B) specified by +* SELECT, stored consecutively in the columns of +* VL, in the same order as their eigenvalues. +* If SIDE = 'R', VL is not referenced. +* +* LDVL (input) INTEGER +* The leading dimension of array VL. +* LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise. +* +* VR (input/output) COMPLEX*16 array, dimension (LDVR,MM) +* On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +* contain an N-by-N matrix Q (usually the unitary matrix Z +* of right Schur vectors returned by ZHGEQZ). +* On exit, if SIDE = 'R' or 'B', VR contains: +* if HOWMNY = 'A', the matrix X of right eigenvectors of (A,B); +* if HOWMNY = 'B', the matrix Z*X; +* if HOWMNY = 'S', the right eigenvectors of (A,B) specified by +* SELECT, stored consecutively in the columns of +* VR, in the same order as their eigenvalues. +* If SIDE = 'L', VR is not referenced. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. +* LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise. +* +* MM (input) INTEGER +* The number of columns in the arrays VL and/or VR. MM >= M. +* +* M (output) INTEGER +* The number of columns in the arrays VL and/or VR actually +* used to store the eigenvectors. If HOWMNY = 'A' or 'B', M +* is set to N. Each selected eigenvector occupies one column. +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit. +* < 0: if INFO = -i, the i-th argument had an illegal value. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL COMPL, COMPR, ILALL, ILBACK, ILBBAD, ILCOMP, + $ LSA, LSB + INTEGER I, IBEG, IEIG, IEND, IHWMNY, IM, ISIDE, ISRC, + $ J, JE, JR + DOUBLE PRECISION ACOEFA, ACOEFF, ANORM, ASCALE, BCOEFA, BIG, + $ BIGNUM, BNORM, BSCALE, DMIN, SAFMIN, SBETA, + $ SCALE, SMALL, TEMP, ULP, XMAX + COMPLEX*16 BCOEFF, CA, CB, D, SALPHA, SUM, SUMA, SUMB, X +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + COMPLEX*16 ZLADIV + EXTERNAL LSAME, DLAMCH, ZLADIV +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, XERBLA, ZGEMV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION ABS1 +* .. +* .. Statement Function definitions .. + ABS1( X ) = ABS( DBLE( X ) ) + ABS( DIMAG( X ) ) +* .. +* .. Executable Statements .. +* +* Decode and Test the input parameters +* + IF( LSAME( HOWMNY, 'A' ) ) THEN + IHWMNY = 1 + ILALL = .TRUE. + ILBACK = .FALSE. + ELSE IF( LSAME( HOWMNY, 'S' ) ) THEN + IHWMNY = 2 + ILALL = .FALSE. + ILBACK = .FALSE. + ELSE IF( LSAME( HOWMNY, 'B' ) .OR. LSAME( HOWMNY, 'T' ) ) THEN + IHWMNY = 3 + ILALL = .TRUE. + ILBACK = .TRUE. + ELSE + IHWMNY = -1 + END IF +* + IF( LSAME( SIDE, 'R' ) ) THEN + ISIDE = 1 + COMPL = .FALSE. + COMPR = .TRUE. + ELSE IF( LSAME( SIDE, 'L' ) ) THEN + ISIDE = 2 + COMPL = .TRUE. + COMPR = .FALSE. + ELSE IF( LSAME( SIDE, 'B' ) ) THEN + ISIDE = 3 + COMPL = .TRUE. + COMPR = .TRUE. + ELSE + ISIDE = -1 + END IF +* + INFO = 0 + IF( ISIDE.LT.0 ) THEN + INFO = -1 + ELSE IF( IHWMNY.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTGEVC', -INFO ) + RETURN + END IF +* +* Count the number of eigenvectors +* + IF( .NOT.ILALL ) THEN + IM = 0 + DO 10 J = 1, N + IF( SELECT( J ) ) + $ IM = IM + 1 + 10 CONTINUE + ELSE + IM = N + END IF +* +* Check diagonal of B +* + ILBBAD = .FALSE. + DO 20 J = 1, N + IF( DIMAG( B( J, J ) ).NE.ZERO ) + $ ILBBAD = .TRUE. + 20 CONTINUE +* + IF( ILBBAD ) THEN + INFO = -7 + ELSE IF( COMPL .AND. LDVL.LT.N .OR. LDVL.LT.1 ) THEN + INFO = -10 + ELSE IF( COMPR .AND. LDVR.LT.N .OR. LDVR.LT.1 ) THEN + INFO = -12 + ELSE IF( MM.LT.IM ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTGEVC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = IM + IF( N.EQ.0 ) + $ RETURN +* +* Machine Constants +* + SAFMIN = DLAMCH( 'Safe minimum' ) + BIG = ONE / SAFMIN + CALL DLABAD( SAFMIN, BIG ) + ULP = DLAMCH( 'Epsilon' )*DLAMCH( 'Base' ) + SMALL = SAFMIN*N / ULP + BIG = ONE / SMALL + BIGNUM = ONE / ( SAFMIN*N ) +* +* Compute the 1-norm of each column of the strictly upper triangular +* part of A and B to check for possible overflow in the triangular +* solver. +* + ANORM = ABS1( A( 1, 1 ) ) + BNORM = ABS1( B( 1, 1 ) ) + RWORK( 1 ) = ZERO + RWORK( N+1 ) = ZERO + DO 40 J = 2, N + RWORK( J ) = ZERO + RWORK( N+J ) = ZERO + DO 30 I = 1, J - 1 + RWORK( J ) = RWORK( J ) + ABS1( A( I, J ) ) + RWORK( N+J ) = RWORK( N+J ) + ABS1( B( I, J ) ) + 30 CONTINUE + ANORM = MAX( ANORM, RWORK( J )+ABS1( A( J, J ) ) ) + BNORM = MAX( BNORM, RWORK( N+J )+ABS1( B( J, J ) ) ) + 40 CONTINUE +* + ASCALE = ONE / MAX( ANORM, SAFMIN ) + BSCALE = ONE / MAX( BNORM, SAFMIN ) +* +* Left eigenvectors +* + IF( COMPL ) THEN + IEIG = 0 +* +* Main loop over eigenvalues +* + DO 140 JE = 1, N + IF( ILALL ) THEN + ILCOMP = .TRUE. + ELSE + ILCOMP = SELECT( JE ) + END IF + IF( ILCOMP ) THEN + IEIG = IEIG + 1 +* + IF( ABS1( A( JE, JE ) ).LE.SAFMIN .AND. + $ ABS( DBLE( B( JE, JE ) ) ).LE.SAFMIN ) THEN +* +* Singular matrix pencil -- return unit eigenvector +* + DO 50 JR = 1, N + VL( JR, IEIG ) = CZERO + 50 CONTINUE + VL( IEIG, IEIG ) = CONE + GO TO 140 + END IF +* +* Non-singular eigenvalue: +* Compute coefficients a and b in +* H +* y ( a A - b B ) = 0 +* + TEMP = ONE / MAX( ABS1( A( JE, JE ) )*ASCALE, + $ ABS( DBLE( B( JE, JE ) ) )*BSCALE, SAFMIN ) + SALPHA = ( TEMP*A( JE, JE ) )*ASCALE + SBETA = ( TEMP*DBLE( B( JE, JE ) ) )*BSCALE + ACOEFF = SBETA*ASCALE + BCOEFF = SALPHA*BSCALE +* +* Scale to avoid underflow +* + LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEFF ).LT.SMALL + LSB = ABS1( SALPHA ).GE.SAFMIN .AND. ABS1( BCOEFF ).LT. + $ SMALL +* + SCALE = ONE + IF( LSA ) + $ SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG ) + IF( LSB ) + $ SCALE = MAX( SCALE, ( SMALL / ABS1( SALPHA ) )* + $ MIN( BNORM, BIG ) ) + IF( LSA .OR. LSB ) THEN + SCALE = MIN( SCALE, ONE / + $ ( SAFMIN*MAX( ONE, ABS( ACOEFF ), + $ ABS1( BCOEFF ) ) ) ) + IF( LSA ) THEN + ACOEFF = ASCALE*( SCALE*SBETA ) + ELSE + ACOEFF = SCALE*ACOEFF + END IF + IF( LSB ) THEN + BCOEFF = BSCALE*( SCALE*SALPHA ) + ELSE + BCOEFF = SCALE*BCOEFF + END IF + END IF +* + ACOEFA = ABS( ACOEFF ) + BCOEFA = ABS1( BCOEFF ) + XMAX = ONE + DO 60 JR = 1, N + WORK( JR ) = CZERO + 60 CONTINUE + WORK( JE ) = CONE + DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN ) +* +* H +* Triangular solve of (a A - b B) y = 0 +* +* H +* (rowwise in (a A - b B) , or columnwise in a A - b B) +* + DO 100 J = JE + 1, N +* +* Compute +* j-1 +* SUM = sum conjg( a*A(k,j) - b*B(k,j) )*x(k) +* k=je +* (Scale if necessary) +* + TEMP = ONE / XMAX + IF( ACOEFA*RWORK( J )+BCOEFA*RWORK( N+J ).GT.BIGNUM* + $ TEMP ) THEN + DO 70 JR = JE, J - 1 + WORK( JR ) = TEMP*WORK( JR ) + 70 CONTINUE + XMAX = ONE + END IF + SUMA = CZERO + SUMB = CZERO +* + DO 80 JR = JE, J - 1 + SUMA = SUMA + DCONJG( A( JR, J ) )*WORK( JR ) + SUMB = SUMB + DCONJG( B( JR, J ) )*WORK( JR ) + 80 CONTINUE + SUM = ACOEFF*SUMA - DCONJG( BCOEFF )*SUMB +* +* Form x(j) = - SUM / conjg( a*A(j,j) - b*B(j,j) ) +* +* with scaling and perturbation of the denominator +* + D = DCONJG( ACOEFF*A( J, J )-BCOEFF*B( J, J ) ) + IF( ABS1( D ).LE.DMIN ) + $ D = DCMPLX( DMIN ) +* + IF( ABS1( D ).LT.ONE ) THEN + IF( ABS1( SUM ).GE.BIGNUM*ABS1( D ) ) THEN + TEMP = ONE / ABS1( SUM ) + DO 90 JR = JE, J - 1 + WORK( JR ) = TEMP*WORK( JR ) + 90 CONTINUE + XMAX = TEMP*XMAX + SUM = TEMP*SUM + END IF + END IF + WORK( J ) = ZLADIV( -SUM, D ) + XMAX = MAX( XMAX, ABS1( WORK( J ) ) ) + 100 CONTINUE +* +* Back transform eigenvector if HOWMNY='B'. +* + IF( ILBACK ) THEN + CALL ZGEMV( 'N', N, N+1-JE, CONE, VL( 1, JE ), LDVL, + $ WORK( JE ), 1, CZERO, WORK( N+1 ), 1 ) + ISRC = 2 + IBEG = 1 + ELSE + ISRC = 1 + IBEG = JE + END IF +* +* Copy and scale eigenvector into column of VL +* + XMAX = ZERO + DO 110 JR = IBEG, N + XMAX = MAX( XMAX, ABS1( WORK( ( ISRC-1 )*N+JR ) ) ) + 110 CONTINUE +* + IF( XMAX.GT.SAFMIN ) THEN + TEMP = ONE / XMAX + DO 120 JR = IBEG, N + VL( JR, IEIG ) = TEMP*WORK( ( ISRC-1 )*N+JR ) + 120 CONTINUE + ELSE + IBEG = N + 1 + END IF +* + DO 130 JR = 1, IBEG - 1 + VL( JR, IEIG ) = CZERO + 130 CONTINUE +* + END IF + 140 CONTINUE + END IF +* +* Right eigenvectors +* + IF( COMPR ) THEN + IEIG = IM + 1 +* +* Main loop over eigenvalues +* + DO 250 JE = N, 1, -1 + IF( ILALL ) THEN + ILCOMP = .TRUE. + ELSE + ILCOMP = SELECT( JE ) + END IF + IF( ILCOMP ) THEN + IEIG = IEIG - 1 +* + IF( ABS1( A( JE, JE ) ).LE.SAFMIN .AND. + $ ABS( DBLE( B( JE, JE ) ) ).LE.SAFMIN ) THEN +* +* Singular matrix pencil -- return unit eigenvector +* + DO 150 JR = 1, N + VR( JR, IEIG ) = CZERO + 150 CONTINUE + VR( IEIG, IEIG ) = CONE + GO TO 250 + END IF +* +* Non-singular eigenvalue: +* Compute coefficients a and b in +* +* ( a A - b B ) x = 0 +* + TEMP = ONE / MAX( ABS1( A( JE, JE ) )*ASCALE, + $ ABS( DBLE( B( JE, JE ) ) )*BSCALE, SAFMIN ) + SALPHA = ( TEMP*A( JE, JE ) )*ASCALE + SBETA = ( TEMP*DBLE( B( JE, JE ) ) )*BSCALE + ACOEFF = SBETA*ASCALE + BCOEFF = SALPHA*BSCALE +* +* Scale to avoid underflow +* + LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEFF ).LT.SMALL + LSB = ABS1( SALPHA ).GE.SAFMIN .AND. ABS1( BCOEFF ).LT. + $ SMALL +* + SCALE = ONE + IF( LSA ) + $ SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG ) + IF( LSB ) + $ SCALE = MAX( SCALE, ( SMALL / ABS1( SALPHA ) )* + $ MIN( BNORM, BIG ) ) + IF( LSA .OR. LSB ) THEN + SCALE = MIN( SCALE, ONE / + $ ( SAFMIN*MAX( ONE, ABS( ACOEFF ), + $ ABS1( BCOEFF ) ) ) ) + IF( LSA ) THEN + ACOEFF = ASCALE*( SCALE*SBETA ) + ELSE + ACOEFF = SCALE*ACOEFF + END IF + IF( LSB ) THEN + BCOEFF = BSCALE*( SCALE*SALPHA ) + ELSE + BCOEFF = SCALE*BCOEFF + END IF + END IF +* + ACOEFA = ABS( ACOEFF ) + BCOEFA = ABS1( BCOEFF ) + XMAX = ONE + DO 160 JR = 1, N + WORK( JR ) = CZERO + 160 CONTINUE + WORK( JE ) = CONE + DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN ) +* +* Triangular solve of (a A - b B) x = 0 (columnwise) +* +* WORK(1:j-1) contains sums w, +* WORK(j+1:JE) contains x +* + DO 170 JR = 1, JE - 1 + WORK( JR ) = ACOEFF*A( JR, JE ) - BCOEFF*B( JR, JE ) + 170 CONTINUE + WORK( JE ) = CONE +* + DO 210 J = JE - 1, 1, -1 +* +* Form x(j) := - w(j) / d +* with scaling and perturbation of the denominator +* + D = ACOEFF*A( J, J ) - BCOEFF*B( J, J ) + IF( ABS1( D ).LE.DMIN ) + $ D = DCMPLX( DMIN ) +* + IF( ABS1( D ).LT.ONE ) THEN + IF( ABS1( WORK( J ) ).GE.BIGNUM*ABS1( D ) ) THEN + TEMP = ONE / ABS1( WORK( J ) ) + DO 180 JR = 1, JE + WORK( JR ) = TEMP*WORK( JR ) + 180 CONTINUE + END IF + END IF +* + WORK( J ) = ZLADIV( -WORK( J ), D ) +* + IF( J.GT.1 ) THEN +* +* w = w + x(j)*(a A(*,j) - b B(*,j) ) with scaling +* + IF( ABS1( WORK( J ) ).GT.ONE ) THEN + TEMP = ONE / ABS1( WORK( J ) ) + IF( ACOEFA*RWORK( J )+BCOEFA*RWORK( N+J ).GE. + $ BIGNUM*TEMP ) THEN + DO 190 JR = 1, JE + WORK( JR ) = TEMP*WORK( JR ) + 190 CONTINUE + END IF + END IF +* + CA = ACOEFF*WORK( J ) + CB = BCOEFF*WORK( J ) + DO 200 JR = 1, J - 1 + WORK( JR ) = WORK( JR ) + CA*A( JR, J ) - + $ CB*B( JR, J ) + 200 CONTINUE + END IF + 210 CONTINUE +* +* Back transform eigenvector if HOWMNY='B'. +* + IF( ILBACK ) THEN + CALL ZGEMV( 'N', N, JE, CONE, VR, LDVR, WORK, 1, + $ CZERO, WORK( N+1 ), 1 ) + ISRC = 2 + IEND = N + ELSE + ISRC = 1 + IEND = JE + END IF +* +* Copy and scale eigenvector into column of VR +* + XMAX = ZERO + DO 220 JR = 1, IEND + XMAX = MAX( XMAX, ABS1( WORK( ( ISRC-1 )*N+JR ) ) ) + 220 CONTINUE +* + IF( XMAX.GT.SAFMIN ) THEN + TEMP = ONE / XMAX + DO 230 JR = 1, IEND + VR( JR, IEIG ) = TEMP*WORK( ( ISRC-1 )*N+JR ) + 230 CONTINUE + ELSE + IEND = 0 + END IF +* + DO 240 JR = IEND + 1, N + VR( JR, IEIG ) = CZERO + 240 CONTINUE +* + END IF + 250 CONTINUE + END IF +* + RETURN +* +* End of ZTGEVC +* + END diff --git a/costa/native/external/lapack/ztgex2.f b/costa/native/external/lapack/ztgex2.f new file mode 100644 index 000000000..50c18c1dc --- /dev/null +++ b/costa/native/external/lapack/ztgex2.f @@ -0,0 +1,268 @@ + SUBROUTINE ZTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, J1, INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER INFO, J1, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZTGEX2 swaps adjacent diagonal 1 by 1 blocks (A11,B11) and (A22,B22) +* in an upper triangular matrix pair (A, B) by an unitary equivalence +* transformation. +* +* (A, B) must be in generalized Schur canonical form, that is, A and +* B are both upper triangular. +* +* Optionally, the matrices Q and Z of generalized Schur vectors are +* updated. +* +* Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)' +* Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)' +* +* +* Arguments +* ========= +* +* WANTQ (input) LOGICAL +* .TRUE. : update the left transformation matrix Q; +* .FALSE.: do not update Q. +* +* WANTZ (input) LOGICAL +* .TRUE. : update the right transformation matrix Z; +* .FALSE.: do not update Z. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) COMPLEX*16 arrays, dimensions (LDA,N) +* On entry, the matrix A in the pair (A, B). +* On exit, the updated matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX*16 arrays, dimensions (LDB,N) +* On entry, the matrix B in the pair (A, B). +* On exit, the updated matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* Q (input/output) COMPLEX*16 array, dimension (LDZ,N) +* If WANTQ = .TRUE, on entry, the unitary matrix Q. On exit, +* the updated matrix Q. +* Not referenced if WANTQ = .FALSE.. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= 1; +* If WANTQ = .TRUE., LDQ >= N. +* +* Z (input/output) COMPLEX*16 array, dimension (LDZ,N) +* If WANTZ = .TRUE, on entry, the unitary matrix Z. On exit, +* the updated matrix Z. +* Not referenced if WANTZ = .FALSE.. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1; +* If WANTZ = .TRUE., LDZ >= N. +* +* J1 (input) INTEGER +* The index to the first block (A11, B11). +* +* INFO (output) INTEGER +* =0: Successful exit. +* =1: The transformed matrix pair (A, B) would be too far +* from generalized Schur form; the problem is ill- +* conditioned. (A, B) may have been partially reordered, +* and ILST points to the first row of the current +* position of the block being moved. +* +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* In the current code both weak and strong stability tests are +* performed. The user can omit the strong stability test by changing +* the internal logical parameter WANDS to .FALSE.. See ref. [2] for +* details. +* +* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +* M.S. Moonen et al (eds), Linear Algebra for Large Scale and +* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +* +* [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +* Eigenvalues of a Regular Matrix Pair (A, B) and Condition +* Estimation: Theory, Algorithms and Software, Report UMINF-94.04, +* Department of Computing Science, Umea University, S-901 87 Umea, +* Sweden, 1994. Also as LAPACK Working Note 87. To appear in +* Numerical Algorithms, 1996. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION TEN + PARAMETER ( TEN = 10.0D+0 ) + INTEGER LDST + PARAMETER ( LDST = 2 ) + LOGICAL WANDS + PARAMETER ( WANDS = .TRUE. ) +* .. +* .. Local Scalars .. + LOGICAL DTRONG, WEAK + INTEGER I, M + DOUBLE PRECISION CQ, CZ, EPS, SA, SB, SCALE, SMLNUM, SS, SUM, + $ THRESH, WS + COMPLEX*16 CDUM, F, G, SQ, SZ +* .. +* .. Local Arrays .. + COMPLEX*16 S( LDST, LDST ), T( LDST, LDST ), WORK( 8 ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL ZLACPY, ZLARTG, ZLASSQ, ZROT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG, MAX, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + M = LDST + WEAK = .FALSE. + DTRONG = .FALSE. +* +* Make a local copy of selected block in (A, B) +* + CALL ZLACPY( 'Full', M, M, A( J1, J1 ), LDA, S, LDST ) + CALL ZLACPY( 'Full', M, M, B( J1, J1 ), LDB, T, LDST ) +* +* Compute the threshold for testing the acceptance of swapping. +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + SCALE = DBLE( CZERO ) + SUM = DBLE( CONE ) + CALL ZLACPY( 'Full', M, M, S, LDST, WORK, M ) + CALL ZLACPY( 'Full', M, M, T, LDST, WORK( M*M+1 ), M ) + CALL ZLASSQ( 2*M*M, WORK, 1, SCALE, SUM ) + SA = SCALE*SQRT( SUM ) + THRESH = MAX( TEN*EPS*SA, SMLNUM ) +* +* Compute unitary QL and RQ that swap 1-by-1 and 1-by-1 blocks +* using Givens rotations and perform the swap tentatively. +* + F = S( 2, 2 )*T( 1, 1 ) - T( 2, 2 )*S( 1, 1 ) + G = S( 2, 2 )*T( 1, 2 ) - T( 2, 2 )*S( 1, 2 ) + SA = ABS( S( 2, 2 ) ) + SB = ABS( T( 2, 2 ) ) + CALL ZLARTG( G, F, CZ, SZ, CDUM ) + SZ = -SZ + CALL ZROT( 2, S( 1, 1 ), 1, S( 1, 2 ), 1, CZ, DCONJG( SZ ) ) + CALL ZROT( 2, T( 1, 1 ), 1, T( 1, 2 ), 1, CZ, DCONJG( SZ ) ) + IF( SA.GE.SB ) THEN + CALL ZLARTG( S( 1, 1 ), S( 2, 1 ), CQ, SQ, CDUM ) + ELSE + CALL ZLARTG( T( 1, 1 ), T( 2, 1 ), CQ, SQ, CDUM ) + END IF + CALL ZROT( 2, S( 1, 1 ), LDST, S( 2, 1 ), LDST, CQ, SQ ) + CALL ZROT( 2, T( 1, 1 ), LDST, T( 2, 1 ), LDST, CQ, SQ ) +* +* Weak stability test: |S21| + |T21| <= O(EPS F-norm((S, T))) +* + WS = ABS( S( 2, 1 ) ) + ABS( T( 2, 1 ) ) + WEAK = WS.LE.THRESH + IF( .NOT.WEAK ) + $ GO TO 20 +* + IF( WANDS ) THEN +* +* Strong stability test: +* F-norm((A-QL'*S*QR, B-QL'*T*QR)) <= O(EPS*F-norm((A, B))) +* + CALL ZLACPY( 'Full', M, M, S, LDST, WORK, M ) + CALL ZLACPY( 'Full', M, M, T, LDST, WORK( M*M+1 ), M ) + CALL ZROT( 2, WORK, 1, WORK( 3 ), 1, CZ, -DCONJG( SZ ) ) + CALL ZROT( 2, WORK( 5 ), 1, WORK( 7 ), 1, CZ, -DCONJG( SZ ) ) + CALL ZROT( 2, WORK, 2, WORK( 2 ), 2, CQ, -SQ ) + CALL ZROT( 2, WORK( 5 ), 2, WORK( 6 ), 2, CQ, -SQ ) + DO 10 I = 1, 2 + WORK( I ) = WORK( I ) - A( J1+I-1, J1 ) + WORK( I+2 ) = WORK( I+2 ) - A( J1+I-1, J1+1 ) + WORK( I+4 ) = WORK( I+4 ) - B( J1+I-1, J1 ) + WORK( I+6 ) = WORK( I+6 ) - B( J1+I-1, J1+1 ) + 10 CONTINUE + SCALE = DBLE( CZERO ) + SUM = DBLE( CONE ) + CALL ZLASSQ( 2*M*M, WORK, 1, SCALE, SUM ) + SS = SCALE*SQRT( SUM ) + DTRONG = SS.LE.THRESH + IF( .NOT.DTRONG ) + $ GO TO 20 + END IF +* +* If the swap is accepted ("weakly" and "strongly"), apply the +* equivalence transformations to the original matrix pair (A,B) +* + CALL ZROT( J1+1, A( 1, J1 ), 1, A( 1, J1+1 ), 1, CZ, + $ DCONJG( SZ ) ) + CALL ZROT( J1+1, B( 1, J1 ), 1, B( 1, J1+1 ), 1, CZ, + $ DCONJG( SZ ) ) + CALL ZROT( N-J1+1, A( J1, J1 ), LDA, A( J1+1, J1 ), LDA, CQ, SQ ) + CALL ZROT( N-J1+1, B( J1, J1 ), LDB, B( J1+1, J1 ), LDB, CQ, SQ ) +* +* Set N1 by N2 (2,1) blocks to 0 +* + A( J1+1, J1 ) = CZERO + B( J1+1, J1 ) = CZERO +* +* Accumulate transformations into Q and Z if requested. +* + IF( WANTZ ) + $ CALL ZROT( N, Z( 1, J1 ), 1, Z( 1, J1+1 ), 1, CZ, + $ DCONJG( SZ ) ) + IF( WANTQ ) + $ CALL ZROT( N, Q( 1, J1 ), 1, Q( 1, J1+1 ), 1, CQ, + $ DCONJG( SQ ) ) +* +* Exit with INFO = 0 if swap was successfully performed. +* + RETURN +* +* Exit with INFO = 1 if swap was rejected. +* + 20 CONTINUE + INFO = 1 + RETURN +* +* End of ZTGEX2 +* + END diff --git a/costa/native/external/lapack/ztgexc.f b/costa/native/external/lapack/ztgexc.f new file mode 100644 index 000000000..fe28d7358 --- /dev/null +++ b/costa/native/external/lapack/ztgexc.f @@ -0,0 +1,207 @@ + SUBROUTINE ZTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, IFST, ILST, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER IFST, ILST, INFO, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZTGEXC reorders the generalized Schur decomposition of a complex +* matrix pair (A,B), using an unitary equivalence transformation +* (A, B) := Q * (A, B) * Z', so that the diagonal block of (A, B) with +* row index IFST is moved to row ILST. +* +* (A, B) must be in generalized Schur canonical form, that is, A and +* B are both upper triangular. +* +* Optionally, the matrices Q and Z of generalized Schur vectors are +* updated. +* +* Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)' +* Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)' +* +* Arguments +* ========= +* +* WANTQ (input) LOGICAL +* .TRUE. : update the left transformation matrix Q; +* .FALSE.: do not update Q. +* +* WANTZ (input) LOGICAL +* .TRUE. : update the right transformation matrix Z; +* .FALSE.: do not update Z. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the upper triangular matrix A in the pair (A, B). +* On exit, the updated matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX*16 array, dimension (LDB,N) +* On entry, the upper triangular matrix B in the pair (A, B). +* On exit, the updated matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* Q (input/output) COMPLEX*16 array, dimension (LDZ,N) +* On entry, if WANTQ = .TRUE., the unitary matrix Q. +* On exit, the updated matrix Q. +* If WANTQ = .FALSE., Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= 1; +* If WANTQ = .TRUE., LDQ >= N. +* +* Z (input/output) COMPLEX*16 array, dimension (LDZ,N) +* On entry, if WANTZ = .TRUE., the unitary matrix Z. +* On exit, the updated matrix Z. +* If WANTZ = .FALSE., Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1; +* If WANTZ = .TRUE., LDZ >= N. +* +* IFST (input/output) INTEGER +* ILST (input/output) INTEGER +* Specify the reordering of the diagonal blocks of (A, B). +* The block with row index IFST is moved to row ILST, by a +* sequence of swapping between adjacent blocks. +* +* INFO (output) INTEGER +* =0: Successful exit. +* <0: if INFO = -i, the i-th argument had an illegal value. +* =1: The transformed matrix pair (A, B) would be too far +* from generalized Schur form; the problem is ill- +* conditioned. (A, B) may have been partially reordered, +* and ILST points to the first row of the current +* position of the block being moved. +* +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +* M.S. Moonen et al (eds), Linear Algebra for Large Scale and +* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +* +* [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +* Eigenvalues of a Regular Matrix Pair (A, B) and Condition +* Estimation: Theory, Algorithms and Software, Report +* UMINF - 94.04, Department of Computing Science, Umea University, +* S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87. +* To appear in Numerical Algorithms, 1996. +* +* [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +* for Solving the Generalized Sylvester Equation and Estimating the +* Separation between Regular Matrix Pairs, Report UMINF - 93.23, +* Department of Computing Science, Umea University, S-901 87 Umea, +* Sweden, December 1993, Revised April 1994, Also as LAPACK working +* Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1, +* 1996. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER HERE +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZTGEX2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode and test input arguments. + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. ( LDQ.LT.MAX( 1, N ) ) ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. WANTZ .AND. ( LDZ.LT.MAX( 1, N ) ) ) THEN + INFO = -11 + ELSE IF( IFST.LT.1 .OR. IFST.GT.N ) THEN + INFO = -12 + ELSE IF( ILST.LT.1 .OR. ILST.GT.N ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTGEXC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN + IF( IFST.EQ.ILST ) + $ RETURN +* + IF( IFST.LT.ILST ) THEN +* + HERE = IFST +* + 10 CONTINUE +* +* Swap with next one below +* + CALL ZTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, LDZ, + $ HERE, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 1 + IF( HERE.LT.ILST ) + $ GO TO 10 + HERE = HERE - 1 + ELSE + HERE = IFST - 1 +* + 20 CONTINUE +* +* Swap with next one above +* + CALL ZTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, LDZ, + $ HERE, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 1 + IF( HERE.GE.ILST ) + $ GO TO 20 + HERE = HERE + 1 + END IF + ILST = HERE + RETURN +* +* End of ZTGEXC +* + END diff --git a/costa/native/external/lapack/ztgsen.f b/costa/native/external/lapack/ztgsen.f new file mode 100644 index 000000000..2d7bffc0a --- /dev/null +++ b/costa/native/external/lapack/ztgsen.f @@ -0,0 +1,645 @@ + SUBROUTINE ZTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, + $ ALPHA, BETA, Q, LDQ, Z, LDZ, M, PL, PR, DIF, + $ WORK, LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER IJOB, INFO, LDA, LDB, LDQ, LDZ, LIWORK, LWORK, + $ M, N + DOUBLE PRECISION PL, PR +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + DOUBLE PRECISION DIF( * ) + COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), Q( LDQ, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* Purpose +* ======= +* +* ZTGSEN reorders the generalized Schur decomposition of a complex +* matrix pair (A, B) (in terms of an unitary equivalence trans- +* formation Q' * (A, B) * Z), so that a selected cluster of eigenvalues +* appears in the leading diagonal blocks of the pair (A,B). The leading +* columns of Q and Z form unitary bases of the corresponding left and +* right eigenspaces (deflating subspaces). (A, B) must be in +* generalized Schur canonical form, that is, A and B are both upper +* triangular. +* +* ZTGSEN also computes the generalized eigenvalues +* +* w(j)= ALPHA(j) / BETA(j) +* +* of the reordered matrix pair (A, B). +* +* Optionally, the routine computes estimates of reciprocal condition +* numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11), +* (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s) +* between the matrix pairs (A11, B11) and (A22,B22) that correspond to +* the selected cluster and the eigenvalues outside the cluster, resp., +* and norms of "projections" onto left and right eigenspaces w.r.t. +* the selected cluster in the (1,1)-block. +* +* +* Arguments +* ========= +* +* IJOB (input) integer +* Specifies whether condition numbers are required for the +* cluster of eigenvalues (PL and PR) or the deflating subspaces +* (Difu and Difl): +* =0: Only reorder w.r.t. SELECT. No extras. +* =1: Reciprocal of norms of "projections" onto left and right +* eigenspaces w.r.t. the selected cluster (PL and PR). +* =2: Upper bounds on Difu and Difl. F-norm-based estimate +* (DIF(1:2)). +* =3: Estimate of Difu and Difl. 1-norm-based estimate +* (DIF(1:2)). +* About 5 times as expensive as IJOB = 2. +* =4: Compute PL, PR and DIF (i.e. 0, 1 and 2 above): Economic +* version to get it all. +* =5: Compute PL, PR and DIF (i.e. 0, 1 and 3 above) +* +* WANTQ (input) LOGICAL +* .TRUE. : update the left transformation matrix Q; +* .FALSE.: do not update Q. +* +* WANTZ (input) LOGICAL +* .TRUE. : update the right transformation matrix Z; +* .FALSE.: do not update Z. +* +* SELECT (input) LOGICAL array, dimension (N) +* SELECT specifies the eigenvalues in the selected cluster. To +* select an eigenvalue w(j), SELECT(j) must be set to +* .TRUE.. +* +* N (input) INTEGER +* The order of the matrices A and B. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension(LDA,N) +* On entry, the upper triangular matrix A, in generalized +* Schur canonical form. +* On exit, A is overwritten by the reordered matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX*16 array, dimension(LDB,N) +* On entry, the upper triangular matrix B, in generalized +* Schur canonical form. +* On exit, B is overwritten by the reordered matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* ALPHA (output) COMPLEX*16 array, dimension (N) +* BETA (output) COMPLEX*16 array, dimension (N) +* The diagonal elements of A and B, respectively, +* when the pair (A,B) has been reduced to generalized Schur +* form. ALPHA(i)/BETA(i) i=1,...,N are the generalized +* eigenvalues. +* +* Q (input/output) COMPLEX*16 array, dimension (LDQ,N) +* On entry, if WANTQ = .TRUE., Q is an N-by-N matrix. +* On exit, Q has been postmultiplied by the left unitary +* transformation matrix which reorder (A, B); The leading M +* columns of Q form orthonormal bases for the specified pair of +* left eigenspaces (deflating subspaces). +* If WANTQ = .FALSE., Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= 1. +* If WANTQ = .TRUE., LDQ >= N. +* +* Z (input/output) COMPLEX*16 array, dimension (LDZ,N) +* On entry, if WANTZ = .TRUE., Z is an N-by-N matrix. +* On exit, Z has been postmultiplied by the left unitary +* transformation matrix which reorder (A, B); The leading M +* columns of Z form orthonormal bases for the specified pair of +* left eigenspaces (deflating subspaces). +* If WANTZ = .FALSE., Z is not referenced. +* +* LDZ (input) INTEGER +* The leading dimension of the array Z. LDZ >= 1. +* If WANTZ = .TRUE., LDZ >= N. +* +* M (output) INTEGER +* The dimension of the specified pair of left and right +* eigenspaces, (deflating subspaces) 0 <= M <= N. +* +* PL, PR (output) DOUBLE PRECISION +* If IJOB = 1, 4 or 5, PL, PR are lower bounds on the +* reciprocal of the norm of "projections" onto left and right +* eigenspace with respect to the selected cluster. +* 0 < PL, PR <= 1. +* If M = 0 or M = N, PL = PR = 1. +* If IJOB = 0, 2 or 3 PL, PR are not referenced. +* +* DIF (output) DOUBLE PRECISION array, dimension (2). +* If IJOB >= 2, DIF(1:2) store the estimates of Difu and Difl. +* If IJOB = 2 or 4, DIF(1:2) are F-norm-based upper bounds on +* Difu and Difl. If IJOB = 3 or 5, DIF(1:2) are 1-norm-based +* estimates of Difu and Difl, computed using reversed +* communication with ZLACON. +* If M = 0 or N, DIF(1:2) = F-norm([A, B]). +* If IJOB = 0 or 1, DIF is not referenced. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* IF IJOB = 0, WORK is not referenced. Otherwise, +* on exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1 +* If IJOB = 1, 2 or 4, LWORK >= 2*M*(N-M) +* If IJOB = 3 or 5, LWORK >= 4*M*(N-M) +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace/output) INTEGER, dimension (LIWORK) +* IF IJOB = 0, IWORK is not referenced. Otherwise, +* on exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +* +* LIWORK (input) INTEGER +* The dimension of the array IWORK. LIWORK >= 1. +* If IJOB = 1, 2 or 4, LIWORK >= N+2; +* If IJOB = 3 or 5, LIWORK >= MAX(N+2, 2*M*(N-M)); +* +* If LIWORK = -1, then a workspace query is assumed; the +* routine only calculates the optimal size of the IWORK array, +* returns this value as the first entry of the IWORK array, and +* no error message related to LIWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* =0: Successful exit. +* <0: If INFO = -i, the i-th argument had an illegal value. +* =1: Reordering of (A, B) failed because the transformed +* matrix pair (A, B) would be too far from generalized +* Schur form; the problem is very ill-conditioned. +* (A, B) may have been partially reordered. +* If requested, 0 is returned in DIF(*), PL and PR. +* +* +* Further Details +* =============== +* +* ZTGSEN first collects the selected eigenvalues by computing unitary +* U and W that move them to the top left corner of (A, B). In other +* words, the selected eigenvalues are the eigenvalues of (A11, B11) in +* +* U'*(A, B)*W = (A11 A12) (B11 B12) n1 +* ( 0 A22),( 0 B22) n2 +* n1 n2 n1 n2 +* +* where N = n1+n2 and U' means the conjugate transpose of U. The first +* n1 columns of U and W span the specified pair of left and right +* eigenspaces (deflating subspaces) of (A, B). +* +* If (A, B) has been obtained from the generalized real Schur +* decomposition of a matrix pair (C, D) = Q*(A, B)*Z', then the +* reordered generalized Schur form of (C, D) is given by +* +* (C, D) = (Q*U)*(U'*(A, B)*W)*(Z*W)', +* +* and the first n1 columns of Q*U and Z*W span the corresponding +* deflating subspaces of (C, D) (Q and Z store Q*U and Z*W, resp.). +* +* Note that if the selected eigenvalue is sufficiently ill-conditioned, +* then its value may differ significantly from its value before +* reordering. +* +* The reciprocal condition numbers of the left and right eigenspaces +* spanned by the first n1 columns of U and W (or Q*U and Z*W) may +* be returned in DIF(1:2), corresponding to Difu and Difl, resp. +* +* The Difu and Difl are defined as: +* +* Difu[(A11, B11), (A22, B22)] = sigma-min( Zu ) +* and +* Difl[(A11, B11), (A22, B22)] = Difu[(A22, B22), (A11, B11)], +* +* where sigma-min(Zu) is the smallest singular value of the +* (2*n1*n2)-by-(2*n1*n2) matrix +* +* Zu = [ kron(In2, A11) -kron(A22', In1) ] +* [ kron(In2, B11) -kron(B22', In1) ]. +* +* Here, Inx is the identity matrix of size nx and A22' is the +* transpose of A22. kron(X, Y) is the Kronecker product between +* the matrices X and Y. +* +* When DIF(2) is small, small changes in (A, B) can cause large changes +* in the deflating subspace. An approximate (asymptotic) bound on the +* maximum angular error in the computed deflating subspaces is +* +* EPS * norm((A, B)) / DIF(2), +* +* where EPS is the machine precision. +* +* The reciprocal norm of the projectors on the left and right +* eigenspaces associated with (A11, B11) may be returned in PL and PR. +* They are computed as follows. First we compute L and R so that +* P*(A, B)*Q is block diagonal, where +* +* P = ( I -L ) n1 Q = ( I R ) n1 +* ( 0 I ) n2 and ( 0 I ) n2 +* n1 n2 n1 n2 +* +* and (L, R) is the solution to the generalized Sylvester equation +* +* A11*R - L*A22 = -A12 +* B11*R - L*B22 = -B12 +* +* Then PL = (F-norm(L)**2+1)**(-1/2) and PR = (F-norm(R)**2+1)**(-1/2). +* An approximate (asymptotic) bound on the average absolute error of +* the selected eigenvalues is +* +* EPS * norm((A, B)) / PL. +* +* There are also global error bounds which valid for perturbations up +* to a certain restriction: A lower bound (x) on the smallest +* F-norm(E,F) for which an eigenvalue of (A11, B11) may move and +* coalesce with an eigenvalue of (A22, B22) under perturbation (E,F), +* (i.e. (A + E, B + F), is +* +* x = min(Difu,Difl)/((1/(PL*PL)+1/(PR*PR))**(1/2)+2*max(1/PL,1/PR)). +* +* An approximate bound on x can be computed from DIF(1:2), PL and PR. +* +* If y = ( F-norm(E,F) / x) <= 1, the angles between the perturbed +* (L', R') and unperturbed (L, R) left and right deflating subspaces +* associated with the selected cluster in the (1,1)-blocks can be +* bounded as +* +* max-angle(L, L') <= arctan( y * PL / (1 - y * (1 - PL * PL)**(1/2)) +* max-angle(R, R') <= arctan( y * PR / (1 - y * (1 - PR * PR)**(1/2)) +* +* See LAPACK User's Guide section 4.11 or the following references +* for more information. +* +* Note that if the default method for computing the Frobenius-norm- +* based estimate DIF is not wanted (see ZLATDF), then the parameter +* IDIFJB (see below) should be changed from 3 to 4 (routine ZLATDF +* (IJOB = 2 will be used)). See ZTGSYL for more details. +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* References +* ========== +* +* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +* M.S. Moonen et al (eds), Linear Algebra for Large Scale and +* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +* +* [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +* Eigenvalues of a Regular Matrix Pair (A, B) and Condition +* Estimation: Theory, Algorithms and Software, Report +* UMINF - 94.04, Department of Computing Science, Umea University, +* S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87. +* To appear in Numerical Algorithms, 1996. +* +* [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +* for Solving the Generalized Sylvester Equation and Estimating the +* Separation between Regular Matrix Pairs, Report UMINF - 93.23, +* Department of Computing Science, Umea University, S-901 87 Umea, +* Sweden, December 1993, Revised April 1994, Also as LAPACK working +* Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1, +* 1996. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IDIFJB + PARAMETER ( IDIFJB = 3 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SWAP, WANTD, WANTD1, WANTD2, WANTP + INTEGER I, IERR, IJB, K, KASE, KS, LIWMIN, LWMIN, MN2, + $ N1, N2 + DOUBLE PRECISION DSCALE, DSUM, RDSCAL, SAFMIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACON, ZLACPY, ZLASSQ, ZSCAL, ZTGEXC, + $ ZTGSYL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DCMPLX, DCONJG, MAX, SQRT +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + IF( IJOB.LT.0 .OR. IJOB.GT.5 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -13 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTGSEN', -INFO ) + RETURN + END IF +* + IERR = 0 +* + WANTP = IJOB.EQ.1 .OR. IJOB.GE.4 + WANTD1 = IJOB.EQ.2 .OR. IJOB.EQ.4 + WANTD2 = IJOB.EQ.3 .OR. IJOB.EQ.5 + WANTD = WANTD1 .OR. WANTD2 +* +* Set M to the dimension of the specified pair of deflating +* subspaces. +* + M = 0 + DO 10 K = 1, N + ALPHA( K ) = A( K, K ) + BETA( K ) = B( K, K ) + IF( K.LT.N ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + 10 CONTINUE +* + IF( IJOB.EQ.1 .OR. IJOB.EQ.2 .OR. IJOB.EQ.4 ) THEN + LWMIN = MAX( 1, 2*M*( N-M ) ) + LIWMIN = MAX( 1, N+2 ) + ELSE IF( IJOB.EQ.3 .OR. IJOB.EQ.5 ) THEN + LWMIN = MAX( 1, 4*M*( N-M ) ) + LIWMIN = MAX( 1, 2*M*( N-M ), N+2 ) + ELSE + LWMIN = 1 + LIWMIN = 1 + END IF +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -21 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -23 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTGSEN', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.N .OR. M.EQ.0 ) THEN + IF( WANTP ) THEN + PL = ONE + PR = ONE + END IF + IF( WANTD ) THEN + DSCALE = ZERO + DSUM = ONE + DO 20 I = 1, N + CALL ZLASSQ( N, A( 1, I ), 1, DSCALE, DSUM ) + CALL ZLASSQ( N, B( 1, I ), 1, DSCALE, DSUM ) + 20 CONTINUE + DIF( 1 ) = DSCALE*SQRT( DSUM ) + DIF( 2 ) = DIF( 1 ) + END IF + GO TO 70 + END IF +* +* Get machine constant +* + SAFMIN = DLAMCH( 'S' ) +* +* Collect the selected blocks at the top-left corner of (A, B). +* + KS = 0 + DO 30 K = 1, N + SWAP = SELECT( K ) + IF( SWAP ) THEN + KS = KS + 1 +* +* Swap the K-th block to position KS. Compute unitary Q +* and Z that will swap adjacent diagonal blocks in (A, B). +* + IF( K.NE.KS ) + $ CALL ZTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, K, KS, IERR ) +* + IF( IERR.GT.0 ) THEN +* +* Swap is rejected: exit. +* + INFO = 1 + IF( WANTP ) THEN + PL = ZERO + PR = ZERO + END IF + IF( WANTD ) THEN + DIF( 1 ) = ZERO + DIF( 2 ) = ZERO + END IF + GO TO 70 + END IF + END IF + 30 CONTINUE + IF( WANTP ) THEN +* +* Solve generalized Sylvester equation for R and L: +* A11 * R - L * A22 = A12 +* B11 * R - L * B22 = B12 +* + N1 = M + N2 = N - M + I = N1 + 1 + CALL ZLACPY( 'Full', N1, N2, A( 1, I ), LDA, WORK, N1 ) + CALL ZLACPY( 'Full', N1, N2, B( 1, I ), LDB, WORK( N1*N2+1 ), + $ N1 ) + IJB = 0 + CALL ZTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, + $ N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), N1, + $ DSCALE, DIF( 1 ), WORK( N1*N2*2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) +* +* Estimate the reciprocal of norms of "projections" onto +* left and right eigenspaces +* + RDSCAL = ZERO + DSUM = ONE + CALL ZLASSQ( N1*N2, WORK, 1, RDSCAL, DSUM ) + PL = RDSCAL*SQRT( DSUM ) + IF( PL.EQ.ZERO ) THEN + PL = ONE + ELSE + PL = DSCALE / ( SQRT( DSCALE*DSCALE / PL+PL )*SQRT( PL ) ) + END IF + RDSCAL = ZERO + DSUM = ONE + CALL ZLASSQ( N1*N2, WORK( N1*N2+1 ), 1, RDSCAL, DSUM ) + PR = RDSCAL*SQRT( DSUM ) + IF( PR.EQ.ZERO ) THEN + PR = ONE + ELSE + PR = DSCALE / ( SQRT( DSCALE*DSCALE / PR+PR )*SQRT( PR ) ) + END IF + END IF + IF( WANTD ) THEN +* +* Compute estimates Difu and Difl. +* + IF( WANTD1 ) THEN + N1 = M + N2 = N - M + I = N1 + 1 + IJB = IDIFJB +* +* Frobenius norm-based Difu estimate. +* + CALL ZTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, + $ N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), + $ N1, DSCALE, DIF( 1 ), WORK( N1*N2*2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) +* +* Frobenius norm-based Difl estimate. +* + CALL ZTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, WORK, + $ N2, B( I, I ), LDB, B, LDB, WORK( N1*N2+1 ), + $ N2, DSCALE, DIF( 2 ), WORK( N1*N2*2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) + ELSE +* +* Compute 1-norm-based estimates of Difu and Difl using +* reversed communication with ZLACON. In each step a +* generalized Sylvester equation or a transposed variant +* is solved. +* + KASE = 0 + N1 = M + N2 = N - M + I = N1 + 1 + IJB = 0 + MN2 = 2*N1*N2 +* +* 1-norm-based estimate of Difu. +* + 40 CONTINUE + CALL ZLACON( MN2, WORK( MN2+1 ), WORK, DIF( 1 ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve generalized Sylvester equation +* + CALL ZTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, + $ WORK, N1, B, LDB, B( I, I ), LDB, + $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), + $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + ELSE +* +* Solve the transposed variant. +* + CALL ZTGSYL( 'C', IJB, N1, N2, A, LDA, A( I, I ), LDA, + $ WORK, N1, B, LDB, B( I, I ), LDB, + $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), + $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + END IF + GO TO 40 + END IF + DIF( 1 ) = DSCALE / DIF( 1 ) +* +* 1-norm-based estimate of Difl. +* + 50 CONTINUE + CALL ZLACON( MN2, WORK( MN2+1 ), WORK, DIF( 2 ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve generalized Sylvester equation +* + CALL ZTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, + $ WORK, N2, B( I, I ), LDB, B, LDB, + $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), + $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + ELSE +* +* Solve the transposed variant. +* + CALL ZTGSYL( 'C', IJB, N2, N1, A( I, I ), LDA, A, LDA, + $ WORK, N2, B, LDB, B( I, I ), LDB, + $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), + $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + END IF + GO TO 50 + END IF + DIF( 2 ) = DSCALE / DIF( 2 ) + END IF + END IF +* +* If B(K,K) is complex, make it real and positive (normalization +* of the generalized Schur form) and Store the generalized +* eigenvalues of reordered pair (A, B) +* + DO 60 K = 1, N + DSCALE = ABS( B( K, K ) ) + IF( DSCALE.GT.SAFMIN ) THEN + WORK( 1 ) = DCONJG( B( K, K ) / DSCALE ) + WORK( 2 ) = B( K, K ) / DSCALE + B( K, K ) = DSCALE + CALL ZSCAL( N-K, WORK( 1 ), B( K, K+1 ), LDB ) + CALL ZSCAL( N-K+1, WORK( 1 ), A( K, K ), LDA ) + IF( WANTQ ) + $ CALL ZSCAL( N, WORK( 2 ), Q( 1, K ), 1 ) + ELSE + B( K, K ) = DCMPLX( ZERO, ZERO ) + END IF +* + ALPHA( K ) = A( K, K ) + BETA( K ) = B( K, K ) +* + 60 CONTINUE +* + 70 CONTINUE +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of ZTGSEN +* + END diff --git a/costa/native/external/lapack/ztgsja.f b/costa/native/external/lapack/ztgsja.f new file mode 100644 index 000000000..d3415fe5c --- /dev/null +++ b/costa/native/external/lapack/ztgsja.f @@ -0,0 +1,526 @@ + SUBROUTINE ZTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, + $ LDB, TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, + $ Q, LDQ, WORK, NCYCLE, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, + $ NCYCLE, P + DOUBLE PRECISION TOLA, TOLB +* .. +* .. Array Arguments .. + DOUBLE PRECISION ALPHA( * ), BETA( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZTGSJA computes the generalized singular value decomposition (GSVD) +* of two complex upper triangular (or trapezoidal) matrices A and B. +* +* On entry, it is assumed that matrices A and B have the following +* forms, which may be obtained by the preprocessing subroutine ZGGSVP +* from a general M-by-N matrix A and P-by-N matrix B: +* +* N-K-L K L +* A = K ( 0 A12 A13 ) if M-K-L >= 0; +* L ( 0 0 A23 ) +* M-K-L ( 0 0 0 ) +* +* N-K-L K L +* A = K ( 0 A12 A13 ) if M-K-L < 0; +* M-K ( 0 0 A23 ) +* +* N-K-L K L +* B = L ( 0 0 B13 ) +* P-L ( 0 0 0 ) +* +* where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular +* upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, +* otherwise A23 is (M-K)-by-L upper trapezoidal. +* +* On exit, +* +* U'*A*Q = D1*( 0 R ), V'*B*Q = D2*( 0 R ), +* +* where U, V and Q are unitary matrices, Z' denotes the conjugate +* transpose of Z, R is a nonsingular upper triangular matrix, and D1 +* and D2 are ``diagonal'' matrices, which are of the following +* structures: +* +* If M-K-L >= 0, +* +* K L +* D1 = K ( I 0 ) +* L ( 0 C ) +* M-K-L ( 0 0 ) +* +* K L +* D2 = L ( 0 S ) +* P-L ( 0 0 ) +* +* N-K-L K L +* ( 0 R ) = K ( 0 R11 R12 ) K +* L ( 0 0 R22 ) L +* +* where +* +* C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), +* S = diag( BETA(K+1), ... , BETA(K+L) ), +* C**2 + S**2 = I. +* +* R is stored in A(1:K+L,N-K-L+1:N) on exit. +* +* If M-K-L < 0, +* +* K M-K K+L-M +* D1 = K ( I 0 0 ) +* M-K ( 0 C 0 ) +* +* K M-K K+L-M +* D2 = M-K ( 0 S 0 ) +* K+L-M ( 0 0 I ) +* P-L ( 0 0 0 ) +* +* N-K-L K M-K K+L-M +* ( 0 R ) = K ( 0 R11 R12 R13 ) +* M-K ( 0 0 R22 R23 ) +* K+L-M ( 0 0 0 R33 ) +* +* where +* C = diag( ALPHA(K+1), ... , ALPHA(M) ), +* S = diag( BETA(K+1), ... , BETA(M) ), +* C**2 + S**2 = I. +* +* R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored +* ( 0 R22 R23 ) +* in B(M-K+1:L,N+M-K-L+1:N) on exit. +* +* The computation of the unitary transformation matrices U, V or Q +* is optional. These matrices may either be formed explicitly, or they +* may be postmultiplied into input matrices U1, V1, or Q1. +* +* Arguments +* ========= +* +* JOBU (input) CHARACTER*1 +* = 'U': U must contain a unitary matrix U1 on entry, and +* the product U1*U is returned; +* = 'I': U is initialized to the unit matrix, and the +* unitary matrix U is returned; +* = 'N': U is not computed. +* +* JOBV (input) CHARACTER*1 +* = 'V': V must contain a unitary matrix V1 on entry, and +* the product V1*V is returned; +* = 'I': V is initialized to the unit matrix, and the +* unitary matrix V is returned; +* = 'N': V is not computed. +* +* JOBQ (input) CHARACTER*1 +* = 'Q': Q must contain a unitary matrix Q1 on entry, and +* the product Q1*Q is returned; +* = 'I': Q is initialized to the unit matrix, and the +* unitary matrix Q is returned; +* = 'N': Q is not computed. +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* P (input) INTEGER +* The number of rows of the matrix B. P >= 0. +* +* N (input) INTEGER +* The number of columns of the matrices A and B. N >= 0. +* +* K (input) INTEGER +* L (input) INTEGER +* K and L specify the subblocks in the input matrices A and B: +* A23 = A(K+1:MIN(K+L,M),N-L+1:N) and B13 = B(1:L,,N-L+1:N) +* of A and B, whose GSVD is going to be computed by ZTGSJA. +* See Further details. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the M-by-N matrix A. +* On exit, A(N-K+1:N,1:MIN(K+L,M) ) contains the triangular +* matrix R or part of R. See Purpose for details. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input/output) COMPLEX*16 array, dimension (LDB,N) +* On entry, the P-by-N matrix B. +* On exit, if necessary, B(M-K+1:L,N+M-K-L+1:N) contains +* a part of R. See Purpose for details. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,P). +* +* TOLA (input) DOUBLE PRECISION +* TOLB (input) DOUBLE PRECISION +* TOLA and TOLB are the convergence criteria for the Jacobi- +* Kogbetliantz iteration procedure. Generally, they are the +* same as used in the preprocessing step, say +* TOLA = MAX(M,N)*norm(A)*MAZHEPS, +* TOLB = MAX(P,N)*norm(B)*MAZHEPS. +* +* ALPHA (output) DOUBLE PRECISION array, dimension (N) +* BETA (output) DOUBLE PRECISION array, dimension (N) +* On exit, ALPHA and BETA contain the generalized singular +* value pairs of A and B; +* ALPHA(1:K) = 1, +* BETA(1:K) = 0, +* and if M-K-L >= 0, +* ALPHA(K+1:K+L) = diag(C), +* BETA(K+1:K+L) = diag(S), +* or if M-K-L < 0, +* ALPHA(K+1:M)= C, ALPHA(M+1:K+L)= 0 +* BETA(K+1:M) = S, BETA(M+1:K+L) = 1. +* Furthermore, if K+L < N, +* ALPHA(K+L+1:N) = 0 +* BETA(K+L+1:N) = 0. +* +* U (input/output) COMPLEX*16 array, dimension (LDU,M) +* On entry, if JOBU = 'U', U must contain a matrix U1 (usually +* the unitary matrix returned by ZGGSVP). +* On exit, +* if JOBU = 'I', U contains the unitary matrix U; +* if JOBU = 'U', U contains the product U1*U. +* If JOBU = 'N', U is not referenced. +* +* LDU (input) INTEGER +* The leading dimension of the array U. LDU >= max(1,M) if +* JOBU = 'U'; LDU >= 1 otherwise. +* +* V (input/output) COMPLEX*16 array, dimension (LDV,P) +* On entry, if JOBV = 'V', V must contain a matrix V1 (usually +* the unitary matrix returned by ZGGSVP). +* On exit, +* if JOBV = 'I', V contains the unitary matrix V; +* if JOBV = 'V', V contains the product V1*V. +* If JOBV = 'N', V is not referenced. +* +* LDV (input) INTEGER +* The leading dimension of the array V. LDV >= max(1,P) if +* JOBV = 'V'; LDV >= 1 otherwise. +* +* Q (input/output) COMPLEX*16 array, dimension (LDQ,N) +* On entry, if JOBQ = 'Q', Q must contain a matrix Q1 (usually +* the unitary matrix returned by ZGGSVP). +* On exit, +* if JOBQ = 'I', Q contains the unitary matrix Q; +* if JOBQ = 'Q', Q contains the product Q1*Q. +* If JOBQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N) if +* JOBQ = 'Q'; LDQ >= 1 otherwise. +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* NCYCLE (output) INTEGER +* The number of cycles required for convergence. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value. +* = 1: the procedure does not converge after MAXIT cycles. +* +* Internal Parameters +* =================== +* +* MAXIT INTEGER +* MAXIT specifies the total loops that the iterative procedure +* may take. If after MAXIT cycles, the routine fails to +* converge, we return INFO = 1. +* +* Further Details +* =============== +* +* ZTGSJA essentially uses a variant of Kogbetliantz algorithm to reduce +* min(L,M-K)-by-L triangular (or trapezoidal) matrix A23 and L-by-L +* matrix B13 to the form: +* +* U1'*A13*Q1 = C1*R1; V1'*B13*Q1 = S1*R1, +* +* where U1, V1 and Q1 are unitary matrix, and Z' is the conjugate +* transpose of Z. C1 and S1 are diagonal matrices satisfying +* +* C1**2 + S1**2 = I, +* +* and R1 is an L-by-L nonsingular upper triangular matrix. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXIT + PARAMETER ( MAXIT = 40 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. +* + LOGICAL INITQ, INITU, INITV, UPPER, WANTQ, WANTU, WANTV + INTEGER I, J, KCYCLE + DOUBLE PRECISION A1, A3, B1, B3, CSQ, CSU, CSV, ERROR, GAMMA, + $ RWK, SSMIN + COMPLEX*16 A2, B2, SNQ, SNU, SNV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLARTG, XERBLA, ZCOPY, ZDSCAL, ZLAGS2, ZLAPLL, + $ ZLASET, ZROT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + INITU = LSAME( JOBU, 'I' ) + WANTU = INITU .OR. LSAME( JOBU, 'U' ) +* + INITV = LSAME( JOBV, 'I' ) + WANTV = INITV .OR. LSAME( JOBV, 'V' ) +* + INITQ = LSAME( JOBQ, 'I' ) + WANTQ = INITQ .OR. LSAME( JOBQ, 'Q' ) +* + INFO = 0 + IF( .NOT.( INITU .OR. WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( INITV .OR. WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( INITQ .OR. WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( P.LT.0 ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -12 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -18 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -20 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -22 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTGSJA', -INFO ) + RETURN + END IF +* +* Initialize U, V and Q, if necessary +* + IF( INITU ) + $ CALL ZLASET( 'Full', M, M, CZERO, CONE, U, LDU ) + IF( INITV ) + $ CALL ZLASET( 'Full', P, P, CZERO, CONE, V, LDV ) + IF( INITQ ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, Q, LDQ ) +* +* Loop until convergence +* + UPPER = .FALSE. + DO 40 KCYCLE = 1, MAXIT +* + UPPER = .NOT.UPPER +* + DO 20 I = 1, L - 1 + DO 10 J = I + 1, L +* + A1 = ZERO + A2 = CZERO + A3 = ZERO + IF( K+I.LE.M ) + $ A1 = DBLE( A( K+I, N-L+I ) ) + IF( K+J.LE.M ) + $ A3 = DBLE( A( K+J, N-L+J ) ) +* + B1 = DBLE( B( I, N-L+I ) ) + B3 = DBLE( B( J, N-L+J ) ) +* + IF( UPPER ) THEN + IF( K+I.LE.M ) + $ A2 = A( K+I, N-L+J ) + B2 = B( I, N-L+J ) + ELSE + IF( K+J.LE.M ) + $ A2 = A( K+J, N-L+I ) + B2 = B( J, N-L+I ) + END IF +* + CALL ZLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, + $ CSV, SNV, CSQ, SNQ ) +* +* Update (K+I)-th and (K+J)-th rows of matrix A: U'*A +* + IF( K+J.LE.M ) + $ CALL ZROT( L, A( K+J, N-L+1 ), LDA, A( K+I, N-L+1 ), + $ LDA, CSU, DCONJG( SNU ) ) +* +* Update I-th and J-th rows of matrix B: V'*B +* + CALL ZROT( L, B( J, N-L+1 ), LDB, B( I, N-L+1 ), LDB, + $ CSV, DCONJG( SNV ) ) +* +* Update (N-L+I)-th and (N-L+J)-th columns of matrices +* A and B: A*Q and B*Q +* + CALL ZROT( MIN( K+L, M ), A( 1, N-L+J ), 1, + $ A( 1, N-L+I ), 1, CSQ, SNQ ) +* + CALL ZROT( L, B( 1, N-L+J ), 1, B( 1, N-L+I ), 1, CSQ, + $ SNQ ) +* + IF( UPPER ) THEN + IF( K+I.LE.M ) + $ A( K+I, N-L+J ) = CZERO + B( I, N-L+J ) = CZERO + ELSE + IF( K+J.LE.M ) + $ A( K+J, N-L+I ) = CZERO + B( J, N-L+I ) = CZERO + END IF +* +* Ensure that the diagonal elements of A and B are real. +* + IF( K+I.LE.M ) + $ A( K+I, N-L+I ) = DBLE( A( K+I, N-L+I ) ) + IF( K+J.LE.M ) + $ A( K+J, N-L+J ) = DBLE( A( K+J, N-L+J ) ) + B( I, N-L+I ) = DBLE( B( I, N-L+I ) ) + B( J, N-L+J ) = DBLE( B( J, N-L+J ) ) +* +* Update unitary matrices U, V, Q, if desired. +* + IF( WANTU .AND. K+J.LE.M ) + $ CALL ZROT( M, U( 1, K+J ), 1, U( 1, K+I ), 1, CSU, + $ SNU ) +* + IF( WANTV ) + $ CALL ZROT( P, V( 1, J ), 1, V( 1, I ), 1, CSV, SNV ) +* + IF( WANTQ ) + $ CALL ZROT( N, Q( 1, N-L+J ), 1, Q( 1, N-L+I ), 1, CSQ, + $ SNQ ) +* + 10 CONTINUE + 20 CONTINUE +* + IF( .NOT.UPPER ) THEN +* +* The matrices A13 and B13 were lower triangular at the start +* of the cycle, and are now upper triangular. +* +* Convergence test: test the parallelism of the corresponding +* rows of A and B. +* + ERROR = ZERO + DO 30 I = 1, MIN( L, M-K ) + CALL ZCOPY( L-I+1, A( K+I, N-L+I ), LDA, WORK, 1 ) + CALL ZCOPY( L-I+1, B( I, N-L+I ), LDB, WORK( L+1 ), 1 ) + CALL ZLAPLL( L-I+1, WORK, 1, WORK( L+1 ), 1, SSMIN ) + ERROR = MAX( ERROR, SSMIN ) + 30 CONTINUE +* + IF( ABS( ERROR ).LE.MIN( TOLA, TOLB ) ) + $ GO TO 50 + END IF +* +* End of cycle loop +* + 40 CONTINUE +* +* The algorithm has not converged after MAXIT cycles. +* + INFO = 1 + GO TO 100 +* + 50 CONTINUE +* +* If ERROR <= MIN(TOLA,TOLB), then the algorithm has converged. +* Compute the generalized singular value pairs (ALPHA, BETA), and +* set the triangular matrix R to array A. +* + DO 60 I = 1, K + ALPHA( I ) = ONE + BETA( I ) = ZERO + 60 CONTINUE +* + DO 70 I = 1, MIN( L, M-K ) +* + A1 = DBLE( A( K+I, N-L+I ) ) + B1 = DBLE( B( I, N-L+I ) ) +* + IF( A1.NE.ZERO ) THEN + GAMMA = B1 / A1 +* + IF( GAMMA.LT.ZERO ) THEN + CALL ZDSCAL( L-I+1, -ONE, B( I, N-L+I ), LDB ) + IF( WANTV ) + $ CALL ZDSCAL( P, -ONE, V( 1, I ), 1 ) + END IF +* + CALL DLARTG( ABS( GAMMA ), ONE, BETA( K+I ), ALPHA( K+I ), + $ RWK ) +* + IF( ALPHA( K+I ).GE.BETA( K+I ) ) THEN + CALL ZDSCAL( L-I+1, ONE / ALPHA( K+I ), A( K+I, N-L+I ), + $ LDA ) + ELSE + CALL ZDSCAL( L-I+1, ONE / BETA( K+I ), B( I, N-L+I ), + $ LDB ) + CALL ZCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ), + $ LDA ) + END IF +* + ELSE + ALPHA( K+I ) = ZERO + BETA( K+I ) = ONE + CALL ZCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ), + $ LDA ) + END IF + 70 CONTINUE +* +* Post-assignment +* + DO 80 I = M + 1, K + L + ALPHA( I ) = ZERO + BETA( I ) = ONE + 80 CONTINUE +* + IF( K+L.LT.N ) THEN + DO 90 I = K + L + 1, N + ALPHA( I ) = ZERO + BETA( I ) = ZERO + 90 CONTINUE + END IF +* + 100 CONTINUE + NCYCLE = KCYCLE +* + RETURN +* +* End of ZTGSJA +* + END diff --git a/costa/native/external/lapack/ztgsna.f b/costa/native/external/lapack/ztgsna.f new file mode 100644 index 000000000..4f02aab17 --- /dev/null +++ b/costa/native/external/lapack/ztgsna.f @@ -0,0 +1,402 @@ + SUBROUTINE ZTGSNA( JOB, HOWMNY, SELECT, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, S, DIF, MM, M, WORK, LWORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, JOB + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + DOUBLE PRECISION DIF( * ), S( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZTGSNA estimates reciprocal condition numbers for specified +* eigenvalues and/or eigenvectors of a matrix pair (A, B). +* +* (A, B) must be in generalized Schur canonical form, that is, A and +* B are both upper triangular. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies whether condition numbers are required for +* eigenvalues (S) or eigenvectors (DIF): +* = 'E': for eigenvalues only (S); +* = 'V': for eigenvectors only (DIF); +* = 'B': for both eigenvalues and eigenvectors (S and DIF). +* +* HOWMNY (input) CHARACTER*1 +* = 'A': compute condition numbers for all eigenpairs; +* = 'S': compute condition numbers for selected eigenpairs +* specified by the array SELECT. +* +* SELECT (input) LOGICAL array, dimension (N) +* If HOWMNY = 'S', SELECT specifies the eigenpairs for which +* condition numbers are required. To select condition numbers +* for the corresponding j-th eigenvalue and/or eigenvector, +* SELECT(j) must be set to .TRUE.. +* If HOWMNY = 'A', SELECT is not referenced. +* +* N (input) INTEGER +* The order of the square matrix pair (A, B). N >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The upper triangular matrix A in the pair (A,B). +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input) COMPLEX*16 array, dimension (LDB,N) +* The upper triangular matrix B in the pair (A, B). +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* VL (input) COMPLEX*16 array, dimension (LDVL,M) +* IF JOB = 'E' or 'B', VL must contain left eigenvectors of +* (A, B), corresponding to the eigenpairs specified by HOWMNY +* and SELECT. The eigenvectors must be stored in consecutive +* columns of VL, as returned by ZTGEVC. +* If JOB = 'V', VL is not referenced. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. LDVL >= 1; and +* If JOB = 'E' or 'B', LDVL >= N. +* +* VR (input) COMPLEX*16 array, dimension (LDVR,M) +* IF JOB = 'E' or 'B', VR must contain right eigenvectors of +* (A, B), corresponding to the eigenpairs specified by HOWMNY +* and SELECT. The eigenvectors must be stored in consecutive +* columns of VR, as returned by ZTGEVC. +* If JOB = 'V', VR is not referenced. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. LDVR >= 1; +* If JOB = 'E' or 'B', LDVR >= N. +* +* S (output) DOUBLE PRECISION array, dimension (MM) +* If JOB = 'E' or 'B', the reciprocal condition numbers of the +* selected eigenvalues, stored in consecutive elements of the +* array. +* If JOB = 'V', S is not referenced. +* +* DIF (output) DOUBLE PRECISION array, dimension (MM) +* If JOB = 'V' or 'B', the estimated reciprocal condition +* numbers of the selected eigenvectors, stored in consecutive +* elements of the array. +* If the eigenvalues cannot be reordered to compute DIF(j), +* DIF(j) is set to 0; this can only occur when the true value +* would be very small anyway. +* For each eigenvalue/vector specified by SELECT, DIF stores +* a Frobenius norm-based estimate of Difl. +* If JOB = 'E', DIF is not referenced. +* +* MM (input) INTEGER +* The number of elements in the arrays S and DIF. MM >= M. +* +* M (output) INTEGER +* The number of elements of the arrays S and DIF used to store +* the specified condition numbers; for each selected eigenvalue +* one element is used. If HOWMNY = 'A', M is set to N. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* If JOB = 'E', WORK is not referenced. Otherwise, +* on exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= 1. +* If JOB = 'V' or 'B', LWORK >= 2*N*N. +* +* IWORK (workspace) INTEGER array, dimension (N+2) +* If JOB = 'E', IWORK is not referenced. +* +* INFO (output) INTEGER +* = 0: Successful exit +* < 0: If INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The reciprocal of the condition number of the i-th generalized +* eigenvalue w = (a, b) is defined as +* +* S(I) = (|v'Au|**2 + |v'Bu|**2)**(1/2) / (norm(u)*norm(v)) +* +* where u and v are the right and left eigenvectors of (A, B) +* corresponding to w; |z| denotes the absolute value of the complex +* number, and norm(u) denotes the 2-norm of the vector u. The pair +* (a, b) corresponds to an eigenvalue w = a/b (= v'Au/v'Bu) of the +* matrix pair (A, B). If both a and b equal zero, then (A,B) is +* singular and S(I) = -1 is returned. +* +* An approximate error bound on the chordal distance between the i-th +* computed generalized eigenvalue w and the corresponding exact +* eigenvalue lambda is +* +* chord(w, lambda) <= EPS * norm(A, B) / S(I), +* +* where EPS is the machine precision. +* +* The reciprocal of the condition number of the right eigenvector u +* and left eigenvector v corresponding to the generalized eigenvalue w +* is defined as follows. Suppose +* +* (A, B) = ( a * ) ( b * ) 1 +* ( 0 A22 ),( 0 B22 ) n-1 +* 1 n-1 1 n-1 +* +* Then the reciprocal condition number DIF(I) is +* +* Difl[(a, b), (A22, B22)] = sigma-min( Zl ) +* +* where sigma-min(Zl) denotes the smallest singular value of +* +* Zl = [ kron(a, In-1) -kron(1, A22) ] +* [ kron(b, In-1) -kron(1, B22) ]. +* +* Here In-1 is the identity matrix of size n-1 and X' is the conjugate +* transpose of X. kron(X, Y) is the Kronecker product between the +* matrices X and Y. +* +* We approximate the smallest singular value of Zl with an upper +* bound. This is done by ZLATDF. +* +* An approximate error bound for a computed eigenvector VL(i) or +* VR(i) is given by +* +* EPS * norm(A, B) / DIF(i). +* +* See ref. [2-3] for more details and further references. +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* References +* ========== +* +* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +* M.S. Moonen et al (eds), Linear Algebra for Large Scale and +* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +* +* [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +* Eigenvalues of a Regular Matrix Pair (A, B) and Condition +* Estimation: Theory, Algorithms and Software, Report +* UMINF - 94.04, Department of Computing Science, Umea University, +* S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87. +* To appear in Numerical Algorithms, 1996. +* +* [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +* for Solving the Generalized Sylvester Equation and Estimating the +* Separation between Regular Matrix Pairs, Report UMINF - 93.23, +* Department of Computing Science, Umea University, S-901 87 Umea, +* Sweden, December 1993, Revised April 1994, Also as LAPACK Working +* Note 75. +* To appear in ACM Trans. on Math. Software, Vol 22, No 1, 1996. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + INTEGER IDIFJB + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, IDIFJB = 3 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SOMCON, WANTBH, WANTDF, WANTS + INTEGER I, IERR, IFST, ILST, K, KS, LLWRK, LWMIN, N1, + $ N2 + DOUBLE PRECISION BIGNUM, COND, EPS, LNRM, RNRM, SCALE, SMLNUM + COMPLEX*16 YHAX, YHBX +* .. +* .. Local Arrays .. + COMPLEX*16 DUMMY( 1 ), DUMMY1( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLAPY2, DZNRM2 + COMPLEX*16 ZDOTC + EXTERNAL LSAME, DLAMCH, DLAPY2, DZNRM2, ZDOTC +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMV, ZLACPY, ZTGEXC, ZTGSYL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DCMPLX, MAX +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTDF = LSAME( JOB, 'V' ) .OR. WANTBH +* + SOMCON = LSAME( HOWMNY, 'S' ) +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) +* + IF( LSAME( JOB, 'V' ) .OR. LSAME( JOB, 'B' ) ) THEN + LWMIN = MAX( 1, 2*N*N ) + ELSE + LWMIN = 1 + END IF +* + IF( .NOT.WANTS .AND. .NOT.WANTDF ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( WANTS .AND. LDVL.LT.N ) THEN + INFO = -10 + ELSE IF( WANTS .AND. LDVR.LT.N ) THEN + INFO = -12 + ELSE +* +* Set M to the number of eigenpairs for which condition numbers +* are required, and test MM. +* + IF( SOMCON ) THEN + M = 0 + DO 10 K = 1, N + IF( SELECT( K ) ) + $ M = M + 1 + 10 CONTINUE + ELSE + M = N + END IF +* + IF( MM.LT.M ) THEN + INFO = -15 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTGSNA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + LLWRK = LWORK - 2*N*N + KS = 0 + DO 20 K = 1, N +* +* Determine whether condition numbers are required for the k-th +* eigenpair. +* + IF( SOMCON ) THEN + IF( .NOT.SELECT( K ) ) + $ GO TO 20 + END IF +* + KS = KS + 1 +* + IF( WANTS ) THEN +* +* Compute the reciprocal condition number of the k-th +* eigenvalue. +* + RNRM = DZNRM2( N, VR( 1, KS ), 1 ) + LNRM = DZNRM2( N, VL( 1, KS ), 1 ) + CALL ZGEMV( 'N', N, N, DCMPLX( ONE, ZERO ), A, LDA, + $ VR( 1, KS ), 1, DCMPLX( ZERO, ZERO ), WORK, 1 ) + YHAX = ZDOTC( N, WORK, 1, VL( 1, KS ), 1 ) + CALL ZGEMV( 'N', N, N, DCMPLX( ONE, ZERO ), B, LDB, + $ VR( 1, KS ), 1, DCMPLX( ZERO, ZERO ), WORK, 1 ) + YHBX = ZDOTC( N, WORK, 1, VL( 1, KS ), 1 ) + COND = DLAPY2( ABS( YHAX ), ABS( YHBX ) ) + IF( COND.EQ.ZERO ) THEN + S( KS ) = -ONE + ELSE + S( KS ) = COND / ( RNRM*LNRM ) + END IF + END IF +* + IF( WANTDF ) THEN + IF( N.EQ.1 ) THEN + DIF( KS ) = DLAPY2( ABS( A( 1, 1 ) ), ABS( B( 1, 1 ) ) ) + GO TO 20 + END IF +* +* Estimate the reciprocal condition number of the k-th +* eigenvectors. +* +* Copy the matrix (A, B) to the array WORK and move the +* (k,k)th pair to the (1,1) position. +* + CALL ZLACPY( 'Full', N, N, A, LDA, WORK, N ) + CALL ZLACPY( 'Full', N, N, B, LDB, WORK( N*N+1 ), N ) + IFST = K + ILST = 1 +* + CALL ZTGEXC( .FALSE., .FALSE., N, WORK, N, WORK( N*N+1 ), N, + $ DUMMY, 1, DUMMY1, 1, IFST, ILST, IERR ) +* + IF( IERR.GT.0 ) THEN +* +* Ill-conditioned problem - swap rejected. +* + DIF( KS ) = ZERO + ELSE +* +* Reordering successful, solve generalized Sylvester +* equation for R and L, +* A22 * R - L * A11 = A12 +* B22 * R - L * B11 = B12, +* and compute estimate of Difl[(A11,B11), (A22, B22)]. +* + N1 = 1 + N2 = N - N1 + I = N*N + 1 + CALL ZTGSYL( 'N', IDIFJB, N2, N1, WORK( N*N1+N1+1 ), N, + $ WORK, N, WORK( N1+1 ), N, WORK( N*N1+N1+I ), + $ N, WORK( I ), N, WORK( N1+I ), N, SCALE, + $ DIF( KS ), WORK( N*N*2+1 ), LLWRK, IWORK, + $ IERR ) + END IF + END IF +* + 20 CONTINUE + WORK( 1 ) = LWMIN + RETURN +* +* End of ZTGSNA +* + END diff --git a/costa/native/external/lapack/ztgsy2.f b/costa/native/external/lapack/ztgsy2.f new file mode 100644 index 000000000..5ee762bc0 --- /dev/null +++ b/costa/native/external/lapack/ztgsy2.f @@ -0,0 +1,357 @@ + SUBROUTINE ZTGSY2( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, RDSUM, RDSCAL, + $ INFO ) +* +* -- LAPACK auxiliary routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, M, N + DOUBLE PRECISION RDSCAL, RDSUM, SCALE +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ D( LDD, * ), E( LDE, * ), F( LDF, * ) +* .. +* +* Purpose +* ======= +* +* ZTGSY2 solves the generalized Sylvester equation +* +* A * R - L * B = scale * C (1) +* D * R - L * E = scale * F +* +* using Level 1 and 2 BLAS, where R and L are unknown M-by-N matrices, +* (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M, +* N-by-N and M-by-N, respectively. A, B, D and E are upper triangular +* (i.e., (A,D) and (B,E) in generalized Schur form). +* +* The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output +* scaling factor chosen to avoid overflow. +* +* In matrix notation solving equation (1) corresponds to solve +* Zx = scale * b, where Z is defined as +* +* Z = [ kron(In, A) -kron(B', Im) ] (2) +* [ kron(In, D) -kron(E', Im) ], +* +* Ik is the identity matrix of size k and X' is the transpose of X. +* kron(X, Y) is the Kronecker product between the matrices X and Y. +* +* If TRANS = 'C', y in the conjugate transposed system Z'y = scale*b +* is solved for, which is equivalent to solve for R and L in +* +* A' * R + D' * L = scale * C (3) +* R * B' + L * E' = scale * -F +* +* This case is used to compute an estimate of Dif[(A, D), (B, E)] = +* = sigma_min(Z) using reverse communicaton with ZLACON. +* +* ZTGSY2 also (IJOB >= 1) contributes to the computation in ZTGSYL +* of an upper bound on the separation between to matrix pairs. Then +* the input (A, D), (B, E) are sub-pencils of two matrix pairs in +* ZTGSYL. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER +* = 'N', solve the generalized Sylvester equation (1). +* = 'T': solve the 'transposed' system (3). +* +* IJOB (input) INTEGER +* Specifies what kind of functionality to be performed. +* =0: solve (1) only. +* =1: A contribution from this subsystem to a Frobenius +* norm-based estimate of the separation between two matrix +* pairs is computed. (look ahead strategy is used). +* =2: A contribution from this subsystem to a Frobenius +* norm-based estimate of the separation between two matrix +* pairs is computed. (DGECON on sub-systems is used.) +* Not referenced if TRANS = 'T'. +* +* M (input) INTEGER +* On entry, M specifies the order of A and D, and the row +* dimension of C, F, R and L. +* +* N (input) INTEGER +* On entry, N specifies the order of B and E, and the column +* dimension of C, F, R and L. +* +* A (input) COMPLEX*16 array, dimension (LDA, M) +* On entry, A contains an upper triangular matrix. +* +* LDA (input) INTEGER +* The leading dimension of the matrix A. LDA >= max(1, M). +* +* B (input) COMPLEX*16 array, dimension (LDB, N) +* On entry, B contains an upper triangular matrix. +* +* LDB (input) INTEGER +* The leading dimension of the matrix B. LDB >= max(1, N). +* +* C (input/ output) COMPLEX*16 array, dimension (LDC, N) +* On entry, C contains the right-hand-side of the first matrix +* equation in (1). +* On exit, if IJOB = 0, C has been overwritten by the solution +* R. +* +* LDC (input) INTEGER +* The leading dimension of the matrix C. LDC >= max(1, M). +* +* D (input) COMPLEX*16 array, dimension (LDD, M) +* On entry, D contains an upper triangular matrix. +* +* LDD (input) INTEGER +* The leading dimension of the matrix D. LDD >= max(1, M). +* +* E (input) COMPLEX*16 array, dimension (LDE, N) +* On entry, E contains an upper triangular matrix. +* +* LDE (input) INTEGER +* The leading dimension of the matrix E. LDE >= max(1, N). +* +* F (input/ output) COMPLEX*16 array, dimension (LDF, N) +* On entry, F contains the right-hand-side of the second matrix +* equation in (1). +* On exit, if IJOB = 0, F has been overwritten by the solution +* L. +* +* LDF (input) INTEGER +* The leading dimension of the matrix F. LDF >= max(1, M). +* +* SCALE (output) DOUBLE PRECISION +* On exit, 0 <= SCALE <= 1. If 0 < SCALE < 1, the solutions +* R and L (C and F on entry) will hold the solutions to a +* slightly perturbed system but the input matrices A, B, D and +* E have not been changed. If SCALE = 0, R and L will hold the +* solutions to the homogeneous system with C = F = 0. +* Normally, SCALE = 1. +* +* RDSUM (input/output) DOUBLE PRECISION +* On entry, the sum of squares of computed contributions to +* the Dif-estimate under computation by ZTGSYL, where the +* scaling factor RDSCAL (see below) has been factored out. +* On exit, the corresponding sum of squares updated with the +* contributions from the current sub-system. +* If TRANS = 'T' RDSUM is not touched. +* NOTE: RDSUM only makes sense when ZTGSY2 is called by +* ZTGSYL. +* +* RDSCAL (input/output) DOUBLE PRECISION +* On entry, scaling factor used to prevent overflow in RDSUM. +* On exit, RDSCAL is updated w.r.t. the current contributions +* in RDSUM. +* If TRANS = 'T', RDSCAL is not touched. +* NOTE: RDSCAL only makes sense when ZTGSY2 is called by +* ZTGSYL. +* +* INFO (output) INTEGER +* On exit, if INFO is set to +* =0: Successful exit +* <0: If INFO = -i, input argument number i is illegal. +* >0: The matrix pairs (A, D) and (B, E) have common or very +* close eigenvalues. +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + INTEGER LDZ + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, LDZ = 2 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + INTEGER I, IERR, J, K + DOUBLE PRECISION SCALOC + COMPLEX*16 ALPHA +* .. +* .. Local Arrays .. + INTEGER IPIV( LDZ ), JPIV( LDZ ) + COMPLEX*16 RHS( LDZ ), Z( LDZ, LDZ ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZGESC2, ZGETC2, ZLATDF, ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC DCMPLX, DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Decode and test input parameters +* + INFO = 0 + IERR = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.2 ) ) THEN + INFO = -2 + ELSE IF( M.LE.0 ) THEN + INFO = -3 + ELSE IF( N.LE.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDD.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDE.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDF.LT.MAX( 1, M ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTGSY2', -INFO ) + RETURN + END IF +* + IF( NOTRAN ) THEN +* +* Solve (I, J) - system +* A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) +* D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) +* for I = M, M - 1, ..., 1; J = 1, 2, ..., N +* + SCALE = ONE + SCALOC = ONE + DO 30 J = 1, N + DO 20 I = M, 1, -1 +* +* Build 2 by 2 system +* + Z( 1, 1 ) = A( I, I ) + Z( 2, 1 ) = D( I, I ) + Z( 1, 2 ) = -B( J, J ) + Z( 2, 2 ) = -E( J, J ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( I, J ) + RHS( 2 ) = F( I, J ) +* +* Solve Z * x = RHS +* + CALL ZGETC2( LDZ, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR + IF( IJOB.EQ.0 ) THEN + CALL ZGESC2( LDZ, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 10 K = 1, N + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), + $ C( 1, K ), 1 ) + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), + $ F( 1, K ), 1 ) + 10 CONTINUE + SCALE = SCALE*SCALOC + END IF + ELSE + CALL ZLATDF( IJOB, LDZ, Z, LDZ, RHS, RDSUM, RDSCAL, + $ IPIV, JPIV ) + END IF +* +* Unpack solution vector(s) +* + C( I, J ) = RHS( 1 ) + F( I, J ) = RHS( 2 ) +* +* Substitute R(I, J) and L(I, J) into remaining equation. +* + IF( I.GT.1 ) THEN + ALPHA = -RHS( 1 ) + CALL ZAXPY( I-1, ALPHA, A( 1, I ), 1, C( 1, J ), 1 ) + CALL ZAXPY( I-1, ALPHA, D( 1, I ), 1, F( 1, J ), 1 ) + END IF + IF( J.LT.N ) THEN + CALL ZAXPY( N-J, RHS( 2 ), B( J, J+1 ), LDB, + $ C( I, J+1 ), LDC ) + CALL ZAXPY( N-J, RHS( 2 ), E( J, J+1 ), LDE, + $ F( I, J+1 ), LDF ) + END IF +* + 20 CONTINUE + 30 CONTINUE + ELSE +* +* Solve transposed (I, J) - system: +* A(I, I)' * R(I, J) + D(I, I)' * L(J, J) = C(I, J) +* R(I, I) * B(J, J) + L(I, J) * E(J, J) = -F(I, J) +* for I = 1, 2, ..., M, J = N, N - 1, ..., 1 +* + SCALE = ONE + SCALOC = ONE + DO 80 I = 1, M + DO 70 J = N, 1, -1 +* +* Build 2 by 2 system Z' +* + Z( 1, 1 ) = DCONJG( A( I, I ) ) + Z( 2, 1 ) = -DCONJG( B( J, J ) ) + Z( 1, 2 ) = DCONJG( D( I, I ) ) + Z( 2, 2 ) = -DCONJG( E( J, J ) ) +* +* +* Set up right hand side(s) +* + RHS( 1 ) = C( I, J ) + RHS( 2 ) = F( I, J ) +* +* Solve Z' * x = RHS +* + CALL ZGETC2( LDZ, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR + CALL ZGESC2( LDZ, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 40 K = 1, N + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), C( 1, K ), + $ 1 ) + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), F( 1, K ), + $ 1 ) + 40 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Unpack solution vector(s) +* + C( I, J ) = RHS( 1 ) + F( I, J ) = RHS( 2 ) +* +* Substitute R(I, J) and L(I, J) into remaining equation. +* + DO 50 K = 1, J - 1 + F( I, K ) = F( I, K ) + RHS( 1 )*DCONJG( B( K, J ) ) + + $ RHS( 2 )*DCONJG( E( K, J ) ) + 50 CONTINUE + DO 60 K = I + 1, M + C( K, J ) = C( K, J ) - DCONJG( A( I, K ) )*RHS( 1 ) - + $ DCONJG( D( I, K ) )*RHS( 2 ) + 60 CONTINUE +* + 70 CONTINUE + 80 CONTINUE + END IF + RETURN +* +* End of ZTGSY2 +* + END diff --git a/costa/native/external/lapack/ztgsyl.f b/costa/native/external/lapack/ztgsyl.f new file mode 100644 index 000000000..dfb3de503 --- /dev/null +++ b/costa/native/external/lapack/ztgsyl.f @@ -0,0 +1,550 @@ + SUBROUTINE ZTGSYL( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, DIF, WORK, LWORK, + $ IWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, + $ LWORK, M, N + DOUBLE PRECISION DIF, SCALE +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ D( LDD, * ), E( LDE, * ), F( LDF, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZTGSYL solves the generalized Sylvester equation: +* +* A * R - L * B = scale * C (1) +* D * R - L * E = scale * F +* +* where R and L are unknown m-by-n matrices, (A, D), (B, E) and +* (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n, +* respectively, with complex entries. A, B, D and E are upper +* triangular (i.e., (A,D) and (B,E) in generalized Schur form). +* +* The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 +* is an output scaling factor chosen to avoid overflow. +* +* In matrix notation (1) is equivalent to solve Zx = scale*b, where Z +* is defined as +* +* Z = [ kron(In, A) -kron(B', Im) ] (2) +* [ kron(In, D) -kron(E', Im) ], +* +* Here Ix is the identity matrix of size x and X' is the conjugate +* transpose of X. Kron(X, Y) is the Kronecker product between the +* matrices X and Y. +* +* If TRANS = 'C', y in the conjugate transposed system Z'*y = scale*b +* is solved for, which is equivalent to solve for R and L in +* +* A' * R + D' * L = scale * C (3) +* R * B' + L * E' = scale * -F +* +* This case (TRANS = 'C') is used to compute an one-norm-based estimate +* of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D) +* and (B,E), using ZLACON. +* +* If IJOB >= 1, ZTGSYL computes a Frobenius norm-based estimate of +* Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the +* reciprocal of the smallest singular value of Z. +* +* This is a level-3 BLAS algorithm. +* +* Arguments +* ========= +* +* TRANS (input) CHARACTER*1 +* = 'N': solve the generalized sylvester equation (1). +* = 'C': solve the "conjugate transposed" system (3). +* +* IJOB (input) INTEGER +* Specifies what kind of functionality to be performed. +* =0: solve (1) only. +* =1: The functionality of 0 and 3. +* =2: The functionality of 0 and 4. +* =3: Only an estimate of Dif[(A,D), (B,E)] is computed. +* (look ahead strategy is used). +* =4: Only an estimate of Dif[(A,D), (B,E)] is computed. +* (ZGECON on sub-systems is used). +* Not referenced if TRANS = 'C'. +* +* M (input) INTEGER +* The order of the matrices A and D, and the row dimension of +* the matrices C, F, R and L. +* +* N (input) INTEGER +* The order of the matrices B and E, and the column dimension +* of the matrices C, F, R and L. +* +* A (input) COMPLEX*16 array, dimension (LDA, M) +* The upper triangular matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1, M). +* +* B (input) COMPLEX*16 array, dimension (LDB, N) +* The upper triangular matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1, N). +* +* C (input/output) COMPLEX*16 array, dimension (LDC, N) +* On entry, C contains the right-hand-side of the first matrix +* equation in (1) or (3). +* On exit, if IJOB = 0, 1 or 2, C has been overwritten by +* the solution R. If IJOB = 3 or 4 and TRANS = 'N', C holds R, +* the solution achieved during the computation of the +* Dif-estimate. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1, M). +* +* D (input) COMPLEX*16 array, dimension (LDD, M) +* The upper triangular matrix D. +* +* LDD (input) INTEGER +* The leading dimension of the array D. LDD >= max(1, M). +* +* E (input) COMPLEX*16 array, dimension (LDE, N) +* The upper triangular matrix E. +* +* LDE (input) INTEGER +* The leading dimension of the array E. LDE >= max(1, N). +* +* F (input/output) COMPLEX*16 array, dimension (LDF, N) +* On entry, F contains the right-hand-side of the second matrix +* equation in (1) or (3). +* On exit, if IJOB = 0, 1 or 2, F has been overwritten by +* the solution L. If IJOB = 3 or 4 and TRANS = 'N', F holds L, +* the solution achieved during the computation of the +* Dif-estimate. +* +* LDF (input) INTEGER +* The leading dimension of the array F. LDF >= max(1, M). +* +* DIF (output) DOUBLE PRECISION +* On exit DIF is the reciprocal of a lower bound of the +* reciprocal of the Dif-function, i.e. DIF is an upper bound of +* Dif[(A,D), (B,E)] = sigma-min(Z), where Z as in (2). +* IF IJOB = 0 or TRANS = 'C', DIF is not referenced. +* +* SCALE (output) DOUBLE PRECISION +* On exit SCALE is the scaling factor in (1) or (3). +* If 0 < SCALE < 1, C and F hold the solutions R and L, resp., +* to a slightly perturbed system but the input matrices A, B, +* D and E have not been changed. If SCALE = 0, R and L will +* hold the solutions to the homogenious system with C = F = 0. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* IF IJOB = 0, WORK is not referenced. Otherwise, +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK > = 1. +* If IJOB = 1 or 2 and TRANS = 'N', LWORK >= 2*M*N. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* IWORK (workspace) INTEGER array, dimension (M+N+2) +* If IJOB = 0, IWORK is not referenced. +* +* INFO (output) INTEGER +* =0: successful exit +* <0: If INFO = -i, the i-th argument had an illegal value. +* >0: (A, D) and (B, E) have common or very close +* eigenvalues. +* +* Further Details +* =============== +* +* Based on contributions by +* Bo Kagstrom and Peter Poromaa, Department of Computing Science, +* Umea University, S-901 87 Umea, Sweden. +* +* [1] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +* for Solving the Generalized Sylvester Equation and Estimating the +* Separation between Regular Matrix Pairs, Report UMINF - 93.23, +* Department of Computing Science, Umea University, S-901 87 Umea, +* Sweden, December 1993, Revised April 1994, Also as LAPACK Working +* Note 75. To appear in ACM Trans. on Math. Software, Vol 22, +* No 1, 1996. +* +* [2] B. Kagstrom, A Perturbation Analysis of the Generalized Sylvester +* Equation (AR - LB, DR - LE ) = (C, F), SIAM J. Matrix Anal. +* Appl., 15(4):1045-1060, 1994. +* +* [3] B. Kagstrom and L. Westin, Generalized Schur Methods with +* Condition Estimators for Solving the Generalized Sylvester +* Equation, IEEE Transactions on Automatic Control, Vol. 34, No. 7, +* July 1989, pp 745-751. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOTRAN + INTEGER I, IE, IFUNC, IROUND, IS, ISOLVE, J, JE, JS, K, + $ LINFO, LWMIN, MB, NB, P, PQ, Q + DOUBLE PRECISION DSCALE, DSUM, SCALE2, SCALOC +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZGEMM, ZLACPY, ZSCAL, ZTGSY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test input parameters +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( ( IJOB.EQ.1 .OR. IJOB.EQ.2 ) .AND. NOTRAN ) THEN + LWMIN = MAX( 1, 2*M*N ) + ELSE + LWMIN = 1 + END IF +* + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.4 ) ) THEN + INFO = -2 + ELSE IF( M.LE.0 ) THEN + INFO = -3 + ELSE IF( N.LE.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDD.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDE.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDF.LT.MAX( 1, M ) ) THEN + INFO = -16 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTGSYL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Determine optimal block sizes MB and NB +* + MB = ILAENV( 2, 'ZTGSYL', TRANS, M, N, -1, -1 ) + NB = ILAENV( 5, 'ZTGSYL', TRANS, M, N, -1, -1 ) +* + ISOLVE = 1 + IFUNC = 0 + IF( IJOB.GE.3 .AND. NOTRAN ) THEN + IFUNC = IJOB - 2 + DO 10 J = 1, N + CALL ZCOPY( M, DCMPLX( ZERO, ZERO ), 0, C( 1, J ), 1 ) + CALL ZCOPY( M, DCMPLX( ZERO, ZERO ), 0, F( 1, J ), 1 ) + 10 CONTINUE + ELSE IF( IJOB.GE.1 .AND. NOTRAN ) THEN + ISOLVE = 2 + END IF +* + IF( ( MB.LE.1 .AND. NB.LE.1 ) .OR. ( MB.GE.M .AND. NB.GE.N ) ) + $ THEN +* +* Use unblocked Level 2 solver +* + DO 30 IROUND = 1, ISOLVE +* + SCALE = ONE + DSCALE = ZERO + DSUM = ONE + PQ = M*N + CALL ZTGSY2( TRANS, IFUNC, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, DSUM, DSCALE, + $ INFO ) + IF( DSCALE.NE.ZERO ) THEN + IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN + DIF = SQRT( DBLE( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) ) + ELSE + DIF = SQRT( DBLE( PQ ) ) / ( DSCALE*SQRT( DSUM ) ) + END IF + END IF + IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN + IFUNC = IJOB + SCALE2 = SCALE + CALL ZLACPY( 'F', M, N, C, LDC, WORK, M ) + CALL ZLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M ) + DO 20 J = 1, N + CALL ZCOPY( M, DCMPLX( ZERO, ZERO ), 0, C( 1, J ), 1 ) + CALL ZCOPY( M, DCMPLX( ZERO, ZERO ), 0, F( 1, J ), 1 ) + 20 CONTINUE + ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN + CALL ZLACPY( 'F', M, N, WORK, M, C, LDC ) + CALL ZLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF ) + SCALE = SCALE2 + END IF + 30 CONTINUE +* + RETURN +* + END IF +* +* Determine block structure of A +* + P = 0 + I = 1 + 40 CONTINUE + IF( I.GT.M ) + $ GO TO 50 + P = P + 1 + IWORK( P ) = I + I = I + MB + IF( I.GE.M ) + $ GO TO 50 + GO TO 40 + 50 CONTINUE + IWORK( P+1 ) = M + 1 + IF( IWORK( P ).EQ.IWORK( P+1 ) ) + $ P = P - 1 +* +* Determine block structure of B +* + Q = P + 1 + J = 1 + 60 CONTINUE + IF( J.GT.N ) + $ GO TO 70 +* + Q = Q + 1 + IWORK( Q ) = J + J = J + NB + IF( J.GE.N ) + $ GO TO 70 + GO TO 60 +* + 70 CONTINUE + IWORK( Q+1 ) = N + 1 + IF( IWORK( Q ).EQ.IWORK( Q+1 ) ) + $ Q = Q - 1 +* + IF( NOTRAN ) THEN + DO 150 IROUND = 1, ISOLVE +* +* Solve (I, J) - subsystem +* A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) +* D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) +* for I = P, P - 1, ..., 1; J = 1, 2, ..., Q +* + PQ = 0 + SCALE = ONE + DSCALE = ZERO + DSUM = ONE + DO 130 J = P + 2, Q + JS = IWORK( J ) + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + DO 120 I = P, 1, -1 + IS = IWORK( I ) + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + CALL ZTGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA, + $ B( JS, JS ), LDB, C( IS, JS ), LDC, + $ D( IS, IS ), LDD, E( JS, JS ), LDE, + $ F( IS, JS ), LDF, SCALOC, DSUM, DSCALE, + $ LINFO ) + IF( LINFO.GT.0 ) + $ INFO = LINFO + PQ = PQ + MB*NB + IF( SCALOC.NE.ONE ) THEN + DO 80 K = 1, JS - 1 + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), + $ C( 1, K ), 1 ) + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), + $ F( 1, K ), 1 ) + 80 CONTINUE + DO 90 K = JS, JE + CALL ZSCAL( IS-1, DCMPLX( SCALOC, ZERO ), + $ C( 1, K ), 1 ) + CALL ZSCAL( IS-1, DCMPLX( SCALOC, ZERO ), + $ F( 1, K ), 1 ) + 90 CONTINUE + DO 100 K = JS, JE + CALL ZSCAL( M-IE, DCMPLX( SCALOC, ZERO ), + $ C( IE+1, K ), 1 ) + CALL ZSCAL( M-IE, DCMPLX( SCALOC, ZERO ), + $ F( IE+1, K ), 1 ) + 100 CONTINUE + DO 110 K = JE + 1, N + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), + $ C( 1, K ), 1 ) + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), + $ F( 1, K ), 1 ) + 110 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Substitute R(I,J) and L(I,J) into remaining equation. +* + IF( I.GT.1 ) THEN + CALL ZGEMM( 'N', 'N', IS-1, NB, MB, + $ DCMPLX( -ONE, ZERO ), A( 1, IS ), LDA, + $ C( IS, JS ), LDC, DCMPLX( ONE, ZERO ), + $ C( 1, JS ), LDC ) + CALL ZGEMM( 'N', 'N', IS-1, NB, MB, + $ DCMPLX( -ONE, ZERO ), D( 1, IS ), LDD, + $ C( IS, JS ), LDC, DCMPLX( ONE, ZERO ), + $ F( 1, JS ), LDF ) + END IF + IF( J.LT.Q ) THEN + CALL ZGEMM( 'N', 'N', MB, N-JE, NB, + $ DCMPLX( ONE, ZERO ), F( IS, JS ), LDF, + $ B( JS, JE+1 ), LDB, + $ DCMPLX( ONE, ZERO ), C( IS, JE+1 ), + $ LDC ) + CALL ZGEMM( 'N', 'N', MB, N-JE, NB, + $ DCMPLX( ONE, ZERO ), F( IS, JS ), LDF, + $ E( JS, JE+1 ), LDE, + $ DCMPLX( ONE, ZERO ), F( IS, JE+1 ), + $ LDF ) + END IF + 120 CONTINUE + 130 CONTINUE + IF( DSCALE.NE.ZERO ) THEN + IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN + DIF = SQRT( DBLE( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) ) + ELSE + DIF = SQRT( DBLE( PQ ) ) / ( DSCALE*SQRT( DSUM ) ) + END IF + END IF + IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN + IFUNC = IJOB + SCALE2 = SCALE + CALL ZLACPY( 'F', M, N, C, LDC, WORK, M ) + CALL ZLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M ) + DO 140 J = 1, N + CALL ZCOPY( M, DCMPLX( ZERO, ZERO ), 0, C( 1, J ), 1 ) + CALL ZCOPY( M, DCMPLX( ZERO, ZERO ), 0, F( 1, J ), 1 ) + 140 CONTINUE + ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN + CALL ZLACPY( 'F', M, N, WORK, M, C, LDC ) + CALL ZLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF ) + SCALE = SCALE2 + END IF + 150 CONTINUE + ELSE +* +* Solve transposed (I, J)-subsystem +* A(I, I)' * R(I, J) + D(I, I)' * L(I, J) = C(I, J) +* R(I, J) * B(J, J) + L(I, J) * E(J, J) = -F(I, J) +* for I = 1,2,..., P; J = Q, Q-1,..., 1 +* + SCALE = ONE + DO 210 I = 1, P + IS = IWORK( I ) + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + DO 200 J = Q, P + 2, -1 + JS = IWORK( J ) + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + CALL ZTGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA, + $ B( JS, JS ), LDB, C( IS, JS ), LDC, + $ D( IS, IS ), LDD, E( JS, JS ), LDE, + $ F( IS, JS ), LDF, SCALOC, DSUM, DSCALE, + $ LINFO ) + IF( LINFO.GT.0 ) + $ INFO = LINFO + IF( SCALOC.NE.ONE ) THEN + DO 160 K = 1, JS - 1 + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), C( 1, K ), + $ 1 ) + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), F( 1, K ), + $ 1 ) + 160 CONTINUE + DO 170 K = JS, JE + CALL ZSCAL( IS-1, DCMPLX( SCALOC, ZERO ), + $ C( 1, K ), 1 ) + CALL ZSCAL( IS-1, DCMPLX( SCALOC, ZERO ), + $ F( 1, K ), 1 ) + 170 CONTINUE + DO 180 K = JS, JE + CALL ZSCAL( M-IE, DCMPLX( SCALOC, ZERO ), + $ C( IE+1, K ), 1 ) + CALL ZSCAL( M-IE, DCMPLX( SCALOC, ZERO ), + $ F( IE+1, K ), 1 ) + 180 CONTINUE + DO 190 K = JE + 1, N + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), C( 1, K ), + $ 1 ) + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), F( 1, K ), + $ 1 ) + 190 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Substitute R(I,J) and L(I,J) into remaining equation. +* + IF( J.GT.P+2 ) THEN + CALL ZGEMM( 'N', 'C', MB, JS-1, NB, + $ DCMPLX( ONE, ZERO ), C( IS, JS ), LDC, + $ B( 1, JS ), LDB, DCMPLX( ONE, ZERO ), + $ F( IS, 1 ), LDF ) + CALL ZGEMM( 'N', 'C', MB, JS-1, NB, + $ DCMPLX( ONE, ZERO ), F( IS, JS ), LDF, + $ E( 1, JS ), LDE, DCMPLX( ONE, ZERO ), + $ F( IS, 1 ), LDF ) + END IF + IF( I.LT.P ) THEN + CALL ZGEMM( 'C', 'N', M-IE, NB, MB, + $ DCMPLX( -ONE, ZERO ), A( IS, IE+1 ), LDA, + $ C( IS, JS ), LDC, DCMPLX( ONE, ZERO ), + $ C( IE+1, JS ), LDC ) + CALL ZGEMM( 'C', 'N', M-IE, NB, MB, + $ DCMPLX( -ONE, ZERO ), D( IS, IE+1 ), LDD, + $ F( IS, JS ), LDF, DCMPLX( ONE, ZERO ), + $ C( IE+1, JS ), LDC ) + END IF + 200 CONTINUE + 210 CONTINUE + END IF +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of ZTGSYL +* + END diff --git a/costa/native/external/lapack/ztpcon.f b/costa/native/external/lapack/ztpcon.f new file mode 100644 index 000000000..a55d87011 --- /dev/null +++ b/costa/native/external/lapack/ztpcon.f @@ -0,0 +1,194 @@ + SUBROUTINE ZTPCON( NORM, UPLO, DIAG, N, AP, RCOND, WORK, RWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, N + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 AP( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZTPCON estimates the reciprocal of the condition number of a packed +* triangular matrix A, in either the 1-norm or the infinity-norm. +* +* The norm of A is computed and an estimate is obtained for +* norm(inv(A)), then the reciprocal of the condition number is +* computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The upper or lower triangular matrix A, packed columnwise in +* a linear array. The j-th column of A is stored in the array +* AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* If DIAG = 'U', the diagonal elements of A are not referenced +* and are assumed to be 1. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + DOUBLE PRECISION AINVNM, ANORM, SCALE, SMLNUM, XNORM + COMPLEX*16 ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH, ZLANTP + EXTERNAL LSAME, IZAMAX, DLAMCH, ZLANTP +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDRSCL, ZLACON, ZLATPS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = DLAMCH( 'Safe minimum' )*DBLE( MAX( 1, N ) ) +* +* Compute the norm of the triangular matrix A. +* + ANORM = ZLANTP( NORM, UPLO, DIAG, N, AP, RWORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL ZLATPS( UPLO, 'No transpose', DIAG, NORMIN, N, AP, + $ WORK, SCALE, RWORK, INFO ) + ELSE +* +* Multiply by inv(A'). +* + CALL ZLATPS( UPLO, 'Conjugate transpose', DIAG, NORMIN, + $ N, AP, WORK, SCALE, RWORK, INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = IZAMAX( N, WORK, 1 ) + XNORM = CABS1( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL ZDRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of ZTPCON +* + END diff --git a/costa/native/external/lapack/ztprfs.f b/costa/native/external/lapack/ztprfs.f new file mode 100644 index 000000000..b2f62a5a0 --- /dev/null +++ b/costa/native/external/lapack/ztprfs.f @@ -0,0 +1,387 @@ + SUBROUTINE ZTPRFS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, X, LDX, + $ FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 AP( * ), B( LDB, * ), WORK( * ), X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZTPRFS provides error bounds and backward error estimates for the +* solution to a system of linear equations with a triangular packed +* coefficient matrix. +* +* The solution matrix X must be computed by ZTPTRS or some other +* means before entering this routine. ZTPRFS does not do iterative +* refinement because doing so cannot improve the backward error. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* AP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The upper or lower triangular matrix A, packed columnwise in +* a linear array. The j-th column of A is stored in the array +* AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +* If DIAG = 'U', the diagonal elements of A are not referenced +* and are assumed to be 1. +* +* B (input) COMPLEX*16 array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input) COMPLEX*16 array, dimension (LDX,NRHS) +* The solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANSN, TRANST + INTEGER I, J, K, KASE, KC, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZLACON, ZTPMV, ZTPSV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL ZCOPY( N, X( 1, J ), 1, WORK, 1 ) + CALL ZTPMV( UPLO, TRANS, DIAG, N, AP, WORK, 1 ) + CALL ZAXPY( N, -ONE, B( 1, J ), 1, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + KC = 1 + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = CABS1( X( K, J ) ) + DO 30 I = 1, K + RWORK( I ) = RWORK( I ) + + $ CABS1( AP( KC+I-1 ) )*XK + 30 CONTINUE + KC = KC + K + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = CABS1( X( K, J ) ) + DO 50 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + + $ CABS1( AP( KC+I-1 ) )*XK + 50 CONTINUE + RWORK( K ) = RWORK( K ) + XK + KC = KC + K + 60 CONTINUE + END IF + ELSE + KC = 1 + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = CABS1( X( K, J ) ) + DO 70 I = K, N + RWORK( I ) = RWORK( I ) + + $ CABS1( AP( KC+I-K ) )*XK + 70 CONTINUE + KC = KC + N - K + 1 + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = CABS1( X( K, J ) ) + DO 90 I = K + 1, N + RWORK( I ) = RWORK( I ) + + $ CABS1( AP( KC+I-K ) )*XK + 90 CONTINUE + RWORK( K ) = RWORK( K ) + XK + KC = KC + N - K + 1 + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A**H)*abs(X) + abs(B). +* + IF( UPPER ) THEN + KC = 1 + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = 1, K + S = S + CABS1( AP( KC+I-1 ) )*CABS1( X( I, J ) ) + 110 CONTINUE + RWORK( K ) = RWORK( K ) + S + KC = KC + K + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = CABS1( X( K, J ) ) + DO 130 I = 1, K - 1 + S = S + CABS1( AP( KC+I-1 ) )*CABS1( X( I, J ) ) + 130 CONTINUE + RWORK( K ) = RWORK( K ) + S + KC = KC + K + 140 CONTINUE + END IF + ELSE + KC = 1 + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, N + S = S + CABS1( AP( KC+I-K ) )*CABS1( X( I, J ) ) + 150 CONTINUE + RWORK( K ) = RWORK( K ) + S + KC = KC + N - K + 1 + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = CABS1( X( K, J ) ) + DO 170 I = K + 1, N + S = S + CABS1( AP( KC+I-K ) )*CABS1( X( I, J ) ) + 170 CONTINUE + RWORK( K ) = RWORK( K ) + S + KC = KC + N - K + 1 + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL ZTPSV( UPLO, TRANST, DIAG, N, AP, WORK, 1 ) + DO 220 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 230 CONTINUE + CALL ZTPSV( UPLO, TRANSN, DIAG, N, AP, WORK, 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of ZTPRFS +* + END diff --git a/costa/native/external/lapack/ztptri.f b/costa/native/external/lapack/ztptri.f new file mode 100644 index 000000000..bc90876d5 --- /dev/null +++ b/costa/native/external/lapack/ztptri.f @@ -0,0 +1,177 @@ + SUBROUTINE ZTPTRI( UPLO, DIAG, N, AP, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + COMPLEX*16 AP( * ) +* .. +* +* Purpose +* ======= +* +* ZTPTRI computes the inverse of a complex upper or lower triangular +* matrix A stored in packed format. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* AP (input/output) COMPLEX*16 array, dimension (N*(N+1)/2) +* On entry, the upper or lower triangular matrix A, stored +* columnwise in a linear array. The j-th column of A is stored +* in the array AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*((2*n-j)/2) = A(i,j) for j<=i<=n. +* See below for further details. +* On exit, the (triangular) inverse of the original matrix, in +* the same packed storage format. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, A(i,i) is exactly zero. The triangular +* matrix is singular and its inverse can not be computed. +* +* Further Details +* =============== +* +* A triangular matrix A can be transferred to packed storage using one +* of the following program segments: +* +* UPLO = 'U': UPLO = 'L': +* +* JC = 1 JC = 1 +* DO 2 J = 1, N DO 2 J = 1, N +* DO 1 I = 1, J DO 1 I = J, N +* AP(JC+I-1) = A(I,J) AP(JC+I-J) = A(I,J) +* 1 CONTINUE 1 CONTINUE +* JC = JC + J JC = JC + N - J + 1 +* 2 CONTINUE 2 CONTINUE +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JC, JCLAST, JJ + COMPLEX*16 AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZSCAL, ZTPMV +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTPTRI', -INFO ) + RETURN + END IF +* +* Check for singularity if non-unit. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + JJ = 0 + DO 10 INFO = 1, N + JJ = JJ + INFO + IF( AP( JJ ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE + JJ = 1 + DO 20 INFO = 1, N + IF( AP( JJ ).EQ.ZERO ) + $ RETURN + JJ = JJ + N - INFO + 1 + 20 CONTINUE + END IF + INFO = 0 + END IF +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix. +* + JC = 1 + DO 30 J = 1, N + IF( NOUNIT ) THEN + AP( JC+J-1 ) = ONE / AP( JC+J-1 ) + AJJ = -AP( JC+J-1 ) + ELSE + AJJ = -ONE + END IF +* +* Compute elements 1:j-1 of j-th column. +* + CALL ZTPMV( 'Upper', 'No transpose', DIAG, J-1, AP, + $ AP( JC ), 1 ) + CALL ZSCAL( J-1, AJJ, AP( JC ), 1 ) + JC = JC + J + 30 CONTINUE +* + ELSE +* +* Compute inverse of lower triangular matrix. +* + JC = N*( N+1 ) / 2 + DO 40 J = N, 1, -1 + IF( NOUNIT ) THEN + AP( JC ) = ONE / AP( JC ) + AJJ = -AP( JC ) + ELSE + AJJ = -ONE + END IF + IF( J.LT.N ) THEN +* +* Compute elements j+1:n of j-th column. +* + CALL ZTPMV( 'Lower', 'No transpose', DIAG, N-J, + $ AP( JCLAST ), AP( JC+1 ), 1 ) + CALL ZSCAL( N-J, AJJ, AP( JC+1 ), 1 ) + END IF + JCLAST = JC + JC = JC - N + J - 2 + 40 CONTINUE + END IF +* + RETURN +* +* End of ZTPTRI +* + END diff --git a/costa/native/external/lapack/ztptrs.f b/costa/native/external/lapack/ztptrs.f new file mode 100644 index 000000000..0fe620a7d --- /dev/null +++ b/costa/native/external/lapack/ztptrs.f @@ -0,0 +1,154 @@ + SUBROUTINE ZTPTRS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 AP( * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZTPTRS solves a triangular system of the form +* +* A * X = B, A**T * X = B, or A**H * X = B, +* +* where A is a triangular matrix of order N stored in packed format, +* and B is an N-by-NRHS matrix. A check is made to verify that A is +* nonsingular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* AP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The upper or lower triangular matrix A, packed columnwise in +* a linear array. The j-th column of A is stored in the array +* AP as follows: +* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, if INFO = 0, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the i-th diagonal element of A is zero, +* indicating that the matrix is singular and the +* solutions X have not been computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JC +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZTPSV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + JC = 1 + DO 10 INFO = 1, N + IF( AP( JC+INFO-1 ).EQ.ZERO ) + $ RETURN + JC = JC + INFO + 10 CONTINUE + ELSE + JC = 1 + DO 20 INFO = 1, N + IF( AP( JC ).EQ.ZERO ) + $ RETURN + JC = JC + N - INFO + 1 + 20 CONTINUE + END IF + END IF + INFO = 0 +* +* Solve A * x = b, A**T * x = b, or A**H * x = b. +* + DO 30 J = 1, NRHS + CALL ZTPSV( UPLO, TRANS, DIAG, N, AP, B( 1, J ), 1 ) + 30 CONTINUE +* + RETURN +* +* End of ZTPTRS +* + END diff --git a/costa/native/external/lapack/ztrcon.f b/costa/native/external/lapack/ztrcon.f new file mode 100644 index 000000000..aa3d3b11a --- /dev/null +++ b/costa/native/external/lapack/ztrcon.f @@ -0,0 +1,200 @@ + SUBROUTINE ZTRCON( NORM, UPLO, DIAG, N, A, LDA, RCOND, WORK, + $ RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZTRCON estimates the reciprocal of the condition number of a +* triangular matrix A, in either the 1-norm or the infinity-norm. +* +* The norm of A is computed and an estimate is obtained for +* norm(inv(A)), then the reciprocal of the condition number is +* computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + DOUBLE PRECISION AINVNM, ANORM, SCALE, SMLNUM, XNORM + COMPLEX*16 ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH, ZLANTR + EXTERNAL LSAME, IZAMAX, DLAMCH, ZLANTR +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDRSCL, ZLACON, ZLATRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTRCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = DLAMCH( 'Safe minimum' )*DBLE( MAX( 1, N ) ) +* +* Compute the norm of the triangular matrix A. +* + ANORM = ZLANTR( NORM, UPLO, DIAG, N, N, A, LDA, RWORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL ZLATRS( UPLO, 'No transpose', DIAG, NORMIN, N, A, + $ LDA, WORK, SCALE, RWORK, INFO ) + ELSE +* +* Multiply by inv(A'). +* + CALL ZLATRS( UPLO, 'Conjugate transpose', DIAG, NORMIN, + $ N, A, LDA, WORK, SCALE, RWORK, INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = IZAMAX( N, WORK, 1 ) + XNORM = CABS1( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL ZDRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of ZTRCON +* + END diff --git a/costa/native/external/lapack/ztrevc.f b/costa/native/external/lapack/ztrevc.f new file mode 100644 index 000000000..d1b4b7206 --- /dev/null +++ b/costa/native/external/lapack/ztrevc.f @@ -0,0 +1,390 @@ + SUBROUTINE ZTREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, + $ LDVR, MM, M, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDT, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZTREVC computes some or all of the right and/or left eigenvectors of +* a complex upper triangular matrix T. +* +* The right eigenvector x and the left eigenvector y of T corresponding +* to an eigenvalue w are defined by: +* +* T*x = w*x, y'*T = w*y' +* +* where y' denotes the conjugate transpose of the vector y. +* +* If all eigenvectors are requested, the routine may either return the +* matrices X and/or Y of right or left eigenvectors of T, or the +* products Q*X and/or Q*Y, where Q is an input unitary +* matrix. If T was obtained from the Schur factorization of an +* original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of +* right or left eigenvectors of A. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'R': compute right eigenvectors only; +* = 'L': compute left eigenvectors only; +* = 'B': compute both right and left eigenvectors. +* +* HOWMNY (input) CHARACTER*1 +* = 'A': compute all right and/or left eigenvectors; +* = 'B': compute all right and/or left eigenvectors, +* and backtransform them using the input matrices +* supplied in VR and/or VL; +* = 'S': compute selected right and/or left eigenvectors, +* specified by the logical array SELECT. +* +* SELECT (input) LOGICAL array, dimension (N) +* If HOWMNY = 'S', SELECT specifies the eigenvectors to be +* computed. +* If HOWMNY = 'A' or 'B', SELECT is not referenced. +* To select the eigenvector corresponding to the j-th +* eigenvalue, SELECT(j) must be set to .TRUE.. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input/output) COMPLEX*16 array, dimension (LDT,N) +* The upper triangular matrix T. T is modified, but restored +* on exit. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* VL (input/output) COMPLEX*16 array, dimension (LDVL,MM) +* On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +* contain an N-by-N matrix Q (usually the unitary matrix Q of +* Schur vectors returned by ZHSEQR). +* On exit, if SIDE = 'L' or 'B', VL contains: +* if HOWMNY = 'A', the matrix Y of left eigenvectors of T; +* VL is lower triangular. The i-th column +* VL(i) of VL is the eigenvector corresponding +* to T(i,i). +* if HOWMNY = 'B', the matrix Q*Y; +* if HOWMNY = 'S', the left eigenvectors of T specified by +* SELECT, stored consecutively in the columns +* of VL, in the same order as their +* eigenvalues. +* If SIDE = 'R', VL is not referenced. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. LDVL >= max(1,N) if +* SIDE = 'L' or 'B'; LDVL >= 1 otherwise. +* +* VR (input/output) COMPLEX*16 array, dimension (LDVR,MM) +* On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +* contain an N-by-N matrix Q (usually the unitary matrix Q of +* Schur vectors returned by ZHSEQR). +* On exit, if SIDE = 'R' or 'B', VR contains: +* if HOWMNY = 'A', the matrix X of right eigenvectors of T; +* VR is upper triangular. The i-th column +* VR(i) of VR is the eigenvector corresponding +* to T(i,i). +* if HOWMNY = 'B', the matrix Q*X; +* if HOWMNY = 'S', the right eigenvectors of T specified by +* SELECT, stored consecutively in the columns +* of VR, in the same order as their +* eigenvalues. +* If SIDE = 'L', VR is not referenced. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. LDVR >= max(1,N) if +* SIDE = 'R' or 'B'; LDVR >= 1 otherwise. +* +* MM (input) INTEGER +* The number of columns in the arrays VL and/or VR. MM >= M. +* +* M (output) INTEGER +* The number of columns in the arrays VL and/or VR actually +* used to store the eigenvectors. If HOWMNY = 'A' or 'B', M +* is set to N. Each selected eigenvector occupies one +* column. +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The algorithm used in this program is basically backward (forward) +* substitution, with scaling to make the the code robust against +* possible overflow. +* +* Each eigenvector is normalized so that the element of largest +* magnitude has magnitude 1; here the magnitude of a complex number +* (x,y) is taken to be |x| + |y|. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CMZERO, CMONE + PARAMETER ( CMZERO = ( 0.0D+0, 0.0D+0 ), + $ CMONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLV, BOTHV, LEFTV, OVER, RIGHTV, SOMEV + INTEGER I, II, IS, J, K, KI + DOUBLE PRECISION OVFL, REMAX, SCALE, SMIN, SMLNUM, ULP, UNFL + COMPLEX*16 CDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH, DZASUM + EXTERNAL LSAME, IZAMAX, DLAMCH, DZASUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZDSCAL, ZGEMV, ZLATRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + BOTHV = LSAME( SIDE, 'B' ) + RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV + LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV +* + ALLV = LSAME( HOWMNY, 'A' ) + OVER = LSAME( HOWMNY, 'B' ) + SOMEV = LSAME( HOWMNY, 'S' ) +* +* Set M to the number of columns required to store the selected +* eigenvectors. +* + IF( SOMEV ) THEN + M = 0 + DO 10 J = 1, N + IF( SELECT( J ) ) + $ M = M + 1 + 10 CONTINUE + ELSE + M = N + END IF +* + INFO = 0 + IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -1 + ELSE IF( .NOT.ALLV .AND. .NOT.OVER .AND. .NOT.SOMEV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN + INFO = -10 + ELSE IF( MM.LT.M ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTREVC', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* Set the constants to control overflow. +* + UNFL = DLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + CALL DLABAD( UNFL, OVFL ) + ULP = DLAMCH( 'Precision' ) + SMLNUM = UNFL*( N / ULP ) +* +* Store the diagonal elements of T in working array WORK. +* + DO 20 I = 1, N + WORK( I+N ) = T( I, I ) + 20 CONTINUE +* +* Compute 1-norm of each column of strictly upper triangular +* part of T to control overflow in triangular solver. +* + RWORK( 1 ) = ZERO + DO 30 J = 2, N + RWORK( J ) = DZASUM( J-1, T( 1, J ), 1 ) + 30 CONTINUE +* + IF( RIGHTV ) THEN +* +* Compute right eigenvectors. +* + IS = M + DO 80 KI = N, 1, -1 +* + IF( SOMEV ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 80 + END IF + SMIN = MAX( ULP*( CABS1( T( KI, KI ) ) ), SMLNUM ) +* + WORK( 1 ) = CMONE +* +* Form right-hand side. +* + DO 40 K = 1, KI - 1 + WORK( K ) = -T( K, KI ) + 40 CONTINUE +* +* Solve the triangular system: +* (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK. +* + DO 50 K = 1, KI - 1 + T( K, K ) = T( K, K ) - T( KI, KI ) + IF( CABS1( T( K, K ) ).LT.SMIN ) + $ T( K, K ) = SMIN + 50 CONTINUE +* + IF( KI.GT.1 ) THEN + CALL ZLATRS( 'Upper', 'No transpose', 'Non-unit', 'Y', + $ KI-1, T, LDT, WORK( 1 ), SCALE, RWORK, + $ INFO ) + WORK( KI ) = SCALE + END IF +* +* Copy the vector x or Q*x to VR and normalize. +* + IF( .NOT.OVER ) THEN + CALL ZCOPY( KI, WORK( 1 ), 1, VR( 1, IS ), 1 ) +* + II = IZAMAX( KI, VR( 1, IS ), 1 ) + REMAX = ONE / CABS1( VR( II, IS ) ) + CALL ZDSCAL( KI, REMAX, VR( 1, IS ), 1 ) +* + DO 60 K = KI + 1, N + VR( K, IS ) = CMZERO + 60 CONTINUE + ELSE + IF( KI.GT.1 ) + $ CALL ZGEMV( 'N', N, KI-1, CMONE, VR, LDVR, WORK( 1 ), + $ 1, DCMPLX( SCALE ), VR( 1, KI ), 1 ) +* + II = IZAMAX( N, VR( 1, KI ), 1 ) + REMAX = ONE / CABS1( VR( II, KI ) ) + CALL ZDSCAL( N, REMAX, VR( 1, KI ), 1 ) + END IF +* +* Set back the original diagonal elements of T. +* + DO 70 K = 1, KI - 1 + T( K, K ) = WORK( K+N ) + 70 CONTINUE +* + IS = IS - 1 + 80 CONTINUE + END IF +* + IF( LEFTV ) THEN +* +* Compute left eigenvectors. +* + IS = 1 + DO 130 KI = 1, N +* + IF( SOMEV ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 130 + END IF + SMIN = MAX( ULP*( CABS1( T( KI, KI ) ) ), SMLNUM ) +* + WORK( N ) = CMONE +* +* Form right-hand side. +* + DO 90 K = KI + 1, N + WORK( K ) = -DCONJG( T( KI, K ) ) + 90 CONTINUE +* +* Solve the triangular system: +* (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK. +* + DO 100 K = KI + 1, N + T( K, K ) = T( K, K ) - T( KI, KI ) + IF( CABS1( T( K, K ) ).LT.SMIN ) + $ T( K, K ) = SMIN + 100 CONTINUE +* + IF( KI.LT.N ) THEN + CALL ZLATRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ 'Y', N-KI, T( KI+1, KI+1 ), LDT, + $ WORK( KI+1 ), SCALE, RWORK, INFO ) + WORK( KI ) = SCALE + END IF +* +* Copy the vector x or Q*x to VL and normalize. +* + IF( .NOT.OVER ) THEN + CALL ZCOPY( N-KI+1, WORK( KI ), 1, VL( KI, IS ), 1 ) +* + II = IZAMAX( N-KI+1, VL( KI, IS ), 1 ) + KI - 1 + REMAX = ONE / CABS1( VL( II, IS ) ) + CALL ZDSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 ) +* + DO 110 K = 1, KI - 1 + VL( K, IS ) = CMZERO + 110 CONTINUE + ELSE + IF( KI.LT.N ) + $ CALL ZGEMV( 'N', N, N-KI, CMONE, VL( 1, KI+1 ), LDVL, + $ WORK( KI+1 ), 1, DCMPLX( SCALE ), + $ VL( 1, KI ), 1 ) +* + II = IZAMAX( N, VL( 1, KI ), 1 ) + REMAX = ONE / CABS1( VL( II, KI ) ) + CALL ZDSCAL( N, REMAX, VL( 1, KI ), 1 ) + END IF +* +* Set back the original diagonal elements of T. +* + DO 120 K = KI + 1, N + T( K, K ) = WORK( K+N ) + 120 CONTINUE +* + IS = IS + 1 + 130 CONTINUE + END IF +* + RETURN +* +* End of ZTREVC +* + END diff --git a/costa/native/external/lapack/ztrexc.f b/costa/native/external/lapack/ztrexc.f new file mode 100644 index 000000000..4ba7b4a95 --- /dev/null +++ b/costa/native/external/lapack/ztrexc.f @@ -0,0 +1,163 @@ + SUBROUTINE ZTREXC( COMPQ, N, T, LDT, Q, LDQ, IFST, ILST, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER COMPQ + INTEGER IFST, ILST, INFO, LDQ, LDT, N +* .. +* .. Array Arguments .. + COMPLEX*16 Q( LDQ, * ), T( LDT, * ) +* .. +* +* Purpose +* ======= +* +* ZTREXC reorders the Schur factorization of a complex matrix +* A = Q*T*Q**H, so that the diagonal element of T with row index IFST +* is moved to row ILST. +* +* The Schur form T is reordered by a unitary similarity transformation +* Z**H*T*Z, and optionally the matrix Q of Schur vectors is updated by +* postmultplying it with Z. +* +* Arguments +* ========= +* +* COMPQ (input) CHARACTER*1 +* = 'V': update the matrix Q of Schur vectors; +* = 'N': do not update Q. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input/output) COMPLEX*16 array, dimension (LDT,N) +* On entry, the upper triangular matrix T. +* On exit, the reordered upper triangular matrix. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* Q (input/output) COMPLEX*16 array, dimension (LDQ,N) +* On entry, if COMPQ = 'V', the matrix Q of Schur vectors. +* On exit, if COMPQ = 'V', Q has been postmultiplied by the +* unitary transformation matrix Z which reorders T. +* If COMPQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* IFST (input) INTEGER +* ILST (input) INTEGER +* Specify the reordering of the diagonal elements of T: +* The element with row index IFST is moved to row ILST by a +* sequence of transpositions between adjacent elements. +* 1 <= IFST <= N; 1 <= ILST <= N. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL WANTQ + INTEGER K, M1, M2, M3 + DOUBLE PRECISION CS + COMPLEX*16 SN, T11, T22, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARTG, ZROT +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters. +* + INFO = 0 + WANTQ = LSAME( COMPQ, 'V' ) + IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.MAX( 1, N ) ) ) THEN + INFO = -6 + ELSE IF( IFST.LT.1 .OR. IFST.GT.N ) THEN + INFO = -7 + ELSE IF( ILST.LT.1 .OR. ILST.GT.N ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTREXC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.1 .OR. IFST.EQ.ILST ) + $ RETURN +* + IF( IFST.LT.ILST ) THEN +* +* Move the IFST-th diagonal element forward down the diagonal. +* + M1 = 0 + M2 = -1 + M3 = 1 + ELSE +* +* Move the IFST-th diagonal element backward up the diagonal. +* + M1 = -1 + M2 = 0 + M3 = -1 + END IF +* + DO 10 K = IFST + M1, ILST + M2, M3 +* +* Interchange the k-th and (k+1)-th diagonal elements. +* + T11 = T( K, K ) + T22 = T( K+1, K+1 ) +* +* Determine the transformation to perform the interchange. +* + CALL ZLARTG( T( K, K+1 ), T22-T11, CS, SN, TEMP ) +* +* Apply transformation to the matrix T. +* + IF( K+2.LE.N ) + $ CALL ZROT( N-K-1, T( K, K+2 ), LDT, T( K+1, K+2 ), LDT, CS, + $ SN ) + CALL ZROT( K-1, T( 1, K ), 1, T( 1, K+1 ), 1, CS, + $ DCONJG( SN ) ) +* + T( K, K ) = T22 + T( K+1, K+1 ) = T11 +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL ZROT( N, Q( 1, K ), 1, Q( 1, K+1 ), 1, CS, + $ DCONJG( SN ) ) + END IF +* + 10 CONTINUE +* + RETURN +* +* End of ZTREXC +* + END diff --git a/costa/native/external/lapack/ztrrfs.f b/costa/native/external/lapack/ztrrfs.f new file mode 100644 index 000000000..08a0383b1 --- /dev/null +++ b/costa/native/external/lapack/ztrrfs.f @@ -0,0 +1,378 @@ + SUBROUTINE ZTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDA, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* Purpose +* ======= +* +* ZTRRFS provides error bounds and backward error estimates for the +* solution to a system of linear equations with a triangular +* coefficient matrix. +* +* The solution matrix X must be computed by ZTRTRS or some other +* means before entering this routine. ZTRRFS does not do iterative +* refinement because doing so cannot improve the backward error. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrices B and X. NRHS >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input) COMPLEX*16 array, dimension (LDB,NRHS) +* The right hand side matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* X (input) COMPLEX*16 array, dimension (LDX,NRHS) +* The solution matrix X. +* +* LDX (input) INTEGER +* The leading dimension of the array X. LDX >= max(1,N). +* +* FERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The estimated forward error bound for each solution vector +* X(j) (the j-th column of the solution matrix X). +* If XTRUE is the true solution corresponding to X(j), FERR(j) +* is an estimated upper bound for the magnitude of the largest +* element in (X(j) - XTRUE) divided by the magnitude of the +* largest element in X(j). The estimate is as reliable as +* the estimate for RCOND, and is almost always a slight +* overestimate of the true error. +* +* BERR (output) DOUBLE PRECISION array, dimension (NRHS) +* The componentwise relative backward error of each solution +* vector X(j) (i.e., the smallest relative change in +* any element of A or B that makes X(j) an exact solution). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANSN, TRANST + INTEGER I, J, K, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZLACON, ZTRMV, ZTRSV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTRRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL ZCOPY( N, X( 1, J ), 1, WORK, 1 ) + CALL ZTRMV( UPLO, TRANS, DIAG, N, A, LDA, WORK, 1 ) + CALL ZAXPY( N, -ONE, B( 1, J ), 1, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = CABS1( X( K, J ) ) + DO 30 I = 1, K + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + 30 CONTINUE + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = CABS1( X( K, J ) ) + DO 50 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + 50 CONTINUE + RWORK( K ) = RWORK( K ) + XK + 60 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = CABS1( X( K, J ) ) + DO 70 I = K, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + 70 CONTINUE + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = CABS1( X( K, J ) ) + DO 90 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + 90 CONTINUE + RWORK( K ) = RWORK( K ) + XK + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A**H)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = 1, K + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 110 CONTINUE + RWORK( K ) = RWORK( K ) + S + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = CABS1( X( K, J ) ) + DO 130 I = 1, K - 1 + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 130 CONTINUE + RWORK( K ) = RWORK( K ) + S + 140 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, N + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 150 CONTINUE + RWORK( K ) = RWORK( K ) + S + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = CABS1( X( K, J ) ) + DO 170 I = K + 1, N + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 170 CONTINUE + RWORK( K ) = RWORK( K ) + S + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACON to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL ZTRSV( UPLO, TRANST, DIAG, N, A, LDA, WORK, 1 ) + DO 220 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 230 CONTINUE + CALL ZTRSV( UPLO, TRANSN, DIAG, N, A, LDA, WORK, 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of ZTRRFS +* + END diff --git a/costa/native/external/lapack/ztrsen.f b/costa/native/external/lapack/ztrsen.f new file mode 100644 index 000000000..6eda60719 --- /dev/null +++ b/costa/native/external/lapack/ztrsen.f @@ -0,0 +1,358 @@ + SUBROUTINE ZTRSEN( JOB, COMPQ, SELECT, N, T, LDT, Q, LDQ, W, M, S, + $ SEP, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, JOB + INTEGER INFO, LDQ, LDT, LWORK, M, N + DOUBLE PRECISION S, SEP +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + COMPLEX*16 Q( LDQ, * ), T( LDT, * ), W( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZTRSEN reorders the Schur factorization of a complex matrix +* A = Q*T*Q**H, so that a selected cluster of eigenvalues appears in +* the leading positions on the diagonal of the upper triangular matrix +* T, and the leading columns of Q form an orthonormal basis of the +* corresponding right invariant subspace. +* +* Optionally the routine computes the reciprocal condition numbers of +* the cluster of eigenvalues and/or the invariant subspace. +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies whether condition numbers are required for the +* cluster of eigenvalues (S) or the invariant subspace (SEP): +* = 'N': none; +* = 'E': for eigenvalues only (S); +* = 'V': for invariant subspace only (SEP); +* = 'B': for both eigenvalues and invariant subspace (S and +* SEP). +* +* COMPQ (input) CHARACTER*1 +* = 'V': update the matrix Q of Schur vectors; +* = 'N': do not update Q. +* +* SELECT (input) LOGICAL array, dimension (N) +* SELECT specifies the eigenvalues in the selected cluster. To +* select the j-th eigenvalue, SELECT(j) must be set to .TRUE.. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input/output) COMPLEX*16 array, dimension (LDT,N) +* On entry, the upper triangular matrix T. +* On exit, T is overwritten by the reordered matrix T, with the +* selected eigenvalues as the leading diagonal elements. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* Q (input/output) COMPLEX*16 array, dimension (LDQ,N) +* On entry, if COMPQ = 'V', the matrix Q of Schur vectors. +* On exit, if COMPQ = 'V', Q has been postmultiplied by the +* unitary transformation matrix which reorders T; the leading M +* columns of Q form an orthonormal basis for the specified +* invariant subspace. +* If COMPQ = 'N', Q is not referenced. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. +* LDQ >= 1; and if COMPQ = 'V', LDQ >= N. +* +* W (output) COMPLEX*16 array, dimension (N) +* The reordered eigenvalues of T, in the same order as they +* appear on the diagonal of T. +* +* M (output) INTEGER +* The dimension of the specified invariant subspace. +* 0 <= M <= N. +* +* S (output) DOUBLE PRECISION +* If JOB = 'E' or 'B', S is a lower bound on the reciprocal +* condition number for the selected cluster of eigenvalues. +* S cannot underestimate the true reciprocal condition number +* by more than a factor of sqrt(N). If M = 0 or N, S = 1. +* If JOB = 'N' or 'V', S is not referenced. +* +* SEP (output) DOUBLE PRECISION +* If JOB = 'V' or 'B', SEP is the estimated reciprocal +* condition number of the specified invariant subspace. If +* M = 0 or N, SEP = norm(T). +* If JOB = 'N' or 'E', SEP is not referenced. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* If JOB = 'N', WORK is not referenced. Otherwise, +* on exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If JOB = 'N', LWORK >= 1; +* if JOB = 'E', LWORK = M*(N-M); +* if JOB = 'V' or 'B', LWORK >= 2*M*(N-M). +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* ZTRSEN first collects the selected eigenvalues by computing a unitary +* transformation Z to move them to the top left corner of T. In other +* words, the selected eigenvalues are the eigenvalues of T11 in: +* +* Z'*T*Z = ( T11 T12 ) n1 +* ( 0 T22 ) n2 +* n1 n2 +* +* where N = n1+n2 and Z' means the conjugate transpose of Z. The first +* n1 columns of Z span the specified invariant subspace of T. +* +* If T has been obtained from the Schur factorization of a matrix +* A = Q*T*Q', then the reordered Schur factorization of A is given by +* A = (Q*Z)*(Z'*T*Z)*(Q*Z)', and the first n1 columns of Q*Z span the +* corresponding invariant subspace of A. +* +* The reciprocal condition number of the average of the eigenvalues of +* T11 may be returned in S. S lies between 0 (very badly conditioned) +* and 1 (very well conditioned). It is computed as follows. First we +* compute R so that +* +* P = ( I R ) n1 +* ( 0 0 ) n2 +* n1 n2 +* +* is the projector on the invariant subspace associated with T11. +* R is the solution of the Sylvester equation: +* +* T11*R - R*T22 = T12. +* +* Let F-norm(M) denote the Frobenius-norm of M and 2-norm(M) denote +* the two-norm of M. Then S is computed as the lower bound +* +* (1 + F-norm(R)**2)**(-1/2) +* +* on the reciprocal of 2-norm(P), the true reciprocal condition number. +* S cannot underestimate 1 / 2-norm(P) by more than a factor of +* sqrt(N). +* +* An approximate error bound for the computed average of the +* eigenvalues of T11 is +* +* EPS * norm(T) / S +* +* where EPS is the machine precision. +* +* The reciprocal condition number of the right invariant subspace +* spanned by the first n1 columns of Z (or of Q*Z) is returned in SEP. +* SEP is defined as the separation of T11 and T22: +* +* sep( T11, T22 ) = sigma-min( C ) +* +* where sigma-min(C) is the smallest singular value of the +* n1*n2-by-n1*n2 matrix +* +* C = kprod( I(n2), T11 ) - kprod( transpose(T22), I(n1) ) +* +* I(m) is an m by m identity matrix, and kprod denotes the Kronecker +* product. We estimate sigma-min(C) by the reciprocal of an estimate of +* the 1-norm of inverse(C). The true reciprocal 1-norm of inverse(C) +* cannot differ from sigma-min(C) by more than a factor of sqrt(n1*n2). +* +* When SEP is small, small changes in T can cause large changes in +* the invariant subspace. An approximate bound on the maximum angular +* error in the computed right invariant subspace is +* +* EPS * norm(T) / SEP +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTBH, WANTQ, WANTS, WANTSP + INTEGER IERR, K, KASE, KS, LWMIN, N1, N2, NN + DOUBLE PRECISION EST, RNORM, SCALE +* .. +* .. Local Arrays .. + DOUBLE PRECISION RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION ZLANGE + EXTERNAL LSAME, ZLANGE +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACON, ZLACPY, ZTREXC, ZTRSYL +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters. +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH + WANTQ = LSAME( COMPQ, 'V' ) +* +* Set M to the number of selected eigenvalues. +* + M = 0 + DO 10 K = 1, N + IF( SELECT( K ) ) + $ M = M + 1 + 10 CONTINUE +* + N1 = M + N2 = N - M + NN = N1*N2 +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) +* + IF( WANTSP ) THEN + LWMIN = MAX( 1, 2*NN ) + ELSE IF( LSAME( JOB, 'N' ) ) THEN + LWMIN = 1 + ELSE IF( LSAME( JOB, 'E' ) ) THEN + LWMIN = MAX( 1, NN ) + END IF +* + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.WANTS .AND. .NOT.WANTSP ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTRSEN', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.N .OR. M.EQ.0 ) THEN + IF( WANTS ) + $ S = ONE + IF( WANTSP ) + $ SEP = ZLANGE( '1', N, N, T, LDT, RWORK ) + GO TO 40 + END IF +* +* Collect the selected eigenvalues at the top left corner of T. +* + KS = 0 + DO 20 K = 1, N + IF( SELECT( K ) ) THEN + KS = KS + 1 +* +* Swap the K-th eigenvalue to position KS. +* + IF( K.NE.KS ) + $ CALL ZTREXC( COMPQ, N, T, LDT, Q, LDQ, K, KS, IERR ) + END IF + 20 CONTINUE +* + IF( WANTS ) THEN +* +* Solve the Sylvester equation for R: +* +* T11*R - R*T22 = scale*T12 +* + CALL ZLACPY( 'F', N1, N2, T( 1, N1+1 ), LDT, WORK, N1 ) + CALL ZTRSYL( 'N', 'N', -1, N1, N2, T, LDT, T( N1+1, N1+1 ), + $ LDT, WORK, N1, SCALE, IERR ) +* +* Estimate the reciprocal of the condition number of the cluster +* of eigenvalues. +* + RNORM = ZLANGE( 'F', N1, N2, WORK, N1, RWORK ) + IF( RNORM.EQ.ZERO ) THEN + S = ONE + ELSE + S = SCALE / ( SQRT( SCALE*SCALE / RNORM+RNORM )* + $ SQRT( RNORM ) ) + END IF + END IF +* + IF( WANTSP ) THEN +* +* Estimate sep(T11,T22). +* + EST = ZERO + KASE = 0 + 30 CONTINUE + CALL ZLACON( NN, WORK( NN+1 ), WORK, EST, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve T11*R - R*T22 = scale*X. +* + CALL ZTRSYL( 'N', 'N', -1, N1, N2, T, LDT, + $ T( N1+1, N1+1 ), LDT, WORK, N1, SCALE, + $ IERR ) + ELSE +* +* Solve T11'*R - R*T22' = scale*X. +* + CALL ZTRSYL( 'C', 'C', -1, N1, N2, T, LDT, + $ T( N1+1, N1+1 ), LDT, WORK, N1, SCALE, + $ IERR ) + END IF + GO TO 30 + END IF +* + SEP = SCALE / EST + END IF +* + 40 CONTINUE +* +* Copy reordered eigenvalues to W. +* + DO 50 K = 1, N + W( K ) = T( K, K ) + 50 CONTINUE +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of ZTRSEN +* + END diff --git a/costa/native/external/lapack/ztrsna.f b/costa/native/external/lapack/ztrsna.f new file mode 100644 index 000000000..3398e8734 --- /dev/null +++ b/costa/native/external/lapack/ztrsna.f @@ -0,0 +1,353 @@ + SUBROUTINE ZTRSNA( JOB, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, + $ LDVR, S, SEP, MM, M, WORK, LDWORK, RWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, JOB + INTEGER INFO, LDT, LDVL, LDVR, LDWORK, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + DOUBLE PRECISION RWORK( * ), S( * ), SEP( * ) + COMPLEX*16 T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( LDWORK, * ) +* .. +* +* Purpose +* ======= +* +* ZTRSNA estimates reciprocal condition numbers for specified +* eigenvalues and/or right eigenvectors of a complex upper triangular +* matrix T (or of any matrix Q*T*Q**H with Q unitary). +* +* Arguments +* ========= +* +* JOB (input) CHARACTER*1 +* Specifies whether condition numbers are required for +* eigenvalues (S) or eigenvectors (SEP): +* = 'E': for eigenvalues only (S); +* = 'V': for eigenvectors only (SEP); +* = 'B': for both eigenvalues and eigenvectors (S and SEP). +* +* HOWMNY (input) CHARACTER*1 +* = 'A': compute condition numbers for all eigenpairs; +* = 'S': compute condition numbers for selected eigenpairs +* specified by the array SELECT. +* +* SELECT (input) LOGICAL array, dimension (N) +* If HOWMNY = 'S', SELECT specifies the eigenpairs for which +* condition numbers are required. To select condition numbers +* for the j-th eigenpair, SELECT(j) must be set to .TRUE.. +* If HOWMNY = 'A', SELECT is not referenced. +* +* N (input) INTEGER +* The order of the matrix T. N >= 0. +* +* T (input) COMPLEX*16 array, dimension (LDT,N) +* The upper triangular matrix T. +* +* LDT (input) INTEGER +* The leading dimension of the array T. LDT >= max(1,N). +* +* VL (input) COMPLEX*16 array, dimension (LDVL,M) +* If JOB = 'E' or 'B', VL must contain left eigenvectors of T +* (or of any Q*T*Q**H with Q unitary), corresponding to the +* eigenpairs specified by HOWMNY and SELECT. The eigenvectors +* must be stored in consecutive columns of VL, as returned by +* ZHSEIN or ZTREVC. +* If JOB = 'V', VL is not referenced. +* +* LDVL (input) INTEGER +* The leading dimension of the array VL. +* LDVL >= 1; and if JOB = 'E' or 'B', LDVL >= N. +* +* VR (input) COMPLEX*16 array, dimension (LDVR,M) +* If JOB = 'E' or 'B', VR must contain right eigenvectors of T +* (or of any Q*T*Q**H with Q unitary), corresponding to the +* eigenpairs specified by HOWMNY and SELECT. The eigenvectors +* must be stored in consecutive columns of VR, as returned by +* ZHSEIN or ZTREVC. +* If JOB = 'V', VR is not referenced. +* +* LDVR (input) INTEGER +* The leading dimension of the array VR. +* LDVR >= 1; and if JOB = 'E' or 'B', LDVR >= N. +* +* S (output) DOUBLE PRECISION array, dimension (MM) +* If JOB = 'E' or 'B', the reciprocal condition numbers of the +* selected eigenvalues, stored in consecutive elements of the +* array. Thus S(j), SEP(j), and the j-th columns of VL and VR +* all correspond to the same eigenpair (but not in general the +* j-th eigenpair, unless all eigenpairs are selected). +* If JOB = 'V', S is not referenced. +* +* SEP (output) DOUBLE PRECISION array, dimension (MM) +* If JOB = 'V' or 'B', the estimated reciprocal condition +* numbers of the selected eigenvectors, stored in consecutive +* elements of the array. +* If JOB = 'E', SEP is not referenced. +* +* MM (input) INTEGER +* The number of elements in the arrays S (if JOB = 'E' or 'B') +* and/or SEP (if JOB = 'V' or 'B'). MM >= M. +* +* M (output) INTEGER +* The number of elements of the arrays S and/or SEP actually +* used to store the estimated condition numbers. +* If HOWMNY = 'A', M is set to N. +* +* WORK (workspace) COMPLEX*16 array, dimension (LDWORK,N+1) +* If JOB = 'E', WORK is not referenced. +* +* LDWORK (input) INTEGER +* The leading dimension of the array WORK. +* LDWORK >= 1; and if JOB = 'V' or 'B', LDWORK >= N. +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (N) +* If JOB = 'E', RWORK is not referenced. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The reciprocal of the condition number of an eigenvalue lambda is +* defined as +* +* S(lambda) = |v'*u| / (norm(u)*norm(v)) +* +* where u and v are the right and left eigenvectors of T corresponding +* to lambda; v' denotes the conjugate transpose of v, and norm(u) +* denotes the Euclidean norm. These reciprocal condition numbers always +* lie between zero (very badly conditioned) and one (very well +* conditioned). If n = 1, S(lambda) is defined to be 1. +* +* An approximate error bound for a computed eigenvalue W(i) is given by +* +* EPS * norm(T) / S(i) +* +* where EPS is the machine precision. +* +* The reciprocal of the condition number of the right eigenvector u +* corresponding to lambda is defined as follows. Suppose +* +* T = ( lambda c ) +* ( 0 T22 ) +* +* Then the reciprocal condition number is +* +* SEP( lambda, T22 ) = sigma-min( T22 - lambda*I ) +* +* where sigma-min denotes the smallest singular value. We approximate +* the smallest singular value by the reciprocal of an estimate of the +* one-norm of the inverse of T22 - lambda*I. If n = 1, SEP(1) is +* defined to be abs(T(1,1)). +* +* An approximate error bound for a computed right eigenvector VR(i) +* is given by +* +* EPS * norm(T) / SEP(i) +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D0+0 ) +* .. +* .. Local Scalars .. + LOGICAL SOMCON, WANTBH, WANTS, WANTSP + CHARACTER NORMIN + INTEGER I, IERR, IX, J, K, KASE, KS + DOUBLE PRECISION BIGNUM, EPS, EST, LNRM, RNRM, SCALE, SMLNUM, + $ XNORM + COMPLEX*16 CDUM, PROD +* .. +* .. Local Arrays .. + COMPLEX*16 DUMMY( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH, DZNRM2 + COMPLEX*16 ZDOTC + EXTERNAL LSAME, IZAMAX, DLAMCH, DZNRM2, ZDOTC +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDRSCL, ZLACON, ZLACPY, ZLATRS, ZTREXC +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH +* + SOMCON = LSAME( HOWMNY, 'S' ) +* +* Set M to the number of eigenpairs for which condition numbers are +* to be computed. +* + IF( SOMCON ) THEN + M = 0 + DO 10 J = 1, N + IF( SELECT( J ) ) + $ M = M + 1 + 10 CONTINUE + ELSE + M = N + END IF +* + INFO = 0 + IF( .NOT.WANTS .AND. .NOT.WANTSP ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVL.LT.1 .OR. ( WANTS .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( WANTS .AND. LDVR.LT.N ) ) THEN + INFO = -10 + ELSE IF( MM.LT.M ) THEN + INFO = -13 + ELSE IF( LDWORK.LT.1 .OR. ( WANTSP .AND. LDWORK.LT.N ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTRSNA', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( SOMCON ) THEN + IF( .NOT.SELECT( 1 ) ) + $ RETURN + END IF + IF( WANTS ) + $ S( 1 ) = ONE + IF( WANTSP ) + $ SEP( 1 ) = ABS( T( 1, 1 ) ) + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* + KS = 1 + DO 50 K = 1, N +* + IF( SOMCON ) THEN + IF( .NOT.SELECT( K ) ) + $ GO TO 50 + END IF +* + IF( WANTS ) THEN +* +* Compute the reciprocal condition number of the k-th +* eigenvalue. +* + PROD = ZDOTC( N, VR( 1, KS ), 1, VL( 1, KS ), 1 ) + RNRM = DZNRM2( N, VR( 1, KS ), 1 ) + LNRM = DZNRM2( N, VL( 1, KS ), 1 ) + S( KS ) = ABS( PROD ) / ( RNRM*LNRM ) +* + END IF +* + IF( WANTSP ) THEN +* +* Estimate the reciprocal condition number of the k-th +* eigenvector. +* +* Copy the matrix T to the array WORK and swap the k-th +* diagonal element to the (1,1) position. +* + CALL ZLACPY( 'Full', N, N, T, LDT, WORK, LDWORK ) + CALL ZTREXC( 'No Q', N, WORK, LDWORK, DUMMY, 1, K, 1, IERR ) +* +* Form C = T22 - lambda*I in WORK(2:N,2:N). +* + DO 20 I = 2, N + WORK( I, I ) = WORK( I, I ) - WORK( 1, 1 ) + 20 CONTINUE +* +* Estimate a lower bound for the 1-norm of inv(C'). The 1st +* and (N+1)th columns of WORK are used to store work vectors. +* + SEP( KS ) = ZERO + EST = ZERO + KASE = 0 + NORMIN = 'N' + 30 CONTINUE + CALL ZLACON( N-1, WORK( 1, N+1 ), WORK, EST, KASE ) +* + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve C'*x = scale*b +* + CALL ZLATRS( 'Upper', 'Conjugate transpose', + $ 'Nonunit', NORMIN, N-1, WORK( 2, 2 ), + $ LDWORK, WORK, SCALE, RWORK, IERR ) + ELSE +* +* Solve C*x = scale*b +* + CALL ZLATRS( 'Upper', 'No transpose', 'Nonunit', + $ NORMIN, N-1, WORK( 2, 2 ), LDWORK, WORK, + $ SCALE, RWORK, IERR ) + END IF + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN +* +* Multiply by 1/SCALE if doing so will not cause +* overflow. +* + IX = IZAMAX( N-1, WORK, 1 ) + XNORM = CABS1( WORK( IX, 1 ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 40 + CALL ZDRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 30 + END IF +* + SEP( KS ) = ONE / MAX( EST, SMLNUM ) + END IF +* + 40 CONTINUE + KS = KS + 1 + 50 CONTINUE + RETURN +* +* End of ZTRSNA +* + END diff --git a/costa/native/external/lapack/ztrsyl.f b/costa/native/external/lapack/ztrsyl.f new file mode 100644 index 000000000..8fcbb3aaf --- /dev/null +++ b/costa/native/external/lapack/ztrsyl.f @@ -0,0 +1,368 @@ + SUBROUTINE ZTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, + $ LDC, SCALE, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER TRANA, TRANB + INTEGER INFO, ISGN, LDA, LDB, LDC, M, N + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* Purpose +* ======= +* +* ZTRSYL solves the complex Sylvester matrix equation: +* +* op(A)*X + X*op(B) = scale*C or +* op(A)*X - X*op(B) = scale*C, +* +* where op(A) = A or A**H, and A and B are both upper triangular. A is +* M-by-M and B is N-by-N; the right hand side C and the solution X are +* M-by-N; and scale is an output scale factor, set <= 1 to avoid +* overflow in X. +* +* Arguments +* ========= +* +* TRANA (input) CHARACTER*1 +* Specifies the option op(A): +* = 'N': op(A) = A (No transpose) +* = 'C': op(A) = A**H (Conjugate transpose) +* +* TRANB (input) CHARACTER*1 +* Specifies the option op(B): +* = 'N': op(B) = B (No transpose) +* = 'C': op(B) = B**H (Conjugate transpose) +* +* ISGN (input) INTEGER +* Specifies the sign in the equation: +* = +1: solve op(A)*X + X*op(B) = scale*C +* = -1: solve op(A)*X - X*op(B) = scale*C +* +* M (input) INTEGER +* The order of the matrix A, and the number of rows in the +* matrices X and C. M >= 0. +* +* N (input) INTEGER +* The order of the matrix B, and the number of columns in the +* matrices X and C. N >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,M) +* The upper triangular matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* B (input) COMPLEX*16 array, dimension (LDB,N) +* The upper triangular matrix B. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* C (input/output) COMPLEX*16 array, dimension (LDC,N) +* On entry, the M-by-N right hand side matrix C. +* On exit, C is overwritten by the solution matrix X. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M) +* +* SCALE (output) DOUBLE PRECISION +* The scale factor, scale, set <= 1 to avoid overflow in X. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* = 1: A and B have common or very close eigenvalues; perturbed +* values were used to solve the equation (but the matrices +* A and B are unchanged). +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRNA, NOTRNB + INTEGER J, K, L + DOUBLE PRECISION BIGNUM, DA11, DB, EPS, SCALOC, SGN, SMIN, + $ SMLNUM + COMPLEX*16 A11, SUML, SUMR, VEC, X11 +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANGE + COMPLEX*16 ZDOTC, ZDOTU, ZLADIV + EXTERNAL LSAME, DLAMCH, ZLANGE, ZDOTC, ZDOTU, ZLADIV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and Test input parameters +* + NOTRNA = LSAME( TRANA, 'N' ) + NOTRNB = LSAME( TRANB, 'N' ) +* + INFO = 0 + IF( .NOT.NOTRNA .AND. .NOT.LSAME( TRANA, 'T' ) .AND. .NOT. + $ LSAME( TRANA, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRNB .AND. .NOT.LSAME( TRANB, 'T' ) .AND. .NOT. + $ LSAME( TRANB, 'C' ) ) THEN + INFO = -2 + ELSE IF( ISGN.NE.1 .AND. ISGN.NE.-1 ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTRSYL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Set constants to control overflow +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM*DBLE( M*N ) / EPS + BIGNUM = ONE / SMLNUM + SMIN = MAX( SMLNUM, EPS*ZLANGE( 'M', M, M, A, LDA, DUM ), + $ EPS*ZLANGE( 'M', N, N, B, LDB, DUM ) ) + SCALE = ONE + SGN = ISGN +* + IF( NOTRNA .AND. NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* bottom-left corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* M L-1 +* R(K,L) = SUM [A(K,I)*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)]. +* I=K+1 J=1 +* + DO 30 L = 1, N + DO 20 K = M, 1, -1 +* + SUML = ZDOTU( M-K, A( K, MIN( K+1, M ) ), LDA, + $ C( MIN( K+1, M ), L ), 1 ) + SUMR = ZDOTU( L-1, C( K, 1 ), LDC, B( 1, L ), 1 ) + VEC = C( K, L ) - ( SUML+SGN*SUMR ) +* + SCALOC = ONE + A11 = A( K, K ) + SGN*B( L, L ) + DA11 = ABS( DBLE( A11 ) ) + ABS( DIMAG( A11 ) ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( DBLE( VEC ) ) + ABS( DIMAG( VEC ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X11 = ZLADIV( VEC*DCMPLX( SCALOC ), A11 ) +* + IF( SCALOC.NE.ONE ) THEN + DO 10 J = 1, N + CALL ZDSCAL( M, SCALOC, C( 1, J ), 1 ) + 10 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K, L ) = X11 +* + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( .NOT.NOTRNA .AND. NOTRNB ) THEN +* +* Solve A' *X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* upper-left corner column by column by +* +* A'(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* K-1 L-1 +* R(K,L) = SUM [A'(I,K)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)] +* I=1 J=1 +* + DO 60 L = 1, N + DO 50 K = 1, M +* + SUML = ZDOTC( K-1, A( 1, K ), 1, C( 1, L ), 1 ) + SUMR = ZDOTU( L-1, C( K, 1 ), LDC, B( 1, L ), 1 ) + VEC = C( K, L ) - ( SUML+SGN*SUMR ) +* + SCALOC = ONE + A11 = DCONJG( A( K, K ) ) + SGN*B( L, L ) + DA11 = ABS( DBLE( A11 ) ) + ABS( DIMAG( A11 ) ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( DBLE( VEC ) ) + ABS( DIMAG( VEC ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF +* + X11 = ZLADIV( VEC*DCMPLX( SCALOC ), A11 ) +* + IF( SCALOC.NE.ONE ) THEN + DO 40 J = 1, N + CALL ZDSCAL( M, SCALOC, C( 1, J ), 1 ) + 40 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K, L ) = X11 +* + 50 CONTINUE + 60 CONTINUE +* + ELSE IF( .NOT.NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A'*X + ISGN*X*B' = C. +* +* The (K,L)th block of X is determined starting from +* upper-right corner column by column by +* +* A'(K,K)*X(K,L) + ISGN*X(K,L)*B'(L,L) = C(K,L) - R(K,L) +* +* Where +* K-1 +* R(K,L) = SUM [A'(I,K)*X(I,L)] + +* I=1 +* N +* ISGN*SUM [X(K,J)*B'(L,J)]. +* J=L+1 +* + DO 90 L = N, 1, -1 + DO 80 K = 1, M +* + SUML = ZDOTC( K-1, A( 1, K ), 1, C( 1, L ), 1 ) + SUMR = ZDOTC( N-L, C( K, MIN( L+1, N ) ), LDC, + $ B( L, MIN( L+1, N ) ), LDB ) + VEC = C( K, L ) - ( SUML+SGN*DCONJG( SUMR ) ) +* + SCALOC = ONE + A11 = DCONJG( A( K, K )+SGN*B( L, L ) ) + DA11 = ABS( DBLE( A11 ) ) + ABS( DIMAG( A11 ) ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( DBLE( VEC ) ) + ABS( DIMAG( VEC ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF +* + X11 = ZLADIV( VEC*DCMPLX( SCALOC ), A11 ) +* + IF( SCALOC.NE.ONE ) THEN + DO 70 J = 1, N + CALL ZDSCAL( M, SCALOC, C( 1, J ), 1 ) + 70 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K, L ) = X11 +* + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B' = C. +* +* The (K,L)th block of X is determined starting from +* bottom-left corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B'(L,L) = C(K,L) - R(K,L) +* +* Where +* M N +* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B'(L,J)] +* I=K+1 J=L+1 +* + DO 120 L = N, 1, -1 + DO 110 K = M, 1, -1 +* + SUML = ZDOTU( M-K, A( K, MIN( K+1, M ) ), LDA, + $ C( MIN( K+1, M ), L ), 1 ) + SUMR = ZDOTC( N-L, C( K, MIN( L+1, N ) ), LDC, + $ B( L, MIN( L+1, N ) ), LDB ) + VEC = C( K, L ) - ( SUML+SGN*DCONJG( SUMR ) ) +* + SCALOC = ONE + A11 = A( K, K ) + SGN*DCONJG( B( L, L ) ) + DA11 = ABS( DBLE( A11 ) ) + ABS( DIMAG( A11 ) ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( DBLE( VEC ) ) + ABS( DIMAG( VEC ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF +* + X11 = ZLADIV( VEC*DCMPLX( SCALOC ), A11 ) +* + IF( SCALOC.NE.ONE ) THEN + DO 100 J = 1, N + CALL ZDSCAL( M, SCALOC, C( 1, J ), 1 ) + 100 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K, L ) = X11 +* + 110 CONTINUE + 120 CONTINUE +* + END IF +* + RETURN +* +* End of ZTRSYL +* + END diff --git a/costa/native/external/lapack/ztrti2.f b/costa/native/external/lapack/ztrti2.f new file mode 100644 index 000000000..f8bab3b18 --- /dev/null +++ b/costa/native/external/lapack/ztrti2.f @@ -0,0 +1,147 @@ + SUBROUTINE ZTRTI2( UPLO, DIAG, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZTRTI2 computes the inverse of a complex upper or lower triangular +* matrix. +* +* This is the Level 2 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* Specifies whether the matrix A is upper or lower triangular. +* = 'U': Upper triangular +* = 'L': Lower triangular +* +* DIAG (input) CHARACTER*1 +* Specifies whether or not the matrix A is unit triangular. +* = 'N': Non-unit triangular +* = 'U': Unit triangular +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the triangular matrix A. If UPLO = 'U', the +* leading n by n upper triangular part of the array A contains +* the upper triangular matrix, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading n by n lower triangular part of the array A contains +* the lower triangular matrix, and the strictly upper +* triangular part of A is not referenced. If DIAG = 'U', the +* diagonal elements of A are also not referenced and are +* assumed to be 1. +* +* On exit, the (triangular) inverse of the original matrix, in +* the same storage format. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -k, the k-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J + COMPLEX*16 AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZSCAL, ZTRMV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTRTI2', -INFO ) + RETURN + END IF +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix. +* + DO 10 J = 1, N + IF( NOUNIT ) THEN + A( J, J ) = ONE / A( J, J ) + AJJ = -A( J, J ) + ELSE + AJJ = -ONE + END IF +* +* Compute elements 1:j-1 of j-th column. +* + CALL ZTRMV( 'Upper', 'No transpose', DIAG, J-1, A, LDA, + $ A( 1, J ), 1 ) + CALL ZSCAL( J-1, AJJ, A( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* Compute inverse of lower triangular matrix. +* + DO 20 J = N, 1, -1 + IF( NOUNIT ) THEN + A( J, J ) = ONE / A( J, J ) + AJJ = -A( J, J ) + ELSE + AJJ = -ONE + END IF + IF( J.LT.N ) THEN +* +* Compute elements j+1:n of j-th column. +* + CALL ZTRMV( 'Lower', 'No transpose', DIAG, N-J, + $ A( J+1, J+1 ), LDA, A( J+1, J ), 1 ) + CALL ZSCAL( N-J, AJJ, A( J+1, J ), 1 ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of ZTRTI2 +* + END diff --git a/costa/native/external/lapack/ztrtri.f b/costa/native/external/lapack/ztrtri.f new file mode 100644 index 000000000..53c2356d4 --- /dev/null +++ b/costa/native/external/lapack/ztrtri.f @@ -0,0 +1,178 @@ + SUBROUTINE ZTRTRI( UPLO, DIAG, N, A, LDA, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +* ZTRTRI computes the inverse of a complex upper or lower triangular +* matrix A. +* +* This is the Level 3 BLAS version of the algorithm. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the triangular matrix A. If UPLO = 'U', the +* leading N-by-N upper triangular part of the array A contains +* the upper triangular matrix, and the strictly lower +* triangular part of A is not referenced. If UPLO = 'L', the +* leading N-by-N lower triangular part of the array A contains +* the lower triangular matrix, and the strictly upper +* triangular part of A is not referenced. If DIAG = 'U', the +* diagonal elements of A are also not referenced and are +* assumed to be 1. +* On exit, the (triangular) inverse of the original matrix, in +* the same storage format. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, A(i,i) is exactly zero. The triangular +* matrix is singular and its inverse can not be computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JB, NB, NN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZTRMM, ZTRSM, ZTRTI2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTRTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity if non-unit. +* + IF( NOUNIT ) THEN + DO 10 INFO = 1, N + IF( A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + INFO = 0 + END IF +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'ZTRTRI', UPLO // DIAG, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL ZTRTI2( UPLO, DIAG, N, A, LDA, INFO ) + ELSE +* +* Use blocked code +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix +* + DO 20 J = 1, N, NB + JB = MIN( NB, N-J+1 ) +* +* Compute rows 1:j-1 of current block column +* + CALL ZTRMM( 'Left', 'Upper', 'No transpose', DIAG, J-1, + $ JB, ONE, A, LDA, A( 1, J ), LDA ) + CALL ZTRSM( 'Right', 'Upper', 'No transpose', DIAG, J-1, + $ JB, -ONE, A( J, J ), LDA, A( 1, J ), LDA ) +* +* Compute inverse of current diagonal block +* + CALL ZTRTI2( 'Upper', DIAG, JB, A( J, J ), LDA, INFO ) + 20 CONTINUE + ELSE +* +* Compute inverse of lower triangular matrix +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 30 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) + IF( J+JB.LE.N ) THEN +* +* Compute rows j+jb:n of current block column +* + CALL ZTRMM( 'Left', 'Lower', 'No transpose', DIAG, + $ N-J-JB+1, JB, ONE, A( J+JB, J+JB ), LDA, + $ A( J+JB, J ), LDA ) + CALL ZTRSM( 'Right', 'Lower', 'No transpose', DIAG, + $ N-J-JB+1, JB, -ONE, A( J, J ), LDA, + $ A( J+JB, J ), LDA ) + END IF +* +* Compute inverse of current diagonal block +* + CALL ZTRTI2( 'Lower', DIAG, JB, A( J, J ), LDA, INFO ) + 30 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZTRTRI +* + END diff --git a/costa/native/external/lapack/ztrtrs.f b/costa/native/external/lapack/ztrtrs.f new file mode 100644 index 000000000..d3d8f2be6 --- /dev/null +++ b/costa/native/external/lapack/ztrtrs.f @@ -0,0 +1,149 @@ + SUBROUTINE ZTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* Purpose +* ======= +* +* ZTRTRS solves a triangular system of the form +* +* A * X = B, A**T * X = B, or A**H * X = B, +* +* where A is a triangular matrix of order N, and B is an N-by-NRHS +* matrix. A check is made to verify that A is nonsingular. +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': A is upper triangular; +* = 'L': A is lower triangular. +* +* TRANS (input) CHARACTER*1 +* Specifies the form of the system of equations: +* = 'N': A * X = B (No transpose) +* = 'T': A**T * X = B (Transpose) +* = 'C': A**H * X = B (Conjugate transpose) +* +* DIAG (input) CHARACTER*1 +* = 'N': A is non-unit triangular; +* = 'U': A is unit triangular. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* NRHS (input) INTEGER +* The number of right hand sides, i.e., the number of columns +* of the matrix B. NRHS >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The triangular matrix A. If UPLO = 'U', the leading N-by-N +* upper triangular part of the array A contains the upper +* triangular matrix, and the strictly lower triangular part of +* A is not referenced. If UPLO = 'L', the leading N-by-N lower +* triangular part of the array A contains the lower triangular +* matrix, and the strictly upper triangular part of A is not +* referenced. If DIAG = 'U', the diagonal elements of A are +* also not referenced and are assumed to be 1. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) +* On entry, the right hand side matrix B. +* On exit, if INFO = 0, the solution matrix X. +* +* LDB (input) INTEGER +* The leading dimension of the array B. LDB >= max(1,N). +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, the i-th diagonal element of A is zero, +* indicating that the matrix is singular and the solutions +* X have not been computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTRTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + DO 10 INFO = 1, N + IF( A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + END IF + INFO = 0 +* +* Solve A * x = b, A**T * x = b, or A**H * x = b. +* + CALL ZTRSM( 'Left', UPLO, TRANS, DIAG, N, NRHS, ONE, A, LDA, B, + $ LDB ) +* + RETURN +* +* End of ZTRTRS +* + END diff --git a/costa/native/external/lapack/ztzrqf.f b/costa/native/external/lapack/ztzrqf.f new file mode 100644 index 000000000..8f4278df8 --- /dev/null +++ b/costa/native/external/lapack/ztzrqf.f @@ -0,0 +1,174 @@ + SUBROUTINE ZTZRQF( M, N, A, LDA, TAU, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ) +* .. +* +* Purpose +* ======= +* +* This routine is deprecated and has been replaced by routine ZTZRZF. +* +* ZTZRQF reduces the M-by-N ( M<=N ) complex upper trapezoidal matrix A +* to upper triangular form by means of unitary transformations. +* +* The upper trapezoidal matrix A is factored as +* +* A = ( R 0 ) * Z, +* +* where Z is an N-by-N unitary matrix and R is an M-by-M upper +* triangular matrix. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= M. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the leading M-by-N upper trapezoidal part of the +* array A must contain the matrix to be factorized. +* On exit, the leading M-by-M upper triangular part of A +* contains the upper triangular matrix R, and elements M+1 to +* N of the first M rows of A, with the array TAU, represent the +* unitary matrix Z as a product of M elementary reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX*16 array, dimension (M) +* The scalar factors of the elementary reflectors. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* The factorization is obtained by Householder's method. The kth +* transformation matrix, Z( k ), whose conjugate transpose is used to +* introduce zeros into the (m - k + 1)th row of A, is given in the form +* +* Z( k ) = ( I 0 ), +* ( 0 T( k ) ) +* +* where +* +* T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), +* ( 0 ) +* ( z( k ) ) +* +* tau is a scalar and z( k ) is an ( n - m ) element vector. +* tau and z( k ) are chosen to annihilate the elements of the kth row +* of X. +* +* The scalar tau is returned in the kth element of TAU and the vector +* u( k ) in the kth row of A, such that the elements of z( k ) are +* in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in +* the upper triangular part of A. +* +* Z is given by +* +* Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CONE, CZERO + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ), + $ CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K, M1 + COMPLEX*16 ALPHA +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZGEMV, ZGERC, ZLACGV, + $ ZLARFG +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTZRQF', -INFO ) + RETURN + END IF +* +* Perform the factorization. +* + IF( M.EQ.0 ) + $ RETURN + IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = CZERO + 10 CONTINUE + ELSE + M1 = MIN( M+1, N ) + DO 20 K = M, 1, -1 +* +* Use a Householder reflection to zero the kth row of A. +* First set up the reflection. +* + A( K, K ) = DCONJG( A( K, K ) ) + CALL ZLACGV( N-M, A( K, M1 ), LDA ) + ALPHA = A( K, K ) + CALL ZLARFG( N-M+1, ALPHA, A( K, M1 ), LDA, TAU( K ) ) + A( K, K ) = ALPHA + TAU( K ) = DCONJG( TAU( K ) ) +* + IF( TAU( K ).NE.CZERO .AND. K.GT.1 ) THEN +* +* We now perform the operation A := A*P( k )'. +* +* Use the first ( k - 1 ) elements of TAU to store a( k ), +* where a( k ) consists of the first ( k - 1 ) elements of +* the kth column of A. Also let B denote the first +* ( k - 1 ) rows of the last ( n - m ) columns of A. +* + CALL ZCOPY( K-1, A( 1, K ), 1, TAU, 1 ) +* +* Form w = a( k ) + B*z( k ) in TAU. +* + CALL ZGEMV( 'No transpose', K-1, N-M, CONE, A( 1, M1 ), + $ LDA, A( K, M1 ), LDA, CONE, TAU, 1 ) +* +* Now form a( k ) := a( k ) - conjg(tau)*w +* and B := B - conjg(tau)*w*z( k )'. +* + CALL ZAXPY( K-1, -DCONJG( TAU( K ) ), TAU, 1, A( 1, K ), + $ 1 ) + CALL ZGERC( K-1, N-M, -DCONJG( TAU( K ) ), TAU, 1, + $ A( K, M1 ), LDA, A( 1, M1 ), LDA ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of ZTZRQF +* + END diff --git a/costa/native/external/lapack/ztzrzf.f b/costa/native/external/lapack/ztzrzf.f new file mode 100644 index 000000000..87f22ba46 --- /dev/null +++ b/costa/native/external/lapack/ztzrzf.f @@ -0,0 +1,241 @@ + SUBROUTINE ZTZRZF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZTZRZF reduces the M-by-N ( M<=N ) complex upper trapezoidal matrix A +* to upper triangular form by means of unitary transformations. +* +* The upper trapezoidal matrix A is factored as +* +* A = ( R 0 ) * Z, +* +* where Z is an N-by-N unitary matrix and R is an M-by-M upper +* triangular matrix. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix A. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the leading M-by-N upper trapezoidal part of the +* array A must contain the matrix to be factorized. +* On exit, the leading M-by-M upper triangular part of A +* contains the upper triangular matrix R, and elements M+1 to +* N of the first M rows of A, with the array TAU, represent the +* unitary matrix Z as a product of M elementary reflectors. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,M). +* +* TAU (output) COMPLEX*16 array, dimension (M) +* The scalar factors of the elementary reflectors. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* The factorization is obtained by Householder's method. The kth +* transformation matrix, Z( k ), which is used to introduce zeros into +* the ( m - k + 1 )th row of A, is given in the form +* +* Z( k ) = ( I 0 ), +* ( 0 T( k ) ) +* +* where +* +* T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), +* ( 0 ) +* ( z( k ) ) +* +* tau is a scalar and z( k ) is an ( n - m ) element vector. +* tau and z( k ) are chosen to annihilate the elements of the kth row +* of X. +* +* The scalar tau is returned in the kth element of TAU and the vector +* u( k ) in the kth row of A, such that the elements of z( k ) are +* in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in +* the upper triangular part of A. +* +* Z is given by +* +* Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IWS, KI, KK, LDWORK, LWKOPT, M1, MU, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARZB, ZLARZT, ZLATRZ +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. +* + NB = ILAENV( 1, 'ZGERQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTZRZF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + ELSE IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = M + IF( NB.GT.1 .AND. NB.LT.M ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'ZGERQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.M ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZGERQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.M .AND. NX.LT.M ) THEN +* +* Use blocked code initially. +* The last kk rows are handled by the block method. +* + M1 = MIN( M+1, N ) + KI = ( ( M-NX-1 ) / NB )*NB + KK = MIN( M, KI+NB ) +* + DO 20 I = M - KK + KI + 1, M - KK + 1, -NB + IB = MIN( M-I+1, NB ) +* +* Compute the TZ factorization of the current block +* A(i:i+ib-1,i:n) +* + CALL ZLATRZ( IB, N-I+1, N-M, A( I, I ), LDA, TAU( I ), + $ WORK ) + IF( I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL ZLARZT( 'Backward', 'Rowwise', N-M, IB, A( I, M1 ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:i-1,i:n) from the right +* + CALL ZLARZB( 'Right', 'No transpose', 'Backward', + $ 'Rowwise', I-1, N-I+1, IB, N-M, A( I, M1 ), + $ LDA, WORK, LDWORK, A( 1, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 20 CONTINUE + MU = I + NB - 1 + ELSE + MU = M + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 ) + $ CALL ZLATRZ( MU, N, N-M, A, LDA, TAU, WORK ) +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZTZRZF +* + END diff --git a/costa/native/external/lapack/zung2l.f b/costa/native/external/lapack/zung2l.f new file mode 100644 index 000000000..c3d3a1766 --- /dev/null +++ b/costa/native/external/lapack/zung2l.f @@ -0,0 +1,129 @@ + SUBROUTINE ZUNG2L( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNG2L generates an m by n complex matrix Q with orthonormal columns, +* which is defined as the last n columns of a product of k elementary +* reflectors of order m +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by ZGEQLF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the (n-k+i)-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by ZGEQLF in the last k columns of its array +* argument A. +* On exit, the m-by-n matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) COMPLEX*16 array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZGEQLF. +* +* WORK (workspace) COMPLEX*16 array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, II, J, L +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARF, ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNG2L', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns 1:n-k to columns of the unit matrix +* + DO 20 J = 1, N - K + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( M-N+J, J ) = ONE + 20 CONTINUE +* + DO 40 I = 1, K + II = N - K + I +* +* Apply H(i) to A(1:m-k+i,1:n-k+i) from the left +* + A( M-N+II, II ) = ONE + CALL ZLARF( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), A, + $ LDA, WORK ) + CALL ZSCAL( M-N+II-1, -TAU( I ), A( 1, II ), 1 ) + A( M-N+II, II ) = ONE - TAU( I ) +* +* Set A(m-k+i+1:m,n-k+i) to zero +* + DO 30 L = M - N + II + 1, M + A( L, II ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of ZUNG2L +* + END diff --git a/costa/native/external/lapack/zung2r.f b/costa/native/external/lapack/zung2r.f new file mode 100644 index 000000000..53208c673 --- /dev/null +++ b/costa/native/external/lapack/zung2r.f @@ -0,0 +1,131 @@ + SUBROUTINE ZUNG2R( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNG2R generates an m by n complex matrix Q with orthonormal columns, +* which is defined as the first n columns of a product of k elementary +* reflectors of order m +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by ZGEQRF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the i-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by ZGEQRF in the first k columns of its array +* argument A. +* On exit, the m by n matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) COMPLEX*16 array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZGEQRF. +* +* WORK (workspace) COMPLEX*16 array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARF, ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNG2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns k+1:n to columns of the unit matrix +* + DO 20 J = K + 1, N + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( J, J ) = ONE + 20 CONTINUE +* + DO 40 I = K, 1, -1 +* +* Apply H(i) to A(i:m,i:n) from the left +* + IF( I.LT.N ) THEN + A( I, I ) = ONE + CALL ZLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) + END IF + IF( I.LT.M ) + $ CALL ZSCAL( M-I, -TAU( I ), A( I+1, I ), 1 ) + A( I, I ) = ONE - TAU( I ) +* +* Set A(1:i-1,i) to zero +* + DO 30 L = 1, I - 1 + A( L, I ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of ZUNG2R +* + END diff --git a/costa/native/external/lapack/zungbr.f b/costa/native/external/lapack/zungbr.f new file mode 100644 index 000000000..4c699781d --- /dev/null +++ b/costa/native/external/lapack/zungbr.f @@ -0,0 +1,246 @@ + SUBROUTINE ZUNGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER VECT + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNGBR generates one of the complex unitary matrices Q or P**H +* determined by ZGEBRD when reducing a complex matrix A to bidiagonal +* form: A = Q * B * P**H. Q and P**H are defined as products of +* elementary reflectors H(i) or G(i) respectively. +* +* If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q +* is of order M: +* if m >= k, Q = H(1) H(2) . . . H(k) and ZUNGBR returns the first n +* columns of Q, where m >= n >= k; +* if m < k, Q = H(1) H(2) . . . H(m-1) and ZUNGBR returns Q as an +* M-by-M matrix. +* +* If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H +* is of order N: +* if k < n, P**H = G(k) . . . G(2) G(1) and ZUNGBR returns the first m +* rows of P**H, where n >= m >= k; +* if k >= n, P**H = G(n-1) . . . G(2) G(1) and ZUNGBR returns P**H as +* an N-by-N matrix. +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* Specifies whether the matrix Q or the matrix P**H is +* required, as defined in the transformation applied by ZGEBRD: +* = 'Q': generate Q; +* = 'P': generate P**H. +* +* M (input) INTEGER +* The number of rows of the matrix Q or P**H to be returned. +* M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q or P**H to be returned. +* N >= 0. +* If VECT = 'Q', M >= N >= min(M,K); +* if VECT = 'P', N >= M >= min(N,K). +* +* K (input) INTEGER +* If VECT = 'Q', the number of columns in the original M-by-K +* matrix reduced by ZGEBRD. +* If VECT = 'P', the number of rows in the original K-by-N +* matrix reduced by ZGEBRD. +* K >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the vectors which define the elementary reflectors, +* as returned by ZGEBRD. +* On exit, the M-by-N matrix Q or P**H. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= M. +* +* TAU (input) COMPLEX*16 array, dimension +* (min(M,K)) if VECT = 'Q' +* (min(N,K)) if VECT = 'P' +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i) or G(i), which determines Q or P**H, as +* returned by ZGEBRD in its array argument TAUQ or TAUP. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,min(M,N)). +* For optimum performance LWORK >= min(M,N)*NB, where NB +* is the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTQ + INTEGER I, IINFO, J, LWKOPT, MN, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZUNGLQ, ZUNGQR +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + WANTQ = LSAME( VECT, 'Q' ) + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 .OR. ( WANTQ .AND. ( N.GT.M .OR. N.LT.MIN( M, + $ K ) ) ) .OR. ( .NOT.WANTQ .AND. ( M.GT.N .OR. M.LT. + $ MIN( N, K ) ) ) ) THEN + INFO = -3 + ELSE IF( K.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LWORK.LT.MAX( 1, MN ) .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( WANTQ ) THEN + NB = ILAENV( 1, 'ZUNGQR', ' ', M, N, K, -1 ) + ELSE + NB = ILAENV( 1, 'ZUNGLQ', ' ', M, N, K, -1 ) + END IF + LWKOPT = MAX( 1, MN )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGBR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( WANTQ ) THEN +* +* Form Q, determined by a call to ZGEBRD to reduce an m-by-k +* matrix +* + IF( M.GE.K ) THEN +* +* If m >= k, assume m >= n >= k +* + CALL ZUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* If m < k, assume m = n +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q +* to those of the unit matrix +* + DO 20 J = M, 2, -1 + A( 1, J ) = ZERO + DO 10 I = J + 1, M + A( I, J ) = A( I, J-1 ) + 10 CONTINUE + 20 CONTINUE + A( 1, 1 ) = ONE + DO 30 I = 2, M + A( I, 1 ) = ZERO + 30 CONTINUE + IF( M.GT.1 ) THEN +* +* Form Q(2:m,2:m) +* + CALL ZUNGQR( M-1, M-1, M-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + ELSE +* +* Form P', determined by a call to ZGEBRD to reduce a k-by-n +* matrix +* + IF( K.LT.N ) THEN +* +* If k < n, assume k <= m <= n +* + CALL ZUNGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* If k >= n, assume m = n +* +* Shift the vectors which define the elementary reflectors one +* row downward, and set the first row and column of P' to +* those of the unit matrix +* + A( 1, 1 ) = ONE + DO 40 I = 2, N + A( I, 1 ) = ZERO + 40 CONTINUE + DO 60 J = 2, N + DO 50 I = J - 1, 2, -1 + A( I, J ) = A( I-1, J ) + 50 CONTINUE + A( 1, J ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Form P'(2:n,2:n) +* + CALL ZUNGLQ( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZUNGBR +* + END diff --git a/costa/native/external/lapack/zunghr.f b/costa/native/external/lapack/zunghr.f new file mode 100644 index 000000000..e8cd55a8a --- /dev/null +++ b/costa/native/external/lapack/zunghr.f @@ -0,0 +1,166 @@ + SUBROUTINE ZUNGHR( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNGHR generates a complex unitary matrix Q which is defined as the +* product of IHI-ILO elementary reflectors of order N, as returned by +* ZGEHRD: +* +* Q = H(ilo) H(ilo+1) . . . H(ihi-1). +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix Q. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* ILO and IHI must have the same values as in the previous call +* of ZGEHRD. Q is equal to the unit matrix except in the +* submatrix Q(ilo+1:ihi,ilo+1:ihi). +* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the vectors which define the elementary reflectors, +* as returned by ZGEHRD. +* On exit, the N-by-N unitary matrix Q. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* TAU (input) COMPLEX*16 array, dimension (N-1) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZGEHRD. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= IHI-ILO. +* For optimum performance LWORK >= (IHI-ILO)*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LWKOPT, NB, NH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZUNGQR +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NH = IHI - ILO + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, NH ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'ZUNGQR', ' ', NH, NH, NH, -1 ) + LWKOPT = MAX( 1, NH )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGHR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first ilo and the last n-ihi +* rows and columns to those of the unit matrix +* + DO 40 J = IHI, ILO + 1, -1 + DO 10 I = 1, J - 1 + A( I, J ) = ZERO + 10 CONTINUE + DO 20 I = J + 1, IHI + A( I, J ) = A( I, J-1 ) + 20 CONTINUE + DO 30 I = IHI + 1, N + A( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + DO 60 J = 1, ILO + DO 50 I = 1, N + A( I, J ) = ZERO + 50 CONTINUE + A( J, J ) = ONE + 60 CONTINUE + DO 80 J = IHI + 1, N + DO 70 I = 1, N + A( I, J ) = ZERO + 70 CONTINUE + A( J, J ) = ONE + 80 CONTINUE +* + IF( NH.GT.0 ) THEN +* +* Generate Q(ilo+1:ihi,ilo+1:ihi) +* + CALL ZUNGQR( NH, NH, NH, A( ILO+1, ILO+1 ), LDA, TAU( ILO ), + $ WORK, LWORK, IINFO ) + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZUNGHR +* + END diff --git a/costa/native/external/lapack/zungl2.f b/costa/native/external/lapack/zungl2.f new file mode 100644 index 000000000..b72ad06cb --- /dev/null +++ b/costa/native/external/lapack/zungl2.f @@ -0,0 +1,137 @@ + SUBROUTINE ZUNGL2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNGL2 generates an m-by-n complex matrix Q with orthonormal rows, +* which is defined as the first m rows of a product of k elementary +* reflectors of order n +* +* Q = H(k)' . . . H(2)' H(1)' +* +* as returned by ZGELQF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. N >= M. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. M >= K >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the i-th row must contain the vector which defines +* the elementary reflector H(i), for i = 1,2,...,k, as returned +* by ZGELQF in the first k rows of its array argument A. +* On exit, the m by n matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) COMPLEX*16 array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZGELQF. +* +* WORK (workspace) COMPLEX*16 array, dimension (M) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACGV, ZLARF, ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGL2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) + $ RETURN +* + IF( K.LT.M ) THEN +* +* Initialise rows k+1:m to rows of the unit matrix +* + DO 20 J = 1, N + DO 10 L = K + 1, M + A( L, J ) = ZERO + 10 CONTINUE + IF( J.GT.K .AND. J.LE.M ) + $ A( J, J ) = ONE + 20 CONTINUE + END IF +* + DO 40 I = K, 1, -1 +* +* Apply H(i)' to A(i:m,i:n) from the right +* + IF( I.LT.N ) THEN + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + IF( I.LT.M ) THEN + A( I, I ) = ONE + CALL ZLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ DCONJG( TAU( I ) ), A( I+1, I ), LDA, WORK ) + END IF + CALL ZSCAL( N-I, -TAU( I ), A( I, I+1 ), LDA ) + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + END IF + A( I, I ) = ONE - DCONJG( TAU( I ) ) +* +* Set A(i,1:i-1) to zero +* + DO 30 L = 1, I - 1 + A( I, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of ZUNGL2 +* + END diff --git a/costa/native/external/lapack/zunglq.f b/costa/native/external/lapack/zunglq.f new file mode 100644 index 000000000..5b4da67cf --- /dev/null +++ b/costa/native/external/lapack/zunglq.f @@ -0,0 +1,216 @@ + SUBROUTINE ZUNGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNGLQ generates an M-by-N complex matrix Q with orthonormal rows, +* which is defined as the first M rows of a product of K elementary +* reflectors of order N +* +* Q = H(k)' . . . H(2)' H(1)' +* +* as returned by ZGELQF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. N >= M. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. M >= K >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the i-th row must contain the vector which defines +* the elementary reflector H(i), for i = 1,2,...,k, as returned +* by ZGELQF in the first k rows of its array argument A. +* On exit, the M-by-N matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) COMPLEX*16 array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZGELQF. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit; +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARFB, ZLARFT, ZUNGL2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'ZUNGLQ', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, M )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'ZUNGLQ', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZUNGLQ', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk rows are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(kk+1:m,1:kk) to zero. +* + DO 20 J = 1, KK + DO 10 I = KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.M ) + $ CALL ZUNGL2( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.M ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL ZLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H' to A(i+ib:m,i:n) from the right +* + CALL ZLARFB( 'Right', 'Conjugate transpose', 'Forward', + $ 'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ), + $ LDA, WORK, LDWORK, A( I+IB, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H' to columns i:n of current block +* + CALL ZUNGL2( IB, N-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set columns 1:i-1 of current block to zero +* + DO 40 J = 1, I - 1 + DO 30 L = I, I + IB - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of ZUNGLQ +* + END diff --git a/costa/native/external/lapack/zungql.f b/costa/native/external/lapack/zungql.f new file mode 100644 index 000000000..96768817c --- /dev/null +++ b/costa/native/external/lapack/zungql.f @@ -0,0 +1,214 @@ + SUBROUTINE ZUNGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNGQL generates an M-by-N complex matrix Q with orthonormal columns, +* which is defined as the last N columns of a product of K elementary +* reflectors of order M +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by ZGEQLF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the (n-k+i)-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by ZGEQLF in the last k columns of its array +* argument A. +* On exit, the M-by-N matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) COMPLEX*16 array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZGEQLF. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KK, L, LDWORK, LWKOPT, + $ NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARFB, ZLARFT, ZUNG2L +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'ZUNGQL', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, N )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGQL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'ZUNGQL', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZUNGQL', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the first block. +* The last kk columns are handled by the block method. +* + KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB ) +* +* Set A(m-kk+1:m,1:n-kk) to zero. +* + DO 20 J = 1, N - KK + DO 10 I = M - KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the first or only block. +* + CALL ZUNG2L( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = K - KK + 1, K, NB + IB = MIN( NB, K-I+1 ) + IF( N-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL ZLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left +* + CALL ZLARFB( 'Left', 'No transpose', 'Backward', + $ 'Columnwise', M-K+I+IB-1, N-K+I-1, IB, + $ A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows 1:m-k+i+ib-1 of current block +* + CALL ZUNG2L( M-K+I+IB-1, IB, IB, A( 1, N-K+I ), LDA, + $ TAU( I ), WORK, IINFO ) +* +* Set rows m-k+i+ib:m of current block to zero +* + DO 40 J = N - K + I, N - K + I + IB - 1 + DO 30 L = M - K + I + IB, M + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of ZUNGQL +* + END diff --git a/costa/native/external/lapack/zungqr.f b/costa/native/external/lapack/zungqr.f new file mode 100644 index 000000000..dad895e10 --- /dev/null +++ b/costa/native/external/lapack/zungqr.f @@ -0,0 +1,217 @@ + SUBROUTINE ZUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNGQR generates an M-by-N complex matrix Q with orthonormal columns, +* which is defined as the first N columns of a product of K elementary +* reflectors of order M +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by ZGEQRF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. M >= N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. N >= K >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the i-th column must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by ZGEQRF in the first k columns of its array +* argument A. +* On exit, the M-by-N matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) COMPLEX*16 array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZGEQRF. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimum performance LWORK >= N*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARFB, ZLARFT, ZUNG2R +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'ZUNGQR', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, N )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'ZUNGQR', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZUNGQR', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk columns are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(1:kk,kk+1:n) to zero. +* + DO 20 J = KK + 1, N + DO 10 I = 1, KK + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.N ) + $ CALL ZUNG2R( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL ZLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i:m,i+ib:n) from the left +* + CALL ZLARFB( 'Left', 'No transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows i:m of current block +* + CALL ZUNG2R( M-I+1, IB, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set rows 1:i-1 of current block to zero +* + DO 40 J = I, I + IB - 1 + DO 30 L = 1, I - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of ZUNGQR +* + END diff --git a/costa/native/external/lapack/zungr2.f b/costa/native/external/lapack/zungr2.f new file mode 100644 index 000000000..7254c5d66 --- /dev/null +++ b/costa/native/external/lapack/zungr2.f @@ -0,0 +1,135 @@ + SUBROUTINE ZUNGR2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNGR2 generates an m by n complex matrix Q with orthonormal rows, +* which is defined as the last m rows of a product of k elementary +* reflectors of order n +* +* Q = H(1)' H(2)' . . . H(k)' +* +* as returned by ZGERQF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. N >= M. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. M >= K >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the (m-k+i)-th row must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by ZGERQF in the last k rows of its array argument +* A. +* On exit, the m-by-n matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) COMPLEX*16 array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZGERQF. +* +* WORK (workspace) COMPLEX*16 array, dimension (M) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, II, J, L +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACGV, ZLARF, ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGR2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) + $ RETURN +* + IF( K.LT.M ) THEN +* +* Initialise rows 1:m-k to rows of the unit matrix +* + DO 20 J = 1, N + DO 10 L = 1, M - K + A( L, J ) = ZERO + 10 CONTINUE + IF( J.GT.N-M .AND. J.LE.N-K ) + $ A( M-N+J, J ) = ONE + 20 CONTINUE + END IF +* + DO 40 I = 1, K + II = M - K + I +* +* Apply H(i)' to A(1:m-k+i,1:n-k+i) from the right +* + CALL ZLACGV( N-M+II-1, A( II, 1 ), LDA ) + A( II, N-M+II ) = ONE + CALL ZLARF( 'Right', II-1, N-M+II, A( II, 1 ), LDA, + $ DCONJG( TAU( I ) ), A, LDA, WORK ) + CALL ZSCAL( N-M+II-1, -TAU( I ), A( II, 1 ), LDA ) + CALL ZLACGV( N-M+II-1, A( II, 1 ), LDA ) + A( II, N-M+II ) = ONE - DCONJG( TAU( I ) ) +* +* Set A(m-k+i,n-k+i+1:n) to zero +* + DO 30 L = N - M + II + 1, N + A( II, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of ZUNGR2 +* + END diff --git a/costa/native/external/lapack/zungrq.f b/costa/native/external/lapack/zungrq.f new file mode 100644 index 000000000..b1ac7f014 --- /dev/null +++ b/costa/native/external/lapack/zungrq.f @@ -0,0 +1,215 @@ + SUBROUTINE ZUNGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNGRQ generates an M-by-N complex matrix Q with orthonormal rows, +* which is defined as the last M rows of a product of K elementary +* reflectors of order N +* +* Q = H(1)' H(2)' . . . H(k)' +* +* as returned by ZGERQF. +* +* Arguments +* ========= +* +* M (input) INTEGER +* The number of rows of the matrix Q. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix Q. N >= M. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines the +* matrix Q. M >= K >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the (m-k+i)-th row must contain the vector which +* defines the elementary reflector H(i), for i = 1,2,...,k, as +* returned by ZGERQF in the last k rows of its array argument +* A. +* On exit, the M-by-N matrix Q. +* +* LDA (input) INTEGER +* The first dimension of the array A. LDA >= max(1,M). +* +* TAU (input) COMPLEX*16 array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZGERQF. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,M). +* For optimum performance LWORK >= M*NB, where NB is the +* optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument has an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, II, IINFO, IWS, J, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARFB, ZLARFT, ZUNGR2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'ZUNGRQ', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, M )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGRQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'ZUNGRQ', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZUNGRQ', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the first block. +* The last kk rows are handled by the block method. +* + KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB ) +* +* Set A(1:m-kk,n-kk+1:n) to zero. +* + DO 20 J = N - KK + 1, N + DO 10 I = 1, M - KK + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the first or only block. +* + CALL ZUNGR2( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = K - KK + 1, K, NB + IB = MIN( NB, K-I+1 ) + II = M - K + I + IF( II.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL ZLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, + $ A( II, 1 ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H' to A(1:m-k+i-1,1:n-k+i+ib-1) from the right +* + CALL ZLARFB( 'Right', 'Conjugate transpose', 'Backward', + $ 'Rowwise', II-1, N-K+I+IB-1, IB, A( II, 1 ), + $ LDA, WORK, LDWORK, A, LDA, WORK( IB+1 ), + $ LDWORK ) + END IF +* +* Apply H' to columns 1:n-k+i+ib-1 of current block +* + CALL ZUNGR2( IB, N-K+I+IB-1, IB, A( II, 1 ), LDA, TAU( I ), + $ WORK, IINFO ) +* +* Set columns n-k+i+ib:n of current block to zero +* + DO 40 L = N - K + I + IB, N + DO 30 J = II, II + IB - 1 + A( J, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of ZUNGRQ +* + END diff --git a/costa/native/external/lapack/zungtr.f b/costa/native/external/lapack/zungtr.f new file mode 100644 index 000000000..d731be20a --- /dev/null +++ b/costa/native/external/lapack/zungtr.f @@ -0,0 +1,185 @@ + SUBROUTINE ZUNGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNGTR generates a complex unitary matrix Q which is defined as the +* product of n-1 elementary reflectors of order N, as returned by +* ZHETRD: +* +* if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), +* +* if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A contains elementary reflectors +* from ZHETRD; +* = 'L': Lower triangle of A contains elementary reflectors +* from ZHETRD. +* +* N (input) INTEGER +* The order of the matrix Q. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the vectors which define the elementary reflectors, +* as returned by ZHETRD. +* On exit, the N-by-N unitary matrix Q. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= N. +* +* TAU (input) COMPLEX*16 array, dimension (N-1) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZHETRD. +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= N-1. +* For optimum performance LWORK >= (N-1)*NB, where NB is +* the optimal blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, J, LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZUNGQL, ZUNGQR +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( UPPER ) THEN + NB = ILAENV( 1, 'ZUNGQL', ' ', N-1, N-1, N-1, -1 ) + ELSE + NB = ILAENV( 1, 'ZUNGQR', ' ', N-1, N-1, N-1, -1 ) + END IF + LWKOPT = MAX( 1, N-1 )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGTR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( UPPER ) THEN +* +* Q was determined by a call to ZHETRD with UPLO = 'U' +* +* Shift the vectors which define the elementary reflectors one +* column to the left, and set the last row and column of Q to +* those of the unit matrix +* + DO 20 J = 1, N - 1 + DO 10 I = 1, J - 1 + A( I, J ) = A( I, J+1 ) + 10 CONTINUE + A( N, J ) = ZERO + 20 CONTINUE + DO 30 I = 1, N - 1 + A( I, N ) = ZERO + 30 CONTINUE + A( N, N ) = ONE +* +* Generate Q(1:n-1,1:n-1) +* + CALL ZUNGQL( N-1, N-1, N-1, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* Q was determined by a call to ZHETRD with UPLO = 'L'. +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q to +* those of the unit matrix +* + DO 50 J = N, 2, -1 + A( 1, J ) = ZERO + DO 40 I = J + 1, N + A( I, J ) = A( I, J-1 ) + 40 CONTINUE + 50 CONTINUE + A( 1, 1 ) = ONE + DO 60 I = 2, N + A( I, 1 ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Generate Q(2:n,2:n) +* + CALL ZUNGQR( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZUNGTR +* + END diff --git a/costa/native/external/lapack/zunm2l.f b/costa/native/external/lapack/zunm2l.f new file mode 100644 index 000000000..453711701 --- /dev/null +++ b/costa/native/external/lapack/zunm2l.f @@ -0,0 +1,197 @@ + SUBROUTINE ZUNM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNM2L overwrites the general complex m-by-n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'C', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'C', +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by ZGEQLF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'C': apply Q' (Conjugate transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,K) +* The i-th column must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* ZGEQLF in the last k columns of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If SIDE = 'L', LDA >= max(1,M); +* if SIDE = 'R', LDA >= max(1,N). +* +* TAU (input) COMPLEX*16 array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZGEQLF. +* +* C (input/output) COMPLEX*16 array, dimension (LDC,N) +* On entry, the m-by-n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX*16 array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, MI, NI, NQ + COMPLEX*16 AII, TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARF +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNM2L', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. NOTRAN .OR. .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)' is applied to C(1:m-k+i,1:n) +* + MI = M - K + I + ELSE +* +* H(i) or H(i)' is applied to C(1:m,1:n-k+i) +* + NI = N - K + I + END IF +* +* Apply H(i) or H(i)' +* + IF( NOTRAN ) THEN + TAUI = TAU( I ) + ELSE + TAUI = DCONJG( TAU( I ) ) + END IF + AII = A( NQ-K+I, I ) + A( NQ-K+I, I ) = ONE + CALL ZLARF( SIDE, MI, NI, A( 1, I ), 1, TAUI, C, LDC, WORK ) + A( NQ-K+I, I ) = AII + 10 CONTINUE + RETURN +* +* End of ZUNM2L +* + END diff --git a/costa/native/external/lapack/zunm2r.f b/costa/native/external/lapack/zunm2r.f new file mode 100644 index 000000000..a17d12f93 --- /dev/null +++ b/costa/native/external/lapack/zunm2r.f @@ -0,0 +1,202 @@ + SUBROUTINE ZUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNM2R overwrites the general complex m-by-n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'C', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'C', +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by ZGEQRF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'C': apply Q' (Conjugate transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,K) +* The i-th column must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* ZGEQRF in the first k columns of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If SIDE = 'L', LDA >= max(1,M); +* if SIDE = 'R', LDA >= max(1,N). +* +* TAU (input) COMPLEX*16 array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZGEQRF. +* +* C (input/output) COMPLEX*16 array, dimension (LDC,N) +* On entry, the m-by-n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX*16 array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ + COMPLEX*16 AII, TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARF +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNM2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) or H(i)' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) or H(i)' +* + IF( NOTRAN ) THEN + TAUI = TAU( I ) + ELSE + TAUI = DCONJG( TAU( I ) ) + END IF + AII = A( I, I ) + A( I, I ) = ONE + CALL ZLARF( SIDE, MI, NI, A( I, I ), 1, TAUI, C( IC, JC ), LDC, + $ WORK ) + A( I, I ) = AII + 10 CONTINUE + RETURN +* +* End of ZUNM2R +* + END diff --git a/costa/native/external/lapack/zunmbr.f b/costa/native/external/lapack/zunmbr.f new file mode 100644 index 000000000..ae0a86f70 --- /dev/null +++ b/costa/native/external/lapack/zunmbr.f @@ -0,0 +1,281 @@ + SUBROUTINE ZUNMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, + $ LDC, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, VECT + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* If VECT = 'Q', ZUNMBR overwrites the general complex M-by-N matrix C +* with +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'C': Q**H * C C * Q**H +* +* If VECT = 'P', ZUNMBR overwrites the general complex M-by-N matrix C +* with +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': P * C C * P +* TRANS = 'C': P**H * C C * P**H +* +* Here Q and P**H are the unitary matrices determined by ZGEBRD when +* reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q +* and P**H are defined as products of elementary reflectors H(i) and +* G(i) respectively. +* +* Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the +* order of the unitary matrix Q or P**H that is applied. +* +* If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: +* if nq >= k, Q = H(1) H(2) . . . H(k); +* if nq < k, Q = H(1) H(2) . . . H(nq-1). +* +* If VECT = 'P', A is assumed to have been a K-by-NQ matrix: +* if k < nq, P = G(1) G(2) . . . G(k); +* if k >= nq, P = G(1) G(2) . . . G(nq-1). +* +* Arguments +* ========= +* +* VECT (input) CHARACTER*1 +* = 'Q': apply Q or Q**H; +* = 'P': apply P or P**H. +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q, Q**H, P or P**H from the Left; +* = 'R': apply Q, Q**H, P or P**H from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q or P; +* = 'C': Conjugate transpose, apply Q**H or P**H. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* If VECT = 'Q', the number of columns in the original +* matrix reduced by ZGEBRD. +* If VECT = 'P', the number of rows in the original +* matrix reduced by ZGEBRD. +* K >= 0. +* +* A (input) COMPLEX*16 array, dimension +* (LDA,min(nq,K)) if VECT = 'Q' +* (LDA,nq) if VECT = 'P' +* The vectors which define the elementary reflectors H(i) and +* G(i), whose products determine the matrices Q and P, as +* returned by ZGEBRD. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If VECT = 'Q', LDA >= max(1,nq); +* if VECT = 'P', LDA >= max(1,min(nq,K)). +* +* TAU (input) COMPLEX*16 array, dimension (min(nq,K)) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i) or G(i) which determines Q or P, as returned +* by ZGEBRD in the array argument TAUQ or TAUP. +* +* C (input/output) COMPLEX*16 array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q +* or P*C or P**H*C or C*P or C*P**H. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL APPLYQ, LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZUNMLQ, ZUNMQR +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + APPLYQ = LSAME( VECT, 'Q' ) + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q or P and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.APPLYQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( K.LT.0 ) THEN + INFO = -6 + ELSE IF( ( APPLYQ .AND. LDA.LT.MAX( 1, NQ ) ) .OR. + $ ( .NOT.APPLYQ .AND. LDA.LT.MAX( 1, MIN( NQ, K ) ) ) ) + $ THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( APPLYQ ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'ZUNMQR', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'ZUNMQR', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + ELSE + IF( LEFT ) THEN + NB = ILAENV( 1, 'ZUNMLQ', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'ZUNMLQ', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNMBR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + END IF +* +* Quick return if possible +* + WORK( 1 ) = 1 + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + IF( APPLYQ ) THEN +* +* Apply Q +* + IF( NQ.GE.K ) THEN +* +* Q was determined by a call to ZGEBRD with nq >= k +* + CALL ZUNMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, IINFO ) + ELSE IF( NQ.GT.1 ) THEN +* +* Q was determined by a call to ZGEBRD with nq < k +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + I1 = 2 + I2 = 1 + ELSE + MI = M + NI = N - 1 + I1 = 1 + I2 = 2 + END IF + CALL ZUNMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU, + $ C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + ELSE +* +* Apply P +* + IF( NOTRAN ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF + IF( NQ.GT.K ) THEN +* +* P was determined by a call to ZGEBRD with nq > k +* + CALL ZUNMLQ( SIDE, TRANST, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, IINFO ) + ELSE IF( NQ.GT.1 ) THEN +* +* P was determined by a call to ZGEBRD with nq <= k +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + I1 = 2 + I2 = 1 + ELSE + MI = M + NI = N - 1 + I1 = 1 + I2 = 2 + END IF + CALL ZUNMLQ( SIDE, TRANST, MI, NI, NQ-1, A( 1, 2 ), LDA, + $ TAU, C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZUNMBR +* + END diff --git a/costa/native/external/lapack/zunmhr.f b/costa/native/external/lapack/zunmhr.f new file mode 100644 index 000000000..f3b3fc71c --- /dev/null +++ b/costa/native/external/lapack/zunmhr.f @@ -0,0 +1,202 @@ + SUBROUTINE ZUNMHR( SIDE, TRANS, M, N, ILO, IHI, A, LDA, TAU, C, + $ LDC, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER IHI, ILO, INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNMHR overwrites the general complex M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'C': Q**H * C C * Q**H +* +* where Q is a complex unitary matrix of order nq, with nq = m if +* SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +* IHI-ILO elementary reflectors, as returned by ZGEHRD: +* +* Q = H(ilo) H(ilo+1) . . . H(ihi-1). +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**H from the Left; +* = 'R': apply Q or Q**H from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'C': apply Q**H (Conjugate transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* ILO (input) INTEGER +* IHI (input) INTEGER +* ILO and IHI must have the same values as in the previous call +* of ZGEHRD. Q is equal to the unit matrix except in the +* submatrix Q(ilo+1:ihi,ilo+1:ihi). +* If SIDE = 'L', then 1 <= ILO <= IHI <= M, if M > 0, and +* ILO = 1 and IHI = 0, if M = 0; +* if SIDE = 'R', then 1 <= ILO <= IHI <= N, if N > 0, and +* ILO = 1 and IHI = 0, if N = 0. +* +* A (input) COMPLEX*16 array, dimension +* (LDA,M) if SIDE = 'L' +* (LDA,N) if SIDE = 'R' +* The vectors which define the elementary reflectors, as +* returned by ZGEHRD. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. +* +* TAU (input) COMPLEX*16 array, dimension +* (M-1) if SIDE = 'L' +* (N-1) if SIDE = 'R' +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZGEHRD. +* +* C (input/output) COMPLEX*16 array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, LQUERY + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NH, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZUNMQR +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NH = IHI - ILO + LEFT = LSAME( SIDE, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) + $ THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, NQ ) ) THEN + INFO = -5 + ELSE IF( IHI.LT.MIN( ILO, NQ ) .OR. IHI.GT.NQ ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'ZUNMQR', SIDE // TRANS, NH, N, NH, -1 ) + ELSE + NB = ILAENV( 1, 'ZUNMQR', SIDE // TRANS, M, NH, NH, -1 ) + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNMHR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. NH.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( LEFT ) THEN + MI = NH + NI = N + I1 = ILO + 1 + I2 = 1 + ELSE + MI = M + NI = NH + I1 = 1 + I2 = ILO + 1 + END IF +* + CALL ZUNMQR( SIDE, TRANS, MI, NI, NH, A( ILO+1, ILO ), LDA, + $ TAU( ILO ), C( I1, I2 ), LDC, WORK, LWORK, IINFO ) +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZUNMHR +* + END diff --git a/costa/native/external/lapack/zunml2.f b/costa/native/external/lapack/zunml2.f new file mode 100644 index 000000000..650710ec9 --- /dev/null +++ b/costa/native/external/lapack/zunml2.f @@ -0,0 +1,206 @@ + SUBROUTINE ZUNML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNML2 overwrites the general complex m-by-n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'C', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'C', +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(k)' . . . H(2)' H(1)' +* +* as returned by ZGELQF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'C': apply Q' (Conjugate transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) COMPLEX*16 array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* ZGELQF in the first k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) COMPLEX*16 array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZGELQF. +* +* C (input/output) COMPLEX*16 array, dimension (LDC,N) +* On entry, the m-by-n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX*16 array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ + COMPLEX*16 AII, TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACGV, ZLARF +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNML2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. NOTRAN .OR. .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) or H(i)' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) or H(i)' +* + IF( NOTRAN ) THEN + TAUI = DCONJG( TAU( I ) ) + ELSE + TAUI = TAU( I ) + END IF + IF( I.LT.NQ ) + $ CALL ZLACGV( NQ-I, A( I, I+1 ), LDA ) + AII = A( I, I ) + A( I, I ) = ONE + CALL ZLARF( SIDE, MI, NI, A( I, I ), LDA, TAUI, C( IC, JC ), + $ LDC, WORK ) + A( I, I ) = AII + IF( I.LT.NQ ) + $ CALL ZLACGV( NQ-I, A( I, I+1 ), LDA ) + 10 CONTINUE + RETURN +* +* End of ZUNML2 +* + END diff --git a/costa/native/external/lapack/zunmlq.f b/costa/native/external/lapack/zunmlq.f new file mode 100644 index 000000000..3d69f2e4e --- /dev/null +++ b/costa/native/external/lapack/zunmlq.f @@ -0,0 +1,268 @@ + SUBROUTINE ZUNMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNMLQ overwrites the general complex M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'C': Q**H * C C * Q**H +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(k)' . . . H(2)' H(1)' +* +* as returned by ZGELQF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**H from the Left; +* = 'R': apply Q or Q**H from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'C': Conjugate transpose, apply Q**H. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) COMPLEX*16 array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* ZGELQF in the first k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) COMPLEX*16 array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZGELQF. +* +* C (input/output) COMPLEX*16 array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + COMPLEX*16 T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARFB, ZLARFT, ZUNML2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'ZUNMLQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNMLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZUNMLQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL ZUNML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + IF( NOTRAN ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL ZLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H' +* + CALL ZLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB, + $ A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, WORK, + $ LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZUNMLQ +* + END diff --git a/costa/native/external/lapack/zunmql.f b/costa/native/external/lapack/zunmql.f new file mode 100644 index 000000000..0ba198a05 --- /dev/null +++ b/costa/native/external/lapack/zunmql.f @@ -0,0 +1,257 @@ + SUBROUTINE ZUNMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNMQL overwrites the general complex M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'C': Q**H * C C * Q**H +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(k) . . . H(2) H(1) +* +* as returned by ZGEQLF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**H from the Left; +* = 'R': apply Q or Q**H from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'C': Transpose, apply Q**H. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,K) +* The i-th column must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* ZGEQLF in the last k columns of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If SIDE = 'L', LDA >= max(1,M); +* if SIDE = 'R', LDA >= max(1,N). +* +* TAU (input) COMPLEX*16 array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZGEQLF. +* +* C (input/output) COMPLEX*16 array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT, + $ MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + COMPLEX*16 T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARFB, ZLARFT, ZUNM2L +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'ZUNMQL', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNMQL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZUNMQL', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL ZUNM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL ZLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB, + $ A( 1, I ), LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H' is applied to C(1:m-k+i+ib-1,1:n) +* + MI = M - K + I + IB - 1 + ELSE +* +* H or H' is applied to C(1:m,1:n-k+i+ib-1) +* + NI = N - K + I + IB - 1 + END IF +* +* Apply H or H' +* + CALL ZLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI, + $ IB, A( 1, I ), LDA, T, LDT, C, LDC, WORK, + $ LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZUNMQL +* + END diff --git a/costa/native/external/lapack/zunmqr.f b/costa/native/external/lapack/zunmqr.f new file mode 100644 index 000000000..54168a04c --- /dev/null +++ b/costa/native/external/lapack/zunmqr.f @@ -0,0 +1,261 @@ + SUBROUTINE ZUNMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNMQR overwrites the general complex M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'C': Q**H * C C * Q**H +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by ZGEQRF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**H from the Left; +* = 'R': apply Q or Q**H from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'C': Conjugate transpose, apply Q**H. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,K) +* The i-th column must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* ZGEQRF in the first k columns of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* If SIDE = 'L', LDA >= max(1,M); +* if SIDE = 'R', LDA >= max(1,N). +* +* TAU (input) COMPLEX*16 array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZGEQRF. +* +* C (input/output) COMPLEX*16 array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK, + $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + COMPLEX*16 T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARFB, ZLARFT, ZUNM2R +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'ZUNMQR', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNMQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZUNMQR', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL ZUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL ZLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H' +* + CALL ZLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI, + $ IB, A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, + $ WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZUNMQR +* + END diff --git a/costa/native/external/lapack/zunmr2.f b/costa/native/external/lapack/zunmr2.f new file mode 100644 index 000000000..15f403efc --- /dev/null +++ b/costa/native/external/lapack/zunmr2.f @@ -0,0 +1,199 @@ + SUBROUTINE ZUNMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNMR2 overwrites the general complex m-by-n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'C', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'C', +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(1)' H(2)' . . . H(k)' +* +* as returned by ZGERQF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'C': apply Q' (Conjugate transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) COMPLEX*16 array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* ZGERQF in the last k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) COMPLEX*16 array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZGERQF. +* +* C (input/output) COMPLEX*16 array, dimension (LDC,N) +* On entry, the m-by-n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX*16 array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, MI, NI, NQ + COMPLEX*16 AII, TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACGV, ZLARF +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNMR2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)' is applied to C(1:m-k+i,1:n) +* + MI = M - K + I + ELSE +* +* H(i) or H(i)' is applied to C(1:m,1:n-k+i) +* + NI = N - K + I + END IF +* +* Apply H(i) or H(i)' +* + IF( NOTRAN ) THEN + TAUI = DCONJG( TAU( I ) ) + ELSE + TAUI = TAU( I ) + END IF + CALL ZLACGV( NQ-K+I-1, A( I, 1 ), LDA ) + AII = A( I, NQ-K+I ) + A( I, NQ-K+I ) = ONE + CALL ZLARF( SIDE, MI, NI, A( I, 1 ), LDA, TAUI, C, LDC, WORK ) + A( I, NQ-K+I ) = AII + CALL ZLACGV( NQ-K+I-1, A( I, 1 ), LDA ) + 10 CONTINUE + RETURN +* +* End of ZUNMR2 +* + END diff --git a/costa/native/external/lapack/zunmr3.f b/costa/native/external/lapack/zunmr3.f new file mode 100644 index 000000000..204b7bfd1 --- /dev/null +++ b/costa/native/external/lapack/zunmr3.f @@ -0,0 +1,213 @@ + SUBROUTINE ZUNMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, L, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNMR3 overwrites the general complex m by n matrix C with +* +* Q * C if SIDE = 'L' and TRANS = 'N', or +* +* Q'* C if SIDE = 'L' and TRANS = 'C', or +* +* C * Q if SIDE = 'R' and TRANS = 'N', or +* +* C * Q' if SIDE = 'R' and TRANS = 'C', +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by ZTZRZF. Q is of order m if SIDE = 'L' and of order n +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q' from the Left +* = 'R': apply Q or Q' from the Right +* +* TRANS (input) CHARACTER*1 +* = 'N': apply Q (No transpose) +* = 'C': apply Q' (Conjugate transpose) +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* L (input) INTEGER +* The number of columns of the matrix A containing +* the meaningful part of the Householder reflectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* A (input) COMPLEX*16 array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* ZTZRZF in the last k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) COMPLEX*16 array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZTZRZF. +* +* C (input/output) COMPLEX*16 array, dimension (LDC,N) +* On entry, the m-by-n matrix C. +* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX*16 array, dimension +* (N) if SIDE = 'L', +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JA, JC, MI, NI, NQ + COMPLEX*16 TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARZ +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR. + $ ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNMR3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JA = M - L + 1 + JC = 1 + ELSE + MI = M + JA = N - L + 1 + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) or H(i)' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) or H(i)' +* + IF( NOTRAN ) THEN + TAUI = TAU( I ) + ELSE + TAUI = DCONJG( TAU( I ) ) + END IF + CALL ZLARZ( SIDE, MI, NI, L, A( I, JA ), LDA, TAUI, + $ C( IC, JC ), LDC, WORK ) +* + 10 CONTINUE +* + RETURN +* +* End of ZUNMR3 +* + END diff --git a/costa/native/external/lapack/zunmrq.f b/costa/native/external/lapack/zunmrq.f new file mode 100644 index 000000000..edee06e96 --- /dev/null +++ b/costa/native/external/lapack/zunmrq.f @@ -0,0 +1,264 @@ + SUBROUTINE ZUNMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNMRQ overwrites the general complex M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'C': Q**H * C C * Q**H +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(1)' H(2)' . . . H(k)' +* +* as returned by ZGERQF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**H from the Left; +* = 'R': apply Q or Q**H from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'C': Transpose, apply Q**H. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* A (input) COMPLEX*16 array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* ZGERQF in the last k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) COMPLEX*16 array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZGERQF. +* +* C (input/output) COMPLEX*16 array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT, + $ MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + COMPLEX*16 T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARFB, ZLARFT, ZUNMR2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'ZUNMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNMRQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZUNMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL ZUNMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + IF( NOTRAN ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL ZLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB, + $ A( I, 1 ), LDA, TAU( I ), T, LDT ) + IF( LEFT ) THEN +* +* H or H' is applied to C(1:m-k+i+ib-1,1:n) +* + MI = M - K + I + IB - 1 + ELSE +* +* H or H' is applied to C(1:m,1:n-k+i+ib-1) +* + NI = N - K + I + IB - 1 + END IF +* +* Apply H or H' +* + CALL ZLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, + $ IB, A( I, 1 ), LDA, T, LDT, C, LDC, WORK, + $ LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZUNMRQ +* + END diff --git a/costa/native/external/lapack/zunmrz.f b/costa/native/external/lapack/zunmrz.f new file mode 100644 index 000000000..39eb863e5 --- /dev/null +++ b/costa/native/external/lapack/zunmrz.f @@ -0,0 +1,293 @@ + SUBROUTINE ZUNMRZ( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, L, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNMRZ overwrites the general complex M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'C': Q**H * C C * Q**H +* +* where Q is a complex unitary matrix defined as the product of k +* elementary reflectors +* +* Q = H(1) H(2) . . . H(k) +* +* as returned by ZTZRZF. Q is of order M if SIDE = 'L' and of order N +* if SIDE = 'R'. +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**H from the Left; +* = 'R': apply Q or Q**H from the Right. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'C': Conjugate transpose, apply Q**H. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* K (input) INTEGER +* The number of elementary reflectors whose product defines +* the matrix Q. +* If SIDE = 'L', M >= K >= 0; +* if SIDE = 'R', N >= K >= 0. +* +* L (input) INTEGER +* The number of columns of the matrix A containing +* the meaningful part of the Householder reflectors. +* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +* +* A (input) COMPLEX*16 array, dimension +* (LDA,M) if SIDE = 'L', +* (LDA,N) if SIDE = 'R' +* The i-th row must contain the vector which defines the +* elementary reflector H(i), for i = 1,2,...,k, as returned by +* ZTZRZF in the last k rows of its array argument A. +* A is modified by the routine but restored on exit. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,K). +* +* TAU (input) COMPLEX*16 array, dimension (K) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZTZRZF. +* +* C (input/output) COMPLEX*16 array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >= M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* Further Details +* =============== +* +* Based on contributions by +* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT + PARAMETER ( NBMAX = 64, LDT = NBMAX+1 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWS, JA, JC, + $ LDWORK, LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. Local Arrays .. + COMPLEX*16 T( LDT, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARZB, ZLARZT, ZUNMR3 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR. + $ ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'ZUNMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNMRZ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'ZUNMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IWS = NW*NB + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZUNMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + ELSE + IWS = NW + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL ZUNMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, IINFO ) + ELSE +* +* Use blocked code +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + JA = M - L + 1 + ELSE + MI = M + IC = 1 + JA = N - L + 1 + END IF +* + IF( NOTRAN ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL ZLARZT( 'Backward', 'Rowwise', L, IB, A( I, JA ), LDA, + $ TAU( I ), T, LDT ) +* + IF( LEFT ) THEN +* +* H or H' is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H' is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H' +* + CALL ZLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, + $ IB, L, A( I, JA ), LDA, T, LDT, C( IC, JC ), + $ LDC, WORK, LDWORK ) + 10 CONTINUE +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZUNMRZ +* + END diff --git a/costa/native/external/lapack/zunmtr.f b/costa/native/external/lapack/zunmtr.f new file mode 100644 index 000000000..8fc2f9fda --- /dev/null +++ b/costa/native/external/lapack/zunmtr.f @@ -0,0 +1,223 @@ + SUBROUTINE ZUNMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, UPLO + INTEGER INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUNMTR overwrites the general complex M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'C': Q**H * C C * Q**H +* +* where Q is a complex unitary matrix of order nq, with nq = m if +* SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +* nq-1 elementary reflectors, as returned by ZHETRD: +* +* if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); +* +* if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**H from the Left; +* = 'R': apply Q or Q**H from the Right. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangle of A contains elementary reflectors +* from ZHETRD; +* = 'L': Lower triangle of A contains elementary reflectors +* from ZHETRD. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'C': Conjugate transpose, apply Q**H. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* A (input) COMPLEX*16 array, dimension +* (LDA,M) if SIDE = 'L' +* (LDA,N) if SIDE = 'R' +* The vectors which define the elementary reflectors, as +* returned by ZHETRD. +* +* LDA (input) INTEGER +* The leading dimension of the array A. +* LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. +* +* TAU (input) COMPLEX*16 array, dimension +* (M-1) if SIDE = 'L' +* (N-1) if SIDE = 'R' +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZHETRD. +* +* C (input/output) COMPLEX*16 array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. +* If SIDE = 'L', LWORK >= max(1,N); +* if SIDE = 'R', LWORK >= max(1,M). +* For optimum performance LWORK >= N*NB if SIDE = 'L', and +* LWORK >=M*NB if SIDE = 'R', where NB is the optimal +* blocksize. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, UPPER + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZUNMQL, ZUNMQR +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) + $ THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( UPPER ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'ZUNMQL', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'ZUNMQL', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + ELSE + IF( LEFT ) THEN + NB = ILAENV( 1, 'ZUNMQR', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'ZUNMQR', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNMTR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. NQ.EQ.1 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + ELSE + MI = M + NI = N - 1 + END IF +* + IF( UPPER ) THEN +* +* Q was determined by a call to ZHETRD with UPLO = 'U' +* + CALL ZUNMQL( SIDE, TRANS, MI, NI, NQ-1, A( 1, 2 ), LDA, TAU, C, + $ LDC, WORK, LWORK, IINFO ) + ELSE +* +* Q was determined by a call to ZHETRD with UPLO = 'L' +* + IF( LEFT ) THEN + I1 = 2 + I2 = 1 + ELSE + I1 = 1 + I2 = 2 + END IF + CALL ZUNMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU, + $ C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZUNMTR +* + END diff --git a/costa/native/external/lapack/zupgtr.f b/costa/native/external/lapack/zupgtr.f new file mode 100644 index 000000000..d44124bfd --- /dev/null +++ b/costa/native/external/lapack/zupgtr.f @@ -0,0 +1,162 @@ + SUBROUTINE ZUPGTR( UPLO, N, AP, TAU, Q, LDQ, WORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDQ, N +* .. +* .. Array Arguments .. + COMPLEX*16 AP( * ), Q( LDQ, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUPGTR generates a complex unitary matrix Q which is defined as the +* product of n-1 elementary reflectors H(i) of order n, as returned by +* ZHPTRD using packed storage: +* +* if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), +* +* if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). +* +* Arguments +* ========= +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular packed storage used in previous +* call to ZHPTRD; +* = 'L': Lower triangular packed storage used in previous +* call to ZHPTRD. +* +* N (input) INTEGER +* The order of the matrix Q. N >= 0. +* +* AP (input) COMPLEX*16 array, dimension (N*(N+1)/2) +* The vectors which define the elementary reflectors, as +* returned by ZHPTRD. +* +* TAU (input) COMPLEX*16 array, dimension (N-1) +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZHPTRD. +* +* Q (output) COMPLEX*16 array, dimension (LDQ,N) +* The N-by-N unitary matrix Q. +* +* LDQ (input) INTEGER +* The leading dimension of the array Q. LDQ >= max(1,N). +* +* WORK (workspace) COMPLEX*16 array, dimension (N-1) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IINFO, IJ, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZUNG2L, ZUNG2R +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUPGTR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Q was determined by a call to ZHPTRD with UPLO = 'U' +* +* Unpack the vectors which define the elementary reflectors and +* set the last row and column of Q equal to those of the unit +* matrix +* + IJ = 2 + DO 20 J = 1, N - 1 + DO 10 I = 1, J - 1 + Q( I, J ) = AP( IJ ) + IJ = IJ + 1 + 10 CONTINUE + IJ = IJ + 2 + Q( N, J ) = CZERO + 20 CONTINUE + DO 30 I = 1, N - 1 + Q( I, N ) = CZERO + 30 CONTINUE + Q( N, N ) = CONE +* +* Generate Q(1:n-1,1:n-1) +* + CALL ZUNG2L( N-1, N-1, N-1, Q, LDQ, TAU, WORK, IINFO ) +* + ELSE +* +* Q was determined by a call to ZHPTRD with UPLO = 'L'. +* +* Unpack the vectors which define the elementary reflectors and +* set the first row and column of Q equal to those of the unit +* matrix +* + Q( 1, 1 ) = CONE + DO 40 I = 2, N + Q( I, 1 ) = CZERO + 40 CONTINUE + IJ = 3 + DO 60 J = 2, N + Q( 1, J ) = CZERO + DO 50 I = J + 1, N + Q( I, J ) = AP( IJ ) + IJ = IJ + 1 + 50 CONTINUE + IJ = IJ + 2 + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Generate Q(2:n,2:n) +* + CALL ZUNG2R( N-1, N-1, N-1, Q( 2, 2 ), LDQ, TAU, WORK, + $ IINFO ) + END IF + END IF + RETURN +* +* End of ZUPGTR +* + END diff --git a/costa/native/external/lapack/zupmtr.f b/costa/native/external/lapack/zupmtr.f new file mode 100644 index 000000000..9156336e8 --- /dev/null +++ b/costa/native/external/lapack/zupmtr.f @@ -0,0 +1,268 @@ + SUBROUTINE ZUPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, UPLO + INTEGER INFO, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 AP( * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZUPMTR overwrites the general complex M-by-N matrix C with +* +* SIDE = 'L' SIDE = 'R' +* TRANS = 'N': Q * C C * Q +* TRANS = 'C': Q**H * C C * Q**H +* +* where Q is a complex unitary matrix of order nq, with nq = m if +* SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +* nq-1 elementary reflectors, as returned by ZHPTRD using packed +* storage: +* +* if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); +* +* if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). +* +* Arguments +* ========= +* +* SIDE (input) CHARACTER*1 +* = 'L': apply Q or Q**H from the Left; +* = 'R': apply Q or Q**H from the Right. +* +* UPLO (input) CHARACTER*1 +* = 'U': Upper triangular packed storage used in previous +* call to ZHPTRD; +* = 'L': Lower triangular packed storage used in previous +* call to ZHPTRD. +* +* TRANS (input) CHARACTER*1 +* = 'N': No transpose, apply Q; +* = 'C': Conjugate transpose, apply Q**H. +* +* M (input) INTEGER +* The number of rows of the matrix C. M >= 0. +* +* N (input) INTEGER +* The number of columns of the matrix C. N >= 0. +* +* AP (input) COMPLEX*16 array, dimension +* (M*(M+1)/2) if SIDE = 'L' +* (N*(N+1)/2) if SIDE = 'R' +* The vectors which define the elementary reflectors, as +* returned by ZHPTRD. AP is modified by the routine but +* restored on exit. +* +* TAU (input) COMPLEX*16 array, dimension (M-1) if SIDE = 'L' +* or (N-1) if SIDE = 'R' +* TAU(i) must contain the scalar factor of the elementary +* reflector H(i), as returned by ZHPTRD. +* +* C (input/output) COMPLEX*16 array, dimension (LDC,N) +* On entry, the M-by-N matrix C. +* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +* +* LDC (input) INTEGER +* The leading dimension of the array C. LDC >= max(1,M). +* +* WORK (workspace) COMPLEX*16 array, dimension +* (N) if SIDE = 'L' +* (M) if SIDE = 'R' +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL FORWRD, LEFT, NOTRAN, UPPER + INTEGER I, I1, I2, I3, IC, II, JC, MI, NI, NQ + COMPLEX*16 AII, TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARF +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + UPPER = LSAME( UPLO, 'U' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUPMTR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Q was determined by a call to ZHPTRD with UPLO = 'U' +* + FORWRD = ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) +* + IF( FORWRD ) THEN + I1 = 1 + I2 = NQ - 1 + I3 = 1 + II = 2 + ELSE + I1 = NQ - 1 + I2 = 1 + I3 = -1 + II = NQ*( NQ+1 ) / 2 - 1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)' is applied to C(1:i,1:n) +* + MI = I + ELSE +* +* H(i) or H(i)' is applied to C(1:m,1:i) +* + NI = I + END IF +* +* Apply H(i) or H(i)' +* + IF( NOTRAN ) THEN + TAUI = TAU( I ) + ELSE + TAUI = DCONJG( TAU( I ) ) + END IF + AII = AP( II ) + AP( II ) = ONE + CALL ZLARF( SIDE, MI, NI, AP( II-I+1 ), 1, TAUI, C, LDC, + $ WORK ) + AP( II ) = AII +* + IF( FORWRD ) THEN + II = II + I + 2 + ELSE + II = II - I - 1 + END IF + 10 CONTINUE + ELSE +* +* Q was determined by a call to ZHPTRD with UPLO = 'L'. +* + FORWRD = ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) +* + IF( FORWRD ) THEN + I1 = 1 + I2 = NQ - 1 + I3 = 1 + II = 2 + ELSE + I1 = NQ - 1 + I2 = 1 + I3 = -1 + II = NQ*( NQ+1 ) / 2 - 1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 20 I = I1, I2, I3 + AII = AP( II ) + AP( II ) = ONE + IF( LEFT ) THEN +* +* H(i) or H(i)' is applied to C(i+1:m,1:n) +* + MI = M - I + IC = I + 1 + ELSE +* +* H(i) or H(i)' is applied to C(1:m,i+1:n) +* + NI = N - I + JC = I + 1 + END IF +* +* Apply H(i) or H(i)' +* + IF( NOTRAN ) THEN + TAUI = TAU( I ) + ELSE + TAUI = DCONJG( TAU( I ) ) + END IF + CALL ZLARF( SIDE, MI, NI, AP( II ), 1, TAUI, C( IC, JC ), + $ LDC, WORK ) + AP( II ) = AII +* + IF( FORWRD ) THEN + II = II + NQ - I + 1 + ELSE + II = II - NQ + I - 2 + END IF + 20 CONTINUE + END IF + RETURN +* +* End of ZUPMTR +* + END From a4713b1eac8349c8a1f0d7cecaa5d20f4a6ad04a Mon Sep 17 00:00:00 2001 From: Werner Kramer Date: Mon, 2 Jan 2023 13:20:06 +0100 Subject: [PATCH 02/15] Use system jni and libxml2 --- costa/native/CMakeLists.txt | 11 + costa/native/README.md | 13 + costa/native/cta/src/CMakeLists.txt | 5 +- costa/native/cta_f90/generated/CMakeLists.txt | 19 + costa/native/external/blas/CMakeLists.txt | 21 + costa/native/external/lapack/CMakeLists.txt | 106 +++ .../jni_cta_CtaObsdescr_NativeToJava.c | 258 ++++++ .../jni_cta_CtaObsdescr_NativeToJava.h | 33 + .../openda/bridge/include/jni_cta_utils.h | 45 + .../openda/bridge/include/jni_datatypes.h | 31 + .../include/org_openda_costa_CtaArray.h | 213 +++++ .../include/org_openda_costa_CtaInitialize.h | 29 + .../include/org_openda_costa_CtaModelState.h | 21 + .../include/org_openda_costa_CtaObject.h | 21 + ..._openda_costa_CtaObservationDescriptions.h | 77 ++ .../include/org_openda_costa_CtaOpenDaModel.h | 189 +++++ .../include/org_openda_costa_CtaParallel.h | 29 + .../org_openda_costa_CtaRelationTable.h | 61 ++ .../org_openda_costa_CtaStochObserver.h | 101 +++ .../bridge/include/org_openda_costa_CtaTime.h | 157 ++++ .../include/org_openda_costa_CtaTreeVector.h | 137 +++ .../include/org_openda_costa_CtaUtils.h | 21 + .../include/org_openda_costa_CtaVector.h | 155 ++++ ..._openda_resultwriters_NativeResultWriter.h | 45 + ...a_resultwriters_NetcdfResultWriterNative.h | 29 + costa/native/openda/bridge/src/CMakeLists.txt | 26 + .../openda/bridge/src/jni_cta_utils.cpp | 220 +++++ .../bridge/src/org_openda_costa_CtaArray.cpp | 600 +++++++++++++ .../src/org_openda_costa_CtaInitialize.cpp | 85 ++ .../src/org_openda_costa_CtaModelState.cpp | 63 ++ .../bridge/src/org_openda_costa_CtaObject.cpp | 39 + ...penda_costa_CtaObservationDescriptions.cpp | 307 +++++++ .../src/org_openda_costa_CtaOpenDaModel.cpp | 798 ++++++++++++++++++ .../src/org_openda_costa_CtaParallel.cpp | 83 ++ .../src/org_openda_costa_CtaRelationTable.cpp | 171 ++++ .../src/org_openda_costa_CtaStochObserver.cpp | 320 +++++++ .../bridge/src/org_openda_costa_CtaTime.cpp | 286 +++++++ .../src/org_openda_costa_CtaTreeVector.cpp | 513 +++++++++++ .../bridge/src/org_openda_costa_CtaUtils.cpp | 39 + .../bridge/src/org_openda_costa_CtaVector.cpp | 547 ++++++++++++ ...penda_resultwriters_NativeResultWriter.cpp | 122 +++ ...resultwriters_NetcdfResultWriterNative.cpp | 111 +++ 42 files changed, 6156 insertions(+), 1 deletion(-) create mode 100644 costa/native/CMakeLists.txt create mode 100644 costa/native/README.md create mode 100644 costa/native/cta_f90/generated/CMakeLists.txt create mode 100644 costa/native/external/blas/CMakeLists.txt create mode 100644 costa/native/external/lapack/CMakeLists.txt create mode 100644 costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.c create mode 100644 costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.h create mode 100644 costa/native/openda/bridge/include/jni_cta_utils.h create mode 100644 costa/native/openda/bridge/include/jni_datatypes.h create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaArray.h create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaInitialize.h create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaModelState.h create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaObject.h create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaObservationDescriptions.h create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaOpenDaModel.h create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaParallel.h create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaRelationTable.h create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaStochObserver.h create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaTime.h create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaTreeVector.h create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaUtils.h create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaVector.h create mode 100644 costa/native/openda/bridge/include/org_openda_resultwriters_NativeResultWriter.h create mode 100644 costa/native/openda/bridge/include/org_openda_resultwriters_NetcdfResultWriterNative.h create mode 100644 costa/native/openda/bridge/src/CMakeLists.txt create mode 100644 costa/native/openda/bridge/src/jni_cta_utils.cpp create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaArray.cpp create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaInitialize.cpp create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaModelState.cpp create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaObject.cpp create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaObservationDescriptions.cpp create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaOpenDaModel.cpp create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaParallel.cpp create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaRelationTable.cpp create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaStochObserver.cpp create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaTime.cpp create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaTreeVector.cpp create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaUtils.cpp create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaVector.cpp create mode 100644 costa/native/openda/bridge/src/org_openda_resultwriters_NativeResultWriter.cpp create mode 100644 costa/native/openda/bridge/src/org_openda_resultwriters_NetcdfResultWriterNative.cpp diff --git a/costa/native/CMakeLists.txt b/costa/native/CMakeLists.txt new file mode 100644 index 000000000..92c16cb1e --- /dev/null +++ b/costa/native/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 3.9.1) + +project(openda) +#add_subdirectory(libxml) +add_subdirectory(external/blas) +add_subdirectory(external/lapack) + +add_subdirectory(cta/src) +add_subdirectory(cta_f90/generated) +add_subdirectory(openda/bridge/src) + diff --git a/costa/native/README.md b/costa/native/README.md new file mode 100644 index 000000000..f20432f61 --- /dev/null +++ b/costa/native/README.md @@ -0,0 +1,13 @@ + + + + +```shell +export JAVA_HOME=/opt/apps/java/amazon-corretto-17.0.5.8.1-linux-x64/amazon-corretto-17.0.5.8.1-linux-x64 +``` + + +```shell +cd build +cmake .. -DCMAKE_Fortran_COMPILER=ifort -DCMAKE_C_COMPILER=gcc +``` diff --git a/costa/native/cta/src/CMakeLists.txt b/costa/native/cta/src/CMakeLists.txt index afd3dcd6a..d225b4c14 100644 --- a/costa/native/cta/src/CMakeLists.txt +++ b/costa/native/cta/src/CMakeLists.txt @@ -1,4 +1,5 @@ cmake_minimum_required(VERSION 3.9.1) +find_package(LibXml2 REQUIRED) set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/lib) set(SOURCES @@ -16,8 +17,10 @@ set(SOURCES ) add_library(cta SHARED ${SOURCES}) + + target_include_directories(cta PUBLIC ../include) target_include_directories(cta PUBLIC ${CMAKE_SOURCE_DIR}/external) - +target_include_directories(cta PUBLIC ${LIBXML2_INCLUDE_DIR}) #target_link_libraries(cta INTERFACE libxml) diff --git a/costa/native/cta_f90/generated/CMakeLists.txt b/costa/native/cta_f90/generated/CMakeLists.txt new file mode 100644 index 000000000..e94c5652e --- /dev/null +++ b/costa/native/cta_f90/generated/CMakeLists.txt @@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 3.9.1) +enable_language(Fortran) + +set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/lib) +set(SOURCES +cta_f90.f90 cta_f90_interface.f90 cta_f90_model_utilities.f90 cta_f90_time.f90 +cta_f90_array.f90 cta_f90_matrix.f90 cta_f90_obsdescr.f90 cta_f90_tree.f90 +cta_f90_datatypes.f90 cta_f90_mem.f90 cta_f90_pack.f90 cta_f90_treevector.f90 +cta_f90_datetime.f90 cta_f90_message.f90 cta_f90_par.f90 cta_f90_util_methods.f90 +cta_f90_file.f90 cta_f90_metainfo.f90 cta_f90_parameters.f90 cta_f90_util_sort.f90 +cta_f90_flush_mod.f90 cta_f90_method.f90 cta_f90_reltable.f90 cta_f90_util_statistics.f90 +cta_f90_functions.f90 cta_f90_modbuild_par.f90 cta_f90_resultwriter.f90 cta_f90_vector.f90 +cta_f90_handles.f90 cta_f90_model.f90 cta_f90_sobs.f90 cta_f90_xml.f90 +cta_f90_initialise.f90 cta_f90_model_factory.f90 cta_f90_string.f90 +) + +add_library(cta_f90 SHARED ${SOURCES}) +target_include_directories(cta_f90 PUBLIC ../include) + diff --git a/costa/native/external/blas/CMakeLists.txt b/costa/native/external/blas/CMakeLists.txt new file mode 100644 index 000000000..7e8e5636c --- /dev/null +++ b/costa/native/external/blas/CMakeLists.txt @@ -0,0 +1,21 @@ +cmake_minimum_required(VERSION 3.9.1) +enable_language(Fortran) + +set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/lib) +set(SOURCES +caxpy.f cher.f csyr2k.f dcabs1.f dspmv.f dtpmv.f lsame.f srot.f ssyr2k.f zcopy.f zher.f zsyrk.f +ccopy.f cher2.f csyrk.f dcopy.f dspr.f dtpsv.f sasum.f srotg.f ssyrk.f zdotc.f zher2.f ztbmv.f +cdotc.f cher2k.f ctbmv.f ddot.f dspr2.f dtrmm.f saxpy.f ssbmv.f stbmv.f zdotu.f zher2k.f ztbsv.f +cdotu.f cherk.f ctbsv.f dgbmv.f dswap.f dtrmv.f scasum.f sscal.f stbsv.f zdscal.f zherk.f ztpmv.f +cgbmv.f chpmv.f ctpmv.f dgemm.f dsymm.f dtrsm.f scnrm2.f sspmv.f stpmv.f zgbmv.f zhpmv.f ztpsv.f +cgemm.f chpr.f ctpsv.f dgemv.f dsymv.f dtrsv.f scopy.f sspr.f stpsv.f zgemm.f zhpr.f ztrmm.f +cgemv.f chpr2.f ctrmm.f dger.f dsyr.f dzasum.f sdot.f sspr2.f strmm.f zgemv.f zhpr2.f ztrmv.f +cgerc.f crotg.f ctrmv.f dnrm2.f dsyr2.f dznrm2.f sgbmv.f sswap.f strmv.f zgerc.f zrotg.f ztrsm.f +cgeru.f cscal.f ctrsm.f drot.f dsyr2k.f icamax.f sgemm.f ssymm.f strsm.f zgeru.f zscal.f ztrsv.f +chbmv.f csscal.f ctrsv.f drotg.f dsyrk.f idamax.f sgemv.f ssymv.f strsv.f zhbmv.f zswap.f +chemm.f cswap.f dasum.f dsbmv.f dtbmv.f isamax.f sger.f ssyr.f xerbla.f zhemm.f zsymm.f +chemv.f csymm.f daxpy.f dscal.f dtbsv.f izamax.f snrm2.f ssyr2.f zaxpy.f zhemv.f zsyr2k.f +) + +add_library(blas SHARED ${SOURCES}) + diff --git a/costa/native/external/lapack/CMakeLists.txt b/costa/native/external/lapack/CMakeLists.txt new file mode 100644 index 000000000..df3abfc2c --- /dev/null +++ b/costa/native/external/lapack/CMakeLists.txt @@ -0,0 +1,106 @@ +cmake_minimum_required(VERSION 3.9.1) +enable_language(Fortran) +# add_compile_options(-Wall -Weffc++ -pedantic -std=c++0x) + +set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/lib) +set(SOURCES +cbdsqr.f chetrd.f clatbs.f ctrtrs.f dggsvp.f dlasd1.f dpttrs.f sdisna.f slaev2.f slatrs.f ssteqr.f zgerq2.f zlahef.f zptsv.f +cgbbrd.f chetrf.f clatdf.f ctzrqf.f dgtcon.f dlasd2.f dptts2.f second.f slaexc.f slatrz.f ssterf.f zgerqf.f zlahqr.f zptsvx.f +cgbcon.f chetri.f clatps.f ctzrzf.f dgtrfs.f dlasd3.f drscl.f sgbbrd.f slag2.f slatzm.f sstev.f zgesc2.f zlahrd.f zpttrf.f +cgbequ.f chetrs.f clatrd.f cung2l.f dgtsv.f dlasd4.f dsbev.f sgbcon.f slags2.f slauu2.f sstevd.f zgesdd.f zlaic1.f zpttrs.f +cgbrfs.f chgeqz.f clatrs.f cung2r.f dgtsvx.f dlasd5.f dsbevd.f sgbequ.f slagtf.f slauum.f sstevr.f zgesv.f zlals0.f zptts2.f +cgbsv.f chpcon.f clatrz.f cungbr.f dgttrf.f dlasd6.f dsbevx.f sgbrfs.f slagtm.f sopgtr.f sstevx.f zgesvd.f zlalsa.f zrot.f +cgbsvx.f chpev.f clatzm.f cunghr.f dgttrs.f dlasd7.f dsbgst.f sgbsv.f slagts.f sopmtr.f ssycon.f zgesvx.f zlalsd.f zspcon.f +cgbtf2.f chpevd.f clauu2.f cungl2.f dgtts2.f dlasd8.f dsbgv.f sgbsvx.f slagv2.f sorg2l.f ssyev.f zgetc2.f zlangb.f zspmv.f +cgbtrf.f chpevx.f clauum.f cunglq.f dhgeqz.f dlasd9.f dsbgvd.f sgbtf2.f slahqr.f sorg2r.f ssyevd.f zgetf2.f zlange.f zspr.f +cgbtrs.f chpgst.f cpbcon.f cungql.f dhsein.f dlasda.f dsbgvx.f sgbtrf.f slahrd.f sorgbr.f ssyevr.f zgetrf.f zlangt.f zsprfs.f +cgebak.f chpgv.f cpbequ.f cungqr.f dhseqr.f dlasdq.f dsbtrd.f sgbtrs.f slaic1.f sorghr.f ssyevx.f zgetri.f zlanhb.f zspsv.f +cgebal.f chpgvd.f cpbrfs.f cungr2.f dlabad.f dlasdt.f dsecnd.f sgebak.f slaln2.f sorgl2.f ssygs2.f zgetrs.f zlanhe.f zspsvx.f +cgebd2.f chpgvx.f cpbstf.f cungrq.f dlabrd.f dlaset.f dspcon.f sgebal.f slals0.f sorglq.f ssygst.f zggbak.f zlanhp.f zsptrf.f +cgebrd.f chprfs.f cpbsv.f cungtr.f dlacon.f dlasq1.f dspev.f sgebd2.f slalsa.f sorgql.f ssygv.f zggbal.f zlanhs.f zsptri.f +cgecon.f chpsv.f cpbsvx.f cunm2l.f dlacpy.f dlasq2.f dspevd.f sgebrd.f slalsd.f sorgqr.f ssygvd.f zgges.f zlanht.f zsptrs.f +cgeequ.f chpsvx.f cpbtf2.f cunm2r.f dladiv.f dlasq3.f dspevx.f sgecon.f slamch.f sorgr2.f ssygvx.f zggesx.f zlansb.f zstedc.f +cgees.f chptrd.f cpbtrf.f cunmbr.f dlae2.f dlasq4.f dspgst.f sgeequ.f slamrg.f sorgrq.f ssyrfs.f zggev.f zlansp.f zstegr.f +cgeesx.f chptrf.f cpbtrs.f cunmhr.f dlaebz.f dlasq5.f dspgv.f sgees.f slangb.f sorgtr.f ssysv.f zggevx.f zlansy.f zstein.f +cgeev.f chptri.f cpocon.f cunml2.f dlaed0.f dlasq6.f dspgvd.f sgeesx.f slange.f sorm2l.f ssysvx.f zggglm.f zlantb.f zsteqr.f +cgeevx.f chptrs.f cpoequ.f cunmlq.f dlaed1.f dlasr.f dspgvx.f sgeev.f slangt.f sorm2r.f ssytd2.f zgghrd.f zlantp.f zsycon.f +cgegs.f chsein.f cporfs.f cunmql.f dlaed2.f dlasrt.f dsprfs.f sgeevx.f slanhs.f sormbr.f ssytf2.f zgglse.f zlantr.f zsymv.f +cgegv.f chseqr.f cposv.f cunmqr.f dlaed3.f dlassq.f dspsv.f sgegs.f slansb.f sormhr.f ssytrd.f zggqrf.f zlapll.f zsyr.f +cgehd2.f clabrd.f cposvx.f cunmr2.f dlaed4.f dlasv2.f dspsvx.f sgegv.f slansp.f sorml2.f ssytrf.f zggrqf.f zlapmt.f zsyrfs.f +cgehrd.f clacgv.f cpotf2.f cunmr3.f dlaed5.f dlaswp.f dsptrd.f sgehd2.f slanst.f sormlq.f ssytri.f zggsvd.f zlaqgb.f zsysv.f +cgelq2.f clacon.f cpotrf.f cunmrq.f dlaed6.f dlasy2.f dsptrf.f sgehrd.f slansy.f sormql.f ssytrs.f zggsvp.f zlaqge.f zsysvx.f +cgelqf.f clacp2.f cpotri.f cunmrz.f dlaed7.f dlasyf.f dsptri.f sgelq2.f slantb.f sormqr.f stbcon.f zgtcon.f zlaqhb.f zsytf2.f +cgels.f clacpy.f cpotrs.f cunmtr.f dlaed8.f dlatbs.f dsptrs.f sgelqf.f slantp.f sormr2.f stbrfs.f zgtrfs.f zlaqhe.f zsytrf.f +cgelsd.f clacrm.f cppcon.f cupgtr.f dlaed9.f dlatdf.f dstebz.f sgels.f slantr.f sormr3.f stbtrs.f zgtsv.f zlaqhp.f zsytri.f +cgelss.f clacrt.f cppequ.f cupmtr.f dlaeda.f dlatps.f dstedc.f sgelsd.f slanv2.f sormrq.f stgevc.f zgtsvx.f zlaqp2.f zsytrs.f +cgelsx.f cladiv.f cpprfs.f dbdsdc.f dlaein.f dlatrd.f dstegr.f sgelss.f slapll.f sormrz.f stgex2.f zgttrf.f zlaqps.f ztbcon.f +cgelsy.f claed0.f cppsv.f dbdsqr.f dlaev2.f dlatrs.f dstein.f sgelsx.f slapmt.f sormtr.f stgexc.f zgttrs.f zlaqsb.f ztbrfs.f +cgeql2.f claed7.f cppsvx.f ddisna.f dlaexc.f dlatrz.f dsteqr.f sgelsy.f slapy2.f spbcon.f stgsen.f zgtts2.f zlaqsp.f ztbtrs.f +cgeqlf.f claed8.f cpptrf.f dgbbrd.f dlag2.f dlatzm.f dsterf.f sgeql2.f slapy3.f spbequ.f stgsja.f zhbev.f zlaqsy.f ztgevc.f +cgeqp3.f claein.f cpptri.f dgbcon.f dlags2.f dlauu2.f dstev.f sgeqlf.f slaqgb.f spbrfs.f stgsna.f zhbevd.f zlar1v.f ztgex2.f +cgeqpf.f claesy.f cpptrs.f dgbequ.f dlagtf.f dlauum.f dstevd.f sgeqp3.f slaqge.f spbstf.f stgsy2.f zhbevx.f zlar2v.f ztgexc.f +cgeqr2.f claev2.f cptcon.f dgbrfs.f dlagtm.f dopgtr.f dstevr.f sgeqpf.f slaqp2.f spbsv.f stgsyl.f zhbgst.f zlarcm.f ztgsen.f +cgeqrf.f clags2.f cpteqr.f dgbsv.f dlagts.f dopmtr.f dstevx.f sgeqr2.f slaqps.f spbsvx.f stpcon.f zhbgv.f zlarf.f ztgsja.f +cgerfs.f clagtm.f cptrfs.f dgbsvx.f dlagv2.f dorg2l.f dsycon.f sgeqrf.f slaqsb.f spbtf2.f stprfs.f zhbgvd.f zlarfb.f ztgsna.f +cgerq2.f clahef.f cptsv.f dgbtf2.f dlahqr.f dorg2r.f dsyev.f sgerfs.f slaqsp.f spbtrf.f stptri.f zhbgvx.f zlarfg.f ztgsy2.f +cgerqf.f clahqr.f cptsvx.f dgbtrf.f dlahrd.f dorgbr.f dsyevd.f sgerq2.f slaqsy.f spbtrs.f stptrs.f zhbtrd.f zlarft.f ztgsyl.f +cgesc2.f clahrd.f cpttrf.f dgbtrs.f dlaic1.f dorghr.f dsyevr.f sgerqf.f slaqtr.f spocon.f strcon.f zhecon.f zlarfx.f ztpcon.f +cgesdd.f claic1.f cpttrs.f dgebak.f dlaln2.f dorgl2.f dsyevx.f sgesc2.f slar1v.f spoequ.f strevc.f zheev.f zlargv.f ztprfs.f +cgesv.f clals0.f cptts2.f dgebal.f dlals0.f dorglq.f dsygs2.f sgesdd.f slar2v.f sporfs.f strexc.f zheevd.f zlarnv.f ztptri.f +cgesvd.f clalsa.f crot.f dgebd2.f dlalsa.f dorgql.f dsygst.f sgesv.f slarf.f sposv.f strrfs.f zheevr.f zlarrv.f ztptrs.f +cgesvx.f clalsd.f cspcon.f dgebrd.f dlalsd.f dorgqr.f dsygv.f sgesvd.f slarfb.f sposvx.f strsen.f zheevx.f zlartg.f ztrcon.f +cgetc2.f clangb.f cspmv.f dgecon.f dlamch.f dorgr2.f dsygvd.f sgesvx.f slarfg.f spotf2.f strsna.f zhegs2.f zlartv.f ztrevc.f +cgetf2.f clange.f cspr.f dgeequ.f dlamrg.f dorgrq.f dsygvx.f sgetc2.f slarft.f spotrf.f strsyl.f zhegst.f zlarz.f ztrexc.f +cgetrf.f clangt.f csprfs.f dgees.f dlangb.f dorgtr.f dsyrfs.f sgetf2.f slarfx.f spotri.f strti2.f zhegv.f zlarzb.f ztrrfs.f +cgetri.f clanhb.f cspsv.f dgeesx.f dlange.f dorm2l.f dsysv.f sgetrf.f slargv.f spotrs.f strtri.f zhegvd.f zlarzt.f ztrsen.f +cgetrs.f clanhe.f cspsvx.f dgeev.f dlangt.f dorm2r.f dsysvx.f sgetri.f slarnv.f sppcon.f strtrs.f zhegvx.f zlascl.f ztrsna.f +cggbak.f clanhp.f csptrf.f dgeevx.f dlanhs.f dormbr.f dsytd2.f sgetrs.f slarrb.f sppequ.f stzrqf.f zherfs.f zlaset.f ztrsyl.f +cggbal.f clanhs.f csptri.f dgegs.f dlansb.f dormhr.f dsytf2.f sggbak.f slarre.f spprfs.f stzrzf.f zhesv.f zlasr.f ztrti2.f +cgges.f clanht.f csptrs.f dgegv.f dlansp.f dorml2.f dsytrd.f sggbal.f slarrf.f sppsv.f xerbla.f zhesvx.f zlassq.f ztrtri.f +cggesx.f clansb.f csrot.f dgehd2.f dlanst.f dormlq.f dsytrf.f sgges.f slarrv.f sppsvx.f zbdsqr.f zhetd2.f zlaswp.f ztrtrs.f +cggev.f clansp.f csrscl.f dgehrd.f dlansy.f dormql.f dsytri.f sggesx.f slartg.f spptrf.f zdrot.f zhetf2.f zlasyf.f ztzrqf.f +cggevx.f clansy.f cstedc.f dgelq2.f dlantb.f dormqr.f dsytrs.f sggev.f slartv.f spptri.f zdrscl.f zhetrd.f zlatbs.f ztzrzf.f +cggglm.f clantb.f cstegr.f dgelqf.f dlantp.f dormr2.f dtbcon.f sggevx.f slaruv.f spptrs.f zgbbrd.f zhetrf.f zlatdf.f zung2l.f +cgghrd.f clantp.f cstein.f dgels.f dlantr.f dormr3.f dtbrfs.f sggglm.f slarz.f sptcon.f zgbcon.f zhetri.f zlatps.f zung2r.f +cgglse.f clantr.f csteqr.f dgelsd.f dlanv2.f dormrq.f dtbtrs.f sgghrd.f slarzb.f spteqr.f zgbequ.f zhetrs.f zlatrd.f zungbr.f +cggqrf.f clapll.f csycon.f dgelss.f dlapll.f dormrz.f dtgevc.f sgglse.f slarzt.f sptrfs.f zgbrfs.f zhgeqz.f zlatrs.f zunghr.f +cggrqf.f clapmt.f csymv.f dgelsx.f dlapmt.f dormtr.f dtgex2.f sggqrf.f slas2.f sptsv.f zgbsv.f zhpcon.f zlatrz.f zungl2.f +cggsvd.f claqgb.f csyr.f dgelsy.f dlapy2.f dpbcon.f dtgexc.f sggrqf.f slascl.f sptsvx.f zgbsvx.f zhpev.f zlatzm.f zunglq.f +cggsvp.f claqge.f csyrfs.f dgeql2.f dlapy3.f dpbequ.f dtgsen.f sggsvd.f slasd0.f spttrf.f zgbtf2.f zhpevd.f zlauu2.f zungql.f +cgtcon.f claqhb.f csysv.f dgeqlf.f dlaqgb.f dpbrfs.f dtgsja.f sggsvp.f slasd1.f spttrs.f zgbtrf.f zhpevx.f zlauum.f zungqr.f +cgtrfs.f claqhe.f csysvx.f dgeqp3.f dlaqge.f dpbstf.f dtgsna.f sgtcon.f slasd2.f sptts2.f zgbtrs.f zhpgst.f zpbcon.f zungr2.f +cgtsv.f claqhp.f csytf2.f dgeqpf.f dlaqp2.f dpbsv.f dtgsy2.f sgtrfs.f slasd3.f srscl.f zgebak.f zhpgv.f zpbequ.f zungrq.f +cgtsvx.f claqp2.f csytrf.f dgeqr2.f dlaqps.f dpbsvx.f dtgsyl.f sgtsv.f slasd4.f ssbev.f zgebal.f zhpgvd.f zpbrfs.f zungtr.f +cgttrf.f claqps.f csytri.f dgeqrf.f dlaqsb.f dpbtf2.f dtpcon.f sgtsvx.f slasd5.f ssbevd.f zgebd2.f zhpgvx.f zpbstf.f zunm2l.f +cgttrs.f claqsb.f csytrs.f dgerfs.f dlaqsp.f dpbtrf.f dtprfs.f sgttrf.f slasd6.f ssbevx.f zgebrd.f zhprfs.f zpbsv.f zunm2r.f +cgtts2.f claqsp.f ctbcon.f dgerq2.f dlaqsy.f dpbtrs.f dtptri.f sgttrs.f slasd7.f ssbgst.f zgecon.f zhpsv.f zpbsvx.f zunmbr.f +chbev.f claqsy.f ctbrfs.f dgerqf.f dlaqtr.f dpocon.f dtptrs.f sgtts2.f slasd8.f ssbgv.f zgeequ.f zhpsvx.f zpbtf2.f zunmhr.f +chbevd.f clar1v.f ctbtrs.f dgesc2.f dlar1v.f dpoequ.f dtrcon.f shgeqz.f slasd9.f ssbgvd.f zgees.f zhptrd.f zpbtrf.f zunml2.f +chbevx.f clar2v.f ctgevc.f dgesdd.f dlar2v.f dporfs.f dtrevc.f shsein.f slasda.f ssbgvx.f zgeesx.f zhptrf.f zpbtrs.f zunmlq.f +chbgst.f clarcm.f ctgex2.f dgesv.f dlarf.f dposv.f dtrexc.f shseqr.f slasdq.f ssbtrd.f zgeev.f zhptri.f zpocon.f zunmql.f +chbgv.f clarf.f ctgexc.f dgesvd.f dlarfb.f dposvx.f dtrrfs.f slabad.f slasdt.f sspcon.f zgeevx.f zhptrs.f zpoequ.f zunmqr.f +chbgvd.f clarfb.f ctgsen.f dgesvx.f dlarfg.f dpotf2.f dtrsen.f slabrd.f slaset.f sspev.f zgegs.f zhsein.f zporfs.f zunmr2.f +chbgvx.f clarfg.f ctgsja.f dgetc2.f dlarft.f dpotrf.f dtrsna.f slacon.f slasq1.f sspevd.f zgegv.f zhseqr.f zposv.f zunmr3.f +chbtrd.f clarft.f ctgsna.f dgetf2.f dlarfx.f dpotri.f dtrsyl.f slacpy.f slasq2.f sspevx.f zgehd2.f zlabrd.f zposvx.f zunmrq.f +checon.f clarfx.f ctgsy2.f dgetrf.f dlargv.f dpotrs.f dtrti2.f sladiv.f slasq3.f sspgst.f zgehrd.f zlacgv.f zpotf2.f zunmrz.f +cheev.f clargv.f ctgsyl.f dgetri.f dlarnv.f dppcon.f dtrtri.f slae2.f slasq4.f sspgv.f zgelq2.f zlacon.f zpotrf.f zunmtr.f +cheevd.f clarnv.f ctpcon.f dgetrs.f dlarrb.f dppequ.f dtrtrs.f slaebz.f slasq5.f sspgvd.f zgelqf.f zlacp2.f zpotri.f zupgtr.f +cheevr.f clarrv.f ctprfs.f dggbak.f dlarre.f dpprfs.f dtzrqf.f slaed0.f slasq6.f sspgvx.f zgels.f zlacpy.f zpotrs.f zupmtr.f +cheevx.f clartg.f ctptri.f dggbal.f dlarrf.f dppsv.f dtzrzf.f slaed1.f slasr.f ssprfs.f zgelsd.f zlacrm.f zppcon.f +chegs2.f clartv.f ctptrs.f dgges.f dlarrv.f dppsvx.f dzsum1.f slaed2.f slasrt.f sspsv.f zgelss.f zlacrt.f zppequ.f +chegst.f clarz.f ctrcon.f dggesx.f dlartg.f dpptrf.f icmax1.f slaed3.f slassq.f sspsvx.f zgelsx.f zladiv.f zpprfs.f +chegv.f clarzb.f ctrevc.f dggev.f dlartv.f dpptri.f ieeeck.f slaed4.f slasv2.f ssptrd.f zgelsy.f zlaed0.f zppsv.f +chegvd.f clarzt.f ctrexc.f dggevx.f dlaruv.f dpptrs.f ilaenv.f slaed5.f slaswp.f ssptrf.f zgeql2.f zlaed7.f zppsvx.f +chegvx.f clascl.f ctrrfs.f dggglm.f dlarz.f dptcon.f izmax1.f slaed6.f slasy2.f ssptri.f zgeqlf.f zlaed8.f zpptrf.f +cherfs.f claset.f ctrsen.f dgghrd.f dlarzb.f dpteqr.f lsame.f slaed7.f slasyf.f ssptrs.f zgeqp3.f zlaein.f zpptri.f +chesv.f clasr.f ctrsna.f dgglse.f dlarzt.f dptrfs.f lsamen.f slaed8.f slatbs.f sstebz.f zgeqpf.f zlaesy.f zpptrs.f +chesvx.f classq.f ctrsyl.f dggqrf.f dlas2.f dptsv.f sbdsdc.f slaed9.f slatdf.f sstedc.f zgeqr2.f zlaev2.f zptcon.f +chetd2.f claswp.f ctrti2.f dggrqf.f dlascl.f dptsvx.f sbdsqr.f slaeda.f slatps.f sstegr.f zgeqrf.f zlags2.f zpteqr.f +chetf2.f clasyf.f ctrtri.f dggsvd.f dlasd0.f dpttrf.f scsum1.f slaein.f slatrd.f sstein.f zgerfs.f zlagtm.f zptrfs.f +) + +set_source_files_properties(slamch.f PROPERTIES COMPILE_FLAGS -O0) +set_source_files_properties(dlamch.f PROPERTIES COMPILE_FLAGS -O0) + +add_library(lapack SHARED ${SOURCES}) + diff --git a/costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.c b/costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.c new file mode 100644 index 000000000..a331b2210 --- /dev/null +++ b/costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.c @@ -0,0 +1,258 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_obsdescr_combine.c $ +$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2010 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_handles.h +\brief Implemenation of a generic OpenDA observation description wrapper for use in native COSTA code +*/ + + +#include +#include +#include "cta.h" + +#include "jni_datatypes.h" +#include "jni.h" +#include "jni_cta_utils.h" + + +#define CLASSNAME "CtaObsDescr_nativeToJava" + +#define IDEBUG (0) + +typedef struct { +CTA_Handle myhandle; +sJni_Class classInstance; +} CTAI_ObsDescr_NativeToJava; + + +#define METHOD "CTAI_ObsDescr_nativeToJava_Create_Size" +void CTAI_ObsDescr_nativeToJava_Create_Size(int *memsize, int *retval){ + *memsize=(int) sizeof(CTAI_ObsDescr_NativeToJava); + *retval=CTA_OK; +}; + +#undef METHOD +#define METHOD "CTAI_ObsDescr_nativeToJava_Create_Init" +void CTAI_ObsDescr_nativeToJava_Create_Init(CTA_ObsDescr *myhandle, CTAI_ObsDescr_NativeToJava *descr, + CTA_Handle *usrdat, int *retval) +{ + JNIEnv *env; + sJni_Class *classIn; + + //Set the runtime and class from userdata + + CTA_Handle_GetData(*usrdat, (void *) &classIn); + env = classIn->env; + + descr->classInstance.env = env; + descr->classInstance.cls = (*env)->NewGlobalRef(env, classIn->cls); + descr->classInstance.obj = (*env)->NewGlobalRef(env, classIn->obj); + descr->myhandle = *myhandle; + +}; + +#undef METHOD +#define METHOD "CTAI_ObsDescr_nativeToJava_CreateSel" +void CTAI_ObsDescr_nativeToJava_CreateSel(CTAI_ObsDescr_NativeToJava *descr, + CTA_String *selection, CTA_RelTable *reltab, + CTA_ObsDescr *myhandle_out, + CTAI_ObsDescr_NativeToJava *descrout, int *retval){ + +CTA_WRITE_ERROR("Method not implemented"); +*retval=CTA_NOT_IMPLEMENTED; + +} + + + +#undef METHOD +#define METHOD "CTAI_ObsDescr_nativeToJava_Get_Keys" +void CTAI_ObsDescr_nativeToJava_Get_Keys(CTAI_ObsDescr_NativeToJava *descr, + CTA_Vector *keys, int *retval) +{ + jobject jKeys; + + // Find method + JNIEnv *env= descr->classInstance.env; + jclass cls= descr->classInstance.cls; + jobject obj = descr->classInstance.obj; + jmethodID func; + + /* Gewoon aanroepen zoals in alle voorbeelden staat */ + func = (*env)->GetMethodID(env, cls, "getPropertyKeys", "()[Ljava/lang/String;"); + if (func) { + // call method and get keys + jKeys =(*env)->CallObjectMethod(env,obj,func); + + // convert java array of strings to native vector of strings + *retval = cta_jni_JavaStringVecToNativeVec(env, (jobjectArray) jKeys, *keys); + } + else { + CTA_WRITE_ERROR("Cannot find java method name=getPropertyKeys signature=()[Ljava/lang/String;"); + *retval = CTA_INTERNAL_ERROR; + } +}; + +#undef METHOD +#define METHOD "CTAI_ObsDescr_nativeToJava_Property_Count" +void CTAI_ObsDescr_nativeToJava_Property_Count( + CTAI_ObsDescr_NativeToJava *descr, + int *nkeys, + int *retval) +{ + // Find method + JNIEnv *env = descr->classInstance.env; + jobject obj = descr->classInstance.obj; + jclass cls = descr->classInstance.cls; + + jmethodID func = (*env)->GetMethodID(env, cls, "getPropertyCount", "()I"); + if (func) { + // Call method + *nkeys=(*env)->CallIntMethod(env,obj,func); + *retval=CTA_OK; + } + else { + CTA_WRITE_ERROR("Cannot find java method name=getPropertyCount signature=()I"); + *retval=CTA_INTERNAL_ERROR; + } +} +; + +#undef METHOD +#define METHOD "CTAI_ObsDescr_nativeToJava_Observation_Count" + void CTAI_ObsDescr_nativeToJava_Observation_Count( + CTAI_ObsDescr_NativeToJava *descr, + int *nobs, + int *retval) +{ + + JNIEnv *env= descr->classInstance.env; + jobject obj = descr->classInstance.obj; + jclass cls= descr->classInstance.cls; + + jmethodID func = (*env)->GetMethodID(env, cls, "getObservationCount", "()I"); + if (func) { + + // Call method + *nobs=(*env)->CallIntMethod(env,obj,func); + *retval=CTA_OK; + } + else { + CTA_WRITE_ERROR("Cannot find java method name=getObservationCount signature=()I"); + *retval=CTA_INTERNAL_ERROR; + } +}; + + +#undef METHOD +#define METHOD "CTAI_ObsDescr_nativeToJava_Get_Properties" +void CTAI_ObsDescr_nativeToJava_Get_Properties( + CTAI_ObsDescr_NativeToJava *descr, + const char *Key, + CTA_Vector *Properties, + CTA_Datatype *datatype, + int *retval) +{ + jobject jProperties; + jstring jKey; + + // Find method + JNIEnv *env= descr->classInstance.env; + jclass cls= descr->classInstance.cls; + jobject obj = descr->classInstance.obj; + jmethodID func; + + func = (*env)->GetMethodID(env, cls, "getStringProperties", "(Ljava/lang/String;)[Ljava/lang/String;"); + if (func) { + // call method and get properties + jKey= (*env)->NewStringUTF(env, Key); + + jProperties =(*env)->CallObjectMethod(env,obj,func,jKey); + + // convert java array of strings to native vector + if (jProperties){ + *retval = cta_jni_JavaStringVecToNativeVec(env, (jobjectArray) jProperties, *Properties); + } + else { + CTA_WRITE_ERROR("getStringProperties returned no properties"); + *retval = CTA_JNI_INTERFACING_ERROR; + } + } + else { + CTA_WRITE_ERROR("Cannot find java method name=getStringProperties signature=(Ljava/lang/String;)[Ljava/lang/String;"); + *retval = CTA_INTERNAL_ERROR; + } +} + + +#undef METHOD +#define METHOD "CTAI_ObsDescr_nativeToJava_Free" +void CTAI_ObsDescr_nativeToJava_Free( + CTAI_ObsDescr_NativeToJava *descr, + int *retval) +{ + // Release the global references + JNIEnv *env = descr->classInstance.env; + (*env)->DeleteGlobalRef(env, descr->classInstance.cls); + (*env)->DeleteGlobalRef(env, descr->classInstance.obj); + + // Since we did not allocate any memory, there is nothing to do + *retval=CTA_OK; +} + + + +#undef METHOD +#define METHOD "CTA_ObsDescr_nativeToJava_initialise" +void CTA_ObsDescr_nativeToJava_initialise(CTA_ObsDescrClass *hobsdescrcl) +{ + CTA_Intf hintf=0; + CTA_Func h_func[I_CTA_OBSDESCR_NUMFUNC]; + + int ierr; + + // The vector h_func is filled with COSTA-function handles of the + // implementations in this file. + ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Create_Size, hintf, + &h_func[I_CTA_OBSDESCR_CREATE_SIZE]); + ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Create_Init, hintf, + &h_func[I_CTA_OBSDESCR_CREATE_INIT]); + ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Property_Count, hintf, + &h_func[I_CTA_OBSDESCR_COUNT_PROPERTIES]); + ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Get_Properties, hintf, + &h_func[I_CTA_OBSDESCR_GET_PROPERTIES]); + ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Observation_Count, hintf, + &h_func[I_CTA_OBSDESCR_COUNT_OBSERVATIONS]); + ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Get_Keys, hintf, + &h_func[I_CTA_OBSDESCR_GET_KEYS]); + ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Free, hintf, + &h_func[I_CTA_OBSDESCR_FREE]); + ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_CreateSel, hintf, + &h_func[I_CTA_OBSDESCR_SELECTION]); + ierr=CTA_ObsDescr_DefineClass("cta_obsdescr_nativeToJava",h_func,hobsdescrcl); + +} + + + + diff --git a/costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.h b/costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.h new file mode 100644 index 000000000..b642da618 --- /dev/null +++ b/costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.h @@ -0,0 +1,33 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_obsdescr_combine.c $ +$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2010 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef __cplusplus +extern "C" { +#endif + +void CTA_ObsDescr_nativeToJava_initialise(CTA_ObsDescrClass *hobsdescrcl); + +#ifdef __cplusplus +} +#endif + + diff --git a/costa/native/openda/bridge/include/jni_cta_utils.h b/costa/native/openda/bridge/include/jni_cta_utils.h new file mode 100644 index 000000000..c22e4c6d6 --- /dev/null +++ b/costa/native/openda/bridge/include/jni_cta_utils.h @@ -0,0 +1,45 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/openda/bridge/jni_cta_utils.h $ +$Revision: 1553 $, $Date: 2010-05-07 14:55:23 +0200 (Fri, 07 May 2010) $ + +OpenDA interface for COSTA. +Copyright (C) 2007 Stef Hummel / Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + + +#include "jni.h" +#include "cta.h" +void cta_jni_setJavaEnv(JNIEnv *env); +#ifdef __cplusplus +void cta_jni_exception(JNIEnv *env, const char *name, const char *msg); +#endif +void cta_jni_exception(JNIEnv *env, const char *name, const char *msg, int retVal); +void cta_jni_free(JNIEnv * env, jobject obj_this); +CTA_Handle cta_jni_getCtaHandle(JNIEnv * env, jobject obj_this); + +void cta_jni_setCtaHandle(JNIEnv * env, jobject obj_this, CTA_Handle ctaHandle); +void cta_jni_ExternalMessageWriter(char *className, char *method, char *message, char type); +void cta_jni_ExternalMessageWriterSetID(JNIEnv *env, jclass classID, jmethodID methodID); + +#ifdef __cplusplus +extern "C" { +#endif +int cta_jni_JavaStringVecToNativeVec(JNIEnv *env, jobjectArray jArray, CTA_Vector cVec); + +#ifdef __cplusplus +} +#endif diff --git a/costa/native/openda/bridge/include/jni_datatypes.h b/costa/native/openda/bridge/include/jni_datatypes.h new file mode 100644 index 000000000..4ec184a68 --- /dev/null +++ b/costa/native/openda/bridge/include/jni_datatypes.h @@ -0,0 +1,31 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/openda/bridge/jni_cta_utils.h $ +$Revision: 1257 $, $Date: 2009-12-21 13:42:01 +0100 (ma, 21 dec 2009) $ + +OpenDA interface for COSTA. +Copyright (C) 2010 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + + +#include + +typedef struct { +JNIEnv *env; //Poiner to Java runtime environment; +jclass cls; //JNI handle to the java class instance; +jobject obj; //JNI object handle; +} sJni_Class; + diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaArray.h b/costa/native/openda/bridge/include/org_openda_costa_CtaArray.h new file mode 100644 index 000000000..b3b315950 --- /dev/null +++ b/costa/native/openda/bridge/include/org_openda_costa_CtaArray.h @@ -0,0 +1,213 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_openda_costa_CtaArray */ + +#ifndef _Included_org_openda_costa_CtaArray +#define _Included_org_openda_costa_CtaArray +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_openda_costa_CtaArray + * Method: create + * Signature: ([D[I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_create + (JNIEnv *, jobject, jdoubleArray, jintArray); + +/* + * Class: org_openda_costa_CtaArray + * Method: getNumberOfDimensions + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaArray_getNumberOfDimensions + (JNIEnv *, jobject); + +/* + * Class: org_openda_costa_CtaArray + * Method: getDimensions + * Signature: ()[I + */ +JNIEXPORT jintArray JNICALL Java_org_openda_costa_CtaArray_getDimensions + (JNIEnv *, jobject); + +/* + * Class: org_openda_costa_CtaArray + * Method: length + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaArray_length + (JNIEnv *, jobject); + +/* + * Class: org_openda_costa_CtaArray + * Method: getValuesAsDoubles + * Signature: ()[D + */ +JNIEXPORT jdoubleArray JNICALL Java_org_openda_costa_CtaArray_getValuesAsDoubles__ + (JNIEnv *, jobject); + +/* + * Class: org_openda_costa_CtaArray + * Method: getValuesAsDoubles + * Signature: (Z)[D + */ +JNIEXPORT jdoubleArray JNICALL Java_org_openda_costa_CtaArray_getValuesAsDoubles__Z + (JNIEnv *, jobject, jboolean); + +/* + * Class: org_openda_costa_CtaArray + * Method: getValuesAsDoubles + * Signature: (II)[D + */ +JNIEXPORT jdoubleArray JNICALL Java_org_openda_costa_CtaArray_getValuesAsDoubles__II + (JNIEnv *, jobject, jint, jint); + +/* + * Class: org_openda_costa_CtaArray + * Method: getValueAsDouble + * Signature: (I)D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaArray_getValueAsDouble__I + (JNIEnv *, jobject, jint); + +/* + * Class: org_openda_costa_CtaArray + * Method: getValueAsDouble + * Signature: ([I)D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaArray_getValueAsDouble___3I + (JNIEnv *, jobject, jintArray); + +/* + * Class: org_openda_costa_CtaArray + * Method: setConstant + * Signature: (D)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setConstant + (JNIEnv *, jobject, jdouble); + +/* + * Class: org_openda_costa_CtaArray + * Method: setValuesAsDoubles + * Signature: ([D)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setValuesAsDoubles___3D + (JNIEnv *, jobject, jdoubleArray); + +/* + * Class: org_openda_costa_CtaArray + * Method: setValuesAsDoubles + * Signature: (II[D)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setValuesAsDoubles__II_3D + (JNIEnv *, jobject, jint, jint, jdoubleArray); + +/* + * Class: org_openda_costa_CtaArray + * Method: setValueAsDouble + * Signature: (ID)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setValueAsDouble__ID + (JNIEnv *, jobject, jint, jdouble); + +/* + * Class: org_openda_costa_CtaArray + * Method: setValueAsDouble + * Signature: ([ID)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setValueAsDouble___3ID + (JNIEnv *, jobject, jintArray, jdouble); + +/* + * Class: org_openda_costa_CtaArray + * Method: axpyOnValues + * Signature: (D[D)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_axpyOnValues + (JNIEnv *, jobject, jdouble, jdoubleArray); + +/* + * Class: org_openda_costa_CtaArray + * Method: multiplyValues + * Signature: ([D)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_multiplyValues + (JNIEnv *, jobject, jdoubleArray); + +/* + * Class: org_openda_costa_CtaArray + * Method: reshape + * Signature: ([I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_reshape + (JNIEnv *, jobject, jintArray); + +/* + * Class: org_openda_costa_CtaArray + * Method: getSlice + * Signature: (II)Lorg/openda/interfaces/IArray; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaArray_getSlice__II + (JNIEnv *, jobject, jint, jint); + +/* + * Class: org_openda_costa_CtaArray + * Method: getSlice + * Signature: (III)Lorg/openda/interfaces/IArray; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaArray_getSlice__III + (JNIEnv *, jobject, jint, jint, jint); + +/* + * Class: org_openda_costa_CtaArray + * Method: getSliceAsDoubles + * Signature: (III)[D + */ +JNIEXPORT jdoubleArray JNICALL Java_org_openda_costa_CtaArray_getSliceAsDoubles + (JNIEnv *, jobject, jint, jint, jint); + +/* + * Class: org_openda_costa_CtaArray + * Method: setSlice + * Signature: ([DII)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setSlice___3DII + (JNIEnv *, jobject, jdoubleArray, jint, jint); + +/* + * Class: org_openda_costa_CtaArray + * Method: setSliceBySlice + * Signature: (Lorg/openda/interfaces/IArray;II)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setSliceBySlice__Lorg_openda_interfaces_IArray_2II + (JNIEnv *, jobject, jobject, jint, jint); + +/* + * Class: org_openda_costa_CtaArray + * Method: setSliceBySlice + * Signature: (Lorg/openda/interfaces/IArray;III)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setSliceBySlice__Lorg_openda_interfaces_IArray_2III + (JNIEnv *, jobject, jobject, jint, jint, jint); + +/* + * Class: org_openda_costa_CtaArray + * Method: setSlice + * Signature: ([DIII)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setSlice___3DIII + (JNIEnv *, jobject, jdoubleArray, jint, jint, jint); + +/* + * Class: org_openda_costa_CtaArray + * Method: valueIndex + * Signature: ([I)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaArray_valueIndex + (JNIEnv *, jobject, jintArray); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaInitialize.h b/costa/native/openda/bridge/include/org_openda_costa_CtaInitialize.h new file mode 100644 index 000000000..35acb09f4 --- /dev/null +++ b/costa/native/openda/bridge/include/org_openda_costa_CtaInitialize.h @@ -0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_openda_costa_CtaInitialize */ + +#ifndef _Included_org_openda_costa_CtaInitialize +#define _Included_org_openda_costa_CtaInitialize +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_openda_costa_CtaInitialize + * Method: ctaInit + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaInitialize_ctaInit + (JNIEnv *, jclass); + +/* + * Class: org_openda_costa_CtaInitialize + * Method: setRandomSeed + * Signature: (I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaInitialize_setRandomSeed + (JNIEnv *, jclass, jint); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaModelState.h b/costa/native/openda/bridge/include/org_openda_costa_CtaModelState.h new file mode 100644 index 000000000..04f443ec4 --- /dev/null +++ b/costa/native/openda/bridge/include/org_openda_costa_CtaModelState.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_openda_costa_CtaModelState */ + +#ifndef _Included_org_openda_costa_CtaModelState +#define _Included_org_openda_costa_CtaModelState +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_openda_costa_CtaModelState + * Method: nativeSavePersistentState + * Signature: (Ljava/lang/String;Ljava/lang/String;I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaModelState_nativeSavePersistentState + (JNIEnv *, jobject, jstring, jstring, jint); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaObject.h b/costa/native/openda/bridge/include/org_openda_costa_CtaObject.h new file mode 100644 index 000000000..730327211 --- /dev/null +++ b/costa/native/openda/bridge/include/org_openda_costa_CtaObject.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_openda_costa_CtaObject */ + +#ifndef _Included_org_openda_costa_CtaObject +#define _Included_org_openda_costa_CtaObject +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_openda_costa_CtaObject + * Method: ctaFree + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaObject_ctaFree + (JNIEnv *, jobject); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaObservationDescriptions.h b/costa/native/openda/bridge/include/org_openda_costa_CtaObservationDescriptions.h new file mode 100644 index 000000000..e4f58d10a --- /dev/null +++ b/costa/native/openda/bridge/include/org_openda_costa_CtaObservationDescriptions.h @@ -0,0 +1,77 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_openda_costa_CtaObservationDescriptions */ + +#ifndef _Included_org_openda_costa_CtaObservationDescriptions +#define _Included_org_openda_costa_CtaObservationDescriptions +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_openda_costa_CtaObservationDescriptions + * Method: getPropertyKeys + * Signature: ()[Ljava/lang/String; + */ +JNIEXPORT jobjectArray JNICALL Java_org_openda_costa_CtaObservationDescriptions_getPropertyKeys + (JNIEnv *, jobject); + +/* + * Class: org_openda_costa_CtaObservationDescriptions + * Method: ctaCreateSelection + * Signature: (Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaObservationDescriptions_ctaCreateSelection + (JNIEnv *, jobject, jstring); + +/* + * Class: org_openda_costa_CtaObservationDescriptions + * Method: ctaCreateTimeSelection + * Signature: (I)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaObservationDescriptions_ctaCreateTimeSelection + (JNIEnv *, jobject, jint); + +/* + * Class: org_openda_costa_CtaObservationDescriptions + * Method: getObservationCount + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaObservationDescriptions_getObservationCount + (JNIEnv *, jobject); + +/* + * Class: org_openda_costa_CtaObservationDescriptions + * Method: getPropertyCount + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaObservationDescriptions_getPropertyCount + (JNIEnv *, jobject); + +/* + * Class: org_openda_costa_CtaObservationDescriptions + * Method: getStringProperties + * Signature: (Ljava/lang/String;)[Ljava/lang/String; + */ +JNIEXPORT jobjectArray JNICALL Java_org_openda_costa_CtaObservationDescriptions_getStringProperties + (JNIEnv *, jobject, jstring); + +/* + * Class: org_openda_costa_CtaObservationDescriptions + * Method: ctaCreateNativeToJavaObserver + * Signature: (Lorg/openda/interfaces/IObservationDescriptions;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaObservationDescriptions_ctaCreateNativeToJavaObserver + (JNIEnv *, jobject, jobject); + +/* + * Class: org_openda_costa_CtaObservationDescriptions + * Method: ctaGetValueProperties + * Signature: (Ljava/lang/String;I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaObservationDescriptions_ctaGetValueProperties + (JNIEnv *, jobject, jstring, jint); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaOpenDaModel.h b/costa/native/openda/bridge/include/org_openda_costa_CtaOpenDaModel.h new file mode 100644 index 000000000..b468a42b1 --- /dev/null +++ b/costa/native/openda/bridge/include/org_openda_costa_CtaOpenDaModel.h @@ -0,0 +1,189 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_openda_costa_CtaOpenDaModel */ + +#ifndef _Included_org_openda_costa_CtaOpenDaModel +#define _Included_org_openda_costa_CtaOpenDaModel +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: getCurrentTime + * Signature: ()Lorg/openda/interfaces/ITime; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_getCurrentTime + (JNIEnv *, jobject); + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: getTimeHorizon + * Signature: ()Lorg/openda/interfaces/ITime; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_getTimeHorizon + (JNIEnv *, jobject); + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: setAutomaticNoiseGeneration + * Signature: (Z)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_setAutomaticNoiseGeneration + (JNIEnv *, jobject, jboolean); + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaCreate + * Signature: (Ljava/lang/String;Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaCreate + (JNIEnv *, jobject, jstring, jstring); + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaAnnounceObservedValues + * Signature: (Lorg/openda/interfaces/IObservationDescriptions;)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaAnnounceObservedValues + (JNIEnv *, jobject, jobject); + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaAxpyOnParameters + * Signature: (DLorg/openda/interfaces/IVector;)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaAxpyOnParameters + (JNIEnv *, jobject, jdouble, jobject); + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaAxpyOnStateDomain + * Signature: (DLorg/openda/interfaces/IVector;I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaAxpyOnStateDomain + (JNIEnv *, jobject, jdouble, jobject, jint); + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaCompute + * Signature: (Lorg/openda/interfaces/ITime;)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaCompute + (JNIEnv *, jobject, jobject); + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaGetObservedValues + * Signature: (Lorg/openda/interfaces/IObservationDescriptions;Lorg/openda/costa/CtaVector;)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetObservedValues + (JNIEnv *, jobject, jobject, jobject); + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaGetState + * Signature: ()Lorg/openda/interfaces/IVector; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetState + (JNIEnv *, jobject); + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaGetStateDomain + * Signature: (I)Lorg/openda/interfaces/IVector; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetStateDomain + (JNIEnv *, jobject, jint); + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaGetParameters + * Signature: ()Lorg/openda/interfaces/IVector; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetParameters + (JNIEnv *, jobject); + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaGetStateScaling + * Signature: ()Lorg/openda/interfaces/IVector; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetStateScaling + (JNIEnv *, jobject); + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaReleaseInternalState + * Signature: (Ljava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaReleaseInternalState + (JNIEnv *, jobject, jstring); + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaLoadPersistentState + * Signature: (Ljava/lang/String;)Lorg/openda/interfaces/IModelState; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaLoadPersistentState + (JNIEnv *, jobject, jstring); + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaSaveInternalState + * Signature: ()Lorg/openda/interfaces/IModelState; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaSaveInternalState + (JNIEnv *, jobject); + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaRestoreInternalState + * Signature: (Ljava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaRestoreInternalState + (JNIEnv *, jobject, jstring); + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaGetObservedLocalization + * Signature: (Lorg/openda/interfaces/IObservationDescriptions;D)[I + */ +JNIEXPORT jintArray JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetObservedLocalization + (JNIEnv *, jobject, jobject, jdouble); + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaGetObservedLocalizationDomain + * Signature: (Lorg/openda/interfaces/IObservationDescriptions;DI)[I + */ +JNIEXPORT jintArray JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetObservedLocalizationDomain + (JNIEnv *, jobject, jobject, jdouble, jint); + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaSetParameters + * Signature: (Lorg/openda/interfaces/IVector;)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaSetParameters + (JNIEnv *, jobject, jobject); + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaGetNumDomains + * Signature: (D)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetNumDomains + (JNIEnv *, jobject, jdouble); + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaGetObservationSelector + * Signature: (Lorg/openda/interfaces/IObservationDescriptions;DI)[I + */ +JNIEXPORT jintArray JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetObservationSelector + (JNIEnv *, jobject, jobject, jdouble, jint); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaParallel.h b/costa/native/openda/bridge/include/org_openda_costa_CtaParallel.h new file mode 100644 index 000000000..c030c8860 --- /dev/null +++ b/costa/native/openda/bridge/include/org_openda_costa_CtaParallel.h @@ -0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_openda_costa_CtaParallel */ + +#ifndef _Included_org_openda_costa_CtaParallel +#define _Included_org_openda_costa_CtaParallel +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_openda_costa_CtaParallel + * Method: nativeInit + * Signature: (Ljava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaParallel_nativeInit + (JNIEnv *, jclass, jstring); + +/* + * Class: org_openda_costa_CtaParallel + * Method: finalizeParallelEnvironment + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaParallel_finalizeParallelEnvironment + (JNIEnv *, jclass); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaRelationTable.h b/costa/native/openda/bridge/include/org_openda_costa_CtaRelationTable.h new file mode 100644 index 000000000..334c45895 --- /dev/null +++ b/costa/native/openda/bridge/include/org_openda_costa_CtaRelationTable.h @@ -0,0 +1,61 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_costa_CtaRelationTable */ + +#ifndef _Included_org_costa_CtaRelationTable +#define _Included_org_costa_CtaRelationTable +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_costa_CtaRelationTable + * Method: ctaCreate + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaRelationTable_ctaCreate + (JNIEnv *, jobject); + +/* + * Class: org_costa_CtaRelationTable + * Method: SetTableCombine + * Signature: (Lorg/openda/interfaces/IRelationTable;ZLorg/openda/interfaces/IRelationTable;Z)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaRelationTable_SetTableCombine + (JNIEnv *, jobject, jobject, jboolean, jobject, jboolean); + +/* + * Class: org_costa_CtaRelationTable + * Method: SetSelect + * Signature: (Lorg/openda/interfaces/IVector;)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaRelationTable_SetSelect + (JNIEnv *, jobject, jobject); + +/* + * Class: org_costa_CtaRelationTable + * Method: apply + * Signature: (Lorg/openda/interfaces/IVector;Lorg/openda/interfaces/IVector;)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaRelationTable_apply + (JNIEnv *, jobject, jobject, jobject); + +/* + * Class: org_costa_CtaRelationTable + * Method: applyInv + * Signature: (Lorg/openda/interfaces/IVector;Lorg/openda/interfaces/IVector;)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaRelationTable_applyInv + (JNIEnv *, jobject, jobject, jobject); + +/* + * Class: org_costa_CtaRelationTable + * Method: free + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaRelationTable_free + (JNIEnv *, jobject); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaStochObserver.h b/costa/native/openda/bridge/include/org_openda_costa_CtaStochObserver.h new file mode 100644 index 000000000..86ddef6ad --- /dev/null +++ b/costa/native/openda/bridge/include/org_openda_costa_CtaStochObserver.h @@ -0,0 +1,101 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_openda_costa_CtaStochObserver */ + +#ifndef _Included_org_openda_costa_CtaStochObserver +#define _Included_org_openda_costa_CtaStochObserver +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_openda_costa_CtaStochObserver + * Method: ctaCreateNative + * Signature: (Ljava/lang/String;Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaStochObserver_ctaCreateNative + (JNIEnv *, jobject, jstring, jstring); + +/* + * Class: org_openda_costa_CtaStochObserver + * Method: ctaGetStandardDeviation + * Signature: (I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetStandardDeviation + (JNIEnv *, jobject, jint); + +/* + * Class: org_openda_costa_CtaStochObserver + * Method: getCount + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaStochObserver_getCount + (JNIEnv *, jobject); + +/* + * Class: org_openda_costa_CtaStochObserver + * Method: ctaGetExpectations + * Signature: (I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetExpectations + (JNIEnv *, jobject, jint); + +/* + * Class: org_openda_costa_CtaStochObserver + * Method: ctaGetObservationDescriptions + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetObservationDescriptions + (JNIEnv *, jobject); + +/* + * Class: org_openda_costa_CtaStochObserver + * Method: ctaGetRealizations + * Signature: (I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetRealizations + (JNIEnv *, jobject, jint); + +/* + * Class: org_openda_costa_CtaStochObserver + * Method: ctaGetValues + * Signature: (I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetValues + (JNIEnv *, jobject, jint); + +/* + * Class: org_openda_costa_CtaStochObserver + * Method: ctaGetVariances + * Signature: (I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetVariances + (JNIEnv *, jobject, jint); + +/* + * Class: org_openda_costa_CtaStochObserver + * Method: ctaCreateSelection + * Signature: (Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaStochObserver_ctaCreateSelection + (JNIEnv *, jobject, jstring); + +/* + * Class: org_openda_costa_CtaStochObserver + * Method: ctaCreateTimeSelection + * Signature: (I)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaStochObserver_ctaCreateTimeSelection + (JNIEnv *, jobject, jint); + +/* + * Class: org_openda_costa_CtaStochObserver + * Method: ctaEvaluatePDF + * Signature: (II)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaEvaluatePDF + (JNIEnv *, jobject, jint, jint); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaTime.h b/costa/native/openda/bridge/include/org_openda_costa_CtaTime.h new file mode 100644 index 000000000..c1da6bd47 --- /dev/null +++ b/costa/native/openda/bridge/include/org_openda_costa_CtaTime.h @@ -0,0 +1,157 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_costa_CtaTime */ + +#ifndef _Included_org_costa_CtaTime +#define _Included_org_costa_CtaTime +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_costa_CtaTime + * Method: create + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTime_create + (JNIEnv *, jobject); + +/* + * Class: org_costa_CtaTime + * Method: isStamp + * Signature: ()Z + */ +JNIEXPORT jboolean JNICALL Java_org_openda_costa_CtaTime_isStamp + (JNIEnv *, jobject); + +/* + * Class: org_costa_CtaTime + * Method: getMJD + * Signature: ()D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getMJD + (JNIEnv *, jobject); + +/* + * Class: org_costa_CtaTime + * Method: getUserTime + * Signature: ()D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getUserTime + (JNIEnv *, jobject); + +/* + * Class: org_costa_CtaTime + * Method: setMJD + * Signature: (D)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaTime_setMJD + (JNIEnv *, jobject, jdouble); + +/* + * Class: org_costa_CtaTime + * Method: setUserTime + * Signature: (D)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaTime_setUserTime + (JNIEnv *, jobject, jdouble); + +/* + * Class: org_costa_CtaTime + * Method: isSpan + * Signature: ()Z + */ +JNIEXPORT jboolean JNICALL Java_org_openda_costa_CtaTime_isSpan + (JNIEnv *, jobject); + +/* + * Class: org_costa_CtaTime + * Method: getStep + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_openda_costa_CtaTime_getStep + (JNIEnv *, jobject); + +/* + * Class: org_costa_CtaTime + * Method: getBeginMJD + * Signature: ()D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getBeginMJD + (JNIEnv *, jobject); + +/* + * Class: org_costa_CtaTime + * Method: getEndMJD + * Signature: ()D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getEndMJD + (JNIEnv *, jobject); + +/* + * Class: org_costa_CtaTime + * Method: getStepMJD + * Signature: ()D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getStepMJD + (JNIEnv *, jobject); + +/* + * Class: org_costa_CtaTime + * Method: getBeginUserTime + * Signature: ()D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getBeginUserTime + (JNIEnv *, jobject); + +/* + * Class: org_costa_CtaTime + * Method: getEndUserTime + * Signature: ()D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getEndUserTime + (JNIEnv *, jobject); + +/* + * Class: org_costa_CtaTime + * Method: getUserStep + * Signature: ()D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getUserStep + (JNIEnv *, jobject); + +/* + * Class: org_costa_CtaTime + * Method: setSpanMJD + * Signature: (DDD)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaTime_setSpanMJD + (JNIEnv *, jobject, jdouble, jdouble, jdouble); + +/* + * Class: org_costa_CtaTime + * Method: setUserSpan + * Signature: (DDD)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaTime_setUserSpan + (JNIEnv *, jobject, jdouble, jdouble, jdouble); + +/* + * Class: org_costa_CtaTime + * Method: getStepCount + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_openda_costa_CtaTime_getStepCount + (JNIEnv *, jobject); + +/* + * Class: org_costa_CtaTime + * Method: clone + * Signature: ()Lorg/openda/interfaces/ITime; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaTime_clone + (JNIEnv *, jobject); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaTreeVector.h b/costa/native/openda/bridge/include/org_openda_costa_CtaTreeVector.h new file mode 100644 index 000000000..8bea84740 --- /dev/null +++ b/costa/native/openda/bridge/include/org_openda_costa_CtaTreeVector.h @@ -0,0 +1,137 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_costa_CtaTreeVector */ + +#ifndef _Included_org_costa_CtaTreeVector +#define _Included_org_costa_CtaTreeVector +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_costa_CtaTreeVector + * Method: getId + * Signature: ()Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_openda_costa_CtaTreeVector_getId + (JNIEnv *, jobject); + +/* + * Class: org_costa_CtaTreeVector + * Method: ctaCreate_from_vector + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaCreateFromVector + (JNIEnv *, jobject, jstring, jstring, jint); + +/* + * Class: org_costa_CtaTreeVector + * Method: ctaCreateFromSubtreevectors + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaCreateFromSubtreevectors + (JNIEnv *, jobject, jstring, jstring, jint, jintArray); + +/* + * Class: org_costa_ctatreevector + * Method: ctaSetRegGrid + * Signature: ()V +*/ + JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaSetRegGrid + (JNIEnv * , jobject, jint, jint, jint, jdouble, jdouble, jdouble, + jdouble, jdouble, jdouble); + +/* + * Class: org_costa_CtaTreeVector + * Method: ctaGetSubTreeVector + * Signature: (Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaGetSubTreeVector + (JNIEnv *, jobject, jstring); + +/* + * Class: org_costa_CtaTreeVector + * Method: ctaGetSubTreeVectorId + * Signature: ()Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_openda_costa_CtaTreeVector_ctaGetSubTreeVectorId +(JNIEnv *env, jobject obj_this, jint index); + + +/* + * Class: org_costa_CtaTreeVector + * Method: ctaGetNumSubTreeVectors + * Signature: (Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaGetNumSubTreeVectors +(JNIEnv *env, jobject obj_this); + +/* + * Class: org_costa_CtaTreeVector + * Method: ctaImport + * Signature: (Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaImport +(JNIEnv *, jobject , jint ); + +/* + * Class: org_costa_CtaTreeVector + * Method: ctaVImport + * Signature: (Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaVImport +(JNIEnv *, jobject , jint ); + + + +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaNetcdfClose + (JNIEnv *, jobject, jint ); + +/* + * Class: org_resultwriters_NetcdfResultWriter + * Method: ctaNetcdfClose + * Signature: (Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_resultwriters_NetcdfResultWriter_ctaNetcdfClose + (JNIEnv *, jobject, jint); + + + +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaNetcdfInit + (JNIEnv *, jobject, jstring, jstring); +/* + * Class: org_resultwriters_NetcdfResultWriter + * Method: ctaNetcdfInit + * Signature: (Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_resultwriters_NetcdfResultWriter_ctaNetcdfInit + (JNIEnv *, jobject, jstring, jstring); + +/* + * Class: org_costa_CtaTreeVector + * Method: ctaExport + * Signature: (Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaExport + (JNIEnv *, jobject, jint); + + +/* + * Class: org_costa_CtaTreeVector + * Method: getCaption + * Signature: ()Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_openda_costa_CtaTreeVector_getCaption + (JNIEnv *, jobject); + +/* + * Class: org_costa_CtaTreeVector + * Method: clone + * Signature: ()Lorg/openda/interfaces/ITreeVector; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaTreeVector_clone + (JNIEnv *, jobject); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaUtils.h b/costa/native/openda/bridge/include/org_openda_costa_CtaUtils.h new file mode 100644 index 000000000..2511876ef --- /dev/null +++ b/costa/native/openda/bridge/include/org_openda_costa_CtaUtils.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_openda_costa_CtaUtils */ + +#ifndef _Included_org_openda_costa_CtaUtils +#define _Included_org_openda_costa_CtaUtils +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_openda_costa_CtaUtils + * Method: print_memory + * Signature: (Ljava/lang/String;I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaUtils_print_1memory + (JNIEnv *, jclass, jstring, jint); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaVector.h b/costa/native/openda/bridge/include/org_openda_costa_CtaVector.h new file mode 100644 index 000000000..3cc509377 --- /dev/null +++ b/costa/native/openda/bridge/include/org_openda_costa_CtaVector.h @@ -0,0 +1,155 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_costa_CtaVector */ + +#ifndef _Included_org_costa_CtaVector +#define _Included_org_costa_CtaVector +#ifdef __cplusplus +extern "C" { +#endif + + +/* + * Class: org_costa_CtaVector + * Method: ctaPrint handles + * Signature: ()Lorg/openda/interfaces/IVector; + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaPrintHandles +(JNIEnv *, jobject, jstring); + + + +/* + * Class: org_costa_CtaVector + * Method: clone + * Signature: ()Lorg/openda/interfaces/IVector; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaVector_clone + (JNIEnv *, jobject); + +/* + * Class: org_costa_CtaVector + * Method: setConstant + * Signature: (D)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_setConstant + (JNIEnv *, jobject, jdouble); + +/* + * Class: org_costa_CtaVector + * Method: scale + * Signature: (D)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_scale + (JNIEnv *, jobject, jdouble); + +/* + * Class: org_costa_CtaVector + * Method: setValues + * Signature: ([D)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_setValues + (JNIEnv *, jobject, jdoubleArray); + +/* + * Class: org_costa_CtaVector + * Method: getValues + * Signature: ()[D + */ +JNIEXPORT jdoubleArray JNICALL Java_org_openda_costa_CtaVector_getValues + (JNIEnv *, jobject); + +/* + * Class: org_costa_CtaVector + * Method: setValue + * Signature: (ID)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_setValue + (JNIEnv *, jobject, jint, jdouble); + +/* + * Class: org_costa_CtaVector + * Method: getValue + * Signature: (I)D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaVector_getValue + (JNIEnv *, jobject, jint); + +/* + * Class: org_costa_CtaVector + * Method: getSize + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaVector_getSize + (JNIEnv *, jobject); + +/* + * Class: org_costa_CtaVector + * Method: norm2 + * Signature: ()D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaVector_norm2 + (JNIEnv *, jobject); + +/* + * Class: org_costa_CtaVector + * Method: ctaCreate + * Signature: (I)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaVector_ctaCreate + (JNIEnv *, jobject, jint); + +/* + * Class: org_costa_CtaVector + * Method: ctaDotProduct + * Signature: (I)D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaVector_ctaDotProduct + (JNIEnv *, jobject, jint); + +/* + * Class: org_costa_CtaVector + * Method: ctaPointwiseDivide + * Signature: (I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaPointwiseDivide + (JNIEnv *, jobject, jint); + +/* + * Class: org_costa_CtaVector + * Method: ctaPointwiseMultiply + * Signature: (I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaPointwiseMultiply + (JNIEnv *, jobject, jint); + +/* + * Class: org_costa_CtaVector + * Method: ctaSetValues + * Signature: (I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaSetValues + (JNIEnv *, jobject, jint); + +/* + * Class: org_costa_CtaVector + * Method: ctaAxpy + * Signature: (DI)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaAxpy + (JNIEnv *, jobject, jdouble, jint); + + +/* + * Class: org_costa_CtaVector + * Method: ctaSqrt + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaSqrt + (JNIEnv *, jobject); + + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/openda/bridge/include/org_openda_resultwriters_NativeResultWriter.h b/costa/native/openda/bridge/include/org_openda_resultwriters_NativeResultWriter.h new file mode 100644 index 000000000..784d9c037 --- /dev/null +++ b/costa/native/openda/bridge/include/org_openda_resultwriters_NativeResultWriter.h @@ -0,0 +1,45 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_openda_resultwriters_NativeResultWriter */ + +#ifndef _Included_org_openda_resultwriters_NativeResultWriter +#define _Included_org_openda_resultwriters_NativeResultWriter +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_openda_resultwriters_NativeResultWriter + * Method: putMessage + * Signature: (ILjava/lang/String;Ljava/lang/String;Ljava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_openda_resultwriters_NativeResultWriter_putMessage + (JNIEnv *, jobject, jint, jstring, jstring, jstring); + +/* + * Class: org_openda_resultwriters_NativeResultWriter + * Method: putValue + * Signature: (ILjava/lang/String;Ljava/lang/String;Ljava/lang/String;IILjava/lang/String;I)V + */ +JNIEXPORT void JNICALL Java_org_openda_resultwriters_NativeResultWriter_putValue + (JNIEnv *, jobject, jint, jstring, jstring, jstring, jint, jint, jstring, jint); + +/* + * Class: org_openda_resultwriters_NativeResultWriter + * Method: putIterationReport + * Signature: (ILjava/lang/String;Ljava/lang/String;IDI)V + */ +JNIEXPORT void JNICALL Java_org_openda_resultwriters_NativeResultWriter_putIterationReport + (JNIEnv *, jobject, jint, jstring, jstring, jint, jdouble, jint); + +/* + * Class: org_openda_resultwriters_NativeResultWriter + * Method: free + * Signature: (I)V + */ +JNIEXPORT void JNICALL Java_org_openda_resultwriters_NativeResultWriter_free + (JNIEnv *, jobject, jint); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/openda/bridge/include/org_openda_resultwriters_NetcdfResultWriterNative.h b/costa/native/openda/bridge/include/org_openda_resultwriters_NetcdfResultWriterNative.h new file mode 100644 index 000000000..9774ecd35 --- /dev/null +++ b/costa/native/openda/bridge/include/org_openda_resultwriters_NetcdfResultWriterNative.h @@ -0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_openda_resultwriters_NetcdfResultWriterNative */ + +#ifndef _Included_org_openda_resultwriters_NetcdfResultWriterNative +#define _Included_org_openda_resultwriters_NetcdfResultWriterNative +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_openda_resultwriters_NetcdfResultWriterNative + * Method: ctaNetcdfInit + * Signature: (Ljava/lang/String;Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_resultwriters_NetcdfResultWriterNative_ctaNetcdfInit + (JNIEnv *, jobject, jstring, jstring); + +/* + * Class: org_openda_resultwriters_NetcdfResultWriterNative + * Method: ctaNetcdfClose + * Signature: (I)I + */ +JNIEXPORT jint JNICALL Java_org_openda_resultwriters_NetcdfResultWriterNative_ctaNetcdfClose + (JNIEnv *, jobject, jint); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/costa/native/openda/bridge/src/CMakeLists.txt b/costa/native/openda/bridge/src/CMakeLists.txt new file mode 100644 index 000000000..7e4c9e1b7 --- /dev/null +++ b/costa/native/openda/bridge/src/CMakeLists.txt @@ -0,0 +1,26 @@ +cmake_minimum_required(VERSION 3.9.1) +find_package(LibXml2 REQUIRED) +find_package(JNI REQUIRED) + +set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/lib) +set(SOURCES + jni_cta_utils.cpp org_openda_costa_CtaRelationTable.cpp + org_openda_costa_CtaArray.cpp org_openda_costa_CtaStochObserver.cpp + org_openda_costa_CtaInitialize.cpp org_openda_costa_CtaTime.cpp + org_openda_costa_CtaModelState.cpp org_openda_costa_CtaTreeVector.cpp + org_openda_costa_CtaObject.cpp org_openda_costa_CtaUtils.cpp + org_openda_costa_CtaObservationDescriptions.cpp org_openda_costa_CtaVector.cpp + org_openda_costa_CtaOpenDaModel.cpp org_openda_resultwriters_NativeResultWriter.cpp + org_openda_costa_CtaParallel.cpp org_openda_resultwriters_NetcdfResultWriterNative.cpp +) + +add_library(opendabridge SHARED ${SOURCES}) + + +target_include_directories(opendabridge PUBLIC ../include) +target_include_directories(opendabridge PUBLIC ${CMAKE_SOURCE_DIR}/cta/include) +target_include_directories(opendabridge PUBLIC ${CMAKE_SOURCE_DIR}/external) +target_include_directories(opendabridge PUBLIC ${LIBXML2_INCLUDE_DIR}) +target_include_directories(opendabridge PUBLIC ${JNI_INCLUDE_DIRS}) +#target_link_libraries(cta INTERFACE libxml) + diff --git a/costa/native/openda/bridge/src/jni_cta_utils.cpp b/costa/native/openda/bridge/src/jni_cta_utils.cpp new file mode 100644 index 000000000..3df977437 --- /dev/null +++ b/costa/native/openda/bridge/src/jni_cta_utils.cpp @@ -0,0 +1,220 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/openda/bridge/jni_cta_utils.cpp $ +$Revision: 2694 $, $Date: 2011-08-24 08:32:07 +0200 (Wed, 24 Aug 2011) $ + +OpenDA interface for COSTA. +Copyright (C) 2007 Stef Hummel / Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#define CLASSNAME "CtaObsDescr_nativeToJava" +#define IDEBUG 0 + +#define MAX_EXCEPTION_LEN 256 + +#include +#include +#include "jni_cta_utils.h" +#include "cta.h" + + +static jmethodID javaIDWriter; +static jclass javaClass; +static JNIEnv* javaEnv; + + +#define METHOD "cta_jni_setJavaEnv" +void cta_jni_setJavaEnv(JNIEnv *env){ + javaEnv=env; +} + +#undef METHOD +#define METHOD "cta_jni_exception" +void cta_jni_exception(JNIEnv *env, const char *ctaClassName, const char *message, int retVal) +{ + char messageString[MAX_EXCEPTION_LEN+1]; + sprintf(messageString, "%s, Costa Error Code=%d", message, retVal); + cta_jni_exception(env, ctaClassName, messageString); + return; +} + +#undef METHOD +#define METHOD "cta_jni_exception" +void cta_jni_exception(JNIEnv *env, const char *ctaClassName, const char *message) +{ + char messageString[MAX_EXCEPTION_LEN+1]; + sprintf(messageString, "%s: %s", ctaClassName, message); + jclass cls = env->FindClass("java/lang/RuntimeException"); + /* if cls is NULL, an exception has already been thrown */ + if (cls != NULL) { + env->ThrowNew(cls, messageString); + } else { + env->FatalError("Could not get RuntimeException class"); + } + /* free the local ref */ + env->DeleteLocalRef(cls); +} + +#undef METHOD +#define METHOD "cta_jni_free" +void cta_jni_free(JNIEnv * env, jobject obj_this) { + + CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this); + + if ( ctaHandle != CTA_NULL ) + { + CTA_Handle_Free_All(&ctaHandle); + jclass class_CtaObject = env->GetObjectClass(obj_this); + jfieldID ctaHandleField = env->GetFieldID(class_CtaObject, "ctaHandle", "I"); + env->SetIntField(obj_this, ctaHandleField, ctaHandle); + } + +} + + + +#undef METHOD +#define METHOD "cta_jni_getCtaHandle" +CTA_Handle cta_jni_getCtaHandle(JNIEnv * env, jobject obj_this) { + + jclass class_CtaObject = env->GetObjectClass(obj_this); + jfieldID ctaHandleField = env->GetFieldID(class_CtaObject, "ctaHandle", "I"); + return env->GetIntField(obj_this, ctaHandleField); + +} + +#undef METHOD +#define METHOD "cta_jni_setCtaHandle" +void cta_jni_setCtaHandle(JNIEnv * env, jobject obj_this, CTA_Handle ctaHandle) { + + jclass class_CtaObject = env->GetObjectClass(obj_this); + jfieldID ctaHandleField = env->GetFieldID(class_CtaObject, "ctaHandle", "I"); + + env->SetIntField(obj_this, ctaHandleField, ctaHandle); +} + +#undef METHOD +#define METHOD "cta_jni_ExternalMessageWriter" +void cta_jni_ExternalMessageWriter(char *className, char *method, char *message, char type){ + + jstring jClassName, jMethod ,jMessage,jType; + char sType[2]; + sType[0]=type; + sType[1]=0; + + // Create 4 Java strings + jClassName =(javaEnv)->NewStringUTF(className); + jMethod =(javaEnv)->NewStringUTF(method); + jMessage =(javaEnv)->NewStringUTF(message); + jType =(javaEnv)->NewStringUTF(sType); + + + // Get the Java class of the message writer + jclass cls = javaEnv->FindClass("org/openda/costa/CtaMessageWriter"); + jmethodID my_method=NULL; + if (cls){ + my_method = javaEnv->GetStaticMethodID(cls, "Write", "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V"); + } + + if (my_method){ + (javaEnv)->CallStaticVoidMethod(cls, my_method, jClassName, jMethod, jMessage, jType); + } + else { + printf("Null JNI pointers found. I cannot call the message writer; the message to write is\n"); + printf("class:%s method:%s message:%s type:%s\n", className, method, message, sType); + } +} + +#undef METHOD +#define METHOD "cta_jni_ExternalMessageWriterSetID" +void cta_jni_ExternalMessageWriterSetID(JNIEnv *env, jclass classID, jmethodID methodID){ + javaIDWriter =methodID; + javaClass =classID; + javaEnv =env; +}; + +#undef METHOD +#define METHOD "cta_jni_JavaStringVecToNativeStringVec" +int cta_jni_JavaStringVecToNativeVec(JNIEnv *env, jobjectArray jArray, CTA_Vector cVec){ + CTA_String aString; + jint lenArray; + int nVec, ival; + jstring j_str; + jboolean isCopy; + CTA_Datatype datatype; + double val; + + // Check dimensions + CTA_Vector_GetSize(cVec, &nVec); + lenArray = env->GetArrayLength(jArray); + if (nVec == lenArray) { + + // Create work string + CTA_String_Create(&aString); + + // Get the datatype of the vector + CTA_Vector_GetDatatype(cVec, &datatype); + + // Unpack the java vector + int retval=CTA_OK; + for (int i=0; iGetObjectArrayElement(jArray, i); + c_str = env->GetStringUTFChars(j_str,&isCopy); + + + // Store java string in COSTA string or convert value to double + if (datatype == CTA_STRING) { + if (IDEBUG) printf("cta_jni_JavaStringVecToNativeStringVec :string %d is %s\n",i+1,c_str); + CTA_String_Set(aString,c_str); + } + else { + val=atof(c_str); + if (IDEBUG) printf("cta_jni_JavaStringVecToNativeStringVec :value %d is %f\n",i+1,val); + + } + env->ReleaseStringUTFChars(j_str, c_str); + + // Store value in COSTA vector + if (datatype==CTA_STRING) { + // Put COSTA string into array of strings + retval=CTA_Vector_SetVal(cVec, i+1, &aString, CTA_STRING); + } + else { + if (datatype == CTA_INTEGER){ + ival = (int) val; + retval=CTA_Vector_SetVal(cVec, i+1, &ival, CTA_INTEGER); + } + else { + retval=CTA_Vector_SetVal(cVec, i+1, &val, CTA_DOUBLE); + } + } + } + CTA_String_Free(&aString); + return retval; + } + else { + char message[256]; + sprintf(message,"Dimension error: the size of the native array is %d and the java array has size %d",nVec,lenArray); + CTA_WRITE_ERROR(message); + return CTA_DIMENSION_ERROR; + } + + + + + +} diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaArray.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaArray.cpp new file mode 100644 index 000000000..5a6024719 --- /dev/null +++ b/costa/native/openda/bridge/src/org_openda_costa_CtaArray.cpp @@ -0,0 +1,600 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaVector.cpp $ +$Revision: 2738 $, $Date: 2011-09-05 10:48:32 +0200 (Mon, 05 Sep 2011) $ + +OpenDA interface for COSTA. +Copyright (C) 2012 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "org_openda_costa_CtaArray.h" +#include "cta_array.h" +#include "jni_cta_utils.h" +#include "cta_errors.h" +#include "cta_defaults.h" + +/* + * Class: org_openda_costa_CtaArray + * Method: create + * Signature: ([D[I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_create + (JNIEnv *env, jobject obj_this, jdoubleArray jValues, jintArray jDimensions){ + + cta_jni_setJavaEnv(env); + + // Get dimension and pointers to the data of the java arrays + //int nValues = env->GetArrayLength(jValues); + jboolean isCopy = JNI_FALSE; + jdouble * cValues = env->GetDoubleArrayElements(jValues, &isCopy); + + int nDimensions = env->GetArrayLength(jDimensions); + jint * cDimensions = env->GetIntArrayElements(jDimensions, &isCopy); + + // Call native constructor + CTA_Array h; + int ierr=CTA_Array_CreateAsDoubles(cValues, nDimensions, (int*) cDimensions, &h); + if ( ierr != CTA_OK ) { + cta_jni_exception(env, "CtaArray", "Could create new Array", ierr); + } + + // Set the native handle in the java class + cta_jni_setCtaHandle(env, obj_this, (CTA_Handle) h); + + // Release the data of the arrays + env->ReleaseDoubleArrayElements(jValues, cValues,0); + env->ReleaseIntArrayElements(jDimensions, cDimensions,0); +} + +/* + * Class: org_openda_costa_CtaArray + * Method: getNumberOfDimensions + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaArray_getNumberOfDimensions + (JNIEnv *env, jobject obj_this){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + return (jint) CTA_Array_getNumberOfDimensions(h); +} + +/* + * Class: org_openda_costa_CtaArray + * Method: getDimensions + * Signature: ()[I + */ +JNIEXPORT jintArray JNICALL Java_org_openda_costa_CtaArray_getDimensions + (JNIEnv *env, jobject obj_this){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + + int nDimensions=CTA_Array_getNumberOfDimensions(h); + jintArray jDimensions = env->NewIntArray(nDimensions); + jint * cDimensions = new jint[nDimensions]; + + int ierr = CTA_Array_getDimensions(h, (int*) cDimensions); + if (ierr == CTA_OK){ + env->SetIntArrayRegion(jDimensions, 0, nDimensions, cDimensions); + } + else { + cta_jni_exception(env, "CtaArray", "could not call getDimensions", ierr); + } + delete [] cDimensions; + return jDimensions; +} + +/* + * Class: org_openda_costa_CtaArray + * Method: length + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaArray_length + (JNIEnv *env, jobject obj_this){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + + return (jint) CTA_Array_length(h); + +} + +/* + * Class: org_openda_costa_CtaArray + * Method: getValuesAsDoubles + * Signature: ()[D + */ +JNIEXPORT jdoubleArray JNICALL Java_org_openda_costa_CtaArray_getValuesAsDoubles__ + (JNIEnv *env, jobject obj_this){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + int n = CTA_Array_length(h); + jdoubleArray jValues = env->NewDoubleArray(n); + double *cValues = new double[n]; + + int ierr = CTA_Array_getValuesAsDoubles(h, cValues); + if (ierr == CTA_OK){ + env->SetDoubleArrayRegion(jValues, 0, n, cValues); + } + else { + cta_jni_exception(env, "CtaArray", "could not call getValuesAsDoubles", ierr); + } + delete [] cValues; + return jValues; +} + +/* + * Class: org_openda_costa_CtaArray + * Method: getValuesAsDoubles + * Signature: (Z)[D + */ +JNIEXPORT jdoubleArray JNICALL Java_org_openda_costa_CtaArray_getValuesAsDoubles__Z + (JNIEnv *env, jobject obj_this, jboolean){ + + //Note since this is a JNI binding we ignore the boolean + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + int n = CTA_Array_length(h); + jdoubleArray jValues = env->NewDoubleArray(n); + double *cValues = new double[n]; + + int ierr = CTA_Array_getValuesAsDoubles(h, cValues); + if (ierr == CTA_OK){ + env->SetDoubleArrayRegion(jValues, 0, n, cValues); + } + else { + cta_jni_exception(env, "CtaArray", "could not call getValuesAsDoubles", ierr); + } + delete [] cValues; + return jValues; +} + +/* + * Class: org_openda_costa_CtaArray + * Method: getValuesAsDoubles + * Signature: (II)[D + */ +JNIEXPORT jdoubleArray JNICALL Java_org_openda_costa_CtaArray_getValuesAsDoubles__II + (JNIEnv *env, jobject obj_this, jint firstIndex, jint lastIndex){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + int n = lastIndex-firstIndex+1; + jdoubleArray jValues = env->NewDoubleArray(n); + double *cValues = new double[n]; + + int ierr = CTA_Array_getValuesAsDoubles_indexrange(h, firstIndex, lastIndex, cValues); + if (ierr == CTA_OK){ + env->SetDoubleArrayRegion(jValues, 0, n, cValues); + } + else { + cta_jni_exception(env, "CtaArray", "could not call getValuesAsDoubles_indexrange", ierr); + } + delete [] cValues; + return jValues; + + +} + +/* + * Class: org_openda_costa_CtaArray + * Method: getValueAsDouble + * Signature: (I)D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaArray_getValueAsDouble__I + (JNIEnv *env, jobject obj_this, jint index){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + jdouble cValue=-99.0; + + int ierr = CTA_Array_getValueAsDoubles_index(h, index, &cValue); + if (ierr!=CTA_OK){ + cta_jni_exception(env, "CtaArray", "could not call getValuesAsDoubles_indexrange", ierr); + } + return cValue; +} + +/* + * Class: org_openda_costa_CtaArray + * Method: getValueAsDouble + * Signature: ([I)D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaArray_getValueAsDouble___3I + (JNIEnv *env, jobject obj_this, jintArray jIndices){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + + jboolean isCopy = JNI_FALSE; + jint * cIndices = env->GetIntArrayElements(jIndices, &isCopy); + int nIndices = env->GetArrayLength(jIndices); + + jdouble cValue; + int ierr = CTA_Array_getValueAsDouble_indices(h, nIndices, (int *) cIndices, &cValue); + if (ierr != CTA_OK){ + cta_jni_exception(env, "CtaArray", "could not call CTA_Array_getValueAsDouble_indices", ierr); + } + env->ReleaseIntArrayElements(jIndices, cIndices,0); + + return cValue; +} + +/* + * Class: org_openda_costa_CtaArray + * Method: setConstant + * Signature: (D)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setConstant + (JNIEnv *env, jobject obj_this, jdouble jValue){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + int ierr = CTA_Array_setConstant(h, jValue); + if (ierr != CTA_OK){ + cta_jni_exception(env, "CtaArray", "could not call CTA_Array_setConstant", ierr); + } +} + +/* + * Class: org_openda_costa_CtaArray + * Method: setValuesAsDoubles + * Signature: ([D)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setValuesAsDoubles___3D + (JNIEnv *env, jobject obj_this, jdoubleArray jValues){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + + jboolean isCopy = JNI_FALSE; + jdouble * cValues = env->GetDoubleArrayElements(jValues, &isCopy); + int length = env->GetArrayLength(jValues); + + int ierr = CTA_Array_setValuesAsDoubles(h, cValues, length); + if ( ierr != CTA_OK ) { + cta_jni_exception(env, "CtaArray", "Could call CTA_Array_setValuesAsDoubles", ierr); + } + env->ReleaseDoubleArrayElements(jValues, cValues,0); +} + +/* + * Class: org_openda_costa_CtaArray + * Method: setValuesAsDoubles + * Signature: (II[D)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setValuesAsDoubles__II_3D + (JNIEnv *env, jobject obj_this, jint firstIndex, jint lastIndex, jdoubleArray jValues){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + + jboolean isCopy = JNI_FALSE; + jdouble * cValues = env->GetDoubleArrayElements(jValues, &isCopy); + + int ierr = CTA_Array_setValuesAsDoubles_indexrange(h, firstIndex, lastIndex, cValues); + if ( ierr != CTA_OK ) { + cta_jni_exception(env, "CtaArray", "Could call CTA_Array_setValuesAsDoubles_indexrange", ierr); + } + env->ReleaseDoubleArrayElements(jValues, cValues,0); +} + +/* + * Class: org_openda_costa_CtaArray + * Method: setValueAsDouble + * Signature: (ID)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setValueAsDouble__ID + (JNIEnv *env, jobject obj_this, jint jIndex, jdouble jValue){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + + int ierr = CTA_Array_setValueAsDouble_index(h, jIndex, jValue); + if ( ierr != CTA_OK ) { + cta_jni_exception(env, "CtaArray", "Could call CTA_Array_setValueAsDouble", ierr); + } +} + +/* + * Class: org_openda_costa_CtaArray + * Method: setValueAsDouble + * Signature: ([ID)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setValueAsDouble___3ID + (JNIEnv *env, jobject obj_this, jintArray jIndices, jdouble jValue){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + + jboolean isCopy = JNI_FALSE; + jint * cIndices = env->GetIntArrayElements(jIndices, &isCopy); + int nIndices = env->GetArrayLength(jIndices); + + + int ierr = CTA_Array_setValueAsDouble_indices(h, nIndices, (int *) cIndices, jValue); + if ( ierr != CTA_OK ) { + cta_jni_exception(env, "CtaArray", "Could call setValueAsDouble_indices", ierr); + } + env->ReleaseIntArrayElements(jIndices, cIndices,0); + +} + +/* + * Class: org_openda_costa_CtaArray + * Method: axpyOnValues + * Signature: (D[D)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_axpyOnValues + (JNIEnv *env, jobject obj_this, jdouble jAlpha, jdoubleArray jValues){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + + jboolean isCopy = JNI_FALSE; + jdouble * cValues = env->GetDoubleArrayElements(jValues, &isCopy); + + int ierr = CTA_Array_axpyOnValues(h, jAlpha, cValues); + if ( ierr != CTA_OK ) { + cta_jni_exception(env, "CtaArray", "Could call CTA_Array_axpyOnValues", ierr); + } + env->ReleaseDoubleArrayElements(jValues, cValues,0); +} + +/* + * Class: org_openda_costa_CtaArray + * Method: multiplyValues + * Signature: ([D)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_multiplyValues + (JNIEnv *env, jobject obj_this, jdoubleArray jValues){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + + jboolean isCopy = JNI_FALSE; + jdouble * cValues = env->GetDoubleArrayElements(jValues, &isCopy); + + int ierr = CTA_Array_multiplyValues(h, cValues); + if ( ierr != CTA_OK ) { + cta_jni_exception(env, "CtaArray", "Could call CTA_Array_axpyOnValues", ierr); + } + env->ReleaseDoubleArrayElements(jValues, cValues,0); +} + +/* + * Class: org_openda_costa_CtaArray + * Method: reshape + * Signature: ([I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_reshape + (JNIEnv *env, jobject obj_this, jintArray jDimensions){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + + jboolean isCopy = JNI_FALSE; + int nDimensions = env->GetArrayLength(jDimensions); + jint * cDimensions = env->GetIntArrayElements(jDimensions, &isCopy); + + int ierr = CTA_Array_reshape(h, nDimensions, (int *) cDimensions); + if ( ierr != CTA_OK ) { + cta_jni_exception(env, "CtaArray", "Could call CTA_Array_reshape", ierr); + } + env->ReleaseIntArrayElements(jDimensions, cDimensions,0); +} + +/* + * Class: org_openda_costa_CtaArray + * Method: getSlice + * Signature: (II)Lorg/openda/interfaces/IArray; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaArray_getSlice__II + (JNIEnv *env, jobject obj_this, jint jDimension, jint jIndex){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + + CTA_Array h_out; + jobject jArray=NULL; + + int ierr = CTA_Array_getSlice(h, jDimension, jIndex, &h_out); + if ( ierr == CTA_OK ) { + /* Create a Java CtaVector */ + jclass clsArray = env->FindClass("org/openda/costa/CtaArray"); + jmethodID constructorID = env->GetMethodID (clsArray, "", "()V"); + jArray = env->NewObject(clsArray, constructorID); + + cta_jni_setCtaHandle(env, jArray, h_out); + } + else { + cta_jni_exception(env, "CtaArray", "Could call CTA_Array_getSlice", ierr); + } + return jArray; +} + +/* + * Class: org_openda_costa_CtaArray + * Method: getSlice + * Signature: (III)Lorg/openda/interfaces/IArray; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaArray_getSlice__III + (JNIEnv *env, jobject obj_this, jint jDimension, jint jMinIndex, jint jMaxIndex){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + + CTA_Array h_out; + jobject jArray=NULL; + + int ierr = CTA_Array_getSlice_range(h, jDimension, jMinIndex, jMaxIndex, &h_out); + if ( ierr == CTA_OK ) { + /* Create a Java CtaVector */ + jclass clsArray = env->FindClass("org/openda/costa/CtaArray"); + jmethodID constructorID = env->GetMethodID (clsArray, "", "()V"); + jArray = env->NewObject(clsArray, constructorID); + + cta_jni_setCtaHandle(env, jArray, h_out); + } + else { + cta_jni_exception(env, "CtaArray", "Could call CTA_Array_getSlice_range", ierr); + } + return jArray; +} + +/* + * Class: org_openda_costa_CtaArray + * Method: getSliceAsDoubles + * Signature: (III)[D + */ +JNIEXPORT jdoubleArray JNICALL Java_org_openda_costa_CtaArray_getSliceAsDoubles + (JNIEnv *env, jobject obj_this, jint jDimension, jint jMinIndex, jint jMaxIndex){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + + int nDimensions; + CTA_Array_getnDimensions(h, &nDimensions); + int *dimensions = new int[nDimensions]; + CTA_Array_getDimensions(h, dimensions); + + int n=jMaxIndex-jMinIndex+1; + int i; + for (i=0;iNewDoubleArray(n); + double *cValues = new double[n]; + + int ierr = CTA_Array_getSliceAsDoubles_range(h, jDimension, jMinIndex, jMaxIndex, cValues); + if (ierr == CTA_OK){ + env->SetDoubleArrayRegion(jValues, 0, n, cValues); + } + else { + cta_jni_exception(env, "CtaArray", "could not call CTA_Array_getSliceAsDoubles_range", ierr); + } + return jValues; + +} + +/* + * Class: org_openda_costa_CtaArray + * Method: setSlice + * Signature: ([DII)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setSlice___3DII + (JNIEnv *env, jobject obj_this, jdoubleArray jValues, jint jDimension, jint jIndex){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + + jboolean isCopy = JNI_FALSE; + jdouble * cValues = env->GetDoubleArrayElements(jValues, &isCopy); + + int ierr = CTA_Array_setSliceAsDoubles(h, cValues, jDimension, jIndex); + if (ierr != CTA_OK){ + cta_jni_exception(env, "CtaArray", "could not call CTA_Array_setSliceAsDoubles", ierr); + } + env->ReleaseDoubleArrayElements(jValues, cValues,0); +} + +/* + * Class: org_openda_costa_CtaArray + * Method: setSliceBySlice + * Signature: (Lorg/openda/interfaces/IArray;II)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setSliceBySlice__Lorg_openda_interfaces_IArray_2II + (JNIEnv *env, jobject obj_this, jobject jSlice, jint jDimension, jint jIndex){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + CTA_Array h_slice = (CTA_Array) cta_jni_getCtaHandle(env, jSlice); + + int ierr = CTA_Array_setSliceAsArray(h, h_slice, jDimension, jIndex); + if (ierr != CTA_OK){ + cta_jni_exception(env, "CtaArray", "could not call CTA_Array_setSliceAsArray", ierr); + } +} + +/* + * Class: org_openda_costa_CtaArray + * Method: setSliceBySlice + * Signature: (Lorg/openda/interfaces/IArray;III)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setSliceBySlice__Lorg_openda_interfaces_IArray_2III + (JNIEnv *env, jobject obj_this, jobject jSlice, jint jDimension, jint jMinIndex, jint jMaxIndex){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + CTA_Array h_slice = (CTA_Array) cta_jni_getCtaHandle(env, jSlice); + + int ierr = CTA_Array_setSliceAsArray_range(h, h_slice, jDimension, jMinIndex, jMaxIndex); + if (ierr != CTA_OK){ + cta_jni_exception(env, "CtaArray", "could not call CTA_Array_setSliceAsArray_range", ierr); + } +} + +/* + * Class: org_openda_costa_CtaArray + * Method: setSlice + * Signature: ([DIII)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setSlice___3DIII + (JNIEnv *env, jobject obj_this, jdoubleArray jValues, jint jDimension, jint jMinIndex, jint jMaxIndex){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + + jboolean isCopy = JNI_FALSE; + jdouble * cValues = env->GetDoubleArrayElements(jValues, &isCopy); + + int ierr = CTA_Array_setSliceAsDoubles_range(h, cValues, jDimension, jMinIndex, jMaxIndex); + if (ierr != CTA_OK){ + cta_jni_exception(env, "CtaArray", "could not call CTA_Array_setSliceAsDoubles_range", ierr); + } + env->ReleaseDoubleArrayElements(jValues, cValues,0); + + +} + +/* + * Class: org_openda_costa_CtaArray + * Method: valueIndex + * Signature: ([I)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaArray_valueIndex + (JNIEnv *env, jobject obj_this, jintArray jIndices){ + + cta_jni_setJavaEnv(env); + CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this); + + jboolean isCopy = JNI_FALSE; + jint * cIndices = env->GetIntArrayElements(jIndices, &isCopy); + int nIndices = env->GetArrayLength(jIndices); + jint jIndex; + int ierr = CTA_Array_valueIndex(h, nIndices, (int *) cIndices, (int*) &jIndex); + if (ierr != CTA_OK){ + cta_jni_exception(env, "CtaArray", "could not call CTA_Array_valueIndex", ierr); + } + + env->ReleaseIntArrayElements(jIndices, cIndices,0); + return jIndex; +} + diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaInitialize.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaInitialize.cpp new file mode 100644 index 000000000..d30f6810e --- /dev/null +++ b/costa/native/openda/bridge/src/org_openda_costa_CtaInitialize.cpp @@ -0,0 +1,85 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaObject.cpp $ +$Revision: 2211 $, $Date: 2011-04-01 13:48:25 +0200 (Fri, 01 Apr 2011) $ + +OpenDA interface for COSTA. +Copyright (C) 2013 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#include "org_openda_costa_CtaInitialize.h" +#include "jni_cta_utils.h" +#include "cta.h" +#include "jni_cta_CtaObsdescr_NativeToJava.h" + + +/* + * Class: org_openda_costa_CtaInitialize + * Method: ctaInit + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaInitialize_ctaInit + (JNIEnv *env, jclass){ + + CTA_Func hMessageWriter; + + // Initialize the costa core environment + CTA_Core_Initialise(); + + // Create a COSTA function handle for the JNI-message writer + CTA_Func_Create("CTA_cta_jni_ExternalMessageWriter", (CTA_Function*) &cta_jni_ExternalMessageWriter, + CTA_NULL, &hMessageWriter); + + // Get the Java class of the message writer + jclass clsMessageWriter = env->FindClass("org/openda/costa/CtaMessageWriter"); + + + if (!clsMessageWriter){ + CTA_Message_Write("org_costa_CtaObject", "ctaInit", "Cannot find Java class org/openda/costa/CtaMessageWriter", 'F'); + } + // Get the Method ID of the Java write-function + jmethodID javaIDWriter = env->GetStaticMethodID(clsMessageWriter, "Write", "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V"); + if (!javaIDWriter){ + CTA_Message_Write("org_costa_CtaObject", "ctaInit", "Member method Write in Java class org/openda/costa/CtaMessageWriter", 'F'); + } + + // Save ID's of the message writer + cta_jni_ExternalMessageWriterSetID(env, clsMessageWriter, javaIDWriter); + + // Set the JNI-writer as message writer in COSTA + CTA_Message_SetExternalWriter(hMessageWriter); + + // Use the JNI-writer for informing that initialization is succesfull + CTA_Message_Write("org_costa_CtaObject", "ctaInit", "Java message writer is initialized", 'I'); + + // Administrate the CtaObsdescr_NativeToJava.c class + CTA_ObsDescrClass hobsdescrcl; + CTA_ObsDescr_nativeToJava_initialise(&hobsdescrcl); + +} + +/* + * Class: org_openda_costa_CtaInitialize + * Method: setRandomSeed + * Signature: (I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaInitialize_setRandomSeed + (JNIEnv *, jclass, jint seed){ + long lseed=seed; + + CTA_rand_seed(lseed); + +} + diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaModelState.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaModelState.cpp new file mode 100644 index 000000000..f2b2ae41f --- /dev/null +++ b/costa/native/openda/bridge/src/org_openda_costa_CtaModelState.cpp @@ -0,0 +1,63 @@ +/* +$URL: $ +$Revision: $, $Date: $ + +Copyright (c) 2012 OpenDA Association +All rights reserved. + +This file is part of OpenDA. + +OpenDA is free software: you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as +published by the Free Software Foundation, either version 3 of +the License, or (at your option) any later version. + +OpenDA is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with OpenDA. If not, see . +*/ + +#include "org_openda_costa_CtaModelState.h" +#include "cta_model.h" +#include "jni_cta_utils.h" +#include "cta_errors.h" +#include "cta_defaults.h" + +#define IDEBUG (0) +/* + * Class: org_openda_costa_CtaModelState + * Method: nativeSavePersistentState + * Signature: (Ljava/lang/String;Ljava/lang/String;I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaModelState_nativeSavePersistentState + (JNIEnv *env, jobject jPersisState, jstring jFileName, jstring jID, jint ctaModel){ + + cta_jni_setJavaEnv(env); + if (IDEBUG>0) { + printf("Java_org_openda_costa_CtaModelState_nativeSavePersistentState: Start of function:\n"); + } + + const char *fileName = env->GetStringUTFChars(jFileName, 0); + const char *ID = env->GetStringUTFChars(jID, 0); + CTA_String cFileName; + CTA_String cID; + CTA_String_Create(&cFileName); + CTA_String_Create(&cID); + CTA_String_Set(cFileName, fileName); + CTA_String_Set(cID,ID); + env->ReleaseStringUTFChars(jFileName, fileName); + env->ReleaseStringUTFChars(jID, ID); + + int retVal = CTA_Model_SavePersistentState(ctaModel, cFileName, cID); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaModelState", "Error calling SavePersistentState ", retVal); + return; + } + if (IDEBUG>0) { + printf("Java_org_openda_costa_CtaModelState_nativeSavePersistentState: End of function:\n"); + } +} diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaObject.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaObject.cpp new file mode 100644 index 000000000..f18734a57 --- /dev/null +++ b/costa/native/openda/bridge/src/org_openda_costa_CtaObject.cpp @@ -0,0 +1,39 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaObject.cpp $ +$Revision: 4061 $, $Date: 2013-07-15 11:45:09 +0200 (Mon, 15 Jul 2013) $ + +OpenDA interface for COSTA. +Copyright (C) 2007 Stef Hummel / Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + + +#include "org_openda_costa_CtaObject.h" +#include "jni_cta_utils.h" +#include "cta.h" +#include "jni_cta_CtaObsdescr_NativeToJava.h" + +/* + * Class: org_costa_CtaObject + * Method: ctaFree + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaObject_ctaFree + (JNIEnv * env, jobject obj_this) +{ + cta_jni_free(env, obj_this); +} + diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaObservationDescriptions.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaObservationDescriptions.cpp new file mode 100644 index 000000000..dd85acde6 --- /dev/null +++ b/costa/native/openda/bridge/src/org_openda_costa_CtaObservationDescriptions.cpp @@ -0,0 +1,307 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaObservationDescriptions.cpp $ +$Revision: 2557 $, $Date: 2011-07-05 10:39:56 +0200 (Tue, 05 Jul 2011) $ + +OpenDA interface for COSTA. +Copyright (C) 2010 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + + +#include "org_openda_costa_CtaObservationDescriptions.h" +#include "cta.h" +#include "jni_cta_utils.h" +#include "jni_datatypes.h" +#define IDEBUG (0) + +/* + * Class: org_openda_costa_CtaObservationDescriptions + * Method: getPropertyKeys + * Signature: ()[Ljava/lang/String; + */ +JNIEXPORT jobjectArray JNICALL Java_org_openda_costa_CtaObservationDescriptions_getPropertyKeys +(JNIEnv *env, jobject jObsDescr){ + + cta_jni_setJavaEnv(env); + CTA_ObsDescr ctaObsDescr = cta_jni_getCtaHandle(env, jObsDescr); + + int nKeys; + CTA_ObsDescr_Property_Count (ctaObsDescr, &nKeys); + + CTA_Vector vKeys; + CTA_Vector_Create(CTA_DEFAULT_VECTOR, nKeys, CTA_STRING, CTA_NULL, &vKeys); + int retVal=CTA_ObsDescr_Get_PropertyKeys(ctaObsDescr, vKeys); + if (retVal!=CTA_OK){ + cta_jni_exception(env, "CtaObservationDescriptions", "Could not get Keys", retVal); + return NULL; + } + + + /* Create and fill a Java array of strings */ + jclass clsString = env->FindClass("java/lang/String"); + jobjectArray results = env->NewObjectArray((long) nKeys, clsString, NULL); + + CTA_String sKey; + CTA_String_Create(&sKey); + for (int i=0; iNewStringUTF(CTAI_String_GetPtr(sKey)); + env->SetObjectArrayElement(results, i, str); + } + + CTA_String_Free(&sKey); + CTA_Vector_Free(&vKeys); + return results; +} + + +/* + * Class: org_openda_costa_CtaObservationDescriptions + * Method: ctaCreateSelection + * Signature: (Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaObservationDescriptions_ctaCreateSelection +(JNIEnv *env, jobject jObsDescr, jstring jSelect){ + + CTA_String select; + + printf("Debug we maken een selectie \n"); + cta_jni_setJavaEnv(env); + /* create COSTA string with selection */ + CTA_String_Create(&select); + const char *sel = env->GetStringUTFChars(jSelect, 0); + printf("Selectie is %s\n",sel); + CTA_String_Set(select,sel); + env->ReleaseStringUTFChars(jSelect, sel); + + CTA_ObsDescr descrOut; + CTA_ObsDescr ctaObsDescr = cta_jni_getCtaHandle(env, jObsDescr); + + printf("De obs descr waar we het mee doen is %d\n",ctaObsDescr); + int retVal = CTA_SObs_CreateSel (ctaObsDescr, select, &descrOut); + if (retVal!=CTA_OK){ + cta_jni_exception(env, "CtaObservationDescriptions", "Could not create selection", retVal); + return 0; + } + CTA_String_Free(&select); + + return descrOut; +} + + +/* + * Class: org_openda_costa_CtaObservationDescriptions + * Method: ctaCreateTimeSelection + * Signature: (I)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaObservationDescriptions_ctaCreateTimeSelection +(JNIEnv *env, jobject jObsDescr, jint timesel){ + + CTA_ObsDescr descrOut; + CTA_ObsDescr ctaObsDescr; + + cta_jni_setJavaEnv(env); + + ctaObsDescr = cta_jni_getCtaHandle(env, jObsDescr); + int retVal = CTA_SObs_CreateTimSel (ctaObsDescr, timesel, &descrOut); + if (retVal!=CTA_OK){ + cta_jni_exception(env, "CtaStochObserver", "Could not create selection", retVal); + return 0; + } + + return descrOut; +} + + +/* + * Class: org_costa_CtaObservationDescriptions + * Method: getObservationCount + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaObservationDescriptions_getObservationCount +(JNIEnv *env, jobject jObsDescr){ + + CTA_ObsDescr ctaObsDescr; + int nobs; + int retVal; + + cta_jni_setJavaEnv(env); + ctaObsDescr = cta_jni_getCtaHandle(env, jObsDescr); + retVal = CTA_ObsDescr_Observation_Count(ctaObsDescr, &nobs); + if (retVal!=CTA_OK){ + cta_jni_exception(env, "CtaStochObserver", "Could not get observation count", retVal); + return 0; + } + return nobs; +} + + + +/* + * Class: org_costa_CtaObservationDescriptions + * Method: getPropertyCount + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaObservationDescriptions_getPropertyCount +(JNIEnv *env, jobject jObsDescr){ + CTA_ObsDescr ctaObsDescr; + int nprop; + int retVal; + + cta_jni_setJavaEnv(env); + ctaObsDescr = cta_jni_getCtaHandle(env, jObsDescr); + + retVal = CTA_ObsDescr_Property_Count(ctaObsDescr, &nprop); + if (retVal!=CTA_OK){ + cta_jni_exception(env, "CtaStochObserver", "Could not get property count", retVal); + return 0; + } + return nprop; +} + + +/* + * Class: org_costa_CtaObservationDescriptions + * Method: getStringProperties + * Signature: (Ljava/lang/String;)[Ljava/lang/String; + */ +JNIEXPORT jobjectArray JNICALL Java_org_openda_costa_CtaObservationDescriptions_getStringProperties +(JNIEnv *env, jobject jObsDescr, jstring jKey){ + + CTA_ObsDescr ctaObsDescr; + int nMeasr; + + cta_jni_setJavaEnv(env); + + ctaObsDescr = cta_jni_getCtaHandle(env, jObsDescr); + CTA_ObsDescr_Observation_Count (ctaObsDescr, &nMeasr); + + CTA_Vector vProp; + CTA_Vector_Create(CTA_DEFAULT_VECTOR, nMeasr, CTA_STRING, CTA_NULL, &vProp); + + + const char *key=env->GetStringUTFChars(jKey, 0); + int retVal = CTA_ObsDescr_Get_ValueProperties(ctaObsDescr, key, vProp, CTA_STRING); + env->ReleaseStringUTFChars(jKey, key); + if (retVal!=CTA_OK){ + printf("hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh\n"); + cta_jni_exception(env, "CtaObservationDescriptions", "Could not get String Properties", retVal); + return NULL; + } + + + /* Create and fill a Java array of strings */ + jclass clsString = env->FindClass("java/lang/String"); + jobjectArray results = env->NewObjectArray((long) nMeasr, clsString, NULL); + + CTA_String sProp; + CTA_String_Create(&sProp); + for (int i=0; iNewStringUTF(CTAI_String_GetPtr(sProp)); + env->SetObjectArrayElement(results, i, str); + } + + CTA_String_Free(&sProp); + CTA_Vector_Free(&vProp); + return results; +} + + + +/* + * Class: org_openda_costa_CtaObservationDescriptions + * Method: ctaCreateNativeToJavaObserver + * Signature: (Lorg/openda/interfaces/IObservationDescriptions;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaObservationDescriptions_ctaCreateNativeToJavaObserver +(JNIEnv *env, jobject jObsDescr, jobject jJavaObsDescr){ + + cta_jni_setJavaEnv(env); + + if (IDEBUG>0) {printf("Debug: Creating observation descriptions \n");} + // Get the class of the Java implementation + jclass cls = env->GetObjectClass(jJavaObsDescr); + + // Find the COSTA class + CTA_ObsDescrClass hobsdescrcl; + CTA_String hClass; + CTA_String_Create(&hClass); + CTA_String_Set(hClass,"cta_obsdescr_nativeToJava"); + CTA_Handle_Find(hClass, CTA_OBSDESCRCLASS, &hobsdescrcl); + + // Create this new class + CTA_ObsDescr hobsdscr; + + // create the user input argument + sJni_Class jniClass; + jniClass.env = env; + jniClass.cls = cls; + jniClass.obj = jJavaObsDescr; + + // Create the actual class + CTA_Handle hJniClass; + CTA_Handle_Create("Java class", CTA_DATABLOCK,&jniClass, &hJniClass); + + CTA_ObsDescr_Create( hobsdescrcl, hJniClass, &hobsdscr); + if (IDEBUG>0) {printf("Debug: Handle of created observation descriptions is %d\n",hobsdscr);} + + + + //Free work objects / mem + CTA_Handle_Free(&hJniClass); + CTA_String_Free(&hClass); + + return hobsdscr; + +} + + + +/* + * Class: org_costa_CtaObservationDescriptions + * Method: ctaGetValueProperties + * Signature: (Ljava/lang/String;I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaObservationDescriptions_ctaGetValueProperties +(JNIEnv *env, jobject jObsDescr, jstring jKey, jint vProp){ + + cta_jni_setJavaEnv(env); + + CTA_ObsDescr ctaObsDescr = cta_jni_getCtaHandle(env, jObsDescr); + printf("De handle van de obsdescr is %d\n",ctaObsDescr); + + const char *key=env->GetStringUTFChars(jKey, 0); + printf("De key is %s\n",key); + int retVal = CTA_ObsDescr_Get_ValueProperties(ctaObsDescr, key, vProp, CTA_DOUBLE); + env->ReleaseStringUTFChars(jKey, key); + if (retVal!=CTA_OK){ + printf("HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n"); + cta_jni_exception(env, "CtaObservationDescriptions", "Could not get String Properties", retVal); + return; + } +} + + + + + + + + + + + diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaOpenDaModel.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaOpenDaModel.cpp new file mode 100644 index 000000000..966353091 --- /dev/null +++ b/costa/native/openda/bridge/src/org_openda_costa_CtaOpenDaModel.cpp @@ -0,0 +1,798 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaOpenDaModel.cpp $ +$Revision: 3319 $, $Date: 2012-05-30 10:12:43 +0200 (Wed, 30 May 2012) $ + +OpenDA interface for COSTA. +Copyright (C) 2007 Stef Hummel / Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "org_openda_costa_CtaOpenDaModel.h" +#include "org_openda_costa_CtaTreeVector.h" +#include "cta_model.h" +#include "cta_model_factory.h" +#include "jni_cta_utils.h" +#include "cta_errors.h" +#include "cta_defaults.h" +#include "cta_mem.h" +#include "string.h" + +#define IDEBUG (0) + +/* Store a list of all classes that are created */ +static int CTAI_nClasses = 0; +static CTA_ModelClass *CTAI_Created_Classes = NULL; +static char **CTAI_ClassFile = NULL; +static char **CTAI_ClassConfigFile = NULL; + + +/*Private functions */ + +jintArray Java_org_openda_costa_CtaOpenDaModel_ctaGetObservedLocalization + (JNIEnv *env, jobject jModel, jobject jObsDescr, jdouble distance, jint iDomain){ + + int nObs; + CTA_Vector locVecs; + + cta_jni_setJavaEnv(env); + CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel); + CTA_ObsDescr ctaObsDescr = cta_jni_getCtaHandle(env, jObsDescr); + + /* Get number of observations */ + CTA_ObsDescr_Observation_Count(ctaObsDescr, &nObs); + + /* Create array */ + jintArray jValues = env->NewIntArray(nObs); + + /* Create COSTA vector */ + CTA_Vector_Create(CTA_DEFAULT_VECTOR, nObs, CTA_HANDLE, CTA_NULL, &locVecs); + + /* Set all indices NULL */ + CTA_Handle nullHandle=CTA_NULL; + CTA_Vector_SetConstant(locVecs, &nullHandle, CTA_HANDLE); + + if (iDomain >=0){ + /* Get localization from model */ + CTA_Model_GetObsLocalizationDomain(ctaModel, ctaObsDescr, distance, iDomain, locVecs); + } + else { + CTA_Model_GetObsLocalization(ctaModel, ctaObsDescr, distance, locVecs); + } + + /* Copy costa handles into Java integer array */ + int *cValues = new int[nObs]; + CTA_Vector_GetVals(locVecs, (void *) cValues, nObs, CTA_HANDLE); + env->SetIntArrayRegion(jValues, 0, nObs, (jint *) cValues); + + /* Set all handles to CTA_NULL in order to avoid deallocation */ + CTA_Vector_SetConstant(locVecs, &nullHandle, CTA_HANDLE); + + /* Free work variables */ + delete [] cValues; + CTA_Vector_Free(&locVecs); + + return jValues; +} + + + + + + +/* + * Class: org_costa_CtaOpenDaModel + * Method: CtaOpenDaModelCreate + * Signature: (Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaCreate +(JNIEnv *env, jobject, jstring jModelCls, jstring jConfigFile){ + + cta_jni_setJavaEnv(env); + + /* Translate jModelCls and jConfigFile to C-strings */ + const char *modelClassConfigFile = env->GetStringUTFChars(jModelCls, 0); + const char *ConfigFile = env->GetStringUTFChars(jConfigFile, 0); + + if (IDEBUG>0) { + printf("Java_org_openda_costa_CtaOpenDaModel_ctaCreate: Start of function:\n"); + printf("modelClassConfigFile =%s\n",modelClassConfigFile); + printf("ConfigFile =%s\n",ConfigFile); + } + + /* Get the COSTA modelClass object */ + /* Figure out whether this class already exists*/ + CTA_ModelClass modelClass; + bool newClass=true; + for (int iClass=0; iClassReleaseStringUTFChars(jModelCls, modelClassConfigFile ); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaOpenDaModel", "Could create model class", retVal); + return 0; + } + + if (IDEBUG>0) { + printf("Java_org_openda_costa_CtaOpenDaModel_ctaCreate: handle of modelClass=%d\n",modelClass); + } + } + + /* Create a model instance */ + + /* -Create costa string with name of the model configuration */ + CTA_String ctaConfigFile; + CTA_String_Create(&ctaConfigFile); + CTA_String_Set(ctaConfigFile, ConfigFile); + env->ReleaseStringUTFChars(jConfigFile, ConfigFile); + + /* -Create the model */ + CTA_Model ctaModel; + if (IDEBUG>0) printf("Java_org_openda_costa_CtaOpenDaModel_ctaCreate: before model_create "); + int retVal = CTA_Model_Create(modelClass,ctaConfigFile,&ctaModel); + if (IDEBUG>0) printf("Java_org_openda_costa_CtaOpenDaModel_ctaCreate: after model_create "); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaOpenDaModel", "Could NOT perform CTA_Model_Create", retVal); + return 0; + } + + if (IDEBUG>0) { + printf("Java_org_openda_costa_CtaOpenDaModel_ctaCreate: Handle of model is %d\n",ctaModel); + printf("Java_org_openda_costa_CtaOpenDaModel_ctaCreate: End of function:\n"); + } + return ctaModel; +} + +/* + * Class: org_costa_CtaOpenDaModel + * Method: CtaModelAnnounceObservedValues + * Signature: (Lorg/openda/interfaces/IObservationDescriptions;)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaAnnounceObservedValues +(JNIEnv *env, jobject jModel, jobject jObsDescr){ + + cta_jni_setJavaEnv(env); + CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel); + CTA_ObsDescr ctaObsDescr = cta_jni_getCtaHandle(env, jObsDescr); + + int retVal=CTA_Model_AnnounceObsValues(ctaModel, ctaObsDescr); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaOpenDaModel", "Could NOT perform CTA_Model_AnnounceObsValues", retVal); + return; + } +} + +/* + * Class: org_costa_CtaOpenDaModel + * Method: CtaModelAxpyOnParameters + * Signature: (DLorg/openda/interfaces/IVector;)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaAxpyOnParameters +(JNIEnv *env, jobject jModel, jdouble alpha, jobject jX){ + + cta_jni_setJavaEnv(env); + CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel); + CTA_TreeVector ctaX = cta_jni_getCtaHandle(env, jX); + + int retVal=CTA_Model_AxpyParam(ctaModel, alpha, ctaX); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_AxpyParam", retVal); + return; + } +} + + +/* + * Class: org_costa_CtaOpenDaModel + * Method: CtaModelCompute + * Signature: (Lorg/openda/interfaces/ITime;)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaCompute +(JNIEnv *env, jobject jModel, jobject jTime){ + + double tstart, tstop; + + cta_jni_setJavaEnv(env); + + if (IDEBUG>0) { + printf("Java_org_openda_costa_CtaOpenDaModel_ctaCompute: Start of function:\n"); + } + + CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel); + CTA_Time ctaTime = cta_jni_getCtaHandle(env, jTime); + + + if (IDEBUG>0) { + printf("Java_org_openda_costa_CtaOpenDaModel_ctaCompute: model handle =%d:\n",ctaModel); + printf("Java_org_openda_costa_CtaOpenDaModel_ctaCompute: time handle =%d:\n",ctaTime); + tstart=-9999.0; + tstop=-9999.0; + CTA_Time_GetSpan(ctaTime,&tstart,&tstop); + printf("Java_org_openda_costa_CtaOpenDaModel_ctaCompute: from =%g %g :\n",tstart,tstop); + } + + + int retVal=CTA_Model_Compute(ctaModel, ctaTime); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_Compute", retVal); + return; + } + + if (IDEBUG>0) { + printf("Java_org_openda_costa_CtaOpenDaModel_ctaCompute: End of function:\n"); + } + +} + + + + +/* + * Class: org_costa_CtaOpenDaModel + * Method: CtaModelgetObservedValues + * Signature: (Lorg/openda/interfaces/IObservationDescriptions;Lorg/costa/CtaVector;)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetObservedValues +(JNIEnv *env, jobject jModel, jobject jObsDescr, jobject jValues){ + + cta_jni_setJavaEnv(env); + CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel); + CTA_ObsDescr ctaObsDescr = cta_jni_getCtaHandle(env, jObsDescr); + CTA_Vector ctaValues = cta_jni_getCtaHandle(env, jValues); + CTA_Time time; + + // Create an empty time interval + CTA_Time_Create(&time); + CTA_Time_SetSpan(time, 0.0, -1.0); + + int retVal=CTA_Model_GetObsValues (ctaModel, time, ctaObsDescr, ctaValues); + + CTA_Time_Free(&time); + + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaOpenDaModel", "Could not perform CTA_Model_GetObsValues", retVal); + return; + } +} + +/* + * Class: org_costa_CtaOpenDaModel + * Method: ctaGetParameters + * Signature: ()Lorg/openda/interfaces/IVector; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetParameters +(JNIEnv *env, jobject jModel){ + + cta_jni_setJavaEnv(env); + + CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel); + CTA_TreeVector ctaParams =CTA_NULL; + + int retVal=CTA_Model_GetParam (ctaModel, &ctaParams); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_GetParam", retVal); + return NULL; + } + + /* Create a Java CtaTreeVector */ + jclass clsTreeVector = env->FindClass("org/openda/costa/CtaTreeVector"); + jmethodID constructorID = env->GetMethodID (clsTreeVector, "", "()V"); + jobject jState = env->NewObject(clsTreeVector, constructorID); + + cta_jni_setCtaHandle(env, jState, ctaParams); + + return jState; +} + +/* + * Class: org_costa_CtaOpenDaModel + * Method: ctaGetState + * Signature: ()Lorg/openda/interfaces/IVector; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetState +(JNIEnv *env, jobject jModel){ + + cta_jni_setJavaEnv(env); + CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel); + CTA_TreeVector ctaState =CTA_NULL; + int retVal=CTA_Model_GetState (ctaModel, &ctaState); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_GetState", retVal); + return NULL; + } + + /* Create a Java CtaTreeVector */ + jclass clsTreeVector = env->FindClass("org/openda/costa/CtaTreeVector"); + jmethodID constructorID = env->GetMethodID (clsTreeVector, "", "()V"); + jobject jState = env->NewObject(clsTreeVector, constructorID); + + cta_jni_setCtaHandle(env, jState, ctaState); + + return jState; +} + +/* + * Class: org_costa_CtaOpenDaModel + * Method: ctaGetStateScaling + * Signature: ()Lorg/openda/interfaces/IVector; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetStateScaling +(JNIEnv *env, jobject jModel){ + + cta_jni_setJavaEnv(env); + CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel); + CTA_TreeVector ctaScal =CTA_NULL; + + int retVal=CTA_Model_GetStateScaling (ctaModel, &ctaScal); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_GetStateScaling", retVal); + return NULL; + } + + /* Create a Java CtaTreeVector */ + jclass clsTreeVector = env->FindClass("org/openda/costa/CtaTreeVector"); + jmethodID constructorID = env->GetMethodID (clsTreeVector, "", "()V"); + jobject jState = env->NewObject(clsTreeVector, constructorID); + + cta_jni_setCtaHandle(env, jState, ctaScal); + + return jState; +} + + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaGetObservedLocalization + * Signature: (Lorg/openda/interfaces/IObservationDescriptions;D)[I + */ +JNIEXPORT jintArray JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetObservedLocalization + (JNIEnv *env, jobject jModel, jobject jObsDescr, jdouble distance){ + + return Java_org_openda_costa_CtaOpenDaModel_ctaGetObservedLocalization + (env, jModel, jObsDescr, distance, -1); +} + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaReleaseInternalState + * Signature: (Ljava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaReleaseInternalState + (JNIEnv *env, jobject jModel, jstring jID){ + // Release resources used to save a state at some earlier time. + + cta_jni_setJavaEnv(env); + CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel); + if (IDEBUG>0) { + printf("Java_org_openda_costa_CtaOpenDaModel_ctaReleaseInternalState: Start of function:\n"); + } + + // create costa string with ID of state to be released + const char *ID = env->GetStringUTFChars(jID, 0); + CTA_String cID; + CTA_String_Create(&cID); + CTA_String_Set(cID,ID); + env->ReleaseStringUTFChars(jID, ID); + + int retVal = CTA_Model_ReleaseInternalState(ctaModel,cID); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_ReleaseInternalState", retVal); + return; + } + if (IDEBUG>0) { + printf("Java_org_openda_costa_CtaOpenDaModel_ctaReleaseInternalState: End of function:\n"); + } +} + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaSaveInternalState + * Signature: ()Lorg/openda/interfaces/IModelState; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaSaveInternalState + (JNIEnv *env, jobject jModel){ + + cta_jni_setJavaEnv(env); + if (IDEBUG>0) { + printf("Java_org_openda_costa_CtaOpenDaModel_ctaSaveInternalState: Start of function:\n"); + } + + CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel); + /* create costa string */ + CTA_String cID; + CTA_String_Create(&cID); + /* save the state and get the ID of it back */ + int retVal = CTA_Model_SaveInternalState(ctaModel,&cID); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_SaveInternalState", retVal); + return NULL; + } + + /* put the returned costa string into a java string */ + char *ID = CTAI_String_GetPtr(cID); + jstring jID =(env)->NewStringUTF(ID); + if (IDEBUG>0) { + printf("# returned instanceID bridge = '%s'\n",ID); + } + + /* create the ctaModelState object */ + jclass clsModelState = env->FindClass("org/openda/costa/CtaModelState"); + jmethodID constructorID = env->GetMethodID (clsModelState, "", "(Ljava/lang/String;I)V"); + jobject jModelState = env->NewObject(clsModelState, constructorID, jID, (jint) ctaModel ); + if (IDEBUG>0) { + printf("Java_org_openda_costa_CtaOpenDaModel_ctaSaveInternalState: End of function:\n"); + } + CTA_String_Free(&cID); + return jModelState; +} + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaLoadPersistentState + * Signature: (Ljava/lang/String;)Lorg/openda/interfaces/IModelState; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaLoadPersistentState + (JNIEnv *env, jobject jModel, jstring jFileName){ + + cta_jni_setJavaEnv(env); + if (IDEBUG>0) { + printf("Java_org_openda_costa_CtaOpenDaModel_ctaLoadPersistentState: Start of function:\n"); + } + + CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel); + /* create costa strings*/ + CTA_String cFileName; + CTA_String cID; + const char *fileName = env->GetStringUTFChars(jFileName, 0); + + CTA_String_Create(&cFileName); + CTA_String_Create(&cID); + CTA_String_Set(cFileName, fileName); + env->ReleaseStringUTFChars(jFileName, fileName); + + /* load the state from file, get ID of loaded state back*/ + CTA_Model_LoadPersistentState(ctaModel, cFileName, &cID); + + /* put the returned costa string into a java string */ + char *ID = CTAI_String_GetPtr(cID); + jstring jID =(env)->NewStringUTF(ID); + + /* create the ctaModelState object */ + jclass clsModelState = env->FindClass("org/openda/costa/CtaModelState"); + jmethodID constructorID = env->GetMethodID (clsModelState, "", "(Ljava/lang/String;I)V"); + jobject jModelState = env->NewObject(clsModelState, constructorID, jID, (jint) ctaModel ); + + CTA_String_Free(&cID); + return jModelState; +} + +/* + * Class: org_costa_CtaOpenDaModel + * Method: setAutomaticNoiseGeneration + * Signature: (Z)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_setAutomaticNoiseGeneration +(JNIEnv *env, jobject jModel, jboolean add_noise){ + + cta_jni_setJavaEnv(env); + CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel); + CTA_Time timespan; + double t1,t2; + CTA_Time_Create(×pan); + if (add_noise){ + t1=-999e99; + t2= 999e99; + } + else { + t1=999e99; + t2=-999e99; + } + + CTA_Time_SetSpan(timespan,t1,t2); + + + int retVal=CTA_Model_AddNoise (ctaModel, timespan); + CTA_Time_Free(×pan); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_AddNoise", retVal); + return; + } +} + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaRestoreInternalState + * Signature: (Ljava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaRestoreInternalState + (JNIEnv *env, jobject jModel, jstring jID){ + //Restore a previously saved state of the model + int retVal; + + cta_jni_setJavaEnv(env); + if (IDEBUG>0) { + printf("Java_org_openda_costa_CtaOpenDaModel_ctaRestoreInternalState: Start of function:\n"); + } + CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel); + const char*ID = env->GetStringUTFChars(jID, 0); + CTA_String cID; + CTA_String_Create(&cID); + CTA_String_Set(cID,ID); + + retVal = CTA_Model_RestoreInternalState(ctaModel,cID); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_RestoreInternalState", retVal); + return; + } +} + +/* + * Class: org_costa_CtaOpenDaModel + * Method: CtaModelSetParameters + * Signature: (Lorg/openda/interfaces/IVector;)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaSetParameters +(JNIEnv *env, jobject jModel, jobject jParams){ + + cta_jni_setJavaEnv(env); + CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel); + CTA_TreeVector ctaParams = cta_jni_getCtaHandle(env, jParams); + int retVal=CTA_Model_SetParam (ctaModel, ctaParams); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_AddNoise", retVal); + return; + } +} + +/* + * Class: org_costa_CtaOpenDaModel + * Method: getTimeHorizon + * Signature: ()Lorg/openda/interfaces/ITime; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_getTimeHorizon + (JNIEnv *env, jobject jModel){ + + CTA_Time timeHorizon; + + cta_jni_setJavaEnv(env); + + /* Create a new time instance */ + CTA_Time_Create(&timeHorizon); + + /* Get the handle of the model */ + CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel); + + /* Call the method */ + int retVal=CTA_Model_GetTimeHorizon (ctaModel, timeHorizon); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_GetTimeHorizon", retVal); + return NULL; + } + + /* Create Java Time */ + jclass clsTime = env->FindClass("org/openda/costa/CtaTime"); + jmethodID constructorID = env->GetMethodID (clsTime, "", "()V"); + jobject jTime = env->NewObject(clsTime, constructorID); + /* Set the handle */ + cta_jni_setCtaHandle(env, jTime, timeHorizon); + + return jTime; +} + +/* + * Class: org_costa_CtaOpenDaModel + * Method: getCurrentTime + * Signature: ()Lorg/openda/interfaces/ITime; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_getCurrentTime + (JNIEnv *env, jobject jModel){ + + CTA_Time currentTime; + + cta_jni_setJavaEnv(env); + + /* Create a new time instance */ + CTA_Time_Create(¤tTime); + + /* Get the handle of the model */ + CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel); + + /* Call the method */ + int retVal=CTA_Model_GetCurrentTime (ctaModel, currentTime); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_GetCurrentTime", retVal); + return NULL; + } + + /* Create Java Time */ + jclass clsTime = env->FindClass("org/openda/costa/CtaTime"); + jmethodID constructorID = env->GetMethodID (clsTime, "", "()V"); + jobject jTime = env->NewObject(clsTime, constructorID); + /* Set the handle */ + cta_jni_setCtaHandle(env, jTime, currentTime); + + return jTime; +} + + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaAxpyOnStateDomain + * Signature: (DLorg/openda/interfaces/IVector;I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaAxpyOnStateDomain + (JNIEnv *env, jobject jModel, jdouble alpha, jobject jX, jint iDomain){ + + cta_jni_setJavaEnv(env); + CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel); + CTA_TreeVector ctaX = cta_jni_getCtaHandle(env, jX); + + int retVal=CTA_OK; + if (iDomain<0){ + retVal=CTA_Model_AxpyState(ctaModel, alpha, ctaX); + } + else { + retVal=CTA_Model_AxpyStateDomain(ctaModel, alpha, iDomain, ctaX); + } + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_AxpyState", retVal); + return; + } +} + + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaGetStateDomain + * Signature: (I)Lorg/openda/interfaces/IVector; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetStateDomain + (JNIEnv *env, jobject jModel, jint iDomain){ + cta_jni_setJavaEnv(env); + CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel); + CTA_TreeVector ctaState =CTA_NULL; + int retVal=CTA_Model_GetStateDomain(ctaModel, iDomain, &ctaState); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_GetState", retVal); + return NULL; + } + + /* Create a Java CtaTreeVector */ + jclass clsTreeVector = env->FindClass("org/openda/costa/CtaTreeVector"); + jmethodID constructorID = env->GetMethodID (clsTreeVector, "", "()V"); + jobject jState = env->NewObject(clsTreeVector, constructorID); + + cta_jni_setCtaHandle(env, jState, ctaState); + + return jState; +} + + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaGetObservedLocalizationDomain + * Signature: (Lorg/openda/interfaces/IObservationDescriptions;DI)[I + */ +JNIEXPORT jintArray JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetObservedLocalizationDomain + (JNIEnv *env, jobject jModel, jobject jObsDescr, jdouble distance, jint iDomain){ + + return Java_org_openda_costa_CtaOpenDaModel_ctaGetObservedLocalization + (env, jModel, jObsDescr, distance, iDomain); +} + + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaGetNumDomains + * Signature: (D)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetNumDomains + (JNIEnv *env, jobject jModel, jdouble distance){ + + cta_jni_setJavaEnv(env); + CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel); + + int nDomains; + int retVal=CTA_Model_GetNumDomains(ctaModel, distance, &nDomains); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_AxpyState", retVal); + return 0; + } + return nDomains; +} + + +/* + * Class: org_openda_costa_CtaOpenDaModel + * Method: ctaGetObservationSelector + * Signature: (Lorg/openda/interfaces/IObservationDescriptions;DI)[I + */ +JNIEXPORT jintArray JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetObservationSelector + (JNIEnv *env, jobject jModel, jobject jObsDescr, jdouble distance, jint iDomain){ + + cta_jni_setJavaEnv(env); + CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel); + CTA_ObsDescr ctaObsDescr = cta_jni_getCtaHandle(env, jObsDescr); + CTA_Time time; + + + CTA_Vector ctaSelVec = CTA_NULL; + int retVal=CTA_Model_GetObsSelector (ctaModel, ctaObsDescr, distance, iDomain, &ctaSelVec); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaModel", "ObservationSelector returned an error ", retVal); + return NULL; + } + int nSel; + retVal=CTA_Vector_GetSize(ctaSelVec, &nSel); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaModel", "ObservationSelector cannot get length of returned selction. Error ", retVal); + return NULL; + } + int *iVals = new int[nSel]; + retVal = CTA_Vector_GetVals(ctaSelVec,iVals,nSel,CTA_INTEGER); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaModel", "ObservationSelector cannot get values from selection vector. Error ", retVal); + delete [] iVals; + return NULL; + } + retVal= CTA_Vector_Free(&ctaSelVec); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaModel", "ObservationSelector cannot get values from selection vector. Error ", retVal); + delete [] iVals; + return NULL; + } + + /* Copy values to java integer array */ + jintArray jValues = env->NewIntArray(nSel); + jint *jIVals= new jint[nSel]; + for (int i=0;iSetIntArrayRegion(jValues, 0, nSel, jIVals); + delete [] jIVals; + delete [] iVals; + return jValues; +} + + + + + + + + diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaParallel.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaParallel.cpp new file mode 100644 index 000000000..7337933b0 --- /dev/null +++ b/costa/native/openda/bridge/src/org_openda_costa_CtaParallel.cpp @@ -0,0 +1,83 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/openda/bridge/org_costa_CtaOpenDaModel.cpp $ +$Revision: 1429 $, $Date: 2010-04-01 17:14:23 +0200 (Thu, 01 Apr 2010) $ + +OpenDA interface for COSTA. +Copyright (C) 2007 Stef Hummel / Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "org_openda_costa_CtaParallel.h" +#include "cta.h" +#include "cta_modbuild_par.h" +#include "jni_cta_utils.h" + +#define IDEBUG (0) + +#ifdef __cplusplus +extern "C" { +#endif + int CTA_Par_CreateGroups(CTA_Tree,int); +#ifdef __cplusplus +} +#endif + + +/* + * Class: org_openda_costa_CtaParallel + * Method: nativeInit + * Signature: (Ljava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaParallel_nativeInit + (JNIEnv *env, jclass myClass, jstring jConfigName){ + + CTA_String hfile; + CTA_Tree htree; + int retVal; + + /* Create C string of jave string of config file name */ + const char *configName = env->GetStringUTFChars(jConfigName, 0); + + /* Parse the input file */ + CTA_String_Create(&hfile); + CTA_String_Set(hfile, configName); + env->ReleaseStringUTFChars(jConfigName, configName); + retVal=CTA_XML_Read(hfile,&htree); + if (retVal!=CTA_OK) { + cta_jni_exception(env, "CtaParallel", + "Could parse configuration file", retVal); + } + CTA_String_Free(&hfile); + + /* initialize the parallel environment */ + retVal=CTA_Par_CreateGroups(htree,CTA_TRUE); + if (retVal!=CTA_OK) { + cta_jni_exception(env, "CtaParallel", + "Could not initialize parallel environment", retVal); + } +} + +/* + * Class: org_openda_costa_CtaParallel + * Method: finalizeParallelEnvironment + * Signature: ()V + */ + +JNIEXPORT void JNICALL Java_org_openda_costa_CtaParallel_finalizeParallelEnvironment + (JNIEnv *, jclass){ + CTA_Modbuild_par_Finalize(); +} + diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaRelationTable.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaRelationTable.cpp new file mode 100644 index 000000000..f3ff77036 --- /dev/null +++ b/costa/native/openda/bridge/src/org_openda_costa_CtaRelationTable.cpp @@ -0,0 +1,171 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaRelationTable.cpp $ +$Revision: 2211 $, $Date: 2011-04-01 13:48:25 +0200 (Fri, 01 Apr 2011) $ + +OpenDA interface for COSTA. +Copyright (C) 2007 Stef Hummel / Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "jni_cta_utils.h" +#include "cta_defaults.h" +#include "cta_vector.h" +#include "cta_errors.h" +#include "cta_reltable.h" +#include "org_openda_costa_CtaRelationTable.h" + +/* + * Class: org_costa_CtaRelationTable + * Method: ctaCreate + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaRelationTable_ctaCreate +(JNIEnv *env, jobject obj_this){ + + cta_jni_setJavaEnv(env); + + CTA_RelTable ctaRelTab = CTA_NULL; + int retVal = CTA_RelTable_Create( &ctaRelTab); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaRelationTable", "Could not create relation table", retVal); + return 0; + } + return ctaRelTab; +} + +/* + * Class: org_costa_CtaRelationTable + * Method: SetTableCombine + * Signature: (Lorg/openda/interfaces/IRelationTable;ZLorg/openda/interfaces/IRelationTable;Z)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaRelationTable_SetTableCombine +(JNIEnv *env, jobject newtable, jobject reltab1, jboolean inv1, jobject reltab2, jboolean inv2){ + + cta_jni_setJavaEnv(env); + + /* Get the COSTA handles */ + CTA_RelTable ctaRelTab = cta_jni_getCtaHandle(env, newtable); + CTA_RelTable ctaRel1 = cta_jni_getCtaHandle(env, reltab1); + CTA_RelTable ctaRel2 = cta_jni_getCtaHandle(env, reltab2); + + int retVal=CTA_RelTable_SetTableCombine (ctaRelTab, ctaRel1, inv1, ctaRel2, inv2); + + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaRelationTable", "Could not perform CTA_RelTable_SetTableCombine", retVal); + return; + } +} + +/* + * Class: org_costa_CtaRelationTable + * Method: SetSelect + * Signature: (Lorg/openda/interfaces/IVector;)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaRelationTable_SetSelect +(JNIEnv *env, jobject jRelTab, jobject jSelect){ + + cta_jni_setJavaEnv(env); + + /* Get the COSTA handles */ + CTA_RelTable ctaRelTab = cta_jni_getCtaHandle(env, jRelTab); + CTA_Vector ctaVecSelect = cta_jni_getCtaHandle(env, jSelect); + + /* Create Integervector */ + int n; + CTA_Vector_GetSize(ctaVecSelect,&n); + CTA_Vector ctaIVecSelect; + CTA_Vector_Create(CTA_DEFAULT_VECTOR,n,CTA_INTEGER,CTA_NULL,&ctaIVecSelect); + + double *dval=(double *) malloc(n*sizeof(double)); + CTA_Vector_GetVals(ctaVecSelect,dval,n,CTA_DOUBLE); + int *ival= (int *) malloc(n*sizeof(int)); + for (int i=0;i +#include "org_openda_costa_CtaStochObserver.h" +#include "cta_sobs.h" +#include "jni_cta_utils.h" +#include "cta_errors.h" +#include "cta_defaults.h" + + +/* + * Class: org_costa_CtaStochObserver + * Method: CtaStochObserver_createNative + * Signature: (Ljava/lang/String;Ljava/lang/String)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaStochObserver_ctaCreateNative +(JNIEnv *env, jobject, jstring jTypeObserver, jstring jFilename){ + + cta_jni_setJavaEnv(env); + printf("Debug: start: Java_org_openda_costa_CtaStochObserver_ctaCreateNative\n"); + CTA_StochObs hstochobs; + CTA_String Filename,sTypeObserver; + CTA_SObsClass hsobsclass; + CTA_Tree htree; + int retVal=0; + + CTA_String_Create(&Filename); + const char *str = env->GetStringUTFChars(jFilename, 0); + CTA_String_Set(Filename, str); + printf("Debug Observation filename=%s\n",str); + env->ReleaseStringUTFChars(jFilename, str); + + const char *typeObserver = env->GetStringUTFChars(jTypeObserver, 0); + if (strcmp(typeObserver,"SQLITE")==0){ + retVal = CTA_SObs_Create (CTA_DEFAULT_SOBS, Filename, &hstochobs); + } + else if (strcmp(typeObserver,"MAORI")==0) { + printf("DEBUG vanuit bridge. Create de MAORI filename=%d\n",Filename); + retVal = CTA_SObs_Create (CTA_MAORI_SOBS, Filename, &hstochobs); + } + else if (strcmp(typeObserver,"USER")==0) { + printf("DEBUG vanuit bridge. Create de USER filename=%d\n",Filename); + retVal = CTA_SObs_Create (CTA_USER_SOBS, Filename, &hstochobs); + } + else { + char message[1000]; + + CTA_String_Create(&sTypeObserver); + CTA_String_Set(sTypeObserver,typeObserver); + retVal=CTA_XML_Read(sTypeObserver,&htree); + CTA_String_Free(&sTypeObserver); + if (retVal != CTA_OK) { + sprintf(message, "Error reading xml-file %s with Stochastic Observer class. Error code %d", typeObserver, retVal); + cta_jni_exception(env, "CtaStochObserver", message, retVal); + hstochobs = CTA_NULL; + } else { + retVal=CTA_Tree_GetHandleStr(htree, (char *) "COSTA/sobsclass", &hsobsclass); + if (retVal != CTA_OK) { + sprintf(message, "Error get class from tree (created from xml-file %s. Error code %d\n" ,typeObserver, retVal); + cta_jni_exception(env, "CtaStochObserver", message, retVal); + hstochobs = CTA_NULL; + } else { + retVal = CTA_SObs_Create (hsobsclass, Filename, &hstochobs); + if (retVal != CTA_OK) { + sprintf(message, "Cannot create stochastic Observer Error code %d\n" ,retVal); + cta_jni_exception(env, "CtaStochObserver", message, retVal); + hstochobs = CTA_NULL; + } + } + } + CTA_Tree_Free(&htree); + } +/* else { + char message[256]; + sprintf(message, "Unknown Observer type: %s", typeObserver); + retVal = CTA_NOT_YET_SUPPORTED; + hstochobs = CTA_NULL; + cta_jni_exception(env, "CtaStochObserver", message, retVal); + } */ + + printf("Debug Observation handle is =%d\n", hstochobs); + env->ReleaseStringUTFChars(jTypeObserver, typeObserver); + if (retVal!=CTA_OK){ + cta_jni_exception(env, "CtaStochObserver", "Could create new stoch observer", retVal); + return 0; + } + + CTA_String_Free(&Filename); + printf("Debug: end: Java_org_openda_costa_CtaStochObserver_ctaCreateNative\n"); + return hstochobs; + +} + + +/* + * Class: org_costa_CtaStochObserver + * Method: getCount + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaStochObserver_getCount +(JNIEnv *env, jobject jStochObs){ + + cta_jni_setJavaEnv(env); + + CTA_StochObs ctaStochObs = cta_jni_getCtaHandle(env, jStochObs); + int nmeasr; + int retVal = CTA_SObs_Count(ctaStochObs, &nmeasr); + if (retVal!=CTA_OK){ + cta_jni_exception(env, "CtaStochObserver", "Could not get Count", retVal); + return 0; + } + return nmeasr; +} + +/* + * Class: org_costa_CtaStochObserver + * Method: ctaStochObserverGetExpectations + * Signature: (I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetExpectations +(JNIEnv *env, jobject jStochObs, jint ctaVector){ + + cta_jni_setJavaEnv(env); + + CTA_StochObs ctaStochObs = cta_jni_getCtaHandle(env, jStochObs); + int retVal = CTA_SObs_GetExpectation(ctaStochObs, ctaVector); + if (retVal!=CTA_OK){ + cta_jni_exception(env, "CtaStochObserver", "Could not get Expectation", retVal); + return; + } + } + +/* + * Class: org_costa_CtaStochObserver + * Method: CtagetObservationDescriptions + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetObservationDescriptions +(JNIEnv *env, jobject jStochObs){ + + cta_jni_setJavaEnv(env); + + CTA_StochObs ctaStochObs = cta_jni_getCtaHandle(env, jStochObs); + CTA_ObsDescr hobsdescr; + int retVal = CTA_SObs_GetDescription(ctaStochObs, &hobsdescr); + if (retVal!=CTA_OK){ + cta_jni_exception(env, "CtaStochObserver", "Could not get Description", retVal); + return 0; + } + return hobsdescr; +} + +/* + * Class: org_costa_CtaStochObserver + * Method: ctaStochObserverGetRealizations + * Signature: (I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetRealizations +(JNIEnv *env, jobject jStochObs, jint ctaVector){ + + cta_jni_setJavaEnv(env); + + CTA_StochObs ctaStochObs = cta_jni_getCtaHandle(env, jStochObs); + int retVal = CTA_SObs_GetRealisation(ctaStochObs, ctaVector); + if (retVal!=CTA_OK){ + cta_jni_exception(env, "CtaStochObserver", "Could not get Realizations", retVal); + return; + } + } + +/* + * Class: org_costa_CtaStochObserver + * Method: ctaStochObserverGetValues + * Signature: (I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetValues +(JNIEnv *env, jobject jStochObs, jint ctaVector){ + + cta_jni_setJavaEnv(env); + + CTA_StochObs ctaStochObs = cta_jni_getCtaHandle(env, jStochObs); + int retVal = CTA_SObs_GetVal(ctaStochObs, ctaVector); + if (retVal!=CTA_OK){ + cta_jni_exception(env, "CtaStochObserver", "Could not get Values", retVal); + return; + } + } + +/* + * Class: org_costa_CtaStochObserver + * Method: ctaStochObserverGetVariances + * Signature: (I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetVariances +(JNIEnv *env, jobject jStochObs, jint ctaVector){ + + cta_jni_setJavaEnv(env); + + CTA_StochObs ctaStochObs = cta_jni_getCtaHandle(env, jStochObs); + int retVal = CTA_SObs_GetVar(ctaStochObs, ctaVector); + if (retVal!=CTA_OK){ + cta_jni_exception(env, "CtaStochObserver", "Could not get Variance", retVal); + return; + } + } + +/* + * Class: org_costa_CtaStochObserver + * Method: ctaGetStandardDeviation + * Signature: (I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetStandardDeviation +(JNIEnv *env, jobject jStochObs, jint ctaVector){ + + cta_jni_setJavaEnv(env); + + CTA_StochObs ctaStochObs = cta_jni_getCtaHandle(env, jStochObs); + int retVal = CTA_SObs_GetStd(ctaStochObs, ctaVector); + if (retVal!=CTA_OK){ + cta_jni_exception(env, "CtaStochObserver", "Could not get Standard deviation", retVal); + return; + } + return; +} + + + + + + + +/* + * Class: org_costa_CtaStochObserver + * Method: ctaStochObserverCreateSelection + * Signature: (Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaStochObserver_ctaCreateSelection +(JNIEnv *env, jobject jStochObs, jstring jSelect){ + + cta_jni_setJavaEnv(env); + + CTA_String select; + /* create COSTA string with selection */ + CTA_String_Create(&select); + const char *sel = env->GetStringUTFChars(jSelect, 0); + CTA_String_Set(select,sel); + env->ReleaseStringUTFChars(jSelect, sel); + + CTA_StochObs hsobsout; + CTA_StochObs ctaStochObs = cta_jni_getCtaHandle(env, jStochObs); + int retVal = CTA_SObs_CreateSel (ctaStochObs, select, &hsobsout); + if (retVal!=CTA_OK){ + cta_jni_exception(env, "CtaStochObserver", "Could not create selection", retVal); + return 0; + } + CTA_String_Free(&select); + + return hsobsout; +} + +/* + * Class: org_costa_CtaStochObserver + * Method: ctaStochObserverCreateTimeSelection + * Signature: (I)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaStochObserver_ctaCreateTimeSelection +(JNIEnv *env, jobject jStochObs, jint timesel){ + + cta_jni_setJavaEnv(env); + + CTA_StochObs hsobsout; + CTA_StochObs ctaStochObs = cta_jni_getCtaHandle(env, jStochObs); + int retVal = CTA_SObs_CreateTimSel (ctaStochObs, timesel, &hsobsout); + if (retVal!=CTA_OK){ + cta_jni_exception(env, "CtaStochObserver", "Could not create selection", retVal); + return 0; + } + return hsobsout; + +} + +/* + * Class: org_costa_CtaStochObserver + * Method: ctaStochObserverEvaluatePDF + * Signature: (II)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaEvaluatePDF +(JNIEnv *env, jobject jStochObs, jint ctaVectorX, jint ctaVectorY){ + + cta_jni_setJavaEnv(env); + + CTA_StochObs ctaStochObs = cta_jni_getCtaHandle(env, jStochObs); + int retVal = CTA_SObs_EvalPDF(ctaStochObs, ctaVectorX, ctaVectorY); + if (retVal!=CTA_OK){ + cta_jni_exception(env, "CtaStochObserver", "Could not get evaluate PDF", retVal); + return; + } + } + + diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaTime.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaTime.cpp new file mode 100644 index 000000000..4b6ea32eb --- /dev/null +++ b/costa/native/openda/bridge/src/org_openda_costa_CtaTime.cpp @@ -0,0 +1,286 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaTime.cpp $ +$Revision: 2872 $, $Date: 2011-11-07 10:48:40 +0100 (Mon, 07 Nov 2011) $ + +OpenDA interface for COSTA. +Copyright (C) 2007 Stef Hummel / Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + + +#include "org_openda_costa_CtaTime.h" +#include "cta_defaults.h" +#include "jni_cta_utils.h" +#include "cta_time.h" +#include "cta_errors.h" + + +/* + * Class: org_costa_CtaTime + * Method: create + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTime_create + (JNIEnv * env, jobject obj_this) +{ + + cta_jni_setJavaEnv(env); + + int handle = CTA_NULL; + + int retVal = CTA_Time_Create((CTA_Time *) &handle); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaTime_Create", "", retVal); + return 0; + } + jint handle2 = (jint) handle; + return handle2; +} + +/* + * Class: org_costa_CtaTime + * Method: getMJD + * Signature: ()D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getMJD + (JNIEnv * env, jobject obj_this) +{ + cta_jni_setJavaEnv(env); + + CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this); + double t1,t2; + int retVal=CTA_Time_GetSpan(ctaHandle, &t1, &t2); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaTime", "Could not getMJD ", retVal); + return 0.0; + } + if (t1!=t2) { + cta_jni_exception(env, "CtaTime", "Could not getMJD this is a time span ",0); + return 0.0; + } + return (jdouble) t2; +} + +/* + * Class: org_costa_CtaTime + * Method: getUserTime + * Signature: ()D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getUserTime + (JNIEnv * env, jobject obj_this) +{ + cta_jni_setJavaEnv(env); + return 0; +} + +/* + * Class: org_costa_CtaTime + * Method: setMJD + * Signature: (D)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaTime_setMJD + (JNIEnv *env, jobject obj_this, jdouble t) +{ + cta_jni_setJavaEnv(env); + + CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this); + int retVal=CTA_Time_SetSpan(ctaHandle, t, t); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaTime", "Could not setMJD ", retVal); + return; + } + //CTA_Time_Export(ctaHandle,CTA_FILE_STDOUT); +} + +/* + * Class: org_costa_CtaTime + * Method: setUserTime + * Signature: (D)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaTime_setUserTime + (JNIEnv *env, jobject, jdouble) +{ + cta_jni_setJavaEnv(env); +} + +/* + * Class: org_costa_CtaTime + * Method: getStep + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_openda_costa_CtaTime_getStep + (JNIEnv * env, jobject obj_this) +{ + cta_jni_setJavaEnv(env); + return 0; +} + +/* + * Class: org_costa_CtaTime + * Method: getBeginMJD + * Signature: ()D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getBeginMJD + (JNIEnv * env, jobject obj_this) +{ + cta_jni_setJavaEnv(env); + + double t1,t2; + CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this); + int retVal=CTA_Time_GetSpan(ctaHandle, &t1, &t2); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaTime", "Could not getBeginMJD ", retVal); + return 0.0; + } + return (jdouble) t1; +} + +/* + * Class: org_costa_CtaTime + * Method: getEndMJD + * Signature: ()D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getEndMJD + (JNIEnv * env, jobject obj_this) +{ + cta_jni_setJavaEnv(env); + + double t1,t2; + CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this); + int retVal=CTA_Time_GetSpan(ctaHandle, &t1, &t2); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaTime", "Could not getEndMJD ", retVal); + return 0.0; + } + return (jdouble) t2; +} + +/* + * Class: org_costa_CtaTime + * Method: getStepMJD + * Signature: ()D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getStepMJD + (JNIEnv * env, jobject obj_this) +{ + cta_jni_setJavaEnv(env); + + double t1; + CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this); + int retVal=CTA_Time_GetStep(ctaHandle, &t1); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaTime", "Could not getStepMJD ", retVal); + return 0.0; + } + return (jdouble) t1; +} + +/* + * Class: org_costa_CtaTime + * Method: getBeginUserTime + * Signature: ()D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getBeginUserTime + (JNIEnv * env, jobject obj_this) +{ + cta_jni_setJavaEnv(env); + return 0; +} + +/* + * Class: org_costa_CtaTime + * Method: getEndUserTime + * Signature: ()D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getEndUserTime + (JNIEnv * env, jobject obj_this) +{ + cta_jni_setJavaEnv(env); + return 0; +} + +/* + * Class: org_costa_CtaTime + * Method: getUserStep + * Signature: ()D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getUserStep + (JNIEnv * env, jobject obj_this) +{ + cta_jni_setJavaEnv(env); + return 0; +} + +/* + * Class: org_costa_CtaTime + * Method: setSpanMJD + * Signature: (DDD)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaTime_setSpanMJD + (JNIEnv *env, jobject obj_this, jdouble tstart, jdouble tend, jdouble tstep) +{ + cta_jni_setJavaEnv(env); + + CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this); + int retVal=CTA_Time_SetSpan(ctaHandle, tstart, tend); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaTime", "Could not setSpanMJD ", retVal); + return; + } + + retVal=CTA_Time_SetStep(ctaHandle, tstep); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaTime", "Could not setSpanMJD ", retVal); + return; + } +} + +/* + * Class: org_costa_CtaTime + * Method: setUserSpan + * Signature: (DDD)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaTime_setUserSpan + (JNIEnv *env, jobject, jdouble, jdouble, jdouble) +{ + cta_jni_setJavaEnv(env); + +} + +/* + * Class: org_costa_CtaTime + * Method: getStepCount + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_openda_costa_CtaTime_getStepCount + (JNIEnv * env, jobject obj_this) +{ + cta_jni_setJavaEnv(env); + return 0; +} + +/* + * Class: org_costa_CtaTime + * Method: clone + * Signature: ()Lorg/openda/interfaces/ITime; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaTime_clone + (JNIEnv * env, jobject obj_this) +{ + cta_jni_setJavaEnv(env); + return 0; +} + diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaTreeVector.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaTreeVector.cpp new file mode 100644 index 000000000..dafc0970a --- /dev/null +++ b/costa/native/openda/bridge/src/org_openda_costa_CtaTreeVector.cpp @@ -0,0 +1,513 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaTreeVector.cpp $ +$Revision: 2763 $, $Date: 2011-09-13 13:52:01 +0200 (Tue, 13 Sep 2011) $ + +OpenDA interface for COSTA. +Copyright (C) 2007 Stef Hummel / Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + + +#include "org_openda_costa_CtaTreeVector.h" +#include "cta_treevector.h" +#include "jni_cta_utils.h" +#include "cta_errors.h" +#include "cta_defaults.h" +#include "cta_metainfo.h" + + +/* + * Class: org_costa_CtaTreeVector + * Method: getId + * Signature: ()Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_openda_costa_CtaTreeVector_getId +(JNIEnv *env, jobject obj_this){ + + cta_jni_setJavaEnv(env); + + char id[CTA_STRLEN_TAG]; + CTA_TreeVector ctaHandle = cta_jni_getCtaHandle(env, obj_this); + + int retVal = CTA_TreeVector_GetTag (ctaHandle, id); + + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaVector", "Could not get ID", retVal); + return NULL; + } + + return env->NewStringUTF(id); + +} + +/* + * Class: org_costa_CtaTreeVector + * Method: ctaGetNumSubTreeVectors + * Signature: (Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaGetNumSubTreeVectors +(JNIEnv *env, jobject obj_this){ + cta_jni_setJavaEnv(env); + CTA_TreeVector ctaHandle = cta_jni_getCtaHandle(env, obj_this); + int numSubTrees; + CTA_TreeVector_GetNumSubTree(ctaHandle, &numSubTrees); + return numSubTrees; +} + +/* + * Class: org_costa_CtaTreeVector + * Method: ctaGetSubTreeVector + * Signature: (Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaGetSubTreeVector +(JNIEnv *env, jobject obj_this, jstring path){ + + cta_jni_setJavaEnv(env); + + CTA_TreeVector ctaHandle = cta_jni_getCtaHandle(env, obj_this); + CTA_TreeVector subtreevec; + const char *spath = env->GetStringUTFChars(path, 0); + + int retVal=CTA_TreeVector_GetSubTreeVec (ctaHandle, spath, &subtreevec); + env->ReleaseStringUTFChars(path, spath); + + + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaVector", "Could not get SubTreeVector", retVal); + return 0; + } + + /* Increase reference count to subtree-vector */ + CTA_TreeVector_IncRefCount(subtreevec); + + return subtreevec; +} + + +/* + * Class: org_costa_ctatreevector + * Method: ctaSetRegGrid + * Signature: ()V + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaSetRegGrid + (JNIEnv * env, jobject obj_this, jint nx, jint ny, jint nz, jdouble x0, jdouble y0, jdouble z0, + jdouble dx, jdouble dy, jdouble dz) +{ + int retval; + CTA_Metainfo minfo; + CTA_TreeVector ctaTreeVec = cta_jni_getCtaHandle(env, obj_this); + char cGrid[]="grid"; + + CTA_Metainfo_Create(&minfo); + + retval = CTA_Metainfo_setRegGrid(minfo,cGrid,nx,ny,nz,x0,y0,z0,dx,dy,dz); + if (retval) return retval; + retval = CTA_TreeVector_SetMetainfo(ctaTreeVec, minfo); + + return retval; +} + + +/* + * Class: org_costa_treevector + * Method: ctaNetcdfClose + * Signature: ()V + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaNetcdfClose + (JNIEnv * env, jobject obj_this, jint ctafilehandle) +{ + cta_jni_setJavaEnv(env); + + int ierr; + int foutput; + + + foutput = ctafilehandle; + ierr = CTA_File_Free(&foutput); + if ( ierr != CTA_OK ) { + cta_jni_exception(env, "CtaTreeVector_export", "Could not close (netcdf) cta_file", ierr); + return -1; + + } + return ierr; + + +} + +/* + * Class: org_resultwriters_NetcdfResultWriter + * Method: ctaNetcdfClose + * Signature: ()V + */ +JNIEXPORT jint JNICALL Java_org_openda_resultwriters_NetcdfResultWriter_ctaNetcdfClose + (JNIEnv * env, jobject obj_this, jint ctafilehandle) +{ + cta_jni_setJavaEnv(env); + + int ierr; + int foutput; + + + foutput = ctafilehandle; + ierr = CTA_File_Free(&foutput); + if ( ierr != CTA_OK ) { + cta_jni_exception(env, "CtaTreeVector_export", "Could not close (netcdf) cta_file", ierr); + return -1; + + } + return ierr; +} + + +/* + * Class: org_costa_treevector + * Method: ctaNetcdfInit + * Signature: (Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaNetcdfInit + (JNIEnv * env, jobject obj_this, jstring path, jstring action) +{ + cta_jni_setJavaEnv(env); + + int ierr; + int foutput; + int soutfile; + int haction; + + const char *spath = env->GetStringUTFChars(path, 0); + const char *saction = env->GetStringUTFChars(action, 0); + + // Maybe we should first check if filename in 'path' does exist and is opened? + // In that case, we need to retrieve the file handle somehow. + + + ierr = CTA_String_Create(&soutfile); + if ( ierr != CTA_OK ) { + cta_jni_exception(env, "CtaTreeVector_netcdfinit", "Could not create string", ierr); + return -1; + } + ierr = CTA_String_Set(soutfile, spath); + if ( ierr != CTA_OK ) { + cta_jni_exception(env, "CtaTreeVector_netcdfinit", "Could not set filename string", ierr); + return -1; + } + // Note: if file already exists, this is not OK. + ierr = CTA_File_Create(&foutput); + if ( ierr != CTA_OK ) { + cta_jni_exception(env, "CtaTreeVector_netcdfinit", "Could not create file handle", ierr); + return -1; + } + // Note: if file is already open, export should occur as 'append' + // in netcdf case. + // Note further, that cta_file_open will delete the contents of an existing netcdf file! + + env->ReleaseStringUTFChars(path, spath); + env->ReleaseStringUTFChars(action, saction); + + CTA_String_Create(&haction); + CTA_String_Set(haction, saction); + ierr = CTA_File_Open(foutput,soutfile,haction); + if ( ierr != CTA_OK ) { + cta_jni_exception(env, "CtaTreeVector_netcdfinit", "Could not open file", ierr); + return -1; + } + + CTA_String_Free(&haction); + CTA_String_Free(&soutfile); + + return foutput; //this is the cta filehandle! + +} + +/* + * Class: org_resultwriters_NetcdfResultWriter + * Method: ctaNetcdfInit + * Signature: (Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_resultwriters_NetcdfResultWriter_ctaNetcdfInit + (JNIEnv * env, jobject obj_this, jstring path, jstring action) +{ + cta_jni_setJavaEnv(env); + + int ierr; + int foutput; + int soutfile; + int haction; + + const char *spath = env->GetStringUTFChars(path, 0); + const char *saction = env->GetStringUTFChars(action, 0); + + // Maybe we should first check if filename in 'path' does exist and is opened? + // In that case, we need to retrieve the file handle somehow. + + + ierr = CTA_String_Create(&soutfile); + if ( ierr != CTA_OK ) { + cta_jni_exception(env, "CtaTreeVector_netcdfinit", "Could not create string", ierr); + return -1; + } + ierr = CTA_String_Set(soutfile, spath); + if ( ierr != CTA_OK ) { + cta_jni_exception(env, "CtaTreeVector_netcdfinit", "Could not set filename string", ierr); + return -1; + } + // Note: if file already exists, this is not OK. + ierr = CTA_File_Create(&foutput); + if ( ierr != CTA_OK ) { + cta_jni_exception(env, "CtaTreeVector_netcdfinit", "Could not create file handle", ierr); + return -1; + } + + // Note: if file is already open, export should occur as 'append' + // in netcdf case + CTA_String_Create(&haction); + CTA_String_Set(haction, saction); + ierr = CTA_File_Open(foutput,soutfile,haction); + if ( ierr != CTA_OK ) { + cta_jni_exception(env, "CtaTreeVector_netcdfinit", "Could not open file", ierr); + return -1; + } + env->ReleaseStringUTFChars(path, spath); + env->ReleaseStringUTFChars(action, saction); + CTA_String_Free(&haction); + ierr = CTA_String_Free(&soutfile); + if ( ierr != CTA_OK ) { + cta_jni_exception(env, "CtaTreeVector_netcdfinit", "Could not free cta_string", ierr); + return -1; + } + + return foutput; //this is the cta filehandle! + +} + + + + + +/* + * Class: org_costa_CtaTreeVector + * Method: ctaTreeExport + * Signature: (Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaExport +(JNIEnv *env, jobject obj_this, jint ctafilehandle){ + + cta_jni_setJavaEnv(env); + + // int soutfile; + int foutput; + + CTA_TreeVector ctaHandletv = cta_jni_getCtaHandle(env, obj_this); + + foutput = ctafilehandle; + + int retVal=CTA_TreeVector_Export (ctaHandletv, foutput); + + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaTreeVector_export", "Could not export Treevector", retVal); + return 0; + } + + return 0; +} + +/* + * Class: org_costa_CtaTreeVector + * Method: ctaImport + * Signature: (Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaImport +(JNIEnv *env, jobject obj_this, jint ctafilehandle){ + + cta_jni_setJavaEnv(env); + + int finput; + + CTA_TreeVector ctaHandletv = cta_jni_getCtaHandle(env, obj_this); + + finput = ctafilehandle; + int retVal=CTA_TreeVector_Import (ctaHandletv, finput); + + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaTreeVector_import", "Could not import Treevector", retVal); + return 0; + } + + return 0; +} + +/* + * Class: org_costa_CtaTreeVector + * Method: ctaVImport + * Signature: (Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaVImport +(JNIEnv *env, jobject obj_this, jint ctafilehandle){ + + cta_jni_setJavaEnv(env); + + int finput; + + CTA_TreeVector ctaHandletv = cta_jni_getCtaHandle(env, obj_this); + + finput = ctafilehandle; + int retVal=CTA_TreeVector_VImport (ctaHandletv, finput); + + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaTreeVector_Vimport", "Could not import Treevector", retVal); + return 0; + } + + return 0; +} + +/* + * Class: org_costa_CtaTreeVector + * Method: ctaGetSubTreeVectorId + * Signature: ()Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_openda_costa_CtaTreeVector_ctaGetSubTreeVectorId +(JNIEnv *env, jobject obj_this, jint index){ + char tag[CTA_STRLEN_TAG]; + cta_jni_setJavaEnv(env); + CTA_TreeVector ctaTreeVec = cta_jni_getCtaHandle(env, obj_this); + CTA_TreeVector_GetSubTreeVecId (ctaTreeVec, index, tag); + return env->NewStringUTF(tag); +} + + +/* + * Class: org_costa_CtaTreeVector + * Method: getCaption + * Signature: ()Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_openda_costa_CtaTreeVector_getCaption +(JNIEnv *env, jobject obj_this){ + cta_jni_exception(env, "CtaTreeVector", "Get Caption is not implemented", 911); + + cta_jni_setJavaEnv(env); + + return NULL; + +} + +/* + * Class: org_costa_CtaTreeVector + * Method: ctaCreateFromVector + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaCreateFromVector +(JNIEnv *env , jobject obj_this, jstring str_id, jstring str_tag, jint ctavectorhandle){ + + cta_jni_setJavaEnv(env); + + int handle = CTA_NULL; + const char *sid = env->GetStringUTFChars(str_id, 0); + const char *stag = env->GetStringUTFChars(str_tag, 0); + int retVal = CTA_TreeVector_Create(sid,stag, &handle); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaTreeVector I", "Could not create treevector", retVal); + return 0; + } + env->ReleaseStringUTFChars(str_id, sid); + env->ReleaseStringUTFChars(str_tag, stag); + + // now immediately fill the treevector! + retVal = CTA_TreeVector_SetVec(handle,ctavectorhandle); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaTreeVector I", "Could not fill the created treevector", retVal); + return 0; + } + + return handle; +}; + +/* + * Class: org_costa_CtaTreeVector + * Method: ctaCreateFromSubtreevectors + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaCreateFromSubtreevectors +(JNIEnv *env , jobject obj_this, jstring str_id, jstring str_tag, jint nsubtrees, jintArray jsubtreevectors){ + + cta_jni_setJavaEnv(env); + + int handle = CTA_NULL; + const char *sid = env->GetStringUTFChars(str_id, 0); + const char *stag = env->GetStringUTFChars(str_tag, 0); + int retVal = CTA_TreeVector_Create(sid,stag, &handle); + env->ReleaseStringUTFChars(str_id, sid); + env->ReleaseStringUTFChars(str_tag, stag); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaTreeVector II", "Could not create treevector", retVal); + return 0; + } + // now concatenate the subtreevectors! + int cCount = env->GetArrayLength(jsubtreevectors); + + jint * csubtreevectors = env->GetIntArrayElements(jsubtreevectors, 0); + int *csub2 = new int[cCount] ; + + // a bit awkward, but it is not possible to retrieve csubtreevectors as int instead of jint? + for (int i=0;iFindClass("org/openda/costa/CtaTreeVector"); + jmethodID constructorID = env->GetMethodID (clsTreeVector, "", "()V"); + jobject jTreeVec = env->NewObject(clsTreeVector, constructorID); + + cta_jni_setCtaHandle(env, jTreeVec, ctaDuplicate); + return jTreeVec; + + + } + + + + + diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaUtils.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaUtils.cpp new file mode 100644 index 000000000..018dd61b0 --- /dev/null +++ b/costa/native/openda/bridge/src/org_openda_costa_CtaUtils.cpp @@ -0,0 +1,39 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaVector.cpp $ +$Revision: 2738 $, $Date: 2011-09-05 10:48:32 +0200 (Mon, 05 Sep 2011) $ + +OpenDA interface for COSTA. +Copyright (C) 2013 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "org_openda_costa_CtaUtils.h" +#include "jni_cta_utils.h" +#include "cta_handles.h" + +/* + * Class: org_openda_costa_CtaUtils + * Method: print_memory + * Signature: (Ljava/lang/String;I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaUtils_print_1memory + (JNIEnv *env, jclass obj_this, jstring where, jint level){ + + cta_jni_setJavaEnv(env); + const char *swhere = env->GetStringUTFChars(where, 0); + CTA_Handle_PrintInfo(swhere); + env->ReleaseStringUTFChars(where, swhere); +} diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaVector.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaVector.cpp new file mode 100644 index 000000000..30972474a --- /dev/null +++ b/costa/native/openda/bridge/src/org_openda_costa_CtaVector.cpp @@ -0,0 +1,547 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaVector.cpp $ +$Revision: 4445 $, $Date: 2014-06-03 08:56:12 +0200 (Tue, 03 Jun 2014) $ + +OpenDA interface for COSTA. +Copyright (C) 2007 Stef Hummel / Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + + +#include "org_openda_costa_CtaVector.h" +#include "cta_vector.h" +#include "jni_cta_utils.h" +#include "cta_errors.h" +#include "cta_defaults.h" + +/* + * Class: org_costa_CtaVector + * Method: setConstant + * Signature: (D)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_setConstant + (JNIEnv * env, jobject obj_this, jdouble val){ + + cta_jni_setJavaEnv(env); + + CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this); + + CTA_Datatype datatype; + CTA_Handle_GetDatatype (ctaHandle, &datatype); + + int retVal; + if (datatype==CTA_VECTOR){ + retVal = CTA_Vector_SetConstant(ctaHandle, &val, CTA_DOUBLE); + } + else { + retVal = CTA_TreeVector_SetConstant(ctaHandle, &val, CTA_DOUBLE); + } + + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaVector", "Could not set constant ", retVal); + return; + } +} + +/* + * Class: org_costa_CtaVector + * Method: scale + * Signature: (D)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_scale + (JNIEnv * env, jobject obj_this, jdouble val){ + + cta_jni_setJavaEnv(env); + + CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this); + + CTA_Datatype datatype; + CTA_Handle_GetDatatype (ctaHandle, &datatype); + + int retVal; + if (datatype==CTA_VECTOR){ + retVal = CTA_Vector_Scal(ctaHandle, val); + } + else { + retVal = CTA_TreeVector_Scal(ctaHandle, val); + } + + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaVector", "Could not scale ", retVal); + return; + } + } + + + +/* + * Class: org_costa_CtaVector + * Method: ctaPrintHandles + * Signature: (Z)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaPrintHandles +(JNIEnv *env, jobject jModel, jstring jlocation){ + + cta_jni_setJavaEnv(env); + + const char *location = env->GetStringUTFChars(jlocation, 0); + + CTA_Handle_PrintInfo(location); + +} + +/* + * Class: org_costa_CtaVector + * Method: setValues + * Signature: ([D)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_setValues + (JNIEnv * env, jobject obj_this, jdoubleArray jValues) { + + cta_jni_setJavaEnv(env); + + CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this); + + int cCount = env->GetArrayLength(jValues); + jboolean isCopy = JNI_FALSE; + jdouble * cValues = env->GetDoubleArrayElements(jValues, &isCopy); + + CTA_Datatype datatype; + CTA_Handle_GetDatatype (ctaHandle, &datatype); + + int retVal; + if (datatype==CTA_VECTOR){ + retVal = CTA_Vector_SetVals(ctaHandle, (void *) cValues, cCount, CTA_DOUBLE); + } + else { + retVal = CTA_TreeVector_SetVals(ctaHandle, (void *) cValues, cCount, CTA_DOUBLE); + } + + env->ReleaseDoubleArrayElements(jValues, cValues,0); + + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaVector", "Could not set values", retVal); + return; + } +} + + +/* + * Class: org_costa_CtaVector + * Method: getValues + * Signature: ()[D + */ +JNIEXPORT jdoubleArray JNICALL Java_org_openda_costa_CtaVector_getValues + (JNIEnv * env, jobject obj_this) { + + cta_jni_setJavaEnv(env); + + CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this); + + int cCount = 0; + + CTA_Datatype datatype; + CTA_Handle_GetDatatype (ctaHandle, &datatype); + + int retVal; + if (datatype==CTA_VECTOR){ + retVal = CTA_Vector_GetSize(ctaHandle, &cCount); + } else { + retVal = CTA_TreeVector_GetSize(ctaHandle, &cCount); + } + + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaVector", "Could not get size", retVal); + return NULL; + } + + jdoubleArray jValues = env->NewDoubleArray(cCount); + double * cValues = new double[cCount]; + + if (datatype==CTA_VECTOR){ + retVal = CTA_Vector_GetVals(ctaHandle, (void *) cValues, cCount, CTA_DOUBLE); + } + else { + retVal = CTA_TreeVector_GetVals(ctaHandle, (void *) cValues, cCount, CTA_DOUBLE); + } + + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaVector", "Could not get values", retVal); + return NULL; + } + else { + env->SetDoubleArrayRegion(jValues, 0, cCount, cValues); + } + delete [] cValues; + return jValues; +} + +/* + * Class: org_costa_CtaVector + * Method: setValue + * Signature: (ID)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_setValue + (JNIEnv * env, jobject obj_this, jint indx, jdouble val){ + + cta_jni_setJavaEnv(env); + + CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this); + + CTA_Datatype datatype; + CTA_Handle_GetDatatype (ctaHandle, &datatype); + + int retVal; + if (datatype==CTA_VECTOR){ + retVal = CTA_Vector_SetVal(ctaHandle, indx+1, &val, CTA_DOUBLE); + } + else { + retVal = CTA_TreeVector_SetVal(ctaHandle, indx+1, &val, CTA_DOUBLE); + } + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaVector", "Could not set value ", retVal); + return; + } +} + +/* + * Class: org_costa_CtaVector + * Method: getValue + * Signature: (I)D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaVector_getValue + (JNIEnv * env, jobject obj_this, jint indx){ + + cta_jni_setJavaEnv(env); + + CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this); + + double val; + CTA_Datatype datatype; + CTA_Handle_GetDatatype (ctaHandle, &datatype); + + int retVal; + if (datatype==CTA_VECTOR){ + retVal = CTA_Vector_GetVal(ctaHandle, indx+1, &val, CTA_DOUBLE); + } + else { + retVal = CTA_TreeVector_GetVal(ctaHandle, indx+1, &val, CTA_DOUBLE); + } + + + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaVector", "Could not get value ", retVal); + return 0.0; + } + return val; +} + +/* + * Class: org_costa_CtaVector + * Method: getSize + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaVector_getSize + (JNIEnv * env, jobject obj_this){ + + cta_jni_setJavaEnv(env); + + CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this); + int n; + + CTA_Datatype datatype; + CTA_Handle_GetDatatype (ctaHandle, &datatype); + + int retVal; + if (datatype==CTA_VECTOR){ + retVal = CTA_Vector_GetSize(ctaHandle, &n); + } + else { + retVal = CTA_TreeVector_GetSize(ctaHandle, &n); + } + + + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaVector", "Could not get value ", retVal); + return 0; + } + return n; +} + +/* + * Class: org_costa_CtaVector + * Method: norm2 + * Signature: ()D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaVector_norm2 + (JNIEnv * env, jobject obj_this){ + + cta_jni_setJavaEnv(env); + + CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this); + double nrm; + + CTA_Datatype datatype; + CTA_Handle_GetDatatype (ctaHandle, &datatype); + + int retVal; + if (datatype==CTA_VECTOR){ + retVal = CTA_Vector_Nrm2(ctaHandle, &nrm); + } + else { + retVal = CTA_TreeVector_Nrm2(ctaHandle, &nrm); + } + + + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaVector", "Could not compute 2-norm ", retVal); + return 0.0; + } + return nrm; +} + +/* + * Class: org_costa_CtaVector + * Method: create + * Signature: (I)I + */ +JNIEXPORT jint JNICALL Java_org_openda_costa_CtaVector_ctaCreate + (JNIEnv * env, jobject obj_this, jint length) { + + cta_jni_setJavaEnv(env); + + int handle = CTA_NULL; + int retVal = CTA_Vector_Create(CTA_DEFAULT_VECTOR, length, CTA_DOUBLE, CTA_NULL, &handle); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaVector", "Could not create vector", retVal); + + return 0; + } else { + double initialValue = 0; + int retVal = CTA_Vector_SetConstant(handle, &initialValue, CTA_DOUBLE); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaVector", "Could not set initial values", retVal); + return 0; + } + } + return handle; +} + +/* + * Class: org_costa_CtaVector + * Method: ctaDotProduct + * Signature: (I)D + */ +JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaVector_ctaDotProduct + (JNIEnv * env, jobject obj_this, jint handleToOtherVector) { + + cta_jni_setJavaEnv(env); + + CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this); + double dot; + + CTA_Datatype datatype; + CTA_Handle_GetDatatype (ctaHandle, &datatype); + + int retVal; + if (datatype==CTA_VECTOR){ + retVal = CTA_Vector_Dot(ctaHandle, handleToOtherVector, &dot); + } + else { + retVal = CTA_TreeVector_Dot(ctaHandle, handleToOtherVector, &dot); + } + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaVector", "Could not compute dot product", retVal); + return 0.0; + } + return dot; +} + +/* + * Class: org_costa_CtaVector + * Method: ctaPointwiseDivide + * Signature: (I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaPointwiseDivide + (JNIEnv *env, jobject obj_this, jint handleToOtherVector){ + + cta_jni_setJavaEnv(env); + + CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this); + + CTA_Datatype datatype; + CTA_Handle_GetDatatype (ctaHandle, &datatype); + + int retVal; + if (datatype==CTA_VECTOR){ + retVal = CTA_Vector_ElmDiv(ctaHandle, handleToOtherVector); + } + else { + retVal = CTA_TreeVector_ElmDiv(ctaHandle, handleToOtherVector); + } + + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaVector", "Could not compute pointwise division", retVal); + return; + } +} + +/* + * Class: org_costa_CtaVector + * Method: ctaPointwiseMultiply + * Signature: (I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaPointwiseMultiply + (JNIEnv *env, jobject obj_this, jint handleToOtherVector){ + + cta_jni_setJavaEnv(env); + + CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this); + + CTA_Datatype datatype; + CTA_Handle_GetDatatype (ctaHandle, &datatype); + + int retVal; + if (datatype==CTA_VECTOR){ + retVal = CTA_Vector_ElmProd(ctaHandle, handleToOtherVector); + } + else { + retVal = CTA_TreeVector_ElmProd(ctaHandle, handleToOtherVector); + } + + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaVector", "Could not compute pointwise multiply", retVal); + return; + } +} + +/* + * Class: org_costa_CtaVectorJava_org_openda_costa_CtaObject_ctaInit + * Method: ctaSetValues + * Signature: (I)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaSetValues + (JNIEnv *env, jobject obj_this, jint handleToOtherVector){ + + cta_jni_setJavaEnv(env); + + CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this); + + CTA_Datatype datatype; + CTA_Handle_GetDatatype (ctaHandle, &datatype); + + int retVal; + if (datatype==CTA_VECTOR){ + retVal=CTA_Vector_Copy( handleToOtherVector, ctaHandle); + } + else { + retVal=CTA_TreeVector_Copy( handleToOtherVector, ctaHandle); + } + + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaVector", "Could not copy values of vectors", retVal); + return; + } + +} + +/* + * Class: org_costa_CtaVector + * Method: ctaAxpy + * Signature: (DI)V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaAxpy + (JNIEnv *env, jobject obj_this, jdouble alpha, jint handleToOtherVector){ + + cta_jni_setJavaEnv(env); + + CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this); + + CTA_Datatype datatype; + CTA_Handle_GetDatatype (ctaHandle, &datatype); + + int retVal; + if (datatype==CTA_VECTOR){ + retVal=CTA_Vector_Axpy(ctaHandle, alpha, handleToOtherVector); + } + else { + retVal=CTA_TreeVector_Axpy(ctaHandle, alpha, handleToOtherVector); + } + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaVector", "Could not compute axpy", retVal); + return; + } +} + +/* + * Class: org_costa_CtaVector + * Method: clone + * Signature: ()Lorg/openda/interfaces/IVector; + */ +JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaVector_clone + (JNIEnv *env, jobject obj_this){ + + cta_jni_setJavaEnv(env); + + CTA_Vector ctaHandle = cta_jni_getCtaHandle(env, obj_this); + CTA_Vector ctaDuplicate; + int retVal=CTA_Vector_Duplicate(ctaHandle, &ctaDuplicate); + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaVector", "Could not duplicate", retVal); + return NULL; + } + + /* Create a Java CtaVector */ + jclass clsVector = env->FindClass("org/openda/costa/CtaVector"); + jmethodID constructorID = env->GetMethodID (clsVector, "", "()V"); + jobject jVec = env->NewObject(clsVector, constructorID); + + cta_jni_setCtaHandle(env, jVec, ctaDuplicate); + + return jVec; + } + + +/* + * Class: org_costa_CtaVector + * Method: ctaSqrt + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaSqrt + (JNIEnv *env, jobject obj_this){ + + cta_jni_setJavaEnv(env); + + CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this); + + CTA_Datatype datatype; + CTA_Handle_GetDatatype (ctaHandle, &datatype); + + int retVal; + if (datatype==CTA_VECTOR){ + retVal=CTA_Vector_ElmSqrt(ctaHandle); + } + else { + retVal=CTA_TreeVector_ElmSqrt(ctaHandle); + } + if ( retVal != CTA_OK ) { + cta_jni_exception(env, "CtaVector", "Could compute sqrt", retVal); + return; + } +} + + + diff --git a/costa/native/openda/bridge/src/org_openda_resultwriters_NativeResultWriter.cpp b/costa/native/openda/bridge/src/org_openda_resultwriters_NativeResultWriter.cpp new file mode 100644 index 000000000..268cad73e --- /dev/null +++ b/costa/native/openda/bridge/src/org_openda_resultwriters_NativeResultWriter.cpp @@ -0,0 +1,122 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaVector.cpp $ +$Revision: 2738 $, $Date: 2011-09-05 10:48:32 +0200 (Mon, 05 Sep 2011) $ + +OpenDA interface for COSTA. +Copyright (C) 2013 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include "org_openda_resultwriters_NativeResultWriter.h" +#include "jni_cta_utils.h" +#include "cta_resultwriter.h" + +char *copyJavaStringToNormalString(JNIEnv *env, jstring javaString){ + + if (javaString==NULL) return NULL; + const char *sJava=env->GetStringUTFChars(javaString, 0); + size_t n=strlen(sJava)+1; + char *sString = (char*) malloc(n*sizeof(char)); + for (size_t i=0;iReleaseStringUTFChars(javaString, sJava); + return sString; + +} + + + +/* + * Class: org_openda_resultwriters_NativeResultWriter + * Method: putMessage + * Signature: (ILjava/lang/String;Ljava/lang/String;Ljava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_openda_resultwriters_NativeResultWriter_putMessage + (JNIEnv *env, jobject, jint iDWriter, jstring config, jstring workingDir, jstring message){ + cta_jni_setJavaEnv(env); + + char *sConfig = copyJavaStringToNormalString(env, config); + char *sWorkingDir = copyJavaStringToNormalString(env, workingDir); + char *sMessage = copyJavaStringToNormalString(env, message); + + // To do add method + CTA_Resultwriter_putmessage(iDWriter, sConfig, sWorkingDir, sMessage); + + if (sConfig) free(sConfig); + if (sWorkingDir) free(sWorkingDir); + if (sMessage) free(sMessage); +} + +/* + * Class: org_openda_resultwriters_NativeResultWriter + * Method: putValue + * Signature: (ILjava/lang/String;Ljava/lang/String;Ljava/lang/String;IILjava/lang/String;I)V + */ +JNIEXPORT void JNICALL Java_org_openda_resultwriters_NativeResultWriter_putValue + (JNIEnv *env, jobject, jint iDWriter, jstring config, jstring workingDir, jstring id, jint handle, jint outputLevel, jstring context, jint iteration){ + cta_jni_setJavaEnv(env); + + char *sConfig = copyJavaStringToNormalString(env, config); + char *sWorkingDir = copyJavaStringToNormalString(env, workingDir); + char *sId = copyJavaStringToNormalString(env, id); + char *sContext = copyJavaStringToNormalString(env, context); + + // To do add method + CTA_Resultwriter_putvalue(iDWriter, sConfig, sWorkingDir, sId, handle, outputLevel, sContext, iteration); + if (sConfig) free(sConfig); + if (sWorkingDir) free(sWorkingDir); + if (sId) free(sId); + if (sContext) free(sContext); +} +/* + * Class: org_openda_resultwriters_NativeResultWriter + * Method: putIterationReport + * Signature: (ILjava/lang/String;Ljava/lang/String;IDI)V + */ +JNIEXPORT void JNICALL Java_org_openda_resultwriters_NativeResultWriter_putIterationReport + (JNIEnv *env, jobject, jint iDWriter, jstring config, jstring workingDir, jint iteration, jdouble cost, jint handle){ + cta_jni_setJavaEnv(env); + + char *sConfig = copyJavaStringToNormalString(env, config); + char *sWorkingDir = copyJavaStringToNormalString(env, workingDir); + + // To do add method + CTA_Resultwriter_putiterationreport(iDWriter, sConfig, sWorkingDir, iteration, cost, handle); + + if (sConfig) free(sConfig); + if (sWorkingDir) free(sWorkingDir); +} + +/* + * Class: org_openda_resultwriters_NativeResultWriter + * Method: free + * Signature: (I)V + */ +JNIEXPORT void JNICALL Java_org_openda_resultwriters_NativeResultWriter_free + (JNIEnv *, jobject, jint iDWriter){ + + // To do add method + CTA_Resultwriter_free(iDWriter); +} + + + + + + + diff --git a/costa/native/openda/bridge/src/org_openda_resultwriters_NetcdfResultWriterNative.cpp b/costa/native/openda/bridge/src/org_openda_resultwriters_NetcdfResultWriterNative.cpp new file mode 100644 index 000000000..ababb0bbd --- /dev/null +++ b/costa/native/openda/bridge/src/org_openda_resultwriters_NetcdfResultWriterNative.cpp @@ -0,0 +1,111 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaVector.cpp $ +$Revision: 2738 $, $Date: 2011-09-05 10:48:32 +0200 (Mon, 05 Sep 2011) $ + +OpenDA interface for COSTA. +Copyright (C) 2013 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include "org_openda_resultwriters_NetcdfResultWriterNative.h" +#include "jni_cta_utils.h" +#include "cta_resultwriter.h" + + +CTA_String copyJavaStringToCostaString(JNIEnv *env, jstring javaString){ + + int ierr; + CTA_String hString=CTA_NULL; + + if (javaString==NULL) return hString; + const char *sJava=env->GetStringUTFChars(javaString, 0); + size_t n=strlen(sJava)+1; + char *sString = (char*) malloc(n*sizeof(char)); + for (size_t i=0;iReleaseStringUTFChars(javaString, sJava); + + ierr=CTA_String_Create(&hString); + if (ierr!=CTA_OK){ + cta_jni_exception(env, "copyJavaStringToCostaString", "Cannot create COSTA string.", ierr); + } + ierr=CTA_String_Set(hString, sString); + if (ierr!=CTA_OK){ + cta_jni_exception(env, "copyJavaStringToCostaString", "Cannot set value of COSTA string.", ierr); + } + + free(sString); + + return hString; +} + + +/* + * Class: org_openda_resultwriters_NetcdfResultWriterNative + * Method: ctaNetcdfInit + * Signature: (Ljava/lang/String;Ljava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_org_openda_resultwriters_NetcdfResultWriterNative_ctaNetcdfInit + (JNIEnv *env , jobject , jstring netcdfname, jstring action){ + + cta_jni_setJavaEnv(env); + + int ierr; + CTA_String hFilename = copyJavaStringToCostaString(env, netcdfname); + CTA_String hAction = copyJavaStringToCostaString(env, action); + CTA_File hFile; + + ierr=CTA_File_Create(&hFile); + if (ierr!=CTA_OK){ + cta_jni_exception(env, "Java_org_openda_resultwriters_NetcdfResultWriterNative_ctaNetcdfInit", + "Cannot create COSTA file.", ierr); + } + + ierr=CTA_File_Open(hFile,hFilename,hAction); + if (ierr!=CTA_OK){ + cta_jni_exception(env, "Java_org_openda_resultwriters_NetcdfResultWriterNative_ctaNetcdfInit", + "Cannot open Netcdf file.", ierr); + } + + CTA_String_Free(&hFilename); + CTA_String_Free(&hAction); + return hFile; + +} + +/* + * Class: org_openda_resultwriters_NetcdfResultWriterNative + * Method: ctaNetcdfClose + * Signature: (I)I + */ +JNIEXPORT jint JNICALL Java_org_openda_resultwriters_NetcdfResultWriterNative_ctaNetcdfClose + (JNIEnv *env, jobject, jint hFile){ + + cta_jni_setJavaEnv(env); + + int ierr; + int iHFile = hFile; + ierr=CTA_File_Free(&iHFile); + if (ierr!=CTA_OK){ + cta_jni_exception(env, "Java_org_openda_resultwriters_NetcdfResultWriterNative_ctaNetcdfInit", + "Cannot close Netcdf file.", ierr); + } + return ierr; +} + + From dabf0c4e5de77cc19a3d0b3433d912c5d9b62271 Mon Sep 17 00:00:00 2001 From: Werner Kramer Date: Wed, 13 Sep 2023 12:32:30 +0200 Subject: [PATCH 03/15] Move openda/bridge to bridge --- costa/native/CMakeLists.txt | 2 +- .../bridge/include/jni_cta_CtaObsdescr_NativeToJava.c | 0 .../bridge/include/jni_cta_CtaObsdescr_NativeToJava.h | 0 costa/native/{openda => }/bridge/include/jni_cta_utils.h | 0 costa/native/{openda => }/bridge/include/jni_datatypes.h | 0 .../{openda => }/bridge/include/org_openda_costa_CtaArray.h | 0 .../bridge/include/org_openda_costa_CtaInitialize.h | 0 .../bridge/include/org_openda_costa_CtaModelState.h | 0 .../{openda => }/bridge/include/org_openda_costa_CtaObject.h | 0 .../include/org_openda_costa_CtaObservationDescriptions.h | 0 .../bridge/include/org_openda_costa_CtaOpenDaModel.h | 0 .../{openda => }/bridge/include/org_openda_costa_CtaParallel.h | 0 .../bridge/include/org_openda_costa_CtaRelationTable.h | 0 .../bridge/include/org_openda_costa_CtaStochObserver.h | 0 .../{openda => }/bridge/include/org_openda_costa_CtaTime.h | 0 .../bridge/include/org_openda_costa_CtaTreeVector.h | 0 .../{openda => }/bridge/include/org_openda_costa_CtaUtils.h | 0 .../{openda => }/bridge/include/org_openda_costa_CtaVector.h | 0 .../include/org_openda_resultwriters_NativeResultWriter.h | 0 .../include/org_openda_resultwriters_NetcdfResultWriterNative.h | 0 costa/native/{openda => }/bridge/src/CMakeLists.txt | 0 costa/native/{openda => }/bridge/src/jni_cta_utils.cpp | 0 .../{openda => }/bridge/src/org_openda_costa_CtaArray.cpp | 0 .../{openda => }/bridge/src/org_openda_costa_CtaInitialize.cpp | 0 .../{openda => }/bridge/src/org_openda_costa_CtaModelState.cpp | 0 .../{openda => }/bridge/src/org_openda_costa_CtaObject.cpp | 0 .../bridge/src/org_openda_costa_CtaObservationDescriptions.cpp | 0 .../{openda => }/bridge/src/org_openda_costa_CtaOpenDaModel.cpp | 0 .../{openda => }/bridge/src/org_openda_costa_CtaParallel.cpp | 0 .../bridge/src/org_openda_costa_CtaRelationTable.cpp | 0 .../bridge/src/org_openda_costa_CtaStochObserver.cpp | 0 .../native/{openda => }/bridge/src/org_openda_costa_CtaTime.cpp | 0 .../{openda => }/bridge/src/org_openda_costa_CtaTreeVector.cpp | 0 .../{openda => }/bridge/src/org_openda_costa_CtaUtils.cpp | 0 .../{openda => }/bridge/src/org_openda_costa_CtaVector.cpp | 0 .../bridge/src/org_openda_resultwriters_NativeResultWriter.cpp | 0 .../src/org_openda_resultwriters_NetcdfResultWriterNative.cpp | 0 37 files changed, 1 insertion(+), 1 deletion(-) rename costa/native/{openda => }/bridge/include/jni_cta_CtaObsdescr_NativeToJava.c (100%) rename costa/native/{openda => }/bridge/include/jni_cta_CtaObsdescr_NativeToJava.h (100%) rename costa/native/{openda => }/bridge/include/jni_cta_utils.h (100%) rename costa/native/{openda => }/bridge/include/jni_datatypes.h (100%) rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaArray.h (100%) rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaInitialize.h (100%) rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaModelState.h (100%) rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaObject.h (100%) rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaObservationDescriptions.h (100%) rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaOpenDaModel.h (100%) rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaParallel.h (100%) rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaRelationTable.h (100%) rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaStochObserver.h (100%) rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaTime.h (100%) rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaTreeVector.h (100%) rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaUtils.h (100%) rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaVector.h (100%) rename costa/native/{openda => }/bridge/include/org_openda_resultwriters_NativeResultWriter.h (100%) rename costa/native/{openda => }/bridge/include/org_openda_resultwriters_NetcdfResultWriterNative.h (100%) rename costa/native/{openda => }/bridge/src/CMakeLists.txt (100%) rename costa/native/{openda => }/bridge/src/jni_cta_utils.cpp (100%) rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaArray.cpp (100%) rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaInitialize.cpp (100%) rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaModelState.cpp (100%) rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaObject.cpp (100%) rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaObservationDescriptions.cpp (100%) rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaOpenDaModel.cpp (100%) rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaParallel.cpp (100%) rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaRelationTable.cpp (100%) rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaStochObserver.cpp (100%) rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaTime.cpp (100%) rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaTreeVector.cpp (100%) rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaUtils.cpp (100%) rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaVector.cpp (100%) rename costa/native/{openda => }/bridge/src/org_openda_resultwriters_NativeResultWriter.cpp (100%) rename costa/native/{openda => }/bridge/src/org_openda_resultwriters_NetcdfResultWriterNative.cpp (100%) diff --git a/costa/native/CMakeLists.txt b/costa/native/CMakeLists.txt index 92c16cb1e..a84fb4394 100644 --- a/costa/native/CMakeLists.txt +++ b/costa/native/CMakeLists.txt @@ -7,5 +7,5 @@ add_subdirectory(external/lapack) add_subdirectory(cta/src) add_subdirectory(cta_f90/generated) -add_subdirectory(openda/bridge/src) +add_subdirectory(bridge/src) diff --git a/costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.c b/costa/native/bridge/include/jni_cta_CtaObsdescr_NativeToJava.c similarity index 100% rename from costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.c rename to costa/native/bridge/include/jni_cta_CtaObsdescr_NativeToJava.c diff --git a/costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.h b/costa/native/bridge/include/jni_cta_CtaObsdescr_NativeToJava.h similarity index 100% rename from costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.h rename to costa/native/bridge/include/jni_cta_CtaObsdescr_NativeToJava.h diff --git a/costa/native/openda/bridge/include/jni_cta_utils.h b/costa/native/bridge/include/jni_cta_utils.h similarity index 100% rename from costa/native/openda/bridge/include/jni_cta_utils.h rename to costa/native/bridge/include/jni_cta_utils.h diff --git a/costa/native/openda/bridge/include/jni_datatypes.h b/costa/native/bridge/include/jni_datatypes.h similarity index 100% rename from costa/native/openda/bridge/include/jni_datatypes.h rename to costa/native/bridge/include/jni_datatypes.h diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaArray.h b/costa/native/bridge/include/org_openda_costa_CtaArray.h similarity index 100% rename from costa/native/openda/bridge/include/org_openda_costa_CtaArray.h rename to costa/native/bridge/include/org_openda_costa_CtaArray.h diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaInitialize.h b/costa/native/bridge/include/org_openda_costa_CtaInitialize.h similarity index 100% rename from costa/native/openda/bridge/include/org_openda_costa_CtaInitialize.h rename to costa/native/bridge/include/org_openda_costa_CtaInitialize.h diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaModelState.h b/costa/native/bridge/include/org_openda_costa_CtaModelState.h similarity index 100% rename from costa/native/openda/bridge/include/org_openda_costa_CtaModelState.h rename to costa/native/bridge/include/org_openda_costa_CtaModelState.h diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaObject.h b/costa/native/bridge/include/org_openda_costa_CtaObject.h similarity index 100% rename from costa/native/openda/bridge/include/org_openda_costa_CtaObject.h rename to costa/native/bridge/include/org_openda_costa_CtaObject.h diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaObservationDescriptions.h b/costa/native/bridge/include/org_openda_costa_CtaObservationDescriptions.h similarity index 100% rename from costa/native/openda/bridge/include/org_openda_costa_CtaObservationDescriptions.h rename to costa/native/bridge/include/org_openda_costa_CtaObservationDescriptions.h diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaOpenDaModel.h b/costa/native/bridge/include/org_openda_costa_CtaOpenDaModel.h similarity index 100% rename from costa/native/openda/bridge/include/org_openda_costa_CtaOpenDaModel.h rename to costa/native/bridge/include/org_openda_costa_CtaOpenDaModel.h diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaParallel.h b/costa/native/bridge/include/org_openda_costa_CtaParallel.h similarity index 100% rename from costa/native/openda/bridge/include/org_openda_costa_CtaParallel.h rename to costa/native/bridge/include/org_openda_costa_CtaParallel.h diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaRelationTable.h b/costa/native/bridge/include/org_openda_costa_CtaRelationTable.h similarity index 100% rename from costa/native/openda/bridge/include/org_openda_costa_CtaRelationTable.h rename to costa/native/bridge/include/org_openda_costa_CtaRelationTable.h diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaStochObserver.h b/costa/native/bridge/include/org_openda_costa_CtaStochObserver.h similarity index 100% rename from costa/native/openda/bridge/include/org_openda_costa_CtaStochObserver.h rename to costa/native/bridge/include/org_openda_costa_CtaStochObserver.h diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaTime.h b/costa/native/bridge/include/org_openda_costa_CtaTime.h similarity index 100% rename from costa/native/openda/bridge/include/org_openda_costa_CtaTime.h rename to costa/native/bridge/include/org_openda_costa_CtaTime.h diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaTreeVector.h b/costa/native/bridge/include/org_openda_costa_CtaTreeVector.h similarity index 100% rename from costa/native/openda/bridge/include/org_openda_costa_CtaTreeVector.h rename to costa/native/bridge/include/org_openda_costa_CtaTreeVector.h diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaUtils.h b/costa/native/bridge/include/org_openda_costa_CtaUtils.h similarity index 100% rename from costa/native/openda/bridge/include/org_openda_costa_CtaUtils.h rename to costa/native/bridge/include/org_openda_costa_CtaUtils.h diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaVector.h b/costa/native/bridge/include/org_openda_costa_CtaVector.h similarity index 100% rename from costa/native/openda/bridge/include/org_openda_costa_CtaVector.h rename to costa/native/bridge/include/org_openda_costa_CtaVector.h diff --git a/costa/native/openda/bridge/include/org_openda_resultwriters_NativeResultWriter.h b/costa/native/bridge/include/org_openda_resultwriters_NativeResultWriter.h similarity index 100% rename from costa/native/openda/bridge/include/org_openda_resultwriters_NativeResultWriter.h rename to costa/native/bridge/include/org_openda_resultwriters_NativeResultWriter.h diff --git a/costa/native/openda/bridge/include/org_openda_resultwriters_NetcdfResultWriterNative.h b/costa/native/bridge/include/org_openda_resultwriters_NetcdfResultWriterNative.h similarity index 100% rename from costa/native/openda/bridge/include/org_openda_resultwriters_NetcdfResultWriterNative.h rename to costa/native/bridge/include/org_openda_resultwriters_NetcdfResultWriterNative.h diff --git a/costa/native/openda/bridge/src/CMakeLists.txt b/costa/native/bridge/src/CMakeLists.txt similarity index 100% rename from costa/native/openda/bridge/src/CMakeLists.txt rename to costa/native/bridge/src/CMakeLists.txt diff --git a/costa/native/openda/bridge/src/jni_cta_utils.cpp b/costa/native/bridge/src/jni_cta_utils.cpp similarity index 100% rename from costa/native/openda/bridge/src/jni_cta_utils.cpp rename to costa/native/bridge/src/jni_cta_utils.cpp diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaArray.cpp b/costa/native/bridge/src/org_openda_costa_CtaArray.cpp similarity index 100% rename from costa/native/openda/bridge/src/org_openda_costa_CtaArray.cpp rename to costa/native/bridge/src/org_openda_costa_CtaArray.cpp diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaInitialize.cpp b/costa/native/bridge/src/org_openda_costa_CtaInitialize.cpp similarity index 100% rename from costa/native/openda/bridge/src/org_openda_costa_CtaInitialize.cpp rename to costa/native/bridge/src/org_openda_costa_CtaInitialize.cpp diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaModelState.cpp b/costa/native/bridge/src/org_openda_costa_CtaModelState.cpp similarity index 100% rename from costa/native/openda/bridge/src/org_openda_costa_CtaModelState.cpp rename to costa/native/bridge/src/org_openda_costa_CtaModelState.cpp diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaObject.cpp b/costa/native/bridge/src/org_openda_costa_CtaObject.cpp similarity index 100% rename from costa/native/openda/bridge/src/org_openda_costa_CtaObject.cpp rename to costa/native/bridge/src/org_openda_costa_CtaObject.cpp diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaObservationDescriptions.cpp b/costa/native/bridge/src/org_openda_costa_CtaObservationDescriptions.cpp similarity index 100% rename from costa/native/openda/bridge/src/org_openda_costa_CtaObservationDescriptions.cpp rename to costa/native/bridge/src/org_openda_costa_CtaObservationDescriptions.cpp diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaOpenDaModel.cpp b/costa/native/bridge/src/org_openda_costa_CtaOpenDaModel.cpp similarity index 100% rename from costa/native/openda/bridge/src/org_openda_costa_CtaOpenDaModel.cpp rename to costa/native/bridge/src/org_openda_costa_CtaOpenDaModel.cpp diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaParallel.cpp b/costa/native/bridge/src/org_openda_costa_CtaParallel.cpp similarity index 100% rename from costa/native/openda/bridge/src/org_openda_costa_CtaParallel.cpp rename to costa/native/bridge/src/org_openda_costa_CtaParallel.cpp diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaRelationTable.cpp b/costa/native/bridge/src/org_openda_costa_CtaRelationTable.cpp similarity index 100% rename from costa/native/openda/bridge/src/org_openda_costa_CtaRelationTable.cpp rename to costa/native/bridge/src/org_openda_costa_CtaRelationTable.cpp diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaStochObserver.cpp b/costa/native/bridge/src/org_openda_costa_CtaStochObserver.cpp similarity index 100% rename from costa/native/openda/bridge/src/org_openda_costa_CtaStochObserver.cpp rename to costa/native/bridge/src/org_openda_costa_CtaStochObserver.cpp diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaTime.cpp b/costa/native/bridge/src/org_openda_costa_CtaTime.cpp similarity index 100% rename from costa/native/openda/bridge/src/org_openda_costa_CtaTime.cpp rename to costa/native/bridge/src/org_openda_costa_CtaTime.cpp diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaTreeVector.cpp b/costa/native/bridge/src/org_openda_costa_CtaTreeVector.cpp similarity index 100% rename from costa/native/openda/bridge/src/org_openda_costa_CtaTreeVector.cpp rename to costa/native/bridge/src/org_openda_costa_CtaTreeVector.cpp diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaUtils.cpp b/costa/native/bridge/src/org_openda_costa_CtaUtils.cpp similarity index 100% rename from costa/native/openda/bridge/src/org_openda_costa_CtaUtils.cpp rename to costa/native/bridge/src/org_openda_costa_CtaUtils.cpp diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaVector.cpp b/costa/native/bridge/src/org_openda_costa_CtaVector.cpp similarity index 100% rename from costa/native/openda/bridge/src/org_openda_costa_CtaVector.cpp rename to costa/native/bridge/src/org_openda_costa_CtaVector.cpp diff --git a/costa/native/openda/bridge/src/org_openda_resultwriters_NativeResultWriter.cpp b/costa/native/bridge/src/org_openda_resultwriters_NativeResultWriter.cpp similarity index 100% rename from costa/native/openda/bridge/src/org_openda_resultwriters_NativeResultWriter.cpp rename to costa/native/bridge/src/org_openda_resultwriters_NativeResultWriter.cpp diff --git a/costa/native/openda/bridge/src/org_openda_resultwriters_NetcdfResultWriterNative.cpp b/costa/native/bridge/src/org_openda_resultwriters_NetcdfResultWriterNative.cpp similarity index 100% rename from costa/native/openda/bridge/src/org_openda_resultwriters_NetcdfResultWriterNative.cpp rename to costa/native/bridge/src/org_openda_resultwriters_NetcdfResultWriterNative.cpp From 888b7cf5876010fa8911b430f48b94dfeead2f2e Mon Sep 17 00:00:00 2001 From: Werner Kramer Date: Wed, 13 Sep 2023 12:37:30 +0200 Subject: [PATCH 04/15] Link libraries, add install --- costa/native/.gitignore | 5 + costa/native/CMakeLists.txt | 13 +- costa/native/bridge/src/CMakeLists.txt | 14 +- costa/native/cta/src/CMakeLists.txt | 15 +- .../native/cta_f90/cta_f90-create-interfaces | 948 ++++++++++++++++++ costa/native/cta_f90/generated/CMakeLists.txt | 3 +- 6 files changed, 985 insertions(+), 13 deletions(-) create mode 100644 costa/native/.gitignore create mode 100644 costa/native/cta_f90/cta_f90-create-interfaces diff --git a/costa/native/.gitignore b/costa/native/.gitignore new file mode 100644 index 000000000..b8d3856e6 --- /dev/null +++ b/costa/native/.gitignore @@ -0,0 +1,5 @@ +build/ +install/ +sangoma/ +user_templates/ + diff --git a/costa/native/CMakeLists.txt b/costa/native/CMakeLists.txt index a84fb4394..2eb93442e 100644 --- a/costa/native/CMakeLists.txt +++ b/costa/native/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.9.1) project(openda) -#add_subdirectory(libxml) + add_subdirectory(external/blas) add_subdirectory(external/lapack) @@ -9,3 +9,14 @@ add_subdirectory(cta/src) add_subdirectory(cta_f90/generated) add_subdirectory(bridge/src) +file(GLOB CTA_PUBLIC_HEADERS + "cta/include/*.h" +) + +install(TARGETS cta cta_f90 opendabridge + LIBRARY DESTINATION lib +) + +install(FILES ${CTA_PUBLIC_HEADERS} + DESTINATION include +) diff --git a/costa/native/bridge/src/CMakeLists.txt b/costa/native/bridge/src/CMakeLists.txt index 7e4c9e1b7..d96444e7b 100644 --- a/costa/native/bridge/src/CMakeLists.txt +++ b/costa/native/bridge/src/CMakeLists.txt @@ -16,11 +16,11 @@ set(SOURCES add_library(opendabridge SHARED ${SOURCES}) - -target_include_directories(opendabridge PUBLIC ../include) -target_include_directories(opendabridge PUBLIC ${CMAKE_SOURCE_DIR}/cta/include) -target_include_directories(opendabridge PUBLIC ${CMAKE_SOURCE_DIR}/external) -target_include_directories(opendabridge PUBLIC ${LIBXML2_INCLUDE_DIR}) -target_include_directories(opendabridge PUBLIC ${JNI_INCLUDE_DIRS}) -#target_link_libraries(cta INTERFACE libxml) +target_include_directories(opendabridge PRIVATE ../include) +target_include_directories(opendabridge PRIVATE ${CMAKE_SOURCE_DIR}/cta/include) +target_include_directories(opendabridge PRIVATE ${CMAKE_SOURCE_DIR}/external) +target_include_directories(opendabridge PRIVATE ${LIBXML2_INCLUDE_DIR}) +target_include_directories(opendabridge PRIVATE ${JNI_INCLUDE_DIRS}) +target_link_libraries(opendabridge PRIVATE ${LIBXML2_LIBRARIES}) +target_link_libraries(opendabridge PRIVATE cta) diff --git a/costa/native/cta/src/CMakeLists.txt b/costa/native/cta/src/CMakeLists.txt index d225b4c14..72c8a1ef7 100644 --- a/costa/native/cta/src/CMakeLists.txt +++ b/costa/native/cta/src/CMakeLists.txt @@ -17,10 +17,17 @@ set(SOURCES ) add_library(cta SHARED ${SOURCES}) +file(GLOB HEADER_FILES + "../include/*.h" +) + +set(CTA_PUBLIC_HEADERS ${HEADER_FILES}) +target_include_directories(cta PRIVATE ../include) +target_include_directories(cta PRIVATE ${CMAKE_SOURCE_DIR}/external) +target_include_directories(cta PRIVATE ${LIBXML2_INCLUDE_DIR}) +target_link_libraries(cta PRIVATE ${LIBXML2_LIBRARIES}) +target_link_libraries(cta PRIVATE blas) +target_link_libraries(cta PRIVATE lapack) -target_include_directories(cta PUBLIC ../include) -target_include_directories(cta PUBLIC ${CMAKE_SOURCE_DIR}/external) -target_include_directories(cta PUBLIC ${LIBXML2_INCLUDE_DIR}) -#target_link_libraries(cta INTERFACE libxml) diff --git a/costa/native/cta_f90/cta_f90-create-interfaces b/costa/native/cta_f90/cta_f90-create-interfaces new file mode 100644 index 000000000..aadb5b844 --- /dev/null +++ b/costa/native/cta_f90/cta_f90-create-interfaces @@ -0,0 +1,948 @@ +#! /usr/bin/env python + +# $URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/cta/cta_vector.c $ +# $Revision: 2751 $, $Date: 2011-09-09 08:58:46 +0200 (Fri, 09 Sep 2011) $ +# +# COSTA: Problem solving environment for data assimilation +# Copyright (C) 2012 Arjo Segers +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +""" +Generate Fortran90 interfaces from function headers. +Tool for OpenDA native core (COSTA). +Uses: explicit and implicit tags: + implicit: header files containing usr, methods containing CTAI + explicit: CTAEXPORT to export or CTANOEXPORT or CTANOF90 to ingnore. +""" + +# modules: +import os +import shutil + + +######################################################################## +### +### routines +### +######################################################################## + + +def cta_f90_interface( procname, procargs, comments ) : + + """ + Return list with Fortran90 source lines with the interface to C function. + + Arguments: + procname + Procedure name. + procargs + List with function argument descriptions. + Each item of the list is a dictionairy that of the form: + { 'name' : 'values', # variable name + 'type' : 'int', # data type + 'intent' : 'in', # intent + 'dims' : '(*)' } # array shape (if relevant) + comments + List with the original DoxyGen comment in the C header file. + """ + + # target name: + newprocname = procname.replace('CTA','CTA_F90') + + # create argument list: + argsline = '' + for iarg in range(len(procargs)) : + farg = procargs[iarg] + if iarg > 0 : argsline = argsline+', ' + argsline = argsline+farg['name'] + #endfor + + # init list with required parameters: + params = [] + # scan arguments: + for arg in procargs : + # search for known parameters: + for param in ['CTA_HANDLE_IKIND','CTA_TIME_RKIND'] : + # needed for this type ? + if param in arg['type'] : + # add if not present yet: + if param not in params : params.append(param) + #endif + #endfor # known parameters + #endfor # arguments + + # void arguments ? + with_void_args = False + for arg in procargs : + if 'void' in arg['type'] : + with_void_args = True + break + #endif + #endfor + + + # init ifcult: + ifc = [] + + # indent etc: + indent = ' ' + # add original comment: + for comment in comments : + ifc.append( indent+'! %s' % comment ) + #endfor + # extra seperation comment: + ifc.append( indent+'!' ) + # comment ? + if with_void_args : indent = ' !' + # start interface: + ifc.append( indent+'interface %s' % newprocname ) + # routine header: + ifc.append( indent+' subroutine %s( %s )' % (procname,argsline) ) + # use parameters if necessary: + for param in params : + ifc.append( indent+' use CTA_F90_Parameters, only : %s' % param ) + #endfor + # loop over arguments: + for arg in procargs : + # add argument line: + ifc.append( indent+' %-30s, intent(%-5s) :: %s%s' % (arg['type'],arg['intent'],arg['name'],arg['dims']) ) + #endfor + # end of routine interface: + ifc.append( indent+' end subroutine %s' % procname ) + # end of interface: + ifc.append( indent+'end interface' ) + ## add external declaration if void arguments were present .. + #if with_void_args : ifc.append( ' external :: %s' % procname ) + ifc.append( '' ) + + # ok + return ifc,[] + +#enddef # cta_f90_interface + + +# *** + + +def cta_f90_interface_generic( procname, procargs, comments, specials ) : + + """ + Return list with Fortran90 source lines with the interface to C function. + + Implement a number of specific routines for different argument data types, + for example 'integer', 'real(4)', 'real(8)', etc. + Add a generic interface to collect the specific routines. + + Arguments: + procname + Procedure name. + procargs + List with function argument descriptions. + Each item of the list is a dictionairy that of the form: + { 'name' : 'values', # variable name + 'type' : 'int', # data type + 'intent' : 'in', # intent + 'dims' : '(*)' } # array shape (if relevant) + comments + List with the original DoxyGen comment in the C header file. + + """ + + # target name: + newprocname = procname.replace('CTA','CTA_F90') + + # current specials: + if procname in specials.keys() : + special = specials[procname] + else : + special = {} + #endif + + ## info ... + #print 'xxx3 special = ', special + + # dummy: + argname_nval = 'no-nval' + argname_datatype = 'no-datatype' + multishape = False + # create argument lists: + argsline = '' + argsline_call = '' + # loop over initial arguments: + for iarg in range(len(procargs)) : + # current: + farg = procargs[iarg] + # extract: + argname = farg['name'] + ## info ... + #print 'xxx4 testing argument "%s" ...' % argname + # add to line with arguments to called routine: + if len(argsline_call) > 0 : argsline_call = argsline_call+', ' + argsline_call = argsline_call+argname + # generic argument ? + if ('generic' in special.keys()) and (argname == special['generic']) : + ## info ... + #print ' x4 generic variable ...' + # multi shape dimension ? + if (argname+'_dims' in special.keys()) and (special[argname+'_dims'] == '(*)') : + #print ' x4 multi shape ...' + multishape = True + #endif + #endif + # 'nval' is not needed, implied by specific routine: + if argname == 'nval' : + # store for later usage: + argname_nval = argname + # next: + continue + #endif + # 'datatype' is not needed, implied by specific routine: + if argname == 'datatype' : + # store for later usage: + argname_datatype = argname + # next: + continue + #endif + # add to line: + if len(argsline) > 0 : argsline = argsline+', ' + argsline = argsline+argname + #endfor + + # init list with required parameters: + params = [] + # scan arguments: + for arg in procargs : + # search for known parameters: + for param in ['CTA_HANDLE_IKIND','CTA_TIME_RKIND'] : + # needed for this type ? + if param in arg['type'] : + # add if not present yet: + if param not in params : params.append(param) + #endif + #endfor # known parameters + #endfor # arguments + + # init ifcult: + ifc = [] + rtn = [] + + # generic types: + gtypes = ['integer','real4','real8'] + if procname == 'CTA_Vector_SetVal' : gtypes.append('char') + + # add original comment: + for comment in comments : + ifc.append( ' ! %s' % comment ) + #endfor + # extra seperation comment: + ifc.append( ' !' ) + # start interface: + ifc.append( ' interface %s' % newprocname ) + # loop over types: + for gtype in gtypes : + + ## info ... + #print ' xxx generic type : ', gtype, '; multishape ', multishape + + # set: + if gtype == 'integer' : + spectype = 'integer' + cta_kind = 'CTA_INTEGER' + elif gtype == 'real4' : + spectype = 'real(4)' + cta_kind = 'CTA_REAL' + elif gtype == 'real8' : + spectype = 'real(8)' + cta_kind = 'CTA_DOUBLE' + elif gtype == 'char' : + spectype = 'character(len=*)' + cta_kind = 'CTA_STRING' + else : + print 'ERROR - generic type "%s" not supported' % gtype + raise Exception + #endif + + # multiple shapes ? + if multishape and (gtype != 'char') : + + # loop over suported ranks: + for rank in range(1,8) : + + # specific routine: + rname = '%s_%s_%id' % (procname,spectype.replace('(','').replace(')',''),rank) + + # add line to interface: + ifc.append( ' module procedure %s' % rname ) + + # routine header: + rtn.append( ' subroutine %s( %s )' % (rname,argsline) ) + # use parameters if necessary: + for param in params : + rtn.append( ' use CTA_F90_Parameters, only : %s' % param ) + #endfor + # use kind parameter: + rtn.append( ' use CTA_F90_Parameters, only : %s' % cta_kind ) + # loop over arguments: + for arg in procargs : + # extract: + argname = arg['name'] + argtype = arg['type'] + argdims = arg['dims'] + # skip some ... + if argname == argname_nval : continue + if argname == argname_datatype : continue + # change to specific type: + if argtype == 'void' : argtype = spectype + # adhoc ... + if argname+'_dims' in special.keys() : argdims = special[argname+'_dims'] + # replace: + if argdims == '(*)' : + for k in range(rank) : + if k == 0 : + argdims = '(:' + else : + argdims = argdims+',:' + #endif + #endfor + argdims = argdims+')' + #endif + ## info ... + #print ' xxx argument : ', argname, argdims + # add argument line: + rtn.append( ' %-30s, intent(%-5s) :: %s%s' % (argtype,arg['intent'],arg['name'],argdims) ) + #endfor + # add call to C-routine: + rtn.append( ' call %s( %s )' % (procname,argsline_call.replace(argname_datatype,cta_kind).replace(argname_nval,'size(val)')) ) + # end of routine interface: + rtn.append( ' end subroutine %s' % rname ) + rtn.append( '' ) + + #endfor # ranks + + else : + + # specific routine: + rname = '%s_%s' % (procname,gtype) + + # add line to interface: + ifc.append( ' module procedure %s' % rname ) + + # routine header: + rtn.append( ' subroutine %s( %s )' % (rname,argsline) ) + # use parameters if necessary: + for param in params : + rtn.append( ' use CTA_F90_Parameters, only : %s' % param ) + #endfor + # use kind parameter: + rtn.append( ' use CTA_F90_Parameters, only : %s' % cta_kind ) + # other, not always necessary actually: + rtn.append( ' use CTA_F90_Parameters, only : CTA_OK' ) + # loop over arguments: + for arg in procargs : + # extract: + argname = arg['name'] + argtype = arg['type'] + argdims = arg['dims'] + # skip some ... + if argname == argname_nval : continue + if argname == argname_datatype : continue + # change to specific type: + if argtype == 'void' : argtype = spectype + # adhoc ... + if argname+'_dims' in special.keys() : argdims = special[argname+'_dims'] + # add argument line: + rtn.append( ' %-30s, intent(%-5s) :: %s%s' % (argtype,arg['intent'],arg['name'],argdims) ) + #endfor + + # switch: + if (procname == 'CTA_Vector_SetVal') and (gtype == 'char') : + + # add calls to C-routines: + rtn.append( ' integer(CTA_HANDLE_IKIND) :: sval' ) + rtn.append( ' call CTA_String_Create( sval, status )' ) + rtn.append( ' if (status/=CTA_OK) return' ) + rtn.append( ' call CTA_String_Set( sval, val, status )' ) + rtn.append( ' if (status/=CTA_OK) return' ) + rtn.append( ' call %s( %s )' % (procname,argsline_call.replace('val','sval').replace(argname_datatype,cta_kind).replace(argname_nval,'size(val)')) ) + rtn.append( ' if (status/=CTA_OK) return' ) + rtn.append( ' call CTA_String_Free( sval, status )' ) + rtn.append( ' if (status/=CTA_OK) return' ) + + else : + + # add call to C-routine: + rtn.append( ' call %s( %s )' % (procname,argsline_call.replace(argname_datatype,cta_kind).replace(argname_nval,'size(val)')) ) + + #endif + + # end of routine interface: + rtn.append( ' end subroutine %s' % rname ) + rtn.append( '' ) + + #endif # multiple shapes + + #endfor # generic types + # end of interface: + ifc.append( ' end interface' ) + ifc.append( '' ) + + # add routine for 'handle' type ? + if procname in ['CTA_Vector_GetVal','CTA_Vector_SetVal'] : + + # specific routines: + # CTA_Vector_GetVal -> CTA_F90_Vector_GetHandle + # CTA_Vector_SetVal -> CTA_F90_Vector_SetHandle + rname = procname.replace('CTA_','CTA_F90_').replace('Val','Handle') + # which kind ? + spectype = 'integer' + cta_kind = 'CTA_HANDLE' + + # routine header: + rtn.append( ' subroutine %s( %s )' % (rname,argsline) ) + # use parameters if necessary: + for param in params : + rtn.append( ' use CTA_F90_Parameters, only : %s' % param ) + #endfor + # use kind parameter: + rtn.append( ' use CTA_F90_Parameters, only : %s' % cta_kind ) + # other + rtn.append( ' use CTA_F90_Parameters, only : CTA_OK' ) + # loop over arguments: + for arg in procargs : + # extract: + argname = arg['name'] + argtype = arg['type'] + argdims = arg['dims'] + # skip some ... + if argname == argname_nval : continue + if argname == argname_datatype : continue + # change to specific type: + if argtype == 'void' : argtype = spectype + # adhoc ... + if argname+'_dims' in special.keys() : argdims = special[argname+'_dims'] + # add argument line: + rtn.append( ' %-30s, intent(%-5s) :: %s%s' % (argtype,arg['intent'],arg['name'],argdims) ) + #endfor + + # add call to C-routine: + rtn.append( ' call %s( %s )' % (procname,argsline_call.replace(argname_datatype,cta_kind).replace(argname_nval,'size(val)')) ) + + # end of routine interface: + rtn.append( ' end subroutine %s' % rname ) + rtn.append( '' ) + + #endif # handle type + + # ok + return ifc,rtn + +#enddef # cta_f90_interface_generic + + +# *** + + +def cta_module_from_hfile( hfile, modname, specials ) : + + """ + Create F90 module from C header file. + """ + + # basename: + bname = os.path.basename(hfile).rstrip('.h') + + # read file: + f = open( hfile, 'r' ) + lines = f.readlines() + f.close() + + # start of function header: + fheader_comment_start = '/** \\brief' + + # init results: + interface_lines = [] + routine_lines = [] + + # loop over lines: + while len(lines) > 0 : + + # next line: + line = lines.pop(0).strip() + + # headers start with DoxyGen desxription: + if not line.startswith(fheader_comment_start) : continue + + # collect comment: + comments = [ line.replace('/**','',1) ] + while True : + # next line: + line = lines.pop(0).strip() + # end of comment ? then leave: + if line.startswith('*/') : break + # add: + comments.append(line.replace('*','',1)) + #endwhile + + # collect function definition; + # first get next line: + line = lines.pop(0).strip() + # remove trailing comment: xxx /* ... */ + if '/*' in line : line,cmnt = line.split('/*') + # init function line: + fline = line + + ## testing ... + ##testing = 'CTA_TreeVector_GetVals' + #testing = 'CTA_TreeVector_SetVals' + #if testing not in fline : continue + #print 'TESTING "%s" ...' % testing + + # info ... + print ' found procedure line: %s ...' % fline + + # skip pre-processor functions: + if fline.startswith('#define') : + print ' --> pre-processor function; skip ...' + continue + #endif + # add rest: + while not fline.endswith(';') : + # next line: + line = lines.pop(0).strip() + # remove trailing comment: xxx /* ... */ + if '/*' in line : line,cmnt = line.split('/*') + # add: + fline = fline+line + #endwhile + # split in function result/name and arguments: + ret_and_name,argline = fline.rstrip(');').split('(') + # split in result and name: + procret,procname = ret_and_name.rsplit(None,1) + # info: + print ' name : %s' % procname + print ' args : %s' % argline + print ' ret : %s' % procret + + # no internal routines: + if procname.startswith('CTAI_') : + print ' --> internal procedure, skip ...' + continue + #endif + + # no usr modules: + if hfile.find('usr_')>=0 : + print ' --> example procedure, skip ...' + continue + #endif + + # explicitly skip this ? + if (procname in specials.keys()) and specials[procname].has_key('skip') : + print ' --> special, skip this procedure ...' + continue + #endif + + # by default no return status assumed: + with_return_status = False + # only certain return values supported ... + if procret == 'CTAEXPORT int' : + # set flag: + with_return_status = True + elif procret == 'CTAEXPORT void' : + # noting special .. + pass + elif procret.startswith('CTANOF90') : + # skip because of this label + print ' --> CTANOEXPORT found, skip' + continue + elif procret.startswith('CTANOEXPORT') : + # skip because of this label + print ' --> CTANOEXPORT found, skip' + continue + else : + # unknown .. + print ' --> ERROR: unsupported return value type, skip ...' + raise Exception + #endif + + # add status argument ? + if with_return_status : + # extend argument line: + if len(argline) > 0 : argline = argline+', ' + argline = argline+'int status' + # loop over comment lines to search for '\return' tag; + # note that if no such tag is found, an error will be raised + # later on since no '\param status' description is present + for icomment in range(len(comments)) : + # contains \return key ? + if '\\return' in comments[icomment] : + # change return status from function result to argument: + comments[icomment] = comments[icomment].replace('\\return','\\param status O') + # leave loop over comments: + break + #endif + #endfor # comment lines + #endif # return status argument + + # extract argument info from comment: + # argis['data']{'intent' : 'in', + # 'description' : 'input data'} + argis = {} + # loop over + for comment in comments : + # cleanup: + comment = comment.strip() + # only the parameter descriptions: + if not comment.startswith('\param') : continue + # split: + try : + tag,name,ichar,description = comment.split(None,3) + except : + print 'ERROR - could not split comment into tag, name intent, and description :' + print 'ERROR - comment line : %s' % comment + print 'ERROR - file : %s' % hfile + raise Exception + # convert intent: + ichar = ichar.lower() + if ichar in ['i','(i)','(input)'] : + intent = 'in' + elif ichar in ['o','(o)','(output)'] : + intent = 'out' + elif ichar in ['io','(io)','(input/output)'] : + intent = 'inout' + else : + print 'ERROR - could not translate intent "%s"' % ichar + print 'ERROR - comment line : %s' % comment + print 'ERROR - file : %s' % hfile + raise Exception + #endif + # store: + argis[name] = { 'intent' : intent, 'description' : description } + #endfor + + # init list with fortan argument descriptions: + fargs = [] + # any arguments at all ? + if len(argline) > 0 : + # split into c-arguments: + cargs = argline.split(',') + # translate into fortran arguments: + for carg in cargs : + # cleanup: + carg = carg.strip() + ## remove 'const' part: + #if carg.startswith('const') : carg = carg[6:] + # split into type and name: + try : + argtype,argname = carg.rsplit(None,1) + except : + print 'ERROR - could not split carg "%s"' % carg + raise Exception + #endtry + # dereferenced point ? remove starts: + if argname.startswith('**') : argname = argname[2:] + # pointer values ? + ispointer = argname.startswith('*') + if ispointer : argname = argname[1:] + # translate type: + ctype = argtype + if ctype in ['char','const char','char*','const char*'] : + ftype = 'character(len=*)' + elif ctype in ['int','const int','int*'] : + ftype = 'integer' + elif ctype == 'double' : + if 'time' in procname.lower() : + ftype = 'real(CTA_TIME_RKIND)' + else : + ftype = 'real(8)' + #endif + elif ctype in ['CTA_Datatype','BOOL','FILE'] : + ftype = 'integer' + elif ctype.startswith('CTA_') or ctype.startswith('const CTA_') : + ftype = 'integer(CTA_HANDLE_IKIND)' + elif ctype in ['void','const void'] : + ftype = 'void' + else : + print 'ERROR - could not translate C-type : %s' % ctype + raise Exception + #endif + # cleanup name: + argname = argname.replace('*','') + # array ? + if '[' in argname : + argname,dimlist = argname.rstrip(']').split('[') + ndim = dimlist.count(',')+1 + argdims = '(' + for idim in range(ndim) : + if idim > 0 : argdims = argdims+',' + argdims = argdims+'*' + #endfor + argdims = argdims+')' + else : + argdims = '' + #endif + # check .. + if argname not in argis.keys() : + print 'ERROR - no argument description found for "%s"' % argname + print 'ERROR - procedure : %s' % procname + print 'ERROR - filename : %s' % hfile + raise Exception + #endif + # extract intent: + intent = argis[argname]['intent'] + # input pointer ? probably an array: + if ispointer and (intent == 'in') and \ + (ctype not in ['const char','FILE']) : + argdims = '(*)' + #endif + # add: + fargs.append( { 'type' : ftype, 'intent' : intent, 'name' : argname, 'dims' : argdims } ) + #endfor # arguments + #endif # any arguments at all + + # get f90 interface lines: + if (procname in specials.keys()) and ('generic' in specials[procname]) : + ## info ... + #print 'TESTING: create generic interfaces' + # convert: + interface,routines = cta_f90_interface_generic( procname, fargs, comments, specials ) + else : + ## info ... + #print 'TESTING: create interface' + # convert: + interface,routines = cta_f90_interface( procname, fargs, comments ) + #endif + + ## testing ... + #if procname == 'CTA_File_Set' : + # for iline in ilines : print iline + # print '' + # raise Exception + ##endif + + # add: + if len(interface) > 0 : interface_lines.extend( interface ) + + # add: + if len(routines) > 0 : routine_lines.extend( routines ) + + #endwhile + + # init empty module: + mlines = [] + # interfaces defined ? + if (len(interface_lines) > 0) or (len(routine_lines) > 0) : + # add module header: + mlines.append( 'module %s' % modname ) + mlines.append( '' ) + mlines.append( ' implicit none' ) + mlines.append( '' ) + mlines.append( ' public' ) + mlines.append( '' ) + if len(interface_lines) : + mlines.extend(interface_lines) + #endif + mlines.append( '' ) + if len(routine_lines) > 0 : + mlines.append( 'contains' ) + mlines.append( '' ) + mlines.extend( routine_lines ) + mlines.append( 'end module %s' % modname ) + mlines.append( '' ) + #endif + + # ok + return mlines + +#enddef # cta_module_from_hfile + + +#################################################################### +### +### main program +### +#################################################################### + +if __name__ == "__main__" : + + # info ... + print '' + print '** genenerate cta_f90 interface **' + print '' + + # directory with include files: + #hdir = 'openda_1.0/public/core/native/include' + #hdir = 'openda_1.1_beta/core/native/include/' + #hdir = 'openda_2.0/core/native/include/' + #hdir = 'openda_2.x/core/native/include/' + hdir = '../../include' + + # extra source files, mainly templates: + srcdir = '.' + + # Adhoc fixes .. + # Fill a dictionairy with CTA routine names for which something should be fixed. + # Assign a dictionairy to this with one or more of the following key/value pairs: + # + # "generic" : # is generic, thus integer, real, etc. + # + # "_dims" : # dimensions of are not explicitly + # # mentioned in header file, therefore specify them here + specials = {} + specials['CTA_Vector_GetVal' ] = { 'generic' : 'vals', 'val_dims' : '' } # should have been "val" ... + specials['CTA_Vector_GetVals' ] = { 'generic' : 'vals', 'vals_dims' : '(*)' } + specials['CTA_Vector_SetVal' ] = { 'generic' : 'val' , 'val_dims' : '' } + specials['CTA_Vector_SetVals' ] = { 'generic' : 'vals', 'vals_dims' : '(*)' } +# specials['CTA_Vector_AppendVal' ] = { 'generic' : 'val' , 'val_dims' : '' } + specials['CTA_Vector_AppendVal' ] = { 'skip' : True } + specials['CTA_Vector_SetConstant' ] = { 'generic' : 'val' , 'val_dims' : '' } + specials['CTA_TreeVector_GetVal' ] = { 'generic' : 'val' , 'val_dims' : '' } + specials['CTA_TreeVector_GetVals' ] = { 'generic' : 'val' , 'val_dims' : '(*)' } + specials['CTA_TreeVector_SetVal' ] = { 'generic' : 'val' , 'val_dims' : '' } + specials['CTA_TreeVector_SetVals' ] = { 'generic' : 'val' , 'val_dims' : '(*)' } # should have been "vals" ... + specials['CTA_TreeVector_AppendVal' ] = { 'generic' : 'val' , 'val_dims' : '' } + specials['CTA_TreeVector_SetConstant'] = { 'generic' : 'val' , 'val_dims' : '' } + specials['CTA_Pack_Add' ] = { 'generic' : 'data', 'data_dims' : '(*)' } + specials['CTA_Pack_Get' ] = { 'generic' : 'data', 'data_dims' : '(*)' } + + # destination: + mdir = 'generated' + + # create destination directory if necessary: + if not os.path.isdir(mdir) : os.makedirs(mdir) + + # list of created modules: + modnames = [] + + # info ... + print 'scan "%s" for header files ...' % hdir + + # loop over all files: + for fname in os.listdir(hdir) : + + # only header files ... + if not fname.startswith('cta_') : continue + if not fname.endswith('.h') : continue + + ## testing ... + #if fname != 'cta_treevector.h' : continue + #print 'TESTING: only file "%s" ...' % fname + + ## info ... + #print ' header file "%s" ...' % fname + + # full path: + hfile = os.path.join(hdir,fname) + + # split: + bname,ext = os.path.splitext(fname) + # target module name: + modname = bname.replace('cta_','cta_f90_') + + # avoid problems with module names that are the same as one of their routines: + if modname in ['cta_f90_flush'] : + modname = modname+'_mod' + #endif + + # info ... + print '' + print 'processing %s ...' % hfile + + # get module lines: + mlines = cta_module_from_hfile( hfile, modname, specials ) + + # write ? + if len(mlines) > 0 : + # name of module file: + mfile = os.path.join( mdir, modname+'.f90' ) + # write: + f = open( mfile, 'w' ) + for mline in mlines : + f.write( '%s\n' % mline ) + #endfor + f.close() + # add to collection: + modnames.append( modname ) + else : + print ' (no procedures found)' + #endif + + #endfor # filenames + + # collection module: + mname = 'cta_f90' + # info ... + print '' + print 'create collection module %s ...' % mname + # + # copy parameter module: + pfile = 'cta_f90_parameters.f90' + print ' copy %s ...' % pfile + shutil.copy( os.path.join(srcdir,pfile), os.path.join(mdir,pfile) ) + # + # copy makefile + #mfile = 'cta_f90.make' + #print ' copy %s ...' % mfile + #shutil.copy( os.path.join(srcdir,mfile), os.path.join(mdir,'Makefile') ) + # + # read contained routines: + incfile = 'cta_f90_contains.inc' + print ' read %s ...' % incfile + f = open( os.path.join(srcdir,incfile) ) + lines = f.readlines() + f.close() + # cleanup: + inclines = [] + for line in lines : + inclines.append( line.rstrip() ) + #endfor + # + # fill lines: + print ' fill main module ...' + mlines = [] + mlines.append( 'module %s' % mname ) + mlines.append( '' ) + mlines.append( ' use cta_f90_parameters' ) + for modname in modnames : + mlines.append( ' use %s' % modname ) + #endfor + mlines.append( '' ) + mlines.append( ' implicit none' ) + mlines.append( '' ) + mlines.append( ' public' ) + mlines.append( '' ) + mlines.append( 'contains' ) + mlines.append( '' ) + mlines.extend( inclines ) + mlines.append( '' ) + mlines.append( 'end module %s' % mname ) + # target file: + mfile = os.path.join( mdir, mname+'.f90' ) + # write: + print ' write %s ...' % mfile + f = open( mfile, 'w' ) + for mline in mlines : + f.write( '%s\n' % mline ) + #endfor + f.close() + + # ok + print '' + print 'End.' + print '' + +#endif # __main__ + diff --git a/costa/native/cta_f90/generated/CMakeLists.txt b/costa/native/cta_f90/generated/CMakeLists.txt index e94c5652e..7b24ac3e7 100644 --- a/costa/native/cta_f90/generated/CMakeLists.txt +++ b/costa/native/cta_f90/generated/CMakeLists.txt @@ -15,5 +15,6 @@ cta_f90_initialise.f90 cta_f90_model_factory.f90 cta_f90_string.f90 ) add_library(cta_f90 SHARED ${SOURCES}) -target_include_directories(cta_f90 PUBLIC ../include) +target_include_directories(cta_f90 PRIVATE ../include) +target_link_libraries(cta_f90 PRIVATE cta) From 35fb2f8793f61ae84b93b7a7c137b4aee2c375eb Mon Sep 17 00:00:00 2001 From: Werner Kramer Date: Wed, 13 Sep 2023 14:08:18 +0200 Subject: [PATCH 05/15] Add blas and lapack to install targets --- costa/native/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/costa/native/CMakeLists.txt b/costa/native/CMakeLists.txt index 2eb93442e..a4e393e6e 100644 --- a/costa/native/CMakeLists.txt +++ b/costa/native/CMakeLists.txt @@ -13,7 +13,7 @@ file(GLOB CTA_PUBLIC_HEADERS "cta/include/*.h" ) -install(TARGETS cta cta_f90 opendabridge +install(TARGETS blas lapack cta cta_f90 opendabridge LIBRARY DESTINATION lib ) From 7aeb8658d10125f0cc49baafd9dd0b52637fd860 Mon Sep 17 00:00:00 2001 From: Werner Kramer Date: Wed, 13 Sep 2023 15:02:17 +0200 Subject: [PATCH 06/15] Add missing openda bridge .c and .h files --- costa/native/bridge/src/CMakeLists.txt | 2 + .../src/jni_cta_CtaObsdescr_NativeToJava.c | 258 ++++++++++++++++++ .../src/jni_cta_CtaObsdescr_NativeToJava.h | 33 +++ costa/native/bridge/src/jni_cta_utils.h | 45 +++ costa/native/bridge/src/jni_datatypes.h | 31 +++ 5 files changed, 369 insertions(+) create mode 100644 costa/native/bridge/src/jni_cta_CtaObsdescr_NativeToJava.c create mode 100644 costa/native/bridge/src/jni_cta_CtaObsdescr_NativeToJava.h create mode 100644 costa/native/bridge/src/jni_cta_utils.h create mode 100644 costa/native/bridge/src/jni_datatypes.h diff --git a/costa/native/bridge/src/CMakeLists.txt b/costa/native/bridge/src/CMakeLists.txt index d96444e7b..843749bbc 100644 --- a/costa/native/bridge/src/CMakeLists.txt +++ b/costa/native/bridge/src/CMakeLists.txt @@ -4,6 +4,8 @@ find_package(JNI REQUIRED) set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/lib) set(SOURCES + jni_cta_CtaObsdescr_NativeToJava.c jni_cta_CtaObsdescr_NativeToJava.h + jni_cta_utils.h jni_datatypes.h jni_cta_utils.cpp org_openda_costa_CtaRelationTable.cpp org_openda_costa_CtaArray.cpp org_openda_costa_CtaStochObserver.cpp org_openda_costa_CtaInitialize.cpp org_openda_costa_CtaTime.cpp diff --git a/costa/native/bridge/src/jni_cta_CtaObsdescr_NativeToJava.c b/costa/native/bridge/src/jni_cta_CtaObsdescr_NativeToJava.c new file mode 100644 index 000000000..a331b2210 --- /dev/null +++ b/costa/native/bridge/src/jni_cta_CtaObsdescr_NativeToJava.c @@ -0,0 +1,258 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_obsdescr_combine.c $ +$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2010 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/** +\file cta_handles.h +\brief Implemenation of a generic OpenDA observation description wrapper for use in native COSTA code +*/ + + +#include +#include +#include "cta.h" + +#include "jni_datatypes.h" +#include "jni.h" +#include "jni_cta_utils.h" + + +#define CLASSNAME "CtaObsDescr_nativeToJava" + +#define IDEBUG (0) + +typedef struct { +CTA_Handle myhandle; +sJni_Class classInstance; +} CTAI_ObsDescr_NativeToJava; + + +#define METHOD "CTAI_ObsDescr_nativeToJava_Create_Size" +void CTAI_ObsDescr_nativeToJava_Create_Size(int *memsize, int *retval){ + *memsize=(int) sizeof(CTAI_ObsDescr_NativeToJava); + *retval=CTA_OK; +}; + +#undef METHOD +#define METHOD "CTAI_ObsDescr_nativeToJava_Create_Init" +void CTAI_ObsDescr_nativeToJava_Create_Init(CTA_ObsDescr *myhandle, CTAI_ObsDescr_NativeToJava *descr, + CTA_Handle *usrdat, int *retval) +{ + JNIEnv *env; + sJni_Class *classIn; + + //Set the runtime and class from userdata + + CTA_Handle_GetData(*usrdat, (void *) &classIn); + env = classIn->env; + + descr->classInstance.env = env; + descr->classInstance.cls = (*env)->NewGlobalRef(env, classIn->cls); + descr->classInstance.obj = (*env)->NewGlobalRef(env, classIn->obj); + descr->myhandle = *myhandle; + +}; + +#undef METHOD +#define METHOD "CTAI_ObsDescr_nativeToJava_CreateSel" +void CTAI_ObsDescr_nativeToJava_CreateSel(CTAI_ObsDescr_NativeToJava *descr, + CTA_String *selection, CTA_RelTable *reltab, + CTA_ObsDescr *myhandle_out, + CTAI_ObsDescr_NativeToJava *descrout, int *retval){ + +CTA_WRITE_ERROR("Method not implemented"); +*retval=CTA_NOT_IMPLEMENTED; + +} + + + +#undef METHOD +#define METHOD "CTAI_ObsDescr_nativeToJava_Get_Keys" +void CTAI_ObsDescr_nativeToJava_Get_Keys(CTAI_ObsDescr_NativeToJava *descr, + CTA_Vector *keys, int *retval) +{ + jobject jKeys; + + // Find method + JNIEnv *env= descr->classInstance.env; + jclass cls= descr->classInstance.cls; + jobject obj = descr->classInstance.obj; + jmethodID func; + + /* Gewoon aanroepen zoals in alle voorbeelden staat */ + func = (*env)->GetMethodID(env, cls, "getPropertyKeys", "()[Ljava/lang/String;"); + if (func) { + // call method and get keys + jKeys =(*env)->CallObjectMethod(env,obj,func); + + // convert java array of strings to native vector of strings + *retval = cta_jni_JavaStringVecToNativeVec(env, (jobjectArray) jKeys, *keys); + } + else { + CTA_WRITE_ERROR("Cannot find java method name=getPropertyKeys signature=()[Ljava/lang/String;"); + *retval = CTA_INTERNAL_ERROR; + } +}; + +#undef METHOD +#define METHOD "CTAI_ObsDescr_nativeToJava_Property_Count" +void CTAI_ObsDescr_nativeToJava_Property_Count( + CTAI_ObsDescr_NativeToJava *descr, + int *nkeys, + int *retval) +{ + // Find method + JNIEnv *env = descr->classInstance.env; + jobject obj = descr->classInstance.obj; + jclass cls = descr->classInstance.cls; + + jmethodID func = (*env)->GetMethodID(env, cls, "getPropertyCount", "()I"); + if (func) { + // Call method + *nkeys=(*env)->CallIntMethod(env,obj,func); + *retval=CTA_OK; + } + else { + CTA_WRITE_ERROR("Cannot find java method name=getPropertyCount signature=()I"); + *retval=CTA_INTERNAL_ERROR; + } +} +; + +#undef METHOD +#define METHOD "CTAI_ObsDescr_nativeToJava_Observation_Count" + void CTAI_ObsDescr_nativeToJava_Observation_Count( + CTAI_ObsDescr_NativeToJava *descr, + int *nobs, + int *retval) +{ + + JNIEnv *env= descr->classInstance.env; + jobject obj = descr->classInstance.obj; + jclass cls= descr->classInstance.cls; + + jmethodID func = (*env)->GetMethodID(env, cls, "getObservationCount", "()I"); + if (func) { + + // Call method + *nobs=(*env)->CallIntMethod(env,obj,func); + *retval=CTA_OK; + } + else { + CTA_WRITE_ERROR("Cannot find java method name=getObservationCount signature=()I"); + *retval=CTA_INTERNAL_ERROR; + } +}; + + +#undef METHOD +#define METHOD "CTAI_ObsDescr_nativeToJava_Get_Properties" +void CTAI_ObsDescr_nativeToJava_Get_Properties( + CTAI_ObsDescr_NativeToJava *descr, + const char *Key, + CTA_Vector *Properties, + CTA_Datatype *datatype, + int *retval) +{ + jobject jProperties; + jstring jKey; + + // Find method + JNIEnv *env= descr->classInstance.env; + jclass cls= descr->classInstance.cls; + jobject obj = descr->classInstance.obj; + jmethodID func; + + func = (*env)->GetMethodID(env, cls, "getStringProperties", "(Ljava/lang/String;)[Ljava/lang/String;"); + if (func) { + // call method and get properties + jKey= (*env)->NewStringUTF(env, Key); + + jProperties =(*env)->CallObjectMethod(env,obj,func,jKey); + + // convert java array of strings to native vector + if (jProperties){ + *retval = cta_jni_JavaStringVecToNativeVec(env, (jobjectArray) jProperties, *Properties); + } + else { + CTA_WRITE_ERROR("getStringProperties returned no properties"); + *retval = CTA_JNI_INTERFACING_ERROR; + } + } + else { + CTA_WRITE_ERROR("Cannot find java method name=getStringProperties signature=(Ljava/lang/String;)[Ljava/lang/String;"); + *retval = CTA_INTERNAL_ERROR; + } +} + + +#undef METHOD +#define METHOD "CTAI_ObsDescr_nativeToJava_Free" +void CTAI_ObsDescr_nativeToJava_Free( + CTAI_ObsDescr_NativeToJava *descr, + int *retval) +{ + // Release the global references + JNIEnv *env = descr->classInstance.env; + (*env)->DeleteGlobalRef(env, descr->classInstance.cls); + (*env)->DeleteGlobalRef(env, descr->classInstance.obj); + + // Since we did not allocate any memory, there is nothing to do + *retval=CTA_OK; +} + + + +#undef METHOD +#define METHOD "CTA_ObsDescr_nativeToJava_initialise" +void CTA_ObsDescr_nativeToJava_initialise(CTA_ObsDescrClass *hobsdescrcl) +{ + CTA_Intf hintf=0; + CTA_Func h_func[I_CTA_OBSDESCR_NUMFUNC]; + + int ierr; + + // The vector h_func is filled with COSTA-function handles of the + // implementations in this file. + ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Create_Size, hintf, + &h_func[I_CTA_OBSDESCR_CREATE_SIZE]); + ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Create_Init, hintf, + &h_func[I_CTA_OBSDESCR_CREATE_INIT]); + ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Property_Count, hintf, + &h_func[I_CTA_OBSDESCR_COUNT_PROPERTIES]); + ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Get_Properties, hintf, + &h_func[I_CTA_OBSDESCR_GET_PROPERTIES]); + ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Observation_Count, hintf, + &h_func[I_CTA_OBSDESCR_COUNT_OBSERVATIONS]); + ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Get_Keys, hintf, + &h_func[I_CTA_OBSDESCR_GET_KEYS]); + ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Free, hintf, + &h_func[I_CTA_OBSDESCR_FREE]); + ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_CreateSel, hintf, + &h_func[I_CTA_OBSDESCR_SELECTION]); + ierr=CTA_ObsDescr_DefineClass("cta_obsdescr_nativeToJava",h_func,hobsdescrcl); + +} + + + + diff --git a/costa/native/bridge/src/jni_cta_CtaObsdescr_NativeToJava.h b/costa/native/bridge/src/jni_cta_CtaObsdescr_NativeToJava.h new file mode 100644 index 000000000..b642da618 --- /dev/null +++ b/costa/native/bridge/src/jni_cta_CtaObsdescr_NativeToJava.h @@ -0,0 +1,33 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_obsdescr_combine.c $ +$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $ + +COSTA: Problem solving environment for data assimilation +Copyright (C) 2010 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef __cplusplus +extern "C" { +#endif + +void CTA_ObsDescr_nativeToJava_initialise(CTA_ObsDescrClass *hobsdescrcl); + +#ifdef __cplusplus +} +#endif + + diff --git a/costa/native/bridge/src/jni_cta_utils.h b/costa/native/bridge/src/jni_cta_utils.h new file mode 100644 index 000000000..c22e4c6d6 --- /dev/null +++ b/costa/native/bridge/src/jni_cta_utils.h @@ -0,0 +1,45 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/openda/bridge/jni_cta_utils.h $ +$Revision: 1553 $, $Date: 2010-05-07 14:55:23 +0200 (Fri, 07 May 2010) $ + +OpenDA interface for COSTA. +Copyright (C) 2007 Stef Hummel / Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + + +#include "jni.h" +#include "cta.h" +void cta_jni_setJavaEnv(JNIEnv *env); +#ifdef __cplusplus +void cta_jni_exception(JNIEnv *env, const char *name, const char *msg); +#endif +void cta_jni_exception(JNIEnv *env, const char *name, const char *msg, int retVal); +void cta_jni_free(JNIEnv * env, jobject obj_this); +CTA_Handle cta_jni_getCtaHandle(JNIEnv * env, jobject obj_this); + +void cta_jni_setCtaHandle(JNIEnv * env, jobject obj_this, CTA_Handle ctaHandle); +void cta_jni_ExternalMessageWriter(char *className, char *method, char *message, char type); +void cta_jni_ExternalMessageWriterSetID(JNIEnv *env, jclass classID, jmethodID methodID); + +#ifdef __cplusplus +extern "C" { +#endif +int cta_jni_JavaStringVecToNativeVec(JNIEnv *env, jobjectArray jArray, CTA_Vector cVec); + +#ifdef __cplusplus +} +#endif diff --git a/costa/native/bridge/src/jni_datatypes.h b/costa/native/bridge/src/jni_datatypes.h new file mode 100644 index 000000000..4ec184a68 --- /dev/null +++ b/costa/native/bridge/src/jni_datatypes.h @@ -0,0 +1,31 @@ +/* +$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/openda/bridge/jni_cta_utils.h $ +$Revision: 1257 $, $Date: 2009-12-21 13:42:01 +0100 (ma, 21 dec 2009) $ + +OpenDA interface for COSTA. +Copyright (C) 2010 Nils van Velzen + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + + +#include + +typedef struct { +JNIEnv *env; //Poiner to Java runtime environment; +jclass cls; //JNI handle to the java class instance; +jobject obj; //JNI object handle; +} sJni_Class; + From 20cab716c79546e0b1be8970c1ea8a6223c72a4b Mon Sep 17 00:00:00 2001 From: Werner Kramer Date: Wed, 13 Sep 2023 17:25:36 +0200 Subject: [PATCH 07/15] Link to sqlite3 --- costa/native/cta/src/CMakeLists.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/costa/native/cta/src/CMakeLists.txt b/costa/native/cta/src/CMakeLists.txt index 72c8a1ef7..c4b45d21e 100644 --- a/costa/native/cta/src/CMakeLists.txt +++ b/costa/native/cta/src/CMakeLists.txt @@ -1,5 +1,6 @@ cmake_minimum_required(VERSION 3.9.1) find_package(LibXml2 REQUIRED) +find_package(SQLite3 REQUIRED) set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/lib) set(SOURCES @@ -25,8 +26,12 @@ set(CTA_PUBLIC_HEADERS ${HEADER_FILES}) target_include_directories(cta PRIVATE ../include) target_include_directories(cta PRIVATE ${CMAKE_SOURCE_DIR}/external) -target_include_directories(cta PRIVATE ${LIBXML2_INCLUDE_DIR}) +target_include_directories(cta PRIVATE ${LIBXML2_INCLUDE_DIR}) +target_include_directories(cta PRIVATE ${SQLite3_INCLUDE_DIRS}) + target_link_libraries(cta PRIVATE ${LIBXML2_LIBRARIES}) +target_link_libraries(cta PRIVATE ${SQLite3_LIBRARIES}) + target_link_libraries(cta PRIVATE blas) target_link_libraries(cta PRIVATE lapack) From 6f8e5a8aa3988c9ba6bb116adea7ab6e5de1db46 Mon Sep 17 00:00:00 2001 From: Werner Kramer Date: Wed, 13 Sep 2023 17:49:31 +0200 Subject: [PATCH 08/15] Link to netcdf --- costa/native/cta/src/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/costa/native/cta/src/CMakeLists.txt b/costa/native/cta/src/CMakeLists.txt index c4b45d21e..4e4d779cb 100644 --- a/costa/native/cta/src/CMakeLists.txt +++ b/costa/native/cta/src/CMakeLists.txt @@ -2,6 +2,9 @@ cmake_minimum_required(VERSION 3.9.1) find_package(LibXml2 REQUIRED) find_package(SQLite3 REQUIRED) +string(REPLACE ":" ";" LIBRARY_DIRS $ENV{LD_LIBRARY_PATH}) +find_library (NETCDF_LIBRARIES libnetcdf.so PATHS ${LIBRARY_DIRS}) + set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/lib) set(SOURCES cta_array.c cta_matrix_blas.c cta_obsdescr_combine.c cta_sobs_combine.c cta_util_sort.c @@ -31,6 +34,7 @@ target_include_directories(cta PRIVATE ${SQLite3_INCLUDE_DIRS}) target_link_libraries(cta PRIVATE ${LIBXML2_LIBRARIES}) target_link_libraries(cta PRIVATE ${SQLite3_LIBRARIES}) +target_link_libraries(cta PRIVATE ${NETCDF_LIBRARIES}) target_link_libraries(cta PRIVATE blas) target_link_libraries(cta PRIVATE lapack) From dd7337237beed89c93a91f1b61dbef48a5001b66 Mon Sep 17 00:00:00 2001 From: Werner Kramer Date: Fri, 22 Sep 2023 13:32:12 +0200 Subject: [PATCH 09/15] Add check for HAVE_DLFCN_H --- costa/native/cta/src/CMakeLists.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/costa/native/cta/src/CMakeLists.txt b/costa/native/cta/src/CMakeLists.txt index 4e4d779cb..f8e5ffdbd 100644 --- a/costa/native/cta/src/CMakeLists.txt +++ b/costa/native/cta/src/CMakeLists.txt @@ -1,4 +1,13 @@ cmake_minimum_required(VERSION 3.9.1) +# Check for the presence of dlfcn.h +include(CheckIncludeFile) + +# Perform the check +CHECK_INCLUDE_FILE("dlfcn.h" HAVE_DLFCN_H) +if(HAVE_DLFCN_H) + add_definitions(-DHAVE_DLFCN_H) +endif() + find_package(LibXml2 REQUIRED) find_package(SQLite3 REQUIRED) From 7a004ff3085b77f21f808bb43081cd75e1b63a1f Mon Sep 17 00:00:00 2001 From: Werner Kramer Date: Tue, 26 Sep 2023 16:24:15 +0200 Subject: [PATCH 10/15] Add FindMPI --- costa/native/cta/src/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/costa/native/cta/src/CMakeLists.txt b/costa/native/cta/src/CMakeLists.txt index f8e5ffdbd..e12fa6af7 100644 --- a/costa/native/cta/src/CMakeLists.txt +++ b/costa/native/cta/src/CMakeLists.txt @@ -1,6 +1,12 @@ cmake_minimum_required(VERSION 3.9.1) # Check for the presence of dlfcn.h include(CheckIncludeFile) +include(FindMPI) + +if(MPI_FOUND) + message("Found MPI: ${MPI_FOUND}\n") + add_definitions(-DUSE_MPI) +endif() # Perform the check CHECK_INCLUDE_FILE("dlfcn.h" HAVE_DLFCN_H) From f6e8eaff215523ddde84dbc45e4af01eb608f6c1 Mon Sep 17 00:00:00 2001 From: Werner Kramer Date: Tue, 26 Sep 2023 16:25:36 +0200 Subject: [PATCH 11/15] Remove unused libraries for bridge --- costa/native/bridge/src/CMakeLists.txt | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/costa/native/bridge/src/CMakeLists.txt b/costa/native/bridge/src/CMakeLists.txt index 843749bbc..a00d1fd57 100644 --- a/costa/native/bridge/src/CMakeLists.txt +++ b/costa/native/bridge/src/CMakeLists.txt @@ -18,11 +18,10 @@ set(SOURCES add_library(opendabridge SHARED ${SOURCES}) +target_include_directories(opendabridge PRIVATE ${LIBXML2_INCLUDE_DIR}) +target_include_directories(opendabridge PRIVATE ${JNI_INCLUDE_DIRS}) + target_include_directories(opendabridge PRIVATE ../include) target_include_directories(opendabridge PRIVATE ${CMAKE_SOURCE_DIR}/cta/include) -target_include_directories(opendabridge PRIVATE ${CMAKE_SOURCE_DIR}/external) -target_include_directories(opendabridge PRIVATE ${LIBXML2_INCLUDE_DIR}) -target_include_directories(opendabridge PRIVATE ${JNI_INCLUDE_DIRS}) -target_link_libraries(opendabridge PRIVATE ${LIBXML2_LIBRARIES}) target_link_libraries(opendabridge PRIVATE cta) From a354666437bc60665afc08404198b7cb3b06fb63 Mon Sep 17 00:00:00 2001 From: Werner Kramer Date: Wed, 27 Sep 2023 09:39:37 +0200 Subject: [PATCH 12/15] Enable DEBUG in cta_par.c --- costa/native/cta/src/cta_par.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/costa/native/cta/src/cta_par.c b/costa/native/cta/src/cta_par.c index f0b86abdf..134ad27e6 100644 --- a/costa/native/cta/src/cta_par.c +++ b/costa/native/cta/src/cta_par.c @@ -36,7 +36,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #define CTA_PAR_CREATEGROUPS_F77 F77_CALL(cta_par_creategroups,CTA_PAR_CREATEGROUPS) #define CTA_PAR_GETGROUPINFO_F77 F77_CALL(cta_par_getgroupinfo,CTA_PAR_GETGROUPINFO) -#define IDEBUG (0) +#define IDEBUG (1) enum CTAI_ParType {WorkerWorker,MasterWorker}; enum CTA_ParProcType CTA_MY_PROC_TYPE=CTA_ParMaster; int CTA_IS_PARALLEL=CTA_FALSE; From a2cc35fc6ce3fcf6cbe24a3e62a5a7210dc1435e Mon Sep 17 00:00:00 2001 From: Werner Kramer Date: Wed, 27 Sep 2023 11:31:02 +0200 Subject: [PATCH 13/15] Add HAVE_LIBNETCDF to definitions --- costa/native/cta/src/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/costa/native/cta/src/CMakeLists.txt b/costa/native/cta/src/CMakeLists.txt index e12fa6af7..94f097c5b 100644 --- a/costa/native/cta/src/CMakeLists.txt +++ b/costa/native/cta/src/CMakeLists.txt @@ -20,6 +20,8 @@ find_package(SQLite3 REQUIRED) string(REPLACE ":" ";" LIBRARY_DIRS $ENV{LD_LIBRARY_PATH}) find_library (NETCDF_LIBRARIES libnetcdf.so PATHS ${LIBRARY_DIRS}) +add_definitions(-DHAVE_LIBNETCDF) + set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/lib) set(SOURCES cta_array.c cta_matrix_blas.c cta_obsdescr_combine.c cta_sobs_combine.c cta_util_sort.c From e9c6c0f5c35615340cf206e768e5cd507c53afb7 Mon Sep 17 00:00:00 2001 From: Werner Kramer Date: Wed, 27 Sep 2023 14:57:06 +0200 Subject: [PATCH 14/15] Add netcdf include dir --- costa/native/cta/src/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/costa/native/cta/src/CMakeLists.txt b/costa/native/cta/src/CMakeLists.txt index 94f097c5b..59a327bbd 100644 --- a/costa/native/cta/src/CMakeLists.txt +++ b/costa/native/cta/src/CMakeLists.txt @@ -19,8 +19,12 @@ find_package(SQLite3 REQUIRED) string(REPLACE ":" ";" LIBRARY_DIRS $ENV{LD_LIBRARY_PATH}) find_library (NETCDF_LIBRARIES libnetcdf.so PATHS ${LIBRARY_DIRS}) +set(NETCDF_INCLUDE_DIRS ${NETCDF_LIBRARIES}) +cmake_path(REMOVE_FILENAME NETCDF_INCLUDE_DIRS) +string(REPLACE "/lib" "/include" NETCDF_INCLUDE_DIRS ${NETCDF_INCLUDE_DIRS}) add_definitions(-DHAVE_LIBNETCDF) +message("NetCDF include : ${NETCDF_INCLUDE_DIRS}\n") set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/lib) set(SOURCES @@ -48,6 +52,7 @@ target_include_directories(cta PRIVATE ../include) target_include_directories(cta PRIVATE ${CMAKE_SOURCE_DIR}/external) target_include_directories(cta PRIVATE ${LIBXML2_INCLUDE_DIR}) target_include_directories(cta PRIVATE ${SQLite3_INCLUDE_DIRS}) +target_include_directories(cta PRIVATE ${NETCDF_INCLUDE_DIRS}) target_link_libraries(cta PRIVATE ${LIBXML2_LIBRARIES}) target_link_libraries(cta PRIVATE ${SQLite3_LIBRARIES}) From 9b986739572c95043e82f721af3029d3635b707c Mon Sep 17 00:00:00 2001 From: Werner Kramer Date: Mon, 13 Nov 2023 13:43:18 +0100 Subject: [PATCH 15/15] Remove .so extension from find_library --- costa/native/cta/src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/costa/native/cta/src/CMakeLists.txt b/costa/native/cta/src/CMakeLists.txt index 59a327bbd..567cb54b2 100644 --- a/costa/native/cta/src/CMakeLists.txt +++ b/costa/native/cta/src/CMakeLists.txt @@ -18,7 +18,7 @@ find_package(LibXml2 REQUIRED) find_package(SQLite3 REQUIRED) string(REPLACE ":" ";" LIBRARY_DIRS $ENV{LD_LIBRARY_PATH}) -find_library (NETCDF_LIBRARIES libnetcdf.so PATHS ${LIBRARY_DIRS}) +find_library (NETCDF_LIBRARIES libnetcdf PATHS ${LIBRARY_DIRS}) set(NETCDF_INCLUDE_DIRS ${NETCDF_LIBRARIES}) cmake_path(REMOVE_FILENAME NETCDF_INCLUDE_DIRS)